├── gym_brt ├── quanser │ ├── quanser_wrapper │ │ ├── __init__.py │ │ ├── setup.py │ │ ├── quanser_types.pxd │ │ ├── hil.pxd │ │ └── quanser_wrapper.pyx │ ├── __init__.py │ ├── qube_simulator.py │ └── qube_interfaces.py ├── envs │ ├── rendering │ │ ├── __init__.py │ │ └── qube_render.py │ ├── __init__.py │ ├── qube_rotor_env.py │ ├── qube_test_env.py │ ├── qube_dampen_env.py │ ├── qube_swingup_env.py │ ├── qube_balance_env.py │ └── qube_base_env.py ├── control │ ├── __init__.py │ └── control.py └── __init__.py ├── .gitattributes ├── QUBE-Servo_2_angled_pendulum.jpg ├── docs ├── README.md ├── alternatives.md ├── control.md ├── quanser.md └── envs.md ├── Pipfile ├── LICENSE ├── .gitignore ├── setup.py ├── environment.yaml ├── README.md └── tests ├── lqr.py ├── test.py └── notebooks └── ODE vs Euler speedtest.ipynb /gym_brt/quanser/quanser_wrapper/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.ipynb linguist-vendored 2 | -------------------------------------------------------------------------------- /gym_brt/envs/rendering/__init__.py: -------------------------------------------------------------------------------- 1 | from gym_brt.envs.rendering.qube_render import QubeRendererVypthon as QubeRenderer 2 | -------------------------------------------------------------------------------- /QUBE-Servo_2_angled_pendulum.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BlueRiverTech/quanser-openai-driver/HEAD/QUBE-Servo_2_angled_pendulum.jpg -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Table of contents 3 | - [Setup 
[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"

[dev-packages]
Cython = "==0.29.32"
autograd = "*"

[packages]
gym = "==0.21.0"
numpy = "==1.21.6"
scipy = "==1.7.3"
vpython = "==7.6.4"
gym-brt = {path = ".", editable = true}
gym 8 | from gym_brt import QubeSwingupEnv 9 | 10 | num_episodes = 10 11 | num_steps = 250 12 | 13 | env = QubeSwingupEnv() 14 | try: 15 | for episode in range(num_episodes): 16 | state = env.reset() 17 | for step in range(num_steps): 18 | action = env.action_space.sample() 19 | state, reward, done, _ = env.step(action) 20 | finally: 21 | env.close() 22 | ``` 23 | -------------------------------------------------------------------------------- /docs/control.md: -------------------------------------------------------------------------------- 1 | 2 | # Controllers 3 | 4 | ### Control 5 | Control base class. 6 | 7 | ### NoControl 8 | Applies no action (sets voltages to zero). 9 | 10 | ### RandomControl 11 | Applies a random action (samples from the action space). 12 | 13 | ### AeroControl 14 | Uses PID control to minimize (pitch - reference pitch) + (yaw - reference yaw), where reference is the original position. 15 | 16 | ### QubeFlipUpControl 17 | Uses a mixed mode controller that uses gains found from LQR to do the flip up when the pendulum angle is over than 20 degrees off upright, and uses PID control and filtering to hold the pendulum upright when under 20 degrees. 18 | 19 | ### QubeHoldControl 20 | Holding control uses PID with filtering, and outside of 20 degrees use no control. 
21 | -------------------------------------------------------------------------------- /gym_brt/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from gym_brt.envs.qube_swingup_env import ( 2 | QubeSwingupEnv, 3 | QubeSwingupSparseEnv, 4 | QubeSwingupFollowEnv, 5 | QubeSwingupFollowSparseEnv, 6 | ) 7 | from gym_brt.envs.qube_balance_env import ( 8 | QubeBalanceEnv, 9 | QubeBalanceSparseEnv, 10 | QubeBalanceFollowEnv, 11 | QubeBalanceFollowSparseEnv, 12 | ) 13 | from gym_brt.envs.qube_dampen_env import ( 14 | QubeDampenEnv, 15 | QubeDampenSparseEnv, 16 | QubeDampenFollowEnv, 17 | QubeDampenFollowSparseEnv, 18 | ) 19 | from gym_brt.envs.qube_test_env import ( 20 | QubeBalanceFollowSineWaveEnv, 21 | QubeSwingupFollowSineWaveEnv, 22 | QubeRotorFollowSineWaveEnv, 23 | QubeDampenFollowSineWaveEnv, 24 | ) 25 | from gym_brt.envs.qube_rotor_env import QubeRotorEnv, QubeRotorFollowEnv 26 | -------------------------------------------------------------------------------- /gym_brt/quanser/quanser_wrapper/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from distutils.extension import Extension 3 | from Cython.Build import cythonize 4 | import numpy 5 | 6 | extensions = [ 7 | Extension( 8 | "quanser_wrapper", 9 | ["quanser_wrapper.pyx"], 10 | include_dirs=["/opt/quanser/hil_sdk/include", numpy.get_include()], 11 | libraries=[ 12 | "hil", 13 | "quanser_runtime", 14 | "quanser_common", 15 | "rt", 16 | "pthread", 17 | "dl", 18 | "m", 19 | "c", 20 | ], 21 | library_dirs=["/opt/quanser/hil_sdk/lib"], 22 | ) 23 | ] 24 | cy_extensions = cythonize(extensions, compiler_directives={"language_level" : "3"}) 25 | setup(ext_modules=cy_extensions, include_dirs=[numpy.get_include()]) 26 | -------------------------------------------------------------------------------- /docs/quanser.md: 
-------------------------------------------------------------------------------- 1 | 2 | # Quanser 3 | The OpenAI Gym environments use one of the following to calculate each step. 4 | 5 | Quanser wrapper is a hardware wrapper that allows the Qube Servo 2 and Quanser Aero to be used directly in OpenAI Gym. 6 | Quanser Simulator simulates the Qube Servo 2 only (Aero is currently not supported for simulation). 7 | 8 | # Quanser Wrapper 9 | The implementation of a Python wrapper around Quanser's C-based HIL SDK. 10 | This is written in Cython. 11 | 12 | All changes to this are recompiled if you install the gym_brt package with Cython installed. 13 | 14 | # Quanser Simulator 15 | A simulator and wrapper to use a simulator directly in the Qube environments instead of using the real hardware. 16 | This is useful for transfer learning from simulation to real hardware and for people who don't have access to the Qube Servo 2. 17 | -------------------------------------------------------------------------------- /gym_brt/quanser/quanser_wrapper/quanser_types.pxd: -------------------------------------------------------------------------------- 1 | cimport numpy as np 2 | 3 | cdef extern from "/opt/quanser/hil_sdk/include/quanser_types.h": 4 | ctypedef int t_error 5 | 6 | ctypedef np.npy_int8 t_boolean # must always be 8 bits 7 | ctypedef np.npy_int8 t_byte # must always be 8 bits 8 | ctypedef np.npy_uint8 t_ubyte # must always be 8 bits 9 | ctypedef np.npy_int16 t_short # must always be 16 bits 10 | ctypedef np.npy_uint16 t_ushort # must always be 16 bits 11 | ctypedef np.npy_int32 t_int # must always be 32 bits 12 | ctypedef np.npy_uint32 t_uint # must always be 32 bits 13 | ctypedef np.npy_float64 t_double # must always be 64 bits 14 | ctypedef np.npy_int64 t_long # must always be 64 bits 15 | 16 | ctypedef np.npy_int8 t_int8 17 | ctypedef np.npy_uint8 t_uint8 18 | ctypedef np.npy_int16 t_int16 19 | ctypedef np.npy_uint16 t_uint16 20 | ctypedef np.npy_int32 t_int32 21 | 
ctypedef np.npy_uint32 t_uint32 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Blue River Technology 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /docs/envs.md: -------------------------------------------------------------------------------- 1 | 2 | # Environments 3 | Quanser OpenAI Driver currently supports two pieces of hardware: the Qube Servo 2 USB and the Quanser Aero. 4 | 5 | # Qube 6 | For more information about the Qube click [here](https://www.quanser.com/products/qube-servo-2/) 7 | 8 | ### QubeBaseEnv 9 | The base class for Qube environments. 10 | Info: 11 | 12 | - Reset starts the pendulum from the bottom (at rest). 13 | - Has no reward function. 
### QubeBeginUprightEnv
Info:

- Reset starts the pendulum from the top (flipped up/inverted).
- The task is to hold the pendulum upright.
- Episode ends once the alpha angle is greater than 20 degrees or the theta angle is greater than 90 degrees.
- Reward is a function of the angles theta (arm angle) and alpha (pendulum), and the alpha angular velocity.
- Encourages the arm to stay centered, the pendulum to stay upright, and the system to stay stationary.
class build_ext(_build_ext):
    """Custom build_ext that defers the numpy include-dir lookup to build time.

    This lets ``setup_requires=["numpy"]`` install numpy first, so numpy's
    headers can be located even when numpy was not installed before running
    this setup.py.
    """

    def finalize_options(self):
        _build_ext.finalize_options(self)
        # Prevent numpy from thinking it is still in its setup process:
        # NOTE(review): ``__builtins__`` is a module when this file runs as a
        # script but a dict when imported as a module — this attribute-style
        # access presumably relies on the script case; confirm if this setup.py
        # is ever imported rather than executed.
        __builtins__.__NUMPY_SETUP__ = False
        import numpy

        # Make numpy's C headers visible to the Cython extension build.
        self.include_dirs.append(numpy.get_include())
| "dl", 28 | "m", 29 | "c", 30 | ], 31 | library_dirs=["/opt/quanser/hil_sdk/lib"], 32 | ) 33 | ] 34 | 35 | # If Cython is installed build from source otherwise use the precompiled version 36 | try: 37 | from Cython.Build import cythonize 38 | 39 | extensions = cythonize(extensions, compiler_directives={"language_level" : "3"}) 40 | except ImportError: 41 | pass 42 | 43 | 44 | # Hacky way to check if the HIL SDK is installed (allows to run on Mac OS) 45 | is_hil_sdk_installed = False 46 | if os.path.isdir("/opt/quanser/hil_sdk/lib"): 47 | is_hil_sdk_installed = True 48 | 49 | setup( 50 | name="gym_brt", 51 | version=0.1, 52 | cmdclass={"build_ext": build_ext} if is_hil_sdk_installed else {}, 53 | install_requires=["numpy", "gym", "scipy", "vpython"], 54 | setup_requires=["numpy"], 55 | ext_modules=extensions if is_hil_sdk_installed else None, 56 | description="Blue River's OpenAI Gym wrapper around Quanser hardware.", 57 | url="https://github.com/BlueRiverTech/quanser-openai-driver/", 58 | author="Blue River Technology", 59 | license="MIT", 60 | ) 61 | -------------------------------------------------------------------------------- /gym_brt/envs/qube_rotor_env.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from __future__ import division 4 | 5 | import numpy as np 6 | from gym import spaces 7 | from gym_brt.envs.qube_base_env import QubeBaseEnv 8 | 9 | 10 | class QubeRotorEnv(QubeBaseEnv): 11 | def _reward(self): 12 | # Few options for reward: 13 | # - high reward for large alpha_dot and small theta 14 | # - reward for matching the action of the RPM controller 15 | # - reward for how close the pendulum matches a clock hand going at a certain RPM 16 | 17 | theta_dist = (1 - np.abs(self._target_angle - self._theta)) / np.pi 18 | return np.clip(theta_dist * self._alpha_dot / 20, 0, 20) 19 | 20 | def _isdone(self): 21 | done = False 22 | done 
class QubeRotorFollowEnv(QubeRotorEnv):
    """Rotor environment whose observation additionally exposes the target angle."""

    def __init__(self, **kwargs):
        super(QubeRotorFollowEnv, self).__init__(**kwargs)
        # Observation is [theta, alpha, theta_dot, alpha_dot, target_angle];
        # the target is limited to +/-80 degrees.
        limits = np.asarray(
            [np.pi / 2, np.pi, np.inf, np.inf, 80 * (np.pi / 180)], dtype=np.float64
        )
        self.observation_space = spaces.Box(-limits, limits)

    def _get_state(self):
        """Return the 5-element observation including the current target angle."""
        return np.array(
            [
                self._theta,
                self._alpha,
                self._theta_dot,
                self._alpha_dot,
                self._target_angle,
            ],
            dtype=np.float64,
        )

    def _next_target_angle(self):
        """Resample the target roughly twice a second, otherwise keep it."""
        # Update the target angle twice a second on average at random intervals
        if np.random.randint(1, self._frequency / 2) != 1:
            return self._target_angle
        max_angle = 80 * (np.pi / 180)  # 80 degrees
        return np.random.uniform(-max_angle, max_angle)
autobahn==19.9.3 27 | - backcall==0.1.0 28 | - bleach==3.1.0 29 | - cffi==1.12.3 30 | - cloudpickle==1.2.2 31 | - cryptography==2.7 32 | - decorator==4.4.0 33 | - defusedxml==0.6.0 34 | - entrypoints==0.3 35 | - future==0.17.1 36 | - gym==0.14.0 37 | - ipykernel==5.1.2 38 | - ipython==7.8.0 39 | - ipython-genutils==0.2.0 40 | - ipywidgets==7.5.1 41 | - jedi==0.15.1 42 | - jinja2==2.10.1 43 | - jsonschema==3.0.2 44 | - jupyter==1.0.0 45 | - jupyter-client==5.3.3 46 | - jupyter-console==6.0.0 47 | - jupyter-core==4.5.0 48 | - markupsafe==1.1.1 49 | - mistune==0.8.4 50 | - nbconvert==5.6.0 51 | - nbformat==4.4.0 52 | - notebook==6.0.1 53 | - numpy==1.17.2 54 | - pandocfilters==1.4.2 55 | - parso==0.5.1 56 | - pexpect==4.7.0 57 | - pickleshare==0.7.5 58 | - prometheus-client==0.7.1 59 | - prompt-toolkit==2.0.9 60 | - ptyprocess==0.6.0 61 | - pycparser==2.19 62 | - pyglet==1.3.2 63 | - pygments==2.4.2 64 | - pyrsistent==0.15.4 65 | - python-dateutil==2.8.0 66 | - pyzmq==18.1.0 67 | - qtconsole==4.5.5 68 | - scipy==1.3.1 69 | - send2trash==1.5.0 70 | - six==1.12.0 71 | - terminado==0.8.2 72 | - testpath==0.4.2 73 | - tornado==6.0.3 74 | - traitlets==4.3.2 75 | - txaio==18.8.1 76 | - vpnotebook==0.1.3 77 | - vpython==7.5.1 78 | - wcwidth==0.1.7 79 | - webencodings==0.5.1 80 | - widgetsnbextension==3.5.1 81 | -------------------------------------------------------------------------------- /gym_brt/envs/qube_test_env.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from __future__ import division 4 | 5 | import numpy as np 6 | from gym import spaces 7 | from gym_brt.envs.qube_base_env import QubeBaseEnv 8 | from gym_brt.envs.qube_swingup_env import QubeSwingupFollowEnv 9 | from gym_brt.envs.qube_balance_env import QubeBalanceFollowEnv 10 | from gym_brt.envs.qube_dampen_env import QubeDampenFollowEnv 11 | from gym_brt.envs.qube_rotor_env import 
"""
A series of testing environments to visualize how a policy learned in each of
the `Follow` environments behaves when the target angle follows a sine wave.
"""


class QubeBalanceFollowSineWaveEnv(QubeBalanceFollowEnv):
    def _next_target_angle(self):
        # Target angle follows a sine wave between -60 and +60 degrees
        # (amplitude pi/3), advancing by one step per environment step.
        # Debug print of target/current/difference angles in degrees
        # (57.3 ~= 180/pi).
        print(
            "T:{:05.2f}, C:{:05.2f}, Diff:{:05.2f}".format(
                self._target_angle * (57.3),
                57.3 * self._theta,
                57.3 * (self._target_angle - self._theta),
            )
        )
        return np.pi / 3 * np.sin(self._episode_steps / self._frequency)


class QubeSwingupFollowSineWaveEnv(QubeSwingupFollowEnv):
    def _next_target_angle(self):
        # Target angle follows a sine wave between -60 and +60 degrees
        # (amplitude pi/3).
        # Debug print of target/current/difference angles in degrees
        # (57.3 ~= 180/pi).
        print(
            "T:{:05.2f}, C:{:05.2f}, Diff:{:05.2f}".format(
                self._target_angle * (57.3),
                57.3 * self._theta,
                57.3 * (self._target_angle - self._theta),
            )
        )
        return np.pi / 3 * np.sin(self._episode_steps / self._frequency)


class QubeRotorFollowSineWaveEnv(QubeRotorFollowEnv):
    def _next_target_angle(self):
        # Target angle follows a sine wave between -60 and +60 degrees
        # (amplitude pi/3).
        # Debug print of target/current/difference angles in degrees
        # (57.3 ~= 180/pi).
        print(
            "T:{:05.2f}, C:{:05.2f}, Diff:{:05.2f}".format(
                self._target_angle * (57.3),
                57.3 * self._theta,
                57.3 * (self._target_angle - self._theta),
            )
        )
        return np.pi / 3 * np.sin(self._episode_steps / self._frequency)


class QubeDampenFollowSineWaveEnv(QubeDampenFollowEnv):
    def _next_target_angle(self):
        # Target angle follows a sine wave between -180 and +180 degrees
        # (amplitude pi).
        # Debug print of target/current/difference angles in degrees
        # (57.3 ~= 180/pi).
        print(
            "T:{:05.2f}, C:{:05.2f}, Diff:{:05.2f}".format(
                self._target_angle * (57.3),
                57.3 * self._theta,
                57.3 * (self._target_angle - self._theta),
            )
        )
        return np.pi * np.sin(self._episode_steps / self._frequency)
class QubeDampenSparseEnv(QubeDampenEnv):
    def _reward(self):
        """Sparse reward: 1 only when the pendulum hangs straight down
        (within 1 degree of alpha = +/-pi) with the arm centered, else 0."""
        within_range = True
        # Assume -pi < alpha < pi: alpha is near +/-pi when hanging down.
        # Bug fix: the original threshold (189 deg) could never be met since
        # |alpha| <= 180 deg, so this sparse reward was always 0. Use a
        # 1-degree tolerance (179 deg), mirroring the 1-degree theta check.
        within_range &= np.abs(self._alpha) > (179 * np.pi / 180)
        within_range &= np.abs(self._theta) < (1 * np.pi / 180)
        return 1 if within_range else 0
class QubeDampenFollowSparseEnv(QubeDampenFollowEnv):
    def _reward(self):
        """Sparse reward: 1 only when the pendulum hangs straight down
        (within 1 degree of alpha = +/-pi) with the arm centered, else 0."""
        within_range = True
        # Assume -pi < alpha < pi: alpha is near +/-pi when hanging down.
        # Bug fix: 189 deg was unreachable for |alpha| <= 180 deg; use a
        # 1-degree tolerance (179 deg) like the sibling QubeDampenSparseEnv.
        within_range &= np.abs(self._alpha) > (179 * np.pi / 180)
        within_range &= np.abs(self._theta) < (1 * np.pi / 180)
        # Bug fix: the original method fell off the end and implicitly
        # returned None instead of the 0/1 sparse reward.
        return 1 if within_range else 0
14 | 15 | 16 | ### Prerequisites 17 | Install the HIL SDK from Quanser.
18 | A mirror is available at https://github.com/quanser/hil_sdk_linux_x86_64.
19 | 20 | You can install the driver by: 21 | ```bash 22 | git clone https://github.com/quanser/hil_sdk_linux_x86_64.git 23 | sudo chmod a+x ./hil_sdk_linux_x86_64/setup_hil_sdk ./hil_sdk_linux_x86_64/uninstall_hil_sdk 24 | sudo ./hil_sdk_linux_x86_64/setup_hil_sdk 25 | ``` 26 | 27 | You also must have pip installed: 28 | ```bash 29 | sudo apt-get install python3-pip 30 | ``` 31 | 32 | 33 | ### Installation 34 | We recommend that you use a virtual environment such as [conda (recommended)](https://conda.io/docs/user-guide/getting-started.html), [virtualenv](https://virtualenv.pypa.io/en/stable/), or [Pipenv](https://pipenv.readthedocs.io/en/latest/) 35 | 36 | You can install the driver by cloning and pip-installing: 37 | ```bash 38 | git clone https://github.com/BlueRiverTech/quanser-openai-driver.git 39 | cd quanser-openai-driver 40 | pip3 install -e . 41 | ``` 42 | 43 | Once you have that setup: Run the classical control baseline (ensure the Qube is connected to your computer)
44 | ```bash 45 | python tests/test.py --env QubeSwingupEnv --controller flip 46 | ``` 47 | 48 | 49 | # Usage 50 | Usage is very similar to most OpenAI gym environments but **requires** that you close the environment when finished. 51 | Without safely closing the Env, bad things may happen. Usually you will not be able to reopen the board. 52 | 53 | This can be done with a context manager using a `with` statement 54 | ```python 55 | import gym 56 | from gym_brt import QubeSwingupEnv 57 | 58 | num_episodes = 10 59 | num_steps = 250 60 | 61 | with QubeSwingupEnv() as env: 62 | for episode in range(num_episodes): 63 | state = env.reset() 64 | for step in range(num_steps): 65 | action = env.action_space.sample() 66 | state, reward, done, _ = env.step(action) 67 | ``` 68 | 69 | Or can be closed manually by using `env.close()`. You can see an [example here](docs/alternatives.md#usage). 70 | 71 | 72 | # Environments 73 | Information about various environments can be found in [docs/envs](docs/envs.md) and our [whitepaper](https://arxiv.org/abs/2001.02254). 74 | 75 | 76 | # Control 77 | Information about baselines can be found in [docs/control](docs/control.md). 78 | 79 | 80 | # Hardware Wrapper 81 | Information about the Python wrapper for Quanser hardware and Qube Servo 2 simulator can be found in [docs/quanser](docs/quanser.md) and our [whitepaper](https://arxiv.org/abs/2001.02254). 
82 | 83 | 84 | # Citing 85 | If you use this in your research please cite the following [whitepaper](https://arxiv.org/abs/2001.02254): 86 | 87 | ``` 88 | @misc{2001.02254, 89 | author = {{Polzounov}, Kirill and {Sundar}, Ramitha and {Redden}, Lee}, 90 | title = "{Blue River Controls: A toolkit for Reinforcement Learning Control Systems on Hardware}", 91 | year = {2019}, 92 | eprint = {arXiv:2001.02254}, 93 | howpublished = {Accepted at the Workshop on Deep Reinforcement Learning at the 33rd Conference on Neural Information Processing Systems (NeurIPS 2019), Vancouver, Canada.} 94 | } 95 | ``` 96 | -------------------------------------------------------------------------------- /gym_brt/envs/qube_swingup_env.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from __future__ import division 4 | 5 | import numpy as np 6 | from gym import spaces 7 | from gym_brt.envs.qube_base_env import QubeBaseEnv 8 | 9 | """ 10 | Description: 11 | A pendulum is attached to an un-actuated joint to a horizontal arm, 12 | which is actuated by a rotary motor. The pendulum begins 13 | downwards and the goal is flip the pendulum up and then to keep it from 14 | falling by applying a voltage on the motor which causes a torque on the 15 | horizontal arm. 16 | 17 | Source: 18 | This is modified for the Quanser Qube Servo2-USB from the Cart Pole 19 | problem described by Barto, Sutton, and Anderson, and implemented in 20 | OpenAI Gym: https://github.com/openai/gym/blob/master/gym/envs/classic_control/cartpole.py 21 | This description is also modified from the description by the OpenAI 22 | team. 
Observation:
    Type: Box(4)
    Num Observation                          Min         Max
    0   Rotary arm angle (theta)             -90 deg     90 deg
    1   Pendulum angle (alpha)               -180 deg    180 deg
    2   Arm angular velocity (theta_dot)     -Inf        Inf
    3   Pendulum angular velocity (alpha_dot) -Inf       Inf
    Note: the velocities are limited by the physical system.
_next_target_angle(self): 97 | # Update the target angle twice a second on average at random intervals 98 | if np.random.randint(1, self._frequency / 2) == 1: 99 | max_angle = 80 * (np.pi / 180) # 80 degrees 100 | angle = np.random.uniform(-max_angle, max_angle) 101 | else: 102 | angle = self._target_angle 103 | return angle 104 | 105 | 106 | class QubeSwingupFollowSparseEnv(QubeSwingupFollowEnv): 107 | def _reward(self): 108 | within_range = True 109 | within_range &= np.abs(self._alpha) < (1 * np.pi / 180) 110 | within_range &= np.abs(self._theta) < (1 * np.pi / 180) 111 | return 1 if within_range else 0 112 | -------------------------------------------------------------------------------- /gym_brt/envs/qube_balance_env.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from __future__ import division 4 | 5 | import numpy as np 6 | from gym import spaces 7 | from gym_brt.envs.qube_base_env import QubeBaseEnv 8 | 9 | 10 | """ 11 | Description: 12 | A pendulum is attached to an un-actuated joint to a horizontal arm, 13 | which is actuated by a rotary motor. The pendulum begins 14 | upright/inverted and the goal is to keep it from falling by applying a 15 | voltage on the motor which causes a torque on the horizontal arm. 16 | 17 | Source: 18 | This is modified for the Quanser Qube Servo2-USB from the Cart Pole 19 | problem described by Barto, Sutton, and Anderson, and implemented in 20 | OpenAI Gym: https://github.com/openai/gym/blob/master/gym/envs/classic_control/cartpole.py 21 | This description is also modified from the description by the OpenAI 22 | team. 23 | 24 | Observation: 25 | Type: Box(4) 26 | Num Observation Min Max 27 | 0 Rotary arm angle (theta) -90 deg 90 deg 28 | 1 Pendulum angle (alpha) -20 deg 20 deg 29 | 2 Cart Velocity -Inf Inf 30 | 3 Pole Velocity -Inf Inf 31 | Note: the velocities are limited by the physical system. 
32 | 33 | Actions: 34 | Type: Real number (1-D Continuous) (voltage applied to motor) 35 | 36 | Reward: 37 | r(s_t, a_t) = 1 - (0.8 * abs(alpha) + 0.2 * abs(theta)) / pi 38 | 39 | Starting State: 40 | Theta = 0 + noise, alpha = 0 + noise 41 | 42 | Episode Termination: 43 | Alpha is greater than ±20° from upright, theta is greater than ±90°, or 44 | after 2048 steps 45 | """ 46 | 47 | 48 | class QubeBalanceEnv(QubeBaseEnv): 49 | def _reward(self): 50 | reward = 1 - ( 51 | (0.8 * np.abs(self._alpha) + 0.2 * np.abs(self._target_angle - self._theta)) 52 | / np.pi 53 | ) 54 | return max(reward, 0) # Clip for the follow env case 55 | 56 | def _isdone(self): 57 | done = False 58 | done |= self._episode_steps >= self._max_episode_steps 59 | done |= abs(self._theta) > (90 * np.pi / 180) 60 | done |= abs(self._alpha) > (20 * np.pi / 180) 61 | return done 62 | 63 | def reset(self): 64 | super(QubeBalanceEnv, self).reset() 65 | # Start the pendulum stationary at the top (stable point) 66 | state = self._reset_up() 67 | return state 68 | 69 | 70 | class QubeBalanceSparseEnv(QubeBalanceEnv): 71 | def _reward(self): 72 | within_range = True 73 | within_range &= np.abs(self._alpha) < (1 * np.pi / 180) 74 | within_range &= np.abs(self._theta) < (1 * np.pi / 180) 75 | return 1 if within_range else 0 76 | 77 | 78 | class QubeBalanceFollowEnv(QubeBalanceEnv): 79 | def __init__(self, **kwargs): 80 | super(QubeBalanceFollowEnv, self).__init__(**kwargs) 81 | obs_max = np.asarray( 82 | [np.pi / 2, np.pi, np.inf, np.inf, 80 * (np.pi / 180)], dtype=np.float64 83 | ) 84 | self.observation_space = spaces.Box(-obs_max, obs_max) 85 | 86 | def _get_state(self): 87 | state = np.array( 88 | [ 89 | self._theta, 90 | self._alpha, 91 | self._theta_dot, 92 | self._alpha_dot, 93 | self._target_angle, 94 | ], 95 | dtype=np.float64, 96 | ) 97 | return state 98 | 99 | def _next_target_angle(self): 100 | # Update the target angle twice a second on average at random intervals 101 | if np.random.randint(1, 
self._frequency / 2) == 1: 102 | max_angle = 80 * (np.pi / 180) # 80 degrees 103 | angle = np.random.uniform(-max_angle, max_angle) 104 | else: 105 | angle = self._target_angle 106 | return angle 107 | 108 | 109 | class QubeBalanceFollowSparseEnv(QubeBalanceFollowEnv): 110 | def _reward(self): 111 | within_range = True 112 | within_range &= np.abs(self._alpha) < (1 * np.pi / 180) 113 | within_range &= np.abs(self._theta) < (1 * np.pi / 180) 114 | return 1 if within_range else 0 115 | -------------------------------------------------------------------------------- /gym_brt/quanser/qube_simulator.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from __future__ import division 4 | 5 | from scipy.integrate import odeint 6 | import numpy as np 7 | import math 8 | 9 | 10 | # Motor 11 | Rm = 8.4 # Resistance 12 | kt = 0.042 # Current-torque (N-m/A) 13 | km = 0.042 # Back-emf constant (V-s/rad) 14 | 15 | # Rotary Arm 16 | mr = 0.095 # Mass (kg) 17 | Lr = 0.085 # Total length (m) 18 | Jr = mr * Lr ** 2 / 12 # Moment of inertia about pivot (kg-m^2) 19 | Dr = 0.00027 # Equivalent viscous damping coefficient (N-m-s/rad) 20 | 21 | # Pendulum Link 22 | mp = 0.024 # Mass (kg) 23 | Lp = 0.129 # Total length (m) 24 | Jp = mp * Lp ** 2 / 12 # Moment of inertia about pivot (kg-m^2) 25 | Dp = 0.00005 # Equivalent viscous damping coefficient (N-m-s/rad) 26 | 27 | g = 9.81 # Gravity constant 28 | 29 | 30 | def diff_forward_model_ode(state, t, action, dt): 31 | theta, alpha, theta_dot, alpha_dot = state 32 | Vm = action 33 | tau = -(km * (Vm - km * theta_dot)) / Rm # torque 34 | 35 | # fmt: off 36 | # From Rotary Pendulum Workbook 37 | theta_dot_dot = (-Lp*Lr*mp*(-8.0*Dp*alpha_dot + Lp**2*mp*theta_dot**2*np.sin(2.0*alpha) + 4.0*Lp*g*mp*np.sin(alpha))*np.cos(alpha) + (4.0*Jp + Lp**2*mp)*(4.0*Dr*theta_dot + Lp**2*alpha_dot*mp*theta_dot*np.sin(2.0*alpha) + 
2.0*Lp*Lr*alpha_dot**2*mp*np.sin(alpha) - 4.0*tau))/(4.0*Lp**2*Lr**2*mp**2*np.cos(alpha)**2 - (4.0*Jp + Lp**2*mp)*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp)) 38 | alpha_dot_dot = (2.0*Lp*Lr*mp*(4.0*Dr*theta_dot + Lp**2*alpha_dot*mp*theta_dot*np.sin(2.0*alpha) + 2.0*Lp*Lr*alpha_dot**2*mp*np.sin(alpha) - 4.0*tau)*np.cos(alpha) - 0.5*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp)*(-8.0*Dp*alpha_dot + Lp**2*mp*theta_dot**2*np.sin(2.0*alpha) + 4.0*Lp*g*mp*np.sin(alpha)))/(4.0*Lp**2*Lr**2*mp**2*np.cos(alpha)**2 - (4.0*Jp + Lp**2*mp)*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp)) 39 | # fmt: on 40 | 41 | diff_state = np.array([theta_dot, alpha_dot, theta_dot_dot, alpha_dot_dot]).reshape( 42 | (4,) 43 | ) 44 | diff_state = np.array(diff_state, dtype="float64") 45 | return diff_state 46 | 47 | 48 | def forward_model_ode(theta, alpha, theta_dot, alpha_dot, Vm, dt, integration_steps): 49 | t = np.linspace(0.0, dt, 2) # TODO: add and check integration steps here 50 | 51 | state = np.array([theta, alpha, theta_dot, alpha_dot]) 52 | next_state = np.array(odeint(diff_forward_model_ode, state, t, args=(Vm, dt)))[1, :] 53 | theta, alpha, theta_dot, alpha_dot = next_state 54 | 55 | theta = ((theta + np.pi) % (2 * np.pi)) - np.pi 56 | alpha = ((alpha + np.pi) % (2 * np.pi)) - np.pi 57 | 58 | return theta, alpha, theta_dot, alpha_dot 59 | 60 | 61 | def forward_model_euler(theta, alpha, theta_dot, alpha_dot, Vm, dt, integration_steps): 62 | dt /= integration_steps 63 | for step in range(integration_steps): 64 | tau = -(km * (Vm - km * theta_dot)) / Rm # torque 65 | 66 | # fmt: off 67 | # From Rotary Pendulum Workbook 68 | theta_dot_dot = float((-Lp*Lr*mp*(-8.0*Dp*alpha_dot + Lp**2*mp*theta_dot**2*np.sin(2.0*alpha) + 4.0*Lp*g*mp*np.sin(alpha))*np.cos(alpha) + (4.0*Jp + Lp**2*mp)*(4.0*Dr*theta_dot + Lp**2*alpha_dot*mp*theta_dot*np.sin(2.0*alpha) + 2.0*Lp*Lr*alpha_dot**2*mp*np.sin(alpha) - 4.0*tau))/(4.0*Lp**2*Lr**2*mp**2*np.cos(alpha)**2 - (4.0*Jp + Lp**2*mp)*(4.0*Jr 
+ Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp))) 69 | alpha_dot_dot = float((2.0*Lp*Lr*mp*(4.0*Dr*theta_dot + Lp**2*alpha_dot*mp*theta_dot*np.sin(2.0*alpha) + 2.0*Lp*Lr*alpha_dot**2*mp*np.sin(alpha) - 4.0*tau)*np.cos(alpha) - 0.5*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp)*(-8.0*Dp*alpha_dot + Lp**2*mp*theta_dot**2*np.sin(2.0*alpha) + 4.0*Lp*g*mp*np.sin(alpha)))/(4.0*Lp**2*Lr**2*mp**2*np.cos(alpha)**2 - (4.0*Jp + Lp**2*mp)*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp))) 70 | # fmt: on 71 | 72 | if True: # semi-implicit euler (more accurate) 73 | theta_dot += theta_dot_dot * dt 74 | alpha_dot += alpha_dot_dot * dt 75 | theta += theta_dot * dt 76 | alpha += alpha_dot * dt 77 | else: # classic euler 78 | theta += theta_dot * dt 79 | alpha += alpha_dot * dt 80 | theta_dot += theta_dot_dot * dt 81 | alpha_dot += alpha_dot_dot * dt 82 | 83 | theta = ((theta + np.pi) % (2 * np.pi)) - np.pi 84 | alpha = ((alpha + np.pi) % (2 * np.pi)) - np.pi 85 | 86 | return theta, alpha, theta_dot, alpha_dot 87 | -------------------------------------------------------------------------------- /tests/lqr.py: -------------------------------------------------------------------------------- 1 | try: 2 | import autograd.numpy as np 3 | from autograd import grad, jacobian 4 | except: 5 | raise ImportError("Please install autograd.") 6 | try: 7 | import scipy.linalg as sp_linalg 8 | except: 9 | raise ImportError("Please install scipy.") 10 | 11 | 12 | # Motor 13 | Rm = 8.4 # Resistance 14 | kt = 0.042 # Current-torque (N-m/A) 15 | km = 0.042 # Back-emf constant (V-s/rad) 16 | 17 | # Rotary Arm 18 | mr = 0.095 # Mass (kg) 19 | Lr = 0.085 # Total length (m) 20 | Jr = mr * Lr ** 2 / 12 # Moment of inertia about pivot (kg-m^2) 21 | Br = Dr = 0.0015 # Equivalent viscous damping coefficient (N-m-s/rad) 22 | 23 | # Pendulum Link 24 | mp = 0.024 # Mass (kg) 25 | Lp = 0.129 # Total length (m) 26 | Jp = mp * Lp ** 2 / 12 # Moment of inertia about pivot (kg-m^2) 27 | Dp = Bp = 0.0005 # 
Equivalent viscous damping coefficient (N-m-s/rad) 28 | 29 | g = 9.81 # Gravity constant 30 | 31 | 32 | def forward_model(state, action, dt=1 / 300.0): 33 | theta = state[0] 34 | alpha = state[1] 35 | theta_dot = state[2] 36 | alpha_dot = state[3] 37 | Vm = action 38 | 39 | tau = (km * (Vm - km * theta_dot)) / Rm # torque 40 | 41 | # fmt: off 42 | alpha_dot_dot = (2.0*Lp*Lr*mp*(4.0*Dr*theta_dot + Lp**2*alpha_dot*mp*theta_dot*np.sin(2.0*alpha) + 2.0*Lp*Lr*alpha_dot**2*mp*np.sin(alpha) - 4.0*(tau))*np.cos(alpha) - 0.5*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp)*(-8.0*Dp*alpha_dot + Lp**2*mp*theta_dot**2*np.sin(2.0*alpha) + 4.0*Lp*g*mp*np.sin(alpha)))/(4.0*Lp**2*Lr**2*mp**2*np.cos(alpha)**2 - (4.0*Jp + Lp**2*mp)*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp)) 43 | theta_dot_dot = (-Lp*Lr*mp*(-8.0*Dp*alpha_dot + Lp**2*mp*theta_dot**2*np.sin(2.0*alpha) + 4.0*Lp*g*mp*np.sin(alpha))*np.cos(alpha) + (4.0*Jp + Lp**2*mp)*(4.0*Dr*theta_dot + Lp**2*alpha_dot*mp*theta_dot*np.sin(2.0*alpha) + 2.0*Lp*Lr*alpha_dot**2*mp*np.sin(alpha) - 4.0*(tau)))/(4.0*Lp**2*Lr**2*mp**2*np.cos(alpha)**2 - (4.0*Jp + Lp**2*mp)*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp)) 44 | # fmt: on 45 | 46 | # Works around a single operating point 47 | theta += theta_dot * dt # TODO: Shouldn't the angles be ahead of the velocities? 
48 | alpha += alpha_dot * dt 49 | theta_dot += theta_dot_dot * dt 50 | alpha_dot += alpha_dot_dot * dt 51 | 52 | theta %= 2 * np.pi 53 | alpha %= 2 * np.pi 54 | 55 | # For continuous version of LQR 56 | # state = np.array([theta_dot,alpha_dot,theta_dot_dot,alpha_dot_dot]).reshape((4,)) 57 | 58 | # For discrete version of LQR 59 | state = np.array([theta, alpha, theta_dot, alpha_dot]).reshape((4,)) 60 | return state 61 | 62 | 63 | def computeAB(current_state, current_control): 64 | # Linearizing Dynamics 65 | forward_dynamics_model = lambda state, action: forward_model(state, action) 66 | a_mat = jacobian(forward_dynamics_model, 0) 67 | b_mat = jacobian(forward_dynamics_model, 1) 68 | A = a_mat(current_state, current_control) 69 | B = b_mat(current_state, current_control) 70 | 71 | # Correct continuous time linearization from Quanser Workbook - 72 | # A = np.array( [[0,0,1.0000,0],[0,0,0 ,1.0000], [0,149.2751,-0.0104,0],[0,261.6091,-0.0103,0]]).reshape((4,4)) 73 | # B = np.array([ 0,0,49.7275,49.1493]).reshape((4,1)) 74 | 75 | return A, B 76 | 77 | 78 | def LQR_control(): 79 | # Cost matrices for LQR 80 | Q = np.diag(np.array([1, 1, 1, 1])) # state_dimension = 4 81 | R = np.eye(1) # control_dimension = 1 82 | 83 | A, B = computeAB(np.array([0.0, 0.0, 0.0, 0.0]), np.array([0.0])) 84 | 85 | # Use if discrete forward dynamics is used 86 | X = sp_linalg.solve_discrete_are(A, B, Q, R) 87 | K = np.dot(np.linalg.pinv(R + np.dot(B.T, np.dot(X, B))), np.dot(B.T, np.dot(X, A))) 88 | 89 | # Use for continuous version of LQR 90 | # X = sp_linalg.solve_continuous_are(A, B, Q, R) 91 | # K = np.dot(np.linalg.pinv(R), np.dot(B.T, X)) 92 | return np.squeeze(K, 0) 93 | 94 | 95 | def main(): 96 | """ 97 | K obtained from dicrete dynamics + discrete LQR and continuous dynamics + continuous LQR should approximately match 98 | quanser workbook and more importantly achieve balance on the Qube Hardware 99 | """ 100 | K_real = [-2.0, 35.0, -1.5, 3.0] # Correct K from quanser workbook 101 | 
K_calc = LQR_control().tolist() 102 | print("The two following should be close to each other") 103 | print("\tThe gains from Quanser are:", K_real) 104 | print("\tThe calculated gains are: ", K_calc) 105 | 106 | 107 | if __name__ == "__main__": 108 | main() 109 | -------------------------------------------------------------------------------- /gym_brt/envs/rendering/qube_render.py: -------------------------------------------------------------------------------- 1 | try: 2 | # If you use a device such as a headless raspberry pi this may not work 3 | from gym.envs.classic_control import rendering 4 | except: 5 | print("Warning: Can not import classic_control rendering in qube_render.py") 6 | 7 | import vpython as vp 8 | import numpy as np 9 | 10 | 11 | class QubeRendererVypthon: 12 | def __init__(self, theta, alpha, frequency): 13 | self.frequency = frequency 14 | vp.scene.width, vp.scene.height = 1000, 600 15 | vp.scene.range = 0.25 16 | vp.scene.title = "QubeServo2-USB rotary pendulum" 17 | 18 | # Dimensions of the rotary pendulum parts 19 | base_w, base_h, base_d = 0.102, 0.101, 0.102 # width, height, & len of base 20 | rotor_d, rotor_h = 0.0202, 0.01 # height, diameter of the rotor platform 21 | rotary_top_l, rotary_top_d = 0.032, 0.012 # height, diameter of the rotary top 22 | self.arm_l, arm_d = 0.085, 0.00325 # height, diameter of the arm 23 | self.pen_l, self.pen_d = 0.129, 0.00475 # height, diameter of the pendulum 24 | 25 | # Origin of parts 26 | arm_origin = vp.vec(0, 0, 0) 27 | self.rotary_top_origin = vp.vec(0, 0, -rotary_top_l / 2) 28 | rotor_origin = arm_origin - vp.vec(0, rotor_h + rotary_top_d / 2 - 0.0035, 0) 29 | base_origin = rotor_origin - vp.vec(0, base_h / 2, 0) 30 | 31 | # Create the part objects 32 | base = vp.box( 33 | pos=base_origin, 34 | size=vp.vec(base_w, base_h, base_d), 35 | color=vp.vec(0.45, 0.45, 0.45), 36 | ) 37 | rotor = vp.cylinder( 38 | pos=rotor_origin, 39 | axis=vp.vec(0, 1, 0), 40 | size=vp.vec(rotor_h, rotor_d, rotor_d), 41 
| color=vp.color.yellow, 42 | ) 43 | self._rotary_top = vp.cylinder( 44 | pos=self.rotary_top_origin, 45 | axis=vp.vec(0, 0, 1), 46 | size=vp.vec(rotary_top_l, rotary_top_d, rotary_top_d), 47 | color=vp.color.red, 48 | ) 49 | self._arm = vp.cylinder( 50 | pos=arm_origin, 51 | axis=vp.vec(0, 0, 1), 52 | size=vp.vec(self.arm_l, arm_d, arm_d), 53 | color=vp.vec(0.7, 0.7, 0.7), 54 | ) 55 | self._pendulum = vp.cylinder( 56 | pos=self.pendulum_origin(theta), 57 | axis=vp.vec(0, 1, 0), 58 | size=vp.vec(self.pen_l, self.pen_d, self.pen_d), 59 | color=vp.color.red, 60 | ) 61 | 62 | # Rotate parts to their init orientations 63 | self._rotary_top.rotate(angle=theta, axis=vp.vec(0, 1, 0), origin=arm_origin) 64 | self._arm.rotate(angle=theta, axis=vp.vec(0, 1, 0), origin=arm_origin) 65 | self._pendulum.rotate( 66 | angle=alpha, 67 | axis=self.pendulum_axis(theta), 68 | origin=self.pendulum_origin(theta), 69 | ) 70 | self.theta, self.alpha = theta, alpha 71 | 72 | def pendulum_origin(self, theta): 73 | x = self.arm_l * np.sin(theta) 74 | y = 0 75 | z = self.arm_l * np.cos(theta) 76 | return vp.vec(x, y, z) 77 | 78 | def pendulum_axis(self, theta): 79 | x = np.sin(theta) 80 | y = 0 81 | z = np.cos(theta) 82 | return vp.vec(x, y, z) 83 | 84 | def render(self, theta, alpha): 85 | dtheta = theta - self.theta 86 | self.theta = theta 87 | 88 | self._arm.rotate(angle=dtheta, axis=vp.vec(0, 1, 0), origin=vp.vec(0, 0, 0)) 89 | self._rotary_top.rotate( 90 | angle=dtheta, axis=vp.vec(0, 1, 0), origin=vp.vec(0, 0, 0) 91 | ) 92 | self._pendulum.origin = self.pendulum_origin(theta) 93 | self._pendulum.pos = self.pendulum_origin(theta) 94 | self._pendulum.axis = vp.vec(0, 1, 0) 95 | self._pendulum.size = vp.vec(self.pen_l, self.pen_d, self.pen_d) 96 | self._pendulum.rotate( 97 | angle=alpha, 98 | axis=self.pendulum_axis(theta), 99 | origin=self.pendulum_origin(theta), 100 | ) 101 | vp.rate(self.frequency) 102 | 103 | def close(self, *args, **kwargs): 104 | pass 105 | 106 | 107 | class 
QubeRenderer2D: 108 | def __init__(self, theta, alpha, frequency): 109 | width, height = (640, 240) 110 | self._viewer = rendering.Viewer(width, height) 111 | l, r, t, b = (2, -2, 0, 100) 112 | theta_poly = rendering.make_polygon([(l, b), (l, t), (r, t), (r, b)]) 113 | l, r, t, b = (2, -2, 0, 100) 114 | alpha_poly = rendering.make_polygon([(l, b), (l, t), (r, t), (r, b)]) 115 | theta_circle = rendering.make_circle(radius=100, res=64, filled=False) 116 | theta_circle.set_color(0.5, 0.5, 0.5) # Theta is grey 117 | alpha_circle = rendering.make_circle(radius=100, res=64, filled=False) 118 | alpha_circle.set_color(0.8, 0.0, 0.0) # Alpha is red 119 | theta_origin = (width / 2 - 150, height / 2) 120 | alpha_origin = (width / 2 + 150, height / 2) 121 | self._theta_tx = rendering.Transform(translation=theta_origin) 122 | self._alpha_tx = rendering.Transform(translation=alpha_origin) 123 | theta_poly.add_attr(self._theta_tx) 124 | alpha_poly.add_attr(self._alpha_tx) 125 | theta_circle.add_attr(self._theta_tx) 126 | alpha_circle.add_attr(self._alpha_tx) 127 | self._viewer.add_geom(theta_poly) 128 | self._viewer.add_geom(alpha_poly) 129 | self._viewer.add_geom(theta_circle) 130 | self._viewer.add_geom(alpha_circle) 131 | 132 | def render(self, theta, alpha): 133 | self._theta_tx.set_rotation(self._theta + np.pi) 134 | self._alpha_tx.set_rotation(self._alpha) 135 | return self._viewer.render(return_rgb_array=mode == "rgb_array") 136 | 137 | def close(self): 138 | self._viewer.close() 139 | -------------------------------------------------------------------------------- /gym_brt/envs/qube_base_env.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from __future__ import division 4 | 5 | import gym 6 | import time 7 | import math 8 | import numpy as np 9 | 10 | from gym import spaces 11 | from gym.utils import seeding 12 | 13 | # For other platforms where 
it's impossible to install the HIL SDK 14 | try: 15 | from gym_brt.quanser import QubeHardware 16 | except ImportError: 17 | print("Warning: Can not import QubeHardware in qube_base_env.py") 18 | 19 | from gym_brt.quanser import QubeSimulator 20 | from gym_brt.envs.rendering import QubeRenderer 21 | 22 | 23 | MAX_MOTOR_VOLTAGE = 3 24 | ACT_MAX = np.asarray([MAX_MOTOR_VOLTAGE], dtype=np.float64) 25 | # OBS_MAX = [theta, alpha, theta_dot, alpha_dot] 26 | OBS_MAX = np.asarray([np.pi / 2, np.pi, np.inf, np.inf], dtype=np.float64) 27 | 28 | 29 | class QubeBaseEnv(gym.Env): 30 | """A base class for all qube-based environments.""" 31 | 32 | def __init__( 33 | self, 34 | frequency=250, 35 | batch_size=2048, 36 | use_simulator=False, 37 | encoder_reset_steps=int(1e8), 38 | ): 39 | self.observation_space = spaces.Box(-OBS_MAX, OBS_MAX) 40 | self.action_space = spaces.Box(-ACT_MAX, ACT_MAX) 41 | 42 | self._frequency = frequency 43 | # Ensures that samples in episode are the same as batch size 44 | # Reset every batch_size steps (2048 ~= 8.192 seconds) 45 | self._max_episode_steps = batch_size 46 | self._episode_steps = 0 47 | self._encoder_reset_steps = encoder_reset_steps 48 | self._steps_since_encoder_reset = 0 49 | self._target_angle = 0 50 | 51 | self._theta, self._alpha, self._theta_dot, self._alpha_dot = 0, 0, 0, 0 52 | self._dtheta, self._dalpha = 0, 0 53 | 54 | # Open the Qube 55 | if use_simulator: 56 | # TODO: Check assumption: ODE integration should be ~ once per ms 57 | integration_steps = int(np.ceil(1000 / self._frequency)) 58 | self.qube = QubeSimulator( 59 | forward_model="ode", 60 | frequency=self._frequency, 61 | integration_steps=integration_steps, 62 | max_voltage=MAX_MOTOR_VOLTAGE, 63 | ) 64 | else: 65 | self.qube = QubeHardware( 66 | frequency=self._frequency, max_voltage=MAX_MOTOR_VOLTAGE 67 | ) 68 | self.qube.__enter__() 69 | 70 | self.seed() 71 | self._viewer = None 72 | 73 | def __enter__(self): 74 | return self 75 | 76 | def __exit__(self, type, 
value, traceback): 77 | self.close(type=type, value=value, traceback=traceback) 78 | 79 | def seed(self, seed=None): 80 | self.np_random, seed = seeding.np_random(seed) 81 | return [seed] 82 | 83 | def _step(self, action): 84 | led = self._led() 85 | 86 | action = np.clip(np.array(action, dtype=np.float64), -ACT_MAX, ACT_MAX) 87 | state = self.qube.step(action, led=led) 88 | 89 | self._dtheta = state[0] - self._theta 90 | self._dalpha = state[1] - self._alpha 91 | self._theta, self._alpha, self._theta_dot, self._alpha_dot = state 92 | 93 | def reset(self): 94 | self._episode_steps = 0 95 | # Occasionaly reset the enocoders to remove sensor drift 96 | if self._steps_since_encoder_reset >= self._encoder_reset_steps: 97 | self.qube.reset_encoders() 98 | self._steps_since_encoder_reset = 0 99 | action = np.zeros(shape=self.action_space.shape, dtype=self.action_space.dtype) 100 | self._step(action) 101 | return self._get_state() 102 | 103 | def _reset_up(self): 104 | self.qube.reset_up() 105 | action = np.zeros(shape=self.action_space.shape, dtype=self.action_space.dtype) 106 | self._step(action) 107 | return self._get_state() 108 | 109 | def _reset_down(self): 110 | self.qube.reset_down() 111 | action = np.zeros(shape=self.action_space.shape, dtype=self.action_space.dtype) 112 | self._step(action) 113 | return self._get_state() 114 | 115 | def _get_state(self): 116 | return np.array( 117 | [self._theta, self._alpha, self._theta_dot, self._alpha_dot], 118 | dtype=np.float64, 119 | ) 120 | 121 | def _next_target_angle(self): 122 | return 0 123 | 124 | def _reward(self): 125 | raise NotImplementedError 126 | 127 | def _isdone(self): 128 | raise NotImplementedError 129 | 130 | def _led(self): 131 | if self._isdone(): # Doing reset 132 | led = [1.0, 1.0, 0.0] # Yellow 133 | else: 134 | if abs(self._alpha) > (20 * np.pi / 180): 135 | led = [1.0, 0.0, 0.0] # Red 136 | elif abs(self._theta) > (90 * np.pi / 180): 137 | led = [1.0, 0.0, 0.0] # Red 138 | else: 139 | led = [0.0, 
1.0, 0.0] # Green 140 | return led 141 | 142 | def step(self, action): 143 | self._step(action) 144 | state = self._get_state() 145 | reward = self._reward() 146 | done = self._isdone() 147 | info = { 148 | "theta": self._theta, 149 | "alpha": self._alpha, 150 | "theta_dot": self._theta_dot, 151 | "alpha_dot": self._alpha_dot, 152 | } 153 | 154 | self._episode_steps += 1 155 | self._steps_since_encoder_reset += 1 156 | self._target_angle = self._next_target_angle() 157 | 158 | return state, reward, done, info 159 | 160 | def render(self, mode="human"): 161 | if self._viewer is None: 162 | self._viewer = QubeRenderer(self._theta, self._alpha, self._frequency) 163 | self._viewer.render(self._theta, self._alpha) 164 | 165 | def close(self, type=None, value=None, traceback=None): 166 | # Safely close the Qube (important on hardware) 167 | self.qube.close(type=type, value=value, traceback=traceback) 168 | if self._viewer is not None: 169 | self._viewer.close() 170 | -------------------------------------------------------------------------------- /tests/test.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from __future__ import division 4 | 5 | import gym 6 | import time 7 | import argparse 8 | import numpy as np 9 | 10 | from gym_brt.envs import ( 11 | QubeSwingupEnv, 12 | QubeSwingupSparseEnv, 13 | QubeSwingupFollowEnv, 14 | QubeSwingupFollowSparseEnv, 15 | QubeBalanceEnv, 16 | QubeBalanceSparseEnv, 17 | QubeBalanceFollowEnv, 18 | QubeBalanceFollowSparseEnv, 19 | QubeDampenEnv, 20 | QubeDampenSparseEnv, 21 | QubeDampenFollowEnv, 22 | QubeDampenFollowSparseEnv, 23 | QubeRotorEnv, 24 | QubeRotorFollowEnv, 25 | QubeBalanceFollowSineWaveEnv, 26 | QubeSwingupFollowSineWaveEnv, 27 | QubeRotorFollowSineWaveEnv, 28 | QubeDampenFollowSineWaveEnv, 29 | ) 30 | 31 | from gym_brt.control import ( 32 | zero_policy, 33 | constant_policy, 34 | random_policy, 35 
| square_wave_policy, 36 | energy_control_policy, 37 | pd_control_policy, 38 | flip_and_hold_policy, 39 | square_wave_flip_and_hold_policy, 40 | dampen_policy, 41 | pd_tracking_control_policy, 42 | ) 43 | 44 | 45 | def print_info(state_info, action, reward): 46 | theta = state_info["theta"] 47 | alpha = state_info["alpha"] 48 | theta_dot = state_info["theta_dot"] 49 | alpha_dot = state_info["alpha_dot"] 50 | print( 51 | "State: theta={:06.3f}, alpha={:06.3f}, theta_dot={:06.3f}, alpha_dot={:06.3f}".format( 52 | theta, alpha, theta_dot, alpha_dot 53 | ) 54 | ) 55 | print("Action={}, Reward={}".format(action, reward)) 56 | 57 | 58 | def test_env( 59 | env_name, 60 | policy, 61 | num_episodes=10, 62 | num_steps=250, 63 | frequency=250, 64 | state_keys=None, 65 | verbose=False, 66 | use_simulator=False, 67 | render=False, 68 | ): 69 | 70 | with env_name(use_simulator=use_simulator, frequency=frequency) as env: 71 | for episode in range(num_episodes): 72 | state = env.reset() 73 | state, reward, done, info = env.step(np.array([0], dtype=np.float64)) 74 | for step in range(num_steps): 75 | action = policy(state, step=step, frequency=frequency) 76 | state, reward, done, info = env.step(action) 77 | if done: 78 | break 79 | if verbose: 80 | print_info(info, action, reward) 81 | if render: 82 | env.render() 83 | 84 | 85 | def main(): 86 | envs = { 87 | "QubeSwingupEnv": QubeSwingupEnv, 88 | "QubeSwingupSparseEnv": QubeSwingupSparseEnv, 89 | "QubeSwingupFollowEnv": QubeSwingupFollowEnv, 90 | "QubeSwingupFollowSparseEnv": QubeSwingupFollowSparseEnv, 91 | "QubeBalanceEnv": QubeBalanceEnv, 92 | "QubeBalanceSparseEnv": QubeBalanceSparseEnv, 93 | "QubeBalanceFollowEnv": QubeBalanceFollowEnv, 94 | "QubeBalanceFollowSparseEnv": QubeBalanceFollowSparseEnv, 95 | "QubeDampenEnv": QubeDampenEnv, 96 | "QubeDampenSparseEnv": QubeDampenSparseEnv, 97 | "QubeDampenFollowEnv": QubeDampenFollowEnv, 98 | "QubeDampenFollowSparseEnv": QubeDampenFollowSparseEnv, 99 | "QubeRotorEnv": QubeRotorEnv, 
100 | "QubeRotorFollowEnv": QubeRotorFollowEnv, 101 | "QubeBalanceFollowSineWaveEnv": QubeBalanceFollowSineWaveEnv, 102 | "QubeSwingupFollowSineWaveEnv": QubeSwingupFollowSineWaveEnv, 103 | "QubeRotorFollowSineWaveEnv": QubeRotorFollowSineWaveEnv, 104 | "QubeDampenFollowSineWaveEnv": QubeDampenFollowSineWaveEnv, 105 | } 106 | policies = { 107 | "none": zero_policy, 108 | "zero": zero_policy, 109 | "const": constant_policy, 110 | "rand": random_policy, 111 | "random": random_policy, 112 | "sw": square_wave_policy, 113 | "energy": energy_control_policy, 114 | "pd": pd_control_policy, 115 | "hold": pd_control_policy, 116 | "flip": flip_and_hold_policy, 117 | "sw-hold": square_wave_flip_and_hold_policy, 118 | "damp": dampen_policy, 119 | "track": pd_tracking_control_policy, 120 | } 121 | 122 | # Parse command line args 123 | parser = argparse.ArgumentParser() 124 | parser.add_argument( 125 | "-e", 126 | "--env", 127 | default="QubeSwingupEnv", 128 | choices=list(envs.keys()), 129 | help="Enviroment to run.", 130 | ) 131 | parser.add_argument( 132 | "-c", 133 | "--controller", 134 | default="random", 135 | choices=list(policies.keys()), 136 | help="Select what type of action to take.", 137 | ) 138 | parser.add_argument( 139 | "-ne", 140 | "--num-episodes", 141 | default="10", 142 | type=int, 143 | help="Number of episodes to run.", 144 | ) 145 | parser.add_argument( 146 | "-ns", 147 | "--num-steps", 148 | default="10000", 149 | type=int, 150 | help="Number of step to run per episode.", 151 | ) 152 | parser.add_argument( 153 | "-f", 154 | "--frequency", 155 | "--sample-frequency", 156 | default="250", 157 | type=float, 158 | help="The frequency of samples on the Quanser hardware.", 159 | ) 160 | parser.add_argument("-v", "--verbose", action="store_true") 161 | parser.add_argument("-r", "--render", action="store_true") 162 | parser.add_argument("-s", "--use_simulator", action="store_true") 163 | args, _ = parser.parse_known_args() 164 | 165 | print("Testing Env: 
{}".format(args.env)) 166 | print("Controller: {}".format(args.controller)) 167 | print("{} steps over {} episodes".format(args.num_steps, args.num_episodes)) 168 | print("Samples freq: {}".format(args.frequency)) 169 | test_env( 170 | envs[args.env], 171 | policies[args.controller], 172 | num_episodes=args.num_episodes, 173 | num_steps=args.num_steps, 174 | frequency=args.frequency, 175 | verbose=args.verbose, 176 | use_simulator=args.use_simulator, 177 | render=args.render, 178 | ) 179 | 180 | 181 | if __name__ == "__main__": 182 | main() 183 | -------------------------------------------------------------------------------- /gym_brt/control/control.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from __future__ import division 4 | 5 | import numpy as np 6 | 7 | 8 | def _convert_state(state): 9 | state = np.asarray(state) 10 | if state.shape == (4,): 11 | return state 12 | if state.shape == (5,): 13 | return state 14 | elif state.shape == (6,): 15 | # Get the angles 16 | theta_x, theta_y = state[0], state[1] 17 | alpha_x, alpha_y = state[2], state[3] 18 | theta = np.arctan2(theta_y, theta_x) 19 | alpha = np.arctan2(alpha_y, alpha_x) 20 | theta_dot, alpha_dot = state[4], state[5] 21 | return theta, alpha, theta_dot, alpha_dot 22 | else: 23 | raise ValueError( 24 | "State shape was expected to be one of: (4,1) or (6,1), found: {}".format( 25 | state.shape 26 | ) 27 | ) 28 | 29 | 30 | # No input 31 | def zero_policy(state, **kwargs): 32 | return np.array([0.0]) 33 | 34 | 35 | # Constant input 36 | def constant_policy(state, **kwargs): 37 | return np.array([3.0]) 38 | 39 | 40 | # Rand input 41 | def random_policy(state, **kwargs): 42 | return np.asarray([np.random.randn()]) 43 | 44 | 45 | # Square wave, switch every 85 ms 46 | def square_wave_policy(state, step, frequency=250, **kwargs): 47 | steps_until_85ms = int(85 * (frequency / 300)) 48 | 
state = _convert_state(state) 49 | # Switch between positive and negative every 85 ms 50 | mod_170ms = step % (2 * steps_until_85ms) 51 | if mod_170ms < steps_until_85ms: 52 | action = 3.0 53 | else: 54 | action = -3.0 55 | return np.array([action]) 56 | 57 | 58 | # Flip policy 59 | def energy_control_policy(state, **kwargs): 60 | state = _convert_state(state) 61 | # Run energy-based control to flip up the pendulum 62 | theta, alpha, theta_dot, alpha_dot = state 63 | # alpha_dot += alpha_dot + 1e-15 64 | 65 | """Implements a energy based swing-up controller""" 66 | mu = 50.0 # in m/s/J 67 | ref_energy = 30.0 / 1000.0 # Er in joules 68 | 69 | max_u = 6 # Max action is 6m/s^2 70 | 71 | # System parameters 72 | jp = 3.3282e-5 73 | lp = 0.129 74 | lr = 0.085 75 | mp = 0.024 76 | mr = 0.095 77 | rm = 8.4 78 | g = 9.81 79 | kt = 0.042 80 | 81 | pend_torque = (1 / 2) * mp * g * lp * (1 + np.cos(alpha)) 82 | energy = pend_torque + (jp / 2.0) * alpha_dot * alpha_dot 83 | 84 | u = mu * (energy - ref_energy) * np.sign(-1 * np.cos(alpha) * alpha_dot) 85 | u = np.clip(u, -max_u, max_u) 86 | 87 | torque = (mr * lr) * u 88 | voltage = (rm / kt) * torque 89 | return np.array([-voltage]) 90 | 91 | 92 | # Hold policy 93 | def pd_control_policy(state, **kwargs): 94 | state = _convert_state(state) 95 | theta, alpha, theta_dot, alpha_dot = state 96 | # multiply by proportional and derivative gains 97 | kp_theta = -2.0 98 | kp_alpha = 35.0 99 | kd_theta = -1.5 100 | kd_alpha = 3.0 101 | 102 | # If pendulum is within 20 degrees of upright, enable balance control, else zero 103 | if np.abs(alpha) <= (20.0 * np.pi / 180.0): 104 | action = ( 105 | theta * kp_theta 106 | + alpha * kp_alpha 107 | + theta_dot * kd_theta 108 | + alpha_dot * kd_alpha 109 | ) 110 | else: 111 | action = 0.0 112 | action = np.clip(action, -3.0, 3.0) 113 | return np.array([action]) 114 | 115 | 116 | # Flip and Hold 117 | def flip_and_hold_policy(state, **kwargs): 118 | state = _convert_state(state) 119 | theta, 
alpha, theta_dot, alpha_dot = state 120 | 121 | # If pendulum is within 20 degrees of upright, enable balance control 122 | if np.abs(alpha) <= (20.0 * np.pi / 180.0): 123 | action = pd_control_policy(state) 124 | else: 125 | action = energy_control_policy(state) 126 | return action 127 | 128 | 129 | # Square wave instead of energy controller flip and hold 130 | def square_wave_flip_and_hold_policy(state, **kwargs): 131 | state = _convert_state(state) 132 | theta, alpha, theta_dot, alpha_dot = state 133 | 134 | # If pendulum is within 20 degrees of upright, enable balance control 135 | if np.abs(alpha) <= (20.0 * np.pi / 180.0): 136 | action = pd_control_policy(state) 137 | else: 138 | action = square_wave_policy(state, **kwargs) 139 | return action 140 | 141 | 142 | def dampen_policy(state, **kwargs): 143 | state = _convert_state(state) 144 | theta, alpha, theta_dot, alpha_dot = state 145 | 146 | # Alt alpha angle: -pi to +pi, where 0 is the pendulum facing down (at rest) 147 | alt_alpha = (alpha + 2 * np.pi) % (2 * np.pi) - np.pi 148 | if np.abs(alt_alpha) < (20.0 * np.pi / 180.0) and np.abs(theta) < (np.pi / 4): 149 | kp_theta = 0.9 150 | kp_alpha = -3.6 151 | kd_theta = 0.7 152 | kd_alpha = -0.285 153 | if alpha >= 0: 154 | action = ( 155 | theta * kp_theta 156 | + (alpha - np.pi) * kp_alpha 157 | + theta_dot * kd_theta 158 | + alpha_dot * kd_alpha 159 | ) 160 | else: 161 | action = ( 162 | theta * kp_theta 163 | + (alpha + np.pi) * kp_alpha 164 | + theta_dot * kd_theta 165 | + alpha_dot * kd_alpha 166 | ) 167 | else: 168 | action = 0 169 | 170 | action = np.clip(action, -3.0, 3.0) 171 | return np.array([action], dtype=np.float64) 172 | 173 | 174 | # Hold policy 175 | def pd_tracking_control_policy(state, **kwargs): 176 | state = _convert_state(state) 177 | theta, alpha, theta_dot, alpha_dot, theta_target = state 178 | # multiply by proportional and derivative gains 179 | kp_theta = -2.0 180 | kp_alpha = 35.0 181 | kd_theta = -1.5 182 | kd_alpha = 3.0 183 | 184 
| # If pendulum is within 20 degrees of upright, enable balance control, else zero 185 | if np.abs(alpha) <= (20.0 * np.pi / 180.0): 186 | action = ( 187 | (theta - theta_target) * kp_theta 188 | + alpha * kp_alpha 189 | + theta_dot * kd_theta 190 | + alpha_dot * kd_alpha 191 | ) 192 | else: 193 | action = 0.0 194 | action = np.clip(action, -3.0, 3.0) 195 | return np.array([action]) 196 | -------------------------------------------------------------------------------- /gym_brt/quanser/quanser_wrapper/hil.pxd: -------------------------------------------------------------------------------- 1 | include "quanser_types.pxd" 2 | 3 | cdef extern from "/opt/quanser/hil_sdk/include/hil.h": 4 | ctypedef struct t_card: 5 | pass 6 | ctypedef struct t_task: 7 | pass 8 | ctypedef struct t_monitor: 9 | pass 10 | cdef enum t_clock: 11 | SYSTEM_CLOCK_4 = -4 12 | SYSTEM_CLOCK_3 = -3 13 | SYSTEM_CLOCK_2 = -2 14 | SYSTEM_CLOCK_1 = -1 15 | HARDWARE_CLOCK_0 = 0 16 | HARDWARE_CLOCK_1 = 1 17 | HARDWARE_CLOCK_2 = 2 18 | HARDWARE_CLOCK_3 = 3 19 | HARDWARE_CLOCK_4 = 4 20 | HARDWARE_CLOCK_5 = 5 21 | HARDWARE_CLOCK_6 = 6 22 | HARDWARE_CLOCK_7 = 7 23 | HARDWARE_CLOCK_8 = 8 24 | HARDWARE_CLOCK_9 = 9 25 | HARDWARE_CLOCK_10 = 10 26 | HARDWARE_CLOCK_11 = 11 27 | HARDWARE_CLOCK_12 = 12 28 | HARDWARE_CLOCK_13 = 13 29 | HARDWARE_CLOCK_14 = 14 30 | HARDWARE_CLOCK_15 = 15 31 | HARDWARE_CLOCK_16 = 16 32 | HARDWARE_CLOCK_17 = 17 33 | HARDWARE_CLOCK_18 = 18 34 | HARDWARE_CLOCK_19 = 19 35 | cdef enum t_buffer_overflow_mode: 36 | BUFFER_MODE_ERROR_ON_OVERFLOW, # return an error on buffer overflow (default). 37 | BUFFER_MODE_OVERWRITE_ON_OVERFLOW, # overwrite old samples on buffer overflow. 38 | BUFFER_MODE_DISCARD_ON_OVERFLOW # discard new samples on buffer overflow. 
39 | 40 | t_error hil_open( 41 | const char * card_type, 42 | const char * card_identifier, 43 | t_card * card 44 | ) 45 | t_error hil_close(t_card card) 46 | 47 | # For debugging 48 | t_error hil_close_all() # Sometimes the board doesn't safely close 49 | 50 | # Synchronous reads/writes 51 | t_error hil_set_encoder_counts( 52 | t_card card, 53 | const t_uint32 encoder_channels[], 54 | t_uint32 num_channels, 55 | const t_int32 buffer[] 56 | ) 57 | t_error hil_write_analog( 58 | t_card card, 59 | const t_uint32 analog_channels[], 60 | t_uint32 num_channels, 61 | const t_double buffer[] 62 | ) 63 | t_error hil_write_digital( 64 | t_card card, 65 | const t_uint32 digital_lines[], 66 | t_uint32 num_lines, 67 | const t_boolean buffer[] 68 | ) 69 | t_error hil_write_other( 70 | t_card card, 71 | const t_uint32 other_channels[], 72 | t_uint32 num_channels, 73 | const t_double buffer[] 74 | ) 75 | 76 | t_error hil_read_analog( 77 | t_card card, 78 | const t_uint32 analog_channels[], 79 | t_uint32 num_channels, 80 | t_double buffer[] 81 | ) 82 | t_error hil_read_encoder( 83 | t_card card, 84 | const t_uint32 encoder_channels[], 85 | t_uint32 num_channels, 86 | t_int32 buffer[] 87 | ) 88 | t_error hil_read_digital( 89 | t_card card, 90 | const t_uint32 digital_lines[], 91 | t_uint32 num_lines, 92 | t_boolean buffer[] 93 | ) 94 | t_error hil_read_other( 95 | t_card card, 96 | const t_uint32 other_channels[], 97 | t_uint32 num_channels, 98 | t_double buffer[] 99 | ) 100 | t_error hil_read( 101 | t_card card, 102 | const t_uint32 analog_channels[], t_uint32 num_analog_channels, 103 | const t_uint32 encoder_channels[], t_uint32 num_encoder_channels, 104 | const t_uint32 digital_lines[], t_uint32 num_digital_lines, 105 | const t_uint32 other_channels[], t_uint32 num_other_channels, 106 | t_double analog_buffer[], 107 | t_int32 encoder_buffer[], 108 | t_boolean digital_buffer[], 109 | t_double other_buffer[] 110 | ) 111 | 112 | # Async 113 | t_error hil_task_create_reader_writer( 
114 | t_card card, t_uint32 samples_in_buffer, 115 | const t_uint32 analog_input_channels[], 116 | t_uint32 num_analog_input_channels, 117 | const t_uint32 encoder_input_channels[], 118 | t_uint32 num_encoder_input_channels, 119 | const t_uint32 digital_input_lines[], 120 | t_uint32 num_digital_input_lines, 121 | const t_uint32 other_input_channels[], 122 | t_uint32 num_other_input_channels, 123 | 124 | const t_uint32 analog_output_channels[], 125 | t_uint32 num_analog_output_channels, 126 | const t_uint32 pwm_output_channels[], 127 | t_uint32 num_pwm_output_channels, 128 | const t_uint32 digital_output_lines[], 129 | t_uint32 num_digital_output_lines, 130 | const t_uint32 other_output_channels[], 131 | t_uint32 num_other_output_channels, 132 | t_task *task 133 | ) 134 | t_error hil_task_create_reader( 135 | t_card card, t_uint32 samples_in_buffer, 136 | const t_uint32 analog_channels[], 137 | t_uint32 num_analog_channels, 138 | const t_uint32 encoder_channels[], 139 | t_uint32 num_encoder_channels, 140 | const t_uint32 digital_lines[], 141 | t_uint32 num_digital_lines, 142 | const t_uint32 other_channels[], 143 | t_uint32 num_other_channels, 144 | t_task *task 145 | ) 146 | 147 | t_error hil_task_start( 148 | t_task task, 149 | t_clock clock, 150 | t_double frequency, 151 | t_uint32 num_samples 152 | ) 153 | t_error hil_task_flush(t_task task) 154 | t_error hil_task_stop(t_task task) 155 | t_error hil_task_delete(t_task task) 156 | 157 | t_error hil_task_read_write( 158 | t_task task, t_uint32 num_samples, 159 | t_double analog_input_buffer[], 160 | t_int32 encoder_input_buffer[], 161 | t_boolean digital_input_buffer[], 162 | t_double other_input_buffer[], 163 | 164 | const t_double analog_output_buffer[], 165 | const t_double pwm_output_buffer[], 166 | const t_boolean digital_output_buffer[], 167 | const t_double other_output_buffer[] 168 | ) 169 | t_error hil_task_write( 170 | t_task task, t_uint32 num_samples, 171 | const t_double analog_buffer[], 172 | const 
t_double pwm_buffer[], 173 | const t_boolean digital_buffer[], 174 | const t_double other_buffer[] 175 | ) 176 | t_error hil_task_read( 177 | t_task task, t_uint32 num_samples, 178 | t_double analog_buffer[], 179 | t_int32 encoder_buffer[], 180 | t_boolean digital_buffer[], 181 | t_double other_buffer[] 182 | ) 183 | 184 | # Allow buffer overflow 185 | t_error hil_task_set_buffer_overflow_mode( 186 | t_task task, 187 | t_buffer_overflow_mode mode) 188 | t_int hil_task_get_buffer_overflows(t_task task) 189 | -------------------------------------------------------------------------------- /gym_brt/quanser/qube_interfaces.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from __future__ import division 4 | 5 | import gym 6 | import time 7 | import math 8 | import numpy as np 9 | 10 | from gym import spaces 11 | from gym.utils import seeding 12 | 13 | from gym_brt.control import flip_and_hold_policy, dampen_policy 14 | 15 | # For other platforms where it's impossible to install the HIL SDK 16 | try: 17 | from gym_brt.quanser.quanser_wrapper.quanser_wrapper import QubeServo2 18 | except ImportError: 19 | print("Warning: Can not import QubeServo2 in qube_interface.py") 20 | 21 | from gym_brt.quanser.qube_simulator import forward_model_euler, forward_model_ode 22 | 23 | 24 | class QubeHardware(object): 25 | """Simplify the interface between the environment and """ 26 | 27 | def __init__(self, frequency=250, max_voltage=18.0): 28 | self._theta_dot_cstate = 0 29 | self._alpha_dot_cstate = 0 30 | self._frequency = frequency 31 | 32 | # Open the Qube 33 | self.qube = QubeServo2(frequency=frequency) # TODO: max_voltage=max_voltage 34 | self.qube.__enter__() 35 | 36 | self.state = np.array([0, 0, 0, 0], dtype=np.float64) 37 | 38 | def __enter__(self): 39 | return self 40 | 41 | def __exit__(self, type, value, traceback): 42 | self.close() 43 | 44 | def 
_match_state(s): 45 | theta, alpha, theta_dot, alpha_dot = s 46 | return np.array([]) 47 | 48 | def step(self, action, led=None): 49 | theta, alpha, theta_dot, alpha_dot = self.state 50 | currents, encoders, others = self.qube.action(action, led_w=led) 51 | 52 | # Calculate alpha, theta, alpha_dot, and theta_dot 53 | theta = encoders[0] * (-2.0 * np.pi / 2048) 54 | # Alpha without normalizing 55 | alpha_un = encoders[1] * (2.0 * np.pi / 2048) 56 | # Normalized and shifted alpha 57 | alpha = (alpha_un % (2.0 * np.pi)) - np.pi 58 | 59 | # fmt: off 60 | theta_dot = -2500 * self._theta_dot_cstate + 50 * theta 61 | alpha_dot = -2500 * self._alpha_dot_cstate + 50 * alpha_un 62 | self._theta_dot_cstate += (-50 * self._theta_dot_cstate + theta) / self._frequency 63 | self._alpha_dot_cstate += (-50 * self._alpha_dot_cstate + alpha_un) / self._frequency 64 | # fmt: on 65 | 66 | state = np.array([theta, alpha, theta_dot, alpha_dot], dtype=np.float64) 67 | return state 68 | 69 | def reset_up(self): 70 | """Run classic control for flip-up until the pendulum is inverted for 71 | a set amount of time. Assumes that initial state is stationary 72 | downwards. 
73 | """ 74 | time_hold = 1.0 * self._frequency # Hold upright 1 second 75 | sample = 0 # Samples since control system started 76 | samples_upright = 0 # Consecutive samples pendulum is upright 77 | 78 | action = np.array([1], dtype=np.float64) 79 | state = self.step([1.0]) 80 | while True: 81 | action = flip_and_hold_policy(state) 82 | state = self.step(action) 83 | 84 | # Break if pendulum is inverted 85 | alpha = state[1] 86 | if alpha < (10 * np.pi / 180): 87 | if samples_upright > time_hold: 88 | break 89 | samples_upright += 1 90 | else: 91 | samples_upright = 0 92 | sample += 1 93 | 94 | return state 95 | 96 | def reset_down(self): 97 | action = np.array([0], dtype=np.float64) 98 | time_hold = 0.5 * self._frequency # 0.5 seconds 99 | samples_downwards = 0 # Consecutive samples pendulum is stationary 100 | 101 | state = self.step([1.0]) 102 | while True: 103 | action = dampen_policy(state) 104 | state = self.step(action) 105 | 106 | # Break if pendulum is stationary 107 | alpha = state[1] 108 | if abs(alpha) > (178 * np.pi / 180): 109 | if samples_downwards > time_hold: 110 | break 111 | samples_downwards += 1 112 | else: 113 | samples_downwards = 0 114 | return state 115 | 116 | def reset_encoders(self): 117 | """Fully stop the pendulum at the bottom. 
Then reset the alpha encoder""" 118 | self.reset_down() 119 | 120 | self.step(np.array([0], dtype=np.float64)) 121 | print("Doing a hard reset and, reseting the alpha encoder") 122 | time.sleep(25) # Do nothing for 3 seconds to ensure pendulum is stopped 123 | 124 | # This is needed to prevent sensor drift on the alpha/pendulum angle 125 | # We ONLY reset the alpha channel because the dampen function stops the 126 | # pendulum from moving but does not perfectly center the pendulum at the 127 | # bottom (this way alpha is very close to perfect and theta does not 128 | # drift much) 129 | print("Pre encoder reset:", self.qube.action(np.array([0], dtype=np.float64))) 130 | self.qube.reset_encoders(channels=[1]) # Alpha channel only 131 | print("After encoder reset:", self.qube.action(np.array([0], dtype=np.float64))) 132 | 133 | def close(self, type=None, value=None, traceback=None): 134 | # Safely close the Qube 135 | self.qube.__exit__(type=type, value=value, traceback=traceback) 136 | 137 | 138 | class QubeSimulator(object): 139 | """Simulator that has the same interface as the hardware wrapper.""" 140 | 141 | def __init__( 142 | self, forward_model="ode", frequency=250, integration_steps=1, max_voltage=18.0 143 | ): 144 | if isinstance(forward_model, str): 145 | if forward_model == "ode": 146 | self._forward_model = forward_model_ode 147 | elif forward_model == "euler": 148 | self._forward_model = forward_model_euler 149 | else: 150 | raise ValueError( 151 | "'forward_model' must be one of ['ode', 'euler'] or a callable." 152 | ) 153 | elif callable(forward_model): 154 | self._forward_model = forward_model 155 | else: 156 | raise ValueError( 157 | "'forward_model' must be one of ['ode', 'euler'] or a callable." 
158 | ) 159 | 160 | self._dt = 1.0 / frequency 161 | self._integration_steps = integration_steps 162 | self._max_voltage = max_voltage 163 | self.state = ( 164 | np.array([0, 0, 0, 0], dtype=np.float64) + np.random.randn(4) * 0.01 165 | ) 166 | 167 | def __enter__(self): 168 | return self 169 | 170 | def __exit__(self, type, value, traceback): 171 | self.close() 172 | 173 | def step(self, action, led=None): 174 | action = np.clip(action, -self._max_voltage, self._max_voltage) 175 | self.state = self._forward_model( 176 | *self.state, action, self._dt, self._integration_steps 177 | ) 178 | return self.state 179 | 180 | def reset_up(self): 181 | self.state = ( 182 | np.array([0, 0, 0, 0], dtype=np.float64) + np.random.randn(4) * 0.01 183 | ) 184 | return self.state 185 | 186 | def reset_down(self): 187 | self.state = ( 188 | np.array([0, np.pi, 0, 0], dtype=np.float64) + np.random.randn(4) * 0.01 189 | ) 190 | return self.state 191 | 192 | def reset_encoders(self): 193 | pass 194 | 195 | def close(self, type=None, value=None, traceback=None): 196 | pass 197 | -------------------------------------------------------------------------------- /gym_brt/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.registration import register 2 | 3 | ### Hardware Environments ###################################################### 4 | # Swingup Environments 5 | register( 6 | id='QubeSwingupEnv-v0', 7 | entry_point='gym_brt.envs:QubeSwingupEnv', 8 | nondeterministic=True, 9 | max_episode_steps=2048, 10 | kwargs={'use_simulator': False} 11 | ) 12 | register( 13 | id='QubeSwingupSparseEnv-v0', 14 | entry_point='gym_brt.envs:QubeSwingupSparseEnv', 15 | nondeterministic=True, 16 | max_episode_steps=2048, 17 | kwargs={'use_simulator': False} 18 | ) 19 | register( 20 | id='QubeSwingupFollowEnv-v0', 21 | entry_point='gym_brt.envs:QubeSwingupFollowEnv', 22 | nondeterministic=True, 23 | max_episode_steps=2048, 24 | 
kwargs={'use_simulator': False} 25 | ) 26 | register( 27 | id='QubeSwingupFollowSparseEnv-v0', 28 | entry_point='gym_brt.envs:QubeSwingupFollowSparseEnv', 29 | nondeterministic=True, 30 | max_episode_steps=2048, 31 | kwargs={'use_simulator': False} 32 | ) 33 | # Balance Environments 34 | register( 35 | id='QubeBalanceEnv-v0', 36 | entry_point='gym_brt.envs:QubeBalanceEnv', 37 | nondeterministic=True, 38 | max_episode_steps=2048, 39 | kwargs={'use_simulator': False} 40 | ) 41 | register( 42 | id='QubeBalanceSparseEnv-v0', 43 | entry_point='gym_brt.envs:QubeBalanceSparseEnv', 44 | nondeterministic=True, 45 | max_episode_steps=2048, 46 | kwargs={'use_simulator': False} 47 | ) 48 | register( 49 | id='QubeBalanceFollowEnv-v0', 50 | entry_point='gym_brt.envs:QubeBalanceFollowEnv', 51 | nondeterministic=True, 52 | max_episode_steps=2048, 53 | kwargs={'use_simulator': False} 54 | ) 55 | register( 56 | id='QubeBalanceFollowSparseEnv-v0', 57 | entry_point='gym_brt.envs:QubeBalanceFollowSparseEnv', 58 | nondeterministic=True, 59 | max_episode_steps=2048, 60 | kwargs={'use_simulator': False} 61 | ) 62 | # Dampen Environments 63 | register( 64 | id='QubeDampenEnv-v0', 65 | entry_point='gym_brt.envs:QubeDampenEnv', 66 | nondeterministic=True, 67 | max_episode_steps=2048, 68 | kwargs={'use_simulator': False} 69 | ) 70 | register( 71 | id='QubeDampenSparseEnv-v0', 72 | entry_point='gym_brt.envs:QubeDampenSparseEnv', 73 | nondeterministic=True, 74 | max_episode_steps=2048, 75 | kwargs={'use_simulator': False} 76 | ) 77 | register( 78 | id='QubeDampenFollowEnv-v0', 79 | entry_point='gym_brt.envs:QubeDampenFollowEnv', 80 | nondeterministic=True, 81 | max_episode_steps=2048, 82 | kwargs={'use_simulator': False} 83 | ) 84 | register( 85 | id='QubeDampenFollowSparseEnv-v0', 86 | entry_point='gym_brt.envs:QubeDampenFollowSparseEnv', 87 | nondeterministic=True, 88 | max_episode_steps=2048, 89 | kwargs={'use_simulator': False} 90 | ) 91 | register( 92 | 
id='QubeBalanceFollowSineWaveEnv-v0', 93 | entry_point='gym_brt.envs:QubeBalanceFollowSineWaveEnv', 94 | nondeterministic=True, 95 | max_episode_steps=2048, 96 | kwargs={'use_simulator': False} 97 | ) 98 | # Sin wave environments 99 | register( 100 | id='QubeSwingupFollowSineWaveEnv-v0', 101 | entry_point='gym_brt.envs:QubeSwingupFollowSineWaveEnv', 102 | nondeterministic=True, 103 | max_episode_steps=2048, 104 | kwargs={'use_simulator': False} 105 | ) 106 | register( 107 | id='QubeRotorFollowSineWaveEnv-v0', 108 | entry_point='gym_brt.envs:QubeRotorFollowSineWaveEnv', 109 | nondeterministic=True, 110 | max_episode_steps=2048, 111 | kwargs={'use_simulator': False} 112 | ) 113 | register( 114 | id='QubeDampenFollowSineWaveEnv-v0', 115 | entry_point='gym_brt.envs:QubeDampenFollowSineWaveEnv', 116 | nondeterministic=True, 117 | max_episode_steps=2048, 118 | kwargs={'use_simulator': False} 119 | ) 120 | # Rotor Evironments 121 | # Note: the rotor environments have issues with the rewards. 122 | # register( 123 | # id='QubeRotorEnv-v0', 124 | # entry_point='gym_brt.envs:QubeRotorEnv', 125 | # nondeterministic=True, 126 | # max_episode_steps=2048, 127 | # kwargs={'use_simulator': False} 128 | # ) 129 | # register( 130 | # id='QubeRotorFollowEnv-v0', 131 | # entry_point='gym_brt.envs:QubeRotorFollowEnv', 132 | # nondeterministic=True, 133 | # max_episode_steps=2048, 134 | # kwargs={'use_simulator': False} 135 | # ) 136 | 137 | ### Simulated Environments ##################################################### 138 | # Swingup Environments 139 | register( 140 | id='QubeSwingupSimEnv-v0', 141 | entry_point='gym_brt.envs:QubeSwingupEnv', 142 | max_episode_steps=2048, 143 | kwargs={'use_simulator': True} 144 | ) 145 | register( 146 | id='QubeSwingupSparseSimEnv-v0', 147 | entry_point='gym_brt.envs:QubeSwingupSparseEnv', 148 | max_episode_steps=2048, 149 | kwargs={'use_simulator': True} 150 | ) 151 | register( 152 | id='QubeSwingupFollowSimEnv-v0', 153 | 
entry_point='gym_brt.envs:QubeSwingupFollowEnv', 154 | max_episode_steps=2048, 155 | kwargs={'use_simulator': True} 156 | ) 157 | register( 158 | id='QubeSwingupFollowSparseSimEnv-v0', 159 | entry_point='gym_brt.envs:QubeSwingupFollowSparseEnv', 160 | max_episode_steps=2048, 161 | kwargs={'use_simulator': True} 162 | ) 163 | # Balance Environments 164 | register( 165 | id='QubeBalanceSimEnv-v0', 166 | entry_point='gym_brt.envs:QubeBalanceEnv', 167 | max_episode_steps=2048, 168 | kwargs={'use_simulator': True} 169 | ) 170 | register( 171 | id='QubeBalanceSparseSimEnv-v0', 172 | entry_point='gym_brt.envs:QubeBalanceSparseEnv', 173 | max_episode_steps=2048, 174 | kwargs={'use_simulator': True} 175 | ) 176 | register( 177 | id='QubeBalanceFollowSimEnv-v0', 178 | entry_point='gym_brt.envs:QubeBalanceFollowEnv', 179 | max_episode_steps=2048, 180 | kwargs={'use_simulator': True} 181 | ) 182 | register( 183 | id='QubeBalanceFollowSparseSimEnv-v0', 184 | entry_point='gym_brt.envs:QubeBalanceFollowSparseEnv', 185 | max_episode_steps=2048, 186 | kwargs={'use_simulator': True} 187 | ) 188 | # Dampen Environments 189 | register( 190 | id='QubeDampenSimEnv-v0', 191 | entry_point='gym_brt.envs:QubeDampenEnv', 192 | max_episode_steps=2048, 193 | kwargs={'use_simulator': True} 194 | ) 195 | register( 196 | id='QubeDampenSparseSimEnv-v0', 197 | entry_point='gym_brt.envs:QubeDampenSparseEnv', 198 | max_episode_steps=2048, 199 | kwargs={'use_simulator': True} 200 | ) 201 | register( 202 | id='QubeDampenFollowSimEnv-v0', 203 | entry_point='gym_brt.envs:QubeDampenFollowEnv', 204 | max_episode_steps=2048, 205 | kwargs={'use_simulator': True} 206 | ) 207 | register( 208 | id='QubeDampenFollowSparseSimEnv-v0', 209 | entry_point='gym_brt.envs:QubeDampenFollowSparseEnv', 210 | max_episode_steps=2048, 211 | kwargs={'use_simulator': True} 212 | ) 213 | register( 214 | id='QubeBalanceFollowSineWaveSimEnv-v0', 215 | entry_point='gym_brt.envs:QubeBalanceFollowSineWaveEnv', 216 | 
max_episode_steps=2048, 217 |     kwargs={'use_simulator': True} 218 | ) 219 | # Sine wave environments 220 | register( 221 |     id='QubeSwingupFollowSineWaveSimEnv-v0', 222 |     entry_point='gym_brt.envs:QubeSwingupFollowSineWaveEnv', 223 |     max_episode_steps=2048, 224 |     kwargs={'use_simulator': True} 225 | ) 226 | register( 227 |     id='QubeRotorFollowSineWaveSimEnv-v0', 228 |     entry_point='gym_brt.envs:QubeRotorFollowSineWaveEnv', 229 |     max_episode_steps=2048, 230 |     kwargs={'use_simulator': True} 231 | ) 232 | register( 233 |     id='QubeDampenFollowSineWaveSimEnv-v0', 234 |     entry_point='gym_brt.envs:QubeDampenFollowSineWaveEnv', 235 |     max_episode_steps=2048, 236 |     kwargs={'use_simulator': True} 237 | ) 238 | # Rotor Environments 239 | # Note: the rotor environments have issues with the rewards. 240 | # register( 241 | #     id='QubeRotorSimEnv-v0', 242 | #     entry_point='gym_brt.envs:QubeRotorEnv', 243 | #     max_episode_steps=2048, 244 | #     kwargs={'use_simulator': True} 245 | # ) 246 | # register( 247 | #     id='QubeRotorFollowSimEnv-v0', 248 | #     entry_point='gym_brt.envs:QubeRotorFollowEnv', 249 | #     max_episode_steps=2048, 250 | #     kwargs={'use_simulator': True} 251 | # ) 252 | 253 | 254 | -------------------------------------------------------------------------------- /tests/notebooks/ODE vs Euler speedtest.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numba\n", 10 | "import numpy as np\n", 11 | "from scipy.integrate import odeint" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "# Motor\n", 21 | "Rm = 8.4 # Resistance\n", 22 | "kt = 0.042 # Current-torque (N-m/A)\n", 23 | "km = 0.042 # Back-emf constant (V-s/rad)\n", 24 | "\n", 25 | "# Rotary Arm\n", 26 | "mr = 0.095 # Mass (kg)\n", 27 | "Lr = 0.085 # Total length 
(m)\n", 28 | "Jr = mr * Lr ** 2 / 12 # Moment of inertia about pivot (kg-m^2)\n", 29 | "\n", 30 | "# Pendulum Link\n", 31 | "mp = 0.024 # Mass (kg)\n", 32 | "Lp = 0.129 # Total length (m)\n", 33 | "Jp = mp * Lp ** 2 / 12 # Moment of inertia about pivot (kg-m^2)\n", 34 | "\n", 35 | "Br = Dr = 0.0005 # Equivalent viscous damping coefficient (N-m-s/rad)\n", 36 | "Bp = Dp = 0.00005 # Equivalent viscous damping coefficient (N-m-s/rad)\n", 37 | "g = 9.81 # Gravity constant\n" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 3, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "def diff_forward_model_ode(state, t, action, dt):\n", 47 | " theta = state[0]\n", 48 | " alpha = state[1]\n", 49 | " theta_dot = state[2]\n", 50 | " alpha_dot = state[3]\n", 51 | " Vm = action\n", 52 | " tau = (km * (Vm - km * theta_dot)) / Rm # torque\n", 53 | "\n", 54 | " # fmt: off\n", 55 | " alpha_dot_dot = (2.0*Lp*Lr*mp*(4.0*Dr*theta_dot + Lp**2*alpha_dot*mp*theta_dot*np.sin(2.0*alpha) + 2.0*Lp*Lr*alpha_dot**2*mp*np.sin(alpha) - 4.0*tau)*np.cos(alpha) - 0.5*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp)*(-8.0*Dp*alpha_dot + Lp**2*mp*theta_dot**2*np.sin(2.0*alpha) + 4.0*Lp*g*mp*np.sin(alpha)))/(4.0*Lp**2*Lr**2*mp**2*np.cos(alpha)**2 - (4.0*Jp + Lp**2*mp)*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp))\n", 56 | " theta_dot_dot = (-Lp*Lr*mp*(-8.0*Dp*alpha_dot + Lp**2*mp*theta_dot**2*np.sin(2.0*alpha) + 4.0*Lp*g*mp*np.sin(alpha))*np.cos(alpha) + (4.0*Jp + Lp**2*mp)*(4.0*Dr*theta_dot + Lp**2*alpha_dot*mp*theta_dot*np.sin(2.0*alpha) + 2.0*Lp*Lr*alpha_dot**2*mp*np.sin(alpha) - 4.0*tau))/(4.0*Lp**2*Lr**2*mp**2*np.cos(alpha)**2 - (4.0*Jp + Lp**2*mp)*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp))\n", 57 | " # fmt: on\n", 58 | "\n", 59 | " diff_state = np.array([theta_dot, alpha_dot, theta_dot_dot, alpha_dot_dot])\n", 60 | " diff_state = np.array(diff_state, dtype=\"float64\")\n", 61 | " return diff_state\n", 62 | "\n", 63 | "\n", 64 | "def 
forward_model_ode(theta, alpha, theta_dot, alpha_dot, Vm, dt, integration_steps):\n", 65 | " t = np.linspace(0.0, dt, integration_steps)\n", 66 | "\n", 67 | " state = np.array([theta, alpha, theta_dot, alpha_dot])\n", 68 | " next_state = np.array(odeint(diff_forward_model_ode, state, t, args=(Vm, dt))).reshape(4,)\n", 69 | " \n", 70 | " theta = next_state[0]\n", 71 | " alpha = next_state[1]\n", 72 | " theta_dot = next_state[2]\n", 73 | " alpha_dot = next_state[3]\n", 74 | "\n", 75 | " theta = ((theta + np.pi) % (2 * np.pi)) - np.pi\n", 76 | " alpha = ((alpha + np.pi) % (2 * np.pi)) - np.pi\n", 77 | "\n", 78 | " return theta, alpha, theta_dot, alpha_dot\n" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 4, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "def forward_model_euler(theta, alpha, theta_dot, alpha_dot, Vm, dt, integration_steps):\n", 88 | " dt /= integration_steps\n", 89 | " for step in range(integration_steps):\n", 90 | " tau = (km * (Vm - km * theta_dot)) / Rm # torque\n", 91 | "\n", 92 | " # fmt: off\n", 93 | " alpha_dot_dot = (2.0*Lp*Lr*mp*(4.0*Dr*theta_dot + Lp**2*alpha_dot*mp*theta_dot*np.sin(2.0*alpha) + 2.0*Lp*Lr*alpha_dot**2*mp*np.sin(alpha) - 4.0*tau)*np.cos(alpha) - 0.5*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp)*(-8.0*Dp*alpha_dot + Lp**2*mp*theta_dot**2*np.sin(2.0*alpha) + 4.0*Lp*g*mp*np.sin(alpha)))/(4.0*Lp**2*Lr**2*mp**2*np.cos(alpha)**2 - (4.0*Jp + Lp**2*mp)*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp))\n", 94 | " theta_dot_dot = (-Lp*Lr*mp*(-8.0*Dp*alpha_dot + Lp**2*mp*theta_dot**2*np.sin(2.0*alpha) + 4.0*Lp*g*mp*np.sin(alpha))*np.cos(alpha) + (4.0*Jp + Lp**2*mp)*(4.0*Dr*theta_dot + Lp**2*alpha_dot*mp*theta_dot*np.sin(2.0*alpha) + 2.0*Lp*Lr*alpha_dot**2*mp*np.sin(alpha) - 4.0*tau))/(4.0*Lp**2*Lr**2*mp**2*np.cos(alpha)**2 - (4.0*Jp + Lp**2*mp)*(4.0*Jr + Lp**2*mp*np.sin(alpha)**2 + 4.0*Lr**2*mp))\n", 95 | " # fmt: on\n", 96 | "\n", 97 | " theta_dot += theta_dot_dot * dt\n", 98 | " 
alpha_dot += alpha_dot_dot * dt\n", 99 | "\n", 100 | " theta += theta_dot * dt\n", 101 | " alpha += alpha_dot * dt\n", 102 | "\n", 103 | " theta %= 2 * np.pi\n", 104 | " alpha %= 2 * np.pi\n", 105 | "\n", 106 | " return theta, alpha, theta_dot, alpha_dot\n" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 5, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "def f(forward_model, n=1000):\n", 116 | " for i in range(n):\n", 117 | " theta, alpha, theta_dot, alpha_dot = np.random.randn(4,) * 0.1\n", 118 | " Vm = np.random.randn()\n", 119 | " theta, alpha, theta_dot, alpha_dot = forward_model(\n", 120 | " theta, alpha, theta_dot, alpha_dot, Vm, dt=0.001, integration_steps=1\n", 121 | " )\n" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "# Without numba" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 6, 134 | "metadata": {}, 135 | "outputs": [ 136 | { 137 | "name": "stdout", 138 | "output_type": "stream", 139 | "text": [ 140 | "41.4 ms ± 3.21 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" 141 | ] 142 | } 143 | ], 144 | "source": [ 145 | "%timeit -n 100 f(forward_model_ode)" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 7, 151 | "metadata": {}, 152 | "outputs": [ 153 | { 154 | "name": "stdout", 155 | "output_type": "stream", 156 | "text": [ 157 | "54.9 ms ± 12.5 ms per loop (mean ± std. dev. 
of 7 runs, 100 loops each)\n" 158 | ] 159 | } 160 | ], 161 | "source": [ 162 | "%timeit -n 100 f(forward_model_euler)" 163 | ] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": {}, 168 | "source": [ 169 | "# With numba" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": 8, 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": [ 178 | "forward_model_ode = numba.jit(forward_model_ode)\n", 179 | "forward_model_euler = numba.jit(forward_model_euler)" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 9, 185 | "metadata": {}, 186 | "outputs": [ 187 | { 188 | "name": "stdout", 189 | "output_type": "stream", 190 | "text": [ 191 | "49.4 ms ± 2.37 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" 192 | ] 193 | } 194 | ], 195 | "source": [ 196 | "%timeit -n 100 f(forward_model_ode)" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": 10, 202 | "metadata": {}, 203 | "outputs": [ 204 | { 205 | "name": "stdout", 206 | "output_type": "stream", 207 | "text": [ 208 | "5.62 ms ± 1.41 ms per loop (mean ± std. dev. 
of 7 runs, 100 loops each)\n" 209 | ] 210 | } 211 | ], 212 | "source": [ 213 | "%timeit -n 100 f(forward_model_euler)" 214 | ] 215 | } 216 | ], 217 | "metadata": { 218 | "kernelspec": { 219 | "display_name": "TensorFlow (3.6.6)", 220 | "language": "python", 221 | "name": "tf" 222 | }, 223 | "language_info": { 224 | "codemirror_mode": { 225 | "name": "ipython", 226 | "version": 3 227 | }, 228 | "file_extension": ".py", 229 | "mimetype": "text/x-python", 230 | "name": "python", 231 | "nbconvert_exporter": "python", 232 | "pygments_lexer": "ipython3", 233 | "version": "3.6.7" 234 | } 235 | }, 236 | "nbformat": 4, 237 | "nbformat_minor": 2 238 | } 239 | -------------------------------------------------------------------------------- /gym_brt/quanser/quanser_wrapper/quanser_wrapper.pyx: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from __future__ import division 4 | 5 | cimport gym_brt.quanser.quanser_wrapper.quanser_types as qt 6 | cimport gym_brt.quanser.quanser_wrapper.hil as hil 7 | cimport numpy as np 8 | 9 | from gym_brt.quanser.quanser_wrapper.error_codes import error_codes 10 | import numpy as np 11 | 12 | 13 | cdef print_possible_error(int result): 14 | """If there is an error, print the error code""" 15 | if result < 0: 16 | print(error_codes[-result]) 17 | 18 | 19 | cdef class QuanserWrapper: 20 | cdef hil.t_card board 21 | cdef hil.t_task task 22 | 23 | cdef qt.t_uint32[::] analog_r_channels 24 | cdef qt.t_uint32 num_analog_r_channels 25 | cdef qt.t_double[::] currents_r 26 | 27 | cdef qt.t_uint32[::] analog_w_channels 28 | cdef qt.t_uint32 num_analog_w_channels 29 | cdef qt.t_double[::] voltages_w 30 | 31 | cdef qt.t_uint32[::] digital_w_channels 32 | cdef qt.t_uint32 num_digital_w_channels 33 | cdef np.npy_int8[::] enables_w # Uses npy_int8 due to arm problem 34 | 35 | cdef qt.t_uint32[::] encoder_r_channels 36 | cdef qt.t_uint32 
num_encoder_r_channels 37 | cdef qt.t_int32[::] encoder_r_buffer 38 | 39 | cdef qt.t_uint32[::] other_r_channels 40 | cdef qt.t_uint32 num_other_r_channels 41 | cdef qt.t_double[::] other_r_buffer 42 | 43 | cdef qt.t_uint32[::] led_w_channels 44 | cdef qt.t_uint32 num_led_w_channels 45 | cdef qt.t_double[::] led_w_buffer 46 | 47 | cdef qt.t_double frequency, max_voltage 48 | cdef qt.t_int samples_overflowed 49 | cdef bint task_started 50 | 51 | def __init__( 52 | self, 53 | max_voltage, 54 | analog_r_channels, 55 | analog_w_channels, 56 | digital_w_channels, 57 | encoder_r_channels, 58 | other_r_channels, 59 | led_w_channels, 60 | frequency=1000 61 | ): 62 | self.max_voltage = max_voltage 63 | # Convert the channels into numpy arrays which are then stored in 64 | # memoryviews (to pass C buffers to the HIL API) 65 | self.num_analog_r_channels = len(analog_r_channels) 66 | self.num_analog_w_channels = len(analog_w_channels) 67 | self.num_digital_w_channels = len(digital_w_channels) 68 | self.num_encoder_r_channels = len(encoder_r_channels) 69 | self.num_other_r_channels = len(other_r_channels) 70 | self.num_led_w_channels = len(led_w_channels) 71 | self.analog_r_channels = np.array(analog_r_channels, dtype=np.uint32) 72 | self.analog_w_channels = np.array(analog_w_channels, dtype=np.uint32) 73 | self.digital_w_channels = np.array(digital_w_channels, dtype=np.uint32) 74 | self.encoder_r_channels = np.array(encoder_r_channels, dtype=np.uint32) 75 | self.other_r_channels = np.array(other_r_channels, dtype=np.uint32) 76 | self.led_w_channels = np.array(led_w_channels, dtype=np.uint32) 77 | 78 | self.frequency = frequency 79 | self.task_started = False 80 | 81 | def __enter__(self): 82 | """Start the hardware in a deterministic way (all motors, 83 | encoders, etc at 0) 84 | """ 85 | # Create memoryviews for read buffers 86 | self.currents_r = np.zeros( 87 | self.num_analog_r_channels, dtype=np.float64 88 | ) # t_double is 64 bits 89 | self.other_r_buffer = np.zeros( 90 | 
self.num_other_r_channels, dtype=np.float64 91 | ) # t_double is 64 bits 92 | 93 | # Set motor voltages_w and encoders to 0 94 | self.voltages_w = np.zeros( 95 | self.num_analog_w_channels, dtype=np.float64 96 | ) # t_double is 64 bits 97 | result = hil.hil_write_analog( 98 | self.board, 99 | &self.analog_w_channels[0], 100 | self.num_analog_w_channels, 101 | &self.voltages_w[0] 102 | ) 103 | print_possible_error(result) 104 | self.encoder_r_buffer = np.zeros( 105 | self.num_encoder_r_channels, dtype=np.int32 106 | ) # t_int32 is 32 bits 107 | result = hil.hil_set_encoder_counts( 108 | self.board, 109 | &self.encoder_r_channels[0], 110 | self.num_encoder_r_channels, 111 | &self.encoder_r_buffer[0] 112 | ) 113 | print_possible_error(result) 114 | 115 | # Set LED on to white (represents opening qube) 116 | self.led_w_buffer = np.ones( 117 | self.num_led_w_channels, dtype=np.float64 118 | ) # t_double is 64 bits 119 | result = hil.hil_write_other( 120 | self.board, 121 | &self.led_w_channels[0], 122 | self.num_led_w_channels, 123 | &self.led_w_buffer[0] 124 | ) 125 | print_possible_error(result) 126 | 127 | # Enables_r all the motors 128 | self.enables_w = np.ones( 129 | self.num_digital_w_channels, dtype=np.int8 130 | ) # t_bool is char 8 bits 131 | result = hil.hil_write_digital( 132 | self.board, 133 | &self.digital_w_channels[0], 134 | self.num_digital_w_channels, 135 | &self.enables_w[0] 136 | ) 137 | print_possible_error(result) 138 | 139 | return self 140 | 141 | def __exit__(self, type, value, traceback): 142 | """Make sure hardware turns off safely""" 143 | self._stop_task() 144 | 145 | # Set the motor voltages_w to 0 146 | self.voltages_w = np.zeros( 147 | self.num_analog_w_channels, dtype=np.float64 148 | ) # t_double is 64 bits 149 | hil.hil_write_analog( 150 | self.board, 151 | &self.analog_w_channels[0], 152 | self.num_analog_w_channels, 153 | &self.voltages_w[0] 154 | ) 155 | 156 | # Set LED on to default color (red) 157 | self.led_w_buffer = np.array( 
158 | [1.0, 0.0, 0.0], dtype=np.float64 159 | ) # t_double is 64 bits 160 | result = hil.hil_write_other( 161 | self.board, 162 | &self.led_w_channels[0], 163 | self.num_led_w_channels, 164 | &self.led_w_buffer[0] 165 | ) 166 | print_possible_error(result) 167 | 168 | 169 | # Disable all the motors 170 | self.enables_w = np.zeros( 171 | self.num_digital_w_channels, dtype=np.int8 172 | ) # t_bool is char 8 bits 173 | hil.hil_write_digital( 174 | self.board, 175 | &self.digital_w_channels[0], 176 | self.num_digital_w_channels, 177 | &self.enables_w[0] 178 | ) 179 | 180 | hil.hil_close(self.board) # Safely close the board 181 | 182 | def _create_task(self): 183 | """Start a task reads and writes at fixed intervals""" 184 | result = hil.hil_task_create_reader( 185 | self.board, 186 | 1, # Read 1 sample at a time 187 | &self.analog_r_channels[0], self.num_analog_r_channels, 188 | &self.encoder_r_channels[0], self.num_encoder_r_channels, 189 | NULL, 0, 190 | &self.other_r_channels[0], self.num_other_r_channels, 191 | &self.task 192 | ) 193 | print_possible_error(result) 194 | 195 | # Allow discarding of old samples after missed reads 196 | hil.hil_task_set_buffer_overflow_mode( 197 | self.task, hil.BUFFER_MODE_OVERWRITE_ON_OVERFLOW 198 | ) 199 | 200 | # Start the task 201 | result = hil.hil_task_start( 202 | self.task, 203 | hil.HARDWARE_CLOCK_0, 204 | self.frequency, 205 | -1 # Read continuously 206 | ) 207 | print_possible_error(result) 208 | if result < 0: 209 | raise ValueError("Could not start hil task") 210 | 211 | self.task_started = True 212 | 213 | def _stop_task(self): 214 | if self.task_started: 215 | self.task_started = False 216 | hil.hil_task_flush(self.task) 217 | hil.hil_task_stop(self.task) 218 | hil.hil_task_delete(self.task) 219 | 220 | def reset_encoders(self, channels=None): 221 | """Reset all or a few of the encoders""" 222 | if channels is None: 223 | # Set the entire encoder encoder_r_buffer to 0 224 | self.encoder_r_buffer = np.zeros( 225 | 
self.num_encoder_r_channels, dtype=np.int32 226 | ) # t_int32 is 32 bits 227 | else: 228 | # Set only specific encoders to 0, while leaving the others 229 | for channel in channels: 230 | # Check if the channel is valid (in the available encoder 231 | # channels for the hardware) 232 | if channel not in self.encoder_r_channels: 233 | raise ValueError( 234 | "Channel: {} is not a possible channel on this hardware.".format(channel) 235 | ) 236 | self.encoder_r_buffer[channel] = 0 237 | 238 | result = hil.hil_set_encoder_counts( 239 | self.board, 240 | &self.encoder_r_channels[0], 241 | self.num_encoder_r_channels, 242 | &self.encoder_r_buffer[0] 243 | ) 244 | print_possible_error(result) 245 | 246 | def action(self, voltages_w, led_w=None): 247 | # If it's the first time running action, then start the background r/w 248 | # task 249 | if not self.task_started: 250 | self._create_task() 251 | 252 | # Ensure safe voltage data 253 | if isinstance(voltages_w, list): 254 | voltages_w = np.array(voltages_w, dtype=np.float64) 255 | assert isinstance(voltages_w, np.ndarray) 256 | assert voltages_w.shape == (self.num_analog_w_channels,) 257 | assert voltages_w.dtype == np.float64 258 | # Clip voltages into the range of +- max_voltage 259 | voltages_w = np.clip(voltages_w, -self.max_voltage, self.max_voltage) 260 | 261 | if led_w is not None: 262 | # Ensure safe LED data 263 | if isinstance(led_w, list): 264 | led_w = np.array(led_w, dtype=np.float64) 265 | assert led_w.shape == (self.num_led_w_channels,) 266 | assert led_w.dtype == np.float64 267 | for i in range(self.num_led_w_channels): 268 | assert 0.0 <= led_w[i] <= 1.0 # HIL uses RGB scaled from 0-1 269 | self._set_led(led_w) # An immediate write to LED (not timed task) 270 | 271 | return self._action(voltages_w) 272 | 273 | def _action( 274 | self, np.ndarray[qt.t_double, ndim=1, mode="c"] voltages_w not None 275 | ): 276 | samples_read = hil.hil_task_read( 277 | self.task, 278 | 1, # Number of samples to read 279 |
&self.currents_r[0], 280 | &self.encoder_r_buffer[0], 281 | NULL, 282 | &self.other_r_buffer[0] 283 | ) 284 | if samples_read < 0: 285 | print_possible_error(samples_read) 286 | 287 | samples_overflowed = hil.hil_task_get_buffer_overflows(self.task) 288 | if samples_overflowed > self.samples_overflowed: 289 | print( 290 | "Missed {} samples".format(samples_overflowed - self.samples_overflowed) 291 | ) 292 | self.samples_overflowed = samples_overflowed 293 | 294 | # Then write voltages_w calculated for previous time step 295 | self.voltages_w = voltages_w 296 | result_write = hil.hil_write_analog( 297 | self.board, 298 | &self.analog_w_channels[0], 299 | self.num_analog_w_channels, 300 | &self.voltages_w[0] 301 | ) 302 | if result_write < 0: 303 | print_possible_error(result_write) 304 | 305 | return ( 306 | np.asarray(self.currents_r), 307 | np.asarray(self.encoder_r_buffer), 308 | np.asarray(self.other_r_buffer), 309 | ) 310 | 311 | def _set_led( 312 | self, np.ndarray[qt.t_double, ndim=1, mode="c"] led_w not None 313 | ): 314 | self.led_w_buffer = led_w 315 | result = hil.hil_write_other( 316 | self.board, 317 | &self.led_w_channels[0], 318 | self.num_led_w_channels, 319 | &self.led_w_buffer[0] 320 | ) 321 | if result < 0: 322 | print_possible_error(result) 323 | 324 | 325 | cdef class QuanserAero(QuanserWrapper): 326 | def __cinit__(self): 327 | board_type = b"quanser_aero_usb" 328 | board_identifier = b"0" 329 | result = hil.hil_open(board_type, board_identifier, &self.board) 330 | print_possible_error(result) 331 | if result < 0: 332 | raise IOError("Board could not be opened.") 333 | 334 | def __init__(self, frequency=250, max_voltage=18.0): 335 | analog_r_channels = [0, 1] 336 | analog_w_channels = [0, 1] 337 | digital_w_channels = [0, 1] 338 | encoder_r_channels = [0, 1, 2, 3] 339 | other_r_channels = [ 340 | 3000, 3001, 3002, 4000, 4001, 4002, 14000, 14001, 14002, 14003 341 | ] 342 | led_w_channels = [11000, 11001, 11002] 343 | 344 | super(QuanserAero, 
self).__init__( 345 | max_voltage=max_voltage, 346 | analog_r_channels=analog_r_channels, 347 | analog_w_channels=analog_w_channels, 348 | digital_w_channels=digital_w_channels, 349 | encoder_r_channels=encoder_r_channels, 350 | other_r_channels=other_r_channels, 351 | led_w_channels=led_w_channels, 352 | frequency=frequency 353 | ) 354 | 355 | 356 | cdef class QubeServo2(QuanserWrapper): 357 | def __cinit__(self): 358 | board_type = b"qube_servo2_usb" 359 | board_identifier = b"0" 360 | result = hil.hil_open(board_type, board_identifier, &self.board) 361 | print_possible_error(result) 362 | if result < 0: 363 | raise IOError("Board could not be opened.") 364 | 365 | def __init__(self, frequency=250, max_voltage=18.0): 366 | analog_r_channels = [0] 367 | analog_w_channels = [0] 368 | digital_w_channels = [0] 369 | encoder_r_channels = [0, 1] 370 | other_r_channels = [14000] 371 | led_w_channels = [11000, 11001, 11002] 372 | 373 | super(QubeServo2, self).__init__( 374 | max_voltage=max_voltage, 375 | analog_r_channels=analog_r_channels, 376 | analog_w_channels=analog_w_channels, 377 | digital_w_channels=digital_w_channels, 378 | encoder_r_channels=encoder_r_channels, 379 | other_r_channels=other_r_channels, 380 | led_w_channels=led_w_channels, 381 | frequency=frequency 382 | ) 383 | --------------------------------------------------------------------------------