├── tests ├── __init__.py ├── env │ ├── __init__.py │ ├── test_first_orger_lag.py │ ├── test_two_wheeled.py │ └── test_cartpole.py ├── configs │ ├── __init__.py │ ├── test_first_order_lag.py │ ├── test_two_wheeled.py │ └── test_cartpole.py └── models │ ├── __init__.py │ ├── test_first_order_lag.py │ ├── test_model.py │ ├── test_two_wheeled.py │ └── test_cartpole.py ├── PythonLinearNonlinearControl ├── __init__.py ├── common │ ├── __init__.py │ └── utils.py ├── runners │ ├── __init__.py │ ├── make_runners.py │ └── runner.py ├── plotters │ ├── __init__.py │ ├── animator.py │ ├── plot_objs.py │ └── plot_func.py ├── planners │ ├── __init__.py │ ├── planner.py │ ├── make_planners.py │ ├── const_planner.py │ └── closest_point_planner.py ├── models │ ├── __init__.py │ ├── make_models.py │ ├── first_order_lag.py │ ├── nonlinear_sample_system.py │ ├── two_wheeled.py │ ├── cartpole.py │ └── model.py ├── configs │ ├── __init__.py │ ├── make_configs.py │ ├── first_order_lag.py │ ├── cartpole.py │ └── nonlinear_sample_system.py ├── envs │ ├── __init__.py │ ├── make_envs.py │ ├── env.py │ ├── cost.py │ ├── nonlinear_sample_system.py │ ├── first_order_lag.py │ └── cartpole.py ├── controllers │ ├── __init__.py │ ├── make_controllers.py │ ├── controller.py │ ├── random.py │ ├── nmpc.py │ ├── mppi.py │ ├── cem.py │ ├── mppi_williams.py │ ├── nmpc_cgmres.py │ └── mpc.py └── helper.py ├── assets ├── cartpole.gif ├── cartpole.png ├── concept.png ├── twowheeled.png ├── firstorderlag.png ├── cartpole_score.png ├── quadratic_score.png ├── twowheeledconst.gif ├── twowheeledtrack.gif ├── nonlinear_sample_system.png └── nonlinear_sample_system_score.png ├── .travis.yml ├── .github └── workflows │ └── python-publish.yml ├── setup.py ├── licenses └── MIT.txt ├── scripts ├── show_result.py ├── simple_run.py └── quickstart │ └── quickstart.md ├── Environments.md ├── .gitignore └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/env/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/configs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/common/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /assets/cartpole.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shunichi09/PythonLinearNonlinearControl/HEAD/assets/cartpole.gif -------------------------------------------------------------------------------- /assets/cartpole.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shunichi09/PythonLinearNonlinearControl/HEAD/assets/cartpole.png -------------------------------------------------------------------------------- /assets/concept.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shunichi09/PythonLinearNonlinearControl/HEAD/assets/concept.png -------------------------------------------------------------------------------- /assets/twowheeled.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shunichi09/PythonLinearNonlinearControl/HEAD/assets/twowheeled.png -------------------------------------------------------------------------------- /assets/firstorderlag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shunichi09/PythonLinearNonlinearControl/HEAD/assets/firstorderlag.png -------------------------------------------------------------------------------- /assets/cartpole_score.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shunichi09/PythonLinearNonlinearControl/HEAD/assets/cartpole_score.png -------------------------------------------------------------------------------- /assets/quadratic_score.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shunichi09/PythonLinearNonlinearControl/HEAD/assets/quadratic_score.png -------------------------------------------------------------------------------- /assets/twowheeledconst.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shunichi09/PythonLinearNonlinearControl/HEAD/assets/twowheeledconst.gif -------------------------------------------------------------------------------- /assets/twowheeledtrack.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shunichi09/PythonLinearNonlinearControl/HEAD/assets/twowheeledtrack.gif -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/runners/__init__.py: -------------------------------------------------------------------------------- 1 | from PythonLinearNonlinearControl.runners.runner \ 2 | import ExpRunner # NOQA 3 | -------------------------------------------------------------------------------- /assets/nonlinear_sample_system.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shunichi09/PythonLinearNonlinearControl/HEAD/assets/nonlinear_sample_system.png -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/runners/make_runners.py: -------------------------------------------------------------------------------- 1 | from .runner import ExpRunner 2 | 3 | 4 | def make_runner(args): 5 | return ExpRunner() 6 | -------------------------------------------------------------------------------- /assets/nonlinear_sample_system_score.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shunichi09/PythonLinearNonlinearControl/HEAD/assets/nonlinear_sample_system_score.png -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/plotters/__init__.py: -------------------------------------------------------------------------------- 1 | from PythonLinearNonlinearControl.plotters.animator \ 2 | import Animator 3 | from PythonLinearNonlinearControl.plotters.plot_func \ 4 | import plot_results -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/planners/__init__.py: -------------------------------------------------------------------------------- 1 | from PythonLinearNonlinearControl.planners.const_planner \ 2 | import ConstantPlanner # NOQA 3 | from PythonLinearNonlinearControl.planners.closest_point_planner \ 4 | import ClosestPointPlanner # NOQA -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - 3.7 5 | 6 | install: 7 | - pip install --upgrade pip setuptools wheel 8 | - pip install coveralls 9 | 10 | script: 11 | - coverage run --source=PythonLinearNonlinearControl setup.py pytest 12 | 13 | after_success: 14 | - coveralls -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/models/__init__.py: -------------------------------------------------------------------------------- 1 | from PythonLinearNonlinearControl.models.cartpole \ 2 | import CartPoleModel # NOQA 3 | from PythonLinearNonlinearControl.models.first_order_lag \ 4 | import FirstOrderLagModel # NOQA 5 | from PythonLinearNonlinearControl.models.two_wheeled \ 6 | import TwoWheeledModel # NOQA -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/configs/__init__.py: -------------------------------------------------------------------------------- 1 | from PythonLinearNonlinearControl.configs.cartpole \ 2 | import CartPoleConfigModule # NOQA 3 | from PythonLinearNonlinearControl.configs.first_order_lag \ 4 | import FirstOrderLagConfigModule # NOQA 5 | from PythonLinearNonlinearControl.configs.two_wheeled \ 6 | import TwoWheeledConfigModule # NOQA -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from PythonLinearNonlinearControl.envs.cartpole \ 2 | import CartPoleEnv # NOQA 3 | from PythonLinearNonlinearControl.envs.first_order_lag \ 4 | import FirstOrderLagEnv # NOQA 5 | from PythonLinearNonlinearControl.envs.two_wheeled \ 6 | import TwoWheeledConstEnv # NOQA 7 | from PythonLinearNonlinearControl.envs.two_wheeled \ 8 | import TwoWheeledTrackEnv # NOQA 9 | -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/planners/planner.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class Planner(): 5 | """ 6 | """ 7 | 8 | def __init__(self): 9 | """ 10 | """ 11 | pass 12 | 13 | def plan(self, curr_x): 14 | """ 15 | Args: 16 | curr_x (numpy.ndarray): current state, shape(state_size) 17 | Returns: 18 | g_xs (numpy.ndarrya): goal state, shape(pred_len, state_size) 19 | """ 20 | raise NotImplementedError("Implement plan func") 21 | -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/controllers/__init__.py: -------------------------------------------------------------------------------- 1 | from PythonLinearNonlinearControl.controllers.cem \ 2 | import CEM # NOQA 3 | from PythonLinearNonlinearControl.controllers.mppi \ 4 | import MPPI # NOQA 5 | from PythonLinearNonlinearControl.controllers.mppi_williams \ 6 | import MPPIWilliams # NOQA 7 | from PythonLinearNonlinearControl.controllers.random \ 8 | import RandomShooting # NOQA 9 | from PythonLinearNonlinearControl.controllers.ilqr \ 10 | import iLQR # NOQA 11 | from PythonLinearNonlinearControl.controllers.ddp \ 12 | import DDP # NOQA 13 | from PythonLinearNonlinearControl.controllers.mpc \ 14 | import LinearMPC # NOQA -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/planners/make_planners.py: -------------------------------------------------------------------------------- 1 | from .const_planner import ConstantPlanner 2 | from .closest_point_planner import ClosestPointPlanner 3 | 4 | 5 | def make_planner(args, config): 6 | 7 | if args.env == "FirstOrderLag": 8 | return ConstantPlanner(config) 9 | elif args.env == "TwoWheeledConst": 10 | return ConstantPlanner(config) 11 | elif args.env == "TwoWheeledTrack": 12 | return ClosestPointPlanner(config) 13 | elif args.env == "CartPole": 14 | return ConstantPlanner(config) 15 | elif args.env == "NonlinearSample": 16 | return ConstantPlanner(config) 17 | 18 | raise NotImplementedError( 19 | "There is not {} Planner".format(args.planner_type)) 20 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | name: Upload Python Package 2 | 3 | on: 4 | release: 5 | types: [created] 6 | 7 | jobs: 8 | deploy: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v2 12 | - name: Set up Python 3.6 13 | uses: actions/setup-python@v2 14 | with: 15 | python-version: '3.6' 16 | - name: Install dependencies 17 | run: | 18 | python -m pip install --upgrade pip 19 | pip install setuptools wheel twine 20 | - name: Build and publish 21 | env: 22 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 23 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 24 | run: | 25 | python setup.py bdist_wheel 26 | twine upload dist/* -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/models/make_models.py: -------------------------------------------------------------------------------- 1 | from .first_order_lag import FirstOrderLagModel 2 | from .two_wheeled import TwoWheeledModel 3 | from .cartpole import CartPoleModel 4 | from .nonlinear_sample_system import NonlinearSampleSystemModel 5 | 6 | 7 | def make_model(args, config): 8 | 9 | if args.env == "FirstOrderLag": 10 | return FirstOrderLagModel(config) 11 | elif args.env == "TwoWheeledConst" or args.env == "TwoWheeledTrack": 12 | return TwoWheeledModel(config) 13 | elif args.env == "CartPole": 14 | return CartPoleModel(config) 15 | elif args.env == "NonlinearSample": 16 | return NonlinearSampleSystemModel(config) 17 | 18 | raise NotImplementedError("There is not {} Model".format(args.env)) 19 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages 2 | from setuptools import setup 3 | 4 | install_requires = ['numpy', 'matplotlib', 'cvxopt', 'scipy'] 5 | tests_require = ['pytest'] 6 | setup_requires = ["pytest-runner"] 7 | 8 | setup( 9 | name='PythonLinearNonlinearControl', 10 | version='2.0', 11 | description='Implementing linear and nonlinear control method in python', 12 | author='Shunichi Sekiguchi', 13 | author_email='quick1st97of@gmail.com', 14 | install_requires=install_requires, 15 | url='https://github.com/Shunichi09/PythonLinearNonlinearControl', 16 | license='MIT License', 17 | packages=find_packages(exclude=('tests', 'scripts')), 18 | setup_requires=setup_requires, 19 | test_suite='tests', 20 | tests_require=tests_require 21 | ) -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/envs/make_envs.py: -------------------------------------------------------------------------------- 1 | from .first_order_lag import FirstOrderLagEnv 2 | from .two_wheeled import TwoWheeledConstEnv 3 | from .two_wheeled import TwoWheeledTrackEnv 4 | from .cartpole import CartPoleEnv 5 | from .nonlinear_sample_system import NonlinearSampleSystemEnv 6 | 7 | 8 | def make_env(args): 9 | 10 | if args.env == "FirstOrderLag": 11 | return FirstOrderLagEnv() 12 | elif args.env == "TwoWheeledConst": 13 | return TwoWheeledConstEnv() 14 | elif args.env == "TwoWheeledTrack": 15 | return TwoWheeledTrackEnv() 16 | elif args.env == "CartPole": 17 | return CartPoleEnv() 18 | elif args.env == "NonlinearSample": 19 | return NonlinearSampleSystemEnv() 20 | 21 | raise NotImplementedError("There is not {} Env".format(args.env)) 22 | -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/planners/const_planner.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .planner import Planner 3 | 4 | 5 | class ConstantPlanner(Planner): 6 | """ This planner make constant goal state 7 | """ 8 | 9 | def __init__(self, config): 10 | """ 11 | """ 12 | super(ConstantPlanner, self).__init__() 13 | self.pred_len = config.PRED_LEN 14 | self.state_size = config.STATE_SIZE 15 | 16 | def plan(self, curr_x, g_x=None): 17 | """ 18 | Args: 19 | curr_x (numpy.ndarray): current state, shape(state_size) 20 | g_x (numpy.ndarray): goal state, shape(state_size), 21 | this state should be obtained from env 22 | Returns: 23 | g_xs (numpy.ndarrya): goal state, shape(pred_len, state_size) 24 | """ 25 | return np.tile(g_x, (self.pred_len+1, 1)) 26 | -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/configs/make_configs.py: -------------------------------------------------------------------------------- 1 | from .first_order_lag import FirstOrderLagConfigModule 2 | from .two_wheeled import TwoWheeledConfigModule, TwoWheeledExtendConfigModule 3 | from .cartpole import CartPoleConfigModule 4 | from .nonlinear_sample_system import NonlinearSampleSystemConfigModule, NonlinearSampleSystemExtendConfigModule 5 | 6 | 7 | def make_config(args): 8 | """ 9 | Returns: 10 | config (ConfigModule class): configuration for the each env 11 | """ 12 | if args.env == "FirstOrderLag": 13 | return FirstOrderLagConfigModule() 14 | elif args.env == "TwoWheeledConst" or args.env == "TwoWheeledTrack": 15 | if args.controller_type == "NMPCCGMRES": 16 | return TwoWheeledExtendConfigModule() 17 | return TwoWheeledConfigModule() 18 | elif args.env == "CartPole": 19 | return CartPoleConfigModule() 20 | elif args.env == "NonlinearSample": 21 | if args.controller_type == "NMPCCGMRES": 22 | return NonlinearSampleSystemExtendConfigModule() 23 | return NonlinearSampleSystemConfigModule() 24 | -------------------------------------------------------------------------------- /licenses/MIT.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Shunichi Sekiguchi 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /tests/env/test_first_orger_lag.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from PythonLinearNonlinearControl.envs.first_order_lag import FirstOrderLagEnv 5 | 6 | class TestFirstOrderLagEnv(): 7 | 8 | def test_step(self): 9 | env = FirstOrderLagEnv() 10 | 11 | curr_x = np.ones(4) 12 | 13 | env.reset(init_x=curr_x) 14 | 15 | u = np.ones(2) * 0.1 16 | 17 | next_x, _, _, _ = env.step(u) 18 | 19 | dx = np.dot(env.A, curr_x[:, np.newaxis]) 20 | du = np.dot(env.B, u[:, np.newaxis]) 21 | 22 | expected = (dx + du).flatten() 23 | 24 | assert next_x == pytest.approx(expected, abs=1e-5) 25 | 26 | def test_bound_step(self): 27 | env = FirstOrderLagEnv() 28 | 29 | curr_x = np.ones(4) 30 | 31 | env.reset(init_x=curr_x) 32 | 33 | u = np.ones(2) * 1e5 34 | 35 | next_x, _, _, _ = env.step(u) 36 | 37 | dx = np.dot(env.A, curr_x[:, np.newaxis]) 38 | du = np.dot(env.B, 39 | np.array(env.config["input_upper_bound"])[:, np.newaxis]) 40 | 41 | expected = (dx + du).flatten() 42 | 43 | assert next_x == pytest.approx(expected, abs=1e-5) -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/controllers/make_controllers.py: -------------------------------------------------------------------------------- 1 | from .mpc import LinearMPC 2 | from .cem import CEM 3 | from .random import RandomShooting 4 | from .mppi import MPPI 5 | from .mppi_williams import MPPIWilliams 6 | from .ilqr import iLQR 7 | from .ddp import DDP 8 | from .nmpc import NMPC 9 | from .nmpc_cgmres import NMPCCGMRES 10 | 11 | 12 | def make_controller(args, config, model): 13 | 14 | if args.controller_type == "MPC": 15 | return LinearMPC(config, model) 16 | elif args.controller_type == "CEM": 17 | return CEM(config, model) 18 | elif args.controller_type == "Random": 19 | return RandomShooting(config, model) 20 | elif args.controller_type == "MPPI": 21 | return MPPI(config, model) 22 | elif args.controller_type == "MPPIWilliams": 23 | return MPPIWilliams(config, model) 24 | elif args.controller_type == "iLQR": 25 | return iLQR(config, model) 26 | elif args.controller_type == "DDP": 27 | return DDP(config, model) 28 | elif args.controller_type == "NMPC": 29 | return NMPC(config, model) 30 | elif args.controller_type == "NMPCCGMRES": 31 | return NMPCCGMRES(config, model) 32 | 33 | raise ValueError("No controller: {}".format(args.controller_type)) 34 | -------------------------------------------------------------------------------- /tests/configs/test_first_order_lag.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from PythonLinearNonlinearControl.configs.first_order_lag \ 5 | import FirstOrderLagConfigModule 6 | 7 | class TestCalcCost(): 8 | def test_calc_costs(self): 9 | # make config 10 | config = FirstOrderLagConfigModule() 11 | # set 12 | pred_len = 5 13 | state_size = 4 14 | input_size = 2 15 | pop_size = 2 16 | pred_xs = np.ones((pop_size, pred_len, state_size)) 17 | g_xs = np.ones((pop_size, pred_len, state_size)) * 0.5 18 | input_samples = np.ones((pop_size, pred_len, input_size)) * 0.5 19 | 20 | costs = config.input_cost_fn(input_samples) 21 | expected_costs = np.ones((pop_size, pred_len, input_size))*0.5 22 | 23 | assert costs == pytest.approx(expected_costs**2 * np.diag(config.R)) 24 | 25 | costs = config.state_cost_fn(pred_xs, g_xs) 26 | expected_costs = np.ones((pop_size, pred_len, state_size))*0.5 27 | 28 | assert costs == pytest.approx(expected_costs**2 * np.diag(config.Q)) 29 | 30 | costs = config.terminal_state_cost_fn(pred_xs[:, -1, :],\ 31 | g_xs[:, -1, :]) 32 | expected_costs = np.ones((pop_size, state_size))*0.5 33 | 34 | assert costs == pytest.approx(expected_costs**2 * np.diag(config.Sf)) -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/planners/closest_point_planner.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .planner import Planner 3 | 4 | 5 | class ClosestPointPlanner(Planner): 6 | """ This planner make goal state according to goal path 7 | """ 8 | 9 | def __init__(self, config): 10 | """ 11 | """ 12 | super(ClosestPointPlanner, self).__init__() 13 | self.pred_len = config.PRED_LEN 14 | self.state_size = config.STATE_SIZE 15 | self.n_ahead = config.N_AHEAD 16 | 17 | def plan(self, curr_x, g_traj): 18 | """ 19 | Args: 20 | curr_x (numpy.ndarray): current state, shape(state_size) 21 | g_x (numpy.ndarray): goal state, shape(state_size), 22 | this state should be obtained from env 23 | Returns: 24 | g_xs (numpy.ndarrya): goal state, shape(pred_len+1, state_size) 25 | """ 26 | min_idx = np.argmin(np.linalg.norm(curr_x[:-1] - g_traj[:, :-1], 27 | axis=1)) 28 | 29 | start = (min_idx+self.n_ahead) 30 | if start > len(g_traj): 31 | start = len(g_traj) 32 | 33 | end = min_idx+self.n_ahead+self.pred_len+1 34 | 35 | if (min_idx+self.n_ahead+self.pred_len+1) > len(g_traj): 36 | end = len(g_traj) 37 | 38 | if abs(start - end) != self.pred_len + 1: 39 | return np.tile(g_traj[-1], (self.pred_len+1, 1)) 40 | 41 | return g_traj[start:end] 42 | -------------------------------------------------------------------------------- /tests/models/test_first_order_lag.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from PythonLinearNonlinearControl.models.model \ 5 | import LinearModel 6 | from PythonLinearNonlinearControl.models.first_order_lag \ 7 | import FirstOrderLagModel 8 | from PythonLinearNonlinearControl.configs.first_order_lag \ 9 | import FirstOrderLagConfigModule 10 | 11 | from unittest.mock import patch 12 | from unittest.mock import Mock 13 | 14 | class TestFirstOrderLagModel(): 15 | """ 16 | """ 17 | def test_step(self): 18 | config = FirstOrderLagConfigModule() 19 | firstorderlag_model = FirstOrderLagModel(config) 20 | 21 | curr_x = np.ones(config.STATE_SIZE) 22 | u = np.ones((1, config.INPUT_SIZE)) 23 | 24 | with patch.object(LinearModel, "predict_traj") as mock_predict_traj: 25 | firstorderlag_model.predict_traj(curr_x, u) 26 | 27 | mock_predict_traj.assert_called_once_with(curr_x, u) 28 | 29 | def test_predict_traj(self): 30 | 31 | config = FirstOrderLagConfigModule() 32 | firstorderlag_model = FirstOrderLagModel(config) 33 | 34 | curr_x = np.ones(config.STATE_SIZE) 35 | curr_x[-1] = np.pi / 6. 36 | u = np.ones((1, config.INPUT_SIZE)) 37 | 38 | pred_xs = firstorderlag_model.predict_traj(curr_x, u) 39 | 40 | u = np.tile(u, (1, 1, 1)) 41 | pred_xs_alltogether = firstorderlag_model.predict_traj(curr_x, u)[0] 42 | 43 | assert pred_xs_alltogether == pytest.approx(pred_xs) -------------------------------------------------------------------------------- /tests/env/test_two_wheeled.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from PythonLinearNonlinearControl.envs.two_wheeled import TwoWheeledConstEnv 5 | 6 | class TestTwoWheeledEnv(): 7 | """ 8 | """ 9 | def test_step(self): 10 | env = TwoWheeledConstEnv() 11 | 12 | curr_x = np.ones(3) 13 | curr_x[-1] = np.pi / 6. 14 | 15 | env.reset(init_x=curr_x) 16 | 17 | u = np.ones(2) 18 | 19 | next_x, _, _, _ = env.step(u) 20 | 21 | pos_x = np.cos(curr_x[-1]) * u[0] * env.config["dt"] + curr_x[0] 22 | pos_y = np.sin(curr_x[-1]) * u[0] * env.config["dt"] + curr_x[1] 23 | 24 | expected = np.array([pos_x, pos_y,\ 25 | curr_x[-1] + u[1] * env.config["dt"]]) 26 | 27 | assert next_x == pytest.approx(expected) 28 | 29 | def test_bound_step(self): 30 | env = TwoWheeledConstEnv() 31 | 32 | curr_x = np.ones(3) 33 | curr_x[-1] = np.pi / 6. 34 | 35 | env.reset(init_x=curr_x) 36 | 37 | u = np.ones(2) * 1e3 38 | 39 | next_x, _, _, _ = env.step(u) 40 | 41 | pos_x = np.cos(curr_x[-1]) * env.config["input_upper_bound"][0] \ 42 | * env.config["dt"] + curr_x[0] 43 | pos_y = np.sin(curr_x[-1]) * env.config["input_upper_bound"][0] \ 44 | * env.config["dt"] + curr_x[1] 45 | 46 | expected = np.array([pos_x, pos_y,\ 47 | curr_x[-1] + env.config["input_upper_bound"][1] \ 48 | * env.config["dt"]]) 49 | 50 | assert next_x == pytest.approx(expected) -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/runners/runner.py: -------------------------------------------------------------------------------- 1 | from logging import getLogger 2 | 3 | import numpy as np 4 | 5 | logger = getLogger(__name__) 6 | 7 | 8 | class ExpRunner(): 9 | """ experiment runner 10 | """ 11 | 12 | def __init__(self): 13 | """ 14 | """ 15 | pass 16 | 17 | def run(self, env, controller, planner): 18 | """ 19 | Returns: 20 | history_x (numpy.ndarray): history of the state, 21 | shape(episode length, state_size) 22 | history_u (numpy.ndarray): history of the state, 23 | shape(episode length, input_size) 24 | """ 25 | done = False 26 | curr_x, info = env.reset() 27 | history_x, history_u, history_g = [], [], [] 28 | step_count = 0 29 | score = 0. 30 | 31 | while not done: 32 | logger.debug("Step = {}".format(step_count)) 33 | # plan 34 | g_xs = planner.plan(curr_x, info["goal_state"]) 35 | 36 | # obtain sol 37 | u = controller.obtain_sol(curr_x, g_xs) 38 | 39 | # step 40 | next_x, cost, done, info = env.step(u) 41 | 42 | # save 43 | history_u.append(u) 44 | history_x.append(curr_x) 45 | history_g.append(g_xs[0]) 46 | # update 47 | curr_x = next_x 48 | score += cost 49 | step_count += 1 50 | 51 | logger.debug("Controller type = {}, Score = {}" 52 | .format(controller, score)) 53 | return np.array(history_x), np.array(history_u), np.array(history_g) 54 | -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/envs/env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class Env(): 5 | """ Environments class 6 | Attributes: 7 | 8 | curr_x (numpy.ndarray): current state 9 | history_x (list[numpy.ndarray]): historty of state, shape(step_count*state_size) 10 | step_count (int): step count 11 | """ 12 | 13 | def __init__(self, config): 14 | """ 15 | """ 16 | self.config = config 17 | self.curr_x = None 18 | self.goal_state = None 19 | self.history_x = [] 20 | self.history_g_x = [] 21 | self.step_count = None 22 | 23 | def reset(self, init_x=None): 24 | """ reset state 25 | Returns: 26 | init_x (numpy.ndarray): initial state, shape(state_size, ) 27 | info (dict): information 28 | """ 29 | self.step_count = 0 30 | 31 | self.curr_x = np.zeros(self.config["state_size"]) 32 | 33 | if init_x is not None: 34 | self.curr_x = init_x 35 | 36 | # clear memory 37 | self.history_x = [] 38 | self.history_g_x = [] 39 | 40 | return self.curr_x, {} 41 | 42 | def step(self, u): 43 | """ 44 | Args: 45 | u (numpy.ndarray) : input, shape(input_size, ) 46 | Returns: 47 | next_x (numpy.ndarray): next state, shape(state_size, ) 48 | cost (float): costs 49 | done (bool): end the simulation or not 50 | info (dict): information 51 | """ 52 | raise NotImplementedError("Implement step function") 53 | 54 | def __repr__(self): 55 | """ 56 | """ 57 | return self.config 58 | -------------------------------------------------------------------------------- /tests/models/test_model.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from PythonLinearNonlinearControl.models.model import LinearModel 5 | 6 | class TestLinearModel(): 7 | """ 8 | """ 9 | def test_predict(self): 10 | 11 | A = np.array([[1., 0.1], 12 | [0.1, 1.5]]) 13 | B = np.array([[0.2], [0.5]]) 14 | curr_x = np.ones(2) * 0.5 15 | u = np.ones((1, 1)) 16 | 17 | linear_model = LinearModel(A, B) 18 | pred_xs = linear_model.predict_traj(curr_x, u) 19 | 20 | assert pred_xs == pytest.approx(np.array([[0.5, 0.5], [0.75, 1.3]])) 21 | 22 | def test_alltogether(self): 23 | 24 | A = np.array([[1., 0.1], 25 | [0.1, 1.5]]) 26 | B = np.array([[0.2], [0.5]]) 27 | curr_x = np.ones(2) * 0.5 28 | u = np.ones((1, 1)) 29 | 30 | linear_model = LinearModel(A, B) 31 | pred_xs = linear_model.predict_traj(curr_x, u) 32 | 33 | u = np.tile(u, (1, 1, 1)) 34 | pred_xs_alltogether = linear_model.predict_traj(curr_x, u)[0] 35 | 36 | assert pred_xs_alltogether == pytest.approx(pred_xs) 37 | 38 | def test_alltogether_val(self): 39 | 40 | A = np.array([[1., 0.1], 41 | [0.1, 1.5]]) 42 | B = np.array([[0.2], [0.5]]) 43 | curr_x = np.ones(2) * 0.5 44 | u = np.stack((np.ones((1, 1)), np.ones((1, 1))*0.5), axis=0) 45 | 46 | linear_model = LinearModel(A, B) 47 | 48 | pred_xs_alltogether = linear_model.predict_traj(curr_x, u) 49 | 50 | expected_val = np.array([[[0.5, 0.5], [0.75, 1.3]], 51 | [[0.5, 0.5], [0.65, 1.05]]]) 52 | 53 | assert pred_xs_alltogether == pytest.approx(expected_val) -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/envs/cost.py: -------------------------------------------------------------------------------- 1 | from logging import getLogger 2 | 3 | import numpy as np 4 | 5 | logger = getLogger(__name__) 6 | 7 | 8 | def calc_cost(pred_xs, input_sample, g_xs, 9 | state_cost_fn, input_cost_fn, terminal_state_cost_fn): 10 | """ calculate the cost 11 | 12 | Args: 13 | pred_xs (numpy.ndarray): predicted state trajectory, 14 | shape(pop_size, pred_len+1, state_size) 15 | input_sample (numpy.ndarray): inputs samples trajectory, 16 | shape(pop_size, pred_len+1, input_size) 17 | g_xs (numpy.ndarray): goal state trajectory, 18 | shape(pop_size, pred_len+1, state_size) 19 | state_cost_fn (function): state cost fucntion 20 | input_cost_fn (function): input cost fucntion 21 | terminal_state_cost_fn (function): terminal state cost fucntion 22 | Returns: 23 | cost (numpy.ndarray): cost of the input sample, shape(pop_size, ) 24 | """ 25 | # state cost 26 | state_cost = 0. 27 | if state_cost_fn is not None: 28 | state_pred_par_cost = state_cost_fn( 29 | pred_xs[:, 1:-1, :], g_xs[:, 1:-1, :]) 30 | state_cost = np.sum(np.sum(state_pred_par_cost, axis=-1), axis=-1) 31 | 32 | # terminal cost 33 | terminal_state_cost = 0. 34 | if terminal_state_cost_fn is not None: 35 | terminal_state_par_cost = terminal_state_cost_fn(pred_xs[:, -1, :], 36 | g_xs[:, -1, :]) 37 | terminal_state_cost = np.sum(terminal_state_par_cost, axis=-1) 38 | 39 | # act cost 40 | act_cost = 0. 41 | if input_cost_fn is not None: 42 | act_pred_par_cost = input_cost_fn(input_sample) 43 | act_cost = np.sum(np.sum(act_pred_par_cost, axis=-1), axis=-1) 44 | 45 | return state_cost + terminal_state_cost + act_cost 46 | -------------------------------------------------------------------------------- /scripts/show_result.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import argparse 4 | import pickle 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | 8 | from PythonLinearNonlinearControl.plotters.plot_func import load_plot_data, \ 9 | plot_multi_result 10 | 11 | 12 | def run(args): 13 | 14 | controllers = ["iLQR", "DDP", "CEM", "MPPI"] 15 | 16 | history_xs = None 17 | history_us = None 18 | history_gs = None 19 | 20 | # load data 21 | for controller in controllers: 22 | history_x, history_u, history_g = \ 23 | load_plot_data(args.env, controller, 24 | result_dir=args.result_dir) 25 | 26 | if history_xs is None: 27 | history_xs = history_x[np.newaxis, :] 28 | history_us = history_u[np.newaxis, :] 29 | history_gs = history_g[np.newaxis, :] 30 | continue 31 | 32 | history_xs = np.concatenate((history_xs, 33 | history_x[np.newaxis, :]), axis=0) 34 | history_us = np.concatenate((history_us, 35 | history_u[np.newaxis, :]), axis=0) 36 | history_gs = np.concatenate((history_gs, 37 | history_g[np.newaxis, :]), axis=0) 38 | 39 | plot_multi_result(history_xs, histories_g=history_gs, labels=controllers, 40 | ylabel="x") 41 | 42 | plot_multi_result(history_us, histories_g=np.zeros_like(history_us), 43 | labels=controllers, ylabel="u", name="input_history") 44 | 45 | 46 | def main(): 47 | parser = argparse.ArgumentParser() 48 | 49 | parser.add_argument("--env", type=str, default="FirstOrderLag") 50 | parser.add_argument("--result_dir", type=str, default="./result") 51 | 52 | args = parser.parse_args() 53 | 54 | run(args) 55 | 56 | 57 | if __name__ == "__main__": 58 | main() 59 | -------------------------------------------------------------------------------- /Environments.md: -------------------------------------------------------------------------------- 1 | # Enviroments 2 | 3 | | Name | Linear | Nonlinear | State Size | Input size | 4 | |:----------|:---------------:|:----------------:|:----------------:|:----------------:| 5 | | First Order Lag System | ✓ | x | 4 | 2 | 6 | | Two wheeled System (Constant Goal) | x | ✓ | 3 | 2 | 7 | | Two wheeled System (Moving Goal) (Coming soon) | x | ✓ | 3 | 2 | 8 | | Cartpole (Swing up) | x | ✓ | 4 | 1 | 9 | | Nonlinear Sample System Env | x | ✓ | 2 | 1 | 10 | 11 | 12 | ## [FistOrderLagEnv](PythonLinearNonlinearControl/envs/first_order_lag.py) 13 | 14 | ### System equation. 15 | 16 | 17 | 18 | You can set arbinatry time constant, tau. The default is 0.63 s 19 | 20 | ### Cost. 21 | 22 | 23 | 24 | Q = diag[1., 1., 1., 1.], 25 | R = diag[1., 1.] 26 | 27 | X_g denote the goal states. 28 | 29 | ## [TwoWheeledEnv](PythonLinearNonlinearControl/envs/two_wheeled.py) 30 | 31 | ### System equation. 32 | 33 | 34 | 35 | ### Cost. 36 | 37 | 38 | 39 | Q = diag[5., 5., 1.], 40 | R = diag[0.1, 0.1] 41 | 42 | X_g denote the goal states. 43 | 44 | ## [CatpoleEnv (Swing up)](PythonLinearNonlinearControl/envs/cartpole.py) 45 | 46 | ## System equation. 47 | 48 | 49 | 50 | You can set arbinatry parameters, mc, mp, l and g. 51 | 52 | Default settings are as follows: 53 | 54 | mc = 1, mp = 0.2, l = 0.5, g = 9.81 55 | 56 | ### Cost. 57 | 58 | 59 | 60 | ## [Nonlinear Sample System Env](PythonLinearNonlinearControl/envs/nonlinear_sample_system.py) 61 | 62 | ## System equation. 63 | 64 | 65 | 66 | ### Cost. 67 | 68 | 69 | -------------------------------------------------------------------------------- /scripts/simple_run.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from PythonLinearNonlinearControl.helper import bool_flag, make_logger 4 | from PythonLinearNonlinearControl.controllers.make_controllers import make_controller 5 | from PythonLinearNonlinearControl.planners.make_planners import make_planner 6 | from PythonLinearNonlinearControl.configs.make_configs import make_config 7 | from PythonLinearNonlinearControl.models.make_models import make_model 8 | from PythonLinearNonlinearControl.envs.make_envs import make_env 9 | from PythonLinearNonlinearControl.runners.make_runners import make_runner 10 | from PythonLinearNonlinearControl.plotters.plot_func import plot_results, \ 11 | save_plot_data 12 | from PythonLinearNonlinearControl.plotters.animator import Animator 13 | 14 | 15 | def run(args): 16 | make_logger(args.result_dir) 17 | 18 | env = make_env(args) 19 | 20 | config = make_config(args) 21 | 22 | planner = make_planner(args, config) 23 | 24 | model = make_model(args, config) 25 | 26 | controller = make_controller(args, config, model) 27 | 28 | runner = make_runner(args) 29 | 30 | history_x, history_u, history_g = runner.run(env, controller, planner) 31 | 32 | plot_results(history_x, history_u, history_g=history_g, args=args) 33 | save_plot_data(history_x, history_u, history_g=history_g, args=args) 34 | 35 | if args.save_anim: 36 | animator = Animator(env, args=args) 37 | animator.draw(history_x, history_g) 38 | 39 | 40 | def main(): 41 | parser = argparse.ArgumentParser() 42 | 43 | parser.add_argument("--controller_type", type=str, default="NMPCCGMRES") 44 | parser.add_argument("--env", type=str, default="TwoWheeledConst") 45 | parser.add_argument("--save_anim", type=bool_flag, default=0) 46 | parser.add_argument("--result_dir", type=str, default="./result") 47 | 48 | args = parser.parse_args() 49 | 50 | run(args) 51 | 52 | 53 | if __name__ == "__main__": 54 | main() 55 | -------------------------------------------------------------------------------- /scripts/quickstart/quickstart.md: -------------------------------------------------------------------------------- 1 | # PythonLinearnNonlinearControl Quickstart Guide 2 | 3 | This is a quickstart guide for users who just want to try PythonLinearNonlinearControl. 4 | If you have not installed PythonLinearNonLinearControl, please see the section of "how to setup" in README.md 5 | 6 | When we design control systems, we should have Environment, Model, Planner, Controller and Runner. 7 | Therefore your script contains those Modules. 8 | 9 | First, import each Modules from PythonLinearNonlinearControl. 10 | 11 | ```py 12 | from PythonLinearNonlinearControl import configs 13 | from PythonLinearNonlinearControl import envs 14 | from PythonLinearNonlinearControl import models 15 | from PythonLinearNonlinearControl import planners 16 | from PythonLinearNonlinearControl import controllers 17 | from PythonLinearNonlinearControl import runners 18 | ``` 19 | 20 | Configs contains each modules configurations such as cost functions, prediction length, ...etc. 21 | 22 | Then you can make each module. (This is example about CEM and CartPole env) 23 | 24 | ```py 25 | config = configs.CartPoleConfigModule() 26 | env = envs.CartPoleEnv() 27 | model = models.CartPoleModel(config) 28 | planner = controllers.CEM(config, model) 29 | runner = planners.ConstantPlanner(config) 30 | controller = runners.ExpRunner() 31 | ``` 32 | 33 | The preparation for experiment has done! 34 | Please run the runner. 35 | 36 | ```py 37 | history_x, history_u, history_g = runner.run(env, controller, planner) 38 | ``` 39 | 40 | You can get the results of history of state, history of input and history of goal. 41 | Use that histories to visualize the Animation or Figures. 42 | (Note FirstOrderEnv does not support animation) 43 | 44 | ```py 45 | # plot results 46 | plot_results(args, history_x, history_u, history_g=history_g) 47 | save_plot_data(args, history_x, history_u, history_g=history_g) 48 | 49 | # create animation 50 | animator = Animator(args, env) 51 | animator.draw(history_x, history_g) 52 | ``` -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/models/first_order_lag.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.linalg 3 | from scipy import integrate 4 | from .model import LinearModel 5 | 6 | 7 | class FirstOrderLagModel(LinearModel): 8 | """ first order lag model 9 | Attributes: 10 | curr_x (numpy.ndarray): 11 | u (numpy.ndarray): 12 | history_pred_xs (numpy.ndarray): 13 | """ 14 | 15 | def __init__(self, config, tau=0.63): 16 | """ 17 | Args: 18 | tau (float): time constant 19 | """ 20 | # param 21 | self.A, self.B = self._to_state_space( 22 | tau, dt=config.DT) # discrete system 23 | super(FirstOrderLagModel, self).__init__(self.A, self.B) 24 | 25 | @staticmethod 26 | def _to_state_space(tau, dt=0.05): 27 | """ 28 | Args: 29 | tau (float): time constant 30 | dt (float): discrte time 31 | Returns: 32 | A (numpy.ndarray): discrete A matrix 33 | B (numpy.ndarray): discrete B matrix 34 | """ 35 | # continuous 36 | Ac = np.array([[-1./tau, 0., 0., 0.], 37 | [0., -1./tau, 0., 0.], 38 | [1., 0., 0., 0.], 39 | [0., 1., 0., 0.]]) 40 | Bc = np.array([[1./tau, 0.], 41 | [0., 1./tau], 42 | [0., 0.], 43 | [0., 0.]]) 44 | # to discrete system 45 | A = scipy.linalg.expm(dt*Ac) 46 | # B = np.matmul(np.matmul(scipy.linalg.expm(Ac*dt)-scipy.linalg.expm(Ac*0.), np.linalg.inv(Ac)), Bc) 47 | B = np.zeros_like(Bc) 48 | for m in range(Bc.shape[0]): 49 | for n in range(Bc.shape[1]): 50 | def integrate_fn(tau): return np.matmul( 51 | scipy.linalg.expm(Ac*tau), Bc)[m, n] 52 | sol = integrate.quad(integrate_fn, 0, dt) 53 | B[m, n] = sol[0] 54 | 55 | return A, B 56 | -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/controllers/controller.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from ..envs.cost import calc_cost 4 | 5 | 6 | class Controller(): 7 | """ Controller class 8 | """ 9 | 10 | def __init__(self, config, model): 11 | """ 12 | """ 13 | self.config = config 14 | self.model = model 15 | 16 | # get cost func 17 | self.state_cost_fn = config.state_cost_fn 18 | self.terminal_state_cost_fn = config.terminal_state_cost_fn 19 | self.input_cost_fn = config.input_cost_fn 20 | 21 | def obtain_sol(self, curr_x, g_xs): 22 | """ calculate the optimal inputs 23 | Args: 24 | curr_x (numpy.ndarray): current state, shape(state_size, ) 25 | g_xs (numpy.ndarrya): goal trajectory, shape(plan_len, state_size) 26 | Returns: 27 | opt_input (numpy.ndarray): optimal input, shape(input_size, ) 28 | """ 29 | raise NotImplementedError("Implement the algorithm to \ 30 | get optimal input") 31 | 32 | def calc_cost(self, curr_x, samples, g_xs): 33 | """ calculate the cost of input samples 34 | 35 | Args: 36 | curr_x (numpy.ndarray): shape(state_size), 37 | current robot position 38 | samples (numpy.ndarray): shape(pop_size, opt_dim), 39 | input samples 40 | g_xs (numpy.ndarray): shape(pred_len, state_size), 41 | goal states 42 | Returns: 43 | costs (numpy.ndarray): shape(pop_size, ) 44 | """ 45 | # get size 46 | pop_size = samples.shape[0] 47 | g_xs = np.tile(g_xs, (pop_size, 1, 1)) 48 | 49 | # calc cost, pred_xs.shape = (pop_size, pred_len+1, state_size) 50 | pred_xs = self.model.predict_traj(curr_x, samples) 51 | 52 | # get particle cost 53 | costs = calc_cost(pred_xs, samples, g_xs, 54 | self.state_cost_fn, self.input_cost_fn, 55 | self.terminal_state_cost_fn) 56 | 57 | return costs 58 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # folders 2 | .vscode/ 3 | .pytest_cache/ 4 | result/ 5 | 6 | # Byte-compiled / optimized / DLL files 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | 11 | # C extensions 12 | *.so 13 | 14 | # Distribution / packaging 15 | .Python 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | wheels/ 28 | pip-wheel-metadata/ 29 | share/python-wheels/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | MANIFEST 34 | 35 | # PyInstaller 36 | # Usually these files are written by a python script from a template 37 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 38 | *.manifest 39 | *.spec 40 | 41 | # Installer logs 42 | pip-log.txt 43 | pip-delete-this-directory.txt 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .nox/ 49 | .coverage 50 | .coverage.* 51 | .cache 52 | nosetests.xml 53 | coverage.xml 54 | *.cover 55 | *.py,cover 56 | .hypothesis/ 57 | .pytest_cache/ 58 | 59 | # Translations 60 | *.mo 61 | *.pot 62 | 63 | # Django stuff: 64 | *.log 65 | local_settings.py 66 | db.sqlite3 67 | db.sqlite3-journal 68 | 69 | # Flask stuff: 70 | instance/ 71 | .webassets-cache 72 | 73 | # Scrapy stuff: 74 | .scrapy 75 | 76 | # Sphinx documentation 77 | docs/_build/ 78 | 79 | # PyBuilder 80 | target/ 81 | 82 | # Jupyter Notebook 83 | .ipynb_checkpoints 84 | 85 | # IPython 86 | profile_default/ 87 | ipython_config.py 88 | 89 | # pyenv 90 | .python-version 91 | 92 | # pipenv 93 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 94 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 95 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 96 | # install all needed dependencies. 97 | #Pipfile.lock 98 | 99 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 100 | __pypackages__/ 101 | 102 | # Celery stuff 103 | celerybeat-schedule 104 | celerybeat.pid 105 | 106 | # SageMath parsed files 107 | *.sage.py 108 | 109 | # Environments 110 | .env 111 | .venv 112 | venv/ 113 | ENV/ 114 | env.bak/ 115 | venv.bak/ 116 | 117 | # Spyder project settings 118 | .spyderproject 119 | .spyproject 120 | 121 | # Rope project settings 122 | .ropeproject 123 | 124 | # mkdocs documentation 125 | /site 126 | 127 | # mypy 128 | .mypy_cache/ 129 | .dmypy.json 130 | dmypy.json 131 | 132 | # Pyre type checker 133 | .pyre/ 134 | -------------------------------------------------------------------------------- /tests/env/test_cartpole.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from PythonLinearNonlinearControl.envs.cartpole import CartPoleEnv 5 | 6 | class TestCartPoleEnv(): 7 | """ 8 | """ 9 | def test_step(self): 10 | env = CartPoleEnv() 11 | 12 | curr_x = np.ones(4) 13 | curr_x[2] = np.pi / 6. 14 | 15 | env.reset(init_x=curr_x) 16 | 17 | u = np.ones(1) 18 | 19 | next_x, _, _, _ = env.step(u) 20 | 21 | d_x0 = curr_x[1] 22 | d_x1 = (1. + env.config["mp"] * np.sin(np.pi / 6.) \ 23 | * (env.config["l"] * (1.**2) \ 24 | + env.config["g"] * np.cos(np.pi / 6.))) \ 25 | / (env.config["mc"] + env.config["mp"] * np.sin(np.pi / 6.)**2) 26 | d_x2 = curr_x[3] 27 | d_x3 = (-1. * np.cos(np.pi / 6.) \ 28 | - env.config["mp"] * env.config["l"] * (1.**2) \ 29 | * np.cos(np.pi / 6.) * np.sin(np.pi / 6.) \ 30 | - (env.config["mp"] + env.config["mc"]) * env.config["g"] \ 31 | * np.sin(np.pi / 6.)) \ 32 | / (env.config["l"] \ 33 | * (env.config["mc"] \ 34 | + env.config["mp"] * np.sin(np.pi / 6.)**2)) 35 | 36 | expected = np.array([d_x0, d_x1, d_x2, d_x3]) * env.config["dt"] \ 37 | + curr_x 38 | 39 | assert next_x == pytest.approx(expected, abs=1e-5) 40 | 41 | def test_bound_step(self): 42 | env = CartPoleEnv() 43 | 44 | curr_x = np.ones(4) 45 | curr_x[2] = np.pi / 6. 46 | 47 | env.reset(init_x=curr_x) 48 | 49 | u = np.ones(1) * 1e3 50 | 51 | next_x, _, _, _ = env.step(u) 52 | 53 | u = env.config["input_upper_bound"][0] 54 | 55 | d_x0 = curr_x[1] 56 | d_x1 = (u + env.config["mp"] * np.sin(np.pi / 6.) \ 57 | * (env.config["l"] * (1.**2) \ 58 | + env.config["g"] * np.cos(np.pi / 6.))) \ 59 | / (env.config["mc"] + env.config["mp"] * np.sin(np.pi / 6.)**2) 60 | d_x2 = curr_x[3] 61 | d_x3 = (-u * np.cos(np.pi / 6.) \ 62 | - env.config["mp"] * env.config["l"] * (1.**2) \ 63 | * np.cos(np.pi / 6.) * np.sin(np.pi / 6.) \ 64 | - (env.config["mp"] + env.config["mc"]) * env.config["g"] \ 65 | * np.sin(np.pi / 6.)) \ 66 | / (env.config["l"] \ 67 | * (env.config["mc"] \ 68 | + env.config["mp"] * np.sin(np.pi / 6.)**2)) 69 | 70 | expected = np.array([d_x0, d_x1, d_x2, d_x3]) * env.config["dt"] \ 71 | + curr_x 72 | 73 | assert next_x == pytest.approx(expected, abs=1e-5) -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/controllers/random.py: -------------------------------------------------------------------------------- 1 | from logging import getLogger 2 | 3 | import numpy as np 4 | import scipy.stats as stats 5 | 6 | from .controller import Controller 7 | from ..envs.cost import calc_cost 8 | 9 | logger = getLogger(__name__) 10 | 11 | 12 | class RandomShooting(Controller): 13 | """ Random Shooting Method for linear and nonlinear method 14 | 15 | Attributes: 16 | history_u (list[numpy.ndarray]): time history of optimal input 17 | Ref: 18 | Chua, K., Calandra, R., McAllister, R., & Levine, S. (2018). 19 | Deep reinforcement learning in a handful of trials 20 | using probabilistic dynamics models. 21 | In Advances in Neural Information Processing Systems (pp. 4754-4765). 22 | """ 23 | 24 | def __init__(self, config, model): 25 | super(RandomShooting, self).__init__(config, model) 26 | 27 | # model 28 | self.model = model 29 | 30 | # general parameters 31 | self.pred_len = config.PRED_LEN 32 | self.input_size = config.INPUT_SIZE 33 | 34 | # cem parameters 35 | self.pop_size = config.opt_config["Random"]["popsize"] 36 | self.opt_dim = self.input_size * self.pred_len 37 | 38 | # get bound 39 | self.input_upper_bounds = np.tile(config.INPUT_UPPER_BOUND, 40 | self.pred_len) 41 | self.input_lower_bounds = np.tile(config.INPUT_LOWER_BOUND, 42 | self.pred_len) 43 | 44 | # get cost func 45 | self.state_cost_fn = config.state_cost_fn 46 | self.terminal_state_cost_fn = config.terminal_state_cost_fn 47 | self.input_cost_fn = config.input_cost_fn 48 | 49 | # save 50 | self.history_u = [] 51 | 52 | def obtain_sol(self, curr_x, g_xs): 53 | """ calculate the optimal inputs 54 | 55 | Args: 56 | curr_x (numpy.ndarray): current state, shape(state_size, ) 57 | g_xs (numpy.ndarrya): goal trajectory, shape(plan_len, state_size) 58 | Returns: 59 | opt_input (numpy.ndarray): optimal input, shape(input_size, ) 60 | """ 61 | # set different seed 62 | np.random.seed() 63 | 64 | samples = np.random.uniform(self.input_lower_bounds, 65 | self.input_upper_bounds, 66 | [self.pop_size, self.opt_dim]) 67 | # calc cost 68 | costs = self.calc_cost(curr_x, 69 | samples.reshape(self.pop_size, 70 | self.pred_len, 71 | self.input_size), 72 | g_xs) 73 | # solution 74 | sol = samples[np.argmin(costs)] 75 | 76 | return sol.reshape(self.pred_len, self.input_size).copy()[0] 77 | 78 | def __str__(self): 79 | return "RandomShooting" 80 | -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/plotters/animator.py: -------------------------------------------------------------------------------- 1 | import os 2 | from logging import getLogger 3 | 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | from matplotlib.animation import FuncAnimation 7 | import matplotlib.animation as animation 8 | 9 | logger = getLogger(__name__) 10 | 11 | 12 | class Animator(): 13 | """ animation class 14 | """ 15 | 16 | def __init__(self, env, args=None): 17 | """ 18 | """ 19 | self.env_name = "Env" 20 | self.result_dir = "./result" 21 | self.controller_type = "controller" 22 | 23 | if args is not None: 24 | self.env_name = args.env 25 | self.result_dir = args.result_dir 26 | self.controller_type = args.controller_type 27 | 28 | self.interval = env.config["dt"] * 1000. # to ms 29 | self.plot_func = env.plot_func 30 | 31 | self.imgs = [] 32 | 33 | def _setup(self): 34 | """ set up figure of animation 35 | """ 36 | # make fig 37 | self.anim_fig = plt.figure() 38 | 39 | # axis 40 | self.axis = self.anim_fig.add_subplot(111) 41 | self.axis.set_aspect('equal', adjustable='box') 42 | 43 | self.imgs = self.plot_func(self.axis) 44 | 45 | def _update_img(self, i, history_x, history_g_x): 46 | """ update animation 47 | 48 | Args: 49 | i (int): frame count 50 | history_x (numpy.ndarray): history of state, 51 | shape(iters, state_size) 52 | history_g (numpy.ndarray): history of goal state, 53 | shape(iters, input_size) 54 | """ 55 | self.plot_func(self.imgs, i, history_x, history_g_x) 56 | 57 | def draw(self, history_x, history_g_x=None): 58 | """draw the animation and save 59 | 60 | Args: 61 | history_x (numpy.ndarray): history of state, 62 | shape(iters, state_size) 63 | history_g (numpy.ndarray): history of goal state, 64 | shape(iters, input_size) 65 | Returns: 66 | None 67 | """ 68 | # set up animation figures 69 | self._setup() 70 | def _update_img(i): return self._update_img(i, history_x, history_g_x) 71 | 72 | # Set up formatting for the movie files 73 | Writer = animation.writers['ffmpeg'] 74 | writer = Writer(fps=15, metadata=dict(artist='Me'), bitrate=1800) 75 | 76 | # call funcanimation 77 | ani = FuncAnimation( 78 | self.anim_fig, 79 | _update_img, interval=self.interval, frames=len(history_x)-1) 80 | 81 | # save animation 82 | path = os.path.join(self.result_dir, self.controller_type, 83 | "animation-" + self.env_name + ".mp4") 84 | logger.info("Saved Animation to {} ...".format(path)) 85 | 86 | ani.save(path, writer=writer) 87 | -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/envs/nonlinear_sample_system.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy 3 | from scipy import integrate 4 | from .env import Env 5 | from ..common.utils import update_state_with_Runge_Kutta 6 | 7 | 8 | class NonlinearSampleSystemEnv(Env): 9 | """ Nonlinear Sample Env 10 | """ 11 | 12 | def __init__(self): 13 | """ 14 | """ 15 | self.config = {"state_size": 2, 16 | "input_size": 1, 17 | "dt": 0.01, 18 | "max_step": 2000, 19 | "input_lower_bound": [-0.5], 20 | "input_upper_bound": [0.5], 21 | } 22 | 23 | super(NonlinearSampleSystemEnv, self).__init__(self.config) 24 | 25 | def reset(self, init_x=np.array([2., 0.])): 26 | """ reset state 27 | Returns: 28 | init_x (numpy.ndarray): initial state, shape(state_size, ) 29 | info (dict): information 30 | """ 31 | self.step_count = 0 32 | 33 | self.curr_x = np.zeros(self.config["state_size"]) 34 | 35 | if init_x is not None: 36 | self.curr_x = init_x 37 | 38 | # goal 39 | self.g_x = np.array([0., 0.]) 40 | 41 | # clear memory 42 | self.history_x = [] 43 | self.history_g_x = [] 44 | 45 | return self.curr_x, {"goal_state": self.g_x} 46 | 47 | def step(self, u): 48 | """ 49 | Args: 50 | u (numpy.ndarray) : input, shape(input_size, ) 51 | Returns: 52 | next_x (numpy.ndarray): next state, shape(state_size, ) 53 | cost (float): costs 54 | done (bool): end the simulation or not 55 | info (dict): information 56 | """ 57 | # clip action 58 | u = np.clip(u, 59 | self.config["input_lower_bound"], 60 | self.config["input_upper_bound"]) 61 | 62 | functions = [self._func_x_1, self._func_x_2] 63 | 64 | next_x = update_state_with_Runge_Kutta(self.curr_x, u, 65 | functions, self.config["dt"], 66 | batch=False) 67 | 68 | # cost 69 | cost = 0 70 | cost = np.sum(u**2) 71 | cost += np.sum((self.curr_x - self.g_x)**2) 72 | 73 | # save history 74 | self.history_x.append(next_x.flatten()) 75 | self.history_g_x.append(self.g_x.flatten()) 76 | 77 | # update 78 | self.curr_x = next_x.flatten() 79 | # update costs 80 | self.step_count += 1 81 | 82 | return next_x.flatten(), cost, \ 83 | self.step_count > self.config["max_step"], \ 84 | {"goal_state": self.g_x} 85 | 86 | def _func_x_1(self, x, u): 87 | x_dot = x[1] 88 | return x_dot 89 | 90 | def _func_x_2(self, x, u): 91 | x_dot = (1. - x[0]**2 - x[1]**2) * x[1] - x[0] + u[0] 92 | return x_dot 93 | 94 | def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None): 95 | """ 96 | """ 97 | raise ValueError("NonlinearSampleSystemEnv does not have animation") 98 | -------------------------------------------------------------------------------- /tests/models/test_two_wheeled.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from PythonLinearNonlinearControl.models.two_wheeled import TwoWheeledModel 5 | from PythonLinearNonlinearControl.configs.two_wheeled \ 6 | import TwoWheeledConfigModule 7 | 8 | class TestTwoWheeledModel(): 9 | """ 10 | """ 11 | def test_step(self): 12 | config = TwoWheeledConfigModule() 13 | two_wheeled_model = TwoWheeledModel(config) 14 | 15 | curr_x = np.ones(config.STATE_SIZE) 16 | curr_x[-1] = np.pi / 6. 17 | u = np.ones((1, config.INPUT_SIZE)) 18 | 19 | next_x = two_wheeled_model.predict_traj(curr_x, u) 20 | 21 | pos_x = np.cos(curr_x[-1]) * u[0, 0] * config.DT + curr_x[0] 22 | pos_y = np.sin(curr_x[-1]) * u[0, 0] * config.DT + curr_x[1] 23 | 24 | expected = np.array([[1., 1., np.pi / 6.], 25 | [pos_x, pos_y, curr_x[-1] + u[0, 1] * config.DT]]) 26 | 27 | assert next_x == pytest.approx(expected) 28 | 29 | def test_predict_traj(self): 30 | config = TwoWheeledConfigModule() 31 | two_wheeled_model = TwoWheeledModel(config) 32 | 33 | curr_x = np.ones(config.STATE_SIZE) 34 | curr_x[-1] = np.pi / 6. 35 | u = np.ones((1, config.INPUT_SIZE)) 36 | 37 | pred_xs = two_wheeled_model.predict_traj(curr_x, u) 38 | 39 | u = np.tile(u, (1, 1, 1)) 40 | pred_xs_alltogether = two_wheeled_model.predict_traj(curr_x, u)[0] 41 | 42 | assert pred_xs_alltogether == pytest.approx(pred_xs) 43 | 44 | def test_gradient_state(self): 45 | 46 | config = TwoWheeledConfigModule() 47 | two_wheeled_model = TwoWheeledModel(config) 48 | 49 | xs = np.ones((1, config.STATE_SIZE)) 50 | xs[0, -1] = np.pi / 6. 51 | us = np.ones((1, config.INPUT_SIZE)) 52 | 53 | grad = two_wheeled_model.calc_f_x(xs, us, config.DT) 54 | 55 | # expected cost 56 | expected_grad = np.zeros((1, config.STATE_SIZE, config.STATE_SIZE)) 57 | eps = 1e-4 58 | 59 | for i in range(config.STATE_SIZE): 60 | tmp_x = xs.copy() 61 | tmp_x[0, i] = xs[0, i] + eps 62 | forward = \ 63 | two_wheeled_model.predict_next_state(tmp_x[0], us[0]) 64 | tmp_x = xs.copy() 65 | tmp_x[0, i] = xs[0, i] - eps 66 | backward = \ 67 | two_wheeled_model.predict_next_state(tmp_x[0], us[0]) 68 | 69 | expected_grad[0, :, i] = (forward - backward) / (2. * eps) 70 | 71 | assert grad == pytest.approx(expected_grad) 72 | 73 | def test_gradient_input(self): 74 | 75 | config = TwoWheeledConfigModule() 76 | two_wheeled_model = TwoWheeledModel(config) 77 | 78 | xs = np.ones((1, config.STATE_SIZE)) 79 | xs[0, -1] = np.pi / 6. 80 | us = np.ones((1, config.INPUT_SIZE)) 81 | 82 | grad = two_wheeled_model.calc_f_u(xs, us, config.DT) 83 | 84 | # expected cost 85 | expected_grad = np.zeros((1, config.STATE_SIZE, config.INPUT_SIZE)) 86 | eps = 1e-4 87 | 88 | for i in range(config.INPUT_SIZE): 89 | tmp_u = us.copy() 90 | tmp_u[0, i] = us[0, i] + eps 91 | forward = \ 92 | two_wheeled_model.predict_next_state(xs[0], tmp_u[0]) 93 | tmp_u = us.copy() 94 | tmp_u[0, i] = us[0, i] - eps 95 | backward = \ 96 | two_wheeled_model.predict_next_state(xs[0], tmp_u[0]) 97 | 98 | expected_grad[0, :, i] = (forward - backward) / (2. * eps) 99 | 100 | assert grad == pytest.approx(expected_grad) 101 | -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/plotters/plot_objs.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | 6 | from ..common.utils import rotate_pos 7 | 8 | 9 | def circle(center_x, center_y, radius, start=0., end=2*np.pi, n_point=100): 10 | """ Create circle matrix 11 | 12 | Args: 13 | center_x (float): the center x position of the circle 14 | center_y (float): the center y position of the circle 15 | radius (float): in meters 16 | start (float): start angle 17 | end (float): end angle 18 | Returns: 19 | circle x : numpy.ndarray 20 | circle y : numpy.ndarray 21 | """ 22 | diff = end - start 23 | 24 | circle_xs = [] 25 | circle_ys = [] 26 | 27 | for i in range(n_point + 1): 28 | circle_xs.append(center_x + radius * np.cos(i*diff/n_point + start)) 29 | circle_ys.append(center_y + radius * np.sin(i*diff/n_point + start)) 30 | 31 | return np.array(circle_xs), np.array(circle_ys) 32 | 33 | 34 | def circle_with_angle(center_x, center_y, radius, angle): 35 | """ Create circle matrix with angle line matrix 36 | 37 | Args: 38 | center_x (float): the center x position of the circle 39 | center_y (float): the center y position of the circle 40 | radius (float): in meters 41 | angle (float): in radians 42 | Returns: 43 | circle_x (numpy.ndarray): x data of circle 44 | circle_y (numpy.ndarray): y data of circle 45 | angle_x (numpy.ndarray): x data of circle angle 46 | angle_y (numpy.ndarray): y data of circle angle 47 | """ 48 | circle_x, circle_y = circle(center_x, center_y, radius) 49 | 50 | angle_x = np.array([center_x, center_x + np.cos(angle) * radius]) 51 | angle_y = np.array([center_y, center_y + np.sin(angle) * radius]) 52 | 53 | return circle_x, circle_y, angle_x, angle_y 54 | 55 | 56 | def square(center_x, center_y, shape, angle): 57 | """ Create square 58 | 59 | Args: 60 | center_x (float): the center x position of the square 61 | center_y (float): the center y position of the square 62 | shape (tuple): the square's shape(width/2, height/2) 63 | angle (float): in radians 64 | Returns: 65 | square_x (numpy.ndarray): shape(5, ), counterclockwise from right-up 66 | square_y (numpy.ndarray): shape(5, ), counterclockwise from right-up 67 | """ 68 | # start with the up right points 69 | # create point in counterclockwise, local 70 | square_xy = np.array([[shape[0], shape[1]], 71 | [-shape[0], shape[1]], 72 | [-shape[0], -shape[1]], 73 | [shape[0], -shape[1]], 74 | [shape[0], shape[1]]]) 75 | # translate position to world 76 | # rotation 77 | trans_points = rotate_pos(square_xy, angle) 78 | # translation 79 | trans_points += np.array([center_x, center_y]) 80 | 81 | return trans_points[:, 0], trans_points[:, 1] 82 | 83 | 84 | def square_with_angle(center_x, center_y, shape, angle): 85 | """ Create square with angle line 86 | 87 | Args: 88 | center_x (float): the center x position of the square 89 | center_y (float): the center y position of the square 90 | shape (tuple): the square's shape(width/2, height/2) 91 | angle (float): in radians 92 | Returns: 93 | square_x (numpy.ndarray): shape(5, ), counterclockwise from right-up 94 | square_y (numpy.ndarray): shape(5, ), counterclockwise from right-up 95 | angle_x (numpy.ndarray): x data of square angle 96 | angle_y (numpy.ndarray): y data of square angle 97 | """ 98 | square_x, square_y = square(center_x, center_y, shape, angle) 99 | 100 | angle_x = np.array([center_x, center_x + np.cos(angle) * shape[0]]) 101 | angle_y = np.array([center_y, center_y + np.sin(angle) * shape[1]]) 102 | 103 | return square_x, square_y, angle_x, angle_y 104 | -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/envs/first_order_lag.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy 3 | from scipy import integrate 4 | from .env import Env 5 | 6 | 7 | class FirstOrderLagEnv(Env): 8 | """ First Order Lag System Env 9 | """ 10 | 11 | def __init__(self, tau=0.63): 12 | """ 13 | """ 14 | self.config = {"state_size": 4, 15 | "input_size": 2, 16 | "dt": 0.05, 17 | "max_step": 500, 18 | "input_lower_bound": [-0.5, -0.5], 19 | "input_upper_bound": [0.5, 0.5], 20 | } 21 | 22 | super(FirstOrderLagEnv, self).__init__(self.config) 23 | 24 | # to get discrete system matrix 25 | self.A, self.B = self._to_state_space(tau, dt=self.config["dt"]) 26 | 27 | @staticmethod 28 | def _to_state_space(tau, dt=0.05): 29 | """ 30 | Args: 31 | tau (float): time constant 32 | dt (float): discrte time 33 | Returns: 34 | A (numpy.ndarray): discrete A matrix 35 | B (numpy.ndarray): discrete B matrix 36 | """ 37 | # continuous 38 | Ac = np.array([[-1./tau, 0., 0., 0.], 39 | [0., -1./tau, 0., 0.], 40 | [1., 0., 0., 0.], 41 | [0., 1., 0., 0.]]) 42 | Bc = np.array([[1./tau, 0.], 43 | [0., 1./tau], 44 | [0., 0.], 45 | [0., 0.]]) 46 | # to discrete system 47 | A = scipy.linalg.expm(dt*Ac) 48 | # B = np.matmul(np.matmul(scipy.linalg.expm(Ac*dt) - 49 | # scipy.linalg.expm(Ac*0.), np.linalg.inv(Ac)),\ 50 | # Bc) 51 | B = np.zeros_like(Bc) 52 | for m in range(Bc.shape[0]): 53 | for n in range(Bc.shape[1]): 54 | integrate_fn =\ 55 | lambda tau: np.matmul(scipy.linalg.expm(Ac*tau), Bc)[m, n] 56 | sol = integrate.quad(integrate_fn, 0, dt) 57 | B[m, n] = sol[0] 58 | 59 | return A, B 60 | 61 | def reset(self, init_x=None): 62 | """ reset state 63 | Returns: 64 | init_x (numpy.ndarray): initial state, shape(state_size, ) 65 | info (dict): information 66 | """ 67 | self.step_count = 0 68 | 69 | self.curr_x = np.zeros(self.config["state_size"]) 70 | 71 | if init_x is not None: 72 | self.curr_x = init_x 73 | 74 | # goal 75 | self.g_x = np.array([0., 0, -2., 3.]) 76 | 77 | # clear memory 78 | self.history_x = [] 79 | self.history_g_x = [] 80 | 81 | return self.curr_x, {"goal_state": self.g_x} 82 | 83 | def step(self, u): 84 | """ 85 | Args: 86 | u (numpy.ndarray) : input, shape(input_size, ) 87 | Returns: 88 | next_x (numpy.ndarray): next state, shape(state_size, ) 89 | cost (float): costs 90 | done (bool): end the simulation or not 91 | info (dict): information 92 | """ 93 | # clip action 94 | u = np.clip(u, 95 | self.config["input_lower_bound"], 96 | self.config["input_upper_bound"]) 97 | 98 | next_x = np.matmul(self.A, self.curr_x[:, np.newaxis]) \ 99 | + np.matmul(self.B, u[:, np.newaxis]) 100 | 101 | # cost 102 | cost = 0 103 | cost = np.sum(u**2) 104 | cost += np.sum((self.curr_x - self.g_x)**2) 105 | 106 | # save history 107 | self.history_x.append(next_x.flatten()) 108 | self.history_g_x.append(self.g_x.flatten()) 109 | 110 | # update 111 | self.curr_x = next_x.flatten() 112 | # update costs 113 | self.step_count += 1 114 | 115 | return next_x.flatten(), cost, \ 116 | self.step_count > self.config["max_step"], \ 117 | {"goal_state": self.g_x} 118 | 119 | def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None): 120 | """ 121 | """ 122 | raise ValueError("FirstOrderLag does not have animation") 123 | -------------------------------------------------------------------------------- /tests/models/test_cartpole.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from PythonLinearNonlinearControl.models.cartpole import CartPoleModel 5 | from PythonLinearNonlinearControl.configs.cartpole \ 6 | import CartPoleConfigModule 7 | 8 | class TestCartPoleModel(): 9 | """ 10 | """ 11 | def test_step(self): 12 | config = CartPoleConfigModule() 13 | cartpole_model = CartPoleModel(config) 14 | 15 | curr_x = np.ones(4) 16 | curr_x[2] = np.pi / 6. 17 | 18 | us = np.ones((1, 1)) 19 | 20 | next_x = cartpole_model.predict_traj(curr_x, us) 21 | 22 | d_x0 = curr_x[1] 23 | d_x1 = (1. + config.MP * np.sin(np.pi / 6.) \ 24 | * (config.L * (1.**2) \ 25 | + config.G * np.cos(np.pi / 6.))) \ 26 | / (config.MC + config.MP * np.sin(np.pi / 6.)**2) 27 | d_x2 = curr_x[3] 28 | d_x3 = (-1. * np.cos(np.pi / 6.) \ 29 | - config.MP * config.L * (1.**2) \ 30 | * np.cos(np.pi / 6.) * np.sin(np.pi / 6.) \ 31 | - (config.MP + config.MC) * config.G \ 32 | * np.sin(np.pi / 6.)) \ 33 | / (config.L \ 34 | * (config.MC \ 35 | + config.MP * np.sin(np.pi / 6.)**2)) 36 | 37 | expected = np.array([d_x0, d_x1, d_x2, d_x3]) * config.DT \ 38 | + curr_x 39 | 40 | expected = np.stack((curr_x, expected), axis=0) 41 | 42 | assert next_x == pytest.approx(expected, abs=1e-5) 43 | 44 | def test_predict_traj(self): 45 | config = CartPoleConfigModule() 46 | cartpole_model = CartPoleModel(config) 47 | 48 | curr_x = np.ones(config.STATE_SIZE) 49 | curr_x[-1] = np.pi / 6. 50 | u = np.ones((1, config.INPUT_SIZE)) 51 | 52 | pred_xs = cartpole_model.predict_traj(curr_x, u) 53 | 54 | u = np.tile(u, (2, 1, 1)) 55 | pred_xs_alltogether = cartpole_model.predict_traj(curr_x, u)[0] 56 | 57 | assert pred_xs_alltogether == pytest.approx(pred_xs) 58 | 59 | def test_gradient_state(self): 60 | 61 | config = CartPoleConfigModule() 62 | cartpole_model = CartPoleModel(config) 63 | 64 | xs = np.ones((1, config.STATE_SIZE)) \ 65 | * np.random.rand(1, config.STATE_SIZE) 66 | xs[0, -1] = np.pi / 6. 67 | us = np.ones((1, config.INPUT_SIZE)) 68 | 69 | grad = cartpole_model.calc_f_x(xs, us, config.DT) 70 | 71 | # expected cost 72 | expected_grad = np.zeros((1, config.STATE_SIZE, config.STATE_SIZE)) 73 | eps = 1e-4 74 | 75 | for i in range(config.STATE_SIZE): 76 | tmp_x = xs.copy() 77 | tmp_x[0, i] = xs[0, i] + eps 78 | forward = \ 79 | cartpole_model.predict_next_state(tmp_x[0], us[0]) 80 | tmp_x = xs.copy() 81 | tmp_x[0, i] = xs[0, i] - eps 82 | backward = \ 83 | cartpole_model.predict_next_state(tmp_x[0], us[0]) 84 | 85 | expected_grad[0, :, i] = (forward - backward) / (2. * eps) 86 | 87 | assert grad == pytest.approx(expected_grad) 88 | 89 | def test_gradient_input(self): 90 | 91 | config = CartPoleConfigModule() 92 | cartpole_model = CartPoleModel(config) 93 | 94 | xs = np.ones((1, config.STATE_SIZE)) \ 95 | * np.random.rand(1, config.STATE_SIZE) 96 | xs[0, -1] = np.pi / 6. 97 | us = np.ones((1, config.INPUT_SIZE)) 98 | 99 | grad = cartpole_model.calc_f_u(xs, us, config.DT) 100 | 101 | # expected cost 102 | expected_grad = np.zeros((1, config.STATE_SIZE, config.INPUT_SIZE)) 103 | eps = 1e-4 104 | 105 | for i in range(config.INPUT_SIZE): 106 | tmp_u = us.copy() 107 | tmp_u[0, i] = us[0, i] + eps 108 | forward = \ 109 | cartpole_model.predict_next_state(xs[0], tmp_u[0]) 110 | tmp_u = us.copy() 111 | tmp_u[0, i] = us[0, i] - eps 112 | backward = \ 113 | cartpole_model.predict_next_state(xs[0], tmp_u[0]) 114 | 115 | expected_grad[0, :, i] = (forward - backward) / (2. * eps) 116 | 117 | assert grad == pytest.approx(expected_grad) 118 | -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/controllers/nmpc.py: -------------------------------------------------------------------------------- 1 | from logging import getLogger 2 | 3 | import numpy as np 4 | import scipy.stats as stats 5 | 6 | from .controller import Controller 7 | from ..envs.cost import calc_cost 8 | from ..common.utils import line_search 9 | 10 | logger = getLogger(__name__) 11 | 12 | 13 | class NMPC(Controller): 14 | def __init__(self, config, model): 15 | """ Nonlinear Model Predictive Control using pure gradient algorithm 16 | """ 17 | super(NMPC, self).__init__(config, model) 18 | 19 | # model 20 | self.model = model 21 | 22 | # get cost func 23 | self.state_cost_fn = config.state_cost_fn 24 | self.terminal_state_cost_fn = config.terminal_state_cost_fn 25 | self.input_cost_fn = config.input_cost_fn 26 | 27 | # controller parameters 28 | self.threshold = config.opt_config["NMPC"]["threshold"] 29 | self.max_iters = config.opt_config["NMPC"]["max_iters"] 30 | self.learning_rate = config.opt_config["NMPC"]["learning_rate"] 31 | self.optimizer_mode = config.opt_config["NMPC"]["optimizer_mode"] 32 | 33 | # general parameters 34 | self.pred_len = config.PRED_LEN 35 | self.input_size = config.INPUT_SIZE 36 | self.dt = config.DT 37 | 38 | # initialize 39 | self.prev_sol = np.zeros((self.pred_len, self.input_size)) 40 | 41 | def obtain_sol(self, curr_x, g_xs): 42 | """ calculate the optimal inputs 43 | Args: 44 | curr_x (numpy.ndarray): current state, shape(state_size, ) 45 | g_xs (numpy.ndarrya): goal trajectory, shape(plan_len, state_size) 46 | Returns: 47 | opt_input (numpy.ndarray): optimal input, shape(input_size, ) 48 | """ 49 | sol = self.prev_sol.copy() 50 | count = 0 51 | # use for Conjugate method 52 | conjugate_d = None 53 | conjugate_prev_d = None 54 | conjugate_s = None 55 | conjugate_beta = None 56 | 57 | while True: 58 | # shape(pred_len+1, state_size) 59 | pred_xs = self.model.predict_traj(curr_x, sol) 60 | # shape(pred_len, state_size) 61 | pred_lams = self.model.predict_adjoint_traj(pred_xs, sol, g_xs) 62 | 63 | F_hat = self.config.gradient_hamiltonian_input( 64 | pred_xs, pred_lams, sol, g_xs) 65 | 66 | if np.linalg.norm(F_hat) < self.threshold: 67 | break 68 | 69 | if count > self.max_iters: 70 | logger.debug(" break max iteartion at F : `{}".format( 71 | np.linalg.norm(F_hat))) 72 | break 73 | 74 | if self.optimizer_mode == "conjugate": 75 | conjugate_d = F_hat.flatten() 76 | 77 | if conjugate_prev_d is None: # initial 78 | conjugate_s = conjugate_d 79 | conjugate_prev_d = conjugate_d 80 | F_hat = conjugate_s.reshape(F_hat.shape) 81 | else: 82 | prev_d = np.dot(conjugate_prev_d, conjugate_prev_d) 83 | d = np.dot(conjugate_d, conjugate_d - conjugate_prev_d) 84 | conjugate_beta = (d + 1e-6) / (prev_d + 1e-6) 85 | 86 | conjugate_s = conjugate_d + conjugate_beta * conjugate_s 87 | conjugate_prev_d = conjugate_d 88 | F_hat = conjugate_s.reshape(F_hat.shape) 89 | 90 | def compute_eval_val(u): 91 | pred_xs = self.model.predict_traj(curr_x, u) 92 | state_cost = np.sum(self.config.state_cost_fn( 93 | pred_xs[1:-1], g_xs[1:-1])) 94 | input_cost = np.sum(self.config.input_cost_fn(u)) 95 | terminal_cost = np.sum( 96 | self.config.terminal_state_cost_fn(pred_xs[-1], g_xs[-1])) 97 | return state_cost + input_cost + terminal_cost 98 | 99 | alpha = line_search(F_hat, sol, 100 | compute_eval_val, init_alpha=self.learning_rate) 101 | 102 | sol -= alpha * F_hat 103 | count += 1 104 | 105 | # update us for next optimization 106 | self.prev_sol = np.concatenate( 107 | (sol[1:], np.zeros((1, self.input_size))), axis=0) 108 | 109 | return sol[0] 110 | -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/controllers/mppi.py: -------------------------------------------------------------------------------- 1 | from logging import getLogger 2 | 3 | import numpy as np 4 | import scipy.stats as stats 5 | 6 | from .controller import Controller 7 | from ..envs.cost import calc_cost 8 | 9 | logger = getLogger(__name__) 10 | 11 | 12 | class MPPI(Controller): 13 | """ Model Predictive Path Integral for linear and nonlinear method 14 | 15 | Attributes: 16 | history_u (list[numpy.ndarray]): time history of optimal input 17 | Ref: 18 | Nagabandi, A., Konoglie, K., Levine, S., & Kumar, V. (2019). 19 | Deep Dynamics Models for Learning Dexterous Manipulation. 20 | arXiv preprint arXiv:1909.11652. 21 | """ 22 | 23 | def __init__(self, config, model): 24 | super(MPPI, self).__init__(config, model) 25 | 26 | # model 27 | self.model = model 28 | 29 | # general parameters 30 | self.pred_len = config.PRED_LEN 31 | self.input_size = config.INPUT_SIZE 32 | 33 | # mppi parameters 34 | self.beta = config.opt_config["MPPI"]["beta"] 35 | self.pop_size = config.opt_config["MPPI"]["popsize"] 36 | self.kappa = config.opt_config["MPPI"]["kappa"] 37 | self.noise_sigma = config.opt_config["MPPI"]["noise_sigma"] 38 | self.opt_dim = self.input_size * self.pred_len 39 | 40 | # get bound 41 | self.input_upper_bounds = np.tile(config.INPUT_UPPER_BOUND, 42 | (self.pred_len, 1)) 43 | self.input_lower_bounds = np.tile(config.INPUT_LOWER_BOUND, 44 | (self.pred_len, 1)) 45 | 46 | # get cost func 47 | self.state_cost_fn = config.state_cost_fn 48 | self.terminal_state_cost_fn = config.terminal_state_cost_fn 49 | self.input_cost_fn = config.input_cost_fn 50 | 51 | # init mean 52 | self.prev_sol = np.tile((config.INPUT_UPPER_BOUND 53 | + config.INPUT_LOWER_BOUND) / 2., 54 | self.pred_len) 55 | self.prev_sol = self.prev_sol.reshape(self.pred_len, self.input_size) 56 | 57 | # save 58 | self.history_u = [np.zeros(self.input_size)] 59 | 60 | def clear_sol(self): 61 | """ clear prev sol 62 | """ 63 | logger.debug("Clear Solution") 64 | self.prev_sol = \ 65 | (self.input_upper_bounds + self.input_lower_bounds) / 2. 66 | self.prev_sol = self.prev_sol.reshape(self.pred_len, self.input_size) 67 | 68 | def obtain_sol(self, curr_x, g_xs): 69 | """ calculate the optimal inputs 70 | 71 | Args: 72 | curr_x (numpy.ndarray): current state, shape(state_size, ) 73 | g_xs (numpy.ndarrya): goal trajectory, shape(plan_len, state_size) 74 | Returns: 75 | opt_input (numpy.ndarray): optimal input, shape(input_size, ) 76 | """ 77 | # get noised inputs 78 | noise = np.random.normal( 79 | loc=0, scale=1.0, size=(self.pop_size, self.pred_len, 80 | self.input_size)) * self.noise_sigma 81 | noised_inputs = noise.copy() 82 | 83 | for t in range(self.pred_len): 84 | if t > 0: 85 | noised_inputs[:, t, :] = self.beta \ 86 | * (self.prev_sol[t, :] 87 | + noise[:, t, :]) \ 88 | + (1 - self.beta) \ 89 | * noised_inputs[:, t-1, :] 90 | else: 91 | noised_inputs[:, t, :] = self.beta \ 92 | * (self.prev_sol[t, :] 93 | + noise[:, t, :]) \ 94 | + (1 - self.beta) \ 95 | * self.history_u[-1] 96 | 97 | # clip actions 98 | noised_inputs = np.clip( 99 | noised_inputs, self.input_lower_bounds, self.input_upper_bounds) 100 | 101 | # calc cost 102 | costs = self.calc_cost(curr_x, noised_inputs, g_xs) 103 | rewards = -costs 104 | 105 | # mppi update 106 | # normalize and get sum of reward 107 | # exp_rewards.shape = (N, ) 108 | exp_rewards = np.exp(self.kappa * (rewards - np.max(rewards))) 109 | denom = np.sum(exp_rewards) + 1e-10 # avoid numeric error 110 | 111 | # weight actions 112 | weighted_inputs = exp_rewards[:, np.newaxis, np.newaxis] \ 113 | * noised_inputs 114 | sol = np.sum(weighted_inputs, 0) / denom 115 | 116 | # update 117 | self.prev_sol[:-1] = sol[1:] 118 | self.prev_sol[-1] = sol[-1] # last use the terminal input 119 | 120 | # log 121 | self.history_u.append(sol[0]) 122 | 123 | return sol[0] 124 | 125 | def __str__(self): 126 | return "MPPI" 127 | -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/helper.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import datetime 3 | import json 4 | import os 5 | import sys 6 | import six 7 | import pickle 8 | from logging import DEBUG, basicConfig, getLogger, FileHandler, StreamHandler, Formatter, Logger 9 | 10 | 11 | def make_logger(save_dir): 12 | """ 13 | Args: 14 | save_dir (str): save directory 15 | """ 16 | # base config setting 17 | basicConfig( 18 | format='[%(asctime)s] %(name)s %(levelname)s: %(message)s', 19 | datefmt='%Y-%m-%d %H:%M:%S' 20 | ) 21 | 22 | # mypackage log level 23 | logger = getLogger("PythonLinearNonlinearControl") 24 | logger.setLevel(DEBUG) 25 | 26 | # file handler 27 | log_path = os.path.join(save_dir, "log.txt") 28 | file_handler = FileHandler(log_path) 29 | file_handler.setLevel(DEBUG) 30 | file_handler.setFormatter(Formatter('%(message)s')) 31 | logger.addHandler(file_handler) 32 | 33 | # sh handler 34 | # sh_handler = StreamHandler() 35 | # logger.addHandler(sh_handler) 36 | 37 | 38 | def int_tuple(s): 39 | """ transform str to tuple 40 | Args: 41 | s (str): strings that you want to change 42 | Returns: 43 | tuple 44 | """ 45 | return tuple(int(i) for i in s.split(',')) 46 | 47 | 48 | def bool_flag(s): 49 | """ transform str to bool flg 50 | Args: 51 | s (str): strings that you want to change 52 | """ 53 | if s == '1': 54 | return True 55 | elif s == '0': 56 | return False 57 | msg = 'Invalid value "%s" for bool flag (should be 0 or 1)' 58 | raise ValueError(msg % s) 59 | 60 | 61 | def file_exists(path): 62 | """ Check file existence on given path 63 | Args: 64 | path (str): path of the file to check existence 65 | Returns: 66 | file_existence (bool): True if file exists otherwise False 67 | """ 68 | return os.path.exists(path) 69 | 70 | 71 | def create_dir_if_not_exist(outdir): 72 | """ Check directory existence and creates new directory if not exist 73 | Args: 74 | outdir (str): path of the file to create directory 75 | RuntimeError: 76 | file exists in outdir but it is not a directory 77 | """ 78 | if file_exists(outdir): 79 | if not os.path.isdir(outdir): 80 | raise RuntimeError('{} is not a directory'.format(outdir)) 81 | else: 82 | return 83 | os.makedirs(outdir) 84 | 85 | 86 | def write_text_to_file(file_path, data): 87 | """ Write given text data to file 88 | Args: 89 | file_path (str): path of the file to write data 90 | data (str): text to write to the file 91 | """ 92 | with open(file_path, 'w') as f: 93 | f.write(data) 94 | 95 | 96 | def read_text_from_file(file_path): 97 | """ Read given file as text 98 | Args: 99 | file_path (str): path of the file to read data 100 | Returns 101 | data (str): text read from the file 102 | """ 103 | with open(file_path, 'r') as f: 104 | return f.read() 105 | 106 | 107 | def save_pickle(file_path, data): 108 | """ pickle given data to file 109 | Args: 110 | file_path (str): path of the file to pickle data 111 | data (): data to pickle 112 | """ 113 | with open(file_path, 'wb') as f: 114 | pickle.dump(data, f) 115 | 116 | 117 | def load_pickle(file_path): 118 | """ load pickled data from file 119 | Args: 120 | file_path (str): path of the file to load pickled data 121 | Returns: 122 | data (): data pickled in file 123 | """ 124 | with open(file_path, 'rb') as f: 125 | if six.PY2: 126 | return pickle.load(f) 127 | else: 128 | return pickle.load(f, encoding='bytes') 129 | 130 | 131 | def prepare_output_dir(base_dir, args, time_format='%Y-%m-%d-%H%M%S'): 132 | """ prepare a directory with current datetime as name. 133 | created directory contains the command and args when the script was called as text file. 134 | Args: 135 | base_dir (str): path of the directory to save data 136 | args (dict): arguments when the python script was called 137 | time_format (str): datetime format string for naming directory to save data 138 | Returns: 139 | out_dir (str): directory to save data 140 | """ 141 | time_str = datetime.datetime.now().strftime(time_format) 142 | outdir = os.path.join(base_dir, time_str) 143 | 144 | create_dir_if_not_exist(outdir) 145 | 146 | # Save all the arguments 147 | args_file_path = os.path.join(outdir, 'args.txt') 148 | if isinstance(args, argparse.Namespace): 149 | args = vars(args) 150 | write_text_to_file(args_file_path, json.dumps(args)) 151 | 152 | # Save the command 153 | argv_file_path = os.path.join(outdir, 'command.txt') 154 | argv = ' '.join(sys.argv) 155 | write_text_to_file(argv_file_path, argv) 156 | 157 | return outdir 158 | -------------------------------------------------------------------------------- /tests/configs/test_two_wheeled.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from PythonLinearNonlinearControl.configs.two_wheeled \ 5 | import TwoWheeledConfigModule 6 | 7 | 8 | class TestCalcCost(): 9 | def test_calc_costs(self): 10 | # make config 11 | config = TwoWheeledConfigModule() 12 | # set 13 | pred_len = 5 14 | state_size = 3 15 | input_size = 2 16 | pop_size = 2 17 | pred_xs = np.ones((pop_size, pred_len, state_size)) 18 | g_xs = np.ones((pop_size, pred_len, state_size)) * 0.5 19 | input_samples = np.ones((pop_size, pred_len, input_size)) * 0.5 20 | 21 | costs = config.input_cost_fn(input_samples) 22 | expected_costs = np.ones((pop_size, pred_len, input_size))*0.5 23 | 24 | assert costs == pytest.approx(expected_costs**2 * np.diag(config.R)) 25 | 26 | costs = config.state_cost_fn(pred_xs, g_xs) 27 | expected_costs = np.ones((pop_size, pred_len, state_size))*0.5 28 | 29 | assert costs == pytest.approx(expected_costs**2 * np.diag(config.Q)) 30 | 31 | costs = config.terminal_state_cost_fn(pred_xs[:, -1, :], 32 | g_xs[:, -1, :]) 33 | expected_costs = np.ones((pop_size, state_size))*0.5 34 | 35 | assert costs == pytest.approx(expected_costs**2 * np.diag(config.Sf)) 36 | 37 | 38 | class TestGradient(): 39 | def test_state_gradient(self): 40 | """ 41 | """ 42 | xs = np.ones((1, 3)) 43 | g_xs = np.ones((1, 3)) * 0.5 44 | cost_grad =\ 45 | TwoWheeledConfigModule.gradient_cost_fn_state(xs, g_xs) 46 | 47 | # numeric grad 48 | eps = 1e-4 49 | expected_grad = np.zeros((1, 3)) 50 | for i in range(3): 51 | tmp_x = xs.copy() 52 | tmp_x[0, i] = xs[0, i] + eps 53 | forward = \ 54 | TwoWheeledConfigModule.state_cost_fn(tmp_x, g_xs) 55 | forward = np.sum(forward) 56 | tmp_x = xs.copy() 57 | tmp_x[0, i] = xs[0, i] - eps 58 | backward = \ 59 | TwoWheeledConfigModule.state_cost_fn(tmp_x, g_xs) 60 | backward = np.sum(backward) 61 | 62 | expected_grad[0, i] = (forward - backward) / (2. * eps) 63 | 64 | assert cost_grad == pytest.approx(expected_grad) 65 | 66 | def test_input_gradient(self): 67 | """ 68 | """ 69 | us = np.ones((1, 2)) 70 | cost_grad =\ 71 | TwoWheeledConfigModule.gradient_cost_fn_input(None, us) 72 | 73 | # numeric grad 74 | eps = 1e-4 75 | expected_grad = np.zeros((1, 2)) 76 | for i in range(2): 77 | tmp_u = us.copy() 78 | tmp_u[0, i] = us[0, i] + eps 79 | forward = \ 80 | TwoWheeledConfigModule.input_cost_fn(tmp_u) 81 | forward = np.sum(forward) 82 | tmp_u = us.copy() 83 | tmp_u[0, i] = us[0, i] - eps 84 | backward = \ 85 | TwoWheeledConfigModule.input_cost_fn(tmp_u) 86 | backward = np.sum(backward) 87 | 88 | expected_grad[0, i] = (forward - backward) / (2. * eps) 89 | 90 | assert cost_grad == pytest.approx(expected_grad) 91 | 92 | def test_state_hessian(self): 93 | """ 94 | """ 95 | g_xs = np.ones((1, 3)) * 0.5 96 | xs = np.ones((1, 3)) 97 | cost_hess =\ 98 | TwoWheeledConfigModule.hessian_cost_fn_state(xs, g_xs) 99 | 100 | # numeric grad 101 | eps = 1e-4 102 | expected_hess = np.zeros((1, 3, 3)) 103 | for i in range(3): 104 | tmp_x = xs.copy() 105 | tmp_x[0, i] = xs[0, i] + eps 106 | forward = \ 107 | TwoWheeledConfigModule.gradient_cost_fn_state( 108 | tmp_x, g_xs, terminal=False) 109 | tmp_x = xs.copy() 110 | tmp_x[0, i] = xs[0, i] - eps 111 | backward = \ 112 | TwoWheeledConfigModule.gradient_cost_fn_state( 113 | tmp_x, g_xs, terminal=False) 114 | 115 | expected_hess[0, :, i] = (forward - backward) / (2. * eps) 116 | 117 | assert cost_hess == pytest.approx(expected_hess) 118 | 119 | def test_input_hessian(self): 120 | """ 121 | """ 122 | us = np.ones((1, 2)) 123 | xs = np.ones((1, 3)) 124 | cost_hess = TwoWheeledConfigModule.hessian_cost_fn_input(us, xs) 125 | 126 | # numeric grad 127 | eps = 1e-4 128 | expected_hess = np.zeros((1, 2, 2)) 129 | for i in range(2): 130 | tmp_u = us.copy() 131 | tmp_u[0, i] = us[0, i] + eps 132 | forward = \ 133 | TwoWheeledConfigModule.gradient_cost_fn_input( 134 | xs, tmp_u) 135 | tmp_u = us.copy() 136 | tmp_u[0, i] = us[0, i] - eps 137 | backward = \ 138 | TwoWheeledConfigModule.gradient_cost_fn_input( 139 | xs, tmp_u) 140 | 141 | expected_hess[0, :, i] = (forward - backward) / (2. * eps) 142 | 143 | assert cost_hess == pytest.approx(expected_hess) 144 | -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/common/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def rotate_pos(pos, angle): 5 | """ Transformation the coordinate in the angle 6 | 7 | Args: 8 | pos (numpy.ndarray): local state, shape(data_size, 2) 9 | angle (float): rotate angle, in radians 10 | Returns: 11 | rotated_pos (numpy.ndarray): shape(data_size, 2) 12 | """ 13 | rot_mat = np.array([[np.cos(angle), -np.sin(angle)], 14 | [np.sin(angle), np.cos(angle)]]) 15 | 16 | return np.dot(pos, rot_mat.T) 17 | 18 | 19 | def fit_angle_in_range(angles, min_angle=-np.pi, max_angle=np.pi): 20 | """ Check angle range and correct the range 21 | 22 | Args: 23 | angle (numpy.ndarray): in radians 24 | min_angle (float): maximum of range in radians, default -pi 25 | max_angle (float): minimum of range in radians, default pi 26 | Returns: 27 | fitted_angle (numpy.ndarray): range angle in radians 28 | """ 29 | if max_angle < min_angle: 30 | raise ValueError("max angle must be greater than min angle") 31 | if (max_angle - min_angle) < 2.0 * np.pi: 32 | raise ValueError("difference between max_angle \ 33 | and min_angle must be greater than 2.0 * pi") 34 | 35 | output = np.array(angles) 36 | output_shape = output.shape 37 | 38 | output = output.flatten() 39 | output -= min_angle 40 | output %= 2 * np.pi 41 | output += 2 * np.pi 42 | output %= 2 * np.pi 43 | output += min_angle 44 | 45 | output = np.minimum(max_angle, np.maximum(min_angle, output)) 46 | return output.reshape(output_shape) 47 | 48 | 49 | def update_state_with_Runge_Kutta(state, u, functions, dt=0.01, batch=True): 50 | """ update state in Runge Kutta methods 51 | Args: 52 | state (array-like): state of system 53 | u (array-like): input of system 54 | functions (list): update function of each state, 55 | each function will be called like func(state, u) 56 | We expect that this function returns differential of each state 57 | dt (float): float in seconds 58 | batch (bool): state and u is given by batch or not 59 | 60 | Returns: 61 | next_state (np.array): next state of system 62 | 63 | Notes: 64 | sample of function is as follows: 65 | 66 | def func_x(self, x_1, x_2, u): 67 | x_dot = (1. - x_1**2 - x_2**2) * x_2 - x_1 + u 68 | return x_dot 69 | 70 | Note that the function return x_dot. 71 | """ 72 | if not batch: 73 | state_size = len(state) 74 | assert state_size == len(functions), \ 75 | "Invalid functions length, You need to give the state size functions" 76 | 77 | k0 = np.zeros(state_size) 78 | k1 = np.zeros(state_size) 79 | k2 = np.zeros(state_size) 80 | k3 = np.zeros(state_size) 81 | 82 | for i, func in enumerate(functions): 83 | k0[i] = dt * func(state, u) 84 | 85 | for i, func in enumerate(functions): 86 | k1[i] = dt * func(state + k0 / 2., u) 87 | 88 | for i, func in enumerate(functions): 89 | k2[i] = dt * func(state + k1 / 2., u) 90 | 91 | for i, func in enumerate(functions): 92 | k3[i] = dt * func(state + k2, u) 93 | 94 | return state + (k0 + 2. * k1 + 2. * k2 + k3) / 6. 95 | 96 | else: 97 | batch_size, state_size = state.shape 98 | assert state_size == len(functions), \ 99 | "Invalid functions length, You need to give the state size functions" 100 | 101 | k0 = np.zeros((batch_size, state_size)) 102 | k1 = np.zeros((batch_size, state_size)) 103 | k2 = np.zeros((batch_size, state_size)) 104 | k3 = np.zeros((batch_size, state_size)) 105 | 106 | for i, func in enumerate(functions): 107 | k0[:, i] = dt * func(state, u) 108 | 109 | for i, func in enumerate(functions): 110 | k1[:, i] = dt * func(state + k0 / 2., u) 111 | 112 | for i, func in enumerate(functions): 113 | k2[:, i] = dt * func(state + k1 / 2., u) 114 | 115 | for i, func in enumerate(functions): 116 | k3[:, i] = dt * func(state + k2, u) 117 | 118 | return state + (k0 + 2. * k1 + 2. * k2 + k3) / 6. 119 | 120 | 121 | def line_search(grad, sol, compute_eval_val, 122 | init_alpha=0.001, max_iter=100, update_ratio=1.): 123 | """ line search 124 | Args: 125 | grad (numpy.ndarray): gradient 126 | sol (numpy.ndarray): sol 127 | compute_eval_val (numpy.ndarray): function to compute evaluation value 128 | 129 | Returns: 130 | alpha (float): result of line search 131 | """ 132 | assert grad.shape == sol.shape 133 | base_val = np.inf 134 | alpha = init_alpha 135 | original_sol = sol.copy() 136 | 137 | for _ in range(max_iter): 138 | updated_sol = original_sol - alpha * grad 139 | eval_val = compute_eval_val(updated_sol) 140 | 141 | if eval_val < base_val: 142 | alpha += init_alpha * update_ratio 143 | base_val = eval_val 144 | else: 145 | break 146 | 147 | return alpha 148 | -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/controllers/cem.py: -------------------------------------------------------------------------------- 1 | from logging import getLogger 2 | 3 | import numpy as np 4 | import scipy.stats as stats 5 | 6 | from .controller import Controller 7 | from ..envs.cost import calc_cost 8 | 9 | logger = getLogger(__name__) 10 | 11 | 12 | class CEM(Controller): 13 | """ Cross Entropy Method for linear and nonlinear method 14 | 15 | Attributes: 16 | history_u (list[numpy.ndarray]): time history of optimal input 17 | Ref: 18 | Chua, K., Calandra, R., McAllister, R., & Levine, S. (2018). 19 | Deep reinforcement learning in a handful of trials 20 | using probabilistic dynamics models. 21 | In Advances in Neural Information Processing Systems (pp. 4754-4765). 22 | """ 23 | 24 | def __init__(self, config, model): 25 | super(CEM, self).__init__(config, model) 26 | 27 | # model 28 | self.model = model 29 | 30 | # general parameters 31 | self.pred_len = config.PRED_LEN 32 | self.input_size = config.INPUT_SIZE 33 | 34 | # cem parameters 35 | self.alpha = config.opt_config["CEM"]["alpha"] 36 | self.pop_size = config.opt_config["CEM"]["popsize"] 37 | self.max_iters = config.opt_config["CEM"]["max_iters"] 38 | self.num_elites = config.opt_config["CEM"]["num_elites"] 39 | self.epsilon = config.opt_config["CEM"]["threshold"] 40 | self.init_var = config.opt_config["CEM"]["init_var"] 41 | self.opt_dim = self.input_size * self.pred_len 42 | 43 | # get bound 44 | self.input_upper_bounds = np.tile(config.INPUT_UPPER_BOUND, 45 | self.pred_len) 46 | self.input_lower_bounds = np.tile(config.INPUT_LOWER_BOUND, 47 | self.pred_len) 48 | 49 | # get cost func 50 | self.state_cost_fn = config.state_cost_fn 51 | self.terminal_state_cost_fn = config.terminal_state_cost_fn 52 | self.input_cost_fn = config.input_cost_fn 53 | 54 | # init mean 55 | self.init_mean = np.tile((config.INPUT_UPPER_BOUND 56 | + config.INPUT_LOWER_BOUND) / 2., 57 | self.pred_len) 58 | self.prev_sol = self.init_mean.copy() 59 | # init variance 60 | var = np.ones_like(config.INPUT_UPPER_BOUND) \ 61 | * config.opt_config["CEM"]["init_var"] 62 | self.init_var = np.tile(var, self.pred_len) 63 | 64 | # save 65 | self.history_u = [] 66 | 67 | def clear_sol(self): 68 | """ clear prev sol 69 | """ 70 | logger.debug("Clear Sol") 71 | self.prev_sol = self.init_mean.copy() 72 | 73 | def obtain_sol(self, curr_x, g_xs): 74 | """ calculate the optimal inputs 75 | 76 | Args: 77 | curr_x (numpy.ndarray): current state, shape(state_size, ) 78 | g_xs (numpy.ndarrya): goal trajectory, shape(plan_len, state_size) 79 | Returns: 80 | opt_input (numpy.ndarray): optimal input, shape(input_size, ) 81 | """ 82 | # initialize 83 | opt_count = 0 84 | 85 | # get configuration 86 | mean = self.prev_sol.flatten().copy() 87 | var = self.init_var.flatten().copy() 88 | 89 | # make distribution 90 | X = stats.truncnorm(-1, 1, 91 | loc=np.zeros_like(mean), 92 | scale=np.ones_like(mean)) 93 | 94 | while (opt_count < self.max_iters) and np.max(var) > self.epsilon: 95 | # constrained 96 | lb_dist = mean - self.input_lower_bounds 97 | ub_dist = self.input_upper_bounds - mean 98 | constrained_var = np.minimum(np.minimum(np.square(lb_dist), 99 | np.square(ub_dist)), 100 | var) 101 | 102 | # sample 103 | samples = X.rvs(size=[self.pop_size, self.opt_dim]) \ 104 | * np.sqrt(constrained_var) \ 105 | + mean 106 | 107 | # calc cost 108 | # samples.shape = (pop_size, opt_dim) 109 | costs = self.calc_cost(curr_x, 110 | samples.reshape(self.pop_size, 111 | self.pred_len, 112 | self.input_size), 113 | g_xs) 114 | 115 | # sort cost 116 | elites = samples[np.argsort(costs)][:self.num_elites] 117 | 118 | # new mean 119 | new_mean = np.mean(elites, axis=0) 120 | new_var = np.var(elites, axis=0) 121 | 122 | # soft update 123 | mean = self.alpha * mean + (1. - self.alpha) * new_mean 124 | var = self.alpha * var + (1. - self.alpha) * new_var 125 | 126 | logger.debug("Var = {}".format(np.max(var))) 127 | logger.debug("Costs = {}".format(np.mean(costs))) 128 | opt_count += 1 129 | 130 | sol = mean.copy() 131 | self.prev_sol = np.concatenate((mean[self.input_size:], 132 | np.zeros(self.input_size))) 133 | 134 | return sol.reshape(self.pred_len, self.input_size).copy()[0] 135 | 136 | def __str__(self): 137 | return "CEM" 138 | -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/controllers/mppi_williams.py: -------------------------------------------------------------------------------- 1 | from logging import getLogger 2 | 3 | import numpy as np 4 | import scipy.stats as stats 5 | 6 | from .controller import Controller 7 | from ..envs.cost import calc_cost 8 | 9 | logger = getLogger(__name__) 10 | 11 | 12 | class MPPIWilliams(Controller): 13 | """ Model Predictive Path Integral for linear and nonlinear method 14 | 15 | Attributes: 16 | history_u (list[numpy.ndarray]): time history of optimal input 17 | Ref: 18 | G. Williams et al., "Information theoretic MPC 19 | for model-based reinforcement learning," 20 | 2017 IEEE International Conference on Robotics and Automation (ICRA), 21 | Singapore, 2017, pp. 1714-1721. 22 | """ 23 | 24 | def __init__(self, config, model): 25 | super(MPPIWilliams, self).__init__(config, model) 26 | 27 | # model 28 | self.model = model 29 | 30 | # general parameters 31 | self.pred_len = config.PRED_LEN 32 | self.input_size = config.INPUT_SIZE 33 | 34 | # mppi parameters 35 | self.pop_size = config.opt_config["MPPIWilliams"]["popsize"] 36 | self.lam = config.opt_config["MPPIWilliams"]["lambda"] 37 | self.noise_sigma = config.opt_config["MPPIWilliams"]["noise_sigma"] 38 | self.opt_dim = self.input_size * self.pred_len 39 | 40 | # get bound 41 | self.input_upper_bounds = np.tile(config.INPUT_UPPER_BOUND, 42 | (self.pred_len, 1)) 43 | self.input_lower_bounds = np.tile(config.INPUT_LOWER_BOUND, 44 | (self.pred_len, 1)) 45 | 46 | # get cost func 47 | self.state_cost_fn = config.state_cost_fn 48 | self.terminal_state_cost_fn = config.terminal_state_cost_fn 49 | self.input_cost_fn = config.input_cost_fn 50 | 51 | # init mean 52 | self.prev_sol = np.tile((config.INPUT_UPPER_BOUND 53 | + config.INPUT_LOWER_BOUND) / 2., 54 | self.pred_len) 55 | self.prev_sol = self.prev_sol.reshape(self.pred_len, self.input_size) 56 | 57 | # save 58 | self.history_u = [np.zeros(self.input_size)] 59 | 60 | def clear_sol(self): 61 | """ clear prev sol 62 | """ 63 | logger.debug("Clear Solution") 64 | self.prev_sol = \ 65 | (self.input_upper_bounds + self.input_lower_bounds) / 2. 66 | self.prev_sol = self.prev_sol.reshape(self.pred_len, self.input_size) 67 | 68 | def calc_cost(self, curr_x, samples, g_xs): 69 | """ calculate the cost of input samples by using MPPI's eq 70 | 71 | Args: 72 | curr_x (numpy.ndarray): shape(state_size), 73 | current robot position 74 | samples (numpy.ndarray): shape(pop_size, opt_dim), 75 | input samples 76 | g_xs (numpy.ndarray): shape(pred_len, state_size), 77 | goal states 78 | Returns: 79 | costs (numpy.ndarray): shape(pop_size, ) 80 | """ 81 | # get size 82 | pop_size = samples.shape[0] 83 | g_xs = np.tile(g_xs, (pop_size, 1, 1)) 84 | 85 | # calc cost, pred_xs.shape = (pop_size, pred_len+1, state_size) 86 | pred_xs = self.model.predict_traj(curr_x, samples) 87 | 88 | # get particle cost 89 | costs = calc_cost(pred_xs, samples, g_xs, 90 | self.state_cost_fn, None, 91 | self.terminal_state_cost_fn) 92 | 93 | return costs 94 | 95 | def obtain_sol(self, curr_x, g_xs): 96 | """ calculate the optimal inputs 97 | 98 | Args: 99 | curr_x (numpy.ndarray): current state, shape(state_size, ) 100 | g_xs (numpy.ndarrya): goal trajectory, shape(plan_len, state_size) 101 | Returns: 102 | opt_input (numpy.ndarray): optimal input, shape(input_size, ) 103 | """ 104 | # get noised inputs 105 | noise = np.random.normal( 106 | loc=0, scale=1.0, size=(self.pop_size, self.pred_len, 107 | self.input_size)) * self.noise_sigma 108 | 109 | noised_inputs = self.prev_sol + noise 110 | 111 | # clip actions 112 | noised_inputs = np.clip( 113 | noised_inputs, self.input_lower_bounds, self.input_upper_bounds) 114 | 115 | # calc cost 116 | costs = self.calc_cost(curr_x, noised_inputs, g_xs) 117 | 118 | costs += np.sum(np.sum( 119 | self.lam * self.prev_sol * noise / self.noise_sigma, 120 | axis=-1), axis=-1) 121 | 122 | # mppi update 123 | beta = np.min(costs) 124 | eta = np.sum(np.exp(- 1. / self.lam * (costs - beta)), axis=0) \ 125 | + 1e-10 126 | 127 | # weight 128 | # eta.shape = (pred_len, input_size) 129 | weights = np.exp(- 1. / self.lam * (costs - beta)) / eta 130 | 131 | # update inputs 132 | sol = self.prev_sol \ 133 | + np.sum(weights[:, np.newaxis, np.newaxis] * noise, axis=0) 134 | 135 | # update 136 | self.prev_sol[:-1] = sol[1:] 137 | self.prev_sol[-1] = sol[-1] # last use the terminal input 138 | 139 | # log 140 | self.history_u.append(sol[0]) 141 | 142 | return sol[0] 143 | 144 | def __str__(self): 145 | return "MPPIWilliams" 146 | -------------------------------------------------------------------------------- /tests/configs/test_cartpole.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from PythonLinearNonlinearControl.configs.cartpole \ 5 | import CartPoleConfigModule 6 | 7 | 8 | class TestCalcCost(): 9 | def test_calc_costs(self): 10 | # make config 11 | config = CartPoleConfigModule() 12 | # set 13 | pred_len = 5 14 | state_size = 4 15 | input_size = 1 16 | pop_size = 2 17 | pred_xs = np.ones((pop_size, pred_len, state_size)) 18 | g_xs = np.ones((pop_size, pred_len, state_size)) * 0.5 19 | input_samples = np.ones((pop_size, pred_len, input_size)) * 0.5 20 | 21 | costs = config.input_cost_fn(input_samples) 22 | 23 | assert costs.shape == (pop_size, pred_len, input_size) 24 | 25 | costs = config.state_cost_fn(pred_xs, g_xs) 26 | 27 | assert costs.shape == (pop_size, pred_len, 1) 28 | 29 | costs = config.terminal_state_cost_fn(pred_xs[:, -1, :], 30 | g_xs[:, -1, :]) 31 | 32 | assert costs.shape == (pop_size, 1) 33 | 34 | 35 | class TestGradient(): 36 | def test_state_gradient(self): 37 | """ 38 | """ 39 | xs = np.ones((1, 4)) 40 | cost_grad = CartPoleConfigModule.gradient_cost_fn_state(xs, None) 41 | 42 | # numeric grad 43 | eps = 1e-4 44 | expected_grad = np.zeros((1, 4)) 45 | for i in range(4): 46 | tmp_x = xs.copy() 47 | tmp_x[0, i] = xs[0, i] + eps 48 | forward = \ 49 | CartPoleConfigModule.state_cost_fn(tmp_x, None) 50 | tmp_x = xs.copy() 51 | tmp_x[0, i] = xs[0, i] - eps 52 | backward = \ 53 | CartPoleConfigModule.state_cost_fn(tmp_x, None) 54 | 55 | expected_grad[0, i] = (forward - backward) / (2. * eps) 56 | 57 | assert cost_grad == pytest.approx(expected_grad) 58 | 59 | def test_terminal_state_gradient(self): 60 | """ 61 | """ 62 | xs = np.ones(4) 63 | cost_grad =\ 64 | CartPoleConfigModule.gradient_cost_fn_state(xs, None, 65 | terminal=True) 66 | 67 | # numeric grad 68 | eps = 1e-4 69 | expected_grad = np.zeros((1, 4)) 70 | for i in range(4): 71 | tmp_x = xs.copy() 72 | tmp_x[i] = xs[i] + eps 73 | forward = \ 74 | CartPoleConfigModule.state_cost_fn(tmp_x, None) 75 | tmp_x = xs.copy() 76 | tmp_x[i] = xs[i] - eps 77 | backward = \ 78 | CartPoleConfigModule.state_cost_fn(tmp_x, None) 79 | 80 | expected_grad[0, i] = (forward - backward) / (2. * eps) 81 | 82 | assert cost_grad == pytest.approx(expected_grad) 83 | 84 | def test_input_gradient(self): 85 | """ 86 | """ 87 | us = np.ones((1, 1)) 88 | cost_grad = CartPoleConfigModule.gradient_cost_fn_input(None, us) 89 | 90 | # numeric grad 91 | eps = 1e-4 92 | expected_grad = np.zeros((1, 1)) 93 | for i in range(1): 94 | tmp_u = us.copy() 95 | tmp_u[0, i] = us[0, i] + eps 96 | forward = \ 97 | CartPoleConfigModule.input_cost_fn(tmp_u) 98 | tmp_u = us.copy() 99 | tmp_u[0, i] = us[0, i] - eps 100 | backward = \ 101 | CartPoleConfigModule.input_cost_fn(tmp_u) 102 | 103 | expected_grad[0, i] = (forward - backward) / (2. * eps) 104 | 105 | assert cost_grad == pytest.approx(expected_grad) 106 | 107 | def test_state_hessian(self): 108 | """ 109 | """ 110 | xs = np.ones((1, 4)) 111 | cost_hess = CartPoleConfigModule.hessian_cost_fn_state(xs, None) 112 | 113 | # numeric grad 114 | eps = 1e-4 115 | expected_hess = np.zeros((1, 4, 4)) 116 | for i in range(4): 117 | tmp_x = xs.copy() 118 | tmp_x[0, i] = xs[0, i] + eps 119 | forward = \ 120 | CartPoleConfigModule.gradient_cost_fn_state( 121 | tmp_x, None, terminal=False) 122 | tmp_x = xs.copy() 123 | tmp_x[0, i] = xs[0, i] - eps 124 | backward = \ 125 | CartPoleConfigModule.gradient_cost_fn_state( 126 | tmp_x, None, terminal=False) 127 | 128 | expected_hess[0, :, i] = (forward - backward) / (2. * eps) 129 | 130 | assert cost_hess == pytest.approx(expected_hess) 131 | 132 | def test_terminal_state_hessian(self): 133 | """ 134 | """ 135 | xs = np.ones(4) 136 | cost_hess =\ 137 | CartPoleConfigModule.hessian_cost_fn_state(xs, None, 138 | terminal=True) 139 | 140 | # numeric grad 141 | eps = 1e-4 142 | expected_hess = np.zeros((1, 4, 4)) 143 | for i in range(4): 144 | tmp_x = xs.copy() 145 | tmp_x[i] = xs[i] + eps 146 | forward = \ 147 | CartPoleConfigModule.gradient_cost_fn_state( 148 | tmp_x, None, terminal=True) 149 | tmp_x = xs.copy() 150 | tmp_x[i] = xs[i] - eps 151 | backward = \ 152 | CartPoleConfigModule.gradient_cost_fn_state( 153 | tmp_x, None, terminal=True) 154 | 155 | expected_hess[0, :, i] = (forward - backward) / (2. * eps) 156 | 157 | assert cost_hess == pytest.approx(expected_hess) 158 | 159 | def test_input_hessian(self): 160 | """ 161 | """ 162 | us = np.ones((1, 1)) 163 | xs = np.ones((1, 4)) 164 | cost_hess = CartPoleConfigModule.hessian_cost_fn_input(us, xs) 165 | 166 | # numeric grad 167 | eps = 1e-4 168 | expected_hess = np.zeros((1, 1, 1)) 169 | for i in range(1): 170 | tmp_u = us.copy() 171 | tmp_u[0, i] = us[0, i] + eps 172 | forward = \ 173 | CartPoleConfigModule.gradient_cost_fn_input( 174 | xs, tmp_u) 175 | tmp_u = us.copy() 176 | tmp_u[0, i] = us[0, i] - eps 177 | backward = \ 178 | CartPoleConfigModule.gradient_cost_fn_input( 179 | xs, tmp_u) 180 | 181 | expected_hess[0, :, i] = (forward - backward) / (2. * eps) 182 | 183 | assert cost_hess == pytest.approx(expected_hess) 184 | -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/plotters/plot_func.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | 6 | from ..helper import save_pickle, load_pickle 7 | 8 | 9 | def plot_result(history, history_g=None, ylabel="x", 10 | save_dir="./result", name="state_history"): 11 | """ 12 | Args: 13 | history (numpy.ndarray): history, shape(iters, size) 14 | """ 15 | (iters, size) = history.shape 16 | for i in range(0, size, 3): 17 | 18 | figure = plt.figure() 19 | axis1 = figure.add_subplot(311) 20 | axis2 = figure.add_subplot(312) 21 | axis3 = figure.add_subplot(313) 22 | 23 | axis1.set_ylabel(ylabel + "_{}".format(i)) 24 | axis2.set_ylabel(ylabel + "_{}".format(i+1)) 25 | axis3.set_ylabel(ylabel + "_{}".format(i+2)) 26 | axis3.set_xlabel("time steps") 27 | 28 | # gt 29 | def plot(axis, history, history_g=None): 30 | axis.plot(range(iters), history, c="r", linewidth=3) 31 | if history_g is not None: 32 | axis.plot(range(iters), history_g, 33 | c="b", linewidth=3, label="goal") 34 | 35 | if i < size: 36 | plot(axis1, history[:, i], history_g=history_g[:, i]) 37 | if i+1 < size: 38 | plot(axis2, history[:, i+1], history_g=history_g[:, i+1]) 39 | if i+2 < size: 40 | plot(axis3, history[:, i+2], history_g=history_g[:, i+2]) 41 | 42 | # save 43 | if save_dir is not None: 44 | path = os.path.join(save_dir, name + "-{}".format(i)) 45 | else: 46 | path = name 47 | 48 | axis1.legend(ncol=1, bbox_to_anchor=(0., 1.02, 1., 0.102), loc=3) 49 | figure.savefig(path, bbox_inches="tight", pad_inches=0.05) 50 | 51 | 52 | def plot_results(history_x, history_u, history_g=None, args=None): 53 | """ 54 | 55 | Args: 56 | history_x (numpy.ndarray): history of state, shape(iters, state_size) 57 | history_u (numpy.ndarray): history of state, shape(iters, input_size) 58 | Returns: 59 | None 60 | """ 61 | env = "Env" 62 | controller_type = "controller" 63 | 64 | if args is not None: 65 | env = args.env 66 | controller_type = args.controller_type 67 | 68 | plot_result(history_x, history_g=history_g, ylabel="x", 69 | name=env + "-state_history", 70 | save_dir="./result/" + controller_type) 71 | plot_result(history_u, history_g=np.zeros_like(history_u), ylabel="u", 72 | name=env + "-input_history", 73 | save_dir="./result/" + controller_type) 74 | 75 | 76 | def save_plot_data(history_x, history_u, history_g=None, args=None): 77 | """ save plot data 78 | 79 | Args: 80 | history_x (numpy.ndarray): history of state, shape(iters, state_size) 81 | history_u (numpy.ndarray): history of state, shape(iters, input_size) 82 | Returns: 83 | None 84 | """ 85 | env = "Env" 86 | controller_type = "controller" 87 | 88 | if args is not None: 89 | env = args.env 90 | controller_type = args.controller_type 91 | 92 | path = os.path.join("./result/" + controller_type, 93 | env + "-history_x.pkl") 94 | save_pickle(path, history_x) 95 | 96 | path = os.path.join("./result/" + controller_type, 97 | env + "-history_u.pkl") 98 | save_pickle(path, history_u) 99 | 100 | path = os.path.join("./result/" + controller_type, 101 | env + "-history_g.pkl") 102 | save_pickle(path, history_g) 103 | 104 | 105 | def load_plot_data(env, controller_type, result_dir="./result"): 106 | """ 107 | Args: 108 | env (str): environments name 109 | controller_type (str): controller type 110 | result_dir (str): result directory 111 | Returns: 112 | history_x (numpy.ndarray): history of state, shape(iters, state_size) 113 | history_u (numpy.ndarray): history of state, shape(iters, input_size) 114 | history_g (numpy.ndarray): history of state, shape(iters, input_size) 115 | """ 116 | path = os.path.join("./result/" + controller_type, 117 | env + "-history_x.pkl") 118 | history_x = load_pickle(path) 119 | 120 | path = os.path.join("./result/" + controller_type, 121 | env + "-history_u.pkl") 122 | history_u = load_pickle(path) 123 | 124 | path = os.path.join("./result/" + controller_type, 125 | env + "-history_g.pkl") 126 | history_g = load_pickle(path) 127 | 128 | return history_x, history_u, history_g 129 | 130 | 131 | def plot_multi_result(histories, histories_g=None, labels=None, ylabel="x", 132 | save_dir="./result", name="state_history"): 133 | """ 134 | Args: 135 | history (numpy.ndarray): history, shape(iters, size) 136 | """ 137 | (_, iters, size) = histories.shape 138 | 139 | for i in range(0, size, 2): 140 | 141 | figure = plt.figure() 142 | axis1 = figure.add_subplot(211) 143 | axis2 = figure.add_subplot(212) 144 | 145 | axis1.set_ylabel(ylabel + "_{}".format(i)) 146 | axis2.set_ylabel(ylabel + "_{}".format(i+1)) 147 | axis2.set_xlabel("time steps") 148 | 149 | # gt 150 | def plot(axis, history, history_g=None, label=""): 151 | axis.plot(range(iters), history, 152 | linewidth=3, label=label, alpha=0.7, linestyle="dashed") 153 | if history_g is not None: 154 | axis.plot(range(iters), history_g, 155 | c="b", linewidth=3) 156 | 157 | if i < size: 158 | for j, (history, history_g) \ 159 | in enumerate(zip(histories, histories_g)): 160 | plot(axis1, history[:, i], 161 | history_g=history_g[:, i], label=labels[j]) 162 | if i+1 < size: 163 | for j, (history, history_g) in \ 164 | enumerate(zip(histories, histories_g)): 165 | plot(axis2, history[:, i+1], 166 | history_g=history_g[:, i+1], label=labels[j]) 167 | 168 | # save 169 | if save_dir is not None: 170 | path = os.path.join(save_dir, name + "-{}".format(i)) 171 | else: 172 | path = name 173 | 174 | axis1.legend(ncol=3, bbox_to_anchor=(0., 1.02, 1., 0.102), loc=3) 175 | figure.savefig(path, bbox_inches="tight", pad_inches=0.05) 176 | -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/controllers/nmpc_cgmres.py: -------------------------------------------------------------------------------- 1 | from logging import getLogger 2 | 3 | import numpy as np 4 | import scipy.stats as stats 5 | 6 | from .controller import Controller 7 | from ..envs.cost import calc_cost 8 | from ..common.utils import line_search 9 | 10 | logger = getLogger(__name__) 11 | 12 | 13 | class NMPCCGMRES(Controller): 14 | def __init__(self, config, model): 15 | """ Nonlinear Model Predictive Control using cgmres 16 | """ 17 | super(NMPCCGMRES, self).__init__(config, model) 18 | 19 | # model 20 | self.model = model 21 | 22 | # get cost func 23 | self.state_cost_fn = config.state_cost_fn 24 | self.terminal_state_cost_fn = config.terminal_state_cost_fn 25 | self.input_cost_fn = config.input_cost_fn 26 | 27 | # general parameters 28 | self.pred_len = config.PRED_LEN 29 | self.input_size = config.INPUT_SIZE 30 | self.dt = config.DT 31 | 32 | # controller parameters 33 | self.threshold = config.opt_config["NMPCCGMRES"]["threshold"] 34 | self.zeta = config.opt_config["NMPCCGMRES"]["zeta"] 35 | self.delta = config.opt_config["NMPCCGMRES"]["delta"] 36 | self.alpha = config.opt_config["NMPCCGMRES"]["alpha"] 37 | self.tf = config.opt_config["NMPCCGMRES"]["tf"] 38 | self.divide_num = config.PRED_LEN 39 | self.with_constraint = config.opt_config["NMPCCGMRES"]["constraint"] 40 | if not self.with_constraint: 41 | raise NotImplementedError 42 | # 3 means u, dummy_u, raw 43 | self.max_iters = 3 * self.input_size * self.divide_num 44 | 45 | # initialize 46 | self.prev_sol = np.zeros((self.pred_len, self.input_size)) 47 | self.opt_count = 1 48 | # add smaller than constraints value 49 | input_constraint = np.abs([config.INPUT_LOWER_BOUND]) 50 | self.prev_dummy_sol = np.ones( 51 | (self.pred_len, self.input_size)) * input_constraint - 1e-3 52 | # add bigger than 0.01 to avoid computational error 53 | self.prev_raw = np.zeros( 54 | (self.pred_len, self.input_size)) + 0.01 + 1e-3 55 | 56 | def _compute_f(self, curr_x, sol, g_xs, dummy_sol=None, raw=None): 57 | # shape(pred_len+1, state_size) 58 | pred_xs = self.model.predict_traj(curr_x, sol) 59 | # shape(pred_len, state_size) 60 | pred_lams = self.model.predict_adjoint_traj(pred_xs, sol, g_xs) 61 | 62 | if self.with_constraint: 63 | F = self.config.gradient_hamiltonian_input_with_constraint( 64 | pred_xs, pred_lams, sol, g_xs, dummy_sol, raw) 65 | 66 | return F 67 | else: 68 | raise NotImplementedError 69 | 70 | def obtain_sol(self, curr_x, g_xs): 71 | """ calculate the optimal inputs 72 | Args: 73 | curr_x (numpy.ndarray): current state, shape(state_size, ) 74 | g_xs (numpy.ndarrya): goal trajectory, shape(plan_len, state_size) 75 | Returns: 76 | opt_input (numpy.ndarray): optimal input, shape(input_size, ) 77 | """ 78 | sol = self.prev_sol.copy() 79 | dummy_sol = self.prev_dummy_sol.copy() 80 | raw = self.prev_raw.copy() 81 | 82 | # compute delta t 83 | time = self.dt * self.opt_count 84 | dt = self.tf * (1. - np.exp(-self.alpha * time)) / \ 85 | float(self.divide_num) 86 | self.model.dt = dt 87 | 88 | # compute Fxt 89 | x_dot = self.model.x_dot(curr_x, sol[0]) 90 | dx = x_dot * self.delta 91 | Fxt = self._compute_f(curr_x+dx, sol, g_xs, dummy_sol, raw).flatten() 92 | 93 | # compute F 94 | F = self._compute_f(curr_x, sol, g_xs, dummy_sol, raw).flatten() 95 | right = - self.zeta * F - ((Fxt - F) / self.delta) 96 | 97 | # compute Fuxt 98 | du = sol * self.delta 99 | ddummy_u = dummy_sol * self.delta 100 | draw = raw * self.delta 101 | Fuxt = self._compute_f(curr_x+dx, sol+du, g_xs, 102 | dummy_sol+ddummy_u, raw+draw).flatten() 103 | left = ((Fuxt - Fxt) / self.delta) 104 | 105 | r0 = right - left 106 | r0_norm = np.linalg.norm(r0) 107 | 108 | vs = np.zeros((self.max_iters, self.max_iters + 1)) 109 | vs[:, 0] = r0 / r0_norm 110 | hs = np.zeros((self.max_iters + 1, self.max_iters + 1)) 111 | e = np.zeros((self.max_iters + 1, 1)) 112 | e[0] = 1. 113 | 114 | for i in range(self.max_iters): 115 | reshaped_vs = vs.reshape( 116 | (self.divide_num, 3, self.input_size, self.max_iters+1)) 117 | du = reshaped_vs[:, 0, :, i] * self.delta 118 | ddummy_u = reshaped_vs[:, 1, :, i] * self.delta 119 | draw = reshaped_vs[:, 2, :, i] * self.delta 120 | 121 | Fuxt = self._compute_f( 122 | curr_x+dx, sol+du, g_xs, dummy_sol+ddummy_u, raw+draw).flatten() 123 | Av = ((Fuxt - Fxt) / self.delta) 124 | 125 | sum_Av = np.zeros(self.max_iters) 126 | 127 | for j in range(i + 1): 128 | hs[j, i] = np.dot(Av, vs[:, j]) 129 | sum_Av = sum_Av + hs[j, i] * vs[:, j] 130 | 131 | v_est = Av - sum_Av 132 | 133 | hs[i+1, i] = np.linalg.norm(v_est) 134 | 135 | vs[:, i+1] = v_est / hs[i+1, i] 136 | 137 | inv_hs = np.linalg.pinv(hs[:i+1, :i]) 138 | ys = np.dot(inv_hs, r0_norm * e[:i+1]) 139 | 140 | judge_value = r0_norm * e[:i+1] - np.dot(hs[:i+1, :i], ys[:i]) 141 | 142 | if np.linalg.norm(judge_value) < self.threshold or i == self.max_iters-1: 143 | update_value = np.dot(vs[:, :i-1], ys_pre[:i-1]).flatten() 144 | 145 | update_value = update_value.reshape( 146 | (self.divide_num, 3, self.input_size)) 147 | du_new = du + update_value[:, 0, :] 148 | ddummy_u_new = ddummy_u + update_value[:, 1, :] 149 | draw_new = draw + update_value[:, 2, :] 150 | break 151 | 152 | ys_pre = ys 153 | 154 | sol += du_new * self.delta 155 | dummy_sol += ddummy_u_new * self.delta 156 | raw += draw_new * self.delta 157 | 158 | F = self._compute_f(curr_x, sol, g_xs, dummy_sol, raw) 159 | logger.debug("check F = {0}".format(np.linalg.norm(F))) 160 | 161 | self.prev_sol = sol.copy() 162 | self.prev_dummy_sol = dummy_sol.copy() 163 | self.prev_raw = raw.copy() 164 | 165 | self.opt_count += 1 166 | 167 | return sol[0] 168 | -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/envs/cartpole.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from matplotlib.axes import Axes 3 | 4 | from .env import Env 5 | from ..plotters.plot_objs import square 6 | 7 | 8 | class CartPoleEnv(Env): 9 | """ Cartpole Environment 10 | 11 | Ref : 12 | https://ocw.mit.edu/courses/ 13 | electrical-engineering-and-computer-science/ 14 | 6-832-underactuated-robotics-spring-2009/readings/ 15 | MIT6_832s09_read_ch03.pdf 16 | """ 17 | 18 | def __init__(self): 19 | """ 20 | """ 21 | self.config = {"state_size": 4, 22 | "input_size": 1, 23 | "dt": 0.02, 24 | "max_step": 500, 25 | "input_lower_bound": [-3.], 26 | "input_upper_bound": [3.], 27 | "mp": 0.2, 28 | "mc": 1., 29 | "l": 0.5, 30 | "g": 9.81, 31 | "cart_size": (0.15, 0.1), 32 | } 33 | 34 | super(CartPoleEnv, self).__init__(self.config) 35 | 36 | def reset(self, init_x=None): 37 | """ reset state 38 | 39 | Returns: 40 | init_x (numpy.ndarray): initial state, shape(state_size, ) 41 | info (dict): information 42 | """ 43 | self.step_count = 0 44 | 45 | theta = np.random.randn(1) 46 | self.curr_x = np.array([0., 0., theta[0], 0.]) 47 | 48 | if init_x is not None: 49 | self.curr_x = init_x 50 | 51 | # goal 52 | self.g_x = np.array([0., 0., -np.pi, 0.]) 53 | 54 | # clear memory 55 | self.history_x = [] 56 | self.history_g_x = [] 57 | 58 | return self.curr_x, {"goal_state": self.g_x} 59 | 60 | def step(self, u): 61 | """ step environments 62 | 63 | Args: 64 | u (numpy.ndarray) : input, shape(input_size, ) 65 | Returns: 66 | next_x (numpy.ndarray): next state, shape(state_size, ) 67 | cost (float): costs 68 | done (bool): end the simulation or not 69 | info (dict): information 70 | """ 71 | # clip action 72 | if self.config["input_lower_bound"] is not None: 73 | u = np.clip(u, 74 | self.config["input_lower_bound"], 75 | self.config["input_upper_bound"]) 76 | 77 | # step 78 | # x 79 | d_x0 = self.curr_x[1] 80 | # v_x 81 | d_x1 = (u[0] + self.config["mp"] * np.sin(self.curr_x[2]) 82 | * (self.config["l"] * (self.curr_x[3]**2) 83 | + self.config["g"] * np.cos(self.curr_x[2]))) \ 84 | / (self.config["mc"] + self.config["mp"] 85 | * (np.sin(self.curr_x[2])**2)) 86 | # theta 87 | d_x2 = self.curr_x[3] 88 | 89 | # v_theta 90 | d_x3 = (-u[0] * np.cos(self.curr_x[2]) 91 | - self.config["mp"] * self.config["l"] * (self.curr_x[3]**2) 92 | * np.cos(self.curr_x[2]) * np.sin(self.curr_x[2]) 93 | - (self.config["mc"] + self.config["mp"]) * self.config["g"] 94 | * np.sin(self.curr_x[2])) \ 95 | / (self.config["l"] * (self.config["mc"] + self.config["mp"] 96 | * (np.sin(self.curr_x[2])**2))) 97 | 98 | next_x = self.curr_x +\ 99 | np.array([d_x0, d_x1, d_x2, d_x3]) * self.config["dt"] 100 | 101 | # TODO: costs 102 | costs = 0. 103 | costs += 0.1 * np.sum(u**2) 104 | costs += 6. * self.curr_x[0]**2 \ 105 | + 12. * (np.cos(self.curr_x[2]) + 1.)**2 \ 106 | + 0.1 * self.curr_x[1]**2 \ 107 | + 0.1 * self.curr_x[3]**2 108 | 109 | # save history 110 | self.history_x.append(next_x.flatten()) 111 | self.history_g_x.append(self.g_x.flatten()) 112 | 113 | # update 114 | self.curr_x = next_x.flatten().copy() 115 | # update costs 116 | self.step_count += 1 117 | 118 | return next_x.flatten(), costs, \ 119 | self.step_count > self.config["max_step"], \ 120 | {"goal_state": self.g_x} 121 | 122 | def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None): 123 | """ plot cartpole object function 124 | 125 | Args: 126 | to_plot (axis or imgs): plotted objects 127 | i (int): frame count 128 | history_x (numpy.ndarray): history of state, shape(iters, state) 129 | history_g_x (numpy.ndarray): history of goal state, 130 | shape(iters, state) 131 | Returns: 132 | None or imgs : imgs order is ["cart_img", "pole_img"] 133 | """ 134 | if isinstance(to_plot, Axes): 135 | imgs = {} # create new imgs 136 | 137 | imgs["cart"] = to_plot.plot([], [], c="k")[0] 138 | imgs["pole"] = to_plot.plot([], [], c="k", linewidth=5)[0] 139 | imgs["center"] = to_plot.plot([], [], marker="o", c="k", 140 | markersize=10)[0] 141 | # centerline 142 | to_plot.plot(np.linspace(-1., 1., num=50), np.zeros(50), 143 | c="k", linestyle="dashed") 144 | 145 | # set axis 146 | to_plot.set_xlim([-1., 1.]) 147 | to_plot.set_ylim([-0.55, 1.5]) 148 | 149 | return imgs 150 | 151 | # set imgs 152 | cart_x, cart_y, pole_x, pole_y = \ 153 | self._plot_cartpole(history_x[i]) 154 | 155 | to_plot["cart"].set_data(cart_x, cart_y) 156 | to_plot["pole"].set_data(pole_x, pole_y) 157 | to_plot["center"].set_data(history_x[i][0], 0.) 158 | 159 | def _plot_cartpole(self, curr_x): 160 | """ plot cartpole fucntions 161 | 162 | Args: 163 | curr_x (numpy.ndarray): current catpole state 164 | Returns: 165 | cart_x (numpy.ndarray): x data of cart 166 | cart_y (numpy.ndarray): y data of cart 167 | pole_x (numpy.ndarray): x data of pole 168 | pole_y (numpy.ndarray): y data of pole 169 | """ 170 | # cart 171 | cart_x, cart_y = square(curr_x[0], 0., 172 | self.config["cart_size"], 0.) 173 | 174 | # pole 175 | pole_x = np.array([curr_x[0], curr_x[0] + self.config["l"] 176 | * np.cos(curr_x[2]-np.pi/2)]) 177 | pole_y = np.array([0., self.config["l"] 178 | * np.sin(curr_x[2]-np.pi/2)]) 179 | 180 | return cart_x, cart_y, pole_x, pole_y 181 | -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/configs/first_order_lag.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class FirstOrderLagConfigModule(): 5 | # parameters 6 | ENV_NAME = "FirstOrderLag-v0" 7 | TYPE = "Linear" 8 | TASK_HORIZON = 1000 9 | PRED_LEN = 50 10 | STATE_SIZE = 4 11 | INPUT_SIZE = 2 12 | DT = 0.05 13 | # cost parameters 14 | R = np.eye(INPUT_SIZE) 15 | Q = np.eye(STATE_SIZE) 16 | Sf = np.eye(STATE_SIZE) 17 | # bounds 18 | INPUT_LOWER_BOUND = np.array([-0.5, -0.5]) 19 | INPUT_UPPER_BOUND = np.array([0.5, 0.5]) 20 | # DT_INPUT_LOWER_BOUND = np.array([-0.5 * DT, -0.5 * DT]) 21 | # DT_INPUT_UPPER_BOUND = np.array([0.25 * DT, 0.25 * DT]) 22 | DT_INPUT_LOWER_BOUND = None 23 | DT_INPUT_UPPER_BOUND = None 24 | 25 | def __init__(self): 26 | """ 27 | """ 28 | # opt configs 29 | self.opt_config = { 30 | "Random": { 31 | "popsize": 5000 32 | }, 33 | "CEM": { 34 | "popsize": 500, 35 | "num_elites": 50, 36 | "max_iters": 15, 37 | "alpha": 0.3, 38 | "init_var": 1., 39 | "threshold": 0.001 40 | }, 41 | "MPPI": { 42 | "beta": 0.6, 43 | "popsize": 5000, 44 | "kappa": 0.9, 45 | "noise_sigma": 0.5, 46 | }, 47 | "MPPIWilliams": { 48 | "popsize": 5000, 49 | "lambda": 1., 50 | "noise_sigma": 0.9, 51 | }, 52 | "MPC": { 53 | }, 54 | "iLQR": { 55 | "max_iters": 500, 56 | "init_mu": 1., 57 | "mu_min": 1e-6, 58 | "mu_max": 1e10, 59 | "init_delta": 2., 60 | "threshold": 1e-6, 61 | }, 62 | "DDP": { 63 | "max_iters": 500, 64 | "init_mu": 1., 65 | "mu_min": 1e-6, 66 | "mu_max": 1e10, 67 | "init_delta": 2., 68 | "threshold": 1e-6, 69 | }, 70 | "NMPC-CGMRES": { 71 | }, 72 | "NMPC-Newton": { 73 | }, 74 | } 75 | 76 | @staticmethod 77 | def input_cost_fn(u): 78 | """ input cost functions 79 | Args: 80 | u (numpy.ndarray): input, shape(pred_len, input_size) 81 | or shape(pop_size, pred_len, input_size) 82 | Returns: 83 | cost (numpy.ndarray): cost of input, shape(pred_len, input_size) or 84 | shape(pop_size, pred_len, input_size) 85 | """ 86 | return (u**2) * np.diag(FirstOrderLagConfigModule.R) 87 | 88 | @staticmethod 89 | def state_cost_fn(x, g_x): 90 | """ state cost function 91 | Args: 92 | x (numpy.ndarray): state, shape(pred_len, state_size) 93 | or shape(pop_size, pred_len, state_size) 94 | g_x (numpy.ndarray): goal state, shape(pred_len, state_size) 95 | or shape(pop_size, pred_len, state_size) 96 | Returns: 97 | cost (numpy.ndarray): cost of state, shape(pred_len, state_size) or 98 | shape(pop_size, pred_len, state_size) 99 | """ 100 | return ((x - g_x)**2) * np.diag(FirstOrderLagConfigModule.Q) 101 | 102 | @staticmethod 103 | def terminal_state_cost_fn(terminal_x, terminal_g_x): 104 | """ 105 | Args: 106 | terminal_x (numpy.ndarray): terminal state, 107 | shape(state_size, ) or shape(pop_size, state_size) 108 | terminal_g_x (numpy.ndarray): terminal goal state, 109 | shape(state_size, ) or shape(pop_size, state_size) 110 | Returns: 111 | cost (numpy.ndarray): cost of state, shape(pred_len, ) or 112 | shape(pop_size, pred_len) 113 | """ 114 | return ((terminal_x - terminal_g_x)**2) \ 115 | * np.diag(FirstOrderLagConfigModule.Sf) 116 | 117 | @staticmethod 118 | def gradient_cost_fn_state(x, g_x, terminal=False): 119 | """ gradient of costs with respect to the state 120 | 121 | Args: 122 | x (numpy.ndarray): state, shape(pred_len, state_size) 123 | g_x (numpy.ndarray): goal state, shape(pred_len, state_size) 124 | 125 | Returns: 126 | l_x (numpy.ndarray): gradient of cost, shape(pred_len, state_size) 127 | or shape(1, state_size) 128 | """ 129 | if not terminal: 130 | return 2. * (x - g_x) * np.diag(FirstOrderLagConfigModule.Q) 131 | 132 | return (2. * (x - g_x) 133 | * np.diag(FirstOrderLagConfigModule.Sf))[np.newaxis, :] 134 | 135 | @staticmethod 136 | def gradient_cost_fn_input(x, u): 137 | """ gradient of costs with respect to the input 138 | 139 | Args: 140 | x (numpy.ndarray): state, shape(pred_len, state_size) 141 | u (numpy.ndarray): goal state, shape(pred_len, input_size) 142 | 143 | Returns: 144 | l_u (numpy.ndarray): gradient of cost, shape(pred_len, input_size) 145 | """ 146 | return 2. * u * np.diag(FirstOrderLagConfigModule.R) 147 | 148 | @staticmethod 149 | def hessian_cost_fn_state(x, g_x, terminal=False): 150 | """ hessian costs with respect to the state 151 | 152 | Args: 153 | x (numpy.ndarray): state, shape(pred_len, state_size) 154 | g_x (numpy.ndarray): goal state, shape(pred_len, state_size) 155 | 156 | Returns: 157 | l_xx (numpy.ndarray): gradient of cost, 158 | shape(pred_len, state_size, state_size) or 159 | shape(1, state_size, state_size) or 160 | """ 161 | if not terminal: 162 | (pred_len, _) = x.shape 163 | return np.tile(2.*FirstOrderLagConfigModule.Q, (pred_len, 1, 1)) 164 | 165 | return np.tile(2.*FirstOrderLagConfigModule.Sf, (1, 1, 1)) 166 | 167 | @staticmethod 168 | def hessian_cost_fn_input(x, u): 169 | """ hessian costs with respect to the input 170 | 171 | Args: 172 | x (numpy.ndarray): state, shape(pred_len, state_size) 173 | u (numpy.ndarray): goal state, shape(pred_len, input_size) 174 | 175 | Returns: 176 | l_uu (numpy.ndarray): gradient of cost, 177 | shape(pred_len, input_size, input_size) 178 | """ 179 | (pred_len, _) = u.shape 180 | 181 | return np.tile(2.*FirstOrderLagConfigModule.R, (pred_len, 1, 1)) 182 | 183 | @staticmethod 184 | def hessian_cost_fn_input_state(x, u): 185 | """ hessian costs with respect to the state and input 186 | 187 | Args: 188 | x (numpy.ndarray): state, shape(pred_len, state_size) 189 | u (numpy.ndarray): goal state, shape(pred_len, input_size) 190 | 191 | Returns: 192 | l_ux (numpy.ndarray): gradient of cost , 193 | shape(pred_len, input_size, state_size) 194 | """ 195 | (_, state_size) = x.shape 196 | (pred_len, input_size) = u.shape 197 | 198 | return np.zeros((pred_len, input_size, state_size)) 199 | -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/models/nonlinear_sample_system.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from .model import Model 4 | from ..common.utils import update_state_with_Runge_Kutta 5 | 6 | 7 | class NonlinearSampleSystemModel(Model): 8 | """ nonlinear sample system model 9 | """ 10 | 11 | def __init__(self, config): 12 | """ 13 | """ 14 | super(NonlinearSampleSystemModel, self).__init__() 15 | self.dt = config.DT 16 | self.gradient_hamiltonian_state = config.gradient_hamiltonian_state 17 | self.gradient_hamiltonian_input = config.gradient_hamiltonian_input 18 | self.gradient_cost_fn_state = config.gradient_cost_fn_state 19 | 20 | def predict_next_state(self, curr_x, u): 21 | """ predict next state 22 | 23 | Args: 24 | curr_x (numpy.ndarray): current state, shape(state_size, ) or 25 | shape(pop_size, state_size) 26 | u (numpy.ndarray): input, shape(input_size, ) or 27 | shape(pop_size, input_size) 28 | Returns: 29 | next_x (numpy.ndarray): next state, shape(state_size, ) or 30 | shape(pop_size, state_size) 31 | """ 32 | if len(u.shape) == 1: 33 | func_1 = self._func_x_1 34 | func_2 = self._func_x_2 35 | functions = [func_1, func_2] 36 | next_x = update_state_with_Runge_Kutta( 37 | curr_x, u, functions, batch=False, dt=self.dt) 38 | return next_x 39 | 40 | elif len(u.shape) == 2: 41 | def func_1(xs, us): return self._func_x_1(xs, us, batch=True) 42 | def func_2(xs, us): return self._func_x_2(xs, us, batch=True) 43 | functions = [func_1, func_2] 44 | next_x = update_state_with_Runge_Kutta( 45 | curr_x, u, functions, batch=True, dt=self.dt) 46 | 47 | return next_x 48 | 49 | def x_dot(self, curr_x, u): 50 | """ 51 | Args: 52 | curr_x (numpy.ndarray): current state, shape(state_size, ) 53 | u (numpy.ndarray): input, shape(input_size, ) 54 | Returns: 55 | x_dot (numpy.ndarray): next state, shape(state_size, ) 56 | """ 57 | state_size = curr_x.shape[0] 58 | x_dot = np.zeros(state_size) 59 | x_dot[0] = self._func_x_1(curr_x, u) 60 | x_dot[1] = self._func_x_2(curr_x, u) 61 | return x_dot 62 | 63 | def predict_adjoint_state(self, lam, x, u, g_x=None): 64 | """ predict adjoint states 65 | 66 | Args: 67 | lam (numpy.ndarray): adjoint state, shape(state_size, ) 68 | x (numpy.ndarray): state, shape(state_size, ) 69 | u (numpy.ndarray): input, shape(input_size, ) 70 | goal (numpy.ndarray): goal state, shape(state_size, ) 71 | Returns: 72 | prev_lam (numpy.ndarrya): previous adjoint state, 73 | shape(state_size, ) 74 | """ 75 | if len(u.shape) == 1: 76 | delta_lam = self.dt * \ 77 | self.gradient_hamiltonian_state(x, lam, u, g_x) 78 | prev_lam = lam + delta_lam 79 | return prev_lam 80 | 81 | elif len(u.shape) == 2: 82 | raise ValueError 83 | 84 | def predict_terminal_adjoint_state(self, terminal_x, terminal_g_x=None): 85 | """ predict terminal adjoint state 86 | 87 | Args: 88 | terminal_x (numpy.ndarray): terminal state, shape(state_size, ) 89 | terminal_g_x (numpy.ndarray): terminal goal state, 90 | shape(state_size, ) 91 | Returns: 92 | terminal_lam (numpy.ndarray): terminal adjoint state, 93 | shape(state_size, ) 94 | """ 95 | terminal_lam = self.gradient_cost_fn_state( 96 | terminal_x, terminal_g_x, terminal=True) # return in shape[1, state_size] 97 | return terminal_lam[0] 98 | 99 | def _func_x_1(self, x, u, batch=False): 100 | if not batch: 101 | x_dot = x[1] 102 | else: 103 | x_dot = x[:, 1] 104 | return x_dot 105 | 106 | def _func_x_2(self, x, u, batch=False): 107 | if not batch: 108 | x_dot = (1. - x[0]**2 - x[1]**2) * x[1] - x[0] + u[0] 109 | else: 110 | x_dot = (1. - x[:, 0]**2 - x[:, 1]**2) * \ 111 | x[:, 1] - x[:, 0] + u[:, 0] 112 | return x_dot 113 | 114 | def calc_f_x(self, xs, us, dt): 115 | """ gradient of model with respect to the state in batch form 116 | Args: 117 | xs (numpy.ndarray): state, shape(pred_len+1, state_size) 118 | us (numpy.ndarray): input, shape(pred_len, input_size,) 119 | 120 | Return: 121 | f_x (numpy.ndarray): gradient of model with respect to x, 122 | shape(pred_len, state_size, state_size) 123 | 124 | Notes: 125 | This should be discrete form !! 126 | """ 127 | # get size 128 | (_, state_size) = xs.shape 129 | (pred_len, _) = us.shape 130 | 131 | f_x = np.zeros((pred_len, state_size, state_size)) 132 | f_x[:, 0, 1] = 1. 133 | f_x[:, 1, 0] = 2. * xs[:, 0] * xs[:, 1] - 1. 134 | f_x[:, 1, 1] = - 2. * xs[:, 1] * xs[:, 1] + \ 135 | (1. - xs[:, 0]**2 - xs[:, 1]**2) 136 | 137 | return f_x * dt + np.eye(state_size) # to discrete form 138 | 139 | def calc_f_u(self, xs, us, dt): 140 | """ gradient of model with respect to the input in batch form 141 | Args: 142 | xs (numpy.ndarray): state, shape(pred_len+1, state_size) 143 | us (numpy.ndarray): input, shape(pred_len, input_size,) 144 | 145 | Return: 146 | f_u (numpy.ndarray): gradient of model with respect to x, 147 | shape(pred_len, state_size, input_size) 148 | 149 | Notes: 150 | This should be discrete form !! 151 | """ 152 | # get size 153 | (_, state_size) = xs.shape 154 | (pred_len, input_size) = us.shape 155 | 156 | f_u = np.zeros((pred_len, state_size, input_size)) 157 | 158 | f_u[:, 1, 0] = 1. 159 | 160 | return f_u * dt # to discrete form 161 | 162 | def calc_f_xx(self, xs, us, dt): 163 | """ hessian of model with respect to the state in batch form 164 | 165 | Args: 166 | xs (numpy.ndarray): state, shape(pred_len+1, state_size) 167 | us (numpy.ndarray): input, shape(pred_len, input_size,) 168 | 169 | Return: 170 | f_xx (numpy.ndarray): gradient of model with respect to x, 171 | shape(pred_len, state_size, state_size, state_size) 172 | """ 173 | # get size 174 | (_, state_size) = xs.shape 175 | (pred_len, _) = us.shape 176 | 177 | f_xx = np.zeros((pred_len, state_size, state_size, state_size)) 178 | 179 | raise NotImplementedError 180 | 181 | def calc_f_ux(self, xs, us, dt): 182 | """ hessian of model with respect to state and input in batch form 183 | 184 | Args: 185 | xs (numpy.ndarray): state, shape(pred_len+1, state_size) 186 | us (numpy.ndarray): input, shape(pred_len, input_size,) 187 | 188 | Return: 189 | f_ux (numpy.ndarray): gradient of model with respect to x, 190 | shape(pred_len, state_size, input_size, state_size) 191 | """ 192 | # get size 193 | (_, state_size) = xs.shape 194 | (pred_len, input_size) = us.shape 195 | 196 | f_ux = np.zeros((pred_len, state_size, input_size, state_size)) 197 | 198 | raise NotImplementedError 199 | 200 | def calc_f_uu(self, xs, us, dt): 201 | """ hessian of model with respect to input in batch form 202 | 203 | Args: 204 | xs (numpy.ndarray): state, shape(pred_len+1, state_size) 205 | us (numpy.ndarray): input, shape(pred_len, input_size,) 206 | 207 | Return: 208 | f_uu (numpy.ndarray): gradient of model with respect to x, 209 | shape(pred_len, state_size, input_size, input_size) 210 | """ 211 | # get size 212 | (_, state_size) = xs.shape 213 | (pred_len, input_size) = us.shape 214 | 215 | f_uu = np.zeros((pred_len, state_size, input_size, input_size)) 216 | 217 | raise NotImplementedError 218 | -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/models/two_wheeled.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from .model import Model 4 | 5 | 6 | class TwoWheeledModel(Model): 7 | """ two wheeled model 8 | """ 9 | 10 | def __init__(self, config): 11 | """ 12 | """ 13 | super(TwoWheeledModel, self).__init__() 14 | self.dt = config.DT 15 | self.gradient_hamiltonian_state = config.gradient_hamiltonian_state 16 | self.gradient_hamiltonian_input = config.gradient_hamiltonian_input 17 | self.gradient_cost_fn_state = config.gradient_cost_fn_state 18 | 19 | def predict_next_state(self, curr_x, u): 20 | """ predict next state 21 | 22 | Args: 23 | curr_x (numpy.ndarray): current state, shape(state_size, ) or 24 | shape(pop_size, state_size) 25 | u (numpy.ndarray): input, shape(input_size, ) or 26 | shape(pop_size, input_size) 27 | Returns: 28 | next_x (numpy.ndarray): next state, shape(state_size, ) or 29 | shape(pop_size, state_size) 30 | """ 31 | if len(u.shape) == 1: 32 | B = np.array([[np.cos(curr_x[-1]), 0.], 33 | [np.sin(curr_x[-1]), 0.], 34 | [0., 1.]]) 35 | # calc dot 36 | x_dot = np.matmul(B, u[:, np.newaxis]) 37 | # next state 38 | next_x = x_dot.flatten() * self.dt + curr_x 39 | 40 | return next_x 41 | 42 | elif len(u.shape) == 2: 43 | (pop_size, state_size) = curr_x.shape 44 | (_, input_size) = u.shape 45 | # B.shape = (pop_size, state_size, input_size) 46 | B = np.zeros((pop_size, state_size, input_size)) 47 | # insert 48 | B[:, 0, 0] = np.cos(curr_x[:, -1]) 49 | B[:, 1, 0] = np.sin(curr_x[:, -1]) 50 | B[:, 2, 1] = np.ones(pop_size) 51 | 52 | # x_dot.shape = (pop_size, state_size, 1) 53 | x_dot = np.matmul(B, u[:, :, np.newaxis]) 54 | # next state 55 | next_x = x_dot[:, :, 0] * self.dt + curr_x 56 | 57 | return next_x 58 | 59 | def x_dot(self, curr_x, u): 60 | """ compute x dot 61 | Args: 62 | curr_x (numpy.ndarray): current state, shape(state_size, ) 63 | u (numpy.ndarray): input, shape(input_size, ) 64 | Returns: 65 | x_dot (numpy.ndarray): next state, shape(state_size, ) 66 | """ 67 | B = np.array([[np.cos(curr_x[-1]), 0.], 68 | [np.sin(curr_x[-1]), 0.], 69 | [0., 1.]]) 70 | x_dot = np.matmul(B, u[:, np.newaxis]) 71 | return x_dot.flatten() 72 | 73 | def predict_adjoint_state(self, lam, x, u, g_x=None, t=None): 74 | """ predict adjoint states 75 | 76 | Args: 77 | lam (numpy.ndarray): adjoint state, shape(state_size, ) 78 | x (numpy.ndarray): state, shape(state_size, ) 79 | u (numpy.ndarray): input, shape(input_size, ) 80 | goal (numpy.ndarray): goal state, shape(state_size, ) 81 | Returns: 82 | prev_lam (numpy.ndarrya): previous adjoint state, 83 | shape(state_size, ) 84 | """ 85 | if len(u.shape) == 1: 86 | delta_lam = self.dt * \ 87 | self.gradient_hamiltonian_state(x, lam, u, g_x) 88 | prev_lam = lam + delta_lam 89 | return prev_lam 90 | 91 | elif len(u.shape) == 2: 92 | raise ValueError 93 | 94 | def predict_terminal_adjoint_state(self, terminal_x, terminal_g_x=None): 95 | """ predict terminal adjoint state 96 | 97 | Args: 98 | terminal_x (numpy.ndarray): terminal state, shape(state_size, ) 99 | terminal_g_x (numpy.ndarray): terminal goal state, 100 | shape(state_size, ) 101 | Returns: 102 | terminal_lam (numpy.ndarray): terminal adjoint state, 103 | shape(state_size, ) 104 | """ 105 | terminal_lam = self.gradient_cost_fn_state( 106 | terminal_x, terminal_g_x, terminal=True) # return in shape[1, state_size] 107 | return terminal_lam[0] 108 | 109 | @staticmethod 110 | def calc_f_x(xs, us, dt): 111 | """ gradient of model with respect to the state in batch form 112 | Args: 113 | xs (numpy.ndarray): state, shape(pred_len+1, state_size) 114 | us (numpy.ndarray): input, shape(pred_len, input_size,) 115 | 116 | Return: 117 | f_x (numpy.ndarray): gradient of model with respect to x, 118 | shape(pred_len, state_size, state_size) 119 | 120 | Notes: 121 | This should be discrete form !! 122 | """ 123 | # get size 124 | (_, state_size) = xs.shape 125 | (pred_len, _) = us.shape 126 | 127 | f_x = np.zeros((pred_len, state_size, state_size)) 128 | f_x[:, 0, 2] = -np.sin(xs[:, 2]) * us[:, 0] 129 | f_x[:, 1, 2] = np.cos(xs[:, 2]) * us[:, 0] 130 | 131 | return f_x * dt + np.eye(state_size) # to discrete form 132 | 133 | @staticmethod 134 | def calc_f_u(xs, us, dt): 135 | """ gradient of model with respect to the input in batch form 136 | Args: 137 | xs (numpy.ndarray): state, shape(pred_len+1, state_size) 138 | us (numpy.ndarray): input, shape(pred_len, input_size,) 139 | 140 | Return: 141 | f_u (numpy.ndarray): gradient of model with respect to x, 142 | shape(pred_len, state_size, input_size) 143 | 144 | Notes: 145 | This should be discrete form !! 146 | """ 147 | # get size 148 | (_, state_size) = xs.shape 149 | (pred_len, input_size) = us.shape 150 | 151 | f_u = np.zeros((pred_len, state_size, input_size)) 152 | f_u[:, 0, 0] = np.cos(xs[:, 2]) 153 | f_u[:, 1, 0] = np.sin(xs[:, 2]) 154 | f_u[:, 2, 1] = 1. 155 | 156 | return f_u * dt # to discrete form 157 | 158 | @staticmethod 159 | def calc_f_xx(xs, us, dt): 160 | """ hessian of model with respect to the state in batch form 161 | 162 | Args: 163 | xs (numpy.ndarray): state, shape(pred_len+1, state_size) 164 | us (numpy.ndarray): input, shape(pred_len, input_size,) 165 | 166 | Return: 167 | f_xx (numpy.ndarray): gradient of model with respect to x, 168 | shape(pred_len, state_size, state_size, state_size) 169 | """ 170 | # get size 171 | (_, state_size) = xs.shape 172 | (pred_len, _) = us.shape 173 | 174 | f_xx = np.zeros((pred_len, state_size, state_size, state_size)) 175 | 176 | f_xx[:, 0, 2, 2] = -np.cos(xs[:, 2]) * us[:, 0] 177 | f_xx[:, 1, 2, 2] = -np.sin(xs[:, 2]) * us[:, 0] 178 | 179 | return f_xx * dt 180 | 181 | @staticmethod 182 | def calc_f_ux(xs, us, dt): 183 | """ hessian of model with respect to state and input in batch form 184 | 185 | Args: 186 | xs (numpy.ndarray): state, shape(pred_len+1, state_size) 187 | us (numpy.ndarray): input, shape(pred_len, input_size,) 188 | 189 | Return: 190 | f_ux (numpy.ndarray): gradient of model with respect to x, 191 | shape(pred_len, state_size, input_size, state_size) 192 | """ 193 | # get size 194 | (_, state_size) = xs.shape 195 | (pred_len, input_size) = us.shape 196 | 197 | f_ux = np.zeros((pred_len, state_size, input_size, state_size)) 198 | 199 | f_ux[:, 0, 0, 2] = -np.sin(xs[:, 2]) 200 | f_ux[:, 1, 0, 2] = np.cos(xs[:, 2]) 201 | 202 | return f_ux * dt 203 | 204 | @staticmethod 205 | def calc_f_uu(xs, us, dt): 206 | """ hessian of model with respect to input in batch form 207 | 208 | Args: 209 | xs (numpy.ndarray): state, shape(pred_len+1, state_size) 210 | us (numpy.ndarray): input, shape(pred_len, input_size,) 211 | 212 | Return: 213 | f_uu (numpy.ndarray): gradient of model with respect to x, 214 | shape(pred_len, state_size, input_size, input_size) 215 | """ 216 | # get size 217 | (_, state_size) = xs.shape 218 | (pred_len, input_size) = us.shape 219 | 220 | f_uu = np.zeros((pred_len, state_size, input_size, input_size)) 221 | 222 | return f_uu * dt 223 | -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/models/cartpole.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from .model import Model 4 | 5 | 6 | class CartPoleModel(Model): 7 | """ cartpole model 8 | """ 9 | 10 | def __init__(self, config): 11 | """ 12 | """ 13 | super(CartPoleModel, self).__init__() 14 | self.dt = config.DT 15 | self.mc = config.MC 16 | self.mp = config.MP 17 | self.l = config.L 18 | self.g = config.G 19 | 20 | def predict_next_state(self, curr_x, u): 21 | """ predict next state 22 | 23 | Args: 24 | curr_x (numpy.ndarray): current state, shape(state_size, ) or 25 | shape(pop_size, state_size) 26 | u (numpy.ndarray): input, shape(input_size, ) or 27 | shape(pop_size, input_size) 28 | Returns: 29 | next_x (numpy.ndarray): next state, shape(state_size, ) or 30 | shape(pop_size, state_size) 31 | """ 32 | if len(u.shape) == 1: 33 | # x 34 | d_x0 = curr_x[1] 35 | # v_x 36 | d_x1 = (u[0] + self.mp * np.sin(curr_x[2]) 37 | * (self.l * (curr_x[3]**2) 38 | + self.g * np.cos(curr_x[2]))) \ 39 | / (self.mc + self.mp * (np.sin(curr_x[2])**2)) 40 | # theta 41 | d_x2 = curr_x[3] 42 | # v_theta 43 | d_x3 = (-u[0] * np.cos(curr_x[2]) 44 | - self.mp * self.l * (curr_x[3]**2) 45 | * np.cos(curr_x[2]) * np.sin(curr_x[2]) 46 | - (self.mc + self.mp) * self.g * np.sin(curr_x[2])) \ 47 | / (self.l * (self.mc + self.mp * (np.sin(curr_x[2])**2))) 48 | 49 | next_x = curr_x +\ 50 | np.array([d_x0, d_x1, d_x2, d_x3]) * self.dt 51 | 52 | return next_x 53 | 54 | elif len(u.shape) == 2: 55 | # x 56 | d_x0 = curr_x[:, 1] 57 | # v_x 58 | d_x1 = (u[:, 0] + self.mp * np.sin(curr_x[:, 2]) 59 | * (self.l * (curr_x[:, 3]**2) 60 | + self.g * np.cos(curr_x[:, 2]))) \ 61 | / (self.mc + self.mp * (np.sin(curr_x[:, 2])**2)) 62 | # theta 63 | d_x2 = curr_x[:, 3] 64 | # v_theta 65 | d_x3 = (-u[:, 0] * np.cos(curr_x[:, 2]) 66 | - self.mp * self.l * (curr_x[:, 3]**2) 67 | * np.cos(curr_x[:, 2]) * np.sin(curr_x[:, 2]) 68 | - (self.mc + self.mp) * self.g * np.sin(curr_x[:, 2])) \ 69 | / (self.l * (self.mc + self.mp * (np.sin(curr_x[:, 2])**2))) 70 | 71 | next_x = curr_x +\ 72 | np.stack((d_x0, d_x1, d_x2, d_x3), axis=1) * self.dt 73 | 74 | return next_x 75 | 76 | def calc_f_x(self, xs, us, dt): 77 | """ gradient of model with respect to the state in batch form 78 | Args: 79 | xs (numpy.ndarray): state, shape(pred_len+1, state_size) 80 | us (numpy.ndarray): input, shape(pred_len, input_size,) 81 | 82 | Return: 83 | f_x (numpy.ndarray): gradient of model with respect to x, 84 | shape(pred_len, state_size, state_size) 85 | 86 | Notes: 87 | This should be discrete form !! 88 | """ 89 | # get size 90 | (_, state_size) = xs.shape 91 | (pred_len, _) = us.shape 92 | 93 | f_x = np.zeros((pred_len, state_size, state_size)) 94 | 95 | # f_x_dot 96 | f_x[:, 0, 1] = np.ones(pred_len) 97 | 98 | # f_theta 99 | tmp = ((self.mc + self.mp * np.sin(xs[:, 2])**2)**(-2)) \ 100 | * self.mp * 2. * np.sin(xs[:, 2]) * np.cos(xs[:, 2]) 101 | tmp2 = 1. / (self.mc + self.mp * (np.sin(xs[:, 2])**2)) 102 | 103 | f_x[:, 1, 2] = - us[:, 0] * tmp \ 104 | - tmp * (self.mp * np.sin(xs[:, 2]) 105 | * (self.l * xs[:, 3]**2 106 | + self.g * np.cos(xs[:, 2]))) \ 107 | + tmp2 * (self.mp * np.cos(xs[:, 2]) * self.l 108 | * xs[:, 3]**2 109 | + self.mp * self.g * (np.cos(xs[:, 2])**2 110 | - np.sin(xs[:, 2])**2)) 111 | f_x[:, 3, 2] = - 1. / self.l * tmp \ 112 | * (-us[:, 0] * np.cos(xs[:, 2]) 113 | - self.mp * self.l * (xs[:, 3]**2) 114 | * np.cos(xs[:, 2]) * np.sin(xs[:, 2]) 115 | - (self.mc + self.mp) * self.g * np.sin(xs[:, 2])) \ 116 | + 1. / self.l * tmp2 \ 117 | * (us[:, 0] * np.sin(xs[:, 2]) 118 | - self.mp * self.l * xs[:, 3]**2 119 | * (np.cos(xs[:, 2])**2 - np.sin(xs[:, 2])**2) 120 | - (self.mc + self.mp) 121 | * self.g * np.cos(xs[:, 2])) 122 | 123 | # f_theta_dot 124 | f_x[:, 1, 3] = tmp2 * (self.mp * np.sin(xs[:, 2]) 125 | * self.l * 2 * xs[:, 3]) 126 | f_x[:, 2, 3] = np.ones(pred_len) 127 | f_x[:, 3, 3] = 1. / self.l * tmp2 \ 128 | * (-2. * self.mp * self.l * xs[:, 3] 129 | * np.cos(xs[:, 2]) * np.sin(xs[:, 2])) 130 | 131 | return f_x * dt + np.eye(state_size) # to discrete form 132 | 133 | def calc_f_u(self, xs, us, dt): 134 | """ gradient of model with respect to the input in batch form 135 | Args: 136 | xs (numpy.ndarray): state, shape(pred_len+1, state_size) 137 | us (numpy.ndarray): input, shape(pred_len, input_size,) 138 | 139 | Return: 140 | f_u (numpy.ndarray): gradient of model with respect to x, 141 | shape(pred_len, state_size, input_size) 142 | 143 | Notes: 144 | This should be discrete form !! 145 | """ 146 | # get size 147 | (_, state_size) = xs.shape 148 | (pred_len, input_size) = us.shape 149 | 150 | f_u = np.zeros((pred_len, state_size, input_size)) 151 | 152 | f_u[:, 1, 0] = 1. / (self.mc + self.mp * (np.sin(xs[:, 2])**2)) 153 | 154 | f_u[:, 3, 0] = -np.cos(xs[:, 2]) \ 155 | / (self.l * (self.mc 156 | + self.mp * (np.sin(xs[:, 2])**2))) 157 | 158 | return f_u * dt # to discrete form 159 | 160 | def calc_f_xx(self, xs, us, dt): 161 | """ hessian of model with respect to the state in batch form 162 | 163 | Args: 164 | xs (numpy.ndarray): state, shape(pred_len+1, state_size) 165 | us (numpy.ndarray): input, shape(pred_len, input_size,) 166 | 167 | Return: 168 | f_xx (numpy.ndarray): gradient of model with respect to x, 169 | shape(pred_len, state_size, state_size, state_size) 170 | """ 171 | # get size 172 | (_, state_size) = xs.shape 173 | (pred_len, _) = us.shape 174 | 175 | f_xx = np.zeros((pred_len, state_size, state_size, state_size)) 176 | 177 | raise NotImplementedError 178 | 179 | def calc_f_ux(self, xs, us, dt): 180 | """ hessian of model with respect to state and input in batch form 181 | 182 | Args: 183 | xs (numpy.ndarray): state, shape(pred_len+1, state_size) 184 | us (numpy.ndarray): input, shape(pred_len, input_size,) 185 | 186 | Return: 187 | f_ux (numpy.ndarray): gradient of model with respect to x, 188 | shape(pred_len, state_size, input_size, state_size) 189 | """ 190 | # get size 191 | (_, state_size) = xs.shape 192 | (pred_len, input_size) = us.shape 193 | 194 | f_ux = np.zeros((pred_len, state_size, input_size, state_size)) 195 | 196 | raise NotImplementedError 197 | 198 | def calc_f_uu(self, xs, us, dt): 199 | """ hessian of model with respect to input in batch form 200 | 201 | Args: 202 | xs (numpy.ndarray): state, shape(pred_len+1, state_size) 203 | us (numpy.ndarray): input, shape(pred_len, input_size,) 204 | 205 | Return: 206 | f_uu (numpy.ndarray): gradient of model with respect to x, 207 | shape(pred_len, state_size, input_size, input_size) 208 | """ 209 | # get size 210 | (_, state_size) = xs.shape 211 | (pred_len, input_size) = us.shape 212 | 213 | f_uu = np.zeros((pred_len, state_size, input_size, input_size)) 214 | 215 | raise NotImplementedError 216 | -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/controllers/mpc.py: -------------------------------------------------------------------------------- 1 | from logging import getLogger 2 | 3 | import numpy as np 4 | from scipy.optimize import minimize 5 | from scipy.optimize import LinearConstraint 6 | 7 | from .controller import Controller 8 | from ..envs.cost import calc_cost 9 | 10 | logger = getLogger(__name__) 11 | 12 | 13 | class LinearMPC(Controller): 14 | """ Model Predictive Controller for linear model 15 | 16 | Attributes: 17 | A (numpy.ndarray): system matrix, shape(state_size, state_size) 18 | B (numpy.ndarray): input matrix, shape(state_size, input_size) 19 | Q (numpy.ndarray): cost function weight for states 20 | R (numpy.ndarray): cost function weight for states 21 | history_us (list[numpy.ndarray]): time history of optimal input 22 | Ref: 23 | Maciejowski, J. M. (2002). Predictive control: with constraints. 24 | """ 25 | 26 | def __init__(self, config, model): 27 | """ 28 | Args: 29 | model (Model): system matrix, shape(state_size, state_size) 30 | config (ConfigModule): input matrix, shape(state_size, input_size) 31 | """ 32 | if config.TYPE != "Linear": 33 | raise ValueError("{} could be not applied to \ 34 | this controller".format(model)) 35 | super(LinearMPC, self).__init__(config, model) 36 | # system parameters 37 | self.model = model 38 | self.A = model.A 39 | self.B = model.B 40 | self.state_size = config.STATE_SIZE 41 | self.input_size = config.INPUT_SIZE 42 | self.pred_len = config.PRED_LEN 43 | 44 | # get cost func 45 | self.state_cost_fn = config.state_cost_fn 46 | self.terminal_state_cost_fn = config.terminal_state_cost_fn 47 | self.input_cost_fn = config.input_cost_fn 48 | 49 | # cost parameters 50 | self.Q = config.Q 51 | self.R = config.R 52 | self.Qs = None 53 | self.Rs = None 54 | 55 | # constraints 56 | self.dt_input_lower_bound = config.DT_INPUT_LOWER_BOUND 57 | self.dt_input_upper_bound = config.DT_INPUT_UPPER_BOUND 58 | self.input_lower_bound = config.INPUT_LOWER_BOUND 59 | self.input_upper_bound = config.INPUT_UPPER_BOUND 60 | 61 | # setup controllers 62 | self.W = None 63 | self.omega = None 64 | self.F = None 65 | self.f = None 66 | self.setup() 67 | self.prev_sol = np.zeros(self.input_size*self.pred_len) 68 | 69 | # history 70 | self.history_u = [np.zeros(self.input_size)] 71 | 72 | def setup(self): 73 | """ 74 | setup Model Predictive Control as a quadratic programming 75 | """ 76 | A_factorials = [self.A] 77 | self.phi_mat = self.A.copy() 78 | 79 | for _ in range(self.pred_len - 1): 80 | temp_mat = np.matmul(A_factorials[-1], self.A) 81 | self.phi_mat = np.vstack((self.phi_mat, temp_mat)) 82 | A_factorials.append(temp_mat) # after we use this factorials 83 | 84 | self.gamma_mat = self.B.copy() 85 | gammma_mat_temp = self.B.copy() 86 | 87 | for i in range(self.pred_len - 1): 88 | temp_1_mat = np.matmul(A_factorials[i], self.B) 89 | gammma_mat_temp = temp_1_mat + gammma_mat_temp 90 | self.gamma_mat = np.vstack((self.gamma_mat, gammma_mat_temp)) 91 | 92 | self.theta_mat = self.gamma_mat.copy() 93 | 94 | for i in range(self.pred_len - 1): 95 | temp_mat = np.zeros_like(self.gamma_mat) 96 | temp_mat[int((i + 1)*self.state_size):, :] =\ 97 | self.gamma_mat[:-int((i + 1)*self.state_size), :] 98 | 99 | self.theta_mat = np.hstack((self.theta_mat, temp_mat)) 100 | 101 | # evaluation function weight 102 | diag_Qs = np.tile(np.diag(self.Q), self.pred_len) 103 | diag_Rs = np.tile(np.diag(self.R), self.pred_len) 104 | self.Qs = np.diag(diag_Qs) 105 | self.Rs = np.diag(diag_Rs) 106 | 107 | # constraints 108 | # about inputs 109 | if self.input_lower_bound is not None: 110 | self.F = np.zeros((self.input_size * 2, 111 | self.pred_len * self.input_size)) 112 | 113 | for i in range(self.input_size): 114 | self.F[i * 2: (i + 1) * 2, i] = np.array([1., -1.]) 115 | temp_F = self.F.copy() 116 | 117 | for i in range(self.pred_len - 1): 118 | for j in range(self.input_size): 119 | temp_F[j * 2: (j + 1) * 2, 120 | ((i+1) * self.input_size) + j] = np.array([1., -1.]) 121 | self.F = np.vstack((self.F, temp_F)) 122 | 123 | self.F1 = self.F[:, :self.input_size] 124 | 125 | temp_f = [] 126 | for i in range(self.input_size): 127 | temp_f.append(-1 * self.input_upper_bound[i]) 128 | temp_f.append(self.input_lower_bound[i]) 129 | 130 | self.f = np.tile(np.array(temp_f).flatten(), self.pred_len) 131 | 132 | # about dt_input constraints 133 | if self.dt_input_lower_bound is not None: 134 | self.W = np.zeros((2, self.pred_len * self.input_size)) 135 | self.W[:, 0] = np.array([1., -1.]) 136 | 137 | for i in range(self.pred_len * self.input_size - 1): 138 | temp_W = np.zeros((2, self.pred_len * self.input_size)) 139 | temp_W[:, i+1] = np.array([1., -1.]) 140 | self.W = np.vstack((self.W, temp_W)) 141 | 142 | temp_omega = [] 143 | 144 | for i in range(self.input_size): 145 | temp_omega.append(self.dt_input_upper_bound[i]) 146 | temp_omega.append(-1. * self.dt_input_lower_bound[i]) 147 | 148 | self.omega = np.tile(np.array(temp_omega).flatten(), 149 | self.pred_len) 150 | 151 | def obtain_sol(self, curr_x, g_xs): 152 | """ calculate the optimal inputs 153 | 154 | Args: 155 | curr_x (numpy.ndarray): current state, shape(state_size, ) 156 | g_xs (numpy.ndarrya): goal trajectory, 157 | shape(plan_len+1, state_size) 158 | Returns: 159 | opt_input (numpy.ndarray): optimal input, shape(input_size, ) 160 | """ 161 | temp_1 = np.matmul(self.phi_mat, curr_x.reshape(-1, 1)) 162 | temp_2 = np.matmul(self.gamma_mat, self.history_u[-1].reshape(-1, 1)) 163 | 164 | error = g_xs[1:].reshape(-1, 1) - temp_1 - temp_2 165 | 166 | G = np.matmul(self.theta_mat.T, np.matmul(self.Qs, error)) 167 | 168 | H = np.matmul(self.theta_mat.T, np.matmul(self.Qs, self.theta_mat)) \ 169 | + self.Rs 170 | H = H * 0.5 171 | 172 | # constraints 173 | A = [] 174 | b = [] 175 | 176 | if self.W is not None: 177 | A.append(self.W) 178 | b.append(self.omega.reshape(-1, 1)) 179 | 180 | if self.F is not None: 181 | b_F = - np.matmul(self.F1, self.history_u[-1].reshape(-1, 1)) \ 182 | - self.f.reshape(-1, 1) 183 | A.append(self.F) 184 | b.append(b_F) 185 | 186 | A = np.array(A).reshape(-1, self.input_size * self.pred_len) 187 | 188 | ub = np.array(b).flatten() 189 | 190 | # using cvxopt 191 | def optimized_func(dt_us): 192 | return (np.dot(dt_us, np.dot(H, dt_us.reshape(-1, 1))) 193 | - np.dot(G.T, dt_us.reshape(-1, 1)))[0] 194 | 195 | # constraint 196 | lb = np.array([-np.inf for _ in range(len(ub))]) # one side cons 197 | cons = LinearConstraint(A, lb, ub) 198 | # solve 199 | opt_sol = minimize(optimized_func, self.prev_sol.flatten(), 200 | constraints=[cons]) 201 | opt_dt_us = opt_sol.x 202 | 203 | """ using cvxopt ver, 204 | if you want to solve more quick please use cvxopt instead of scipy 205 | 206 | # make cvxpy problem formulation 207 | P = 2*matrix(H) 208 | q = matrix(-1 * G) 209 | A = matrix(A) 210 | b = matrix(ub) 211 | 212 | # solve the problem 213 | opt_sol = solvers.qp(P, q, G=A, h=b) 214 | opt_dt_us = np.array(list(opt_sol['x'])) 215 | """ 216 | 217 | # to dt form 218 | opt_dt_u_seq = np.cumsum(opt_dt_us.reshape(self.pred_len, 219 | self.input_size), 220 | axis=0) 221 | self.prev_sol = opt_dt_u_seq.copy() 222 | 223 | opt_u_seq = opt_dt_u_seq + self.history_u[-1] 224 | 225 | # save 226 | self.history_u.append(opt_u_seq[0]) 227 | 228 | # check costs 229 | costs = self.calc_cost(curr_x, 230 | opt_u_seq.reshape(1, 231 | self.pred_len, 232 | self.input_size), 233 | g_xs) 234 | 235 | logger.debug("Cost = {}".format(costs)) 236 | 237 | return opt_u_seq[0] 238 | 239 | def __str__(self): 240 | return "LinearMPC" 241 | -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/configs/cartpole.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class CartPoleConfigModule(): 5 | # parameters 6 | ENV_NAME = "CartPole-v0" 7 | PLANNER_TYPE = "Const" 8 | TYPE = "Nonlinear" 9 | TASK_HORIZON = 500 10 | PRED_LEN = 50 11 | STATE_SIZE = 4 12 | INPUT_SIZE = 1 13 | DT = 0.02 14 | # cost parameters 15 | R = np.diag([0.01]) # 0.01 is worked for MPPI and CEM and MPPIWilliams 16 | # 1. is worked for iLQR 17 | TERMINAL_WEIGHT = 1. 18 | Q = None 19 | Sf = None 20 | # bounds 21 | INPUT_LOWER_BOUND = np.array([-3.]) 22 | INPUT_UPPER_BOUND = np.array([3.]) 23 | # parameters 24 | MP = 0.2 25 | MC = 1. 26 | L = 0.5 27 | G = 9.81 28 | CART_SIZE = (0.15, 0.1) 29 | 30 | def __init__(self): 31 | """ 32 | """ 33 | # opt configs 34 | self.opt_config = { 35 | "Random": { 36 | "popsize": 5000 37 | }, 38 | "CEM": { 39 | "popsize": 500, 40 | "num_elites": 50, 41 | "max_iters": 15, 42 | "alpha": 0.3, 43 | "init_var": 9., 44 | "threshold": 0.001 45 | }, 46 | "MPPI": { 47 | "beta": 0.6, 48 | "popsize": 5000, 49 | "kappa": 0.9, 50 | "noise_sigma": 0.5, 51 | }, 52 | "MPPIWilliams": { 53 | "popsize": 5000, 54 | "lambda": 1., 55 | "noise_sigma": 0.9, 56 | }, 57 | "iLQR": { 58 | "max_iter": 500, 59 | "init_mu": 1., 60 | "mu_min": 1e-6, 61 | "mu_max": 1e10, 62 | "init_delta": 2., 63 | "threshold": 1e-6, 64 | }, 65 | "DDP": { 66 | "max_iter": 500, 67 | "init_mu": 1., 68 | "mu_min": 1e-6, 69 | "mu_max": 1e10, 70 | "init_delta": 2., 71 | "threshold": 1e-6, 72 | }, 73 | "NMPC-CGMRES": { 74 | }, 75 | "NMPC-Newton": { 76 | }, 77 | } 78 | 79 | @staticmethod 80 | def input_cost_fn(u): 81 | """ input cost functions 82 | 83 | Args: 84 | u (numpy.ndarray): input, shape(pred_len, input_size) 85 | or shape(pop_size, pred_len, input_size) 86 | Returns: 87 | cost (numpy.ndarray): cost of input, shape(pred_len, input_size) or 88 | shape(pop_size, pred_len, input_size) 89 | """ 90 | return (u**2) * np.diag(CartPoleConfigModule.R) 91 | 92 | @staticmethod 93 | def state_cost_fn(x, g_x): 94 | """ state cost function 95 | 96 | Args: 97 | x (numpy.ndarray): state, shape(pred_len, state_size) 98 | or shape(pop_size, pred_len, state_size) 99 | g_x (numpy.ndarray): goal state, shape(pred_len, state_size) 100 | or shape(pop_size, pred_len, state_size) 101 | Returns: 102 | cost (numpy.ndarray): cost of state, shape(pred_len, 1) or 103 | shape(pop_size, pred_len, 1) 104 | """ 105 | 106 | if len(x.shape) > 2: 107 | return (6. * (x[:, :, 0]**2) 108 | + 12. * ((np.cos(x[:, :, 2]) + 1.)**2) 109 | + 0.1 * (x[:, :, 1]**2) 110 | + 0.1 * (x[:, :, 3]**2))[:, :, np.newaxis] 111 | 112 | elif len(x.shape) > 1: 113 | return (6. * (x[:, 0]**2) 114 | + 12. * ((np.cos(x[:, 2]) + 1.)**2) 115 | + 0.1 * (x[:, 1]**2) 116 | + 0.1 * (x[:, 3]**2))[:, np.newaxis] 117 | 118 | return 6. * (x[0]**2) \ 119 | + 12. * ((np.cos(x[2]) + 1.)**2) \ 120 | + 0.1 * (x[1]**2) \ 121 | + 0.1 * (x[3]**2) 122 | 123 | @staticmethod 124 | def terminal_state_cost_fn(terminal_x, terminal_g_x): 125 | """ 126 | 127 | Args: 128 | terminal_x (numpy.ndarray): terminal state, 129 | shape(state_size, ) or shape(pop_size, state_size) 130 | terminal_g_x (numpy.ndarray): terminal goal state, 131 | shape(state_size, ) or shape(pop_size, state_size) 132 | Returns: 133 | cost (numpy.ndarray): cost of state, shape(pred_len, ) or 134 | shape(pop_size, pred_len) 135 | """ 136 | 137 | if len(terminal_x.shape) > 1: 138 | return (6. * (terminal_x[:, 0]**2) 139 | + 12. * ((np.cos(terminal_x[:, 2]) + 1.)**2) 140 | + 0.1 * (terminal_x[:, 1]**2) 141 | + 0.1 * (terminal_x[:, 3]**2))[:, np.newaxis] \ 142 | * CartPoleConfigModule.TERMINAL_WEIGHT 143 | 144 | return (6. * (terminal_x[0]**2) 145 | + 12. * ((np.cos(terminal_x[2]) + 1.)**2) 146 | + 0.1 * (terminal_x[1]**2) 147 | + 0.1 * (terminal_x[3]**2)) \ 148 | * CartPoleConfigModule.TERMINAL_WEIGHT 149 | 150 | @staticmethod 151 | def gradient_cost_fn_state(x, g_x, terminal=False): 152 | """ gradient of costs with respect to the state 153 | 154 | Args: 155 | x (numpy.ndarray): state, shape(pred_len, state_size) 156 | g_x (numpy.ndarray): goal state, shape(pred_len, state_size) 157 | 158 | Returns: 159 | l_x (numpy.ndarray): gradient of cost, shape(pred_len, state_size) 160 | or shape(1, state_size) 161 | """ 162 | if not terminal: 163 | cost_dx0 = 12. * x[:, 0] 164 | cost_dx1 = 0.2 * x[:, 1] 165 | cost_dx2 = 24. * (1 + np.cos(x[:, 2])) * -np.sin(x[:, 2]) 166 | cost_dx3 = 0.2 * x[:, 3] 167 | cost_dx = np.stack((cost_dx0, cost_dx1, 168 | cost_dx2, cost_dx3), axis=1) 169 | return cost_dx 170 | 171 | cost_dx0 = 12. * x[0] 172 | cost_dx1 = 0.2 * x[1] 173 | cost_dx2 = 24. * (1 + np.cos(x[2])) * -np.sin(x[2]) 174 | cost_dx3 = 0.2 * x[3] 175 | cost_dx = np.array([[cost_dx0, cost_dx1, cost_dx2, cost_dx3]]) 176 | 177 | return cost_dx * CartPoleConfigModule.TERMINAL_WEIGHT 178 | 179 | @staticmethod 180 | def gradient_cost_fn_input(x, u): 181 | """ gradient of costs with respect to the input 182 | 183 | Args: 184 | x (numpy.ndarray): state, shape(pred_len, state_size) 185 | u (numpy.ndarray): goal state, shape(pred_len, input_size) 186 | Returns: 187 | l_u (numpy.ndarray): gradient of cost, shape(pred_len, input_size) 188 | """ 189 | return 2. * u * np.diag(CartPoleConfigModule.R) 190 | 191 | @staticmethod 192 | def hessian_cost_fn_state(x, g_x, terminal=False): 193 | """ hessian costs with respect to the state 194 | 195 | Args: 196 | x (numpy.ndarray): state, shape(pred_len, state_size) 197 | g_x (numpy.ndarray): goal state, shape(pred_len, state_size) 198 | Returns: 199 | l_xx (numpy.ndarray): gradient of cost, 200 | shape(pred_len, state_size, state_size) or 201 | shape(1, state_size, state_size) or 202 | """ 203 | if not terminal: 204 | (pred_len, state_size) = x.shape 205 | hessian = np.eye(state_size) 206 | hessian = np.tile(hessian, (pred_len, 1, 1)) 207 | hessian[:, 0, 0] = 12. 208 | hessian[:, 1, 1] = 0.2 209 | hessian[:, 2, 2] = 24. * -np.sin(x[:, 2]) \ 210 | * (-np.sin(x[:, 2])) \ 211 | + 24. * (1. + np.cos(x[:, 2])) \ 212 | * -np.cos(x[:, 2]) 213 | hessian[:, 3, 3] = 0.2 214 | 215 | return hessian 216 | 217 | state_size = len(x) 218 | hessian = np.eye(state_size) 219 | hessian[0, 0] = 12. 220 | hessian[1, 1] = 0.2 221 | hessian[2, 2] = 24. * -np.sin(x[2]) \ 222 | * (-np.sin(x[2])) \ 223 | + 24. * (1. + np.cos(x[2])) \ 224 | * -np.cos(x[2]) 225 | hessian[3, 3] = 0.2 226 | 227 | return hessian[np.newaxis, :, :] * CartPoleConfigModule.TERMINAL_WEIGHT 228 | 229 | @staticmethod 230 | def hessian_cost_fn_input(x, u): 231 | """ hessian costs with respect to the input 232 | 233 | Args: 234 | x (numpy.ndarray): state, shape(pred_len, state_size) 235 | u (numpy.ndarray): goal state, shape(pred_len, input_size) 236 | Returns: 237 | l_uu (numpy.ndarray): gradient of cost, 238 | shape(pred_len, input_size, input_size) 239 | """ 240 | (pred_len, _) = u.shape 241 | 242 | return np.tile(2.*CartPoleConfigModule.R, (pred_len, 1, 1)) 243 | 244 | @staticmethod 245 | def hessian_cost_fn_input_state(x, u): 246 | """ hessian costs with respect to the state and input 247 | 248 | Args: 249 | x (numpy.ndarray): state, shape(pred_len, state_size) 250 | u (numpy.ndarray): goal state, shape(pred_len, input_size) 251 | Returns: 252 | l_ux (numpy.ndarray): gradient of cost , 253 | shape(pred_len, input_size, state_size) 254 | """ 255 | (_, state_size) = x.shape 256 | (pred_len, input_size) = u.shape 257 | 258 | return np.zeros((pred_len, input_size, state_size)) 259 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Coverage Status](https://coveralls.io/repos/github/Shunichi09/PythonLinearNonlinearControl/badge.svg?branch=master&service=github)](https://coveralls.io/github/Shunichi09/PythonLinearNonlinearControl?branch=master&service=github) 2 | [![GitHub issues open](https://img.shields.io/github/issues/Shunichi09/PythonLinearNonlinearControl.svg)]() 3 | [![Build Status](https://travis-ci.org/Shunichi09/PythonLinearNonlinearControl.svg?branch=master&service=github)](https://travis-ci.org/Shunichi09/PythonLinearNonlinearControl) 4 | 5 | # PythonLinearNonlinearControl 6 | 7 | PythonLinearNonlinearControl is a library implementing the linear and nonlinear control theories in python. 8 | Due to use only basic libralies (scipy, numpy), this library is easy to extend for your own situations. 9 | 10 |
11 | 12 | 13 | 14 | # Algorithms 15 | 16 | | Algorithm | Use Linear Model | Use Nonlinear Model | Need Gradient (Hamiltonian) | Need Gradient (Model) | Need Hessian (Model) | 17 | |:----------|:---------------: |:----------------:|:----------------:|:----------------:|:----------------:| 18 | | Linear Model Predictive Control (MPC) | ✓ | x | x | x | x | 19 | | Cross Entropy Method (CEM) | ✓ | ✓ | x | x | x | 20 | | Model Predictive Path Integral Control of Nagabandi, A. (MPPI) | ✓ | ✓ | x | x | x | 21 | | Model Predictive Path Integral Control of Williams, G. (MPPIWilliams) | ✓ | ✓ | x | x | x | 22 | | Random Shooting Method (Random) | ✓ | ✓ | x | x | x | 23 | | Iterative LQR (iLQR) | x | ✓ | x | ✓ | x | 24 | | Differential Dynamic Programming (DDP) | x | ✓ | x | ✓ | ✓ | 25 | | Unconstrained Nonlinear Model Predictive Control (NMPC) | x | ✓ | ✓ | x | x | 26 | | Constrained Nonlinear Model Predictive Control CGMRES (NMPC-CGMRES) | x | ✓ | ✓ | x | x | 27 | | Constrained Nonlinear Model Predictive Control Newton (NMPC-Newton) | x | ✓ | x | x | x | 28 | 29 | "Need Gradient" means that you have to implement the gradient of the model or the gradient of hamiltonian. 30 | 31 | Following algorithms are implemented in PythonLinearNonlinearControl 32 | 33 | - [Linear Model Predictive Control (MPC)](http://www2.eng.cam.ac.uk/~jmm1/mpcbook/mpcbook.html) 34 | - Ref: Maciejowski, J. M. (2002). Predictive control: with constraints. 35 | - [script](PythonLinearNonlinearControl/controllers/mpc.py) 36 | - [Cross Entropy Method (CEM)](https://arxiv.org/abs/1805.12114) 37 | - Ref: Chua, K., Calandra, R., McAllister, R., & Levine, S. (2018). Deep reinforcement learning in a handful of trials using probabilistic dynamics models. In Advances in Neural Information Processing Systems (pp. 4754-4765) 38 | - [script](PythonLinearNonlinearControl/controllers/cem.py) 39 | - [Model Predictive Path Integral Control of Nagabandi, A. (MPPI)](https://arxiv.org/abs/1909.11652) 40 | - Ref: Nagabandi, A., Konoglie, K., Levine, S., & Kumar, V. (2019). Deep Dynamics Models for Learning Dexterous Manipulation. arXiv preprint arXiv:1909.11652. 41 | - [script](PythonLinearNonlinearControl/controllers/mppi.py) 42 | - [Model Predictive Path Integral Control of Williams, G. (MPPIWilliams)](https://ieeexplore.ieee.org/abstract/document/7989202) 43 | - Ref: Williams, G., Wagener, N., Goldfain, B., Drews, P., Rehg, J. M., Boots, B., & Theodorou, E. A. (2017, May). Information theoretic MPC for model-based reinforcement learning. In 2017 IEEE International Conference on Robotics and Automation (ICRA) (pp. 1714-1721). IEEE. 44 | - [script](PythonLinearNonlinearControl/controllers/mppi_williams.py) 45 | - [Random Shooting Method (Random)](https://arxiv.org/abs/1805.12114) 46 | - Ref: Chua, K., Calandra, R., McAllister, R., & Levine, S. (2018). Deep reinforcement learning in a handful of trials using probabilistic dynamics models. In Advances in Neural Information Processing Systems (pp. 4754-4765) 47 | - [script](PythonLinearNonlinearControl/controllers/random.py) 48 | - [Iterative LQR (iLQR)](https://ieeexplore.ieee.org/document/6386025) 49 | - Ref: Tassa, Y., Erez, T., & Todorov, E. (2012, October). Synthesis and stabilization of complex behaviors through online trajectory optimization. In 2012 IEEE/RSJ International Conference on Intelligent Robots and Systems (pp. 4906-4913). IEEE. and [Study Wolf](https://github.com/studywolf/control), https://github.com/anassinator/ilqr 50 | - [script](PythonLinearNonlinearControl/controllers/ilqr.py) 51 | - [Dynamic Differential Programming (DDP)](https://ieeexplore.ieee.org/document/6386025) 52 | - Ref: Tassa, Y., Erez, T., & Todorov, E. (2012, October). Synthesis and stabilization of complex behaviors through online trajectory optimization. In 2012 IEEE/RSJ International Conference on Intelligent Robots and Systems (pp. 4906-4913). IEEE. and [Study Wolf](https://github.com/studywolf/control), https://github.com/anassinator/ilqr 53 | - [script](PythonLinearNonlinearControl/controllers/ddp.py) 54 | - [Unconstrained Nonlinear Model Predictive Control (NMPC)](https://www.sciencedirect.com/science/article/pii/S0005109897000058) 55 | - Ref: Ohtsuka, T., & Fujii, H. A. (1997). Real-time optimization algorithm for nonlinear receding-horizon control. Automatica, 33(6), 1147-1154. 56 | - [script](PythonLinearNonlinearControl/controllers/nmpc.py) 57 | - [Constrained Nonlinear Model Predictive Control -CGMRES- (NMPC-CGMRES)](https://www.sciencedirect.com/science/article/pii/S0005109897000058) 58 | - Ref: Ohtsuka, T., & Fujii, H. A. (1997). Real-time optimization algorithm for nonlinear receding-horizon control. Automatica, 33(6), 1147-1154. 59 | - [script](PythonLinearNonlinearControl/controllers/nmpc_cgmres.py) 60 | - [Constrained Nonlinear Model Predictive Control -Newton- (NMPC-Newton)](https://www.sciencedirect.com/science/article/pii/S0005109897000058) 61 | - Ref: Ohtsuka, T., & Fujii, H. A. (1997). Real-time optimization algorithm for nonlinear receding-horizon control. Automatica, 33(6), 1147-1154. 62 | - [script (Coming soon)]() 63 | 64 | # Environments 65 | 66 | There are 4 example environments, "FirstOrderLag", "TwoWheeledConst", "TwoWheeledTrack" and "Cartpole". 67 | 68 | | Name | Linear | Nonlinear | State Size | Input size | 69 | |:----------|:---------------:|:----------------:|:----------------:|:----------------:| 70 | | First Order Lag System | ✓ | x | 4 | 2 | 71 | | Two wheeled System (Constant Goal) | x | ✓ | 3 | 2 | 72 | | Two wheeled System (Moving Goal) | x | ✓ | 3 | 2 | 73 | | Cartpole (Swing up) | x | ✓ | 4 | 1 | 74 | | Nonlinear Sample System Env | x | ✓ | 2 | 1 | 75 | 76 | All states and inputs of environments are continuous. 77 | **It should be noted that the algorithms for linear model could be applied to nonlinear enviroments if you have linealized the model of nonlinear environments.** 78 | 79 | You could know abount our environmets more in [Environments.md](Environments.md) 80 | 81 | # Installation 82 | 83 | ## To install this package 84 | 85 | ``` 86 | $ pip install PythonLinearNonlinearControl 87 | ``` 88 | 89 | ## When developing the package 90 | 91 | ``` 92 | $ python setup.py develop 93 | ``` 94 | 95 | or 96 | 97 | ``` 98 | $ pip install -e . 99 | ``` 100 | 101 | # Basic concepts 102 | 103 | When we design control systems, we should have **Model**, **Planner**, **Controller** and **Runner** as shown in the figure. 104 | It should be noted that **Model** and **Environment** are different. As mentioned before, we the algorithms for linear model could be applied to nonlinear enviroments if you have linealized model of nonlinear environments. In addition, you can use Neural Network or any non-linear functions to the model, although this library can not deal with it now. 105 | 106 | 107 | 108 | ## [Model](PythonLinearNonlinearControl/models/) 109 | 110 | System model. For an instance, in the case that a model is linear, this model should have a form, "x[k+1] = Ax[k] + Bu[k]". 111 | 112 | If you use gradient based control method, you are preferred to implement the gradients of the model, other wise the controllers use numeric gradients. 113 | 114 | ## [Planner](PythonLinearNonlinearControl/planners/) 115 | 116 | Planner make the goal states. 117 | 118 | ## [Controller](PythonLinearNonlinearControl/controllers/) 119 | 120 | Controller calculate the optimal inputs by using the model by using the algorithms. 121 | 122 | ## [Runner](PythonLinearNonlinearControl/runners/) 123 | 124 | Runner runs the simulation. 125 | 126 | Please, see more detail in each scripts. 127 | 128 | # Getting started 129 | 130 | ## [QuickStart Guide](scripts/quickstart/quickstart.md) 131 | 132 | This is a quickstart guide for users who just want to try PythonLinearNonlinearControl. 133 | If you have not installed PythonLinearNonLinearControl, please see the section of "how to setup" in README.md 134 | 135 | When we design control systems, we should have Environment, Model, Planner, Controller and Runner. 136 | Therefore your script contains those Modules. 137 | 138 | First, import each Modules from PythonLinearNonlinearControl. 139 | 140 | ```py 141 | from PythonLinearNonlinearControl import configs 142 | from PythonLinearNonlinearControl import envs 143 | from PythonLinearNonlinearControl import models 144 | from PythonLinearNonlinearControl import planners 145 | from PythonLinearNonlinearControl import controllers 146 | from PythonLinearNonlinearControl import runners 147 | ``` 148 | 149 | Configs contains each modules configurations such as cost functions, prediction length, ...etc. 150 | 151 | Then you can make each module. (This is example about CEM and CartPole env) 152 | 153 | ```py 154 | config = configs.CartPoleConfigModule() 155 | env = envs.CartPoleEnv() 156 | model = models.CartPoleModel(config) 157 | controller = controllers.CEM(config, model) 158 | planner = planners.ConstantPlanner(config) 159 | runner = runners.ExpRunner() 160 | ``` 161 | 162 | The preparation for experiment has done! 163 | Please run the runner. 164 | 165 | ```py 166 | history_x, history_u, history_g = runner.run(env, controller, planner) 167 | ``` 168 | 169 | You can get the results of history of state, history of input and history of goal. 170 | Use that histories to visualize the Animation or Figures. 171 | (Note FirstOrderEnv does not support animation) 172 | 173 | ```py 174 | # plot results 175 | plot_results(history_x, history_u, history_g=history_g) 176 | save_plot_data(history_x, history_u, history_g=history_g) 177 | 178 | # create animation 179 | animator = Animator(env) 180 | animator.draw(history_x, history_g) 181 | ``` 182 | **It should be noted that the controller parameters like Q, R and Sf strongly affect the performence of the controller. 183 | Please, check these parameters before you run the simulation.** 184 | 185 | ## Run Example Script 186 | 187 | You can run the example script as follows: 188 | 189 | ``` 190 | python scripts/simple_run.py --env CartPole --controller CEM --save_anim 1 191 | ``` 192 | 193 | **figures and animations are saved in the ./result folder.** 194 | 195 | # Old version 196 | 197 | If you are interested in the old version of this library, that was not a library just examples, please see [v1.0](https://github.com/Shunichi09/PythonLinearNonlinearControl/tree/v1.0) 198 | 199 | # Documents 200 | 201 | Coming soon !! 202 | 203 | # Requirements 204 | 205 | - numpy 206 | - scipy 207 | - matplotlib (for figures and animations) 208 | - ffmpeg (for animations) 209 | 210 | # License 211 | 212 | PythonLinearNonlinearControl is licensed under the MIT License. However, some scripts are available under different license terms. See below. 213 | 214 | The following parts are licensed under GPL3+: 215 | 216 | - [ilqr](PythonLinearNonlinearControl/controllers/ilqr.py) 217 | - [ddp](PythonLinearNonlinearControl/controllers/ddp.py) 218 | 219 | See the [licenses folder](./licenses) for the full text of the licenses. 220 | 221 | # Citation 222 | 223 | ``` 224 | @Misc{PythonLinearNonLinearControl, 225 | author = {Shunichi Sekiguchi}, 226 | title = {PythonLinearNonlinearControl}, 227 | note = "\url{https://github.com/Shunichi09/PythonLinearNonlinearControl}", 228 | } 229 | ``` 230 | -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/models/model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class Model(): 5 | """ base class of model 6 | """ 7 | 8 | def __init__(self): 9 | """ 10 | """ 11 | pass 12 | 13 | def predict_traj(self, curr_x, us): 14 | """ predict trajectories 15 | 16 | Args: 17 | curr_x (numpy.ndarray): current state, shape(state_size, ) 18 | us (numpy.ndarray): inputs, 19 | shape(pred_len, input_size) 20 | or shape(pop_size, pred_len, input_size) 21 | Returns: 22 | pred_xs (numpy.ndarray): predicted state, 23 | shape(pred_len+1, state_size) including current state 24 | or shape(pop_size, pred_len+1, state_size) 25 | """ 26 | if len(us.shape) == 3: 27 | pred_xs = self._predict_traj_alltogether(curr_x, us) 28 | elif len(us.shape) == 2: 29 | pred_xs = self._predict_traj(curr_x, us) 30 | else: 31 | raise ValueError("Invalid us") 32 | 33 | return pred_xs 34 | 35 | def _predict_traj(self, curr_x, us): 36 | """ predict trajectories 37 | 38 | Args: 39 | curr_x (numpy.ndarray): current state, shape(state_size, ) 40 | us (numpy.ndarray): inputs, shape(pred_len, input_size) 41 | Returns: 42 | pred_xs (numpy.ndarray): predicted state, 43 | shape(pred_len+1, state_size) including current state 44 | """ 45 | # get size 46 | pred_len = us.shape[0] 47 | # initialze 48 | x = curr_x 49 | pred_xs = curr_x[np.newaxis, :] 50 | 51 | for t in range(pred_len): 52 | next_x = self.predict_next_state(x, us[t]) 53 | # update 54 | pred_xs = np.concatenate((pred_xs, next_x[np.newaxis, :]), axis=0) 55 | x = next_x 56 | 57 | return pred_xs 58 | 59 | def _predict_traj_alltogether(self, curr_x, us): 60 | """ predict trajectories for all samples 61 | 62 | Args: 63 | curr_x (numpy.ndarray): current state, shape(pop_size, state_size) 64 | us (numpy.ndarray): inputs, shape(pop_size, pred_len, input_size) 65 | Returns: 66 | pred_xs (numpy.ndarray): predicted state, 67 | shape(pop_size, pred_len+1, state_size) including current state 68 | """ 69 | # get size 70 | (pop_size, pred_len, _) = us.shape 71 | us = np.transpose(us, (1, 0, 2)) # to (pred_len, pop_size, input_size) 72 | # initialze 73 | x = np.tile(curr_x, (pop_size, 1)) 74 | pred_xs = x[np.newaxis, :, :] # (1, pop_size, state_size) 75 | 76 | for t in range(pred_len): 77 | # next_x.shape = (pop_size, state_size) 78 | next_x = self.predict_next_state(x, us[t]) 79 | # update 80 | pred_xs = np.concatenate((pred_xs, next_x[np.newaxis, :, :]), 81 | axis=0) 82 | x = next_x 83 | 84 | return np.transpose(pred_xs, (1, 0, 2)) 85 | 86 | def predict_next_state(self, curr_x, u): 87 | """ predict next state 88 | """ 89 | raise NotImplementedError("Implement the model") 90 | 91 | def x_dot(self, curr_x, u): 92 | """ compute x dot 93 | """ 94 | raise NotImplementedError("Implement the model") 95 | 96 | def predict_adjoint_traj(self, xs, us, g_xs): 97 | """ 98 | Args: 99 | xs (numpy.ndarray): states trajectory, shape(pred_len+1, state_size) 100 | us (numpy.ndarray): inputs, shape(pred_len, input_size) 101 | g_xs (numpy.ndarray): goal states, shape(pred_len+1, state_size) 102 | Returns: 103 | lams (numpy.ndarray): adjoint state, shape(pred_len, state_size), 104 | adjoint size is the same as state_size 105 | Notes: 106 | Adjoint trajectory be computed by backward path. 107 | Usually, we should -\dot{lam} but in backward path case, we can use \dot{lam} directry 108 | """ 109 | # get size 110 | (pred_len, input_size) = us.shape 111 | # pred final adjoint state 112 | lam = self.predict_terminal_adjoint_state(xs[-1], 113 | terminal_g_x=g_xs[-1]) 114 | lams = lam[np.newaxis, :] 115 | 116 | for t in range(pred_len-1, 0, -1): 117 | prev_lam = \ 118 | self.predict_adjoint_state(lam, xs[t], us[t], 119 | g_x=g_xs[t]) 120 | # update 121 | lams = np.concatenate((prev_lam[np.newaxis, :], lams), axis=0) 122 | lam = prev_lam 123 | 124 | return lams 125 | 126 | def predict_adjoint_state(self, lam, x, u, g_x=None, t=None): 127 | """ predict adjoint states 128 | 129 | Args: 130 | lam (numpy.ndarray): adjoint state, shape(state_size, ) 131 | x (numpy.ndarray): state, shape(state_size, ) 132 | u (numpy.ndarray): input, shape(input_size, ) 133 | g_x (numpy.ndarray): goal state, shape(state_size, ) 134 | Returns: 135 | prev_lam (numpy.ndarrya): previous adjoint state, 136 | shape(state_size, ) 137 | """ 138 | raise NotImplementedError("Implement the adjoint model") 139 | 140 | def predict_terminal_adjoint_state(self, terminal_x, terminal_g_x=None): 141 | """ predict terminal adjoint state 142 | 143 | Args: 144 | terminal_x (numpy.ndarray): terminal state, shape(state_size, ) 145 | terminal_g_x (numpy.ndarray): terminal goal state, 146 | shape(state_size, ) 147 | Returns: 148 | terminal_lam (numpy.ndarray): terminal adjoint state, 149 | shape(state_size, ) 150 | """ 151 | raise NotImplementedError("Implement terminal adjoint state") 152 | 153 | @staticmethod 154 | def calc_f_x(xs, us, dt): 155 | """ gradient of model with respect to the state in batch form 156 | """ 157 | raise NotImplementedError("Implement gradient of model \ 158 | with respect to the state") 159 | 160 | @staticmethod 161 | def calc_f_u(xs, us, dt): 162 | """ gradient of model with respect to the input in batch form 163 | """ 164 | raise NotImplementedError("Implement gradient of model \ 165 | with respect to the input") 166 | 167 | @staticmethod 168 | def calc_f_xx(xs, us, dt): 169 | """ hessian of model with respect to the state in batch form 170 | """ 171 | raise NotImplementedError("Implement hessian of model \ 172 | with respect to the state") 173 | 174 | @staticmethod 175 | def calc_f_ux(xs, us, dt): 176 | """ hessian of model with respect to the input in batch form 177 | """ 178 | raise NotImplementedError("Implement hessian of model \ 179 | with respect to the input and state") 180 | 181 | @staticmethod 182 | def calc_f_uu(xs, us, dt): 183 | """ hessian of model with respect to the state in batch form 184 | """ 185 | raise NotImplementedError("Implement hessian of model \ 186 | with respect to the input") 187 | 188 | 189 | class LinearModel(Model): 190 | """ discrete linear model, x[k+1] = Ax[k] + Bu[k] 191 | 192 | Attributes: 193 | A (numpy.ndarray): shape(state_size, state_size) 194 | B (numpy.ndarray): shape(state_size, input_size) 195 | """ 196 | 197 | def __init__(self, A, B): 198 | """ 199 | """ 200 | super(LinearModel, self).__init__() 201 | self.A = A 202 | self.B = B 203 | 204 | def predict_next_state(self, curr_x, u): 205 | """ predict next state 206 | 207 | Args: 208 | curr_x (numpy.ndarray): current state, shape(state_size, ) or 209 | shape(pop_size, state_size) 210 | u (numpy.ndarray): input, shape(input_size, ) or 211 | shape(pop_size, input_size) 212 | Returns: 213 | next_x (numpy.ndarray): next state, shape(state_size, ) or 214 | shape(pop_size, state_size) 215 | """ 216 | if len(u.shape) == 1: 217 | next_x = np.matmul(self.A, curr_x[:, np.newaxis]) \ 218 | + np.matmul(self.B, u[:, np.newaxis]) 219 | 220 | return next_x.flatten() 221 | 222 | elif len(u.shape) == 2: 223 | next_x = np.matmul(curr_x, self.A.T) + np.matmul(u, self.B.T) 224 | 225 | return next_x 226 | 227 | def calc_f_x(self, xs, us, dt): 228 | """ gradient of model with respect to the state in batch form 229 | 230 | Args: 231 | xs (numpy.ndarray): state, shape(pred_len+1, state_size) 232 | us (numpy.ndarray): input, shape(pred_len, input_size,) 233 | Return: 234 | f_x (numpy.ndarray): gradient of model with respect to x, 235 | shape(pred_len, state_size, state_size) 236 | Notes: 237 | This should be discrete form !! 238 | """ 239 | # get size 240 | (pred_len, _) = us.shape 241 | 242 | return np.tile(self.A, (pred_len, 1, 1)) 243 | 244 | def calc_f_u(self, xs, us, dt): 245 | """ gradient of model with respect to the input in batch form 246 | 247 | Args: 248 | xs (numpy.ndarray): state, shape(pred_len+1, state_size) 249 | us (numpy.ndarray): input, shape(pred_len, input_size,) 250 | Return: 251 | f_u (numpy.ndarray): gradient of model with respect to x, 252 | shape(pred_len, state_size, input_size) 253 | Notes: 254 | This should be discrete form !! 255 | """ 256 | # get size 257 | (pred_len, input_size) = us.shape 258 | 259 | return np.tile(self.B, (pred_len, 1, 1)) 260 | 261 | @staticmethod 262 | def calc_f_xx(xs, us, dt): 263 | """ hessian of model with respect to the state in batch form 264 | 265 | Args: 266 | xs (numpy.ndarray): state, shape(pred_len+1, state_size) 267 | us (numpy.ndarray): input, shape(pred_len, input_size,) 268 | Return: 269 | f_xx (numpy.ndarray): gradient of model with respect to x, 270 | shape(pred_len, state_size, state_size, state_size) 271 | """ 272 | # get size 273 | (_, state_size) = xs.shape 274 | (pred_len, _) = us.shape 275 | 276 | f_xx = np.zeros((pred_len, state_size, state_size, state_size)) 277 | 278 | return f_xx 279 | 280 | @staticmethod 281 | def calc_f_ux(xs, us, dt): 282 | """ hessian of model with respect to state and input in batch form 283 | 284 | Args: 285 | xs (numpy.ndarray): state, shape(pred_len+1, state_size) 286 | us (numpy.ndarray): input, shape(pred_len, input_size,) 287 | Return: 288 | f_ux (numpy.ndarray): gradient of model with respect to x, 289 | shape(pred_len, state_size, input_size, state_size) 290 | """ 291 | # get size 292 | (_, state_size) = xs.shape 293 | (pred_len, input_size) = us.shape 294 | 295 | f_ux = np.zeros((pred_len, state_size, input_size, state_size)) 296 | 297 | return f_ux 298 | 299 | @staticmethod 300 | def calc_f_uu(xs, us, dt): 301 | """ hessian of model with respect to input in batch form 302 | 303 | Args: 304 | xs (numpy.ndarray): state, shape(pred_len+1, state_size) 305 | us (numpy.ndarray): input, shape(pred_len, input_size,) 306 | Return: 307 | f_uu (numpy.ndarray): gradient of model with respect to x, 308 | shape(pred_len, state_size, input_size, input_size) 309 | """ 310 | # get size 311 | (_, state_size) = xs.shape 312 | (pred_len, input_size) = us.shape 313 | 314 | f_uu = np.zeros((pred_len, state_size, input_size, input_size)) 315 | 316 | return f_uu 317 | -------------------------------------------------------------------------------- /PythonLinearNonlinearControl/configs/nonlinear_sample_system.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class NonlinearSampleSystemConfigModule(): 5 | # parameters 6 | ENV_NAME = "NonlinearSampleSystem-v0" 7 | PLANNER_TYPE = "Const" 8 | TYPE = "Nonlinear" 9 | TASK_HORIZON = 2000 10 | PRED_LEN = 10 11 | STATE_SIZE = 2 12 | INPUT_SIZE = 1 13 | DT = 0.01 14 | R = np.diag([1.]) 15 | Q = None 16 | Sf = None 17 | # bounds 18 | INPUT_LOWER_BOUND = np.array([-0.5]) 19 | INPUT_UPPER_BOUND = np.array([0.5]) 20 | 21 | def __init__(self): 22 | """ 23 | """ 24 | # opt configs 25 | self.opt_config = { 26 | "Random": { 27 | "popsize": 5000 28 | }, 29 | "CEM": { 30 | "popsize": 500, 31 | "num_elites": 50, 32 | "max_iters": 15, 33 | "alpha": 0.3, 34 | "init_var": 9., 35 | "threshold": 0.001 36 | }, 37 | "MPPI": { 38 | "beta": 0.6, 39 | "popsize": 5000, 40 | "kappa": 0.9, 41 | "noise_sigma": 0.5, 42 | }, 43 | "MPPIWilliams": { 44 | "popsize": 5000, 45 | "lambda": 1., 46 | "noise_sigma": 0.9, 47 | }, 48 | "iLQR": { 49 | "max_iters": 500, 50 | "init_mu": 1., 51 | "mu_min": 1e-6, 52 | "mu_max": 1e10, 53 | "init_delta": 2., 54 | "threshold": 1e-6, 55 | }, 56 | "DDP": { 57 | "max_iters": 500, 58 | "init_mu": 1., 59 | "mu_min": 1e-6, 60 | "mu_max": 1e10, 61 | "init_delta": 2., 62 | "threshold": 1e-6, 63 | }, 64 | "NMPC": { 65 | "threshold": 0.01, 66 | "max_iters": 5000, 67 | "learning_rate": 0.01, 68 | "optimizer_mode": "conjugate" 69 | } 70 | } 71 | 72 | @staticmethod 73 | def input_cost_fn(u): 74 | """ input cost functions 75 | 76 | Args: 77 | u (numpy.ndarray): input, shape(pred_len, input_size) 78 | or shape(pop_size, pred_len, input_size) 79 | Returns: 80 | cost (numpy.ndarray): cost of input, shape(pred_len, input_size) or 81 | shape(pop_size, pred_len, input_size) 82 | """ 83 | return (u**2) * np.diag(NonlinearSampleSystemConfigModule.R) 84 | 85 | @staticmethod 86 | def state_cost_fn(x, g_x): 87 | """ state cost function 88 | 89 | Args: 90 | x (numpy.ndarray): state, shape(pred_len, state_size) 91 | or shape(pop_size, pred_len, state_size) 92 | g_x (numpy.ndarray): goal state, shape(pred_len, state_size) 93 | or shape(pop_size, pred_len, state_size) 94 | Returns: 95 | cost (numpy.ndarray): cost of state, shape(pred_len, 1) or 96 | shape(pop_size, pred_len, 1) 97 | """ 98 | 99 | if len(x.shape) > 2: 100 | return (0.5 * (x[:, :, 0]**2) + 101 | 0.5 * (x[:, :, 1]**2))[:, :, np.newaxis] 102 | 103 | elif len(x.shape) > 1: 104 | return (0.5 * (x[:, 0]**2) + 0.5 * (x[:, 1]**2))[:, np.newaxis] 105 | 106 | return 0.5 * (x[0]**2) + 0.5 * (x[1]**2) 107 | 108 | @staticmethod 109 | def terminal_state_cost_fn(terminal_x, terminal_g_x): 110 | """ 111 | 112 | Args: 113 | terminal_x (numpy.ndarray): terminal state, 114 | shape(state_size, ) or shape(pop_size, state_size) 115 | terminal_g_x (numpy.ndarray): terminal goal state, 116 | shape(state_size, ) or shape(pop_size, state_size) 117 | Returns: 118 | cost (numpy.ndarray): cost of state, shape(pred_len, ) or 119 | shape(pop_size, pred_len) 120 | """ 121 | 122 | if len(terminal_x.shape) > 1: 123 | return (0.5 * (terminal_x[:, 0]**2) + 124 | 0.5 * (terminal_x[:, 1]**2))[:, np.newaxis] 125 | 126 | return 0.5 * (terminal_x[0]**2) + 0.5 * (terminal_x[1]**2) 127 | 128 | @staticmethod 129 | def gradient_cost_fn_state(x, g_x, terminal=False): 130 | """ gradient of costs with respect to the state 131 | 132 | Args: 133 | x (numpy.ndarray): state, shape(pred_len, state_size) 134 | g_x (numpy.ndarray): goal state, shape(pred_len, state_size) 135 | 136 | Returns: 137 | l_x (numpy.ndarray): gradient of cost, shape(pred_len, state_size) 138 | or shape(1, state_size) 139 | """ 140 | if not terminal: 141 | cost_dx0 = x[:, 0] 142 | cost_dx1 = x[:, 1] 143 | cost_dx = np.stack((cost_dx0, cost_dx1), axis=1) 144 | return cost_dx 145 | 146 | cost_dx0 = x[0] 147 | cost_dx1 = x[1] 148 | cost_dx = np.array([[cost_dx0, cost_dx1]]) 149 | 150 | return cost_dx 151 | 152 | @staticmethod 153 | def gradient_cost_fn_input(x, u): 154 | """ gradient of costs with respect to the input 155 | 156 | Args: 157 | x (numpy.ndarray): state, shape(pred_len, state_size) 158 | u (numpy.ndarray): goal state, shape(pred_len, input_size) 159 | Returns: 160 | l_u (numpy.ndarray): gradient of cost, shape(pred_len, input_size) 161 | """ 162 | return 2. * u * np.diag(NonlinearSampleSystemConfigModule.R) 163 | 164 | @staticmethod 165 | def hessian_cost_fn_state(x, g_x, terminal=False): 166 | """ hessian costs with respect to the state 167 | 168 | Args: 169 | x (numpy.ndarray): state, shape(pred_len, state_size) 170 | g_x (numpy.ndarray): goal state, shape(pred_len, state_size) 171 | Returns: 172 | l_xx (numpy.ndarray): gradient of cost, 173 | shape(pred_len, state_size, state_size) or 174 | shape(1, state_size, state_size) or 175 | """ 176 | if not terminal: 177 | (pred_len, state_size) = x.shape 178 | hessian = np.eye(state_size) 179 | hessian = np.tile(hessian, (pred_len, 1, 1)) 180 | hessian[:, 0, 0] = 1. 181 | hessian[:, 1, 1] = 1. 182 | 183 | return hessian 184 | 185 | state_size = len(x) 186 | hessian = np.eye(state_size) 187 | hessian[0, 0] = 1. 188 | hessian[1, 1] = 1. 189 | 190 | return hessian[np.newaxis, :, :] 191 | 192 | @staticmethod 193 | def hessian_cost_fn_input(x, u): 194 | """ hessian costs with respect to the input 195 | 196 | Args: 197 | x (numpy.ndarray): state, shape(pred_len, state_size) 198 | u (numpy.ndarray): goal state, shape(pred_len, input_size) 199 | Returns: 200 | l_uu (numpy.ndarray): gradient of cost, 201 | shape(pred_len, input_size, input_size) 202 | """ 203 | (pred_len, _) = u.shape 204 | 205 | return np.tile(NonlinearSampleSystemConfigModule.R, (pred_len, 1, 1)) 206 | 207 | @staticmethod 208 | def hessian_cost_fn_input_state(x, u): 209 | """ hessian costs with respect to the state and input 210 | 211 | Args: 212 | x (numpy.ndarray): state, shape(pred_len, state_size) 213 | u (numpy.ndarray): goal state, shape(pred_len, input_size) 214 | Returns: 215 | l_ux (numpy.ndarray): gradient of cost , 216 | shape(pred_len, input_size, state_size) 217 | """ 218 | (_, state_size) = x.shape 219 | (pred_len, input_size) = u.shape 220 | 221 | return np.zeros((pred_len, input_size, state_size)) 222 | 223 | @staticmethod 224 | def gradient_hamiltonian_input(x, lam, u, g_x): 225 | """ 226 | 227 | Args: 228 | x (numpy.ndarray): shape(pred_len+1, state_size) 229 | lam (numpy.ndarray): shape(pred_len, state_size) 230 | u (numpy.ndarray): shape(pred_len, input_size) 231 | g_xs (numpy.ndarray): shape(pred_len, state_size) 232 | 233 | Returns: 234 | F (numpy.ndarray), shape(pred_len, input_size) 235 | """ 236 | if len(x.shape) == 1: 237 | input_size = u.shape[0] 238 | F = np.zeros(input_size) 239 | F[0] = u[0] + lam[1] 240 | 241 | return F 242 | 243 | elif len(x.shape) == 2: 244 | pred_len, input_size = u.shape 245 | F = np.zeros((pred_len, input_size)) 246 | 247 | for i in range(pred_len): 248 | F[i, 0] = u[i, 0] + lam[i, 1] 249 | 250 | return F 251 | 252 | else: 253 | raise NotImplementedError 254 | 255 | @staticmethod 256 | def gradient_hamiltonian_state(x, lam, u, g_x): 257 | """ 258 | Args: 259 | x (numpy.ndarray): shape(pred_len+1, state_size) 260 | lam (numpy.ndarray): shape(pred_len, state_size) 261 | u (numpy.ndarray): shape(pred_len, input_size) 262 | g_xs (numpy.ndarray): shape(pred_len, state_size) 263 | 264 | Returns: 265 | lam_dot (numpy.ndarray), shape(state_size, ) 266 | """ 267 | if len(lam.shape) == 1: 268 | state_size = lam.shape[0] 269 | lam_dot = np.zeros(state_size) 270 | lam_dot[0] = x[0] - (2. * x[0] * x[1] + 1.) * lam[1] 271 | lam_dot[1] = x[1] + lam[0] + \ 272 | (-3. * (x[1]**2) - x[0]**2 + 1.) * lam[1] 273 | 274 | return lam_dot 275 | 276 | elif len(lam.shape) == 2: 277 | pred_len, state_size = lam.shape 278 | lam_dot = np.zeros((pred_len, state_size)) 279 | 280 | for i in range(pred_len): 281 | lam_dot[i, 0] = x[i, 0] - \ 282 | (2. * x[i, 0] * x[i, 1] + 1.) * lam[i, 1] 283 | lam_dot[i, 1] = x[i, 1] + lam[i, 0] + \ 284 | (-3. * (x[i, 1]**2) - x[i, 0]**2 + 1.) * lam[i, 1] 285 | 286 | return lam_dot 287 | 288 | else: 289 | raise NotImplementedError 290 | 291 | 292 | class NonlinearSampleSystemExtendConfigModule(NonlinearSampleSystemConfigModule): 293 | def __init__(self): 294 | super().__init__() 295 | self.opt_config = { 296 | "NMPCCGMRES": { 297 | "threshold": 1e-3, 298 | "zeta": 100., 299 | "delta": 0.01, 300 | "alpha": 0.5, 301 | "tf": 1., 302 | "constraint": True 303 | }, 304 | "NMPCNewton": { 305 | "threshold": 1e-3, 306 | "max_iteration": 500, 307 | "learning_rate": 1e-3 308 | } 309 | } 310 | 311 | @staticmethod 312 | def gradient_hamiltonian_input_with_constraint(x, lam, u, g_x, dummy_u, raw): 313 | """ 314 | 315 | Args: 316 | x (numpy.ndarray): shape(pred_len+1, state_size) 317 | lam (numpy.ndarray): shape(pred_len, state_size) 318 | u (numpy.ndarray): shape(pred_len, input_size) 319 | g_xs (numpy.ndarray): shape(pred_len, state_size) 320 | dummy_u (numpy.ndarray): shape(pred_len, input_size) 321 | raw (numpy.ndarray): shape(pred_len, input_size), Lagrangian for constraints 322 | 323 | Returns: 324 | F (numpy.ndarray), shape(pred_len, 3) 325 | """ 326 | if len(x.shape) == 1: 327 | vanilla_F = np.zeros(1) 328 | extend_F = np.zeros(1) # 1 is the same as input size 329 | extend_C = np.zeros(1) 330 | 331 | vanilla_F[0] = u[0] + lam[1] + 2. * raw[0] * u[0] 332 | extend_F[0] = -0.01 + 2. * raw[0] * dummy_u[0] 333 | extend_C[0] = u[0]**2 + dummy_u[0]**2 - \ 334 | NonlinearSampleSystemConfigModule.INPUT_LOWER_BOUND**2 335 | 336 | F = np.concatenate([vanilla_F, extend_F, extend_C]) 337 | 338 | elif len(x.shape) == 2: 339 | pred_len, _ = u.shape 340 | vanilla_F = np.zeros((pred_len, 1)) 341 | extend_F = np.zeros((pred_len, 1)) # 1 is the same as input size 342 | extend_C = np.zeros((pred_len, 1)) 343 | 344 | for i in range(pred_len): 345 | vanilla_F[i, 0] = \ 346 | u[i, 0] + lam[i, 1] + 2. * raw[i, 0] * u[i, 0] 347 | extend_F[i, 0] = -0.01 + 2. * raw[i, 0] * dummy_u[i, 0] 348 | extend_C[i, 0] = u[i, 0]**2 + dummy_u[i, 0]**2 - \ 349 | NonlinearSampleSystemConfigModule.INPUT_LOWER_BOUND**2 350 | 351 | F = np.concatenate([vanilla_F, extend_F, extend_C], axis=1) 352 | 353 | return F 354 | --------------------------------------------------------------------------------