├── tests
├── __init__.py
├── env
│ ├── __init__.py
│ ├── test_first_orger_lag.py
│ ├── test_two_wheeled.py
│ └── test_cartpole.py
├── configs
│ ├── __init__.py
│ ├── test_first_order_lag.py
│ ├── test_two_wheeled.py
│ └── test_cartpole.py
└── models
│ ├── __init__.py
│ ├── test_first_order_lag.py
│ ├── test_model.py
│ ├── test_two_wheeled.py
│ └── test_cartpole.py
├── PythonLinearNonlinearControl
├── __init__.py
├── common
│ ├── __init__.py
│ └── utils.py
├── runners
│ ├── __init__.py
│ ├── make_runners.py
│ └── runner.py
├── plotters
│ ├── __init__.py
│ ├── animator.py
│ ├── plot_objs.py
│ └── plot_func.py
├── planners
│ ├── __init__.py
│ ├── planner.py
│ ├── make_planners.py
│ ├── const_planner.py
│ └── closest_point_planner.py
├── models
│ ├── __init__.py
│ ├── make_models.py
│ ├── first_order_lag.py
│ ├── nonlinear_sample_system.py
│ ├── two_wheeled.py
│ ├── cartpole.py
│ └── model.py
├── configs
│ ├── __init__.py
│ ├── make_configs.py
│ ├── first_order_lag.py
│ ├── cartpole.py
│ └── nonlinear_sample_system.py
├── envs
│ ├── __init__.py
│ ├── make_envs.py
│ ├── env.py
│ ├── cost.py
│ ├── nonlinear_sample_system.py
│ ├── first_order_lag.py
│ └── cartpole.py
├── controllers
│ ├── __init__.py
│ ├── make_controllers.py
│ ├── controller.py
│ ├── random.py
│ ├── nmpc.py
│ ├── mppi.py
│ ├── cem.py
│ ├── mppi_williams.py
│ ├── nmpc_cgmres.py
│ └── mpc.py
└── helper.py
├── assets
├── cartpole.gif
├── cartpole.png
├── concept.png
├── twowheeled.png
├── firstorderlag.png
├── cartpole_score.png
├── quadratic_score.png
├── twowheeledconst.gif
├── twowheeledtrack.gif
├── nonlinear_sample_system.png
└── nonlinear_sample_system_score.png
├── .travis.yml
├── .github
└── workflows
│ └── python-publish.yml
├── setup.py
├── licenses
└── MIT.txt
├── scripts
├── show_result.py
├── simple_run.py
└── quickstart
│ └── quickstart.md
├── Environments.md
├── .gitignore
└── README.md
/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/env/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/configs/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/models/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/common/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/assets/cartpole.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shunichi09/PythonLinearNonlinearControl/HEAD/assets/cartpole.gif
--------------------------------------------------------------------------------
/assets/cartpole.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shunichi09/PythonLinearNonlinearControl/HEAD/assets/cartpole.png
--------------------------------------------------------------------------------
/assets/concept.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shunichi09/PythonLinearNonlinearControl/HEAD/assets/concept.png
--------------------------------------------------------------------------------
/assets/twowheeled.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shunichi09/PythonLinearNonlinearControl/HEAD/assets/twowheeled.png
--------------------------------------------------------------------------------
/assets/firstorderlag.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shunichi09/PythonLinearNonlinearControl/HEAD/assets/firstorderlag.png
--------------------------------------------------------------------------------
/assets/cartpole_score.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shunichi09/PythonLinearNonlinearControl/HEAD/assets/cartpole_score.png
--------------------------------------------------------------------------------
/assets/quadratic_score.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shunichi09/PythonLinearNonlinearControl/HEAD/assets/quadratic_score.png
--------------------------------------------------------------------------------
/assets/twowheeledconst.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shunichi09/PythonLinearNonlinearControl/HEAD/assets/twowheeledconst.gif
--------------------------------------------------------------------------------
/assets/twowheeledtrack.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shunichi09/PythonLinearNonlinearControl/HEAD/assets/twowheeledtrack.gif
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/runners/__init__.py:
--------------------------------------------------------------------------------
1 | from PythonLinearNonlinearControl.runners.runner \
2 | import ExpRunner # NOQA
3 |
--------------------------------------------------------------------------------
/assets/nonlinear_sample_system.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shunichi09/PythonLinearNonlinearControl/HEAD/assets/nonlinear_sample_system.png
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/runners/make_runners.py:
--------------------------------------------------------------------------------
1 | from .runner import ExpRunner
2 |
3 |
4 | def make_runner(args):
5 | return ExpRunner()
6 |
--------------------------------------------------------------------------------
/assets/nonlinear_sample_system_score.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shunichi09/PythonLinearNonlinearControl/HEAD/assets/nonlinear_sample_system_score.png
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/plotters/__init__.py:
--------------------------------------------------------------------------------
1 | from PythonLinearNonlinearControl.plotters.animator \
2 | import Animator
3 | from PythonLinearNonlinearControl.plotters.plot_func \
4 | import plot_results
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/planners/__init__.py:
--------------------------------------------------------------------------------
1 | from PythonLinearNonlinearControl.planners.const_planner \
2 | import ConstantPlanner # NOQA
3 | from PythonLinearNonlinearControl.planners.closest_point_planner \
4 | import ClosestPointPlanner # NOQA
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 |
3 | python:
4 | - 3.7
5 |
6 | install:
7 | - pip install --upgrade pip setuptools wheel
8 | - pip install coveralls
9 |
10 | script:
11 | - coverage run --source=PythonLinearNonlinearControl setup.py pytest
12 |
13 | after_success:
14 | - coveralls
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/models/__init__.py:
--------------------------------------------------------------------------------
1 | from PythonLinearNonlinearControl.models.cartpole \
2 | import CartPoleModel # NOQA
3 | from PythonLinearNonlinearControl.models.first_order_lag \
4 | import FirstOrderLagModel # NOQA
5 | from PythonLinearNonlinearControl.models.two_wheeled \
6 | import TwoWheeledModel # NOQA
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/configs/__init__.py:
--------------------------------------------------------------------------------
1 | from PythonLinearNonlinearControl.configs.cartpole \
2 | import CartPoleConfigModule # NOQA
3 | from PythonLinearNonlinearControl.configs.first_order_lag \
4 | import FirstOrderLagConfigModule # NOQA
5 | from PythonLinearNonlinearControl.configs.two_wheeled \
6 | import TwoWheeledConfigModule # NOQA
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/envs/__init__.py:
--------------------------------------------------------------------------------
1 | from PythonLinearNonlinearControl.envs.cartpole \
2 | import CartPoleEnv # NOQA
3 | from PythonLinearNonlinearControl.envs.first_order_lag \
4 | import FirstOrderLagEnv # NOQA
5 | from PythonLinearNonlinearControl.envs.two_wheeled \
6 | import TwoWheeledConstEnv # NOQA
7 | from PythonLinearNonlinearControl.envs.two_wheeled \
8 | import TwoWheeledTrackEnv # NOQA
9 |
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/planners/planner.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | class Planner():
5 | """
6 | """
7 |
8 | def __init__(self):
9 | """
10 | """
11 | pass
12 |
13 | def plan(self, curr_x):
14 | """
15 | Args:
16 | curr_x (numpy.ndarray): current state, shape(state_size)
17 | Returns:
18 | g_xs (numpy.ndarrya): goal state, shape(pred_len, state_size)
19 | """
20 | raise NotImplementedError("Implement plan func")
21 |
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/controllers/__init__.py:
--------------------------------------------------------------------------------
1 | from PythonLinearNonlinearControl.controllers.cem \
2 | import CEM # NOQA
3 | from PythonLinearNonlinearControl.controllers.mppi \
4 | import MPPI # NOQA
5 | from PythonLinearNonlinearControl.controllers.mppi_williams \
6 | import MPPIWilliams # NOQA
7 | from PythonLinearNonlinearControl.controllers.random \
8 | import RandomShooting # NOQA
9 | from PythonLinearNonlinearControl.controllers.ilqr \
10 | import iLQR # NOQA
11 | from PythonLinearNonlinearControl.controllers.ddp \
12 | import DDP # NOQA
13 | from PythonLinearNonlinearControl.controllers.mpc \
14 | import LinearMPC # NOQA
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/planners/make_planners.py:
--------------------------------------------------------------------------------
1 | from .const_planner import ConstantPlanner
2 | from .closest_point_planner import ClosestPointPlanner
3 |
4 |
5 | def make_planner(args, config):
6 |
7 | if args.env == "FirstOrderLag":
8 | return ConstantPlanner(config)
9 | elif args.env == "TwoWheeledConst":
10 | return ConstantPlanner(config)
11 | elif args.env == "TwoWheeledTrack":
12 | return ClosestPointPlanner(config)
13 | elif args.env == "CartPole":
14 | return ConstantPlanner(config)
15 | elif args.env == "NonlinearSample":
16 | return ConstantPlanner(config)
17 |
18 | raise NotImplementedError(
19 | "There is not {} Planner".format(args.planner_type))
20 |
--------------------------------------------------------------------------------
/.github/workflows/python-publish.yml:
--------------------------------------------------------------------------------
1 | name: Upload Python Package
2 |
3 | on:
4 | release:
5 | types: [created]
6 |
7 | jobs:
8 | deploy:
9 | runs-on: ubuntu-latest
10 | steps:
11 | - uses: actions/checkout@v2
12 | - name: Set up Python 3.6
13 | uses: actions/setup-python@v2
14 | with:
15 | python-version: '3.6'
16 | - name: Install dependencies
17 | run: |
18 | python -m pip install --upgrade pip
19 | pip install setuptools wheel twine
20 | - name: Build and publish
21 | env:
22 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
23 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
24 | run: |
25 | python setup.py bdist_wheel
26 | twine upload dist/*
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/models/make_models.py:
--------------------------------------------------------------------------------
1 | from .first_order_lag import FirstOrderLagModel
2 | from .two_wheeled import TwoWheeledModel
3 | from .cartpole import CartPoleModel
4 | from .nonlinear_sample_system import NonlinearSampleSystemModel
5 |
6 |
7 | def make_model(args, config):
8 |
9 | if args.env == "FirstOrderLag":
10 | return FirstOrderLagModel(config)
11 | elif args.env == "TwoWheeledConst" or args.env == "TwoWheeledTrack":
12 | return TwoWheeledModel(config)
13 | elif args.env == "CartPole":
14 | return CartPoleModel(config)
15 | elif args.env == "NonlinearSample":
16 | return NonlinearSampleSystemModel(config)
17 |
18 | raise NotImplementedError("There is not {} Model".format(args.env))
19 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import find_packages
2 | from setuptools import setup
3 |
4 | install_requires = ['numpy', 'matplotlib', 'cvxopt', 'scipy']
5 | tests_require = ['pytest']
6 | setup_requires = ["pytest-runner"]
7 |
8 | setup(
9 | name='PythonLinearNonlinearControl',
10 | version='2.0',
11 | description='Implementing linear and nonlinear control method in python',
12 | author='Shunichi Sekiguchi',
13 | author_email='quick1st97of@gmail.com',
14 | install_requires=install_requires,
15 | url='https://github.com/Shunichi09/PythonLinearNonlinearControl',
16 | license='MIT License',
17 | packages=find_packages(exclude=('tests', 'scripts')),
18 | setup_requires=setup_requires,
19 | test_suite='tests',
20 | tests_require=tests_require
21 | )
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/envs/make_envs.py:
--------------------------------------------------------------------------------
1 | from .first_order_lag import FirstOrderLagEnv
2 | from .two_wheeled import TwoWheeledConstEnv
3 | from .two_wheeled import TwoWheeledTrackEnv
4 | from .cartpole import CartPoleEnv
5 | from .nonlinear_sample_system import NonlinearSampleSystemEnv
6 |
7 |
8 | def make_env(args):
9 |
10 | if args.env == "FirstOrderLag":
11 | return FirstOrderLagEnv()
12 | elif args.env == "TwoWheeledConst":
13 | return TwoWheeledConstEnv()
14 | elif args.env == "TwoWheeledTrack":
15 | return TwoWheeledTrackEnv()
16 | elif args.env == "CartPole":
17 | return CartPoleEnv()
18 | elif args.env == "NonlinearSample":
19 | return NonlinearSampleSystemEnv()
20 |
21 | raise NotImplementedError("There is not {} Env".format(args.env))
22 |
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/planners/const_planner.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from .planner import Planner
3 |
4 |
5 | class ConstantPlanner(Planner):
6 | """ This planner make constant goal state
7 | """
8 |
9 | def __init__(self, config):
10 | """
11 | """
12 | super(ConstantPlanner, self).__init__()
13 | self.pred_len = config.PRED_LEN
14 | self.state_size = config.STATE_SIZE
15 |
16 | def plan(self, curr_x, g_x=None):
17 | """
18 | Args:
19 | curr_x (numpy.ndarray): current state, shape(state_size)
20 | g_x (numpy.ndarray): goal state, shape(state_size),
21 | this state should be obtained from env
22 | Returns:
23 | g_xs (numpy.ndarrya): goal state, shape(pred_len, state_size)
24 | """
25 | return np.tile(g_x, (self.pred_len+1, 1))
26 |
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/configs/make_configs.py:
--------------------------------------------------------------------------------
1 | from .first_order_lag import FirstOrderLagConfigModule
2 | from .two_wheeled import TwoWheeledConfigModule, TwoWheeledExtendConfigModule
3 | from .cartpole import CartPoleConfigModule
4 | from .nonlinear_sample_system import NonlinearSampleSystemConfigModule, NonlinearSampleSystemExtendConfigModule
5 |
6 |
7 | def make_config(args):
8 | """
9 | Returns:
10 | config (ConfigModule class): configuration for the each env
11 | """
12 | if args.env == "FirstOrderLag":
13 | return FirstOrderLagConfigModule()
14 | elif args.env == "TwoWheeledConst" or args.env == "TwoWheeledTrack":
15 | if args.controller_type == "NMPCCGMRES":
16 | return TwoWheeledExtendConfigModule()
17 | return TwoWheeledConfigModule()
18 | elif args.env == "CartPole":
19 | return CartPoleConfigModule()
20 | elif args.env == "NonlinearSample":
21 | if args.controller_type == "NMPCCGMRES":
22 | return NonlinearSampleSystemExtendConfigModule()
23 | return NonlinearSampleSystemConfigModule()
24 |
--------------------------------------------------------------------------------
/licenses/MIT.txt:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Shunichi Sekiguchi
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/tests/env/test_first_orger_lag.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 |
4 | from PythonLinearNonlinearControl.envs.first_order_lag import FirstOrderLagEnv
5 |
6 | class TestFirstOrderLagEnv():
7 |
8 | def test_step(self):
9 | env = FirstOrderLagEnv()
10 |
11 | curr_x = np.ones(4)
12 |
13 | env.reset(init_x=curr_x)
14 |
15 | u = np.ones(2) * 0.1
16 |
17 | next_x, _, _, _ = env.step(u)
18 |
19 | dx = np.dot(env.A, curr_x[:, np.newaxis])
20 | du = np.dot(env.B, u[:, np.newaxis])
21 |
22 | expected = (dx + du).flatten()
23 |
24 | assert next_x == pytest.approx(expected, abs=1e-5)
25 |
26 | def test_bound_step(self):
27 | env = FirstOrderLagEnv()
28 |
29 | curr_x = np.ones(4)
30 |
31 | env.reset(init_x=curr_x)
32 |
33 | u = np.ones(2) * 1e5
34 |
35 | next_x, _, _, _ = env.step(u)
36 |
37 | dx = np.dot(env.A, curr_x[:, np.newaxis])
38 | du = np.dot(env.B,
39 | np.array(env.config["input_upper_bound"])[:, np.newaxis])
40 |
41 | expected = (dx + du).flatten()
42 |
43 | assert next_x == pytest.approx(expected, abs=1e-5)
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/controllers/make_controllers.py:
--------------------------------------------------------------------------------
1 | from .mpc import LinearMPC
2 | from .cem import CEM
3 | from .random import RandomShooting
4 | from .mppi import MPPI
5 | from .mppi_williams import MPPIWilliams
6 | from .ilqr import iLQR
7 | from .ddp import DDP
8 | from .nmpc import NMPC
9 | from .nmpc_cgmres import NMPCCGMRES
10 |
11 |
12 | def make_controller(args, config, model):
13 |
14 | if args.controller_type == "MPC":
15 | return LinearMPC(config, model)
16 | elif args.controller_type == "CEM":
17 | return CEM(config, model)
18 | elif args.controller_type == "Random":
19 | return RandomShooting(config, model)
20 | elif args.controller_type == "MPPI":
21 | return MPPI(config, model)
22 | elif args.controller_type == "MPPIWilliams":
23 | return MPPIWilliams(config, model)
24 | elif args.controller_type == "iLQR":
25 | return iLQR(config, model)
26 | elif args.controller_type == "DDP":
27 | return DDP(config, model)
28 | elif args.controller_type == "NMPC":
29 | return NMPC(config, model)
30 | elif args.controller_type == "NMPCCGMRES":
31 | return NMPCCGMRES(config, model)
32 |
33 | raise ValueError("No controller: {}".format(args.controller_type))
34 |
--------------------------------------------------------------------------------
/tests/configs/test_first_order_lag.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 |
4 | from PythonLinearNonlinearControl.configs.first_order_lag \
5 | import FirstOrderLagConfigModule
6 |
7 | class TestCalcCost():
8 | def test_calc_costs(self):
9 | # make config
10 | config = FirstOrderLagConfigModule()
11 | # set
12 | pred_len = 5
13 | state_size = 4
14 | input_size = 2
15 | pop_size = 2
16 | pred_xs = np.ones((pop_size, pred_len, state_size))
17 | g_xs = np.ones((pop_size, pred_len, state_size)) * 0.5
18 | input_samples = np.ones((pop_size, pred_len, input_size)) * 0.5
19 |
20 | costs = config.input_cost_fn(input_samples)
21 | expected_costs = np.ones((pop_size, pred_len, input_size))*0.5
22 |
23 | assert costs == pytest.approx(expected_costs**2 * np.diag(config.R))
24 |
25 | costs = config.state_cost_fn(pred_xs, g_xs)
26 | expected_costs = np.ones((pop_size, pred_len, state_size))*0.5
27 |
28 | assert costs == pytest.approx(expected_costs**2 * np.diag(config.Q))
29 |
30 | costs = config.terminal_state_cost_fn(pred_xs[:, -1, :],\
31 | g_xs[:, -1, :])
32 | expected_costs = np.ones((pop_size, state_size))*0.5
33 |
34 | assert costs == pytest.approx(expected_costs**2 * np.diag(config.Sf))
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/planners/closest_point_planner.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from .planner import Planner
3 |
4 |
5 | class ClosestPointPlanner(Planner):
6 | """ This planner make goal state according to goal path
7 | """
8 |
9 | def __init__(self, config):
10 | """
11 | """
12 | super(ClosestPointPlanner, self).__init__()
13 | self.pred_len = config.PRED_LEN
14 | self.state_size = config.STATE_SIZE
15 | self.n_ahead = config.N_AHEAD
16 |
17 | def plan(self, curr_x, g_traj):
18 | """
19 | Args:
20 | curr_x (numpy.ndarray): current state, shape(state_size)
21 | g_x (numpy.ndarray): goal state, shape(state_size),
22 | this state should be obtained from env
23 | Returns:
24 | g_xs (numpy.ndarrya): goal state, shape(pred_len+1, state_size)
25 | """
26 | min_idx = np.argmin(np.linalg.norm(curr_x[:-1] - g_traj[:, :-1],
27 | axis=1))
28 |
29 | start = (min_idx+self.n_ahead)
30 | if start > len(g_traj):
31 | start = len(g_traj)
32 |
33 | end = min_idx+self.n_ahead+self.pred_len+1
34 |
35 | if (min_idx+self.n_ahead+self.pred_len+1) > len(g_traj):
36 | end = len(g_traj)
37 |
38 | if abs(start - end) != self.pred_len + 1:
39 | return np.tile(g_traj[-1], (self.pred_len+1, 1))
40 |
41 | return g_traj[start:end]
42 |
--------------------------------------------------------------------------------
/tests/models/test_first_order_lag.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 |
4 | from PythonLinearNonlinearControl.models.model \
5 | import LinearModel
6 | from PythonLinearNonlinearControl.models.first_order_lag \
7 | import FirstOrderLagModel
8 | from PythonLinearNonlinearControl.configs.first_order_lag \
9 | import FirstOrderLagConfigModule
10 |
11 | from unittest.mock import patch
12 | from unittest.mock import Mock
13 |
14 | class TestFirstOrderLagModel():
15 | """
16 | """
17 | def test_step(self):
18 | config = FirstOrderLagConfigModule()
19 | firstorderlag_model = FirstOrderLagModel(config)
20 |
21 | curr_x = np.ones(config.STATE_SIZE)
22 | u = np.ones((1, config.INPUT_SIZE))
23 |
24 | with patch.object(LinearModel, "predict_traj") as mock_predict_traj:
25 | firstorderlag_model.predict_traj(curr_x, u)
26 |
27 | mock_predict_traj.assert_called_once_with(curr_x, u)
28 |
29 | def test_predict_traj(self):
30 |
31 | config = FirstOrderLagConfigModule()
32 | firstorderlag_model = FirstOrderLagModel(config)
33 |
34 | curr_x = np.ones(config.STATE_SIZE)
35 | curr_x[-1] = np.pi / 6.
36 | u = np.ones((1, config.INPUT_SIZE))
37 |
38 | pred_xs = firstorderlag_model.predict_traj(curr_x, u)
39 |
40 | u = np.tile(u, (1, 1, 1))
41 | pred_xs_alltogether = firstorderlag_model.predict_traj(curr_x, u)[0]
42 |
43 | assert pred_xs_alltogether == pytest.approx(pred_xs)
--------------------------------------------------------------------------------
/tests/env/test_two_wheeled.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 |
4 | from PythonLinearNonlinearControl.envs.two_wheeled import TwoWheeledConstEnv
5 |
6 | class TestTwoWheeledEnv():
7 | """
8 | """
9 | def test_step(self):
10 | env = TwoWheeledConstEnv()
11 |
12 | curr_x = np.ones(3)
13 | curr_x[-1] = np.pi / 6.
14 |
15 | env.reset(init_x=curr_x)
16 |
17 | u = np.ones(2)
18 |
19 | next_x, _, _, _ = env.step(u)
20 |
21 | pos_x = np.cos(curr_x[-1]) * u[0] * env.config["dt"] + curr_x[0]
22 | pos_y = np.sin(curr_x[-1]) * u[0] * env.config["dt"] + curr_x[1]
23 |
24 | expected = np.array([pos_x, pos_y,\
25 | curr_x[-1] + u[1] * env.config["dt"]])
26 |
27 | assert next_x == pytest.approx(expected)
28 |
29 | def test_bound_step(self):
30 | env = TwoWheeledConstEnv()
31 |
32 | curr_x = np.ones(3)
33 | curr_x[-1] = np.pi / 6.
34 |
35 | env.reset(init_x=curr_x)
36 |
37 | u = np.ones(2) * 1e3
38 |
39 | next_x, _, _, _ = env.step(u)
40 |
41 | pos_x = np.cos(curr_x[-1]) * env.config["input_upper_bound"][0] \
42 | * env.config["dt"] + curr_x[0]
43 | pos_y = np.sin(curr_x[-1]) * env.config["input_upper_bound"][0] \
44 | * env.config["dt"] + curr_x[1]
45 |
46 | expected = np.array([pos_x, pos_y,\
47 | curr_x[-1] + env.config["input_upper_bound"][1] \
48 | * env.config["dt"]])
49 |
50 | assert next_x == pytest.approx(expected)
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/runners/runner.py:
--------------------------------------------------------------------------------
1 | from logging import getLogger
2 |
3 | import numpy as np
4 |
5 | logger = getLogger(__name__)
6 |
7 |
8 | class ExpRunner():
9 | """ experiment runner
10 | """
11 |
12 | def __init__(self):
13 | """
14 | """
15 | pass
16 |
17 | def run(self, env, controller, planner):
18 | """
19 | Returns:
20 | history_x (numpy.ndarray): history of the state,
21 | shape(episode length, state_size)
22 | history_u (numpy.ndarray): history of the state,
23 | shape(episode length, input_size)
24 | """
25 | done = False
26 | curr_x, info = env.reset()
27 | history_x, history_u, history_g = [], [], []
28 | step_count = 0
29 | score = 0.
30 |
31 | while not done:
32 | logger.debug("Step = {}".format(step_count))
33 | # plan
34 | g_xs = planner.plan(curr_x, info["goal_state"])
35 |
36 | # obtain sol
37 | u = controller.obtain_sol(curr_x, g_xs)
38 |
39 | # step
40 | next_x, cost, done, info = env.step(u)
41 |
42 | # save
43 | history_u.append(u)
44 | history_x.append(curr_x)
45 | history_g.append(g_xs[0])
46 | # update
47 | curr_x = next_x
48 | score += cost
49 | step_count += 1
50 |
51 | logger.debug("Controller type = {}, Score = {}"
52 | .format(controller, score))
53 | return np.array(history_x), np.array(history_u), np.array(history_g)
54 |
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/envs/env.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | class Env():
5 | """ Environments class
6 | Attributes:
7 |
8 | curr_x (numpy.ndarray): current state
9 | history_x (list[numpy.ndarray]): historty of state, shape(step_count*state_size)
10 | step_count (int): step count
11 | """
12 |
13 | def __init__(self, config):
14 | """
15 | """
16 | self.config = config
17 | self.curr_x = None
18 | self.goal_state = None
19 | self.history_x = []
20 | self.history_g_x = []
21 | self.step_count = None
22 |
23 | def reset(self, init_x=None):
24 | """ reset state
25 | Returns:
26 | init_x (numpy.ndarray): initial state, shape(state_size, )
27 | info (dict): information
28 | """
29 | self.step_count = 0
30 |
31 | self.curr_x = np.zeros(self.config["state_size"])
32 |
33 | if init_x is not None:
34 | self.curr_x = init_x
35 |
36 | # clear memory
37 | self.history_x = []
38 | self.history_g_x = []
39 |
40 | return self.curr_x, {}
41 |
42 | def step(self, u):
43 | """
44 | Args:
45 | u (numpy.ndarray) : input, shape(input_size, )
46 | Returns:
47 | next_x (numpy.ndarray): next state, shape(state_size, )
48 | cost (float): costs
49 | done (bool): end the simulation or not
50 | info (dict): information
51 | """
52 | raise NotImplementedError("Implement step function")
53 |
54 | def __repr__(self):
55 | """
56 | """
57 | return self.config
58 |
--------------------------------------------------------------------------------
/tests/models/test_model.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 |
4 | from PythonLinearNonlinearControl.models.model import LinearModel
5 |
6 | class TestLinearModel():
7 | """
8 | """
9 | def test_predict(self):
10 |
11 | A = np.array([[1., 0.1],
12 | [0.1, 1.5]])
13 | B = np.array([[0.2], [0.5]])
14 | curr_x = np.ones(2) * 0.5
15 | u = np.ones((1, 1))
16 |
17 | linear_model = LinearModel(A, B)
18 | pred_xs = linear_model.predict_traj(curr_x, u)
19 |
20 | assert pred_xs == pytest.approx(np.array([[0.5, 0.5], [0.75, 1.3]]))
21 |
22 | def test_alltogether(self):
23 |
24 | A = np.array([[1., 0.1],
25 | [0.1, 1.5]])
26 | B = np.array([[0.2], [0.5]])
27 | curr_x = np.ones(2) * 0.5
28 | u = np.ones((1, 1))
29 |
30 | linear_model = LinearModel(A, B)
31 | pred_xs = linear_model.predict_traj(curr_x, u)
32 |
33 | u = np.tile(u, (1, 1, 1))
34 | pred_xs_alltogether = linear_model.predict_traj(curr_x, u)[0]
35 |
36 | assert pred_xs_alltogether == pytest.approx(pred_xs)
37 |
38 | def test_alltogether_val(self):
39 |
40 | A = np.array([[1., 0.1],
41 | [0.1, 1.5]])
42 | B = np.array([[0.2], [0.5]])
43 | curr_x = np.ones(2) * 0.5
44 | u = np.stack((np.ones((1, 1)), np.ones((1, 1))*0.5), axis=0)
45 |
46 | linear_model = LinearModel(A, B)
47 |
48 | pred_xs_alltogether = linear_model.predict_traj(curr_x, u)
49 |
50 | expected_val = np.array([[[0.5, 0.5], [0.75, 1.3]],
51 | [[0.5, 0.5], [0.65, 1.05]]])
52 |
53 | assert pred_xs_alltogether == pytest.approx(expected_val)
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/envs/cost.py:
--------------------------------------------------------------------------------
1 | from logging import getLogger
2 |
3 | import numpy as np
4 |
5 | logger = getLogger(__name__)
6 |
7 |
8 | def calc_cost(pred_xs, input_sample, g_xs,
9 | state_cost_fn, input_cost_fn, terminal_state_cost_fn):
10 | """ calculate the cost
11 |
12 | Args:
13 | pred_xs (numpy.ndarray): predicted state trajectory,
14 | shape(pop_size, pred_len+1, state_size)
15 | input_sample (numpy.ndarray): inputs samples trajectory,
16 | shape(pop_size, pred_len+1, input_size)
17 | g_xs (numpy.ndarray): goal state trajectory,
18 | shape(pop_size, pred_len+1, state_size)
19 | state_cost_fn (function): state cost fucntion
20 | input_cost_fn (function): input cost fucntion
21 | terminal_state_cost_fn (function): terminal state cost fucntion
22 | Returns:
23 | cost (numpy.ndarray): cost of the input sample, shape(pop_size, )
24 | """
25 | # state cost
26 | state_cost = 0.
27 | if state_cost_fn is not None:
28 | state_pred_par_cost = state_cost_fn(
29 | pred_xs[:, 1:-1, :], g_xs[:, 1:-1, :])
30 | state_cost = np.sum(np.sum(state_pred_par_cost, axis=-1), axis=-1)
31 |
32 | # terminal cost
33 | terminal_state_cost = 0.
34 | if terminal_state_cost_fn is not None:
35 | terminal_state_par_cost = terminal_state_cost_fn(pred_xs[:, -1, :],
36 | g_xs[:, -1, :])
37 | terminal_state_cost = np.sum(terminal_state_par_cost, axis=-1)
38 |
39 | # act cost
40 | act_cost = 0.
41 | if input_cost_fn is not None:
42 | act_pred_par_cost = input_cost_fn(input_sample)
43 | act_cost = np.sum(np.sum(act_pred_par_cost, axis=-1), axis=-1)
44 |
45 | return state_cost + terminal_state_cost + act_cost
46 |
--------------------------------------------------------------------------------
/scripts/show_result.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import argparse
4 | import pickle
5 | import numpy as np
6 | import matplotlib.pyplot as plt
7 |
8 | from PythonLinearNonlinearControl.plotters.plot_func import load_plot_data, \
9 | plot_multi_result
10 |
11 |
12 | def run(args):
13 |
14 | controllers = ["iLQR", "DDP", "CEM", "MPPI"]
15 |
16 | history_xs = None
17 | history_us = None
18 | history_gs = None
19 |
20 | # load data
21 | for controller in controllers:
22 | history_x, history_u, history_g = \
23 | load_plot_data(args.env, controller,
24 | result_dir=args.result_dir)
25 |
26 | if history_xs is None:
27 | history_xs = history_x[np.newaxis, :]
28 | history_us = history_u[np.newaxis, :]
29 | history_gs = history_g[np.newaxis, :]
30 | continue
31 |
32 | history_xs = np.concatenate((history_xs,
33 | history_x[np.newaxis, :]), axis=0)
34 | history_us = np.concatenate((history_us,
35 | history_u[np.newaxis, :]), axis=0)
36 | history_gs = np.concatenate((history_gs,
37 | history_g[np.newaxis, :]), axis=0)
38 |
39 | plot_multi_result(history_xs, histories_g=history_gs, labels=controllers,
40 | ylabel="x")
41 |
42 | plot_multi_result(history_us, histories_g=np.zeros_like(history_us),
43 | labels=controllers, ylabel="u", name="input_history")
44 |
45 |
46 | def main():
47 | parser = argparse.ArgumentParser()
48 |
49 | parser.add_argument("--env", type=str, default="FirstOrderLag")
50 | parser.add_argument("--result_dir", type=str, default="./result")
51 |
52 | args = parser.parse_args()
53 |
54 | run(args)
55 |
56 |
57 | if __name__ == "__main__":
58 | main()
59 |
--------------------------------------------------------------------------------
/Environments.md:
--------------------------------------------------------------------------------
1 | # Enviroments
2 |
3 | | Name | Linear | Nonlinear | State Size | Input size |
4 | |:----------|:---------------:|:----------------:|:----------------:|:----------------:|
5 | | First Order Lag System | ✓ | x | 4 | 2 |
6 | | Two wheeled System (Constant Goal) | x | ✓ | 3 | 2 |
7 | | Two wheeled System (Moving Goal) (Coming soon) | x | ✓ | 3 | 2 |
8 | | Cartpole (Swing up) | x | ✓ | 4 | 1 |
9 | | Nonlinear Sample System Env | x | ✓ | 2 | 1 |
10 |
11 |
12 | ## [FistOrderLagEnv](PythonLinearNonlinearControl/envs/first_order_lag.py)
13 |
14 | ### System equation.
15 |
16 |
17 |
18 | You can set arbinatry time constant, tau. The default is 0.63 s
19 |
20 | ### Cost.
21 |
22 |
23 |
24 | Q = diag[1., 1., 1., 1.],
25 | R = diag[1., 1.]
26 |
27 | X_g denote the goal states.
28 |
29 | ## [TwoWheeledEnv](PythonLinearNonlinearControl/envs/two_wheeled.py)
30 |
31 | ### System equation.
32 |
33 |
34 |
35 | ### Cost.
36 |
37 |
38 |
39 | Q = diag[5., 5., 1.],
40 | R = diag[0.1, 0.1]
41 |
42 | X_g denote the goal states.
43 |
44 | ## [CatpoleEnv (Swing up)](PythonLinearNonlinearControl/envs/cartpole.py)
45 |
46 | ## System equation.
47 |
48 |
49 |
50 | You can set arbinatry parameters, mc, mp, l and g.
51 |
52 | Default settings are as follows:
53 |
54 | mc = 1, mp = 0.2, l = 0.5, g = 9.81
55 |
56 | ### Cost.
57 |
58 |
59 |
60 | ## [Nonlinear Sample System Env](PythonLinearNonlinearControl/envs/nonlinear_sample_system.py)
61 |
62 | ## System equation.
63 |
64 |
65 |
66 | ### Cost.
67 |
68 |
69 |
--------------------------------------------------------------------------------
/scripts/simple_run.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | from PythonLinearNonlinearControl.helper import bool_flag, make_logger
4 | from PythonLinearNonlinearControl.controllers.make_controllers import make_controller
5 | from PythonLinearNonlinearControl.planners.make_planners import make_planner
6 | from PythonLinearNonlinearControl.configs.make_configs import make_config
7 | from PythonLinearNonlinearControl.models.make_models import make_model
8 | from PythonLinearNonlinearControl.envs.make_envs import make_env
9 | from PythonLinearNonlinearControl.runners.make_runners import make_runner
10 | from PythonLinearNonlinearControl.plotters.plot_func import plot_results, \
11 | save_plot_data
12 | from PythonLinearNonlinearControl.plotters.animator import Animator
13 |
14 |
15 | def run(args):
16 | make_logger(args.result_dir)
17 |
18 | env = make_env(args)
19 |
20 | config = make_config(args)
21 |
22 | planner = make_planner(args, config)
23 |
24 | model = make_model(args, config)
25 |
26 | controller = make_controller(args, config, model)
27 |
28 | runner = make_runner(args)
29 |
30 | history_x, history_u, history_g = runner.run(env, controller, planner)
31 |
32 | plot_results(history_x, history_u, history_g=history_g, args=args)
33 | save_plot_data(history_x, history_u, history_g=history_g, args=args)
34 |
35 | if args.save_anim:
36 | animator = Animator(env, args=args)
37 | animator.draw(history_x, history_g)
38 |
39 |
40 | def main():
41 | parser = argparse.ArgumentParser()
42 |
43 | parser.add_argument("--controller_type", type=str, default="NMPCCGMRES")
44 | parser.add_argument("--env", type=str, default="TwoWheeledConst")
45 | parser.add_argument("--save_anim", type=bool_flag, default=0)
46 | parser.add_argument("--result_dir", type=str, default="./result")
47 |
48 | args = parser.parse_args()
49 |
50 | run(args)
51 |
52 |
53 | if __name__ == "__main__":
54 | main()
55 |
--------------------------------------------------------------------------------
/scripts/quickstart/quickstart.md:
--------------------------------------------------------------------------------
1 | # PythonLinearnNonlinearControl Quickstart Guide
2 |
3 | This is a quickstart guide for users who just want to try PythonLinearNonlinearControl.
4 | If you have not installed PythonLinearNonLinearControl, please see the section of "how to setup" in README.md
5 |
6 | When we design control systems, we should have Environment, Model, Planner, Controller and Runner.
7 | Therefore your script contains those Modules.
8 |
9 | First, import each Modules from PythonLinearNonlinearControl.
10 |
11 | ```py
12 | from PythonLinearNonlinearControl import configs
13 | from PythonLinearNonlinearControl import envs
14 | from PythonLinearNonlinearControl import models
15 | from PythonLinearNonlinearControl import planners
16 | from PythonLinearNonlinearControl import controllers
17 | from PythonLinearNonlinearControl import runners
18 | ```
19 |
20 | Configs contains each modules configurations such as cost functions, prediction length, ...etc.
21 |
22 | Then you can make each module. (This is example about CEM and CartPole env)
23 |
24 | ```py
25 | config = configs.CartPoleConfigModule()
26 | env = envs.CartPoleEnv()
27 | model = models.CartPoleModel(config)
28 | planner = controllers.CEM(config, model)
29 | runner = planners.ConstantPlanner(config)
30 | controller = runners.ExpRunner()
31 | ```
32 |
33 | The preparation for experiment has done!
34 | Please run the runner.
35 |
36 | ```py
37 | history_x, history_u, history_g = runner.run(env, controller, planner)
38 | ```
39 |
40 | You can get the results of history of state, history of input and history of goal.
41 | Use that histories to visualize the Animation or Figures.
42 | (Note FirstOrderEnv does not support animation)
43 |
44 | ```py
45 | # plot results
46 | plot_results(args, history_x, history_u, history_g=history_g)
47 | save_plot_data(args, history_x, history_u, history_g=history_g)
48 |
49 | # create animation
50 | animator = Animator(args, env)
51 | animator.draw(history_x, history_g)
52 | ```
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/models/first_order_lag.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import scipy.linalg
3 | from scipy import integrate
4 | from .model import LinearModel
5 |
6 |
7 | class FirstOrderLagModel(LinearModel):
8 | """ first order lag model
9 | Attributes:
10 | curr_x (numpy.ndarray):
11 | u (numpy.ndarray):
12 | history_pred_xs (numpy.ndarray):
13 | """
14 |
15 | def __init__(self, config, tau=0.63):
16 | """
17 | Args:
18 | tau (float): time constant
19 | """
20 | # param
21 | self.A, self.B = self._to_state_space(
22 | tau, dt=config.DT) # discrete system
23 | super(FirstOrderLagModel, self).__init__(self.A, self.B)
24 |
25 | @staticmethod
26 | def _to_state_space(tau, dt=0.05):
27 | """
28 | Args:
29 | tau (float): time constant
30 | dt (float): discrte time
31 | Returns:
32 | A (numpy.ndarray): discrete A matrix
33 | B (numpy.ndarray): discrete B matrix
34 | """
35 | # continuous
36 | Ac = np.array([[-1./tau, 0., 0., 0.],
37 | [0., -1./tau, 0., 0.],
38 | [1., 0., 0., 0.],
39 | [0., 1., 0., 0.]])
40 | Bc = np.array([[1./tau, 0.],
41 | [0., 1./tau],
42 | [0., 0.],
43 | [0., 0.]])
44 | # to discrete system
45 | A = scipy.linalg.expm(dt*Ac)
46 | # B = np.matmul(np.matmul(scipy.linalg.expm(Ac*dt)-scipy.linalg.expm(Ac*0.), np.linalg.inv(Ac)), Bc)
47 | B = np.zeros_like(Bc)
48 | for m in range(Bc.shape[0]):
49 | for n in range(Bc.shape[1]):
50 | def integrate_fn(tau): return np.matmul(
51 | scipy.linalg.expm(Ac*tau), Bc)[m, n]
52 | sol = integrate.quad(integrate_fn, 0, dt)
53 | B[m, n] = sol[0]
54 |
55 | return A, B
56 |
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/controllers/controller.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from ..envs.cost import calc_cost
4 |
5 |
6 | class Controller():
7 | """ Controller class
8 | """
9 |
10 | def __init__(self, config, model):
11 | """
12 | """
13 | self.config = config
14 | self.model = model
15 |
16 | # get cost func
17 | self.state_cost_fn = config.state_cost_fn
18 | self.terminal_state_cost_fn = config.terminal_state_cost_fn
19 | self.input_cost_fn = config.input_cost_fn
20 |
21 | def obtain_sol(self, curr_x, g_xs):
22 | """ calculate the optimal inputs
23 | Args:
24 | curr_x (numpy.ndarray): current state, shape(state_size, )
25 | g_xs (numpy.ndarrya): goal trajectory, shape(plan_len, state_size)
26 | Returns:
27 | opt_input (numpy.ndarray): optimal input, shape(input_size, )
28 | """
29 | raise NotImplementedError("Implement the algorithm to \
30 | get optimal input")
31 |
32 | def calc_cost(self, curr_x, samples, g_xs):
33 | """ calculate the cost of input samples
34 |
35 | Args:
36 | curr_x (numpy.ndarray): shape(state_size),
37 | current robot position
38 | samples (numpy.ndarray): shape(pop_size, opt_dim),
39 | input samples
40 | g_xs (numpy.ndarray): shape(pred_len, state_size),
41 | goal states
42 | Returns:
43 | costs (numpy.ndarray): shape(pop_size, )
44 | """
45 | # get size
46 | pop_size = samples.shape[0]
47 | g_xs = np.tile(g_xs, (pop_size, 1, 1))
48 |
49 | # calc cost, pred_xs.shape = (pop_size, pred_len+1, state_size)
50 | pred_xs = self.model.predict_traj(curr_x, samples)
51 |
52 | # get particle cost
53 | costs = calc_cost(pred_xs, samples, g_xs,
54 | self.state_cost_fn, self.input_cost_fn,
55 | self.terminal_state_cost_fn)
56 |
57 | return costs
58 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # folders
2 | .vscode/
3 | .pytest_cache/
4 | result/
5 |
6 | # Byte-compiled / optimized / DLL files
7 | __pycache__/
8 | *.py[cod]
9 | *$py.class
10 |
11 | # C extensions
12 | *.so
13 |
14 | # Distribution / packaging
15 | .Python
16 | build/
17 | develop-eggs/
18 | dist/
19 | downloads/
20 | eggs/
21 | .eggs/
22 | lib/
23 | lib64/
24 | parts/
25 | sdist/
26 | var/
27 | wheels/
28 | pip-wheel-metadata/
29 | share/python-wheels/
30 | *.egg-info/
31 | .installed.cfg
32 | *.egg
33 | MANIFEST
34 |
35 | # PyInstaller
36 | # Usually these files are written by a python script from a template
37 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
38 | *.manifest
39 | *.spec
40 |
41 | # Installer logs
42 | pip-log.txt
43 | pip-delete-this-directory.txt
44 |
45 | # Unit test / coverage reports
46 | htmlcov/
47 | .tox/
48 | .nox/
49 | .coverage
50 | .coverage.*
51 | .cache
52 | nosetests.xml
53 | coverage.xml
54 | *.cover
55 | *.py,cover
56 | .hypothesis/
57 | .pytest_cache/
58 |
59 | # Translations
60 | *.mo
61 | *.pot
62 |
63 | # Django stuff:
64 | *.log
65 | local_settings.py
66 | db.sqlite3
67 | db.sqlite3-journal
68 |
69 | # Flask stuff:
70 | instance/
71 | .webassets-cache
72 |
73 | # Scrapy stuff:
74 | .scrapy
75 |
76 | # Sphinx documentation
77 | docs/_build/
78 |
79 | # PyBuilder
80 | target/
81 |
82 | # Jupyter Notebook
83 | .ipynb_checkpoints
84 |
85 | # IPython
86 | profile_default/
87 | ipython_config.py
88 |
89 | # pyenv
90 | .python-version
91 |
92 | # pipenv
93 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
94 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
95 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
96 | # install all needed dependencies.
97 | #Pipfile.lock
98 |
99 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
100 | __pypackages__/
101 |
102 | # Celery stuff
103 | celerybeat-schedule
104 | celerybeat.pid
105 |
106 | # SageMath parsed files
107 | *.sage.py
108 |
109 | # Environments
110 | .env
111 | .venv
112 | venv/
113 | ENV/
114 | env.bak/
115 | venv.bak/
116 |
117 | # Spyder project settings
118 | .spyderproject
119 | .spyproject
120 |
121 | # Rope project settings
122 | .ropeproject
123 |
124 | # mkdocs documentation
125 | /site
126 |
127 | # mypy
128 | .mypy_cache/
129 | .dmypy.json
130 | dmypy.json
131 |
132 | # Pyre type checker
133 | .pyre/
134 |
--------------------------------------------------------------------------------
/tests/env/test_cartpole.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 |
4 | from PythonLinearNonlinearControl.envs.cartpole import CartPoleEnv
5 |
6 | class TestCartPoleEnv():
7 | """
8 | """
9 | def test_step(self):
10 | env = CartPoleEnv()
11 |
12 | curr_x = np.ones(4)
13 | curr_x[2] = np.pi / 6.
14 |
15 | env.reset(init_x=curr_x)
16 |
17 | u = np.ones(1)
18 |
19 | next_x, _, _, _ = env.step(u)
20 |
21 | d_x0 = curr_x[1]
22 | d_x1 = (1. + env.config["mp"] * np.sin(np.pi / 6.) \
23 | * (env.config["l"] * (1.**2) \
24 | + env.config["g"] * np.cos(np.pi / 6.))) \
25 | / (env.config["mc"] + env.config["mp"] * np.sin(np.pi / 6.)**2)
26 | d_x2 = curr_x[3]
27 | d_x3 = (-1. * np.cos(np.pi / 6.) \
28 | - env.config["mp"] * env.config["l"] * (1.**2) \
29 | * np.cos(np.pi / 6.) * np.sin(np.pi / 6.) \
30 | - (env.config["mp"] + env.config["mc"]) * env.config["g"] \
31 | * np.sin(np.pi / 6.)) \
32 | / (env.config["l"] \
33 | * (env.config["mc"] \
34 | + env.config["mp"] * np.sin(np.pi / 6.)**2))
35 |
36 | expected = np.array([d_x0, d_x1, d_x2, d_x3]) * env.config["dt"] \
37 | + curr_x
38 |
39 | assert next_x == pytest.approx(expected, abs=1e-5)
40 |
41 | def test_bound_step(self):
42 | env = CartPoleEnv()
43 |
44 | curr_x = np.ones(4)
45 | curr_x[2] = np.pi / 6.
46 |
47 | env.reset(init_x=curr_x)
48 |
49 | u = np.ones(1) * 1e3
50 |
51 | next_x, _, _, _ = env.step(u)
52 |
53 | u = env.config["input_upper_bound"][0]
54 |
55 | d_x0 = curr_x[1]
56 | d_x1 = (u + env.config["mp"] * np.sin(np.pi / 6.) \
57 | * (env.config["l"] * (1.**2) \
58 | + env.config["g"] * np.cos(np.pi / 6.))) \
59 | / (env.config["mc"] + env.config["mp"] * np.sin(np.pi / 6.)**2)
60 | d_x2 = curr_x[3]
61 | d_x3 = (-u * np.cos(np.pi / 6.) \
62 | - env.config["mp"] * env.config["l"] * (1.**2) \
63 | * np.cos(np.pi / 6.) * np.sin(np.pi / 6.) \
64 | - (env.config["mp"] + env.config["mc"]) * env.config["g"] \
65 | * np.sin(np.pi / 6.)) \
66 | / (env.config["l"] \
67 | * (env.config["mc"] \
68 | + env.config["mp"] * np.sin(np.pi / 6.)**2))
69 |
70 | expected = np.array([d_x0, d_x1, d_x2, d_x3]) * env.config["dt"] \
71 | + curr_x
72 |
73 | assert next_x == pytest.approx(expected, abs=1e-5)
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/controllers/random.py:
--------------------------------------------------------------------------------
1 | from logging import getLogger
2 |
3 | import numpy as np
4 | import scipy.stats as stats
5 |
6 | from .controller import Controller
7 | from ..envs.cost import calc_cost
8 |
9 | logger = getLogger(__name__)
10 |
11 |
12 | class RandomShooting(Controller):
13 | """ Random Shooting Method for linear and nonlinear method
14 |
15 | Attributes:
16 | history_u (list[numpy.ndarray]): time history of optimal input
17 | Ref:
18 | Chua, K., Calandra, R., McAllister, R., & Levine, S. (2018).
19 | Deep reinforcement learning in a handful of trials
20 | using probabilistic dynamics models.
21 | In Advances in Neural Information Processing Systems (pp. 4754-4765).
22 | """
23 |
24 | def __init__(self, config, model):
25 | super(RandomShooting, self).__init__(config, model)
26 |
27 | # model
28 | self.model = model
29 |
30 | # general parameters
31 | self.pred_len = config.PRED_LEN
32 | self.input_size = config.INPUT_SIZE
33 |
34 | # cem parameters
35 | self.pop_size = config.opt_config["Random"]["popsize"]
36 | self.opt_dim = self.input_size * self.pred_len
37 |
38 | # get bound
39 | self.input_upper_bounds = np.tile(config.INPUT_UPPER_BOUND,
40 | self.pred_len)
41 | self.input_lower_bounds = np.tile(config.INPUT_LOWER_BOUND,
42 | self.pred_len)
43 |
44 | # get cost func
45 | self.state_cost_fn = config.state_cost_fn
46 | self.terminal_state_cost_fn = config.terminal_state_cost_fn
47 | self.input_cost_fn = config.input_cost_fn
48 |
49 | # save
50 | self.history_u = []
51 |
52 | def obtain_sol(self, curr_x, g_xs):
53 | """ calculate the optimal inputs
54 |
55 | Args:
56 | curr_x (numpy.ndarray): current state, shape(state_size, )
57 | g_xs (numpy.ndarrya): goal trajectory, shape(plan_len, state_size)
58 | Returns:
59 | opt_input (numpy.ndarray): optimal input, shape(input_size, )
60 | """
61 | # set different seed
62 | np.random.seed()
63 |
64 | samples = np.random.uniform(self.input_lower_bounds,
65 | self.input_upper_bounds,
66 | [self.pop_size, self.opt_dim])
67 | # calc cost
68 | costs = self.calc_cost(curr_x,
69 | samples.reshape(self.pop_size,
70 | self.pred_len,
71 | self.input_size),
72 | g_xs)
73 | # solution
74 | sol = samples[np.argmin(costs)]
75 |
76 | return sol.reshape(self.pred_len, self.input_size).copy()[0]
77 |
78 | def __str__(self):
79 | return "RandomShooting"
80 |
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/plotters/animator.py:
--------------------------------------------------------------------------------
1 | import os
2 | from logging import getLogger
3 |
4 | import numpy as np
5 | import matplotlib.pyplot as plt
6 | from matplotlib.animation import FuncAnimation
7 | import matplotlib.animation as animation
8 |
9 | logger = getLogger(__name__)
10 |
11 |
12 | class Animator():
13 | """ animation class
14 | """
15 |
16 | def __init__(self, env, args=None):
17 | """
18 | """
19 | self.env_name = "Env"
20 | self.result_dir = "./result"
21 | self.controller_type = "controller"
22 |
23 | if args is not None:
24 | self.env_name = args.env
25 | self.result_dir = args.result_dir
26 | self.controller_type = args.controller_type
27 |
28 | self.interval = env.config["dt"] * 1000. # to ms
29 | self.plot_func = env.plot_func
30 |
31 | self.imgs = []
32 |
33 | def _setup(self):
34 | """ set up figure of animation
35 | """
36 | # make fig
37 | self.anim_fig = plt.figure()
38 |
39 | # axis
40 | self.axis = self.anim_fig.add_subplot(111)
41 | self.axis.set_aspect('equal', adjustable='box')
42 |
43 | self.imgs = self.plot_func(self.axis)
44 |
45 | def _update_img(self, i, history_x, history_g_x):
46 | """ update animation
47 |
48 | Args:
49 | i (int): frame count
50 | history_x (numpy.ndarray): history of state,
51 | shape(iters, state_size)
52 | history_g (numpy.ndarray): history of goal state,
53 | shape(iters, input_size)
54 | """
55 | self.plot_func(self.imgs, i, history_x, history_g_x)
56 |
57 | def draw(self, history_x, history_g_x=None):
58 | """draw the animation and save
59 |
60 | Args:
61 | history_x (numpy.ndarray): history of state,
62 | shape(iters, state_size)
63 | history_g (numpy.ndarray): history of goal state,
64 | shape(iters, input_size)
65 | Returns:
66 | None
67 | """
68 | # set up animation figures
69 | self._setup()
70 | def _update_img(i): return self._update_img(i, history_x, history_g_x)
71 |
72 | # Set up formatting for the movie files
73 | Writer = animation.writers['ffmpeg']
74 | writer = Writer(fps=15, metadata=dict(artist='Me'), bitrate=1800)
75 |
76 | # call funcanimation
77 | ani = FuncAnimation(
78 | self.anim_fig,
79 | _update_img, interval=self.interval, frames=len(history_x)-1)
80 |
81 | # save animation
82 | path = os.path.join(self.result_dir, self.controller_type,
83 | "animation-" + self.env_name + ".mp4")
84 | logger.info("Saved Animation to {} ...".format(path))
85 |
86 | ani.save(path, writer=writer)
87 |
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/envs/nonlinear_sample_system.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import scipy
3 | from scipy import integrate
4 | from .env import Env
5 | from ..common.utils import update_state_with_Runge_Kutta
6 |
7 |
8 | class NonlinearSampleSystemEnv(Env):
9 | """ Nonlinear Sample Env
10 | """
11 |
12 | def __init__(self):
13 | """
14 | """
15 | self.config = {"state_size": 2,
16 | "input_size": 1,
17 | "dt": 0.01,
18 | "max_step": 2000,
19 | "input_lower_bound": [-0.5],
20 | "input_upper_bound": [0.5],
21 | }
22 |
23 | super(NonlinearSampleSystemEnv, self).__init__(self.config)
24 |
25 | def reset(self, init_x=np.array([2., 0.])):
26 | """ reset state
27 | Returns:
28 | init_x (numpy.ndarray): initial state, shape(state_size, )
29 | info (dict): information
30 | """
31 | self.step_count = 0
32 |
33 | self.curr_x = np.zeros(self.config["state_size"])
34 |
35 | if init_x is not None:
36 | self.curr_x = init_x
37 |
38 | # goal
39 | self.g_x = np.array([0., 0.])
40 |
41 | # clear memory
42 | self.history_x = []
43 | self.history_g_x = []
44 |
45 | return self.curr_x, {"goal_state": self.g_x}
46 |
47 | def step(self, u):
48 | """
49 | Args:
50 | u (numpy.ndarray) : input, shape(input_size, )
51 | Returns:
52 | next_x (numpy.ndarray): next state, shape(state_size, )
53 | cost (float): costs
54 | done (bool): end the simulation or not
55 | info (dict): information
56 | """
57 | # clip action
58 | u = np.clip(u,
59 | self.config["input_lower_bound"],
60 | self.config["input_upper_bound"])
61 |
62 | functions = [self._func_x_1, self._func_x_2]
63 |
64 | next_x = update_state_with_Runge_Kutta(self.curr_x, u,
65 | functions, self.config["dt"],
66 | batch=False)
67 |
68 | # cost
69 | cost = 0
70 | cost = np.sum(u**2)
71 | cost += np.sum((self.curr_x - self.g_x)**2)
72 |
73 | # save history
74 | self.history_x.append(next_x.flatten())
75 | self.history_g_x.append(self.g_x.flatten())
76 |
77 | # update
78 | self.curr_x = next_x.flatten()
79 | # update costs
80 | self.step_count += 1
81 |
82 | return next_x.flatten(), cost, \
83 | self.step_count > self.config["max_step"], \
84 | {"goal_state": self.g_x}
85 |
86 | def _func_x_1(self, x, u):
87 | x_dot = x[1]
88 | return x_dot
89 |
90 | def _func_x_2(self, x, u):
91 | x_dot = (1. - x[0]**2 - x[1]**2) * x[1] - x[0] + u[0]
92 | return x_dot
93 |
94 | def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None):
95 | """
96 | """
97 | raise ValueError("NonlinearSampleSystemEnv does not have animation")
98 |
--------------------------------------------------------------------------------
/tests/models/test_two_wheeled.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 |
4 | from PythonLinearNonlinearControl.models.two_wheeled import TwoWheeledModel
5 | from PythonLinearNonlinearControl.configs.two_wheeled \
6 | import TwoWheeledConfigModule
7 |
8 | class TestTwoWheeledModel():
9 | """
10 | """
11 | def test_step(self):
12 | config = TwoWheeledConfigModule()
13 | two_wheeled_model = TwoWheeledModel(config)
14 |
15 | curr_x = np.ones(config.STATE_SIZE)
16 | curr_x[-1] = np.pi / 6.
17 | u = np.ones((1, config.INPUT_SIZE))
18 |
19 | next_x = two_wheeled_model.predict_traj(curr_x, u)
20 |
21 | pos_x = np.cos(curr_x[-1]) * u[0, 0] * config.DT + curr_x[0]
22 | pos_y = np.sin(curr_x[-1]) * u[0, 0] * config.DT + curr_x[1]
23 |
24 | expected = np.array([[1., 1., np.pi / 6.],
25 | [pos_x, pos_y, curr_x[-1] + u[0, 1] * config.DT]])
26 |
27 | assert next_x == pytest.approx(expected)
28 |
29 | def test_predict_traj(self):
30 | config = TwoWheeledConfigModule()
31 | two_wheeled_model = TwoWheeledModel(config)
32 |
33 | curr_x = np.ones(config.STATE_SIZE)
34 | curr_x[-1] = np.pi / 6.
35 | u = np.ones((1, config.INPUT_SIZE))
36 |
37 | pred_xs = two_wheeled_model.predict_traj(curr_x, u)
38 |
39 | u = np.tile(u, (1, 1, 1))
40 | pred_xs_alltogether = two_wheeled_model.predict_traj(curr_x, u)[0]
41 |
42 | assert pred_xs_alltogether == pytest.approx(pred_xs)
43 |
44 | def test_gradient_state(self):
45 |
46 | config = TwoWheeledConfigModule()
47 | two_wheeled_model = TwoWheeledModel(config)
48 |
49 | xs = np.ones((1, config.STATE_SIZE))
50 | xs[0, -1] = np.pi / 6.
51 | us = np.ones((1, config.INPUT_SIZE))
52 |
53 | grad = two_wheeled_model.calc_f_x(xs, us, config.DT)
54 |
55 | # expected cost
56 | expected_grad = np.zeros((1, config.STATE_SIZE, config.STATE_SIZE))
57 | eps = 1e-4
58 |
59 | for i in range(config.STATE_SIZE):
60 | tmp_x = xs.copy()
61 | tmp_x[0, i] = xs[0, i] + eps
62 | forward = \
63 | two_wheeled_model.predict_next_state(tmp_x[0], us[0])
64 | tmp_x = xs.copy()
65 | tmp_x[0, i] = xs[0, i] - eps
66 | backward = \
67 | two_wheeled_model.predict_next_state(tmp_x[0], us[0])
68 |
69 | expected_grad[0, :, i] = (forward - backward) / (2. * eps)
70 |
71 | assert grad == pytest.approx(expected_grad)
72 |
73 | def test_gradient_input(self):
74 |
75 | config = TwoWheeledConfigModule()
76 | two_wheeled_model = TwoWheeledModel(config)
77 |
78 | xs = np.ones((1, config.STATE_SIZE))
79 | xs[0, -1] = np.pi / 6.
80 | us = np.ones((1, config.INPUT_SIZE))
81 |
82 | grad = two_wheeled_model.calc_f_u(xs, us, config.DT)
83 |
84 | # expected cost
85 | expected_grad = np.zeros((1, config.STATE_SIZE, config.INPUT_SIZE))
86 | eps = 1e-4
87 |
88 | for i in range(config.INPUT_SIZE):
89 | tmp_u = us.copy()
90 | tmp_u[0, i] = us[0, i] + eps
91 | forward = \
92 | two_wheeled_model.predict_next_state(xs[0], tmp_u[0])
93 | tmp_u = us.copy()
94 | tmp_u[0, i] = us[0, i] - eps
95 | backward = \
96 | two_wheeled_model.predict_next_state(xs[0], tmp_u[0])
97 |
98 | expected_grad[0, :, i] = (forward - backward) / (2. * eps)
99 |
100 | assert grad == pytest.approx(expected_grad)
101 |
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/plotters/plot_objs.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import numpy as np
4 | import matplotlib.pyplot as plt
5 |
6 | from ..common.utils import rotate_pos
7 |
8 |
9 | def circle(center_x, center_y, radius, start=0., end=2*np.pi, n_point=100):
10 | """ Create circle matrix
11 |
12 | Args:
13 | center_x (float): the center x position of the circle
14 | center_y (float): the center y position of the circle
15 | radius (float): in meters
16 | start (float): start angle
17 | end (float): end angle
18 | Returns:
19 | circle x : numpy.ndarray
20 | circle y : numpy.ndarray
21 | """
22 | diff = end - start
23 |
24 | circle_xs = []
25 | circle_ys = []
26 |
27 | for i in range(n_point + 1):
28 | circle_xs.append(center_x + radius * np.cos(i*diff/n_point + start))
29 | circle_ys.append(center_y + radius * np.sin(i*diff/n_point + start))
30 |
31 | return np.array(circle_xs), np.array(circle_ys)
32 |
33 |
34 | def circle_with_angle(center_x, center_y, radius, angle):
35 | """ Create circle matrix with angle line matrix
36 |
37 | Args:
38 | center_x (float): the center x position of the circle
39 | center_y (float): the center y position of the circle
40 | radius (float): in meters
41 | angle (float): in radians
42 | Returns:
43 | circle_x (numpy.ndarray): x data of circle
44 | circle_y (numpy.ndarray): y data of circle
45 | angle_x (numpy.ndarray): x data of circle angle
46 | angle_y (numpy.ndarray): y data of circle angle
47 | """
48 | circle_x, circle_y = circle(center_x, center_y, radius)
49 |
50 | angle_x = np.array([center_x, center_x + np.cos(angle) * radius])
51 | angle_y = np.array([center_y, center_y + np.sin(angle) * radius])
52 |
53 | return circle_x, circle_y, angle_x, angle_y
54 |
55 |
56 | def square(center_x, center_y, shape, angle):
57 | """ Create square
58 |
59 | Args:
60 | center_x (float): the center x position of the square
61 | center_y (float): the center y position of the square
62 | shape (tuple): the square's shape(width/2, height/2)
63 | angle (float): in radians
64 | Returns:
65 | square_x (numpy.ndarray): shape(5, ), counterclockwise from right-up
66 | square_y (numpy.ndarray): shape(5, ), counterclockwise from right-up
67 | """
68 | # start with the up right points
69 | # create point in counterclockwise, local
70 | square_xy = np.array([[shape[0], shape[1]],
71 | [-shape[0], shape[1]],
72 | [-shape[0], -shape[1]],
73 | [shape[0], -shape[1]],
74 | [shape[0], shape[1]]])
75 | # translate position to world
76 | # rotation
77 | trans_points = rotate_pos(square_xy, angle)
78 | # translation
79 | trans_points += np.array([center_x, center_y])
80 |
81 | return trans_points[:, 0], trans_points[:, 1]
82 |
83 |
84 | def square_with_angle(center_x, center_y, shape, angle):
85 | """ Create square with angle line
86 |
87 | Args:
88 | center_x (float): the center x position of the square
89 | center_y (float): the center y position of the square
90 | shape (tuple): the square's shape(width/2, height/2)
91 | angle (float): in radians
92 | Returns:
93 | square_x (numpy.ndarray): shape(5, ), counterclockwise from right-up
94 | square_y (numpy.ndarray): shape(5, ), counterclockwise from right-up
95 | angle_x (numpy.ndarray): x data of square angle
96 | angle_y (numpy.ndarray): y data of square angle
97 | """
98 | square_x, square_y = square(center_x, center_y, shape, angle)
99 |
100 | angle_x = np.array([center_x, center_x + np.cos(angle) * shape[0]])
101 | angle_y = np.array([center_y, center_y + np.sin(angle) * shape[1]])
102 |
103 | return square_x, square_y, angle_x, angle_y
104 |
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/envs/first_order_lag.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import scipy
3 | from scipy import integrate
4 | from .env import Env
5 |
6 |
7 | class FirstOrderLagEnv(Env):
8 | """ First Order Lag System Env
9 | """
10 |
11 | def __init__(self, tau=0.63):
12 | """
13 | """
14 | self.config = {"state_size": 4,
15 | "input_size": 2,
16 | "dt": 0.05,
17 | "max_step": 500,
18 | "input_lower_bound": [-0.5, -0.5],
19 | "input_upper_bound": [0.5, 0.5],
20 | }
21 |
22 | super(FirstOrderLagEnv, self).__init__(self.config)
23 |
24 | # to get discrete system matrix
25 | self.A, self.B = self._to_state_space(tau, dt=self.config["dt"])
26 |
27 | @staticmethod
28 | def _to_state_space(tau, dt=0.05):
29 | """
30 | Args:
31 | tau (float): time constant
32 | dt (float): discrte time
33 | Returns:
34 | A (numpy.ndarray): discrete A matrix
35 | B (numpy.ndarray): discrete B matrix
36 | """
37 | # continuous
38 | Ac = np.array([[-1./tau, 0., 0., 0.],
39 | [0., -1./tau, 0., 0.],
40 | [1., 0., 0., 0.],
41 | [0., 1., 0., 0.]])
42 | Bc = np.array([[1./tau, 0.],
43 | [0., 1./tau],
44 | [0., 0.],
45 | [0., 0.]])
46 | # to discrete system
47 | A = scipy.linalg.expm(dt*Ac)
48 | # B = np.matmul(np.matmul(scipy.linalg.expm(Ac*dt) -
49 | # scipy.linalg.expm(Ac*0.), np.linalg.inv(Ac)),\
50 | # Bc)
51 | B = np.zeros_like(Bc)
52 | for m in range(Bc.shape[0]):
53 | for n in range(Bc.shape[1]):
54 | integrate_fn =\
55 | lambda tau: np.matmul(scipy.linalg.expm(Ac*tau), Bc)[m, n]
56 | sol = integrate.quad(integrate_fn, 0, dt)
57 | B[m, n] = sol[0]
58 |
59 | return A, B
60 |
61 | def reset(self, init_x=None):
62 | """ reset state
63 | Returns:
64 | init_x (numpy.ndarray): initial state, shape(state_size, )
65 | info (dict): information
66 | """
67 | self.step_count = 0
68 |
69 | self.curr_x = np.zeros(self.config["state_size"])
70 |
71 | if init_x is not None:
72 | self.curr_x = init_x
73 |
74 | # goal
75 | self.g_x = np.array([0., 0, -2., 3.])
76 |
77 | # clear memory
78 | self.history_x = []
79 | self.history_g_x = []
80 |
81 | return self.curr_x, {"goal_state": self.g_x}
82 |
83 | def step(self, u):
84 | """
85 | Args:
86 | u (numpy.ndarray) : input, shape(input_size, )
87 | Returns:
88 | next_x (numpy.ndarray): next state, shape(state_size, )
89 | cost (float): costs
90 | done (bool): end the simulation or not
91 | info (dict): information
92 | """
93 | # clip action
94 | u = np.clip(u,
95 | self.config["input_lower_bound"],
96 | self.config["input_upper_bound"])
97 |
98 | next_x = np.matmul(self.A, self.curr_x[:, np.newaxis]) \
99 | + np.matmul(self.B, u[:, np.newaxis])
100 |
101 | # cost
102 | cost = 0
103 | cost = np.sum(u**2)
104 | cost += np.sum((self.curr_x - self.g_x)**2)
105 |
106 | # save history
107 | self.history_x.append(next_x.flatten())
108 | self.history_g_x.append(self.g_x.flatten())
109 |
110 | # update
111 | self.curr_x = next_x.flatten()
112 | # update costs
113 | self.step_count += 1
114 |
115 | return next_x.flatten(), cost, \
116 | self.step_count > self.config["max_step"], \
117 | {"goal_state": self.g_x}
118 |
119 | def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None):
120 | """
121 | """
122 | raise ValueError("FirstOrderLag does not have animation")
123 |
--------------------------------------------------------------------------------
/tests/models/test_cartpole.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 |
4 | from PythonLinearNonlinearControl.models.cartpole import CartPoleModel
5 | from PythonLinearNonlinearControl.configs.cartpole \
6 | import CartPoleConfigModule
7 |
8 | class TestCartPoleModel():
9 | """
10 | """
11 | def test_step(self):
12 | config = CartPoleConfigModule()
13 | cartpole_model = CartPoleModel(config)
14 |
15 | curr_x = np.ones(4)
16 | curr_x[2] = np.pi / 6.
17 |
18 | us = np.ones((1, 1))
19 |
20 | next_x = cartpole_model.predict_traj(curr_x, us)
21 |
22 | d_x0 = curr_x[1]
23 | d_x1 = (1. + config.MP * np.sin(np.pi / 6.) \
24 | * (config.L * (1.**2) \
25 | + config.G * np.cos(np.pi / 6.))) \
26 | / (config.MC + config.MP * np.sin(np.pi / 6.)**2)
27 | d_x2 = curr_x[3]
28 | d_x3 = (-1. * np.cos(np.pi / 6.) \
29 | - config.MP * config.L * (1.**2) \
30 | * np.cos(np.pi / 6.) * np.sin(np.pi / 6.) \
31 | - (config.MP + config.MC) * config.G \
32 | * np.sin(np.pi / 6.)) \
33 | / (config.L \
34 | * (config.MC \
35 | + config.MP * np.sin(np.pi / 6.)**2))
36 |
37 | expected = np.array([d_x0, d_x1, d_x2, d_x3]) * config.DT \
38 | + curr_x
39 |
40 | expected = np.stack((curr_x, expected), axis=0)
41 |
42 | assert next_x == pytest.approx(expected, abs=1e-5)
43 |
44 | def test_predict_traj(self):
45 | config = CartPoleConfigModule()
46 | cartpole_model = CartPoleModel(config)
47 |
48 | curr_x = np.ones(config.STATE_SIZE)
49 | curr_x[-1] = np.pi / 6.
50 | u = np.ones((1, config.INPUT_SIZE))
51 |
52 | pred_xs = cartpole_model.predict_traj(curr_x, u)
53 |
54 | u = np.tile(u, (2, 1, 1))
55 | pred_xs_alltogether = cartpole_model.predict_traj(curr_x, u)[0]
56 |
57 | assert pred_xs_alltogether == pytest.approx(pred_xs)
58 |
59 | def test_gradient_state(self):
60 |
61 | config = CartPoleConfigModule()
62 | cartpole_model = CartPoleModel(config)
63 |
64 | xs = np.ones((1, config.STATE_SIZE)) \
65 | * np.random.rand(1, config.STATE_SIZE)
66 | xs[0, -1] = np.pi / 6.
67 | us = np.ones((1, config.INPUT_SIZE))
68 |
69 | grad = cartpole_model.calc_f_x(xs, us, config.DT)
70 |
71 | # expected cost
72 | expected_grad = np.zeros((1, config.STATE_SIZE, config.STATE_SIZE))
73 | eps = 1e-4
74 |
75 | for i in range(config.STATE_SIZE):
76 | tmp_x = xs.copy()
77 | tmp_x[0, i] = xs[0, i] + eps
78 | forward = \
79 | cartpole_model.predict_next_state(tmp_x[0], us[0])
80 | tmp_x = xs.copy()
81 | tmp_x[0, i] = xs[0, i] - eps
82 | backward = \
83 | cartpole_model.predict_next_state(tmp_x[0], us[0])
84 |
85 | expected_grad[0, :, i] = (forward - backward) / (2. * eps)
86 |
87 | assert grad == pytest.approx(expected_grad)
88 |
89 | def test_gradient_input(self):
90 |
91 | config = CartPoleConfigModule()
92 | cartpole_model = CartPoleModel(config)
93 |
94 | xs = np.ones((1, config.STATE_SIZE)) \
95 | * np.random.rand(1, config.STATE_SIZE)
96 | xs[0, -1] = np.pi / 6.
97 | us = np.ones((1, config.INPUT_SIZE))
98 |
99 | grad = cartpole_model.calc_f_u(xs, us, config.DT)
100 |
101 | # expected cost
102 | expected_grad = np.zeros((1, config.STATE_SIZE, config.INPUT_SIZE))
103 | eps = 1e-4
104 |
105 | for i in range(config.INPUT_SIZE):
106 | tmp_u = us.copy()
107 | tmp_u[0, i] = us[0, i] + eps
108 | forward = \
109 | cartpole_model.predict_next_state(xs[0], tmp_u[0])
110 | tmp_u = us.copy()
111 | tmp_u[0, i] = us[0, i] - eps
112 | backward = \
113 | cartpole_model.predict_next_state(xs[0], tmp_u[0])
114 |
115 | expected_grad[0, :, i] = (forward - backward) / (2. * eps)
116 |
117 | assert grad == pytest.approx(expected_grad)
118 |
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/controllers/nmpc.py:
--------------------------------------------------------------------------------
1 | from logging import getLogger
2 |
3 | import numpy as np
4 | import scipy.stats as stats
5 |
6 | from .controller import Controller
7 | from ..envs.cost import calc_cost
8 | from ..common.utils import line_search
9 |
10 | logger = getLogger(__name__)
11 |
12 |
13 | class NMPC(Controller):
14 | def __init__(self, config, model):
15 | """ Nonlinear Model Predictive Control using pure gradient algorithm
16 | """
17 | super(NMPC, self).__init__(config, model)
18 |
19 | # model
20 | self.model = model
21 |
22 | # get cost func
23 | self.state_cost_fn = config.state_cost_fn
24 | self.terminal_state_cost_fn = config.terminal_state_cost_fn
25 | self.input_cost_fn = config.input_cost_fn
26 |
27 | # controller parameters
28 | self.threshold = config.opt_config["NMPC"]["threshold"]
29 | self.max_iters = config.opt_config["NMPC"]["max_iters"]
30 | self.learning_rate = config.opt_config["NMPC"]["learning_rate"]
31 | self.optimizer_mode = config.opt_config["NMPC"]["optimizer_mode"]
32 |
33 | # general parameters
34 | self.pred_len = config.PRED_LEN
35 | self.input_size = config.INPUT_SIZE
36 | self.dt = config.DT
37 |
38 | # initialize
39 | self.prev_sol = np.zeros((self.pred_len, self.input_size))
40 |
41 | def obtain_sol(self, curr_x, g_xs):
42 | """ calculate the optimal inputs
43 | Args:
44 | curr_x (numpy.ndarray): current state, shape(state_size, )
45 | g_xs (numpy.ndarrya): goal trajectory, shape(plan_len, state_size)
46 | Returns:
47 | opt_input (numpy.ndarray): optimal input, shape(input_size, )
48 | """
49 | sol = self.prev_sol.copy()
50 | count = 0
51 | # use for Conjugate method
52 | conjugate_d = None
53 | conjugate_prev_d = None
54 | conjugate_s = None
55 | conjugate_beta = None
56 |
57 | while True:
58 | # shape(pred_len+1, state_size)
59 | pred_xs = self.model.predict_traj(curr_x, sol)
60 | # shape(pred_len, state_size)
61 | pred_lams = self.model.predict_adjoint_traj(pred_xs, sol, g_xs)
62 |
63 | F_hat = self.config.gradient_hamiltonian_input(
64 | pred_xs, pred_lams, sol, g_xs)
65 |
66 | if np.linalg.norm(F_hat) < self.threshold:
67 | break
68 |
69 | if count > self.max_iters:
70 | logger.debug(" break max iteartion at F : `{}".format(
71 | np.linalg.norm(F_hat)))
72 | break
73 |
74 | if self.optimizer_mode == "conjugate":
75 | conjugate_d = F_hat.flatten()
76 |
77 | if conjugate_prev_d is None: # initial
78 | conjugate_s = conjugate_d
79 | conjugate_prev_d = conjugate_d
80 | F_hat = conjugate_s.reshape(F_hat.shape)
81 | else:
82 | prev_d = np.dot(conjugate_prev_d, conjugate_prev_d)
83 | d = np.dot(conjugate_d, conjugate_d - conjugate_prev_d)
84 | conjugate_beta = (d + 1e-6) / (prev_d + 1e-6)
85 |
86 | conjugate_s = conjugate_d + conjugate_beta * conjugate_s
87 | conjugate_prev_d = conjugate_d
88 | F_hat = conjugate_s.reshape(F_hat.shape)
89 |
90 | def compute_eval_val(u):
91 | pred_xs = self.model.predict_traj(curr_x, u)
92 | state_cost = np.sum(self.config.state_cost_fn(
93 | pred_xs[1:-1], g_xs[1:-1]))
94 | input_cost = np.sum(self.config.input_cost_fn(u))
95 | terminal_cost = np.sum(
96 | self.config.terminal_state_cost_fn(pred_xs[-1], g_xs[-1]))
97 | return state_cost + input_cost + terminal_cost
98 |
99 | alpha = line_search(F_hat, sol,
100 | compute_eval_val, init_alpha=self.learning_rate)
101 |
102 | sol -= alpha * F_hat
103 | count += 1
104 |
105 | # update us for next optimization
106 | self.prev_sol = np.concatenate(
107 | (sol[1:], np.zeros((1, self.input_size))), axis=0)
108 |
109 | return sol[0]
110 |
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/controllers/mppi.py:
--------------------------------------------------------------------------------
1 | from logging import getLogger
2 |
3 | import numpy as np
4 | import scipy.stats as stats
5 |
6 | from .controller import Controller
7 | from ..envs.cost import calc_cost
8 |
9 | logger = getLogger(__name__)
10 |
11 |
12 | class MPPI(Controller):
13 | """ Model Predictive Path Integral for linear and nonlinear method
14 |
15 | Attributes:
16 | history_u (list[numpy.ndarray]): time history of optimal input
17 | Ref:
18 | Nagabandi, A., Konoglie, K., Levine, S., & Kumar, V. (2019).
19 | Deep Dynamics Models for Learning Dexterous Manipulation.
20 | arXiv preprint arXiv:1909.11652.
21 | """
22 |
23 | def __init__(self, config, model):
24 | super(MPPI, self).__init__(config, model)
25 |
26 | # model
27 | self.model = model
28 |
29 | # general parameters
30 | self.pred_len = config.PRED_LEN
31 | self.input_size = config.INPUT_SIZE
32 |
33 | # mppi parameters
34 | self.beta = config.opt_config["MPPI"]["beta"]
35 | self.pop_size = config.opt_config["MPPI"]["popsize"]
36 | self.kappa = config.opt_config["MPPI"]["kappa"]
37 | self.noise_sigma = config.opt_config["MPPI"]["noise_sigma"]
38 | self.opt_dim = self.input_size * self.pred_len
39 |
40 | # get bound
41 | self.input_upper_bounds = np.tile(config.INPUT_UPPER_BOUND,
42 | (self.pred_len, 1))
43 | self.input_lower_bounds = np.tile(config.INPUT_LOWER_BOUND,
44 | (self.pred_len, 1))
45 |
46 | # get cost func
47 | self.state_cost_fn = config.state_cost_fn
48 | self.terminal_state_cost_fn = config.terminal_state_cost_fn
49 | self.input_cost_fn = config.input_cost_fn
50 |
51 | # init mean
52 | self.prev_sol = np.tile((config.INPUT_UPPER_BOUND
53 | + config.INPUT_LOWER_BOUND) / 2.,
54 | self.pred_len)
55 | self.prev_sol = self.prev_sol.reshape(self.pred_len, self.input_size)
56 |
57 | # save
58 | self.history_u = [np.zeros(self.input_size)]
59 |
60 | def clear_sol(self):
61 | """ clear prev sol
62 | """
63 | logger.debug("Clear Solution")
64 | self.prev_sol = \
65 | (self.input_upper_bounds + self.input_lower_bounds) / 2.
66 | self.prev_sol = self.prev_sol.reshape(self.pred_len, self.input_size)
67 |
68 | def obtain_sol(self, curr_x, g_xs):
69 | """ calculate the optimal inputs
70 |
71 | Args:
72 | curr_x (numpy.ndarray): current state, shape(state_size, )
73 | g_xs (numpy.ndarrya): goal trajectory, shape(plan_len, state_size)
74 | Returns:
75 | opt_input (numpy.ndarray): optimal input, shape(input_size, )
76 | """
77 | # get noised inputs
78 | noise = np.random.normal(
79 | loc=0, scale=1.0, size=(self.pop_size, self.pred_len,
80 | self.input_size)) * self.noise_sigma
81 | noised_inputs = noise.copy()
82 |
83 | for t in range(self.pred_len):
84 | if t > 0:
85 | noised_inputs[:, t, :] = self.beta \
86 | * (self.prev_sol[t, :]
87 | + noise[:, t, :]) \
88 | + (1 - self.beta) \
89 | * noised_inputs[:, t-1, :]
90 | else:
91 | noised_inputs[:, t, :] = self.beta \
92 | * (self.prev_sol[t, :]
93 | + noise[:, t, :]) \
94 | + (1 - self.beta) \
95 | * self.history_u[-1]
96 |
97 | # clip actions
98 | noised_inputs = np.clip(
99 | noised_inputs, self.input_lower_bounds, self.input_upper_bounds)
100 |
101 | # calc cost
102 | costs = self.calc_cost(curr_x, noised_inputs, g_xs)
103 | rewards = -costs
104 |
105 | # mppi update
106 | # normalize and get sum of reward
107 | # exp_rewards.shape = (N, )
108 | exp_rewards = np.exp(self.kappa * (rewards - np.max(rewards)))
109 | denom = np.sum(exp_rewards) + 1e-10 # avoid numeric error
110 |
111 | # weight actions
112 | weighted_inputs = exp_rewards[:, np.newaxis, np.newaxis] \
113 | * noised_inputs
114 | sol = np.sum(weighted_inputs, 0) / denom
115 |
116 | # update
117 | self.prev_sol[:-1] = sol[1:]
118 | self.prev_sol[-1] = sol[-1] # last use the terminal input
119 |
120 | # log
121 | self.history_u.append(sol[0])
122 |
123 | return sol[0]
124 |
125 | def __str__(self):
126 | return "MPPI"
127 |
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/helper.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import datetime
3 | import json
4 | import os
5 | import sys
6 | import six
7 | import pickle
8 | from logging import DEBUG, basicConfig, getLogger, FileHandler, StreamHandler, Formatter, Logger
9 |
10 |
11 | def make_logger(save_dir):
12 | """
13 | Args:
14 | save_dir (str): save directory
15 | """
16 | # base config setting
17 | basicConfig(
18 | format='[%(asctime)s] %(name)s %(levelname)s: %(message)s',
19 | datefmt='%Y-%m-%d %H:%M:%S'
20 | )
21 |
22 | # mypackage log level
23 | logger = getLogger("PythonLinearNonlinearControl")
24 | logger.setLevel(DEBUG)
25 |
26 | # file handler
27 | log_path = os.path.join(save_dir, "log.txt")
28 | file_handler = FileHandler(log_path)
29 | file_handler.setLevel(DEBUG)
30 | file_handler.setFormatter(Formatter('%(message)s'))
31 | logger.addHandler(file_handler)
32 |
33 | # sh handler
34 | # sh_handler = StreamHandler()
35 | # logger.addHandler(sh_handler)
36 |
37 |
38 | def int_tuple(s):
39 | """ transform str to tuple
40 | Args:
41 | s (str): strings that you want to change
42 | Returns:
43 | tuple
44 | """
45 | return tuple(int(i) for i in s.split(','))
46 |
47 |
48 | def bool_flag(s):
49 | """ transform str to bool flg
50 | Args:
51 | s (str): strings that you want to change
52 | """
53 | if s == '1':
54 | return True
55 | elif s == '0':
56 | return False
57 | msg = 'Invalid value "%s" for bool flag (should be 0 or 1)'
58 | raise ValueError(msg % s)
59 |
60 |
61 | def file_exists(path):
62 | """ Check file existence on given path
63 | Args:
64 | path (str): path of the file to check existence
65 | Returns:
66 | file_existence (bool): True if file exists otherwise False
67 | """
68 | return os.path.exists(path)
69 |
70 |
71 | def create_dir_if_not_exist(outdir):
72 | """ Check directory existence and creates new directory if not exist
73 | Args:
74 | outdir (str): path of the file to create directory
75 | RuntimeError:
76 | file exists in outdir but it is not a directory
77 | """
78 | if file_exists(outdir):
79 | if not os.path.isdir(outdir):
80 | raise RuntimeError('{} is not a directory'.format(outdir))
81 | else:
82 | return
83 | os.makedirs(outdir)
84 |
85 |
86 | def write_text_to_file(file_path, data):
87 | """ Write given text data to file
88 | Args:
89 | file_path (str): path of the file to write data
90 | data (str): text to write to the file
91 | """
92 | with open(file_path, 'w') as f:
93 | f.write(data)
94 |
95 |
96 | def read_text_from_file(file_path):
97 | """ Read given file as text
98 | Args:
99 | file_path (str): path of the file to read data
100 | Returns
101 | data (str): text read from the file
102 | """
103 | with open(file_path, 'r') as f:
104 | return f.read()
105 |
106 |
107 | def save_pickle(file_path, data):
108 | """ pickle given data to file
109 | Args:
110 | file_path (str): path of the file to pickle data
111 | data (): data to pickle
112 | """
113 | with open(file_path, 'wb') as f:
114 | pickle.dump(data, f)
115 |
116 |
117 | def load_pickle(file_path):
118 | """ load pickled data from file
119 | Args:
120 | file_path (str): path of the file to load pickled data
121 | Returns:
122 | data (): data pickled in file
123 | """
124 | with open(file_path, 'rb') as f:
125 | if six.PY2:
126 | return pickle.load(f)
127 | else:
128 | return pickle.load(f, encoding='bytes')
129 |
130 |
131 | def prepare_output_dir(base_dir, args, time_format='%Y-%m-%d-%H%M%S'):
132 | """ prepare a directory with current datetime as name.
133 | created directory contains the command and args when the script was called as text file.
134 | Args:
135 | base_dir (str): path of the directory to save data
136 | args (dict): arguments when the python script was called
137 | time_format (str): datetime format string for naming directory to save data
138 | Returns:
139 | out_dir (str): directory to save data
140 | """
141 | time_str = datetime.datetime.now().strftime(time_format)
142 | outdir = os.path.join(base_dir, time_str)
143 |
144 | create_dir_if_not_exist(outdir)
145 |
146 | # Save all the arguments
147 | args_file_path = os.path.join(outdir, 'args.txt')
148 | if isinstance(args, argparse.Namespace):
149 | args = vars(args)
150 | write_text_to_file(args_file_path, json.dumps(args))
151 |
152 | # Save the command
153 | argv_file_path = os.path.join(outdir, 'command.txt')
154 | argv = ' '.join(sys.argv)
155 | write_text_to_file(argv_file_path, argv)
156 |
157 | return outdir
158 |
--------------------------------------------------------------------------------
/tests/configs/test_two_wheeled.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 |
4 | from PythonLinearNonlinearControl.configs.two_wheeled \
5 | import TwoWheeledConfigModule
6 |
7 |
8 | class TestCalcCost():
9 | def test_calc_costs(self):
10 | # make config
11 | config = TwoWheeledConfigModule()
12 | # set
13 | pred_len = 5
14 | state_size = 3
15 | input_size = 2
16 | pop_size = 2
17 | pred_xs = np.ones((pop_size, pred_len, state_size))
18 | g_xs = np.ones((pop_size, pred_len, state_size)) * 0.5
19 | input_samples = np.ones((pop_size, pred_len, input_size)) * 0.5
20 |
21 | costs = config.input_cost_fn(input_samples)
22 | expected_costs = np.ones((pop_size, pred_len, input_size))*0.5
23 |
24 | assert costs == pytest.approx(expected_costs**2 * np.diag(config.R))
25 |
26 | costs = config.state_cost_fn(pred_xs, g_xs)
27 | expected_costs = np.ones((pop_size, pred_len, state_size))*0.5
28 |
29 | assert costs == pytest.approx(expected_costs**2 * np.diag(config.Q))
30 |
31 | costs = config.terminal_state_cost_fn(pred_xs[:, -1, :],
32 | g_xs[:, -1, :])
33 | expected_costs = np.ones((pop_size, state_size))*0.5
34 |
35 | assert costs == pytest.approx(expected_costs**2 * np.diag(config.Sf))
36 |
37 |
38 | class TestGradient():
39 | def test_state_gradient(self):
40 | """
41 | """
42 | xs = np.ones((1, 3))
43 | g_xs = np.ones((1, 3)) * 0.5
44 | cost_grad =\
45 | TwoWheeledConfigModule.gradient_cost_fn_state(xs, g_xs)
46 |
47 | # numeric grad
48 | eps = 1e-4
49 | expected_grad = np.zeros((1, 3))
50 | for i in range(3):
51 | tmp_x = xs.copy()
52 | tmp_x[0, i] = xs[0, i] + eps
53 | forward = \
54 | TwoWheeledConfigModule.state_cost_fn(tmp_x, g_xs)
55 | forward = np.sum(forward)
56 | tmp_x = xs.copy()
57 | tmp_x[0, i] = xs[0, i] - eps
58 | backward = \
59 | TwoWheeledConfigModule.state_cost_fn(tmp_x, g_xs)
60 | backward = np.sum(backward)
61 |
62 | expected_grad[0, i] = (forward - backward) / (2. * eps)
63 |
64 | assert cost_grad == pytest.approx(expected_grad)
65 |
66 | def test_input_gradient(self):
67 | """
68 | """
69 | us = np.ones((1, 2))
70 | cost_grad =\
71 | TwoWheeledConfigModule.gradient_cost_fn_input(None, us)
72 |
73 | # numeric grad
74 | eps = 1e-4
75 | expected_grad = np.zeros((1, 2))
76 | for i in range(2):
77 | tmp_u = us.copy()
78 | tmp_u[0, i] = us[0, i] + eps
79 | forward = \
80 | TwoWheeledConfigModule.input_cost_fn(tmp_u)
81 | forward = np.sum(forward)
82 | tmp_u = us.copy()
83 | tmp_u[0, i] = us[0, i] - eps
84 | backward = \
85 | TwoWheeledConfigModule.input_cost_fn(tmp_u)
86 | backward = np.sum(backward)
87 |
88 | expected_grad[0, i] = (forward - backward) / (2. * eps)
89 |
90 | assert cost_grad == pytest.approx(expected_grad)
91 |
92 | def test_state_hessian(self):
93 | """
94 | """
95 | g_xs = np.ones((1, 3)) * 0.5
96 | xs = np.ones((1, 3))
97 | cost_hess =\
98 | TwoWheeledConfigModule.hessian_cost_fn_state(xs, g_xs)
99 |
100 | # numeric grad
101 | eps = 1e-4
102 | expected_hess = np.zeros((1, 3, 3))
103 | for i in range(3):
104 | tmp_x = xs.copy()
105 | tmp_x[0, i] = xs[0, i] + eps
106 | forward = \
107 | TwoWheeledConfigModule.gradient_cost_fn_state(
108 | tmp_x, g_xs, terminal=False)
109 | tmp_x = xs.copy()
110 | tmp_x[0, i] = xs[0, i] - eps
111 | backward = \
112 | TwoWheeledConfigModule.gradient_cost_fn_state(
113 | tmp_x, g_xs, terminal=False)
114 |
115 | expected_hess[0, :, i] = (forward - backward) / (2. * eps)
116 |
117 | assert cost_hess == pytest.approx(expected_hess)
118 |
119 | def test_input_hessian(self):
120 | """
121 | """
122 | us = np.ones((1, 2))
123 | xs = np.ones((1, 3))
124 | cost_hess = TwoWheeledConfigModule.hessian_cost_fn_input(us, xs)
125 |
126 | # numeric grad
127 | eps = 1e-4
128 | expected_hess = np.zeros((1, 2, 2))
129 | for i in range(2):
130 | tmp_u = us.copy()
131 | tmp_u[0, i] = us[0, i] + eps
132 | forward = \
133 | TwoWheeledConfigModule.gradient_cost_fn_input(
134 | xs, tmp_u)
135 | tmp_u = us.copy()
136 | tmp_u[0, i] = us[0, i] - eps
137 | backward = \
138 | TwoWheeledConfigModule.gradient_cost_fn_input(
139 | xs, tmp_u)
140 |
141 | expected_hess[0, :, i] = (forward - backward) / (2. * eps)
142 |
143 | assert cost_hess == pytest.approx(expected_hess)
144 |
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/common/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | def rotate_pos(pos, angle):
5 | """ Transformation the coordinate in the angle
6 |
7 | Args:
8 | pos (numpy.ndarray): local state, shape(data_size, 2)
9 | angle (float): rotate angle, in radians
10 | Returns:
11 | rotated_pos (numpy.ndarray): shape(data_size, 2)
12 | """
13 | rot_mat = np.array([[np.cos(angle), -np.sin(angle)],
14 | [np.sin(angle), np.cos(angle)]])
15 |
16 | return np.dot(pos, rot_mat.T)
17 |
18 |
19 | def fit_angle_in_range(angles, min_angle=-np.pi, max_angle=np.pi):
20 | """ Check angle range and correct the range
21 |
22 | Args:
23 | angle (numpy.ndarray): in radians
24 | min_angle (float): maximum of range in radians, default -pi
25 | max_angle (float): minimum of range in radians, default pi
26 | Returns:
27 | fitted_angle (numpy.ndarray): range angle in radians
28 | """
29 | if max_angle < min_angle:
30 | raise ValueError("max angle must be greater than min angle")
31 | if (max_angle - min_angle) < 2.0 * np.pi:
32 | raise ValueError("difference between max_angle \
33 | and min_angle must be greater than 2.0 * pi")
34 |
35 | output = np.array(angles)
36 | output_shape = output.shape
37 |
38 | output = output.flatten()
39 | output -= min_angle
40 | output %= 2 * np.pi
41 | output += 2 * np.pi
42 | output %= 2 * np.pi
43 | output += min_angle
44 |
45 | output = np.minimum(max_angle, np.maximum(min_angle, output))
46 | return output.reshape(output_shape)
47 |
48 |
49 | def update_state_with_Runge_Kutta(state, u, functions, dt=0.01, batch=True):
50 | """ update state in Runge Kutta methods
51 | Args:
52 | state (array-like): state of system
53 | u (array-like): input of system
54 | functions (list): update function of each state,
55 | each function will be called like func(state, u)
56 | We expect that this function returns differential of each state
57 | dt (float): float in seconds
58 | batch (bool): state and u is given by batch or not
59 |
60 | Returns:
61 | next_state (np.array): next state of system
62 |
63 | Notes:
64 | sample of function is as follows:
65 |
66 | def func_x(self, x_1, x_2, u):
67 | x_dot = (1. - x_1**2 - x_2**2) * x_2 - x_1 + u
68 | return x_dot
69 |
70 | Note that the function return x_dot.
71 | """
72 | if not batch:
73 | state_size = len(state)
74 | assert state_size == len(functions), \
75 | "Invalid functions length, You need to give the state size functions"
76 |
77 | k0 = np.zeros(state_size)
78 | k1 = np.zeros(state_size)
79 | k2 = np.zeros(state_size)
80 | k3 = np.zeros(state_size)
81 |
82 | for i, func in enumerate(functions):
83 | k0[i] = dt * func(state, u)
84 |
85 | for i, func in enumerate(functions):
86 | k1[i] = dt * func(state + k0 / 2., u)
87 |
88 | for i, func in enumerate(functions):
89 | k2[i] = dt * func(state + k1 / 2., u)
90 |
91 | for i, func in enumerate(functions):
92 | k3[i] = dt * func(state + k2, u)
93 |
94 | return state + (k0 + 2. * k1 + 2. * k2 + k3) / 6.
95 |
96 | else:
97 | batch_size, state_size = state.shape
98 | assert state_size == len(functions), \
99 | "Invalid functions length, You need to give the state size functions"
100 |
101 | k0 = np.zeros((batch_size, state_size))
102 | k1 = np.zeros((batch_size, state_size))
103 | k2 = np.zeros((batch_size, state_size))
104 | k3 = np.zeros((batch_size, state_size))
105 |
106 | for i, func in enumerate(functions):
107 | k0[:, i] = dt * func(state, u)
108 |
109 | for i, func in enumerate(functions):
110 | k1[:, i] = dt * func(state + k0 / 2., u)
111 |
112 | for i, func in enumerate(functions):
113 | k2[:, i] = dt * func(state + k1 / 2., u)
114 |
115 | for i, func in enumerate(functions):
116 | k3[:, i] = dt * func(state + k2, u)
117 |
118 | return state + (k0 + 2. * k1 + 2. * k2 + k3) / 6.
119 |
120 |
121 | def line_search(grad, sol, compute_eval_val,
122 | init_alpha=0.001, max_iter=100, update_ratio=1.):
123 | """ line search
124 | Args:
125 | grad (numpy.ndarray): gradient
126 | sol (numpy.ndarray): sol
127 | compute_eval_val (numpy.ndarray): function to compute evaluation value
128 |
129 | Returns:
130 | alpha (float): result of line search
131 | """
132 | assert grad.shape == sol.shape
133 | base_val = np.inf
134 | alpha = init_alpha
135 | original_sol = sol.copy()
136 |
137 | for _ in range(max_iter):
138 | updated_sol = original_sol - alpha * grad
139 | eval_val = compute_eval_val(updated_sol)
140 |
141 | if eval_val < base_val:
142 | alpha += init_alpha * update_ratio
143 | base_val = eval_val
144 | else:
145 | break
146 |
147 | return alpha
148 |
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/controllers/cem.py:
--------------------------------------------------------------------------------
1 | from logging import getLogger
2 |
3 | import numpy as np
4 | import scipy.stats as stats
5 |
6 | from .controller import Controller
7 | from ..envs.cost import calc_cost
8 |
9 | logger = getLogger(__name__)
10 |
11 |
12 | class CEM(Controller):
13 | """ Cross Entropy Method for linear and nonlinear method
14 |
15 | Attributes:
16 | history_u (list[numpy.ndarray]): time history of optimal input
17 | Ref:
18 | Chua, K., Calandra, R., McAllister, R., & Levine, S. (2018).
19 | Deep reinforcement learning in a handful of trials
20 | using probabilistic dynamics models.
21 | In Advances in Neural Information Processing Systems (pp. 4754-4765).
22 | """
23 |
24 | def __init__(self, config, model):
25 | super(CEM, self).__init__(config, model)
26 |
27 | # model
28 | self.model = model
29 |
30 | # general parameters
31 | self.pred_len = config.PRED_LEN
32 | self.input_size = config.INPUT_SIZE
33 |
34 | # cem parameters
35 | self.alpha = config.opt_config["CEM"]["alpha"]
36 | self.pop_size = config.opt_config["CEM"]["popsize"]
37 | self.max_iters = config.opt_config["CEM"]["max_iters"]
38 | self.num_elites = config.opt_config["CEM"]["num_elites"]
39 | self.epsilon = config.opt_config["CEM"]["threshold"]
40 | self.init_var = config.opt_config["CEM"]["init_var"]
41 | self.opt_dim = self.input_size * self.pred_len
42 |
43 | # get bound
44 | self.input_upper_bounds = np.tile(config.INPUT_UPPER_BOUND,
45 | self.pred_len)
46 | self.input_lower_bounds = np.tile(config.INPUT_LOWER_BOUND,
47 | self.pred_len)
48 |
49 | # get cost func
50 | self.state_cost_fn = config.state_cost_fn
51 | self.terminal_state_cost_fn = config.terminal_state_cost_fn
52 | self.input_cost_fn = config.input_cost_fn
53 |
54 | # init mean
55 | self.init_mean = np.tile((config.INPUT_UPPER_BOUND
56 | + config.INPUT_LOWER_BOUND) / 2.,
57 | self.pred_len)
58 | self.prev_sol = self.init_mean.copy()
59 | # init variance
60 | var = np.ones_like(config.INPUT_UPPER_BOUND) \
61 | * config.opt_config["CEM"]["init_var"]
62 | self.init_var = np.tile(var, self.pred_len)
63 |
64 | # save
65 | self.history_u = []
66 |
67 | def clear_sol(self):
68 | """ clear prev sol
69 | """
70 | logger.debug("Clear Sol")
71 | self.prev_sol = self.init_mean.copy()
72 |
73 | def obtain_sol(self, curr_x, g_xs):
74 | """ calculate the optimal inputs
75 |
76 | Args:
77 | curr_x (numpy.ndarray): current state, shape(state_size, )
78 | g_xs (numpy.ndarrya): goal trajectory, shape(plan_len, state_size)
79 | Returns:
80 | opt_input (numpy.ndarray): optimal input, shape(input_size, )
81 | """
82 | # initialize
83 | opt_count = 0
84 |
85 | # get configuration
86 | mean = self.prev_sol.flatten().copy()
87 | var = self.init_var.flatten().copy()
88 |
89 | # make distribution
90 | X = stats.truncnorm(-1, 1,
91 | loc=np.zeros_like(mean),
92 | scale=np.ones_like(mean))
93 |
94 | while (opt_count < self.max_iters) and np.max(var) > self.epsilon:
95 | # constrained
96 | lb_dist = mean - self.input_lower_bounds
97 | ub_dist = self.input_upper_bounds - mean
98 | constrained_var = np.minimum(np.minimum(np.square(lb_dist),
99 | np.square(ub_dist)),
100 | var)
101 |
102 | # sample
103 | samples = X.rvs(size=[self.pop_size, self.opt_dim]) \
104 | * np.sqrt(constrained_var) \
105 | + mean
106 |
107 | # calc cost
108 | # samples.shape = (pop_size, opt_dim)
109 | costs = self.calc_cost(curr_x,
110 | samples.reshape(self.pop_size,
111 | self.pred_len,
112 | self.input_size),
113 | g_xs)
114 |
115 | # sort cost
116 | elites = samples[np.argsort(costs)][:self.num_elites]
117 |
118 | # new mean
119 | new_mean = np.mean(elites, axis=0)
120 | new_var = np.var(elites, axis=0)
121 |
122 | # soft update
123 | mean = self.alpha * mean + (1. - self.alpha) * new_mean
124 | var = self.alpha * var + (1. - self.alpha) * new_var
125 |
126 | logger.debug("Var = {}".format(np.max(var)))
127 | logger.debug("Costs = {}".format(np.mean(costs)))
128 | opt_count += 1
129 |
130 | sol = mean.copy()
131 | self.prev_sol = np.concatenate((mean[self.input_size:],
132 | np.zeros(self.input_size)))
133 |
134 | return sol.reshape(self.pred_len, self.input_size).copy()[0]
135 |
136 | def __str__(self):
137 | return "CEM"
138 |
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/controllers/mppi_williams.py:
--------------------------------------------------------------------------------
1 | from logging import getLogger
2 |
3 | import numpy as np
4 | import scipy.stats as stats
5 |
6 | from .controller import Controller
7 | from ..envs.cost import calc_cost
8 |
9 | logger = getLogger(__name__)
10 |
11 |
12 | class MPPIWilliams(Controller):
13 | """ Model Predictive Path Integral for linear and nonlinear method
14 |
15 | Attributes:
16 | history_u (list[numpy.ndarray]): time history of optimal input
17 | Ref:
18 | G. Williams et al., "Information theoretic MPC
19 | for model-based reinforcement learning,"
20 | 2017 IEEE International Conference on Robotics and Automation (ICRA),
21 | Singapore, 2017, pp. 1714-1721.
22 | """
23 |
24 | def __init__(self, config, model):
25 | super(MPPIWilliams, self).__init__(config, model)
26 |
27 | # model
28 | self.model = model
29 |
30 | # general parameters
31 | self.pred_len = config.PRED_LEN
32 | self.input_size = config.INPUT_SIZE
33 |
34 | # mppi parameters
35 | self.pop_size = config.opt_config["MPPIWilliams"]["popsize"]
36 | self.lam = config.opt_config["MPPIWilliams"]["lambda"]
37 | self.noise_sigma = config.opt_config["MPPIWilliams"]["noise_sigma"]
38 | self.opt_dim = self.input_size * self.pred_len
39 |
40 | # get bound
41 | self.input_upper_bounds = np.tile(config.INPUT_UPPER_BOUND,
42 | (self.pred_len, 1))
43 | self.input_lower_bounds = np.tile(config.INPUT_LOWER_BOUND,
44 | (self.pred_len, 1))
45 |
46 | # get cost func
47 | self.state_cost_fn = config.state_cost_fn
48 | self.terminal_state_cost_fn = config.terminal_state_cost_fn
49 | self.input_cost_fn = config.input_cost_fn
50 |
51 | # init mean
52 | self.prev_sol = np.tile((config.INPUT_UPPER_BOUND
53 | + config.INPUT_LOWER_BOUND) / 2.,
54 | self.pred_len)
55 | self.prev_sol = self.prev_sol.reshape(self.pred_len, self.input_size)
56 |
57 | # save
58 | self.history_u = [np.zeros(self.input_size)]
59 |
60 | def clear_sol(self):
61 | """ clear prev sol
62 | """
63 | logger.debug("Clear Solution")
64 | self.prev_sol = \
65 | (self.input_upper_bounds + self.input_lower_bounds) / 2.
66 | self.prev_sol = self.prev_sol.reshape(self.pred_len, self.input_size)
67 |
68 | def calc_cost(self, curr_x, samples, g_xs):
69 | """ calculate the cost of input samples by using MPPI's eq
70 |
71 | Args:
72 | curr_x (numpy.ndarray): shape(state_size),
73 | current robot position
74 | samples (numpy.ndarray): shape(pop_size, opt_dim),
75 | input samples
76 | g_xs (numpy.ndarray): shape(pred_len, state_size),
77 | goal states
78 | Returns:
79 | costs (numpy.ndarray): shape(pop_size, )
80 | """
81 | # get size
82 | pop_size = samples.shape[0]
83 | g_xs = np.tile(g_xs, (pop_size, 1, 1))
84 |
85 | # calc cost, pred_xs.shape = (pop_size, pred_len+1, state_size)
86 | pred_xs = self.model.predict_traj(curr_x, samples)
87 |
88 | # get particle cost
89 | costs = calc_cost(pred_xs, samples, g_xs,
90 | self.state_cost_fn, None,
91 | self.terminal_state_cost_fn)
92 |
93 | return costs
94 |
95 | def obtain_sol(self, curr_x, g_xs):
96 | """ calculate the optimal inputs
97 |
98 | Args:
99 | curr_x (numpy.ndarray): current state, shape(state_size, )
100 | g_xs (numpy.ndarrya): goal trajectory, shape(plan_len, state_size)
101 | Returns:
102 | opt_input (numpy.ndarray): optimal input, shape(input_size, )
103 | """
104 | # get noised inputs
105 | noise = np.random.normal(
106 | loc=0, scale=1.0, size=(self.pop_size, self.pred_len,
107 | self.input_size)) * self.noise_sigma
108 |
109 | noised_inputs = self.prev_sol + noise
110 |
111 | # clip actions
112 | noised_inputs = np.clip(
113 | noised_inputs, self.input_lower_bounds, self.input_upper_bounds)
114 |
115 | # calc cost
116 | costs = self.calc_cost(curr_x, noised_inputs, g_xs)
117 |
118 | costs += np.sum(np.sum(
119 | self.lam * self.prev_sol * noise / self.noise_sigma,
120 | axis=-1), axis=-1)
121 |
122 | # mppi update
123 | beta = np.min(costs)
124 | eta = np.sum(np.exp(- 1. / self.lam * (costs - beta)), axis=0) \
125 | + 1e-10
126 |
127 | # weight
128 | # eta.shape = (pred_len, input_size)
129 | weights = np.exp(- 1. / self.lam * (costs - beta)) / eta
130 |
131 | # update inputs
132 | sol = self.prev_sol \
133 | + np.sum(weights[:, np.newaxis, np.newaxis] * noise, axis=0)
134 |
135 | # update
136 | self.prev_sol[:-1] = sol[1:]
137 | self.prev_sol[-1] = sol[-1] # last use the terminal input
138 |
139 | # log
140 | self.history_u.append(sol[0])
141 |
142 | return sol[0]
143 |
144 | def __str__(self):
145 | return "MPPIWilliams"
146 |
--------------------------------------------------------------------------------
/tests/configs/test_cartpole.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 |
4 | from PythonLinearNonlinearControl.configs.cartpole \
5 | import CartPoleConfigModule
6 |
7 |
8 | class TestCalcCost():
9 | def test_calc_costs(self):
10 | # make config
11 | config = CartPoleConfigModule()
12 | # set
13 | pred_len = 5
14 | state_size = 4
15 | input_size = 1
16 | pop_size = 2
17 | pred_xs = np.ones((pop_size, pred_len, state_size))
18 | g_xs = np.ones((pop_size, pred_len, state_size)) * 0.5
19 | input_samples = np.ones((pop_size, pred_len, input_size)) * 0.5
20 |
21 | costs = config.input_cost_fn(input_samples)
22 |
23 | assert costs.shape == (pop_size, pred_len, input_size)
24 |
25 | costs = config.state_cost_fn(pred_xs, g_xs)
26 |
27 | assert costs.shape == (pop_size, pred_len, 1)
28 |
29 | costs = config.terminal_state_cost_fn(pred_xs[:, -1, :],
30 | g_xs[:, -1, :])
31 |
32 | assert costs.shape == (pop_size, 1)
33 |
34 |
35 | class TestGradient():
36 | def test_state_gradient(self):
37 | """
38 | """
39 | xs = np.ones((1, 4))
40 | cost_grad = CartPoleConfigModule.gradient_cost_fn_state(xs, None)
41 |
42 | # numeric grad
43 | eps = 1e-4
44 | expected_grad = np.zeros((1, 4))
45 | for i in range(4):
46 | tmp_x = xs.copy()
47 | tmp_x[0, i] = xs[0, i] + eps
48 | forward = \
49 | CartPoleConfigModule.state_cost_fn(tmp_x, None)
50 | tmp_x = xs.copy()
51 | tmp_x[0, i] = xs[0, i] - eps
52 | backward = \
53 | CartPoleConfigModule.state_cost_fn(tmp_x, None)
54 |
55 | expected_grad[0, i] = (forward - backward) / (2. * eps)
56 |
57 | assert cost_grad == pytest.approx(expected_grad)
58 |
59 | def test_terminal_state_gradient(self):
60 | """
61 | """
62 | xs = np.ones(4)
63 | cost_grad =\
64 | CartPoleConfigModule.gradient_cost_fn_state(xs, None,
65 | terminal=True)
66 |
67 | # numeric grad
68 | eps = 1e-4
69 | expected_grad = np.zeros((1, 4))
70 | for i in range(4):
71 | tmp_x = xs.copy()
72 | tmp_x[i] = xs[i] + eps
73 | forward = \
74 | CartPoleConfigModule.state_cost_fn(tmp_x, None)
75 | tmp_x = xs.copy()
76 | tmp_x[i] = xs[i] - eps
77 | backward = \
78 | CartPoleConfigModule.state_cost_fn(tmp_x, None)
79 |
80 | expected_grad[0, i] = (forward - backward) / (2. * eps)
81 |
82 | assert cost_grad == pytest.approx(expected_grad)
83 |
84 | def test_input_gradient(self):
85 | """
86 | """
87 | us = np.ones((1, 1))
88 | cost_grad = CartPoleConfigModule.gradient_cost_fn_input(None, us)
89 |
90 | # numeric grad
91 | eps = 1e-4
92 | expected_grad = np.zeros((1, 1))
93 | for i in range(1):
94 | tmp_u = us.copy()
95 | tmp_u[0, i] = us[0, i] + eps
96 | forward = \
97 | CartPoleConfigModule.input_cost_fn(tmp_u)
98 | tmp_u = us.copy()
99 | tmp_u[0, i] = us[0, i] - eps
100 | backward = \
101 | CartPoleConfigModule.input_cost_fn(tmp_u)
102 |
103 | expected_grad[0, i] = (forward - backward) / (2. * eps)
104 |
105 | assert cost_grad == pytest.approx(expected_grad)
106 |
107 | def test_state_hessian(self):
108 | """
109 | """
110 | xs = np.ones((1, 4))
111 | cost_hess = CartPoleConfigModule.hessian_cost_fn_state(xs, None)
112 |
113 | # numeric grad
114 | eps = 1e-4
115 | expected_hess = np.zeros((1, 4, 4))
116 | for i in range(4):
117 | tmp_x = xs.copy()
118 | tmp_x[0, i] = xs[0, i] + eps
119 | forward = \
120 | CartPoleConfigModule.gradient_cost_fn_state(
121 | tmp_x, None, terminal=False)
122 | tmp_x = xs.copy()
123 | tmp_x[0, i] = xs[0, i] - eps
124 | backward = \
125 | CartPoleConfigModule.gradient_cost_fn_state(
126 | tmp_x, None, terminal=False)
127 |
128 | expected_hess[0, :, i] = (forward - backward) / (2. * eps)
129 |
130 | assert cost_hess == pytest.approx(expected_hess)
131 |
132 | def test_terminal_state_hessian(self):
133 | """
134 | """
135 | xs = np.ones(4)
136 | cost_hess =\
137 | CartPoleConfigModule.hessian_cost_fn_state(xs, None,
138 | terminal=True)
139 |
140 | # numeric grad
141 | eps = 1e-4
142 | expected_hess = np.zeros((1, 4, 4))
143 | for i in range(4):
144 | tmp_x = xs.copy()
145 | tmp_x[i] = xs[i] + eps
146 | forward = \
147 | CartPoleConfigModule.gradient_cost_fn_state(
148 | tmp_x, None, terminal=True)
149 | tmp_x = xs.copy()
150 | tmp_x[i] = xs[i] - eps
151 | backward = \
152 | CartPoleConfigModule.gradient_cost_fn_state(
153 | tmp_x, None, terminal=True)
154 |
155 | expected_hess[0, :, i] = (forward - backward) / (2. * eps)
156 |
157 | assert cost_hess == pytest.approx(expected_hess)
158 |
159 | def test_input_hessian(self):
160 | """
161 | """
162 | us = np.ones((1, 1))
163 | xs = np.ones((1, 4))
164 | cost_hess = CartPoleConfigModule.hessian_cost_fn_input(us, xs)
165 |
166 | # numeric grad
167 | eps = 1e-4
168 | expected_hess = np.zeros((1, 1, 1))
169 | for i in range(1):
170 | tmp_u = us.copy()
171 | tmp_u[0, i] = us[0, i] + eps
172 | forward = \
173 | CartPoleConfigModule.gradient_cost_fn_input(
174 | xs, tmp_u)
175 | tmp_u = us.copy()
176 | tmp_u[0, i] = us[0, i] - eps
177 | backward = \
178 | CartPoleConfigModule.gradient_cost_fn_input(
179 | xs, tmp_u)
180 |
181 | expected_hess[0, :, i] = (forward - backward) / (2. * eps)
182 |
183 | assert cost_hess == pytest.approx(expected_hess)
184 |
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/plotters/plot_func.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import numpy as np
4 | import matplotlib.pyplot as plt
5 |
6 | from ..helper import save_pickle, load_pickle
7 |
8 |
9 | def plot_result(history, history_g=None, ylabel="x",
10 | save_dir="./result", name="state_history"):
11 | """
12 | Args:
13 | history (numpy.ndarray): history, shape(iters, size)
14 | """
15 | (iters, size) = history.shape
16 | for i in range(0, size, 3):
17 |
18 | figure = plt.figure()
19 | axis1 = figure.add_subplot(311)
20 | axis2 = figure.add_subplot(312)
21 | axis3 = figure.add_subplot(313)
22 |
23 | axis1.set_ylabel(ylabel + "_{}".format(i))
24 | axis2.set_ylabel(ylabel + "_{}".format(i+1))
25 | axis3.set_ylabel(ylabel + "_{}".format(i+2))
26 | axis3.set_xlabel("time steps")
27 |
28 | # gt
29 | def plot(axis, history, history_g=None):
30 | axis.plot(range(iters), history, c="r", linewidth=3)
31 | if history_g is not None:
32 | axis.plot(range(iters), history_g,
33 | c="b", linewidth=3, label="goal")
34 |
35 | if i < size:
36 | plot(axis1, history[:, i], history_g=history_g[:, i])
37 | if i+1 < size:
38 | plot(axis2, history[:, i+1], history_g=history_g[:, i+1])
39 | if i+2 < size:
40 | plot(axis3, history[:, i+2], history_g=history_g[:, i+2])
41 |
42 | # save
43 | if save_dir is not None:
44 | path = os.path.join(save_dir, name + "-{}".format(i))
45 | else:
46 | path = name
47 |
48 | axis1.legend(ncol=1, bbox_to_anchor=(0., 1.02, 1., 0.102), loc=3)
49 | figure.savefig(path, bbox_inches="tight", pad_inches=0.05)
50 |
51 |
52 | def plot_results(history_x, history_u, history_g=None, args=None):
53 | """
54 |
55 | Args:
56 | history_x (numpy.ndarray): history of state, shape(iters, state_size)
57 | history_u (numpy.ndarray): history of state, shape(iters, input_size)
58 | Returns:
59 | None
60 | """
61 | env = "Env"
62 | controller_type = "controller"
63 |
64 | if args is not None:
65 | env = args.env
66 | controller_type = args.controller_type
67 |
68 | plot_result(history_x, history_g=history_g, ylabel="x",
69 | name=env + "-state_history",
70 | save_dir="./result/" + controller_type)
71 | plot_result(history_u, history_g=np.zeros_like(history_u), ylabel="u",
72 | name=env + "-input_history",
73 | save_dir="./result/" + controller_type)
74 |
75 |
76 | def save_plot_data(history_x, history_u, history_g=None, args=None):
77 | """ save plot data
78 |
79 | Args:
80 | history_x (numpy.ndarray): history of state, shape(iters, state_size)
81 | history_u (numpy.ndarray): history of state, shape(iters, input_size)
82 | Returns:
83 | None
84 | """
85 | env = "Env"
86 | controller_type = "controller"
87 |
88 | if args is not None:
89 | env = args.env
90 | controller_type = args.controller_type
91 |
92 | path = os.path.join("./result/" + controller_type,
93 | env + "-history_x.pkl")
94 | save_pickle(path, history_x)
95 |
96 | path = os.path.join("./result/" + controller_type,
97 | env + "-history_u.pkl")
98 | save_pickle(path, history_u)
99 |
100 | path = os.path.join("./result/" + controller_type,
101 | env + "-history_g.pkl")
102 | save_pickle(path, history_g)
103 |
104 |
105 | def load_plot_data(env, controller_type, result_dir="./result"):
106 | """
107 | Args:
108 | env (str): environments name
109 | controller_type (str): controller type
110 | result_dir (str): result directory
111 | Returns:
112 | history_x (numpy.ndarray): history of state, shape(iters, state_size)
113 | history_u (numpy.ndarray): history of state, shape(iters, input_size)
114 | history_g (numpy.ndarray): history of state, shape(iters, input_size)
115 | """
116 | path = os.path.join("./result/" + controller_type,
117 | env + "-history_x.pkl")
118 | history_x = load_pickle(path)
119 |
120 | path = os.path.join("./result/" + controller_type,
121 | env + "-history_u.pkl")
122 | history_u = load_pickle(path)
123 |
124 | path = os.path.join("./result/" + controller_type,
125 | env + "-history_g.pkl")
126 | history_g = load_pickle(path)
127 |
128 | return history_x, history_u, history_g
129 |
130 |
131 | def plot_multi_result(histories, histories_g=None, labels=None, ylabel="x",
132 | save_dir="./result", name="state_history"):
133 | """
134 | Args:
135 | history (numpy.ndarray): history, shape(iters, size)
136 | """
137 | (_, iters, size) = histories.shape
138 |
139 | for i in range(0, size, 2):
140 |
141 | figure = plt.figure()
142 | axis1 = figure.add_subplot(211)
143 | axis2 = figure.add_subplot(212)
144 |
145 | axis1.set_ylabel(ylabel + "_{}".format(i))
146 | axis2.set_ylabel(ylabel + "_{}".format(i+1))
147 | axis2.set_xlabel("time steps")
148 |
149 | # gt
150 | def plot(axis, history, history_g=None, label=""):
151 | axis.plot(range(iters), history,
152 | linewidth=3, label=label, alpha=0.7, linestyle="dashed")
153 | if history_g is not None:
154 | axis.plot(range(iters), history_g,
155 | c="b", linewidth=3)
156 |
157 | if i < size:
158 | for j, (history, history_g) \
159 | in enumerate(zip(histories, histories_g)):
160 | plot(axis1, history[:, i],
161 | history_g=history_g[:, i], label=labels[j])
162 | if i+1 < size:
163 | for j, (history, history_g) in \
164 | enumerate(zip(histories, histories_g)):
165 | plot(axis2, history[:, i+1],
166 | history_g=history_g[:, i+1], label=labels[j])
167 |
168 | # save
169 | if save_dir is not None:
170 | path = os.path.join(save_dir, name + "-{}".format(i))
171 | else:
172 | path = name
173 |
174 | axis1.legend(ncol=3, bbox_to_anchor=(0., 1.02, 1., 0.102), loc=3)
175 | figure.savefig(path, bbox_inches="tight", pad_inches=0.05)
176 |
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/controllers/nmpc_cgmres.py:
--------------------------------------------------------------------------------
1 | from logging import getLogger
2 |
3 | import numpy as np
4 | import scipy.stats as stats
5 |
6 | from .controller import Controller
7 | from ..envs.cost import calc_cost
8 | from ..common.utils import line_search
9 |
10 | logger = getLogger(__name__)
11 |
12 |
13 | class NMPCCGMRES(Controller):
14 | def __init__(self, config, model):
15 | """ Nonlinear Model Predictive Control using cgmres
16 | """
17 | super(NMPCCGMRES, self).__init__(config, model)
18 |
19 | # model
20 | self.model = model
21 |
22 | # get cost func
23 | self.state_cost_fn = config.state_cost_fn
24 | self.terminal_state_cost_fn = config.terminal_state_cost_fn
25 | self.input_cost_fn = config.input_cost_fn
26 |
27 | # general parameters
28 | self.pred_len = config.PRED_LEN
29 | self.input_size = config.INPUT_SIZE
30 | self.dt = config.DT
31 |
32 | # controller parameters
33 | self.threshold = config.opt_config["NMPCCGMRES"]["threshold"]
34 | self.zeta = config.opt_config["NMPCCGMRES"]["zeta"]
35 | self.delta = config.opt_config["NMPCCGMRES"]["delta"]
36 | self.alpha = config.opt_config["NMPCCGMRES"]["alpha"]
37 | self.tf = config.opt_config["NMPCCGMRES"]["tf"]
38 | self.divide_num = config.PRED_LEN
39 | self.with_constraint = config.opt_config["NMPCCGMRES"]["constraint"]
40 | if not self.with_constraint:
41 | raise NotImplementedError
42 | # 3 means u, dummy_u, raw
43 | self.max_iters = 3 * self.input_size * self.divide_num
44 |
45 | # initialize
46 | self.prev_sol = np.zeros((self.pred_len, self.input_size))
47 | self.opt_count = 1
48 | # add smaller than constraints value
49 | input_constraint = np.abs([config.INPUT_LOWER_BOUND])
50 | self.prev_dummy_sol = np.ones(
51 | (self.pred_len, self.input_size)) * input_constraint - 1e-3
52 | # add bigger than 0.01 to avoid computational error
53 | self.prev_raw = np.zeros(
54 | (self.pred_len, self.input_size)) + 0.01 + 1e-3
55 |
56 | def _compute_f(self, curr_x, sol, g_xs, dummy_sol=None, raw=None):
57 | # shape(pred_len+1, state_size)
58 | pred_xs = self.model.predict_traj(curr_x, sol)
59 | # shape(pred_len, state_size)
60 | pred_lams = self.model.predict_adjoint_traj(pred_xs, sol, g_xs)
61 |
62 | if self.with_constraint:
63 | F = self.config.gradient_hamiltonian_input_with_constraint(
64 | pred_xs, pred_lams, sol, g_xs, dummy_sol, raw)
65 |
66 | return F
67 | else:
68 | raise NotImplementedError
69 |
70 | def obtain_sol(self, curr_x, g_xs):
71 | """ calculate the optimal inputs
72 | Args:
73 | curr_x (numpy.ndarray): current state, shape(state_size, )
74 | g_xs (numpy.ndarrya): goal trajectory, shape(plan_len, state_size)
75 | Returns:
76 | opt_input (numpy.ndarray): optimal input, shape(input_size, )
77 | """
78 | sol = self.prev_sol.copy()
79 | dummy_sol = self.prev_dummy_sol.copy()
80 | raw = self.prev_raw.copy()
81 |
82 | # compute delta t
83 | time = self.dt * self.opt_count
84 | dt = self.tf * (1. - np.exp(-self.alpha * time)) / \
85 | float(self.divide_num)
86 | self.model.dt = dt
87 |
88 | # compute Fxt
89 | x_dot = self.model.x_dot(curr_x, sol[0])
90 | dx = x_dot * self.delta
91 | Fxt = self._compute_f(curr_x+dx, sol, g_xs, dummy_sol, raw).flatten()
92 |
93 | # compute F
94 | F = self._compute_f(curr_x, sol, g_xs, dummy_sol, raw).flatten()
95 | right = - self.zeta * F - ((Fxt - F) / self.delta)
96 |
97 | # compute Fuxt
98 | du = sol * self.delta
99 | ddummy_u = dummy_sol * self.delta
100 | draw = raw * self.delta
101 | Fuxt = self._compute_f(curr_x+dx, sol+du, g_xs,
102 | dummy_sol+ddummy_u, raw+draw).flatten()
103 | left = ((Fuxt - Fxt) / self.delta)
104 |
105 | r0 = right - left
106 | r0_norm = np.linalg.norm(r0)
107 |
108 | vs = np.zeros((self.max_iters, self.max_iters + 1))
109 | vs[:, 0] = r0 / r0_norm
110 | hs = np.zeros((self.max_iters + 1, self.max_iters + 1))
111 | e = np.zeros((self.max_iters + 1, 1))
112 | e[0] = 1.
113 |
114 | for i in range(self.max_iters):
115 | reshaped_vs = vs.reshape(
116 | (self.divide_num, 3, self.input_size, self.max_iters+1))
117 | du = reshaped_vs[:, 0, :, i] * self.delta
118 | ddummy_u = reshaped_vs[:, 1, :, i] * self.delta
119 | draw = reshaped_vs[:, 2, :, i] * self.delta
120 |
121 | Fuxt = self._compute_f(
122 | curr_x+dx, sol+du, g_xs, dummy_sol+ddummy_u, raw+draw).flatten()
123 | Av = ((Fuxt - Fxt) / self.delta)
124 |
125 | sum_Av = np.zeros(self.max_iters)
126 |
127 | for j in range(i + 1):
128 | hs[j, i] = np.dot(Av, vs[:, j])
129 | sum_Av = sum_Av + hs[j, i] * vs[:, j]
130 |
131 | v_est = Av - sum_Av
132 |
133 | hs[i+1, i] = np.linalg.norm(v_est)
134 |
135 | vs[:, i+1] = v_est / hs[i+1, i]
136 |
137 | inv_hs = np.linalg.pinv(hs[:i+1, :i])
138 | ys = np.dot(inv_hs, r0_norm * e[:i+1])
139 |
140 | judge_value = r0_norm * e[:i+1] - np.dot(hs[:i+1, :i], ys[:i])
141 |
142 | if np.linalg.norm(judge_value) < self.threshold or i == self.max_iters-1:
143 | update_value = np.dot(vs[:, :i-1], ys_pre[:i-1]).flatten()
144 |
145 | update_value = update_value.reshape(
146 | (self.divide_num, 3, self.input_size))
147 | du_new = du + update_value[:, 0, :]
148 | ddummy_u_new = ddummy_u + update_value[:, 1, :]
149 | draw_new = draw + update_value[:, 2, :]
150 | break
151 |
152 | ys_pre = ys
153 |
154 | sol += du_new * self.delta
155 | dummy_sol += ddummy_u_new * self.delta
156 | raw += draw_new * self.delta
157 |
158 | F = self._compute_f(curr_x, sol, g_xs, dummy_sol, raw)
159 | logger.debug("check F = {0}".format(np.linalg.norm(F)))
160 |
161 | self.prev_sol = sol.copy()
162 | self.prev_dummy_sol = dummy_sol.copy()
163 | self.prev_raw = raw.copy()
164 |
165 | self.opt_count += 1
166 |
167 | return sol[0]
168 |
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/envs/cartpole.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from matplotlib.axes import Axes
3 |
4 | from .env import Env
5 | from ..plotters.plot_objs import square
6 |
7 |
8 | class CartPoleEnv(Env):
9 | """ Cartpole Environment
10 |
11 | Ref :
12 | https://ocw.mit.edu/courses/
13 | electrical-engineering-and-computer-science/
14 | 6-832-underactuated-robotics-spring-2009/readings/
15 | MIT6_832s09_read_ch03.pdf
16 | """
17 |
18 | def __init__(self):
19 | """
20 | """
21 | self.config = {"state_size": 4,
22 | "input_size": 1,
23 | "dt": 0.02,
24 | "max_step": 500,
25 | "input_lower_bound": [-3.],
26 | "input_upper_bound": [3.],
27 | "mp": 0.2,
28 | "mc": 1.,
29 | "l": 0.5,
30 | "g": 9.81,
31 | "cart_size": (0.15, 0.1),
32 | }
33 |
34 | super(CartPoleEnv, self).__init__(self.config)
35 |
36 | def reset(self, init_x=None):
37 | """ reset state
38 |
39 | Returns:
40 | init_x (numpy.ndarray): initial state, shape(state_size, )
41 | info (dict): information
42 | """
43 | self.step_count = 0
44 |
45 | theta = np.random.randn(1)
46 | self.curr_x = np.array([0., 0., theta[0], 0.])
47 |
48 | if init_x is not None:
49 | self.curr_x = init_x
50 |
51 | # goal
52 | self.g_x = np.array([0., 0., -np.pi, 0.])
53 |
54 | # clear memory
55 | self.history_x = []
56 | self.history_g_x = []
57 |
58 | return self.curr_x, {"goal_state": self.g_x}
59 |
60 | def step(self, u):
61 | """ step environments
62 |
63 | Args:
64 | u (numpy.ndarray) : input, shape(input_size, )
65 | Returns:
66 | next_x (numpy.ndarray): next state, shape(state_size, )
67 | cost (float): costs
68 | done (bool): end the simulation or not
69 | info (dict): information
70 | """
71 | # clip action
72 | if self.config["input_lower_bound"] is not None:
73 | u = np.clip(u,
74 | self.config["input_lower_bound"],
75 | self.config["input_upper_bound"])
76 |
77 | # step
78 | # x
79 | d_x0 = self.curr_x[1]
80 | # v_x
81 | d_x1 = (u[0] + self.config["mp"] * np.sin(self.curr_x[2])
82 | * (self.config["l"] * (self.curr_x[3]**2)
83 | + self.config["g"] * np.cos(self.curr_x[2]))) \
84 | / (self.config["mc"] + self.config["mp"]
85 | * (np.sin(self.curr_x[2])**2))
86 | # theta
87 | d_x2 = self.curr_x[3]
88 |
89 | # v_theta
90 | d_x3 = (-u[0] * np.cos(self.curr_x[2])
91 | - self.config["mp"] * self.config["l"] * (self.curr_x[3]**2)
92 | * np.cos(self.curr_x[2]) * np.sin(self.curr_x[2])
93 | - (self.config["mc"] + self.config["mp"]) * self.config["g"]
94 | * np.sin(self.curr_x[2])) \
95 | / (self.config["l"] * (self.config["mc"] + self.config["mp"]
96 | * (np.sin(self.curr_x[2])**2)))
97 |
98 | next_x = self.curr_x +\
99 | np.array([d_x0, d_x1, d_x2, d_x3]) * self.config["dt"]
100 |
101 | # TODO: costs
102 | costs = 0.
103 | costs += 0.1 * np.sum(u**2)
104 | costs += 6. * self.curr_x[0]**2 \
105 | + 12. * (np.cos(self.curr_x[2]) + 1.)**2 \
106 | + 0.1 * self.curr_x[1]**2 \
107 | + 0.1 * self.curr_x[3]**2
108 |
109 | # save history
110 | self.history_x.append(next_x.flatten())
111 | self.history_g_x.append(self.g_x.flatten())
112 |
113 | # update
114 | self.curr_x = next_x.flatten().copy()
115 | # update costs
116 | self.step_count += 1
117 |
118 | return next_x.flatten(), costs, \
119 | self.step_count > self.config["max_step"], \
120 | {"goal_state": self.g_x}
121 |
122 | def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None):
123 | """ plot cartpole object function
124 |
125 | Args:
126 | to_plot (axis or imgs): plotted objects
127 | i (int): frame count
128 | history_x (numpy.ndarray): history of state, shape(iters, state)
129 | history_g_x (numpy.ndarray): history of goal state,
130 | shape(iters, state)
131 | Returns:
132 | None or imgs : imgs order is ["cart_img", "pole_img"]
133 | """
134 | if isinstance(to_plot, Axes):
135 | imgs = {} # create new imgs
136 |
137 | imgs["cart"] = to_plot.plot([], [], c="k")[0]
138 | imgs["pole"] = to_plot.plot([], [], c="k", linewidth=5)[0]
139 | imgs["center"] = to_plot.plot([], [], marker="o", c="k",
140 | markersize=10)[0]
141 | # centerline
142 | to_plot.plot(np.linspace(-1., 1., num=50), np.zeros(50),
143 | c="k", linestyle="dashed")
144 |
145 | # set axis
146 | to_plot.set_xlim([-1., 1.])
147 | to_plot.set_ylim([-0.55, 1.5])
148 |
149 | return imgs
150 |
151 | # set imgs
152 | cart_x, cart_y, pole_x, pole_y = \
153 | self._plot_cartpole(history_x[i])
154 |
155 | to_plot["cart"].set_data(cart_x, cart_y)
156 | to_plot["pole"].set_data(pole_x, pole_y)
157 | to_plot["center"].set_data(history_x[i][0], 0.)
158 |
159 | def _plot_cartpole(self, curr_x):
160 | """ plot cartpole fucntions
161 |
162 | Args:
163 | curr_x (numpy.ndarray): current catpole state
164 | Returns:
165 | cart_x (numpy.ndarray): x data of cart
166 | cart_y (numpy.ndarray): y data of cart
167 | pole_x (numpy.ndarray): x data of pole
168 | pole_y (numpy.ndarray): y data of pole
169 | """
170 | # cart
171 | cart_x, cart_y = square(curr_x[0], 0.,
172 | self.config["cart_size"], 0.)
173 |
174 | # pole
175 | pole_x = np.array([curr_x[0], curr_x[0] + self.config["l"]
176 | * np.cos(curr_x[2]-np.pi/2)])
177 | pole_y = np.array([0., self.config["l"]
178 | * np.sin(curr_x[2]-np.pi/2)])
179 |
180 | return cart_x, cart_y, pole_x, pole_y
181 |
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/configs/first_order_lag.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | class FirstOrderLagConfigModule():
5 | # parameters
6 | ENV_NAME = "FirstOrderLag-v0"
7 | TYPE = "Linear"
8 | TASK_HORIZON = 1000
9 | PRED_LEN = 50
10 | STATE_SIZE = 4
11 | INPUT_SIZE = 2
12 | DT = 0.05
13 | # cost parameters
14 | R = np.eye(INPUT_SIZE)
15 | Q = np.eye(STATE_SIZE)
16 | Sf = np.eye(STATE_SIZE)
17 | # bounds
18 | INPUT_LOWER_BOUND = np.array([-0.5, -0.5])
19 | INPUT_UPPER_BOUND = np.array([0.5, 0.5])
20 | # DT_INPUT_LOWER_BOUND = np.array([-0.5 * DT, -0.5 * DT])
21 | # DT_INPUT_UPPER_BOUND = np.array([0.25 * DT, 0.25 * DT])
22 | DT_INPUT_LOWER_BOUND = None
23 | DT_INPUT_UPPER_BOUND = None
24 |
25 | def __init__(self):
26 | """
27 | """
28 | # opt configs
29 | self.opt_config = {
30 | "Random": {
31 | "popsize": 5000
32 | },
33 | "CEM": {
34 | "popsize": 500,
35 | "num_elites": 50,
36 | "max_iters": 15,
37 | "alpha": 0.3,
38 | "init_var": 1.,
39 | "threshold": 0.001
40 | },
41 | "MPPI": {
42 | "beta": 0.6,
43 | "popsize": 5000,
44 | "kappa": 0.9,
45 | "noise_sigma": 0.5,
46 | },
47 | "MPPIWilliams": {
48 | "popsize": 5000,
49 | "lambda": 1.,
50 | "noise_sigma": 0.9,
51 | },
52 | "MPC": {
53 | },
54 | "iLQR": {
55 | "max_iters": 500,
56 | "init_mu": 1.,
57 | "mu_min": 1e-6,
58 | "mu_max": 1e10,
59 | "init_delta": 2.,
60 | "threshold": 1e-6,
61 | },
62 | "DDP": {
63 | "max_iters": 500,
64 | "init_mu": 1.,
65 | "mu_min": 1e-6,
66 | "mu_max": 1e10,
67 | "init_delta": 2.,
68 | "threshold": 1e-6,
69 | },
70 | "NMPC-CGMRES": {
71 | },
72 | "NMPC-Newton": {
73 | },
74 | }
75 |
76 | @staticmethod
77 | def input_cost_fn(u):
78 | """ input cost functions
79 | Args:
80 | u (numpy.ndarray): input, shape(pred_len, input_size)
81 | or shape(pop_size, pred_len, input_size)
82 | Returns:
83 | cost (numpy.ndarray): cost of input, shape(pred_len, input_size) or
84 | shape(pop_size, pred_len, input_size)
85 | """
86 | return (u**2) * np.diag(FirstOrderLagConfigModule.R)
87 |
88 | @staticmethod
89 | def state_cost_fn(x, g_x):
90 | """ state cost function
91 | Args:
92 | x (numpy.ndarray): state, shape(pred_len, state_size)
93 | or shape(pop_size, pred_len, state_size)
94 | g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
95 | or shape(pop_size, pred_len, state_size)
96 | Returns:
97 | cost (numpy.ndarray): cost of state, shape(pred_len, state_size) or
98 | shape(pop_size, pred_len, state_size)
99 | """
100 | return ((x - g_x)**2) * np.diag(FirstOrderLagConfigModule.Q)
101 |
102 | @staticmethod
103 | def terminal_state_cost_fn(terminal_x, terminal_g_x):
104 | """
105 | Args:
106 | terminal_x (numpy.ndarray): terminal state,
107 | shape(state_size, ) or shape(pop_size, state_size)
108 | terminal_g_x (numpy.ndarray): terminal goal state,
109 | shape(state_size, ) or shape(pop_size, state_size)
110 | Returns:
111 | cost (numpy.ndarray): cost of state, shape(pred_len, ) or
112 | shape(pop_size, pred_len)
113 | """
114 | return ((terminal_x - terminal_g_x)**2) \
115 | * np.diag(FirstOrderLagConfigModule.Sf)
116 |
117 | @staticmethod
118 | def gradient_cost_fn_state(x, g_x, terminal=False):
119 | """ gradient of costs with respect to the state
120 |
121 | Args:
122 | x (numpy.ndarray): state, shape(pred_len, state_size)
123 | g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
124 |
125 | Returns:
126 | l_x (numpy.ndarray): gradient of cost, shape(pred_len, state_size)
127 | or shape(1, state_size)
128 | """
129 | if not terminal:
130 | return 2. * (x - g_x) * np.diag(FirstOrderLagConfigModule.Q)
131 |
132 | return (2. * (x - g_x)
133 | * np.diag(FirstOrderLagConfigModule.Sf))[np.newaxis, :]
134 |
135 | @staticmethod
136 | def gradient_cost_fn_input(x, u):
137 | """ gradient of costs with respect to the input
138 |
139 | Args:
140 | x (numpy.ndarray): state, shape(pred_len, state_size)
141 | u (numpy.ndarray): goal state, shape(pred_len, input_size)
142 |
143 | Returns:
144 | l_u (numpy.ndarray): gradient of cost, shape(pred_len, input_size)
145 | """
146 | return 2. * u * np.diag(FirstOrderLagConfigModule.R)
147 |
148 | @staticmethod
149 | def hessian_cost_fn_state(x, g_x, terminal=False):
150 | """ hessian costs with respect to the state
151 |
152 | Args:
153 | x (numpy.ndarray): state, shape(pred_len, state_size)
154 | g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
155 |
156 | Returns:
157 | l_xx (numpy.ndarray): gradient of cost,
158 | shape(pred_len, state_size, state_size) or
159 | shape(1, state_size, state_size) or
160 | """
161 | if not terminal:
162 | (pred_len, _) = x.shape
163 | return np.tile(2.*FirstOrderLagConfigModule.Q, (pred_len, 1, 1))
164 |
165 | return np.tile(2.*FirstOrderLagConfigModule.Sf, (1, 1, 1))
166 |
167 | @staticmethod
168 | def hessian_cost_fn_input(x, u):
169 | """ hessian costs with respect to the input
170 |
171 | Args:
172 | x (numpy.ndarray): state, shape(pred_len, state_size)
173 | u (numpy.ndarray): goal state, shape(pred_len, input_size)
174 |
175 | Returns:
176 | l_uu (numpy.ndarray): gradient of cost,
177 | shape(pred_len, input_size, input_size)
178 | """
179 | (pred_len, _) = u.shape
180 |
181 | return np.tile(2.*FirstOrderLagConfigModule.R, (pred_len, 1, 1))
182 |
183 | @staticmethod
184 | def hessian_cost_fn_input_state(x, u):
185 | """ hessian costs with respect to the state and input
186 |
187 | Args:
188 | x (numpy.ndarray): state, shape(pred_len, state_size)
189 | u (numpy.ndarray): goal state, shape(pred_len, input_size)
190 |
191 | Returns:
192 | l_ux (numpy.ndarray): gradient of cost ,
193 | shape(pred_len, input_size, state_size)
194 | """
195 | (_, state_size) = x.shape
196 | (pred_len, input_size) = u.shape
197 |
198 | return np.zeros((pred_len, input_size, state_size))
199 |
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/models/nonlinear_sample_system.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from .model import Model
4 | from ..common.utils import update_state_with_Runge_Kutta
5 |
6 |
7 | class NonlinearSampleSystemModel(Model):
8 | """ nonlinear sample system model
9 | """
10 |
11 | def __init__(self, config):
12 | """
13 | """
14 | super(NonlinearSampleSystemModel, self).__init__()
15 | self.dt = config.DT
16 | self.gradient_hamiltonian_state = config.gradient_hamiltonian_state
17 | self.gradient_hamiltonian_input = config.gradient_hamiltonian_input
18 | self.gradient_cost_fn_state = config.gradient_cost_fn_state
19 |
20 | def predict_next_state(self, curr_x, u):
21 | """ predict next state
22 |
23 | Args:
24 | curr_x (numpy.ndarray): current state, shape(state_size, ) or
25 | shape(pop_size, state_size)
26 | u (numpy.ndarray): input, shape(input_size, ) or
27 | shape(pop_size, input_size)
28 | Returns:
29 | next_x (numpy.ndarray): next state, shape(state_size, ) or
30 | shape(pop_size, state_size)
31 | """
32 | if len(u.shape) == 1:
33 | func_1 = self._func_x_1
34 | func_2 = self._func_x_2
35 | functions = [func_1, func_2]
36 | next_x = update_state_with_Runge_Kutta(
37 | curr_x, u, functions, batch=False, dt=self.dt)
38 | return next_x
39 |
40 | elif len(u.shape) == 2:
41 | def func_1(xs, us): return self._func_x_1(xs, us, batch=True)
42 | def func_2(xs, us): return self._func_x_2(xs, us, batch=True)
43 | functions = [func_1, func_2]
44 | next_x = update_state_with_Runge_Kutta(
45 | curr_x, u, functions, batch=True, dt=self.dt)
46 |
47 | return next_x
48 |
49 | def x_dot(self, curr_x, u):
50 | """
51 | Args:
52 | curr_x (numpy.ndarray): current state, shape(state_size, )
53 | u (numpy.ndarray): input, shape(input_size, )
54 | Returns:
55 | x_dot (numpy.ndarray): next state, shape(state_size, )
56 | """
57 | state_size = curr_x.shape[0]
58 | x_dot = np.zeros(state_size)
59 | x_dot[0] = self._func_x_1(curr_x, u)
60 | x_dot[1] = self._func_x_2(curr_x, u)
61 | return x_dot
62 |
63 | def predict_adjoint_state(self, lam, x, u, g_x=None):
64 | """ predict adjoint states
65 |
66 | Args:
67 | lam (numpy.ndarray): adjoint state, shape(state_size, )
68 | x (numpy.ndarray): state, shape(state_size, )
69 | u (numpy.ndarray): input, shape(input_size, )
70 | goal (numpy.ndarray): goal state, shape(state_size, )
71 | Returns:
72 | prev_lam (numpy.ndarrya): previous adjoint state,
73 | shape(state_size, )
74 | """
75 | if len(u.shape) == 1:
76 | delta_lam = self.dt * \
77 | self.gradient_hamiltonian_state(x, lam, u, g_x)
78 | prev_lam = lam + delta_lam
79 | return prev_lam
80 |
81 | elif len(u.shape) == 2:
82 | raise ValueError
83 |
84 | def predict_terminal_adjoint_state(self, terminal_x, terminal_g_x=None):
85 | """ predict terminal adjoint state
86 |
87 | Args:
88 | terminal_x (numpy.ndarray): terminal state, shape(state_size, )
89 | terminal_g_x (numpy.ndarray): terminal goal state,
90 | shape(state_size, )
91 | Returns:
92 | terminal_lam (numpy.ndarray): terminal adjoint state,
93 | shape(state_size, )
94 | """
95 | terminal_lam = self.gradient_cost_fn_state(
96 | terminal_x, terminal_g_x, terminal=True) # return in shape[1, state_size]
97 | return terminal_lam[0]
98 |
99 | def _func_x_1(self, x, u, batch=False):
100 | if not batch:
101 | x_dot = x[1]
102 | else:
103 | x_dot = x[:, 1]
104 | return x_dot
105 |
106 | def _func_x_2(self, x, u, batch=False):
107 | if not batch:
108 | x_dot = (1. - x[0]**2 - x[1]**2) * x[1] - x[0] + u[0]
109 | else:
110 | x_dot = (1. - x[:, 0]**2 - x[:, 1]**2) * \
111 | x[:, 1] - x[:, 0] + u[:, 0]
112 | return x_dot
113 |
114 | def calc_f_x(self, xs, us, dt):
115 | """ gradient of model with respect to the state in batch form
116 | Args:
117 | xs (numpy.ndarray): state, shape(pred_len+1, state_size)
118 | us (numpy.ndarray): input, shape(pred_len, input_size,)
119 |
120 | Return:
121 | f_x (numpy.ndarray): gradient of model with respect to x,
122 | shape(pred_len, state_size, state_size)
123 |
124 | Notes:
125 | This should be discrete form !!
126 | """
127 | # get size
128 | (_, state_size) = xs.shape
129 | (pred_len, _) = us.shape
130 |
131 | f_x = np.zeros((pred_len, state_size, state_size))
132 | f_x[:, 0, 1] = 1.
133 | f_x[:, 1, 0] = 2. * xs[:, 0] * xs[:, 1] - 1.
134 | f_x[:, 1, 1] = - 2. * xs[:, 1] * xs[:, 1] + \
135 | (1. - xs[:, 0]**2 - xs[:, 1]**2)
136 |
137 | return f_x * dt + np.eye(state_size) # to discrete form
138 |
139 | def calc_f_u(self, xs, us, dt):
140 | """ gradient of model with respect to the input in batch form
141 | Args:
142 | xs (numpy.ndarray): state, shape(pred_len+1, state_size)
143 | us (numpy.ndarray): input, shape(pred_len, input_size,)
144 |
145 | Return:
146 | f_u (numpy.ndarray): gradient of model with respect to x,
147 | shape(pred_len, state_size, input_size)
148 |
149 | Notes:
150 | This should be discrete form !!
151 | """
152 | # get size
153 | (_, state_size) = xs.shape
154 | (pred_len, input_size) = us.shape
155 |
156 | f_u = np.zeros((pred_len, state_size, input_size))
157 |
158 | f_u[:, 1, 0] = 1.
159 |
160 | return f_u * dt # to discrete form
161 |
162 | def calc_f_xx(self, xs, us, dt):
163 | """ hessian of model with respect to the state in batch form
164 |
165 | Args:
166 | xs (numpy.ndarray): state, shape(pred_len+1, state_size)
167 | us (numpy.ndarray): input, shape(pred_len, input_size,)
168 |
169 | Return:
170 | f_xx (numpy.ndarray): gradient of model with respect to x,
171 | shape(pred_len, state_size, state_size, state_size)
172 | """
173 | # get size
174 | (_, state_size) = xs.shape
175 | (pred_len, _) = us.shape
176 |
177 | f_xx = np.zeros((pred_len, state_size, state_size, state_size))
178 |
179 | raise NotImplementedError
180 |
181 | def calc_f_ux(self, xs, us, dt):
182 | """ hessian of model with respect to state and input in batch form
183 |
184 | Args:
185 | xs (numpy.ndarray): state, shape(pred_len+1, state_size)
186 | us (numpy.ndarray): input, shape(pred_len, input_size,)
187 |
188 | Return:
189 | f_ux (numpy.ndarray): gradient of model with respect to x,
190 | shape(pred_len, state_size, input_size, state_size)
191 | """
192 | # get size
193 | (_, state_size) = xs.shape
194 | (pred_len, input_size) = us.shape
195 |
196 | f_ux = np.zeros((pred_len, state_size, input_size, state_size))
197 |
198 | raise NotImplementedError
199 |
200 | def calc_f_uu(self, xs, us, dt):
201 | """ hessian of model with respect to input in batch form
202 |
203 | Args:
204 | xs (numpy.ndarray): state, shape(pred_len+1, state_size)
205 | us (numpy.ndarray): input, shape(pred_len, input_size,)
206 |
207 | Return:
208 | f_uu (numpy.ndarray): gradient of model with respect to x,
209 | shape(pred_len, state_size, input_size, input_size)
210 | """
211 | # get size
212 | (_, state_size) = xs.shape
213 | (pred_len, input_size) = us.shape
214 |
215 | f_uu = np.zeros((pred_len, state_size, input_size, input_size))
216 |
217 | raise NotImplementedError
218 |
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/models/two_wheeled.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from .model import Model
4 |
5 |
6 | class TwoWheeledModel(Model):
7 | """ two wheeled model
8 | """
9 |
10 | def __init__(self, config):
11 | """
12 | """
13 | super(TwoWheeledModel, self).__init__()
14 | self.dt = config.DT
15 | self.gradient_hamiltonian_state = config.gradient_hamiltonian_state
16 | self.gradient_hamiltonian_input = config.gradient_hamiltonian_input
17 | self.gradient_cost_fn_state = config.gradient_cost_fn_state
18 |
19 | def predict_next_state(self, curr_x, u):
20 | """ predict next state
21 |
22 | Args:
23 | curr_x (numpy.ndarray): current state, shape(state_size, ) or
24 | shape(pop_size, state_size)
25 | u (numpy.ndarray): input, shape(input_size, ) or
26 | shape(pop_size, input_size)
27 | Returns:
28 | next_x (numpy.ndarray): next state, shape(state_size, ) or
29 | shape(pop_size, state_size)
30 | """
31 | if len(u.shape) == 1:
32 | B = np.array([[np.cos(curr_x[-1]), 0.],
33 | [np.sin(curr_x[-1]), 0.],
34 | [0., 1.]])
35 | # calc dot
36 | x_dot = np.matmul(B, u[:, np.newaxis])
37 | # next state
38 | next_x = x_dot.flatten() * self.dt + curr_x
39 |
40 | return next_x
41 |
42 | elif len(u.shape) == 2:
43 | (pop_size, state_size) = curr_x.shape
44 | (_, input_size) = u.shape
45 | # B.shape = (pop_size, state_size, input_size)
46 | B = np.zeros((pop_size, state_size, input_size))
47 | # insert
48 | B[:, 0, 0] = np.cos(curr_x[:, -1])
49 | B[:, 1, 0] = np.sin(curr_x[:, -1])
50 | B[:, 2, 1] = np.ones(pop_size)
51 |
52 | # x_dot.shape = (pop_size, state_size, 1)
53 | x_dot = np.matmul(B, u[:, :, np.newaxis])
54 | # next state
55 | next_x = x_dot[:, :, 0] * self.dt + curr_x
56 |
57 | return next_x
58 |
59 | def x_dot(self, curr_x, u):
60 | """ compute x dot
61 | Args:
62 | curr_x (numpy.ndarray): current state, shape(state_size, )
63 | u (numpy.ndarray): input, shape(input_size, )
64 | Returns:
65 | x_dot (numpy.ndarray): next state, shape(state_size, )
66 | """
67 | B = np.array([[np.cos(curr_x[-1]), 0.],
68 | [np.sin(curr_x[-1]), 0.],
69 | [0., 1.]])
70 | x_dot = np.matmul(B, u[:, np.newaxis])
71 | return x_dot.flatten()
72 |
73 | def predict_adjoint_state(self, lam, x, u, g_x=None, t=None):
74 | """ predict adjoint states
75 |
76 | Args:
77 | lam (numpy.ndarray): adjoint state, shape(state_size, )
78 | x (numpy.ndarray): state, shape(state_size, )
79 | u (numpy.ndarray): input, shape(input_size, )
80 | goal (numpy.ndarray): goal state, shape(state_size, )
81 | Returns:
82 | prev_lam (numpy.ndarrya): previous adjoint state,
83 | shape(state_size, )
84 | """
85 | if len(u.shape) == 1:
86 | delta_lam = self.dt * \
87 | self.gradient_hamiltonian_state(x, lam, u, g_x)
88 | prev_lam = lam + delta_lam
89 | return prev_lam
90 |
91 | elif len(u.shape) == 2:
92 | raise ValueError
93 |
94 | def predict_terminal_adjoint_state(self, terminal_x, terminal_g_x=None):
95 | """ predict terminal adjoint state
96 |
97 | Args:
98 | terminal_x (numpy.ndarray): terminal state, shape(state_size, )
99 | terminal_g_x (numpy.ndarray): terminal goal state,
100 | shape(state_size, )
101 | Returns:
102 | terminal_lam (numpy.ndarray): terminal adjoint state,
103 | shape(state_size, )
104 | """
105 | terminal_lam = self.gradient_cost_fn_state(
106 | terminal_x, terminal_g_x, terminal=True) # return in shape[1, state_size]
107 | return terminal_lam[0]
108 |
109 | @staticmethod
110 | def calc_f_x(xs, us, dt):
111 | """ gradient of model with respect to the state in batch form
112 | Args:
113 | xs (numpy.ndarray): state, shape(pred_len+1, state_size)
114 | us (numpy.ndarray): input, shape(pred_len, input_size,)
115 |
116 | Return:
117 | f_x (numpy.ndarray): gradient of model with respect to x,
118 | shape(pred_len, state_size, state_size)
119 |
120 | Notes:
121 | This should be discrete form !!
122 | """
123 | # get size
124 | (_, state_size) = xs.shape
125 | (pred_len, _) = us.shape
126 |
127 | f_x = np.zeros((pred_len, state_size, state_size))
128 | f_x[:, 0, 2] = -np.sin(xs[:, 2]) * us[:, 0]
129 | f_x[:, 1, 2] = np.cos(xs[:, 2]) * us[:, 0]
130 |
131 | return f_x * dt + np.eye(state_size) # to discrete form
132 |
133 | @staticmethod
134 | def calc_f_u(xs, us, dt):
135 | """ gradient of model with respect to the input in batch form
136 | Args:
137 | xs (numpy.ndarray): state, shape(pred_len+1, state_size)
138 | us (numpy.ndarray): input, shape(pred_len, input_size,)
139 |
140 | Return:
141 | f_u (numpy.ndarray): gradient of model with respect to x,
142 | shape(pred_len, state_size, input_size)
143 |
144 | Notes:
145 | This should be discrete form !!
146 | """
147 | # get size
148 | (_, state_size) = xs.shape
149 | (pred_len, input_size) = us.shape
150 |
151 | f_u = np.zeros((pred_len, state_size, input_size))
152 | f_u[:, 0, 0] = np.cos(xs[:, 2])
153 | f_u[:, 1, 0] = np.sin(xs[:, 2])
154 | f_u[:, 2, 1] = 1.
155 |
156 | return f_u * dt # to discrete form
157 |
158 | @staticmethod
159 | def calc_f_xx(xs, us, dt):
160 | """ hessian of model with respect to the state in batch form
161 |
162 | Args:
163 | xs (numpy.ndarray): state, shape(pred_len+1, state_size)
164 | us (numpy.ndarray): input, shape(pred_len, input_size,)
165 |
166 | Return:
167 | f_xx (numpy.ndarray): gradient of model with respect to x,
168 | shape(pred_len, state_size, state_size, state_size)
169 | """
170 | # get size
171 | (_, state_size) = xs.shape
172 | (pred_len, _) = us.shape
173 |
174 | f_xx = np.zeros((pred_len, state_size, state_size, state_size))
175 |
176 | f_xx[:, 0, 2, 2] = -np.cos(xs[:, 2]) * us[:, 0]
177 | f_xx[:, 1, 2, 2] = -np.sin(xs[:, 2]) * us[:, 0]
178 |
179 | return f_xx * dt
180 |
181 | @staticmethod
182 | def calc_f_ux(xs, us, dt):
183 | """ hessian of model with respect to state and input in batch form
184 |
185 | Args:
186 | xs (numpy.ndarray): state, shape(pred_len+1, state_size)
187 | us (numpy.ndarray): input, shape(pred_len, input_size,)
188 |
189 | Return:
190 | f_ux (numpy.ndarray): gradient of model with respect to x,
191 | shape(pred_len, state_size, input_size, state_size)
192 | """
193 | # get size
194 | (_, state_size) = xs.shape
195 | (pred_len, input_size) = us.shape
196 |
197 | f_ux = np.zeros((pred_len, state_size, input_size, state_size))
198 |
199 | f_ux[:, 0, 0, 2] = -np.sin(xs[:, 2])
200 | f_ux[:, 1, 0, 2] = np.cos(xs[:, 2])
201 |
202 | return f_ux * dt
203 |
204 | @staticmethod
205 | def calc_f_uu(xs, us, dt):
206 | """ hessian of model with respect to input in batch form
207 |
208 | Args:
209 | xs (numpy.ndarray): state, shape(pred_len+1, state_size)
210 | us (numpy.ndarray): input, shape(pred_len, input_size,)
211 |
212 | Return:
213 | f_uu (numpy.ndarray): gradient of model with respect to x,
214 | shape(pred_len, state_size, input_size, input_size)
215 | """
216 | # get size
217 | (_, state_size) = xs.shape
218 | (pred_len, input_size) = us.shape
219 |
220 | f_uu = np.zeros((pred_len, state_size, input_size, input_size))
221 |
222 | return f_uu * dt
223 |
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/models/cartpole.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from .model import Model
4 |
5 |
6 | class CartPoleModel(Model):
7 | """ cartpole model
8 | """
9 |
10 | def __init__(self, config):
11 | """
12 | """
13 | super(CartPoleModel, self).__init__()
14 | self.dt = config.DT
15 | self.mc = config.MC
16 | self.mp = config.MP
17 | self.l = config.L
18 | self.g = config.G
19 |
20 | def predict_next_state(self, curr_x, u):
21 | """ predict next state
22 |
23 | Args:
24 | curr_x (numpy.ndarray): current state, shape(state_size, ) or
25 | shape(pop_size, state_size)
26 | u (numpy.ndarray): input, shape(input_size, ) or
27 | shape(pop_size, input_size)
28 | Returns:
29 | next_x (numpy.ndarray): next state, shape(state_size, ) or
30 | shape(pop_size, state_size)
31 | """
32 | if len(u.shape) == 1:
33 | # x
34 | d_x0 = curr_x[1]
35 | # v_x
36 | d_x1 = (u[0] + self.mp * np.sin(curr_x[2])
37 | * (self.l * (curr_x[3]**2)
38 | + self.g * np.cos(curr_x[2]))) \
39 | / (self.mc + self.mp * (np.sin(curr_x[2])**2))
40 | # theta
41 | d_x2 = curr_x[3]
42 | # v_theta
43 | d_x3 = (-u[0] * np.cos(curr_x[2])
44 | - self.mp * self.l * (curr_x[3]**2)
45 | * np.cos(curr_x[2]) * np.sin(curr_x[2])
46 | - (self.mc + self.mp) * self.g * np.sin(curr_x[2])) \
47 | / (self.l * (self.mc + self.mp * (np.sin(curr_x[2])**2)))
48 |
49 | next_x = curr_x +\
50 | np.array([d_x0, d_x1, d_x2, d_x3]) * self.dt
51 |
52 | return next_x
53 |
54 | elif len(u.shape) == 2:
55 | # x
56 | d_x0 = curr_x[:, 1]
57 | # v_x
58 | d_x1 = (u[:, 0] + self.mp * np.sin(curr_x[:, 2])
59 | * (self.l * (curr_x[:, 3]**2)
60 | + self.g * np.cos(curr_x[:, 2]))) \
61 | / (self.mc + self.mp * (np.sin(curr_x[:, 2])**2))
62 | # theta
63 | d_x2 = curr_x[:, 3]
64 | # v_theta
65 | d_x3 = (-u[:, 0] * np.cos(curr_x[:, 2])
66 | - self.mp * self.l * (curr_x[:, 3]**2)
67 | * np.cos(curr_x[:, 2]) * np.sin(curr_x[:, 2])
68 | - (self.mc + self.mp) * self.g * np.sin(curr_x[:, 2])) \
69 | / (self.l * (self.mc + self.mp * (np.sin(curr_x[:, 2])**2)))
70 |
71 | next_x = curr_x +\
72 | np.stack((d_x0, d_x1, d_x2, d_x3), axis=1) * self.dt
73 |
74 | return next_x
75 |
76 | def calc_f_x(self, xs, us, dt):
77 | """ gradient of model with respect to the state in batch form
78 | Args:
79 | xs (numpy.ndarray): state, shape(pred_len+1, state_size)
80 | us (numpy.ndarray): input, shape(pred_len, input_size,)
81 |
82 | Return:
83 | f_x (numpy.ndarray): gradient of model with respect to x,
84 | shape(pred_len, state_size, state_size)
85 |
86 | Notes:
87 | This should be discrete form !!
88 | """
89 | # get size
90 | (_, state_size) = xs.shape
91 | (pred_len, _) = us.shape
92 |
93 | f_x = np.zeros((pred_len, state_size, state_size))
94 |
95 | # f_x_dot
96 | f_x[:, 0, 1] = np.ones(pred_len)
97 |
98 | # f_theta
99 | tmp = ((self.mc + self.mp * np.sin(xs[:, 2])**2)**(-2)) \
100 | * self.mp * 2. * np.sin(xs[:, 2]) * np.cos(xs[:, 2])
101 | tmp2 = 1. / (self.mc + self.mp * (np.sin(xs[:, 2])**2))
102 |
103 | f_x[:, 1, 2] = - us[:, 0] * tmp \
104 | - tmp * (self.mp * np.sin(xs[:, 2])
105 | * (self.l * xs[:, 3]**2
106 | + self.g * np.cos(xs[:, 2]))) \
107 | + tmp2 * (self.mp * np.cos(xs[:, 2]) * self.l
108 | * xs[:, 3]**2
109 | + self.mp * self.g * (np.cos(xs[:, 2])**2
110 | - np.sin(xs[:, 2])**2))
111 | f_x[:, 3, 2] = - 1. / self.l * tmp \
112 | * (-us[:, 0] * np.cos(xs[:, 2])
113 | - self.mp * self.l * (xs[:, 3]**2)
114 | * np.cos(xs[:, 2]) * np.sin(xs[:, 2])
115 | - (self.mc + self.mp) * self.g * np.sin(xs[:, 2])) \
116 | + 1. / self.l * tmp2 \
117 | * (us[:, 0] * np.sin(xs[:, 2])
118 | - self.mp * self.l * xs[:, 3]**2
119 | * (np.cos(xs[:, 2])**2 - np.sin(xs[:, 2])**2)
120 | - (self.mc + self.mp)
121 | * self.g * np.cos(xs[:, 2]))
122 |
123 | # f_theta_dot
124 | f_x[:, 1, 3] = tmp2 * (self.mp * np.sin(xs[:, 2])
125 | * self.l * 2 * xs[:, 3])
126 | f_x[:, 2, 3] = np.ones(pred_len)
127 | f_x[:, 3, 3] = 1. / self.l * tmp2 \
128 | * (-2. * self.mp * self.l * xs[:, 3]
129 | * np.cos(xs[:, 2]) * np.sin(xs[:, 2]))
130 |
131 | return f_x * dt + np.eye(state_size) # to discrete form
132 |
133 | def calc_f_u(self, xs, us, dt):
134 | """ gradient of model with respect to the input in batch form
135 | Args:
136 | xs (numpy.ndarray): state, shape(pred_len+1, state_size)
137 | us (numpy.ndarray): input, shape(pred_len, input_size,)
138 |
139 | Return:
140 | f_u (numpy.ndarray): gradient of model with respect to x,
141 | shape(pred_len, state_size, input_size)
142 |
143 | Notes:
144 | This should be discrete form !!
145 | """
146 | # get size
147 | (_, state_size) = xs.shape
148 | (pred_len, input_size) = us.shape
149 |
150 | f_u = np.zeros((pred_len, state_size, input_size))
151 |
152 | f_u[:, 1, 0] = 1. / (self.mc + self.mp * (np.sin(xs[:, 2])**2))
153 |
154 | f_u[:, 3, 0] = -np.cos(xs[:, 2]) \
155 | / (self.l * (self.mc
156 | + self.mp * (np.sin(xs[:, 2])**2)))
157 |
158 | return f_u * dt # to discrete form
159 |
160 | def calc_f_xx(self, xs, us, dt):
161 | """ hessian of model with respect to the state in batch form
162 |
163 | Args:
164 | xs (numpy.ndarray): state, shape(pred_len+1, state_size)
165 | us (numpy.ndarray): input, shape(pred_len, input_size,)
166 |
167 | Return:
168 | f_xx (numpy.ndarray): gradient of model with respect to x,
169 | shape(pred_len, state_size, state_size, state_size)
170 | """
171 | # get size
172 | (_, state_size) = xs.shape
173 | (pred_len, _) = us.shape
174 |
175 | f_xx = np.zeros((pred_len, state_size, state_size, state_size))
176 |
177 | raise NotImplementedError
178 |
179 | def calc_f_ux(self, xs, us, dt):
180 | """ hessian of model with respect to state and input in batch form
181 |
182 | Args:
183 | xs (numpy.ndarray): state, shape(pred_len+1, state_size)
184 | us (numpy.ndarray): input, shape(pred_len, input_size,)
185 |
186 | Return:
187 | f_ux (numpy.ndarray): gradient of model with respect to x,
188 | shape(pred_len, state_size, input_size, state_size)
189 | """
190 | # get size
191 | (_, state_size) = xs.shape
192 | (pred_len, input_size) = us.shape
193 |
194 | f_ux = np.zeros((pred_len, state_size, input_size, state_size))
195 |
196 | raise NotImplementedError
197 |
198 | def calc_f_uu(self, xs, us, dt):
199 | """ hessian of model with respect to input in batch form
200 |
201 | Args:
202 | xs (numpy.ndarray): state, shape(pred_len+1, state_size)
203 | us (numpy.ndarray): input, shape(pred_len, input_size,)
204 |
205 | Return:
206 | f_uu (numpy.ndarray): gradient of model with respect to x,
207 | shape(pred_len, state_size, input_size, input_size)
208 | """
209 | # get size
210 | (_, state_size) = xs.shape
211 | (pred_len, input_size) = us.shape
212 |
213 | f_uu = np.zeros((pred_len, state_size, input_size, input_size))
214 |
215 | raise NotImplementedError
216 |
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/controllers/mpc.py:
--------------------------------------------------------------------------------
1 | from logging import getLogger
2 |
3 | import numpy as np
4 | from scipy.optimize import minimize
5 | from scipy.optimize import LinearConstraint
6 |
7 | from .controller import Controller
8 | from ..envs.cost import calc_cost
9 |
10 | logger = getLogger(__name__)
11 |
12 |
13 | class LinearMPC(Controller):
14 | """ Model Predictive Controller for linear model
15 |
16 | Attributes:
17 | A (numpy.ndarray): system matrix, shape(state_size, state_size)
18 | B (numpy.ndarray): input matrix, shape(state_size, input_size)
19 | Q (numpy.ndarray): cost function weight for states
20 | R (numpy.ndarray): cost function weight for states
21 | history_us (list[numpy.ndarray]): time history of optimal input
22 | Ref:
23 | Maciejowski, J. M. (2002). Predictive control: with constraints.
24 | """
25 |
26 | def __init__(self, config, model):
27 | """
28 | Args:
29 | model (Model): system matrix, shape(state_size, state_size)
30 | config (ConfigModule): input matrix, shape(state_size, input_size)
31 | """
32 | if config.TYPE != "Linear":
33 | raise ValueError("{} could be not applied to \
34 | this controller".format(model))
35 | super(LinearMPC, self).__init__(config, model)
36 | # system parameters
37 | self.model = model
38 | self.A = model.A
39 | self.B = model.B
40 | self.state_size = config.STATE_SIZE
41 | self.input_size = config.INPUT_SIZE
42 | self.pred_len = config.PRED_LEN
43 |
44 | # get cost func
45 | self.state_cost_fn = config.state_cost_fn
46 | self.terminal_state_cost_fn = config.terminal_state_cost_fn
47 | self.input_cost_fn = config.input_cost_fn
48 |
49 | # cost parameters
50 | self.Q = config.Q
51 | self.R = config.R
52 | self.Qs = None
53 | self.Rs = None
54 |
55 | # constraints
56 | self.dt_input_lower_bound = config.DT_INPUT_LOWER_BOUND
57 | self.dt_input_upper_bound = config.DT_INPUT_UPPER_BOUND
58 | self.input_lower_bound = config.INPUT_LOWER_BOUND
59 | self.input_upper_bound = config.INPUT_UPPER_BOUND
60 |
61 | # setup controllers
62 | self.W = None
63 | self.omega = None
64 | self.F = None
65 | self.f = None
66 | self.setup()
67 | self.prev_sol = np.zeros(self.input_size*self.pred_len)
68 |
69 | # history
70 | self.history_u = [np.zeros(self.input_size)]
71 |
72 | def setup(self):
73 | """
74 | setup Model Predictive Control as a quadratic programming
75 | """
76 | A_factorials = [self.A]
77 | self.phi_mat = self.A.copy()
78 |
79 | for _ in range(self.pred_len - 1):
80 | temp_mat = np.matmul(A_factorials[-1], self.A)
81 | self.phi_mat = np.vstack((self.phi_mat, temp_mat))
82 | A_factorials.append(temp_mat) # after we use this factorials
83 |
84 | self.gamma_mat = self.B.copy()
85 | gammma_mat_temp = self.B.copy()
86 |
87 | for i in range(self.pred_len - 1):
88 | temp_1_mat = np.matmul(A_factorials[i], self.B)
89 | gammma_mat_temp = temp_1_mat + gammma_mat_temp
90 | self.gamma_mat = np.vstack((self.gamma_mat, gammma_mat_temp))
91 |
92 | self.theta_mat = self.gamma_mat.copy()
93 |
94 | for i in range(self.pred_len - 1):
95 | temp_mat = np.zeros_like(self.gamma_mat)
96 | temp_mat[int((i + 1)*self.state_size):, :] =\
97 | self.gamma_mat[:-int((i + 1)*self.state_size), :]
98 |
99 | self.theta_mat = np.hstack((self.theta_mat, temp_mat))
100 |
101 | # evaluation function weight
102 | diag_Qs = np.tile(np.diag(self.Q), self.pred_len)
103 | diag_Rs = np.tile(np.diag(self.R), self.pred_len)
104 | self.Qs = np.diag(diag_Qs)
105 | self.Rs = np.diag(diag_Rs)
106 |
107 | # constraints
108 | # about inputs
109 | if self.input_lower_bound is not None:
110 | self.F = np.zeros((self.input_size * 2,
111 | self.pred_len * self.input_size))
112 |
113 | for i in range(self.input_size):
114 | self.F[i * 2: (i + 1) * 2, i] = np.array([1., -1.])
115 | temp_F = self.F.copy()
116 |
117 | for i in range(self.pred_len - 1):
118 | for j in range(self.input_size):
119 | temp_F[j * 2: (j + 1) * 2,
120 | ((i+1) * self.input_size) + j] = np.array([1., -1.])
121 | self.F = np.vstack((self.F, temp_F))
122 |
123 | self.F1 = self.F[:, :self.input_size]
124 |
125 | temp_f = []
126 | for i in range(self.input_size):
127 | temp_f.append(-1 * self.input_upper_bound[i])
128 | temp_f.append(self.input_lower_bound[i])
129 |
130 | self.f = np.tile(np.array(temp_f).flatten(), self.pred_len)
131 |
132 | # about dt_input constraints
133 | if self.dt_input_lower_bound is not None:
134 | self.W = np.zeros((2, self.pred_len * self.input_size))
135 | self.W[:, 0] = np.array([1., -1.])
136 |
137 | for i in range(self.pred_len * self.input_size - 1):
138 | temp_W = np.zeros((2, self.pred_len * self.input_size))
139 | temp_W[:, i+1] = np.array([1., -1.])
140 | self.W = np.vstack((self.W, temp_W))
141 |
142 | temp_omega = []
143 |
144 | for i in range(self.input_size):
145 | temp_omega.append(self.dt_input_upper_bound[i])
146 | temp_omega.append(-1. * self.dt_input_lower_bound[i])
147 |
148 | self.omega = np.tile(np.array(temp_omega).flatten(),
149 | self.pred_len)
150 |
151 | def obtain_sol(self, curr_x, g_xs):
152 | """ calculate the optimal inputs
153 |
154 | Args:
155 | curr_x (numpy.ndarray): current state, shape(state_size, )
156 | g_xs (numpy.ndarrya): goal trajectory,
157 | shape(plan_len+1, state_size)
158 | Returns:
159 | opt_input (numpy.ndarray): optimal input, shape(input_size, )
160 | """
161 | temp_1 = np.matmul(self.phi_mat, curr_x.reshape(-1, 1))
162 | temp_2 = np.matmul(self.gamma_mat, self.history_u[-1].reshape(-1, 1))
163 |
164 | error = g_xs[1:].reshape(-1, 1) - temp_1 - temp_2
165 |
166 | G = np.matmul(self.theta_mat.T, np.matmul(self.Qs, error))
167 |
168 | H = np.matmul(self.theta_mat.T, np.matmul(self.Qs, self.theta_mat)) \
169 | + self.Rs
170 | H = H * 0.5
171 |
172 | # constraints
173 | A = []
174 | b = []
175 |
176 | if self.W is not None:
177 | A.append(self.W)
178 | b.append(self.omega.reshape(-1, 1))
179 |
180 | if self.F is not None:
181 | b_F = - np.matmul(self.F1, self.history_u[-1].reshape(-1, 1)) \
182 | - self.f.reshape(-1, 1)
183 | A.append(self.F)
184 | b.append(b_F)
185 |
186 | A = np.array(A).reshape(-1, self.input_size * self.pred_len)
187 |
188 | ub = np.array(b).flatten()
189 |
190 | # using cvxopt
191 | def optimized_func(dt_us):
192 | return (np.dot(dt_us, np.dot(H, dt_us.reshape(-1, 1)))
193 | - np.dot(G.T, dt_us.reshape(-1, 1)))[0]
194 |
195 | # constraint
196 | lb = np.array([-np.inf for _ in range(len(ub))]) # one side cons
197 | cons = LinearConstraint(A, lb, ub)
198 | # solve
199 | opt_sol = minimize(optimized_func, self.prev_sol.flatten(),
200 | constraints=[cons])
201 | opt_dt_us = opt_sol.x
202 |
203 | """ using cvxopt ver,
204 | if you want to solve more quick please use cvxopt instead of scipy
205 |
206 | # make cvxpy problem formulation
207 | P = 2*matrix(H)
208 | q = matrix(-1 * G)
209 | A = matrix(A)
210 | b = matrix(ub)
211 |
212 | # solve the problem
213 | opt_sol = solvers.qp(P, q, G=A, h=b)
214 | opt_dt_us = np.array(list(opt_sol['x']))
215 | """
216 |
217 | # to dt form
218 | opt_dt_u_seq = np.cumsum(opt_dt_us.reshape(self.pred_len,
219 | self.input_size),
220 | axis=0)
221 | self.prev_sol = opt_dt_u_seq.copy()
222 |
223 | opt_u_seq = opt_dt_u_seq + self.history_u[-1]
224 |
225 | # save
226 | self.history_u.append(opt_u_seq[0])
227 |
228 | # check costs
229 | costs = self.calc_cost(curr_x,
230 | opt_u_seq.reshape(1,
231 | self.pred_len,
232 | self.input_size),
233 | g_xs)
234 |
235 | logger.debug("Cost = {}".format(costs))
236 |
237 | return opt_u_seq[0]
238 |
239 | def __str__(self):
240 | return "LinearMPC"
241 |
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/configs/cartpole.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | class CartPoleConfigModule():
5 | # parameters
6 | ENV_NAME = "CartPole-v0"
7 | PLANNER_TYPE = "Const"
8 | TYPE = "Nonlinear"
9 | TASK_HORIZON = 500
10 | PRED_LEN = 50
11 | STATE_SIZE = 4
12 | INPUT_SIZE = 1
13 | DT = 0.02
14 | # cost parameters
15 | R = np.diag([0.01]) # 0.01 is worked for MPPI and CEM and MPPIWilliams
16 | # 1. is worked for iLQR
17 | TERMINAL_WEIGHT = 1.
18 | Q = None
19 | Sf = None
20 | # bounds
21 | INPUT_LOWER_BOUND = np.array([-3.])
22 | INPUT_UPPER_BOUND = np.array([3.])
23 | # parameters
24 | MP = 0.2
25 | MC = 1.
26 | L = 0.5
27 | G = 9.81
28 | CART_SIZE = (0.15, 0.1)
29 |
30 | def __init__(self):
31 | """
32 | """
33 | # opt configs
34 | self.opt_config = {
35 | "Random": {
36 | "popsize": 5000
37 | },
38 | "CEM": {
39 | "popsize": 500,
40 | "num_elites": 50,
41 | "max_iters": 15,
42 | "alpha": 0.3,
43 | "init_var": 9.,
44 | "threshold": 0.001
45 | },
46 | "MPPI": {
47 | "beta": 0.6,
48 | "popsize": 5000,
49 | "kappa": 0.9,
50 | "noise_sigma": 0.5,
51 | },
52 | "MPPIWilliams": {
53 | "popsize": 5000,
54 | "lambda": 1.,
55 | "noise_sigma": 0.9,
56 | },
57 | "iLQR": {
58 | "max_iter": 500,
59 | "init_mu": 1.,
60 | "mu_min": 1e-6,
61 | "mu_max": 1e10,
62 | "init_delta": 2.,
63 | "threshold": 1e-6,
64 | },
65 | "DDP": {
66 | "max_iter": 500,
67 | "init_mu": 1.,
68 | "mu_min": 1e-6,
69 | "mu_max": 1e10,
70 | "init_delta": 2.,
71 | "threshold": 1e-6,
72 | },
73 | "NMPC-CGMRES": {
74 | },
75 | "NMPC-Newton": {
76 | },
77 | }
78 |
79 | @staticmethod
80 | def input_cost_fn(u):
81 | """ input cost functions
82 |
83 | Args:
84 | u (numpy.ndarray): input, shape(pred_len, input_size)
85 | or shape(pop_size, pred_len, input_size)
86 | Returns:
87 | cost (numpy.ndarray): cost of input, shape(pred_len, input_size) or
88 | shape(pop_size, pred_len, input_size)
89 | """
90 | return (u**2) * np.diag(CartPoleConfigModule.R)
91 |
92 | @staticmethod
93 | def state_cost_fn(x, g_x):
94 | """ state cost function
95 |
96 | Args:
97 | x (numpy.ndarray): state, shape(pred_len, state_size)
98 | or shape(pop_size, pred_len, state_size)
99 | g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
100 | or shape(pop_size, pred_len, state_size)
101 | Returns:
102 | cost (numpy.ndarray): cost of state, shape(pred_len, 1) or
103 | shape(pop_size, pred_len, 1)
104 | """
105 |
106 | if len(x.shape) > 2:
107 | return (6. * (x[:, :, 0]**2)
108 | + 12. * ((np.cos(x[:, :, 2]) + 1.)**2)
109 | + 0.1 * (x[:, :, 1]**2)
110 | + 0.1 * (x[:, :, 3]**2))[:, :, np.newaxis]
111 |
112 | elif len(x.shape) > 1:
113 | return (6. * (x[:, 0]**2)
114 | + 12. * ((np.cos(x[:, 2]) + 1.)**2)
115 | + 0.1 * (x[:, 1]**2)
116 | + 0.1 * (x[:, 3]**2))[:, np.newaxis]
117 |
118 | return 6. * (x[0]**2) \
119 | + 12. * ((np.cos(x[2]) + 1.)**2) \
120 | + 0.1 * (x[1]**2) \
121 | + 0.1 * (x[3]**2)
122 |
123 | @staticmethod
124 | def terminal_state_cost_fn(terminal_x, terminal_g_x):
125 | """
126 |
127 | Args:
128 | terminal_x (numpy.ndarray): terminal state,
129 | shape(state_size, ) or shape(pop_size, state_size)
130 | terminal_g_x (numpy.ndarray): terminal goal state,
131 | shape(state_size, ) or shape(pop_size, state_size)
132 | Returns:
133 | cost (numpy.ndarray): cost of state, shape(pred_len, ) or
134 | shape(pop_size, pred_len)
135 | """
136 |
137 | if len(terminal_x.shape) > 1:
138 | return (6. * (terminal_x[:, 0]**2)
139 | + 12. * ((np.cos(terminal_x[:, 2]) + 1.)**2)
140 | + 0.1 * (terminal_x[:, 1]**2)
141 | + 0.1 * (terminal_x[:, 3]**2))[:, np.newaxis] \
142 | * CartPoleConfigModule.TERMINAL_WEIGHT
143 |
144 | return (6. * (terminal_x[0]**2)
145 | + 12. * ((np.cos(terminal_x[2]) + 1.)**2)
146 | + 0.1 * (terminal_x[1]**2)
147 | + 0.1 * (terminal_x[3]**2)) \
148 | * CartPoleConfigModule.TERMINAL_WEIGHT
149 |
150 | @staticmethod
151 | def gradient_cost_fn_state(x, g_x, terminal=False):
152 | """ gradient of costs with respect to the state
153 |
154 | Args:
155 | x (numpy.ndarray): state, shape(pred_len, state_size)
156 | g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
157 |
158 | Returns:
159 | l_x (numpy.ndarray): gradient of cost, shape(pred_len, state_size)
160 | or shape(1, state_size)
161 | """
162 | if not terminal:
163 | cost_dx0 = 12. * x[:, 0]
164 | cost_dx1 = 0.2 * x[:, 1]
165 | cost_dx2 = 24. * (1 + np.cos(x[:, 2])) * -np.sin(x[:, 2])
166 | cost_dx3 = 0.2 * x[:, 3]
167 | cost_dx = np.stack((cost_dx0, cost_dx1,
168 | cost_dx2, cost_dx3), axis=1)
169 | return cost_dx
170 |
171 | cost_dx0 = 12. * x[0]
172 | cost_dx1 = 0.2 * x[1]
173 | cost_dx2 = 24. * (1 + np.cos(x[2])) * -np.sin(x[2])
174 | cost_dx3 = 0.2 * x[3]
175 | cost_dx = np.array([[cost_dx0, cost_dx1, cost_dx2, cost_dx3]])
176 |
177 | return cost_dx * CartPoleConfigModule.TERMINAL_WEIGHT
178 |
179 | @staticmethod
180 | def gradient_cost_fn_input(x, u):
181 | """ gradient of costs with respect to the input
182 |
183 | Args:
184 | x (numpy.ndarray): state, shape(pred_len, state_size)
185 | u (numpy.ndarray): goal state, shape(pred_len, input_size)
186 | Returns:
187 | l_u (numpy.ndarray): gradient of cost, shape(pred_len, input_size)
188 | """
189 | return 2. * u * np.diag(CartPoleConfigModule.R)
190 |
191 | @staticmethod
192 | def hessian_cost_fn_state(x, g_x, terminal=False):
193 | """ hessian costs with respect to the state
194 |
195 | Args:
196 | x (numpy.ndarray): state, shape(pred_len, state_size)
197 | g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
198 | Returns:
199 | l_xx (numpy.ndarray): gradient of cost,
200 | shape(pred_len, state_size, state_size) or
201 | shape(1, state_size, state_size) or
202 | """
203 | if not terminal:
204 | (pred_len, state_size) = x.shape
205 | hessian = np.eye(state_size)
206 | hessian = np.tile(hessian, (pred_len, 1, 1))
207 | hessian[:, 0, 0] = 12.
208 | hessian[:, 1, 1] = 0.2
209 | hessian[:, 2, 2] = 24. * -np.sin(x[:, 2]) \
210 | * (-np.sin(x[:, 2])) \
211 | + 24. * (1. + np.cos(x[:, 2])) \
212 | * -np.cos(x[:, 2])
213 | hessian[:, 3, 3] = 0.2
214 |
215 | return hessian
216 |
217 | state_size = len(x)
218 | hessian = np.eye(state_size)
219 | hessian[0, 0] = 12.
220 | hessian[1, 1] = 0.2
221 | hessian[2, 2] = 24. * -np.sin(x[2]) \
222 | * (-np.sin(x[2])) \
223 | + 24. * (1. + np.cos(x[2])) \
224 | * -np.cos(x[2])
225 | hessian[3, 3] = 0.2
226 |
227 | return hessian[np.newaxis, :, :] * CartPoleConfigModule.TERMINAL_WEIGHT
228 |
229 | @staticmethod
230 | def hessian_cost_fn_input(x, u):
231 | """ hessian costs with respect to the input
232 |
233 | Args:
234 | x (numpy.ndarray): state, shape(pred_len, state_size)
235 | u (numpy.ndarray): goal state, shape(pred_len, input_size)
236 | Returns:
237 | l_uu (numpy.ndarray): gradient of cost,
238 | shape(pred_len, input_size, input_size)
239 | """
240 | (pred_len, _) = u.shape
241 |
242 | return np.tile(2.*CartPoleConfigModule.R, (pred_len, 1, 1))
243 |
244 | @staticmethod
245 | def hessian_cost_fn_input_state(x, u):
246 | """ hessian costs with respect to the state and input
247 |
248 | Args:
249 | x (numpy.ndarray): state, shape(pred_len, state_size)
250 | u (numpy.ndarray): goal state, shape(pred_len, input_size)
251 | Returns:
252 | l_ux (numpy.ndarray): gradient of cost ,
253 | shape(pred_len, input_size, state_size)
254 | """
255 | (_, state_size) = x.shape
256 | (pred_len, input_size) = u.shape
257 |
258 | return np.zeros((pred_len, input_size, state_size))
259 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [](https://coveralls.io/github/Shunichi09/PythonLinearNonlinearControl?branch=master&service=github)
2 | []()
3 | [](https://travis-ci.org/Shunichi09/PythonLinearNonlinearControl)
4 |
5 | # PythonLinearNonlinearControl
6 |
7 | PythonLinearNonlinearControl is a library implementing the linear and nonlinear control theories in python.
8 | Due to use only basic libralies (scipy, numpy), this library is easy to extend for your own situations.
9 |
10 |

13 |
14 | # Algorithms
15 |
16 | | Algorithm | Use Linear Model | Use Nonlinear Model | Need Gradient (Hamiltonian) | Need Gradient (Model) | Need Hessian (Model) |
17 | |:----------|:---------------: |:----------------:|:----------------:|:----------------:|:----------------:|
18 | | Linear Model Predictive Control (MPC) | ✓ | x | x | x | x |
19 | | Cross Entropy Method (CEM) | ✓ | ✓ | x | x | x |
20 | | Model Predictive Path Integral Control of Nagabandi, A. (MPPI) | ✓ | ✓ | x | x | x |
21 | | Model Predictive Path Integral Control of Williams, G. (MPPIWilliams) | ✓ | ✓ | x | x | x |
22 | | Random Shooting Method (Random) | ✓ | ✓ | x | x | x |
23 | | Iterative LQR (iLQR) | x | ✓ | x | ✓ | x |
24 | | Differential Dynamic Programming (DDP) | x | ✓ | x | ✓ | ✓ |
25 | | Unconstrained Nonlinear Model Predictive Control (NMPC) | x | ✓ | ✓ | x | x |
26 | | Constrained Nonlinear Model Predictive Control CGMRES (NMPC-CGMRES) | x | ✓ | ✓ | x | x |
27 | | Constrained Nonlinear Model Predictive Control Newton (NMPC-Newton) | x | ✓ | x | x | x |
28 |
29 | "Need Gradient" means that you have to implement the gradient of the model or the gradient of hamiltonian.
30 |
31 | Following algorithms are implemented in PythonLinearNonlinearControl
32 |
33 | - [Linear Model Predictive Control (MPC)](http://www2.eng.cam.ac.uk/~jmm1/mpcbook/mpcbook.html)
34 | - Ref: Maciejowski, J. M. (2002). Predictive control: with constraints.
35 | - [script](PythonLinearNonlinearControl/controllers/mpc.py)
36 | - [Cross Entropy Method (CEM)](https://arxiv.org/abs/1805.12114)
37 | - Ref: Chua, K., Calandra, R., McAllister, R., & Levine, S. (2018). Deep reinforcement learning in a handful of trials using probabilistic dynamics models. In Advances in Neural Information Processing Systems (pp. 4754-4765)
38 | - [script](PythonLinearNonlinearControl/controllers/cem.py)
39 | - [Model Predictive Path Integral Control of Nagabandi, A. (MPPI)](https://arxiv.org/abs/1909.11652)
40 | - Ref: Nagabandi, A., Konoglie, K., Levine, S., & Kumar, V. (2019). Deep Dynamics Models for Learning Dexterous Manipulation. arXiv preprint arXiv:1909.11652.
41 | - [script](PythonLinearNonlinearControl/controllers/mppi.py)
42 | - [Model Predictive Path Integral Control of Williams, G. (MPPIWilliams)](https://ieeexplore.ieee.org/abstract/document/7989202)
43 | - Ref: Williams, G., Wagener, N., Goldfain, B., Drews, P., Rehg, J. M., Boots, B., & Theodorou, E. A. (2017, May). Information theoretic MPC for model-based reinforcement learning. In 2017 IEEE International Conference on Robotics and Automation (ICRA) (pp. 1714-1721). IEEE.
44 | - [script](PythonLinearNonlinearControl/controllers/mppi_williams.py)
45 | - [Random Shooting Method (Random)](https://arxiv.org/abs/1805.12114)
46 | - Ref: Chua, K., Calandra, R., McAllister, R., & Levine, S. (2018). Deep reinforcement learning in a handful of trials using probabilistic dynamics models. In Advances in Neural Information Processing Systems (pp. 4754-4765)
47 | - [script](PythonLinearNonlinearControl/controllers/random.py)
48 | - [Iterative LQR (iLQR)](https://ieeexplore.ieee.org/document/6386025)
49 | - Ref: Tassa, Y., Erez, T., & Todorov, E. (2012, October). Synthesis and stabilization of complex behaviors through online trajectory optimization. In 2012 IEEE/RSJ International Conference on Intelligent Robots and Systems (pp. 4906-4913). IEEE. and [Study Wolf](https://github.com/studywolf/control), https://github.com/anassinator/ilqr
50 | - [script](PythonLinearNonlinearControl/controllers/ilqr.py)
51 | - [Dynamic Differential Programming (DDP)](https://ieeexplore.ieee.org/document/6386025)
52 | - Ref: Tassa, Y., Erez, T., & Todorov, E. (2012, October). Synthesis and stabilization of complex behaviors through online trajectory optimization. In 2012 IEEE/RSJ International Conference on Intelligent Robots and Systems (pp. 4906-4913). IEEE. and [Study Wolf](https://github.com/studywolf/control), https://github.com/anassinator/ilqr
53 | - [script](PythonLinearNonlinearControl/controllers/ddp.py)
54 | - [Unconstrained Nonlinear Model Predictive Control (NMPC)](https://www.sciencedirect.com/science/article/pii/S0005109897000058)
55 | - Ref: Ohtsuka, T., & Fujii, H. A. (1997). Real-time optimization algorithm for nonlinear receding-horizon control. Automatica, 33(6), 1147-1154.
56 | - [script](PythonLinearNonlinearControl/controllers/nmpc.py)
57 | - [Constrained Nonlinear Model Predictive Control -CGMRES- (NMPC-CGMRES)](https://www.sciencedirect.com/science/article/pii/S0005109897000058)
58 | - Ref: Ohtsuka, T., & Fujii, H. A. (1997). Real-time optimization algorithm for nonlinear receding-horizon control. Automatica, 33(6), 1147-1154.
59 | - [script](PythonLinearNonlinearControl/controllers/nmpc_cgmres.py)
60 | - [Constrained Nonlinear Model Predictive Control -Newton- (NMPC-Newton)](https://www.sciencedirect.com/science/article/pii/S0005109897000058)
61 | - Ref: Ohtsuka, T., & Fujii, H. A. (1997). Real-time optimization algorithm for nonlinear receding-horizon control. Automatica, 33(6), 1147-1154.
62 | - [script (Coming soon)]()
63 |
64 | # Environments
65 |
66 | There are 4 example environments, "FirstOrderLag", "TwoWheeledConst", "TwoWheeledTrack" and "Cartpole".
67 |
68 | | Name | Linear | Nonlinear | State Size | Input size |
69 | |:----------|:---------------:|:----------------:|:----------------:|:----------------:|
70 | | First Order Lag System | ✓ | x | 4 | 2 |
71 | | Two wheeled System (Constant Goal) | x | ✓ | 3 | 2 |
72 | | Two wheeled System (Moving Goal) | x | ✓ | 3 | 2 |
73 | | Cartpole (Swing up) | x | ✓ | 4 | 1 |
74 | | Nonlinear Sample System Env | x | ✓ | 2 | 1 |
75 |
76 | All states and inputs of environments are continuous.
77 | **It should be noted that the algorithms for linear model could be applied to nonlinear enviroments if you have linealized the model of nonlinear environments.**
78 |
79 | You could know abount our environmets more in [Environments.md](Environments.md)
80 |
81 | # Installation
82 |
83 | ## To install this package
84 |
85 | ```
86 | $ pip install PythonLinearNonlinearControl
87 | ```
88 |
89 | ## When developing the package
90 |
91 | ```
92 | $ python setup.py develop
93 | ```
94 |
95 | or
96 |
97 | ```
98 | $ pip install -e .
99 | ```
100 |
101 | # Basic concepts
102 |
103 | When we design control systems, we should have **Model**, **Planner**, **Controller** and **Runner** as shown in the figure.
104 | It should be noted that **Model** and **Environment** are different. As mentioned before, we the algorithms for linear model could be applied to nonlinear enviroments if you have linealized model of nonlinear environments. In addition, you can use Neural Network or any non-linear functions to the model, although this library can not deal with it now.
105 |
106 |
107 |
108 | ## [Model](PythonLinearNonlinearControl/models/)
109 |
110 | System model. For an instance, in the case that a model is linear, this model should have a form, "x[k+1] = Ax[k] + Bu[k]".
111 |
112 | If you use gradient based control method, you are preferred to implement the gradients of the model, other wise the controllers use numeric gradients.
113 |
114 | ## [Planner](PythonLinearNonlinearControl/planners/)
115 |
116 | Planner make the goal states.
117 |
118 | ## [Controller](PythonLinearNonlinearControl/controllers/)
119 |
120 | Controller calculate the optimal inputs by using the model by using the algorithms.
121 |
122 | ## [Runner](PythonLinearNonlinearControl/runners/)
123 |
124 | Runner runs the simulation.
125 |
126 | Please, see more detail in each scripts.
127 |
128 | # Getting started
129 |
130 | ## [QuickStart Guide](scripts/quickstart/quickstart.md)
131 |
132 | This is a quickstart guide for users who just want to try PythonLinearNonlinearControl.
133 | If you have not installed PythonLinearNonLinearControl, please see the section of "how to setup" in README.md
134 |
135 | When we design control systems, we should have Environment, Model, Planner, Controller and Runner.
136 | Therefore your script contains those Modules.
137 |
138 | First, import each Modules from PythonLinearNonlinearControl.
139 |
140 | ```py
141 | from PythonLinearNonlinearControl import configs
142 | from PythonLinearNonlinearControl import envs
143 | from PythonLinearNonlinearControl import models
144 | from PythonLinearNonlinearControl import planners
145 | from PythonLinearNonlinearControl import controllers
146 | from PythonLinearNonlinearControl import runners
147 | ```
148 |
149 | Configs contains each modules configurations such as cost functions, prediction length, ...etc.
150 |
151 | Then you can make each module. (This is example about CEM and CartPole env)
152 |
153 | ```py
154 | config = configs.CartPoleConfigModule()
155 | env = envs.CartPoleEnv()
156 | model = models.CartPoleModel(config)
157 | controller = controllers.CEM(config, model)
158 | planner = planners.ConstantPlanner(config)
159 | runner = runners.ExpRunner()
160 | ```
161 |
162 | The preparation for experiment has done!
163 | Please run the runner.
164 |
165 | ```py
166 | history_x, history_u, history_g = runner.run(env, controller, planner)
167 | ```
168 |
169 | You can get the results of history of state, history of input and history of goal.
170 | Use that histories to visualize the Animation or Figures.
171 | (Note FirstOrderEnv does not support animation)
172 |
173 | ```py
174 | # plot results
175 | plot_results(history_x, history_u, history_g=history_g)
176 | save_plot_data(history_x, history_u, history_g=history_g)
177 |
178 | # create animation
179 | animator = Animator(env)
180 | animator.draw(history_x, history_g)
181 | ```
182 | **It should be noted that the controller parameters like Q, R and Sf strongly affect the performence of the controller.
183 | Please, check these parameters before you run the simulation.**
184 |
185 | ## Run Example Script
186 |
187 | You can run the example script as follows:
188 |
189 | ```
190 | python scripts/simple_run.py --env CartPole --controller CEM --save_anim 1
191 | ```
192 |
193 | **figures and animations are saved in the ./result folder.**
194 |
195 | # Old version
196 |
197 | If you are interested in the old version of this library, that was not a library just examples, please see [v1.0](https://github.com/Shunichi09/PythonLinearNonlinearControl/tree/v1.0)
198 |
199 | # Documents
200 |
201 | Coming soon !!
202 |
203 | # Requirements
204 |
205 | - numpy
206 | - scipy
207 | - matplotlib (for figures and animations)
208 | - ffmpeg (for animations)
209 |
210 | # License
211 |
212 | PythonLinearNonlinearControl is licensed under the MIT License. However, some scripts are available under different license terms. See below.
213 |
214 | The following parts are licensed under GPL3+:
215 |
216 | - [ilqr](PythonLinearNonlinearControl/controllers/ilqr.py)
217 | - [ddp](PythonLinearNonlinearControl/controllers/ddp.py)
218 |
219 | See the [licenses folder](./licenses) for the full text of the licenses.
220 |
221 | # Citation
222 |
223 | ```
224 | @Misc{PythonLinearNonLinearControl,
225 | author = {Shunichi Sekiguchi},
226 | title = {PythonLinearNonlinearControl},
227 | note = "\url{https://github.com/Shunichi09/PythonLinearNonlinearControl}",
228 | }
229 | ```
230 |
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/models/model.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | class Model():
5 | """ base class of model
6 | """
7 |
8 | def __init__(self):
9 | """
10 | """
11 | pass
12 |
13 | def predict_traj(self, curr_x, us):
14 | """ predict trajectories
15 |
16 | Args:
17 | curr_x (numpy.ndarray): current state, shape(state_size, )
18 | us (numpy.ndarray): inputs,
19 | shape(pred_len, input_size)
20 | or shape(pop_size, pred_len, input_size)
21 | Returns:
22 | pred_xs (numpy.ndarray): predicted state,
23 | shape(pred_len+1, state_size) including current state
24 | or shape(pop_size, pred_len+1, state_size)
25 | """
26 | if len(us.shape) == 3:
27 | pred_xs = self._predict_traj_alltogether(curr_x, us)
28 | elif len(us.shape) == 2:
29 | pred_xs = self._predict_traj(curr_x, us)
30 | else:
31 | raise ValueError("Invalid us")
32 |
33 | return pred_xs
34 |
35 | def _predict_traj(self, curr_x, us):
36 | """ predict trajectories
37 |
38 | Args:
39 | curr_x (numpy.ndarray): current state, shape(state_size, )
40 | us (numpy.ndarray): inputs, shape(pred_len, input_size)
41 | Returns:
42 | pred_xs (numpy.ndarray): predicted state,
43 | shape(pred_len+1, state_size) including current state
44 | """
45 | # get size
46 | pred_len = us.shape[0]
47 | # initialze
48 | x = curr_x
49 | pred_xs = curr_x[np.newaxis, :]
50 |
51 | for t in range(pred_len):
52 | next_x = self.predict_next_state(x, us[t])
53 | # update
54 | pred_xs = np.concatenate((pred_xs, next_x[np.newaxis, :]), axis=0)
55 | x = next_x
56 |
57 | return pred_xs
58 |
59 | def _predict_traj_alltogether(self, curr_x, us):
60 | """ predict trajectories for all samples
61 |
62 | Args:
63 | curr_x (numpy.ndarray): current state, shape(pop_size, state_size)
64 | us (numpy.ndarray): inputs, shape(pop_size, pred_len, input_size)
65 | Returns:
66 | pred_xs (numpy.ndarray): predicted state,
67 | shape(pop_size, pred_len+1, state_size) including current state
68 | """
69 | # get size
70 | (pop_size, pred_len, _) = us.shape
71 | us = np.transpose(us, (1, 0, 2)) # to (pred_len, pop_size, input_size)
72 | # initialze
73 | x = np.tile(curr_x, (pop_size, 1))
74 | pred_xs = x[np.newaxis, :, :] # (1, pop_size, state_size)
75 |
76 | for t in range(pred_len):
77 | # next_x.shape = (pop_size, state_size)
78 | next_x = self.predict_next_state(x, us[t])
79 | # update
80 | pred_xs = np.concatenate((pred_xs, next_x[np.newaxis, :, :]),
81 | axis=0)
82 | x = next_x
83 |
84 | return np.transpose(pred_xs, (1, 0, 2))
85 |
86 | def predict_next_state(self, curr_x, u):
87 | """ predict next state
88 | """
89 | raise NotImplementedError("Implement the model")
90 |
91 | def x_dot(self, curr_x, u):
92 | """ compute x dot
93 | """
94 | raise NotImplementedError("Implement the model")
95 |
96 | def predict_adjoint_traj(self, xs, us, g_xs):
97 | """
98 | Args:
99 | xs (numpy.ndarray): states trajectory, shape(pred_len+1, state_size)
100 | us (numpy.ndarray): inputs, shape(pred_len, input_size)
101 | g_xs (numpy.ndarray): goal states, shape(pred_len+1, state_size)
102 | Returns:
103 | lams (numpy.ndarray): adjoint state, shape(pred_len, state_size),
104 | adjoint size is the same as state_size
105 | Notes:
106 | Adjoint trajectory be computed by backward path.
107 | Usually, we should -\dot{lam} but in backward path case, we can use \dot{lam} directry
108 | """
109 | # get size
110 | (pred_len, input_size) = us.shape
111 | # pred final adjoint state
112 | lam = self.predict_terminal_adjoint_state(xs[-1],
113 | terminal_g_x=g_xs[-1])
114 | lams = lam[np.newaxis, :]
115 |
116 | for t in range(pred_len-1, 0, -1):
117 | prev_lam = \
118 | self.predict_adjoint_state(lam, xs[t], us[t],
119 | g_x=g_xs[t])
120 | # update
121 | lams = np.concatenate((prev_lam[np.newaxis, :], lams), axis=0)
122 | lam = prev_lam
123 |
124 | return lams
125 |
126 | def predict_adjoint_state(self, lam, x, u, g_x=None, t=None):
127 | """ predict adjoint states
128 |
129 | Args:
130 | lam (numpy.ndarray): adjoint state, shape(state_size, )
131 | x (numpy.ndarray): state, shape(state_size, )
132 | u (numpy.ndarray): input, shape(input_size, )
133 | g_x (numpy.ndarray): goal state, shape(state_size, )
134 | Returns:
135 | prev_lam (numpy.ndarrya): previous adjoint state,
136 | shape(state_size, )
137 | """
138 | raise NotImplementedError("Implement the adjoint model")
139 |
140 | def predict_terminal_adjoint_state(self, terminal_x, terminal_g_x=None):
141 | """ predict terminal adjoint state
142 |
143 | Args:
144 | terminal_x (numpy.ndarray): terminal state, shape(state_size, )
145 | terminal_g_x (numpy.ndarray): terminal goal state,
146 | shape(state_size, )
147 | Returns:
148 | terminal_lam (numpy.ndarray): terminal adjoint state,
149 | shape(state_size, )
150 | """
151 | raise NotImplementedError("Implement terminal adjoint state")
152 |
153 | @staticmethod
154 | def calc_f_x(xs, us, dt):
155 | """ gradient of model with respect to the state in batch form
156 | """
157 | raise NotImplementedError("Implement gradient of model \
158 | with respect to the state")
159 |
160 | @staticmethod
161 | def calc_f_u(xs, us, dt):
162 | """ gradient of model with respect to the input in batch form
163 | """
164 | raise NotImplementedError("Implement gradient of model \
165 | with respect to the input")
166 |
167 | @staticmethod
168 | def calc_f_xx(xs, us, dt):
169 | """ hessian of model with respect to the state in batch form
170 | """
171 | raise NotImplementedError("Implement hessian of model \
172 | with respect to the state")
173 |
174 | @staticmethod
175 | def calc_f_ux(xs, us, dt):
176 | """ hessian of model with respect to the input in batch form
177 | """
178 | raise NotImplementedError("Implement hessian of model \
179 | with respect to the input and state")
180 |
181 | @staticmethod
182 | def calc_f_uu(xs, us, dt):
183 | """ hessian of model with respect to the state in batch form
184 | """
185 | raise NotImplementedError("Implement hessian of model \
186 | with respect to the input")
187 |
188 |
189 | class LinearModel(Model):
190 | """ discrete linear model, x[k+1] = Ax[k] + Bu[k]
191 |
192 | Attributes:
193 | A (numpy.ndarray): shape(state_size, state_size)
194 | B (numpy.ndarray): shape(state_size, input_size)
195 | """
196 |
197 | def __init__(self, A, B):
198 | """
199 | """
200 | super(LinearModel, self).__init__()
201 | self.A = A
202 | self.B = B
203 |
204 | def predict_next_state(self, curr_x, u):
205 | """ predict next state
206 |
207 | Args:
208 | curr_x (numpy.ndarray): current state, shape(state_size, ) or
209 | shape(pop_size, state_size)
210 | u (numpy.ndarray): input, shape(input_size, ) or
211 | shape(pop_size, input_size)
212 | Returns:
213 | next_x (numpy.ndarray): next state, shape(state_size, ) or
214 | shape(pop_size, state_size)
215 | """
216 | if len(u.shape) == 1:
217 | next_x = np.matmul(self.A, curr_x[:, np.newaxis]) \
218 | + np.matmul(self.B, u[:, np.newaxis])
219 |
220 | return next_x.flatten()
221 |
222 | elif len(u.shape) == 2:
223 | next_x = np.matmul(curr_x, self.A.T) + np.matmul(u, self.B.T)
224 |
225 | return next_x
226 |
227 | def calc_f_x(self, xs, us, dt):
228 | """ gradient of model with respect to the state in batch form
229 |
230 | Args:
231 | xs (numpy.ndarray): state, shape(pred_len+1, state_size)
232 | us (numpy.ndarray): input, shape(pred_len, input_size,)
233 | Return:
234 | f_x (numpy.ndarray): gradient of model with respect to x,
235 | shape(pred_len, state_size, state_size)
236 | Notes:
237 | This should be discrete form !!
238 | """
239 | # get size
240 | (pred_len, _) = us.shape
241 |
242 | return np.tile(self.A, (pred_len, 1, 1))
243 |
244 | def calc_f_u(self, xs, us, dt):
245 | """ gradient of model with respect to the input in batch form
246 |
247 | Args:
248 | xs (numpy.ndarray): state, shape(pred_len+1, state_size)
249 | us (numpy.ndarray): input, shape(pred_len, input_size,)
250 | Return:
251 | f_u (numpy.ndarray): gradient of model with respect to x,
252 | shape(pred_len, state_size, input_size)
253 | Notes:
254 | This should be discrete form !!
255 | """
256 | # get size
257 | (pred_len, input_size) = us.shape
258 |
259 | return np.tile(self.B, (pred_len, 1, 1))
260 |
261 | @staticmethod
262 | def calc_f_xx(xs, us, dt):
263 | """ hessian of model with respect to the state in batch form
264 |
265 | Args:
266 | xs (numpy.ndarray): state, shape(pred_len+1, state_size)
267 | us (numpy.ndarray): input, shape(pred_len, input_size,)
268 | Return:
269 | f_xx (numpy.ndarray): gradient of model with respect to x,
270 | shape(pred_len, state_size, state_size, state_size)
271 | """
272 | # get size
273 | (_, state_size) = xs.shape
274 | (pred_len, _) = us.shape
275 |
276 | f_xx = np.zeros((pred_len, state_size, state_size, state_size))
277 |
278 | return f_xx
279 |
280 | @staticmethod
281 | def calc_f_ux(xs, us, dt):
282 | """ hessian of model with respect to state and input in batch form
283 |
284 | Args:
285 | xs (numpy.ndarray): state, shape(pred_len+1, state_size)
286 | us (numpy.ndarray): input, shape(pred_len, input_size,)
287 | Return:
288 | f_ux (numpy.ndarray): gradient of model with respect to x,
289 | shape(pred_len, state_size, input_size, state_size)
290 | """
291 | # get size
292 | (_, state_size) = xs.shape
293 | (pred_len, input_size) = us.shape
294 |
295 | f_ux = np.zeros((pred_len, state_size, input_size, state_size))
296 |
297 | return f_ux
298 |
299 | @staticmethod
300 | def calc_f_uu(xs, us, dt):
301 | """ hessian of model with respect to input in batch form
302 |
303 | Args:
304 | xs (numpy.ndarray): state, shape(pred_len+1, state_size)
305 | us (numpy.ndarray): input, shape(pred_len, input_size,)
306 | Return:
307 | f_uu (numpy.ndarray): gradient of model with respect to x,
308 | shape(pred_len, state_size, input_size, input_size)
309 | """
310 | # get size
311 | (_, state_size) = xs.shape
312 | (pred_len, input_size) = us.shape
313 |
314 | f_uu = np.zeros((pred_len, state_size, input_size, input_size))
315 |
316 | return f_uu
317 |
--------------------------------------------------------------------------------
/PythonLinearNonlinearControl/configs/nonlinear_sample_system.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | class NonlinearSampleSystemConfigModule():
5 | # parameters
6 | ENV_NAME = "NonlinearSampleSystem-v0"
7 | PLANNER_TYPE = "Const"
8 | TYPE = "Nonlinear"
9 | TASK_HORIZON = 2000
10 | PRED_LEN = 10
11 | STATE_SIZE = 2
12 | INPUT_SIZE = 1
13 | DT = 0.01
14 | R = np.diag([1.])
15 | Q = None
16 | Sf = None
17 | # bounds
18 | INPUT_LOWER_BOUND = np.array([-0.5])
19 | INPUT_UPPER_BOUND = np.array([0.5])
20 |
21 | def __init__(self):
22 | """
23 | """
24 | # opt configs
25 | self.opt_config = {
26 | "Random": {
27 | "popsize": 5000
28 | },
29 | "CEM": {
30 | "popsize": 500,
31 | "num_elites": 50,
32 | "max_iters": 15,
33 | "alpha": 0.3,
34 | "init_var": 9.,
35 | "threshold": 0.001
36 | },
37 | "MPPI": {
38 | "beta": 0.6,
39 | "popsize": 5000,
40 | "kappa": 0.9,
41 | "noise_sigma": 0.5,
42 | },
43 | "MPPIWilliams": {
44 | "popsize": 5000,
45 | "lambda": 1.,
46 | "noise_sigma": 0.9,
47 | },
48 | "iLQR": {
49 | "max_iters": 500,
50 | "init_mu": 1.,
51 | "mu_min": 1e-6,
52 | "mu_max": 1e10,
53 | "init_delta": 2.,
54 | "threshold": 1e-6,
55 | },
56 | "DDP": {
57 | "max_iters": 500,
58 | "init_mu": 1.,
59 | "mu_min": 1e-6,
60 | "mu_max": 1e10,
61 | "init_delta": 2.,
62 | "threshold": 1e-6,
63 | },
64 | "NMPC": {
65 | "threshold": 0.01,
66 | "max_iters": 5000,
67 | "learning_rate": 0.01,
68 | "optimizer_mode": "conjugate"
69 | }
70 | }
71 |
72 | @staticmethod
73 | def input_cost_fn(u):
74 | """ input cost functions
75 |
76 | Args:
77 | u (numpy.ndarray): input, shape(pred_len, input_size)
78 | or shape(pop_size, pred_len, input_size)
79 | Returns:
80 | cost (numpy.ndarray): cost of input, shape(pred_len, input_size) or
81 | shape(pop_size, pred_len, input_size)
82 | """
83 | return (u**2) * np.diag(NonlinearSampleSystemConfigModule.R)
84 |
85 | @staticmethod
86 | def state_cost_fn(x, g_x):
87 | """ state cost function
88 |
89 | Args:
90 | x (numpy.ndarray): state, shape(pred_len, state_size)
91 | or shape(pop_size, pred_len, state_size)
92 | g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
93 | or shape(pop_size, pred_len, state_size)
94 | Returns:
95 | cost (numpy.ndarray): cost of state, shape(pred_len, 1) or
96 | shape(pop_size, pred_len, 1)
97 | """
98 |
99 | if len(x.shape) > 2:
100 | return (0.5 * (x[:, :, 0]**2) +
101 | 0.5 * (x[:, :, 1]**2))[:, :, np.newaxis]
102 |
103 | elif len(x.shape) > 1:
104 | return (0.5 * (x[:, 0]**2) + 0.5 * (x[:, 1]**2))[:, np.newaxis]
105 |
106 | return 0.5 * (x[0]**2) + 0.5 * (x[1]**2)
107 |
108 | @staticmethod
109 | def terminal_state_cost_fn(terminal_x, terminal_g_x):
110 | """
111 |
112 | Args:
113 | terminal_x (numpy.ndarray): terminal state,
114 | shape(state_size, ) or shape(pop_size, state_size)
115 | terminal_g_x (numpy.ndarray): terminal goal state,
116 | shape(state_size, ) or shape(pop_size, state_size)
117 | Returns:
118 | cost (numpy.ndarray): cost of state, shape(pred_len, ) or
119 | shape(pop_size, pred_len)
120 | """
121 |
122 | if len(terminal_x.shape) > 1:
123 | return (0.5 * (terminal_x[:, 0]**2) +
124 | 0.5 * (terminal_x[:, 1]**2))[:, np.newaxis]
125 |
126 | return 0.5 * (terminal_x[0]**2) + 0.5 * (terminal_x[1]**2)
127 |
128 | @staticmethod
129 | def gradient_cost_fn_state(x, g_x, terminal=False):
130 | """ gradient of costs with respect to the state
131 |
132 | Args:
133 | x (numpy.ndarray): state, shape(pred_len, state_size)
134 | g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
135 |
136 | Returns:
137 | l_x (numpy.ndarray): gradient of cost, shape(pred_len, state_size)
138 | or shape(1, state_size)
139 | """
140 | if not terminal:
141 | cost_dx0 = x[:, 0]
142 | cost_dx1 = x[:, 1]
143 | cost_dx = np.stack((cost_dx0, cost_dx1), axis=1)
144 | return cost_dx
145 |
146 | cost_dx0 = x[0]
147 | cost_dx1 = x[1]
148 | cost_dx = np.array([[cost_dx0, cost_dx1]])
149 |
150 | return cost_dx
151 |
152 | @staticmethod
153 | def gradient_cost_fn_input(x, u):
154 | """ gradient of costs with respect to the input
155 |
156 | Args:
157 | x (numpy.ndarray): state, shape(pred_len, state_size)
158 | u (numpy.ndarray): goal state, shape(pred_len, input_size)
159 | Returns:
160 | l_u (numpy.ndarray): gradient of cost, shape(pred_len, input_size)
161 | """
162 | return 2. * u * np.diag(NonlinearSampleSystemConfigModule.R)
163 |
164 | @staticmethod
165 | def hessian_cost_fn_state(x, g_x, terminal=False):
166 | """ hessian costs with respect to the state
167 |
168 | Args:
169 | x (numpy.ndarray): state, shape(pred_len, state_size)
170 | g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
171 | Returns:
172 | l_xx (numpy.ndarray): gradient of cost,
173 | shape(pred_len, state_size, state_size) or
174 | shape(1, state_size, state_size) or
175 | """
176 | if not terminal:
177 | (pred_len, state_size) = x.shape
178 | hessian = np.eye(state_size)
179 | hessian = np.tile(hessian, (pred_len, 1, 1))
180 | hessian[:, 0, 0] = 1.
181 | hessian[:, 1, 1] = 1.
182 |
183 | return hessian
184 |
185 | state_size = len(x)
186 | hessian = np.eye(state_size)
187 | hessian[0, 0] = 1.
188 | hessian[1, 1] = 1.
189 |
190 | return hessian[np.newaxis, :, :]
191 |
192 | @staticmethod
193 | def hessian_cost_fn_input(x, u):
194 | """ hessian costs with respect to the input
195 |
196 | Args:
197 | x (numpy.ndarray): state, shape(pred_len, state_size)
198 | u (numpy.ndarray): goal state, shape(pred_len, input_size)
199 | Returns:
200 | l_uu (numpy.ndarray): gradient of cost,
201 | shape(pred_len, input_size, input_size)
202 | """
203 | (pred_len, _) = u.shape
204 |
205 | return np.tile(NonlinearSampleSystemConfigModule.R, (pred_len, 1, 1))
206 |
207 | @staticmethod
208 | def hessian_cost_fn_input_state(x, u):
209 | """ hessian costs with respect to the state and input
210 |
211 | Args:
212 | x (numpy.ndarray): state, shape(pred_len, state_size)
213 | u (numpy.ndarray): goal state, shape(pred_len, input_size)
214 | Returns:
215 | l_ux (numpy.ndarray): gradient of cost ,
216 | shape(pred_len, input_size, state_size)
217 | """
218 | (_, state_size) = x.shape
219 | (pred_len, input_size) = u.shape
220 |
221 | return np.zeros((pred_len, input_size, state_size))
222 |
223 | @staticmethod
224 | def gradient_hamiltonian_input(x, lam, u, g_x):
225 | """
226 |
227 | Args:
228 | x (numpy.ndarray): shape(pred_len+1, state_size)
229 | lam (numpy.ndarray): shape(pred_len, state_size)
230 | u (numpy.ndarray): shape(pred_len, input_size)
231 | g_xs (numpy.ndarray): shape(pred_len, state_size)
232 |
233 | Returns:
234 | F (numpy.ndarray), shape(pred_len, input_size)
235 | """
236 | if len(x.shape) == 1:
237 | input_size = u.shape[0]
238 | F = np.zeros(input_size)
239 | F[0] = u[0] + lam[1]
240 |
241 | return F
242 |
243 | elif len(x.shape) == 2:
244 | pred_len, input_size = u.shape
245 | F = np.zeros((pred_len, input_size))
246 |
247 | for i in range(pred_len):
248 | F[i, 0] = u[i, 0] + lam[i, 1]
249 |
250 | return F
251 |
252 | else:
253 | raise NotImplementedError
254 |
255 | @staticmethod
256 | def gradient_hamiltonian_state(x, lam, u, g_x):
257 | """
258 | Args:
259 | x (numpy.ndarray): shape(pred_len+1, state_size)
260 | lam (numpy.ndarray): shape(pred_len, state_size)
261 | u (numpy.ndarray): shape(pred_len, input_size)
262 | g_xs (numpy.ndarray): shape(pred_len, state_size)
263 |
264 | Returns:
265 | lam_dot (numpy.ndarray), shape(state_size, )
266 | """
267 | if len(lam.shape) == 1:
268 | state_size = lam.shape[0]
269 | lam_dot = np.zeros(state_size)
270 | lam_dot[0] = x[0] - (2. * x[0] * x[1] + 1.) * lam[1]
271 | lam_dot[1] = x[1] + lam[0] + \
272 | (-3. * (x[1]**2) - x[0]**2 + 1.) * lam[1]
273 |
274 | return lam_dot
275 |
276 | elif len(lam.shape) == 2:
277 | pred_len, state_size = lam.shape
278 | lam_dot = np.zeros((pred_len, state_size))
279 |
280 | for i in range(pred_len):
281 | lam_dot[i, 0] = x[i, 0] - \
282 | (2. * x[i, 0] * x[i, 1] + 1.) * lam[i, 1]
283 | lam_dot[i, 1] = x[i, 1] + lam[i, 0] + \
284 | (-3. * (x[i, 1]**2) - x[i, 0]**2 + 1.) * lam[i, 1]
285 |
286 | return lam_dot
287 |
288 | else:
289 | raise NotImplementedError
290 |
291 |
292 | class NonlinearSampleSystemExtendConfigModule(NonlinearSampleSystemConfigModule):
293 | def __init__(self):
294 | super().__init__()
295 | self.opt_config = {
296 | "NMPCCGMRES": {
297 | "threshold": 1e-3,
298 | "zeta": 100.,
299 | "delta": 0.01,
300 | "alpha": 0.5,
301 | "tf": 1.,
302 | "constraint": True
303 | },
304 | "NMPCNewton": {
305 | "threshold": 1e-3,
306 | "max_iteration": 500,
307 | "learning_rate": 1e-3
308 | }
309 | }
310 |
311 | @staticmethod
312 | def gradient_hamiltonian_input_with_constraint(x, lam, u, g_x, dummy_u, raw):
313 | """
314 |
315 | Args:
316 | x (numpy.ndarray): shape(pred_len+1, state_size)
317 | lam (numpy.ndarray): shape(pred_len, state_size)
318 | u (numpy.ndarray): shape(pred_len, input_size)
319 | g_xs (numpy.ndarray): shape(pred_len, state_size)
320 | dummy_u (numpy.ndarray): shape(pred_len, input_size)
321 | raw (numpy.ndarray): shape(pred_len, input_size), Lagrangian for constraints
322 |
323 | Returns:
324 | F (numpy.ndarray), shape(pred_len, 3)
325 | """
326 | if len(x.shape) == 1:
327 | vanilla_F = np.zeros(1)
328 | extend_F = np.zeros(1) # 1 is the same as input size
329 | extend_C = np.zeros(1)
330 |
331 | vanilla_F[0] = u[0] + lam[1] + 2. * raw[0] * u[0]
332 | extend_F[0] = -0.01 + 2. * raw[0] * dummy_u[0]
333 | extend_C[0] = u[0]**2 + dummy_u[0]**2 - \
334 | NonlinearSampleSystemConfigModule.INPUT_LOWER_BOUND**2
335 |
336 | F = np.concatenate([vanilla_F, extend_F, extend_C])
337 |
338 | elif len(x.shape) == 2:
339 | pred_len, _ = u.shape
340 | vanilla_F = np.zeros((pred_len, 1))
341 | extend_F = np.zeros((pred_len, 1)) # 1 is the same as input size
342 | extend_C = np.zeros((pred_len, 1))
343 |
344 | for i in range(pred_len):
345 | vanilla_F[i, 0] = \
346 | u[i, 0] + lam[i, 1] + 2. * raw[i, 0] * u[i, 0]
347 | extend_F[i, 0] = -0.01 + 2. * raw[i, 0] * dummy_u[i, 0]
348 | extend_C[i, 0] = u[i, 0]**2 + dummy_u[i, 0]**2 - \
349 | NonlinearSampleSystemConfigModule.INPUT_LOWER_BOUND**2
350 |
351 | F = np.concatenate([vanilla_F, extend_F, extend_C], axis=1)
352 |
353 | return F
354 |
--------------------------------------------------------------------------------