├── .gitignore ├── Extended-Planar-Quadrotor.ipynb ├── LICENSE ├── README.md ├── cart-pole.ipynb ├── core ├── __init__.py ├── controllers │ ├── __init__.py │ ├── constant_controller.py │ ├── controller.py │ ├── energy_controller.py │ ├── fb_lin_controller.py │ ├── linear_controller.py │ ├── lqr_controller.py │ ├── mpc_controller.py │ ├── pd_controller.py │ ├── piecewise_constant_controller.py │ ├── qp_controller.py │ └── random_controller.py ├── dynamics │ ├── __init__.py │ ├── affine_dynamics.py │ ├── affine_quad_clf.py │ ├── configuration_dynamics.py │ ├── configuration_trajectory_dynamics.py │ ├── dynamics.py │ ├── fb_lin_dynamics.py │ ├── fully_actuated_robotic_dynamics.py │ ├── learned_affine_dynamics.py │ ├── learned_dynamics.py │ ├── learned_fb_lin_dynamics.py │ ├── learned_scalar_affine_dynamics.py │ ├── linearizable_dynamics.py │ ├── pd_dynamics.py │ ├── quadratic_clf.py │ ├── robotic_dynamics.py │ ├── scalar_dynamics.py │ └── system_dynamics.py ├── geometry │ ├── __init__.py │ ├── ball.py │ ├── box.py │ ├── convex_body.py │ ├── derived.py │ ├── two_ball.py │ ├── visual.py │ └── visual_derived.py ├── learning │ ├── __init__.py │ ├── gaussian_process.py │ ├── gaussian_process_estimator.py │ ├── keras │ │ ├── __init__.py │ │ ├── keras_residual_affine_model.py │ │ ├── keras_residual_model.py │ │ └── keras_residual_scalar_affine_model.py │ ├── kernels │ │ ├── __init__.py │ │ ├── kernel.py │ │ └── laplacian_kernel.py │ ├── residual_affine_model.py │ ├── residual_model.py │ └── value_estimator.py ├── systems │ ├── __init__.py │ ├── affine_gp_system.py │ ├── car_system.py │ ├── cart_pole.py │ ├── double_inverted_pendulum.py │ ├── inverted_pendulum.py │ ├── linear_system.py │ ├── planar_quadrotor.py │ ├── polynomial_system.py │ ├── segway_3d_system.py │ ├── segway_system.py │ ├── single_track_bicycle_system.py │ └── unicycle.py ├── trajopt │ ├── __init__.py │ ├── gp_trajectory_optimizer.py │ └── trajectory_optimizer.py └── util.py ├── double-inverted-pendulum.ipynb ├── learning-dynamics.ipynb ├── requirements.txt ├── robotic-systems.ipynb ├── setup.py └── tests ├── analytic_jacobians.nb ├── data └── test_systems │ ├── CartPole │ ├── ts.npy │ ├── us.npy │ ├── x_0.npy │ └── xs.npy │ ├── DoubleInvertedPendulum │ ├── ts.npy │ ├── us.npy │ ├── x_0.npy │ └── xs.npy │ └── InvertedPendulum │ ├── ts.npy │ ├── us.npy │ ├── x_0.npy │ └── xs.npy ├── generate_system_test_data.py ├── test_controller.py ├── test_gp_mpc.py ├── test_gp_traj_opt.py ├── test_jacobian.py ├── test_learn_linear_system.py ├── test_mpc_controller.py ├── test_multitask_gp.py ├── test_systems.py └── test_traj_opt.py /.gitignore: -------------------------------------------------------------------------------- 1 | .venv 2 | __pycache__ 3 | .ipynb_checkpoints 4 | *.slides.html 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 learning-and-control 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # core 2 | Python simulation and hardware library for learning and control 3 | 4 | ## macOS setup 5 | Set up virtual environment 6 | ``` 7 | python3 -m venv .venv 8 | ``` 9 | Activate virtual environment 10 | ``` 11 | source .venv/bin/activate 12 | ``` 13 | Upgrade package installer for Python 14 | ``` 15 | pip install --upgrade pip 16 | ``` 17 | Install requirements 18 | ``` 19 | pip3 install -r requirements.txt 20 | ``` 21 | Create IPython kernel 22 | ``` 23 | python3 -m ipykernel install --user --name .venv --display-name "Virtual Environment" 24 | ``` 25 | # Papers Implemnted in this Library: 26 | 27 | - [Learning to Control an Unstable System with One Minute of Data: 28 | Leveraging Gaussian Process Differentiation in Predictive Control](https://arxiv.org/pdf/2103.04548.pdf) 29 | 30 | -------------------------------------------------------------------------------- /core/__init__.py: -------------------------------------------------------------------------------- 1 | name = 'core' 2 | 3 | from .util import arr_map, default_fig, differentiate 4 | -------------------------------------------------------------------------------- /core/controllers/__init__.py: -------------------------------------------------------------------------------- 1 | from .constant_controller import ConstantController 2 | from .controller import Controller 3 | from .energy_controller import EnergyController 4 | from .fb_lin_controller import FBLinController 5 | from .linear_controller import LinearController 6 | from .lqr_controller import LQRController 7 | from .pd_controller import PDController 8 | from .qp_controller import QPController 9 | from .random_controller import RandomController 10 | from .piecewise_constant_controller import PiecewiseConstantController 11 | from .mpc_controller import MPCController -------------------------------------------------------------------------------- /core/controllers/constant_controller.py: -------------------------------------------------------------------------------- 1 | from .controller import Controller 2 | 3 | class ConstantController(Controller): 4 | """Class for constant action policies.""" 5 | 6 | def __init__(self, dynamics, u_const): 7 | """Create a ConstantController object. 8 | 9 | Inputs: 10 | Dynamics, dynamics: Dynamics 11 | Constant action, u_const: numpy array 12 | """ 13 | 14 | Controller.__init__(self, dynamics) 15 | self.u_const = u_const 16 | 17 | def forward(self, x, t): 18 | return self.u_const -------------------------------------------------------------------------------- /core/controllers/controller.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | class Controller(nn.Module): 3 | """Abstract policy class for control. 4 | 5 | Override forward. 6 | """ 7 | 8 | def __init__(self, dynamics): 9 | """Create a Controller object. 10 | 11 | Inputs: 12 | Dynamics, dynamics: Dynamics 13 | """ 14 | super().__init__() 15 | self.dynamics = dynamics 16 | 17 | def forward(self, x, t): 18 | """Compute general representation of an action. 19 | 20 | Inputs: 21 | State, x: numpy array 22 | Time, t: float 23 | 24 | Outputs: 25 | Action: object 26 | """ 27 | 28 | pass 29 | 30 | def process(self, u): 31 | """Transform general representation of an action to a numpy array. 32 | 33 | Inputs: 34 | Action, u: object 35 | 36 | Outputs: 37 | Action: numpy array 38 | """ 39 | 40 | return u 41 | 42 | def reset(self): 43 | """Reset any controller state.""" 44 | 45 | pass -------------------------------------------------------------------------------- /core/controllers/energy_controller.py: -------------------------------------------------------------------------------- 1 | from numpy import dot, reshape, zeros 2 | from numpy.linalg import solve 3 | 4 | from .controller import Controller 5 | 6 | class EnergyController(Controller): 7 | """Class for energy-based controllers.""" 8 | 9 | def __init__(self, robotic_dynamics, K_p, K_d, q_d=None): 10 | """Create an EnergyController object. 11 | 12 | Requires fully actuated robotic system with relative degree 2. 13 | 14 | Inputs: 15 | Robotic system, robotic_dynamics: RoboticDynamics 16 | Positive-definite proportional gain matrix, K_p: numpy array 17 | Positive-definite derivative gain matrix, K_d: numpy array 18 | """ 19 | 20 | Controller.__init__(self, robotic_dynamics) 21 | self.K_p = K_p 22 | self.K_d = K_d 23 | if q_d is None: 24 | q_d = zeros(len(K_p)) 25 | self.q_d = q_d 26 | 27 | def forward(self, x, t): 28 | q, q_dot = reshape(x, (2, -1)) 29 | e = q - self.q_d 30 | return solve(self.dynamics.B, self.dynamics.G(q) - dot(self.K_p, e) - dot(self.K_d, q_dot)) -------------------------------------------------------------------------------- /core/controllers/fb_lin_controller.py: -------------------------------------------------------------------------------- 1 | from numpy import dot 2 | from numpy.linalg import solve 3 | 4 | from .controller import Controller 5 | 6 | class FBLinController(Controller): 7 | """Class for linearizing feedback policies.""" 8 | 9 | def __init__(self, fb_lin_dynamics, linear_controller): 10 | """Create an FBLinController object. 11 | 12 | Policy is u = (act)^-1 * (-drift + aux), where drift and act are 13 | components of drift vector and actuation matrix corresponding to 14 | highest-order derivatives of each output coordinate and aux is an 15 | auxilliary linear controller. 16 | 17 | Inputs: 18 | Feedback linearizable dynamics, fb_lin_dynamics: FBLinDynamics 19 | Auxilliary linear controller, linear_controller: LinearController 20 | """ 21 | 22 | Controller.__init__(self, fb_lin_dynamics) 23 | self.linear_controller = linear_controller 24 | self.select = fb_lin_dynamics.select 25 | self.permute = fb_lin_dynamics.permute 26 | 27 | def forward(self, x, t): 28 | drift = self.select(self.permute(self.dynamics.drift(x, t))) 29 | act = self.select(self.permute(self.dynamics.act(x, t))) 30 | return solve(act, -drift + self.linear_controller(x, t)) 31 | 32 | -------------------------------------------------------------------------------- /core/controllers/linear_controller.py: -------------------------------------------------------------------------------- 1 | 2 | from .controller import Controller 3 | 4 | class LinearController(Controller): 5 | """Class for linear policies.""" 6 | 7 | def __init__(self, affine_dynamics, K): 8 | """Create a LinearController object. 9 | 10 | Policy is u = -K * x. 11 | 12 | Inputs: 13 | Affine dynamics, affine_dynamics: AffineDynamics 14 | Gain matrix, K: numpy array 15 | """ 16 | 17 | Controller.__init__(self, affine_dynamics) 18 | self.register_buffer('K', K) 19 | 20 | def forward(self, x, t): 21 | return (-self.K[None] @ self.dynamics.image(x, t)[:, :, None])[:, :, 0] 22 | -------------------------------------------------------------------------------- /core/controllers/lqr_controller.py: -------------------------------------------------------------------------------- 1 | from numpy import dot 2 | from numpy.linalg import solve 3 | 4 | from .controller import Controller 5 | from core.dynamics import AffineQuadCLF 6 | 7 | class LQRController(Controller): 8 | """Class for Linear Quadratic Regulator (LQR) policies. 9 | 10 | Policy is u = -1 / 2 * R^-1 P * B' * x for dynamics x_dot = A * x + B * u 11 | 12 | Policy optimizes infinite-horizon continuous-time LQR problem with stage 13 | cost x(t)' * Q * x(t) + u(t)' * R * u(t) and cost-to-go x(t)' * P * x(t). 14 | """ 15 | 16 | def __init__(self, affine_linearizable_dynamics, P, R): 17 | """Create an LQRController object. 18 | 19 | Inputs: 20 | Positive-definite cost-to-go matrix, P: numpy array 21 | Positive-definite action cost matrix, R: numpy array 22 | """ 23 | 24 | Controller.__init__(self, affine_linearizable_dynamics) 25 | self.P = P 26 | self.R = R 27 | 28 | def forward(self, x, t): 29 | _, B = self.dynamics.linear_system() 30 | return -solve(self.R, dot(B.T, dot(self.P, self.dynamics.image(x, t)))) / 2 31 | 32 | def build(affine_linearizable_dynamics, Q, R): 33 | """Create an LQRController from state and action matrices. 34 | 35 | Inputs: 36 | Affine and linearizable dynamics, affine_linearizable_dynamics: AffineDynamics, LinearizableDynamics 37 | Positive-definite state cost matrix, Q: numpy array 38 | Positive-definite action cost matrix, R: numpy array 39 | 40 | Outputs: 41 | LQR policy: LQRController 42 | """ 43 | 44 | lyap = AffineQuadCLF.build_care(affine_linearizable_dynamics, Q, R) 45 | return LQRController(affine_linearizable_dynamics, lyap.P, R) -------------------------------------------------------------------------------- /core/controllers/mpc_controller.py: -------------------------------------------------------------------------------- 1 | from .controller import Controller 2 | from numpy import concatenate, zeros_like 3 | 4 | class MPCController(Controller): 5 | def __init__(self, dynamics, trajopt): 6 | super().__init__(dynamics) 7 | self.trajopt = trajopt 8 | self.dynamics = dynamics 9 | self.xt_prev = None 10 | self.ut_prev = None 11 | 12 | def forward(self, x, t): 13 | ws_ut = ws_xt = None 14 | if self.xt_prev is not None: 15 | ws_xt = concatenate([x[None,:], self.xt_prev[2:], self.xt_prev[None, -1]]) 16 | if self.ut_prev is not None: 17 | ws_ut = concatenate([self.ut_prev[1:], self.ut_prev[None, -1]]) 18 | 19 | xt, ut = self.trajopt(x, t, 20 | xt_prev=ws_xt, 21 | ut_prev=ws_ut) 22 | self.xt_prev = xt 23 | self.ut_prev = ut 24 | return ut[0] 25 | -------------------------------------------------------------------------------- /core/controllers/pd_controller.py: -------------------------------------------------------------------------------- 1 | from numpy import dot 2 | 3 | from .controller import Controller 4 | 5 | class PDController(Controller): 6 | """Class for proportional-derivative policies.""" 7 | 8 | def __init__(self, pd_dynamics, K_p, K_d): 9 | """Create a PDController object. 10 | 11 | Policy is u = -K_p * e_p - K_d * e_d, where e_p and e_d are propotional 12 | and derivative components of error. 13 | 14 | Inputs: 15 | Proportional-derivative dynamics, pd_dynamics: PDDynamics 16 | Proportional gain matrix, K_p: numpy array 17 | Derivative gain matrix, K_d: numpy array 18 | """ 19 | 20 | Controller.__init__(self, pd_dynamics) 21 | self.K_p = K_p 22 | self.K_d = K_d 23 | 24 | def forward(self, x, t): 25 | e_p = self.dynamics.proportional(x, t) 26 | e_d = self.dynamics.derivative(x, t) 27 | return -dot(self.K_p, e_p) - dot(self.K_d, e_d) 28 | -------------------------------------------------------------------------------- /core/controllers/piecewise_constant_controller.py: -------------------------------------------------------------------------------- 1 | from .controller import Controller 2 | from numpy import rint, argmin 3 | from math import floor 4 | 5 | 6 | class PiecewiseConstantController(Controller): 7 | def __init__(self, dynamics, h, ut, round_mode=True): 8 | Controller.__init__(self, dynamics) 9 | self.ut = ut 10 | self.h = h 11 | self.round_mode_on = round_mode 12 | 13 | def forward(self, x, t): 14 | #technically floor should be correct but floating point arithmetic 15 | #can make you choose the wrong slice 16 | 17 | if type(self.h) is float: 18 | if self.round_mode_on: 19 | t_idx = int(rint(t / self.h)) 20 | else: 21 | t_idx = floor(t / self.h) 22 | if t_idx == self.ut.shape[0]: 23 | #off by one can happen because of floating point errors 24 | t_idx -= 1 25 | else: 26 | t_idx = argmin(abs(self.h - t)) 27 | if t_idx >= self.ut.shape[0]: 28 | raise OverflowError('[ERROR] Controller called outside horizon.') 29 | return self.ut[t_idx] 30 | -------------------------------------------------------------------------------- /core/controllers/qp_controller.py: -------------------------------------------------------------------------------- 1 | from cvxpy import Minimize, Problem, quad_form, square, sum_squares, Variable 2 | from numpy import zeros, identity 3 | from numpy.linalg import eigvals 4 | 5 | from .controller import Controller 6 | from core.dynamics import AffineQuadCLF 7 | 8 | class QPController(Controller): 9 | """Class for controllers solving quadratic programs (QPs). 10 | 11 | QPs are solved using cvxpy. 12 | """ 13 | 14 | def __init__(self, affine_dynamics, m): 15 | """Create a QPController object. 16 | 17 | Inputs: 18 | Affine dynamics, affine_dynamics: AffineDynamics 19 | Number of actions, m: int 20 | """ 21 | 22 | Controller.__init__(self, affine_dynamics) 23 | self.m = m 24 | self.u = Variable(m) 25 | self.variables = [] 26 | self.static_costs = [] 27 | self.dynamic_costs = [] 28 | self.constraints = [] 29 | 30 | def add_static_cost(self, P=None, q=None, r=0): 31 | """Add term to cost of the form u' * P * u + q' * u + r 32 | 33 | Inputs: 34 | Quadratic term, P: numpy array 35 | Linear term, q: numpy array 36 | Constant term, r: float 37 | """ 38 | 39 | if P is None: 40 | P = zeros((self.m, self.m)) 41 | if q is None: 42 | q = zeros(self.m) 43 | cost = quad_form(self.u, P) + q * self.u + r 44 | self.static_costs.append(cost) 45 | 46 | def add_dynamic_cost(self, P, q, r): 47 | """Add term to cost of the form u' * P(x, t) * u + q(x, t)' * u + r(x, t) 48 | 49 | Inputs: 50 | Quadratic term, P: numpy array * float -> numpy array 51 | Linear term, q: numpy array * float -> numpy array 52 | Constant term, r: numpy array * float -> float 53 | """ 54 | 55 | if P is None: 56 | P = lambda x, t: zeros((self.m, self.m)) 57 | if q is None: 58 | q = lambda x, t: zeros(self.m) 59 | if r is None: 60 | r = lambda x, t: 0 61 | cost = lambda x, t: quad_form(self.u, P(x, t)) + q(x, t) * self.u + r(x, t) 62 | self.dynamic_costs.append(cost) 63 | 64 | def add_regularizer(self, controller, coeff=1): 65 | """Add 2-norm regularizer about another controller to cost 66 | 67 | Inputs: 68 | Controller, controller: Controller 69 | Regularizer weight, coeff: float 70 | """ 71 | 72 | cost = lambda x, t: coeff * sum_squares(self.u - controller.process(controller(x, t))) 73 | self.dynamic_costs.append(cost) 74 | 75 | def add_stability_constraint(self, aff_lyap, comp=None, slacked=False, coeff=0): 76 | """Add Lyapunov function derivative constraint 77 | 78 | Inputs: 79 | Affine Lyapunov function: AffineDynamics, ScalarDynamics 80 | Class-K comparison function, comp: float -> float 81 | Flag for slacking constraint, slacked: bool 82 | Coefficient for slack variable in cost function, coeff: float 83 | """ 84 | 85 | if comp is None: 86 | comp = lambda r: 0 87 | if slacked: 88 | delta = Variable() 89 | self.variables.append(delta) 90 | self.static_costs.append(coeff * square(delta)) 91 | constraint = lambda x, t: aff_lyap.drift(x, t) + aff_lyap.act(x, t) * self.u <= -comp(aff_lyap.image(x, t)) + delta 92 | else: 93 | constraint = lambda x, t: aff_lyap.drift(x, t) + aff_lyap.act(x, t) * self.u <= -comp(aff_lyap.image(x, t)) 94 | self.constraints.append(constraint) 95 | 96 | def add_safety_constraint(self, aff_safety, comp=None, slacked=False, coeff=0): 97 | """Add safety function derivative constraint 98 | 99 | Inputs: 100 | Affine safety function: AffineDynamics, ScalarDynamics 101 | Class-K comparison function, comp: float -> float 102 | Flag for slacking constraint, slacked: bool 103 | Coefficient for slack variable in cost function, coeff: float 104 | """ 105 | 106 | if comp is None: 107 | comp = lambda r: 0 108 | if slacked: 109 | delta = Variable() 110 | self.variables.append(delta) 111 | self.static_costs.append(coeff * square(delta)) 112 | constraint = lambda x, t: aff_safety.drift(x, t) + aff_safety.act(x, t) * self.u >= -comp(aff_safety.image(x, t)) - delta 113 | else: 114 | constraint = lambda x, t: aff_safety.drift(x, t) + aff_safety.act(x, t) * self.u >= -comp(aff_safety.image(x, t)) 115 | self.constraints.append(constraint) 116 | @staticmethod 117 | def build_care(aff_dynamics, Q, R): 118 | """Build minimum-norm controller with stability constraint from solving CARE 119 | 120 | Inputs: 121 | Affine dynamics, aff_dynamics: AffineDynamics 122 | Positive-definite state cost matrix, Q: numpy array 123 | Positive-definite action cost matrix, R: numpy array 124 | 125 | Outputs: 126 | QP-based controller: QPController 127 | """ 128 | 129 | return QPController._build(aff_dynamics, None, Q, R, 'CARE') 130 | 131 | @staticmethod 132 | def build_ctle(aff_dynamics, K, Q): 133 | """Build minimum-norm controller with stability constraint from solving CTLE 134 | 135 | Inputs: 136 | Affine dynamics, aff_dynamics: AffineDynamics 137 | Gain matrix, K: numpy array 138 | Positive-definite state cost matrix, Q: numpy array 139 | 140 | Outputs: 141 | QP-based controller: QPController 142 | """ 143 | 144 | return QPController._build(aff_dynamics, K, Q, None, 'CTLE') 145 | 146 | @staticmethod 147 | def _build(aff_dynamics, K, Q, R, method): 148 | """Helper function for build_care and build_ctle""" 149 | 150 | if method == 'CARE': 151 | m = len(R) 152 | lyap = AffineQuadCLF.build_care(aff_dynamics, Q, R) 153 | elif method == 'CTLE': 154 | m = len(K) 155 | R = identity(m) 156 | lyap = AffineQuadCLF.build_ctle(aff_dynamics, K, Q) 157 | qp = QPController(aff_dynamics, m) 158 | qp.add_static_cost(R) 159 | alpha = min(eigvals(Q)) / max(eigvals(lyap.P)) 160 | comp = lambda r: alpha * r 161 | qp.add_stability_constraint(lyap, comp) 162 | return qp 163 | 164 | def forward(self, x, t): 165 | static_cost = sum(self.static_costs) 166 | dynamic_cost = sum([cost(x, t) for cost in self.dynamic_costs]) 167 | obj = Minimize(static_cost + dynamic_cost) 168 | cons = [constraint(x, t) for constraint in self.constraints] 169 | prob = Problem(obj, cons) 170 | prob.solve(warm_start=True) 171 | return self.u.value, [variable.value for variable in self.variables] 172 | 173 | def process(self, u): 174 | u, _ = u 175 | return u -------------------------------------------------------------------------------- /core/controllers/random_controller.py: -------------------------------------------------------------------------------- 1 | from numpy import zeros 2 | from numpy.random import multivariate_normal 3 | 4 | from .controller import Controller 5 | 6 | class RandomController(Controller): 7 | """Class for i.i.d. Gaussian perturbations around policies. 8 | 9 | Perturbations are held constant for multiple time steps to reduce the effect 10 | of aliasing. 11 | """ 12 | 13 | def __init__(self, controller, cov, reps=2): 14 | """Create a RandomController object. 15 | 16 | Mean policy, controller: Controller 17 | Covariance matrix, cov: numpy array 18 | Number of time steps held, reps: int 19 | """ 20 | 21 | Controller.__init__(self, controller.dynamics) 22 | self.controller = controller 23 | self.cov = cov 24 | self.m, _ = cov.shape 25 | self.reps = reps 26 | self.counter = None 27 | self.pert = self.sample() 28 | 29 | def sample(self): 30 | """Sample a perturbation. 31 | 32 | Outputs: 33 | Zero-mean Gaussian sample: numpy array 34 | """ 35 | 36 | return multivariate_normal(zeros(self.m), self.cov) 37 | 38 | def forward(self, x, t): 39 | if self.counter == 0: 40 | self.pert = self.sample() 41 | self.counter = self.reps + 1 42 | self.counter = self.counter - 1 43 | return self.controller(x, t), self.pert 44 | 45 | def process(self, u): 46 | u_nom, u_pert = u 47 | return u_nom + u_pert 48 | 49 | def reset(self): 50 | self.counter = self.reps 51 | self.controller.reset() -------------------------------------------------------------------------------- /core/dynamics/__init__.py: -------------------------------------------------------------------------------- 1 | from .affine_dynamics import AffineDynamics 2 | from .affine_quad_clf import AffineQuadCLF 3 | from .configuration_dynamics import ConfigurationDynamics 4 | from .configuration_trajectory_dynamics import ConfigurationTrajectoryDynamics 5 | from .dynamics import Dynamics 6 | from .fb_lin_dynamics import FBLinDynamics 7 | from .fully_actuated_robotic_dynamics import FullyActuatedRoboticDynamics 8 | from .learned_affine_dynamics import LearnedAffineDynamics 9 | from .learned_dynamics import LearnedDynamics 10 | from .learned_fb_lin_dynamics import LearnedFBLinDynamics 11 | from .learned_scalar_affine_dynamics import LearnedScalarAffineDynamics 12 | from .linearizable_dynamics import LinearizableDynamics 13 | from .pd_dynamics import PDDynamics 14 | from .quadratic_clf import QuadraticCLF 15 | from .robotic_dynamics import RoboticDynamics 16 | from .scalar_dynamics import ScalarDynamics 17 | from .system_dynamics import SystemDynamics -------------------------------------------------------------------------------- /core/dynamics/affine_dynamics.py: -------------------------------------------------------------------------------- 1 | from torch import matmul 2 | 3 | from .dynamics import Dynamics 4 | 5 | class AffineDynamics(Dynamics): 6 | """Abstract class for dynamics of the form x_dot = f(x, t) + g(x, t) * u. 7 | 8 | Override image, drift, act. 9 | """ 10 | 11 | def drift(self, x, t): 12 | """Compute drift vector f(x, t). 13 | 14 | Inputs: 15 | State, x: numpy array 16 | Time, t: float 17 | 18 | Outputs: 19 | Drift vector: numpy array 20 | """ 21 | pass 22 | 23 | def act(self, x, t): 24 | """Compute actuation matrix g(x, t). 25 | 26 | Inputs: 27 | State, x: numpy array 28 | Time, t: float 29 | 30 | Outputs: 31 | Actuation matrix: numpy array 32 | """ 33 | 34 | pass 35 | 36 | def forward(self, x, u, t): 37 | return self.drift(x, t) + (self.act(x,t)@u[:,:,None]).squeeze(-1) -------------------------------------------------------------------------------- /core/dynamics/affine_quad_clf.py: -------------------------------------------------------------------------------- 1 | from numpy import dot 2 | from scipy.linalg import solve_continuous_are, solve_continuous_lyapunov 3 | 4 | from .affine_dynamics import AffineDynamics 5 | from .quadratic_clf import QuadraticCLF 6 | 7 | class AffineQuadCLF(AffineDynamics, QuadraticCLF): 8 | """Class for quadratic Lyapunov functions for affine dynamics.""" 9 | 10 | def __init__(self, affine_dynamics, P): 11 | """Build an AffineQuadCLF object. 12 | 13 | Inputs: 14 | Affine dynamics, affine_dynamics: AffineDynamics 15 | Positive-definite matrix, P: numpy array 16 | """ 17 | 18 | QuadraticCLF.__init__(self, affine_dynamics, P) 19 | 20 | def drift(self, x, t): 21 | return dot(self.eval_grad(x, t), self.dynamics.drift(x, t)) 22 | 23 | def act(self, x, t): 24 | return dot(self.eval_grad(x, t), self.dynamics.act(x, t)) 25 | 26 | def build_care(affine_linearizable_dynamics, Q, R): 27 | """Build AffineQuadCLF from affine and linearizable dynamics by solving continuous-time algebraic Riccati equation (CARE). 28 | 29 | CARE is F' * P + P * F - P * G * R^-1 * G' * P = -Q. 30 | 31 | Inputs: 32 | Affine and linearizable dynamics, affine_linearizable_dynamics: AffineDynamics, LinearizableDynamics 33 | Positive-definite state cost matrix, Q: numpy array 34 | Positive-definite action cost matrix, R: numpy array 35 | """ 36 | 37 | F, G = affine_linearizable_dynamics.linear_system() 38 | P = solve_continuous_are(F, G, Q, R) 39 | return AffineQuadCLF(affine_linearizable_dynamics, P) 40 | 41 | def build_ctle(affine_linearizable_dynamics, K, Q): 42 | """Build AffineQuadCLF from affine and linearizable dynamics with gain matrix by solving continuous-time Lyapunov equation (CTLE). 43 | 44 | CTLE is A' * P + P * A = -Q, where A = F - G * K is closed-loop matrix. 45 | 46 | Inputs: 47 | Affine and linearizable dynamics, affine_linearizable_dynamics: AffineDynamics, LinearizableDynamics 48 | Positive-definite state cost matrix, Q: numpy array 49 | Gain matrix, K: numpy array 50 | """ 51 | 52 | A = affine_linearizable_dynamics.closed_loop_linear_system(K) 53 | P = solve_continuous_lyapunov(A.T, -Q) 54 | return AffineQuadCLF(affine_linearizable_dynamics, P) -------------------------------------------------------------------------------- /core/dynamics/configuration_dynamics.py: -------------------------------------------------------------------------------- 1 | from numpy import array, concatenate, dot, reshape, tensordot, zeros 2 | 3 | from .fb_lin_dynamics import FBLinDynamics 4 | from .pd_dynamics import PDDynamics 5 | 6 | class ConfigurationDynamics(FBLinDynamics, PDDynamics): 7 | def __init__(self, robotic_dynamics, k): 8 | relative_degrees = [2] * k 9 | perm = concatenate([array([j, j + k]) for j in range(k)]) 10 | FBLinDynamics.__init__(self, relative_degrees, perm) 11 | PDDynamics.__init__(self) 12 | self.robotic_dynamics = robotic_dynamics 13 | self.k = k 14 | 15 | def y(self, q): 16 | pass 17 | 18 | def dydq(self, q): 19 | pass 20 | 21 | def d2ydq2(self, q): 22 | pass 23 | 24 | def y_d(self, t): 25 | return zeros(self.k) 26 | 27 | def y_d_dot(self, t): 28 | return zeros(self.k) 29 | 30 | def y_d_ddot(self, t): 31 | return zeros(self.k) 32 | 33 | def image(self, x, t): 34 | return concatenate([self.proportional(x, t), self.derivative(x, t)]) 35 | 36 | def drift(self, x, t): 37 | q, q_dot = reshape(x, (2, -1)) 38 | q_ddot_drift = self.robotic_dynamics.drift(x, t)[self.robotic_dynamics.k:] 39 | err_ddot_drift = dot(tensordot(self.d2ydq2(q), q_dot, 1), q_dot) + dot(self.dydq(q), q_ddot_drift) - self.y_d_ddot(t) 40 | return concatenate([self.derivative(x, t), err_ddot_drift]) 41 | 42 | def act(self, x, t): 43 | q = self.robotic_dynamics.proportional(x, t) 44 | q_ddot_act = self.robotic_dynamics.act(x, t)[self.robotic_dynamics.k:] 45 | return concatenate([zeros((self.k, self.robotic_dynamics.m)), dot(self.dydq(q), q_ddot_act)]) 46 | 47 | def proportional(self, x, t): 48 | q = self.robotic_dynamics.proportional(x, t) 49 | return self.y(q) - self.y_d(t) 50 | 51 | def derivative(self, x, t): 52 | q, q_dot = reshape(x, (2, -1)) 53 | return dot(self.dydq(q), q_dot) - self.y_d_dot(t) 54 | -------------------------------------------------------------------------------- /core/dynamics/configuration_trajectory_dynamics.py: -------------------------------------------------------------------------------- 1 | from numpy import array, dot, arange, where 2 | from numpy.linalg import solve 3 | 4 | from .configuration_dynamics import ConfigurationDynamics 5 | 6 | class ConfigurationTrajectoryDynamics(ConfigurationDynamics): 7 | def __init__(self, robotic_dynamics, k, _y_d=None, _y_d_dot=None, _y_d_ddot=None): 8 | ConfigurationDynamics.__init__(self, robotic_dynamics, k) 9 | self._y_d = _y_d 10 | self._y_d_dot = _y_d_dot 11 | self._y_ddot = _y_d_ddot 12 | 13 | def y(self, q): 14 | raise NotImplementedError 15 | 16 | def dydq(self, q): 17 | raise NotImplementedError 18 | 19 | def d2ydq2(self, q): 20 | raise NotImplementedError 21 | 22 | def y_d(self, t): 23 | return self._y_d(t) 24 | 25 | def y_d_dot(self, t): 26 | return self._y_d_dot(t) 27 | 28 | def y_d_ddot(self, t): 29 | return self._y_d_ddot(t) 30 | 31 | def add_trajectory(self, qs, q_dots, ts): 32 | ys = array([self.y(q) for q in qs]) 33 | y_dots = array([dot(self.dydq(q), q_dot) for q, q_dot in zip(qs, q_dots)]) 34 | 35 | def cubic_spline(t): 36 | before, = where(ts <= t) 37 | after, = where(ts > t) 38 | 39 | if len(after) == 0: 40 | idx_0 = before[-2] 41 | idx_1 = before[-1] 42 | else: 43 | idx_0 = before[-1] 44 | idx_1 = after[0] 45 | 46 | t_0, y_0, y_dot_0 = ts[idx_0], ys[idx_0], y_dots[idx_0] 47 | t_1, y_1, y_dot_1 = ts[idx_1], ys[idx_1], y_dots[idx_1] 48 | 49 | A = array([ 50 | [t_0 ** 3, t_0 ** 2, t_0, 1], 51 | [t_1 ** 3, t_1 ** 2, t_1, 1], 52 | [3 * (t_0 ** 2), 2 * t_0, 1, 0], 53 | [3 * (t_1 ** 2), 2 * t_1, 1, 0] 54 | ]) 55 | 56 | bs = array([y_0, y_1, y_dot_0, y_dot_1]) 57 | 58 | alphas_0 = solve(A, bs) 59 | alphas_1 = array([3 * alphas_0[0], 2 * alphas_0[1], alphas_0[2]]) 60 | alphas_2 = array([2 * alphas_1[0], alphas_1[1]]) 61 | 62 | ts_0 = t ** arange(3, -1, -1) 63 | ts_1 = ts_0[1:] 64 | ts_2 = ts_1[1:] 65 | 66 | y = dot(ts_0, alphas_0) 67 | y_dot = dot(ts_1, alphas_1) 68 | y_ddot = dot(ts_2, alphas_2) 69 | 70 | return y, y_dot, y_ddot 71 | 72 | def _y_d(t): 73 | y, _, _ = cubic_spline(t) 74 | return y 75 | 76 | def _y_d_dot(t): 77 | _, y_dot, _ = cubic_spline(t) 78 | return y_dot 79 | 80 | def _y_d_ddot(t): 81 | _, _, y_ddot = cubic_spline(t) 82 | return y_ddot 83 | 84 | self._y_d = _y_d 85 | self._y_d_dot = _y_d_dot 86 | self._y_d_ddot = _y_d_ddot 87 | -------------------------------------------------------------------------------- /core/dynamics/dynamics.py: -------------------------------------------------------------------------------- 1 | from torch.autograd.functional import jacobian 2 | from torch import eye, matrix_exp, pinverse, stack 3 | from torch.nn import Module 4 | 5 | class Dynamics(Module): 6 | """Abstract dynamics class. 7 | 8 | Override image, forward. 9 | """ 10 | def __init__(self): 11 | super().__init__() 12 | 13 | def image(self, x, t): 14 | """Compute representation. 15 | 16 | Inputs: 17 | State, x: numpy array 18 | Time, t: float 19 | 20 | Outputs: 21 | Representation: numpy array 22 | """ 23 | 24 | pass 25 | 26 | def forward(self, x, u, t): 27 | """Compute dynamics (time derivative of representation). 28 | 29 | Inputs: 30 | State, x: numpy array 31 | Action, u: numpy array 32 | Time, t: float 33 | 34 | Outputs: 35 | Time-derivative: numpy array 36 | """ 37 | 38 | pass 39 | 40 | def jacobian(self, x, u, t): 41 | 42 | F,G = jacobian( 43 | lambda x_in, u_in: self(x_in, u_in, t).sum(dim=0), 44 | (x, u), create_graph=True) 45 | F = F.swapaxes(1,0) 46 | G = G.swapaxes(1,0) 47 | return F, G 48 | 49 | def jacobian_exp(self, x, u, t, delta_t): 50 | ndims = x.ndim 51 | unsqueezed = False 52 | if ndims == 1: 53 | unsqueezed = True 54 | assert x.ndim == u.ndim 55 | x = x.unsqueeze(0) 56 | u = u.unsqueeze(0) 57 | expAs = list() 58 | expBs = list() 59 | for i in range(x.shape[0]): 60 | A, B = jacobian(self, (x[i], u[i], t))[:-1] 61 | expA = matrix_exp(A*delta_t) 62 | expB = pinverse(A) @ (expA - eye(self.n)) @ B 63 | expAs.append(expA) 64 | expBs.append(expB) 65 | if unsqueezed: 66 | return stack(expAs, dim=0)[0], stack(expBs, dim=0)[0] 67 | else: 68 | return stack(expAs, dim=0), stack(expBs, dim=0) -------------------------------------------------------------------------------- /core/dynamics/fb_lin_dynamics.py: -------------------------------------------------------------------------------- 1 | from numpy import arange, argsort, cumsum, diag, identity, ones 2 | from scipy.linalg import block_diag 3 | 4 | from .affine_dynamics import AffineDynamics 5 | from .linearizable_dynamics import LinearizableDynamics 6 | 7 | class FBLinDynamics(AffineDynamics, LinearizableDynamics): 8 | """Abstract class for feedback linearizable affine dynamics. 9 | 10 | Representation must be block form, with each block corresponding to an 11 | output coordinate. If an output has relative degree gamma, then the 12 | corresponding block must express derivatives of order 0 through gamma - 1, 13 | in that order. 14 | 15 | If dynamics are specified in a different order, specify a permutation into 16 | block form. 17 | 18 | Override image, drift, act. 19 | """ 20 | 21 | def __init__(self, relative_degrees, perm=None): 22 | """Create an FBLinDynamics object. 23 | 24 | Inputs: 25 | Relative degrees of each output coordinate, relative_degrees: int list 26 | Indices of coordinates that make up each block, perm: numpy array 27 | """ 28 | 29 | self.relative_degrees = relative_degrees 30 | self.relative_degree_idxs = cumsum(relative_degrees) - 1 31 | if perm is None: 32 | perm = arange(sum(relative_degrees)) 33 | self.perm = perm 34 | self.inv_perm = argsort(perm) 35 | 36 | def select(self, arr): 37 | """Select coordinates of block order corresponding to highest-order derivatives. 38 | 39 | Inputs: 40 | Array, arr: numpy array 41 | 42 | Outputs: 43 | Array of selected coordinates: numpy array 44 | """ 45 | 46 | return arr[self.relative_degree_idxs] 47 | 48 | def permute(self, arr): 49 | """Permute array into block order. 50 | 51 | Inputs: 52 | Array, arr: numpy array 53 | 54 | Outputs: 55 | Array permuted into block form: numpy array 56 | """ 57 | 58 | return arr[self.perm] 59 | 60 | def inv_permute(self, arr): 61 | """Permute array out of block order. 62 | 63 | Inputs: 64 | Array in block form, arr: numpy array 65 | 66 | Outputs: 67 | Array out of block form: numpy array 68 | """ 69 | 70 | return arr[self.inv_perm] 71 | 72 | def linear_system(self): 73 | F = block_diag(*[diag(ones(gamma - 1), 1) for gamma in self.relative_degrees]) 74 | G = (identity(sum(self.relative_degrees))[self.relative_degree_idxs]).T 75 | 76 | F = (self.inv_permute((self.inv_permute(F)).T)).T 77 | G = self.inv_permute(G) 78 | 79 | return F, G -------------------------------------------------------------------------------- /core/dynamics/fully_actuated_robotic_dynamics.py: -------------------------------------------------------------------------------- 1 | from .configuration_dynamics import ConfigurationDynamics 2 | from .fb_lin_dynamics import FBLinDynamics 3 | from .robotic_dynamics import RoboticDynamics 4 | 5 | class FullyActuatedRoboticDynamics(FBLinDynamics, RoboticDynamics): 6 | def __init__(self, n, m): 7 | RoboticDynamics.__init__(self, n, m) 8 | config = ConfigurationDynamics(self, n) 9 | FBLinDynamics.__init__(self, config.relative_degrees, config.perm) 10 | -------------------------------------------------------------------------------- /core/dynamics/learned_affine_dynamics.py: -------------------------------------------------------------------------------- 1 | from numpy import array, concatenate, zeros 2 | 3 | from .. import differentiate 4 | from .affine_dynamics import AffineDynamics 5 | 6 | class LearnedAffineDynamics(AffineDynamics): 7 | def __init__(self, affine_dynamics, res_aff_model): 8 | self.dynamics = affine_dynamics 9 | self.res_model = res_aff_model 10 | 11 | def process_drift(self, x, t): 12 | return concatenate([x, array([t])]) 13 | 14 | def process_act(self, x, t): 15 | return concatenate([x, array([t])]) 16 | 17 | def image(self, x, t): 18 | return self.dynamics.image(x, t) 19 | 20 | def drift(self, x, t): 21 | return self.dynamics.drift(x, t) + self.res_model.eval_drift(self.process_drift(x, t)) 22 | 23 | def act(self, x, t): 24 | return self.dynamics.act(x, t) + self.res_model.eval_act(self.process_act(x, t)) 25 | 26 | def process_episode(self, xs, us, ts, window=3): 27 | half_window = (window - 1) // 2 28 | xs = xs[:len(us)] 29 | ts = ts[:len(us)] 30 | 31 | drift_inputs = array([self.process_drift(x, t) for x, t in zip(xs, ts)]) 32 | act_inputs = array([self.process_act(x, t) for x, t in zip(xs, ts)]) 33 | 34 | reps = array([self.dynamics.image(x, t) for x, t in zip(xs, ts)]) 35 | rep_dots = differentiate(reps, ts) 36 | 37 | rep_dot_noms = array([self.dynamics(x, u, t) for x, u, t in zip(xs, us, ts)]) 38 | 39 | drift_inputs = drift_inputs[half_window:-half_window] 40 | act_inputs = act_inputs[half_window:-half_window] 41 | rep_dot_noms = rep_dot_noms[half_window:-half_window] 42 | 43 | residuals = rep_dots - rep_dot_noms 44 | 45 | return drift_inputs, act_inputs, us, residuals 46 | 47 | def init_data(self, d_drift_in, d_act_in, m, d_out): 48 | return [zeros((0, d_drift_in)), zeros((0, d_act_in)), zeros((0, m)), zeros((0, d_out))] 49 | 50 | def fit(self, data, batch_size=1, num_epochs=1, validation_split=0): 51 | drift_inputs, act_inputs, us, residuals = data 52 | self.res_model.fit(drift_inputs, act_inputs, us, residuals, batch_size, num_epochs, validation_split) 53 | -------------------------------------------------------------------------------- /core/dynamics/learned_dynamics.py: -------------------------------------------------------------------------------- 1 | from numpy import array, concatenate, zeros 2 | 3 | from .. import differentiate 4 | from .dynamics import Dynamics 5 | 6 | class LearnedDynamics(Dynamics): 7 | def __init__(self, dynamics, res_model): 8 | self.dynamics = dynamics 9 | self.res_model = res_model 10 | 11 | def process(self, x, u, t): 12 | return concatenate([x, u, array([t])]) 13 | 14 | def image(self, x, t): 15 | return self.dynamics.image(x, t) 16 | 17 | def forward(self, x, u, t): 18 | return self.dynamics(x, u, t) + self.res_model(self.process(x, u, t)) 19 | 20 | def process_episode(self, xs, us, ts, window=3): 21 | half_window = (window - 1) // 2 22 | xs = xs[:len(us)] 23 | ts = ts[:len(us)] 24 | 25 | inputs = array([self.process(x, u, t) for x, u, t in zip(xs, us, ts)]) 26 | 27 | reps = array([self.dynamics.image(x, t) for x, t in zip(xs, ts)]) 28 | rep_dots = differentiate(reps, ts) 29 | 30 | rep_dot_noms = array([self.dynamics(x, u, t) for x, u, t in zip(xs, us, ts)]) 31 | 32 | inputs = inputs[half_window:-half_window] 33 | rep_dot_noms = rep_dot_noms[half_window:-half_window] 34 | 35 | residuals = rep_dots - rep_dot_noms 36 | 37 | return inputs, residuals 38 | 39 | def init_data(self, d_in, d_out): 40 | return [zeros((0, d_in)), zeros((0, d_out))] 41 | 42 | def aggregate_data(self, old_data, new_data): 43 | return [concatenate(old, new) for old, new in zip(old_data, new_data)] 44 | 45 | def fit(self, data, batch_size=1, num_epochs=1, validation_split=0): 46 | inputs, residuals = data 47 | self.res_model.fit(inputs, residuals, batch_size, num_epochs, validation_split) 48 | -------------------------------------------------------------------------------- /core/dynamics/learned_fb_lin_dynamics.py: -------------------------------------------------------------------------------- 1 | from numpy import array, zeros 2 | 3 | from .. import differentiate 4 | from .fb_lin_dynamics import FBLinDynamics 5 | from .learned_affine_dynamics import LearnedAffineDynamics 6 | 7 | class LearnedFBLinDynamics(LearnedAffineDynamics, FBLinDynamics): 8 | def __init__(self, fb_lin_dynamics, res_aff_model): 9 | LearnedAffineDynamics.__init__(self, fb_lin_dynamics, res_aff_model) 10 | FBLinDynamics.__init__(self, fb_lin_dynamics.relative_degrees, fb_lin_dynamics.perm) 11 | 12 | def drift(self, x, t): 13 | drift_nom = self.dynamics.drift(x, t) 14 | drift_res = zeros(drift_nom.shape) 15 | drift_res[self.dynamics.relative_degree_idxs] = self.res_model.eval_drift(self.process_drift(x, t)) 16 | drift_res = self.inv_permute(drift_res) 17 | return drift_nom + drift_res 18 | 19 | def act(self, x, t): 20 | act_nom = self.dynamics.act(x, t) 21 | act_res = zeros(act_nom.shape) 22 | act_res[self.dynamics.relative_degree_idxs] = self.res_model.eval_act(self.process_act(x, t)) 23 | act_res = self.dynamics.inv_permute(act_res) 24 | return act_nom + act_res 25 | 26 | def process_episode(self, xs, us, ts, window=3): 27 | drift_inputs, act_inputs, us, residuals = super().process_episode(xs, us, ts, window) 28 | residuals = array([self.dynamics.select(self.dynamics.permute(residual)) for residual in residuals]) 29 | return drift_inputs, act_inputs, us, residuals 30 | -------------------------------------------------------------------------------- /core/dynamics/learned_scalar_affine_dynamics.py: -------------------------------------------------------------------------------- 1 | from .learned_affine_dynamics import LearnedAffineDynamics 2 | from numpy import zeros 3 | 4 | class LearnedScalarAffineDynamics(LearnedAffineDynamics): 5 | def __init__(self, affine_dynamics, res_aff_model): 6 | LearnedAffineDynamics.__init__(self, affine_dynamics, res_aff_model) 7 | 8 | def init_data(self, d_drift_in, d_act_in, m, d_out): 9 | return [zeros((0, d_drift_in)), zeros((0, d_act_in)), zeros((0, m)), zeros(0)] -------------------------------------------------------------------------------- /core/dynamics/linearizable_dynamics.py: -------------------------------------------------------------------------------- 1 | from numpy import dot 2 | 3 | from .dynamics import Dynamics 4 | 5 | class LinearizableDynamics(Dynamics): 6 | """Abstract class for dynamics with representations as x_dot = A * x + B * u. 7 | 8 | Override image, forward, linear_system. 9 | """ 10 | 11 | def linear_system(self): 12 | """Compute matrices A and B in linear representation of dynamics. 13 | 14 | Outputs: 15 | A and B matrices: numpy array * numpy array 16 | """ 17 | 18 | pass 19 | 20 | def closed_loop_linear_system(self, K): 21 | """Compute matrix A - B * K in linear representation of closed-loop dynamics. 22 | 23 | Inputs: 24 | Gain matrix, K: numpy array 25 | 26 | Outputs: 27 | Closed-loop matrix: numpy array 28 | """ 29 | 30 | A, B = self.linear_system() 31 | return A - dot(B, K) -------------------------------------------------------------------------------- /core/dynamics/pd_dynamics.py: -------------------------------------------------------------------------------- 1 | from .dynamics import Dynamics 2 | 3 | class PDDynamics(Dynamics): 4 | """Abstract class for dynamics with proportional and derivative components. 5 | 6 | Override image, forward, proportional, derivative. 7 | """ 8 | 9 | def proportional(self, x, t): 10 | """Compute proportional component. 11 | 12 | Inputs: 13 | State, x: numpy array 14 | Time, t: float 15 | 16 | Outputs: 17 | Proportional component: numpy array 18 | """ 19 | 20 | pass 21 | 22 | def derivative(self, x, t): 23 | """Compute derivative component. 24 | 25 | Inputs: 26 | State, x: numpy array 27 | Time, t: float 28 | 29 | Outputs: 30 | Derivative component: numpy array 31 | """ 32 | 33 | pass -------------------------------------------------------------------------------- /core/dynamics/quadratic_clf.py: -------------------------------------------------------------------------------- 1 | from numpy import dot 2 | 3 | from .scalar_dynamics import ScalarDynamics 4 | 5 | class QuadraticCLF(ScalarDynamics): 6 | """Class for Lyapunov functions of the form V(z) = z' * P * z.""" 7 | 8 | def __init__(self, dynamics, P): 9 | """Create a QuadraticCLF object. 10 | 11 | Inputs: 12 | Dynamics, dynamics: Dynamics 13 | Positive-definite matrix, P: numpy array 14 | """ 15 | 16 | self.dynamics = dynamics 17 | self.P = P 18 | 19 | def image(self, x, t): 20 | """Compute V(z). 21 | 22 | Inputs: 23 | State, x: numpy array 24 | Time, t: float 25 | 26 | Outputs: 27 | Lyapunov function value: float 28 | """ 29 | 30 | z = self.dynamics.image(x, t) 31 | return dot(z, dot(self.P, z)) 32 | 33 | def eval_grad(self, x, t): 34 | z = self.dynamics.image(x, t) 35 | return 2 * dot(self.P, z) 36 | 37 | def forward(self, x, u, t): 38 | """Compute dV/dt. 39 | 40 | Inputs: 41 | State, x: numpy array 42 | Action, u: numpy array 43 | Time, t: float 44 | 45 | Outputs: 46 | Lyapunov function time derivative: float 47 | """ 48 | 49 | return dot(self.eval_grad(x, t), self.dynamics(x, u, t)) 50 | 51 | -------------------------------------------------------------------------------- /core/dynamics/robotic_dynamics.py: -------------------------------------------------------------------------------- 1 | from matplotlib.pyplot import figure 2 | from torch import cat, matmul, zeros 3 | from torch.linalg import lstsq 4 | from .affine_dynamics import AffineDynamics 5 | from .pd_dynamics import PDDynamics 6 | from .system_dynamics import SystemDynamics 7 | 8 | 9 | class RoboticDynamics(SystemDynamics, AffineDynamics, PDDynamics): 10 | """Abstract class for unconstrained Euler-Lagrange systems. 11 | 12 | State represented as x = (q, q_dot), where q are generalized coordinates and 13 | q_dot are corresponding rates. 14 | 15 | Dynamics represented as D(q) * q_ddot + C(q, q_dot) * q_dot + G(q) = B(q) * u + F_ext(q, q_dot). 16 | 17 | Override D, C, B, U, G. 18 | """ 19 | 20 | def __init__(self, n, m): 21 | """Create a RoboticDynamics object. 22 | 23 | Inputs: 24 | Configuration space dimension, n: int 25 | Action space dimension, m: int 26 | """ 27 | 28 | SystemDynamics.__init__(self, 2 * n, m) 29 | AffineDynamics.__init__(self) 30 | PDDynamics.__init__(self) 31 | self.k = n 32 | 33 | def D(self, q): 34 | """Compute positive-definite inertia matrix. 35 | 36 | Inputs: 37 | Coordinates, q: numpy array 38 | 39 | Outputs: 40 | Positive-definite inertia matrix: numpy array 41 | """ 42 | 43 | pass 44 | 45 | def C(self, q, q_dot): 46 | """Compute Coriolis terms. 47 | 48 | Inputs: 49 | Coordinates, q: numpy array 50 | Coordinate rates, q_dot, numpy array 51 | 52 | Outputs: 53 | Coriolis terms matrix: numpy array 54 | """ 55 | 56 | pass 57 | 58 | def B(self, q): 59 | """Compute actuation terms. 60 | 61 | Inputs: 62 | Coordinates, q: numpy array 63 | 64 | Outputs: 65 | Actuation matrix: numpy array 66 | """ 67 | 68 | pass 69 | 70 | def U(self, q): 71 | """Compute potential energy. 72 | 73 | Inputs: 74 | Coordinates, q: numpy array 75 | 76 | Outputs: 77 | Potential energy: float 78 | """ 79 | 80 | pass 81 | 82 | def G(self, q): 83 | """Compute potential energy gradient. 84 | 85 | Inputs: 86 | Coordinates, q: numpy array 87 | 88 | Outputs: 89 | Potential energy gradient: numpy array 90 | """ 91 | 92 | pass 93 | 94 | def F_ext(self, q, qdot): 95 | """Compute non-conservative generalized forces. 96 | 97 | Inputs: 98 | Coordinates, q: numpy array 99 | Coordinate rates, q_dot: numpy array 100 | 101 | Outputs: 102 | Generalized forces: numpy array 103 | """ 104 | 105 | return zeros(q.shape[0], self.k) 106 | 107 | def T(self, q, q_dot): 108 | """Compute kinetic energy. 109 | 110 | Inputs: 111 | Coordinates, q: numpy array 112 | Coordinate rates, q_dot: numpy array 113 | 114 | Outputs: 115 | Kinetic energy: float 116 | """ 117 | return matmul(q_dot, matmul(self.D(q), q_dot)) 118 | 119 | def H(self, q, q_dot): 120 | """Compute Coriolis and potential terms. 121 | 122 | Inputs: 123 | Coordinates, q: numpy array 124 | Coordinate rates, q_dot: numpy array 125 | 126 | Outputs: 127 | Coriolis and potential terms: numpy array 128 | """ 129 | C_ = self.C(q, q_dot) 130 | Cq_dot_ = matmul(C_, q_dot[:, :, None])[:, :, 0] 131 | G_ = self.G(q) 132 | F_ext_ = self.F_ext(q, q_dot) 133 | 134 | return Cq_dot_ + G_ - F_ext_ 135 | 136 | def drift(self, x, t): 137 | q, q_dot = self.proportional(x, t), self.derivative(x, t) 138 | H_ = self.H(q, q_dot).unsqueeze(1) 139 | D_ = self.D(q) 140 | soln = lstsq(D_, H_).solution[:, :, 0] 141 | return cat([q_dot, -soln], dim=-1) 142 | 143 | def act(self, x, t): 144 | q = self.proportional(x, t) 145 | B_ = self.B(q) 146 | D_ = self.D(q) 147 | soln = lstsq(D_, B_).solution 148 | return cat([zeros((x.shape[0], self.k, self.m), dtype=soln.dtype), 149 | soln], dim=1) 150 | 151 | def proportional(self, x, t): 152 | return x[:, :self.k] 153 | 154 | def derivative(self, x, t): 155 | return x[:, self.k:] 156 | 157 | def plot_coordinates(self, ts, qs, fig=None, ax=None, labels=None): 158 | if labels is None: 159 | labels = [f'$q_{i}$' for i in range(self.k)] 160 | 161 | return self.plot_timeseries(ts, qs, fig, ax, 'Coordinates', labels) 162 | 163 | def plot(self, xs, us, ts, fig=None, coordinate_labels=None, 164 | action_labels=None): 165 | if fig is None: 166 | fig = figure(figsize=(12, 6), tight_layout=True) 167 | 168 | qs = xs[:, :self.k] 169 | 170 | coordinate_ax = fig.add_subplot(1, 2, 1) 171 | fig, coordinate_ax = self.plot_coordinates(ts, qs, fig, coordinate_ax, 172 | coordinate_labels) 173 | 174 | action_ax = fig.add_subplot(1, 2, 2) 175 | fig, action_ax = self.plot_actions(ts, us, fig, action_ax, 176 | action_labels) 177 | 178 | return fig, (coordinate_ax, action_ax) 179 | -------------------------------------------------------------------------------- /core/dynamics/scalar_dynamics.py: -------------------------------------------------------------------------------- 1 | from .dynamics import Dynamics 2 | 3 | class ScalarDynamics(Dynamics): 4 | """Abstract scalar dynamics class. 5 | 6 | Override image, forward, eval_grad. 7 | """ 8 | 9 | def eval_grad(self, x, t): 10 | """Compute gradient of representation. 11 | 12 | Inputs: 13 | State, x: numpy array 14 | Time, t: float 15 | 16 | Outputs: 17 | Representation: float 18 | """ 19 | 20 | pass -------------------------------------------------------------------------------- /core/dynamics/system_dynamics.py: -------------------------------------------------------------------------------- 1 | import torch as th 2 | from matplotlib.pyplot import figure 3 | from scipy.integrate import solve_ivp 4 | from torchdiffeq import odeint, odeint_adjoint 5 | 6 | from .dynamics import Dynamics 7 | from ..util import default_fig 8 | 9 | class SystemDynamics(Dynamics): 10 | """Abstract dynamics class for simulation. 11 | 12 | Override forward. 13 | """ 14 | 15 | def __init__(self, n, m): 16 | """Create a SystemDynamics object. 17 | 18 | Inputs: 19 | Number of states, n: int 20 | Number of actions, m: int 21 | """ 22 | super().__init__() 23 | self.n = n 24 | self.m = m 25 | 26 | def image(self, x, t): 27 | return x 28 | 29 | def step(self, x_0, u_0, t_0, t_f, atol=1e-6, rtol=1e-6): 30 | """Simulate system from initial state with constant action over a 31 | time interval. 32 | 33 | Approximated using Runge-Kutta 4,5 solver. 34 | 35 | Inputs: 36 | Initial state, x_0: numpy array 37 | Control action, u_0: numpy array 38 | Initial time, t_0: float 39 | Final time, t_f: float 40 | Absolute tolerance, atol: float 41 | Relative tolerance, rtol: float 42 | 43 | Outputs: 44 | State at final time: numpy array 45 | """ 46 | 47 | x_dot = lambda t, x: self(x, u_0, t) 48 | t_span = th.tensor([t_0, t_f]) 49 | res = odeint(func=x_dot, y0=x_0, t=t_span, atol=atol, 50 | rtol=rtol).swapaxes(1,0) 51 | return res[:, -1] 52 | 53 | def simulate(self, x_0, controller, ts, processed=True, atol=1e-6, rtol=1e-6): 54 | """Simulate system from initial state with specified controller. 55 | 56 | Approximated using Runge-Kutta 4,5 solver. 57 | 58 | Actions computed at time steps and held constant over sample period. 59 | 60 | Inputs: 61 | Initial state, x_0: numpy array 62 | Control policy, controller: Controller 63 | Time steps, ts: numpy array 64 | Flag to process actions, processed: bool 65 | Absolute tolerance, atol: float 66 | Relative tolerance, rtol: float 67 | 68 | Outputs: 69 | State history: numpy array 70 | Action history: numpy array 71 | """ 72 | 73 | assert x_0.shape[1] == self.n 74 | 75 | N = len(ts) 76 | controller.reset() 77 | xs = [x_0] 78 | us = [] 79 | 80 | for j in range(N - 1): 81 | x = xs[j] 82 | t = ts[j] 83 | u = controller(x, t) 84 | us.append(u) 85 | u = controller.process(u) 86 | xs.append(self.step(x, u, t, ts[j + 1])) 87 | 88 | if processed: 89 | us = [controller.process(u) for u in us] 90 | return th.stack(xs, dim=1), th.stack(us, dim=1) 91 | 92 | def plot_timeseries(self, ts, data, fig=None, ax=None, title=None, labels=None): 93 | fig, ax = default_fig(fig, ax) 94 | 95 | if title is not None: 96 | ax.set_title(title, fontsize=16) 97 | 98 | ax.set_xlabel('$t$ (sec)', fontsize=16) 99 | ax.plot(ts, data, linewidth=3) 100 | 101 | if labels is not None: 102 | ax.legend(labels, fontsize=16) 103 | 104 | return fig, ax 105 | 106 | def plot_states(self, ts, xs, fig=None, ax=None, labels=None): 107 | if labels is None: 108 | labels = [f'$x_{i}$' for i in range(self.n)] 109 | 110 | return self.plot_timeseries(ts, xs, fig, ax, 'States', labels) 111 | 112 | def plot_actions(self, ts, us, fig=None, ax=None, labels=None): 113 | if labels is None: 114 | labels = [f'$u_{j}$' for j in range(self.m)] 115 | 116 | return self.plot_timeseries(ts[:-1], us, fig, ax, 'Actions', labels) 117 | 118 | def plot(self, xs, us, ts, fig=None, state_labels=None, action_labels=None): 119 | if fig is None: 120 | fig = figure(figsize=(12, 6), tight_layout=True) 121 | 122 | state_ax = fig.add_subplot(1, 2, 1) 123 | fig, state_ax = self.plot_states(ts, xs, fig, state_ax, state_labels) 124 | 125 | action_ax = fig.add_subplot(1, 2, 2) 126 | fig, action_ax = self.plot_actions(ts, us, fig, action_ax, action_labels) 127 | 128 | return fig, (state_ax, action_ax) 129 | -------------------------------------------------------------------------------- /core/geometry/__init__.py: -------------------------------------------------------------------------------- 1 | from .ball import Ball 2 | from .box import Box 3 | from .convex_body import ConvexBody 4 | from .derived import Derived 5 | from .visual import Visual 6 | from .visual_derived import VisualDerived 7 | from .two_ball import TwoBall 8 | -------------------------------------------------------------------------------- /core/geometry/ball.py: -------------------------------------------------------------------------------- 1 | from .box import Box 2 | from .convex_body import ConvexBody 3 | from ..dynamics import AffineDynamics, ScalarDynamics 4 | 5 | from numpy import cos, dot, identity, linspace, pi, reshape, sin, zeros 6 | from numpy.linalg import norm 7 | from numpy.ma import masked_array 8 | from numpy.random import multivariate_normal, rand 9 | 10 | class Ball(ConvexBody): 11 | def __init__(self, dim): 12 | ConvexBody.__init__(self, dim) 13 | 14 | def sample(self, N): 15 | mean = zeros(self.dim) 16 | cov = identity(self.dim) 17 | normal_samples = multivariate_normal(mean, cov, N) 18 | sphere_samples = normal_samples / norm(normal_samples, axis=1, keepdims=True) 19 | radius_samples = rand(N, 1) ** (1 / self.dim) 20 | return radius_samples * sphere_samples 21 | 22 | def is_member(self, xs): 23 | return norm(xs, axis=1) <= 1 24 | 25 | def uniform(self, N): 26 | xs = Box(self.dim).uniform_list(N) 27 | idxs = self.is_member(xs) 28 | return xs, idxs 29 | 30 | def uniform_grid(self, N): 31 | shape = [N] * self.dim 32 | xs, idxs = self.uniform(N) 33 | mask = reshape(~idxs, shape) 34 | return [masked_array(reshape(arr, shape), mask) for arr in xs.T] 35 | 36 | def uniform_list(self, N): 37 | xs, idxs = self.uniform(N) 38 | return xs[idxs] 39 | 40 | def safety(self, affine_dynamics): 41 | return self.SafetyDynamics(affine_dynamics) 42 | 43 | class SafetyDynamics(AffineDynamics, ScalarDynamics): 44 | def __init__(self, affine_dynamics): 45 | self.dynamics = affine_dynamics 46 | 47 | def image(self, x, t): 48 | return 1 - norm(self.dynamics.image(x, t)) ** 2 49 | 50 | def eval_grad(self, x, t): 51 | return -2 * self.dynamics.image(x, t) 52 | 53 | def drift(self, x, t): 54 | return dot(self.eval_grad(x, t), self.dynamics.drift(x, t)) 55 | 56 | def act(self, x, t): 57 | return dot(self.eval_grad(x, t), self.dynamics.act(x, t)) 58 | -------------------------------------------------------------------------------- /core/geometry/box.py: -------------------------------------------------------------------------------- 1 | from numpy import Inf, linspace, meshgrid, reshape 2 | from numpy.linalg import norm 3 | from numpy.ma import masked_array 4 | from numpy.random import rand 5 | 6 | from .convex_body import ConvexBody 7 | from ..util import arr_map 8 | 9 | class Box(ConvexBody): 10 | def sample(self, N): 11 | return 2 * rand(N, self.dim) - 1 12 | 13 | def is_member(self, xs): 14 | return norm(xs, Inf, axis=1) <= 1 15 | 16 | def meshgrid(self, N): 17 | interval = linspace(-1, 1, N) 18 | return meshgrid(*([interval] * self.dim), indexing='ij') 19 | 20 | def uniform_grid(self, N): 21 | grids = self.meshgrid(N) 22 | return [masked_array(grid, mask=(False * grid)) for grid in grids] 23 | 24 | def uniform_list(self, N): 25 | grids = self.meshgrid(N) 26 | return arr_map(lambda grid: reshape(grid, -1), grids).T 27 | -------------------------------------------------------------------------------- /core/geometry/convex_body.py: -------------------------------------------------------------------------------- 1 | from numpy import argmin, array, Inf, mean, sqrt, zeros 2 | from numpy.linalg import norm 3 | from numpy.ma import masked_array 4 | from numpy.random import permutation 5 | from sys import stdout 6 | 7 | from ..util import arr_map 8 | 9 | class ConvexBody: 10 | def __init__(self, dim): 11 | self.dim = dim 12 | 13 | def sample(self, N): 14 | raise NotImplementedError 15 | 16 | def is_member(self, xs): 17 | raise NotImplementedError 18 | 19 | def uniform_grid(self, N): 20 | raise NotImplementedError 21 | 22 | def uniform_list(self, N): 23 | raise NotImplementedError 24 | 25 | def safety(self, affine_dynamics): 26 | raise NotImplementedError 27 | 28 | def voronoi_iteration(self, N, k, tol, verbose=False): 29 | samples = self.sample(N) 30 | 31 | def centers_to_clusters(centers): 32 | cluster_idxs = argmin([norm(samples - center, axis=1) for center in centers], axis=0) 33 | clusters = [samples[cluster_idxs == idx] for idx in range(k)] 34 | return clusters 35 | 36 | centers = samples[permutation(N)[:k]] 37 | clusters = centers_to_clusters(centers) 38 | 39 | def total_distance(clusters, centers): 40 | return sqrt(sum(sum(norm(cluster - center, axis=1) ** 2) for cluster, center in zip(clusters, centers))) 41 | 42 | prev_dist = Inf 43 | dist = total_distance(centers, clusters) 44 | iters = 0 45 | while abs(prev_dist - dist) > tol: 46 | iters += 1 47 | if verbose: 48 | stdout.write('Iterations: ' + str(iters) + '\tChange in total distance: ' + str(abs(prev_dist - dist)) + '\r') 49 | stdout.flush() 50 | prev_dist = dist 51 | centers = array([mean(cluster, axis=0) for cluster in clusters]) 52 | clusters = centers_to_clusters(centers) 53 | dist = total_distance(centers, clusters) 54 | 55 | return clusters, centers 56 | 57 | def grid_map(self, func, grids): 58 | xs = array([grid.compressed() for grid in grids]).T 59 | vals = arr_map(func, xs) 60 | 61 | mask = grids[0].mask 62 | idxs = ~mask 63 | shape = grids[0].shape 64 | val_grid = masked_array(zeros(shape), mask) 65 | val_grid[idxs] = vals 66 | 67 | return val_grid 68 | -------------------------------------------------------------------------------- /core/geometry/derived.py: -------------------------------------------------------------------------------- 1 | from numpy import dot, ones, zeros 2 | from numpy.linalg import inv 3 | from numpy.ma import masked_array 4 | 5 | from .convex_body import ConvexBody 6 | from ..dynamics import AffineDynamics 7 | from ..util import arr_map 8 | 9 | class Derived(ConvexBody): 10 | def __init__(self, primitive, T, c): 11 | ConvexBody.__init__(self, primitive.dim) 12 | self.primitive = primitive 13 | self.T = T 14 | self.T_inv = inv(T) 15 | self.c = c 16 | 17 | def from_primitive(self, xs): 18 | return dot(self.T_inv, xs.T).T + self.c 19 | 20 | def to_primitive(self, xs): 21 | return dot(self.T, (xs - self.c).T).T 22 | 23 | def sample(self, N): 24 | return self.from_primitive(self.primitive.sample(N)) 25 | 26 | def is_member(self, xs): 27 | return self.primitive.is_member(self.to_primitive(xs)) 28 | 29 | def uniform_grid(self, N): 30 | grids = self.primitive.uniform_grid(N) 31 | primitive_list = arr_map(lambda grid: grid.compressed(), grids).T 32 | derived_list = self.from_primitive(primitive_list) 33 | 34 | mask = grids[0].mask 35 | idxs = ~mask 36 | shape = [N] * self.dim 37 | derived_grids = [masked_array(zeros(shape), mask) for _ in range(self.dim)] 38 | for i in range(self.dim): 39 | derived_grids[i][idxs] = derived_list[:, i] 40 | return derived_grids 41 | 42 | def uniform_list(self, N): 43 | return self.from_primitive(self.primitive.uniform_list(N)) 44 | 45 | def safety(self, affine_dynamics): 46 | derived_dynamics = self.DerivedDynamics(self, affine_dynamics) 47 | return self.primitive.safety(derived_dynamics) 48 | 49 | class DerivedDynamics(AffineDynamics): 50 | def __init__(self, derived, affine_dynamics): 51 | self.derived = derived 52 | self.dynamics = affine_dynamics 53 | 54 | def image(self, x, t): 55 | return self.derived.to_primitive(self.dynamics.image(x, t)) 56 | 57 | def drift(self, x, t): 58 | return dot(self.derived.T, self.dynamics.drift(x, t)) 59 | 60 | def act(self, x, t): 61 | return dot(self.derived.T, self.dynamics.act(x, t)) 62 | -------------------------------------------------------------------------------- /core/geometry/two_ball.py: -------------------------------------------------------------------------------- 1 | from numpy import array, cos, linspace, pi, sin 2 | 3 | from .ball import Ball 4 | from .visual import Visual 5 | 6 | class TwoBall(Ball, Visual): 7 | def __init__(self): 8 | Ball.__init__(self, dim=2) 9 | 10 | def boundary(self, N): 11 | thetas = linspace(0, 2 * pi, N) 12 | return array([cos(thetas), sin(thetas)]).T 13 | -------------------------------------------------------------------------------- /core/geometry/visual.py: -------------------------------------------------------------------------------- 1 | class Visual: 2 | def boundary(self, N): 3 | raise NotImplementedError 4 | -------------------------------------------------------------------------------- /core/geometry/visual_derived.py: -------------------------------------------------------------------------------- 1 | from .derived import Derived 2 | from .visual import Visual 3 | 4 | class VisualDerived(Visual, Derived): 5 | def __init__(self, visual_primitive, T, c): 6 | Derived.__init__(self, visual_primitive, T, c) 7 | 8 | def boundary(self, N): 9 | return self.from_primitive(self.primitive.boundary(N)) 10 | -------------------------------------------------------------------------------- /core/learning/__init__.py: -------------------------------------------------------------------------------- 1 | from .gaussian_process_estimator import GaussianProcessEstimator 2 | from .residual_affine_model import ResidualAffineModel 3 | from .residual_model import ResidualModel 4 | from .value_estimator import ValueEstimator 5 | from .gaussian_process import GaussianProcess, RBFKernel, ScaledGaussianProcess, \ 6 | GPScaler, PeriodicKernel, AdditiveKernel, MultiplicativeKernel -------------------------------------------------------------------------------- /core/learning/gaussian_process_estimator.py: -------------------------------------------------------------------------------- 1 | from numpy import array, dot 2 | from numpy.linalg import solve 3 | 4 | from .. import arr_map 5 | 6 | class GaussianProcessEstimator: 7 | def __init__(self, kernel, data): 8 | self.kernel = kernel 9 | self.data = data 10 | self.kernel_mat = arr_map(self.embedding, data) 11 | self.weights = None 12 | 13 | def embedding(self, x): 14 | return array([self.kernel.eval(x_i, x) for x_i in self.data]) 15 | 16 | def fit(self, targets): 17 | self.weights = solve(self.kernel_mat, targets) 18 | return self 19 | 20 | def eval(self, x): 21 | return dot(self.weights, self.embedding(x)) 22 | -------------------------------------------------------------------------------- /core/learning/keras/__init__.py: -------------------------------------------------------------------------------- 1 | from .keras_residual_affine_model import KerasResidualAffineModel 2 | from .keras_residual_model import KerasResidualModel 3 | from .keras_residual_scalar_affine_model import KerasResidualScalarAffineModel -------------------------------------------------------------------------------- /core/learning/keras/keras_residual_affine_model.py: -------------------------------------------------------------------------------- 1 | from keras import Model, Sequential 2 | from keras.layers import Add, Dense, Dot, Input, Reshape 3 | from numpy import array 4 | from numpy.random import permutation 5 | 6 | from .. import ResidualAffineModel 7 | 8 | class KerasResidualAffineModel(ResidualAffineModel): 9 | def __init__(self, d_drift_in, d_act_in, d_hidden, m, d_out, optimizer='sgd', loss='mean_absolute_error'): 10 | drift_model = Sequential() 11 | drift_model.add(Dense(d_hidden, input_shape=(d_drift_in,), activation='relu')) 12 | drift_model.add(Dense(d_out)) 13 | self.drift_model = drift_model 14 | 15 | drift_inputs = Input((d_drift_in,)) 16 | drift_residuals = self.drift_model(drift_inputs) 17 | 18 | act_model = Sequential() 19 | act_model.add(Dense(d_hidden, input_shape=(d_act_in,), activation='relu')) 20 | act_model.add(Dense(d_out * m)) 21 | act_model.add(Reshape((d_out, m))) 22 | self.act_model = act_model 23 | 24 | act_inputs = Input((d_act_in,)) 25 | act_residuals = self.act_model(act_inputs) 26 | 27 | us = Input((m,)) 28 | 29 | residuals = Add()([drift_residuals, Dot([2, 1])([act_residuals, us])]) 30 | model = Model([drift_inputs, act_inputs, us], residuals) 31 | model.compile(optimizer, loss) 32 | self.model = model 33 | 34 | def eval_drift(self, drift_input): 35 | return self.drift_model.predict(array([drift_input]))[0] 36 | 37 | def eval_act(self, act_input): 38 | return self.act_model.predict(array([act_input]))[0] 39 | 40 | def shuffle(self, drift_inputs, act_inputs, us, residuals): 41 | perm = permutation(len(residuals)) 42 | return drift_inputs[perm], act_inputs[perm], us[perm], residuals[perm] 43 | 44 | def fit(self, drift_inputs, act_inputs, us, residuals, batch_size=1, num_epochs=1, validation_split=0): 45 | drift_inputs, act_inputs, us, residuals = self.shuffle(drift_inputs, act_inputs, us, residuals) 46 | self.model.fit([drift_inputs, act_inputs, us], residuals, batch_size=batch_size, epochs=num_epochs, validation_split=validation_split) 47 | -------------------------------------------------------------------------------- /core/learning/keras/keras_residual_model.py: -------------------------------------------------------------------------------- 1 | from keras import Sequential 2 | from keras.layers import Dense 3 | from numpy import array 4 | from numpy.random import permutation 5 | 6 | from .. import ResidualModel 7 | 8 | class KerasResidualModel(ResidualModel): 9 | def __init__(self, d_in, d_hidden, d_out, optimizer='sgd', loss='mean_absolute_error'): 10 | model = Sequential() 11 | model.add(Dense(d_hidden, input_shape=(d_in,), activation='relu')) 12 | model.add(Dense(d_out)) 13 | model.compile(optimizer, loss) 14 | self.model = model 15 | 16 | def eval_dot(self, input): 17 | return self.model.predict(array([input]))[0] 18 | 19 | def shuffle(self, inputs, residuals): 20 | perm = permutation(len(inputs)) 21 | return inputs[perm], residuals[perm] 22 | 23 | def fit(self, inputs, residuals, batch_size=1, num_epochs=1, validation_split=0): 24 | inputs, residuals = self.shuffle(inputs, residuals) 25 | self.model.fit(inputs, residuals, batch_size=batch_size, epochs=num_epochs, validation_split=validation_split) 26 | -------------------------------------------------------------------------------- /core/learning/keras/keras_residual_scalar_affine_model.py: -------------------------------------------------------------------------------- 1 | from .keras_residual_affine_model import KerasResidualAffineModel 2 | from numpy import array 3 | 4 | class KerasResidualScalarAffineModel(KerasResidualAffineModel): 5 | def __init__(self, d_drift_in, d_act_in, d_hidden, m, d_out, optimizer='sgd', loss='mean_absolute_error'): 6 | KerasResidualAffineModel.__init__(self, d_drift_in, d_act_in, d_hidden, m, d_out, optimizer, loss) 7 | 8 | def eval_drift(self, drift_input): 9 | return self.drift_model.predict(array([drift_input]))[0][0] 10 | 11 | def eval_act(self, act_input): 12 | return self.act_model.predict(array([act_input]))[0][0] -------------------------------------------------------------------------------- /core/learning/kernels/__init__.py: -------------------------------------------------------------------------------- 1 | from .kernel import Kernel 2 | from .laplacian_kernel import LaplacianKernel 3 | -------------------------------------------------------------------------------- /core/learning/kernels/kernel.py: -------------------------------------------------------------------------------- 1 | class Kernel: 2 | def eval(self, x, y): 3 | raise NotImplementedError 4 | -------------------------------------------------------------------------------- /core/learning/kernels/laplacian_kernel.py: -------------------------------------------------------------------------------- 1 | from numpy import exp 2 | from numpy.linalg import norm 3 | 4 | from .kernel import Kernel 5 | 6 | class LaplacianKernel(Kernel): 7 | def __init__(self, alpha=1): 8 | self.alpha = alpha 9 | 10 | def eval(self, x, y): 11 | return exp(-self.alpha * norm(x - y, 1)) 12 | -------------------------------------------------------------------------------- /core/learning/residual_affine_model.py: -------------------------------------------------------------------------------- 1 | class ResidualAffineModel: 2 | def eval_drift(self, drift_input): 3 | pass 4 | 5 | def eval_act(self, act_input): 6 | pass 7 | 8 | def fit(self, drift_inputs, act_inputs, us, residuals, batch_size=1, num_epochs=1, validation_split=0): 9 | pass 10 | -------------------------------------------------------------------------------- /core/learning/residual_model.py: -------------------------------------------------------------------------------- 1 | class ResidualModel: 2 | def eval_dot(self, input): 3 | pass 4 | 5 | def fit(self, inputs, residuals, batch_size=1, num_epochs=1, validation_split=0): 6 | pass 7 | -------------------------------------------------------------------------------- /core/learning/value_estimator.py: -------------------------------------------------------------------------------- 1 | from numpy.linalg import solve 2 | 3 | from .gaussian_process_estimator import GaussianProcessEstimator 4 | from .. import arr_map 5 | 6 | class ValueEstimator(GaussianProcessEstimator): 7 | def __init__(self, kernel, gamma, states, next_states, rewards): 8 | GaussianProcessEstimator.__init__(self, kernel, states) 9 | embedded_next_states = arr_map(self.embedding, next_states) 10 | self.weights = solve( self.kernel_mat - gamma * embedded_next_states, (1 - gamma) * rewards ) 11 | 12 | def transition_map(system, dt, atol=1e-6, rtol=1e-6): 13 | 14 | def f(s, a): 15 | return system.step(s, a, 0, dt, atol, rtol) 16 | 17 | return f 18 | 19 | def policy(controller): 20 | 21 | def pi(s): 22 | return controller.process(controller.eval(s, 0)) 23 | 24 | return pi 25 | 26 | def build(kernel, system, controller, R, gamma, states, dt, atol=1e-6, rtol=1e-6): 27 | f = ValueEstimator.transition_map(system, dt, atol, rtol) 28 | pi = ValueEstimator.policy(controller) 29 | return ValueEstimator._build(kernel, f, pi, R, gamma, states) 30 | 31 | def _build(kernel, f, pi, R, gamma, states): 32 | _, next_states, rewards = ValueEstimator.gen_data(f, pi, R, states) 33 | return ValueEstimator(kernel, gamma, states, next_states, rewards) 34 | 35 | def gen_data(f, pi, R, states): 36 | actions = arr_map(pi, states) 37 | next_states = arr_map(f, states, actions) 38 | rewards = arr_map(R, states, actions, next_states) 39 | return actions, next_states, rewards 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /core/systems/__init__.py: -------------------------------------------------------------------------------- 1 | from .cart_pole import CartPole 2 | from .affine_gp_system import AffineGPSystem 3 | from .double_inverted_pendulum import DoubleInvertedPendulum 4 | from .inverted_pendulum import InvertedPendulum 5 | from .planar_quadrotor import PlanarQuadrotor 6 | from .polynomial_system import PolynomialSystem 7 | 8 | from .linear_system import LinearSystemDynamics 9 | from .segway_system import Segway 10 | from .segway_3d_system import Segway3D 11 | from .car_system import Car 12 | from .single_track_bicycle_system import SingleTrackBicycle -------------------------------------------------------------------------------- /core/systems/affine_gp_system.py: -------------------------------------------------------------------------------- 1 | import torch as th 2 | from numpy import eye, exp 3 | from numpy.linalg import pinv 4 | from scipy.linalg import logm 5 | 6 | from core.dynamics import SystemDynamics, Dynamics 7 | 8 | class AffineGPSystem(SystemDynamics, Dynamics): 9 | 10 | def __init__(self, gaussianProcess, n, m, delta_t, 11 | dyn_dims=None, 12 | ddim_to_dim=None, 13 | ddim_to_gp_idx=None, 14 | force_delta_mode=False): 15 | SystemDynamics.__init__(self, n, m) 16 | self.gp = gaussianProcess 17 | self.delta_t = delta_t 18 | if dyn_dims is None: 19 | self.dyn_dims = [d for d in range(n)] 20 | else: 21 | self.dyn_dims = dyn_dims 22 | self.force_delta_mode = force_delta_mode 23 | self.delta_mode_on = force_delta_mode or\ 24 | len(self.dyn_dims) != self.n or\ 25 | ddim_to_dim is not None 26 | if ddim_to_dim is None: 27 | self.ddim_to_dim = {} 28 | self.ddim_to_gp_idx={} 29 | if ddim_to_gp_idx is not None: 30 | raise Exception("[ERROR] ddim_to_gp only valid with " 31 | "ddim_to_gp_idx") 32 | else: 33 | self.ddim_to_dim = ddim_to_dim 34 | self.ddim_to_gp_idx = ddim_to_gp_idx 35 | assert set(self.ddim_to_dim.keys()) == set(self.ddim_to_gp_idx.keys()) 36 | 37 | def forward(self, x, u, t): 38 | A, B, C = self._make_AB(x, u) 39 | return A @ x + B @ u + C 40 | 41 | def jacobian_exp(self, xts, uts): 42 | As, Bs, Cs, cov, _ = self._next_step_info(xts, uts) 43 | Cs = Cs.squeeze() 44 | return As, Bs, Cs, cov 45 | 46 | def addFrameDims(self, Js): 47 | Acols = list() 48 | for i in range(self.n): 49 | if i in self.dyn_dims: 50 | idx = self.dyn_dims.index(i) 51 | Acols.append(Js[:, :, idx]) 52 | else: 53 | Acols.append(th.zeros(Js.shape[0], self.n)) 54 | As = th.stack(Acols, dim=2) 55 | return As 56 | 57 | def step(self, x_0, u_0, t_0, t_f, atol=1e-6, rtol=1e-6): 58 | delta_t = t_f - t_0 59 | if abs(self.delta_t - delta_t) < atol: 60 | xtp1 = self.vec_step(x_0[None, :], u_0[None, :]) 61 | return xtp1[0] 62 | else: 63 | return super().step(x_0, u_0, t_0, t_f, atol, rtol) 64 | 65 | def vec_step(self, xts, uts): 66 | delta_xtp1, _ = self.gp(th.cat([xts[:, self.dyn_dims], uts], dim=1)) 67 | 68 | if self.delta_mode_on: 69 | if not self.ddim_to_dim: 70 | xtp1 = xts + delta_xtp1 71 | else: 72 | xtp1 = xts.clone() 73 | for k, v in self.ddim_to_dim.items(): 74 | xtp1[:, [v, k]] += th.stack([ 75 | self.delta_t * xts[:, k], 76 | delta_xtp1[:, self.ddim_to_gp_idx[k]] 77 | ], dim=1) 78 | else: 79 | xtp1 = delta_xtp1 80 | 81 | return xtp1 82 | 83 | def jacobian(self, x, u, t): 84 | A, B, _ = self._make_AB(x, u) 85 | return A, B 86 | 87 | def _make_AB(self, x, u): 88 | assert x.dim() == 1 and u.dim() == 1, "[ERROR] Vectorization not Supported" 89 | 90 | x = x.unsqueeze(0) 91 | u = u.unsqueeze(0) 92 | eAdt, u_factor, nl_residual, _, xtp1 = self._next_step_info(x, u) 93 | 94 | eAdt = eAdt[0] 95 | u_factor = u_factor[0].detach().numpy() 96 | nl_residual = nl_residual.squeeze() 97 | 98 | eAdt_np = eAdt.detach().numpy() # batch size should be 1 here 99 | A = logm(eAdt_np).real * (1 / self.delta_t) 100 | u_c_factor = A @ pinv(eAdt_np - eye(self.n)) 101 | B = u_c_factor @ u_factor 102 | A = th.from_numpy(A) 103 | B = th.from_numpy(B) 104 | C = th.from_numpy(u_c_factor) @ nl_residual 105 | 106 | return A, B, C 107 | 108 | def _embed_kin_dim(self, J_dyn, delta_xtp1, eAdt, x): 109 | assert self.delta_mode_on 110 | n_samples = J_dyn.shape[0] 111 | if eAdt.ndim < 2: 112 | eAdt = eAdt.unsqueeze(0) 113 | full_eAdt = th.zeros(n_samples, self.n, self.n) 114 | u_factor = th.zeros(n_samples, self.n, self.m) 115 | xtp1 = x.clone() 116 | for k, v in self.ddim_to_dim.items(): 117 | full_eAdt[:, k] = eAdt[:, self.ddim_to_gp_idx[k]] 118 | full_eAdt[:, v, k] = self.delta_t 119 | 120 | u_factor[:, k] = J_dyn[:, self.ddim_to_gp_idx[k], -self.m:] 121 | u_factor[:, v] = 0.0 122 | xtp1[:, [v, k]] += th.stack([ 123 | self.delta_t * x[:, k], 124 | delta_xtp1[:, self.ddim_to_gp_idx[k]] 125 | ], dim=1) 126 | return full_eAdt, u_factor, xtp1 127 | 128 | def _next_step_info(self, xts, uts): 129 | delta_xtp1s, cov = self.gp(th.cat([xts[:, self.dyn_dims], uts], dim=1)) 130 | Js, _ = self.gp.ddx(th.cat([xts[:, self.dyn_dims], uts], dim=1)) 131 | if self.delta_mode_on: 132 | As = self.addFrameDims(Js) 133 | if not self.ddim_to_dim: 134 | xtp1s = xts + delta_xtp1s 135 | Bs = Js[:, :, -self.m:] 136 | else: 137 | As, Bs, xtp1s = self._embed_kin_dim(Js, delta_xtp1s, As, xts) 138 | As = th.eye(self.n) + As 139 | else: 140 | As = Js[:, :, :-self.m] 141 | xtp1s = delta_xtp1s 142 | Bs = Js[:, :, -self.m:] 143 | Cs = xtp1s.unsqueeze(2) - As @ xts.unsqueeze(2) - Bs @ uts.unsqueeze(2) 144 | return As, Bs, Cs, cov, delta_xtp1s -------------------------------------------------------------------------------- /core/systems/car_system.py: -------------------------------------------------------------------------------- 1 | from torch import cat, cos, diag, float64, norm, sin, stack, tensor, zeros, set_default_dtype 2 | from torch import atan2 3 | from torch.nn import Module, Parameter 4 | from core.dynamics import SystemDynamics, AffineDynamics 5 | 6 | set_default_dtype(float64) 7 | 8 | 9 | # Car Dynamics Copied from here: 10 | # https://github.com/urosolia/RacingLMPC/blob/master/src/fnc/SysModel.py#L130 11 | 12 | class Car(SystemDynamics, AffineDynamics, Module): 13 | 14 | def __init__(self): 15 | super(SystemDynamics, self).__init__(6, 2) 16 | super(Module, self).__init__() 17 | self.m = Parameter(tensor(1.98)) 18 | self.lf = Parameter(tensor(0.125)) 19 | self.lr = Parameter(tensor(0.125)) 20 | self.Iz = Parameter(tensor(0.024)) 21 | self.Cf = Parameter(tensor(1.25)) 22 | self.Bf = Parameter(tensor(1.0)) 23 | self.Cr = Parameter(tensor(1.25)) 24 | self.Br = Parameter(tensor(1.0)) 25 | 26 | self.Df = 0.8 * self.m * 9.81 / 2.0 27 | self.Dr = 0.8 * self.m * 9.81 / 2.0 28 | 29 | def forward(self, x, u, t): 30 | delta = u[:,0] 31 | a = u[:, 1] 32 | X = Y = psi = tensor(0.0) #assume these are for global coordinate frame 33 | vx = x[:,0] 34 | vy = x[:,1] 35 | wz = x[:,2] 36 | epsi = x[:,3] 37 | s = x[:,4] 38 | ey = x[:,5] 39 | 40 | # Compute tire split angle 41 | alpha_f = self.delta - atan2(vy + self.lf * wz, vx) 42 | alpha_r = - atan2(vy - self.lf * wz, vx) 43 | 44 | # Compute lateral force at front and rear tire 45 | Fyf = self.Df * sin(self.Cf * atan2(self.Bf * alpha_f)) 46 | Fyr = self.Dr * sin(self.Cr * atan2(self.Br * alpha_r)) 47 | return stack([ 48 | vx + self.deltaT * (a - 1 / self.m * Fyf * sin(delta) + wz * vy), 49 | vy + self.deltaT * (1 / self.m * (Fyf * cos(delta) + Fyr) - wz * vx), 50 | wz + self.deltaT * (1 / self.Iz * (self.lf * Fyf * cos(delta) - self.lr * Fyr)), 51 | psi + self.deltaT * (wz), 52 | X + self.deltaT * (vx * cos(psi) - vy * sin(psi)), 53 | Y + self.deltaT * (vx * sin(psi) + vy * cos(psi)), 54 | ], dim=1) -------------------------------------------------------------------------------- /core/systems/cart_pole.py: -------------------------------------------------------------------------------- 1 | from torch import cat, cos, float64, sin, stack, tensor 2 | from torch.nn import Module, Parameter 3 | from core.dynamics import RoboticDynamics 4 | 5 | 6 | 7 | class CartPole(RoboticDynamics, Module): 8 | def __init__(self, m_c, m_p, l, g=9.81): 9 | RoboticDynamics.__init__(self, 2, 1) 10 | Module.__init__(self) 11 | self.params = Parameter(tensor([m_c, m_p, l, g], dtype=float64)) 12 | 13 | def D(self, q): 14 | m_c, m_p, l, _ = self.params 15 | _, theta = q 16 | return stack( 17 | (stack([m_c + m_p, m_p * l * cos(theta)]), 18 | stack([m_p * l * cos(theta), m_p * (l ** 2)]))) 19 | 20 | def C(self, q, q_dot): 21 | _, m_p, l, _ = self.params 22 | z = tensor(0, dtype=float64) 23 | _, theta = q 24 | _, theta_dot = q_dot 25 | return stack((stack([z, -m_p * l * theta_dot * sin(theta)]), 26 | stack([z, z]))) 27 | 28 | def U(self, q): 29 | _, m_p, l, g = self.params 30 | _, theta = q 31 | return m_p * g * l * cos(theta) 32 | 33 | def G(self, q): 34 | _, m_p, l, g = self.params 35 | _, theta = q 36 | z = tensor(0, dtype=float64) 37 | return stack([z, -m_p * g * l * sin(theta)]) 38 | 39 | def B(self, q): 40 | return tensor([[1], [0]], dtype=float64) 41 | -------------------------------------------------------------------------------- /core/systems/double_inverted_pendulum.py: -------------------------------------------------------------------------------- 1 | from matplotlib.pyplot import figure 2 | from numpy import array 3 | import numpy as np 4 | from torch import cos, eye, float64, sin, stack, tensor 5 | from torch.nn import Module, Parameter 6 | from core.dynamics import FullyActuatedRoboticDynamics 7 | from core.util import default_fig 8 | 9 | class DoubleInvertedPendulum(FullyActuatedRoboticDynamics, Module): 10 | def __init__(self, m_1, m_2, l_1, l_2, g=9.81): 11 | FullyActuatedRoboticDynamics.__init__(self, 2, 2) 12 | Module.__init__(self) 13 | self.params = Parameter(tensor([m_1, m_2, l_1, l_2, g], dtype=float64)) 14 | 15 | def D(self, q): 16 | m_1, m_2, l_1, l_2, _ = self.params 17 | _, theta_2 = q 18 | D_11 = (m_1 + m_2) * (l_1 ** 2) + 2 * m_2 * l_1 * l_2 * cos(theta_2) + m_2 * (l_2 ** 2) 19 | D_12 = m_2 * l_1 * l_2 * cos(theta_2) + m_2 * (l_2 ** 2) 20 | D_21 = D_12 21 | D_22 = m_2 * (l_2 ** 2) 22 | return stack((stack([D_11, D_12]), 23 | stack([D_21, D_22]))) 24 | 25 | def C(self, q, q_dot): 26 | _, m_2, l_1, l_2, _ = self.params 27 | _, theta_2 = q 28 | theta_1_dot, theta_2_dot = q_dot 29 | C_11 = tensor(0, dtype=float64) 30 | C_12 = -m_2 * l_1 * l_2 * (2 * theta_1_dot + theta_2_dot) * sin(theta_2) 31 | C_21 = -C_12 / 2 32 | C_22 = -m_2 * l_1 * l_2 * theta_1_dot * sin(theta_2) / 2 33 | return stack((stack([C_11, C_12]), 34 | stack([C_21, C_22]))) 35 | 36 | def B(self, q): 37 | return eye(2, dtype=float64) 38 | 39 | def U(self, q): 40 | m_1, m_2, l_1, l_2, g = self.params 41 | theta_1, theta_2 = q 42 | return (m_1 + m_2) * g * l_1 * cos(theta_1) + m_2 * g * l_2 * cos(theta_1 + theta_2) 43 | 44 | def G(self, q): 45 | m_1, m_2, l_1, l_2, g = self.params 46 | theta_1, theta_2 = q 47 | G_1 = -(m_1 + m_2) * g * l_1 * sin(theta_1) - m_2 * g * l_2 * sin(theta_1 + theta_2) 48 | G_2 = -m_2 * g * l_2 * sin(theta_1 + theta_2) 49 | return stack([G_1, G_2]) 50 | 51 | def plot_coordinates(self, ts, qs, fig=None, ax=None, labels=None): 52 | fig, ax = default_fig(fig, ax) 53 | 54 | ax.set_title('Coordinates', fontsize=16) 55 | ax.set_xlabel('$\\theta_1$ (rad)', fontsize=16) 56 | ax.set_ylabel('$\\theta_2$ (rad)', fontsize=16) 57 | ax.plot(*qs.T, linewidth=3) 58 | 59 | return fig, ax 60 | 61 | def plot_states(self, ts, xs, fig=None, ax=None, labels=None): 62 | fig, ax = default_fig(fig, ax) 63 | 64 | ax.set_title('States', fontsize=16) 65 | ax.set_xlabel('$\\theta$ (rad)', fontsize=16) 66 | ax.set_ylabel('$\\dot{\\theta}$ (rad / sec)', fontsize=16) 67 | ax.plot(xs[:, 0], xs[:, 2], linewidth=3, label='$\\theta_1$') 68 | ax.plot(xs[:, 1], xs[:, 3], linewidth=3, label='$\\theta_2$') 69 | ax.legend(fontsize=16) 70 | 71 | return fig, ax 72 | 73 | def plot_actions(self, ts, us, fig=None, ax=None, labels=None): 74 | fig, ax = default_fig(fig, ax) 75 | 76 | if labels is None: 77 | labels = ['$\\tau_1$ (N $\\cdot m$)', '$\\tau_2$ (N $\\cdot$ m)'] 78 | 79 | ax.set_title('Actions', fontsize=16) 80 | ax.set_xlabel(labels[0], fontsize=16) 81 | ax.set_ylabel(labels[1], fontsize=16) 82 | ax.plot(*us.T, linewidth=3) 83 | 84 | return fig, ax 85 | 86 | def plot_tangents(self, ts, xs, fig=None, ax=None, skip=1): 87 | fig, ax = default_fig(fig, ax) 88 | 89 | ax.set_title('Tangent Vectors', fontsize=16) 90 | ax.set_xlabel('$\\theta_1$ (rad)', fontsize=16) 91 | ax.set_ylabel('$\\theta_2$ (rad)', fontsize=16) 92 | ax.plot(*xs[:, :2].T, linewidth=3) 93 | ax.quiver(*xs[::skip, :2].T, *xs[::skip, 2:].T, angles='xy') 94 | 95 | return fig, ax 96 | 97 | def plot_physical(self, ts, xs, fig=None, ax=None, skip=1): 98 | fig, ax = default_fig(fig, ax) 99 | 100 | _, _, l_1, l_2, _ = self.params.detach().numpy() 101 | theta_1s, theta_2s = xs[:, :2].T 102 | r_1s = l_1 * array([np.sin(theta_1s), 103 | np.cos(theta_1s)]) 104 | r_2s = r_1s + l_2 * array([np.sin(theta_1s + theta_2s), 105 | np.cos(theta_1s +theta_2s)]) 106 | zs = 0 * theta_1s[::skip] 107 | 108 | ax.set_title('Physical space', fontsize=16) 109 | ax.set_xlabel('$x$ (m)', fontsize=16) 110 | ax.set_ylabel('$z$ (m)', fontsize=16) 111 | ax.plot([zs, r_1s[0, ::skip]], [zs, r_1s[1, ::skip]], 'k') 112 | ax.plot([r_1s[0, ::skip], r_2s[0, ::skip]], [r_1s[1, ::skip], r_2s[1, ::skip]], 'k') 113 | ax.plot(*r_1s, linewidth=3) 114 | ax.plot(*r_2s, linewidth=3) 115 | ax.axis('equal') 116 | 117 | return fig, ax 118 | 119 | def plot(self, xs, us, ts, fig=None, action_labels=None, skip=1): 120 | if fig is None: 121 | fig = figure(figsize=(12, 6), tight_layout=True) 122 | 123 | physical_ax = fig.add_subplot(1, 2, 1) 124 | fig, physical_ax = self.plot_physical(ts, xs, fig, physical_ax, skip) 125 | 126 | action_ax = fig.add_subplot(1, 2, 2) 127 | fig, action_ax = self.plot_actions(ts, us, fig, action_ax, action_labels) 128 | 129 | return fig, (physical_ax, action_ax) 130 | -------------------------------------------------------------------------------- /core/systems/inverted_pendulum.py: -------------------------------------------------------------------------------- 1 | from matplotlib.pyplot import figure 2 | from numpy import array 3 | import numpy as np 4 | from torch import cos, float64, sin, stack, tensor 5 | from torch.nn import Module, Parameter 6 | from core.dynamics import FullyActuatedRoboticDynamics 7 | from core.util import default_fig 8 | 9 | 10 | class InvertedPendulum(FullyActuatedRoboticDynamics): 11 | def __init__(self, mass, l, g=9.81): 12 | FullyActuatedRoboticDynamics.__init__(self, 1, 1) 13 | self.params = Parameter(tensor([mass, l, g], dtype=float64)) 14 | 15 | def D(self, q): 16 | return (self.mass * (self.l ** 2))[None, None, None].expand(q.shape[0], -1, -1) 17 | 18 | def C(self, q, q_dot): 19 | return tensor([[[0.0]]], dtype=float64).expand(q.shape[0], -1, -1) 20 | 21 | def U(self, q): 22 | theta = q[:, 0, None] 23 | return self.mass * self.g * self.l * cos(theta) 24 | 25 | def G(self, q): 26 | theta = q[:, 0, None] 27 | return -self.mass * self.g * self.l * sin(theta) 28 | 29 | def B(self, q): 30 | return tensor([[[1]]], dtype=float64).expand(q.shape[0], -1, -1) 31 | 32 | @property 33 | def l(self): 34 | return self.params[1] 35 | 36 | @property 37 | def mass(self): 38 | return self.params[0] 39 | 40 | @property 41 | def g(self): 42 | return self.params[2] 43 | 44 | def plot_states(self, ts, xs, fig=None, ax=None, labels=None, 45 | color="black"): 46 | fig, ax = default_fig(fig, ax) 47 | # angle = xs[:, 0] 48 | # angle = np.arctan2(np.sin(angle), np.cos(angle)) 49 | # xs = np.stack([angle, xs[:, 1]], axis=1) 50 | ax.set_title('States', fontsize=16) 51 | ax.set_xlabel('$\\theta$ (rad)', fontsize=16) 52 | ax.set_ylabel('$\\dot{\\theta}$ (rad / sec)', fontsize=16) 53 | ax.set_xlim(-2 * np.pi, 2 * np.pi) 54 | ax.set_ylim(-10, 10) 55 | ax.scatter(*xs.T, linewidth=1, color=color) 56 | 57 | return fig, ax 58 | 59 | def plot_physical(self, ts, xs, fig=None, ax=None, skip=1): 60 | fig, ax = default_fig(fig, ax) 61 | 62 | _, l, g = self.params 63 | l = l.item() 64 | g = g.item() 65 | thetas = xs[:, 0] 66 | rs = l * array([np.sin(thetas), np.cos(thetas)]) 67 | zs = 0 * thetas[::skip] 68 | 69 | ax.set_title('Physical space', fontsize=16) 70 | ax.set_xlabel('$x$ (m)', fontsize=16) 71 | ax.set_ylabel('$z$ (m)', fontsize=16) 72 | ax.plot([zs, rs[0, ::skip]], [zs, rs[1, ::skip]], 'k') 73 | ax.plot(*rs, linewidth=3) 74 | ax.set_xlim(-l - 0.2, l + 0.2) 75 | ax.set_ylim(-l - 0.2, l + 0.2) 76 | ax.axis('equal') 77 | 78 | return fig, ax 79 | 80 | def plot(self, xs, us, ts, fig=None, action_labels=None, skip=1): 81 | if fig is None: 82 | fig = figure(figsize=(12, 6), tight_layout=True) 83 | 84 | physical_ax = fig.add_subplot(1, 3, 1) 85 | fig, physical_ax = self.plot_physical(ts, xs, fig, physical_ax, skip) 86 | 87 | state_ax = fig.add_subplot(1, 3, 2) 88 | 89 | fig, state_ax = self.plot_states(ts, xs, fig, state_ax) 90 | 91 | action_ax = fig.add_subplot(1, 3, 3) 92 | fig, action_ax = self.plot_actions(ts, us, fig, action_ax, 93 | action_labels) 94 | 95 | return fig, (physical_ax, action_ax) 96 | -------------------------------------------------------------------------------- /core/systems/linear_system.py: -------------------------------------------------------------------------------- 1 | from numpy import dot 2 | from torch import tensor, float64, from_numpy 3 | from torch.nn import Module, Parameter 4 | 5 | from core.dynamics import AffineDynamics 6 | from core.dynamics import LinearizableDynamics 7 | from core.dynamics import SystemDynamics 8 | 9 | class LinearSystemDynamics(SystemDynamics, AffineDynamics, 10 | LinearizableDynamics, Module): 11 | """Class for linear dynamics of the form x_dot = A * x + B * u.""" 12 | 13 | def __init__(self, A, B): 14 | """Create a LinearSystemDynamics object. 15 | 16 | Inputs: 17 | State matrix, A: numpy array 18 | Input matrix, B: numpy array 19 | """ 20 | 21 | n, m = B.shape 22 | assert A.shape == (n, n) 23 | 24 | SystemDynamics.__init__(self, n, m) 25 | Module.__init__(self) 26 | self.A = Parameter(from_numpy(A).to(float64, copy=True)) 27 | self.B = Parameter(from_numpy(B).to(float64, copy=True)) 28 | 29 | def drift(self, x, t): 30 | 31 | return self.A@x 32 | 33 | def act(self, x, t): 34 | return self.B 35 | 36 | def jacobian_impl(self, x, u, t): 37 | return self.A, self.B 38 | 39 | def linear_system(self): 40 | return self.A, self.B -------------------------------------------------------------------------------- /core/systems/planar_quadrotor.py: -------------------------------------------------------------------------------- 1 | from matplotlib.pyplot import figure 2 | from mpl_toolkits.mplot3d import Axes3D 3 | from numpy import array 4 | from torch import arange, atan, diag, float64, cat, cos, sin, \ 5 | stack, tensor, zeros 6 | from torch.nn import Module, Parameter 7 | 8 | from core.dynamics import FBLinDynamics, RoboticDynamics, SystemDynamics 9 | from core.util import default_fig 10 | 11 | 12 | class PlanarQuadrotor(RoboticDynamics, Module): 13 | def __init__(self, m, J, g=9.81): 14 | RoboticDynamics.__init__(self, 3, 2) 15 | Module.__init__(self) 16 | self.params = Parameter(tensor([m, J, g])) 17 | 18 | def D(self, q): 19 | m, J, _ = self.params 20 | return diag(stack([m, m, J])) 21 | 22 | def C(self, q, q_dot): 23 | return tensor((3, 3), dtype=float64) 24 | 25 | def U(self, q): 26 | m, _, g = self.params 27 | _, z, _ = q 28 | return m * g * z 29 | 30 | def G(self, q): 31 | m, _, g = self.params 32 | _zero = tensor(0, dtype=float64) 33 | return stack([_zero, m * g, _zero]) 34 | 35 | def B(self, q): 36 | _, _, theta = q 37 | _zero = tensor(0, dtype=float64) 38 | _one = tensor(1, dtype=float64) 39 | return stack(( 40 | stack([sin(theta), _zero]), 41 | stack([cos(theta), _zero]), 42 | stack([_zero, _one]) 43 | )) 44 | 45 | class Extension(SystemDynamics): 46 | def __init__(self, planar_quadrotor): 47 | SystemDynamics.__init__(self, n=8, m=2) 48 | self.quad = planar_quadrotor 49 | 50 | def step(self, x_0, u_0, t_0, t_f, atol=1e-6, rtol=1e-6): 51 | x = x_0[:6] 52 | f, f_dot = x_0[-2:] 53 | f_ddot, tau = u_0 54 | u = tensor([f, tau]) 55 | 56 | dt = t_f - t_0 57 | f += f_dot * dt 58 | f_dot += f_ddot * dt 59 | x = self.quad.step(x, u, t_0, t_f, atol, rtol) 60 | 61 | return cat([x, cat([f, f_dot])]) 62 | 63 | class Output(FBLinDynamics): 64 | def __init__(self, extension): 65 | relative_degrees = [4, 4] 66 | perm = cat([2 * arange(4), 2 * arange(4) + 1]) 67 | FBLinDynamics.__init__(self, relative_degrees, perm) 68 | self.params = extension.quad.params 69 | 70 | def r_ddot(self, f, theta): 71 | m, _, g = self.params 72 | x_ddot = f * sin(theta) / m 73 | z_ddot = f * cos(theta) / m - g 74 | return cat((x_ddot, z_ddot)) 75 | 76 | def r_dddot(self, f, f_dot, theta, theta_dot): 77 | m, _, _ = self.params 78 | x_dddot = (f_dot * sin(theta) + f * theta_dot * cos(theta)) / m 79 | z_dddot = (f_dot * cos(theta) - f * theta_dot * sin(theta)) / m 80 | return cat([x_dddot, z_dddot]) 81 | 82 | def r_ddddot_drift(self, f, f_dot, theta, theta_dot): 83 | m, _, _ = self.params 84 | x_ddddot_drift = (2 * f_dot * theta_dot * cos(theta) - f * ( 85 | theta_dot ** 2) * sin(theta)) / m 86 | z_ddddot_drift = -(2 * f_dot * theta_dot * sin(theta) + f * ( 87 | theta_dot ** 2) * cos(theta)) / m 88 | return cat([x_ddddot_drift, z_ddddot_drift]) 89 | 90 | def r_ddddot_act(self, f, theta): 91 | m, J, _ = self.params 92 | x_ddddot_act = tensor([sin(theta), f * cos(theta) / J]) / m 93 | z_ddddot_act = tensor([cos(theta), -f * sin(theta) / J]) / m 94 | return cat([x_ddddot_act, z_ddddot_act]) 95 | 96 | def image(self, x, t): 97 | q, q_dot = x[:6].view((2, 3)) 98 | f, f_dot = x[-2:] 99 | r, theta = q[:2], q[-1] 100 | r_dot, theta_dot = q_dot[:2], q_dot[-1] 101 | r_ddot = self.r_ddot(f, theta) 102 | r_dddot = self.r_dddot(f, f_dot, theta, theta_dot) 103 | return cat([r, r_dot, r_ddot, r_dddot]) 104 | 105 | def drift(self, x, t): 106 | eta = self.image(x, t) 107 | theta, theta_dot, f, f_dot = x[tensor([2, 5, -2, -1])] 108 | r_ddddot_drift = self.r_ddddot_drift(f, f_dot, theta, theta_dot) 109 | return cat([eta[2:], r_ddddot_drift]) 110 | 111 | def act(self, x, t): 112 | theta, f = x[2], x[-2] 113 | r_ddddot_act = self.r_ddddot_act(f, theta) 114 | return cat([zeros((6, 2)), r_ddddot_act]) 115 | 116 | def to_state(self, eta): 117 | m, _, g = self.params 118 | 119 | r, r_dot = eta[:4].view((2, 2)) 120 | x_ddot, z_ddot, x_dddot, z_dddot = eta[-4:] 121 | theta = atan(x_ddot / (z_ddot + g)) 122 | theta_dot = ((z_ddot + g) * x_dddot - x_ddot * z_dddot) / ( 123 | (z_ddot + g) ** 2) * (cos(theta) ** 2) 124 | q = cat([r, theta]) 125 | q_dot = cat([r_dot, theta_dot]) 126 | 127 | f = m * (z_ddot + g) / cos(theta) 128 | f_dot = m * (z_dddot + x_ddot * theta_dot) / cos(theta) 129 | 130 | return cat([q, q_dot, cat([f, f_dot])]) 131 | 132 | def plot_coordinates(self, ts, qs, fig=None, ax=None, labels=None): 133 | if fig is None: 134 | fig = figure(figsize=(6, 6), tight_layout=True) 135 | 136 | if ax is None: 137 | ax = fig.add_subplot(1, 1, 1, projection='3d') 138 | 139 | xs, zs, thetas = qs.T 140 | 141 | ax.set_title('Coordinates', fontsize=16) 142 | ax.set_xlabel('$x$ (m)', fontsize=16) 143 | ax.set_ylabel('$\\theta$ (rad)', fontsize=16) 144 | ax.set_zlabel('$z$ (m)', fontsize=16) 145 | ax.plot(xs, thetas, zs, linewidth=3) 146 | 147 | return fig, ax 148 | 149 | def plot_states(self, ts, xs, fig=None, ax=None, labels=None): 150 | fig, ax = default_fig(fig, ax) 151 | 152 | ax.set_title('States', fontsize=16) 153 | ax.set_xlabel('$q$', fontsize=16) 154 | ax.set_ylabel('$\\dot{q}$', fontsize=16) 155 | ax.plot(xs[:, 0], xs[:, 3], linewidth=3, label='$x$ (m)') 156 | ax.plot(xs[:, 1], xs[:, 4], linewidth=3, label='$z$ (m)') 157 | ax.plot(xs[:, 2], xs[:, 5], linewidth=3, label='$\\theta$ (rad)') 158 | ax.legend(fontsize=16) 159 | 160 | return fig, ax 161 | 162 | def plot_actions(self, ts, us, fig=None, ax=None, labels=None): 163 | fig, ax = default_fig(fig, ax) 164 | 165 | if labels is None: 166 | labels = ['$f$ (N)', '$\\tau$ (N $\\cdot$ m)'] 167 | 168 | ax.set_title('Actions', fontsize=16) 169 | ax.set_xlabel(labels[0], fontsize=16) 170 | ax.set_ylabel(labels[1], fontsize=16) 171 | ax.plot(*us.T, linewidth=3) 172 | 173 | return fig, ax 174 | 175 | def plot_tangents(self, ts, xs, fig=None, ax=None, skip=1): 176 | fig, ax = default_fig(fig, ax) 177 | 178 | ax.set_title('Tangent Vectors', fontsize=16) 179 | ax.set_xlabel('$x$ (m)', fontsize=16) 180 | ax.set_ylabel('$z$ (m)', fontsize=16) 181 | ax.plot(*xs[:, :2].T, linewidth=3) 182 | ax.quiver(*xs[::skip, :2].T, *xs[::skip, 3:5].T, angles='xy') 183 | 184 | return fig, ax 185 | 186 | def plot_physical(self, ts, xs, fig=None, ax=None, skip=1): 187 | fig, ax = default_fig(fig, ax) 188 | 189 | xs, zs, thetas = xs[:, :3].T 190 | dirs = array([sin(thetas), cos(thetas)])[:, ::skip] 191 | 192 | ax.set_title('Physical Space', fontsize=16) 193 | ax.set_xlabel('$x$ (m)', fontsize=16) 194 | ax.set_ylabel('$z$ (m)', fontsize=16) 195 | ax.quiver(xs[::skip], zs[::skip], *dirs, angles='xy') 196 | ax.plot(xs, zs, linewidth=3) 197 | ax.axis('equal') 198 | 199 | return fig, ax 200 | 201 | def plot(self, xs, us, ts, fig=None, action_labels=None, skip=1): 202 | if fig is None: 203 | fig = figure(figsize=(12, 6), tight_layout=True) 204 | 205 | physical_ax = fig.add_subplot(1, 2, 1) 206 | fig, physical_ax = self.plot_physical(ts, xs, fig, physical_ax, skip) 207 | 208 | action_ax = fig.add_subplot(1, 2, 2) 209 | fig, action_ax = self.plot_actions(ts, us, fig, action_ax, action_labels) 210 | 211 | return fig, (physical_ax, action_ax) 212 | -------------------------------------------------------------------------------- /core/systems/polynomial_system.py: -------------------------------------------------------------------------------- 1 | from numpy import array 2 | 3 | from core.dynamics import FBLinDynamics, SystemDynamics 4 | 5 | class PolynomialSystem(SystemDynamics, FBLinDynamics): 6 | def __init__(self, root=1, drift_gain=1, act_gain=1): 7 | SystemDynamics.__init__(self, n=1, m=1) 8 | FBLinDynamics.__init__(self, relative_degrees=[1]) 9 | self.root = root 10 | self.drift_gain = drift_gain 11 | self.act_gain = act_gain 12 | 13 | def drift(self, x, t): 14 | return -self.drift_gain * x * (x - self.root) * (x + self.root) 15 | 16 | def act(self, x, t): 17 | return array([[self.act_gain]]) 18 | -------------------------------------------------------------------------------- /core/systems/segway_3d_system.py: -------------------------------------------------------------------------------- 1 | from torch import cat, cos, diag, float64, norm, \ 2 | sin, stack, tensor, zeros, set_default_dtype, pow 3 | from torch import tanh 4 | from torch import atan2 5 | from torch.nn import Module, Parameter 6 | import torch as th 7 | 8 | from core.dynamics import SystemDynamics, AffineDynamics 9 | from core.util import default_fig 10 | from numpy import pi 11 | set_default_dtype(float64) 12 | 13 | model = th.tensor([44.798, # mb 14 | 2.485, # mw 15 | 0.055936595310797, # Jw 16 | -0.02322718759275, # a2 17 | 0.166845864363019, # c2 18 | 3.604960049044268, # A2 19 | 3.836289730154863, # B2 20 | 1.069672194414735, # C2 21 | 1.261650363363571, # K 22 | 0.195, # r 23 | 0.5, # L 24 | 9.81, # gGravity 25 | 0., # FricCoeff 3.185188257847262 26 | 1.0e-3, # velEps 27 | 1.225479467549329 # FricCoeff 1.225479467549329 28 | ], requires_grad=True) 29 | 30 | class Segway3D(SystemDynamics, AffineDynamics, Module): 31 | def __init__(self): 32 | SystemDynamics.__init__(self, 7, 2) 33 | Module.__init__(self) 34 | 35 | # void dynamics(const double t, 36 | # const double X[STATE_LENGTH], 37 | # const double U[INPUT_LENGTH], 38 | # double xDot[STATE_LENGTH]) 39 | # { 40 | # double g[STATE_LENGTH*INPUT_LENGTH]; 41 | # double Fric; 42 | # double a_tmp; 43 | # double b_a_tmp; 44 | # double f_tmp; 45 | # double b_f_tmp; 46 | # double c_f_tmp; 47 | # double d_f_tmp; 48 | # double e_f_tmp; 49 | # double f_f_tmp; 50 | # double g_f_tmp; 51 | # double h_f_tmp; 52 | # double i_f_tmp; 53 | # double j_f_tmp; 54 | # double k_f_tmp; 55 | # double l_f_tmp; 56 | # double m_f_tmp; 57 | # double n_f_tmp; 58 | # double o_f_tmp; 59 | # double p_f_tmp; 60 | # double q_f_tmp; 61 | # double r_f_tmp; 62 | # double s_f_tmp; 63 | # double t_f_tmp; 64 | # double u_f_tmp; 65 | # double v_f_tmp; 66 | # double f_tmp_tmp; 67 | # double b_f_tmp_tmp; 68 | # double w_f_tmp; 69 | # double x_f_tmp; 70 | # 71 | # /* */ 72 | def forward(self, X, U, t): 73 | Fric = X[3] - X[6] * model[9]; 74 | Fric = model[12] * tanh(Fric / model[13]) + model[14] * Fric; 75 | a_tmp = cos(X[5]); 76 | b_a_tmp = sin(X[5]); 77 | xDot_0 = X[3] * cos(X[2]); 78 | xDot_1 = X[3] * sin(X[2]); 79 | xDot_2 = X[4]; 80 | f_tmp = model[3] * model[3]; 81 | b_f_tmp = model[9] * model[9]; 82 | c_f_tmp = model[4] * model[4]; 83 | d_f_tmp = model[0] * model[0]; 84 | e_f_tmp = 4.0 * f_tmp; 85 | f_f_tmp = 4.0 * c_f_tmp; 86 | g_f_tmp = X[4] * X[4]; 87 | h_f_tmp = X[6] * X[6]; 88 | i_f_tmp = 4.0 * h_f_tmp + 3.0 * g_f_tmp; 89 | j_f_tmp = cos(2.0 * X[5]); 90 | k_f_tmp = cos(3.0 * X[5]); 91 | l_f_tmp = pow(model[3], 3.0); 92 | m_f_tmp = 4.0 * model[6] * model[4] * model[0]; 93 | n_f_tmp = pow(model[4], 3.0); 94 | o_f_tmp = sin(2.0 * X[5]); 95 | p_f_tmp = model[5] * model[4] * model[0] * model[9] * g_f_tmp; 96 | q_f_tmp = -model[4] * model[7] * model[0] * model[9] * g_f_tmp; 97 | r_f_tmp = sin(3.0 * X[5]); 98 | s_f_tmp = 3.0 * f_tmp * model[4] * d_f_tmp * model[9] * g_f_tmp; 99 | t_f_tmp = -4.0 * model[3] * model[4]; 100 | u_f_tmp = 2.0 * model[3] * model[4]; 101 | v_f_tmp = f_tmp * d_f_tmp; 102 | f_tmp_tmp = v_f_tmp * b_f_tmp; 103 | b_f_tmp_tmp = c_f_tmp * d_f_tmp * b_f_tmp; 104 | f_f_tmp = 1.0 / ( 105 | (((((((((4.0 * model[6] * model[2] + e_f_tmp * model[2] * 106 | model[0]) + f_f_tmp * model[2] * model[0]) + 2.0 * 107 | model[6] * model[0] * 108 | b_f_tmp) + f_tmp_tmp) + b_f_tmp_tmp) + 4.0 * model[6] * 109 | model[1] * b_f_tmp) 110 | + e_f_tmp * model[0] * model[1] * b_f_tmp) + f_f_tmp * 111 | model[0] * model[1] * b_f_tmp) + (f_tmp + -c_f_tmp) * 112 | d_f_tmp * b_f_tmp * j_f_tmp) + u_f_tmp * d_f_tmp * b_f_tmp * 113 | o_f_tmp); 114 | w_f_tmp = 2.0 * f_tmp; 115 | xDot_3 = 0.5 * model[9] * f_f_tmp * ( 116 | ((((((((((((((((((((((-8.0 * model[6] * 117 | Fric + -8.0 * f_tmp * Fric * model[ 118 | 0]) + -8.0 * c_f_tmp * Fric * model[ 119 | 0]) + 120 | model[0] * model[9] * ((((-8.0 * model[ 121 | 4] * Fric + model[3] * (-model[5] + 122 | model[7]) * g_f_tmp) + 4.0 * 123 | model[3] * model[ 124 | 6] * ( 125 | h_f_tmp + g_f_tmp)) + 126 | l_f_tmp * model[ 127 | 0] * i_f_tmp) + 128 | model[3] * c_f_tmp * 129 | model[0] * i_f_tmp) * 130 | a_tmp) + t_f_tmp * model[11] * d_f_tmp * 131 | model[9] * j_f_tmp) + model[3] * 132 | model[5] * model[0] * model[ 133 | 9] * g_f_tmp * k_f_tmp) + -model[3] * 134 | model[7] * 135 | model[0] * model[ 136 | 9] * g_f_tmp * k_f_tmp) + l_f_tmp * d_f_tmp * 137 | model[9] * 138 | g_f_tmp * k_f_tmp) + -3.0 * model[ 139 | 3] * c_f_tmp * d_f_tmp * model[9] * 140 | g_f_tmp * k_f_tmp) + 8.0 * model[3] * Fric * 141 | model[0] * model[9] * b_a_tmp) 142 | + m_f_tmp * h_f_tmp * model[ 143 | 9] * b_a_tmp) + e_f_tmp * model[4] * d_f_tmp * 144 | h_f_tmp * model[ 145 | 9] * b_a_tmp) + 4.0 * n_f_tmp * d_f_tmp * h_f_tmp * 146 | model[9] 147 | * b_a_tmp) + p_f_tmp * b_a_tmp) + m_f_tmp * model[ 148 | 9] * g_f_tmp * b_a_tmp) + 149 | q_f_tmp * b_a_tmp) + s_f_tmp * b_a_tmp) + 3.0 * n_f_tmp * d_f_tmp * 150 | model[9] 151 | * g_f_tmp * b_a_tmp) + w_f_tmp * model[11] * d_f_tmp * 152 | model[9] * o_f_tmp) + 153 | -2.0 * c_f_tmp * model[11] * d_f_tmp * model[ 154 | 9] * o_f_tmp) + p_f_tmp * 155 | r_f_tmp) + q_f_tmp * r_f_tmp) + s_f_tmp * r_f_tmp) + -n_f_tmp * d_f_tmp * 156 | model[9] * g_f_tmp * r_f_tmp); 157 | e_f_tmp = model[10] * model[10]; 158 | i_f_tmp = -2.0 * f_tmp; 159 | k_f_tmp = 2.0 * c_f_tmp; 160 | l_f_tmp = i_f_tmp * model[0]; 161 | m_f_tmp = k_f_tmp * model[0]; 162 | n_f_tmp = f_tmp * model[0]; 163 | c_f_tmp = c_f_tmp * model[0]; 164 | p_f_tmp = model[4] * model[0]; 165 | q_f_tmp = model[2] * e_f_tmp; 166 | e_f_tmp = e_f_tmp * model[1]; 167 | r_f_tmp = 2.0 * (model[7] + n_f_tmp); 168 | s_f_tmp = 2.0 * (model[5] + c_f_tmp); 169 | u_f_tmp = u_f_tmp * model[0] ; 170 | xDot_4 = b_f_tmp * X[4] * ( 171 | (-2.0 * model[3] * model[0] * X[3] * a_tmp + t_f_tmp * 172 | model[0] * X[6] * j_f_tmp) + -2.0 * ( 173 | p_f_tmp * X[3] + (((model[5] + -model[7]) 174 | + l_f_tmp) + m_f_tmp) * X[ 175 | 6] * a_tmp) * b_a_tmp) * (1.0 / ((((q_f_tmp + 176 | e_f_tmp * b_f_tmp) + r_f_tmp * b_f_tmp * ( 177 | a_tmp * a_tmp)) + s_f_tmp * 178 | b_f_tmp * ( 179 | b_a_tmp * b_a_tmp)) + u_f_tmp * b_f_tmp * o_f_tmp)); 180 | xDot_5 = X[6]; 181 | t_f_tmp = 4.0 * model[4] * model[11]; 182 | k_f_tmp = k_f_tmp * model[2] * model[0]; 183 | m_f_tmp = m_f_tmp * model[1] * b_f_tmp; 184 | x_f_tmp = -(model[4] * model[4]) * d_f_tmp; 185 | xDot_6 = f_f_tmp * ( 186 | (((((((((((((((((((8.0 * Fric * model[2] + 4.0 * Fric * 187 | model[0] * b_f_tmp) + 8.0 * Fric * model[ 188 | 1] * b_f_tmp) + 2.0 * model[0] * 189 | (2.0 * model[4] * Fric * model[9] + model[3] * 190 | model[11] * (2.0 * model[2] + 191 | (model[0] + 2.0 * model[ 192 | 1]) * b_f_tmp)) * a_tmp) + -2.0 * 193 | model[3] * model[4] 194 | * model[0] * ( 195 | model[0] * h_f_tmp * b_f_tmp + -2.0 * ( 196 | model[2] + model[1] * 197 | b_f_tmp) * g_f_tmp) * j_f_tmp) + t_f_tmp * 198 | model[2] * model[0] * b_a_tmp) + 199 | -4.0 * model[3] * Fric * model[0] * model[ 200 | 9] * b_a_tmp) + 2.0 * model[4] * 201 | model[ 202 | 11] * d_f_tmp * b_f_tmp * b_a_tmp) + t_f_tmp * 203 | model[0] * model[1] * 204 | b_f_tmp * b_a_tmp) + v_f_tmp * h_f_tmp * b_f_tmp * o_f_tmp) + x_f_tmp * 205 | h_f_tmp * b_f_tmp * o_f_tmp) + -2.0 * model[5] * 206 | model[2] * g_f_tmp * 207 | o_f_tmp) + 2.0 * model[7] * model[ 208 | 2] * g_f_tmp * o_f_tmp) + i_f_tmp * model 209 | [2] * model[ 210 | 0] * g_f_tmp * o_f_tmp) + k_f_tmp * g_f_tmp * o_f_tmp) + - 211 | model 212 | [5] * model[0] * b_f_tmp * g_f_tmp * o_f_tmp) + model[7] * 213 | model[0] * 214 | b_f_tmp * g_f_tmp * o_f_tmp) + -2.0 * model[5] * model[1] 215 | * b_f_tmp * g_f_tmp * o_f_tmp) + 2.0 * model[7] * model[1] 216 | * b_f_tmp * g_f_tmp * o_f_tmp) + l_f_tmp * model[1] * 217 | b_f_tmp * g_f_tmp * o_f_tmp) + m_f_tmp * g_f_tmp * o_f_tmp); 218 | t_f_tmp = x_f_tmp * b_f_tmp; 219 | l_f_tmp = (((((((2.0 * model[6] * model[2] + w_f_tmp * model[2] * model[ 220 | 0]) + 221 | k_f_tmp) + model[6] * model[0] * b_f_tmp) + f_tmp_tmp) + 222 | b_f_tmp_tmp) + 2.0 * model[6] * model[ 223 | 1] * b_f_tmp) + w_f_tmp * 224 | model[0] * model[1] * b_f_tmp) + m_f_tmp; 225 | j_f_tmp = -f_tmp * d_f_tmp * b_f_tmp; 226 | i_f_tmp = model[3] * model[4] * d_f_tmp * b_f_tmp * o_f_tmp; 227 | g_f_tmp = p_f_tmp * model[9] * a_tmp; 228 | h_f_tmp = -model[3] * model[0] * model[9] * b_a_tmp; 229 | Fric = model[8] * model[9] * ( 230 | (((model[6] + n_f_tmp) + c_f_tmp) + g_f_tmp) + 231 | h_f_tmp); 232 | g_3 = Fric * (1.0 / (((l_f_tmp + t_f_tmp * (a_tmp * a_tmp)) + j_f_tmp * 233 | (b_a_tmp * b_a_tmp)) + i_f_tmp)); 234 | g_10 = Fric * ( 235 | 1.0 / (((l_f_tmp + t_f_tmp * (a_tmp * a_tmp)) + j_f_tmp * 236 | (b_a_tmp * b_a_tmp)) + i_f_tmp)); 237 | t_f_tmp = r_f_tmp * model[9]; 238 | l_f_tmp = q_f_tmp * (1.0 / model[9]) + e_f_tmp * model[9]; 239 | j_f_tmp = s_f_tmp * model[9]; 240 | i_f_tmp = u_f_tmp * model[9] * o_f_tmp; 241 | g_4 = -model[8] * model[10] * ( 242 | 1.0 / (((l_f_tmp + t_f_tmp * (a_tmp * a_tmp)) 243 | + j_f_tmp * (b_a_tmp * b_a_tmp)) + i_f_tmp)); 244 | g_11 = model[8] * model[10] * ( 245 | 1.0 / (((l_f_tmp + t_f_tmp * (a_tmp * a_tmp)) 246 | + j_f_tmp * (b_a_tmp * b_a_tmp)) + i_f_tmp)); 247 | g_0 = th.tensor(0.0); 248 | g_1 = th.tensor(0.0); 249 | g_2 = th.tensor(0.0); 250 | g_5 = th.tensor(0.0); 251 | 252 | g_7 = th.tensor(0.0); 253 | g_8 = th.tensor(0.0); 254 | g_9 = th.tensor(0.0); 255 | g_12 = th.tensor(0.0); 256 | t_f_tmp = -2.0 * model[8] * ( 257 | (((2.0 * model[2] + model[0] * b_f_tmp) + 2.0 * 258 | model[1] * b_f_tmp) + g_f_tmp) + h_f_tmp) * f_f_tmp; 259 | g_6 = t_f_tmp; 260 | g_13 = t_f_tmp; 261 | 262 | g_u1 = th.stack([g_0, g_1, g_2, g_3, g_4, g_5, g_6]) 263 | g_u2 = th.stack([g_7, g_8, g_9, g_10, g_11, g_12, g_13]) 264 | g = th.stack([g_u1, g_u2], dim=1) 265 | xdot = th.stack([xDot_0, xDot_1, xDot_2, xDot_3, xDot_4, xDot_5, 266 | xDot_6]) 267 | return xdot + g @ U -------------------------------------------------------------------------------- /core/systems/segway_system.py: -------------------------------------------------------------------------------- 1 | from torch import cat, cos, diag, float64, norm, sin, stack, tensor, zeros, set_default_dtype 2 | from torch import atan2 3 | from torch.nn import Module, Parameter 4 | 5 | from core.dynamics import SystemDynamics, AffineDynamics 6 | from core.util import default_fig 7 | from numpy import pi 8 | set_default_dtype(float64) 9 | 10 | # Segway Dynamics from here: 11 | # https://github.com/urosolia/MultiRate/blob/master/python/simulator.py 12 | 13 | class Segway(SystemDynamics, AffineDynamics, Module): 14 | def __init__(self): 15 | SystemDynamics.__init__(self, 4, 1) 16 | Module.__init__(self) 17 | 18 | def forward(self, x, u, t): 19 | x_dot = x[2] 20 | y_dot = x[3] 21 | y = x[1] 22 | return cat([ 23 | x_dot[None], 24 | y_dot[None], 25 | (cos(y) * (-1.8 * u + 11.5 * x_dot + 9.8 * sin( 26 | y)) - 10.9 * u + 68.4 * x_dot - 1.2 * y_dot ** 2 * sin(y)) / (cos( 27 | y) - 24.7)[None], 28 | ((9.3 * u - 58.8 * x_dot) * cos(y) + 38.6 * u - 234.5 * x_dot - sin( 29 | y) * (208.3 + y_dot ** 2 * cos(y))) / ((cos(y)) ** 2 - 24.7)[None] 30 | ], dim=0) -------------------------------------------------------------------------------- /core/systems/single_track_bicycle_system.py: -------------------------------------------------------------------------------- 1 | from torch import cat, cos, diag, float64, norm, sin, stack, tensor, zeros, set_default_dtype 2 | from torch import atan2, atan, isnan 3 | from torch.nn import Module, Parameter 4 | 5 | from core.dynamics import SystemDynamics, AffineDynamics 6 | from core.util import default_fig 7 | from numpy import pi 8 | set_default_dtype(float64) 9 | 10 | # Bicycle Dynamics from here: 11 | # https://github.com/urosolia/RacingLMPC/blob/master/src/fnc/SysModel.py#L130 12 | # TODO: Ask Ugo Whats up with this Curvature Stuff 13 | # https://github.com/urosolia/RacingLMPC/blob/master/src/fnc/Utilities.py 14 | 15 | # Vehicle Parameters 16 | class SingleTrackBicycle(SystemDynamics, AffineDynamics, Module): 17 | def __init__(self): 18 | SystemDynamics.__init__(self, 6, 2) 19 | Module.__init__(self) 20 | 21 | def forward(self, x, u, t): 22 | m = 1.98 23 | lf = 0.125 24 | lr = 0.125 25 | Iz = 0.024 26 | Df = 0.8 * m * 9.81 / 2.0 27 | Cf = 1.25 28 | Bf = 1.0 29 | Dr = 0.8 * m * 9.81 / 2.0 30 | Cr = 1.25 31 | Br = 1.0 32 | 33 | a = u[0] 34 | delta = u[1] 35 | 36 | X = x[0] 37 | Y = x[1] 38 | w = x[2] 39 | 40 | vx = x[3] 41 | vy = x[4] 42 | wz = x[5] 43 | 44 | # Compute tire split angle 45 | alpha_f = delta - atan2(vy + lf * wz, vx) 46 | alpha_r = - atan2(vy - lf * wz, vx) 47 | 48 | # Compute lateral force at front and rear tire 49 | Fyf = Df * sin(Cf * atan(Bf * alpha_f)) 50 | Fyr = Dr * sin(Cr * atan(Br * alpha_r)) 51 | 52 | # Propagate the dynamics of deltaT 53 | x_dot = cat([ 54 | ((vx * cos(w) - vy * sin(w)))[None], 55 | (vx * sin(w) + vy * cos(w))[None], 56 | wz[None], 57 | (a - 1 / m * Fyf * sin(delta) + wz * vy)[None], 58 | (1 / m * (Fyf * cos(delta) + Fyr) - wz * vx)[None], 59 | (1 / Iz * (lf * Fyf * cos(delta) - lr * Fyr))[None]], dim=0) 60 | return x_dot -------------------------------------------------------------------------------- /core/systems/unicycle.py: -------------------------------------------------------------------------------- 1 | 2 | from torch import cat, cos, diag, float64, norm, sin, stack, tensor, zeros 3 | from torch.nn import Module, Parameter 4 | from core.dynamics import RoboticDynamics 5 | 6 | class Unicycle(RoboticDynamics, Module): 7 | 8 | def __init__(self, m, J): 9 | RoboticDynamics.__init__(self, 3, 2) 10 | Module.__init__(self) 11 | self.params = Parameter(tensor([m, J], dtype=float64)) 12 | 13 | def D(self, q): 14 | m, J = self.params 15 | return diag(stack([m, m, J])) 16 | 17 | def C(self, q, q_dot): 18 | m, _ = self.params 19 | x, y, theta = q 20 | v = norm(stack([x, y]), 2) 21 | z = tensor(0, dtype=float64) 22 | return stack([ 23 | stack([z, z, m * v * sin(theta)]), 24 | stack([z, z, -m * v * cos(theta)]), 25 | stack([z, z, z])]) 26 | 27 | def B(self, q): 28 | _, _, theta = q 29 | z = tensor(0, dtype=float64) 30 | one_ = tensor(1, dtype=float64) 31 | return stack([ 32 | stack([cos(theta), z]), 33 | stack([sin(theta), z]), 34 | stack([z, one_]) 35 | ]) 36 | 37 | def G(self, q): 38 | return zeros(3) 39 | 40 | def get_state_names(self): 41 | return ['$x$ (m)', '$y$ (m)', '$\\theta$ (rad)$', 42 | '$\\dot{x}$ (m)', '$\\dot{y}$ (m)', '$\\dot{\\theta}$ (rad)$'] 43 | -------------------------------------------------------------------------------- /core/trajopt/__init__.py: -------------------------------------------------------------------------------- 1 | from .trajectory_optimizer import TrajectoryOptimizer 2 | from .gp_trajectory_optimizer import GPTrajectoryOptimizer -------------------------------------------------------------------------------- /core/trajopt/gp_trajectory_optimizer.py: -------------------------------------------------------------------------------- 1 | from core.trajopt import TrajectoryOptimizer 2 | from core.systems import AffineGPSystem 3 | from scipy.linalg import expm 4 | from numpy import array, zeros, zeros_like, eye, ones 5 | from numpy.linalg import pinv, inv, cholesky 6 | from cvxpy import quad_form, reshape, vec, norm, square 7 | 8 | class GPTrajectoryOptimizer(TrajectoryOptimizer): 9 | def __init__(self, T, h_k, 10 | dynamics: AffineGPSystem, 11 | max_delta_x = None, 12 | max_delta_u = None, 13 | solver='OSQP', 14 | cov_penalty=None): 15 | super().__init__(T, h_k, dynamics, 16 | TrajectoryOptimizer.COLLOCATION_MODE.CTN_ONE_PT, 17 | max_delta_x, max_delta_u, solver) 18 | self.cov_penalty=cov_penalty 19 | assert dynamics.delta_t == h_k, "[ERROR] Dynamics and Trajopt must be time-slice aligned" 20 | 21 | def make_continuous_linear_system_approx_constraints(self, xt, ut): 22 | dynamics_constraints = list() 23 | expAs, expBs, expCs, covs = self.dyn.jacobian_exp(xt.value, ut.value) 24 | for t in range(self.T - 1): 25 | dynamics_constraints += [ 26 | xt[t+1] == expAs[t] @ xt[t] + expBs[t] @ ut[t] + expCs[t] 27 | ] 28 | 29 | if self.cov_penalty is not None: 30 | for t in range(1, self.T): 31 | self.var_costs += [quad_form(xt[t] - xt[t].value, 32 | covs[t-1, t-1]*self.cov_penalty)] 33 | return dynamics_constraints -------------------------------------------------------------------------------- /core/trajopt/trajectory_optimizer.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from core.dynamics.affine_dynamics import AffineDynamics 3 | from core.controllers import PiecewiseConstantController 4 | from scipy.linalg import expm 5 | from numpy import array, zeros, zeros_like, eye, ones 6 | from numpy.linalg import norm, pinv, inv 7 | from cvxpy import ECOS, Variable, Minimize, sum_squares, Problem, quad_form 8 | from cvxpy.atoms import diag, abs 9 | from scipy import optimize 10 | from itertools import count 11 | 12 | class TrajectoryOptimizer: 13 | class COLLOCATION_MODE(Enum): 14 | TRAPEZOIDAL = 1 15 | HERMITE_SIMPSON = 2 16 | CTN_ONE_PT = 3 17 | 18 | def __init__(self, T, h_k, 19 | dynamics: AffineDynamics, 20 | collocation_mode: COLLOCATION_MODE = COLLOCATION_MODE.CTN_ONE_PT, 21 | max_delta_x = None, 22 | max_delta_u = None, 23 | solver='OSQP'): 24 | self.T = T 25 | self.h_k = h_k 26 | self.dyn = dynamics 27 | self.m = dynamics.m 28 | self.n = dynamics.n 29 | self.state_cost = None 30 | self.terminal_cost = None 31 | self.collocation_mode = collocation_mode 32 | self.const_constraints = list() 33 | self.const_costs = list() 34 | self.var_costs = list() 35 | self.xt = None 36 | self.ut = None 37 | self.max_delta_x = max_delta_x 38 | self.max_delta_u = max_delta_u 39 | self.solver = solver 40 | # variable declarations 41 | self.xt = Variable((self.T, self.n)) 42 | self.ut = Variable((self.T, self.m)) 43 | 44 | def make_hermite_simpson_dynamics_constraints(self, xt, ut): 45 | raise NotImplementedError('[ERROR] Hermite-simpson Approx TODO.') 46 | 47 | def make_trapezoidal_dynamics_constraints(self, xt, ut): 48 | dynamics_constraints = list() 49 | for t in range(self.T - 1): 50 | tau = t * self.h_k 51 | taup1 = (t + 1) * self.h_k 52 | (At, Bt) = self.dyn.jacobian(xt[t].value, ut[t].value, tau) 53 | (Atp1, Btp1) = self.dyn.jacobian(xt[t + 1].value, ut[t + 1].value, 54 | taup1) 55 | ft = At @ (xt[t]- xt[t].value) + Bt @ (ut[t] - ut[t].value) + \ 56 | self.dyn(xt[t].value, ut[t].value, tau) 57 | ftp1 = Atp1 @ (xt[t + 1] - xt[t+1].value) + Btp1 @ (ut[t + 1] - ut[t+1].value) \ 58 | + self.dyn(xt[t+1].value, ut[t+1].value, tau) 59 | dynamics_constraints += [ 60 | xt[t + 1] - xt[t] == 0.5 * self.h_k * (ft + ftp1)] 61 | return dynamics_constraints 62 | 63 | def make_continuous_linear_system_approx_constraints(self, xt, ut): 64 | dynamics_constraints = list() 65 | for t in range(self.T -1): 66 | tau = t * self.h_k 67 | (At, Bt) = self.dyn.jacobian(xt[t].value, ut[t].value, tau) 68 | expAt = expm(At * self.h_k) 69 | C = self.dyn(xt[t].value, ut[t].value, tau) \ 70 | - At @xt[t].value - Bt @ ut[t].value 71 | Ainv = pinv(At) 72 | dynamics_constraints += [ 73 | xt[t+1] ==\ 74 | expAt @ xt[t] +\ 75 | Ainv @ (expAt - eye(*At.shape))@ (Bt @ ut[t] + C)] 76 | return dynamics_constraints 77 | 78 | def add_input_constraints(self, u_min, u_max): 79 | for t in range(self.T): 80 | if u_max is not None: 81 | self.const_constraints += [self.ut[t] <= u_max] 82 | if u_min is not None: 83 | self.const_constraints += [self.ut[t] >= u_min] 84 | 85 | def add_static_quad_cost(self, Q=None, R=None, offset=None): 86 | if offset is None: 87 | offset = zeros_like(self.xt[0].value) 88 | # constant costs 89 | if R is not None: 90 | for t in range(self.T): 91 | self.const_costs += [quad_form(self.ut[t], R)] 92 | else: 93 | self.const_costs += [sum_squares(self.ut)] 94 | if Q is not None: 95 | for t in range(self.T): 96 | self.const_costs += [quad_form(self.xt[t] - offset, Q)] 97 | 98 | def add_terminal_cost(self, Q_f, offset=None, R_f=None): 99 | if offset is None: 100 | offset = zeros_like(self.xt[-1].value) 101 | self.const_costs += [quad_form(self.xt[-1]-offset, Q_f)] 102 | 103 | def add_hard_terminal_constraint(self, x_f): 104 | self.const_constraints += [self.xt[-1] == x_f] 105 | 106 | def add_trust_region_constraint(self): 107 | trust_region = list() 108 | if self.max_delta_x is not None: 109 | for t in range(self.T): 110 | trust_region += [abs(self.xt[t] - self.xt[t].value) <= self.max_delta_x ] 111 | if self.max_delta_u is not None: 112 | for t in range(self.T): 113 | trust_region += [abs(self.ut[t] - self.ut[t].value) <= self.max_delta_u ] 114 | return trust_region 115 | 116 | def add_state_box_constraints(self, x_min, x_max): 117 | box_constraints = list() 118 | for t in range(self.T): 119 | box_constraints += [self.xt[t] <= x_max] 120 | box_constraints += [self.xt[t] >= x_min] 121 | self.const_constraints += box_constraints 122 | 123 | 124 | def _warmstart(self, x_0, ws_xt=None, ws_ut=None): 125 | # Simulate zeroinput warmstart if not none is provided 126 | if ws_ut is not None: 127 | self.ut.value = ws_ut 128 | else: 129 | self.ut.value = zeros((self.T, self.m)) 130 | if ws_xt is not None: 131 | self.xt.value = ws_xt 132 | else: 133 | ctrl = PiecewiseConstantController(self.dyn, self.h_k, self.ut.value) 134 | ts = array(range(self.T)) * self.h_k 135 | self.xt.value, _ = self.dyn.simulate(x_0, controller=ctrl, ts=ts) 136 | 137 | def eval(self, x_0, ws_xt=None, ws_ut=None, 138 | max_cvx_iters=100, converge_tol=1e-5, solver_opts=None): 139 | 140 | ts = array([i for i in range(self.T)])*self.h_k 141 | self._warmstart(x_0, ws_xt, ws_ut) 142 | for i in count(): 143 | if i >= max_cvx_iters: 144 | break 145 | self.var_costs = [] 146 | xtprev = self.xt.value.copy() 147 | utprev = self.ut.value.copy() 148 | constraints = self.const_constraints.copy() 149 | constraints += [self.xt[0] == x_0] 150 | if self.collocation_mode == TrajectoryOptimizer.COLLOCATION_MODE.TRAPEZOIDAL: 151 | constraints += self.make_trapezoidal_dynamics_constraints(self.xt,self.ut) 152 | elif self.collocation_mode == TrajectoryOptimizer.COLLOCATION_MODE.HERMITE_SIMPSON: 153 | constraints += self.make_hermite_simpson_dynamics_constraints(self.xt, self.ut) 154 | elif self.collocation_mode == TrajectoryOptimizer.COLLOCATION_MODE.CTN_ONE_PT: 155 | constraints += self.make_continuous_linear_system_approx_constraints(self.xt, self.ut) 156 | else: 157 | raise Exception("[ERROR] Invalid Collocation mode.") 158 | constraints += self.add_trust_region_constraint() 159 | problem = Problem(Minimize(sum(self.const_costs + self.var_costs)), 160 | constraints) 161 | opts = {} if solver_opts is None else solver_opts 162 | if solver_opts is None and self.solver == "GUROBI": 163 | opts = {'Presolve':0, 'NumericFocus':3} 164 | soln = problem.solve(solver=self.solver, verbose=True, **opts) 165 | # Be Warned: Gurobi's Presolve has returned broken solutions before 166 | # soln = problem.solve(verbose=True) 167 | if problem.status == 'infeasible' or \ 168 | problem.status == "infeasible_inaccurate": 169 | print("[WARNING] Problem is infeasible." 170 | "Solving and trying again") 171 | ctrl = PiecewiseConstantController(self.dyn, self.h_k, 172 | utprev) 173 | xs, us = self.dyn.simulate(x_0, controller=ctrl, ts=ts) 174 | self.xt.value = xs 175 | self.ut.value = utprev 176 | continue 177 | # elif : 178 | # print("[WARNING] Problem presumed infeasible to relaxed " 179 | # "numerical tolerance.") 180 | 181 | update_size = \ 182 | ((xtprev - self.xt.value) ** 2).sum() + ( 183 | (utprev - self.ut.value) ** 2).sum() 184 | print(f'[INFO] Iteration {i}: update size: {update_size}') 185 | # check for convergence 186 | if update_size < converge_tol: 187 | print('[INFO] Convergence') 188 | break 189 | 190 | if i == max_cvx_iters: 191 | print("[WARNING] Failed to Converge") 192 | return self.xt.value, self.ut.value 193 | -------------------------------------------------------------------------------- /core/util.py: -------------------------------------------------------------------------------- 1 | from matplotlib.pyplot import figure 2 | from numpy import arange, array, dot, reshape, zeros 3 | from numpy.linalg import solve 4 | from torch import is_tensor, tensor 5 | from warnings import warn 6 | 7 | 8 | def arr_map(func, *arr): 9 | return array(list(map(func, *arr))) 10 | 11 | 12 | def differentiate(xs, ts, L=3): 13 | half_L = (L - 1) // 2 14 | b = zeros(L) 15 | b[1] = 1 16 | 17 | def diff(xs, ts): 18 | t_0 = ts[half_L] 19 | t_diffs = reshape(ts - t_0, (L, 1)) 20 | pows = reshape(arange(L), (1, L)) 21 | A = (t_diffs ** pows).T 22 | w = solve(A, b) 23 | return dot(w, xs) 24 | 25 | return array( 26 | [diff(xs[k - half_L:k + half_L + 1], ts[k - half_L:k + half_L + 1]) for 27 | k in range(half_L, len(ts) - half_L)]) 28 | 29 | 30 | def torch_guard(params, function): 31 | torch_params = params 32 | torch_in = True 33 | are_tensors = [is_tensor(p) for p in params] 34 | if not all(are_tensors): 35 | torch_in = False 36 | torch_params = [tensor(p) for p in params] 37 | if not torch_in and any(are_tensors): 38 | raise TypeError('[ERROR] Inconsistent input types. All numpy or all ' 39 | 'torch.') 40 | torch_output = function(*torch_params) 41 | if not torch_in: 42 | if is_tensor(torch_output): 43 | return torch_output.detach().numpy() 44 | else: 45 | try: 46 | # assuming output is a collection of tensors 47 | return [r.detach().numpy() if not r.is_sparse else 48 | #[WARNING] this is a hack for sparse matrices. 49 | #If needed change this line to return appropriate sparse matrix 50 | r.to_dense().detach().numpy() for r in torch_output] 51 | except TypeError: 52 | warn('[WARNING] Using torch guard with unsupported return ' 53 | 'type.') 54 | return torch_output 55 | 56 | else: 57 | return torch_output 58 | 59 | 60 | def default_fig(fig, ax): 61 | if fig is None: 62 | fig = figure(figsize=(6, 6), tight_layout=True) 63 | 64 | if ax is None: 65 | ax = fig.add_subplot(1, 1, 1) 66 | 67 | return fig, ax 68 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py 2 | appnope 3 | astor 4 | attrs 5 | backcall 6 | bleach 7 | cvxpy 8 | cycler 9 | decorator 10 | defusedxml 11 | dill 12 | ecos 13 | entrypoints 14 | future 15 | gast 16 | google-pasta 17 | grpcio 18 | h5py 19 | ipykernel 20 | ipython 21 | ipython-genutils 22 | ipywidgets 23 | jedi 24 | Jinja2 25 | jsonschema 26 | jupyter 27 | jupyter-client 28 | jupyter-console 29 | jupyter-core 30 | torch 31 | Keras 32 | Keras-Applications 33 | Keras-Preprocessing 34 | kiwisolver 35 | Markdown 36 | MarkupSafe 37 | matplotlib 38 | numpy 39 | mistune 40 | multiprocess 41 | nbconvert 42 | nbformat 43 | notebook 44 | osqp 45 | pytest 46 | pandocfilters 47 | parso 48 | pexpect 49 | pickleshare 50 | prometheus-client 51 | prompt-toolkit 52 | protobuf 53 | ptyprocess 54 | Pygments 55 | pyparsing 56 | pyrsistent 57 | python-dateutil 58 | PyYAML 59 | pyzmq 60 | qtconsole 61 | scipy 62 | scs 63 | seaborn 64 | Send2Trash 65 | six 66 | tensorboard 67 | tensorflow 68 | tensorflow-estimator 69 | termcolor 70 | terminado 71 | testpath 72 | tornado 73 | traitlets 74 | wcwidth 75 | webencodings 76 | Werkzeug 77 | widgetsnbextension 78 | wrapt 79 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | setup( 4 | name='core', 5 | packages=find_packages(), 6 | install_requires=[ 7 | 'numpy', 8 | 'scipy', 9 | 'matplotlib', 10 | 'cvxpy', 11 | 'keras' 12 | ], 13 | extra_require={ 14 | 'dev': ['pytest'] 15 | } 16 | ) 17 | -------------------------------------------------------------------------------- /tests/data/test_systems/CartPole/ts.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learning-and-control/core/bc162dae74f5649f58e79adb6b4e0ec0ef900911/tests/data/test_systems/CartPole/ts.npy -------------------------------------------------------------------------------- /tests/data/test_systems/CartPole/us.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learning-and-control/core/bc162dae74f5649f58e79adb6b4e0ec0ef900911/tests/data/test_systems/CartPole/us.npy -------------------------------------------------------------------------------- /tests/data/test_systems/CartPole/x_0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learning-and-control/core/bc162dae74f5649f58e79adb6b4e0ec0ef900911/tests/data/test_systems/CartPole/x_0.npy -------------------------------------------------------------------------------- /tests/data/test_systems/CartPole/xs.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learning-and-control/core/bc162dae74f5649f58e79adb6b4e0ec0ef900911/tests/data/test_systems/CartPole/xs.npy -------------------------------------------------------------------------------- /tests/data/test_systems/DoubleInvertedPendulum/ts.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learning-and-control/core/bc162dae74f5649f58e79adb6b4e0ec0ef900911/tests/data/test_systems/DoubleInvertedPendulum/ts.npy -------------------------------------------------------------------------------- /tests/data/test_systems/DoubleInvertedPendulum/us.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learning-and-control/core/bc162dae74f5649f58e79adb6b4e0ec0ef900911/tests/data/test_systems/DoubleInvertedPendulum/us.npy -------------------------------------------------------------------------------- /tests/data/test_systems/DoubleInvertedPendulum/x_0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learning-and-control/core/bc162dae74f5649f58e79adb6b4e0ec0ef900911/tests/data/test_systems/DoubleInvertedPendulum/x_0.npy -------------------------------------------------------------------------------- /tests/data/test_systems/DoubleInvertedPendulum/xs.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learning-and-control/core/bc162dae74f5649f58e79adb6b4e0ec0ef900911/tests/data/test_systems/DoubleInvertedPendulum/xs.npy -------------------------------------------------------------------------------- /tests/data/test_systems/InvertedPendulum/ts.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learning-and-control/core/bc162dae74f5649f58e79adb6b4e0ec0ef900911/tests/data/test_systems/InvertedPendulum/ts.npy -------------------------------------------------------------------------------- /tests/data/test_systems/InvertedPendulum/us.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learning-and-control/core/bc162dae74f5649f58e79adb6b4e0ec0ef900911/tests/data/test_systems/InvertedPendulum/us.npy -------------------------------------------------------------------------------- /tests/data/test_systems/InvertedPendulum/x_0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learning-and-control/core/bc162dae74f5649f58e79adb6b4e0ec0ef900911/tests/data/test_systems/InvertedPendulum/x_0.npy -------------------------------------------------------------------------------- /tests/data/test_systems/InvertedPendulum/xs.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learning-and-control/core/bc162dae74f5649f58e79adb6b4e0ec0ef900911/tests/data/test_systems/InvertedPendulum/xs.npy -------------------------------------------------------------------------------- /tests/generate_system_test_data.py: -------------------------------------------------------------------------------- 1 | from numpy import array, linspace, ones, pi, save 2 | from core.systems import CartPole, InvertedPendulum, DoubleInvertedPendulum 3 | import core.controllers as ctrl 4 | from pathlib import Path 5 | 6 | # Physical and other constants 7 | m = 0.25 8 | l = 0.5 9 | T = 5 10 | dt = 1e-2 11 | f = 1 / dt 12 | N = int(T * f) 13 | ts = linspace(0, T, N + 1) 14 | 15 | 16 | def constant_controller(system): 17 | return ctrl.ConstantController(system, ones([system.m]) * 0.01) 18 | 19 | 20 | def main(): 21 | destination_dir = Path(__file__).parent.absolute() / 'data' / 'test_systems' 22 | 23 | # Define most systems 24 | 25 | systems = [InvertedPendulum(mass=m, l=l), 26 | CartPole(m_c=m * 0.5, m_p=m * .25, l=l * 0.5), 27 | DoubleInvertedPendulum(m_1=m, m_2=m * 2, l_1=l, l_2=l * 2)] 28 | 29 | x0s = [array([pi / 4, .01]), 30 | array([0, pi, .1, -.1]), 31 | array([0, pi, .1, -.1])] 32 | 33 | for sys, x_0 in zip(systems, x0s): 34 | (xs, us) = sys.simulate(x_0=x_0, 35 | controller=constant_controller(sys), ts=ts) 36 | system_path = destination_dir / sys.__class__.__name__ 37 | system_path.mkdir(exist_ok=True, parents=True) 38 | save(system_path / 'xs', xs) 39 | save(system_path / 'us', us) 40 | save(system_path / 'ts', ts) 41 | save(system_path / 'x_0', x_0) 42 | 43 | 44 | if __name__ == '__main__': 45 | main() 46 | -------------------------------------------------------------------------------- /tests/test_controller.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from numpy import allclose, array, cos, identity, linspace, sin 4 | from numpy.linalg import norm 5 | from core.controllers import FBLinController, LQRController, QPController, LinearController 6 | from core.systems import DoubleInvertedPendulum, InvertedPendulum 7 | from core.geometry import Ball 8 | from core.util import arr_map 9 | 10 | @pytest.fixture 11 | def pendulum_with_controller(): 12 | m = 0.25 13 | l = 0.5 14 | g = 9.81 15 | 16 | T = 10 17 | dt = 1e-2 18 | f = 1 / dt 19 | N = int(T * f) 20 | ts = linspace(0, T, N + 1) 21 | 22 | system = InvertedPendulum(m, l, g) 23 | k_p = 1 24 | k_d = 2 25 | K = m * (l ** 2) * array([[g / l + k_p, k_d]]) 26 | controller = LinearController(system, K) 27 | return system, controller, ts 28 | 29 | 30 | def test_pendulum_linear_controller(pendulum_with_controller): 31 | (system, controller, ts) = pendulum_with_controller 32 | x_0 = array([1, 0]) 33 | xs, _ = system.simulate(x_0, controller, ts) 34 | # Check convergence 35 | assert allclose(norm(xs, axis=1)[-10:], 0, atol=1e-3) 36 | 37 | 38 | 39 | def test_pendulum_qp_controller(pendulum_with_controller): 40 | (system, controller, ts) = pendulum_with_controller 41 | 42 | ball = Ball(2) 43 | states = ball.sample(1000) 44 | 45 | invariant_controller = QPController(system, m=1) 46 | invariant_controller.add_regularizer(controller) 47 | invariant_controller.add_safety_constraint(ball.safety(system), lambda r: r) 48 | xs, _ = system.simulate(x_0=array([1, 0]), 49 | controller=invariant_controller, 50 | ts=ts) 51 | # Check convergence 52 | assert allclose(norm(xs, axis=1)[-10:], 0, atol=1e-3) 53 | 54 | 55 | def test_double_pend_with_fb_linearize(): 56 | T = 30 57 | dt = 1e-2 58 | f = 1 / dt 59 | N = int(T * f) 60 | ts = linspace(0, T, N + 1) 61 | 62 | m_1 = .25 63 | m_2 = .25 64 | l_1 = .5 65 | l_2 = .5 66 | g = 9.81 67 | system = DoubleInvertedPendulum(m_1, m_2, l_1, l_2, g) 68 | 69 | x_0 = array([1, 0, 0, 0]) 70 | 71 | controller = FBLinController(system, 72 | LQRController.build(system, 73 | Q=identity(4), 74 | R=identity(2))) 75 | xs, _ = system.simulate(x_0, controller, ts) 76 | # Check convergence 77 | assert allclose(norm(xs, axis=1)[-10:], 0, atol=1e-3) 78 | -------------------------------------------------------------------------------- /tests/test_gp_mpc.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from numpy import array, concatenate, diag, eye, linspace, pi, ones, zeros, \ 3 | stack, zeros_like 4 | from numpy.linalg import norm 5 | from numpy import testing 6 | from core.systems import LinearSystemDynamics, InvertedPendulum, \ 7 | DoubleInvertedPendulum, Segway, AffineGPSystem 8 | from core.controllers import ConstantController, QPController, \ 9 | FBLinController, LQRController, PiecewiseConstantController, MPCController 10 | from core.learning import GaussianProcess, RBFKernel, ScaledGaussianProcess, \ 11 | GPScaler,AdditiveKernel, MultiplicativeKernel, PeriodicKernel 12 | from core.trajopt import TrajectoryOptimizer, GPTrajectoryOptimizer 13 | import matplotlib.pyplot as plt 14 | import numpy as np 15 | import pathlib 16 | import torch as th 17 | 18 | root_save_dir = pathlib.Path(__file__).parent.absolute() / 'data' / 'test_gp_mpc' 19 | 20 | def add_trajectory_arrows(ax, scale, traj): 21 | for xt in traj: 22 | vnorm = norm(xt[2:]) * scale 23 | vnorm = vnorm if vnorm > 0 else 1 24 | ax.arrow(xt[0], xt[1], xt[2] / vnorm, xt[3] / vnorm, 25 | width=0.5 / scale, 26 | length_includes_head=False, 27 | color='r') 28 | 29 | def plot_2D_q_sys(xs, ax=None, traj=None, start=(0,0), end=(50,50), 30 | xlim=(-100, 100), ylim=(-100, 100), scale=1.): 31 | if ax is None: 32 | fig = plt.figure(1) 33 | ax = fig.add_subplot() 34 | end_circ = plt.Circle(end, 2/scale, color='g') 35 | start_circ = plt.Circle(start, 2/scale, color='b') 36 | if traj is not None: 37 | if isinstance(traj, list): 38 | for xt in traj: 39 | add_trajectory_arrows(ax, scale, xt) 40 | else: 41 | add_trajectory_arrows(ax, scale, traj) 42 | for x in xs: 43 | vnorm = norm(x[2:]) * scale 44 | vnorm = vnorm if vnorm > 0 else 1 45 | ax.arrow(x[0], x[1], x[2] / vnorm, x[3] / vnorm, 46 | 47 | width=0.5/scale, 48 | length_includes_head=False) 49 | ax.set_xlim(xlim[0], xlim[1]) 50 | ax.set_ylim(ylim[0], ylim[1]) 51 | ax.add_artist(end_circ) 52 | ax.add_artist(start_circ) 53 | return ax 54 | 55 | def load_data(dir_name, suffix=''): 56 | us = np.load(root_save_dir / dir_name / f'us{suffix}.npy') 57 | xs = np.load(root_save_dir / dir_name / f'xs{suffix}.npy') 58 | X = concatenate([xs[:-1], us], axis=1) 59 | Y = xs[1:] 60 | return th.from_numpy(X), th.from_numpy(Y) 61 | 62 | def test_mpc_linear_system(): 63 | linear_dyn = LinearSystemDynamics( 64 | array([ 65 | [0, 0, 1, 0], 66 | [0, 0, 0, 1], 67 | [-1, 0, 0, 0], 68 | [0, 1, 0, 0], 69 | ]), array([ 70 | [0, 0], 71 | [0, 0], 72 | [1, 0], 73 | [0, 1] 74 | ])) 75 | x_0 = array([0, 0, 10, 0]) 76 | x_f = array([50, 50, 0, 0]) 77 | n = 4 78 | m = 2 79 | xs = list() 80 | 81 | ts, hk = linspace(0, 2.5, 25, retstep=True) 82 | 83 | x_scaler = GPScaler(xmins=th.tensor([-100, -100, -100, -100, -100, -100]), 84 | xmaxs=th.tensor([100, 100, 100, 100, 100, 100])) 85 | y_scaler = GPScaler(xmins=th.tensor([-100, -100, -100, -100]), 86 | xmaxs=th.tensor([100, 100, 100, 100])) 87 | X, Y = load_data('linear') 88 | gp_est = ScaledGaussianProcess(X, Y, RBFKernel(n+m, ard_num_dims=True), 89 | x_scaler=x_scaler, y_scaler=y_scaler) 90 | gp_est.train_model(n_iters=70, lr=0.6) 91 | gp_dyn = AffineGPSystem(gp_est, n, m, hk) 92 | 93 | trajopt = GPTrajectoryOptimizer(T=10, h_k=hk, 94 | dynamics=gp_dyn, 95 | solver="GUROBI", 96 | # collocation_mode=TrajectoryOptimizer.COLLOCATION_MODE.CTN_ONE_PT 97 | ) 98 | trajopt.add_terminal_cost(Q_f=eye(4) * 1e5, offset=x_f) 99 | trajopt.add_input_constraints(u_min=-70 * ones(2), u_max=50* ones(2)) 100 | # trajopt.add_static_quad_cost() 101 | trajopt.add_static_quad_cost(Q=array([[1e2, 0, 0, 0], 102 | [0, 1e2, 0, 0], 103 | [0, 0, 1e1, 0], 104 | [0, 0, 0, 1e1]]), offset=x_f) 105 | def mpc_step(x, t, xt_prev, ut_prev): 106 | xs.append(x) 107 | xt, ut = trajopt.eval(x, max_cvx_iters=1, ws_xt=xt_prev, ws_ut=ut_prev) 108 | ax = plot_2D_q_sys(xs, traj=xt) 109 | plt.show() 110 | return xt, ut 111 | ctrl = MPCController(gp_dyn, mpc_step) 112 | xs, us = linear_dyn.simulate(x_0, controller=ctrl, ts=ts) 113 | # ctrl = ConstantController(linear_dyn, array([0.0, 0.0])) 114 | # testing.assert_allclose(xt[-1], x_f, atol=1e-7, rtol=1e-7) 115 | # uncomment for debugging 116 | testing.assert_allclose(x_f, xs[-1], atol=1e-1, rtol=1) 117 | ax = plot_2D_q_sys(xs) 118 | plt.show() 119 | 120 | def test_mpc_pendulum(): 121 | m = 0.25 122 | l = 0.5 123 | dyn = InvertedPendulum(mass=m, l=l) 124 | T = 35 125 | x_0 = array([-pi, 0.0]) 126 | ts, hk = linspace(0, 3, T, retstep=True) 127 | n = 2 128 | m = 1 129 | 130 | ts, hk = linspace(0, 2.5, 25, retstep=True) 131 | 132 | x_scaler = GPScaler(xmins=th.tensor([-2 * pi, -pi, -.6]), 133 | xmaxs=th.tensor([2 * pi, pi, .6])) 134 | y_scaler = GPScaler(xmins=th.tensor([-2 * pi, -pi]), 135 | xmaxs=th.tensor([2 * pi, pi])) 136 | X, Y = load_data('pendulum') 137 | kernel = AdditiveKernel(kernels=[ 138 | MultiplicativeKernel(kernels=[PeriodicKernel(p_prior=2., 139 | learn_period=False), 140 | RBFKernel(1)], 141 | active_dims=[[0], [0]]), 142 | RBFKernel(2, ard_num_dims=True)], 143 | active_dims=[[0], [1,2]]) 144 | # kernel = RBFKernel(n + m, ard_num_dims=True) 145 | gp_est = ScaledGaussianProcess(X, Y, kernel, 146 | x_scaler=x_scaler, y_scaler=y_scaler) 147 | gp_est.train_model(n_iters=50, lr=0.5) 148 | gp_dyn = AffineGPSystem(gp_est, n, m, hk) 149 | 150 | 151 | trajopt = GPTrajectoryOptimizer(15, hk, gp_dyn, 152 | solver="GUROBI", 153 | # max_delta_x=array([1 * pi / 180, 0.1]), 154 | max_delta_u=array([1]), 155 | # soft_dyn_weight=1e-3, 156 | # collocation_mode=TrajectoryOptimizer.COLLOCATION_MODE.CTN_ONE_PT 157 | ) 158 | 159 | trajopt.add_static_quad_cost(Q=array([[1e3, 0], [0, 1e2]])) 160 | trajopt.add_terminal_cost(array([[1e7, 0], [0, 1e6]])) 161 | trajopt.add_input_constraints(u_min=ones(1) * -.6, u_max=ones(1) * .6) 162 | xs = list() 163 | def mpc_step(x, t, xt_prev, ut_prev): 164 | xs.append(x) 165 | xt, ut = trajopt.eval(x, max_cvx_iters=10, 166 | ws_xt=xt_prev, ws_ut=ut_prev, 167 | converge_tol=1) 168 | fig, ax = dyn.plot_states(ts, stack(xs,axis=0), color="black") 169 | dyn.plot_states(ts, xt, color="red", fig=fig, ax=ax) 170 | plt.show() 171 | return xt, ut 172 | 173 | ctrl = MPCController(gp_dyn, mpc_step) 174 | xs, us = dyn.simulate(x_0=x_0, ts=ts, controller=ctrl) 175 | testing.assert_allclose(actual=xs[-1], desired=zeros_like(xs[-1]), 176 | atol=1e-1, rtol=1e-1) 177 | dyn.plot_states(ts, xs) 178 | plt.show() 179 | 180 | def test_mpc_double_pendulum(): 181 | dyn = DoubleInvertedPendulum(1.5, 1.5, 1, 1) 182 | x_0 = array([-pi, 0, 0, 0]) 183 | x_f = array([0, 0, 0, 0]) 184 | ts, hk = linspace(0, 6, 100, retstep=True) 185 | n=4 186 | m=2 187 | X = list() 188 | Y = list() 189 | for i in range(5): 190 | X_i, Y_i = load_data('double_pendulum', suffix=i) 191 | X.append(X_i) 192 | Y.append(Y_i) 193 | X = th.cat(X, dim=0) 194 | Y = th.cat(Y, dim=0) 195 | x_scaler = GPScaler(xmins=th.tensor([-2 * pi, -2 * pi, -pi, -pi, -8, -8]), 196 | xmaxs=th.tensor([2 * pi, 2 * pi, pi, pi, 8, 8])) 197 | y_scaler = GPScaler(xmins=th.tensor([-2 * pi, -2 * pi, -pi, -pi]), 198 | xmaxs=th.tensor([2 * pi, 2 * pi, pi, pi])) 199 | # kernel = AdditiveKernel(kernels=[ 200 | # MultiplicativeKernel(kernels=[PeriodicKernel(p_prior=2., 201 | # learn_period=False),RBFKernel(1)], active_dims=[[0], [0]]), 202 | # MultiplicativeKernel(kernels=[PeriodicKernel(p_prior=2., 203 | # learn_period=False), 204 | # RBFKernel(1)], active_dims=[[0], [0]]), 205 | # RBFKernel(4, ard_num_dims=True)], 206 | # active_dims=[[0], [1], [2, 3, 4, 5]]) 207 | # kernel = RBFKernel(n + m, ard_num_dims=True) 208 | kernel = MultiplicativeKernel(kernels=[ 209 | PeriodicKernel(p_prior=2.,learn_period=False), 210 | RBFKernel(1), 211 | PeriodicKernel(p_prior=2., learn_period=False), 212 | RBFKernel(1), 213 | RBFKernel(4, ard_num_dims=True), 214 | ], 215 | active_dims=[[0], [0], [1], [1], [2, 3, 4, 5]]) 216 | gp_est = ScaledGaussianProcess(X, Y, kernel, 217 | x_scaler=x_scaler, y_scaler=y_scaler) 218 | # gp_est.train_model(n_iters=10, lr=0.2) 219 | #Periodic Kernel 220 | # gp_est.train_model(n_iters=210, lr=0.2) 221 | #RBF Kernel 222 | gp_est.train_model(n_iters=100, lr=0.2) 223 | gp_dyn = AffineGPSystem(gp_est, n, m, hk) 224 | 225 | trajopt = GPTrajectoryOptimizer(25, hk, gp_dyn, 226 | max_delta_u=array([5, 5]), 227 | # collocation_mode=TrajectoryOptimizer.COLLOCATION_MODE.CTN_ONE_PT, 228 | solver="GUROBI") 229 | trajopt.add_input_constraints(u_min=ones(2) * -8, u_max=ones(2) * 8) 230 | trajopt.add_static_quad_cost(Q=diag(array([1e3, 1e3, 1e1, 1e1]))) 231 | trajopt.add_terminal_cost(diag(array([1e5, 1e5, 1e4, 1e4]))) 232 | xs = list() 233 | xts = list() 234 | us = list() 235 | def mpc_step(x, t, xt_prev, ut_prev): 236 | 237 | cvx_iter = 20 if xt_prev is None else 5 238 | xt, ut = trajopt.eval(x, max_cvx_iters=cvx_iter, 239 | ws_xt=xt_prev, ws_ut=ut_prev, 240 | converge_tol=1e-2) 241 | 242 | if len(xs) > 0: 243 | xs_prev = xs[-1] 244 | us_prev = us[-1] 245 | sample = th.from_numpy(concatenate([xs_prev, us_prev])[None, :]) 246 | xs_expected, cov = gp_est(sample) 247 | xs_expected = xs_expected.squeeze().detach().numpy() 248 | cov = cov.squeeze().detach().numpy() 249 | if (abs(xs_expected - x) / cov).max() > 0.25 or cov.max() > 0.1: 250 | print("[INFO] Found Surprising Example. Appending.") 251 | gp_est.add_samples(sample, th.from_numpy(x[None,:])) 252 | gp_est.train_model(1, lr=0.1) 253 | # plot_2D_q_sys(xs, traj=xt, end=(-pi, 0), 254 | # xlim=(-2*pi, 2*pi), ylim=(-2 * pi, 2 * pi), 255 | # scale=15) 256 | us.append(ut[0]) 257 | xs.append(x) 258 | xts.append(xt[1:]) 259 | # plt.show() 260 | # plt.pause(0.2) 261 | return xt, ut 262 | ctrl = MPCController(gp_dyn, mpc_step) 263 | xs, us = dyn.simulate(x_0, controller=ctrl, ts=ts) 264 | plot_2D_q_sys(xs, traj=xts, end=(-pi, 0), 265 | xlim=(-2*pi, 2*pi), ylim=(-2 * pi, 2 * pi), 266 | scale=15) 267 | plt.show() 268 | testing.assert_allclose(actual=xs[-1], desired=x_f, atol=1e-4, rtol=1e-4) 269 | 270 | def test_mpc_segway(): 271 | dyn = Segway() 272 | x_0 = array([-1, 0, 0, 0]) 273 | x_f = array([1, 0, 0, 0]) 274 | ts, hk = linspace(0, 6, 100, retstep=True) 275 | 276 | n=4 277 | m=1 278 | X, Y = load_data('segway') 279 | x_scaler = GPScaler(xmins=th.tensor([-2, -pi, -pi, -pi, -15 ]), 280 | xmaxs=th.tensor([2, pi, pi, pi, 15])) 281 | y_scaler = GPScaler(xmins=th.tensor([-2, -pi, -pi, -pi]), 282 | xmaxs=th.tensor([2, pi, pi, pi])) 283 | kernel = RBFKernel(n + m, ard_num_dims=True) 284 | gp_est = ScaledGaussianProcess(X, Y, kernel, 285 | x_scaler=x_scaler, y_scaler=y_scaler) 286 | gp_est.train_model(n_iters=50, lr=0.5) 287 | gp_dyn = AffineGPSystem(gp_est, n, m, hk) 288 | 289 | trajopt = GPTrajectoryOptimizer(30, hk, gp_dyn, 290 | max_delta_u=array([5,5]), 291 | # collocation_mode=TrajectoryOptimizer.COLLOCATION_MODE.CTN_ONE_PT, 292 | solver="GUROBI") 293 | trajopt.add_input_constraints(u_min=ones(1) * -15, u_max=ones(1) * 15) 294 | trajopt.add_static_quad_cost(Q=diag(array([1e3, 1e2, 1e1, 1e1])), 295 | offset=x_f) 296 | trajopt.add_terminal_cost(diag(array([1e5, 1e4, 1e3, 1e3])), offset=x_f) 297 | xs = list() 298 | def mpc_step(x, t, xt_prev, ut_prev): 299 | xs.append(x) 300 | cvx_iter = 20 if xt_prev is None else 5 301 | xt, ut = trajopt.eval(x, max_cvx_iters=cvx_iter, 302 | ws_xt=xt_prev, ws_ut=ut_prev, 303 | converge_tol=1e-2) 304 | # plot_2D_q_sys(xs, traj=xt, start=(-1, 0), end=(1, 0), 305 | # xlim=(-2, 2), ylim=(- pi, pi), 306 | # scale=20) 307 | # plt.show() 308 | return xt, ut 309 | ctrl = MPCController(gp_dyn, mpc_step) 310 | xs, us = dyn.simulate(x_0, controller=ctrl, ts=ts) 311 | plot_2D_q_sys(xs, start=(-1, 0), end=(1, 0), 312 | xlim=(-2, 2), ylim=(- pi, pi), 313 | scale=20) 314 | plt.show() 315 | testing.assert_allclose(actual=xs[-1], desired=x_f, atol=1e-3, rtol=1e-3) 316 | 317 | -------------------------------------------------------------------------------- /tests/test_gp_traj_opt.py: -------------------------------------------------------------------------------- 1 | from numpy import concatenate, eye, linspace, pi, ones, random, \ 2 | sin, cos, newaxis, stack, meshgrid, diag, diagonal 3 | from numpy.core._multiarray_umath import array, zeros 4 | from scipy.linalg import expm, logm 5 | from numpy.linalg import norm, pinv 6 | from numpy.testing import assert_allclose 7 | from core.learning import GaussianProcess, RBFKernel, ScaledGaussianProcess, GPScaler 8 | from core.systems import LinearSystemDynamics, InvertedPendulum, \ 9 | AffineGPSystem, DoubleInvertedPendulum, CartPole 10 | from core.controllers import ConstantController, PiecewiseConstantController 11 | from core.trajopt import TrajectoryOptimizer, GPTrajectoryOptimizer 12 | 13 | import matplotlib.pyplot as plt 14 | import torch as th 15 | import seaborn as sns 16 | 17 | def plot_2D_q_sys(xs, ax=None, color='black', box_side=60, 18 | start=(0,0), end=(50,50)): 19 | if ax is None: 20 | fig = plt.figure(1) 21 | ax = fig.add_subplot() 22 | end_circ = plt.Circle(end, .2, color='g') 23 | start_circ = plt.Circle(start, .2, color='b') 24 | for x in xs: 25 | vnorm = norm(x[2:]) * 10 26 | vnorm = vnorm if vnorm > 0 else 1 27 | ax.arrow(x[0], x[1], x[2] / vnorm, x[3] / vnorm, 28 | width=0.05, 29 | length_includes_head=True, 30 | color=color) 31 | ax.set_xlim(-box_side, box_side) 32 | ax.set_ylim(-box_side, box_side) 33 | ax.add_artist(end_circ) 34 | ax.add_artist(start_circ) 35 | return ax 36 | 37 | def plot_2D_dyn_sys(dyn, ax=None, 38 | low_x=-5, high_x=5, 39 | low_y=-5, high_y=5, 40 | n_sample=100): 41 | if ax is None: 42 | fig = plt.figure() 43 | ax = fig.add_subplot() 44 | X, Y = meshgrid(linspace(low_x, high_x, n_sample), 45 | linspace(low_y, high_y, n_sample)) 46 | arrows = array([dyn(stack(xy), zeros((1,)), 0) for xy in 47 | zip(X.ravel(), Y.ravel())]) 48 | arrows = arrows.reshape(X.shape + (2,)) 49 | ax.streamplot(X, Y, arrows[:, :, 0], arrows[:, :, 1]) 50 | # ax.quiver(X, Y, arrows[:, :, 0], arrows[:, :, 1]) 51 | return arrows, ax 52 | 53 | def test_2D_lin_learned_phase(): 54 | true_A = array([ 55 | [0, -3], 56 | [1, 0] 57 | ]) 58 | true_B = array([ 59 | [0], 60 | [1]]) 61 | 62 | u_0 = array([0.0]) 63 | 64 | dyn = LinearSystemDynamics(true_A, true_B) 65 | train_x0s = (random.rand(100, 2) - 0.5) * 10 66 | 67 | dt = 0.01 68 | train_xtp1s = [dyn.step(x_0=x0, u_0=u_0, t_0=0, t_f=dt) for x0 in train_x0s] 69 | train_xtp1s = stack(train_xtp1s, axis=0) 70 | train_X = concatenate((train_x0s, zeros((train_x0s.shape[0], 1))), axis=1) 71 | train_X = th.from_numpy(train_X) 72 | train_Y = th.from_numpy(train_xtp1s) 73 | 74 | gp_est = GaussianProcess(train_X, train_Y, RBFKernel(2+1)) 75 | gp_est.train_model() 76 | gp_dyn = AffineGPSystem(gp_est, n=2, m=1, delta_t=dt) 77 | fig, axs = plt.subplots(1,3) 78 | expected, _ = plot_2D_dyn_sys(dyn, axs[0]) 79 | axs[0].set_title('Expected Phase Plot') 80 | actual, _ = plot_2D_dyn_sys(gp_dyn, axs[1]) 81 | axs[1].set_title('Actual Phase Plot') 82 | error = norm(actual - expected,2, axis=2) 83 | assert error.mean() <= 1e-1 84 | #uncomment plotting for debugging 85 | sns.heatmap(error, ax=axs[2]) 86 | axs[2].set_title('Error of Phase Plot') 87 | plt.show() 88 | 89 | 90 | def test_double_pendulum_trajopt(): 91 | T = 1 92 | K = 100 93 | true_dyn = DoubleInvertedPendulum(1.5,1.5, 1, 1) 94 | n = true_dyn.n 95 | m = true_dyn.m 96 | ts, hk = linspace(0, T, K, retstep=True) 97 | x_0 = array([-pi, 0, 0, 0]) 98 | x_f = array([0, 0, 0, 0]) 99 | 100 | n_data = 1000 101 | 102 | theta0s = random.uniform(-2*pi, 2*pi, (n_data,)) 103 | theta1s = random.uniform(-2*pi, 2*pi, (n_data,)) 104 | 105 | theta0_dots = random.uniform(-5, 5, (n_data,)) 106 | theta1_dots =random.uniform(-5, 5, (n_data,)) 107 | 108 | u0s = random.uniform(-2, 2, (n_data, 2)) 109 | train_x0s = stack([theta0s, theta1s, theta0_dots, theta1_dots], axis=1) 110 | train_xtp1s = stack([ 111 | true_dyn.step(x_0=x0, u_0=u0, t_0=0, t_f=hk) 112 | for (x0, u0) in zip(train_x0s, u0s) 113 | ], axis=0) 114 | X = concatenate([train_x0s, u0s], axis=1) 115 | X = th.from_numpy(X) 116 | Y = th.from_numpy(train_xtp1s) 117 | Y.max(dim=0) 118 | Y.min(dim=0) 119 | x_scaler = GPScaler(xmins=X.min(dim=0)[0], 120 | xmaxs=X.max(dim=0)[0]) 121 | y_scaler = GPScaler(xmins=X[:,:n].min(dim=0)[0], 122 | xmaxs=X[:,:n].max(dim=0)[0]) 123 | gp_est = ScaledGaussianProcess(X, Y, RBFKernel(n+m, ard_num_dims=True), 124 | x_scaler=x_scaler, y_scaler=y_scaler) 125 | gp_est.train_model(n_iters=200, lr=0.5) 126 | gp_dyn = AffineGPSystem(gp_est, n=n, m=m, delta_t=hk) 127 | 128 | gp_trajopt = GPTrajectoryOptimizer(K, hk, gp_dyn, 129 | # max_delta_x=array([pi/10, pi/10, 5/10, 5/10]), 130 | max_delta_u=array([0.05, 0.05]), 131 | solver='GUROBI') # GUROBI because OSQP can 132 | # fail 133 | # gp_trajopt.add_state_box_constraints( 134 | # x_min=X[:,:n].min(dim=0)[0].detach().numpy(), 135 | # x_max=X[:,:n].max(dim=0)[0].detach().numpy()) 136 | gp_trajopt.add_static_quad_cost(Q=diag(array([1e3, 1e3, 1e2, 1e2]))) 137 | gp_trajopt.add_terminal_cost(diag(array([1e7, 1e7, 1e6, 1e6]))) 138 | gp_trajopt.add_input_constraints(u_min=ones(2) * -2, u_max=ones(2) * 2) 139 | xt_gp, ut_gp = gp_trajopt.eval(x_0, max_cvx_iters=20) 140 | xs_gp, us_gp = true_dyn.simulate(x_0, 141 | PiecewiseConstantController(gp_dyn, hk, ut_gp), 142 | ts) 143 | xt_gp_im, ut_gp_im = gp_dyn.simulate(x_0, 144 | PiecewiseConstantController(gp_dyn, hk, ut_gp), 145 | ts) 146 | 147 | 148 | trajopt = TrajectoryOptimizer(K, hk, true_dyn, 149 | max_delta_u=array([0.5, 0.5]), 150 | collocation_mode=TrajectoryOptimizer.COLLOCATION_MODE.CTN_ONE_PT, 151 | solver='GUROBI') # GUROBI because OSQP can fail 152 | trajopt.add_static_quad_cost(Q=diag(array([1e3, 1e3, 1e2, 1e2]))) 153 | trajopt.add_terminal_cost(diag(array([1e7, 1e7, 1e6, 1e6]))) 154 | trajopt.add_input_constraints(u_min=ones(2) * -2, u_max=ones(2) * 2) 155 | xt, ut = trajopt.eval(x_0, max_cvx_iters=20) 156 | xs, us = true_dyn.simulate(x_0, 157 | PiecewiseConstantController(true_dyn, hk, ut), 158 | ts) 159 | true_dyn.plot(xt, ut, ts) 160 | plt.show() 161 | xt_gp.max(axis=0) 162 | xt_gp.min(axis=0) 163 | #plot trajectories for debugging 164 | fig = plt.figure() 165 | ax = fig.add_subplot() 166 | ax.plot(ts, norm(abs(xt - xs), axis=1), label='True Dynamics') 167 | ax.plot(ts, norm(abs(xt_gp - xs_gp), axis=1), label='GP Dynamics') 168 | ax.legend(loc='upper left') 169 | # plot_2D_q_sys(xt, ax=ax, color='black', box_side=10, 170 | # start=x_0[:2], end=x_f[:2]) 171 | # plot_2D_q_sys(xt_gp_im, ax=ax, color='green', box_side=10, 172 | # start=x_0[:2], end=x_f[:2]) 173 | # plot_2D_q_sys(xs_gp, ax=ax, color='red', box_side=10) 174 | # plot_2D_q_sys(xt_gp, ax=ax, color='blue', box_side=10) 175 | plt.show() 176 | 177 | # fig = plt.figure() 178 | # ax = fig.add_subplot() 179 | # plot_2D_q_sys(xt, ax=ax, color='black', box_side=10, start=(-pi, 0), 180 | # end=(0,0)) 181 | # plt.show() 182 | # true_dyn.plot(xt, us, ts) 183 | # plt.show() 184 | 185 | 186 | def test_pend_trajopt(): 187 | m = 1 188 | l = 0.5 189 | T = 4 190 | K = 100 191 | ts, hk = linspace(0, T, K, retstep=True) 192 | true_dyn = InvertedPendulum(m, l) 193 | x_0 = array([-pi, 0.0]) 194 | 195 | trajopt = TrajectoryOptimizer(K, hk, true_dyn, 196 | max_delta_u=array([1.2]), 197 | collocation_mode=TrajectoryOptimizer.COLLOCATION_MODE.CTN_ONE_PT, 198 | solver='GUROBI') #GUROBI because OSQP can fail 199 | trajopt.add_static_quad_cost(Q=array([[1e3, 0], [0, 1e2]])) 200 | trajopt.add_terminal_cost(array([[1e7, 0], [0, 1e6]])) 201 | trajopt.add_input_constraints(u_min=ones(1) * -1.2, u_max=ones(1) * 1.2) 202 | xt, ut = trajopt.eval(x_0, max_cvx_iters=10) 203 | xs, us = true_dyn.simulate(x_0, 204 | PiecewiseConstantController(true_dyn, hk, ut),ts) 205 | theta0s = random.uniform(-2*pi, pi/2, (1000,)) 206 | thetadot0s = random.uniform(-8, 8, (1000,)) 207 | u0s = random.uniform(-1.2, 1.2, (1000,1)) 208 | train_x0s = stack([theta0s, thetadot0s], axis=1) 209 | train_xtp1s = stack([true_dyn.step(x_0=x0, u_0=u0, t_0=0, t_f=hk) 210 | for x0, u0 in zip(train_x0s, u0s)], axis=0) 211 | X = concatenate([train_x0s, u0s], axis=1) 212 | X = th.from_numpy(X) 213 | Y = th.from_numpy(train_xtp1s) 214 | gp_est = GaussianProcess(X, Y, RBFKernel(2+1)) 215 | gp_est.train_model(n_iters=80) 216 | gp_dyn = AffineGPSystem(gp_est, n=2, m=1, delta_t=hk) 217 | 218 | gp_trajopt = TrajectoryOptimizer(K, hk, gp_dyn, 219 | max_delta_u=array([1.2]), 220 | collocation_mode=TrajectoryOptimizer.COLLOCATION_MODE.CTN_ONE_PT, 221 | solver='GUROBI') 222 | gp_trajopt.add_static_quad_cost(Q=array([[1e3, 0], [0, 1e2]])) 223 | gp_trajopt.add_terminal_cost(array([[1e7, 0], [0, 1e6]])) 224 | gp_trajopt.add_input_constraints(u_min=ones(1) * -1.2, u_max=ones(1) * 1.2) 225 | xt_gp, ut_gp = gp_trajopt.eval(x_0, max_cvx_iters=15) 226 | xs_gp, us_gp = true_dyn.simulate(x_0, 227 | PiecewiseConstantController(gp_dyn, hk, ut_gp), 228 | ts) 229 | #TODO: there appear to be spikes in ut_gp - ut. Investigate why. 230 | #open loop estimation gap is reasonable 231 | assert norm(xt_gp - xs_gp, axis=1).mean() - norm(xt - xs, axis=1).mean() < 1e-1 232 | assert (ut_gp - ut).mean() < 1e-1 #controllers should be similar 233 | #plot deviation 234 | plt.semilogy(ts, norm(xs - xt, axis=1), label="True Model") 235 | plt.semilogy(ts, norm(xs_gp - xt_gp, axis=1), label="GP Model") 236 | plt.title('Pendulum System Deviation') 237 | plt.ylabel('$|| x_t - \hat{x}_t ||_2 $') 238 | plt.xlabel('$t$') 239 | plt.legend(loc='upper left') 240 | plt.show() 241 | #uncomment to see dynamics paths 242 | # true_dyn.plot(xs_gp, us_gp, ts) 243 | # plt.show() 244 | 245 | #uncomment to verify nominal trajectory is reasonable 246 | # true_dyn.plot(xs, us, ts) 247 | # plt.show() 248 | 249 | #uncomment to verify dynamics match 250 | # fig, axs = plt.subplots(1,3, figsize=(18,6)) 251 | # expected, _ = plot_2D_dyn_sys(dyn, axs[0], 252 | # low_x=-pi, high_x=pi, n_sample=100) 253 | # axs[0].set_title('Expected Phase Plot') 254 | # actual, _ = plot_2D_dyn_sys(gp_dyn, axs[1], 255 | # low_x=-pi, high_x=pi, n_sample=100) 256 | # axs[1].set_title('Actual Phase Plot') 257 | # error = norm(actual - expected,2, axis=2) 258 | # # assert error.mean() <= 1e-1 259 | # #uncomment plotting for debugging 260 | # sns.heatmap(error, ax=axs[2]) 261 | # axs[2].set_title('Error of Phase Plot') 262 | # plt.show() 263 | 264 | def test_4D_lin_trajopt(): 265 | T = 10 266 | K = 200 267 | true_A = array([ 268 | [0, 0, 1, 0], 269 | [0, 0, 0, 1], 270 | [-1, 0, 0, 0], 271 | [0, 1, 0, 0], 272 | ]) 273 | true_B = array([ 274 | [0, 0], 275 | [0, 0], 276 | [1, 0], 277 | [0, 1] 278 | ]) 279 | true_dyn = LinearSystemDynamics(true_A, true_B) 280 | ts, h_k = linspace(0, T, K, retstep=True) 281 | x_0 = array([0, 0, .01, 0]) 282 | x_f = array([5, 5, 0, 0]) 283 | trajopt = TrajectoryOptimizer(K, h_k, true_dyn, 284 | TrajectoryOptimizer.COLLOCATION_MODE.CTN_ONE_PT) 285 | trajopt.add_static_quad_cost() 286 | trajopt.add_terminal_cost(Q_f=eye(4) * 1e5, offset=x_f) 287 | trajopt.add_input_constraints(ones(2) * -9, ones(2) * .95) 288 | [xt_mpc, ut] = trajopt.eval(x_0, max_cvx_iters=1) 289 | xs, us = true_dyn.simulate( 290 | x_0, 291 | PiecewiseConstantController(true_dyn, trajopt.h_k, ut), 292 | ts) 293 | train_x0s = random.uniform(-7.5, 7.5, (1000, 4)) 294 | train_u0s = random.uniform(-9, 1, (1000, 2)) 295 | X = concatenate([train_x0s, train_u0s], axis=1) 296 | Y = stack([true_dyn.step(x, u, t_0=0, t_f=h_k) 297 | for x,u in zip(train_x0s, train_u0s)], axis=0) 298 | X = th.from_numpy(X) 299 | Y = th.from_numpy(Y) 300 | 301 | gp_est = GaussianProcess(X, Y, RBFKernel(4+2)) 302 | gp_est.train_model(n_iters=300) 303 | gp_system = AffineGPSystem(gp_est, 4, 2, h_k) 304 | gp_trajopt = GPTrajectoryOptimizer(K, h_k, gp_system) 305 | gp_trajopt.add_static_quad_cost() 306 | gp_trajopt.add_terminal_cost(Q_f=eye(4) * 1e5, offset=x_f) 307 | gp_trajopt.add_input_constraints(ones(2) * -9, ones(2) * 1) 308 | [xt_gp, ut_gp] = gp_trajopt.eval(x_0, max_cvx_iters=1) 309 | xs_gp, us_gp = true_dyn.simulate(x_0, 310 | PiecewiseConstantController(true_dyn, h_k, ut_gp), 311 | ts) 312 | xt_gp_im, ut_gp_im = gp_system.simulate(x_0, 313 | PiecewiseConstantController( 314 | gp_system, h_k, ut_gp), 315 | ts) 316 | 317 | assert norm((xs_gp - xs),axis=1).mean() < 1e-1 318 | assert norm(ut_gp - ut, axis=1).mean() < 1e-1 319 | #plot deviation 320 | # plt.semilogy(ts, norm(xs - xt_mpc, axis=1), label="True Model") 321 | # plt.semilogy(ts, norm(xs_gp - xt_gp, axis=1), label="GP Model") 322 | # plt.title('Linear System Deviation') 323 | # plt.ylabel('$|| x_t - \hat{x}_t ||_2 $') 324 | # plt.xlabel('$t$') 325 | # plt.legend(loc='upper left') 326 | # plt.show() 327 | #plot trajectories for debugging 328 | # fig = plt.figure() 329 | # ax = fig.add_subplot() 330 | # plot_2D_q_sys(xs, ax=ax, color='black', box_side=10) 331 | # # plot_2D_q_sys(xt_gp_im, ax=ax, color='red', box_side=100) 332 | # plot_2D_q_sys(xs_gp, ax=ax, color='red', box_side=10) 333 | # plot_2D_q_sys(xt_gp, ax=ax, color='blue', box_side=10) 334 | # plt.show() 335 | 336 | def test_2D_lin_trajopt(): 337 | K = 200 338 | T = 10 339 | max_abs_delta_u = 10 340 | ts, h_k = linspace(0, T, K, retstep=True) 341 | true_A = array([[0, 1], 342 | [1, 0]]) 343 | true_B = array([[0], 344 | [1]]) 345 | 346 | x_0 = array([0, -0.0005]) 347 | x_f = array([10, 0]) 348 | 349 | true_dyn = LinearSystemDynamics(true_A, true_B) 350 | 351 | 352 | trajopt = TrajectoryOptimizer(K, h_k, true_dyn, 353 | TrajectoryOptimizer.COLLOCATION_MODE.CTN_ONE_PT) 354 | trajopt.add_static_quad_cost() 355 | trajopt.add_terminal_cost(Q_f=eye(2) * 1e5, offset=x_f) 356 | trajopt.add_input_constraints(ones(1) * -max_abs_delta_u, ones(1) * max_abs_delta_u) 357 | [xt_mpc, ut] = trajopt.eval(x_0, max_cvx_iters=1) 358 | xs, us = true_dyn.simulate(x_0, 359 | PiecewiseConstantController(true_dyn, 360 | trajopt.h_k, ut), 361 | ts) 362 | 363 | # X0s, V0s, U0s = meshgrid( 364 | # linspace(-20, 20, 10), 365 | # linspace(-10, 10, 10), 366 | # linspace(-max_abs_delta_u, max_abs_delta_u, 10)) 367 | # train_x0s = stack([X0s.ravel(), V0s.ravel()], axis=1) 368 | # train_u0s = U0s.ravel()[:,newaxis] 369 | train_x0s = (random.rand(1000, 2) - 0.5) * 20 370 | train_u0s = (random.rand(1000, 1) - 0.5) * 2 * max_abs_delta_u 371 | 372 | X = concatenate([train_x0s, train_u0s], axis=1) 373 | Y = stack([true_dyn.step(x, u, t_0=0, t_f=h_k) 374 | for x,u in zip(train_x0s, train_u0s)], axis=0) 375 | X = th.from_numpy(X) 376 | Y = th.from_numpy(Y) 377 | gp_est = GaussianProcess(X, Y, RBFKernel(2+1)) 378 | gp_est.train_model(n_iters=300) 379 | gp_system = AffineGPSystem(gp_est, 2, 1, h_k) 380 | gp_trajopt = GPTrajectoryOptimizer(K, h_k, gp_system) 381 | gp_trajopt.add_static_quad_cost() 382 | gp_trajopt.add_terminal_cost(Q_f=eye(2) * 1e5, offset=x_f) 383 | gp_trajopt.add_input_constraints(ones(1) * -max_abs_delta_u, 384 | ones(1) * max_abs_delta_u) 385 | [xt_gp, ut_gp] = gp_trajopt.eval(x_0, max_cvx_iters=1) 386 | xs_gp, us_gp = true_dyn.simulate(x_0, 387 | PiecewiseConstantController(true_dyn, h_k, ut_gp), 388 | ts) 389 | #gp trajectory prediction should be close to true trajectory 390 | assert norm(xs_gp - xt_gp, axis=1).mean() < 1e-1 391 | #gp commands should be close to commands from true system 392 | assert abs(ut_gp - ut).max() < 1e-1 393 | 394 | #plot deviation 395 | # plt.semilogy(ts, norm(xs - xt_mpc, axis=1), label="True Model") 396 | # plt.semilogy(ts, norm(xs_gp - xt_gp, axis=1), label="GP Model") 397 | # plt.title('Linear System Deviation') 398 | # plt.ylabel('$|| x_t - \hat{x}_t ||_2 $') 399 | # plt.xlabel('$t$') 400 | # plt.legend(loc='upper left') 401 | # plt.show() 402 | low_x = -20 403 | high_x = 20 404 | low_y = -10 405 | high_y = 10 406 | #debugging plots for GP phase approximation 407 | # fig, axs = plt.subplots(1,3) 408 | # expected, _ = plot_2D_dyn_sys(true_dyn, axs[0], n_sample=100, 409 | # low_x=low_x, high_x=high_x, 410 | # low_y=low_y, high_y=high_y) 411 | # axs[0].set_title('Expected Phase Plot') 412 | # actual, _ = plot_2D_dyn_sys(gp_system, axs[1], n_sample=100, 413 | # low_x=low_x, high_x=high_x, 414 | # low_y=low_y, high_y=high_y) 415 | # axs[1].set_title('Actual Phase Plot') 416 | # error = norm(actual - expected,2, axis=2) 417 | # sns.heatmap(error, ax=axs[2]) 418 | # axs[2].set_title('Error of Phase Plot') 419 | # plt.show() 420 | # debugging plots for trajectory comparisons 421 | # fig, axs = plt.subplots(2, 2) 422 | # axs[0,0].plot(xs[:, 0], xs[:, 1], color='black', label="Simulated Path") 423 | # axs[0,0].plot(xt_mpc[:,0], xt_mpc[:,1], color='red', label="MPC Path") 424 | # axs[0,0].legend(loc='upper left') 425 | # axs[0,0].set_xlim(left=low_x, right=high_x) 426 | # axs[0,0].set_ylim(bottom=low_y, top=high_y) 427 | # axs[0,0].set_xlabel("Position") 428 | # axs[0,0].set_ylabel("Velocity") 429 | # start_circ = plt.Circle(x_f, 0.3, color='g') 430 | # end_circ = plt.Circle(x_0, 0.3, color='b') 431 | # axs[0,0].add_patch(start_circ) 432 | # axs[0,0].add_patch(end_circ) 433 | # axs[0,0].set_title("State Space") 434 | # axs[0,1].plot(ts[:-1], us) 435 | # axs[0,1].set_title("Action Space") 436 | # axs[0,1].set_xlabel("Time") 437 | # axs[0,1].set_ylabel("u") 438 | # 439 | # axs[1,0].plot(xs_gp[:, 0], xs_gp[:, 1], color='black', label="Simulated Path") 440 | # axs[1,0].plot(xt_gp[:, 0], xt_gp[:,1], color='red', label="MPC Path") 441 | # axs[1,0].legend(loc='upper left') 442 | # axs[1,0].set_xlim(left=low_x, right=high_x) 443 | # axs[1,0].set_ylim(bottom=low_y, top=high_y) 444 | # axs[1,0].set_xlabel("Position") 445 | # axs[1,0].set_ylabel("Velocity") 446 | # start_circ = plt.Circle(x_f, 0.3, color='g') 447 | # end_circ = plt.Circle(x_0, 0.3, color='b') 448 | # axs[1,0].add_patch(start_circ) 449 | # axs[1,0].add_patch(end_circ) 450 | # axs[1,0].set_title("State Space") 451 | # axs[1,1].plot(ts[:-1], us_gp) 452 | # axs[1,1].set_title("Action Space") 453 | # axs[1,1].set_xlabel("Time") 454 | # axs[1,1].set_ylabel("u") 455 | # plt.show() -------------------------------------------------------------------------------- /tests/test_jacobian.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | import torch as th 4 | from core.systems import CartPole, InvertedPendulum 5 | from scipy.linalg import expm 6 | g = 9.18 7 | m = 0.25 8 | l = 0.5 9 | T = 5 10 | dt = 1e-2 11 | f = 1 / dt 12 | N = int(T * f) 13 | ts = np.linspace(0, T, N + 1) 14 | 15 | m_c = m * 0.5 16 | m_p = m * 0.25 17 | l_c = l * .5 18 | 19 | th.manual_seed(0) 20 | 21 | 22 | def analytic_cartpole_jacobian(x, u, t): 23 | # test numpy jacobian 24 | x, th, xd, thd = x 25 | 26 | div1 = (m_c + m_p - m_p * (np.cos(th) ** 2)) ** 2 27 | J21 = m_p * ( 28 | g * m_p - g * (2 * m_c + m_p) * np.cos(2 * th) - 29 | 2 * u * np.sin(2 * th) + l_c * np.cos(th) * ( 30 | 2 * m_c - m_p + m_p * np.cos(2 * th)) * (thd ** 2)) / \ 31 | (2 * div1) 32 | J23 = - (4 * l_c * m_p * np.sin(th) * thd) / ( 33 | -2 * m_c - m_p + m_p * np.cos(2 * th)) 34 | J24 = 1 / (m_c + m_p - m_p * np.cos(th) ** 2) 35 | J31 = (g * (m_c + m_p) * np.cos(th) * ( 36 | 2 * m_c - m_p + m_p * np.cos(2 * th)) + 37 | u * (2 * m_c + 3 * m_p + m_p * np.cos(2 * th)) * np.sin(th) + 38 | l_c * m_p * (m_p - (2 * m_c + m_p) * np.cos(2 * th)) * thd ** 2) / \ 39 | (2 * l_c * div1) 40 | J33 = -(m_p * np.sin(2 * th) * thd) / (m_c + m_p - m_p * np.cos(th) ** 2) 41 | J34 = - np.cos(th) / (l_c * (m_c + m_p - m_p * np.cos(th) ** 2)) 42 | return np.array([[0, 0, 1, 0, 0], 43 | [0, 0, 0, 1, 0], 44 | [0, J21, 0, J23, J24], 45 | [0, J31, 0, J33, J34]]) 46 | 47 | 48 | def analytic_pendulum_jacobian(x, u, t): 49 | th, thd = x 50 | return np.array([[0, 1, 0], 51 | [g * np.cos(th) / l, 0, 1 / (l ** 2 * m)]]) 52 | 53 | 54 | @pytest.mark.parametrize('system,analytic_jacobian', 55 | [ 56 | (CartPole(m_c=m_c, m_p=m_p, l=l_c, g=g), 57 | analytic_cartpole_jacobian), 58 | (InvertedPendulum(mass=m, l=l, g=g), 59 | analytic_pendulum_jacobian) 60 | ]) 61 | def test_system_jacboian(system, analytic_jacobian): 62 | n_tests = 3 63 | x_rands = th.rand((n_tests, system.n), dtype=th.float64) 64 | u_rands = th.rand((n_tests, system.m), dtype=th.float64) 65 | delta_t = 0.1234 66 | t = th.zeros((1)) 67 | th_jacobians = [] 68 | np_jacobians = [] 69 | for i in range(n_tests): 70 | x = x_rands[i, :] 71 | u = u_rands[i, :] 72 | th_jacobian = system.jacobian(x, u, t) 73 | np_jacobian = system.jacobian(x.numpy(), u.numpy(), t.numpy()) 74 | np_exp_jacobian = system.jacobian_exp(x.numpy(), u.numpy(), t.numpy(), delta_t) 75 | true_jacobian = analytic_jacobian(x.numpy(), u, t) 76 | true_A = true_jacobian[:, :system.n] 77 | true_B = true_jacobian[:, system.n:] 78 | 79 | true_exp_jacobian = np.concatenate([ 80 | expm(true_A * delta_t), 81 | np.linalg.pinv(true_A) @ ( 82 | expm(true_A * delta_t) - np.eye(system.n)) @ true_B 83 | ], axis=1) 84 | 85 | assert len(th_jacobian) == len(np_jacobian) == 2 86 | 87 | true_jacobian.shape 88 | np_exp_jacobian[0].shape 89 | np_exp_jacobian[1].shape 90 | np.concatenate(np_exp_jacobian, axis=1).shape 91 | np.testing.assert_array_almost_equal(true_exp_jacobian, 92 | np.concatenate(np_exp_jacobian,axis=1)) 93 | np.testing.assert_array_almost_equal(true_jacobian, 94 | np.concatenate(np_jacobian, 95 | axis=1)) 96 | th.testing.assert_allclose(th.cat(th_jacobian, dim=1), 97 | th.tensor(true_jacobian)) 98 | -------------------------------------------------------------------------------- /tests/test_learn_linear_system.py: -------------------------------------------------------------------------------- 1 | from numpy import concatenate, eye, linspace, pi, ones, random, \ 2 | sin, cos, newaxis, stack, meshgrid 3 | from numpy.core._multiarray_umath import array, zeros 4 | from scipy.linalg import expm, logm 5 | from numpy.linalg import norm, pinv 6 | from numpy.testing import assert_allclose 7 | from core.learning import GaussianProcess, RBFKernel, PeriodicKernel, \ 8 | AdditiveKernel, ScaledGaussianProcess, GPScaler, MultiplicativeKernel 9 | from core.systems import LinearSystemDynamics, InvertedPendulum, AffineGPSystem 10 | from core.controllers import ConstantController, PiecewiseConstantController 11 | from core.trajopt import TrajectoryOptimizer 12 | from torch.autograd.functional import jacobian 13 | import matplotlib.pyplot as plt 14 | from matplotlib.colors import LogNorm 15 | import seaborn as sns 16 | 17 | import gpytorch as gp 18 | import torch as th 19 | 20 | th.set_default_dtype(th.float64) 21 | random.seed(0) 22 | th.manual_seed(0) 23 | 24 | true_A = array([ 25 | [0, 0, 1, 0], 26 | [0, 0, 0, 1], 27 | [-1, 0, 0, 0], 28 | [0, 1, 0, 0], 29 | ]) 30 | true_B = array([ 31 | [0, 0], 32 | [0, 0], 33 | [1, 0], 34 | [0, 1] 35 | ]) 36 | 37 | linear_dyn = LinearSystemDynamics(true_A, true_B) 38 | 39 | T = 10 40 | K = 50 41 | 42 | 43 | def plot_2D_q_sys(xs, ax=None, color='black', box_side=60): 44 | if ax is None: 45 | fig = plt.figure(1) 46 | ax = fig.add_subplot() 47 | end_circ = plt.Circle((50, 50), 2, color='g') 48 | start_circ = plt.Circle((0, 0), 2, color='b') 49 | for x in xs: 50 | vnorm = norm(x[2:]) 51 | vnorm = vnorm if vnorm > 0 else 1 52 | ax.arrow(x[0], x[1], x[2] / vnorm, x[3] / vnorm, 53 | width=0.5, 54 | length_includes_head=True, 55 | color=color) 56 | ax.set_xlim(-box_side, box_side) 57 | ax.set_ylim(-box_side, box_side) 58 | ax.add_artist(end_circ) 59 | ax.add_artist(start_circ) 60 | return ax 61 | 62 | 63 | def generate_optimal_trajectory(dynamics): 64 | ts, h_k = linspace(0, T, K, retstep=True) 65 | x_0 = array([0, 0, 10, 0]) 66 | x_f = array([50, 50, 0, 0]) 67 | trajopt = TrajectoryOptimizer(K, h_k, dynamics, 68 | TrajectoryOptimizer.COLLOCATION_MODE.CTN_ONE_PT) 69 | trajopt.add_static_quad_cost() 70 | # trajopt.add_static_quad_cost(Q=eye(4) * 1e3, offset=x_f) 71 | # trajopt.add_static_quad_cost(Q=eye(4) * 1e4, offset=x_f) 72 | trajopt.add_terminal_cost(Q_f=eye(4) * 1e5, offset=x_f) 73 | trajopt.add_input_constraints(ones(2) * -50, ones(2) * 1) 74 | # can't do hard terminal constraint with very infeasible models 75 | [xt_mpc, ut] = trajopt.eval(x_0, max_cvx_iters=1) 76 | 77 | xut = linear_dyn.simulate( 78 | x_0, 79 | PiecewiseConstantController(dynamics, trajopt.h_k, ut), 80 | ts) 81 | fig, axs = plt.subplots(1,2) 82 | plot_2D_q_sys(xut[0], box_side=100, ax=axs[0]) 83 | plot_2D_q_sys(xt_mpc, color='red', box_side=100, ax= axs[1]) 84 | plt.show() 85 | return xut + (h_k,) 86 | 87 | 88 | def plot_2D_dyn_sys(dyn, ax=None, 89 | low_x=-5, high_x=5, 90 | low_y=-5, high_y=5, 91 | n_sample=100): 92 | if ax is None: 93 | fig = plt.figure() 94 | ax = fig.add_subplot() 95 | X, Y = meshgrid(linspace(low_x, high_x, n_sample), 96 | linspace(low_y, high_y, n_sample)) 97 | arrows = array([dyn(stack(xy), zeros((1,)), 0) for xy in 98 | zip(X.ravel(), Y.ravel())]) 99 | arrows = arrows.reshape(X.shape + (2,)) 100 | ax.streamplot(X, Y, arrows[:, :, 0], arrows[:, :, 1]) 101 | return arrows, ax 102 | 103 | 104 | def XY_from_traj(xt, ut): 105 | xt = th.from_numpy(xt) 106 | ut = th.from_numpy(ut) 107 | X = th.cat([xt[:-1], ut], dim=1).clone() 108 | Y = xt[1:].contiguous() 109 | return X, Y 110 | 111 | 112 | def test_rl_linear_system(): 113 | n = 4 114 | m = 2 115 | xut_opt = generate_optimal_trajectory(linear_dyn) 116 | A_init = true_A + random.rand(4, 4) * 0.05 117 | B_init = true_B + random.rand(4, 2) * 0.05 118 | 119 | max_iter = 5 120 | A = A_init 121 | B = B_init 122 | Aerrs = list() 123 | Berrs = list() 124 | path_errors = list() 125 | xt, ut, hk = generate_optimal_trajectory(LinearSystemDynamics(A, B)) 126 | gp_est = None 127 | for i in range(max_iter): 128 | X, Y = XY_from_traj(xt, ut) 129 | if gp_est is None: 130 | gp_est = GaussianProcess(X, Y, RBFKernel(n+m)) 131 | gp_est.train_model(100) 132 | else: 133 | gp_est.add_samples(X, Y) 134 | gp_est.train_model(10) 135 | 136 | xt, ut, hk = generate_optimal_trajectory( 137 | AffineGPSystem(gp_est, n=n, m=m, delta_t=hk)) 138 | # mu_prime, cov_prime = gp_est.ddx(X) 139 | 140 | 141 | 142 | def test_gp_lin_dyn_trajopt(): 143 | dyn = LinearSystemDynamics(true_A, true_B) 144 | n_samples = 100 145 | train_x0s = (random.rand(n_samples, 4) - 0.5) * 80 146 | train_u0s = (random.rand(n_samples, 2) - 0.5) * 50 147 | dt = 0.01 148 | train_xtp1s = [dyn.step(x_0=x0, u_0=u0, t_0=0, t_f=dt) for x0, u0 in zip(train_x0s, train_u0s)] 149 | train_xtp1s = stack(train_xtp1s, axis=0) 150 | train_X = concatenate((train_x0s, train_u0s), axis=1) 151 | train_X = th.from_numpy(train_X) 152 | train_Y = th.from_numpy(train_xtp1s) 153 | 154 | x_scaler = GPScaler(xmins=th.tensor([-40.] * 4 + [-25] * 2), 155 | xmaxs=th.tensor([40.] * 4 + [25]* 2)) 156 | y_scaler = GPScaler(xmins=th.tensor([-40.] * 4), 157 | xmaxs=th.tensor([40.] * 4)) 158 | gp_est = ScaledGaussianProcess(train_X, train_Y, RBFKernel(4+2), 159 | x_scaler, y_scaler) 160 | # gp_est = GaussianProcess(train_X, train_Y, RBFKernel(4+2)) 161 | gp_est.train_model(n_iters=160, lr=0.1) 162 | gp_dyn = AffineGPSystem(gp_est, n=4, m=2, delta_t=dt) 163 | xt_true, ut_true, hk_true = generate_optimal_trajectory(dyn) 164 | xt_gp, ut_gp, hk_gp = generate_optimal_trajectory(gp_dyn) 165 | error = xt_true - xt_gp 166 | assert norm(error, axis=-1).mean() < 1e-1 167 | 168 | def test_pendulum_periodic_kernel_phase(): 169 | m = 1 170 | l = 0.5 171 | 172 | dyn = InvertedPendulum(m, l) 173 | random.randn() 174 | train_x0s = (random.rand(100, 2) - 0.5) * array([2*pi, 10]) 175 | u_0 = array([0.0]) 176 | dt = 0.01 177 | train_xtp1s = [dyn.step(x_0=x0, u_0=u_0, t_0=0, t_f=dt) for x0 in train_x0s] 178 | train_xtp1s = stack(train_xtp1s, axis=0) 179 | train_X = concatenate((train_x0s, zeros((train_x0s.shape[0], 1))), axis=1) 180 | train_X = th.from_numpy(train_X) 181 | train_Y = th.from_numpy(train_xtp1s) 182 | train_Y = train_Y - train_X[:,:2] 183 | x_scaler = GPScaler(th.tensor([-pi, -5, -1]), th.tensor([pi, 5, 1])) 184 | y_scaler = GPScaler(th.tensor([-pi, -5]), th.tensor([pi, 5])) 185 | from numpy import sqrt 186 | # kernel = MultiplicativeKernel( 187 | # kernels=[PeriodicKernel(p_prior=2., 188 | # 189 | # learn_period=False), 190 | # RBFKernel(1, ard_num_dims=True)], 191 | # active_dims=[[0], [1]] 192 | # ) 193 | # X, Y = meshgrid(linspace(-4, 4, 1000), 194 | # linspace(-4, 4, 1000)) 195 | # errors = array([ 196 | # kernel(th.from_numpy(stack(xy))[None,:], th.zeros(1,2)).detach().numpy() 197 | # for 198 | # xy in 199 | # zip(X.ravel(), Y.ravel())]) 200 | # errors = errors.reshape(X.shape) 201 | # sns.heatmap(errors) 202 | # plt.show() 203 | kernel = MultiplicativeKernel( 204 | kernels=[PeriodicKernel(p_prior=2. * pi, 205 | learn_period=False), 206 | # RBFKernel(1), 207 | # PeriodicKernel(p_prior=2., learn_period=True), 208 | RBFKernel(2) 209 | ], 210 | active_dims=[[0], 211 | # [0], 212 | [1,2], 213 | # [1] 214 | ] 215 | ) 216 | gp_est = GaussianProcess(train_X, train_Y, kernel) 217 | gp_est.train_model(n_iters=55) 218 | # gp_est.train_model(n_iters=310, lr=0.5) 219 | 220 | 221 | # gp_est.train_model(n_iters=500, lr=0.1) 222 | gp_dyn = AffineGPSystem(gp_est, n=2, m=1, delta_t=dt, force_delta_mode=True) 223 | # plt.scatter(train_x0s[:,0], train_x0s[:,1], color='blue', marker='o') 224 | # plt.scatter(train_xtp1s[:, 0], train_xtp1s[:, 1], color='red', marker='o') 225 | # plt.title('Data Set') 226 | # plt.show() 227 | fig, axs = plt.subplots(1,3, figsize=(9,3), dpi=400) 228 | expected, _ = plot_2D_dyn_sys(dyn, axs[0], 229 | # low_x=-pi, high_x=pi, 230 | low_x=-4*pi, high_x=4*pi, 231 | n_sample=10) 232 | axs[0].set_title('Expected Phase Plot') 233 | actual, _ = plot_2D_dyn_sys(gp_dyn, axs[1], 234 | # low_x=-pi, high_x=pi, 235 | low_x=-4*pi, high_x=4*pi, 236 | n_sample=10) 237 | axs[1].set_title('Actual Phase Plot') 238 | error = norm(actual - expected,2, axis=2) 239 | assert error.mean() <= 1e-3 240 | #uncomment plotting for debugging 241 | # sns.heatmap(error, ax=axs[2]) 242 | # axs[2].set_title('Error of Phase Plot') 243 | # plt.show() 244 | 245 | def test_pendulum_learned_phase(): 246 | m = 1 247 | l = 0.5 248 | 249 | dyn = InvertedPendulum(m, l) 250 | random.randn() 251 | train_x0s = (random.rand(100, 2) - 0.5) * array([2*pi, 10]) 252 | u_0 = array([0.0]) 253 | dt = 0.01 254 | train_xtp1s = [dyn.step(x_0=x0, u_0=u_0, t_0=0, t_f=dt) for x0 in train_x0s] 255 | train_xtp1s = stack(train_xtp1s, axis=0) 256 | train_X = concatenate((train_x0s, zeros((train_x0s.shape[0], 1))), axis=1) 257 | train_X = th.from_numpy(train_X) 258 | train_Y = th.from_numpy(train_xtp1s) 259 | 260 | gp_est = GaussianProcess(train_X, train_Y, RBFKernel(2+1, ard_num_dims=True)) 261 | gp_est.train_model(n_iters=45) 262 | gp_dyn = AffineGPSystem(gp_est, n=2, m=1, delta_t=dt) 263 | # plt.scatter(train_x0s[:,0], train_x0s[:,1], color='blue', marker='o') 264 | # plt.scatter(train_xtp1s[:, 0], train_xtp1s[:, 1], color='red', marker='o') 265 | # plt.title('Data Set') 266 | # plt.show() 267 | fig, axs = plt.subplots(1,3, figsize=(9,3), dpi=200) 268 | expected, _ = plot_2D_dyn_sys(dyn, axs[0], 269 | low_x=-pi, high_x=pi, n_sample=100) 270 | axs[0].set_title('Expected Phase Plot') 271 | actual, _ = plot_2D_dyn_sys(gp_dyn, axs[1], 272 | low_x=-pi, high_x=pi, n_sample=100) 273 | axs[1].set_title('Actual Phase Plot') 274 | error = norm(actual - expected,2, axis=2) 275 | assert error.mean() <= 1e-1 276 | #uncomment plotting for debugging 277 | # sns.heatmap(error, ax=axs[2]) 278 | # axs[2].set_title('Error of Phase Plot') 279 | # plt.show() 280 | 281 | def test_pendulum_learned_phase_delta(): 282 | m = 1 283 | l = 0.5 284 | 285 | dyn = InvertedPendulum(m, l) 286 | random.randn() 287 | train_x0s = (random.rand(100, 2) - 0.5) * array([2*pi, 10]) 288 | u_0 = array([0.0]) 289 | dt = 0.01 290 | train_xtp1s = [dyn.step(x_0=x0, u_0=u_0, t_0=0, t_f=dt) for x0 in train_x0s] 291 | train_xtp1s = stack(train_xtp1s, axis=0) 292 | train_X = concatenate((train_x0s, zeros((train_x0s.shape[0], 1))), axis=1) 293 | train_X = th.from_numpy(train_X) 294 | train_Y = th.from_numpy(train_xtp1s) 295 | train_Y = train_Y - train_X[:,:2] 296 | 297 | gp_est = GaussianProcess(train_X, train_Y, 298 | # MultiplicativeKernel( 299 | # kernels=[RBFKernel(2, ard_num_dims=True), 300 | # PeriodicKernel(2. * pi, 301 | # learn_period=False)], 302 | # active_dims=[[1,2], [0]]) 303 | RBFKernel(2+1, ard_num_dims=True) 304 | ) 305 | gp_est.train_model(n_iters=55) 306 | gp_dyn = AffineGPSystem(gp_est, n=2, m=1, delta_t=dt, force_delta_mode=True) 307 | # plt.scatter(train_x0s[:,0], train_x0s[:,1], color='blue', marker='o') 308 | # plt.scatter(train_xtp1s[:, 0], train_xtp1s[:, 1], color='red', marker='o') 309 | # plt.title('Data Set') 310 | # plt.show() 311 | fig, axs = plt.subplots(1,3, figsize=(9,3), dpi=200) 312 | expected, _ = plot_2D_dyn_sys(dyn, axs[0], 313 | low_x=-pi, high_x=pi, n_sample=10) 314 | axs[0].set_title('Expected Phase Plot') 315 | actual, _ = plot_2D_dyn_sys(gp_dyn, axs[1], 316 | low_x=-pi, high_x=pi, n_sample=10) 317 | axs[1].set_title('Actual Phase Plot') 318 | error = norm(actual - expected,2, axis=2) 319 | assert error.mean() <= 1e-3 320 | #uncomment plotting for debugging 321 | # sns.heatmap(error, ax=axs[2], 322 | # # norm=LogNorm(error.min(), error.max()) 323 | # ) 324 | # axs[2].set_title('Error of Phase Plot') 325 | # plt.show() 326 | 327 | def test_pendulum_learned_phase_delta_kin_approx(): 328 | m = 1 329 | l = 0.5 330 | 331 | dyn = InvertedPendulum(m, l) 332 | random.randn() 333 | train_x0s = (random.rand(100, 2) - 0.5) * array([2*pi, 10]) 334 | u_0 = array([0.0]) 335 | dt = 0.01 336 | train_xtp1s = [dyn.step(x_0=x0, u_0=u_0, t_0=0, t_f=dt) for x0 in train_x0s] 337 | train_xtp1s = stack(train_xtp1s, axis=0) 338 | train_X = concatenate((train_x0s, zeros((train_x0s.shape[0], 1))), axis=1) 339 | train_X = th.from_numpy(train_X) 340 | train_Y = th.from_numpy(train_xtp1s) 341 | train_Y = train_Y - train_X[:,:2] 342 | 343 | gp_est = GaussianProcess(train_X, train_Y[:,1:], 344 | # MultiplicativeKernel( 345 | # kernels=[RBFKernel(2, ard_num_dims=True), 346 | # PeriodicKernel(2. * pi, 347 | # learn_period=False)], 348 | # active_dims=[[1,2], [0]]) 349 | RBFKernel(2+1, ard_num_dims=True) 350 | ) 351 | gp_est.train_model(n_iters=55) 352 | gp_dyn = AffineGPSystem(gp_est, n=2, m=1, delta_t=dt, 353 | force_delta_mode=True, 354 | ddim_to_dim={1: 0}, 355 | ddim_to_gp_idx={1: 0}) 356 | # plt.scatter(train_x0s[:,0], train_x0s[:,1], color='blue', marker='o') 357 | # plt.scatter(train_xtp1s[:, 0], train_xtp1s[:, 1], color='red', marker='o') 358 | # plt.title('Data Set') 359 | # plt.show() 360 | 361 | fig, axs = plt.subplots(1,3, figsize=(9,3), dpi=200) 362 | expected, _ = plot_2D_dyn_sys(dyn, axs[0], 363 | low_x=-pi, high_x=pi, n_sample=10) 364 | axs[0].set_title('Expected Phase Plot') 365 | actual, _ = plot_2D_dyn_sys(gp_dyn, axs[1], 366 | low_x=-pi, high_x=pi, n_sample=10) 367 | axs[1].set_title('Actual Phase Plot') 368 | error = norm(actual - expected,2, axis=2) 369 | assert error.mean() <= 1e-1 370 | #uncomment plotting for debugging 371 | sns.heatmap(error, ax=axs[2]) 372 | axs[2].set_title('Error of Phase Plot') 373 | plt.show() 374 | 375 | def test_2D_lin_learned_phase(): 376 | true_A = array([ 377 | [0, -3], 378 | [1, 0] 379 | ]) 380 | true_B = array([ 381 | [0], 382 | [1]]) 383 | u_0 = array([0.0]) 384 | 385 | dyn = LinearSystemDynamics(true_A, true_B) 386 | train_x0s = (random.rand(100, 2) - 0.5) * 10 387 | 388 | dt = 0.01 389 | train_xtp1s = [dyn.step(x_0=x0, u_0=u_0, t_0=0, t_f=dt) for x0 in train_x0s] 390 | train_xtp1s = stack(train_xtp1s, axis=0) 391 | train_X = concatenate((train_x0s, zeros((train_x0s.shape[0], 1))), axis=1) 392 | train_X = th.from_numpy(train_X) 393 | train_Y = th.from_numpy(train_xtp1s) 394 | 395 | gp_est = GaussianProcess(train_X, train_Y, RBFKernel(2+1)) 396 | gp_est.train_model(n_iters=40) 397 | gp_dyn = AffineGPSystem(gp_est, n=2, m=1, delta_t=dt) 398 | 399 | # plt.scatter(train_x0s[:,0], train_x0s[:,1], color='blue', marker='o') 400 | # plt.scatter(train_xtp1s[:, 0], train_xtp1s[:, 1], color='red', marker='o') 401 | # plt.title('Data Set') 402 | # plt.show() 403 | fig, axs = plt.subplots(1,3, figsize=(9,3), dpi=200) 404 | expected, _ = plot_2D_dyn_sys(dyn, axs[0], n_sample=10) 405 | axs[0].set_title('Expected Phase Plot') 406 | actual, _ = plot_2D_dyn_sys(gp_dyn, axs[1], n_sample=10) 407 | axs[1].set_title('Actual Phase Plot') 408 | error = norm(actual - expected,2, axis=2) 409 | assert error.mean() <= 1e-1 410 | #uncomment plotting for debugging 411 | # sns.heatmap(error, ax=axs[2]) 412 | # axs[2].set_title('Error of Phase Plot') 413 | # plt.show() 414 | 415 | def test_2D_lin_point_jacobian(): 416 | true_A = array([ 417 | [0, -3], 418 | [1, 0] 419 | ]) 420 | true_B = array([ 421 | [0], [1]]) 422 | 423 | dyn = LinearSystemDynamics(true_A, true_B) 424 | 425 | ts, hk = linspace(0, 10, 200, retstep=True) 426 | xs_train, us_train = dyn.simulate(array([0, 1]), 427 | ConstantController(dyn, zeros((1,))), 428 | ts=ts) 429 | xs_test, us_test = dyn.simulate(array([0.01, 0.99]), 430 | # array([cos(0.1), sin(0.1)]), 431 | ConstantController(dyn, zeros((1,))), 432 | ts=ts) 433 | 434 | train_x = th.from_numpy(concatenate([xs_train[:-1, :], us_train], axis=1)) 435 | train_y = th.from_numpy(xs_train[1:, :]) 436 | 437 | test_x = th.from_numpy(concatenate([xs_test[:-1, :], us_test], axis=1)) 438 | test_y = th.from_numpy(xs_test[1:, :]) 439 | 440 | gpdyn = GaussianProcess(train_x, train_y, RBFKernel(2+1)) 441 | gpdyn.train_model(n_iters=550, lr=0.03) 442 | mu, cov = gpdyn(test_x) 443 | mu_prime, cov_prime = gpdyn.ddx(test_x) 444 | mu = mu.detach().numpy() 445 | cov = cov.detach().numpy() 446 | mu_prime = mu_prime.detach().numpy() 447 | cov_prime = cov_prime.detach().numpy() 448 | expAts = mu_prime[:, :, :2] 449 | 450 | As = stack([logm(expAt).real * (1/hk) for expAt in expAts], axis=0) 451 | #checks if pointise jacobians are accurate 452 | assert_allclose(As, true_A[newaxis].repeat(As.shape[0], axis=0), atol=1e-5) -------------------------------------------------------------------------------- /tests/test_mpc_controller.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from numpy import array, concatenate, diag, eye, linspace, pi, ones, zeros, stack, zeros_like 3 | from numpy.linalg import norm 4 | from numpy import testing 5 | from core.systems import LinearSystemDynamics, InvertedPendulum, \ 6 | DoubleInvertedPendulum, Segway, SingleTrackBicycle 7 | from core.controllers import ConstantController, QPController, \ 8 | FBLinController, LQRController, PiecewiseConstantController, MPCController 9 | from core.trajopt import TrajectoryOptimizer 10 | from os import makedirs 11 | import pathlib 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | 15 | root_save_dir = pathlib.Path(__file__).parent.absolute() / 'data' / 'test_gp_mpc' 16 | 17 | 18 | def add_trajectory_arrows(ax, scale, traj): 19 | for xt in traj: 20 | vnorm = norm(xt[2:]) * scale 21 | vnorm = vnorm if vnorm > 0 else 1 22 | ax.arrow(xt[0], xt[1], xt[2] / vnorm, xt[3] / vnorm, 23 | width=0.5 / scale, 24 | length_includes_head=False, 25 | color='r') 26 | 27 | def plot_2D_q_sys(xs, ax=None, traj=None, start=(0,0), end=(50,50), 28 | xlim=(-100, 100), ylim=(-100, 100), scale=1.): 29 | if ax is None: 30 | fig = plt.figure(1) 31 | ax = fig.add_subplot() 32 | end_circ = plt.Circle(end, 2/scale, color='g') 33 | start_circ = plt.Circle(start, 2/scale, color='b') 34 | if traj is not None: 35 | if isinstance(traj, list): 36 | for xt in traj: 37 | add_trajectory_arrows(ax, scale, xt) 38 | else: 39 | add_trajectory_arrows(ax, scale, traj) 40 | for x in xs: 41 | vnorm = norm(x[2:]) * scale 42 | vnorm = vnorm if vnorm > 0 else 1 43 | ax.arrow(x[0], x[1], x[2] / vnorm, x[3] / vnorm, 44 | 45 | width=0.5/scale, 46 | length_includes_head=False) 47 | ax.set_xlim(xlim[0], xlim[1]) 48 | ax.set_ylim(ylim[0], ylim[1]) 49 | ax.add_artist(end_circ) 50 | ax.add_artist(start_circ) 51 | return ax 52 | 53 | def plot_6D_config(xs, ax=None, traj=None, 54 | start=None, end=None, xlim=None, ylim=None, scale=1): 55 | 56 | return plot_2D_q_sys(xs, ax, traj, start, end, xlim, ylim, scale) 57 | 58 | def save_trajectory(us, xs, dir_name, suffix=''): 59 | save_dir = root_save_dir / dir_name 60 | makedirs(save_dir, exist_ok=True) 61 | np.save(str(save_dir / f'us{suffix}.npy'), us) 62 | np.save(str(save_dir / f'xs{suffix}.npy'), xs) 63 | 64 | def test_mpc_linear_system(): 65 | linear_dyn = LinearSystemDynamics( 66 | array([ 67 | [0, 0, 1, 0], 68 | [0, 0, 0, 1], 69 | [-1, 0, 0, 0], 70 | [0, 1, 0, 0], 71 | ]), array([ 72 | [0, 0], 73 | [0, 0], 74 | [1, 0], 75 | [0, 1] 76 | ])) 77 | x_0 = array([0, 0, 10, 0]) 78 | x_f = array([50, 50, 0, 0]) 79 | xs = list() 80 | ts, hk = linspace(0, 2.5, 25, retstep=True) 81 | trajopt = TrajectoryOptimizer(T=10, h_k=hk, 82 | dynamics=linear_dyn, 83 | solver="GUROBI", 84 | collocation_mode=TrajectoryOptimizer.COLLOCATION_MODE.CTN_ONE_PT) 85 | trajopt.add_terminal_cost(Q_f=eye(4) * 1e5, offset=x_f) 86 | trajopt.add_input_constraints(u_min=-70 * ones(2), u_max=50* ones(2)) 87 | # trajopt.add_static_quad_cost() 88 | trajopt.add_static_quad_cost(Q=array([[1e2, 0, 0, 0], 89 | [0, 1e2, 0, 0], 90 | [0, 0, 1e1, 0], 91 | [0, 0, 0, 1e1]]), offset=x_f) 92 | def mpc_step(x, t, xt_prev, ut_prev): 93 | xs.append(x) 94 | xt, ut = trajopt.eval(x, max_cvx_iters=1, ws_xt=xt_prev, ws_ut=ut_prev) 95 | # ax = plot_2D_q_sys(xs, traj=xt) 96 | # plt.show() 97 | return xt, ut 98 | ctrl = MPCController(linear_dyn, mpc_step) 99 | xs, us = linear_dyn.simulate(x_0, controller=ctrl, 100 | ts=ts) 101 | # ctrl = ConstantController(linear_dyn, array([0.0, 0.0])) 102 | # testing.assert_allclose(xt[-1], x_f, atol=1e-7, rtol=1e-7) 103 | # uncomment for debugging 104 | testing.assert_allclose(x_f, xs[-1], atol=1e-1, rtol=1) 105 | # save_trajectory(us, xs, 'linear') 106 | # ax = plot_2D_q_sys(xs) 107 | # plt.show() 108 | 109 | 110 | def test_mpc_pendulum(): 111 | m = 0.25 112 | l = 0.5 113 | system = InvertedPendulum(mass=m, l=l) 114 | T = 35 115 | x_0 = array([-pi, 0.0]) 116 | ts, hk = linspace(0, 3, T, retstep=True) 117 | trajopt = TrajectoryOptimizer(15, hk, system, 118 | solver="OSQP", 119 | # max_delta_x=array([1 * pi / 180, 0.1]), 120 | max_delta_u=array([1]), 121 | # soft_dyn_weight=1e-3, 122 | collocation_mode=TrajectoryOptimizer.COLLOCATION_MODE.TRAPEZOIDAL) 123 | 124 | trajopt.add_static_quad_cost(Q=array([[1e3, 0], [0, 1e2]])) 125 | trajopt.add_terminal_cost(array([[1e7, 0], [0, 1e6]])) 126 | trajopt.add_input_constraints(u_min=ones(1) * -.6, u_max=ones(1) * .6) 127 | xs = list() 128 | def mpc_step(x, t, xt_prev, ut_prev): 129 | xs.append(x) 130 | xt, ut = trajopt.eval(x, max_cvx_iters=10, 131 | ws_xt=xt_prev, ws_ut=ut_prev, 132 | converge_tol=1) 133 | # fig, ax = system.plot_states(ts, stack(xs,axis=0), color="black") 134 | # system.plot_states(ts, xt, color="red", fig=fig, ax=ax) 135 | # plt.show() 136 | return xt, ut 137 | 138 | ctrl = MPCController(system, mpc_step) 139 | xs, us = system.simulate(x_0=x_0, ts=ts, controller=ctrl) 140 | testing.assert_allclose(actual=xs[-1], desired=zeros_like(xs[-1]), 141 | atol=1e-1, rtol=1e-1) 142 | # save_trajectory(us, xs, 'pendulum') 143 | # system.plot_states(ts, xs) 144 | # plt.show() 145 | 146 | 147 | 148 | def double_pendulum_episode(x_0, x_f): 149 | dyn = DoubleInvertedPendulum(1.5, 1.5, 1, 1) 150 | ts, hk = linspace(0, 6, 100, retstep=True) 151 | trajopt = TrajectoryOptimizer(25, hk, dyn, 152 | max_delta_u=array([5, 5]), 153 | collocation_mode=TrajectoryOptimizer.COLLOCATION_MODE.CTN_ONE_PT, 154 | solver="GUROBI") 155 | trajopt.add_input_constraints(u_min=ones(2) * -8, u_max=ones(2) * 8) 156 | trajopt.add_static_quad_cost(Q=diag(array([1e3, 1e3, 1e1, 1e1]))) 157 | trajopt.add_terminal_cost(diag(array([1e5, 1e5, 1e4, 1e4]))) 158 | xs = list() 159 | xts = list() 160 | def mpc_step(x, t, xt_prev, ut_prev): 161 | xs.append(x) 162 | cvx_iter = 20 if xt_prev is None else 5 163 | xt, ut = trajopt.eval(x, max_cvx_iters=cvx_iter, 164 | ws_xt=xt_prev, ws_ut=ut_prev, 165 | converge_tol=1e-2) 166 | xts.append(xt[1:]) 167 | # plot_2D_q_sys(xs, traj=xt, 168 | # start=(x_0[0], x_0[1]), 169 | # end=(x_f[0], x_f[1]), 170 | # xlim=(-2*pi, 2*pi), ylim=(-2 * pi, 2 * pi),Angela 171 | # scale=15) 172 | # plt.show() 173 | return xt, ut 174 | ctrl = MPCController(dyn, mpc_step) 175 | xs, us = dyn.simulate(x_0, controller=ctrl, ts=ts) 176 | plot_2D_q_sys(xs, traj=xts, 177 | start=(x_0[0], x_0[1]), 178 | end=(x_f[0], x_f[1]), 179 | xlim=(-2*pi, 2*pi), ylim=(-2 * pi, 2 * pi), 180 | scale=15) 181 | plt.show() 182 | # testing.assert_allclose(actual=xs[-1], desired=x_f, atol=1e-4, rtol=1e-4) 183 | return us, xs 184 | 185 | @pytest.mark.skip(reason="Too Unstable Currently") 186 | def test_mpc_double_pendulum(): 187 | x_0 = array([-pi, 0, 0, 0]) 188 | x_f = array([0, 0, 0, 0]) 189 | from numpy import random 190 | noises = random.uniform(low=(-.5, -.5, -.2, -.2), 191 | high=(.5, .5, .2, .2), 192 | size=(1,4) 193 | # size=(10,4) 194 | ) 195 | x_0s = x_0 + noises * 0 196 | for i in range(x_0s.shape[0]): 197 | us, xs = double_pendulum_episode(x_0s[i], x_f) 198 | # plot_2D_q_sys(xs, start=(x_0s[i][0], x_0s[i][1]), 199 | # end=(x_f[0], x_f[1]), 200 | # xlim=(-2*pi, 2*pi), ylim=(-2 * pi, 2 * pi), 201 | # scale=15) 202 | # plt.show() 203 | # save_trajectory(us, xs, 'double_pendulum', suffix=i) 204 | 205 | 206 | def test_mpc_segway(): 207 | dyn = Segway() 208 | x_0 = array([-1, 0, 0, 0]) 209 | x_f = array([1, 0, 0, 0]) 210 | ts, hk = linspace(0, 6, 100, retstep=True) 211 | 212 | trajopt = TrajectoryOptimizer(30, hk, dyn, 213 | max_delta_u=array([5,5]), 214 | collocation_mode=TrajectoryOptimizer.COLLOCATION_MODE.CTN_ONE_PT, 215 | solver="GUROBI") 216 | trajopt.add_input_constraints(u_min=ones(1) * -15, u_max=ones(1) * 15) 217 | trajopt.add_static_quad_cost(Q=diag(array([1e3, 1e2, 1e1, 1e1])), 218 | offset=x_f) 219 | trajopt.add_terminal_cost(diag(array([1e5, 1e4, 1e3, 1e3])), offset=x_f) 220 | xs = list() 221 | def mpc_step(x, t, xt_prev, ut_prev): 222 | xs.append(x) 223 | cvx_iter = 20 if xt_prev is None else 5 224 | xt, ut = trajopt.eval(x, max_cvx_iters=cvx_iter, 225 | ws_xt=xt_prev, ws_ut=ut_prev, 226 | converge_tol=1e-2) 227 | # plot_2D_q_sys(xs, traj=xt, start=(-1, 0), end=(1, 0), 228 | # xlim=(-2, 2), ylim=(- pi, pi), 229 | # scale=20) 230 | # plt.show() 231 | return xt, ut 232 | ctrl = MPCController(dyn, mpc_step) 233 | xs, us = dyn.simulate(x_0, controller=ctrl, ts=ts) 234 | testing.assert_allclose(actual=xs[-1], desired=x_f, atol=1e-3, rtol=1e-3) 235 | save_trajectory(us, xs, 'segway') 236 | # plot_2D_q_sys(xs, start=(-1, 0), end=(1, 0), 237 | # xlim=(-2, 2), ylim=(- pi, pi), 238 | # scale=20) 239 | # plt.show() 240 | 241 | def test_single_track_bicycle(): 242 | dyn = SingleTrackBicycle() 243 | x_0 = array([-1, 0, 0, 0.1, 0, 0]) 244 | x_f = array([1, 1, 0, 0.2, 0, 0]) 245 | ts, hk = linspace(0, 6, 100, retstep=True) 246 | 247 | trajopt = TrajectoryOptimizer(30, hk, dyn, 248 | # max_delta_u=array([5,5]), 249 | collocation_mode=TrajectoryOptimizer.COLLOCATION_MODE.CTN_ONE_PT, 250 | solver="GUROBI") 251 | trajopt.add_input_constraints(u_min=ones(2) * -15, u_max=ones(2) * 15) 252 | trajopt.add_static_quad_cost( 253 | Q=diag(array([1e3, 1e3, 1e3, 1e1, 1e1, 1e1])), 254 | offset=x_f) 255 | trajopt.add_terminal_cost(diag(array([1e5, 1e5, 1e5, 1e2, 1e2, 1e2])), offset=x_f) 256 | xs = list() 257 | def mpc_step(x, t, xt_prev, ut_prev): 258 | xs.append(x) 259 | cvx_iter = 20 if xt_prev is None else 5 260 | xt, ut = trajopt.eval(x, max_cvx_iters=cvx_iter, 261 | ws_xt=xt_prev, ws_ut=ut_prev, 262 | converge_tol=1e-2) 263 | ax = plot_6D_config(xs, xt, start=(-1, 0), end=(1,1), 264 | xlim=(-2, 2), ylim=(- pi, pi), 265 | scale=20) 266 | plt.show() 267 | # plot_2D_q_sys(xs, traj=xt, start=(-1, 0), end=(1, 0), 268 | # xlim=(-2, 2), ylim=(- pi, pi), 269 | # scale=20) 270 | # plt.show() 271 | return xt, ut 272 | ctrl = MPCController(dyn, mpc_step) 273 | xs, us = dyn.simulate(x_0, controller=ctrl, ts=ts) 274 | testing.assert_allclose(actual=xs[-1], desired=x_f, atol=1e-3, rtol=1e-3) 275 | # save_trajectory(us, xs, 'single_track_bicycle') 276 | # plot_2D_q_sys(xs, start=(-1, 0), end=(1, 0), 277 | # xlim=(-2, 2), ylim=(- pi, pi), 278 | # scale=20) 279 | # plt.show() 280 | -------------------------------------------------------------------------------- /tests/test_systems.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from numpy import allclose, linspace, ones, load 3 | from core.systems import CartPole, InvertedPendulum, DoubleInvertedPendulum 4 | import core.controllers as ctrl 5 | from pathlib import Path 6 | 7 | # Physical and other constants 8 | m = 0.25 9 | l = 0.5 10 | T = 5 11 | dt = 1e-2 12 | f = 1 / dt 13 | N = int(T * f) 14 | ts = linspace(0, T, N + 1) 15 | 16 | 17 | def constant_controller(system): 18 | return ctrl.ConstantController(system, ones([system.m]) * 0.01) 19 | 20 | 21 | destination_dir = Path(__file__).parent.absolute() / 'data' / 'test_systems' 22 | 23 | 24 | # Make sure system dynamics have not changed 25 | # systems and initial conditions 26 | 27 | @pytest.mark.parametrize('system', 28 | [ 29 | InvertedPendulum(m=m, l=l), 30 | CartPole(m_c=m * 0.5, m_p=m * .25, l=l * 0.5), 31 | DoubleInvertedPendulum(m_1=m, m_2=m * 2, l_1=l, 32 | l_2=l * 2) 33 | ]) 34 | def test_with_constrant_controller(system): 35 | load_dir = destination_dir / system.__class__.__name__ 36 | ts = load(load_dir / 'ts.npy') 37 | true_xs = load(load_dir / 'xs.npy') 38 | x_0 = load(load_dir / 'x_0.npy') 39 | true_us = load(load_dir / 'us.npy') 40 | (actual_xs, actual_us) = system.simulate(x_0=x_0, 41 | controller=constant_controller( 42 | system), 43 | ts=ts) 44 | # TODO: Should tolerance be lower? The more chaotic systems need it. 45 | # I suspect torch.solve is the cause 46 | assert allclose(actual_xs, true_xs, atol=1e-4) 47 | assert allclose(actual_us, true_us, atol=1e-4) 48 | -------------------------------------------------------------------------------- /tests/test_traj_opt.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from numpy import array, concatenate, diag, eye, linspace, pi, ones, zeros 3 | from numpy.linalg import norm 4 | from numpy import testing 5 | from core.systems import LinearSystemDynamics, InvertedPendulum 6 | from core.controllers import ConstantController, QPController, \ 7 | FBLinController, LQRController, PiecewiseConstantController 8 | from core.trajopt import TrajectoryOptimizer 9 | import matplotlib.pyplot as plt 10 | 11 | linear_dyn = LinearSystemDynamics( 12 | array([ 13 | [0, 0, 1, 0], 14 | [0, 0, 0, 1], 15 | [-1, 0, 0, 0], 16 | [0, 1, 0, 0], 17 | ]), array([ 18 | [0, 0], 19 | [0, 0], 20 | [1, 0], 21 | [0, 1] 22 | ])) 23 | 24 | 25 | def plot_2D_q_sys(xs, ax=None): 26 | if ax is None: 27 | fig = plt.figure(1) 28 | ax = fig.add_subplot() 29 | end_circ = plt.Circle((50, 50), 2, color='g') 30 | start_circ = plt.Circle((0, 0), 2, color='b') 31 | for x in xs: 32 | vnorm = norm(x[2:]) 33 | vnorm = vnorm if vnorm > 0 else 1 34 | ax.arrow(x[0], x[1], x[2] / vnorm, x[3] / vnorm, 35 | width=0.5, 36 | length_includes_head=True) 37 | ax.set_xlim(-100, 100) 38 | ax.set_ylim(-100, 100) 39 | ax.add_artist(end_circ) 40 | ax.add_artist(start_circ) 41 | return ax 42 | 43 | 44 | def test_reach_goal(): 45 | ctrl = ConstantController(linear_dyn, array([0.0, 0.0])) 46 | xs, us = linear_dyn.simulate(x_0=[0, 0, 10, 0], 47 | controller=ctrl, 48 | ts=linspace(0, 100, 100)) 49 | x_0 = array([0, 0, 10, 0]) 50 | x_f = array([50, 50, 0, 0]) 51 | trajopt = TrajectoryOptimizer(100, 0.1, linear_dyn) 52 | trajopt.add_hard_terminal_constraint(x_f) 53 | trajopt.add_static_quad_cost() 54 | [xt, ut] = trajopt.eval(x_0) 55 | ut.min() 56 | ut.max() 57 | testing.assert_allclose(xt[-1], x_f, atol=1e-7, rtol=1e-7) 58 | # uncomment for debugging 59 | # ax = plot_2D_q_sys(xt) 60 | # plt.show() 61 | 62 | def test_continuous_approx(): 63 | ctrl = ConstantController(linear_dyn, array([0.0, 0.0])) 64 | ts, hk = linspace(0, 100, 100,retstep=True) 65 | xs, us = linear_dyn.simulate(x_0=[0, 0, 10, 0], 66 | controller=ctrl, 67 | ts=ts) 68 | x_0 = array([0, 0, 10, 0]) 69 | x_f = array([50, 50, 0, 0]) 70 | trajopt = TrajectoryOptimizer(100, hk, linear_dyn, TrajectoryOptimizer.COLLOCATION_MODE.CTN_ONE_PT) 71 | trajopt.add_hard_terminal_constraint(x_f) 72 | trajopt.add_static_quad_cost() 73 | [xt, ut] = trajopt.eval(x_0) 74 | xs, us = linear_dyn.simulate(x_0=[0, 0, 10, 0], 75 | controller=PiecewiseConstantController(linear_dyn, hk, ut), 76 | ts=ts) 77 | testing.assert_allclose(xs, xt, atol=1e-1) 78 | testing.assert_allclose(xt[-1], x_f, atol=1e-7, rtol=1e-7) 79 | 80 | 81 | def test_input_constraints(): 82 | x_0 = array([0, 0, 10, 0]) 83 | x_f = array([50, 50, 0, 0]) 84 | trajopt = TrajectoryOptimizer(100, 0.1, linear_dyn) 85 | trajopt.add_static_quad_cost() 86 | trajopt.add_terminal_cost(Q_f=eye(4) * 1e5, offset=x_f) 87 | trajopt.add_input_constraints(u_min=ones(2) * -50, u_max=ones(2) * 1) 88 | [xt, ut] = trajopt.eval(x_0) 89 | 90 | if ut.min() < -50: 91 | testing.assert_allclose(ut.min(), -50, rtol=1e-4) 92 | if ut.max() <= 1: 93 | testing.assert_allclose(ut.max(), -50, rtol=1e-4) 94 | 95 | testing.assert_allclose(xt[-1], x_f, atol=1, rtol=1e-2) 96 | 97 | #uncomment for debugging 98 | #ax = plot_2D_q_sys(xt) 99 | #plt.show() 100 | 101 | @pytest.mark.skip( 102 | reason="can be used for debugging but takes too long otherwise") 103 | def test_nl_pendulum_with_constraints(): 104 | from core.systems.inverted_pendulum import InvertedPendulum 105 | m = 0.25 106 | l = 0.5 107 | system = InvertedPendulum(m=m, l=l) 108 | T = 100 109 | ts, hk = linspace(0, 4, T, retstep=True) 110 | x_0 = array([-pi, 0.0]) 111 | # [xs, us] = system.simulate(x_0=x_0, controller=qp, ts=ts) 112 | def sqp_step(ws_ut): 113 | trajopt = TrajectoryOptimizer(T, hk, system, 114 | # max_delta_x=array([1 * pi / 180, 0.1]), 115 | max_delta_u=array([1]), 116 | # soft_dyn_weight=1e-3, 117 | collocation_mode=TrajectoryOptimizer.COLLOCATION_MODE.TRAPEZOIDAL) 118 | 119 | trajopt.add_static_quad_cost(Q=array([[1e3, 0],[0, 1e2]])) 120 | trajopt.add_terminal_cost(array([[1e7, 0],[0, 1e6]])) 121 | trajopt.add_input_constraints(u_min=ones(1)*-.3, u_max=ones(1)*.3) 122 | return trajopt.eval(x_0, max_cvx_iters=10, ws_ut=ws_ut) 123 | 124 | ut = 0.3 * zeros((T, 1)) 125 | for i in range(0, T, 20): 126 | ut[i:i + 10] = -ut[i:i + 10] 127 | for i in range(50): 128 | [xt, ut] = sqp_step(ut) 129 | # system.plot(xt, ut[1:], ts) 130 | # plt.show() 131 | [xs, us] = system.simulate(x_0, PiecewiseConstantController(system, hk,ut), ts) 132 | 133 | # system.plot(xs, us, ts) 134 | 135 | def test_double_inv_pendulum_trapezoiodal_vs_linearization(): 136 | from core.systems.double_inverted_pendulum import DoubleInvertedPendulum 137 | system = DoubleInvertedPendulum(m_1=0.25, m_2=0.25, 138 | l_1=0.5, l_2=0.5) 139 | 140 | T = 100 141 | ts, hk = linspace(0, 4, T, retstep=True) 142 | x_0 = array([-pi, 0.0, 0.0, 0.0]) 143 | 144 | def sqp_step(mode): 145 | trajopt = TrajectoryOptimizer(T, hk, system, 146 | # max_delta_x=array([1 * pi / 180, 0.1]), 147 | max_delta_u=array([0.5]), 148 | # soft_dyn_weight=1e-3, 149 | solver='GUROBI', 150 | collocation_mode=mode) 151 | 152 | # trajopt.build(x_0) 153 | # trajopt.add_static_quad_cost() 154 | trajopt.add_static_quad_cost(Q=diag(array([1e3, 1e3, 1e2, 1e2]))) 155 | trajopt.add_terminal_cost(diag(array([1e7, 1e7, 1e6, 1e6]))) 156 | trajopt.add_input_constraints(u_min=ones(2) * -.3, u_max=ones(2) * .3) 157 | return trajopt.eval(x_0, max_cvx_iters=10) 158 | 159 | [xt_trap, ut_trap] = sqp_step(TrajectoryOptimizer.COLLOCATION_MODE.TRAPEZOIDAL) 160 | [xs_trap, us_trap] = system.simulate(x_0, PiecewiseConstantController(system, hk, ut_trap), ts) 161 | [xt_lin, ut_lin] = sqp_step(TrajectoryOptimizer.COLLOCATION_MODE.CTN_ONE_PT) 162 | [xs_lin, us_lin] = system.simulate(x_0,PiecewiseConstantController(system,hk,ut_lin),ts) 163 | 164 | trap_err = xs_trap - xt_trap 165 | lin_err = xs_lin - xt_lin 166 | assert abs(trap_err).mean() > abs(lin_err).mean() 167 | # plt.semilogy(norm(trap_err,axis=1), 'r', label='Trapezoidal Collocation Error') 168 | # plt.semilogy(norm(lin_err, axis=1), 'b', label='Linearization Error') 169 | # plt.title('Double Pendulum Open Loop Error') 170 | # plt.legend(loc='upper left') 171 | # plt.show() 172 | 173 | def test_nl_constrained_pendulum_trapezoidal_vs_linearization(): 174 | from core.systems.inverted_pendulum import InvertedPendulum 175 | m = 0.25 176 | l = 0.5 177 | system = InvertedPendulum(m=m, l=l) 178 | T = 100 179 | ts, hk = linspace(0, 4, T, retstep=True) 180 | x_0 = array([-pi, 0.0]) 181 | 182 | def sqp_step(mode): 183 | trajopt = TrajectoryOptimizer(T, hk, system, 184 | # max_delta_x=array([1 * pi / 180, 0.1]), 185 | max_delta_u=array([1]), 186 | # soft_dyn_weight=1e-3, 187 | collocation_mode=mode) 188 | 189 | # trajopt.build(x_0) 190 | ws_ut = zeros((T,1)) 191 | # trajopt.add_static_quad_cost() 192 | trajopt.add_static_quad_cost(Q=array([[1e3, 0], [0, 1e2]])) 193 | trajopt.add_terminal_cost(array([[1e7, 0], [0, 1e6]])) 194 | trajopt.add_input_constraints(u_min=ones(1) * -.3, u_max=ones(1) * .3) 195 | return trajopt.eval(x_0, max_cvx_iters=10, ws_ut=ws_ut) 196 | 197 | [xt_trap, ut_trap] = sqp_step(TrajectoryOptimizer.COLLOCATION_MODE.TRAPEZOIDAL) 198 | [xs_trap, us_trap] = system.simulate(x_0, PiecewiseConstantController(system, hk, ut_trap), ts) 199 | [xt_lin, ut_lin] = sqp_step(TrajectoryOptimizer.COLLOCATION_MODE.CTN_ONE_PT) 200 | [xs_lin, us_lin] = system.simulate(x_0,PiecewiseConstantController(system,hk,ut_lin),ts) 201 | 202 | trap_err = xs_trap - xt_trap 203 | lin_err = xs_lin - xt_lin 204 | assert abs(trap_err).mean() > abs(lin_err).mean() 205 | #uncomment plotting for debugging 206 | 207 | # plt.semilogy(norm(trap_err,axis=1), 'r', label='Trapezoidal Collocation Error') 208 | # plt.semilogy(norm(lin_err, axis=1), 'b', label='Linearization Error') 209 | # plt.title('Pendulum Open Loop Error') 210 | # plt.legend(loc='upper left') 211 | # plt.show() 212 | 213 | 214 | 215 | def test_linear_integration(): 216 | from core.systems.inverted_pendulum import InvertedPendulum 217 | system = linear_dyn 218 | T = 100 219 | ts, hk = linspace(0, 10, T, retstep=True) 220 | x_0 = array([0, 0, 10, 0]) 221 | x_f = array([50, 50, 0, 0]) 222 | def sqp_step(mode): 223 | trajopt = TrajectoryOptimizer(T, hk, system, 224 | collocation_mode=mode) 225 | trajopt.add_static_quad_cost() 226 | trajopt.add_terminal_cost(Q_f=eye(4) * 1e5, offset=x_f) 227 | trajopt.add_input_constraints(u_min=ones(2) * -50, u_max=ones(2) * 1) 228 | return trajopt.eval(x_0=x_0, max_cvx_iters=1) 229 | 230 | [xt_lin, ut_lin] = sqp_step(TrajectoryOptimizer.COLLOCATION_MODE.CTN_ONE_PT) 231 | [xs_lin, us_lin] = system.simulate(x_0, PiecewiseConstantController(system,hk,ut_lin), ts) 232 | 233 | lin_err = xs_lin[:-1] - xt_lin[:-1] 234 | assert abs(lin_err.mean()) < 1e-4 235 | #uncomment plotting for debugging 236 | # fig, axs = plt.subplots(1,3) 237 | # 238 | # plot_2D_q_sys(xt_lin[:-2], axs[0]) 239 | # plot_2D_q_sys(xs_lin[:-2], axs[1]) 240 | # axs[2].semilogy(norm(lin_err, axis=1), 'b', label='Linearization Error') 241 | # axs[2].legend(loc='upper left') 242 | # plt.show() 243 | --------------------------------------------------------------------------------