├── __init__.py ├── src ├── modules │ ├── __init__.py │ ├── warm_starter.py │ └── preconditioner.py ├── utils │ ├── __init__.py │ ├── osqp_utils.py │ ├── np_batch_op.py │ ├── rlgame_utils.py │ ├── visualization.py │ ├── torch_utils.py │ ├── sets.py │ └── geometry.py ├── networks │ ├── __init__.py │ └── a2c_qp_unrolled.py └── envs │ ├── mpc_baseline_parameters.py │ └── env_creators.py ├── .gitmodules ├── requirements.txt ├── experiments ├── cartpole │ ├── test_mpc.sh │ ├── test_mlp.sh │ ├── test_qp.sh │ ├── benchmark_stat.py │ ├── benchmark.sh │ ├── run.sh │ ├── reproduce_table.py │ ├── reproduce.sh │ └── visualize_trajectories.py ├── tank │ ├── test_skip_steady.sh │ ├── test_qp.sh │ ├── test_mpc.sh │ ├── test_imitation.sh │ ├── test_residual_loss.sh │ ├── test_reward_shaping.sh │ ├── test_force_feasible.sh │ ├── benchmark.sh │ ├── test_skip_steady.py │ ├── run.sh │ ├── plot_histogram.py │ ├── benchmark_stat.py │ ├── reproduce_table.py │ ├── reproduce.sh │ ├── reproduce_table_disturbed.py │ ├── reproduce_disturbed.sh │ ├── visualize_trajectories.py │ └── visualize_feasible_sets.py └── double_integrator │ ├── run.sh │ ├── dump_parameters.py │ ├── verify_stability.jl │ └── visualize.py ├── setup.py ├── auxiliary ├── profile_bmv.py ├── test_solver_np.py ├── test_solver.py ├── profile.py ├── test_solver_parallel.py └── train_warmstarter.py ├── LICENSE ├── runner_config.yaml ├── README.md ├── .gitignore └── run.py /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/networks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "rl_games"] 2 | path = rl_games 3 | url = https://github.com/yiwenlu66/rl_games.git 4 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | rl_games==1.6.0 2 | torch==2.1.0 3 | icecream==2.1.3 4 | qpsolvers==3.4.0 5 | pandas==2.1.0 6 | scipy==1.11.1 7 | matplotlib==3.6.3 8 | cvxpy==1.4.1 9 | do_mpc==4.6.4 10 | tqdm==4.66.1 11 | -------------------------------------------------------------------------------- /experiments/cartpole/test_mpc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | N=16 4 | noise=0 5 | 6 | python ../../run.py test cartpole --num-parallel 10000 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --shared-PH --affine-qb --mpc-baseline-N ${N} --noise-level ${noise} --batch-test --use-osqp-for-mpc --mpc-terminal-cost-coef 10 --exp-name qp_test_reward_shaping_10_0.1_0_8_48 --run-name mpc_10t --max-steps-per-episode 100 7 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, 
find_packages 2 | 3 | def read_requirements(filename): 4 | with open(filename) as f: 5 | return [line.strip() for line in f if line.strip() and not line.startswith('#')] 6 | 7 | setup( 8 | name="learning-qp", 9 | version="0.1.0", 10 | packages=find_packages("src"), 11 | package_dir={"": "src"}, 12 | install_requires=read_requirements('requirements.txt'), 13 | ) 14 | -------------------------------------------------------------------------------- /experiments/tank/test_skip_steady.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Store the command line argument in a global variable 4 | TRAIN_OR_TEST="$1" 5 | 6 | n_qp=8 7 | m_qp=32 8 | noise_level=0 9 | python ../../run.py $TRAIN_OR_TEST tank \ 10 | --num-parallel 100000 \ 11 | --horizon 20 \ 12 | --epochs 2000 \ 13 | --mini-epochs 1 \ 14 | --qp-unrolled \ 15 | --shared-PH \ 16 | --affine-qb \ 17 | --noise-level ${noise_level} \ 18 | --n-qp ${n_qp} \ 19 | --m-qp ${m_qp} \ 20 | --use-residual-loss \ 21 | --no-obs-normalization \ 22 | --skip-to-steady-state \ 23 | --lr-schedule linear \ 24 | --exp-name test_skip_steady -------------------------------------------------------------------------------- /experiments/cartpole/test_mlp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | run_task() { 4 | local gpu_id=$1 5 | local c1=$2 6 | local c2=$3 7 | local c3=$4 8 | local n_qp=$5 9 | local m_qp=$6 10 | CUDA_VISIBLE_DEVICES=$gpu_id python ../../run.py test cartpole --num-parallel 10000 --horizon 20 --epochs 5000 --mini-epochs 1 --noise-level 0. --reward-shaping ${c1},${c2},${c3} --n-qp $n_qp --m-qp $m_qp --shared-PH --affine-qb --use-residual-loss --no-obs-normalization --force-feasible --batch-test --exp-name mlp_test_reward_shaping_${c1}_${c2}_${c3}_${n_qp}_${m_qp} --lr-schedule linear --initial-lr "5e-4" --max-steps-per-episode 100 11 | } 12 | 13 | run_task 1 10 0.1 0 8 48 & 14 | 15 | wait 16 | -------------------------------------------------------------------------------- /experiments/tank/test_qp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | run_task() { 4 | local gpu_id=$1 5 | local c1=$2 6 | local c2=$3 7 | local c3=$4 8 | local n_qp=$5 9 | local m_qp=$6 10 | CUDA_VISIBLE_DEVICES=$gpu_id python ../../run.py train tank --num-parallel 10000 --horizon 20 --epochs 5000 --mini-epochs 1 --noise-level 0. 
--reward-shaping ${c1},${c2},${c3} --n-qp $n_qp --m-qp $m_qp --shared-PH --affine-qb --strict-affine-layer --obs-has-half-ref --use-residual-loss --no-obs-normalization --force-feasible --batch-test --exp-name mlp_test_reward_shaping_${c1}_${c2}_${c3}_${n_qp}_${m_qp} --lr-schedule linear --initial-lr "5e-4" & 11 | } 12 | 13 | run_task 0 50 0.05 2 4 24 14 | 15 | wait 16 | -------------------------------------------------------------------------------- /auxiliary/profile_bmv.py: -------------------------------------------------------------------------------- 1 | """Test performance of Ab, where A is a single matrix, and b is a batch of vectors.""" 2 | 3 | import torch 4 | import time 5 | 6 | def bmv1(A, b): 7 | return (A.unsqueeze(0) @ b.unsqueeze(-1)).squeeze(-1) 8 | 9 | def bmv2(A, b): 10 | return (A @ b.t()).t() 11 | 12 | batch_size = 100000 13 | n = 100 14 | device = "cuda:0" 15 | 16 | def benchmark(f): 17 | for i in range(1000): 18 | A = torch.randn((n, n), device=device, requires_grad=True) 19 | b = torch.randn((batch_size, n), device=device, requires_grad=True) 20 | loss = f(A, b).sum() 21 | loss.backward() 22 | 23 | t = time.time(); benchmark(bmv1); print(time.time() - t) 24 | t = time.time(); benchmark(bmv2); print(time.time() - t) 25 | -------------------------------------------------------------------------------- /src/utils/osqp_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import qpsolvers 3 | 4 | def osqp_solve_qp_guarantee_return( 5 | P, q, G=None, h=None, A=None, b=None, lb=None, ub=None, initvals=None, verbose=False, **kwargs, 6 | ): 7 | problem = qpsolvers.problem.Problem(P, q, G, h, A, b, lb, ub) 8 | solution = qpsolvers.solvers.osqp_.osqp_solve_problem(problem, initvals, verbose, **kwargs) 9 | sol_returned = solution.x if solution.x is not None and solution.x.dtype == np.float64 else np.zeros(q.shape[0])  # fall back to zeros on a failed solve so a float array is always returned 10 | iter_count = solution.extras["info"].iter 11 | return sol_returned, iter_count 12 | 13 | def osqp_oracle(q, b, P, H, return_iter_count=False, max_iter=1000): 14 | sol, iter_count = osqp_solve_qp_guarantee_return( 15 | P=P, q=q, G=-H, h=b, 16 | A=None, b=None, lb=None, ub=None, 17 | max_iter=max_iter, eps_abs=1e-10, eps_rel=1e-10, eps_prim_inf=1e-10, eps_dual_inf=1e-10, verbose=False, 18 | ) 19 | if not return_iter_count: 20 | return sol 21 | else: 22 | return sol, iter_count 23 | -------------------------------------------------------------------------------- /experiments/tank/test_mpc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | N=10 4 | noise=0 5 | 6 | # python ../../run.py test tank --num-parallel 100 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --shared-PH --affine-qb --mpc-baseline-N ${N} --noise-level ${noise} --batch-test --use-osqp-for-mpc --mpc-terminal-cost-coef 10 --quiet --exp-name qp_unrolled_shared_affine --randomize --noise-level 0.1 --run-name mpc_10t_perturbed --max-steps-per-episode 10 7 | # python ../../run.py test tank --num-parallel 100 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --shared-PH --affine-qb --mpc-baseline-N ${N} --noise-level ${noise} --batch-test --mpc-terminal-cost-coef 10 --quiet --exp-name qp_unrolled_shared_affine --run-name mpc_scenario --robust-mpc-method scenario --randomize --noise-level 0.1 --max-steps-per-episode 10 8 | python ../../run.py test tank --num-parallel 100 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --shared-PH --affine-qb --mpc-baseline-N ${N} --noise-level ${noise}
--batch-test --mpc-terminal-cost-coef 10 --quiet --exp-name qp_unrolled_shared_affine --run-name mpc_tube --robust-mpc-method tube --randomize --noise-level 0.1 --max-steps-per-episode 100 9 | -------------------------------------------------------------------------------- /experiments/cartpole/test_qp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | run_task() { 4 | local gpu_id=$1 5 | local c1=$2 6 | local c2=$3 7 | local c3=$4 8 | local n_qp=$5 9 | local m_qp=$6 10 | CUDA_VISIBLE_DEVICES=$gpu_id python ../../run.py train cartpole --num-parallel 10000 --horizon 20 --epochs 500 --mini-epochs 1 --noise-level 0. --reward-shaping ${c1},${c2},${c3} --qp-unrolled --n-qp $n_qp --m-qp $m_qp --shared-PH --symmetric --affine-qb --use-residual-loss --no-obs-normalization --force-feasible --batch-test --exp-name qp_test_reward_shaping_${c1}_${c2}_${c3}_${n_qp}_${m_qp} --lr-schedule adaptive --max-steps-per-episode 100 11 | CUDA_VISIBLE_DEVICES=$gpu_id python ../../run.py test cartpole --num-parallel 10000 --horizon 20 --epochs 5000 --mini-epochs 1 --noise-level 0. --reward-shaping ${c1},${c2},${c3} --qp-unrolled --n-qp $n_qp --m-qp $m_qp --shared-PH --affine-qb --symmetric --use-residual-loss --no-obs-normalization --force-feasible --batch-test --exp-name qp_test_reward_shaping_${c1}_${c2}_${c3}_${n_qp}_${m_qp} --lr-schedule linear --initial-lr "5e-4" --max-steps-per-episode 100 12 | } 13 | 14 | run_task 1 10 0.1 0 8 48 & 15 | 16 | wait 17 | -------------------------------------------------------------------------------- /experiments/tank/test_imitation.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Store the command line argument in a global variable 4 | TRAIN_OR_TEST="$1" 5 | 6 | run_imitate() { 7 | export CUDA_VISIBLE_DEVICES=0 8 | n_qp=8 9 | m_qp=32 10 | noise_level=0 11 | python ../../run.py $TRAIN_OR_TEST tank \ 12 | --num-parallel 100000 \ 13 | --horizon 20 \ 14 | --epochs 200 \ 15 | --mini-epochs 1 \ 16 | --qp-unrolled \ 17 | --shared-PH \ 18 | --affine-qb \ 19 | --noise-level ${noise_level} \ 20 | --n-qp ${n_qp} \ 21 | --m-qp ${m_qp} \ 22 | --imitate-mpc-N 10 \ 23 | --exp-name imitate 24 | } 25 | 26 | run_fine_tune() { 27 | export CUDA_VISIBLE_DEVICES=0 28 | n_qp=8 29 | m_qp=32 30 | noise_level=0 31 | python ../../run.py $TRAIN_OR_TEST tank \ 32 | --num-parallel 100000 \ 33 | --horizon 20 \ 34 | --epochs 400 \ 35 | --mini-epochs 1 \ 36 | --qp-unrolled \ 37 | --shared-PH \ 38 | --affine-qb \ 39 | --noise-level ${noise_level} \ 40 | --n-qp ${n_qp} \ 41 | --m-qp ${m_qp} \ 42 | --initialize-from-experiment imitate \ 43 | --no-obs-normalization \ 44 | --exp-name fine_tune 45 | } 46 | 47 | run_imitate 48 | run_fine_tune 49 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Yiwen Lu, Zishuo Li, Yihan Zhou, Na Li, and Yilin Mo 4 | 5 | Copyright (c) 2019 Denys88 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following 
conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in all 15 | copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 24 | -------------------------------------------------------------------------------- /experiments/tank/test_residual_loss.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Store the command line argument in a global variable 4 | TRAIN_OR_TEST="$1" 5 | 6 | group_one() { 7 | export CUDA_VISIBLE_DEVICES=0 8 | n_qp=2 9 | m_qp=64 10 | noise_level=0 11 | python ../../run.py $TRAIN_OR_TEST tank \ 12 | --num-parallel 100000 \ 13 | --horizon 20 \ 14 | --epochs 2000 \ 15 | --mini-epochs 1 \ 16 | --qp-unrolled \ 17 | --shared-PH \ 18 | --affine-qb \ 19 | --noise-level ${noise_level} \ 20 | --n-qp ${n_qp} \ 21 | --m-qp ${m_qp} \ 22 | --exp-name residual_loss_off 23 | } 24 | 25 | group_two() { 26 | export CUDA_VISIBLE_DEVICES=1 27 | n_qp=2 28 | m_qp=64 29 | noise_level=0 30 | python ../../run.py $TRAIN_OR_TEST tank \ 31 | --num-parallel 100000 \ 32 | --horizon 20 \ 33 | --epochs 2000 \ 34 | --mini-epochs 1 \ 35 | --qp-unrolled \ 36 | --shared-PH \ 37 | --affine-qb \ 38 | --noise-level ${noise_level} \ 39 | --n-qp ${n_qp} \ 40 | --m-qp ${m_qp} \ 41 | --use-residual-loss \ 42 | --exp-name residual_loss_on 43 | } 44 | 45 | # Start both groups in parallel 46 | group_one & group_two & 47 | 48 | # Wait for both background tasks to complete 49 | wait 50 | 51 | -------------------------------------------------------------------------------- /auxiliary/test_solver_np.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import numpy as np 3 | import sys 4 | import os 5 | file_path = os.path.dirname(__file__) 6 | sys.path.append(os.path.join(file_path, "..")) 7 | from src.utils.osqp_utils import osqp_oracle 8 | 9 | problem = np.load("example_problem.npy", allow_pickle=True).item()  # dumped by auxiliary/test_solver.py 10 | q = problem["q"] 11 | b = problem["b"] 12 | P = problem["P"] 13 | H = problem["H"] 14 | 15 | def obj(x): 16 | return 0.5 * np.einsum('i,ij,j->', x, P, x) + np.dot(q, x) 17 | 18 | x_star = osqp_oracle(q, b, P, H) 19 | 20 | # %% 21 | m, n = H.shape 22 | D = np.eye(m) 23 | Dt = np.linalg.inv(D + H @ np.linalg.solve(P, H.T)) 24 | mu = Dt @ (H @ np.linalg.solve(P, q) - b) 25 | A = np.block([ 26 | [Dt @ D, Dt], 27 | [-2 * Dt @ D + np.eye(m), np.eye(m) - 2 * Dt], 28 | ]) 29 | B = np.hstack([ 30 | mu, 31 | -2 * mu 32 | ]) 33 | def iterate(X):  # renamed from `iter` to avoid shadowing the builtin 34 | X = A @ X + B 35 | X[m:] = np.clip(X[m:], 0, np.inf)  # project the z-block onto the nonnegative orthant 36 | return X 37 | 38 | # %% 39 | def power_func(f, n): 40 | def helper(x): 41 | for _ in range(n): 42 | x = f(x) 43 | return x 44 | return helper 45 | 46 | def get_sol(z): 47 | PinvHt = np.linalg.solve(P, H.T) 48 | M = np.linalg.solve((H @ PinvHt).T, PinvHt.T).T 49 | return -(np.eye(n) - M @ H) @ np.linalg.solve(P, q) + M @ (z - b) 50 | 51 | X = power_func(iterate, 10000)(np.zeros(2 * m)) 52 | z = X[m:] 53 | x = get_sol(z) 54 | obj(x) 55 | 56 | # %% 57 |
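One way to read the splitting iteration assembled in `auxiliary/test_solver_np.py` above (the block names $\lambda$ and $z$ are an interpretive choice, not ones used in the script): with $D = I$, $\tilde{D} = (D + H P^{-1} H^\top)^{-1}$ and $\mu = \tilde{D}\,(H P^{-1} q - b)$, splitting $X \in \mathbb{R}^{2m}$ into its first $m$ entries $\lambda_k$ and last $m$ entries $z_k$, each call to `iterate` performs

$$
\begin{bmatrix} \lambda_{k+1} \\ z_{k+1}^{-} \end{bmatrix}
=
\begin{bmatrix} \tilde{D} D & \tilde{D} \\ I - 2\tilde{D} D & I - 2\tilde{D} \end{bmatrix}
\begin{bmatrix} \lambda_k \\ z_k \end{bmatrix}
+
\begin{bmatrix} \mu \\ -2\mu \end{bmatrix},
\qquad
z_{k+1} = \max\!\left(z_{k+1}^{-},\, 0\right),
$$

after which `get_sol` maps the converged $z$ back to the primal solution $x$. This is consistent with the "PDHG formulation" referenced in the `WarmStarter` docstring in `src/modules/warm_starter.py` below.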
-------------------------------------------------------------------------------- /src/modules/warm_starter.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.nn import functional as F 4 | from torch.linalg import solve, inv, pinv 5 | import numpy as np 6 | from ..utils.torch_utils import vectorize_upper_triangular 7 | 8 | class WarmStarter(nn.Module): 9 | def __init__(self, device, n, m, fixed_P=True, fixed_H=True): 10 | super().__init__() 11 | self.device = device 12 | self.n = n 13 | self.m = m 14 | self.fixed_P = fixed_P 15 | self.fixed_H = fixed_H 16 | num_in = n + m 17 | if not fixed_P: 18 | num_in += n * (n + 1) // 2 19 | if not fixed_H: 20 | num_in += n * m 21 | num_out = 2 * m 22 | num_hidden = max(num_in, num_out) 23 | self.net = nn.Sequential( 24 | nn.Linear(num_in, num_hidden), 25 | nn.ReLU(), 26 | nn.Linear(num_hidden, num_hidden), 27 | nn.ReLU(), 28 | nn.Linear(num_hidden, num_out), 29 | ).to(device=device) 30 | 31 | def forward(self, q, b, P=None, H=None): 32 | """The P argument can be either P or inv(P) in the original PDHG formulation, as long as consistent.""" 33 | net_input = [q, b] 34 | if not self.fixed_P: 35 | net_input.append(vectorize_upper_triangular(P)) 36 | if not self.fixed_H: 37 | net_input.append(H.flatten(start_dim=-2)) 38 | net_input_t = torch.cat(net_input, 1) 39 | X = self.net(net_input_t) 40 | return X 41 | -------------------------------------------------------------------------------- /auxiliary/test_solver.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import os 3 | import sys 4 | file_path = os.path.dirname(__file__) 5 | sys.path.append(os.path.join(file_path, "..")) 6 | from src.modules.qp_solver import QPSolver 7 | from src.modules.warm_starter import WarmStarter 8 | from src.utils.torch_utils import bqf, bmv, bvv 9 | from src.utils.mpc_utils import generate_random_problem 10 | import torch 11 | from torch.nn import functional as F 12 | import numpy as np 13 | 14 | n = 10 15 | m = 5 16 | 17 | device = "cuda:0" 18 | torch.manual_seed(42) 19 | q0, b0, P0, H0 = generate_random_problem(1, n, m, device) 20 | q0_np = q0.squeeze(0).cpu().numpy() 21 | b0_np = b0.squeeze(0).cpu().numpy() 22 | P0_np = P0.squeeze(0).cpu().numpy() 23 | H0_np = H0.squeeze(0).cpu().numpy() 24 | np.save("example_problem.npy", { 25 | "q": q0_np, 26 | "b": b0_np, 27 | "P": P0_np, 28 | "H": H0_np, 29 | }) 30 | 31 | solver = QPSolver(device, n, m, P=P0_np, H=H0_np) 32 | ws = WarmStarter(device, n, m, fixed_P=True, fixed_H=True) 33 | ws.load_state_dict(torch.load(f"models/warmstarter-{n}-{m}.pth")) 34 | solver_ws = QPSolver(device, n, m, P=P0_np, H=H0_np, warm_starter=ws) 35 | 36 | iters = 1000 37 | X, sol = solver(q0, b0, iters=iters) 38 | X_ws, sol_ws = solver_ws(q0, b0, iters=iters) 39 | 40 | 41 | # %% 42 | from matplotlib import pyplot as plt 43 | obj = [(0.5 * bqf(sol[:, i, :], P0) + bvv(sol[:, i, :], q0)).item() for i in range(sol.shape[1])] 44 | obj_ws = [(0.5 * bqf(sol_ws[:, i, :], P0) + bvv(sol_ws[:, i, :], q0)).item() for i in range(sol_ws.shape[1])]  # evaluate the warm-started iterates consistently 45 | plt.plot(obj) 46 | plt.plot(obj_ws) 47 | 48 | # %% 49 | X_diff = [(X[:, i, :] - X[:, -1, :]).norm().item() for i in range(X.shape[1])] 50 | X_diff_ws = [(X_ws[:, i, :] - X[:, -1, :]).norm().item() for i in range(X.shape[1])] 51 | plt.plot(X_diff) 52 | plt.plot(X_diff_ws) 53 | 54 | # %% 55 | -------------------------------------------------------------------------------- /runner_config.yaml: 
-------------------------------------------------------------------------------- 1 | params: 2 | seed: 0 3 | 4 | algo: 5 | name: a2c_continuous 6 | 7 | model: 8 | name: continuous_a2c_logstd 9 | 10 | network: 11 | name: actor_critic 12 | separate: False 13 | 14 | space: 15 | continuous: 16 | mu_activation: None 17 | sigma_activation: None 18 | mu_init: 19 | name: default 20 | sigma_init: 21 | name: const_initializer 22 | val: 0. # std = 1. 23 | fixed_sigma: True 24 | 25 | mlp: 26 | units: [256, 128, 64] 27 | activation: elu 28 | d2rl: False 29 | 30 | initializer: 31 | name: default 32 | regularizer: 33 | name: None 34 | 35 | rnn: 36 | units: 64 37 | layers: 1 38 | name: gru 39 | before_mlp: True 40 | 41 | load_checkpoint: True # flag which sets whether to load the checkpoint 42 | load_path: "checkpoints" # path to the checkpoint to load 43 | 44 | config: 45 | name: default 46 | full_experiment_name: default 47 | env_name: rlgpu 48 | ppo: True 49 | mixed_precision: False 50 | normalize_input: True 51 | normalize_value: True 52 | value_bootstrap: True 53 | num_actors: 100000 54 | reward_shaper: 55 | scale_value: 1.0 56 | normalize_advantage: True 57 | gamma: 0.99 58 | tau: 0.95 59 | e_clip: 0.2 60 | entropy_coef: 0.0 61 | learning_rate: 3.e-4 # overwritten by adaptive lr_schedule 62 | lr_schedule: adaptive 63 | kl_threshold: 0.008 # target kl for adaptive lr 64 | truncate_grads: True 65 | grad_norm: 1. 66 | horizon_length: 200 67 | minibatch_size: 100000 68 | mini_epochs: 5 69 | critic_coef: 2 70 | clip_value: True 71 | seq_len: 10 # only for rnn 72 | bounds_loss_coef: 0.001 73 | 74 | max_epochs: 1000 75 | save_best_after: 20 76 | score_to_win: 20000 77 | save_frequency: 10 78 | print_stats: True 79 | 80 | player: 81 | render: False 82 | render_sleep: 0. 83 | dump_stats: True 84 | games_num: 1 85 | -------------------------------------------------------------------------------- /experiments/tank/test_reward_shaping.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Define the number of GPUs available 4 | NUM_GPUS=6 # Change this to the number of GPUs you have 5 | 6 | # Function to find the first idle GPU 7 | find_idle_gpu() { 8 | for (( i=0; i<$NUM_GPUS; i++ )); do 9 | # Check if there are no processes on GPU i 10 | if [ -z "$(nvidia-smi -i $i --query-compute-apps=pid --format=csv,noheader)" ]; then 11 | echo $i 12 | return 13 | fi 14 | done 15 | echo "-1" # Return -1 if no idle GPU is found 16 | } 17 | 18 | # Function to run the Python script on a specific GPU 19 | run_task() { 20 | local gpu_id=$1 21 | local c1=$2 22 | local c2=$3 23 | local c3=$4 24 | local n_qp=$5 25 | local m_qp=$6 26 | CUDA_VISIBLE_DEVICES=$gpu_id python ../../run.py train tank --num-parallel 100000 --horizon 20 --epochs 5000 --mini-epochs 1 --noise-level 0. 
--reward-shaping ${c1},${c2},${c3} --qp-unrolled --n-qp $n_qp --m-qp $m_qp --shared-PH --affine-qb --use-residual-loss --no-obs-normalization --force-feasible --exp-name qp_test_reward_shaping_${c1}_${c2}_${c3}_${n_qp}_${m_qp} --lr-schedule linear --initial-lr "5e-4" & 27 | } 28 | 29 | # # Main loop for grid search 30 | # for c1 in 50; do 31 | # for c2 in 0.05; do 32 | # for c3 in 2; do 33 | # gpu_id=-1 34 | # # Wait for an idle GPU to become available 35 | # while [ $gpu_id -eq -1 ]; do 36 | # gpu_id=$(find_idle_gpu) 37 | # sleep 1 # Wait a bit before checking again 38 | # done 39 | 40 | # run_task $gpu_id $c1 $c2 $c3 4 24 41 | # # Optional: wait briefly to allow the task to start 42 | # sleep 10 43 | 44 | # gpu_id=-1 45 | # # Wait for an idle GPU to become available 46 | # while [ $gpu_id -eq -1 ]; do 47 | # gpu_id=$(find_idle_gpu) 48 | # sleep 1 # Wait a bit before checking again 49 | # done 50 | # run_task $gpu_id $c1 $c2 $c3 8 48 51 | # # Optional: wait briefly to allow the task to start 52 | # sleep 10 53 | # done 54 | # done 55 | # done 56 | 57 | run_task 1 50 0.05 2 4 24 58 | 59 | # Wait for all background jobs to finish 60 | wait 61 | -------------------------------------------------------------------------------- /experiments/tank/test_force_feasible.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Store the command line argument in a global variable 4 | TRAIN_OR_TEST="$1" 5 | 6 | group_one() { 7 | export CUDA_VISIBLE_DEVICES=0 8 | # n_qp=8 9 | # m_qp=32 10 | # noise_level=0 11 | # python ../../run.py $TRAIN_OR_TEST tank \ 12 | # --num-parallel 100000 \ 13 | # --horizon 20 \ 14 | # --epochs 2000 \ 15 | # --mini-epochs 1 \ 16 | # --qp-unrolled \ 17 | # --shared-PH \ 18 | # --affine-qb \ 19 | # --noise-level ${noise_level} \ 20 | # --n-qp ${n_qp} \ 21 | # --m-qp ${m_qp} \ 22 | # --use-residual-loss \ 23 | # --no-obs-normalization \ 24 | # --exp-name force_feasible_off 25 | n_qp=2 26 | m_qp=64 27 | noise_level=0 28 | python ../../run.py $TRAIN_OR_TEST tank \ 29 | --num-parallel 100000 \ 30 | --horizon 20 \ 31 | --epochs 2000 \ 32 | --mini-epochs 1 \ 33 | --qp-unrolled \ 34 | --shared-PH \ 35 | --affine-qb \ 36 | --noise-level ${noise_level} \ 37 | --n-qp ${n_qp} \ 38 | --m-qp ${m_qp} \ 39 | --use-residual-loss \ 40 | --no-obs-normalization \ 41 | --exp-name force_feasible_off 42 | } 43 | 44 | group_two() { 45 | export CUDA_VISIBLE_DEVICES=1 46 | # n_qp=8 47 | # m_qp=32 48 | # noise_level=0 49 | # python ../../run.py $TRAIN_OR_TEST tank \ 50 | # --num-parallel 100000 \ 51 | # --horizon 20 \ 52 | # --epochs 2000 \ 53 | # --mini-epochs 1 \ 54 | # --qp-unrolled \ 55 | # --shared-PH \ 56 | # --affine-qb \ 57 | # --noise-level ${noise_level} \ 58 | # --n-qp ${n_qp} \ 59 | # --m-qp ${m_qp} \ 60 | # --use-residual-loss \ 61 | # --force-feasible \ 62 | # --no-obs-normalization \ 63 | # --exp-name force_feasible_on 64 | n_qp=2 65 | m_qp=64 66 | noise_level=0 67 | python ../../run.py $TRAIN_OR_TEST tank \ 68 | --num-parallel 100000 \ 69 | --horizon 20 \ 70 | --epochs 2000 \ 71 | --mini-epochs 1 \ 72 | --qp-unrolled \ 73 | --shared-PH \ 74 | --affine-qb \ 75 | --noise-level ${noise_level} \ 76 | --n-qp ${n_qp} \ 77 | --m-qp ${m_qp} \ 78 | --use-residual-loss \ 79 | --force-feasible \ 80 | --no-obs-normalization \ 81 | --exp-name force_feasible_on 82 | } 83 | 84 | # Start both groups in parallel 85 | group_one & group_two & 86 | 87 | # Wait for both background tasks to complete 88 | wait 89 | 
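The paired `group_one`/`group_two` scripts above (`test_residual_loss.sh`, `test_force_feasible.sh`) follow one recurring pattern: each ablation arm is a shell function that pins itself to a GPU via `CUDA_VISIBLE_DEVICES`, both arms are launched as background jobs, and `wait` blocks until they finish. A minimal sketch of that pattern (illustrative only; the experiment names and flag set here are placeholders, not a configuration used by the repository):

```bash
#!/bin/bash
# Two-arm ablation template in the style of test_residual_loss.sh /
# test_force_feasible.sh: one GPU per arm, both arms in parallel.
TRAIN_OR_TEST="$1"

arm_on() {
    export CUDA_VISIBLE_DEVICES=0  # visible only inside this background job
    python ../../run.py $TRAIN_OR_TEST tank --exp-name my_feature_on   # placeholder experiment
}

arm_off() {
    export CUDA_VISIBLE_DEVICES=1
    python ../../run.py $TRAIN_OR_TEST tank --exp-name my_feature_off  # placeholder experiment
}

# Each `&` runs its function in a subshell, so the exports do not collide.
arm_on & arm_off &
wait
```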
-------------------------------------------------------------------------------- /experiments/double_integrator/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Run the double integrator experiment 4 | TRAIN_OR_TEST="$1" 5 | 6 | n_qp=3 7 | m_qp=9 8 | noise_level=0 9 | 10 | g1() { 11 | export CUDA_VISIBLE_DEVICES=0 12 | python ../../run.py $TRAIN_OR_TEST double_integrator \ 13 | --num-parallel 100000 \ 14 | --horizon 20 \ 15 | --epochs 500 \ 16 | --max-steps-per-episode 100 \ 17 | --mini-epochs 1 \ 18 | --qp-unrolled \ 19 | --shared-PH \ 20 | --affine-qb \ 21 | --noise-level ${noise_level} \ 22 | --n-qp ${n_qp} \ 23 | --m-qp ${m_qp} \ 24 | --no-obs-normalization \ 25 | --use-residual-loss \ 26 | --force-feasible \ 27 | --exp-name default 28 | } 29 | 30 | g2() { 31 | export CUDA_VISIBLE_DEVICES=1 32 | # python ../../run.py $TRAIN_OR_TEST double_integrator \ 33 | # --num-parallel 100000 \ 34 | # --horizon 20 \ 35 | # --epochs 500 \ 36 | # --max-steps-per-episode 100 \ 37 | # --mini-epochs 1 \ 38 | # --qp-unrolled \ 39 | # --shared-PH \ 40 | # --affine-qb \ 41 | # --noise-level ${noise_level} \ 42 | # --n-qp ${n_qp} \ 43 | # --m-qp ${m_qp} \ 44 | # --no-obs-normalization \ 45 | # --use-residual-loss \ 46 | # --force-feasible \ 47 | # --symmetric \ 48 | # --exp-name symmetric 49 | python ../../run.py $TRAIN_OR_TEST double_integrator \ 50 | --num-parallel 100000 \ 51 | --horizon 20 \ 52 | --epochs 500 \ 53 | --max-steps-per-episode 100 \ 54 | --mini-epochs 1 \ 55 | --qp-unrolled \ 56 | --shared-PH \ 57 | --affine-qb \ 58 | --noise-level ${noise_level} \ 59 | --n-qp ${n_qp} \ 60 | --m-qp ${m_qp} \ 61 | --no-obs-normalization \ 62 | --use-residual-loss \ 63 | --force-feasible \ 64 | --symmetric \ 65 | --no-b \ 66 | --exp-name symmetric_no_b 67 | } 68 | 69 | g3() { 70 | export CUDA_VISIBLE_DEVICES=1 71 | python ../../run.py $TRAIN_OR_TEST double_integrator \ 72 | --num-parallel 100000 \ 73 | --horizon 20 \ 74 | --epochs 500 \ 75 | --max-steps-per-episode 100 \ 76 | --mini-epochs 1 \ 77 | --noise-level ${noise_level} \ 78 | --exp-name mlp 79 | } 80 | 81 | # g1 & g2 & g3 & 82 | g2 & 83 | 84 | wait 85 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MPC-Inspired Reinforcement Learning for Verifiable Model-Free Control 2 | 3 | Code for the paper: [MPC-Inspired Reinforcement Learning for Verifiable Model-Free Control](https://arxiv.org/pdf/2312.05332) 4 | 5 | ## Installation 6 | 7 | The code is tested on Linux with Python 3.10, PyTorch 2.1.0, and CUDA 12.0. It is recommended to use a conda environment for installation: 8 | 9 | ```bash 10 | # Create and activate conda environment 11 | conda create -n learning-qp python=3.10 12 | conda activate learning-qp 13 | 14 | # Clone repository 15 | git clone --recursive https://github.com/yiwenlu66/learning-qp.git 16 | cd learning-qp 17 | 18 | # Install dependencies 19 | pip install -e . 20 | ``` 21 | 22 | Note: the `--recursive` option is required so that the customized `rl_games` submodule (see `.gitmodules`) is cloned along with the repository. 
23 | 24 | ## Usage 25 | 26 | ``` 27 | python run.py train_or_test env_name [--options] 28 | ``` 29 | 30 | The following scripts are also provided to reproduce the results in the paper: 31 | 32 | - `experiments/tank/reproduce.sh` for reproducing the first part of Table 1 33 | - `experiments/cartpole/reproduce.sh` for reproducing the second part of Table 1 34 | - `experiments/tank/reproduce_disturbed.sh` for reproducing Table 2 35 | 36 | **These scripts are run on GPU by default.** After running each reproducing script, the following data will be saved: 37 | 38 | - Training logs in tensorboard format will be saved in `runs` 39 | - Test results, including the trial output for each experiment and a summary table, all in CSV format, will be saved in `test_results` 40 | 41 | ## Code structure 42 | 43 | - `rl_games`: A customized version of the [rl_games](https://github.com/Denys88/rl_games) library for RL training 44 | - `src/envs`: GPU parallelized simulation environments, with interface similar to [Isaac Gym](https://github.com/NVIDIA-Omniverse/IsaacGymEnvs) 45 | - `src/modules`: PyTorch modules, including the proposed QP-based policy and the underlying differentiable QP solver 46 | - `src/networks`: Wrapper around the QP-based policy for interfacing with `rl_games` 47 | - `src/utils`: Utility functions (customized PyTorch operations, MPC baselines, etc.) 48 | - `experiments`: Sample scripts for running experiments 49 | 50 | ## License 51 | 52 | The project is released under the MIT license. See [LICENSE](LICENSE) for details. 53 | 54 | Part of the project is modified from [rl_games](https://github.com/Denys88/rl_games). 55 | 56 | ## Citation 57 | 58 | If you find this project useful in your research, please consider citing: 59 | 60 | ``` 61 | @InProceedings{lu2024mpc, 62 | title={MPC-Inspired Reinforcement Learning for Verifiable Model-Free Control}, 63 | author={Lu, Yiwen and Li, Zishuo and Zhou, Yihan and Li, Na and Mo, Yilin}, 64 | booktitle={Proceedings of the 6th Conference on Learning for Dynamics and Control}, 65 | year={2024} 66 | } 67 | ``` 68 | -------------------------------------------------------------------------------- /auxiliary/profile.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | file_path = os.path.dirname(__file__) 4 | sys.path.append(os.path.join(file_path, "..")) 5 | import torch 6 | import torch.nn as nn 7 | import csv 8 | import time 9 | from src.modules.qp_unrolled_network import QPUnrolledNetwork 10 | 11 | batch_size = 10000 12 | input_size = 100 13 | device = "cuda:0" 14 | 15 | def mlp_builder(input_size, output_size): 16 | return nn.Sequential( 17 | nn.Linear(input_size, 256), 18 | nn.ReLU(), 19 | nn.Linear(256, 128), 20 | nn.ReLU(), 21 | nn.Linear(128, 64), 22 | nn.ReLU(), 23 | nn.Linear(64, output_size), 24 | ).to(device) 25 | 26 | model_shared = QPUnrolledNetwork( 27 | device=device, 28 | input_size=input_size, 29 | n_qp=10, 30 | m_qp=5, 31 | qp_iter=10, 32 | mlp_builder=mlp_builder, 33 | shared_PH=True, 34 | ) 35 | 36 | model_not_shared = QPUnrolledNetwork( 37 | device=device, 38 | input_size=input_size, 39 | n_qp=10, 40 | m_qp=5, 41 | qp_iter=10, 42 | mlp_builder=mlp_builder, 43 | shared_PH=False, 44 | ) 45 | 46 | def write_csv(prof, filename): 47 | 48 | # Extract key averages 49 | averages = prof.key_averages() 50 | 51 | # Export to CSV 52 | with open(filename, 'w', newline='') as csvfile: 53 | fieldnames = [ 54 | 'Name', 'Self CPU total', 'CPU total', 'CPU time avg', 55 | 'Self CUDA total',
'CUDA total', 'CUDA time avg', 'Number of Calls' 56 | ] 57 | writer = csv.DictWriter(csvfile, fieldnames=fieldnames) 58 | 59 | writer.writeheader() 60 | for avg in averages: 61 | writer.writerow({ 62 | 'Name': avg.key, 63 | 'Self CPU total': avg.self_cpu_time_total, 64 | 'CPU total': avg.cpu_time_total, 65 | 'Self CUDA total': avg.self_cuda_time_total, 66 | 'CUDA total': avg.cuda_time_total, 67 | 'Number of Calls': avg.count 68 | }) 69 | 70 | 71 | def profile(model, tag): 72 | 73 | outputs = [] 74 | 75 | t = time.time() 76 | with torch.autograd.profiler.profile(use_cuda=True) as forward_prof: 77 | for i in range(10): 78 | x = torch.randn((batch_size, input_size), device=device) 79 | outputs.append(model(x)) 80 | print(f"Forward Pass Profiling {tag}:", time.time() - t) 81 | write_csv(forward_prof, f"forward_prof_{tag}.csv") 82 | 83 | t = time.time() 84 | with torch.autograd.profiler.profile(use_cuda=True) as backward_prof: 85 | loss = sum(outputs).mean() 86 | loss.backward() 87 | print(f"Backward Pass Profiling {tag}:", time.time() - t) 88 | write_csv(backward_prof, f"backward_prof_{tag}.csv") 89 | 90 | profile(model_shared, "shared") 91 | profile(model_not_shared, "not_shared") 92 | -------------------------------------------------------------------------------- /src/envs/mpc_baseline_parameters.py: -------------------------------------------------------------------------------- 1 | from .env_creators import sys_param 2 | import numpy as np 3 | import torch 4 | 5 | def get_mpc_baseline_parameters(env_name, N, noise_std=0.): 6 | mpc_parameters = { 7 | "n_mpc": sys_param[env_name]["n"], 8 | "m_mpc": sys_param[env_name]["m"], 9 | "N": N, 10 | **sys_param[env_name], 11 | } 12 | if env_name == "tank": 13 | # Compute state and ref from obs: the first n entries of obs is state, and the latter n entries are ref 14 | mpc_parameters["obs_to_state_and_ref"] = lambda obs: (obs[:, :mpc_parameters["n_mpc"]], obs[:, mpc_parameters["n_mpc"]:]) 15 | A_nom = sys_param[env_name]["A"] 16 | A_max = np.copy(A_nom) 17 | A_max[tuple(zip(*[(0, 0), (0, 2), (1, 1), (1, 3), (2, 2), (3, 3)]))] += 0.002 18 | B_nom = sys_param[env_name]["B"] 19 | B_max = np.copy(B_nom) 20 | B_max *= 1.02 21 | mpc_parameters["A_scenarios"] = [A_nom, A_max] 22 | mpc_parameters["B_scenarios"] = [B_nom, B_max] 23 | n_mpc = mpc_parameters["n_mpc"] 24 | mpc_parameters["w_scenarios"] = [ 25 | np.zeros((n_mpc, 1)), 26 | 3 * noise_std * np.ones((n_mpc, 1)), 27 | -3 * noise_std * np.ones((n_mpc, 1)), 28 | ] 29 | # mpc_parameters["max_disturbance_per_dim"] = 0.5 * (3 * noise_std + 20 * 0.002 * 2 + 8 * 0.02 * 2) 30 | if env_name == "cartpole": 31 | # Compute A, B matrices for linearized system 32 | m_pole = mpc_parameters["m_pole_nom"] 33 | m_cart = mpc_parameters["m_cart_nom"] 34 | l = mpc_parameters["l_nom"] 35 | g = 9.8 36 | 37 | # Continuous time A, B matrices 38 | A_ct = np.array([ 39 | [0, 1, 0, 0], 40 | [0, 0, -g * m_pole / m_cart, 0], 41 | [0, 0, 0, 1], 42 | [0, 0, (m_cart + m_pole) * g / (l * m_cart) , 0], 43 | ]) 44 | B_ct = np.array([ 45 | [0], 46 | [1 / m_cart], 47 | [0], 48 | [-1 / (l * m_cart)], 49 | ]) 50 | 51 | # Discretization 52 | dt = sys_param[env_name]["dt"] 53 | A = np.eye(4) + dt * A_ct 54 | B = dt * B_ct 55 | 56 | mpc_parameters["A"] = A 57 | mpc_parameters["B"] = B 58 | 59 | # Compute state and ref from obs: obs is in format (x, x_ref, x_dot, sin_theta, cos_theta, theta_dot) 60 | def obs_to_state_and_ref(obs): 61 | x, x_dot, theta, theta_dot, x_ref = obs[:, 0], obs[:, 1], obs[:, 2], obs[:, 3], obs[:, 4] 62 | state = 
torch.stack([x, x_dot, theta, theta_dot], dim=1) 63 | zeros = torch.zeros_like(x_ref) 64 | ref = torch.stack([x_ref, zeros, zeros, zeros], dim=1) 65 | return state, ref 66 | mpc_parameters["obs_to_state_and_ref"] = obs_to_state_and_ref 67 | 68 | return mpc_parameters 69 | -------------------------------------------------------------------------------- /src/utils/np_batch_op.py: -------------------------------------------------------------------------------- 1 | import scipy 2 | import numpy as np 3 | import os 4 | from concurrent.futures import ProcessPoolExecutor 5 | 6 | 7 | def _getindex(arr, i): 8 | """ 9 | Retrieves the ith element of an array, or the entire array if it's a scipy sparse matrix. 10 | 11 | Parameters: 12 | arr (np.ndarray or scipy.sparse.csc_matrix): The array or sparse matrix. 13 | i (int): Index of the element to retrieve. 14 | 15 | Returns: 16 | np.ndarray or scipy.sparse.csc_matrix: The ith element of the array or the entire array if it's a sparse matrix. 17 | """ 18 | if type(arr) == scipy.sparse.csc_matrix: 19 | return arr 20 | else: 21 | return arr[i] if arr.shape[0] > 1 else arr[0] 22 | 23 | def _worker(i): 24 | """ 25 | Worker function to apply the function 'f' on slices of arrays for parallel processing. 26 | 27 | Parameters: 28 | i (int): The index representing which slice of the arrays to process. 29 | 30 | Returns: 31 | tuple: A tuple of results returned by the function 'f'. 32 | """ 33 | f = _worker.f 34 | arrays = _worker.arrays 35 | results = f(*[_getindex(arr, i) for arr in arrays]) 36 | return results if isinstance(results, tuple) else (results,) 37 | 38 | def np_batch_op(f, *arrays, max_workers=int(os.environ.get("MAX_CPU_WORKERS", 8))): 39 | """ 40 | Applies a function in a batch operation on multiple arrays, possibly in parallel, handling multiple return values. 41 | If the function 'f' returns a single value, the function returns a single concatenated value instead of a tuple. 42 | 43 | Parameters: 44 | f (callable): The function to apply. Can return multiple values. 45 | arrays (list of np.ndarray or scipy.sparse.csc_matrix): Arrays on which the function is to be applied. 46 | 47 | Returns: 48 | np.ndarray or tuple: A concatenated array if 'f' returns a single value, otherwise a tuple of concatenated arrays. 
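    Example (illustrative sketch, not part of the original docstring; it relies on
    fork-based process start, as on Linux, so that the attributes set on `_worker`
    are inherited by the worker processes):

        >>> def f(a, b):
        ...     return a + b, a - b
        >>> sums, diffs = np_batch_op(f, np.ones((4, 3)), np.zeros((4, 3)))
        >>> sums.shape
        (4, 3)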
49 | """ 50 | get_bs = lambda arr: 1 if type(arr) == scipy.sparse.csc_matrix else arr.shape[0] 51 | bs = max([get_bs(arr) for arr in arrays]) 52 | _worker.f = f 53 | _worker.arrays = arrays 54 | 55 | with ProcessPoolExecutor(max_workers=max_workers) as executor: 56 | all_results = list(executor.map(_worker, range(bs))) 57 | 58 | processed_results = [] 59 | for i in range(len(all_results[0])): 60 | results = [result[i] for result in all_results] 61 | if isinstance(results[0], np.ndarray): 62 | processed_result = np.concatenate([np.expand_dims(arr, 0) for arr in results], 0) 63 | else: 64 | processed_result = np.array(results) 65 | processed_results.append(processed_result) 66 | 67 | # Return a single value if there's only one result, otherwise return a tuple 68 | return processed_results[0] if len(processed_results) == 1 else tuple(processed_results) 69 | -------------------------------------------------------------------------------- /experiments/tank/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | group_one() { 4 | export CUDA_VISIBLE_DEVICES=0 5 | # for N in 1 2 4 8 16; do 6 | for N in 4; do 7 | # for noise in 0 0.1; do 8 | for noise in 0.1; do 9 | python ../../run.py test tank --num-parallel 10000 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --shared-PH --affine-qb --mpc-baseline-N ${N} --noise-level ${noise} --batch-test --quiet --randomize --exp-name qp_unrolled_shared_affine --run-name N${N}_noise${noise}_rand 10 | done 11 | done 12 | # # for noise in 0 0.1; do 13 | # for noise in 0.1; do 14 | # # for n in 2 4 8 16; do 15 | # for n in 8; do 16 | # # for m in 2 4 8 16 32 64; do 17 | # for m in 32; do 18 | # # python ../../run.py test tank --num-parallel 10000 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --shared-PH --affine-qb --noise-level ${noise} --batch-test --n-qp ${n} --m-qp ${m} --quiet --exp-name shared_affine_noise${noise}_n${n}_m${m}+rand --randomize --run-name N0_n${n}_m${m}_noise${noise}_rand 19 | # python ../../run.py test tank --num-parallel 10000 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --shared-PH --affine-qb --noise-level ${noise} --batch-test --n-qp ${n} --m-qp ${m} --quiet --randomize --exp-name shared_affine_noise${noise}_n${n}_m${m}+rand --run-name N0_n${n}_m${m}_noise${noise}_rand 20 | # done 21 | # done 22 | # done 23 | } 24 | 25 | group_two() { 26 | export CUDA_VISIBLE_DEVICES=1 27 | # for N in 1 2 4 8 16; do 28 | # for noise in 0.2 0.5; do 29 | # python ../../run.py test tank --num-parallel 10000 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --shared-PH --affine-qb --mpc-baseline-N ${N} --noise-level ${noise} --batch-test --quiet --exp-name qp_unrolled_shared_affine --randomize --run-name N${N}_noise${noise}_rand --use-osqp-for-mpc 30 | # done 31 | # done 32 | # for noise in 0.2 0.5; do 33 | # for n in 2 4 8 16; do 34 | # for m in 2 4 8 16 32 64; do 35 | # python ../../run.py test tank --num-parallel 10000 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --shared-PH --affine-qb --noise-level ${noise} --batch-test --n-qp ${n} --m-qp ${m} --quiet --exp-name shared_affine_noise${noise}_n${n}_m${m}+rand --randomize --run-name N0_n${n}_m${m}_noise${noise}_rand 36 | # done 37 | # done 38 | # done 39 | # for noise in 0 0.1 0.2 0.5; do 40 | for noise in 0.1; do 41 | python ../../run.py test tank --num-parallel 10000 --horizon 20 --epochs 2000 --mini-epochs 1 --mpc-baseline-N 0 --noise-level ${noise} --batch-test --exp-name 
mlp_noise${noise}+rand --randomize --run-name mlp_noise${noise}_rand 42 | done 43 | } 44 | 45 | # Start both groups in parallel 46 | # group_one & group_two & 47 | group_two 48 | 49 | # Wait for both background tasks to complete 50 | # wait 51 | -------------------------------------------------------------------------------- /auxiliary/test_solver_parallel.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | import os 4 | file_path = os.path.dirname(__file__) 5 | sys.path.append(os.path.join(file_path, "..")) 6 | from src.envs.mpc_baseline_parameters import get_mpc_baseline_parameters 7 | from src.modules.qp_solver import QPSolver 8 | from src.utils.mpc_utils import mpc2qp 9 | from src.utils.osqp_utils import osqp_oracle 10 | from src.utils.np_batch_op import np_batch_op 11 | import torch 12 | import time 13 | import scipy 14 | import functools 15 | import csv 16 | 17 | 18 | def compare(N, num_parallel, device="cuda:0", iterations=100, seed=42, max_cpu_workers=8): 19 | """ 20 | Compare parallelized solver vs. OSQP on MPC problem with horizon N. 21 | """ 22 | 23 | # Load model and config 24 | mpc_baseline_parameters = get_mpc_baseline_parameters("tank", N) 25 | n_mpc = mpc_baseline_parameters["n_mpc"] 26 | m_mpc = mpc_baseline_parameters["m_mpc"] 27 | Qf = np.eye(n_mpc) # Set terminal cost of MPC as needed 28 | mpc_baseline_parameters["Qf"] = Qf 29 | x_min = mpc_baseline_parameters["x_min"] 30 | x_max = mpc_baseline_parameters["x_max"] 31 | u_min = mpc_baseline_parameters["u_min"] 32 | u_max = mpc_baseline_parameters["u_max"] 33 | A = mpc_baseline_parameters["A"] 34 | B = mpc_baseline_parameters["B"] 35 | Q = mpc_baseline_parameters["Q"] 36 | R = mpc_baseline_parameters["R"] 37 | 38 | # Generate current state 39 | t = lambda a: torch.tensor(a, device=device, dtype=torch.float) 40 | x = t(x_min).unsqueeze(0) + t(x_max - x_min).unsqueeze(0) * torch.rand((num_parallel, 2 * n_mpc), device=device) 41 | 42 | # Translate to QP problem 43 | eps = 1e-3 44 | n, m, P, q, H, b = mpc2qp( 45 | n_mpc, 46 | m_mpc, 47 | N, 48 | t(A), 49 | t(B), 50 | t(Q), 51 | t(R), 52 | x_min + eps, 53 | x_max - eps, 54 | u_min, 55 | u_max, 56 | *mpc_baseline_parameters["obs_to_state_and_ref"](x), 57 | normalize=mpc_baseline_parameters.get("normalize", False), 58 | Qf=t(Qf), 59 | ) 60 | 61 | # Time solving with GPU parallelized solver 62 | solver = QPSolver(device, n, m, P=P, H=H) 63 | t = time.time() 64 | Xs, primal_sols = solver(q, b, iters=iterations) 65 | t_parallel = time.time() - t 66 | 67 | # Time solving with OSQP 68 | f = lambda t: t.detach().cpu().numpy() 69 | f_sparse = lambda t: scipy.sparse.csc_matrix(t.cpu().numpy()) 70 | osqp_oracle_with_iter_count = functools.partial(osqp_oracle, return_iter_count=True, max_iter=iterations) 71 | q_np, b_np, P_np, H_np = f(q), f(b), f_sparse(P), f_sparse(H) 72 | t = time.time() 73 | sol_np, iter_counts = np_batch_op(osqp_oracle_with_iter_count, q_np, b_np, P_np, H_np, max_workers=max_cpu_workers) 74 | t_osqp = time.time() - t 75 | 76 | return n, m, t_parallel, t_osqp 77 | 78 | Ns_mpc = [2 ** i for i in range(1, 5)] 79 | nums_parallel = [2 ** i for i in range(1, 16)] 80 | func_input = [(N, num_parallel) for N in Ns_mpc for num_parallel in nums_parallel] 81 | func_output = [compare(*args) for args in func_input] 82 | # Write to CSV 83 | with open("parallel_vs_osqp.csv", "w") as f: 84 | writer = csv.writer(f) 85 | writer.writerow(["N_mpc", "num_parallel", "n_qp", "m_qp", "t_parallel", "t_osqp"]) 86 | for 
args, output in zip(func_input, func_output): 87 | writer.writerow([*args, *output]) 88 | -------------------------------------------------------------------------------- /experiments/cartpole/benchmark_stat.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import torch 3 | from glob import glob 4 | 5 | df = pd.DataFrame(columns=[ 6 | "Noise level", 7 | "Parametric uncertainty", 8 | "Method", 9 | "Horizon", 10 | "Num of variables", 11 | "Num of constraints", 12 | "Num of learnable policy parameters", 13 | "Average cost", 14 | "Average cost (with penalty)", 15 | "Frequency of constraint violation (x1000)", 16 | ]) 17 | 18 | def read_csv(wildcard): 19 | filename = sorted(glob(f"test_results/{wildcard}"))[-1] 20 | return pd.read_csv(filename, dtype={"constraint_violated": "bool"}) 21 | 22 | def get_stat(df): 23 | max_episode_length = df['episode_length'].max() 24 | penalty = 100000 25 | avg_cost = df['cumulative_cost'].sum() / df['episode_length'].sum() 26 | avg_cost_penalized = (df['cumulative_cost'].sum() + penalty * df["constraint_violated"].sum()) / df['episode_length'].sum() 27 | freq_violation = df["constraint_violated"].sum() / df['episode_length'].sum() 28 | return avg_cost, avg_cost_penalized, freq_violation * 1000 29 | 30 | def count_parameters(exp_name): 31 | checkpoint_path = f"runs/cartpole_{exp_name}/nn/cartpole.pth" 32 | checkpoint = torch.load(checkpoint_path) 33 | total_params = 0 34 | for key, value in checkpoint['model'].items(): 35 | if key.startswith("a2c_network.policy_net") or key.startswith("a2c_network.actor_mlp"): 36 | total_params += value.numel() 37 | return total_params 38 | 39 | for noise_level in [0, 0.1, 0.2, 0.5]: 40 | for rand in [False, True]: 41 | try: 42 | wildcard = f"mlp_noise{noise_level}{'_rand' if rand else ''}_2*" 43 | mlp_df = read_csv(wildcard) 44 | df.loc[len(df)] = [ 45 | noise_level, 46 | rand, 47 | "MLP", 48 | "-", 49 | "-", 50 | "-", 51 | count_parameters(f"mlp_noise{noise_level}"), 52 | *get_stat(mlp_df), 53 | ] 54 | except: 55 | print(f"Error reading file: {wildcard}") 56 | 57 | for n in [2, 4, 8, 16]: 58 | for m in [2, 4, 8, 16, 32, 64]: 59 | try: 60 | wildcard = f"N0_n{n}_m{m}_noise{noise_level}{'_rand' if rand else ''}_2*" 61 | qp_df = read_csv(wildcard) 62 | df.loc[len(df)] = [ 63 | noise_level, 64 | rand, 65 | "QP", 66 | "-", 67 | n, 68 | m, 69 | count_parameters(f"shared_affine_noise{noise_level}_n{n}_m{m}"), 70 | *get_stat(qp_df), 71 | ] 72 | except: 73 | print(f"Error reading file: {wildcard}") 74 | 75 | for N in [1, 2, 4, 8, 16]: 76 | try: 77 | wildcard = f"N{N}_noise{noise_level}{'_rand' if rand else ''}_2*" 78 | mpc_df = read_csv(wildcard) 79 | df.loc[len(df)] = [ 80 | noise_level, 81 | rand, 82 | "MPC", 83 | N, 84 | 1 * N, 85 | 10 * N, 86 | 0, 87 | *get_stat(mpc_df), 88 | ] 89 | except: 90 | print(f"Error reading file: {wildcard}") 91 | 92 | df.to_csv("benchmark_stat.csv", index=False) 93 | -------------------------------------------------------------------------------- /experiments/cartpole/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | group_one() { 4 | export CUDA_VISIBLE_DEVICES=0 5 | # for N in 1 4 16; do 6 | # for noise in 0 0.5; do 7 | # python ../../run.py test cartpole --num-parallel 10000 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --shared-PH --affine-qb --mpc-baseline-N ${N} --noise-level ${noise} --batch-test --quiet --exp-name qp_unrolled_shared_affine --run-name 
N${N}_noise${noise} --use-osqp-for-mpc 8 | # done 9 | # done 10 | for N in 1 4 16; do 11 | for noise in 0.5; do 12 | python ../../run.py test cartpole --num-parallel 10000 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --shared-PH --affine-qb --mpc-baseline-N ${N} --noise-level ${noise} --batch-test --quiet --exp-name qp_unrolled_shared_affine --randomize --run-name N${N}_noise${noise}_rand --use-osqp-for-mpc 13 | done 14 | done 15 | # for noise in 0 0.5; do 16 | # for n in 2 16; do 17 | # for m in 4 64; do 18 | # python ../../run.py test cartpole --num-parallel 10000 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --shared-PH --affine-qb --noise-level ${noise} --batch-test --n-qp ${n} --m-qp ${m} --quiet --exp-name shared_affine_noise${noise}_n${n}_m${m} --run-name N0_n${n}_m${m}_noise${noise} 19 | # done 20 | # done 21 | # done 22 | # for noise in 0 0.5; do 23 | # for n in 2 16; do 24 | # for m in 4 64; do 25 | # python ../../run.py test cartpole --num-parallel 10000 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --shared-PH --affine-qb --noise-level ${noise} --batch-test --n-qp ${n} --m-qp ${m} --quiet --exp-name shared_affine_noise${noise}_n${n}_m${m}+rand --randomize --run-name N0_n${n}_m${m}_noise${noise}_rand 26 | # done 27 | # done 28 | # done 29 | } 30 | 31 | group_two() { 32 | export CUDA_VISIBLE_DEVICES=1 33 | # for N in 1 2 4 8 16; do 34 | # for noise in 0.2 0.5; do 35 | # python ../../run.py test cartpole --num-parallel 10000 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --shared-PH --affine-qb --mpc-baseline-N ${N} --noise-level ${noise} --batch-test --quiet --exp-name qp_unrolled_shared_affine --randomize --run-name N${N}_noise${noise}_rand --use-osqp-for-mpc 36 | # done 37 | # done 38 | # for noise in 0.2 0.5; do 39 | # for n in 2 4 8 16; do 40 | # for m in 2 4 8 16 32 64; do 41 | # python ../../run.py test cartpole --num-parallel 10000 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --shared-PH --affine-qb --noise-level ${noise} --batch-test --n-qp ${n} --m-qp ${m} --quiet --exp-name shared_affine_noise${noise}_n${n}_m${m}+rand --randomize --run-name N0_n${n}_m${m}_noise${noise}_rand 42 | # done 43 | # done 44 | # done 45 | # for noise in 0 0.5; do 46 | # python ../../run.py test cartpole --num-parallel 10000 --horizon 20 --epochs 2000 --mini-epochs 1 --mpc-baseline-N 0 --noise-level ${noise} --batch-test --exp-name mlp_noise${noise} # --run-name mlp_noise${noise} 47 | # done 48 | # for noise in 0 0.5; do 49 | # python ../../run.py test cartpole --num-parallel 10000 --horizon 20 --epochs 2000 --mini-epochs 1 --mpc-baseline-N 0 --noise-level ${noise} --batch-test --exp-name mlp_noise${noise}+rand --randomize --run-name mlp_noise${noise}_rand 50 | # done 51 | } 52 | 53 | # Start both groups in parallel 54 | group_one & group_two & 55 | 56 | # Wait for both background tasks to complete 57 | wait 58 | -------------------------------------------------------------------------------- /experiments/tank/test_skip_steady.py: -------------------------------------------------------------------------------- 1 | # %% Problem setup 2 | import sys 3 | import os 4 | file_path = os.path.dirname(__file__) 5 | sys.path.append(os.path.join(file_path, "../..")) 6 | 7 | import numpy as np 8 | from src.envs.env_creators import sys_param, env_creators 9 | x_ref = np.array([19., 19., 2., 2.]) 10 | A = sys_param["tank"]["A"] 11 | B = sys_param["tank"]["B"] 12 | Q = sys_param["tank"]["Q"] 13 | R = sys_param["tank"]["R"] 14 | x_min = 
sys_param["tank"]["x_min"] * np.ones(4) 15 | x_max = sys_param["tank"]["x_max"] * np.ones(4) 16 | u_min = sys_param["tank"]["u_min"] * np.ones(2) 17 | u_max = 1.0 * np.ones(2) 18 | 19 | # %% Oracle 20 | from src.utils.osqp_utils import osqp_oracle 21 | 22 | # min (x - x_ref)' * Q * (x - x_ref) + u' * R * u, s.t., x = (I - A)^{-1} * B * u, x_min <= x <= x_max, u_min <= u <= u_max; cast into min 0.5 * u' * P * u + q' * u, s.t., H * u + b >= 0 23 | 24 | inv_I_minus_A = np.linalg.inv(np.eye(A.shape[0]) - A) 25 | P = 2 * (B.T @ inv_I_minus_A.T @ Q @ inv_I_minus_A @ B + R) 26 | 27 | # Calculate q 28 | q = -2 * inv_I_minus_A.T @ Q.T @ x_ref @ B 29 | 30 | # Calculate c 31 | c = x_ref.T @ Q @ x_ref 32 | 33 | # Calculate H and b 34 | H = np.vstack([ 35 | inv_I_minus_A @ B, 36 | -inv_I_minus_A @ B, 37 | np.eye(u_min.shape[0]), 38 | -np.eye(u_max.shape[0]) 39 | ]) 40 | 41 | b = np.hstack([ 42 | -x_min, 43 | x_max, 44 | -u_min, 45 | u_max 46 | ]) 47 | 48 | u_opt = osqp_oracle(q, b, P, H) 49 | x_opt = inv_I_minus_A @ B @ u_opt 50 | 51 | # %% Evaluation 52 | from icecream import ic 53 | eval_value = lambda u: 0.5 * u.T @ P @ u + q.T @ u + c 54 | opt_val = eval_value(u_opt) 55 | ic(opt_val) 56 | 57 | # %% Evaluate the learned controller 58 | import torch 59 | from src.modules.qp_unrolled_network import QPUnrolledNetwork 60 | 61 | def get_state_dict(checkpoint_path): 62 | checkpoint = torch.load(checkpoint_path) 63 | model = checkpoint["model"] 64 | prefix = "a2c_network.policy_net." 65 | policy_net_state_dict = {k[len(prefix):]: v for (k, v) in model.items() if k.startswith(prefix)}  # slice off the prefix; str.lstrip would strip characters, not the prefix 66 | if "running_mean_std.running_mean" in model: 67 | running_mean = model["running_mean_std.running_mean"].to(dtype=torch.float) 68 | running_std = model["running_mean_std.running_var"].sqrt().to(dtype=torch.float) 69 | else: 70 | running_mean = torch.tensor([0.]) 71 | running_std = torch.tensor([1.]) 72 | return policy_net_state_dict, running_mean, running_std 73 | 74 | def make_obs(x, x_ref, running_mean, running_std, normalize): 75 | raw_obs = torch.tensor(np.concatenate([x, x_ref]), device=device, dtype=torch.float) 76 | if not normalize: 77 | return raw_obs.unsqueeze(0) 78 | else: 79 | return ((raw_obs - running_mean) / running_std).unsqueeze(0) 80 | 81 | 82 | n_sys = 4 83 | m_sys = 2 84 | input_size = 8 # 4 for x, 4 for x_ref 85 | n = 8 86 | m = 32 87 | qp_iter = 10 88 | device = "cuda:0" 89 | 90 | # Learned QP 91 | net = QPUnrolledNetwork(device, input_size, n, m, qp_iter, None, True, True) 92 | exp_name = "test_skip_steady" 93 | checkpoint_path = f"runs/tank_{exp_name}/nn/tank.pth" 94 | policy_net_state_dict, running_mean, running_std = get_state_dict(checkpoint_path) 95 | net.load_state_dict(policy_net_state_dict) 96 | running_mean, running_std = running_mean.to(device=device), running_std.to(device=device) 97 | net.to(device) 98 | obs = make_obs(0 * np.ones(4), x_ref, running_mean, running_std, False) 99 | action_all, problem_params = net(obs, return_problem_params=True) 100 | u = action_all[:, :2].squeeze(0).detach().cpu().numpy() 101 | learned_val = eval_value(u) 102 | ic(learned_val) 103 | 104 | 105 | # %% 106 | -------------------------------------------------------------------------------- /src/modules/preconditioner.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.nn import functional as F 4 | from torch.linalg import solve, inv, pinv 5 | import numpy as np 6 | 7 | from ..utils.torch_utils import make_psd,
vectorize_upper_triangular 8 | 9 | class Preconditioner(nn.Module): 10 | def __init__(self, device, n, m, 11 | P=None, Pinv=None, H=None, 12 | dummy=False, 13 | beta=1, 14 | adaptive=False): 15 | """ 16 | dummy = True: fix D = I 17 | adaptive = False: use same D for all q, b; adaptive = True: determine D based on q, b 18 | Specify P, H if they are fixed; otherwise they need to be passed in when calling forward. 19 | """ 20 | super().__init__() 21 | self.device = device 22 | self.n = n 23 | self.m = m 24 | create_tensor = lambda t: torch.tensor(t, dtype=torch.float, device=device) if type(t) != torch.Tensor and t is not None else t 25 | self.P = create_tensor(P) # (1, n, n) 26 | self.Pinv = create_tensor(Pinv) # (1, n, n) 27 | self.H = create_tensor(H) # (1, m, n) 28 | self.dummy = dummy 29 | self.beta = beta 30 | self.adaptive = adaptive 31 | self.bP = self.P.unsqueeze(0) if P is not None else None 32 | self.bPinv = self.Pinv.unsqueeze(0) if Pinv is not None else None 33 | self.bH = self.H.unsqueeze(0) if H is not None else None 34 | if P is not None and H is not None: 35 | self.bHPinvHt = (self.H @ solve(self.P, self.H.t())).unsqueeze(0) # (1, m, m) 36 | elif Pinv is not None and H is not None: 37 | self.bHPinvHt = (self.H @ self.Pinv @ self.H.t()).unsqueeze(0) 38 | else: 39 | self.bHPinvHt = None 40 | 41 | # Parameterize D using Cholesky decomposition 42 | num_param = m * (m + 1) // 2 43 | if not dummy: 44 | if not adaptive: 45 | self.param = nn.Parameter(torch.zeros((num_param,), device=device)) # (m, m) 46 | else: 47 | num_in = n + m 48 | if self.bP is None: 49 | num_in += n * (n + 1) // 2 50 | if self.bH is None: 51 | num_in += n * m 52 | self.D_net = nn.Sequential( 53 | nn.Linear(num_in, num_in), 54 | nn.ReLU(), 55 | nn.Linear(num_in, num_param), 56 | ).to(device=device) 57 | 58 | def forward(self, q=None, b=None, P=None, H=None, 59 | input_P_is_inversed=False, 60 | output_tD_is_inversed=False, 61 | ): 62 | # q: (bs, n), b: (bs, m) 63 | if self.dummy: 64 | D = torch.eye(self.m, device=self.device) 65 | elif not self.adaptive: 66 | D = make_psd(self.param.unsqueeze(0)) # (1, m, m) 67 | else: 68 | assert q is not None and b is not None 69 | net_input = [q, b] 70 | if self.bP is None: 71 | net_input.append(vectorize_upper_triangular(P)) 72 | if self.bH is None: 73 | net_input.append(H.flatten(start_dim=-2)) 74 | net_input_t = torch.cat(net_input, 1) * 1e-6 75 | D = make_psd(self.D_net(net_input_t)) # (bs, m, m) 76 | D /= self.beta 77 | bH = self.bH if self.bH is not None else H 78 | bP_param = self.bP if self.bP is not None else P 79 | op = solve if not input_P_is_inversed else torch.matmul 80 | bHPinvHt = self.bHPinvHt if self.bHPinvHt is not None else (bH @ op(bP_param, bH.transpose(-1, -2))) 81 | tD_inv = D + bHPinvHt 82 | if output_tD_is_inversed: 83 | return D, tD_inv 84 | else: 85 | tD = inv(tD_inv) # (*, m, m) 86 | return D, tD 87 | -------------------------------------------------------------------------------- /experiments/tank/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Store the command line argument in a global variable 4 | TRAIN_OR_TEST="$1" 5 | 6 | # Function to run commands 1-3 sequentially with VAR=1 7 | group_one() { 8 | export CUDA_VISIBLE_DEVICES=0 9 | # for n_qp in 2 4 8 16; do 10 | for n_qp in 8; do 11 | # for m_qp in 2 4 8 16 32 64; do 12 | for m_qp in 32; do 13 | # for noise_level in 0 0.1; do 14 | for noise_level in 0; do 15 | python ../../run.py $TRAIN_OR_TEST tank \ 16 | --num-parallel 
100000 \ 17 | --horizon 20 \ 18 | --epochs 2000 \ 19 | --mini-epochs 1 \ 20 | --qp-unrolled \ 21 | --shared-PH \ 22 | --affine-qb \ 23 | --noise-level ${noise_level} \ 24 | --n-qp ${n_qp} \ 25 | --m-qp ${m_qp} \ 26 | --no-obs-normalization \ 27 | --no-b \ 28 | --exp-name shared_affine_noise${noise_level}_n${n_qp}_m${m_qp}-norm-b 29 | done 30 | done 31 | done 32 | # python ../../run.py $TRAIN_OR_TEST tank --num-parallel 100000 --horizon 20 --epochs 2000 --gamma 0.999 --mini-epochs 1 --exp-name vanilla_rl 33 | # python ../../run.py $TRAIN_OR_TEST tank --num-parallel 100 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --shared-PH --affine-qb --mpc-baseline-N 0 --noise-level 0 --batch-test --quiet --exp-name qp_unrolled_shared_affine 34 | # python ../../run.py $TRAIN_OR_TEST tank --num-parallel 100000 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --shared-PH --qp-iter 100 --exp-name qp_unrolled_shared_more_iter 35 | } 36 | 37 | # Function to run commands 4-6 sequentially with VAR=2 38 | group_two() { 39 | export CUDA_VISIBLE_DEVICES=1 40 | # python ../../run.py $TRAIN_OR_TEST tank --num-parallel 100000 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --shared-PH --exp-name qp_unrolled_shared 41 | # python ../../run.py $TRAIN_OR_TEST tank --num-parallel 100000 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --exp-name qp_unrolled 42 | # python ../../run.py $TRAIN_OR_TEST tank --num-parallel 100000 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --warm-start --exp-name qp_unrolled_ws 43 | # python ../../run.py $TRAIN_OR_TEST tank --num-parallel 100000 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --shared-PH --exp-name qp_unrolled_shared 44 | # python ../../run.py $TRAIN_OR_TEST tank --num-parallel 100000 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --shared-PH --qp-iter 10 --warm-start --exp-name qp_unrolled_shared_ws 45 | # python ../../run.py $TRAIN_OR_TEST tank --num-parallel 100000 --horizon 20 --epochs 1 --mini-epochs 1 --qp-unrolled --shared-PH --exp-name computation-test 46 | for n_qp in 2 4 8 16; do 47 | for m_qp in 2 4 8 16 32 64; do 48 | for noise_level in 0.2 0.5; do 49 | python ../../run.py $TRAIN_OR_TEST tank \ 50 | --num-parallel 100000 \ 51 | --horizon 20 \ 52 | --epochs 2000 \ 53 | --mini-epochs 1 \ 54 | --qp-unrolled \ 55 | --shared-PH \ 56 | --affine-qb \ 57 | --noise-level ${noise_level} \ 58 | --n-qp ${n_qp} \ 59 | --m-qp ${m_qp} \ 60 | --randomize \ 61 | --exp-name shared_affine_noise${noise_level}_n${n_qp}_m${m_qp}+rand 62 | done 63 | done 64 | done 65 | for noise_level in 0 0.1 0.2 0.5; do 66 | python ../../run.py $TRAIN_OR_TEST tank \ 67 | --num-parallel 100000 \ 68 | --horizon 20 \ 69 | --epochs 2000 \ 70 | --mini-epochs 1 \ 71 | --noise-level ${noise_level} \ 72 | --randomize \ 73 | --exp-name mlp_noise${noise_level}+rand 74 | done 75 | } 76 | 77 | # Start both groups in parallel 78 | # group_one & group_two & 79 | group_one 80 | 81 | # Wait for both background tasks to complete 82 | # wait 83 | -------------------------------------------------------------------------------- /experiments/cartpole/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Store the command line argument in a global variable 4 | TRAIN_OR_TEST="$1" 5 | 6 | # Function to run commands 1-3 sequentially with VAR=1 7 | group_one() { 8 | export CUDA_VISIBLE_DEVICES=0 9 | for n_qp in 8; do 10 | for m_qp in 32; do 11 | for noise_level in 0; do 12 | python ../../run.py 
$TRAIN_OR_TEST cartpole \ 13 | --num-parallel 100000 \ 14 | --horizon 20 \ 15 | --epochs 500 \ 16 | --mini-epochs 1 \ 17 | --qp-unrolled \ 18 | --shared-PH \ 19 | --affine-qb \ 20 | --noise-level ${noise_level} \ 21 | --n-qp ${n_qp} \ 22 | --m-qp ${m_qp} \ 23 | --exp-name shared_affine_noise${noise_level}_n${n_qp}_m${m_qp} 24 | done 25 | done 26 | done 27 | # for n_qp in 2 16; do 28 | # for m_qp in 4 64; do 29 | # for noise_level in 0; do 30 | # python ../../run.py $TRAIN_OR_TEST cartpole \ 31 | # --num-parallel 100000 \ 32 | # --horizon 20 \ 33 | # --epochs 500 \ 34 | # --mini-epochs 1 \ 35 | # --qp-unrolled \ 36 | # --shared-PH \ 37 | # --affine-qb \ 38 | # --noise-level ${noise_level} \ 39 | # --n-qp ${n_qp} \ 40 | # --m-qp ${m_qp} \ 41 | # --randomize \ 42 | # --exp-name shared_affine_noise${noise_level}_n${n_qp}_m${m_qp}+rand 43 | # done 44 | # done 45 | # done 46 | # for noise_level in 0 0.5; do 47 | # python ../../run.py $TRAIN_OR_TEST cartpole \ 48 | # --num-parallel 100000 \ 49 | # --horizon 20 \ 50 | # --epochs 500 \ 51 | # --mini-epochs 1 \ 52 | # --noise-level ${noise_level} \ 53 | # --exp-name mlp_noise${noise_level} 54 | # done 55 | } 56 | 57 | # Function to run commands 4-6 sequentially with VAR=2 58 | group_two() { 59 | export CUDA_VISIBLE_DEVICES=1 60 | for n_qp in 2 16; do 61 | for m_qp in 4 64; do 62 | for noise_level in 0.5; do 63 | python ../../run.py $TRAIN_OR_TEST cartpole \ 64 | --num-parallel 100000 \ 65 | --horizon 20 \ 66 | --epochs 500 \ 67 | --mini-epochs 1 \ 68 | --qp-unrolled \ 69 | --shared-PH \ 70 | --affine-qb \ 71 | --noise-level ${noise_level} \ 72 | --n-qp ${n_qp} \ 73 | --m-qp ${m_qp} \ 74 | --exp-name shared_affine_noise${noise_level}_n${n_qp}_m${m_qp} 75 | done 76 | done 77 | done 78 | for n_qp in 2 16; do 79 | for m_qp in 4 64; do 80 | for noise_level in 0.5; do 81 | python ../../run.py $TRAIN_OR_TEST cartpole \ 82 | --num-parallel 100000 \ 83 | --horizon 20 \ 84 | --epochs 500 \ 85 | --mini-epochs 1 \ 86 | --qp-unrolled \ 87 | --shared-PH \ 88 | --affine-qb \ 89 | --noise-level ${noise_level} \ 90 | --n-qp ${n_qp} \ 91 | --m-qp ${m_qp} \ 92 | --randomize \ 93 | --exp-name shared_affine_noise${noise_level}_n${n_qp}_m${m_qp}+rand 94 | done 95 | done 96 | done 97 | 98 | for noise_level in 0 0.5; do 99 | python ../../run.py $TRAIN_OR_TEST cartpole \ 100 | --num-parallel 100000 \ 101 | --horizon 20 \ 102 | --epochs 500 \ 103 | --mini-epochs 1 \ 104 | --noise-level ${noise_level} \ 105 | --randomize \ 106 | --exp-name mlp_noise${noise_level}+rand 107 | done 108 | } 109 | 110 | # Start both groups in parallel 111 | # group_one & group_two & 112 | group_one 113 | 114 | # Wait for both background tasks to complete 115 | # wait 116 | -------------------------------------------------------------------------------- /auxiliary/train_warmstarter.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from tqdm import tqdm 3 | import os 4 | import sys 5 | file_path = os.path.dirname(__file__) 6 | sys.path.append(os.path.join(file_path, "..")) 7 | from modules.warm_starter import WarmStarter 8 | from modules.qp_solver import QPSolver 9 | from utils.mpc_utils import generate_random_problem 10 | import torch 11 | from torch.nn import functional as F 12 | import argparse 13 | import traceback 14 | from pathlib import Path 15 | from datetime import datetime 16 | import copy 17 | from torch.utils.tensorboard import SummaryWriter 18 | 19 | parser = argparse.ArgumentParser() 20 | parser.add_argument("--batch-size", 
type=int, default=10000) 21 | parser.add_argument("--n", type=int, default=10) 22 | parser.add_argument("--m", type=int, default=5) 23 | parser.add_argument("--fixed-PH", action='store_true') 24 | args = parser.parse_args() 25 | 26 | max_epochs = 50000 27 | bs = args.batch_size 28 | n = args.n 29 | m = args.m 30 | device = "cuda:0" 31 | 32 | torch.manual_seed(42) 33 | q0, b0, P0, H0 = generate_random_problem(1, n, m, device) 34 | P0_np = P0.squeeze(0).cpu().numpy() 35 | H0_np = H0.squeeze(0).cpu().numpy() 36 | P0 = P0.broadcast_to((bs, -1, -1)) 37 | H0 = H0.broadcast_to((bs, -1, -1)) 38 | 39 | warm_starter = WarmStarter(device, n, m, fixed_P=args.fixed_PH, fixed_H=args.fixed_PH) 40 | if not args.fixed_PH: 41 | oracle_solver = QPSolver(device, n, m) 42 | else: 43 | oracle_solver = QPSolver(device, n, m, P=P0_np, H=H0_np) 44 | optimizer = torch.optim.Adam(warm_starter.parameters()) 45 | losses = [] 46 | Path("runs").mkdir(parents=True, exist_ok=True) 47 | writer = SummaryWriter('runs/' + "warmstarter" + datetime.now().strftime("_%y-%m-%d-%H-%M-%S")) 48 | 49 | 50 | try: 51 | def restore_checkpoint(): 52 | global loss_best, no_improvement_count 53 | warm_starter.load_state_dict(checkpoint[0]) 54 | optimizer.load_state_dict(checkpoint[1]) 55 | loss_best = 0 56 | no_improvement_count= 0 57 | loss_best = 0 58 | no_improvement_count = 0 59 | for i_ep in (pbar:= tqdm(range(max_epochs))): 60 | # Check for early stopping 61 | if i_ep > 0: 62 | if loss_best == 0: 63 | loss_best = losses[-1] + 1 64 | if losses[-1] < loss_best: 65 | no_improvement_count = 0 66 | loss_best = 0.95 * loss_best + 0.05 * losses[-1] 67 | checkpoint = [ 68 | copy.deepcopy(warm_starter.state_dict()), 69 | copy.deepcopy(optimizer.state_dict()), 70 | ] 71 | else: 72 | no_improvement_count += 1 73 | if no_improvement_count >= 5: 74 | restore_checkpoint() 75 | optimizer.param_groups[0]['lr'] /= 10 76 | loss_best = 0 77 | if optimizer.param_groups[0]['lr'] < 1e-7: 78 | break 79 | 80 | optimizer.zero_grad() 81 | q, b, P, H = generate_random_problem(bs, n, m, device) 82 | if not args.fixed_PH: 83 | oracle_Xb = oracle_solver(q, b, P, H)[0][:, -1, :] 84 | approx_X = warm_starter(q, b, P, H) 85 | else: 86 | oracle_Xb = oracle_solver(q, b)[0][:, -1, :] 87 | approx_X = warm_starter(q, b) 88 | loss = torch.log((approx_X - oracle_Xb).norm(dim=-1)).mean() 89 | if loss.isfinite(): 90 | loss.backward() 91 | optimizer.step() 92 | losses.append(loss.item()) 93 | else: 94 | restore_checkpoint() 95 | pbar.set_description(f"{optimizer.param_groups[0]['lr']:.2e}, {loss.item():.2f}/{loss_best:.2f}/{no_improvement_count}") 96 | writer.add_scalar("stat/loss", loss.item(), i_ep) 97 | writer.add_scalar("stat/lr", optimizer.param_groups[0]['lr'], i_ep) 98 | 99 | except: 100 | traceback.print_exc() 101 | finally: 102 | Path("models").mkdir(parents=True, exist_ok=True) 103 | torch.save(warm_starter.state_dict(), f"models/warmstarter-{n}-{m}.pth") 104 | -------------------------------------------------------------------------------- /src/utils/rlgame_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copied from IsaacGymEnvs 3 | """ 4 | 5 | import torch 6 | from rl_games.common import env_configurations, vecenv 7 | from rl_games.common.algo_observer import AlgoObserver 8 | from rl_games.algos_torch import torch_ext 9 | 10 | class RLGPUEnv(vecenv.IVecEnv): 11 | def __init__(self, config_name, num_actors, **kwargs): 12 | self.env = env_configurations.configurations[config_name]['env_creator'](**kwargs) 13 | 14 
| def step(self, actions): 15 | return self.env.step(actions) 16 | 17 | def reset(self): 18 | return self.env.reset() 19 | 20 | def reset_done(self): 21 | return self.env.reset_done() 22 | 23 | def get_number_of_agents(self): 24 | return self.env.get_number_of_agents() 25 | 26 | def get_env_info(self): 27 | info = {} 28 | info['action_space'] = self.env.action_space 29 | info['observation_space'] = self.env.observation_space 30 | if hasattr(self.env, "amp_observation_space"): 31 | info['amp_observation_space'] = self.env.amp_observation_space 32 | 33 | if self.env.num_states > 0: 34 | info['state_space'] = self.env.state_space 35 | print(info['action_space'], info['observation_space'], info['state_space']) 36 | else: 37 | print(info['action_space'], info['observation_space']) 38 | 39 | return info 40 | 41 | class RLGPUAlgoObserver(AlgoObserver): 42 | """Allows us to log stats from the env along with the algorithm running stats. """ 43 | 44 | def __init__(self): 45 | pass 46 | 47 | def after_init(self, algo): 48 | self.algo = algo 49 | self.mean_scores = torch_ext.AverageMeter(1, self.algo.games_to_track).to(self.algo.ppo_device) 50 | self.ep_infos = [] 51 | self.direct_info = {} 52 | self.writer = self.algo.writer 53 | 54 | def process_infos(self, infos, done_indices): 55 | assert isinstance(infos, dict), "RLGPUAlgoObserver expects dict info" 56 | if isinstance(infos, dict): 57 | if 'episode' in infos: 58 | self.ep_infos.append(infos['episode']) 59 | 60 | if len(infos) > 0 and isinstance(infos, dict): # allow direct logging from env 61 | self.direct_info = {} 62 | for k, v in infos.items(): 63 | # only log scalars 64 | if isinstance(v, float) or isinstance(v, int) or (isinstance(v, torch.Tensor) and len(v.shape) == 0): 65 | self.direct_info[k] = v 66 | 67 | def after_clear_stats(self): 68 | self.mean_scores.clear() 69 | 70 | def after_print_stats(self, frame, epoch_num, total_time): 71 | if self.ep_infos: 72 | for key in self.ep_infos[0]: 73 | infotensor = torch.tensor([], device=self.algo.device) 74 | for ep_info in self.ep_infos: 75 | # handle scalar and zero dimensional tensor infos 76 | if not isinstance(ep_info[key], torch.Tensor): 77 | ep_info[key] = torch.Tensor([ep_info[key]]) 78 | if len(ep_info[key].shape) == 0: 79 | ep_info[key] = ep_info[key].unsqueeze(0) 80 | infotensor = torch.cat((infotensor, ep_info[key].to(self.algo.device))) 81 | value = torch.mean(infotensor) 82 | self.writer.add_scalar('Episode/' + key, value, epoch_num) 83 | self.ep_infos.clear() 84 | 85 | for k, v in self.direct_info.items(): 86 | self.writer.add_scalar(f'{k}/frame', v, frame) 87 | self.writer.add_scalar(f'{k}/iter', v, epoch_num) 88 | self.writer.add_scalar(f'{k}/time', v, total_time) 89 | 90 | if self.mean_scores.current_size > 0: 91 | mean_scores = self.mean_scores.get_mean() 92 | self.writer.add_scalar('scores/mean', mean_scores, frame) 93 | self.writer.add_scalar('scores/iter', mean_scores, epoch_num) 94 | self.writer.add_scalar('scores/time', mean_scores, total_time) 95 | -------------------------------------------------------------------------------- /experiments/double_integrator/dump_parameters.py: -------------------------------------------------------------------------------- 1 | # %% Initialize model 2 | import numpy as np 3 | import torch 4 | import sys 5 | import os 6 | file_path = os.path.dirname(__file__) 7 | sys.path.append(os.path.join(file_path, "../..")) 8 | from src.modules.qp_unrolled_network import QPUnrolledNetwork 9 | 10 | def get_state_dict(checkpoint_path): 11 | checkpoint = 
torch.load(checkpoint_path) 12 | model = checkpoint["model"] 13 | prefix = "a2c_network.policy_net." 14 | policy_net_state_dict = {k[len(prefix):]: v for (k, v) in model.items() if k.startswith(prefix)}  # slice off the prefix; str.lstrip strips a character set, not a prefix 15 | if "running_mean_std.running_mean" in model: 16 | running_mean = model["running_mean_std.running_mean"].to(dtype=torch.float) 17 | running_std = model["running_mean_std.running_var"].sqrt().to(dtype=torch.float) 18 | else: 19 | running_mean = torch.tensor([0.]) 20 | running_std = torch.tensor([1.]) 21 | return policy_net_state_dict, running_mean, running_std 22 | 23 | device = "cuda:0" 24 | n_qp = 3 25 | m_qp = 9 26 | qp_iter = 10 27 | symmetric = True 28 | no_b = True 29 | net = QPUnrolledNetwork(device, 2, n_qp, m_qp, qp_iter, None, True, True, force_feasible=True, symmetric=symmetric, no_b=no_b) 30 | if not symmetric: 31 | exp_name = "default" 32 | elif not no_b: 33 | exp_name = "symmetric" 34 | else: 35 | exp_name = "symmetric_no_b" 36 | checkpoint_path = f"runs/double_integrator_{exp_name}/nn/double_integrator.pth" 37 | policy_net_state_dict, running_mean, running_std = get_state_dict(checkpoint_path) 38 | net.load_state_dict(policy_net_state_dict) 39 | running_mean, running_std = running_mean.to(device=device), running_std.to(device=device) 40 | net.to(device) 41 | 42 | t = lambda arr: torch.tensor(arr, device=device, dtype=torch.float).unsqueeze(0) 43 | a = lambda t: t.squeeze(0).detach().cpu().numpy() 44 | # %% Get parameters and reconstruct 45 | from src.utils.torch_utils import make_psd 46 | 47 | feasible_lambda = 10 48 | 49 | P_params = policy_net_state_dict['P_params'].unsqueeze(0) 50 | H_params = policy_net_state_dict['H_params'] 51 | zeros_n = torch.zeros((1, n_qp, 1), device=device) 52 | ones_m = torch.ones((1, m_qp, 1), device=device) 53 | I = torch.eye(1, device=device).unsqueeze(0) 54 | zeros_1 = torch.zeros((1, 1), device=device) 55 | Pinv = make_psd(P_params, min_eig=1e-2) 56 | tilde_P_inv = torch.cat([ 57 | torch.cat([Pinv, zeros_n], dim=2), 58 | torch.cat([zeros_n.transpose(1, 2), 1 / feasible_lambda * I], dim=2) 59 | ], dim=1) 60 | H = H_params.view(m_qp, n_qp).unsqueeze(0) 61 | tilde_H = torch.cat([ 62 | torch.cat([H, ones_m], dim=2), 63 | torch.cat([zeros_n.transpose(1, 2), I], dim=2) 64 | ], dim=1) 65 | P = torch.linalg.inv(tilde_P_inv).squeeze(0).cpu().numpy() 66 | H = tilde_H.squeeze(0).cpu().numpy() 67 | Wq_params = policy_net_state_dict['qb_affine_layer.weight'].unsqueeze(0) 68 | Wq_tilde = torch.cat([ 69 | Wq_params, 70 | torch.zeros((1, 1, Wq_params.shape[2]), device=device), 71 | ], dim=1) 72 | Wq = Wq_tilde.squeeze(0).cpu().numpy() 73 | 74 | # %% Get control invariant set 75 | from src.envs.env_creators import sys_param, env_creators 76 | from src.utils.sets import compute_MCI 77 | from src.utils.geometry import find_supporting_hyperplanes 78 | from matplotlib import pyplot as plt 79 | 80 | A = sys_param["double_integrator"]["A"] 81 | B = sys_param["double_integrator"]["B"] 82 | Q = sys_param["double_integrator"]["Q"] 83 | R = sys_param["double_integrator"]["R"] 84 | x_min_scalar = sys_param["double_integrator"]["x_min"] 85 | x_max_scalar = sys_param["double_integrator"]["x_max"] 86 | u_min_scalar = sys_param["double_integrator"]["u_min"] 87 | u_max_scalar = sys_param["double_integrator"]["u_max"] 88 | x_min = x_min_scalar * np.ones(2) 89 | x_max = x_max_scalar * np.ones(2) 90 | u_min = u_min_scalar * np.ones(1) 91 | u_max = u_max_scalar * np.ones(1) 92 | 93 | MCI = compute_MCI(A, B, x_min, x_max, u_min, u_max, iterations=100) 94 | A_MCI, b_MCI = 
find_supporting_hyperplanes(MCI) 95 | # %% Dump parameters 96 | np.savez( 97 | "parameters.npz", 98 | A=A, 99 | B=B, 100 | P=P, 101 | H=H, 102 | Wq=Wq, 103 | A_MCI=A_MCI, 104 | b_MCI=b_MCI, 105 | ) 106 | 107 | # %% 108 | -------------------------------------------------------------------------------- /src/utils/visualization.py: -------------------------------------------------------------------------------- 1 | from .geometry import find_interior_point 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | from matplotlib.lines import Line2D 5 | from scipy.spatial import ConvexHull, HalfspaceIntersection 6 | 7 | 8 | def plot_multiple_2d_polytopes_with_contour(polytope_contour_params): 9 | """ 10 | Plot multiple 2D polytopes each defined by Ax <= b and overlay the contour of a quadratic function. 11 | 12 | Parameters: 13 | - polytope_contour_params (list of dict): List of dictionaries containing A, b, optimal_solution, P, q, and label. 14 | 15 | Returns: 16 | - fig (matplotlib.figure.Figure): Figure object. 17 | - ax (matplotlib.axes._subplots.AxesSubplot): Axis object. 18 | """ 19 | 20 | fig, ax = plt.subplots() 21 | 22 | # Determine global x and y limits 23 | all_vertices = [] 24 | for params in polytope_contour_params: 25 | interior_point = find_interior_point(params['A'], params['b']) 26 | if interior_point is not None: 27 | vertices = HalfspaceIntersection(np.hstack([params['A'], -params['b'][:, np.newaxis]]), interior_point).intersections 28 | all_vertices.append(vertices) 29 | all_vertices = np.vstack(all_vertices) 30 | 31 | margin = 0.5 # Additional margin around the polytopes 32 | x_range = np.max(all_vertices[:, 0]) - np.min(all_vertices[:, 0]) 33 | y_range = np.max(all_vertices[:, 1]) - np.min(all_vertices[:, 1]) 34 | max_range = max(x_range, y_range) + 2 * margin 35 | x_margin = (max_range - x_range) / 2 36 | y_margin = (max_range - y_range) / 2 37 | x_min, x_max = np.min(all_vertices[:, 0]) - x_margin, np.max(all_vertices[:, 0]) + x_margin 38 | y_min, y_max = np.min(all_vertices[:, 1]) - y_margin, np.max(all_vertices[:, 1]) + y_margin 39 | x_grid, y_grid = np.meshgrid(np.linspace(x_min, x_max, 100), np.linspace(y_min, y_max, 100)) 40 | 41 | custom_legend_handles = [] 42 | 43 | for params in polytope_contour_params: 44 | A, b, P, q, color, label = params['A'], params['b'], params['P'], params['q'], params['color'], params['label'] 45 | optimal_solution = params.get("optimal_solution", None) 46 | 47 | # Find an interior point 48 | interior_point = find_interior_point(A, b) 49 | if interior_point is None: 50 | continue # Skip this polytope if LP is infeasible 51 | 52 | # Plot polytope 53 | halfspace_intersection = HalfspaceIntersection(np.hstack([A, -b[:, np.newaxis]]), interior_point) 54 | vertices = halfspace_intersection.intersections 55 | hull = ConvexHull(vertices) 56 | ordered_vertices = vertices[hull.vertices] 57 | closed_loop = np.vstack([ordered_vertices, ordered_vertices[0]]) 58 | 59 | ax.fill(closed_loop[:, 0], closed_loop[:, 1], alpha=0.3, color=color, label=f"{label} (Polytope)") 60 | ax.plot(closed_loop[:, 0], closed_loop[:, 1], color=color) 61 | 62 | # Mark the optimal solution 63 | if optimal_solution is not None: 64 | ax.plot(optimal_solution[0], optimal_solution[1], 'o', color=color) 65 | 66 | # Evaluate quadratic function 67 | Z = np.zeros_like(x_grid) 68 | for i in range(x_grid.shape[0]): 69 | for j in range(x_grid.shape[1]): 70 | x_vec = np.array([x_grid[i, j], y_grid[i, j]]) 71 | Z[i, j] = 0.5 * x_vec.T @ P @ x_vec + q.T @ x_vec 72 | 73 | # Plot 
contour 74 | contour = ax.contour(x_grid, y_grid, Z, levels=5, colors=color) # Reduced number of levels for sparser contour 75 | 76 | # Create a custom legend handle 77 | custom_legend_handles.append(Line2D([0], [0], color=color, lw=4, label=label)) 78 | 79 | # Adjust plot settings 80 | ax.set_aspect('equal', adjustable='box') 81 | ax.set_xlabel('x') 82 | ax.set_ylabel('y') 83 | 84 | # Add custom legend 85 | if custom_legend_handles: 86 | # Move legend outside the plot 87 | ax.legend(handles=custom_legend_handles, loc='upper left', bbox_to_anchor=(1, 1)) 88 | # Adjust layout to prevent clipping 89 | plt.tight_layout(rect=[0, 0, 0.85, 1]) 90 | 91 | return fig, ax 92 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 
109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | 162 | .vscode/ 163 | .unison* 164 | 165 | runs/ 166 | data/ 167 | auxiliary/models 168 | auxiliary/*.csv 169 | experiments/tank/run1.sh 170 | experiments/tank/test_results/ 171 | experiments/tank/*.csv 172 | 173 | experiments/cartpole/test_results/ 174 | experiments/cartpole/*.csv 175 | 176 | experiments/double_integrator/*.npz 177 | experiments/double_integrator/*.tex 178 | 179 | learning-qp.txt 180 | -------------------------------------------------------------------------------- /experiments/tank/plot_histogram.py: -------------------------------------------------------------------------------- 1 | # %% Read test data from learned QP with MPC 2 | from benchmark_stat import read_csv, get_stat 3 | from matplotlib import pyplot as plt 4 | import numpy as np 5 | import pandas as pd 6 | 7 | noise_level = 0.1 8 | randomize = True 9 | N = 4 10 | n = 8 11 | m = 32 12 | 13 | randomize_flag = "_rand" if randomize else "" 14 | df_mpc = read_csv(f"N{N}_noise{noise_level}{randomize_flag}_20*") 15 | df_qp = read_csv(f"N0_n{n}_m{m}_noise{noise_level}{randomize_flag}_20*") 16 | df_mlp = read_csv(f"mlp_noise{noise_level}{randomize_flag}_20*") 17 | 18 | # %% Matrix of constraint violation 19 | 20 | def violation_matrix(df1, df2): 21 | """ 22 | Return four lists of indices standing for the trajectory indices where: 23 | 1. constraint_violated=True in both df1 and df2 24 | 2. constraint_violated=True in df1 but not df2 25 | 3. constraint_violated=True in df2 but not df1 26 | 4. 
constraint_violated=False in both df1 and df2 27 | """ 28 | idx_both = [] 29 | idx_df1 = [] 30 | idx_df2 = [] 31 | idx_none = [] 32 | for i in range(len(df1)): 33 | if df1.iloc[i]["constraint_violated"] and df2.iloc[i]["constraint_violated"]: 34 | idx_both.append(i) 35 | elif df1.iloc[i]["constraint_violated"] and not df2.iloc[i]["constraint_violated"]: 36 | idx_df1.append(i) 37 | elif not df1.iloc[i]["constraint_violated"] and df2.iloc[i]["constraint_violated"]: 38 | idx_df2.append(i) 39 | else: 40 | idx_none.append(i) 41 | return idx_both, idx_df1, idx_df2, idx_none 42 | 43 | violated_both, violated_mpc, violated_qp, violated_none = violation_matrix(df_mpc, df_qp) 44 | 45 | data = {"MPC Success": [len(violated_none), len(violated_qp)], "MPC Fail": [len(violated_mpc), len(violated_both)]} 46 | index_labels = ["QP Success", "QP Fail"] 47 | df = pd.DataFrame(data=data, index=index_labels) 48 | df 49 | 50 | # %% Cost ratio histogram in the cases where both methods succeed 51 | cost_mpc = df_mpc.iloc[violated_none]["cumulative_cost"] 52 | cost_qp = df_qp.iloc[violated_none]["cumulative_cost"] 53 | ratio = cost_mpc / cost_qp 54 | 55 | n, bins, patches = plt.hist(ratio, bins=30, edgecolor='black', alpha=0.7) 56 | max_freq = max(n) 57 | 58 | # Add vertical dashed line at x=1 59 | plt.axvline(x=1, color='r', linestyle='--') 60 | 61 | # Annotations 62 | text_y_pos = max_freq * 1.3 63 | y_max = max_freq * 1.6 64 | plt.arrow(0.8, text_y_pos, -0.6, 0, head_width=20, head_length=0.05, fc='black', ec='black') 65 | plt.text(0.5, text_y_pos, 'MPC better', horizontalalignment='center', verticalalignment='bottom', color='black') 66 | plt.arrow(1.2, text_y_pos, 0.6, 0, head_width=20, head_length=0.05, fc='black', ec='black') 67 | plt.text(1.5, text_y_pos, 'Learned QP better', horizontalalignment='center', verticalalignment='bottom', color='black') 68 | 69 | plt.xlabel('Ratio of average cost (MPC / Learned QP)') 70 | plt.xlim(0, 2) 71 | plt.ylim(0, y_max) 72 | 73 | 74 | # %% Penalized cost ratio histogram in all cases 75 | penalty = 100000 76 | get_penalized_cost = lambda df: (df['cumulative_cost'] + penalty * df["constraint_violated"]) / df['episode_length'] 77 | penalized_cost_mpc = get_penalized_cost(df_mpc) 78 | penalized_cost_qp = get_penalized_cost(df_qp) 79 | penalized_cost_mlp = get_penalized_cost(df_mlp) 80 | 81 | # Export penalized costs to csv; each row is (penalized_cost_mpc, penalized_cost_qp, penalized_cost_mlp) 82 | header_line = "penalized_cost_mpc,penalized_cost_qp,penalized_cost_mlp" 83 | np.savetxt("penalized_costs.csv", np.column_stack((penalized_cost_mpc, penalized_cost_qp, penalized_cost_mlp)), delimiter=",", header=header_line, comments='') 84 | 85 | 86 | 87 | log_penalized_ratio = np.log10(penalized_cost_mpc / penalized_cost_qp) 88 | 89 | n, bins, patches = plt.hist(log_penalized_ratio, bins=30, edgecolor='black', alpha=0.7) 90 | max_freq = max(n) 91 | 92 | # Add vertical dashed line at x=1 93 | plt.axvline(x=0, color='r', linestyle='--') 94 | 95 | # Annotations 96 | text_y_pos = max_freq * 1.3 97 | y_max = max_freq * 1.6 98 | plt.arrow(-1, text_y_pos, -2, 0, head_width=50, head_length=0.05, fc='black', ec='black') 99 | plt.text(-2, text_y_pos, 'MPC better', horizontalalignment='center', verticalalignment='bottom', color='black') 100 | plt.arrow(1, text_y_pos, 2, 0, head_width=50, head_length=0.05, fc='black', ec='black') 101 | plt.text(2, text_y_pos, 'Learned QP better', horizontalalignment='center', verticalalignment='bottom', color='black') 102 | 103 | plt.xlabel('Ratio of 
penalized average cost (MPC / Learned QP) (log10)') 104 | plt.ylim(0, y_max) 105 | 106 | # %% 107 | -------------------------------------------------------------------------------- /experiments/tank/benchmark_stat.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import torch 3 | from glob import glob 4 | import argparse 5 | 6 | parser = argparse.ArgumentParser() 7 | parser.add_argument("run_name", type=str, nargs="?", default="")  # optional positional; default "" aggregates all configurations 8 | 9 | df = pd.DataFrame(columns=[ 10 | "Noise level", 11 | "Parametric uncertainty", 12 | "Method", 13 | "Horizon", 14 | "Num of variables", 15 | "Num of constraints", 16 | "Num of learnable policy parameters", 17 | "Average cost", 18 | "Average cost (with penalty)", 19 | "Frequency of constraint violation (x1000)", 20 | ]) 21 | 22 | def read_csv(wildcard): 23 | filename = sorted(glob(f"test_results/{wildcard}"))[-1] 24 | return pd.read_csv(filename, dtype={"constraint_violated": "bool"}) 25 | 26 | def get_stat(df): 27 | max_episode_length = df['episode_length'].max() 28 | penalty = 100000 29 | avg_cost = df['cumulative_cost'].sum() / df['episode_length'].sum() 30 | avg_cost_penalized = (df['cumulative_cost'].sum() + penalty * df["constraint_violated"].sum()) / df['episode_length'].sum() 31 | freq_violation = df["constraint_violated"].sum() / df['episode_length'].sum() 32 | return avg_cost, avg_cost_penalized, freq_violation * 1000 33 | 34 | def count_parameters(exp_name): 35 | checkpoint_path = f"runs/tank_{exp_name}/nn/tank.pth" 36 | checkpoint = torch.load(checkpoint_path) 37 | total_params = 0 38 | for key, value in checkpoint['model'].items(): 39 | if key.startswith("a2c_network.policy_net") or key.startswith("a2c_network.actor_mlp"): 40 | total_params += value.numel() 41 | return total_params 42 | 43 | if __name__ == "__main__": 44 | args = parser.parse_args() 45 | if not args.run_name: 46 | # Iterate over all configurations 47 | for noise_level in [0, 0.1, 0.2, 0.5]: 48 | for rand in [False, True]: 49 | try: 50 | wildcard = f"mlp_noise{noise_level}{'_rand' if rand else ''}_2*" 51 | mlp_df = read_csv(wildcard) 52 | df.loc[len(df)] = [ 53 | noise_level, 54 | rand, 55 | "MLP", 56 | "-", 57 | "-", 58 | "-", 59 | count_parameters(f"mlp_noise{noise_level}"), 60 | *get_stat(mlp_df), 61 | ] 62 | except Exception: 63 | print(f"Error reading file: {wildcard}") 64 | 65 | for n in [2, 4, 8, 16]: 66 | for m in [2, 4, 8, 16, 32, 64]: 67 | try: 68 | wildcard = f"N0_n{n}_m{m}_noise{noise_level}{'_rand' if rand else ''}_2*" 69 | qp_df = read_csv(wildcard) 70 | df.loc[len(df)] = [ 71 | noise_level, 72 | rand, 73 | "QP", 74 | "-", 75 | n, 76 | m, 77 | count_parameters(f"shared_affine_noise{noise_level}_n{n}_m{m}"), 78 | *get_stat(qp_df), 79 | ] 80 | except Exception: 81 | print(f"Error reading file: {wildcard}") 82 | 83 | for N in [1, 2, 4, 8, 16]: 84 | try: 85 | wildcard = f"N{N}_noise{noise_level}{'_rand' if rand else ''}_2*" 86 | mpc_df = read_csv(wildcard) 87 | df.loc[len(df)] = [ 88 | noise_level, 89 | rand, 90 | "MPC", 91 | N, 92 | 2 * N, 93 | 12 * N, 94 | 0, 95 | *get_stat(mpc_df), 96 | ] 97 | except Exception: 98 | print(f"Error reading file: {wildcard}") 99 | df.to_csv("benchmark_stat.csv", index=False) 100 | else: 101 | # Stat for particular run 102 | run_name = args.run_name 103 | wildcard = f"{run_name}_2*" 104 | raw_df = read_csv(wildcard) 105 | avg_cost, avg_cost_penalized, freq_violation = get_stat(raw_df) 106 | df.loc[len(df)] = [ 107 | "-", 108 | "-", 109 | "-", 110 | "-", 111 | "-", 112 | "-", 113 | "-", 114 | avg_cost, 115 | avg_cost_penalized, 116 | freq_violation, 117 | ] 118 | df.to_csv(f"benchmark_stat_{run_name}.csv", index=False) 119 | 
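The same helpers can also be used interactively to inspect a single result file. A small usage sketch (the wildcard below is a hypothetical run name following the naming scheme used in this script):

df_single = read_csv("N0_n8_m32_noise0_2*")
avg_cost, avg_cost_penalized, violations_x1000 = get_stat(df_single)
print(avg_cost, avg_cost_penalized, violations_x1000)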
-------------------------------------------------------------------------------- /src/utils/torch_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import functional as F 3 | from contextlib import nullcontext, contextmanager 4 | import numpy as np 5 | 6 | 7 | def bmv(A, b): 8 | """Compute matrix multiply vector in batch mode.""" 9 | bs = b.shape[0] 10 | if A.shape[0] == 1: 11 | # The same A for different b's; use matrix multiplication instead of broadcasting 12 | return (A.squeeze(0) @ b.t()).t() 13 | else: 14 | return (A @ b.unsqueeze(-1)).squeeze(-1) 15 | 16 | def bma(A, B): 17 | """Batch-matrix-times-any, where any can be matrix or vector.""" 18 | return (A @ B) if A.dim() == B.dim() else bmv(A, B) 19 | 20 | def bvv(x, y): 21 | """Compute vector dot product in batch mode.""" 22 | return bmv(x.unsqueeze(-2), y) 23 | 24 | def bqf(x, A): 25 | """Compute quadratic form x' * A * x in batch mode.""" 26 | return torch.einsum('bi,bij,bj->b', x, A, x) 27 | 28 | def bsolve(A, B): 29 | """Compute solve(A, B) in batch mode, where the first dimension of A can be singleton.""" 30 | if A.dim() == 3 and B.dim() == 2 and A.shape[0] == 1: 31 | return torch.linalg.solve(A.squeeze(0), B.t()).t() 32 | else: 33 | return torch.linalg.solve(A, B) 34 | 35 | def make_psd(x, min_eig=0.1): 36 | """Assume x is (bs, N*(N+1)/2), create (bs, N, N) batch of PSD matrices using Cholesky.""" 37 | bs, n_elem = x.shape 38 | N = (int(np.sqrt(1 + 8 * n_elem)) - 1) // 2 39 | cholesky_diag_index = torch.arange(N, dtype=torch.long) + 1 40 | cholesky_diag_index = (cholesky_diag_index * (cholesky_diag_index + 1)) // 2 - 1 # computes the indices of the future diagonal elements of the matrix 41 | elem = x.clone() 42 | elem[:, cholesky_diag_index] = np.sqrt(min_eig) + F.softplus(elem[:, cholesky_diag_index]) 43 | tril_indices = torch.tril_indices(row=N, col=N, offset=0) # Collection that contains the indices of the non-zero elements of a lower triangular matrix 44 | cholesky = torch.zeros(size=(bs, N, N), dtype=torch.float, device=elem.device) #initialize a square matrix to zeros 45 | cholesky[:, tril_indices[0], tril_indices[1]] = elem # Assigns the elements of the vector to their correct position in the lower triangular matrix 46 | return cholesky @ cholesky.transpose(1, 2) 47 | 48 | def vectorize_upper_triangular(matrices): 49 | # Get the shape of the matrices 50 | b, n, _ = matrices.shape 51 | 52 | # Create the indices for the upper triangular part 53 | row_indices, col_indices = torch.triu_indices(n, n, device=matrices.device) 54 | 55 | # Create a mask of shape (b, n, n) 56 | mask = torch.zeros((b, n, n), device=matrices.device, dtype=torch.bool) 57 | 58 | # Set the upper triangular part of the mask to True 59 | mask[:, row_indices, col_indices] = True 60 | 61 | # Use the mask to extract the upper triangular part 62 | upper_triangular = matrices[mask] 63 | 64 | # Reshape the result to the desired shape 65 | upper_triangular = upper_triangular.view(b, -1) 66 | 67 | return upper_triangular 68 | 69 | 70 | def kron(a, b): 71 | """ 72 | Kronecker product of matrices a and b with leading batch dimensions. 73 | Batch dimensions are broadcast. 
The batch shapes must be broadcastable against each other. 74 | :type a: torch.Tensor 75 | :type b: torch.Tensor 76 | :rtype: torch.Tensor 77 | """ 78 | siz1 = torch.Size(torch.tensor(a.shape[-2:]) * torch.tensor(b.shape[-2:])) 79 | res = a.unsqueeze(-1).unsqueeze(-3) * b.unsqueeze(-2).unsqueeze(-4) 80 | siz0 = res.shape[:-4] 81 | return res.reshape(siz0 + siz1) 82 | 83 | 84 | def interpolate_state_dicts(state_dict_1, state_dict_2, weight): 85 | return { 86 | key: (1 - weight) * state_dict_1[key] + weight * state_dict_2[key] for key in state_dict_1.keys() 87 | } 88 | 89 | 90 | @contextmanager 91 | def conditional_fork_rng(seed=None, condition=True): 92 | """ 93 | Context manager for conditionally applying PyTorch's fork_rng. 94 | 95 | Parameters: 96 | - seed (int, optional): The seed value for the random number generator. 97 | - condition (bool): Determines whether to apply fork_rng or not. 98 | 99 | Yields: 100 | - None: Yields control back to the caller within the context. 101 | """ 102 | if condition: 103 | with torch.random.fork_rng(): 104 | if seed is not None: 105 | torch.manual_seed(seed) 106 | yield 107 | else: 108 | with nullcontext(): 109 | yield 110 | 111 | def get_rng(device, seed=None): 112 | """ 113 | Get a random number generator. 114 | 115 | Parameters: 116 | - device (torch.device): The device to use for the random number generator. 117 | - seed (int, optional): The seed value for the random number generator. 118 | 119 | Returns: 120 | - torch.Generator: A random number generator. 121 | """ 122 | return torch.Generator(device=device).manual_seed(seed) if seed is not None else torch.Generator(device=device) 123 | 
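A quick smoke test of make_psd (an illustrative sketch, not part of the original file): a batch of packed vectors of length N * (N + 1) / 2 should map to symmetric positive definite (N, N) matrices. Note that min_eig offsets the Cholesky diagonal through softplus, which guarantees positive definiteness but is not a strict lower bound on the eigenvalues of the resulting product.

import torch
from src.utils.torch_utils import make_psd

x = torch.randn(4, 6)  # 6 = 3 * (3 + 1) / 2, so N = 3
A = make_psd(x, min_eig=0.1)  # (4, 3, 3)
assert torch.allclose(A, A.transpose(1, 2), atol=1e-6)  # symmetric
assert (torch.linalg.eigvalsh(A) > 0).all()  # positive definite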
-------------------------------------------------------------------------------- /experiments/tank/reproduce_table.py: -------------------------------------------------------------------------------- 1 | # %% 2 | from glob import glob 3 | import pandas as pd 4 | import numpy as np 5 | import torch 6 | 7 | def read_csv(short_name): 8 | wildcard = f"{short_name}_2*" 9 | filename = sorted(glob(f"test_results/{wildcard}"))[-1] 10 | return pd.read_csv(filename, dtype={"constraint_violated": "bool"}) 11 | 12 | def read_mpc_iter_count(short_name): 13 | wildcard = f"{short_name}_mpc_iter_count_2*" 14 | filename = sorted(glob(f"test_results/{wildcard}"))[-1] 15 | return np.genfromtxt(filename) 16 | 17 | 18 | def affine_layer_flops(input_size, output_size, has_bias, has_relu): 19 | flops = 2 * input_size * output_size 20 | if not has_bias: 21 | flops -= output_size 22 | if has_relu: 23 | flops += output_size 24 | return flops 25 | 26 | def qp_flops(n_sys, n_qp, m_qp, qp_iter): 27 | get_q_flops = affine_layer_flops(2 * n_sys, n_qp, False, False) 28 | get_b_flops = affine_layer_flops(n_sys, m_qp, True, False) 29 | get_mu_flops = affine_layer_flops(n_sys, m_qp, False, False) + affine_layer_flops(m_qp, m_qp, False, False) + m_qp 30 | iter_flops = m_qp # Adding primal-dual variables 31 | iter_flops += 2 * m_qp * (m_qp - 1) # Matrix-vector multiplication 32 | iter_flops += 5 * m_qp # Vector additions 33 | return get_q_flops + get_b_flops + get_mu_flops + qp_iter * iter_flops 34 | 35 | def mpc_flops(n_sys, m_sys, N, iter_count_arr): 36 | n_qp = m_sys * N 37 | m_qp = 2 * (m_sys + n_sys) * N 38 | min_iter = np.min(iter_count_arr) 39 | max_iter = np.max(iter_count_arr) 40 | median_iter = np.median(iter_count_arr) 41 | min_flops = qp_flops(n_sys, n_qp, m_qp, min_iter) 42 | max_flops = qp_flops(n_sys, n_qp, m_qp, max_iter) 43 | median_flops = qp_flops(n_sys, n_qp, m_qp, median_iter) 44 | return min_flops, max_flops, median_flops 45 | 46 | def mlp_flops(input_size, output_size, hidden_sizes): 47 | flops = 0 48 | prev_size = input_size 49 | for size in hidden_sizes: 50 | flops += affine_layer_flops(prev_size, size, True, True) 51 | prev_size = size 52 | flops += affine_layer_flops(prev_size, output_size, True, False) 53 | return flops 54 | 55 | def count_parameters(exp_name): 56 | checkpoint_path = f"runs/tank_{exp_name}/nn/tank.pth" 57 | checkpoint = torch.load(checkpoint_path) 58 | total_params = 0 59 | for key, value in checkpoint['model'].items(): 60 | if key.startswith("a2c_network.policy_net") or key.startswith("a2c_network.actor_mlp"): 61 | total_params += value.numel() 62 | return total_params 63 | 64 | def get_row(short_name, method, n_sys=4, m_sys=2, n_qp=None, m_qp=None, qp_iter=10, N_mpc=None, mlp_last_size=None): 65 | """Output (short name, success rate, cost, penalized costs, FLOPs, learnable parameters).""" 66 | result_df = read_csv(short_name) 67 | total_episodes = len(result_df) 68 | penalty = 100000 69 | avg_cost = result_df['cumulative_cost'].sum() / result_df['episode_length'].sum() 70 | avg_cost_penalized = (result_df['cumulative_cost'].sum() + penalty * result_df["constraint_violated"].sum()) / result_df['episode_length'].sum() 71 | freq_violation = result_df["constraint_violated"].sum() / result_df['episode_length'].sum() 72 | success_rate = 1. - result_df["constraint_violated"].sum() / total_episodes 73 | 74 | # Count FLOPs 75 | if method == "qp": 76 | flops = qp_flops(n_sys, n_qp, m_qp, qp_iter) 77 | elif method == "mpc": 78 | iter_count_arr = read_mpc_iter_count(short_name) 79 | flops = mpc_flops(n_sys, m_sys, N_mpc, iter_count_arr) 80 | elif method == "mlp": 81 | flops = mlp_flops(2 * n_sys, m_sys, [i * mlp_last_size for i in [4, 2, 1]]) 82 | 83 | # Count learnable parameters 84 | if method == "mpc": 85 | num_param = 0 86 | else: 87 | num_param = count_parameters(short_name) 88 | 89 | return short_name, success_rate, avg_cost, avg_cost_penalized, flops, num_param 90 | 91 | # %% 92 | rows = [ 93 | get_row("reproduce_mpc_2_0", "mpc", N_mpc=2), 94 | get_row("reproduce_mpc_2_1", "mpc", N_mpc=2), 95 | get_row("reproduce_mpc_2_10", "mpc", N_mpc=2), 96 | get_row("reproduce_mpc_2_100", "mpc", N_mpc=2), 97 | get_row("reproduce_mpc_4_0", "mpc", N_mpc=4), 98 | get_row("reproduce_mpc_4_1", "mpc", N_mpc=4), 99 | get_row("reproduce_mpc_4_10", "mpc", N_mpc=4), 100 | get_row("reproduce_mpc_4_100", "mpc", N_mpc=4), 101 | get_row("reproduce_mpc_8_0", "mpc", N_mpc=8), 102 | get_row("reproduce_mpc_8_1", "mpc", N_mpc=8), 103 | get_row("reproduce_mpc_8_10", "mpc", N_mpc=8), 104 | get_row("reproduce_mpc_8_100", "mpc", N_mpc=8), 105 | get_row("reproduce_mpc_16_0", "mpc", N_mpc=16), 106 | get_row("reproduce_mpc_16_1", "mpc", N_mpc=16), 107 | get_row("reproduce_mpc_16_10", "mpc", N_mpc=16), 108 | get_row("reproduce_mpc_16_100", "mpc", N_mpc=16), 109 | get_row("reproduce_mlp_8", "mlp", mlp_last_size=8), 110 | get_row("reproduce_mlp_16", "mlp", mlp_last_size=16), 111 | get_row("reproduce_mlp_32", "mlp", mlp_last_size=32), 112 | get_row("reproduce_mlp_64", "mlp", mlp_last_size=64), 113 | get_row("reproduce_qp_4_24", "qp", n_qp=4, m_qp=24), 114 | get_row("reproduce_qp_8_48", "qp", n_qp=8, m_qp=48), 115 | get_row("reproduce_qp_16_96", "qp", n_qp=16, m_qp=96), 116 | ] 117 | 118 | df_result = pd.DataFrame(rows, columns=["name", "success_rate", "avg_cost", "avg_cost_penalized", "flops", "num_param"]) 119 | df_result.to_csv("test_results/reproduce_table.csv", index=False) 120 | print(df_result) 121 | 
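The FLOP model above can be sanity-checked by hand. A worked example (illustrative only) for the n_qp = 8, m_qp = 48 policy with 10 unrolled iterations on the 4-state tank system: the q, b, and mu heads cost 120, 384, and 4944 FLOPs respectively, and each iteration costs 48 + 2 * 48 * 47 + 5 * 48 = 4800, so the total is 53448:

assert qp_flops(n_sys=4, n_qp=8, m_qp=48, qp_iter=10) == 120 + 384 + 4944 + 10 * 4800  # == 53448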
-------------------------------------------------------------------------------- /experiments/cartpole/reproduce_table.py: -------------------------------------------------------------------------------- 1 | # %% 2 | from glob import glob 3 | import pandas as pd 4 | import numpy as np 5 | import torch 6 | 7 | def read_csv(short_name): 8 | wildcard = f"{short_name}_2*" 9 | filename = sorted(glob(f"test_results/{wildcard}"))[-1] 10 | return pd.read_csv(filename, dtype={"constraint_violated": "bool"}) 11 | 12 | def read_mpc_iter_count(short_name): 13 | wildcard = f"{short_name}_mpc_iter_count_2*" 14 | filename = sorted(glob(f"test_results/{wildcard}"))[-1] 15 | return np.genfromtxt(filename) 16 | 17 | 18 | def affine_layer_flops(input_size, output_size, has_bias, has_relu): 19 | flops = 2 * input_size * output_size 20 | if not has_bias: 21 | flops -= output_size 22 | if has_relu: 23 | flops += output_size 24 | return flops 25 | 26 | def qp_flops(n_sys, n_qp, m_qp, qp_iter): 27 | get_q_flops = affine_layer_flops(2 * n_sys, n_qp, False, False) 28 | get_b_flops = affine_layer_flops(n_sys, m_qp, True, False) 29 | get_mu_flops = affine_layer_flops(n_sys, m_qp, False, False) + affine_layer_flops(m_qp, m_qp, False, False) + m_qp 30 | iter_flops = m_qp # Adding primal-dual variables 31 | iter_flops += 2 * m_qp * (m_qp - 1) # Matrix-vector multiplication 32 | iter_flops += 5 * m_qp # Vector additions 33 | return get_q_flops + get_b_flops + get_mu_flops + qp_iter * iter_flops 34 | 35 | def mpc_flops(n_sys, m_sys, N, iter_count_arr): 36 | n_qp = m_sys * N 37 | m_qp = 2 * (m_sys + n_sys) * N 38 | min_iter = np.min(iter_count_arr) 39 | max_iter = np.max(iter_count_arr) 40 | median_iter = np.median(iter_count_arr) 41 | min_flops = qp_flops(n_sys, n_qp, m_qp, min_iter) 42 | max_flops = qp_flops(n_sys, n_qp, m_qp, max_iter) 43 | median_flops = qp_flops(n_sys, n_qp, m_qp, median_iter) 44 | return min_flops, max_flops, median_flops 45 | 46 | def mlp_flops(input_size, output_size, hidden_sizes): 47 | flops = 0 48 | prev_size = input_size 49 | for size in hidden_sizes: 50 | flops += affine_layer_flops(prev_size, size, True, True) 51 | prev_size = size 52 | flops += affine_layer_flops(prev_size, output_size, True, False) 53 | return flops 54 | 55 | def count_parameters(exp_name): 56 | checkpoint_path = f"runs/cartpole_{exp_name}/nn/cartpole.pth" 57 | checkpoint = torch.load(checkpoint_path) 58 | total_params = 0 59 | for key, value in checkpoint['model'].items(): 60 | if key.startswith("a2c_network.policy_net") or key.startswith("a2c_network.actor_mlp"): 61 | total_params += value.numel() 62 | return total_params 63 | 64 | def get_row(short_name, method, n_sys=4, m_sys=1, n_qp=None, m_qp=None, qp_iter=10, N_mpc=None, mlp_last_size=None): 65 | """Output (short name, success rate, cost, penalized costs, FLOPs, learnable parameters).""" 66 | result_df = read_csv(short_name) 67 | total_episodes = len(result_df) 68 | penalty = 1000 69 | avg_cost = result_df['cumulative_cost'].sum() / result_df['episode_length'].sum() 70 | avg_cost_penalized = (result_df['cumulative_cost'].sum() + penalty * result_df["constraint_violated"].sum()) / result_df['episode_length'].sum() 71 | freq_violation = result_df["constraint_violated"].sum() / result_df['episode_length'].sum() 72 | success_rate = 1. 
- result_df["constraint_violated"].sum() / total_episodes 73 | 74 | # Count FLOPs 75 | if method == "qp": 76 | flops = qp_flops(n_sys, n_qp, m_qp, qp_iter) 77 | elif method == "mpc": 78 | iter_count_arr = read_mpc_iter_count(short_name) 79 | flops = mpc_flops(n_sys, m_sys, N_mpc, iter_count_arr) 80 | elif method == "mlp": 81 | flops = mlp_flops(2 * n_sys, m_sys, [i * mlp_last_size for i in [4, 2, 1]]) 82 | 83 | # Count learnable parameters 84 | if method == "mpc": 85 | num_param = 0 86 | else: 87 | num_param = count_parameters(short_name) 88 | 89 | return short_name, success_rate, avg_cost, avg_cost_penalized, flops, num_param 90 | 91 | # %% 92 | rows = [ 93 | get_row("reproduce_mpc_2_0", "mpc", N_mpc=2), 94 | get_row("reproduce_mpc_2_1", "mpc", N_mpc=2), 95 | get_row("reproduce_mpc_2_10", "mpc", N_mpc=2), 96 | get_row("reproduce_mpc_2_100", "mpc", N_mpc=2), 97 | get_row("reproduce_mpc_4_0", "mpc", N_mpc=4), 98 | get_row("reproduce_mpc_4_1", "mpc", N_mpc=4), 99 | get_row("reproduce_mpc_4_10", "mpc", N_mpc=4), 100 | get_row("reproduce_mpc_4_100", "mpc", N_mpc=4), 101 | get_row("reproduce_mpc_8_0", "mpc", N_mpc=8), 102 | get_row("reproduce_mpc_8_1", "mpc", N_mpc=8), 103 | get_row("reproduce_mpc_8_10", "mpc", N_mpc=8), 104 | get_row("reproduce_mpc_8_100", "mpc", N_mpc=8), 105 | get_row("reproduce_mpc_16_0", "mpc", N_mpc=16), 106 | get_row("reproduce_mpc_16_1", "mpc", N_mpc=16), 107 | get_row("reproduce_mpc_16_10", "mpc", N_mpc=16), 108 | get_row("reproduce_mpc_16_100", "mpc", N_mpc=16), 109 | get_row("reproduce_mlp_8", "mlp", mlp_last_size=8), 110 | get_row("reproduce_mlp_16", "mlp", mlp_last_size=16), 111 | get_row("reproduce_mlp_32", "mlp", mlp_last_size=32), 112 | get_row("reproduce_mlp_64", "mlp", mlp_last_size=64), 113 | get_row("reproduce_qp_4_24", "qp", n_qp=4, m_qp=24), 114 | get_row("reproduce_qp_8_48", "qp", n_qp=8, m_qp=48), 115 | get_row("reproduce_qp_16_96", "qp", n_qp=16, m_qp=96), 116 | ] 117 | 118 | df_result = pd.DataFrame(rows, columns=["name", "success_rate", "avg_cost", "avg_cost_penalized", "flops", "num_param"]) 119 | df_result.to_csv("test_results/reproduce_table.csv", index=False) 120 | print(df_result) 121 | -------------------------------------------------------------------------------- /experiments/tank/reproduce.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 0. Background utils and GPU scheduler 4 | 5 | # Define the number of GPUs available 6 | NUM_GPUS=$(nvidia-smi --list-gpus | wc -l) 7 | 8 | # Function to find the first idle GPU 9 | find_idle_gpu() { 10 | for (( i=0; i<$NUM_GPUS; i++ )); do 11 | # Check if GPU volatile utilization is 0% 12 | if [ "$(nvidia-smi -i $i --query-gpu=utilization.gpu --format=csv,noheader,nounits)" -eq 0 ]; then 13 | echo $i 14 | return 15 | fi 16 | done 17 | echo "-1" # Return -1 if no idle GPU is found 18 | } 19 | 20 | find_gpu_and_run_task() { 21 | local run_task_function="$1" 22 | shift # Remove the first argument (run_task_function name) 23 | 24 | # Initialize GPU ID as -1 indicating no GPU is available initially 25 | local gpu_id=-1 26 | 27 | # Wait for an idle GPU to become available 28 | while [ "$gpu_id" -eq -1 ]; do 29 | gpu_id=$(find_idle_gpu) 30 | sleep 1 # Wait a bit before checking again 31 | done 32 | 33 | # Call the run_task function with the GPU ID and additional arguments, and send it to the background 34 | $run_task_function $gpu_id $@ > /dev/null & 35 | 36 | # Capture the PID of the last background process 37 | local task_pid=$! 
38 | 39 | # Optional: wait briefly to allow the task to start 40 | sleep 10 41 | 42 | # Output the PID 43 | echo $task_pid 44 | } 45 | 46 | 47 | # 1. Training 48 | # 1.1 MLP of different sizes 49 | 50 | train_mlp() { 51 | local gpu_id=$1 52 | local c1=$2 53 | local c2=$3 54 | local c3=$4 55 | local mlp_last_size=$5 56 | CUDA_VISIBLE_DEVICES=$gpu_id python ../../run.py train tank --num-parallel 100000 --horizon 20 --epochs 5000 --mini-epochs 1 --noise-level 0. --reward-shaping ${c1},${c2},${c3} --no-obs-normalization --mlp-size-last $mlp_last_size --batch-test --exp-name reproduce_mlp_${mlp_last_size} --lr-schedule linear --initial-lr "5e-4" --quiet 57 | } 58 | 59 | # 1.2 QP of different sizes 60 | 61 | train_qp() { 62 | local gpu_id=$1 63 | local c1=$2 64 | local c2=$3 65 | local c3=$4 66 | local n_qp=$5 67 | local m_qp=$6 68 | CUDA_VISIBLE_DEVICES=$gpu_id python ../../run.py train tank --num-parallel 100000 --horizon 20 --epochs 5000 --mini-epochs 1 --noise-level 0. --reward-shaping ${c1},${c2},${c3} --n-qp $n_qp --m-qp $m_qp --qp-unrolled --shared-PH --affine-qb --strict-affine-layer --obs-has-half-ref --use-residual-loss --no-obs-normalization --force-feasible --batch-test --exp-name reproduce_qp_${n_qp}_${m_qp} --lr-schedule linear --initial-lr "5e-4" --quiet 69 | } 70 | 71 | 72 | # 2. Testing 73 | # 2.1 MPC under different configurations 74 | 75 | test_mpc() { 76 | local gpu_id=$1 77 | local N=$2 78 | local terminal_coef=$3 79 | local n_qp=4 80 | local m_qp=24 81 | CUDA_VISIBLE_DEVICES=$gpu_id python ../../run.py test tank --num-parallel 10000 --horizon 20 --epochs 5000 --mini-epochs 1 --noise-level 0. --reward-shaping 50,0.05,2 --n-qp $n_qp --m-qp $m_qp --qp-unrolled --shared-PH --affine-qb --strict-affine-layer --obs-has-half-ref --use-residual-loss --no-obs-normalization --force-feasible --batch-test --mpc-baseline-N $N --mpc-terminal-cost-coef $terminal_coef --use-osqp-for-mpc --exp-name reproduce_qp_${n_qp}_${m_qp} --run-name reproduce_mpc_${N}_${terminal_coef} --lr-schedule linear --initial-lr "5e-4" --quiet 82 | } 83 | 84 | test_mpc_bg() { 85 | test_mpc $@ > /dev/null & 86 | } 87 | 88 | test_mpc_all() { 89 | test_mpc_bg 0 2 0 90 | test_mpc_bg 0 2 1 91 | test_mpc_bg 0 2 10 92 | test_mpc_bg 0 2 100 93 | test_mpc_bg 0 4 0 94 | test_mpc_bg 0 4 1 95 | test_mpc_bg 0 4 10 96 | test_mpc_bg 0 4 100 97 | test_mpc_bg 1 8 0 98 | test_mpc_bg 1 8 1 99 | test_mpc_bg 1 8 10 100 | test_mpc_bg 1 8 100 101 | test_mpc_bg 1 16 0 102 | test_mpc_bg 1 16 1 103 | test_mpc_bg 1 16 10 104 | test_mpc_bg 1 16 100 105 | wait 106 | } 107 | 108 | # 2.2 MLP 109 | 110 | test_mlp() { 111 | local gpu_id=$1 112 | local c1=$2 113 | local c2=$3 114 | local c3=$4 115 | local mlp_last_size=$5 116 | CUDA_VISIBLE_DEVICES=$gpu_id python ../../run.py test tank --num-parallel 10000 --horizon 20 --epochs 5000 --mini-epochs 1 --noise-level 0. --reward-shaping ${c1},${c2},${c3} --no-obs-normalization --mlp-size-last $mlp_last_size --batch-test --exp-name reproduce_mlp_${mlp_last_size} --lr-schedule linear --initial-lr "5e-4" --quiet 117 | } 118 | 119 | # 2.3 QP 120 | 121 | test_qp() { 122 | local gpu_id=$1 123 | local c1=$2 124 | local c2=$3 125 | local c3=$4 126 | local n_qp=$5 127 | local m_qp=$6 128 | CUDA_VISIBLE_DEVICES=$gpu_id python ../../run.py test tank --num-parallel 10000 --horizon 20 --epochs 5000 --mini-epochs 1 --noise-level 0. 
--reward-shaping ${c1},${c2},${c3} --n-qp $n_qp --m-qp $m_qp --qp-unrolled --shared-PH --affine-qb --strict-affine-layer --obs-has-half-ref --use-residual-loss --no-obs-normalization --force-feasible --batch-test --exp-name reproduce_qp_${n_qp}_${m_qp} --lr-schedule linear --initial-lr "5e-4" --quiet 129 | } 130 | 131 | # Utility function for train and test 132 | train_and_test() { 133 | local train_function="$1" 134 | shift 135 | local test_function="$1" 136 | shift 137 | 138 | train_pid=$(find_gpu_and_run_task $train_function $@) 139 | while [ -e /proc/$train_pid ]; do 140 | sleep 1 141 | done 142 | test_pid=$(find_gpu_and_run_task $test_function $@) 143 | while [ -e /proc/$test_pid ]; do 144 | sleep 1 145 | done 146 | } 147 | 148 | run_and_delay() { 149 | local run_function="$1" 150 | shift 151 | 152 | $run_function $@ & 153 | local run_pid=$! 154 | sleep 10 155 | echo $run_pid 156 | } 157 | 158 | # Finally run all the tasks 159 | 160 | run_and_delay test_mpc_all 161 | run_and_delay train_and_test train_mlp test_mlp 50 0.05 2 8 162 | run_and_delay train_and_test train_mlp test_mlp 50 0.05 2 16 163 | run_and_delay train_and_test train_mlp test_mlp 50 0.05 2 32 164 | run_and_delay train_and_test train_mlp test_mlp 50 0.05 2 64 165 | run_and_delay train_and_test train_qp test_qp 50 0.05 2 4 24 166 | run_and_delay train_and_test train_qp test_qp 50 0.05 2 8 48 167 | run_and_delay train_and_test train_qp test_qp 50 0.05 2 16 96 168 | 169 | wait 170 | 171 | python reproduce_table.py 172 | -------------------------------------------------------------------------------- /experiments/tank/reproduce_table_disturbed.py: -------------------------------------------------------------------------------- 1 | # %% 2 | from glob import glob 3 | import pandas as pd 4 | import numpy as np 5 | import torch 6 | 7 | def read_csv(short_name): 8 | wildcard = f"{short_name}_2*" 9 | filename = sorted(glob(f"test_results/{wildcard}"))[-1] 10 | return pd.read_csv(filename, dtype={"constraint_violated": "bool"}) 11 | 12 | def read_mpc_iter_count(short_name): 13 | wildcard = f"{short_name}_mpc_iter_count_2*" 14 | filename = sorted(glob(f"test_results/{wildcard}"))[-1] 15 | return np.genfromtxt(filename) 16 | 17 | def read_running_time(short_name): 18 | wildcard = f"{short_name}_running_time_2*" 19 | filename = sorted(glob(f"test_results/{wildcard}"))[-1] 20 | return np.genfromtxt(filename) 21 | 22 | 23 | def affine_layer_flops(input_size, output_size, has_bias, has_relu): 24 | flops = 2 * input_size * output_size 25 | if not has_bias: 26 | flops -= output_size 27 | if has_relu: 28 | flops += output_size 29 | return flops 30 | 31 | def qp_flops(n_sys, n_qp, m_qp, qp_iter): 32 | get_q_flops = affine_layer_flops(2 * n_sys, n_qp, False, False) 33 | get_b_flops = affine_layer_flops(n_sys, m_qp, True, False) 34 | get_mu_flops = affine_layer_flops(n_sys, m_qp, False, False) + affine_layer_flops(m_qp, m_qp, False, False) + m_qp 35 | iter_flops = m_qp # Adding primal-dual variables 36 | iter_flops += 2 * m_qp * (m_qp - 1) # Matrix-vector multiplication 37 | iter_flops += 5 * m_qp # Vector additions 38 | return get_q_flops + get_b_flops + get_mu_flops + qp_iter * iter_flops 39 | 40 | def mpc_flops(n_sys, m_sys, N, iter_count_arr): 41 | n_qp = m_sys * N 42 | m_qp = 2 * (m_sys + n_sys) * N 43 | min_iter = np.min(iter_count_arr) 44 | max_iter = np.max(iter_count_arr) 45 | median_iter = np.median(iter_count_arr) 46 | min_flops = qp_flops(n_sys, n_qp, m_qp, min_iter) 47 | max_flops = qp_flops(n_sys, n_qp, m_qp, max_iter) 48 | 
median_flops = qp_flops(n_sys, n_qp, m_qp, median_iter) 49 | return min_flops, max_flops, median_flops 50 | 51 | def mlp_flops(input_size, output_size, hidden_sizes): 52 | flops = 0 53 | prev_size = input_size 54 | for size in hidden_sizes: 55 | flops += affine_layer_flops(prev_size, size, True, True) 56 | prev_size = size 57 | flops += affine_layer_flops(prev_size, output_size, True, False) 58 | return flops 59 | 60 | def count_parameters(exp_name): 61 | checkpoint_path = f"runs/tank_{exp_name}/nn/tank.pth" 62 | checkpoint = torch.load(checkpoint_path) 63 | total_params = 0 64 | for key, value in checkpoint['model'].items(): 65 | if key.startswith("a2c_network.policy_net") or key.startswith("a2c_network.actor_mlp"): 66 | total_params += value.numel() 67 | return total_params 68 | 69 | def get_row(short_name, method, n_sys=4, m_sys=2, n_qp=None, m_qp=None, qp_iter=10, N_mpc=None, mlp_last_size=None): 70 | """Output (short name, success rate, cost, penalized costs, FLOPs, learnable parameters).""" 71 | result_df = read_csv(short_name) 72 | total_episodes = len(result_df) 73 | penalty = 100000 74 | avg_cost = result_df['cumulative_cost'].sum() / result_df['episode_length'].sum() 75 | avg_cost_penalized = (result_df['cumulative_cost'].sum() + penalty * result_df["constraint_violated"].sum()) / result_df['episode_length'].sum() 76 | freq_violation = result_df["constraint_violated"].sum() / result_df['episode_length'].sum() 77 | success_rate = 1. - result_df["constraint_violated"].sum() / total_episodes 78 | 79 | # Count FLOPs / running time 80 | baseline_flops = qp_flops(4, 32, 192, 10) 81 | baseline_time = 0.01 82 | if method == "qp": 83 | flops = qp_flops(n_sys, n_qp, m_qp, qp_iter) 84 | running_time = baseline_time * flops / baseline_flops 85 | elif method == "mpc": 86 | iter_count_arr = read_mpc_iter_count(short_name) 87 | flops = mpc_flops(n_sys, m_sys, N_mpc, iter_count_arr) 88 | running_time = tuple(baseline_time * item / baseline_flops for item in flops) 89 | elif method == "mlp": 90 | flops = mlp_flops(2 * n_sys, m_sys, [i * mlp_last_size for i in [4, 2, 1]]) 91 | running_time = baseline_time * flops / baseline_flops 92 | elif method == "robust_mpc": 93 | flops = 0 94 | running_time_arr = read_running_time(short_name) 95 | min_running_time = np.min(running_time_arr) 96 | max_running_time = np.max(running_time_arr) 97 | median_running_time = np.median(running_time_arr) 98 | running_time = (min_running_time, max_running_time, median_running_time) 99 | 100 | 101 | # Count learnable parameters 102 | if method == "mpc" or method == "robust_mpc": 103 | num_param = 0 104 | else: 105 | num_param = count_parameters(short_name) 106 | 107 | return short_name, success_rate, avg_cost, avg_cost_penalized, flops, running_time, num_param 108 | 109 | # %% 110 | rows = [ 111 | get_row("reproduce_disturbed_mpc_16_10_none", "mpc", N_mpc=16), 112 | get_row("reproduce_disturbed_mpc_16_10_scenario", "robust_mpc", N_mpc=16), 113 | get_row("reproduce_disturbed_mpc_16_10_tube_0.05", "robust_mpc", N_mpc=16), 114 | get_row("reproduce_disturbed_mpc_16_10_tube_0.1", "robust_mpc", N_mpc=16), 115 | get_row("reproduce_disturbed_mpc_16_10_tube_0.2", "robust_mpc", N_mpc=16), 116 | get_row("reproduce_disturbed_mpc_16_10_tube_0.25", "robust_mpc", N_mpc=16), 117 | get_row("reproduce_disturbed_mpc_16_10_tube_0.3", "robust_mpc", N_mpc=16), 118 | get_row("reproduce_disturbed_mlp_8", "mlp", mlp_last_size=8), 119 | get_row("reproduce_disturbed_mlp_16", "mlp", mlp_last_size=16), 120 | get_row("reproduce_disturbed_mlp_32", 
"mlp", mlp_last_size=32), 121 | get_row("reproduce_disturbed_mlp_64", "mlp", mlp_last_size=32), 122 | get_row("reproduce_disturbed_qp_4_24", "qp", n_qp=4, m_qp=24), 123 | get_row("reproduce_disturbed_qp_8_48", "qp", n_qp=8, m_qp=48), 124 | get_row("reproduce_disturbed_qp_16_96", "qp", n_qp=16, m_qp=96), 125 | get_row("reproduce_disturbed_qp_32_192", "qp", n_qp=32, m_qp=192), 126 | ] 127 | 128 | df_result = pd.DataFrame(rows, columns=["name", "success_rate", "avg_cost", "avg_cost_penalized", "flops", "running_time", "num_param"]) 129 | df_result.to_csv("test_results/reproduce_table_disturbed.csv", index=False) 130 | print(df_result) 131 | -------------------------------------------------------------------------------- /experiments/cartpole/reproduce.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 0. Background utils and GPU scheduler 4 | 5 | # Define the number of GPUs available 6 | NUM_GPUS=$(nvidia-smi --list-gpus | wc -l) 7 | 8 | # Function to find the first idle GPU 9 | find_idle_gpu() { 10 | for (( i=0; i<$NUM_GPUS; i++ )); do 11 | # Check if GPU volatile utilization is 0% 12 | if [ "$(nvidia-smi -i $i --query-gpu=utilization.gpu --format=csv,noheader,nounits)" -eq 0 ]; then 13 | echo $i 14 | return 15 | fi 16 | done 17 | echo "-1" # Return -1 if no idle GPU is found 18 | } 19 | 20 | find_gpu_and_run_task() { 21 | local run_task_function="$1" 22 | shift # Remove the first argument (run_task_function name) 23 | 24 | # Initialize GPU ID as -1 indicating no GPU is available initially 25 | local gpu_id=-1 26 | 27 | # Wait for an idle GPU to become available 28 | while [ "$gpu_id" -eq -1 ]; do 29 | gpu_id=$(find_idle_gpu) 30 | sleep 1 # Wait a bit before checking again 31 | done 32 | 33 | # Call the run_task function with the GPU ID and additional arguments, and send it to the background 34 | $run_task_function $gpu_id $@ > /dev/null & 35 | 36 | # Capture the PID of the last background process 37 | local task_pid=$! 38 | 39 | # Optional: wait briefly to allow the task to start 40 | sleep 15 41 | 42 | # Output the PID 43 | echo $task_pid 44 | } 45 | 46 | 47 | # 1. Training 48 | # 1.1 MLP of different sizes 49 | 50 | train_mlp() { 51 | local gpu_id=$1 52 | local c1=$2 53 | local c2=$3 54 | local c3=$4 55 | local mlp_last_size=$5 56 | CUDA_VISIBLE_DEVICES=$gpu_id python ../../run.py train cartpole --num-parallel 100000 --horizon 20 --epochs 5000 --mini-epochs 1 --noise-level 0. --reward-shaping ${c1},${c2},${c3} --no-obs-normalization --mlp-size-last $mlp_last_size --batch-test --exp-name reproduce_mlp_${mlp_last_size} --lr-schedule linear --initial-lr "5e-4" --quiet --max-steps-per-episode 100 57 | } 58 | 59 | # 1.2 QP of different sizes 60 | 61 | train_qp() { 62 | local gpu_id=$1 63 | local c1=$2 64 | local c2=$3 65 | local c3=$4 66 | local n_qp=$5 67 | local m_qp=$6 68 | CUDA_VISIBLE_DEVICES=$gpu_id python ../../run.py train cartpole --num-parallel 100000 --horizon 20 --epochs 5000 --mini-epochs 1 --noise-level 0. --reward-shaping ${c1},${c2},${c3} --n-qp $n_qp --m-qp $m_qp --qp-unrolled --shared-PH --affine-qb --symmetric --use-residual-loss --no-obs-normalization --force-feasible --batch-test --exp-name reproduce_qp_${n_qp}_${m_qp} --lr-schedule linear --initial-lr "5e-4" --quiet --max-steps-per-episode 100 69 | } 70 | 71 | 72 | # 2. 
Testing 73 | # 2.1 MPC under different configurations 74 | 75 | test_mpc() { 76 | local gpu_id=$1 77 | local N=$2 78 | local terminal_coef=$3 79 | local n_qp=4 80 | local m_qp=24 81 | CUDA_VISIBLE_DEVICES=$gpu_id python ../../run.py test cartpole --num-parallel 10000 --horizon 20 --epochs 5000 --mini-epochs 1 --noise-level 0. --reward-shaping 50,0.05,2 --n-qp $n_qp --m-qp $m_qp --qp-unrolled --shared-PH --affine-qb --symmetric --use-residual-loss --no-obs-normalization --force-feasible --batch-test --mpc-baseline-N $N --mpc-terminal-cost-coef $terminal_coef --use-osqp-for-mpc --exp-name reproduce_qp_${n_qp}_${m_qp} --run-name reproduce_mpc_${N}_${terminal_coef} --lr-schedule linear --initial-lr "5e-4" --quiet --max-steps-per-episode 100 82 | } 83 | 84 | test_mpc_bg() { 85 | test_mpc $@ > /dev/null & 86 | } 87 | 88 | test_mpc_all() { 89 | test_mpc_bg 2 2 0 90 | test_mpc_bg 2 2 1 91 | test_mpc_bg 2 2 10 92 | test_mpc_bg 2 2 100 93 | test_mpc_bg 2 4 0 94 | test_mpc_bg 2 4 1 95 | test_mpc_bg 2 4 10 96 | test_mpc_bg 2 4 100 97 | test_mpc_bg 3 8 0 98 | test_mpc_bg 3 8 1 99 | test_mpc_bg 3 8 10 100 | test_mpc_bg 3 8 100 101 | test_mpc_bg 3 16 0 102 | test_mpc_bg 3 16 1 103 | test_mpc_bg 3 16 10 104 | test_mpc_bg 3 16 100 105 | wait 106 | } 107 | 108 | # 2.2 MLP 109 | 110 | test_mlp() { 111 | local gpu_id=$1 112 | local c1=$2 113 | local c2=$3 114 | local c3=$4 115 | local mlp_last_size=$5 116 | CUDA_VISIBLE_DEVICES=$gpu_id python ../../run.py test cartpole --num-parallel 10000 --horizon 20 --epochs 5000 --mini-epochs 1 --noise-level 0. --reward-shaping ${c1},${c2},${c3} --no-obs-normalization --mlp-size-last $mlp_last_size --batch-test --exp-name reproduce_mlp_${mlp_last_size} --lr-schedule linear --initial-lr "5e-4" --quiet --max-steps-per-episode 100 117 | } 118 | 119 | # 2.3 QP 120 | 121 | test_qp() { 122 | local gpu_id=$1 123 | local c1=$2 124 | local c2=$3 125 | local c3=$4 126 | local n_qp=$5 127 | local m_qp=$6 128 | CUDA_VISIBLE_DEVICES=$gpu_id python ../../run.py test cartpole --num-parallel 10000 --horizon 20 --epochs 5000 --mini-epochs 1 --noise-level 0. --reward-shaping ${c1},${c2},${c3} --n-qp $n_qp --m-qp $m_qp --qp-unrolled --shared-PH --affine-qb --symmetric --use-residual-loss --no-obs-normalization --force-feasible --batch-test --exp-name reproduce_qp_${n_qp}_${m_qp} --lr-schedule linear --initial-lr "5e-4" --quiet --max-steps-per-episode 100 129 | } 130 | 131 | # Utility function for train and test 132 | train_and_test() { 133 | local train_function="$1" 134 | shift 135 | local test_function="$1" 136 | shift 137 | 138 | train_pid=$(find_gpu_and_run_task $train_function $@) 139 | while [ -e /proc/$train_pid ]; do 140 | sleep 1 141 | done 142 | test_pid=$(find_gpu_and_run_task $test_function $@) 143 | while [ -e /proc/$test_pid ]; do 144 | sleep 1 145 | done 146 | } 147 | 148 | run_and_delay() { 149 | local run_function="$1" 150 | shift 151 | 152 | $run_function $@ & 153 | local run_pid=$! 
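# Note (added for clarity): sleeping before returning staggers successive launches,
# giving the pipeline started above time to show up as nonzero GPU utilization before
# the next caller polls find_idle_gpu; otherwise two tasks could land on the same device.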
154 | sleep 15 155 | echo $run_pid 156 | } 157 | 158 | # Finally run all the tasks 159 | 160 | run_and_delay test_mpc_all 161 | run_and_delay train_and_test train_mlp test_mlp 50 0.05 2 8 162 | run_and_delay train_and_test train_mlp test_mlp 50 0.05 2 16 163 | run_and_delay train_and_test train_mlp test_mlp 50 0.05 2 32 164 | run_and_delay train_and_test train_mlp test_mlp 50 0.05 2 64 165 | run_and_delay train_and_test train_qp test_qp 50 0.05 2 4 24 166 | run_and_delay train_and_test train_qp test_qp 50 0.05 2 8 48 167 | run_and_delay train_and_test train_qp test_qp 50 0.05 2 16 96 168 | 169 | wait 170 | 171 | python reproduce_table.py 172 | -------------------------------------------------------------------------------- /src/utils/sets.py: -------------------------------------------------------------------------------- 1 | from scipy.spatial import ConvexHull 2 | from scipy.spatial import Delaunay 3 | import numpy as np 4 | from tqdm.auto import tqdm 5 | 6 | 7 | def backward_reachable_set_linear(A_inv, B, X_set, x_min, x_max, u_min, u_max): 8 | """ 9 | Compute the one-step backward reachable set for a linear system x_{k+1} = Ax + Bu. 10 | 11 | Parameters: 12 | A_inv (numpy.ndarray): Inverse of the A matrix in the system dynamics. 13 | B (numpy.ndarray): B matrix in the system dynamics. 14 | X_set (set): Set of points (as tuples) representing the current state space. 15 | x_min (float or numpy.ndarray): Minimum state constraints. 16 | x_max (float or numpy.ndarray): Maximum state constraints. 17 | u_min (float or numpy.ndarray): Minimum control input constraints. 18 | u_max (float or numpy.ndarray): Maximum control input constraints. 19 | 20 | Returns: 21 | set: One-step backward reachable set as a set of points (as tuples). 22 | """ 23 | new_set = set() 24 | for x in X_set: 25 | for u in np.linspace(u_min, u_max, 5): 26 | prev_x = np.dot(A_inv, x - np.dot(B, u)) 27 | if np.all(x_min <= prev_x) and np.all(prev_x <= x_max): 28 | new_set.add(tuple(prev_x)) 29 | return new_set 30 | 31 | 32 | def one_step_forward_reachable_set(g, S, x_min, x_max): 33 | """ 34 | Compute the one-step forward reachable set for an autonomous system x_{k+1} = g(x_k). 35 | 36 | Parameters: 37 | g (function): Function representing the autonomous system dynamics. 38 | S (numpy.ndarray): Vertices of the initial set. 39 | x_min (numpy.ndarray): Minimum state constraints. 40 | x_max (numpy.ndarray): Maximum state constraints. 41 | 42 | Returns: 43 | numpy.ndarray: Vertices of the one-step forward reachable set. 44 | """ 45 | new_vertices = [] 46 | 47 | for x in S: 48 | next_x = g(x) 49 | 50 | # Check if the next state is within the state constraints 51 | if np.all(x_min <= next_x) and np.all(next_x <= x_max): 52 | new_vertices.append(next_x) 53 | 54 | return np.array(new_vertices) 55 | 56 | 57 | def one_step_backward_reachable_set(g, S_hull, x_min, x_max, num_samples=1000): 58 | """ 59 | Compute the one-step backward reachable set for an autonomous system x_{k+1} = g(x_k). 60 | 61 | Parameters: 62 | g (function): Function representing the autonomous system dynamics. 63 | S_hull (ConvexHull): Convex hull object of the initial set S. 64 | x_min (numpy.ndarray): Minimum state constraints. 65 | x_max (numpy.ndarray): Maximum state constraints. 66 | num_samples (int): Number of samples for approximation. 67 | 68 | Returns: 69 | numpy.ndarray: Vertices of the approximated one-step backward reachable set. 
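Note: this is a Monte-Carlo approximation. Points are sampled uniformly within
[x_min, x_max], kept when g(x) lands inside S (membership tested via a Delaunay
triangulation of S's vertices), and the convex hull of the kept points is
returned, so accuracy improves with num_samples.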
70 | """ 71 | # Sample points within the state constraints 72 | sampled_points = np.random.uniform(x_min, x_max, (num_samples, len(x_min))) 73 | 74 | # Delaunay triangulation to speed up point-in-hull check 75 | delaunay_S = Delaunay(S_hull.points[S_hull.vertices, :]) 76 | 77 | # Check which sampled points have their next state in S 78 | backward_reachable_points = [] 79 | for x in sampled_points: 80 | next_x = g(x) 81 | if Delaunay.find_simplex(delaunay_S, next_x) >= 0: 82 | backward_reachable_points.append(x) 83 | 84 | # Compute the convex hull of the backward reachable points 85 | if len(backward_reachable_points) > 0: 86 | backward_hull = ConvexHull(np.array(backward_reachable_points)) 87 | return backward_hull.points[backward_hull.vertices, :] 88 | else: 89 | return np.array([]) 90 | 91 | 92 | def compute_positive_invariant_set_from_origin(g, x_min, x_max, initial_radius=1.0, iterations=100): 93 | """ 94 | Compute the positive invariant set for an autonomous system x_{k+1} = g(x_k) starting from a neighborhood of the origin. 95 | 96 | Parameters: 97 | g (function): Function representing the autonomous system dynamics. 98 | x_min (numpy.ndarray): Minimum state constraints. 99 | x_max (numpy.ndarray): Maximum state constraints. 100 | initial_radius (float): Radius of the initial neighborhood around the origin. 101 | iterations (int): Number of iterations for approximation. 102 | 103 | Returns: 104 | numpy.ndarray: Vertices of the approximated positive invariant set. 105 | """ 106 | # Start from a neighborhood of the origin defined by the initial_radius 107 | initial_set = np.array([[initial_radius, 0], [0, initial_radius], [-initial_radius, 0], [0, -initial_radius]]) 108 | current_set_hull = ConvexHull(initial_set) 109 | 110 | for _ in tqdm(range(iterations)): 111 | # Determine the sampling bounds based on the current set 112 | current_radius = np.max(np.linalg.norm(current_set_hull.points[current_set_hull.vertices, :], axis=1)) 113 | sampling_min = np.maximum(x_min, -current_radius * 1.5) 114 | sampling_max = np.minimum(x_max, current_radius * 1.5) 115 | 116 | # Compute the one-step backward reachable set from the current set 117 | backward_reachable_vertices = one_step_backward_reachable_set(g, current_set_hull, sampling_min, sampling_max) 118 | 119 | # Update the current set to include the backward reachable set, effectively taking union 120 | if len(backward_reachable_vertices) > 0: 121 | new_hull = ConvexHull(np.vstack((current_set_hull.points[current_set_hull.vertices, :], backward_reachable_vertices))) 122 | current_set_hull = new_hull 123 | 124 | return current_set_hull.points[current_set_hull.vertices, :] 125 | 126 | 127 | def compute_MCI(A, B, x_min, x_max, u_min, u_max, iterations=10): 128 | """ 129 | Compute the Maximal Control Invariant (MCI) set for a given linear system x[k+1] = Ax[k] + Bu[k]. 130 | 131 | Parameters: 132 | A (numpy.ndarray): State transition matrix. 133 | B (numpy.ndarray): Input matrix. 134 | x_min (numpy.ndarray): Minimum state constraints. 135 | x_max (numpy.ndarray): Maximum state constraints. 136 | u_min (numpy.ndarray): Minimum control input constraints. 137 | u_max (numpy.ndarray): Maximum control input constraints. 138 | iterations (int): Number of iterations for approximating the MCI set. 139 | 140 | Returns: 141 | numpy.ndarray: Vertices of the approximated MCI set. 
142 | """ 143 | 144 | # Precompute the inverse of A 145 | A_inv = np.linalg.inv(A) 146 | 147 | # Initialize the MCI set as a single point at the origin, using a set for uniqueness 148 | MCI_set = {(0, 0)} 149 | 150 | # Iteratively compute the MCI set 151 | for _ in range(iterations): 152 | MCI_set = backward_reachable_set_linear(A_inv, B, MCI_set, x_min, x_max, u_min, u_max) 153 | if len(MCI_set) == 0: 154 | break 155 | 156 | # Convert the set to an array for further processing or visualization 157 | MCI_array = np.array(list(MCI_set)) 158 | 159 | if len(MCI_array) > 0: 160 | MCI_hull = ConvexHull(MCI_array) 161 | return MCI_hull.points[MCI_hull.vertices, :] 162 | else: 163 | return np.array([]) -------------------------------------------------------------------------------- /src/envs/env_creators.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from .linear_system import LinearSystem 4 | from .cartpole import CartPole 5 | 6 | sys_param = { 7 | "double_integrator": { 8 | "n": 2, 9 | "m": 1, 10 | "A": np.array([ 11 | [1.0, 1.0], 12 | [0.0, 1.0], 13 | ]), 14 | "B": np.array([ 15 | [0.0], 16 | [1.0], 17 | ]), 18 | "Q": np.eye(2), 19 | "R": np.array([[100.0]]), 20 | "x_min": -5., 21 | "x_max": 5., 22 | "u_min": -0.5, 23 | "u_max": 0.5, 24 | }, 25 | "tank": { 26 | "n": 4, 27 | "m": 2, 28 | "A": np.array([ 29 | [0.984, 0.0, 0.0422029, 0.0], 30 | [0.0, 0.98895, 0.0, 0.0326014], 31 | [0.0, 0.0, 0.957453, 0.0], 32 | [0.0, 0.0, 0.0, 0.967216], 33 | ]), 34 | "B": np.array([ 35 | [0.825822, 0.0101995], 36 | [0.00512673, 0.624648], 37 | [0.0, 0.468317], 38 | [0.307042, 0.0], 39 | ]), 40 | "Q": np.eye(4), 41 | "R": 0.1 * np.eye(2), 42 | "x_min": 0, 43 | "x_max": 20, 44 | "u_min": 0, 45 | "u_max": 8, 46 | }, 47 | "cartpole": { 48 | "n": 4, 49 | "m": 1, 50 | "m_cart": [0.7, 1.3], 51 | "m_pole": [0.07, 0.13], 52 | "l": [0.4, 0.7], 53 | "m_cart_nom": 1.0, 54 | "m_pole_nom": 0.1, 55 | "l_nom": 0.55, 56 | "Q": np.diag([1., 1e-4, 1., 1e-4]), 57 | "R": np.array([[1e-4]]), 58 | "x_min": -2, 59 | "x_max": 2, 60 | "u_min": -10, 61 | "u_max": 10, 62 | "dt": 0.1, 63 | }, 64 | } 65 | 66 | def tank_initial_generator(size, device, rng): 67 | """ 68 | Generate initial states for the tank environment. 69 | State components are sampled in [0, 16] to ensure that the initial state stays within the maximal contraint invariant set. 70 | """ 71 | x0 = 16. * torch.rand((size, 4), generator=rng, device=device) 72 | return x0 73 | 74 | def tank_ref_generator(size, device, rng): 75 | """ 76 | Generate reference states for the tank environment. 77 | Sampled across the entire state space. 78 | """ 79 | x_ref = 20. * torch.rand((size, 4), generator=rng, device=device) 80 | return x_ref 81 | 82 | def tank_randomizer(size, device, rng): 83 | """ 84 | Generate \Delta A, \Delta B for the tank environment. 85 | """ 86 | Delta_A11 = 0.002 * (2. * torch.rand((size,), generator=rng, device=device) - 1.) # Leakage of tank 1 87 | Delta_A22 = 0.002 * (2. * torch.rand((size,), generator=rng, device=device) - 1.) # Leakage of tank 2 88 | Delta_A13 = 0.002 * (2. * torch.rand((size,), generator=rng, device=device) - 1.) # Leakage from tank 3 to tank 1 89 | Delta_A33 = -Delta_A13 # Conservation of tank 3 90 | Delta_A24 = 0.002 * (2. * torch.rand((size,), generator=rng, device=device) - 1.) 
# Leakage from tank 4 to tank 2 91 | Delta_A44 = -Delta_A24 # Conservation of tank 4 92 | zeros = torch.zeros((size,), device=device) # Other elements are not perturbed 93 | # A = [A11 0 A13 0; 0 A22 0 A24; 0 0 A33 0; 0 0 0 A44] 94 | Delta_A = torch.stack([ 95 | torch.stack([Delta_A11, zeros, Delta_A13, zeros], dim=1), 96 | torch.stack([zeros, Delta_A22, zeros, Delta_A24], dim=1), 97 | torch.stack([zeros, zeros, Delta_A33, zeros], dim=1), 98 | torch.stack([zeros, zeros, zeros, Delta_A44], dim=1) 99 | ], dim=1) 100 | 101 | multiplier_B1 = 0.02 * (2. * torch.rand((size,), generator=rng, device=device) - 1.) # Voltage perturbation on pump 1 102 | multiplier_B2 = 0.02 * (2. * torch.rand((size,), generator=rng, device=device) - 1.) # Voltage perturbation on pump 2 103 | B = torch.tensor(sys_param["tank"]["B"], device=device, dtype=torch.float).unsqueeze(0) 104 | Delta_B1 = multiplier_B1.unsqueeze(-1) * B[:, :, 0] 105 | Delta_B2 = multiplier_B2.unsqueeze(-1) * B[:, :, 1] 106 | Delta_B = torch.stack([Delta_B1, Delta_B2], dim=2) 107 | 108 | return Delta_A, Delta_B 109 | 110 | 111 | env_creators = { 112 | "double_integrator": lambda **kwargs: LinearSystem( 113 | A=sys_param["double_integrator"]["A"], 114 | B=sys_param["double_integrator"]["B"], 115 | Q=sys_param["double_integrator"]["Q"], 116 | R=sys_param["double_integrator"]["R"], 117 | sqrt_W=kwargs["noise_level"] * np.eye(2), 118 | x_min=sys_param["double_integrator"]["x_min"] * np.ones(2), 119 | x_max=sys_param["double_integrator"]["x_max"] * np.ones(2), 120 | u_min=sys_param["double_integrator"]["u_min"] * np.ones(1), 121 | u_max=sys_param["double_integrator"]["u_max"] * np.ones(1), 122 | barrier_thresh=0.1, 123 | randomize_std=(0.001 if kwargs["randomize"] else 0.), 124 | stabilization_only=True, 125 | **kwargs 126 | ), 127 | "tank": lambda **kwargs: LinearSystem( 128 | A=sys_param["tank"]["A"], 129 | B=sys_param["tank"]["B"], 130 | Q=sys_param["tank"]["Q"], 131 | R=sys_param["tank"]["R"], 132 | sqrt_W=kwargs["noise_level"] * np.eye(4), 133 | x_min=sys_param["tank"]["x_min"] * np.ones(4), 134 | x_max=sys_param["tank"]["x_max"] * np.ones(4), 135 | u_min=sys_param["tank"]["u_min"] * np.ones(2), 136 | u_max=sys_param["tank"]["u_max"] * np.ones(2) if not kwargs.get("skip_to_steady_state", False) else 1.0 * np.ones(2), 137 | barrier_thresh=1., 138 | randomizer=(tank_randomizer if kwargs["randomize"] else None), 139 | reward_shaping_parameters={ 140 | "steady_c1": kwargs["reward_shaping"][0], 141 | "steady_c2": kwargs["reward_shaping"][1], 142 | "steady_c3": kwargs["reward_shaping"][2], 143 | } if "reward_shaping" in kwargs else {}, 144 | initial_generator=tank_initial_generator, 145 | ref_generator=tank_ref_generator, 146 | **kwargs 147 | ), 148 | "cartpole": lambda **kwargs: CartPole( 149 | parameters={ 150 | "m_cart": [sys_param["cartpole"]["m_cart_nom"], sys_param["cartpole"]["m_cart_nom"]] if not kwargs["randomize"] else sys_param["cartpole"]["m_cart"], 151 | "m_pole": [sys_param["cartpole"]["m_pole_nom"], sys_param["cartpole"]["m_pole_nom"]] if not kwargs["randomize"] else sys_param["cartpole"]["m_pole"], 152 | "l": [sys_param["cartpole"]["l_nom"], sys_param["cartpole"]["l_nom"]] if not kwargs["randomize"] else sys_param["cartpole"]["l"], 153 | "dt": sys_param["cartpole"]["dt"], 154 | }, 155 | Q=sys_param["cartpole"]["Q"], 156 | R=sys_param["cartpole"]["R"], 157 | noise_std=kwargs["noise_level"], 158 | x_min=sys_param["cartpole"]["x_min"], 159 | x_max=sys_param["cartpole"]["x_max"], 160 | u_min=sys_param["cartpole"]["u_min"], 161 | 
u_max=sys_param["cartpole"]["u_max"], 162 | bs=kwargs["bs"], 163 | barrier_thresh=0.1, 164 | max_steps=kwargs["max_steps"], 165 | keep_stats=kwargs["keep_stats"], 166 | run_name=kwargs["run_name"], 167 | exp_name=kwargs["exp_name"], 168 | ), 169 | } 170 | -------------------------------------------------------------------------------- /src/networks/a2c_qp_unrolled.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | from rl_games.algos_torch.network_builder import NetworkBuilder, A2CBuilder 5 | from ..modules.qp_unrolled_network import QPUnrolledNetwork 6 | import atexit 7 | from datetime import datetime 8 | import os 9 | import numpy as np 10 | 11 | class A2CQPUnrolled(A2CBuilder.Network): 12 | def __init__(self, params, **kwargs): 13 | self.actions_num = kwargs.pop('actions_num') 14 | input_shape = kwargs.pop('input_shape') 15 | self.value_size = kwargs.pop('value_size', 1) 16 | 17 | NetworkBuilder.BaseNetwork.__init__(self) 18 | self.n_obs = input_shape[0] 19 | self.load(params) 20 | 21 | if self.separate: 22 | raise NotImplementedError() 23 | 24 | def mlp_builder(input_size, output_size): 25 | policy_mlp_args = { 26 | 'input_size' : input_size, 27 | 'units' : self.params["mlp"]["units"] + [output_size], 28 | 'activation' : self.activation, 29 | 'norm_func_name' : self.normalization, 30 | 'dense_func' : torch.nn.Linear, 31 | 'd2rl' : self.is_d2rl, 32 | 'norm_only_first_layer' : self.norm_only_first_layer 33 | } 34 | return self._build_mlp(**policy_mlp_args) 35 | 36 | self.policy_net = QPUnrolledNetwork( 37 | self.device, 38 | self.n_obs, 39 | self.n_qp, 40 | self.m_qp, 41 | self.qp_iter, 42 | mlp_builder, 43 | shared_PH=self.shared_PH, 44 | affine_qb=self.affine_qb, 45 | strict_affine_layer=self.strict_affine_layer, 46 | obs_has_half_ref=self.obs_has_half_ref, 47 | symmetric=self.symmetric, 48 | no_b=self.no_b, 49 | use_warm_starter=self.use_warm_starter, 50 | train_warm_starter=self.train_warm_starter, 51 | ws_loss_coef=self.ws_loss_coef, 52 | ws_update_rate=self.ws_update_rate, 53 | mpc_baseline=self.mpc_baseline, 54 | use_osqp_for_mpc=self.use_osqp_for_mpc, 55 | use_residual_loss=self.use_residual_loss, 56 | imitate_mpc=self.imitate_mpc, 57 | force_feasible=self.force_feasible, 58 | feasible_lambda=self.feasible_lambda, 59 | is_test=self.is_test, 60 | ) 61 | 62 | # TODO: exploit structure in value function? 
63 | value_mlp_args = { 64 | 'input_size' : self.n_obs, 65 | 'units' : self.params["mlp"]["units"] + [self.value_size], 66 | 'activation' : self.activation, 67 | 'norm_func_name' : self.normalization, 68 | 'dense_func' : torch.nn.Linear, 69 | 'd2rl' : self.is_d2rl, 70 | 'norm_only_first_layer' : self.norm_only_first_layer 71 | } 72 | self.value_net = self._build_mlp(**value_mlp_args) 73 | 74 | sigma_init = self.init_factory.create(**self.space_config['sigma_init']) 75 | self.sigma = nn.Parameter(torch.zeros(self.actions_num, requires_grad=True, dtype=torch.float32), requires_grad=True) 76 | 77 | mlp_init = self.init_factory.create(**self.initializer) 78 | 79 | for m in self.modules(): 80 | if isinstance(m, nn.Linear): 81 | mlp_init(m.weight) 82 | if getattr(m, "bias", None) is not None: 83 | torch.nn.init.zeros_(m.bias) 84 | 85 | sigma_init(self.sigma) 86 | 87 | # Register the cleanup method to be called at exit 88 | atexit.register(self.cleanup) 89 | 90 | def cleanup(self): 91 | # Implement the housekeeping logic here 92 | # For example, dumping internal state to a file 93 | directory = 'test_results' 94 | if not os.path.exists(directory): 95 | os.makedirs(directory) 96 | timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') 97 | if self.mpc_baseline is not None and self.use_osqp_for_mpc: 98 | # When MPC is run using OSQP, dump the iteration counts (collected by QPUnrolledNetwork) to CSV 99 | tag = f"{self.run_name}_mpc_iter_count" 100 | filename = os.path.join(directory, f"{tag}_{timestamp}.csv") 101 | iter_counts = self.policy_net.info['osqp_iter_counts'] 102 | np.savetxt(filename, iter_counts, fmt='%d') 103 | if self.mpc_baseline is not None and self.mpc_baseline.get("robust_method", None) is not None: 104 | # When robust MPC is used, dump the per-step times (collected by QPUnrolledNetwork) to CSV 105 | tag = f"{self.run_name}_running_time" 106 | filename = os.path.join(directory, f"{tag}_{timestamp}.csv") 107 | running_time = self.policy_net.info['running_time'] 108 | np.savetxt(filename, running_time, fmt='%f') 109 | 110 | def forward(self, obs_dict): 111 | obs = obs_dict['obs'] 112 | info = obs_dict.get('info', {}) 113 | mu = self.policy_net(obs, info=info)[:, :self.actions_num] 114 | value = self.value_net(obs) 115 | sigma = self.sigma 116 | states = None # reserved for RNN 117 | if self.policy_net.autonomous_losses: 118 | return mu, mu*0 + sigma, value, states, self.policy_net.autonomous_losses 119 | else: 120 | return mu, mu*0 + sigma, value, states 121 | 122 | def load(self, params): 123 | A2CBuilder.Network.load(self, params) 124 | self.params = params 125 | self.device = params["custom"]["device"] 126 | self.n_qp = params["custom"]["n_qp"] 127 | self.m_qp = params["custom"]["m_qp"] 128 | self.qp_iter = params["custom"]["qp_iter"] 129 | self.shared_PH = params["custom"]["shared_PH"] 130 | self.affine_qb = params["custom"]["affine_qb"] 131 | self.strict_affine_layer = params["custom"]["strict_affine_layer"] 132 | self.obs_has_half_ref = params["custom"]["obs_has_half_ref"] 133 | self.symmetric = params["custom"]["symmetric"] 134 | self.no_b = params["custom"]["no_b"] 135 | self.use_warm_starter = params["custom"]["use_warm_starter"] 136 | self.train_warm_starter = params["custom"]["train_warm_starter"] 137 | self.ws_loss_coef = params["custom"]["ws_loss_coef"] 138 | self.ws_update_rate = params["custom"]["ws_update_rate"] 139 | self.mpc_baseline = params["custom"]["mpc_baseline"] 140 | self.use_osqp_for_mpc = params["custom"]["use_osqp_for_mpc"] 141 | self.use_residual_loss = 
params["custom"]["use_residual_loss"] 142 | self.imitate_mpc = params["custom"]["imitate_mpc"] 143 | self.force_feasible = params["custom"]["force_feasible"] 144 | self.feasible_lambda = params["custom"]["feasible_lambda"] 145 | self.is_test = params["custom"]["train_or_test"] == "test" 146 | self.run_name = params["custom"]["run_name"] 147 | 148 | class A2CQPUnrolledBuilder(NetworkBuilder): 149 | def __init__(self, **kwargs): 150 | NetworkBuilder.__init__(self) 151 | 152 | def load(self, params): 153 | self.params = params 154 | 155 | def build(self, name, **kwargs): 156 | net = A2CQPUnrolled(self.params, **kwargs) 157 | return net 158 | -------------------------------------------------------------------------------- /experiments/tank/reproduce_disturbed.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 0. Background utils and GPU scheduler 4 | 5 | # Define the number of GPUs available 6 | NUM_GPUS=$(nvidia-smi --list-gpus | wc -l) 7 | 8 | # Function to find the first idle GPU 9 | find_idle_gpu() { 10 | for (( i=0; i<$NUM_GPUS; i++ )); do 11 | # Check if GPU volatile utilization is 0% 12 | if [ "$(nvidia-smi -i $i --query-gpu=utilization.gpu --format=csv,noheader,nounits)" -eq 0 ]; then 13 | echo $i 14 | return 15 | fi 16 | done 17 | echo "-1" # Return -1 if no idle GPU is found 18 | } 19 | 20 | find_gpu_and_run_task() { 21 | local run_task_function="$1" 22 | shift # Remove the first argument (run_task_function name) 23 | 24 | # Initialize GPU ID as -1 indicating no GPU is available initially 25 | local gpu_id=-1 26 | 27 | # Wait for an idle GPU to become available 28 | while [ "$gpu_id" -eq -1 ]; do 29 | gpu_id=$(find_idle_gpu) 30 | sleep 1 # Wait a bit before checking again 31 | done 32 | 33 | # Call the run_task function with the GPU ID and additional arguments, and send it to the background 34 | $run_task_function $gpu_id $@ > /dev/null & 35 | # $run_task_function $gpu_id $@ & 36 | 37 | # Capture the PID of the last background process 38 | local task_pid=$! 39 | 40 | # Optional: wait briefly to allow the task to start 41 | sleep 10 42 | 43 | # Output the PID 44 | echo $task_pid 45 | } 46 | 47 | 48 | # 1. Training 49 | # 1.1 MLP of different sizes 50 | 51 | train_mlp() { 52 | local gpu_id=$1 53 | local c1=$2 54 | local c2=$3 55 | local c3=$4 56 | local mlp_last_size=$5 57 | CUDA_VISIBLE_DEVICES=$gpu_id python ../../run.py train tank --num-parallel 100000 --horizon 20 --epochs 5000 --mini-epochs 1 --noise-level 0.1 --randomize --reward-shaping ${c1},${c2},${c3} --no-obs-normalization --mlp-size-last $mlp_last_size --batch-test --exp-name reproduce_disturbed_mlp_${mlp_last_size} --lr-schedule linear --initial-lr "5e-4" 58 | } 59 | 60 | # 1.2 QP of different sizes 61 | 62 | train_qp() { 63 | local gpu_id=$1 64 | local c1=$2 65 | local c2=$3 66 | local c3=$4 67 | local n_qp=$5 68 | local m_qp=$6 69 | CUDA_VISIBLE_DEVICES=$gpu_id python ../../run.py train tank --num-parallel 100000 --horizon 20 --epochs 5000 --mini-epochs 1 --noise-level 0.1 --randomize --reward-shaping ${c1},${c2},${c3} --n-qp $n_qp --m-qp $m_qp --qp-unrolled --shared-PH --affine-qb --strict-affine-layer --obs-has-half-ref --use-residual-loss --no-obs-normalization --force-feasible --batch-test --exp-name reproduce_disturbed_qp_${n_qp}_${m_qp} --lr-schedule linear --initial-lr "5e-4" 70 | } 71 | 72 | 73 | # 2. 
Testing 74 | # 2.1 MPC under different configurations 75 | 76 | test_mpc() { 77 | local gpu_id=$1 78 | local N=$2 79 | local terminal_coef=$3 80 | local robust_method=$4 81 | local max_cpu_workers=$5 82 | local tube_size=$6 # Optional sixth argument 83 | local n_qp=4 84 | local m_qp=24 85 | 86 | # Initial part of the run_name 87 | local run_name="reproduce_disturbed_mpc_${N}_${terminal_coef}_${robust_method}" 88 | 89 | # Append tube_size to run_name if it's specified 90 | if [ -n "$tube_size" ]; then 91 | run_name="${run_name}_${tube_size}" 92 | fi 93 | 94 | # Building the command 95 | local cmd="CUDA_VISIBLE_DEVICES=$gpu_id MAX_CPU_WORKERS=$5 python ../../run.py test tank --num-parallel 1000 --horizon 20 --epochs 5000 --mini-epochs 1 --noise-level 0.1 --randomize --reward-shaping 50,0.05,2 --n-qp $n_qp --m-qp $m_qp --qp-unrolled --shared-PH --affine-qb --strict-affine-layer --obs-has-half-ref --use-residual-loss --no-obs-normalization --force-feasible --batch-test --mpc-baseline-N $N --mpc-terminal-cost-coef $terminal_coef --exp-name reproduce_qp_${n_qp}_${m_qp} --run-name $run_name --lr-schedule linear --initial-lr '5e-4' --quiet" 96 | 97 | # Adding the robust mpc method flag 98 | cmd="$cmd --robust-mpc-method $robust_method" 99 | 100 | # Conditional inclusion of the use-osqp-for-mpc flag 101 | if [ "$robust_method" = "none" ]; then 102 | cmd="$cmd --use-osqp-for-mpc" 103 | fi 104 | 105 | # Conditional inclusion of the tube_size argument 106 | if [ -n "$tube_size" ]; then 107 | cmd="$cmd --tube-mpc-tube-size ${tube_size}" 108 | fi 109 | 110 | # Execute the command 111 | eval $cmd 112 | } 113 | 114 | 115 | 116 | test_mpc_bg() { 117 | test_mpc $@ > /dev/null & 118 | sleep 10 119 | } 120 | 121 | test_mpc_all() { 122 | test_mpc_bg 0 16 10 none 8 123 | test_mpc_bg 1 16 10 scenario 224 124 | test_mpc_bg 2 16 10 tube 100 0.05 125 | test_mpc_bg 2 16 10 tube 100 0.1 126 | test_mpc_bg 2 16 10 tube 100 0.2 127 | test_mpc_bg 2 16 10 tube 100 0.25 128 | test_mpc_bg 2 16 10 tube 100 0.3 129 | wait 130 | } 131 | 132 | # 2.2 MLP 133 | 134 | test_mlp() { 135 | local gpu_id=$1 136 | local c1=$2 137 | local c2=$3 138 | local c3=$4 139 | local mlp_last_size=$5 140 | CUDA_VISIBLE_DEVICES=$gpu_id python ../../run.py test tank --num-parallel 1000 --horizon 20 --epochs 5000 --mini-epochs 1 --noise-level 0.1 --randomize --reward-shaping ${c1},${c2},${c3} --no-obs-normalization --mlp-size-last $mlp_last_size --batch-test --exp-name reproduce_disturbed_mlp_${mlp_last_size} --lr-schedule linear --initial-lr "5e-4" 141 | } 142 | 143 | # 2.3 QP 144 | 145 | test_qp() { 146 | local gpu_id=$1 147 | local c1=$2 148 | local c2=$3 149 | local c3=$4 150 | local n_qp=$5 151 | local m_qp=$6 152 | CUDA_VISIBLE_DEVICES=$gpu_id python ../../run.py test tank --num-parallel 1000 --horizon 20 --epochs 5000 --mini-epochs 1 --noise-level 0.1 --randomize --reward-shaping ${c1},${c2},${c3} --n-qp $n_qp --m-qp $m_qp --qp-unrolled --shared-PH --affine-qb --strict-affine-layer --obs-has-half-ref --use-residual-loss --no-obs-normalization --force-feasible --batch-test --exp-name reproduce_disturbed_qp_${n_qp}_${m_qp} --lr-schedule linear --initial-lr "5e-4" 153 | } 154 | 155 | # Utility function for train and test 156 | train_and_test() { 157 | local train_function="$1" 158 | shift 159 | local test_function="$1" 160 | shift 161 | 162 | train_pid=$(find_gpu_and_run_task $train_function $@) 163 | while [ -e /proc/$train_pid ]; do 164 | sleep 1 165 | done 166 | test_pid=$(find_gpu_and_run_task $test_function $@) 167 | while [ -e 
/proc/$test_pid ]; do 168 | sleep 1 169 | done 170 | } 171 | 172 | run_and_delay() { 173 | local run_function="$1" 174 | shift 175 | 176 | $run_function $@ & 177 | local run_pid=$! 178 | sleep 10 179 | echo $run_pid 180 | } 181 | 182 | # Finally run all the tasks 183 | 184 | run_and_delay test_mpc_all 185 | run_and_delay train_and_test train_mlp test_mlp 50 0.05 2 8 186 | run_and_delay train_and_test train_mlp test_mlp 50 0.05 2 16 187 | run_and_delay train_and_test train_mlp test_mlp 50 0.05 2 32 188 | run_and_delay train_and_test train_mlp test_mlp 50 0.05 2 64 189 | run_and_delay train_and_test train_mlp test_mlp 50 0.05 2 128 190 | run_and_delay train_and_test train_qp test_qp 50 0.05 2 4 24 191 | run_and_delay train_and_test train_qp test_qp 50 0.05 2 8 48 192 | run_and_delay train_and_test train_qp test_qp 50 0.05 2 16 96 193 | run_and_delay train_and_test train_qp test_qp 50 0.05 2 32 192 194 | 195 | wait 196 | 197 | python reproduce_table_disturbed.py 198 | -------------------------------------------------------------------------------- /src/utils/geometry.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from scipy.spatial import ConvexHull, HalfspaceIntersection 4 | from scipy.optimize import linprog 5 | from itertools import combinations 6 | 7 | 8 | def find_interior_point(A, b): 9 | """ 10 | Find an interior point of the polytope defined by Ax <= b using linear programming. 11 | 12 | Parameters: 13 | - A (numpy.ndarray): Coefficient matrix for inequalities. 14 | - b (numpy.ndarray): RHS vector for inequalities. 15 | 16 | Returns: 17 | - interior_point (numpy.ndarray): A point inside the polytope, or None if LP is infeasible. 18 | """ 19 | num_vars = A.shape[1] 20 | 21 | # Objective function: zero coefficients as we only need a feasible solution 22 | c = np.zeros(num_vars) 23 | 24 | # Inequality constraints: Ax <= b 25 | eps = 1e-4 26 | A_ineq = A 27 | b_ineq = b - eps 28 | 29 | # Run linear programming to find a feasible point 30 | res = linprog(c, A_ub=A_ineq, b_ub=b_ineq, bounds=(None, None), method='highs') 31 | 32 | if res.success: 33 | from icecream import ic; ic(res.x) 34 | return res.x 35 | else: 36 | return None 37 | 38 | 39 | def find_supporting_hyperplanes(vertices_2D): 40 | """ 41 | Given a set of 2D vertices, find the supporting hyperplanes of the convex hull. 42 | 43 | Parameters: 44 | - vertices_2D (numpy.ndarray): 2D vertices of the polytope. 45 | 46 | Returns: 47 | - A_2D (numpy.ndarray): The coefficient matrix for the 2D inequalities. 48 | - b_2D (numpy.ndarray): The constant terms for the 2D inequalities. 49 | """ 50 | A_list = [] 51 | b_list = [] 52 | hull = ConvexHull(vertices_2D) 53 | centroid = np.mean(vertices_2D, axis=0) 54 | 55 | for simplex in hull.simplices: 56 | v1, v2 = vertices_2D[simplex] 57 | edge = v2 - v1 58 | normal = np.array([-edge[1], edge[0]]) 59 | normal = normal / np.linalg.norm(normal) 60 | 61 | # Choose the direction of the normal so that it points away from the centroid of the polytope 62 | if np.dot(normal, centroid - v1) > 0: 63 | normal = -normal 64 | 65 | b = np.dot(normal, v1) 66 | A_list.append(normal) 67 | b_list.append(b) 68 | 69 | return np.array(A_list), np.array(b_list) 70 | 71 | 72 | 73 | def high_dim_to_2D(A, b): 74 | """ 75 | Converts a high-dimensional polytope {x | Ax <= b} to its 2D projection {x | A_proj x <= b_proj}. 
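The projection works by enumerating the vertices of the high-dimensional polytope
(solving each n-by-n subsystem of constraint rows and keeping the feasible
solutions), taking the first two coordinates of every vertex, and recovering the
supporting hyperplanes of the resulting 2D convex hull.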
76 | 77 | Parameters: 78 | - A (numpy.ndarray): The coefficient matrix for the high-dimensional inequalities. 79 | - b (numpy.ndarray): The constant terms for the high-dimensional inequalities. 80 | 81 | Returns: 82 | - A_2D (numpy.ndarray): The coefficient matrix for the 2D inequalities. 83 | - b_2D (numpy.ndarray): The constant terms for the 2D inequalities. 84 | """ 85 | def find_high_dim_vertices(A, b): 86 | n = A.shape[1] 87 | m = A.shape[0] 88 | vertices = [] 89 | for idx in combinations(range(m), n): 90 | A_sub = A[idx, :] 91 | b_sub = b[list(idx)] 92 | if np.linalg.matrix_rank(A_sub) == n: 93 | try: 94 | x = np.linalg.solve(A_sub, b_sub) 95 | except np.linalg.LinAlgError: 96 | continue 97 | if all(np.dot(A, x) <= b + 1e-9): 98 | vertices.append(x) 99 | return np.array(vertices) 100 | 101 | # Step 1: Find high-dimensional vertices 102 | vertices_high_dim = find_high_dim_vertices(A, b) 103 | 104 | # Step 2: Project to 2D 105 | vertices_2D = vertices_high_dim[:, :2] 106 | 107 | # Step 3: Find supporting hyperplanes in 2D 108 | A_2D, b_2D = find_supporting_hyperplanes(vertices_2D) 109 | 110 | return A_2D, b_2D 111 | 112 | 113 | def high_dim_to_2D_sampling(A, b, grid_size=50, x_range=(-1, 1)): 114 | """ 115 | Converts a high-dimensional polytope {x | Ax <= b} to its 2D projection {x | A_proj x <= b_proj} 116 | using a sampling-based approximation method. 117 | 118 | Parameters: 119 | - A (numpy.ndarray): The coefficient matrix for the high-dimensional inequalities. 120 | - b (numpy.ndarray): The constant terms for the high-dimensional inequalities. 121 | - grid_size (int): The number of grid points along each dimension in the sampling grid. 122 | - x_range (tuple): The range (min, max) for both x1 and x2 in the 2D plane. 123 | 124 | Returns: 125 | - A_2D (numpy.ndarray): The coefficient matrix for the 2D inequalities. 126 | - b_2D (numpy.ndarray): The constant terms for the 2D inequalities. 127 | """ 128 | 129 | def sample_based_projection_LP(A, b, x1_range, x2_range, grid_size): 130 | x1_min, x1_max = x1_range 131 | x2_min, x2_max = x2_range 132 | x1_vals = np.linspace(x1_min, x1_max, grid_size) 133 | x2_vals = np.linspace(x2_min, x2_max, grid_size) 134 | grid_points = np.array([[x1, x2] for x1 in x1_vals for x2 in x2_vals]) 135 | feasible_points = [] 136 | for point in grid_points: 137 | x_dim = np.zeros(A.shape[1]) 138 | x_dim[:2] = point 139 | c = np.zeros(A.shape[1] - 2) 140 | A_ub = A[:, 2:] 141 | b_ub = b - np.dot(A[:, :2], point) 142 | res = linprog(c, A_ub=A_ub, b_ub=b_ub, bounds=(None, None), method='highs') 143 | if res.success: 144 | feasible_points.append(point) 145 | feasible_points = np.array(feasible_points) 146 | if feasible_points.shape[0] < 3: 147 | return "Insufficient feasible points for a 2D polytope." 148 | hull = ConvexHull(feasible_points) 149 | vertices = hull.points[hull.vertices] 150 | return vertices 151 | 152 | # Step 1: Sample points and find the approximated vertices in 2D 153 | vertices_approx = sample_based_projection_LP(A, b, x_range, x_range, grid_size) 154 | 155 | # Step 2: Find supporting hyperplanes in 2D 156 | A_2D, b_2D = find_supporting_hyperplanes(vertices_approx) 157 | 158 | return A_2D, b_2D 159 | 160 | 161 | def partial_minimization_2D(P, q): 162 | """ 163 | Performs partial minimization over dimensions starting from 3 to obtain a 2D quadratic function. 164 | 165 | Parameters: 166 | - P (numpy.ndarray): The coefficient matrix for the high-dimensional quadratic function. 
167 | - q (numpy.ndarray): The coefficient vector for the high-dimensional quadratic function. 168 | 169 | Returns: 170 | - P_2D (numpy.ndarray): The 2x2 coefficient matrix for the resulting 2D quadratic function. 171 | - q_2D (numpy.ndarray): The 2D coefficient vector for the resulting 2D quadratic function. 172 | - c (float): The constant bias term for the resulting 2D quadratic function. 173 | """ 174 | # Decompose P into P11, P12, P21, P22 175 | P11 = P[:2, :2] 176 | P12 = P[:2, 2:] 177 | P21 = P[2:, :2] 178 | P22 = P[2:, 2:] 179 | 180 | # Decompose q into q1 and q2 181 | q1 = q[:2] 182 | q2 = q[2:] 183 | 184 | # Compute the 2D quadratic function parameters 185 | P_2D = P11 - P12 @ np.linalg.inv(P22) @ P21 186 | q_2D = q1 - P12 @ np.linalg.inv(P22) @ q2 187 | c = -0.5 * q2 @ np.linalg.inv(P22) @ q2 188 | 189 | return P_2D, q_2D, c 190 | -------------------------------------------------------------------------------- /experiments/cartpole/visualize_trajectories.py: -------------------------------------------------------------------------------- 1 | # %% Specify test case 2 | import numpy as np 3 | 4 | # Initial position and reference position 5 | x0 = 0. 6 | x_ref = 1. 7 | 8 | # Controlling process noise and parametric uncertainty 9 | noise_level = 0 10 | parametric_uncertainty = True 11 | parameter_randomization_seed = 42 12 | 13 | # %% Set up test bench 14 | import sys 15 | import os 16 | file_path = os.path.dirname(__file__) 17 | sys.path.append(os.path.join(file_path, "../..")) 18 | 19 | from envs.env_creators import sys_param, env_creators 20 | from envs.mpc_baseline_parameters import get_mpc_baseline_parameters 21 | from modules.qp_unrolled_network import QPUnrolledNetwork 22 | import torch 23 | from matplotlib import pyplot as plt 24 | 25 | 26 | # Utilities 27 | 28 | def obs_to_state(obs): 29 | # Convert obs in batch size 1 in form (x, x_ref, x_dot, sin(theta), cos(theta), theta_dot) to state (x, x_dot, theta, theta_dot) 30 | x, x_ref, x_dot, sin_theta, cos_theta, theta_dot = obs[:, 0], obs[:, 1], obs[:, 2], obs[:, 3], obs[:, 4], obs[:, 5] 31 | theta = torch.atan2(sin_theta, cos_theta) 32 | return torch.stack([x, x_dot, theta, theta_dot], dim=1).squeeze(0) 33 | 34 | def make_obs(state, x_ref, running_mean, running_std, normalize): 35 | x, x_dot, theta, theta_dot = state 36 | raw_obs = torch.tensor(np.array([x, x_ref, x_dot, np.sin(theta), np.cos(theta), theta_dot]), device=device, dtype=torch.float) 37 | if not normalize: 38 | return raw_obs.unsqueeze(0) 39 | else: 40 | return ((raw_obs - running_mean) / running_std).unsqueeze(0) 41 | 42 | def get_state_dict(checkpoint_path): 43 | checkpoint = torch.load(checkpoint_path) 44 | model = checkpoint["model"] 45 | prefix = "a2c_network.policy_net." 46 | policy_net_state_dict = {k[len(prefix):]: v for (k, v) in model.items() if k.startswith(prefix)}  # slice off the prefix exactly; str.lstrip(prefix) strips any leading characters in the set and can mangle keys 47 | running_mean = model["running_mean_std.running_mean"].to(dtype=torch.float) 48 | running_std = model["running_mean_std.running_var"].sqrt().to(dtype=torch.float) 49 | return policy_net_state_dict, running_mean, running_std 50 | 51 | def rescale_action(action, low=-1., high=8.): 52 | action = action.clamp(-1., 1.) 
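# Affine map from the policy's normalized output range [-1, 1] to [low, high]:
# u = low + (high - low) * (a + 1) / 2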
53 | return low + (high - low) * (action + 1) / 2 54 | 55 | t = lambda arr: torch.tensor(arr, device=device, dtype=torch.float).unsqueeze(0) 56 | a = lambda t: t.detach().cpu().numpy() 57 | 58 | # Constants and options 59 | n_sys = 4 60 | m_sys = 1 61 | input_size = 6 62 | n = 16 63 | m = 32 64 | qp_iter = 10 65 | device = "cuda:0" 66 | 67 | 68 | # # Learned QP 69 | # net = QPUnrolledNetwork(device, input_size, n, m, qp_iter, None, True, True) 70 | exp_name = f"shared_affine_noise{noise_level}_n{n}_m{m}" 71 | # if parametric_uncertainty: 72 | # exp_name += "+rand" 73 | # checkpoint_path = f"runs/tank_{exp_name}/nn/tank.pth" 74 | # policy_net_state_dict, running_mean, running_std = get_state_dict(checkpoint_path) 75 | # net.load_state_dict(policy_net_state_dict) 76 | # running_mean, running_std = running_mean.to(device=device), running_std.to(device=device) 77 | # net.to(device) 78 | 79 | # MPC module 80 | mpc_module = QPUnrolledNetwork( 81 | device, input_size, n, m, qp_iter, None, True, True, 82 | mpc_baseline=get_mpc_baseline_parameters("cartpole", 8), 83 | use_osqp_for_mpc=True, 84 | ) 85 | 86 | # Environment 87 | env = env_creators["cartpole"]( 88 | noise_level=noise_level, 89 | bs=1, 90 | max_steps=300, 91 | keep_stats=True, 92 | run_name=exp_name, 93 | exp_name=exp_name, 94 | randomize=parametric_uncertainty, 95 | ) 96 | 97 | # %% MLP Policy 98 | import sys 99 | mlp_exp_name = f"mlp_noise{noise_level}" 100 | if parametric_uncertainty: 101 | mlp_exp_name += "+rand" 102 | sys.argv = [""] + f"""test tank --num-parallel 1 \ 103 | --noise-level {noise_level} \ 104 | --exp-name {mlp_exp_name}""".split() 105 | import run 106 | mlp_checkpoint_path = f"runs/tank_{mlp_exp_name}/nn/tank.pth" 107 | mlp_player = run.runner.create_player() 108 | mlp_player.restore(mlp_checkpoint_path) 109 | 110 | # %% Test for MPC 111 | raw_obs = env.reset(t(x0), t(x_ref), randomize_seed=parameter_randomization_seed) 112 | done = False 113 | 114 | 115 | xs_mpc = [obs_to_state(raw_obs)] 116 | us_mpc = [] 117 | 118 | while not done: 119 | u_all, problem_params = mpc_module(raw_obs, return_problem_params=True) 120 | u = u_all[:, :m_sys] 121 | raw_obs, reward, done_t, info = env.step(u) 122 | xs_mpc.append(obs_to_state(raw_obs)) 123 | us_mpc.append(u[0, :]) 124 | obs = raw_obs 125 | done = done_t.item() 126 | 127 | # %% Test for learned QP 128 | xs_qp = [t(x0).squeeze(0)] 129 | us_qp = [] 130 | done = False 131 | env.reset(t(x0), t(x_ref), randomize_seed=parameter_randomization_seed) 132 | x = x0 133 | obs = make_obs(x, x_ref, running_mean, running_std, True) 134 | while not done: 135 | action_all, problem_params = net(obs, return_problem_params=True) 136 | u = rescale_action(action_all[:, :m_sys]) 137 | raw_obs, reward, done_t, info = env.step(u) 138 | xs_qp.append(raw_obs[0, :4]) 139 | us_qp.append(u[0, :]) 140 | obs = (raw_obs - running_mean) / running_std 141 | done = done_t.item() 142 | 143 | # %% Test for MLP 144 | xs_mlp = [t(x0).squeeze(0)] 145 | us_mlp = [] 146 | done = False 147 | env.reset(t(x0), t(x_ref), randomize_seed=parameter_randomization_seed) 148 | x = x0 149 | obs = make_obs(x, x_ref, running_mean, running_std, False) 150 | while not done: 151 | action = mlp_player.get_action(obs.squeeze(0), is_deterministic=True) 152 | obs, reward, done_t, info = env.step(action.unsqueeze(0)) 153 | xs_mlp.append(obs[0, :4]) 154 | us_mlp.append(action) 155 | done = done_t.item() 156 | 157 | # %% Plot 1: cost curve 158 | cost_mpc = [env.cost(x - t(x_ref), u.unsqueeze(0)).item() for (x, u) in zip(xs_mpc, us_mpc)] 159 
| cost_qp = [env.cost(x - t(x_ref), u.unsqueeze(0)).item() for (x, u) in zip(xs_qp, us_qp)] 160 | cost_mlp = [env.cost(x - t(x_ref), u.unsqueeze(0)).item() for (x, u) in zip(xs_mlp, us_mlp)] 161 | 162 | # Compute the baseline 163 | baseline = min(min(cost_mpc), min(cost_qp), min(cost_mlp)) - 1e-2 164 | 165 | # Deduct the baseline from each data series 166 | cost_mpc_baseline = np.array(cost_mpc) - baseline 167 | cost_qp_baseline = np.array(cost_qp) - baseline 168 | cost_mlp_baseline = np.array(cost_mlp) - baseline 169 | 170 | # Plotting 171 | plt.title("Per-step LQ cost") 172 | plt.plot(cost_mpc_baseline, label="MPC") 173 | plt.plot(cost_qp_baseline, label="QP") 174 | plt.plot(cost_mlp_baseline, label="MLP") 175 | 176 | # Set y-axis to log scale 177 | plt.yscale('log') 178 | 179 | # Modify tick labels to show the true value 180 | yticks = plt.yticks()[0] 181 | plt.yticks(yticks, [f"{y + baseline:.0e}" for y in yticks]) 182 | 183 | plt.legend() 184 | 185 | # %% Plot 2: Trajectory 186 | # Create a 3-row, 2-column matrix of subplots 187 | fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(10, 12)) 188 | 189 | # Example to populate the subplots 190 | for i in range(2): 191 | for j in range(2): 192 | ax = axes[i, j] 193 | subscript = 2 * i + j 194 | ax.plot([a(xs_mpc[k][subscript]) for k in range(len(xs_mpc))], label="MPC") 195 | # ax.plot([a(xs_qp[k][subscript]) for k in range(len(xs_qp))], label="QP") 196 | # ax.plot([a(xs_mlp[k][subscript]) for k in range(len(xs_mlp))], label="MLP") 197 | if subscript == 0: 198 | ax.axhline(y=x_ref, color='r', linestyle='--', label='Ref') 199 | ax.legend() 200 | ax.set_title(['x', 'x_dot', 'theta', 'theta_dot'][subscript]) 201 | 202 | i = 2 203 | for j in range(1): 204 | ax = axes[i, j] 205 | ax.plot([a(us_mpc[k][j]) for k in range(len(us_mpc))], label="MPC") 206 | # ax.plot([a(us_qp[k][j]) for k in range(len(us_qp))], label="QP") 207 | # ax.plot([a(us_mlp[k][j]) for k in range(len(us_mlp))], label="MLP") 208 | ax.legend() 209 | ax.set_title(f'f') 210 | 211 | plt.tight_layout() 212 | plt.show() 213 | 214 | # %% 215 | -------------------------------------------------------------------------------- /experiments/tank/visualize_trajectories.py: -------------------------------------------------------------------------------- 1 | # %% Specify test case 2 | import numpy as np 3 | 4 | # # Case where MPC is better 5 | x0 = np.array([10., 10., 10., 10.]) 6 | # x_ref = np.array([19, 19, 2., 2.]) 7 | x_ref = np.array([13, 17, 3, 2.]) 8 | 9 | x0 = np.array([ 1.5112903, 5.738173, 10.417226, 4.5608387]) 10 | x_ref = np.array([1.1293532, 1.9881264, 1. , 1. ]) 11 | 12 | # Case where MPC fails 13 | # x0 = np.array([ 5.4963946, 10.947876, 1.034516, 18.08066 ]) 14 | # x_ref = np.array([7.522859, 8.169776, 1.1107684, 1. 
]) 15 | 16 | # Controlling process noise and parametric uncertainty 17 | noise_level = 0 18 | parametric_uncertainty = False 19 | parameter_randomization_seed = 2 20 | 21 | # %% Set up test bench 22 | import sys 23 | import os 24 | file_path = os.path.dirname(__file__) 25 | sys.path.append(os.path.join(file_path, "../..")) 26 | 27 | from src.envs.env_creators import sys_param, env_creators 28 | from src.envs.mpc_baseline_parameters import get_mpc_baseline_parameters 29 | from src.modules.qp_unrolled_network import QPUnrolledNetwork 30 | import torch 31 | from matplotlib import pyplot as plt 32 | 33 | 34 | # Utilities 35 | 36 | def make_obs(x, x_ref, running_mean, running_std, normalize): 37 | raw_obs = torch.tensor(np.concatenate([x, x_ref]), device=device, dtype=torch.float) 38 | if not normalize: 39 | return raw_obs.unsqueeze(0) 40 | else: 41 | return ((raw_obs - running_mean) / running_std).unsqueeze(0) 42 | 43 | def get_state_dict(checkpoint_path): 44 | checkpoint = torch.load(checkpoint_path) 45 | model = checkpoint["model"] 46 | prefix = "a2c_network.policy_net." 47 | policy_net_state_dict = {k[len(prefix):]: v for (k, v) in model.items() if k.startswith(prefix)}  # slice off the prefix exactly; str.lstrip(prefix) strips any leading characters in the set and can mangle keys 48 | if "running_mean_std.running_mean" in model: 49 | running_mean = model["running_mean_std.running_mean"].to(dtype=torch.float) 50 | running_std = model["running_mean_std.running_var"].sqrt().to(dtype=torch.float) 51 | else: 52 | running_mean = torch.tensor([0.]) 53 | running_std = torch.tensor([1.]) 54 | return policy_net_state_dict, running_mean, running_std 55 | 56 | def rescale_action(action, low=-1., high=8.): 57 | action = action.clamp(-1., 1.) 58 | return low + (high - low) * (action + 1) / 2 59 | 60 | t = lambda arr: torch.tensor(arr, device=device, dtype=torch.float).unsqueeze(0) 61 | a = lambda t: t.detach().cpu().numpy() 62 | 63 | # Constants and options 64 | n_sys = 4 65 | m_sys = 2 66 | input_size = 8 # 4 for x, 4 for x_ref 67 | n = 2 68 | m = 64 69 | qp_iter = 10 70 | device = "cuda:0" 71 | 72 | 73 | # Learned QP 74 | net = QPUnrolledNetwork(device, input_size, n, m, qp_iter, None, True, True) 75 | # exp_name = f"shared_affine_noise{noise_level}_n{n}_m{m}-norm" 76 | exp_name = "residual_loss_on" 77 | if parametric_uncertainty: 78 | exp_name += "+rand" 79 | checkpoint_path = f"runs/tank_{exp_name}/nn/tank.pth" 80 | policy_net_state_dict, running_mean, running_std = get_state_dict(checkpoint_path) 81 | net.load_state_dict(policy_net_state_dict) 82 | running_mean, running_std = running_mean.to(device=device), running_std.to(device=device) 83 | net.to(device) 84 | 85 | # MPC module 86 | mpc_module = QPUnrolledNetwork( 87 | device, input_size, n, m, qp_iter, None, True, True, 88 | mpc_baseline=get_mpc_baseline_parameters("tank", 10), 89 | use_osqp_for_mpc=False, 90 | ) 91 | 92 | # Environment 93 | env = env_creators["tank"]( 94 | noise_level=noise_level, 95 | bs=1, 96 | max_steps=300, 97 | keep_stats=True, 98 | run_name=exp_name, 99 | exp_name=exp_name, 100 | randomize=parametric_uncertainty, 101 | ) 102 | 103 | # %% MLP Policy 104 | import sys 105 | mlp_exp_name = f"mlp_noise{noise_level}" 106 | if parametric_uncertainty: 107 | mlp_exp_name += "+rand" 108 | sys.argv = [""] + f"""test tank --num-parallel 1 \ 109 | --noise-level {noise_level} \ 110 | --exp-name {mlp_exp_name}""".split() 111 | import run 112 | mlp_checkpoint_path = f"runs/tank_{mlp_exp_name}/nn/tank.pth" 113 | mlp_player = run.runner.create_player() 114 | mlp_player.restore(mlp_checkpoint_path) 115 | 116 | # %% Test for MPC 117 | env.reset(t(x0), 
t(x_ref), randomize_seed=parameter_randomization_seed) 118 | done = False 119 | x = x0 120 | obs = make_obs(x, x_ref, running_mean, running_std, False) 121 | 122 | 123 | xs_mpc = [obs[0, :4]] 124 | us_mpc = [] 125 | 126 | while not done: 127 | u_all, problem_params = mpc_module(obs, return_problem_params=True) 128 | u = u_all[:, :m_sys] 129 | raw_obs, reward, done_t, info = env.step(u) 130 | xs_mpc.append(raw_obs[0, :4]) 131 | us_mpc.append(u[0, :]) 132 | obs = raw_obs 133 | done = done_t.item() 134 | 135 | # %% Test for learned QP 136 | xs_qp = [t(x0).squeeze(0)] 137 | us_qp = [] 138 | done = False 139 | env.reset(t(x0), t(x_ref), randomize_seed=parameter_randomization_seed) 140 | x = x0 141 | obs = make_obs(x, x_ref, running_mean, running_std, True) 142 | while not done: 143 | action_all, problem_params = net(obs, return_problem_params=True) 144 | u = rescale_action(action_all[:, :m_sys]) 145 | raw_obs, reward, done_t, info = env.step(u) 146 | xs_qp.append(raw_obs[0, :4]) 147 | us_qp.append(u[0, :]) 148 | obs = (raw_obs - running_mean) / running_std 149 | done = done_t.item() 150 | 151 | # %% Test for MLP 152 | xs_mlp = [t(x0).squeeze(0)] 153 | us_mlp = [] 154 | done = False 155 | env.reset(t(x0), t(x_ref), randomize_seed=parameter_randomization_seed) 156 | x = x0 157 | obs = make_obs(x, x_ref, running_mean, running_std, False) 158 | while not done: 159 | action = mlp_player.get_action(obs.squeeze(0), is_deterministic=True) 160 | obs, reward, done_t, info = env.step(action.unsqueeze(0)) 161 | xs_mlp.append(obs[0, :4]) 162 | us_mlp.append(action) 163 | done = done_t.item() 164 | 165 | # %% Plot 1: cost curve 166 | cost_mpc = [env.cost(x - t(x_ref), u.unsqueeze(0)).item() for (x, u) in zip(xs_mpc, us_mpc)] 167 | cost_qp = [env.cost(x - t(x_ref), u.unsqueeze(0)).item() for (x, u) in zip(xs_qp, us_qp)] 168 | cost_mlp = [env.cost(x - t(x_ref), u.unsqueeze(0)).item() for (x, u) in zip(xs_mlp, us_mlp)] 169 | 170 | # Compute the baseline 171 | baseline = min(min(cost_mpc), min(cost_qp), min(cost_mlp)) - 1e-2 172 | 173 | # Deduct the baseline from each data series 174 | cost_mpc_baseline = np.array(cost_mpc) - baseline 175 | cost_qp_baseline = np.array(cost_qp) - baseline 176 | cost_mlp_baseline = np.array(cost_mlp) - baseline 177 | 178 | # Plotting 179 | plt.title("Per-step LQ cost") 180 | plt.plot(cost_mpc_baseline, label="MPC") 181 | plt.plot(cost_qp_baseline, label="QP") 182 | plt.plot(cost_mlp_baseline, label="MLP") 183 | 184 | # Set y-axis to log scale 185 | plt.yscale('log') 186 | 187 | # Modify tick labels to show the true value 188 | yticks = plt.yticks()[0] 189 | plt.yticks(yticks, [f"{y + baseline:.0e}" for y in yticks]) 190 | 191 | plt.legend() 192 | 193 | # %% Plot 2: Trajectory 194 | # Create a 3-row, 2-column matrix of subplots 195 | fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(10, 12)) 196 | 197 | # Example to populate the subplots 198 | for i in range(2): 199 | for j in range(2): 200 | ax = axes[i, j] 201 | subscript = 2 * i + j 202 | ax.plot([a(xs_mpc[k][subscript]) for k in range(len(xs_mpc))], label="MPC") 203 | ax.plot([a(xs_qp[k][subscript]) for k in range(len(xs_qp))], label="Learned QP") 204 | # ax.plot([a(xs_mlp[k][subscript]) for k in range(len(xs_mlp))], label="MLP") 205 | ax.axhline(y=x_ref[subscript], color='r', linestyle='--', label='Ref') 206 | ax.legend() 207 | ax.set_title(f'x_{subscript+1}') 208 | 209 | i = 2 210 | for j in range(2): 211 | ax = axes[i, j] 212 | ax.plot([a(us_mpc[k][j]) for k in range(len(us_mpc))], label="MPC") 213 | 
ax.plot([a(us_qp[k][j]) for k in range(len(us_qp))], label="QP") 214 | ax.plot([a(us_mlp[k][j]) for k in range(len(us_mlp))], label="MLP") 215 | ax.legend() 216 | ax.set_title(f'u_{j+1}') 217 | 218 | plt.tight_layout() 219 | plt.show() 220 | 221 | # %% 222 | -------------------------------------------------------------------------------- /experiments/double_integrator/verify_stability.jl: -------------------------------------------------------------------------------- 1 | ## Read parameters 2 | 3 | using NPZ 4 | 5 | data = npzread("parameters.npz") 6 | W_q = data["Wq"] 7 | P = data["P"] 8 | A_MCI = data["A_MCI"] 9 | b_MCI = data["b_MCI"] 10 | H = data["H"] 11 | A = data["A"] 12 | B = data["B"] 13 | m_mci, n_sys = size(A_MCI) 14 | n_qp = size(P, 1) 15 | m_qp = size(H, 1) 16 | m_sys = size(B, 2) 17 | 18 | norm_factor = 0.5 19 | 20 | ## Define the candidate invariant set to be tested 21 | relax = 0.2 22 | G = A_MCI 23 | c = b_MCI .- relax 24 | 25 | ## Formulate the problem of verifying whether a set is invariant into a bilevel optimization problem, and try solving using BilevelJuMP 26 | 27 | using JuMP 28 | using BilevelJuMP 29 | using Ipopt 30 | 31 | # Define the bilevel model 32 | blmodel = BilevelModel(Ipopt.Optimizer; mode = BilevelJuMP.ProductMode(1e-9)) 33 | 34 | # Upper level 35 | @variable(Upper(blmodel), x[1:n_sys], start = 1) 36 | @variable(Upper(blmodel), λ[1:m_mci] >= 0) 37 | @variable(Lower(blmodel), u[1:n_qp]) 38 | @constraint(Upper(blmodel), sum(λ) == 1) 39 | @constraint(Upper(blmodel), G * x .<= c) 40 | @objective(Upper(blmodel), Min, -λ' * (G * (A * x + norm_factor * B * u[1:m_sys]) - c)) 41 | 42 | 43 | # Lower level 44 | @constraint(Lower(blmodel), H*u .<= 1.) 45 | @constraint(Lower(blmodel), -1. .<= H*u) 46 | @constraint(Lower(blmodel), u[1:m_sys] .<= 1.) 47 | @constraint(Lower(blmodel), -1. .<= u[1:m_sys]) 48 | @objective(Lower(blmodel), Min, 0.5 * u' * P * u + x' * W_q' * u) 49 | 50 | # Solve the bilevel problem 51 | optimize!(blmodel) 52 | 53 | # Extract results 54 | optimal_value = objective_value(blmodel) # Is it correct? 
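# Note on the sign convention: the upper level minimizes -λ' * (G * (A x + norm_factor * B u) - c),
# i.e. it maximizes the worst-case successor-state constraint value over the candidate set {x : G x <= c}.
# Invariance of the candidate set under the learned QP policy therefore corresponds to the *global*
# minimum of this problem being nonnegative. Since Ipopt only returns local solutions, a nonnegative
# value here is evidence rather than a certificate, which motivates the lower-bounding attempts
# (SDP and SOS relaxations) further below; the value is also recomputed explicitly from
# value.(x), value.(u), value.(λ) rather than relying on objective_value alone.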
55 | # @show optimal_value 56 | @show value.(x) 57 | @show value.(u) 58 | @show value.(λ) 59 | optimal_value = -value.(λ)' * (G * (A * value.(x) + norm_factor * B * value.(u)[1:m_sys]) - c) 60 | @show optimal_value 61 | 62 | ## Visualize 63 | using Polyhedra, CDDLib, Plots, Statistics 64 | 65 | function sort_vertices(vertices) 66 | # Calculate centroid 67 | centroid = mean(vertices, dims=1) 68 | 69 | vertices = [vertices[i, :] for i in 1:size(vertices, 1)] 70 | 71 | # Sort vertices based on polar angle from centroid 72 | sorted_vertices = sort(vertices, by = p -> atan(p[2] - centroid[2], p[1] - centroid[1])) 73 | 74 | return sorted_vertices 75 | end 76 | 77 | function plot_polytope(A, b, fig, label) 78 | poly = polyhedron(hrep(A, b), CDDLib.Library()) 79 | v = sort_vertices(hcat(points(vrep(poly))...)') 80 | x_coords = [x[1] for x in v] 81 | y_coords = [x[2] for x in v] 82 | 83 | Plots.scatter!(fig, x_coords, y_coords, label = label) 84 | 85 | for i = 1:length(v) 86 | Plots.plot!(fig, [x_coords[i], x_coords[(i % length(v)) + 1]], [y_coords[i], y_coords[(i % length(v)) + 1]], color="black", label="") 87 | end 88 | fig 89 | end 90 | 91 | fig = Plots.plot() 92 | plot_polytope(A_MCI, b_MCI, fig, "MCI") 93 | plot_polytope(G, c, fig, "Verified") 94 | Plots.scatter!(fig, [value.(x)[1]], [value.(x)[2]], label = "Worst case", color = "green") 95 | 96 | ## Try SDP lower bound with Lagrangian relaxation 97 | using JuMP, SCS 98 | using LinearAlgebra 99 | 100 | my = 2 * m_mci + 2 # Number of constraints for outer problem 101 | mx = 2 * m_qp + 2 * m_sys # Number of constraints for inner problem 102 | ny = n_sys + m_mci # Number of variables for outer problem 103 | nx = n_qp # Number of variables for inner problem 104 | 105 | E = [Matrix(1.0I, m_sys, m_sys) zeros(m_sys, n_qp - m_sys)] # Matrix for extracting u from QP solution 106 | Rxx = zeros(n_qp, n_qp) 107 | Rxy = [zeros(n_qp, n_sys) E' * B' * G'] 108 | Ryx = Rxy' 109 | Ryy = [zeros(n_sys, n_sys) -A' * G'; -G * A zeros(m_mci, m_mci)] 110 | sx = zeros(n_qp) 111 | sy = [zeros(n_sys); c] 112 | Gx = zeros(my, nx) 113 | Gy = [-G zeros(m_mci, m_mci); 114 | zeros(m_mci, n_sys) Matrix(1.0I, m_mci, m_mci); 115 | zeros(1, n_sys) ones(1, m_mci) 116 | zeros(1, n_sys) -ones(1, m_mci) 117 | ] 118 | c̃ = [c; zeros(m_mci); -1; 1] 119 | bq = zeros(n_qp) 120 | H̃ = [H; -H; E; -E] 121 | b̃ = ones(mx) 122 | Wb = zeros(mx, ny) 123 | Wq = [W_q zeros(n_qp, m_mci)] 124 | 125 | 126 | # Initialize the model with SCS solver 127 | model = Model(optimizer_with_attributes(SCS.Optimizer)) 128 | 129 | # Define variables 130 | @variable(model, γ) 131 | @variable(model, μ1[1:my] >= 0) 132 | @variable(model, μ2[1:mx] >= 0) 133 | @variable(model, μ3[1:mx] >= 0) 134 | @variable(model, η1[1:nx]) 135 | @variable(model, η2) 136 | @variable(model, η3) 137 | @variable(model, η4) 138 | 139 | # Objective function 140 | @objective(model, Max, γ) 141 | 142 | # SDP constraint 143 | function make_M(γ, μ1, μ2, μ3, η1, η2, η3, η4) 144 | up = [ 145 | Rxx + 2*η4*P Rxy + η4*Wq η2*H̃' sx - Gx'*μ1 - H̃'*μ2 + P*η1 + η4*bq; 146 | zeros(ny, nx) Ryy η2*Wb' + η3*(Wb' - Wq'*inv(P)*H̃') + η4*Wb' sy - Gy'*μ1 - Wb'*μ2 + Wq'*η1; 147 | zeros(mx, nx) zeros(mx, ny) 2*η3*H̃*inv(P)*H̃' -μ3 - H̃*η1 + η2*b̃ + η3*(b̃ - H̃*inv(P)*bq) + η4*b̃; 148 | zeros(1, nx) zeros(1, ny) zeros(1, mx) -2*c̃'*μ1 - 2*b̃'*μ2 + 2*bq'*η1 - γ 149 | ] 150 | return (up + up') / 2 151 | end 152 | 153 | @expression(model, M, make_M(γ, μ1, μ2, μ3, η1, η2, η3, η4)) 154 | @constraint(model, M in PSDCone()) 155 | 156 | # Solve the problem 157 | 
optimize!(model) 158 | 159 | # It will be infeasible 160 | 161 | ## Formulate nonconvex QCQP and try solving using local solver 162 | 163 | using JuMP, Ipopt 164 | 165 | model = Model(Ipopt.Optimizer) 166 | 167 | @variable(model, x[1:n_sys], start = 1) 168 | @variable(model, λ[1:m_mci] >= 0) 169 | @variable(model, u[1:n_qp]) 170 | @variable(model, μ1[1:m_qp] >= 0) 171 | @variable(model, μ2[1:m_qp] >= 0) 172 | @variable(model, μ3[1:m_sys] >= 0) 173 | @variable(model, μ4[1:m_sys] >= 0) 174 | 175 | p = -λ' * (G * (A * x + norm_factor * B * u[1:m_sys]) - c) 176 | @NLobjective(model, Min, p) 177 | @constraint(model, G * x .<= c) 178 | @constraint(model, sum(λ) == 1) 179 | @constraint(model, P * u + W_q * x + H' * (μ1 - μ2) .== 0) 180 | @constraint(model, H * u .<= 1) 181 | @constraint(model, -1 .<= H * u) 182 | @constraint(model, u[1:m_sys] .<= 1) 183 | @constraint(model, -1 .<= u[1:m_sys]) 184 | @constraint(model, μ1' * (H * u .- 1) == 0) 185 | @constraint(model, μ2' * (-H * u .- 1) == 0) 186 | @constraint(model, μ3' * (u[1:m_sys] .- 1) == 0) 187 | @constraint(model, μ4' * (-u[1:m_sys] .- 1) == 0) 188 | optimize!(model) 189 | # @show objective_value(model) 190 | @show value.(x) 191 | @show value.(u) 192 | @show value.(λ) 193 | optimal_value = -value.(λ)' * (G * (A * value.(x) + norm_factor * B * value.(u)[1:m_sys]) - c) 194 | @show optimal_value 195 | 196 | ## Try lower bound with SOS solver 197 | 198 | using DynamicPolynomials, SumOfSquares 199 | # import SCS 200 | # scs = SCS.Optimizer 201 | import MosekTools 202 | mosek = MosekTools.Optimizer 203 | import Dualization 204 | # dual_scs = Dualization.dual_optimizer(scs) 205 | # model = SOSModel(dual_scs) 206 | dual_mosek = Dualization.dual_optimizer(mosek) 207 | model = SOSModel(dual_mosek) 208 | 209 | @polyvar x[1:n_sys] 210 | @polyvar λ[1:m_mci] 211 | @polyvar u[1:n_qp] 212 | @polyvar μ1[1:m_qp] 213 | @polyvar μ2[1:m_qp] 214 | @polyvar μ3[1:m_sys] 215 | @polyvar μ4[1:m_sys] 216 | 217 | p = -λ' * (G * (A * x + norm_factor * B * u[1:m_sys]) - c) 218 | S = BasicSemialgebraicSet{Float64,Polynomial{true,Float64}}() 219 | 220 | invariance_constraint = - (G * x - c) 221 | for i in 1:m_mci 222 | addinequality!(S, invariance_constraint[i]) 223 | end 224 | for i in 1:m_mci 225 | addinequality!(S, λ[i]) 226 | end 227 | addequality!(S, sum(λ) - 1) 228 | stationarity = P * u + W_q * x + H' * (μ1 - μ2) 229 | for i in 1:n_qp 230 | addequality!(S, stationarity[i]) 231 | end 232 | p_feasibility_1 = -(H * u .- 1) 233 | for i in 1:m_qp 234 | addinequality!(S, p_feasibility_1[i]) 235 | end 236 | p_feasibility_2 = H * u .+ 1 237 | for i in 1:m_qp 238 | addinequality!(S, p_feasibility_2[i]) 239 | end 240 | p_feasibility_3 = 1. .- u[1:m_sys] 241 | for i in 1:m_sys 242 | addinequality!(S, p_feasibility_3[i]) 243 | end 244 | p_feasibility_4 = u[1:m_sys] .+ 1. 
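# The loops below complete the semialgebraic set S with the remaining primal feasibility
# inequalities (p_feasibility_4), the multiplier sign constraints (μ1, ..., μ4 >= 0), and the
# complementary-slackness equalities. Together with the stationarity equalities and feasibility
# inequalities already added, S encodes the KKT conditions of the convex lower-level QP, so the
# SOS constraint p >= σ over S certifies σ as a lower bound on the bilevel optimal value.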
245 | for i in 1:m_sys 246 | addinequality!(S, p_feasibility_4[i]) 247 | end 248 | for i in 1:m_qp 249 | addinequality!(S, μ1[i]) 250 | end 251 | for i in 1:m_qp 252 | addinequality!(S, μ2[i]) 253 | end 254 | for i in 1:m_sys 255 | addinequality!(S, μ3[i]) 256 | end 257 | for i in 1:m_sys 258 | addinequality!(S, μ4[i]) 259 | end 260 | addequality!(S, μ1' * (H * u .- 1)) 261 | addequality!(S, μ2' * (-H * u .- 1)) 262 | addequality!(S, μ3' * (u[1:m_sys] .- 1)) 263 | addequality!(S, μ4' * (-u[1:m_sys] .- 1)) 264 | 265 | @variable(model, σ >= 0) 266 | @objective(model, Max, σ) 267 | @constraint(model, p >= σ, domain = S, maxdegree = 3) 268 | optimize!(model) 269 | @show solution_summary(model) 270 | @show objective_value(model) 271 | 272 | ## Some toy examples that exemplify the solver usage 273 | 274 | ## 275 | using DynamicPolynomials, SumOfSquares 276 | import MosekTools 277 | mosek = MosekTools.Optimizer 278 | import Dualization 279 | dual_mosek = Dualization.dual_optimizer(mosek) 280 | 281 | # Create JuMP model 282 | model = SOSModel(dual_mosek) 283 | 284 | @polyvar x y 285 | p = x * y 286 | @variable(model, σ) 287 | @objective(model, Max, σ) 288 | S = @set x + y <= 1 && x - y <= 1 && -x + y <= 1 && -x - y <= 1 289 | # @constraint(model, x + y <= 1) 290 | # @constraint(model, x - y <= 1) 291 | # @constraint(model, -x + y <= 1) 292 | # @constraint(model, -x - y <= 1) 293 | @constraint(model, p >= σ, domain = S, maxdegree = 3) 294 | optimize!(model) 295 | solution_summary(model) 296 | 297 | ## 298 | using DynamicPolynomials 299 | @polyvar x y 300 | p = x^3 - x^2 + 2x*y -y^2 + y^3 301 | using SumOfSquares 302 | S = @set x >= 0 && y >= 0 && x + y >= 1 303 | import Ipopt 304 | model = Model(Ipopt.Optimizer) 305 | @variable(model, a >= 0) 306 | @variable(model, b >= 0) 307 | @constraint(model, a + b >= 1) 308 | @NLobjective(model, Min, a^3 - a^2 + 2a*b - b^2 + b^3) 309 | optimize!(model) 310 | solution_summary(model) 311 | 312 | ## 313 | import MosekTools 314 | mosek = MosekTools.Optimizer 315 | import Dualization 316 | dual_mosek = Dualization.dual_optimizer(mosek) 317 | model = SOSModel(dual_mosek) 318 | @variable(model, α) 319 | @objective(model, Max, α) 320 | @constraint(model, c3, p >= α, domain = S) 321 | optimize!(model) 322 | solution_summary(model) 323 | 324 | ## 325 | model = SOSModel(dual_mosek) 326 | @variable(model, α) 327 | @objective(model, Max, α) 328 | @constraint(model, c4, p >= α, domain = S, maxdegree = 4) 329 | optimize!(model) 330 | solution_summary(model) 331 | 332 | -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import sys 4 | file_path = os.path.dirname(__file__) 5 | sys.path.insert(0, os.path.join(file_path, "rl_games")) 6 | import yaml 7 | import torch 8 | import glob 9 | import copy 10 | from contextlib import contextmanager, redirect_stderr, redirect_stdout 11 | import numpy as np 12 | 13 | from rl_games.common import env_configurations, vecenv 14 | from rl_games.torch_runner import Runner 15 | from rl_games.algos_torch import model_builder 16 | 17 | from src.envs.env_creators import env_creators, sys_param 18 | from src.envs.mpc_baseline_parameters import get_mpc_baseline_parameters 19 | from src.utils.rlgame_utils import RLGPUEnv, RLGPUAlgoObserver 20 | from src.networks.a2c_qp_unrolled import A2CQPUnrolledBuilder 21 | 22 | model_builder.register_network('qp_unrolled', A2CQPUnrolledBuilder) 23 | 24 | 
@contextmanager 25 | def suppress_stdout_stderr(): 26 | """A context manager that redirects stdout and stderr to devnull""" 27 | with open(os.devnull, 'w') as fnull: 28 | with redirect_stderr(fnull) as err, redirect_stdout(fnull) as out: 29 | yield (err, out) 30 | 31 | def float_list(string): 32 | """Convert a string into a list of floats.""" 33 | try: 34 | return [float(item) for item in string.split(',')] 35 | except ValueError: 36 | raise argparse.ArgumentTypeError("Argument must be a comma-separated list of floats") 37 | 38 | 39 | parser = argparse.ArgumentParser() 40 | parser.add_argument("train_or_test", type=str, help="Train or test") 41 | parser.add_argument("env", type=str) 42 | parser.add_argument("--noise-level", type=float, default=0.5) 43 | parser.add_argument("--seed", type=int, default=42) 44 | parser.add_argument("--exp-name", type=str, default="default") 45 | parser.add_argument("--epochs", type=int, default=1000) 46 | parser.add_argument("--num-parallel", type=int, default=100000) 47 | parser.add_argument("--mini-epochs", type=int, default=5) 48 | parser.add_argument("--mlp-size-last", type=int, default=64) 49 | parser.add_argument("--gamma", type=float, default=0.99) 50 | parser.add_argument("--horizon", type=int, default=200) 51 | parser.add_argument("--max-steps-per-episode", type=int, default=500) 52 | parser.add_argument("--score-to-win", type=int, default=int(1e9)) 53 | parser.add_argument("--save-freq", type=int, default=10) 54 | parser.add_argument("--epoch-index", type=int, default=-1, help="For test only, -1 for using latest") 55 | parser.add_argument("--quiet", action='store_true') 56 | parser.add_argument("--device", type=str, default='cuda:0') 57 | parser.add_argument("--qp-unrolled", action='store_true') 58 | parser.add_argument("--n-qp", type=int, default=5) 59 | parser.add_argument("--m-qp", type=int, default=4) 60 | parser.add_argument("--qp-iter", type=int, default=10) 61 | parser.add_argument("--shared-PH", action="store_true") 62 | parser.add_argument("--affine-qb", action="store_true") 63 | parser.add_argument("--strict-affine-layer", action="store_true") 64 | parser.add_argument("--obs-has-half-ref", action="store_true") 65 | parser.add_argument("--symmetric", action="store_true") 66 | parser.add_argument("--no-b", action="store_true") 67 | parser.add_argument("--warm-start", action="store_true") 68 | parser.add_argument("--ws-loss-coef", type=float, default=10.) 
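# The --warm-start / --ws-* options configure the learned warm starter (src/modules/warm_starter.py);
# they only take effect when --qp-unrolled is also set (see the custom network config below).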
69 | parser.add_argument("--ws-update-rate", type=float, default=0.1) 70 | parser.add_argument("--batch-test", action="store_true") 71 | parser.add_argument("--run-name", type=str, default="") 72 | parser.add_argument("--randomize", action="store_true") 73 | parser.add_argument("--use-residual-loss", action="store_true") 74 | parser.add_argument("--no-obs-normalization", action="store_true") 75 | parser.add_argument("--imitate-mpc-N", type=int, default=0) 76 | parser.add_argument("--initialize-from-experiment", type=str, default="") 77 | parser.add_argument("--force-feasible", action="store_true") 78 | parser.add_argument("--skip-to-steady-state", action="store_true") 79 | parser.add_argument("--initial-lr", type=float, default=3e-4) 80 | parser.add_argument("--lr-schedule", type=str, default="adaptive") 81 | parser.add_argument("--reward-shaping", type=float_list, default=[0., 1., 0.]) 82 | 83 | parser.add_argument("--mpc-baseline-N", type=int, default=0) 84 | parser.add_argument("--use-osqp-for-mpc", action="store_true") 85 | parser.add_argument("--mpc-terminal-cost-coef", type=float, default=0.) 86 | parser.add_argument("--robust-mpc-method", type=str, default="none", choices=["none", "scenario", "tube"]) 87 | parser.add_argument("--tube-mpc-tube-size", type=float, default=0.) 88 | args = parser.parse_args() 89 | 90 | 91 | def get_num_parallel(): 92 | if args.train_or_test == "train": 93 | return args.num_parallel 94 | elif args.train_or_test == "test": 95 | if args.batch_test: 96 | return args.num_parallel 97 | else: 98 | return 1 99 | 100 | default_env_config = { 101 | "random_seed": args.seed, 102 | "quiet": args.quiet, 103 | "device": args.device, 104 | "bs": get_num_parallel(), 105 | "noise_level": args.noise_level, 106 | "max_steps": args.max_steps_per_episode, 107 | "keep_stats": (args.train_or_test == "test"), 108 | "run_name": args.run_name or args.exp_name, 109 | "exp_name": args.exp_name, 110 | "randomize": args.randomize, 111 | "skip_to_steady_state": args.skip_to_steady_state, 112 | "reward_shaping": args.reward_shaping, 113 | } 114 | 115 | blacklist_keys = lambda d, blacklist: {k: d[k] for k in d if not (k in blacklist)} 116 | vecenv.register('RLGPU', 117 | lambda config_name, num_actors, **kwargs: RLGPUEnv(config_name, num_actors, **kwargs)) 118 | env_configurations.register('rlgpu', { 119 | 'vecenv_type': 'RLGPU', 120 | 'env_creator': lambda **env_config: env_creators[args.env]( 121 | **blacklist_keys(default_env_config, env_config.keys()), 122 | **env_config, 123 | ), 124 | }) 125 | 126 | runner = Runner(RLGPUAlgoObserver()) 127 | file_path = os.path.dirname(__file__) 128 | with open(os.path.join(file_path, "runner_config.yaml")) as f: 129 | runner_config = yaml.safe_load(f) 130 | full_experiment_name = args.env + "_" + args.exp_name 131 | runner_config["params"]["seed"] = args.seed 132 | runner_config["params"]["config"]["train_or_test"] = args.train_or_test 133 | runner_config["params"]["config"]["num_actors"] = args.num_parallel 134 | runner_config["params"]["config"]["max_epochs"] = args.epochs 135 | runner_config["params"]["config"]["minibatch_size"] = args.num_parallel 136 | runner_config["params"]["config"]["games_to_track"] = args.num_parallel 137 | runner_config["params"]["config"]["steps_to_track_per_game"] = args.max_steps_per_episode 138 | runner_config["params"]["config"]["mini_epochs"] = args.mini_epochs 139 | runner_config["params"]["config"]["gamma"] = args.gamma 140 | runner_config["params"]["config"]["horizon_length"] = args.horizon 141 | 
runner_config["params"]["config"]["score_to_win"] = args.score_to_win 142 | runner_config["params"]["config"]["name"] = args.env 143 | runner_config["params"]["config"]["full_experiment_name"] = full_experiment_name 144 | runner_config["params"]["network"]["mlp"]["units"] = [args.mlp_size_last * i for i in (4, 2, 1)] 145 | runner_config["params"]["config"]["save_frequency"] = args.save_freq 146 | runner_config["params"]["config"]["device"] = args.device 147 | runner_config["params"]["network"].pop("rnn") 148 | runner_config["params"]["config"]["learning_rate"] = args.initial_lr 149 | runner_config["params"]["config"]["lr_schedule"] = args.lr_schedule 150 | if args.no_obs_normalization: 151 | runner_config["params"]["config"]["normalize_input"] = False 152 | 153 | if args.batch_test: 154 | runner_config["params"]["config"]["player"]["games_num"] = args.num_parallel 155 | 156 | if args.qp_unrolled: 157 | runner_config["params"]["network"]["name"] = "qp_unrolled" 158 | runner_config["params"]["network"]["custom"] = { 159 | "device": args.device, 160 | "n_qp": args.n_qp, 161 | "m_qp": args.m_qp, 162 | "qp_iter": args.qp_iter, 163 | "shared_PH": args.shared_PH, 164 | "affine_qb": args.affine_qb, 165 | "strict_affine_layer": args.strict_affine_layer, 166 | "obs_has_half_ref": args.obs_has_half_ref, 167 | "use_warm_starter": args.warm_start, 168 | "train_warm_starter": args.warm_start and args.train_or_test == "train", 169 | "ws_loss_coef": args.ws_loss_coef, 170 | "ws_update_rate": args.ws_update_rate, 171 | "mpc_baseline": None if (not args.mpc_baseline_N and not args.imitate_mpc_N) else {**get_mpc_baseline_parameters(args.env, args.mpc_baseline_N or args.imitate_mpc_N, noise_std=args.noise_level), "terminal_coef": args.mpc_terminal_cost_coef}, 172 | "imitate_mpc": args.imitate_mpc_N > 0, 173 | "use_osqp_for_mpc": args.use_osqp_for_mpc, 174 | "use_residual_loss": args.use_residual_loss, 175 | "symmetric": args.symmetric, 176 | "no_b": args.no_b, 177 | "force_feasible": args.force_feasible, 178 | "feasible_lambda": 10., 179 | "train_or_test": args.train_or_test, 180 | "run_name": args.run_name, 181 | } 182 | 183 | if args.mpc_baseline_N: 184 | # Unset observation and action normalization 185 | runner_config["params"]["config"]["clip_actions"] = False 186 | runner_config["params"]["config"]["normalize_input"] = False 187 | 188 | if args.imitate_mpc_N: 189 | # Unset observation normalization 190 | runner_config["params"]["config"]["normalize_input"] = False 191 | # Make MPC output normalized action 192 | runner_config["params"]["network"]["custom"]["mpc_baseline"]["normalize"] = True 193 | 194 | if args.robust_mpc_method != "none": 195 | runner_config["params"]["network"]["custom"]["mpc_baseline"]["robust_method"] = args.robust_mpc_method 196 | runner_config["params"]["network"]["custom"]["mpc_baseline"]["max_disturbance_per_dim"] = args.tube_mpc_tube_size 197 | 198 | if args.quiet: 199 | with suppress_stdout_stderr(): 200 | runner.load(runner_config) 201 | else: 202 | runner.load(runner_config) 203 | 204 | if __name__ == "__main__": 205 | if args.train_or_test == "train": 206 | runner_arg = { 207 | 'train': True, 208 | 'play': False, 209 | } 210 | if args.initialize_from_experiment: 211 | full_checkpoint_name = args.env + "_" + args.initialize_from_experiment 212 | checkpoint_dir = f"runs/{full_checkpoint_name}/nn" 213 | checkpoint_name = f"{checkpoint_dir}/{args.env}.pth" 214 | runner_arg['checkpoint'] = checkpoint_name 215 | runner.run(runner_arg) 216 | elif args.train_or_test == "test": 217 | 
if not args.mpc_baseline_N: 218 | checkpoint_dir = f"runs/{full_experiment_name}/nn" 219 | if args.epoch_index == -1: 220 | checkpoint_name = f"{checkpoint_dir}/{args.env}.pth" 221 | else: 222 | list_of_files = glob.glob(f"{checkpoint_dir}/last_{args.env}_ep_{args.epoch_index}_rew_*.pth") 223 | checkpoint_name = max(list_of_files, key=os.path.getctime) 224 | else: 225 | checkpoint_name = None 226 | runner.run({ 227 | 'train': False, 228 | 'play': True, 229 | 'checkpoint' : checkpoint_name, 230 | }) 231 | -------------------------------------------------------------------------------- /experiments/tank/visualize_feasible_sets.py: -------------------------------------------------------------------------------- 1 | # %% Specify test case 2 | import numpy as np 3 | 4 | # Case where MPC is better 5 | x0 = np.array([10., 10., 10., 10.]) 6 | x_ref = np.array([19, 19, 2.4, 2.4]) 7 | 8 | # # Case where MPC fails 9 | # x0 = np.array([ 5.4963946, 10.947876, 1.034516, 18.08066 ]) 10 | # x_ref = np.array([7.522859, 8.169776, 1.1107684, 1. ]) 11 | 12 | # Controlling process noise and parametric uncertainty 13 | noise_level = 0 14 | parametric_uncertainty = False 15 | parameter_randomization_seed = 2 16 | 17 | # %% Set up test bench 18 | import sys 19 | import os 20 | file_path = os.path.dirname(__file__) 21 | sys.path.append(os.path.join(file_path, "../..")) 22 | 23 | from src.envs.env_creators import sys_param, env_creators 24 | from src.envs.mpc_baseline_parameters import get_mpc_baseline_parameters 25 | from src.modules.qp_unrolled_network import QPUnrolledNetwork 26 | import torch 27 | from matplotlib import pyplot as plt 28 | from icecream import ic 29 | 30 | 31 | # Utilities 32 | 33 | def make_obs(x, x_ref, running_mean, running_std, normalize): 34 | raw_obs = torch.tensor(np.concatenate([x, x_ref]), device=device, dtype=torch.float) 35 | if not normalize: 36 | return raw_obs.unsqueeze(0) 37 | else: 38 | return ((raw_obs - running_mean) / running_std).unsqueeze(0) 39 | 40 | def get_state_dict(checkpoint_path): 41 | checkpoint = torch.load(checkpoint_path) 42 | model = checkpoint["model"] 43 | prefix = "a2c_network.policy_net." 44 | policy_net_state_dict = {k[len(prefix):]: v for (k, v) in model.items() if k.startswith(prefix)} 45 | if "running_mean_std.running_mean" in model: 46 | running_mean = model["running_mean_std.running_mean"].to(dtype=torch.float) 47 | running_std = model["running_mean_std.running_var"].sqrt().to(dtype=torch.float) 48 | else: 49 | running_mean = torch.tensor([0.]) 50 | running_std = torch.tensor([1.]) 51 | return policy_net_state_dict, running_mean, running_std 52 | 53 | def rescale_action(action, low=-1., high=8.): 54 | action = action.clamp(-1., 1.)
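# affine map from the clamped range [-1, 1] onto [low, high]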
55 | return low + (high - low) * (action + 1) / 2 56 | 57 | t = lambda arr: torch.tensor(arr, device=device, dtype=torch.float).unsqueeze(0) 58 | a = lambda t: t.detach().cpu().numpy() 59 | 60 | # Constants and options 61 | n_sys = 4 62 | m_sys = 2 63 | input_size = 8 # 4 for x, 4 for x_ref 64 | n = 2 65 | m = 64 66 | qp_iter = 10 67 | device = "cuda:0" 68 | 69 | 70 | # MPC module 71 | mpc_baseline = get_mpc_baseline_parameters("tank", 1) 72 | mpc_baseline["normalize"] = True # Solve for normalized action, to be consistent with learned QP 73 | mpc_module = QPUnrolledNetwork( 74 | device, input_size, n, m, qp_iter, None, True, True, 75 | mpc_baseline=mpc_baseline, 76 | use_osqp_for_mpc=True, 77 | ) 78 | 79 | # Environment 80 | env = env_creators["tank"]( 81 | noise_level=noise_level, 82 | bs=1, 83 | max_steps=300, 84 | keep_stats=True, 85 | run_name="", 86 | exp_name="", 87 | randomize=parametric_uncertainty, 88 | ) 89 | 90 | # %% Compare learned QPs learned with / without residual loss, and compare degree of constraint violation 91 | from src.utils.torch_utils import bmv 92 | 93 | def get_qp_net(trained_with_residual_loss, forced_feasibility=False): 94 | exp_name = f"residual_loss_{'on' if trained_with_residual_loss else 'off'}" 95 | if forced_feasibility: 96 | exp_name = "force_feasible_on" 97 | net = QPUnrolledNetwork(device, input_size, n, m, qp_iter, None, True, True, force_feasible=forced_feasibility) 98 | if parametric_uncertainty: 99 | exp_name += "+rand" 100 | checkpoint_path = f"runs/tank_{exp_name}/nn/tank.pth" 101 | policy_net_state_dict, running_mean, running_std = get_state_dict(checkpoint_path) 102 | net.load_state_dict(policy_net_state_dict) 103 | running_mean, running_std = running_mean.to(device=device), running_std.to(device=device) 104 | net.to(device) 105 | return net, running_mean, running_std 106 | 107 | def compute_violation(H, action_all, b): 108 | """ 109 | Number of violated constraints, as well as magnitude of constraint violation. 
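Feasibility is encoded elementwise as H @ z + b >= 0; returns a (violation_count, violation_magnitude) pair per batch element, where the magnitude is the Euclidean norm of the negative part of H @ z + b.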
110 | """ 111 | z_recovered = bmv(H, action_all) + b 112 | violation_count = (z_recovered < 0.).sum(dim=-1) 113 | violation_magnitude = torch.norm(z_recovered.clamp(-torch.inf, 0.), dim=-1) 114 | return violation_count, violation_magnitude 115 | 116 | def rollout(trained_with_residual_loss, is_mpc, steps, forced_feasibility=False): 117 | net, running_mean, running_std = get_qp_net(trained_with_residual_loss, forced_feasibility) 118 | if is_mpc: 119 | net = mpc_module 120 | results = [] 121 | env.reset(t(x0), t(x_ref), randomize_seed=parameter_randomization_seed) 122 | x = x0 123 | obs = make_obs(x, x_ref, running_mean, running_std, not is_mpc) 124 | for i in range(steps): 125 | action_all, problem_params = net(obs, return_problem_params=True) 126 | u = rescale_action(action_all[:, :m_sys]) 127 | raw_obs, reward, done_t, info = env.step(u) 128 | if not is_mpc: 129 | obs = (raw_obs - running_mean) / running_std 130 | else: 131 | obs = raw_obs 132 | done = done_t.item() 133 | P, q, H, b = problem_params 134 | results.append((P, q, H, b, action_all)) 135 | return results 136 | 137 | def evaluate_constraint_violation(trained_with_residual_loss, steps=10, forced_feasibility=False): 138 | """Rollout for multiple steps, and compute average (number of violated constraints, magnitude of violation).""" 139 | rollout_results = rollout(trained_with_residual_loss, False, steps, forced_feasibility) 140 | constraint_violation_indices = [] 141 | for i in range(steps): 142 | H = rollout_results[i][2] 143 | action_all = rollout_results[i][4] 144 | b = rollout_results[i][3] 145 | constraint_violation_indices.append(compute_violation(H, action_all, b)) 146 | average_violation_count = torch.stack([v[0] for v in constraint_violation_indices], dim=0).to(dtype=torch.float).mean(dim=0) 147 | average_violation_magnitude = torch.stack([v[1] for v in constraint_violation_indices], dim=0).mean(dim=0) 148 | return average_violation_count, average_violation_magnitude 149 | 150 | violation_count_with_residual_loss, violation_magnitude_with_residual_loss = evaluate_constraint_violation(True) 151 | violation_count_without_residual_loss, violation_magnitude_without_residual_loss = evaluate_constraint_violation(False) 152 | 153 | ic(violation_count_with_residual_loss, violation_count_without_residual_loss) 154 | ic(violation_magnitude_with_residual_loss, violation_magnitude_without_residual_loss) 155 | 156 | # %% Visualize the feasible set and objective function at a certain step, ignoring constraints that are violated 157 | at_step = 10 158 | 159 | from src.utils.visualization import plot_multiple_2d_polytopes_with_contour 160 | 161 | def get_violated_mask(H, action_all, b): 162 | z_recovered = bmv(H, action_all) + b 163 | return torch.where(z_recovered < 0., torch.ones_like(z_recovered), torch.zeros_like(z_recovered)) 164 | 165 | def get_step_parameters(at_step, trained_with_residual_loss, is_mpc, forced_feasibility=False): 166 | rollout_results = rollout(trained_with_residual_loss, is_mpc, at_step, forced_feasibility) 167 | result_last_step = rollout_results[-1] 168 | P, q, H, b, action_all = result_last_step 169 | violated_mask = get_violated_mask(H, action_all, b) 170 | return P, q, H, b, violated_mask, action_all 171 | 172 | def get_plot_parameters(trained_with_residual_loss, is_mpc, color, label, is_forced_feasibility=False): 173 | a = lambda t: t.squeeze(0).detach().cpu().numpy() 174 | global P, q, H, b, violated_mask, action_all 175 | P, q, H, b, violated_mask, action_all = get_step_parameters(at_step, 
trained_with_residual_loss, is_mpc, is_forced_feasibility) 176 | if not is_forced_feasibility: 177 | # Filter out violated constraints 178 | satisfied_mask = torch.logical_not(violated_mask) 179 | plot_params = { 180 | "A": a(-H[satisfied_mask, :]), 181 | "b": a(b[satisfied_mask]), 182 | "optimal_solution": a(action_all[:, :m_sys]), 183 | "P": a(P), 184 | "q": a(q), 185 | "color": color, 186 | "label": label, 187 | } 188 | else: 189 | # Learned problem with forced feasibility; recover original P, q, H, b from augmented P, q, H, b 190 | y = action_all[:, -1].item() 191 | P0 = P[:, :n, :n] 192 | q0 = q[:, :n] 193 | H0 = H[:, :m, :n] 194 | b0 = b[:, :m] + y 195 | plot_params = { 196 | "A": a(-H0), 197 | "b": a(b0), 198 | "optimal_solution": a(action_all[:, :m_sys]), 199 | "P": a(P0), 200 | "q": a(q0), 201 | "color": color, 202 | "label": label, 203 | } 204 | return plot_params 205 | 206 | fig, ax = plot_multiple_2d_polytopes_with_contour([ 207 | get_plot_parameters(True, False, "blue", "Learned QP (with residual loss)"), 208 | get_plot_parameters(False, False, "red", "Learned QP (w/o residual loss)"), 209 | get_plot_parameters(False, True, "green", "MPC") 210 | ]) 211 | ax.set_xlabel("$u_1$") 212 | ax.set_ylabel("$u_2$") 213 | ax.set_title(f"Feasible sets and objective functions at step {at_step}") 214 | 215 | # %% Visualize the feasible set and objective function at a certain step, forcing feasibility 216 | fig, ax = plot_multiple_2d_polytopes_with_contour([ 217 | get_plot_parameters(True, False, "blue", "Learned QP (forced feasibility, n=2)", True), 218 | get_plot_parameters(False, True, "green", "MPC (N=1)", True) 219 | ]) 220 | ax.set_xlabel("$u_1$") 221 | ax.set_ylabel("$u_2$") 222 | ax.set_title(f"Feasible sets and objective functions at step {at_step}") 223 | 224 | 225 | # %% Visualize feasible set vs. MPC; Now 226 | # 1. The learned QP is guaranteed to be feasible; no need to ignore violated constraints 227 | # 2. 
The variables are allowed to be high-dimensional; we project the constraint polytope and the quadratic objective to 2D 228 | from src.utils.geometry import high_dim_to_2D_sampling, partial_minimization_2D 229 | 230 | n = 8 231 | m = 32 232 | mpc_N = 4 233 | at_step = 50 234 | 235 | mpc_baseline = get_mpc_baseline_parameters("tank", mpc_N) 236 | mpc_baseline["normalize"] = True # Solve for normalized action, to be consistent with learned QP 237 | mpc_module = QPUnrolledNetwork( 238 | device, input_size, n, m, qp_iter, None, True, True, 239 | mpc_baseline=mpc_baseline, 240 | use_osqp_for_mpc=True, 241 | ) 242 | 243 | def get_plot_parameters_proj(is_mpc, color, label): 244 | a = lambda t: t.squeeze(0).detach().cpu().numpy() 245 | P, q, H, b, violated_mask, action_all = get_step_parameters(at_step, False, is_mpc, True) 246 | if not is_mpc: 247 | # Learned problem with forced feasibility; recover original P, q, H, b from augmented P, q, H, b 248 | y = action_all[:, -1].item() 249 | P0 = P[:, :n, :n] 250 | q0 = q[:, :n] 251 | H0 = H[:, :m, :n] 252 | b0 = b[:, :m] + y 253 | else: 254 | P0, q0, H0, b0 = P, q, H, b 255 | 256 | A_proj, b_proj = high_dim_to_2D_sampling(-a(H0), a(b0)) 257 | P_proj, q_proj, _ = partial_minimization_2D(a(P0), a(q0)) 258 | plot_params = { 259 | "A": A_proj, 260 | "b": b_proj, 261 | "optimal_solution": a(action_all[:, :m_sys]), 262 | "P": P_proj, 263 | "q": q_proj, 264 | "color": color, 265 | "label": label, 266 | } 267 | return plot_params 268 | 269 | 270 | fig, ax = plot_multiple_2d_polytopes_with_contour([ 271 | get_plot_parameters_proj(True, "green", "MPC"), 272 | get_plot_parameters_proj(False, "blue", "Learned"), 273 | ]) 274 | ax.set_xlabel("$u_1$") 275 | ax.set_ylabel("$u_2$") 276 | ax.set_title(f"Feasible sets and objective functions at step {at_step}") 277 | 278 | # %% 279 | -------------------------------------------------------------------------------- /experiments/double_integrator/visualize.py: -------------------------------------------------------------------------------- 1 | # %% Load system and compute maximal invariant set 2 | import numpy as np 3 | import sys 4 | import os 5 | file_path = os.path.dirname(__file__) 6 | sys.path.append(os.path.join(file_path, "../..")) 7 | 8 | from src.envs.env_creators import sys_param, env_creators 9 | from src.utils.sets import compute_MCI 10 | from matplotlib import pyplot as plt 11 | 12 | A = sys_param["double_integrator"]["A"] 13 | B = sys_param["double_integrator"]["B"] 14 | Q = sys_param["double_integrator"]["Q"] 15 | R = sys_param["double_integrator"]["R"] 16 | x_min_scalar = sys_param["double_integrator"]["x_min"] 17 | x_max_scalar = sys_param["double_integrator"]["x_max"] 18 | u_min_scalar = sys_param["double_integrator"]["u_min"] 19 | u_max_scalar = sys_param["double_integrator"]["u_max"] 20 | x_min = x_min_scalar * np.ones(2) 21 | x_max = x_max_scalar * np.ones(2) 22 | u_min = u_min_scalar * np.ones(1) 23 | u_max = u_max_scalar * np.ones(1) 24 | 25 | MCI = compute_MCI(A, B, x_min, x_max, u_min, u_max, iterations=100) 26 | 27 | fig, ax = plt.subplots() 28 | # ax.fill(X0_vertices[:, 0], X0_vertices[:, 1], alpha=0.3, label='Initial Set $X_0$', color='g') 29 | ax.fill(MCI[:, 0], MCI[:, 1], 30 | alpha=0.7, label='Maximal Control Invariant Set', color='r') 31 | ax.grid() 32 | 33 | # %% Define MPC on the system 34 | from src.utils.mpc_utils import mpc2qp_np 35 | from src.utils.osqp_utils import osqp_oracle 36 | 37 | N_mpc = 3 # The short horizon will make naive MPC fail, as shown in 
http://cse.lab.imtlucca.it/~bemporad/publications/papers/BBMbook.pdf, p. 247 38 | 39 | def mpc_controller(x, Qf=None): 40 | """ 41 | MPC controller for the double integrator system. 42 | """ 43 | _, _, P, q, H, b = mpc2qp_np( 44 | n_mpc=2, m_mpc=1, N=N_mpc, A=A, B=B, Q=Q, R=R, 45 | x_min=x_min_scalar, x_max=x_max_scalar, u_min=u_min_scalar, u_max=u_max_scalar, 46 | x0=x, x_ref=np.zeros(2), normalize=False, Qf=Qf 47 | ) 48 | sol = osqp_oracle(q, b, P, H) 49 | return np.clip(sol[:1], u_min_scalar, u_max_scalar) 50 | 51 | def mpc_with_predicted_trajectory(x): 52 | """ 53 | Return predicted trajectory (list of (x, u) pairs) of the MPC controller. 54 | """ 55 | _, _, P, q, H, b = mpc2qp_np( 56 | n_mpc=2, m_mpc=1, N=N_mpc, A=A, B=B, Q=Q, R=R, 57 | x_min=x_min_scalar, x_max=x_max_scalar, u_min=u_min_scalar, u_max=u_max_scalar, 58 | x0=x, x_ref=np.zeros(2), normalize=False, 59 | ) 60 | sol = osqp_oracle(q, b, P, H) 61 | sol = np.clip(sol, u_min_scalar, u_max_scalar) 62 | trajectory = [] 63 | for i in range(N_mpc): 64 | u = sol[i:i + 1] 65 | x = A @ x + B @ u 66 | if not (x_min_scalar - 0.01 <= x).all() or not (x <= x_max_scalar + 0.01).all(): 67 | break 68 | trajectory.append((x, u)) 69 | return trajectory 70 | 71 | # %% Define learned controller on the system 72 | from src.modules.qp_unrolled_network import QPUnrolledNetwork 73 | import torch 74 | 75 | def get_state_dict(checkpoint_path): 76 | checkpoint = torch.load(checkpoint_path) 77 | model = checkpoint["model"] 78 | prefix = "a2c_network.policy_net." 79 | policy_net_state_dict = {k[len(prefix):]: v for (k, v) in model.items() if k.startswith(prefix)} 80 | if "running_mean_std.running_mean" in model: 81 | running_mean = model["running_mean_std.running_mean"].to(dtype=torch.float) 82 | running_std = model["running_mean_std.running_var"].sqrt().to(dtype=torch.float) 83 | else: 84 | running_mean = torch.tensor([0.]) 85 | running_std = torch.tensor([1.]) 86 | return policy_net_state_dict, running_mean, running_std 87 | 88 | device = "cuda:0" 89 | n_qp = 3 90 | m_qp = 9 91 | qp_iter = 10 92 | symmetric = True 93 | no_b = True 94 | net = QPUnrolledNetwork(device, 2, n_qp, m_qp, qp_iter, None, True, True, force_feasible=True, symmetric=symmetric, no_b=no_b) 95 | if not symmetric: 96 | exp_name = "default" 97 | elif not no_b: 98 | exp_name = "symmetric" 99 | else: 100 | exp_name = "symmetric_no_b" 101 | checkpoint_path = f"runs/double_integrator_{exp_name}/nn/double_integrator.pth" 102 | policy_net_state_dict, running_mean, running_std = get_state_dict(checkpoint_path) 103 | net.load_state_dict(policy_net_state_dict) 104 | running_mean, running_std = running_mean.to(device=device), running_std.to(device=device) 105 | net.to(device) 106 | 107 | t = lambda arr: torch.tensor(arr, device=device, dtype=torch.float).unsqueeze(0) 108 | a = lambda t: t.squeeze(0).detach().cpu().numpy() 109 | 110 | def learned_controller(x): 111 | sol = a(net(t(x))) 112 | sol *= 0.5 # Denormalize 113 | return np.clip(sol[:1], u_min_scalar, u_max_scalar) 114 | 115 | # %% Define closed-loop dynamics 116 | 117 | def get_cl_dynamics(controller): 118 | def g(x): 119 | return A @ x + B @ controller(x) 120 | return g 121 | 122 | g_mpc = get_cl_dynamics(mpc_controller) 123 | g_mpc_term = get_cl_dynamics(lambda x: mpc_controller(x, Qf=10 * Q)) 124 | g_learned = get_cl_dynamics(learned_controller) 125 | 126 | 127 | # %% Compute one-step reachable sets starting from MCI 128 | from src.utils.sets import one_step_forward_reachable_set 129 | 130 | reachable_mpc = 
one_step_forward_reachable_set(g_mpc, MCI, x_min, x_max) 131 | reachable_mpc_term = one_step_forward_reachable_set(g_mpc_term, MCI, x_min, x_max) 132 | reachable_learned = one_step_forward_reachable_set(g_learned, MCI, x_min, x_max) 133 | 134 | fig, ax = plt.subplots() 135 | ax.fill(MCI[:, 0], MCI[:, 1], 136 | alpha=0.3, label='Maximal Control Invariant Set', color='r') 137 | ax.fill(reachable_mpc[:, 0], reachable_mpc[:, 1], 138 | alpha=0.3, label='One-step reachable set (MPC)', color='g') 139 | ax.fill(reachable_mpc_term[:, 0], reachable_mpc_term[:, 1], 140 | alpha=0.3, label='One-step reachable set (MPC with terminal cost)', color='purple') 141 | ax.fill(reachable_learned[:, 0], reachable_learned[:, 1], 142 | alpha=0.3, label='One-step reachable set (Learned)', color='b') 143 | ax.set_xlabel("$x_1$") 144 | ax.set_ylabel("$x_2$") 145 | 146 | ax.legend() 147 | 148 | # %% Compute positive invariant sets under closed-loop dynamics 149 | from src.utils.sets import compute_positive_invariant_set_from_origin 150 | 151 | pis_mpc = compute_positive_invariant_set_from_origin(g_mpc, x_min, x_max, initial_radius=1.5, iterations=150) 152 | pis_mpc_term = compute_positive_invariant_set_from_origin(g_mpc_term, x_min, x_max, initial_radius=1.8, iterations=20) 153 | pis_learned = compute_positive_invariant_set_from_origin(g_learned, x_min, x_max, initial_radius=1.8, iterations=20) 154 | 155 | 156 | # %% 157 | fig, ax = plt.subplots() 158 | ax.fill(MCI[:, 0], MCI[:, 1], 159 | alpha=1.0, label='Maximal Control Invariant Set', color='r') 160 | ax.fill(pis_learned[:, 0], pis_learned[:, 1], 161 | alpha=1.0, label='Positive invariant set (Learned)', color='b') 162 | ax.fill(pis_mpc[:, 0], pis_mpc[:, 1], 163 | alpha=1.0, label='Positive invariant set (MPC)', color='g') 164 | ax.fill(pis_mpc_term[:, 0], pis_mpc_term[:, 1], 165 | alpha=1.0, label='Positive invariant set (MPC with terminal cost)', color='purple') 166 | ax.set_xlabel("$x_1$") 167 | ax.set_ylabel("$x_2$") 168 | ax.grid() 169 | ax.legend() 170 | 171 | 172 | # %% Case study 173 | from matplotlib.patches import Rectangle 174 | 175 | def get_trajectory(controller, x0, max_steps=200): 176 | g = get_cl_dynamics(controller) 177 | x = x0 178 | xs = [x] 179 | total_cost = 0. 
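# Roll out the closed-loop dynamics for up to max_steps, accumulating the LQ stage cost
# x' Q x + u' R u; assign infinite cost if the state constraints are violated, and stop
# early once the state is close to the origin.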
180 | for _ in range(max_steps): 181 | u = controller(x) 182 | total_cost += x.T @ Q @ x + u.T @ R @ u 183 | x = g(x) 184 | xs.append(x) 185 | if not (x_min <= x).all() or not (x <= x_max).all(): 186 | total_cost += np.inf 187 | break 188 | if np.linalg.norm(x) < 0.05: 189 | break 190 | average_cost = total_cost / len(xs) 191 | return np.array(xs), average_cost, total_cost 192 | 193 | 194 | def plot_comparison(x0, mark='^'): 195 | traj_mpc, cost_mpc, total_cost_mpc = get_trajectory(mpc_controller, x0) 196 | traj_mpc_term, cost_mpc_term, total_cost_mpc_term = get_trajectory(lambda x: mpc_controller(x, Qf=10000 * Q), x0) 197 | traj_learned, cost_learned, total_cost_learned = get_trajectory(learned_controller, x0) 198 | 199 | fig, ax = plt.subplots() 200 | ax.fill(MCI[:, 0], MCI[:, 1], 201 | alpha=0.1, label='Maximal Control Invariant Set', color='r') 202 | ax.fill(pis_mpc[:, 0], pis_mpc[:, 1], 203 | alpha=0.3, label='Positive invariant set (MPC)', color='g') 204 | ax.fill(pis_learned[:, 0], pis_learned[:, 1], 205 | alpha=0.3, label='Positive invariant set (Learned)', color='b') 206 | ax.plot(traj_mpc[:, 0], traj_mpc[:, 1], f'-{mark}', color='g', label=f"Trajectory (MPC) - Total Cost: {total_cost_mpc:.2f}") 207 | ax.plot(traj_mpc_term[:, 0], traj_mpc_term[:, 1], f'-{mark}', color='purple', label=f"Trajectory (MPC with term.) - Total Cost: {total_cost_mpc_term:.2f}") 208 | ax.plot(traj_learned[:, 0], traj_learned[:, 1], f'-{mark}', color='b', label="Trajectory (Learned) - Total Cost: {:.2f}".format(total_cost_learned)) 209 | ax.grid() 210 | ax.set_xlabel("$x_1$") 211 | ax.set_ylabel("$x_2$") 212 | 213 | # Plot the box constraint 214 | rect = Rectangle((x_min[0], x_min[1]), x_max[0] - x_min[0], x_max[1] - x_min[1], linewidth=1, edgecolor='r', facecolor='none') 215 | ax.add_patch(rect) 216 | ax.set_ylim(-3, 3) 217 | 218 | ax.legend() 219 | return fig, ax 220 | 221 | # %% Plot functions for manuscript 222 | import tikzplotlib 223 | 224 | def tikzplotlib_fix_ncols(obj): 225 | """ 226 | workaround for matplotlib 3.6 renamed legend's _ncol to _ncols, which breaks tikzplotlib 227 | """ 228 | if hasattr(obj, "_ncols"): 229 | obj._ncol = obj._ncols 230 | for child in obj.get_children(): 231 | tikzplotlib_fix_ncols(child) 232 | 233 | def plot_comparison_tex(x0): 234 | traj_mpc, cost_mpc, total_cost_mpc = get_trajectory(mpc_controller, x0) 235 | traj_learned, cost_learned, total_cost_learned = get_trajectory(learned_controller, x0) 236 | traj_mpc_term, cost_mpc_term, total_cost_mpc_term = get_trajectory(lambda x: mpc_controller(x, Qf=np.array([[-0.99, 0.], [0., 50.]])), x0) 237 | # traj_mpc_term_2, cost_mpc_term_2, total_cost_mpc_term_2 = get_trajectory(lambda x: mpc_controller(x, Qf=10000 * Q), x0) 238 | 239 | # Set up canvas 240 | fig, ax = plt.subplots() 241 | ax.fill(MCI[:, 0], MCI[:, 1], 242 | alpha=0.1, color='g') 243 | ax.grid() 244 | ax.set_xlabel("$x_{[1]}$") 245 | ax.set_ylabel("$x_{[2]}$") 246 | 247 | # Plot MPC trajectory 248 | mark = 'o' 249 | ax.plot(traj_mpc[:, 0], traj_mpc[:, 1], f'-{mark}', color='darkorange', label=f"\\makebox[3.2em][l]{{MPC:}} Cost=$\infty$", alpha=0.7, linewidth=2, zorder=0) 250 | 251 | # Plot MPC trajectory with terminal cost 252 | ax.plot(traj_mpc_term[:, 0], traj_mpc_term[:, 1], f'-{mark}', color='purple', label=f"\\makebox[3.2em][l]{{MPC-T:}} Cost={total_cost_mpc_term:.0f}", alpha=0.7, zorder=1, linewidth=2) 253 | 254 | # Plot learned trajectory 255 | ax.plot(traj_learned[:, 0], traj_learned[:, 1], f'-{mark}', color='b', label="\\makebox[3.2em][l]{{LQP:}} 
Cost={:.0f}".format(total_cost_learned), alpha=0.7, zorder=2, linewidth=2) 256 | 257 | # Plot predicted trajectories 258 | mark = '*' 259 | for i in range(3): 260 | traj = mpc_with_predicted_trajectory(traj_mpc[i]) 261 | xs = np.array([t[0] for t in traj]) 262 | # print(xs) 263 | ax.plot(xs[:, 0], xs[:, 1], f'-{mark}', color='darkorange', alpha=0.5, linewidth=2) 264 | 265 | # Plot the box constraint 266 | # rect = Rectangle((x_min[0], x_min[1]), x_max[0] - x_min[0], x_max[1] - x_min[1], linewidth=2, edgecolor='r', facecolor='none') 267 | # ax.add_patch(rect) 268 | ax.plot([x_min[0], x_max[0]], [x_min[1], x_min[1]], 'r', linewidth=3) 269 | ax.plot([x_max[0], x_max[0]], [x_min[1], x_max[1]], 'r', linewidth=3) 270 | ax.plot([x_max[0], x_min[0]], [x_max[1], x_max[1]], 'r', linewidth=3) 271 | ax.plot([x_min[0], x_min[0]], [x_max[1], x_min[1]], 'r', linewidth=3) 272 | 273 | ax.set_ylim(-0.3, 2.25) 274 | 275 | ax.legend(loc="lower left") 276 | 277 | tikzplotlib_fix_ncols(fig) 278 | 279 | tikzplotlib.save('toy_example.tex', axis_height='\\figureheight', 280 | axis_width='\\figurewidth',) 281 | 282 | return fig, ax 283 | 284 | # %% 285 | 286 | # %% 287 | fig, ax = plot_comparison(np.array([-4, 2]), '') 288 | # ax.set_xlim(-1, 1) 289 | # ax.set_ylim(-0.2, 0.2) 290 | 291 | # %% 292 | fig, ax = plot_comparison_tex(np.array([-4, 2.1])); 293 | 294 | # %% 295 | 296 | # %% 297 | --------------------------------------------------------------------------------