├── __init__.py ├── src ├── modules │ ├── __init__.py │ ├── warm_starter.py │ └── preconditioner.py ├── utils │ ├── __init__.py │ ├── osqp_utils.py │ ├── np_batch_op.py │ ├── rlgame_utils.py │ ├── visualization.py │ ├── torch_utils.py │ ├── sets.py │ └── geometry.py ├── networks │ ├── __init__.py │ └── a2c_qp_unrolled.py └── envs │ ├── mpc_baseline_parameters.py │ └── env_creators.py ├── .gitmodules ├── requirements.txt ├── experiments ├── cartpole │ ├── test_mpc.sh │ ├── test_mlp.sh │ ├── test_qp.sh │ ├── benchmark_stat.py │ ├── benchmark.sh │ ├── run.sh │ ├── reproduce_table.py │ ├── reproduce.sh │ └── visualize_trajectories.py ├── tank │ ├── test_skip_steady.sh │ ├── test_qp.sh │ ├── test_mpc.sh │ ├── test_imitation.sh │ ├── test_residual_loss.sh │ ├── test_reward_shaping.sh │ ├── test_force_feasible.sh │ ├── benchmark.sh │ ├── test_skip_steady.py │ ├── run.sh │ ├── plot_histogram.py │ ├── benchmark_stat.py │ ├── reproduce_table.py │ ├── reproduce.sh │ ├── reproduce_table_disturbed.py │ ├── reproduce_disturbed.sh │ ├── visualize_trajectories.py │ └── visualize_feasible_sets.py └── double_integrator │ ├── run.sh │ ├── dump_parameters.py │ ├── verify_stability.jl │ └── visualize.py ├── setup.py ├── auxiliary ├── profile_bmv.py ├── test_solver_np.py ├── test_solver.py ├── profile.py ├── test_solver_parallel.py └── train_warmstarter.py ├── LICENSE ├── runner_config.yaml ├── README.md ├── .gitignore └── run.py /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/networks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "rl_games"] 2 | path = rl_games 3 | url = https://github.com/yiwenlu66/rl_games.git 4 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | rl_games==1.6.0 2 | torch==2.1.0 3 | icecream==2.1.3 4 | qpsolvers==3.4.0 5 | pandas==2.1.0 6 | scipy==1.11.1 7 | matplotlib==3.6.3 8 | cvxpy==1.4.1 9 | do_mpc==4.6.4 10 | tqdm==4.66.1 11 | -------------------------------------------------------------------------------- /experiments/cartpole/test_mpc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | N=16 4 | noise=0 5 | 6 | python ../../run.py test cartpole --num-parallel 10000 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --shared-PH --affine-qb --mpc-baseline-N ${N} --noise-level ${noise} --batch-test --use-osqp-for-mpc --mpc-terminal-cost-coef 10 --exp-name qp_test_reward_shaping_10_0.1_0_8_48 --run-name mpc_10t --max-steps-per-episode 100 7 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, 
find_packages 2 | 3 | def read_requirements(filename): 4 | with open(filename) as f: 5 | return [line.strip() for line in f if line.strip() and not line.startswith('#')] 6 | 7 | setup( 8 | name="learning-qp", 9 | version="0.1.0", 10 | packages=find_packages("src"), 11 | package_dir={"": "src"}, 12 | install_requires=read_requirements('requirements.txt'), 13 | ) 14 | -------------------------------------------------------------------------------- /experiments/tank/test_skip_steady.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Store the command line argument in a global variable 4 | TRAIN_OR_TEST="$1" 5 | 6 | n_qp=8 7 | m_qp=32 8 | noise_level=0 9 | python ../../run.py $TRAIN_OR_TEST tank \ 10 | --num-parallel 100000 \ 11 | --horizon 20 \ 12 | --epochs 2000 \ 13 | --mini-epochs 1 \ 14 | --qp-unrolled \ 15 | --shared-PH \ 16 | --affine-qb \ 17 | --noise-level ${noise_level} \ 18 | --n-qp ${n_qp} \ 19 | --m-qp ${m_qp} \ 20 | --use-residual-loss \ 21 | --no-obs-normalization \ 22 | --skip-to-steady-state \ 23 | --lr-schedule linear \ 24 | --exp-name test_skip_steady -------------------------------------------------------------------------------- /experiments/cartpole/test_mlp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | run_task() { 4 | local gpu_id=$1 5 | local c1=$2 6 | local c2=$3 7 | local c3=$4 8 | local n_qp=$5 9 | local m_qp=$6 10 | CUDA_VISIBLE_DEVICES=$gpu_id python ../../run.py test cartpole --num-parallel 10000 --horizon 20 --epochs 5000 --mini-epochs 1 --noise-level 0. --reward-shaping ${c1},${c2},${c3} --n-qp $n_qp --m-qp $m_qp --shared-PH --affine-qb --use-residual-loss --no-obs-normalization --force-feasible --batch-test --exp-name mlp_test_reward_shaping_${c1}_${c2}_${c3}_${n_qp}_${m_qp} --lr-schedule linear --initial-lr "5e-4" --max-steps-per-episode 100 11 | } 12 | 13 | run_task 1 10 0.1 0 8 48 & 14 | 15 | wait 16 | -------------------------------------------------------------------------------- /experiments/tank/test_qp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | run_task() { 4 | local gpu_id=$1 5 | local c1=$2 6 | local c2=$3 7 | local c3=$4 8 | local n_qp=$5 9 | local m_qp=$6 10 | CUDA_VISIBLE_DEVICES=$gpu_id python ../../run.py train tank --num-parallel 10000 --horizon 20 --epochs 5000 --mini-epochs 1 --noise-level 0. 
--reward-shaping ${c1},${c2},${c3} --n-qp $n_qp --m-qp $m_qp --shared-PH --affine-qb --strict-affine-layer --obs-has-half-ref --use-residual-loss --no-obs-normalization --force-feasible --batch-test --exp-name mlp_test_reward_shaping_${c1}_${c2}_${c3}_${n_qp}_${m_qp} --lr-schedule linear --initial-lr "5e-4" & 11 | } 12 | 13 | run_task 0 50 0.05 2 4 24 14 | 15 | wait 16 | -------------------------------------------------------------------------------- /auxiliary/profile_bmv.py: -------------------------------------------------------------------------------- 1 | """Test performance of Ab, where A is a single matrix, and b is a batch of vectors.""" 2 | 3 | import torch 4 | import time 5 | 6 | def bmv1(A, b): 7 | return (A.unsqueeze(0) @ b.unsqueeze(-1)).squeeze(-1) 8 | 9 | def bmv2(A, b): 10 | return (A @ b.t()).t() 11 | 12 | batch_size = 100000 13 | n = 100 14 | device = "cuda:0" 15 | 16 | def benchmark(f): 17 | for i in range(1000): 18 | A = torch.randn((n, n), device=device, requires_grad=True) 19 | b = torch.randn((batch_size, n), device=device, requires_grad=True) 20 | loss = f(A, b).sum() 21 | loss.backward() 22 | 23 | t = time.time(); benchmark(bmv1); print(time.time() - t) 24 | t = time.time(); benchmark(bmv2); print(time.time() - t) 25 | -------------------------------------------------------------------------------- /src/utils/osqp_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import qpsolvers 3 | 4 | def osqp_solve_qp_guarantee_return( 5 | P, q, G=None, h=None, A=None, b=None, lb=None, ub=None, initvals=None, verbose=False, **kwargs, 6 | ): 7 | problem = qpsolvers.problem.Problem(P, q, G, h, A, b, lb, ub) 8 | solution = qpsolvers.solvers.osqp_.osqp_solve_problem(problem, initvals, verbose, **kwargs) 9 | sol_returned = solution.x if solution.x is not None and solution.x.dtype == np.float64 else np.zeros(q.shape[0])  # fall back to zeros on a failed solve so a float array is always returned 10 | iter_count = solution.extras["info"].iter 11 | return sol_returned, iter_count 12 | 13 | def osqp_oracle(q, b, P, H, return_iter_count=False, max_iter=1000): 14 | sol, iter_count = osqp_solve_qp_guarantee_return( 15 | P=P, q=q, G=-H, h=b, 16 | A=None, b=None, lb=None, ub=None, 17 | max_iter=max_iter, eps_abs=1e-10, eps_rel=1e-10, eps_prim_inf=1e-10, eps_dual_inf=1e-10, verbose=False, 18 | ) 19 | if not return_iter_count: 20 | return sol 21 | else: 22 | return sol, iter_count 23 | -------------------------------------------------------------------------------- /experiments/tank/test_mpc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | N=10 4 | noise=0 5 | 6 | # python ../../run.py test tank --num-parallel 100 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --shared-PH --affine-qb --mpc-baseline-N ${N} --noise-level ${noise} --batch-test --use-osqp-for-mpc --mpc-terminal-cost-coef 10 --quiet --exp-name qp_unrolled_shared_affine --randomize --noise-level 0.1 --run-name mpc_10t_perturbed --max-steps-per-episode 10 7 | # python ../../run.py test tank --num-parallel 100 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --shared-PH --affine-qb --mpc-baseline-N ${N} --noise-level ${noise} --batch-test --mpc-terminal-cost-coef 10 --quiet --exp-name qp_unrolled_shared_affine --run-name mpc_scenario --robust-mpc-method scenario --randomize --noise-level 0.1 --max-steps-per-episode 10 8 | python ../../run.py test tank --num-parallel 100 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --shared-PH --affine-qb --mpc-baseline-N ${N} --noise-level ${noise}
--batch-test --mpc-terminal-cost-coef 10 --quiet --exp-name qp_unrolled_shared_affine --run-name mpc_tube --robust-mpc-method tube --randomize --noise-level 0.1 --max-steps-per-episode 100 9 | -------------------------------------------------------------------------------- /experiments/cartpole/test_qp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | run_task() { 4 | local gpu_id=$1 5 | local c1=$2 6 | local c2=$3 7 | local c3=$4 8 | local n_qp=$5 9 | local m_qp=$6 10 | CUDA_VISIBLE_DEVICES=$gpu_id python ../../run.py train cartpole --num-parallel 10000 --horizon 20 --epochs 500 --mini-epochs 1 --noise-level 0. --reward-shaping ${c1},${c2},${c3} --qp-unrolled --n-qp $n_qp --m-qp $m_qp --shared-PH --symmetric --affine-qb --use-residual-loss --no-obs-normalization --force-feasible --batch-test --exp-name qp_test_reward_shaping_${c1}_${c2}_${c3}_${n_qp}_${m_qp} --lr-schedule adaptive --max-steps-per-episode 100 11 | CUDA_VISIBLE_DEVICES=$gpu_id python ../../run.py test cartpole --num-parallel 10000 --horizon 20 --epochs 5000 --mini-epochs 1 --noise-level 0. --reward-shaping ${c1},${c2},${c3} --qp-unrolled --n-qp $n_qp --m-qp $m_qp --shared-PH --affine-qb --symmetric --use-residual-loss --no-obs-normalization --force-feasible --batch-test --exp-name qp_test_reward_shaping_${c1}_${c2}_${c3}_${n_qp}_${m_qp} --lr-schedule linear --initial-lr "5e-4" --max-steps-per-episode 100 12 | } 13 | 14 | run_task 1 10 0.1 0 8 48 & 15 | 16 | wait 17 | -------------------------------------------------------------------------------- /experiments/tank/test_imitation.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Store the command line argument in a global variable 4 | TRAIN_OR_TEST="$1" 5 | 6 | run_imitate() { 7 | export CUDA_VISIBLE_DEVICES=0 8 | n_qp=8 9 | m_qp=32 10 | noise_level=0 11 | python ../../run.py $TRAIN_OR_TEST tank \ 12 | --num-parallel 100000 \ 13 | --horizon 20 \ 14 | --epochs 200 \ 15 | --mini-epochs 1 \ 16 | --qp-unrolled \ 17 | --shared-PH \ 18 | --affine-qb \ 19 | --noise-level ${noise_level} \ 20 | --n-qp ${n_qp} \ 21 | --m-qp ${m_qp} \ 22 | --imitate-mpc-N 10 \ 23 | --exp-name imitate 24 | } 25 | 26 | run_fine_tune() { 27 | export CUDA_VISIBLE_DEVICES=0 28 | n_qp=8 29 | m_qp=32 30 | noise_level=0 31 | python ../../run.py $TRAIN_OR_TEST tank \ 32 | --num-parallel 100000 \ 33 | --horizon 20 \ 34 | --epochs 400 \ 35 | --mini-epochs 1 \ 36 | --qp-unrolled \ 37 | --shared-PH \ 38 | --affine-qb \ 39 | --noise-level ${noise_level} \ 40 | --n-qp ${n_qp} \ 41 | --m-qp ${m_qp} \ 42 | --initialize-from-experiment imitate \ 43 | --no-obs-normalization \ 44 | --exp-name fine_tune 45 | } 46 | 47 | run_imitate 48 | run_fine_tune 49 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Yiwen Lu, Zishuo Li, Yihan Zhou, Na Li, and Yilin Mo 4 | 5 | Copyright (c) 2019 Denys88 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following 
conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in all 15 | copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 24 | -------------------------------------------------------------------------------- /experiments/tank/test_residual_loss.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Store the command line argument in a global variable 4 | TRAIN_OR_TEST="$1" 5 | 6 | group_one() { 7 | export CUDA_VISIBLE_DEVICES=0 8 | n_qp=2 9 | m_qp=64 10 | noise_level=0 11 | python ../../run.py $TRAIN_OR_TEST tank \ 12 | --num-parallel 100000 \ 13 | --horizon 20 \ 14 | --epochs 2000 \ 15 | --mini-epochs 1 \ 16 | --qp-unrolled \ 17 | --shared-PH \ 18 | --affine-qb \ 19 | --noise-level ${noise_level} \ 20 | --n-qp ${n_qp} \ 21 | --m-qp ${m_qp} \ 22 | --exp-name residual_loss_off 23 | } 24 | 25 | group_two() { 26 | export CUDA_VISIBLE_DEVICES=1 27 | n_qp=2 28 | m_qp=64 29 | noise_level=0 30 | python ../../run.py $TRAIN_OR_TEST tank \ 31 | --num-parallel 100000 \ 32 | --horizon 20 \ 33 | --epochs 2000 \ 34 | --mini-epochs 1 \ 35 | --qp-unrolled \ 36 | --shared-PH \ 37 | --affine-qb \ 38 | --noise-level ${noise_level} \ 39 | --n-qp ${n_qp} \ 40 | --m-qp ${m_qp} \ 41 | --use-residual-loss \ 42 | --exp-name residual_loss_on 43 | } 44 | 45 | # Start both groups in parallel 46 | group_one & group_two & 47 | 48 | # Wait for both background tasks to complete 49 | wait 50 | 51 | -------------------------------------------------------------------------------- /auxiliary/test_solver_np.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import numpy as np 3 | import sys 4 | import os 5 | file_path = os.path.dirname(__file__) 6 | sys.path.append(os.path.join(file_path, "..")) 7 | from src.utils.osqp_utils import osqp_oracle 8 | 9 | problem = np.load("example_problem.npy", allow_pickle=True).item()  # dumped by auxiliary/test_solver.py 10 | q = problem["q"] 11 | b = problem["b"] 12 | P = problem["P"] 13 | H = problem["H"] 14 | 15 | def obj(x): 16 | return 0.5 * np.einsum('i,ij,j->', x, P, x) + np.dot(q, x) 17 | 18 | x_star = osqp_oracle(q, b, P, H) 19 | 20 | # %% 21 | m, n = H.shape 22 | D = np.eye(m) 23 | Dt = np.linalg.inv(D + H @ np.linalg.solve(P, H.T)) 24 | mu = Dt @ (H @ np.linalg.solve(P, q) - b) 25 | A = np.block([ 26 | [Dt @ D, Dt], 27 | [-2 * Dt @ D + np.eye(m), np.eye(m) - 2 * Dt], 28 | ]) 29 | B = np.hstack([ 30 | mu, 31 | -2 * mu 32 | ]) 33 | def iterate(X):  # renamed from `iter` to avoid shadowing the builtin 34 | X = A @ X + B 35 | X[m:] = np.clip(X[m:], 0, np.inf)  # project the z-block onto the nonnegative orthant 36 | return X 37 | 38 | # %% 39 | def power_func(f, n): 40 | def helper(x): 41 | for _ in range(n): 42 | x = f(x) 43 | return x 44 | return helper 45 | 46 | def get_sol(z): 47 | PinvHt = np.linalg.solve(P, H.T) 48 | M = np.linalg.solve((H @ PinvHt).T, PinvHt.T).T 49 | return -(np.eye(n) - M @ H) @ np.linalg.solve(P, q) + M @ (z - b) 50 | 51 | X = power_func(iterate, 10000)(np.zeros(2 * m)) 52 | z = X[m:] 53 | x = get_sol(z) 54 | obj(x) 55 | 56 | # %% 57 |
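One way to read the splitting iteration assembled in `auxiliary/test_solver_np.py` above (the block names $\lambda$ and $z$ are an interpretive choice, not ones used in the script): with $D = I$, $\tilde{D} = (D + H P^{-1} H^\top)^{-1}$ and $\mu = \tilde{D}\,(H P^{-1} q - b)$, splitting $X \in \mathbb{R}^{2m}$ into its first $m$ entries $\lambda_k$ and last $m$ entries $z_k$, each call to `iterate` performs

$$
\begin{bmatrix} \lambda_{k+1} \\ z_{k+1}^{-} \end{bmatrix}
=
\begin{bmatrix} \tilde{D} D & \tilde{D} \\ I - 2\tilde{D} D & I - 2\tilde{D} \end{bmatrix}
\begin{bmatrix} \lambda_k \\ z_k \end{bmatrix}
+
\begin{bmatrix} \mu \\ -2\mu \end{bmatrix},
\qquad
z_{k+1} = \max\!\left(z_{k+1}^{-},\, 0\right),
$$

after which `get_sol` maps the converged $z$ back to the primal solution $x$. This is consistent with the "PDHG formulation" referenced in the `WarmStarter` docstring in `src/modules/warm_starter.py` below.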
-------------------------------------------------------------------------------- /src/modules/warm_starter.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.nn import functional as F 4 | from torch.linalg import solve, inv, pinv 5 | import numpy as np 6 | from ..utils.torch_utils import vectorize_upper_triangular 7 | 8 | class WarmStarter(nn.Module): 9 | def __init__(self, device, n, m, fixed_P=True, fixed_H=True): 10 | super().__init__() 11 | self.device = device 12 | self.n = n 13 | self.m = m 14 | self.fixed_P = fixed_P 15 | self.fixed_H = fixed_H 16 | num_in = n + m 17 | if not fixed_P: 18 | num_in += n * (n + 1) // 2 19 | if not fixed_H: 20 | num_in += n * m 21 | num_out = 2 * m 22 | num_hidden = max(num_in, num_out) 23 | self.net = nn.Sequential( 24 | nn.Linear(num_in, num_hidden), 25 | nn.ReLU(), 26 | nn.Linear(num_hidden, num_hidden), 27 | nn.ReLU(), 28 | nn.Linear(num_hidden, num_out), 29 | ).to(device=device) 30 | 31 | def forward(self, q, b, P=None, H=None): 32 | """The P argument can be either P or inv(P) in the original PDHG formulation, as long as consistent.""" 33 | net_input = [q, b] 34 | if not self.fixed_P: 35 | net_input.append(vectorize_upper_triangular(P)) 36 | if not self.fixed_H: 37 | net_input.append(H.flatten(start_dim=-2)) 38 | net_input_t = torch.cat(net_input, 1) 39 | X = self.net(net_input_t) 40 | return X 41 | -------------------------------------------------------------------------------- /auxiliary/test_solver.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import os 3 | import sys 4 | file_path = os.path.dirname(__file__) 5 | sys.path.append(os.path.join(file_path, "..")) 6 | from src.modules.qp_solver import QPSolver 7 | from src.modules.warm_starter import WarmStarter 8 | from src.utils.torch_utils import bqf, bmv, bvv 9 | from src.utils.mpc_utils import generate_random_problem 10 | import torch 11 | from torch.nn import functional as F 12 | import numpy as np 13 | 14 | n = 10 15 | m = 5 16 | 17 | device = "cuda:0" 18 | torch.manual_seed(42) 19 | q0, b0, P0, H0 = generate_random_problem(1, n, m, device) 20 | q0_np = q0.squeeze(0).cpu().numpy() 21 | b0_np = b0.squeeze(0).cpu().numpy() 22 | P0_np = P0.squeeze(0).cpu().numpy() 23 | H0_np = H0.squeeze(0).cpu().numpy() 24 | np.save("example_problem.npy", { 25 | "q": q0_np, 26 | "b": b0_np, 27 | "P": P0_np, 28 | "H": H0_np, 29 | }) 30 | 31 | solver = QPSolver(device, n, m, P=P0_np, H=H0_np) 32 | ws = WarmStarter(device, n, m, fixed_P=True, fixed_H=True) 33 | ws.load_state_dict(torch.load(f"models/warmstarter-{n}-{m}.pth")) 34 | solver_ws = QPSolver(device, n, m, P=P0_np, H=H0_np, warm_starter=ws) 35 | 36 | iters = 1000 37 | X, sol = solver(q0, b0, iters=iters) 38 | X_ws, sol_ws = solver_ws(q0, b0, iters=iters) 39 | 40 | 41 | # %% 42 | from matplotlib import pyplot as plt 43 | obj = [(0.5 * bqf(sol[:, i, :], P0) + bvv(sol[:, i, :], q0)).item() for i in range(sol.shape[1])] 44 | obj_ws = [(0.5 * bqf(sol_ws[:, i, :], P0) + bvv(sol_ws[:, i, :], q0)).item() for i in range(sol_ws.shape[1])]  # evaluate the warm-started iterates consistently 45 | plt.plot(obj) 46 | plt.plot(obj_ws) 47 | 48 | # %% 49 | X_diff = [(X[:, i, :] - X[:, -1, :]).norm().item() for i in range(X.shape[1])] 50 | X_diff_ws = [(X_ws[:, i, :] - X[:, -1, :]).norm().item() for i in range(X.shape[1])] 51 | plt.plot(X_diff) 52 | plt.plot(X_diff_ws) 53 | 54 | # %% 55 | -------------------------------------------------------------------------------- /runner_config.yaml: 
-------------------------------------------------------------------------------- 1 | params: 2 | seed: 0 3 | 4 | algo: 5 | name: a2c_continuous 6 | 7 | model: 8 | name: continuous_a2c_logstd 9 | 10 | network: 11 | name: actor_critic 12 | separate: False 13 | 14 | space: 15 | continuous: 16 | mu_activation: None 17 | sigma_activation: None 18 | mu_init: 19 | name: default 20 | sigma_init: 21 | name: const_initializer 22 | val: 0. # std = 1. 23 | fixed_sigma: True 24 | 25 | mlp: 26 | units: [256, 128, 64] 27 | activation: elu 28 | d2rl: False 29 | 30 | initializer: 31 | name: default 32 | regularizer: 33 | name: None 34 | 35 | rnn: 36 | units: 64 37 | layers: 1 38 | name: gru 39 | before_mlp: True 40 | 41 | load_checkpoint: True # flag which sets whether to load the checkpoint 42 | load_path: "checkpoints" # path to the checkpoint to load 43 | 44 | config: 45 | name: default 46 | full_experiment_name: default 47 | env_name: rlgpu 48 | ppo: True 49 | mixed_precision: False 50 | normalize_input: True 51 | normalize_value: True 52 | value_bootstrap: True 53 | num_actors: 100000 54 | reward_shaper: 55 | scale_value: 1.0 56 | normalize_advantage: True 57 | gamma: 0.99 58 | tau: 0.95 59 | e_clip: 0.2 60 | entropy_coef: 0.0 61 | learning_rate: 3.e-4 # overwritten by adaptive lr_schedule 62 | lr_schedule: adaptive 63 | kl_threshold: 0.008 # target kl for adaptive lr 64 | truncate_grads: True 65 | grad_norm: 1. 66 | horizon_length: 200 67 | minibatch_size: 100000 68 | mini_epochs: 5 69 | critic_coef: 2 70 | clip_value: True 71 | seq_len: 10 # only for rnn 72 | bounds_loss_coef: 0.001 73 | 74 | max_epochs: 1000 75 | save_best_after: 20 76 | score_to_win: 20000 77 | save_frequency: 10 78 | print_stats: True 79 | 80 | player: 81 | render: False 82 | render_sleep: 0. 83 | dump_stats: True 84 | games_num: 1 85 | -------------------------------------------------------------------------------- /experiments/tank/test_reward_shaping.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Define the number of GPUs available 4 | NUM_GPUS=6 # Change this to the number of GPUs you have 5 | 6 | # Function to find the first idle GPU 7 | find_idle_gpu() { 8 | for (( i=0; i<$NUM_GPUS; i++ )); do 9 | # Check if there are no processes on GPU i 10 | if [ -z "$(nvidia-smi -i $i --query-compute-apps=pid --format=csv,noheader)" ]; then 11 | echo $i 12 | return 13 | fi 14 | done 15 | echo "-1" # Return -1 if no idle GPU is found 16 | } 17 | 18 | # Function to run the Python script on a specific GPU 19 | run_task() { 20 | local gpu_id=$1 21 | local c1=$2 22 | local c2=$3 23 | local c3=$4 24 | local n_qp=$5 25 | local m_qp=$6 26 | CUDA_VISIBLE_DEVICES=$gpu_id python ../../run.py train tank --num-parallel 100000 --horizon 20 --epochs 5000 --mini-epochs 1 --noise-level 0. 
--reward-shaping ${c1},${c2},${c3} --qp-unrolled --n-qp $n_qp --m-qp $m_qp --shared-PH --affine-qb --use-residual-loss --no-obs-normalization --force-feasible --exp-name qp_test_reward_shaping_${c1}_${c2}_${c3}_${n_qp}_${m_qp} --lr-schedule linear --initial-lr "5e-4" & 27 | } 28 | 29 | # # Main loop for grid search 30 | # for c1 in 50; do 31 | # for c2 in 0.05; do 32 | # for c3 in 2; do 33 | # gpu_id=-1 34 | # # Wait for an idle GPU to become available 35 | # while [ $gpu_id -eq -1 ]; do 36 | # gpu_id=$(find_idle_gpu) 37 | # sleep 1 # Wait a bit before checking again 38 | # done 39 | 40 | # run_task $gpu_id $c1 $c2 $c3 4 24 41 | # # Optional: wait briefly to allow the task to start 42 | # sleep 10 43 | 44 | # gpu_id=-1 45 | # # Wait for an idle GPU to become available 46 | # while [ $gpu_id -eq -1 ]; do 47 | # gpu_id=$(find_idle_gpu) 48 | # sleep 1 # Wait a bit before checking again 49 | # done 50 | # run_task $gpu_id $c1 $c2 $c3 8 48 51 | # # Optional: wait briefly to allow the task to start 52 | # sleep 10 53 | # done 54 | # done 55 | # done 56 | 57 | run_task 1 50 0.05 2 4 24 58 | 59 | # Wait for all background jobs to finish 60 | wait 61 | -------------------------------------------------------------------------------- /experiments/tank/test_force_feasible.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Store the command line argument in a global variable 4 | TRAIN_OR_TEST="$1" 5 | 6 | group_one() { 7 | export CUDA_VISIBLE_DEVICES=0 8 | # n_qp=8 9 | # m_qp=32 10 | # noise_level=0 11 | # python ../../run.py $TRAIN_OR_TEST tank \ 12 | # --num-parallel 100000 \ 13 | # --horizon 20 \ 14 | # --epochs 2000 \ 15 | # --mini-epochs 1 \ 16 | # --qp-unrolled \ 17 | # --shared-PH \ 18 | # --affine-qb \ 19 | # --noise-level ${noise_level} \ 20 | # --n-qp ${n_qp} \ 21 | # --m-qp ${m_qp} \ 22 | # --use-residual-loss \ 23 | # --no-obs-normalization \ 24 | # --exp-name force_feasible_off 25 | n_qp=2 26 | m_qp=64 27 | noise_level=0 28 | python ../../run.py $TRAIN_OR_TEST tank \ 29 | --num-parallel 100000 \ 30 | --horizon 20 \ 31 | --epochs 2000 \ 32 | --mini-epochs 1 \ 33 | --qp-unrolled \ 34 | --shared-PH \ 35 | --affine-qb \ 36 | --noise-level ${noise_level} \ 37 | --n-qp ${n_qp} \ 38 | --m-qp ${m_qp} \ 39 | --use-residual-loss \ 40 | --no-obs-normalization \ 41 | --exp-name force_feasible_off 42 | } 43 | 44 | group_two() { 45 | export CUDA_VISIBLE_DEVICES=1 46 | # n_qp=8 47 | # m_qp=32 48 | # noise_level=0 49 | # python ../../run.py $TRAIN_OR_TEST tank \ 50 | # --num-parallel 100000 \ 51 | # --horizon 20 \ 52 | # --epochs 2000 \ 53 | # --mini-epochs 1 \ 54 | # --qp-unrolled \ 55 | # --shared-PH \ 56 | # --affine-qb \ 57 | # --noise-level ${noise_level} \ 58 | # --n-qp ${n_qp} \ 59 | # --m-qp ${m_qp} \ 60 | # --use-residual-loss \ 61 | # --force-feasible \ 62 | # --no-obs-normalization \ 63 | # --exp-name force_feasible_on 64 | n_qp=2 65 | m_qp=64 66 | noise_level=0 67 | python ../../run.py $TRAIN_OR_TEST tank \ 68 | --num-parallel 100000 \ 69 | --horizon 20 \ 70 | --epochs 2000 \ 71 | --mini-epochs 1 \ 72 | --qp-unrolled \ 73 | --shared-PH \ 74 | --affine-qb \ 75 | --noise-level ${noise_level} \ 76 | --n-qp ${n_qp} \ 77 | --m-qp ${m_qp} \ 78 | --use-residual-loss \ 79 | --force-feasible \ 80 | --no-obs-normalization \ 81 | --exp-name force_feasible_on 82 | } 83 | 84 | # Start both groups in parallel 85 | group_one & group_two & 86 | 87 | # Wait for both background tasks to complete 88 | wait 89 | 
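The paired `group_one`/`group_two` scripts above (`test_residual_loss.sh`, `test_force_feasible.sh`) follow one recurring pattern: each ablation arm is a shell function that pins itself to a GPU via `CUDA_VISIBLE_DEVICES`, both arms are launched as background jobs, and `wait` blocks until they finish. A minimal sketch of that pattern (illustrative only; the experiment names and flag set here are placeholders, not a configuration used by the repository):

```bash
#!/bin/bash
# Two-arm ablation template in the style of test_residual_loss.sh /
# test_force_feasible.sh: one GPU per arm, both arms in parallel.
TRAIN_OR_TEST="$1"

arm_on() {
    export CUDA_VISIBLE_DEVICES=0  # visible only inside this background job
    python ../../run.py $TRAIN_OR_TEST tank --exp-name my_feature_on   # placeholder experiment
}

arm_off() {
    export CUDA_VISIBLE_DEVICES=1
    python ../../run.py $TRAIN_OR_TEST tank --exp-name my_feature_off  # placeholder experiment
}

# Each `&` runs its function in a subshell, so the exports do not collide.
arm_on & arm_off &
wait
```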
-------------------------------------------------------------------------------- /experiments/double_integrator/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Run the double integrator experiment 4 | TRAIN_OR_TEST="$1" 5 | 6 | n_qp=3 7 | m_qp=9 8 | noise_level=0 9 | 10 | g1() { 11 | export CUDA_VISIBLE_DEVICES=0 12 | python ../../run.py $TRAIN_OR_TEST double_integrator \ 13 | --num-parallel 100000 \ 14 | --horizon 20 \ 15 | --epochs 500 \ 16 | --max-steps-per-episode 100 \ 17 | --mini-epochs 1 \ 18 | --qp-unrolled \ 19 | --shared-PH \ 20 | --affine-qb \ 21 | --noise-level ${noise_level} \ 22 | --n-qp ${n_qp} \ 23 | --m-qp ${m_qp} \ 24 | --no-obs-normalization \ 25 | --use-residual-loss \ 26 | --force-feasible \ 27 | --exp-name default 28 | } 29 | 30 | g2() { 31 | export CUDA_VISIBLE_DEVICES=1 32 | # python ../../run.py $TRAIN_OR_TEST double_integrator \ 33 | # --num-parallel 100000 \ 34 | # --horizon 20 \ 35 | # --epochs 500 \ 36 | # --max-steps-per-episode 100 \ 37 | # --mini-epochs 1 \ 38 | # --qp-unrolled \ 39 | # --shared-PH \ 40 | # --affine-qb \ 41 | # --noise-level ${noise_level} \ 42 | # --n-qp ${n_qp} \ 43 | # --m-qp ${m_qp} \ 44 | # --no-obs-normalization \ 45 | # --use-residual-loss \ 46 | # --force-feasible \ 47 | # --symmetric \ 48 | # --exp-name symmetric 49 | python ../../run.py $TRAIN_OR_TEST double_integrator \ 50 | --num-parallel 100000 \ 51 | --horizon 20 \ 52 | --epochs 500 \ 53 | --max-steps-per-episode 100 \ 54 | --mini-epochs 1 \ 55 | --qp-unrolled \ 56 | --shared-PH \ 57 | --affine-qb \ 58 | --noise-level ${noise_level} \ 59 | --n-qp ${n_qp} \ 60 | --m-qp ${m_qp} \ 61 | --no-obs-normalization \ 62 | --use-residual-loss \ 63 | --force-feasible \ 64 | --symmetric \ 65 | --no-b \ 66 | --exp-name symmetric_no_b 67 | } 68 | 69 | g3() { 70 | export CUDA_VISIBLE_DEVICES=1 71 | python ../../run.py $TRAIN_OR_TEST double_integrator \ 72 | --num-parallel 100000 \ 73 | --horizon 20 \ 74 | --epochs 500 \ 75 | --max-steps-per-episode 100 \ 76 | --mini-epochs 1 \ 77 | --noise-level ${noise_level} \ 78 | --exp-name mlp 79 | } 80 | 81 | # g1 & g2 & g3 & 82 | g2 & 83 | 84 | wait 85 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MPC-Inspired Reinforcement Learning for Verifiable Model-Free Control 2 | 3 | Code for the paper: [MPC-Inspired Reinforcement Learning for Verifiable Model-Free Control](https://arxiv.org/pdf/2312.05332) 4 | 5 | ## Installation 6 | 7 | The code is tested on Linux with Python 3.10, PyTorch 2.1.0, and CUDA 12.0. It is recommended to use a conda environment for installation: 8 | 9 | ```bash 10 | # Create and activate conda environment 11 | conda create -n learning-qp python=3.10 12 | conda activate learning-qp 13 | 14 | # Clone repository 15 | git clone --recursive https://github.com/yiwenlu66/learning-qp.git 16 | cd learning-qp 17 | 18 | # Install dependencies 19 | pip install -e . 20 | ``` 21 | 22 | Note: the `--recursive` option is required so that the customized `rl_games` submodule (see `.gitmodules`) is cloned along with the repository. 
23 | 24 | ## Usage 25 | 26 | ``` 27 | python run.py train_or_test env_name [--options] 28 | ``` 29 | 30 | The following scripts are also provided to reproduce the results in the paper: 31 | 32 | - `experiments/tank/reproduce.sh` for reproducing the first part of Table 1 33 | - `experiments/cartpole/reproduce.sh` for reproducing the second part of Table 1 34 | - `experiments/tank/reproduce_disturbed.sh` for reproducing Table 2 35 | 36 | **These scripts are run on GPU by default.** After running each reproducing script, the following data will be saved: 37 | 38 | - Training logs in tensorboard format will be saved in `runs` 39 | - Test results, including the trial output for each experiment and a summary table, all in CSV format, will be saved in `test_results` 40 | 41 | ## Code structure 42 | 43 | - `rl_games`: A customized version of the [rl_games](https://github.com/Denys88/rl_games) library for RL training 44 | - `src/envs`: GPU parallelized simulation environments, with interface similar to [Isaac Gym](https://github.com/NVIDIA-Omniverse/IsaacGymEnvs) 45 | - `src/modules`: PyTorch modules, including the proposed QP-based policy and the underlying differentiable QP solver 46 | - `src/networks`: Wrapper around the QP-based policy for interfacing with `rl_games` 47 | - `src/utils`: Utility functions (customized PyTorch operations, MPC baselines, etc.) 48 | - `experiments`: Sample scripts for running experiments 49 | 50 | ## License 51 | 52 | The project is released under the MIT license. See [LICENSE](LICENSE) for details. 53 | 54 | Part of the project is modified from [rl_games](https://github.com/Denys88/rl_games). 55 | 56 | ## Citation 57 | 58 | If you find this project useful in your research, please consider citing: 59 | 60 | ``` 61 | @InProceedings{lu2024mpc, 62 | title={MPC-Inspired Reinforcement Learning for Verifiable Model-Free Control}, 63 | author={Lu, Yiwen and Li, Zishuo and Zhou, Yihan and Li, Na and Mo, Yilin}, 64 | booktitle={Proceedings of the 6th Conference on Learning for Dynamics and Control}, 65 | year={2024} 66 | } 67 | ``` 68 | -------------------------------------------------------------------------------- /auxiliary/profile.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | file_path = os.path.dirname(__file__) 4 | sys.path.append(os.path.join(file_path, "..")) 5 | import torch 6 | import torch.nn as nn 7 | import csv 8 | import time 9 | from src.modules.qp_unrolled_network import QPUnrolledNetwork 10 | 11 | batch_size = 10000 12 | input_size = 100 13 | device = "cuda:0" 14 | 15 | def mlp_builder(input_size, output_size): 16 | return nn.Sequential( 17 | nn.Linear(input_size, 256), 18 | nn.ReLU(), 19 | nn.Linear(256, 128), 20 | nn.ReLU(), 21 | nn.Linear(128, 64), 22 | nn.ReLU(), 23 | nn.Linear(64, output_size), 24 | ).to(device) 25 | 26 | model_shared = QPUnrolledNetwork( 27 | device=device, 28 | input_size=input_size, 29 | n_qp=10, 30 | m_qp=5, 31 | qp_iter=10, 32 | mlp_builder=mlp_builder, 33 | shared_PH=True, 34 | ) 35 | 36 | model_not_shared = QPUnrolledNetwork( 37 | device=device, 38 | input_size=input_size, 39 | n_qp=10, 40 | m_qp=5, 41 | qp_iter=10, 42 | mlp_builder=mlp_builder, 43 | shared_PH=False, 44 | ) 45 | 46 | def write_csv(prof, filename): 47 | 48 | # Extract key averages 49 | averages = prof.key_averages() 50 | 51 | # Export to CSV 52 | with open(filename, 'w', newline='') as csvfile: 53 | fieldnames = [ 54 | 'Name', 'Self CPU total', 'CPU total', 'CPU time avg', 55 | 'Self CUDA total',
'CUDA total', 'CUDA time avg', 'Number of Calls' 56 | ] 57 | writer = csv.DictWriter(csvfile, fieldnames=fieldnames) 58 | 59 | writer.writeheader() 60 | for avg in averages: 61 | writer.writerow({ 62 | 'Name': avg.key, 63 | 'Self CPU total': avg.self_cpu_time_total, 64 | 'CPU total': avg.cpu_time_total, 65 | 'Self CUDA total': avg.self_cuda_time_total, 66 | 'CUDA total': avg.cuda_time_total, 67 | 'Number of Calls': avg.count 68 | }) 69 | 70 | 71 | def profile(model, tag): 72 | 73 | outputs = [] 74 | 75 | t = time.time() 76 | with torch.autograd.profiler.profile(use_cuda=True) as forward_prof: 77 | for i in range(10): 78 | x = torch.randn((batch_size, input_size), device=device) 79 | outputs.append(model(x)) 80 | print(f"Forward Pass Profiling {tag}:", time.time() - t) 81 | write_csv(forward_prof, f"forward_prof_{tag}.csv") 82 | 83 | t = time.time() 84 | with torch.autograd.profiler.profile(use_cuda=True) as backward_prof: 85 | loss = sum(outputs).mean() 86 | loss.backward() 87 | print(f"Backward Pass Profiling {tag}:", time.time() - t) 88 | write_csv(backward_prof, f"backward_prof_{tag}.csv") 89 | 90 | profile(model_shared, "shared") 91 | profile(model_not_shared, "not_shared") 92 | -------------------------------------------------------------------------------- /src/envs/mpc_baseline_parameters.py: -------------------------------------------------------------------------------- 1 | from .env_creators import sys_param 2 | import numpy as np 3 | import torch 4 | 5 | def get_mpc_baseline_parameters(env_name, N, noise_std=0.): 6 | mpc_parameters = { 7 | "n_mpc": sys_param[env_name]["n"], 8 | "m_mpc": sys_param[env_name]["m"], 9 | "N": N, 10 | **sys_param[env_name], 11 | } 12 | if env_name == "tank": 13 | # Compute state and ref from obs: the first n entries of obs is state, and the latter n entries are ref 14 | mpc_parameters["obs_to_state_and_ref"] = lambda obs: (obs[:, :mpc_parameters["n_mpc"]], obs[:, mpc_parameters["n_mpc"]:]) 15 | A_nom = sys_param[env_name]["A"] 16 | A_max = np.copy(A_nom) 17 | A_max[tuple(zip(*[(0, 0), (0, 2), (1, 1), (1, 3), (2, 2), (3, 3)]))] += 0.002 18 | B_nom = sys_param[env_name]["B"] 19 | B_max = np.copy(B_nom) 20 | B_max *= 1.02 21 | mpc_parameters["A_scenarios"] = [A_nom, A_max] 22 | mpc_parameters["B_scenarios"] = [B_nom, B_max] 23 | n_mpc = mpc_parameters["n_mpc"] 24 | mpc_parameters["w_scenarios"] = [ 25 | np.zeros((n_mpc, 1)), 26 | 3 * noise_std * np.ones((n_mpc, 1)), 27 | -3 * noise_std * np.ones((n_mpc, 1)), 28 | ] 29 | # mpc_parameters["max_disturbance_per_dim"] = 0.5 * (3 * noise_std + 20 * 0.002 * 2 + 8 * 0.02 * 2) 30 | if env_name == "cartpole": 31 | # Compute A, B matrices for linearized system 32 | m_pole = mpc_parameters["m_pole_nom"] 33 | m_cart = mpc_parameters["m_cart_nom"] 34 | l = mpc_parameters["l_nom"] 35 | g = 9.8 36 | 37 | # Continuous time A, B matrices 38 | A_ct = np.array([ 39 | [0, 1, 0, 0], 40 | [0, 0, -g * m_pole / m_cart, 0], 41 | [0, 0, 0, 1], 42 | [0, 0, (m_cart + m_pole) * g / (l * m_cart) , 0], 43 | ]) 44 | B_ct = np.array([ 45 | [0], 46 | [1 / m_cart], 47 | [0], 48 | [-1 / (l * m_cart)], 49 | ]) 50 | 51 | # Discretization 52 | dt = sys_param[env_name]["dt"] 53 | A = np.eye(4) + dt * A_ct 54 | B = dt * B_ct 55 | 56 | mpc_parameters["A"] = A 57 | mpc_parameters["B"] = B 58 | 59 | # Compute state and ref from obs: obs is in format (x, x_ref, x_dot, sin_theta, cos_theta, theta_dot) 60 | def obs_to_state_and_ref(obs): 61 | x, x_dot, theta, theta_dot, x_ref = obs[:, 0], obs[:, 1], obs[:, 2], obs[:, 3], obs[:, 4] 62 | state = 
torch.stack([x, x_dot, theta, theta_dot], dim=1) 63 | zeros = torch.zeros_like(x_ref) 64 | ref = torch.stack([x_ref, zeros, zeros, zeros], dim=1) 65 | return state, ref 66 | mpc_parameters["obs_to_state_and_ref"] = obs_to_state_and_ref 67 | 68 | return mpc_parameters 69 | -------------------------------------------------------------------------------- /src/utils/np_batch_op.py: -------------------------------------------------------------------------------- 1 | import scipy 2 | import numpy as np 3 | import os 4 | from concurrent.futures import ProcessPoolExecutor 5 | 6 | 7 | def _getindex(arr, i): 8 | """ 9 | Retrieves the ith element of an array, or the entire array if it's a scipy sparse matrix. 10 | 11 | Parameters: 12 | arr (np.ndarray or scipy.sparse.csc_matrix): The array or sparse matrix. 13 | i (int): Index of the element to retrieve. 14 | 15 | Returns: 16 | np.ndarray or scipy.sparse.csc_matrix: The ith element of the array or the entire array if it's a sparse matrix. 17 | """ 18 | if type(arr) == scipy.sparse.csc_matrix: 19 | return arr 20 | else: 21 | return arr[i] if arr.shape[0] > 1 else arr[0] 22 | 23 | def _worker(i): 24 | """ 25 | Worker function to apply the function 'f' on slices of arrays for parallel processing. 26 | 27 | Parameters: 28 | i (int): The index representing which slice of the arrays to process. 29 | 30 | Returns: 31 | tuple: A tuple of results returned by the function 'f'. 32 | """ 33 | f = _worker.f 34 | arrays = _worker.arrays 35 | results = f(*[_getindex(arr, i) for arr in arrays]) 36 | return results if isinstance(results, tuple) else (results,) 37 | 38 | def np_batch_op(f, *arrays, max_workers=int(os.environ.get("MAX_CPU_WORKERS", 8))): 39 | """ 40 | Applies a function in a batch operation on multiple arrays, possibly in parallel, handling multiple return values. 41 | If the function 'f' returns a single value, the function returns a single concatenated value instead of a tuple. 42 | 43 | Parameters: 44 | f (callable): The function to apply. Can return multiple values. 45 | arrays (list of np.ndarray or scipy.sparse.csc_matrix): Arrays on which the function is to be applied. 46 | 47 | Returns: 48 | np.ndarray or tuple: A concatenated array if 'f' returns a single value, otherwise a tuple of concatenated arrays. 
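    Example (illustrative sketch, not part of the original docstring; it relies on
    fork-based process start, as on Linux, so that the attributes set on `_worker`
    are inherited by the worker processes):

        >>> def f(a, b):
        ...     return a + b, a - b
        >>> sums, diffs = np_batch_op(f, np.ones((4, 3)), np.zeros((4, 3)))
        >>> sums.shape
        (4, 3)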
49 | """ 50 | get_bs = lambda arr: 1 if type(arr) == scipy.sparse.csc_matrix else arr.shape[0] 51 | bs = max([get_bs(arr) for arr in arrays]) 52 | _worker.f = f 53 | _worker.arrays = arrays 54 | 55 | with ProcessPoolExecutor(max_workers=max_workers) as executor: 56 | all_results = list(executor.map(_worker, range(bs))) 57 | 58 | processed_results = [] 59 | for i in range(len(all_results[0])): 60 | results = [result[i] for result in all_results] 61 | if isinstance(results[0], np.ndarray): 62 | processed_result = np.concatenate([np.expand_dims(arr, 0) for arr in results], 0) 63 | else: 64 | processed_result = np.array(results) 65 | processed_results.append(processed_result) 66 | 67 | # Return a single value if there's only one result, otherwise return a tuple 68 | return processed_results[0] if len(processed_results) == 1 else tuple(processed_results) 69 | -------------------------------------------------------------------------------- /experiments/tank/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | group_one() { 4 | export CUDA_VISIBLE_DEVICES=0 5 | # for N in 1 2 4 8 16; do 6 | for N in 4; do 7 | # for noise in 0 0.1; do 8 | for noise in 0.1; do 9 | python ../../run.py test tank --num-parallel 10000 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --shared-PH --affine-qb --mpc-baseline-N ${N} --noise-level ${noise} --batch-test --quiet --randomize --exp-name qp_unrolled_shared_affine --run-name N${N}_noise${noise}_rand 10 | done 11 | done 12 | # # for noise in 0 0.1; do 13 | # for noise in 0.1; do 14 | # # for n in 2 4 8 16; do 15 | # for n in 8; do 16 | # # for m in 2 4 8 16 32 64; do 17 | # for m in 32; do 18 | # # python ../../run.py test tank --num-parallel 10000 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --shared-PH --affine-qb --noise-level ${noise} --batch-test --n-qp ${n} --m-qp ${m} --quiet --exp-name shared_affine_noise${noise}_n${n}_m${m}+rand --randomize --run-name N0_n${n}_m${m}_noise${noise}_rand 19 | # python ../../run.py test tank --num-parallel 10000 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --shared-PH --affine-qb --noise-level ${noise} --batch-test --n-qp ${n} --m-qp ${m} --quiet --randomize --exp-name shared_affine_noise${noise}_n${n}_m${m}+rand --run-name N0_n${n}_m${m}_noise${noise}_rand 20 | # done 21 | # done 22 | # done 23 | } 24 | 25 | group_two() { 26 | export CUDA_VISIBLE_DEVICES=1 27 | # for N in 1 2 4 8 16; do 28 | # for noise in 0.2 0.5; do 29 | # python ../../run.py test tank --num-parallel 10000 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --shared-PH --affine-qb --mpc-baseline-N ${N} --noise-level ${noise} --batch-test --quiet --exp-name qp_unrolled_shared_affine --randomize --run-name N${N}_noise${noise}_rand --use-osqp-for-mpc 30 | # done 31 | # done 32 | # for noise in 0.2 0.5; do 33 | # for n in 2 4 8 16; do 34 | # for m in 2 4 8 16 32 64; do 35 | # python ../../run.py test tank --num-parallel 10000 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --shared-PH --affine-qb --noise-level ${noise} --batch-test --n-qp ${n} --m-qp ${m} --quiet --exp-name shared_affine_noise${noise}_n${n}_m${m}+rand --randomize --run-name N0_n${n}_m${m}_noise${noise}_rand 36 | # done 37 | # done 38 | # done 39 | # for noise in 0 0.1 0.2 0.5; do 40 | for noise in 0.1; do 41 | python ../../run.py test tank --num-parallel 10000 --horizon 20 --epochs 2000 --mini-epochs 1 --mpc-baseline-N 0 --noise-level ${noise} --batch-test --exp-name 
mlp_noise${noise}+rand --randomize --run-name mlp_noise${noise}_rand 42 | done 43 | } 44 | 45 | # Start both groups in parallel 46 | # group_one & group_two & 47 | group_two 48 | 49 | # Wait for both background tasks to complete 50 | # wait 51 | -------------------------------------------------------------------------------- /auxiliary/test_solver_parallel.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | import os 4 | file_path = os.path.dirname(__file__) 5 | sys.path.append(os.path.join(file_path, "..")) 6 | from src.envs.mpc_baseline_parameters import get_mpc_baseline_parameters 7 | from src.modules.qp_solver import QPSolver 8 | from src.utils.mpc_utils import mpc2qp 9 | from src.utils.osqp_utils import osqp_oracle 10 | from src.utils.np_batch_op import np_batch_op 11 | import torch 12 | import time 13 | import scipy 14 | import functools 15 | import csv 16 | 17 | 18 | def compare(N, num_parallel, device="cuda:0", iterations=100, seed=42, max_cpu_workers=8): 19 | """ 20 | Compare parallelized solver vs. OSQP on MPC problem with horizon N. 21 | """ 22 | 23 | # Load model and config 24 | mpc_baseline_parameters = get_mpc_baseline_parameters("tank", N) 25 | n_mpc = mpc_baseline_parameters["n_mpc"] 26 | m_mpc = mpc_baseline_parameters["m_mpc"] 27 | Qf = np.eye(n_mpc) # Set terminal cost of MPC as needed 28 | mpc_baseline_parameters["Qf"] = Qf 29 | x_min = mpc_baseline_parameters["x_min"] 30 | x_max = mpc_baseline_parameters["x_max"] 31 | u_min = mpc_baseline_parameters["u_min"] 32 | u_max = mpc_baseline_parameters["u_max"] 33 | A = mpc_baseline_parameters["A"] 34 | B = mpc_baseline_parameters["B"] 35 | Q = mpc_baseline_parameters["Q"] 36 | R = mpc_baseline_parameters["R"] 37 | 38 | # Generate current state 39 | t = lambda a: torch.tensor(a, device=device, dtype=torch.float) 40 | x = t(x_min).unsqueeze(0) + t(x_max - x_min).unsqueeze(0) * torch.rand((num_parallel, 2 * n_mpc), device=device) 41 | 42 | # Translate to QP problem 43 | eps = 1e-3 44 | n, m, P, q, H, b = mpc2qp( 45 | n_mpc, 46 | m_mpc, 47 | N, 48 | t(A), 49 | t(B), 50 | t(Q), 51 | t(R), 52 | x_min + eps, 53 | x_max - eps, 54 | u_min, 55 | u_max, 56 | *mpc_baseline_parameters["obs_to_state_and_ref"](x), 57 | normalize=mpc_baseline_parameters.get("normalize", False), 58 | Qf=t(Qf), 59 | ) 60 | 61 | # Time solving with GPU parallelized solver 62 | solver = QPSolver(device, n, m, P=P, H=H) 63 | t = time.time() 64 | Xs, primal_sols = solver(q, b, iters=iterations) 65 | t_parallel = time.time() - t 66 | 67 | # Time solving with OSQP 68 | f = lambda t: t.detach().cpu().numpy() 69 | f_sparse = lambda t: scipy.sparse.csc_matrix(t.cpu().numpy()) 70 | osqp_oracle_with_iter_count = functools.partial(osqp_oracle, return_iter_count=True, max_iter=iterations) 71 | q_np, b_np, P_np, H_np = f(q), f(b), f_sparse(P), f_sparse(H) 72 | t = time.time() 73 | sol_np, iter_counts = np_batch_op(osqp_oracle_with_iter_count, q_np, b_np, P_np, H_np, max_workers=max_cpu_workers) 74 | t_osqp = time.time() - t 75 | 76 | return n, m, t_parallel, t_osqp 77 | 78 | Ns_mpc = [2 ** i for i in range(1, 5)] 79 | nums_parallel = [2 ** i for i in range(1, 16)] 80 | func_input = [(N, num_parallel) for N in Ns_mpc for num_parallel in nums_parallel] 81 | func_output = [compare(*args) for args in func_input] 82 | # Write to CSV 83 | with open("parallel_vs_osqp.csv", "w") as f: 84 | writer = csv.writer(f) 85 | writer.writerow(["N_mpc", "num_parallel", "n_qp", "m_qp", "t_parallel", "t_osqp"]) 86 | for 
args, output in zip(func_input, func_output): 87 | writer.writerow([*args, *output]) 88 | -------------------------------------------------------------------------------- /experiments/cartpole/benchmark_stat.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import torch 3 | from glob import glob 4 | 5 | df = pd.DataFrame(columns=[ 6 | "Noise level", 7 | "Parametric uncertainty", 8 | "Method", 9 | "Horizon", 10 | "Num of variables", 11 | "Num of constraints", 12 | "Num of learnable policy parameters", 13 | "Average cost", 14 | "Average cost (with penalty)", 15 | "Frequency of constraint violation (x1000)", 16 | ]) 17 | 18 | def read_csv(wildcard): 19 | filename = sorted(glob(f"test_results/{wildcard}"))[-1] 20 | return pd.read_csv(filename, dtype={"constraint_violated": "bool"}) 21 | 22 | def get_stat(df): 23 | max_episode_length = df['episode_length'].max() 24 | penalty = 100000 25 | avg_cost = df['cumulative_cost'].sum() / df['episode_length'].sum() 26 | avg_cost_penalized = (df['cumulative_cost'].sum() + penalty * df["constraint_violated"].sum()) / df['episode_length'].sum() 27 | freq_violation = df["constraint_violated"].sum() / df['episode_length'].sum() 28 | return avg_cost, avg_cost_penalized, freq_violation * 1000 29 | 30 | def count_parameters(exp_name): 31 | checkpoint_path = f"runs/cartpole_{exp_name}/nn/cartpole.pth" 32 | checkpoint = torch.load(checkpoint_path) 33 | total_params = 0 34 | for key, value in checkpoint['model'].items(): 35 | if key.startswith("a2c_network.policy_net") or key.startswith("a2c_network.actor_mlp"): 36 | total_params += value.numel() 37 | return total_params 38 | 39 | for noise_level in [0, 0.1, 0.2, 0.5]: 40 | for rand in [False, True]: 41 | try: 42 | wildcard = f"mlp_noise{noise_level}{'_rand' if rand else ''}_2*" 43 | mlp_df = read_csv(wildcard) 44 | df.loc[len(df)] = [ 45 | noise_level, 46 | rand, 47 | "MLP", 48 | "-", 49 | "-", 50 | "-", 51 | count_parameters(f"mlp_noise{noise_level}"), 52 | *get_stat(mlp_df), 53 | ] 54 | except: 55 | print(f"Error reading file: {wildcard}") 56 | 57 | for n in [2, 4, 8, 16]: 58 | for m in [2, 4, 8, 16, 32, 64]: 59 | try: 60 | wildcard = f"N0_n{n}_m{m}_noise{noise_level}{'_rand' if rand else ''}_2*" 61 | qp_df = read_csv(wildcard) 62 | df.loc[len(df)] = [ 63 | noise_level, 64 | rand, 65 | "QP", 66 | "-", 67 | n, 68 | m, 69 | count_parameters(f"shared_affine_noise{noise_level}_n{n}_m{m}"), 70 | *get_stat(qp_df), 71 | ] 72 | except: 73 | print(f"Error reading file: {wildcard}") 74 | 75 | for N in [1, 2, 4, 8, 16]: 76 | try: 77 | wildcard = f"N{N}_noise{noise_level}{'_rand' if rand else ''}_2*" 78 | mpc_df = read_csv(wildcard) 79 | df.loc[len(df)] = [ 80 | noise_level, 81 | rand, 82 | "MPC", 83 | N, 84 | 1 * N, 85 | 10 * N, 86 | 0, 87 | *get_stat(mpc_df), 88 | ] 89 | except: 90 | print(f"Error reading file: {wildcard}") 91 | 92 | df.to_csv("benchmark_stat.csv", index=False) 93 | -------------------------------------------------------------------------------- /experiments/cartpole/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | group_one() { 4 | export CUDA_VISIBLE_DEVICES=0 5 | # for N in 1 4 16; do 6 | # for noise in 0 0.5; do 7 | # python ../../run.py test cartpole --num-parallel 10000 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --shared-PH --affine-qb --mpc-baseline-N ${N} --noise-level ${noise} --batch-test --quiet --exp-name qp_unrolled_shared_affine --run-name 
N${N}_noise${noise} --use-osqp-for-mpc 8 | # done 9 | # done 10 | for N in 1 4 16; do 11 | for noise in 0.5; do 12 | python ../../run.py test cartpole --num-parallel 10000 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --shared-PH --affine-qb --mpc-baseline-N ${N} --noise-level ${noise} --batch-test --quiet --exp-name qp_unrolled_shared_affine --randomize --run-name N${N}_noise${noise}_rand --use-osqp-for-mpc 13 | done 14 | done 15 | # for noise in 0 0.5; do 16 | # for n in 2 16; do 17 | # for m in 4 64; do 18 | # python ../../run.py test cartpole --num-parallel 10000 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --shared-PH --affine-qb --noise-level ${noise} --batch-test --n-qp ${n} --m-qp ${m} --quiet --exp-name shared_affine_noise${noise}_n${n}_m${m} --run-name N0_n${n}_m${m}_noise${noise} 19 | # done 20 | # done 21 | # done 22 | # for noise in 0 0.5; do 23 | # for n in 2 16; do 24 | # for m in 4 64; do 25 | # python ../../run.py test cartpole --num-parallel 10000 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --shared-PH --affine-qb --noise-level ${noise} --batch-test --n-qp ${n} --m-qp ${m} --quiet --exp-name shared_affine_noise${noise}_n${n}_m${m}+rand --randomize --run-name N0_n${n}_m${m}_noise${noise}_rand 26 | # done 27 | # done 28 | # done 29 | } 30 | 31 | group_two() { 32 | export CUDA_VISIBLE_DEVICES=1 33 | # for N in 1 2 4 8 16; do 34 | # for noise in 0.2 0.5; do 35 | # python ../../run.py test cartpole --num-parallel 10000 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --shared-PH --affine-qb --mpc-baseline-N ${N} --noise-level ${noise} --batch-test --quiet --exp-name qp_unrolled_shared_affine --randomize --run-name N${N}_noise${noise}_rand --use-osqp-for-mpc 36 | # done 37 | # done 38 | # for noise in 0.2 0.5; do 39 | # for n in 2 4 8 16; do 40 | # for m in 2 4 8 16 32 64; do 41 | # python ../../run.py test cartpole --num-parallel 10000 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --shared-PH --affine-qb --noise-level ${noise} --batch-test --n-qp ${n} --m-qp ${m} --quiet --exp-name shared_affine_noise${noise}_n${n}_m${m}+rand --randomize --run-name N0_n${n}_m${m}_noise${noise}_rand 42 | # done 43 | # done 44 | # done 45 | # for noise in 0 0.5; do 46 | # python ../../run.py test cartpole --num-parallel 10000 --horizon 20 --epochs 2000 --mini-epochs 1 --mpc-baseline-N 0 --noise-level ${noise} --batch-test --exp-name mlp_noise${noise} # --run-name mlp_noise${noise} 47 | # done 48 | # for noise in 0 0.5; do 49 | # python ../../run.py test cartpole --num-parallel 10000 --horizon 20 --epochs 2000 --mini-epochs 1 --mpc-baseline-N 0 --noise-level ${noise} --batch-test --exp-name mlp_noise${noise}+rand --randomize --run-name mlp_noise${noise}_rand 50 | # done 51 | } 52 | 53 | # Start both groups in parallel 54 | group_one & group_two & 55 | 56 | # Wait for both background tasks to complete 57 | wait 58 | -------------------------------------------------------------------------------- /experiments/tank/test_skip_steady.py: -------------------------------------------------------------------------------- 1 | # %% Problem setup 2 | import sys 3 | import os 4 | file_path = os.path.dirname(__file__) 5 | sys.path.append(os.path.join(file_path, "../..")) 6 | 7 | import numpy as np 8 | from src.envs.env_creators import sys_param, env_creators 9 | x_ref = np.array([19., 19., 2., 2.]) 10 | A = sys_param["tank"]["A"] 11 | B = sys_param["tank"]["B"] 12 | Q = sys_param["tank"]["Q"] 13 | R = sys_param["tank"]["R"] 14 | x_min = 
sys_param["tank"]["x_min"] * np.ones(4) 15 | x_max = sys_param["tank"]["x_max"] * np.ones(4) 16 | u_min = sys_param["tank"]["u_min"] * np.ones(2) 17 | u_max = 1.0 * np.ones(2) 18 | 19 | # %% Oracle 20 | from src.utils.osqp_utils import osqp_oracle 21 | 22 | # min (x - x_ref)' * Q * (x - x_ref) + u' * R * u, s.t., x = (I - A)^{-1} * B * u, x_min <= x <= x_max, u_min <= u <= u_max; cast into min 0.5 * u' * P * u + q' * u, s.t., H * u + b >= 0 23 | 24 | inv_I_minus_A = np.linalg.inv(np.eye(A.shape[0]) - A) 25 | P = 2 * (B.T @ inv_I_minus_A.T @ Q @ inv_I_minus_A @ B + R) 26 | 27 | # Calculate q 28 | q = -2 * inv_I_minus_A.T @ Q.T @ x_ref @ B 29 | 30 | # Calculate c 31 | c = x_ref.T @ Q @ x_ref 32 | 33 | # Calculate H and b 34 | H = np.vstack([ 35 | inv_I_minus_A @ B, 36 | -inv_I_minus_A @ B, 37 | np.eye(u_min.shape[0]), 38 | -np.eye(u_max.shape[0]) 39 | ]) 40 | 41 | b = np.hstack([ 42 | -x_min, 43 | x_max, 44 | -u_min, 45 | u_max 46 | ]) 47 | 48 | u_opt = osqp_oracle(q, b, P, H) 49 | x_opt = inv_I_minus_A @ B @ u_opt 50 | 51 | # %% Evaluation 52 | from icecream import ic 53 | eval_value = lambda u: 0.5 * u.T @ P @ u + q.T @ u + c 54 | opt_val = eval_value(u_opt) 55 | ic(opt_val) 56 | 57 | # %% Evaluate the learned controller 58 | import torch 59 | from src.modules.qp_unrolled_network import QPUnrolledNetwork 60 | 61 | def get_state_dict(checkpoint_path): 62 | checkpoint = torch.load(checkpoint_path) 63 | model = checkpoint["model"] 64 | prefix = "a2c_network.policy_net." 65 | policy_net_state_dict = {k[len(prefix):]: v for (k, v) in model.items() if k.startswith(prefix)}  # slice off the prefix; str.lstrip would strip characters, not the prefix 66 | if "running_mean_std.running_mean" in model: 67 | running_mean = model["running_mean_std.running_mean"].to(dtype=torch.float) 68 | running_std = model["running_mean_std.running_var"].sqrt().to(dtype=torch.float) 69 | else: 70 | running_mean = torch.tensor([0.]) 71 | running_std = torch.tensor([1.]) 72 | return policy_net_state_dict, running_mean, running_std 73 | 74 | def make_obs(x, x_ref, running_mean, running_std, normalize): 75 | raw_obs = torch.tensor(np.concatenate([x, x_ref]), device=device, dtype=torch.float) 76 | if not normalize: 77 | return raw_obs.unsqueeze(0) 78 | else: 79 | return ((raw_obs - running_mean) / running_std).unsqueeze(0) 80 | 81 | 82 | n_sys = 4 83 | m_sys = 2 84 | input_size = 8 # 4 for x, 4 for x_ref 85 | n = 8 86 | m = 32 87 | qp_iter = 10 88 | device = "cuda:0" 89 | 90 | # Learned QP 91 | net = QPUnrolledNetwork(device, input_size, n, m, qp_iter, None, True, True) 92 | exp_name = "test_skip_steady" 93 | checkpoint_path = f"runs/tank_{exp_name}/nn/tank.pth" 94 | policy_net_state_dict, running_mean, running_std = get_state_dict(checkpoint_path) 95 | net.load_state_dict(policy_net_state_dict) 96 | running_mean, running_std = running_mean.to(device=device), running_std.to(device=device) 97 | net.to(device) 98 | obs = make_obs(0 * np.ones(4), x_ref, running_mean, running_std, False) 99 | action_all, problem_params = net(obs, return_problem_params=True) 100 | u = action_all[:, :2].squeeze(0).detach().cpu().numpy() 101 | learned_val = eval_value(u) 102 | ic(learned_val) 103 | 104 | 105 | # %% 106 | -------------------------------------------------------------------------------- /src/modules/preconditioner.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.nn import functional as F 4 | from torch.linalg import solve, inv, pinv 5 | import numpy as np 6 | 7 | from ..utils.torch_utils import make_psd,
vectorize_upper_triangular 8 | 9 | class Preconditioner(nn.Module): 10 | def __init__(self, device, n, m, 11 | P=None, Pinv=None, H=None, 12 | dummy=False, 13 | beta=1, 14 | adaptive=False): 15 | """ 16 | dummy = True: fix D = I 17 | adaptive = False: use same D for all q, b; adaptive = True: determine D based on q, b 18 | Specify P, H if they are fixed; otherwise they need to be passed in when calling forward. 19 | """ 20 | super().__init__() 21 | self.device = device 22 | self.n = n 23 | self.m = m 24 | create_tensor = lambda t: torch.tensor(t, dtype=torch.float, device=device) if type(t) != torch.Tensor and t is not None else t 25 | self.P = create_tensor(P) # (1, n, n) 26 | self.Pinv = create_tensor(Pinv) # (1, n, n) 27 | self.H = create_tensor(H) # (1, m, n) 28 | self.dummy = dummy 29 | self.beta = beta 30 | self.adaptive = adaptive 31 | self.bP = self.P.unsqueeze(0) if P is not None else None 32 | self.bPinv = self.Pinv.unsqueeze(0) if Pinv is not None else None 33 | self.bH = self.H.unsqueeze(0) if H is not None else None 34 | if P is not None and H is not None: 35 | self.bHPinvHt = (self.H @ solve(self.P, self.H.t())).unsqueeze(0) # (1, m, m) 36 | elif Pinv is not None and H is not None: 37 | self.bHPinvHt = (self.H @ self.Pinv @ self.H.t()).unsqueeze(0) 38 | else: 39 | self.bHPinvHt = None 40 | 41 | # Parameterize D using Cholesky decomposition 42 | num_param = m * (m + 1) // 2 43 | if not dummy: 44 | if not adaptive: 45 | self.param = nn.Parameter(torch.zeros((num_param,), device=device)) # (m, m) 46 | else: 47 | num_in = n + m 48 | if self.bP is None: 49 | num_in += n * (n + 1) // 2 50 | if self.bH is None: 51 | num_in += n * m 52 | self.D_net = nn.Sequential( 53 | nn.Linear(num_in, num_in), 54 | nn.ReLU(), 55 | nn.Linear(num_in, num_param), 56 | ).to(device=device) 57 | 58 | def forward(self, q=None, b=None, P=None, H=None, 59 | input_P_is_inversed=False, 60 | output_tD_is_inversed=False, 61 | ): 62 | # q: (bs, n), b: (bs, m) 63 | if self.dummy: 64 | D = torch.eye(self.m, device=self.device) 65 | elif not self.adaptive: 66 | D = make_psd(self.param.unsqueeze(0)) # (1, m, m) 67 | else: 68 | assert q is not None and b is not None 69 | net_input = [q, b] 70 | if self.bP is None: 71 | net_input.append(vectorize_upper_triangular(P)) 72 | if self.bH is None: 73 | net_input.append(H.flatten(start_dim=-2)) 74 | net_input_t = torch.cat(net_input, 1) * 1e-6 75 | D = make_psd(self.D_net(net_input_t)) # (bs, m, m) 76 | D /= self.beta 77 | bH = self.bH if self.bH is not None else H 78 | bP_param = self.bP if self.bP is not None else P 79 | op = solve if not input_P_is_inversed else torch.matmul 80 | bHPinvHt = self.bHPinvHt if self.bHPinvHt is not None else (bH @ op(bP_param, bH.transpose(-1, -2))) 81 | tD_inv = D + bHPinvHt 82 | if output_tD_is_inversed: 83 | return D, tD_inv 84 | else: 85 | tD = inv(tD_inv) # (*, m, m) 86 | return D, tD 87 | -------------------------------------------------------------------------------- /experiments/tank/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Store the command line argument in a global variable 4 | TRAIN_OR_TEST="$1" 5 | 6 | # Function to run commands 1-3 sequentially with VAR=1 7 | group_one() { 8 | export CUDA_VISIBLE_DEVICES=0 9 | # for n_qp in 2 4 8 16; do 10 | for n_qp in 8; do 11 | # for m_qp in 2 4 8 16 32 64; do 12 | for m_qp in 32; do 13 | # for noise_level in 0 0.1; do 14 | for noise_level in 0; do 15 | python ../../run.py $TRAIN_OR_TEST tank \ 16 | --num-parallel 
100000 \ 17 | --horizon 20 \ 18 | --epochs 2000 \ 19 | --mini-epochs 1 \ 20 | --qp-unrolled \ 21 | --shared-PH \ 22 | --affine-qb \ 23 | --noise-level ${noise_level} \ 24 | --n-qp ${n_qp} \ 25 | --m-qp ${m_qp} \ 26 | --no-obs-normalization \ 27 | --no-b \ 28 | --exp-name shared_affine_noise${noise_level}_n${n_qp}_m${m_qp}-norm-b 29 | done 30 | done 31 | done 32 | # python ../../run.py $TRAIN_OR_TEST tank --num-parallel 100000 --horizon 20 --epochs 2000 --gamma 0.999 --mini-epochs 1 --exp-name vanilla_rl 33 | # python ../../run.py $TRAIN_OR_TEST tank --num-parallel 100 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --shared-PH --affine-qb --mpc-baseline-N 0 --noise-level 0 --batch-test --quiet --exp-name qp_unrolled_shared_affine 34 | # python ../../run.py $TRAIN_OR_TEST tank --num-parallel 100000 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --shared-PH --qp-iter 100 --exp-name qp_unrolled_shared_more_iter 35 | } 36 | 37 | # Function to run commands 4-6 sequentially with VAR=2 38 | group_two() { 39 | export CUDA_VISIBLE_DEVICES=1 40 | # python ../../run.py $TRAIN_OR_TEST tank --num-parallel 100000 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --shared-PH --exp-name qp_unrolled_shared 41 | # python ../../run.py $TRAIN_OR_TEST tank --num-parallel 100000 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --exp-name qp_unrolled 42 | # python ../../run.py $TRAIN_OR_TEST tank --num-parallel 100000 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --warm-start --exp-name qp_unrolled_ws 43 | # python ../../run.py $TRAIN_OR_TEST tank --num-parallel 100000 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --shared-PH --exp-name qp_unrolled_shared 44 | # python ../../run.py $TRAIN_OR_TEST tank --num-parallel 100000 --horizon 20 --epochs 2000 --mini-epochs 1 --qp-unrolled --shared-PH --qp-iter 10 --warm-start --exp-name qp_unrolled_shared_ws 45 | # python ../../run.py $TRAIN_OR_TEST tank --num-parallel 100000 --horizon 20 --epochs 1 --mini-epochs 1 --qp-unrolled --shared-PH --exp-name computation-test 46 | for n_qp in 2 4 8 16; do 47 | for m_qp in 2 4 8 16 32 64; do 48 | for noise_level in 0.2 0.5; do 49 | python ../../run.py $TRAIN_OR_TEST tank \ 50 | --num-parallel 100000 \ 51 | --horizon 20 \ 52 | --epochs 2000 \ 53 | --mini-epochs 1 \ 54 | --qp-unrolled \ 55 | --shared-PH \ 56 | --affine-qb \ 57 | --noise-level ${noise_level} \ 58 | --n-qp ${n_qp} \ 59 | --m-qp ${m_qp} \ 60 | --randomize \ 61 | --exp-name shared_affine_noise${noise_level}_n${n_qp}_m${m_qp}+rand 62 | done 63 | done 64 | done 65 | for noise_level in 0 0.1 0.2 0.5; do 66 | python ../../run.py $TRAIN_OR_TEST tank \ 67 | --num-parallel 100000 \ 68 | --horizon 20 \ 69 | --epochs 2000 \ 70 | --mini-epochs 1 \ 71 | --noise-level ${noise_level} \ 72 | --randomize \ 73 | --exp-name mlp_noise${noise_level}+rand 74 | done 75 | } 76 | 77 | # Start both groups in parallel 78 | # group_one & group_two & 79 | group_one 80 | 81 | # Wait for both background tasks to complete 82 | # wait 83 | -------------------------------------------------------------------------------- /experiments/cartpole/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Store the command line argument in a global variable 4 | TRAIN_OR_TEST="$1" 5 | 6 | # Function to run commands 1-3 sequentially with VAR=1 7 | group_one() { 8 | export CUDA_VISIBLE_DEVICES=0 9 | for n_qp in 8; do 10 | for m_qp in 32; do 11 | for noise_level in 0; do 12 | python ../../run.py 
$TRAIN_OR_TEST cartpole \ 13 | --num-parallel 100000 \ 14 | --horizon 20 \ 15 | --epochs 500 \ 16 | --mini-epochs 1 \ 17 | --qp-unrolled \ 18 | --shared-PH \ 19 | --affine-qb \ 20 | --noise-level ${noise_level} \ 21 | --n-qp ${n_qp} \ 22 | --m-qp ${m_qp} \ 23 | --exp-name shared_affine_noise${noise_level}_n${n_qp}_m${m_qp} 24 | done 25 | done 26 | done 27 | # for n_qp in 2 16; do 28 | # for m_qp in 4 64; do 29 | # for noise_level in 0; do 30 | # python ../../run.py $TRAIN_OR_TEST cartpole \ 31 | # --num-parallel 100000 \ 32 | # --horizon 20 \ 33 | # --epochs 500 \ 34 | # --mini-epochs 1 \ 35 | # --qp-unrolled \ 36 | # --shared-PH \ 37 | # --affine-qb \ 38 | # --noise-level ${noise_level} \ 39 | # --n-qp ${n_qp} \ 40 | # --m-qp ${m_qp} \ 41 | # --randomize \ 42 | # --exp-name shared_affine_noise${noise_level}_n${n_qp}_m${m_qp}+rand 43 | # done 44 | # done 45 | # done 46 | # for noise_level in 0 0.5; do 47 | # python ../../run.py $TRAIN_OR_TEST cartpole \ 48 | # --num-parallel 100000 \ 49 | # --horizon 20 \ 50 | # --epochs 500 \ 51 | # --mini-epochs 1 \ 52 | # --noise-level ${noise_level} \ 53 | # --exp-name mlp_noise${noise_level} 54 | # done 55 | } 56 | 57 | # Function to run commands 4-6 sequentially with VAR=2 58 | group_two() { 59 | export CUDA_VISIBLE_DEVICES=1 60 | for n_qp in 2 16; do 61 | for m_qp in 4 64; do 62 | for noise_level in 0.5; do 63 | python ../../run.py $TRAIN_OR_TEST cartpole \ 64 | --num-parallel 100000 \ 65 | --horizon 20 \ 66 | --epochs 500 \ 67 | --mini-epochs 1 \ 68 | --qp-unrolled \ 69 | --shared-PH \ 70 | --affine-qb \ 71 | --noise-level ${noise_level} \ 72 | --n-qp ${n_qp} \ 73 | --m-qp ${m_qp} \ 74 | --exp-name shared_affine_noise${noise_level}_n${n_qp}_m${m_qp} 75 | done 76 | done 77 | done 78 | for n_qp in 2 16; do 79 | for m_qp in 4 64; do 80 | for noise_level in 0.5; do 81 | python ../../run.py $TRAIN_OR_TEST cartpole \ 82 | --num-parallel 100000 \ 83 | --horizon 20 \ 84 | --epochs 500 \ 85 | --mini-epochs 1 \ 86 | --qp-unrolled \ 87 | --shared-PH \ 88 | --affine-qb \ 89 | --noise-level ${noise_level} \ 90 | --n-qp ${n_qp} \ 91 | --m-qp ${m_qp} \ 92 | --randomize \ 93 | --exp-name shared_affine_noise${noise_level}_n${n_qp}_m${m_qp}+rand 94 | done 95 | done 96 | done 97 | 98 | for noise_level in 0 0.5; do 99 | python ../../run.py $TRAIN_OR_TEST cartpole \ 100 | --num-parallel 100000 \ 101 | --horizon 20 \ 102 | --epochs 500 \ 103 | --mini-epochs 1 \ 104 | --noise-level ${noise_level} \ 105 | --randomize \ 106 | --exp-name mlp_noise${noise_level}+rand 107 | done 108 | } 109 | 110 | # Start both groups in parallel 111 | # group_one & group_two & 112 | group_one 113 | 114 | # Wait for both background tasks to complete 115 | # wait 116 | -------------------------------------------------------------------------------- /auxiliary/train_warmstarter.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from tqdm import tqdm 3 | import os 4 | import sys 5 | file_path = os.path.dirname(__file__) 6 | sys.path.append(os.path.join(file_path, "..")) 7 | from modules.warm_starter import WarmStarter 8 | from modules.qp_solver import QPSolver 9 | from utils.mpc_utils import generate_random_problem 10 | import torch 11 | from torch.nn import functional as F 12 | import argparse 13 | import traceback 14 | from pathlib import Path 15 | from datetime import datetime 16 | import copy 17 | from torch.utils.tensorboard import SummaryWriter 18 | 19 | parser = argparse.ArgumentParser() 20 | parser.add_argument("--batch-size", 
type=int, default=10000) 21 | parser.add_argument("--n", type=int, default=10) 22 | parser.add_argument("--m", type=int, default=5) 23 | parser.add_argument("--fixed-PH", action='store_true') 24 | args = parser.parse_args() 25 | 26 | max_epochs = 50000 27 | bs = args.batch_size 28 | n = args.n 29 | m = args.m 30 | device = "cuda:0" 31 | 32 | torch.manual_seed(42) 33 | q0, b0, P0, H0 = generate_random_problem(1, n, m, device) 34 | P0_np = P0.squeeze(0).cpu().numpy() 35 | H0_np = H0.squeeze(0).cpu().numpy() 36 | P0 = P0.broadcast_to((bs, -1, -1)) 37 | H0 = H0.broadcast_to((bs, -1, -1)) 38 | 39 | warm_starter = WarmStarter(device, n, m, fixed_P=args.fixed_PH, fixed_H=args.fixed_PH) 40 | if not args.fixed_PH: 41 | oracle_solver = QPSolver(device, n, m) 42 | else: 43 | oracle_solver = QPSolver(device, n, m, P=P0_np, H=H0_np) 44 | optimizer = torch.optim.Adam(warm_starter.parameters()) 45 | losses = [] 46 | Path("runs").mkdir(parents=True, exist_ok=True) 47 | writer = SummaryWriter('runs/' + "warmstarter" + datetime.now().strftime("_%y-%m-%d-%H-%M-%S")) 48 | 49 | 50 | try: 51 | def restore_checkpoint(): 52 | global loss_best, no_improvement_count 53 | warm_starter.load_state_dict(checkpoint[0]) 54 | optimizer.load_state_dict(checkpoint[1]) 55 | loss_best = 0 56 | no_improvement_count= 0 57 | loss_best = 0 58 | no_improvement_count = 0 59 | for i_ep in (pbar:= tqdm(range(max_epochs))): 60 | # Check for early stopping 61 | if i_ep > 0: 62 | if loss_best == 0: 63 | loss_best = losses[-1] + 1 64 | if losses[-1] < loss_best: 65 | no_improvement_count = 0 66 | loss_best = 0.95 * loss_best + 0.05 * losses[-1] 67 | checkpoint = [ 68 | copy.deepcopy(warm_starter.state_dict()), 69 | copy.deepcopy(optimizer.state_dict()), 70 | ] 71 | else: 72 | no_improvement_count += 1 73 | if no_improvement_count >= 5: 74 | restore_checkpoint() 75 | optimizer.param_groups[0]['lr'] /= 10 76 | loss_best = 0 77 | if optimizer.param_groups[0]['lr'] < 1e-7: 78 | break 79 | 80 | optimizer.zero_grad() 81 | q, b, P, H = generate_random_problem(bs, n, m, device) 82 | if not args.fixed_PH: 83 | oracle_Xb = oracle_solver(q, b, P, H)[0][:, -1, :] 84 | approx_X = warm_starter(q, b, P, H) 85 | else: 86 | oracle_Xb = oracle_solver(q, b)[0][:, -1, :] 87 | approx_X = warm_starter(q, b) 88 | loss = torch.log((approx_X - oracle_Xb).norm(dim=-1)).mean() 89 | if loss.isfinite(): 90 | loss.backward() 91 | optimizer.step() 92 | losses.append(loss.item()) 93 | else: 94 | restore_checkpoint() 95 | pbar.set_description(f"{optimizer.param_groups[0]['lr']:.2e}, {loss.item():.2f}/{loss_best:.2f}/{no_improvement_count}") 96 | writer.add_scalar("stat/loss", loss.item(), i_ep) 97 | writer.add_scalar("stat/lr", optimizer.param_groups[0]['lr'], i_ep) 98 | 99 | except: 100 | traceback.print_exc() 101 | finally: 102 | Path("models").mkdir(parents=True, exist_ok=True) 103 | torch.save(warm_starter.state_dict(), f"models/warmstarter-{n}-{m}.pth") 104 | -------------------------------------------------------------------------------- /src/utils/rlgame_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copied from IsaacGymEnvs 3 | """ 4 | 5 | import torch 6 | from rl_games.common import env_configurations, vecenv 7 | from rl_games.common.algo_observer import AlgoObserver 8 | from rl_games.algos_torch import torch_ext 9 | 10 | class RLGPUEnv(vecenv.IVecEnv): 11 | def __init__(self, config_name, num_actors, **kwargs): 12 | self.env = env_configurations.configurations[config_name]['env_creator'](**kwargs) 13 | 14 
| def step(self, actions): 15 | return self.env.step(actions) 16 | 17 | def reset(self): 18 | return self.env.reset() 19 | 20 | def reset_done(self): 21 | return self.env.reset_done() 22 | 23 | def get_number_of_agents(self): 24 | return self.env.get_number_of_agents() 25 | 26 | def get_env_info(self): 27 | info = {} 28 | info['action_space'] = self.env.action_space 29 | info['observation_space'] = self.env.observation_space 30 | if hasattr(self.env, "amp_observation_space"): 31 | info['amp_observation_space'] = self.env.amp_observation_space 32 | 33 | if self.env.num_states > 0: 34 | info['state_space'] = self.env.state_space 35 | print(info['action_space'], info['observation_space'], info['state_space']) 36 | else: 37 | print(info['action_space'], info['observation_space']) 38 | 39 | return info 40 | 41 | class RLGPUAlgoObserver(AlgoObserver): 42 | """Allows us to log stats from the env along with the algorithm running stats. """ 43 | 44 | def __init__(self): 45 | pass 46 | 47 | def after_init(self, algo): 48 | self.algo = algo 49 | self.mean_scores = torch_ext.AverageMeter(1, self.algo.games_to_track).to(self.algo.ppo_device) 50 | self.ep_infos = [] 51 | self.direct_info = {} 52 | self.writer = self.algo.writer 53 | 54 | def process_infos(self, infos, done_indices): 55 | assert isinstance(infos, dict), "RLGPUAlgoObserver expects dict info" 56 | if isinstance(infos, dict): 57 | if 'episode' in infos: 58 | self.ep_infos.append(infos['episode']) 59 | 60 | if len(infos) > 0 and isinstance(infos, dict): # allow direct logging from env 61 | self.direct_info = {} 62 | for k, v in infos.items(): 63 | # only log scalars 64 | if isinstance(v, float) or isinstance(v, int) or (isinstance(v, torch.Tensor) and len(v.shape) == 0): 65 | self.direct_info[k] = v 66 | 67 | def after_clear_stats(self): 68 | self.mean_scores.clear() 69 | 70 | def after_print_stats(self, frame, epoch_num, total_time): 71 | if self.ep_infos: 72 | for key in self.ep_infos[0]: 73 | infotensor = torch.tensor([], device=self.algo.device) 74 | for ep_info in self.ep_infos: 75 | # handle scalar and zero dimensional tensor infos 76 | if not isinstance(ep_info[key], torch.Tensor): 77 | ep_info[key] = torch.Tensor([ep_info[key]]) 78 | if len(ep_info[key].shape) == 0: 79 | ep_info[key] = ep_info[key].unsqueeze(0) 80 | infotensor = torch.cat((infotensor, ep_info[key].to(self.algo.device))) 81 | value = torch.mean(infotensor) 82 | self.writer.add_scalar('Episode/' + key, value, epoch_num) 83 | self.ep_infos.clear() 84 | 85 | for k, v in self.direct_info.items(): 86 | self.writer.add_scalar(f'{k}/frame', v, frame) 87 | self.writer.add_scalar(f'{k}/iter', v, epoch_num) 88 | self.writer.add_scalar(f'{k}/time', v, total_time) 89 | 90 | if self.mean_scores.current_size > 0: 91 | mean_scores = self.mean_scores.get_mean() 92 | self.writer.add_scalar('scores/mean', mean_scores, frame) 93 | self.writer.add_scalar('scores/iter', mean_scores, epoch_num) 94 | self.writer.add_scalar('scores/time', mean_scores, total_time) 95 | -------------------------------------------------------------------------------- /experiments/double_integrator/dump_parameters.py: -------------------------------------------------------------------------------- 1 | # %% Initialize model 2 | import numpy as np 3 | import torch 4 | import sys 5 | import os 6 | file_path = os.path.dirname(__file__) 7 | sys.path.append(os.path.join(file_path, "../..")) 8 | from src.modules.qp_unrolled_network import QPUnrolledNetwork 9 | 10 | def get_state_dict(checkpoint_path): 11 | checkpoint = 
torch.load(checkpoint_path) 12 | model = checkpoint["model"] 13 | prefix = "a2c_network.policy_net." 14 | policy_net_state_dict = {k[len(prefix):]: v for (k, v) in model.items() if k.startswith(prefix)}  # slice off the prefix; str.lstrip strips a character set, not a prefix 15 | if "running_mean_std.running_mean" in model: 16 | running_mean = model["running_mean_std.running_mean"].to(dtype=torch.float) 17 | running_std = model["running_mean_std.running_var"].sqrt().to(dtype=torch.float) 18 | else: 19 | running_mean = torch.tensor([0.]) 20 | running_std = torch.tensor([1.]) 21 | return policy_net_state_dict, running_mean, running_std 22 | 23 | device = "cuda:0" 24 | n_qp = 3 25 | m_qp = 9 26 | qp_iter = 10 27 | symmetric = True 28 | no_b = True 29 | net = QPUnrolledNetwork(device, 2, n_qp, m_qp, qp_iter, None, True, True, force_feasible=True, symmetric=symmetric, no_b=no_b) 30 | if not symmetric: 31 | exp_name = "default" 32 | elif not no_b: 33 | exp_name = "symmetric" 34 | else: 35 | exp_name = "symmetric_no_b" 36 | checkpoint_path = f"runs/double_integrator_{exp_name}/nn/double_integrator.pth" 37 | policy_net_state_dict, running_mean, running_std = get_state_dict(checkpoint_path) 38 | net.load_state_dict(policy_net_state_dict) 39 | running_mean, running_std = running_mean.to(device=device), running_std.to(device=device) 40 | net.to(device) 41 | 42 | t = lambda arr: torch.tensor(arr, device=device, dtype=torch.float).unsqueeze(0) 43 | a = lambda t: t.squeeze(0).detach().cpu().numpy() 44 | # %% Get parameters and reconstruct 45 | from src.utils.torch_utils import make_psd 46 | 47 | feasible_lambda = 10 48 | 49 | P_params = policy_net_state_dict['P_params'].unsqueeze(0) 50 | H_params = policy_net_state_dict['H_params'] 51 | zeros_n = torch.zeros((1, n_qp, 1), device=device) 52 | ones_m = torch.ones((1, m_qp, 1), device=device) 53 | I = torch.eye(1, device=device).unsqueeze(0) 54 | zeros_1 = torch.zeros((1, 1), device=device) 55 | Pinv = make_psd(P_params, min_eig=1e-2) 56 | tilde_P_inv = torch.cat([ 57 | torch.cat([Pinv, zeros_n], dim=2), 58 | torch.cat([zeros_n.transpose(1, 2), 1 / feasible_lambda * I], dim=2) 59 | ], dim=1) 60 | H = H_params.view(m_qp, n_qp).unsqueeze(0) 61 | tilde_H = torch.cat([ 62 | torch.cat([H, ones_m], dim=2), 63 | torch.cat([zeros_n.transpose(1, 2), I], dim=2) 64 | ], dim=1) 65 | P = torch.linalg.inv(tilde_P_inv).squeeze(0).cpu().numpy() 66 | H = tilde_H.squeeze(0).cpu().numpy() 67 | Wq_params = policy_net_state_dict['qb_affine_layer.weight'].unsqueeze(0) 68 | Wq_tilde = torch.cat([ 69 | Wq_params, 70 | torch.zeros((1, 1, Wq_params.shape[2]), device=device), 71 | ], dim=1) 72 | Wq = Wq_tilde.squeeze(0).cpu().numpy() 73 | 74 | # %% Get control invariant set 75 | from src.envs.env_creators import sys_param, env_creators 76 | from src.utils.sets import compute_MCI 77 | from src.utils.geometry import find_supporting_hyperplanes 78 | from matplotlib import pyplot as plt 79 | 80 | A = sys_param["double_integrator"]["A"] 81 | B = sys_param["double_integrator"]["B"] 82 | Q = sys_param["double_integrator"]["Q"] 83 | R = sys_param["double_integrator"]["R"] 84 | x_min_scalar = sys_param["double_integrator"]["x_min"] 85 | x_max_scalar = sys_param["double_integrator"]["x_max"] 86 | u_min_scalar = sys_param["double_integrator"]["u_min"] 87 | u_max_scalar = sys_param["double_integrator"]["u_max"] 88 | x_min = x_min_scalar * np.ones(2) 89 | x_max = x_max_scalar * np.ones(2) 90 | u_min = u_min_scalar * np.ones(1) 91 | u_max = u_max_scalar * np.ones(1) 92 | 93 | MCI = compute_MCI(A, B, x_min, x_max, u_min, u_max, iterations=100) 94 | A_MCI, b_MCI = 
find_supporting_hyperplanes(MCI) 95 | # %% Dump parameters 96 | np.savez( 97 | "parameters.npz", 98 | A=A, 99 | B=B, 100 | P=P, 101 | H=H, 102 | Wq=Wq, 103 | A_MCI=A_MCI, 104 | b_MCI=b_MCI, 105 | ) 106 | 107 | # %% 108 | -------------------------------------------------------------------------------- /src/utils/visualization.py: -------------------------------------------------------------------------------- 1 | from .geometry import find_interior_point 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | from matplotlib.lines import Line2D 5 | from scipy.spatial import ConvexHull, HalfspaceIntersection 6 | 7 | 8 | def plot_multiple_2d_polytopes_with_contour(polytope_contour_params): 9 | """ 10 | Plot multiple 2D polytopes each defined by Ax <= b and overlay the contour of a quadratic function. 11 | 12 | Parameters: 13 | - polytope_contour_params (list of dict): List of dictionaries containing A, b, optimal_solution, P, q, and label. 14 | 15 | Returns: 16 | - fig (matplotlib.figure.Figure): Figure object. 17 | - ax (matplotlib.axes._subplots.AxesSubplot): Axis object. 18 | """ 19 | 20 | fig, ax = plt.subplots() 21 | 22 | # Determine global x and y limits 23 | all_vertices = [] 24 | for params in polytope_contour_params: 25 | interior_point = find_interior_point(params['A'], params['b']) 26 | if interior_point is not None: 27 | vertices = HalfspaceIntersection(np.hstack([params['A'], -params['b'][:, np.newaxis]]), interior_point).intersections 28 | all_vertices.append(vertices) 29 | all_vertices = np.vstack(all_vertices) 30 | 31 | margin = 0.5 # Additional margin around the polytopes 32 | x_range = np.max(all_vertices[:, 0]) - np.min(all_vertices[:, 0]) 33 | y_range = np.max(all_vertices[:, 1]) - np.min(all_vertices[:, 1]) 34 | max_range = max(x_range, y_range) + 2 * margin 35 | x_margin = (max_range - x_range) / 2 36 | y_margin = (max_range - y_range) / 2 37 | x_min, x_max = np.min(all_vertices[:, 0]) - x_margin, np.max(all_vertices[:, 0]) + x_margin 38 | y_min, y_max = np.min(all_vertices[:, 1]) - y_margin, np.max(all_vertices[:, 1]) + y_margin 39 | x_grid, y_grid = np.meshgrid(np.linspace(x_min, x_max, 100), np.linspace(y_min, y_max, 100)) 40 | 41 | custom_legend_handles = [] 42 | 43 | for params in polytope_contour_params: 44 | A, b, P, q, color, label = params['A'], params['b'], params['P'], params['q'], params['color'], params['label'] 45 | optimal_solution = params.get("optimal_solution", None) 46 | 47 | # Find an interior point 48 | interior_point = find_interior_point(A, b) 49 | if interior_point is None: 50 | continue # Skip this polytope if LP is infeasible 51 | 52 | # Plot polytope 53 | halfspace_intersection = HalfspaceIntersection(np.hstack([A, -b[:, np.newaxis]]), interior_point) 54 | vertices = halfspace_intersection.intersections 55 | hull = ConvexHull(vertices) 56 | ordered_vertices = vertices[hull.vertices] 57 | closed_loop = np.vstack([ordered_vertices, ordered_vertices[0]]) 58 | 59 | ax.fill(closed_loop[:, 0], closed_loop[:, 1], alpha=0.3, color=color, label=f"{label} (Polytope)") 60 | ax.plot(closed_loop[:, 0], closed_loop[:, 1], color=color) 61 | 62 | # Mark the optimal solution 63 | if optimal_solution is not None: 64 | ax.plot(optimal_solution[0], optimal_solution[1], 'o', color=color) 65 | 66 | # Evaluate quadratic function 67 | Z = np.zeros_like(x_grid) 68 | for i in range(x_grid.shape[0]): 69 | for j in range(x_grid.shape[1]): 70 | x_vec = np.array([x_grid[i, j], y_grid[i, j]]) 71 | Z[i, j] = 0.5 * x_vec.T @ P @ x_vec + q.T @ x_vec 72 | 73 | # Plot 
contour 74 | contour = ax.contour(x_grid, y_grid, Z, levels=5, colors=color) # Reduced number of levels for sparser contour 75 | 76 | # Create a custom legend handle 77 | custom_legend_handles.append(Line2D([0], [0], color=color, lw=4, label=label)) 78 | 79 | # Adjust plot settings 80 | ax.set_aspect('equal', adjustable='box') 81 | ax.set_xlabel('x') 82 | ax.set_ylabel('y') 83 | 84 | # Add custom legend 85 | if custom_legend_handles: 86 | # Move legend outside the plot 87 | ax.legend(handles=custom_legend_handles, loc='upper left', bbox_to_anchor=(1, 1)) 88 | # Adjust layout to prevent clipping 89 | plt.tight_layout(rect=[0, 0, 0.85, 1]) 90 | 91 | return fig, ax 92 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 
109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | 162 | .vscode/ 163 | .unison* 164 | 165 | runs/ 166 | data/ 167 | auxiliary/models 168 | auxiliary/*.csv 169 | experiments/tank/run1.sh 170 | experiments/tank/test_results/ 171 | experiments/tank/*.csv 172 | 173 | experiments/cartpole/test_results/ 174 | experiments/cartpole/*.csv 175 | 176 | experiments/double_integrator/*.npz 177 | experiments/double_integrator/*.tex 178 | 179 | learning-qp.txt 180 | -------------------------------------------------------------------------------- /experiments/tank/plot_histogram.py: -------------------------------------------------------------------------------- 1 | # %% Read test data from learned QP with MPC 2 | from benchmark_stat import read_csv, get_stat 3 | from matplotlib import pyplot as plt 4 | import numpy as np 5 | import pandas as pd 6 | 7 | noise_level = 0.1 8 | randomize = True 9 | N = 4 10 | n = 8 11 | m = 32 12 | 13 | randomize_flag = "_rand" if randomize else "" 14 | df_mpc = read_csv(f"N{N}_noise{noise_level}{randomize_flag}_20*") 15 | df_qp = read_csv(f"N0_n{n}_m{m}_noise{noise_level}{randomize_flag}_20*") 16 | df_mlp = read_csv(f"mlp_noise{noise_level}{randomize_flag}_20*") 17 | 18 | # %% Matrix of constraint violation 19 | 20 | def violation_matrix(df1, df2): 21 | """ 22 | Return four lists of indices standing for the trajectory indices where: 23 | 1. constraint_violated=True in both df1 and df2 24 | 2. constraint_violated=True in df1 but not df2 25 | 3. constraint_violated=True in df2 but not df1 26 | 4. 
constraint_violated=False in both df1 and df2 27 | """ 28 | idx_both = [] 29 | idx_df1 = [] 30 | idx_df2 = [] 31 | idx_none = [] 32 | for i in range(len(df1)): 33 | if df1.iloc[i]["constraint_violated"] and df2.iloc[i]["constraint_violated"]: 34 | idx_both.append(i) 35 | elif df1.iloc[i]["constraint_violated"] and not df2.iloc[i]["constraint_violated"]: 36 | idx_df1.append(i) 37 | elif not df1.iloc[i]["constraint_violated"] and df2.iloc[i]["constraint_violated"]: 38 | idx_df2.append(i) 39 | else: 40 | idx_none.append(i) 41 | return idx_both, idx_df1, idx_df2, idx_none 42 | 43 | violated_both, violated_mpc, violated_qp, violated_none = violation_matrix(df_mpc, df_qp) 44 | 45 | data = {"MPC Success": [len(violated_none), len(violated_qp)], "MPC Fail": [len(violated_mpc), len(violated_both)]} 46 | index_labels = ["QP Success", "QP Fail"] 47 | df = pd.DataFrame(data=data, index=index_labels) 48 | df 49 | 50 | # %% Cost ratio histogram in the cases where both methods succeed 51 | cost_mpc = df_mpc.iloc[violated_none]["cumulative_cost"] 52 | cost_qp = df_qp.iloc[violated_none]["cumulative_cost"] 53 | ratio = cost_mpc / cost_qp 54 | 55 | n, bins, patches = plt.hist(ratio, bins=30, edgecolor='black', alpha=0.7) 56 | max_freq = max(n) 57 | 58 | # Add vertical dashed line at x=1 59 | plt.axvline(x=1, color='r', linestyle='--') 60 | 61 | # Annotations 62 | text_y_pos = max_freq * 1.3 63 | y_max = max_freq * 1.6 64 | plt.arrow(0.8, text_y_pos, -0.6, 0, head_width=20, head_length=0.05, fc='black', ec='black') 65 | plt.text(0.5, text_y_pos, 'MPC better', horizontalalignment='center', verticalalignment='bottom', color='black') 66 | plt.arrow(1.2, text_y_pos, 0.6, 0, head_width=20, head_length=0.05, fc='black', ec='black') 67 | plt.text(1.5, text_y_pos, 'Learned QP better', horizontalalignment='center', verticalalignment='bottom', color='black') 68 | 69 | plt.xlabel('Ratio of average cost (MPC / Learned QP)') 70 | plt.xlim(0, 2) 71 | plt.ylim(0, y_max) 72 | 73 | 74 | # %% Penalized cost ratio histogram in all cases 75 | penalty = 100000 76 | get_penalized_cost = lambda df: (df['cumulative_cost'] + penalty * df["constraint_violated"]) / df['episode_length'] 77 | penalized_cost_mpc = get_penalized_cost(df_mpc) 78 | penalized_cost_qp = get_penalized_cost(df_qp) 79 | penalized_cost_mlp = get_penalized_cost(df_mlp) 80 | 81 | # Export penalized costs to csv; each row is (penalized_cost_mpc, penalized_cost_qp, penalized_cost_mlp) 82 | header_line = "penalized_cost_mpc,penalized_cost_qp,penalized_cost_mlp" 83 | np.savetxt("penalized_costs.csv", np.column_stack((penalized_cost_mpc, penalized_cost_qp, penalized_cost_mlp)), delimiter=",", header=header_line, comments='') 84 | 85 | 86 | 87 | log_penalized_ratio = np.log10(penalized_cost_mpc / penalized_cost_qp) 88 | 89 | n, bins, patches = plt.hist(log_penalized_ratio, bins=30, edgecolor='black', alpha=0.7) 90 | max_freq = max(n) 91 | 92 | # Add vertical dashed line at x=1 93 | plt.axvline(x=0, color='r', linestyle='--') 94 | 95 | # Annotations 96 | text_y_pos = max_freq * 1.3 97 | y_max = max_freq * 1.6 98 | plt.arrow(-1, text_y_pos, -2, 0, head_width=50, head_length=0.05, fc='black', ec='black') 99 | plt.text(-2, text_y_pos, 'MPC better', horizontalalignment='center', verticalalignment='bottom', color='black') 100 | plt.arrow(1, text_y_pos, 2, 0, head_width=50, head_length=0.05, fc='black', ec='black') 101 | plt.text(2, text_y_pos, 'Learned QP better', horizontalalignment='center', verticalalignment='bottom', color='black') 102 | 103 | plt.xlabel('Ratio of 
penalized average cost (MPC / Learned QP) (log10)') 104 | plt.ylim(0, y_max) 105 | 106 | # %% 107 | -------------------------------------------------------------------------------- /experiments/tank/benchmark_stat.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import torch 3 | from glob import glob 4 | import argparse 5 | 6 | parser = argparse.ArgumentParser() 7 | parser.add_argument("run_name", type=str, nargs="?", default="")  # optional positional; default "" aggregates all configurations 8 | 9 | df = pd.DataFrame(columns=[ 10 | "Noise level", 11 | "Parametric uncertainty", 12 | "Method", 13 | "Horizon", 14 | "Num of variables", 15 | "Num of constraints", 16 | "Num of learnable policy parameters", 17 | "Average cost", 18 | "Average cost (with penalty)", 19 | "Frequency of constraint violation (x1000)", 20 | ]) 21 | 22 | def read_csv(wildcard): 23 | filename = sorted(glob(f"test_results/{wildcard}"))[-1] 24 | return pd.read_csv(filename, dtype={"constraint_violated": "bool"}) 25 | 26 | def get_stat(df): 27 | max_episode_length = df['episode_length'].max() 28 | penalty = 100000 29 | avg_cost = df['cumulative_cost'].sum() / df['episode_length'].sum() 30 | avg_cost_penalized = (df['cumulative_cost'].sum() + penalty * df["constraint_violated"].sum()) / df['episode_length'].sum() 31 | freq_violation = df["constraint_violated"].sum() / df['episode_length'].sum() 32 | return avg_cost, avg_cost_penalized, freq_violation * 1000 33 | 34 | def count_parameters(exp_name): 35 | checkpoint_path = f"runs/tank_{exp_name}/nn/tank.pth" 36 | checkpoint = torch.load(checkpoint_path) 37 | total_params = 0 38 | for key, value in checkpoint['model'].items(): 39 | if key.startswith("a2c_network.policy_net") or key.startswith("a2c_network.actor_mlp"): 40 | total_params += value.numel() 41 | return total_params 42 | 43 | if __name__ == "__main__": 44 | args = parser.parse_args() 45 | if not args.run_name: 46 | # Iterate over all configurations 47 | for noise_level in [0, 0.1, 0.2, 0.5]: 48 | for rand in [False, True]: 49 | try: 50 | wildcard = f"mlp_noise{noise_level}{'_rand' if rand else ''}_2*" 51 | mlp_df = read_csv(wildcard) 52 | df.loc[len(df)] = [ 53 | noise_level, 54 | rand, 55 | "MLP", 56 | "-", 57 | "-", 58 | "-", 59 | count_parameters(f"mlp_noise{noise_level}"), 60 | *get_stat(mlp_df), 61 | ] 62 | except Exception: 63 | print(f"Error reading file: {wildcard}") 64 | 65 | for n in [2, 4, 8, 16]: 66 | for m in [2, 4, 8, 16, 32, 64]: 67 | try: 68 | wildcard = f"N0_n{n}_m{m}_noise{noise_level}{'_rand' if rand else ''}_2*" 69 | qp_df = read_csv(wildcard) 70 | df.loc[len(df)] = [ 71 | noise_level, 72 | rand, 73 | "QP", 74 | "-", 75 | n, 76 | m, 77 | count_parameters(f"shared_affine_noise{noise_level}_n{n}_m{m}"), 78 | *get_stat(qp_df), 79 | ] 80 | except Exception: 81 | print(f"Error reading file: {wildcard}") 82 | 83 | for N in [1, 2, 4, 8, 16]: 84 | try: 85 | wildcard = f"N{N}_noise{noise_level}{'_rand' if rand else ''}_2*" 86 | mpc_df = read_csv(wildcard) 87 | df.loc[len(df)] = [ 88 | noise_level, 89 | rand, 90 | "MPC", 91 | N, 92 | 2 * N, 93 | 12 * N, 94 | 0, 95 | *get_stat(mpc_df), 96 | ] 97 | except Exception: 98 | print(f"Error reading file: {wildcard}") 99 | df.to_csv("benchmark_stat.csv", index=False) 100 | else: 101 | # Stat for particular run 102 | run_name = args.run_name 103 | wildcard = f"{run_name}_2*" 104 | raw_df = read_csv(wildcard) 105 | avg_cost, avg_cost_penalized, freq_violation = get_stat(raw_df) 106 | df.loc[len(df)] = [ 107 | "-", 108 | "-", 109 | "-", 110 | "-", 111 | "-", 112 | "-", 113 | "-", 114 | avg_cost, 115 | avg_cost_penalized, 116 | freq_violation, 117 | ] 118 | df.to_csv(f"benchmark_stat_{run_name}.csv", index=False) 119 | 
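The same helpers can also be used interactively to inspect a single result file. A small usage sketch (the wildcard below is a hypothetical run name following the naming scheme used in this script):

df_single = read_csv("N0_n8_m32_noise0_2*")
avg_cost, avg_cost_penalized, violations_x1000 = get_stat(df_single)
print(avg_cost, avg_cost_penalized, violations_x1000)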
-------------------------------------------------------------------------------- /src/utils/torch_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import functional as F 3 | from contextlib import nullcontext, contextmanager 4 | import numpy as np 5 | 6 | 7 | def bmv(A, b): 8 | """Compute matrix multiply vector in batch mode.""" 9 | bs = b.shape[0] 10 | if A.shape[0] == 1: 11 | # The same A for different b's; use matrix multiplication instead of broadcasting 12 | return (A.squeeze(0) @ b.t()).t() 13 | else: 14 | return (A @ b.unsqueeze(-1)).squeeze(-1) 15 | 16 | def bma(A, B): 17 | """Batch-matrix-times-any, where any can be matrix or vector.""" 18 | return (A @ B) if A.dim() == B.dim() else bmv(A, B) 19 | 20 | def bvv(x, y): 21 | """Compute vector dot product in batch mode.""" 22 | return bmv(x.unsqueeze(-2), y) 23 | 24 | def bqf(x, A): 25 | """Compute quadratic form x' * A * x in batch mode.""" 26 | return torch.einsum('bi,bij,bj->b', x, A, x) 27 | 28 | def bsolve(A, B): 29 | """Compute solve(A, B) in batch mode, where the first dimension of A can be singleton.""" 30 | if A.dim() == 3 and B.dim() == 2 and A.shape[0] == 1: 31 | return torch.linalg.solve(A.squeeze(0), B.t()).t() 32 | else: 33 | return torch.linalg.solve(A, B) 34 | 35 | def make_psd(x, min_eig=0.1): 36 | """Assume x is (bs, N*(N+1)/2), create (bs, N, N) batch of PSD matrices using Cholesky.""" 37 | bs, n_elem = x.shape 38 | N = (int(np.sqrt(1 + 8 * n_elem)) - 1) // 2 39 | cholesky_diag_index = torch.arange(N, dtype=torch.long) + 1 40 | cholesky_diag_index = (cholesky_diag_index * (cholesky_diag_index + 1)) // 2 - 1 # computes the indices of the future diagonal elements of the matrix 41 | elem = x.clone() 42 | elem[:, cholesky_diag_index] = np.sqrt(min_eig) + F.softplus(elem[:, cholesky_diag_index]) 43 | tril_indices = torch.tril_indices(row=N, col=N, offset=0) # Collection that contains the indices of the non-zero elements of a lower triangular matrix 44 | cholesky = torch.zeros(size=(bs, N, N), dtype=torch.float, device=elem.device) #initialize a square matrix to zeros 45 | cholesky[:, tril_indices[0], tril_indices[1]] = elem # Assigns the elements of the vector to their correct position in the lower triangular matrix 46 | return cholesky @ cholesky.transpose(1, 2) 47 | 48 | def vectorize_upper_triangular(matrices): 49 | # Get the shape of the matrices 50 | b, n, _ = matrices.shape 51 | 52 | # Create the indices for the upper triangular part 53 | row_indices, col_indices = torch.triu_indices(n, n, device=matrices.device) 54 | 55 | # Create a mask of shape (b, n, n) 56 | mask = torch.zeros((b, n, n), device=matrices.device, dtype=torch.bool) 57 | 58 | # Set the upper triangular part of the mask to True 59 | mask[:, row_indices, col_indices] = True 60 | 61 | # Use the mask to extract the upper triangular part 62 | upper_triangular = matrices[mask] 63 | 64 | # Reshape the result to the desired shape 65 | upper_triangular = upper_triangular.view(b, -1) 66 | 67 | return upper_triangular 68 | 69 | 70 | def kron(a, b): 71 | """ 72 | Kronecker product of matrices a and b with leading batch dimensions. 73 | Batch dimensions are broadcast. 
The batch shapes must be broadcastable against each other. 74 | :type a: torch.Tensor 75 | :type b: torch.Tensor 76 | :rtype: torch.Tensor 77 | """ 78 | siz1 = torch.Size(torch.tensor(a.shape[-2:]) * torch.tensor(b.shape[-2:])) 79 | res = a.unsqueeze(-1).unsqueeze(-3) * b.unsqueeze(-2).unsqueeze(-4) 80 | siz0 = res.shape[:-4] 81 | return res.reshape(siz0 + siz1) 82 | 83 | 84 | def interpolate_state_dicts(state_dict_1, state_dict_2, weight): 85 | return { 86 | key: (1 - weight) * state_dict_1[key] + weight * state_dict_2[key] for key in state_dict_1.keys() 87 | } 88 | 89 | 90 | @contextmanager 91 | def conditional_fork_rng(seed=None, condition=True): 92 | """ 93 | Context manager for conditionally applying PyTorch's fork_rng. 94 | 95 | Parameters: 96 | - seed (int, optional): The seed value for the random number generator. 97 | - condition (bool): Determines whether to apply fork_rng or not. 98 | 99 | Yields: 100 | - None: Yields control back to the caller within the context. 101 | """ 102 | if condition: 103 | with torch.random.fork_rng(): 104 | if seed is not None: 105 | torch.manual_seed(seed) 106 | yield 107 | else: 108 | with nullcontext(): 109 | yield 110 | 111 | def get_rng(device, seed=None): 112 | """ 113 | Get a random number generator. 114 | 115 | Parameters: 116 | - device (torch.device): The device to use for the random number generator. 117 | - seed (int, optional): The seed value for the random number generator. 118 | 119 | Returns: 120 | - torch.Generator: A random number generator. 121 | """ 122 | return torch.Generator(device=device).manual_seed(seed) if seed is not None else torch.Generator(device=device) 123 | 
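A quick smoke test of make_psd (an illustrative sketch, not part of the original file): a batch of packed vectors of length N * (N + 1) / 2 should map to symmetric positive definite (N, N) matrices. Note that min_eig offsets the Cholesky diagonal through softplus, which guarantees positive definiteness but is not a strict lower bound on the eigenvalues of the resulting product.

import torch
from src.utils.torch_utils import make_psd

x = torch.randn(4, 6)  # 6 = 3 * (3 + 1) / 2, so N = 3
A = make_psd(x, min_eig=0.1)  # (4, 3, 3)
assert torch.allclose(A, A.transpose(1, 2), atol=1e-6)  # symmetric
assert (torch.linalg.eigvalsh(A) > 0).all()  # positive definite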
-------------------------------------------------------------------------------- /experiments/tank/reproduce_table.py: -------------------------------------------------------------------------------- 1 | # %% 2 | from glob import glob 3 | import pandas as pd 4 | import numpy as np 5 | import torch 6 | 7 | def read_csv(short_name): 8 | wildcard = f"{short_name}_2*" 9 | filename = sorted(glob(f"test_results/{wildcard}"))[-1] 10 | return pd.read_csv(filename, dtype={"constraint_violated": "bool"}) 11 | 12 | def read_mpc_iter_count(short_name): 13 | wildcard = f"{short_name}_mpc_iter_count_2*" 14 | filename = sorted(glob(f"test_results/{wildcard}"))[-1] 15 | return np.genfromtxt(filename) 16 | 17 | 18 | def affine_layer_flops(input_size, output_size, has_bias, has_relu): 19 | flops = 2 * input_size * output_size 20 | if not has_bias: 21 | flops -= output_size 22 | if has_relu: 23 | flops += output_size 24 | return flops 25 | 26 | def qp_flops(n_sys, n_qp, m_qp, qp_iter): 27 | get_q_flops = affine_layer_flops(2 * n_sys, n_qp, False, False) 28 | get_b_flops = affine_layer_flops(n_sys, m_qp, True, False) 29 | get_mu_flops = affine_layer_flops(n_sys, m_qp, False, False) + affine_layer_flops(m_qp, m_qp, False, False) + m_qp 30 | iter_flops = m_qp # Adding primal-dual variables 31 | iter_flops += 2 * m_qp * (m_qp - 1) # Matrix-vector multiplication 32 | iter_flops += 5 * m_qp # Vector additions 33 | return get_q_flops + get_b_flops + get_mu_flops + qp_iter * iter_flops 34 | 35 | def mpc_flops(n_sys, m_sys, N, iter_count_arr): 36 | n_qp = m_sys * N 37 | m_qp = 2 * (m_sys + n_sys) * N 38 | min_iter = np.min(iter_count_arr) 39 | max_iter = np.max(iter_count_arr) 40 | median_iter = np.median(iter_count_arr) 41 | min_flops = qp_flops(n_sys, n_qp, m_qp, min_iter) 42 | max_flops = qp_flops(n_sys, n_qp, m_qp, max_iter) 43 | median_flops = qp_flops(n_sys, n_qp, m_qp, median_iter) 44 | return min_flops, max_flops, median_flops 45 | 46 | def mlp_flops(input_size, output_size, hidden_sizes): 47 | flops = 0 48 | prev_size = input_size 49 | for size in hidden_sizes: 50 | flops += affine_layer_flops(prev_size, size, True, True) 51 | prev_size = size 52 | flops += affine_layer_flops(prev_size, output_size, True, False) 53 | return flops 54 | 55 | def count_parameters(exp_name): 56 | checkpoint_path = f"runs/tank_{exp_name}/nn/tank.pth" 57 | checkpoint = torch.load(checkpoint_path) 58 | total_params = 0 59 | for key, value in checkpoint['model'].items(): 60 | if key.startswith("a2c_network.policy_net") or key.startswith("a2c_network.actor_mlp"): 61 | total_params += value.numel() 62 | return total_params 63 | 64 | def get_row(short_name, method, n_sys=4, m_sys=2, n_qp=None, m_qp=None, qp_iter=10, N_mpc=None, mlp_last_size=None): 65 | """Output (short name, success rate, cost, penalized costs, FLOPs, learnable parameters).""" 66 | result_df = read_csv(short_name) 67 | total_episodes = len(result_df) 68 | penalty = 100000 69 | avg_cost = result_df['cumulative_cost'].sum() / result_df['episode_length'].sum() 70 | avg_cost_penalized = (result_df['cumulative_cost'].sum() + penalty * result_df["constraint_violated"].sum()) / result_df['episode_length'].sum() 71 | freq_violation = result_df["constraint_violated"].sum() / result_df['episode_length'].sum() 72 | success_rate = 1. - result_df["constraint_violated"].sum() / total_episodes 73 | 74 | # Count FLOPs 75 | if method == "qp": 76 | flops = qp_flops(n_sys, n_qp, m_qp, qp_iter) 77 | elif method == "mpc": 78 | iter_count_arr = read_mpc_iter_count(short_name) 79 | flops = mpc_flops(n_sys, m_sys, N_mpc, iter_count_arr) 80 | elif method == "mlp": 81 | flops = mlp_flops(2 * n_sys, m_sys, [i * mlp_last_size for i in [4, 2, 1]]) 82 | 83 | # Count learnable parameters 84 | if method == "mpc": 85 | num_param = 0 86 | else: 87 | num_param = count_parameters(short_name) 88 | 89 | return short_name, success_rate, avg_cost, avg_cost_penalized, flops, num_param 90 | 91 | # %% 92 | rows = [ 93 | get_row("reproduce_mpc_2_0", "mpc", N_mpc=2), 94 | get_row("reproduce_mpc_2_1", "mpc", N_mpc=2), 95 | get_row("reproduce_mpc_2_10", "mpc", N_mpc=2), 96 | get_row("reproduce_mpc_2_100", "mpc", N_mpc=2), 97 | get_row("reproduce_mpc_4_0", "mpc", N_mpc=4), 98 | get_row("reproduce_mpc_4_1", "mpc", N_mpc=4), 99 | get_row("reproduce_mpc_4_10", "mpc", N_mpc=4), 100 | get_row("reproduce_mpc_4_100", "mpc", N_mpc=4), 101 | get_row("reproduce_mpc_8_0", "mpc", N_mpc=8), 102 | get_row("reproduce_mpc_8_1", "mpc", N_mpc=8), 103 | get_row("reproduce_mpc_8_10", "mpc", N_mpc=8), 104 | get_row("reproduce_mpc_8_100", "mpc", N_mpc=8), 105 | get_row("reproduce_mpc_16_0", "mpc", N_mpc=16), 106 | get_row("reproduce_mpc_16_1", "mpc", N_mpc=16), 107 | get_row("reproduce_mpc_16_10", "mpc", N_mpc=16), 108 | get_row("reproduce_mpc_16_100", "mpc", N_mpc=16), 109 | get_row("reproduce_mlp_8", "mlp", mlp_last_size=8), 110 | get_row("reproduce_mlp_16", "mlp", mlp_last_size=16), 111 | get_row("reproduce_mlp_32", "mlp", mlp_last_size=32), 112 | get_row("reproduce_mlp_64", "mlp", mlp_last_size=64), 113 | get_row("reproduce_qp_4_24", "qp", n_qp=4, m_qp=24), 114 | get_row("reproduce_qp_8_48", "qp", n_qp=8, m_qp=48), 115 | get_row("reproduce_qp_16_96", "qp", n_qp=16, m_qp=96), 116 | ] 117 | 118 | df_result = pd.DataFrame(rows, columns=["name", "success_rate", "avg_cost", "avg_cost_penalized", "flops", "num_param"]) 119 | df_result.to_csv("test_results/reproduce_table.csv", index=False) 120 | print(df_result) 121 | 
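The FLOP model above can be sanity-checked by hand. A worked example (illustrative only) for the n_qp = 8, m_qp = 48 policy with 10 unrolled iterations on the 4-state tank system: the q, b, and mu heads cost 120, 384, and 4944 FLOPs respectively, and each iteration costs 48 + 2 * 48 * 47 + 5 * 48 = 4800, so the total is 53448:

assert qp_flops(n_sys=4, n_qp=8, m_qp=48, qp_iter=10) == 120 + 384 + 4944 + 10 * 4800  # == 53448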
-------------------------------------------------------------------------------- /experiments/cartpole/reproduce_table.py: -------------------------------------------------------------------------------- 1 | # %% 2 | from glob import glob 3 | import pandas as pd 4 | import numpy as np 5 | import torch 6 | 7 | def read_csv(short_name): 8 | wildcard = f"{short_name}_2*" 9 | filename = sorted(glob(f"test_results/{wildcard}"))[-1] 10 | return pd.read_csv(filename, dtype={"constraint_violated": "bool"}) 11 | 12 | def read_mpc_iter_count(short_name): 13 | wildcard = f"{short_name}_mpc_iter_count_2*" 14 | filename = sorted(glob(f"test_results/{wildcard}"))[-1] 15 | return np.genfromtxt(filename) 16 | 17 | 18 | def affine_layer_flops(input_size, output_size, has_bias, has_relu): 19 | flops = 2 * input_size * output_size 20 | if not has_bias: 21 | flops -= output_size 22 | if has_relu: 23 | flops += output_size 24 | return flops 25 | 26 | def qp_flops(n_sys, n_qp, m_qp, qp_iter): 27 | get_q_flops = affine_layer_flops(2 * n_sys, n_qp, False, False) 28 | get_b_flops = affine_layer_flops(n_sys, m_qp, True, False) 29 | get_mu_flops = affine_layer_flops(n_sys, m_qp, False, False) + affine_layer_flops(m_qp, m_qp, False, False) + m_qp 30 | iter_flops = m_qp # Adding primal-dual variables 31 | iter_flops += 2 * m_qp * (m_qp - 1) # Matrix-vector multiplication 32 | iter_flops += 5 * m_qp # Vector additions 33 | return get_q_flops + get_b_flops + get_mu_flops + qp_iter * iter_flops 34 | 35 | def mpc_flops(n_sys, m_sys, N, iter_count_arr): 36 | n_qp = m_sys * N 37 | m_qp = 2 * (m_sys + n_sys) * N 38 | min_iter = np.min(iter_count_arr) 39 | max_iter = np.max(iter_count_arr) 40 | median_iter = np.median(iter_count_arr) 41 | min_flops = qp_flops(n_sys, n_qp, m_qp, min_iter) 42 | max_flops = qp_flops(n_sys, n_qp, m_qp, max_iter) 43 | median_flops = qp_flops(n_sys, n_qp, m_qp, median_iter) 44 | return min_flops, max_flops, median_flops 45 | 46 | def mlp_flops(input_size, output_size, hidden_sizes): 47 | flops = 0 48 | prev_size = input_size 49 | for size in hidden_sizes: 50 | flops += affine_layer_flops(prev_size, size, True, True) 51 | prev_size = size 52 | flops += affine_layer_flops(prev_size, output_size, True, False) 53 | return flops 54 | 55 | def count_parameters(exp_name): 56 | checkpoint_path = f"runs/cartpole_{exp_name}/nn/cartpole.pth" 57 | checkpoint = torch.load(checkpoint_path) 58 | total_params = 0 59 | for key, value in checkpoint['model'].items(): 60 | if key.startswith("a2c_network.policy_net") or key.startswith("a2c_network.actor_mlp"): 61 | total_params += value.numel() 62 | return total_params 63 | 64 | def get_row(short_name, method, n_sys=4, m_sys=1, n_qp=None, m_qp=None, qp_iter=10, N_mpc=None, mlp_last_size=None): 65 | """Output (short name, success rate, cost, penalized costs, FLOPs, learnable parameters).""" 66 | result_df = read_csv(short_name) 67 | total_episodes = len(result_df) 68 | penalty = 1000 69 | avg_cost = result_df['cumulative_cost'].sum() / result_df['episode_length'].sum() 70 | avg_cost_penalized = (result_df['cumulative_cost'].sum() + penalty * result_df["constraint_violated"].sum()) / result_df['episode_length'].sum() 71 | freq_violation = result_df["constraint_violated"].sum() / result_df['episode_length'].sum() 72 | success_rate = 1. 
- result_df["constraint_violated"].sum() / total_episodes 73 | 74 | # Count FLOPs 75 | if method == "qp": 76 | flops = qp_flops(n_sys, n_qp, m_qp, qp_iter) 77 | elif method == "mpc": 78 | iter_count_arr = read_mpc_iter_count(short_name) 79 | flops = mpc_flops(n_sys, m_sys, N_mpc, iter_count_arr) 80 | elif method == "mlp": 81 | flops = mlp_flops(2 * n_sys, m_sys, [i * mlp_last_size for i in [4, 2, 1]]) 82 | 83 | # Count learnable parameters 84 | if method == "mpc": 85 | num_param = 0 86 | else: 87 | num_param = count_parameters(short_name) 88 | 89 | return short_name, success_rate, avg_cost, avg_cost_penalized, flops, num_param 90 | 91 | # %% 92 | rows = [ 93 | get_row("reproduce_mpc_2_0", "mpc", N_mpc=2), 94 | get_row("reproduce_mpc_2_1", "mpc", N_mpc=2), 95 | get_row("reproduce_mpc_2_10", "mpc", N_mpc=2), 96 | get_row("reproduce_mpc_2_100", "mpc", N_mpc=2), 97 | get_row("reproduce_mpc_4_0", "mpc", N_mpc=4), 98 | get_row("reproduce_mpc_4_1", "mpc", N_mpc=4), 99 | get_row("reproduce_mpc_4_10", "mpc", N_mpc=4), 100 | get_row("reproduce_mpc_4_100", "mpc", N_mpc=4), 101 | get_row("reproduce_mpc_8_0", "mpc", N_mpc=8), 102 | get_row("reproduce_mpc_8_1", "mpc", N_mpc=8), 103 | get_row("reproduce_mpc_8_10", "mpc", N_mpc=8), 104 | get_row("reproduce_mpc_8_100", "mpc", N_mpc=8), 105 | get_row("reproduce_mpc_16_0", "mpc", N_mpc=16), 106 | get_row("reproduce_mpc_16_1", "mpc", N_mpc=16), 107 | get_row("reproduce_mpc_16_10", "mpc", N_mpc=16), 108 | get_row("reproduce_mpc_16_100", "mpc", N_mpc=16), 109 | get_row("reproduce_mlp_8", "mlp", mlp_last_size=8), 110 | get_row("reproduce_mlp_16", "mlp", mlp_last_size=16), 111 | get_row("reproduce_mlp_32", "mlp", mlp_last_size=32), 112 | get_row("reproduce_mlp_64", "mlp", mlp_last_size=64), 113 | get_row("reproduce_qp_4_24", "qp", n_qp=4, m_qp=24), 114 | get_row("reproduce_qp_8_48", "qp", n_qp=8, m_qp=48), 115 | get_row("reproduce_qp_16_96", "qp", n_qp=16, m_qp=96), 116 | ] 117 | 118 | df_result = pd.DataFrame(rows, columns=["name", "success_rate", "avg_cost", "avg_cost_penalized", "flops", "num_param"]) 119 | df_result.to_csv("test_results/reproduce_table.csv", index=False) 120 | print(df_result) 121 | -------------------------------------------------------------------------------- /experiments/tank/reproduce.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 0. Background utils and GPU scheduler 4 | 5 | # Define the number of GPUs available 6 | NUM_GPUS=$(nvidia-smi --list-gpus | wc -l) 7 | 8 | # Function to find the first idle GPU 9 | find_idle_gpu() { 10 | for (( i=0; i<$NUM_GPUS; i++ )); do 11 | # Check if GPU volatile utilization is 0% 12 | if [ "$(nvidia-smi -i $i --query-gpu=utilization.gpu --format=csv,noheader,nounits)" -eq 0 ]; then 13 | echo $i 14 | return 15 | fi 16 | done 17 | echo "-1" # Return -1 if no idle GPU is found 18 | } 19 | 20 | find_gpu_and_run_task() { 21 | local run_task_function="$1" 22 | shift # Remove the first argument (run_task_function name) 23 | 24 | # Initialize GPU ID as -1 indicating no GPU is available initially 25 | local gpu_id=-1 26 | 27 | # Wait for an idle GPU to become available 28 | while [ "$gpu_id" -eq -1 ]; do 29 | gpu_id=$(find_idle_gpu) 30 | sleep 1 # Wait a bit before checking again 31 | done 32 | 33 | # Call the run_task function with the GPU ID and additional arguments, and send it to the background 34 | $run_task_function $gpu_id $@ > /dev/null & 35 | 36 | # Capture the PID of the last background process 37 | local task_pid=$! 
38 | 39 | # Optional: wait briefly to allow the task to start 40 | sleep 10 41 | 42 | # Output the PID 43 | echo $task_pid 44 | } 45 | 46 | 47 | # 1. Training 48 | # 1.1 MLP of different sizes 49 | 50 | train_mlp() { 51 | local gpu_id=$1 52 | local c1=$2 53 | local c2=$3 54 | local c3=$4 55 | local mlp_last_size=$5 56 | CUDA_VISIBLE_DEVICES=$gpu_id python ../../run.py train tank --num-parallel 100000 --horizon 20 --epochs 5000 --mini-epochs 1 --noise-level 0. --reward-shaping ${c1},${c2},${c3} --no-obs-normalization --mlp-size-last $mlp_last_size --batch-test --exp-name reproduce_mlp_${mlp_last_size} --lr-schedule linear --initial-lr "5e-4" --quiet 57 | } 58 | 59 | # 1.2 QP of different sizes 60 | 61 | train_qp() { 62 | local gpu_id=$1 63 | local c1=$2 64 | local c2=$3 65 | local c3=$4 66 | local n_qp=$5 67 | local m_qp=$6 68 | CUDA_VISIBLE_DEVICES=$gpu_id python ../../run.py train tank --num-parallel 100000 --horizon 20 --epochs 5000 --mini-epochs 1 --noise-level 0. --reward-shaping ${c1},${c2},${c3} --n-qp $n_qp --m-qp $m_qp --qp-unrolled --shared-PH --affine-qb --strict-affine-layer --obs-has-half-ref --use-residual-loss --no-obs-normalization --force-feasible --batch-test --exp-name reproduce_qp_${n_qp}_${m_qp} --lr-schedule linear --initial-lr "5e-4" --quiet 69 | } 70 | 71 | 72 | # 2. Testing 73 | # 2.1 MPC under different configurations 74 | 75 | test_mpc() { 76 | local gpu_id=$1 77 | local N=$2 78 | local terminal_coef=$3 79 | local n_qp=4 80 | local m_qp=24 81 | CUDA_VISIBLE_DEVICES=$gpu_id python ../../run.py test tank --num-parallel 10000 --horizon 20 --epochs 5000 --mini-epochs 1 --noise-level 0. --reward-shaping 50,0.05,2 --n-qp $n_qp --m-qp $m_qp --qp-unrolled --shared-PH --affine-qb --strict-affine-layer --obs-has-half-ref --use-residual-loss --no-obs-normalization --force-feasible --batch-test --mpc-baseline-N $N --mpc-terminal-cost-coef $terminal_coef --use-osqp-for-mpc --exp-name reproduce_qp_${n_qp}_${m_qp} --run-name reproduce_mpc_${N}_${terminal_coef} --lr-schedule linear --initial-lr "5e-4" --quiet 82 | } 83 | 84 | test_mpc_bg() { 85 | test_mpc $@ > /dev/null & 86 | } 87 | 88 | test_mpc_all() { 89 | test_mpc_bg 0 2 0 90 | test_mpc_bg 0 2 1 91 | test_mpc_bg 0 2 10 92 | test_mpc_bg 0 2 100 93 | test_mpc_bg 0 4 0 94 | test_mpc_bg 0 4 1 95 | test_mpc_bg 0 4 10 96 | test_mpc_bg 0 4 100 97 | test_mpc_bg 1 8 0 98 | test_mpc_bg 1 8 1 99 | test_mpc_bg 1 8 10 100 | test_mpc_bg 1 8 100 101 | test_mpc_bg 1 16 0 102 | test_mpc_bg 1 16 1 103 | test_mpc_bg 1 16 10 104 | test_mpc_bg 1 16 100 105 | wait 106 | } 107 | 108 | # 2.2 MLP 109 | 110 | test_mlp() { 111 | local gpu_id=$1 112 | local c1=$2 113 | local c2=$3 114 | local c3=$4 115 | local mlp_last_size=$5 116 | CUDA_VISIBLE_DEVICES=$gpu_id python ../../run.py test tank --num-parallel 10000 --horizon 20 --epochs 5000 --mini-epochs 1 --noise-level 0. --reward-shaping ${c1},${c2},${c3} --no-obs-normalization --mlp-size-last $mlp_last_size --batch-test --exp-name reproduce_mlp_${mlp_last_size} --lr-schedule linear --initial-lr "5e-4" --quiet 117 | } 118 | 119 | # 2.3 QP 120 | 121 | test_qp() { 122 | local gpu_id=$1 123 | local c1=$2 124 | local c2=$3 125 | local c3=$4 126 | local n_qp=$5 127 | local m_qp=$6 128 | CUDA_VISIBLE_DEVICES=$gpu_id python ../../run.py test tank --num-parallel 10000 --horizon 20 --epochs 5000 --mini-epochs 1 --noise-level 0. 
--reward-shaping ${c1},${c2},${c3} --n-qp $n_qp --m-qp $m_qp --qp-unrolled --shared-PH --affine-qb --strict-affine-layer --obs-has-half-ref --use-residual-loss --no-obs-normalization --force-feasible --batch-test --exp-name reproduce_qp_${n_qp}_${m_qp} --lr-schedule linear --initial-lr "5e-4" --quiet 129 | } 130 | 131 | # Utility function for train and test 132 | train_and_test() { 133 | local train_function="$1" 134 | shift 135 | local test_function="$1" 136 | shift 137 | 138 | train_pid=$(find_gpu_and_run_task $train_function $@) 139 | while [ -e /proc/$train_pid ]; do 140 | sleep 1 141 | done 142 | test_pid=$(find_gpu_and_run_task $test_function $@) 143 | while [ -e /proc/$test_pid ]; do 144 | sleep 1 145 | done 146 | } 147 | 148 | run_and_delay() { 149 | local run_function="$1" 150 | shift 151 | 152 | $run_function $@ & 153 | local run_pid=$! 154 | sleep 10 155 | echo $run_pid 156 | } 157 | 158 | # Finally run all the tasks 159 | 160 | run_and_delay test_mpc_all 161 | run_and_delay train_and_test train_mlp test_mlp 50 0.05 2 8 162 | run_and_delay train_and_test train_mlp test_mlp 50 0.05 2 16 163 | run_and_delay train_and_test train_mlp test_mlp 50 0.05 2 32 164 | run_and_delay train_and_test train_mlp test_mlp 50 0.05 2 64 165 | run_and_delay train_and_test train_qp test_qp 50 0.05 2 4 24 166 | run_and_delay train_and_test train_qp test_qp 50 0.05 2 8 48 167 | run_and_delay train_and_test train_qp test_qp 50 0.05 2 16 96 168 | 169 | wait 170 | 171 | python reproduce_table.py 172 | -------------------------------------------------------------------------------- /experiments/tank/reproduce_table_disturbed.py: -------------------------------------------------------------------------------- 1 | # %% 2 | from glob import glob 3 | import pandas as pd 4 | import numpy as np 5 | import torch 6 | 7 | def read_csv(short_name): 8 | wildcard = f"{short_name}_2*" 9 | filename = sorted(glob(f"test_results/{wildcard}"))[-1] 10 | return pd.read_csv(filename, dtype={"constraint_violated": "bool"}) 11 | 12 | def read_mpc_iter_count(short_name): 13 | wildcard = f"{short_name}_mpc_iter_count_2*" 14 | filename = sorted(glob(f"test_results/{wildcard}"))[-1] 15 | return np.genfromtxt(filename) 16 | 17 | def read_running_time(short_name): 18 | wildcard = f"{short_name}_running_time_2*" 19 | filename = sorted(glob(f"test_results/{wildcard}"))[-1] 20 | return np.genfromtxt(filename) 21 | 22 | 23 | def affine_layer_flops(input_size, output_size, has_bias, has_relu): 24 | flops = 2 * input_size * output_size 25 | if not has_bias: 26 | flops -= output_size 27 | if has_relu: 28 | flops += output_size 29 | return flops 30 | 31 | def qp_flops(n_sys, n_qp, m_qp, qp_iter): 32 | get_q_flops = affine_layer_flops(2 * n_sys, n_qp, False, False) 33 | get_b_flops = affine_layer_flops(n_sys, m_qp, True, False) 34 | get_mu_flops = affine_layer_flops(n_sys, m_qp, False, False) + affine_layer_flops(m_qp, m_qp, False, False) + m_qp 35 | iter_flops = m_qp # Adding primal-dual variables 36 | iter_flops += 2 * m_qp * (m_qp - 1) # Matrix-vector multiplication 37 | iter_flops += 5 * m_qp # Vector additions 38 | return get_q_flops + get_b_flops + get_mu_flops + qp_iter * iter_flops 39 | 40 | def mpc_flops(n_sys, m_sys, N, iter_count_arr): 41 | n_qp = m_sys * N 42 | m_qp = 2 * (m_sys + n_sys) * N 43 | min_iter = np.min(iter_count_arr) 44 | max_iter = np.max(iter_count_arr) 45 | median_iter = np.median(iter_count_arr) 46 | min_flops = qp_flops(n_sys, n_qp, m_qp, min_iter) 47 | max_flops = qp_flops(n_sys, n_qp, m_qp, max_iter) 48 | 
median_flops = qp_flops(n_sys, n_qp, m_qp, median_iter) 49 | return min_flops, max_flops, median_flops 50 | 51 | def mlp_flops(input_size, output_size, hidden_sizes): 52 | flops = 0 53 | prev_size = input_size 54 | for size in hidden_sizes: 55 | flops += affine_layer_flops(prev_size, size, True, True) 56 | prev_size = size 57 | flops += affine_layer_flops(prev_size, output_size, True, False) 58 | return flops 59 | 60 | def count_parameters(exp_name): 61 | checkpoint_path = f"runs/tank_{exp_name}/nn/tank.pth" 62 | checkpoint = torch.load(checkpoint_path) 63 | total_params = 0 64 | for key, value in checkpoint['model'].items(): 65 | if key.startswith("a2c_network.policy_net") or key.startswith("a2c_network.actor_mlp"): 66 | total_params += value.numel() 67 | return total_params 68 | 69 | def get_row(short_name, method, n_sys=4, m_sys=2, n_qp=None, m_qp=None, qp_iter=10, N_mpc=None, mlp_last_size=None): 70 | """Output (short name, success rate, cost, penalized costs, FLOPs, learnable parameters).""" 71 | result_df = read_csv(short_name) 72 | total_episodes = len(result_df) 73 | penalty = 100000 74 | avg_cost = result_df['cumulative_cost'].sum() / result_df['episode_length'].sum() 75 | avg_cost_penalized = (result_df['cumulative_cost'].sum() + penalty * result_df["constraint_violated"].sum()) / result_df['episode_length'].sum() 76 | freq_violation = result_df["constraint_violated"].sum() / result_df['episode_length'].sum() 77 | success_rate = 1. - result_df["constraint_violated"].sum() / total_episodes 78 | 79 | # Count FLOPs / running time 80 | baseline_flops = qp_flops(4, 32, 192, 10) 81 | baseline_time = 0.01 82 | if method == "qp": 83 | flops = qp_flops(n_sys, n_qp, m_qp, qp_iter) 84 | running_time = baseline_time * flops / baseline_flops 85 | elif method == "mpc": 86 | iter_count_arr = read_mpc_iter_count(short_name) 87 | flops = mpc_flops(n_sys, m_sys, N_mpc, iter_count_arr) 88 | running_time = tuple(baseline_time * item / baseline_flops for item in flops) 89 | elif method == "mlp": 90 | flops = mlp_flops(2 * n_sys, m_sys, [i * mlp_last_size for i in [4, 2, 1]]) 91 | running_time = baseline_time * flops / baseline_flops 92 | elif method == "robust_mpc": 93 | flops = 0 94 | running_time_arr = read_running_time(short_name) 95 | min_running_time = np.min(running_time_arr) 96 | max_running_time = np.max(running_time_arr) 97 | median_running_time = np.median(running_time_arr) 98 | running_time = (min_running_time, max_running_time, median_running_time) 99 | 100 | 101 | # Count learnable parameters 102 | if method == "mpc" or method == "robust_mpc": 103 | num_param = 0 104 | else: 105 | num_param = count_parameters(short_name) 106 | 107 | return short_name, success_rate, avg_cost, avg_cost_penalized, flops, running_time, num_param 108 | 109 | # %% 110 | rows = [ 111 | get_row("reproduce_disturbed_mpc_16_10_none", "mpc", N_mpc=16), 112 | get_row("reproduce_disturbed_mpc_16_10_scenario", "robust_mpc", N_mpc=16), 113 | get_row("reproduce_disturbed_mpc_16_10_tube_0.05", "robust_mpc", N_mpc=16), 114 | get_row("reproduce_disturbed_mpc_16_10_tube_0.1", "robust_mpc", N_mpc=16), 115 | get_row("reproduce_disturbed_mpc_16_10_tube_0.2", "robust_mpc", N_mpc=16), 116 | get_row("reproduce_disturbed_mpc_16_10_tube_0.25", "robust_mpc", N_mpc=16), 117 | get_row("reproduce_disturbed_mpc_16_10_tube_0.3", "robust_mpc", N_mpc=16), 118 | get_row("reproduce_disturbed_mlp_8", "mlp", mlp_last_size=8), 119 | get_row("reproduce_disturbed_mlp_16", "mlp", mlp_last_size=16), 120 | get_row("reproduce_disturbed_mlp_32", 
"mlp", mlp_last_size=32), 121 | get_row("reproduce_disturbed_mlp_64", "mlp", mlp_last_size=32), 122 | get_row("reproduce_disturbed_qp_4_24", "qp", n_qp=4, m_qp=24), 123 | get_row("reproduce_disturbed_qp_8_48", "qp", n_qp=8, m_qp=48), 124 | get_row("reproduce_disturbed_qp_16_96", "qp", n_qp=16, m_qp=96), 125 | get_row("reproduce_disturbed_qp_32_192", "qp", n_qp=32, m_qp=192), 126 | ] 127 | 128 | df_result = pd.DataFrame(rows, columns=["name", "success_rate", "avg_cost", "avg_cost_penalized", "flops", "running_time", "num_param"]) 129 | df_result.to_csv("test_results/reproduce_table_disturbed.csv", index=False) 130 | print(df_result) 131 | -------------------------------------------------------------------------------- /experiments/cartpole/reproduce.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 0. Background utils and GPU scheduler 4 | 5 | # Define the number of GPUs available 6 | NUM_GPUS=$(nvidia-smi --list-gpus | wc -l) 7 | 8 | # Function to find the first idle GPU 9 | find_idle_gpu() { 10 | for (( i=0; i<$NUM_GPUS; i++ )); do 11 | # Check if GPU volatile utilization is 0% 12 | if [ "$(nvidia-smi -i $i --query-gpu=utilization.gpu --format=csv,noheader,nounits)" -eq 0 ]; then 13 | echo $i 14 | return 15 | fi 16 | done 17 | echo "-1" # Return -1 if no idle GPU is found 18 | } 19 | 20 | find_gpu_and_run_task() { 21 | local run_task_function="$1" 22 | shift # Remove the first argument (run_task_function name) 23 | 24 | # Initialize GPU ID as -1 indicating no GPU is available initially 25 | local gpu_id=-1 26 | 27 | # Wait for an idle GPU to become available 28 | while [ "$gpu_id" -eq -1 ]; do 29 | gpu_id=$(find_idle_gpu) 30 | sleep 1 # Wait a bit before checking again 31 | done 32 | 33 | # Call the run_task function with the GPU ID and additional arguments, and send it to the background 34 | $run_task_function $gpu_id $@ > /dev/null & 35 | 36 | # Capture the PID of the last background process 37 | local task_pid=$! 38 | 39 | # Optional: wait briefly to allow the task to start 40 | sleep 15 41 | 42 | # Output the PID 43 | echo $task_pid 44 | } 45 | 46 | 47 | # 1. Training 48 | # 1.1 MLP of different sizes 49 | 50 | train_mlp() { 51 | local gpu_id=$1 52 | local c1=$2 53 | local c2=$3 54 | local c3=$4 55 | local mlp_last_size=$5 56 | CUDA_VISIBLE_DEVICES=$gpu_id python ../../run.py train cartpole --num-parallel 100000 --horizon 20 --epochs 5000 --mini-epochs 1 --noise-level 0. --reward-shaping ${c1},${c2},${c3} --no-obs-normalization --mlp-size-last $mlp_last_size --batch-test --exp-name reproduce_mlp_${mlp_last_size} --lr-schedule linear --initial-lr "5e-4" --quiet --max-steps-per-episode 100 57 | } 58 | 59 | # 1.2 QP of different sizes 60 | 61 | train_qp() { 62 | local gpu_id=$1 63 | local c1=$2 64 | local c2=$3 65 | local c3=$4 66 | local n_qp=$5 67 | local m_qp=$6 68 | CUDA_VISIBLE_DEVICES=$gpu_id python ../../run.py train cartpole --num-parallel 100000 --horizon 20 --epochs 5000 --mini-epochs 1 --noise-level 0. --reward-shaping ${c1},${c2},${c3} --n-qp $n_qp --m-qp $m_qp --qp-unrolled --shared-PH --affine-qb --symmetric --use-residual-loss --no-obs-normalization --force-feasible --batch-test --exp-name reproduce_qp_${n_qp}_${m_qp} --lr-schedule linear --initial-lr "5e-4" --quiet --max-steps-per-episode 100 69 | } 70 | 71 | 72 | # 2. 
Testing 73 | # 2.1 MPC under different configurations 74 | 75 | test_mpc() { 76 | local gpu_id=$1 77 | local N=$2 78 | local terminal_coef=$3 79 | local n_qp=4 80 | local m_qp=24 81 | CUDA_VISIBLE_DEVICES=$gpu_id python ../../run.py test cartpole --num-parallel 10000 --horizon 20 --epochs 5000 --mini-epochs 1 --noise-level 0. --reward-shaping 50,0.05,2 --n-qp $n_qp --m-qp $m_qp --qp-unrolled --shared-PH --affine-qb --symmetric --use-residual-loss --no-obs-normalization --force-feasible --batch-test --mpc-baseline-N $N --mpc-terminal-cost-coef $terminal_coef --use-osqp-for-mpc --exp-name reproduce_qp_${n_qp}_${m_qp} --run-name reproduce_mpc_${N}_${terminal_coef} --lr-schedule linear --initial-lr "5e-4" --quiet --max-steps-per-episode 100 82 | } 83 | 84 | test_mpc_bg() { 85 | test_mpc $@ > /dev/null & 86 | } 87 | 88 | test_mpc_all() { 89 | test_mpc_bg 2 2 0 90 | test_mpc_bg 2 2 1 91 | test_mpc_bg 2 2 10 92 | test_mpc_bg 2 2 100 93 | test_mpc_bg 2 4 0 94 | test_mpc_bg 2 4 1 95 | test_mpc_bg 2 4 10 96 | test_mpc_bg 2 4 100 97 | test_mpc_bg 3 8 0 98 | test_mpc_bg 3 8 1 99 | test_mpc_bg 3 8 10 100 | test_mpc_bg 3 8 100 101 | test_mpc_bg 3 16 0 102 | test_mpc_bg 3 16 1 103 | test_mpc_bg 3 16 10 104 | test_mpc_bg 3 16 100 105 | wait 106 | } 107 | 108 | # 2.2 MLP 109 | 110 | test_mlp() { 111 | local gpu_id=$1 112 | local c1=$2 113 | local c2=$3 114 | local c3=$4 115 | local mlp_last_size=$5 116 | CUDA_VISIBLE_DEVICES=$gpu_id python ../../run.py test cartpole --num-parallel 10000 --horizon 20 --epochs 5000 --mini-epochs 1 --noise-level 0. --reward-shaping ${c1},${c2},${c3} --no-obs-normalization --mlp-size-last $mlp_last_size --batch-test --exp-name reproduce_mlp_${mlp_last_size} --lr-schedule linear --initial-lr "5e-4" --quiet --max-steps-per-episode 100 117 | } 118 | 119 | # 2.3 QP 120 | 121 | test_qp() { 122 | local gpu_id=$1 123 | local c1=$2 124 | local c2=$3 125 | local c3=$4 126 | local n_qp=$5 127 | local m_qp=$6 128 | CUDA_VISIBLE_DEVICES=$gpu_id python ../../run.py test cartpole --num-parallel 10000 --horizon 20 --epochs 5000 --mini-epochs 1 --noise-level 0. --reward-shaping ${c1},${c2},${c3} --n-qp $n_qp --m-qp $m_qp --qp-unrolled --shared-PH --affine-qb --symmetric --use-residual-loss --no-obs-normalization --force-feasible --batch-test --exp-name reproduce_qp_${n_qp}_${m_qp} --lr-schedule linear --initial-lr "5e-4" --quiet --max-steps-per-episode 100 129 | } 130 | 131 | # Utility function for train and test 132 | train_and_test() { 133 | local train_function="$1" 134 | shift 135 | local test_function="$1" 136 | shift 137 | 138 | train_pid=$(find_gpu_and_run_task $train_function $@) 139 | while [ -e /proc/$train_pid ]; do 140 | sleep 1 141 | done 142 | test_pid=$(find_gpu_and_run_task $test_function $@) 143 | while [ -e /proc/$test_pid ]; do 144 | sleep 1 145 | done 146 | } 147 | 148 | run_and_delay() { 149 | local run_function="$1" 150 | shift 151 | 152 | $run_function $@ & 153 | local run_pid=$! 
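# Note (added for clarity): sleeping before returning staggers successive launches,
# giving the pipeline started above time to show up as nonzero GPU utilization before
# the next caller polls find_idle_gpu; otherwise two tasks could land on the same device.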
154 | sleep 15 155 | echo $run_pid 156 | } 157 | 158 | # Finally run all the tasks 159 | 160 | run_and_delay test_mpc_all 161 | run_and_delay train_and_test train_mlp test_mlp 50 0.05 2 8 162 | run_and_delay train_and_test train_mlp test_mlp 50 0.05 2 16 163 | run_and_delay train_and_test train_mlp test_mlp 50 0.05 2 32 164 | run_and_delay train_and_test train_mlp test_mlp 50 0.05 2 64 165 | run_and_delay train_and_test train_qp test_qp 50 0.05 2 4 24 166 | run_and_delay train_and_test train_qp test_qp 50 0.05 2 8 48 167 | run_and_delay train_and_test train_qp test_qp 50 0.05 2 16 96 168 | 169 | wait 170 | 171 | python reproduce_table.py 172 | -------------------------------------------------------------------------------- /src/utils/sets.py: -------------------------------------------------------------------------------- 1 | from scipy.spatial import ConvexHull 2 | from scipy.spatial import Delaunay 3 | import numpy as np 4 | from tqdm.auto import tqdm 5 | 6 | 7 | def backward_reachable_set_linear(A_inv, B, X_set, x_min, x_max, u_min, u_max): 8 | """ 9 | Compute the one-step backward reachable set for a linear system x_{k+1} = Ax + Bu. 10 | 11 | Parameters: 12 | A_inv (numpy.ndarray): Inverse of the A matrix in the system dynamics. 13 | B (numpy.ndarray): B matrix in the system dynamics. 14 | X_set (set): Set of points (as tuples) representing the current state space. 15 | x_min (float or numpy.ndarray): Minimum state constraints. 16 | x_max (float or numpy.ndarray): Maximum state constraints. 17 | u_min (float or numpy.ndarray): Minimum control input constraints. 18 | u_max (float or numpy.ndarray): Maximum control input constraints. 19 | 20 | Returns: 21 | set: One-step backward reachable set as a set of points (as tuples). 22 | """ 23 | new_set = set() 24 | for x in X_set: 25 | for u in np.linspace(u_min, u_max, 5): 26 | prev_x = np.dot(A_inv, x - np.dot(B, u)) 27 | if np.all(x_min <= prev_x) and np.all(prev_x <= x_max): 28 | new_set.add(tuple(prev_x)) 29 | return new_set 30 | 31 | 32 | def one_step_forward_reachable_set(g, S, x_min, x_max): 33 | """ 34 | Compute the one-step forward reachable set for an autonomous system x_{k+1} = g(x_k). 35 | 36 | Parameters: 37 | g (function): Function representing the autonomous system dynamics. 38 | S (numpy.ndarray): Vertices of the initial set. 39 | x_min (numpy.ndarray): Minimum state constraints. 40 | x_max (numpy.ndarray): Maximum state constraints. 41 | 42 | Returns: 43 | numpy.ndarray: Vertices of the one-step forward reachable set. 44 | """ 45 | new_vertices = [] 46 | 47 | for x in S: 48 | next_x = g(x) 49 | 50 | # Check if the next state is within the state constraints 51 | if np.all(x_min <= next_x) and np.all(next_x <= x_max): 52 | new_vertices.append(next_x) 53 | 54 | return np.array(new_vertices) 55 | 56 | 57 | def one_step_backward_reachable_set(g, S_hull, x_min, x_max, num_samples=1000): 58 | """ 59 | Compute the one-step backward reachable set for an autonomous system x_{k+1} = g(x_k). 60 | 61 | Parameters: 62 | g (function): Function representing the autonomous system dynamics. 63 | S_hull (ConvexHull): Convex hull object of the initial set S. 64 | x_min (numpy.ndarray): Minimum state constraints. 65 | x_max (numpy.ndarray): Maximum state constraints. 66 | num_samples (int): Number of samples for approximation. 67 | 68 | Returns: 69 | numpy.ndarray: Vertices of the approximated one-step backward reachable set. 
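Note: this is a Monte-Carlo approximation. Points are sampled uniformly within
[x_min, x_max], kept when g(x) lands inside S (membership tested via a Delaunay
triangulation of S's vertices), and the convex hull of the kept points is
returned, so accuracy improves with num_samples.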
70 | """ 71 | # Sample points within the state constraints 72 | sampled_points = np.random.uniform(x_min, x_max, (num_samples, len(x_min))) 73 | 74 | # Delaunay triangulation to speed up point-in-hull check 75 | delaunay_S = Delaunay(S_hull.points[S_hull.vertices, :]) 76 | 77 | # Check which sampled points have their next state in S 78 | backward_reachable_points = [] 79 | for x in sampled_points: 80 | next_x = g(x) 81 | if Delaunay.find_simplex(delaunay_S, next_x) >= 0: 82 | backward_reachable_points.append(x) 83 | 84 | # Compute the convex hull of the backward reachable points 85 | if len(backward_reachable_points) > 0: 86 | backward_hull = ConvexHull(np.array(backward_reachable_points)) 87 | return backward_hull.points[backward_hull.vertices, :] 88 | else: 89 | return np.array([]) 90 | 91 | 92 | def compute_positive_invariant_set_from_origin(g, x_min, x_max, initial_radius=1.0, iterations=100): 93 | """ 94 | Compute the positive invariant set for an autonomous system x_{k+1} = g(x_k) starting from a neighborhood of the origin. 95 | 96 | Parameters: 97 | g (function): Function representing the autonomous system dynamics. 98 | x_min (numpy.ndarray): Minimum state constraints. 99 | x_max (numpy.ndarray): Maximum state constraints. 100 | initial_radius (float): Radius of the initial neighborhood around the origin. 101 | iterations (int): Number of iterations for approximation. 102 | 103 | Returns: 104 | numpy.ndarray: Vertices of the approximated positive invariant set. 105 | """ 106 | # Start from a neighborhood of the origin defined by the initial_radius 107 | initial_set = np.array([[initial_radius, 0], [0, initial_radius], [-initial_radius, 0], [0, -initial_radius]]) 108 | current_set_hull = ConvexHull(initial_set) 109 | 110 | for _ in tqdm(range(iterations)): 111 | # Determine the sampling bounds based on the current set 112 | current_radius = np.max(np.linalg.norm(current_set_hull.points[current_set_hull.vertices, :], axis=1)) 113 | sampling_min = np.maximum(x_min, -current_radius * 1.5) 114 | sampling_max = np.minimum(x_max, current_radius * 1.5) 115 | 116 | # Compute the one-step backward reachable set from the current set 117 | backward_reachable_vertices = one_step_backward_reachable_set(g, current_set_hull, sampling_min, sampling_max) 118 | 119 | # Update the current set to include the backward reachable set, effectively taking union 120 | if len(backward_reachable_vertices) > 0: 121 | new_hull = ConvexHull(np.vstack((current_set_hull.points[current_set_hull.vertices, :], backward_reachable_vertices))) 122 | current_set_hull = new_hull 123 | 124 | return current_set_hull.points[current_set_hull.vertices, :] 125 | 126 | 127 | def compute_MCI(A, B, x_min, x_max, u_min, u_max, iterations=10): 128 | """ 129 | Compute the Maximal Control Invariant (MCI) set for a given linear system x[k+1] = Ax[k] + Bu[k]. 130 | 131 | Parameters: 132 | A (numpy.ndarray): State transition matrix. 133 | B (numpy.ndarray): Input matrix. 134 | x_min (numpy.ndarray): Minimum state constraints. 135 | x_max (numpy.ndarray): Maximum state constraints. 136 | u_min (numpy.ndarray): Minimum control input constraints. 137 | u_max (numpy.ndarray): Maximum control input constraints. 138 | iterations (int): Number of iterations for approximating the MCI set. 139 | 140 | Returns: 141 | numpy.ndarray: Vertices of the approximated MCI set. 
142 | """ 143 | 144 | # Precompute the inverse of A 145 | A_inv = np.linalg.inv(A) 146 | 147 | # Initialize the MCI set as a single point at the origin, using a set for uniqueness 148 | MCI_set = {(0, 0)} 149 | 150 | # Iteratively compute the MCI set 151 | for _ in range(iterations): 152 | MCI_set = backward_reachable_set_linear(A_inv, B, MCI_set, x_min, x_max, u_min, u_max) 153 | if len(MCI_set) == 0: 154 | break 155 | 156 | # Convert the set to an array for further processing or visualization 157 | MCI_array = np.array(list(MCI_set)) 158 | 159 | if len(MCI_array) > 0: 160 | MCI_hull = ConvexHull(MCI_array) 161 | return MCI_hull.points[MCI_hull.vertices, :] 162 | else: 163 | return np.array([]) -------------------------------------------------------------------------------- /src/envs/env_creators.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from .linear_system import LinearSystem 4 | from .cartpole import CartPole 5 | 6 | sys_param = { 7 | "double_integrator": { 8 | "n": 2, 9 | "m": 1, 10 | "A": np.array([ 11 | [1.0, 1.0], 12 | [0.0, 1.0], 13 | ]), 14 | "B": np.array([ 15 | [0.0], 16 | [1.0], 17 | ]), 18 | "Q": np.eye(2), 19 | "R": np.array([[100.0]]), 20 | "x_min": -5., 21 | "x_max": 5., 22 | "u_min": -0.5, 23 | "u_max": 0.5, 24 | }, 25 | "tank": { 26 | "n": 4, 27 | "m": 2, 28 | "A": np.array([ 29 | [0.984, 0.0, 0.0422029, 0.0], 30 | [0.0, 0.98895, 0.0, 0.0326014], 31 | [0.0, 0.0, 0.957453, 0.0], 32 | [0.0, 0.0, 0.0, 0.967216], 33 | ]), 34 | "B": np.array([ 35 | [0.825822, 0.0101995], 36 | [0.00512673, 0.624648], 37 | [0.0, 0.468317], 38 | [0.307042, 0.0], 39 | ]), 40 | "Q": np.eye(4), 41 | "R": 0.1 * np.eye(2), 42 | "x_min": 0, 43 | "x_max": 20, 44 | "u_min": 0, 45 | "u_max": 8, 46 | }, 47 | "cartpole": { 48 | "n": 4, 49 | "m": 1, 50 | "m_cart": [0.7, 1.3], 51 | "m_pole": [0.07, 0.13], 52 | "l": [0.4, 0.7], 53 | "m_cart_nom": 1.0, 54 | "m_pole_nom": 0.1, 55 | "l_nom": 0.55, 56 | "Q": np.diag([1., 1e-4, 1., 1e-4]), 57 | "R": np.array([[1e-4]]), 58 | "x_min": -2, 59 | "x_max": 2, 60 | "u_min": -10, 61 | "u_max": 10, 62 | "dt": 0.1, 63 | }, 64 | } 65 | 66 | def tank_initial_generator(size, device, rng): 67 | """ 68 | Generate initial states for the tank environment. 69 | State components are sampled in [0, 16] to ensure that the initial state stays within the maximal contraint invariant set. 70 | """ 71 | x0 = 16. * torch.rand((size, 4), generator=rng, device=device) 72 | return x0 73 | 74 | def tank_ref_generator(size, device, rng): 75 | """ 76 | Generate reference states for the tank environment. 77 | Sampled across the entire state space. 78 | """ 79 | x_ref = 20. * torch.rand((size, 4), generator=rng, device=device) 80 | return x_ref 81 | 82 | def tank_randomizer(size, device, rng): 83 | """ 84 | Generate \Delta A, \Delta B for the tank environment. 85 | """ 86 | Delta_A11 = 0.002 * (2. * torch.rand((size,), generator=rng, device=device) - 1.) # Leakage of tank 1 87 | Delta_A22 = 0.002 * (2. * torch.rand((size,), generator=rng, device=device) - 1.) # Leakage of tank 2 88 | Delta_A13 = 0.002 * (2. * torch.rand((size,), generator=rng, device=device) - 1.) # Leakage from tank 3 to tank 1 89 | Delta_A33 = -Delta_A13 # Conservation of tank 3 90 | Delta_A24 = 0.002 * (2. * torch.rand((size,), generator=rng, device=device) - 1.) 
# Leakage from tank 4 to tank 2 91 | Delta_A44 = -Delta_A24 # Conservation of tank 4 92 | zeros = torch.zeros((size,), device=device) # Other elements are not perturbed 93 | # A = [A11 0 A13 0; 0 A22 0 A24; 0 0 A33 0; 0 0 0 A44] 94 | Delta_A = torch.stack([ 95 | torch.stack([Delta_A11, zeros, Delta_A13, zeros], dim=1), 96 | torch.stack([zeros, Delta_A22, zeros, Delta_A24], dim=1), 97 | torch.stack([zeros, zeros, Delta_A33, zeros], dim=1), 98 | torch.stack([zeros, zeros, zeros, Delta_A44], dim=1) 99 | ], dim=1) 100 | 101 | multiplier_B1 = 0.02 * (2. * torch.rand((size,), generator=rng, device=device) - 1.) # Voltage perturbation on pump 1 102 | multiplier_B2 = 0.02 * (2. * torch.rand((size,), generator=rng, device=device) - 1.) # Voltage perturbation on pump 2 103 | B = torch.tensor(sys_param["tank"]["B"], device=device, dtype=torch.float).unsqueeze(0) 104 | Delta_B1 = multiplier_B1.unsqueeze(-1) * B[:, :, 0] 105 | Delta_B2 = multiplier_B2.unsqueeze(-1) * B[:, :, 1] 106 | Delta_B = torch.stack([Delta_B1, Delta_B2], dim=2) 107 | 108 | return Delta_A, Delta_B 109 | 110 | 111 | env_creators = { 112 | "double_integrator": lambda **kwargs: LinearSystem( 113 | A=sys_param["double_integrator"]["A"], 114 | B=sys_param["double_integrator"]["B"], 115 | Q=sys_param["double_integrator"]["Q"], 116 | R=sys_param["double_integrator"]["R"], 117 | sqrt_W=kwargs["noise_level"] * np.eye(2), 118 | x_min=sys_param["double_integrator"]["x_min"] * np.ones(2), 119 | x_max=sys_param["double_integrator"]["x_max"] * np.ones(2), 120 | u_min=sys_param["double_integrator"]["u_min"] * np.ones(1), 121 | u_max=sys_param["double_integrator"]["u_max"] * np.ones(1), 122 | barrier_thresh=0.1, 123 | randomize_std=(0.001 if kwargs["randomize"] else 0.), 124 | stabilization_only=True, 125 | **kwargs 126 | ), 127 | "tank": lambda **kwargs: LinearSystem( 128 | A=sys_param["tank"]["A"], 129 | B=sys_param["tank"]["B"], 130 | Q=sys_param["tank"]["Q"], 131 | R=sys_param["tank"]["R"], 132 | sqrt_W=kwargs["noise_level"] * np.eye(4), 133 | x_min=sys_param["tank"]["x_min"] * np.ones(4), 134 | x_max=sys_param["tank"]["x_max"] * np.ones(4), 135 | u_min=sys_param["tank"]["u_min"] * np.ones(2), 136 | u_max=sys_param["tank"]["u_max"] * np.ones(2) if not kwargs.get("skip_to_steady_state", False) else 1.0 * np.ones(2), 137 | barrier_thresh=1., 138 | randomizer=(tank_randomizer if kwargs["randomize"] else None), 139 | reward_shaping_parameters={ 140 | "steady_c1": kwargs["reward_shaping"][0], 141 | "steady_c2": kwargs["reward_shaping"][1], 142 | "steady_c3": kwargs["reward_shaping"][2], 143 | } if "reward_shaping" in kwargs else {}, 144 | initial_generator=tank_initial_generator, 145 | ref_generator=tank_ref_generator, 146 | **kwargs 147 | ), 148 | "cartpole": lambda **kwargs: CartPole( 149 | parameters={ 150 | "m_cart": [sys_param["cartpole"]["m_cart_nom"], sys_param["cartpole"]["m_cart_nom"]] if not kwargs["randomize"] else sys_param["cartpole"]["m_cart"], 151 | "m_pole": [sys_param["cartpole"]["m_pole_nom"], sys_param["cartpole"]["m_pole_nom"]] if not kwargs["randomize"] else sys_param["cartpole"]["m_pole"], 152 | "l": [sys_param["cartpole"]["l_nom"], sys_param["cartpole"]["l_nom"]] if not kwargs["randomize"] else sys_param["cartpole"]["l"], 153 | "dt": sys_param["cartpole"]["dt"], 154 | }, 155 | Q=sys_param["cartpole"]["Q"], 156 | R=sys_param["cartpole"]["R"], 157 | noise_std=kwargs["noise_level"], 158 | x_min=sys_param["cartpole"]["x_min"], 159 | x_max=sys_param["cartpole"]["x_max"], 160 | u_min=sys_param["cartpole"]["u_min"], 161 | 
u_max=sys_param["cartpole"]["u_max"], 162 | bs=kwargs["bs"], 163 | barrier_thresh=0.1, 164 | max_steps=kwargs["max_steps"], 165 | keep_stats=kwargs["keep_stats"], 166 | run_name=kwargs["run_name"], 167 | exp_name=kwargs["exp_name"], 168 | ), 169 | } 170 | -------------------------------------------------------------------------------- /src/networks/a2c_qp_unrolled.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | from rl_games.algos_torch.network_builder import NetworkBuilder, A2CBuilder 5 | from ..modules.qp_unrolled_network import QPUnrolledNetwork 6 | import atexit 7 | from datetime import datetime 8 | import os 9 | import numpy as np 10 | 11 | class A2CQPUnrolled(A2CBuilder.Network): 12 | def __init__(self, params, **kwargs): 13 | self.actions_num = kwargs.pop('actions_num') 14 | input_shape = kwargs.pop('input_shape') 15 | self.value_size = kwargs.pop('value_size', 1) 16 | 17 | NetworkBuilder.BaseNetwork.__init__(self) 18 | self.n_obs = input_shape[0] 19 | self.load(params) 20 | 21 | if self.separate: 22 | raise NotImplementedError() 23 | 24 | def mlp_builder(input_size, output_size): 25 | policy_mlp_args = { 26 | 'input_size' : input_size, 27 | 'units' : self.params["mlp"]["units"] + [output_size], 28 | 'activation' : self.activation, 29 | 'norm_func_name' : self.normalization, 30 | 'dense_func' : torch.nn.Linear, 31 | 'd2rl' : self.is_d2rl, 32 | 'norm_only_first_layer' : self.norm_only_first_layer 33 | } 34 | return self._build_mlp(**policy_mlp_args) 35 | 36 | self.policy_net = QPUnrolledNetwork( 37 | self.device, 38 | self.n_obs, 39 | self.n_qp, 40 | self.m_qp, 41 | self.qp_iter, 42 | mlp_builder, 43 | shared_PH=self.shared_PH, 44 | affine_qb=self.affine_qb, 45 | strict_affine_layer=self.strict_affine_layer, 46 | obs_has_half_ref=self.obs_has_half_ref, 47 | symmetric=self.symmetric, 48 | no_b=self.no_b, 49 | use_warm_starter=self.use_warm_starter, 50 | train_warm_starter=self.train_warm_starter, 51 | ws_loss_coef=self.ws_loss_coef, 52 | ws_update_rate=self.ws_update_rate, 53 | mpc_baseline=self.mpc_baseline, 54 | use_osqp_for_mpc=self.use_osqp_for_mpc, 55 | use_residual_loss=self.use_residual_loss, 56 | imitate_mpc=self.imitate_mpc, 57 | force_feasible=self.force_feasible, 58 | feasible_lambda=self.feasible_lambda, 59 | is_test=self.is_test, 60 | ) 61 | 62 | # TODO: exploit structure in value function? 
63 | value_mlp_args = { 64 | 'input_size' : self.n_obs, 65 | 'units' : self.params["mlp"]["units"] + [self.value_size], 66 | 'activation' : self.activation, 67 | 'norm_func_name' : self.normalization, 68 | 'dense_func' : torch.nn.Linear, 69 | 'd2rl' : self.is_d2rl, 70 | 'norm_only_first_layer' : self.norm_only_first_layer 71 | } 72 | self.value_net = self._build_mlp(**value_mlp_args) 73 | 74 | sigma_init = self.init_factory.create(**self.space_config['sigma_init']) 75 | self.sigma = nn.Parameter(torch.zeros(self.actions_num, requires_grad=True, dtype=torch.float32), requires_grad=True) 76 | 77 | mlp_init = self.init_factory.create(**self.initializer) 78 | 79 | for m in self.modules(): 80 | if isinstance(m, nn.Linear): 81 | mlp_init(m.weight) 82 | if getattr(m, "bias", None) is not None: 83 | torch.nn.init.zeros_(m.bias) 84 | 85 | sigma_init(self.sigma) 86 | 87 | # Register the cleanup method to be called at exit 88 | atexit.register(self.cleanup) 89 | 90 | def cleanup(self): 91 | # Implement the housekeeping logic here 92 | # For example, dumping internal state to a file 93 | directory = 'test_results' 94 | if not os.path.exists(directory): 95 | os.makedirs(directory) 96 | timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') 97 | if self.mpc_baseline is not None and self.use_osqp_for_mpc: 98 | # When MPC is run using OSQP, dump the iteration counts (collected by QPUnrolledNetwork) to CSV 99 | tag = f"{self.run_name}_mpc_iter_count" 100 | filename = os.path.join(directory, f"{tag}_{timestamp}.csv") 101 | iter_counts = self.policy_net.info['osqp_iter_counts'] 102 | np.savetxt(filename, iter_counts, fmt='%d') 103 | if self.mpc_baseline is not None and self.mpc_baseline.get("robust_method", None) is not None: 104 | # When robust MPC is used, dump the per-step times (collected by QPUnrolledNetwork) to CSV 105 | tag = f"{self.run_name}_running_time" 106 | filename = os.path.join(directory, f"{tag}_{timestamp}.csv") 107 | running_time = self.policy_net.info['running_time'] 108 | np.savetxt(filename, running_time, fmt='%f') 109 | 110 | def forward(self, obs_dict): 111 | obs = obs_dict['obs'] 112 | info = obs_dict.get('info', {}) 113 | mu = self.policy_net(obs, info=info)[:, :self.actions_num] 114 | value = self.value_net(obs) 115 | sigma = self.sigma 116 | states = None # reserved for RNN 117 | if self.policy_net.autonomous_losses: 118 | return mu, mu*0 + sigma, value, states, self.policy_net.autonomous_losses 119 | else: 120 | return mu, mu*0 + sigma, value, states 121 | 122 | def load(self, params): 123 | A2CBuilder.Network.load(self, params) 124 | self.params = params 125 | self.device = params["custom"]["device"] 126 | self.n_qp = params["custom"]["n_qp"] 127 | self.m_qp = params["custom"]["m_qp"] 128 | self.qp_iter = params["custom"]["qp_iter"] 129 | self.shared_PH = params["custom"]["shared_PH"] 130 | self.affine_qb = params["custom"]["affine_qb"] 131 | self.strict_affine_layer = params["custom"]["strict_affine_layer"] 132 | self.obs_has_half_ref = params["custom"]["obs_has_half_ref"] 133 | self.symmetric = params["custom"]["symmetric"] 134 | self.no_b = params["custom"]["no_b"] 135 | self.use_warm_starter = params["custom"]["use_warm_starter"] 136 | self.train_warm_starter = params["custom"]["train_warm_starter"] 137 | self.ws_loss_coef = params["custom"]["ws_loss_coef"] 138 | self.ws_update_rate = params["custom"]["ws_update_rate"] 139 | self.mpc_baseline = params["custom"]["mpc_baseline"] 140 | self.use_osqp_for_mpc = params["custom"]["use_osqp_for_mpc"] 141 | self.use_residual_loss = 
params["custom"]["use_residual_loss"] 142 | self.imitate_mpc = params["custom"]["imitate_mpc"] 143 | self.force_feasible = params["custom"]["force_feasible"] 144 | self.feasible_lambda = params["custom"]["feasible_lambda"] 145 | self.is_test = params["custom"]["train_or_test"] == "test" 146 | self.run_name = params["custom"]["run_name"] 147 | 148 | class A2CQPUnrolledBuilder(NetworkBuilder): 149 | def __init__(self, **kwargs): 150 | NetworkBuilder.__init__(self) 151 | 152 | def load(self, params): 153 | self.params = params 154 | 155 | def build(self, name, **kwargs): 156 | net = A2CQPUnrolled(self.params, **kwargs) 157 | return net 158 | -------------------------------------------------------------------------------- /experiments/tank/reproduce_disturbed.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 0. Background utils and GPU scheduler 4 | 5 | # Define the number of GPUs available 6 | NUM_GPUS=$(nvidia-smi --list-gpus | wc -l) 7 | 8 | # Function to find the first idle GPU 9 | find_idle_gpu() { 10 | for (( i=0; i<$NUM_GPUS; i++ )); do 11 | # Check if GPU volatile utilization is 0% 12 | if [ "$(nvidia-smi -i $i --query-gpu=utilization.gpu --format=csv,noheader,nounits)" -eq 0 ]; then 13 | echo $i 14 | return 15 | fi 16 | done 17 | echo "-1" # Return -1 if no idle GPU is found 18 | } 19 | 20 | find_gpu_and_run_task() { 21 | local run_task_function="$1" 22 | shift # Remove the first argument (run_task_function name) 23 | 24 | # Initialize GPU ID as -1 indicating no GPU is available initially 25 | local gpu_id=-1 26 | 27 | # Wait for an idle GPU to become available 28 | while [ "$gpu_id" -eq -1 ]; do 29 | gpu_id=$(find_idle_gpu) 30 | sleep 1 # Wait a bit before checking again 31 | done 32 | 33 | # Call the run_task function with the GPU ID and additional arguments, and send it to the background 34 | $run_task_function $gpu_id $@ > /dev/null & 35 | # $run_task_function $gpu_id $@ & 36 | 37 | # Capture the PID of the last background process 38 | local task_pid=$! 39 | 40 | # Optional: wait briefly to allow the task to start 41 | sleep 10 42 | 43 | # Output the PID 44 | echo $task_pid 45 | } 46 | 47 | 48 | # 1. Training 49 | # 1.1 MLP of different sizes 50 | 51 | train_mlp() { 52 | local gpu_id=$1 53 | local c1=$2 54 | local c2=$3 55 | local c3=$4 56 | local mlp_last_size=$5 57 | CUDA_VISIBLE_DEVICES=$gpu_id python ../../run.py train tank --num-parallel 100000 --horizon 20 --epochs 5000 --mini-epochs 1 --noise-level 0.1 --randomize --reward-shaping ${c1},${c2},${c3} --no-obs-normalization --mlp-size-last $mlp_last_size --batch-test --exp-name reproduce_disturbed_mlp_${mlp_last_size} --lr-schedule linear --initial-lr "5e-4" 58 | } 59 | 60 | # 1.2 QP of different sizes 61 | 62 | train_qp() { 63 | local gpu_id=$1 64 | local c1=$2 65 | local c2=$3 66 | local c3=$4 67 | local n_qp=$5 68 | local m_qp=$6 69 | CUDA_VISIBLE_DEVICES=$gpu_id python ../../run.py train tank --num-parallel 100000 --horizon 20 --epochs 5000 --mini-epochs 1 --noise-level 0.1 --randomize --reward-shaping ${c1},${c2},${c3} --n-qp $n_qp --m-qp $m_qp --qp-unrolled --shared-PH --affine-qb --strict-affine-layer --obs-has-half-ref --use-residual-loss --no-obs-normalization --force-feasible --batch-test --exp-name reproduce_disturbed_qp_${n_qp}_${m_qp} --lr-schedule linear --initial-lr "5e-4" 70 | } 71 | 72 | 73 | # 2. 
Testing 74 | # 2.1 MPC under different configurations 75 | 76 | test_mpc() { 77 | local gpu_id=$1 78 | local N=$2 79 | local terminal_coef=$3 80 | local robust_method=$4 81 | local max_cpu_workers=$5 82 | local tube_size=$6 # Optional sixth argument 83 | local n_qp=4 84 | local m_qp=24 85 | 86 | # Initial part of the run_name 87 | local run_name="reproduce_disturbed_mpc_${N}_${terminal_coef}_${robust_method}" 88 | 89 | # Append tube_size to run_name if it's specified 90 | if [ -n "$tube_size" ]; then 91 | run_name="${run_name}_${tube_size}" 92 | fi 93 | 94 | # Building the command 95 | local cmd="CUDA_VISIBLE_DEVICES=$gpu_id MAX_CPU_WORKERS=$5 python ../../run.py test tank --num-parallel 1000 --horizon 20 --epochs 5000 --mini-epochs 1 --noise-level 0.1 --randomize --reward-shaping 50,0.05,2 --n-qp $n_qp --m-qp $m_qp --qp-unrolled --shared-PH --affine-qb --strict-affine-layer --obs-has-half-ref --use-residual-loss --no-obs-normalization --force-feasible --batch-test --mpc-baseline-N $N --mpc-terminal-cost-coef $terminal_coef --exp-name reproduce_qp_${n_qp}_${m_qp} --run-name $run_name --lr-schedule linear --initial-lr '5e-4' --quiet" 96 | 97 | # Adding the robust mpc method flag 98 | cmd="$cmd --robust-mpc-method $robust_method" 99 | 100 | # Conditional inclusion of the use-osqp-for-mpc flag 101 | if [ "$robust_method" = "none" ]; then 102 | cmd="$cmd --use-osqp-for-mpc" 103 | fi 104 | 105 | # Conditional inclusion of the tube_size argument 106 | if [ -n "$tube_size" ]; then 107 | cmd="$cmd --tube-mpc-tube-size ${tube_size}" 108 | fi 109 | 110 | # Execute the command 111 | eval $cmd 112 | } 113 | 114 | 115 | 116 | test_mpc_bg() { 117 | test_mpc $@ > /dev/null & 118 | sleep 10 119 | } 120 | 121 | test_mpc_all() { 122 | test_mpc_bg 0 16 10 none 8 123 | test_mpc_bg 1 16 10 scenario 224 124 | test_mpc_bg 2 16 10 tube 100 0.05 125 | test_mpc_bg 2 16 10 tube 100 0.1 126 | test_mpc_bg 2 16 10 tube 100 0.2 127 | test_mpc_bg 2 16 10 tube 100 0.25 128 | test_mpc_bg 2 16 10 tube 100 0.3 129 | wait 130 | } 131 | 132 | # 2.2 MLP 133 | 134 | test_mlp() { 135 | local gpu_id=$1 136 | local c1=$2 137 | local c2=$3 138 | local c3=$4 139 | local mlp_last_size=$5 140 | CUDA_VISIBLE_DEVICES=$gpu_id python ../../run.py test tank --num-parallel 1000 --horizon 20 --epochs 5000 --mini-epochs 1 --noise-level 0.1 --randomize --reward-shaping ${c1},${c2},${c3} --no-obs-normalization --mlp-size-last $mlp_last_size --batch-test --exp-name reproduce_disturbed_mlp_${mlp_last_size} --lr-schedule linear --initial-lr "5e-4" 141 | } 142 | 143 | # 2.3 QP 144 | 145 | test_qp() { 146 | local gpu_id=$1 147 | local c1=$2 148 | local c2=$3 149 | local c3=$4 150 | local n_qp=$5 151 | local m_qp=$6 152 | CUDA_VISIBLE_DEVICES=$gpu_id python ../../run.py test tank --num-parallel 1000 --horizon 20 --epochs 5000 --mini-epochs 1 --noise-level 0.1 --randomize --reward-shaping ${c1},${c2},${c3} --n-qp $n_qp --m-qp $m_qp --qp-unrolled --shared-PH --affine-qb --strict-affine-layer --obs-has-half-ref --use-residual-loss --no-obs-normalization --force-feasible --batch-test --exp-name reproduce_disturbed_qp_${n_qp}_${m_qp} --lr-schedule linear --initial-lr "5e-4" 153 | } 154 | 155 | # Utility function for train and test 156 | train_and_test() { 157 | local train_function="$1" 158 | shift 159 | local test_function="$1" 160 | shift 161 | 162 | train_pid=$(find_gpu_and_run_task $train_function $@) 163 | while [ -e /proc/$train_pid ]; do 164 | sleep 1 165 | done 166 | test_pid=$(find_gpu_and_run_task $test_function $@) 167 | while [ -e 
/proc/$test_pid ]; do 168 | sleep 1 169 | done 170 | } 171 | 172 | run_and_delay() { 173 | local run_function="$1" 174 | shift 175 | 176 | $run_function $@ & 177 | local run_pid=$! 178 | sleep 10 179 | echo $run_pid 180 | } 181 | 182 | # Finally run all the tasks 183 | 184 | run_and_delay test_mpc_all 185 | run_and_delay train_and_test train_mlp test_mlp 50 0.05 2 8 186 | run_and_delay train_and_test train_mlp test_mlp 50 0.05 2 16 187 | run_and_delay train_and_test train_mlp test_mlp 50 0.05 2 32 188 | run_and_delay train_and_test train_mlp test_mlp 50 0.05 2 64 189 | run_and_delay train_and_test train_mlp test_mlp 50 0.05 2 128 190 | run_and_delay train_and_test train_qp test_qp 50 0.05 2 4 24 191 | run_and_delay train_and_test train_qp test_qp 50 0.05 2 8 48 192 | run_and_delay train_and_test train_qp test_qp 50 0.05 2 16 96 193 | run_and_delay train_and_test train_qp test_qp 50 0.05 2 32 192 194 | 195 | wait 196 | 197 | python reproduce_table_disturbed.py 198 | -------------------------------------------------------------------------------- /src/utils/geometry.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from scipy.spatial import ConvexHull, HalfspaceIntersection 4 | from scipy.optimize import linprog 5 | from itertools import combinations 6 | 7 | 8 | def find_interior_point(A, b): 9 | """ 10 | Find an interior point of the polytope defined by Ax <= b using linear programming. 11 | 12 | Parameters: 13 | - A (numpy.ndarray): Coefficient matrix for inequalities. 14 | - b (numpy.ndarray): RHS vector for inequalities. 15 | 16 | Returns: 17 | - interior_point (numpy.ndarray): A point inside the polytope, or None if LP is infeasible. 18 | """ 19 | num_vars = A.shape[1] 20 | 21 | # Objective function: zero coefficients as we only need a feasible solution 22 | c = np.zeros(num_vars) 23 | 24 | # Inequality constraints: Ax <= b 25 | eps = 1e-4 26 | A_ineq = A 27 | b_ineq = b - eps 28 | 29 | # Run linear programming to find a feasible point 30 | res = linprog(c, A_ub=A_ineq, b_ub=b_ineq, bounds=(None, None), method='highs') 31 | 32 | if res.success: 33 | from icecream import ic; ic(res.x) 34 | return res.x 35 | else: 36 | return None 37 | 38 | 39 | def find_supporting_hyperplanes(vertices_2D): 40 | """ 41 | Given a set of 2D vertices, find the supporting hyperplanes of the convex hull. 42 | 43 | Parameters: 44 | - vertices_2D (numpy.ndarray): 2D vertices of the polytope. 45 | 46 | Returns: 47 | - A_2D (numpy.ndarray): The coefficient matrix for the 2D inequalities. 48 | - b_2D (numpy.ndarray): The constant terms for the 2D inequalities. 49 | """ 50 | A_list = [] 51 | b_list = [] 52 | hull = ConvexHull(vertices_2D) 53 | centroid = np.mean(vertices_2D, axis=0) 54 | 55 | for simplex in hull.simplices: 56 | v1, v2 = vertices_2D[simplex] 57 | edge = v2 - v1 58 | normal = np.array([-edge[1], edge[0]]) 59 | normal = normal / np.linalg.norm(normal) 60 | 61 | # Choose the direction of the normal so that it points away from the centroid of the polytope 62 | if np.dot(normal, centroid - v1) > 0: 63 | normal = -normal 64 | 65 | b = np.dot(normal, v1) 66 | A_list.append(normal) 67 | b_list.append(b) 68 | 69 | return np.array(A_list), np.array(b_list) 70 | 71 | 72 | 73 | def high_dim_to_2D(A, b): 74 | """ 75 | Converts a high-dimensional polytope {x | Ax <= b} to its 2D projection {x | A_proj x <= b_proj}. 
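The projection works by enumerating the vertices of the high-dimensional polytope
(solving each n-by-n subsystem of constraint rows and keeping the feasible
solutions), taking the first two coordinates of every vertex, and recovering the
supporting hyperplanes of the resulting 2D convex hull.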
76 | 77 | Parameters: 78 | - A (numpy.ndarray): The coefficient matrix for the high-dimensional inequalities. 79 | - b (numpy.ndarray): The constant terms for the high-dimensional inequalities. 80 | 81 | Returns: 82 | - A_2D (numpy.ndarray): The coefficient matrix for the 2D inequalities. 83 | - b_2D (numpy.ndarray): The constant terms for the 2D inequalities. 84 | """ 85 | def find_high_dim_vertices(A, b): 86 | n = A.shape[1] 87 | m = A.shape[0] 88 | vertices = [] 89 | for idx in combinations(range(m), n): 90 | A_sub = A[idx, :] 91 | b_sub = b[list(idx)] 92 | if np.linalg.matrix_rank(A_sub) == n: 93 | try: 94 | x = np.linalg.solve(A_sub, b_sub) 95 | except np.linalg.LinAlgError: 96 | continue 97 | if all(np.dot(A, x) <= b + 1e-9): 98 | vertices.append(x) 99 | return np.array(vertices) 100 | 101 | # Step 1: Find high-dimensional vertices 102 | vertices_high_dim = find_high_dim_vertices(A, b) 103 | 104 | # Step 2: Project to 2D 105 | vertices_2D = vertices_high_dim[:, :2] 106 | 107 | # Step 3: Find supporting hyperplanes in 2D 108 | A_2D, b_2D = find_supporting_hyperplanes(vertices_2D) 109 | 110 | return A_2D, b_2D 111 | 112 | 113 | def high_dim_to_2D_sampling(A, b, grid_size=50, x_range=(-1, 1)): 114 | """ 115 | Converts a high-dimensional polytope {x | Ax <= b} to its 2D projection {x | A_proj x <= b_proj} 116 | using a sampling-based approximation method. 117 | 118 | Parameters: 119 | - A (numpy.ndarray): The coefficient matrix for the high-dimensional inequalities. 120 | - b (numpy.ndarray): The constant terms for the high-dimensional inequalities. 121 | - grid_size (int): The number of grid points along each dimension in the sampling grid. 122 | - x_range (tuple): The range (min, max) for both x1 and x2 in the 2D plane. 123 | 124 | Returns: 125 | - A_2D (numpy.ndarray): The coefficient matrix for the 2D inequalities. 126 | - b_2D (numpy.ndarray): The constant terms for the 2D inequalities. 127 | """ 128 | 129 | def sample_based_projection_LP(A, b, x1_range, x2_range, grid_size): 130 | x1_min, x1_max = x1_range 131 | x2_min, x2_max = x2_range 132 | x1_vals = np.linspace(x1_min, x1_max, grid_size) 133 | x2_vals = np.linspace(x2_min, x2_max, grid_size) 134 | grid_points = np.array([[x1, x2] for x1 in x1_vals for x2 in x2_vals]) 135 | feasible_points = [] 136 | for point in grid_points: 137 | x_dim = np.zeros(A.shape[1]) 138 | x_dim[:2] = point 139 | c = np.zeros(A.shape[1] - 2) 140 | A_ub = A[:, 2:] 141 | b_ub = b - np.dot(A[:, :2], point) 142 | res = linprog(c, A_ub=A_ub, b_ub=b_ub, bounds=(None, None), method='highs') 143 | if res.success: 144 | feasible_points.append(point) 145 | feasible_points = np.array(feasible_points) 146 | if feasible_points.shape[0] < 3: 147 | return "Insufficient feasible points for a 2D polytope." 148 | hull = ConvexHull(feasible_points) 149 | vertices = hull.points[hull.vertices] 150 | return vertices 151 | 152 | # Step 1: Sample points and find the approximated vertices in 2D 153 | vertices_approx = sample_based_projection_LP(A, b, x_range, x_range, grid_size) 154 | 155 | # Step 2: Find supporting hyperplanes in 2D 156 | A_2D, b_2D = find_supporting_hyperplanes(vertices_approx) 157 | 158 | return A_2D, b_2D 159 | 160 | 161 | def partial_minimization_2D(P, q): 162 | """ 163 | Performs partial minimization over dimensions starting from 3 to obtain a 2D quadratic function. 164 | 165 | Parameters: 166 | - P (numpy.ndarray): The coefficient matrix for the high-dimensional quadratic function. 
167 | - q (numpy.ndarray): The coefficient vector for the high-dimensional quadratic function. 168 | 169 | Returns: 170 | - P_2D (numpy.ndarray): The 2x2 coefficient matrix for the resulting 2D quadratic function. 171 | - q_2D (numpy.ndarray): The 2D coefficient vector for the resulting 2D quadratic function. 172 | - c (float): The constant bias term for the resulting 2D quadratic function. 173 | """ 174 | # Decompose P into P11, P12, P21, P22 175 | P11 = P[:2, :2] 176 | P12 = P[:2, 2:] 177 | P21 = P[2:, :2] 178 | P22 = P[2:, 2:] 179 | 180 | # Decompose q into q1 and q2 181 | q1 = q[:2] 182 | q2 = q[2:] 183 | 184 | # Compute the 2D quadratic function parameters 185 | P_2D = P11 - P12 @ np.linalg.inv(P22) @ P21 186 | q_2D = q1 - P12 @ np.linalg.inv(P22) @ q2 187 | c = -0.5 * q2 @ np.linalg.inv(P22) @ q2 188 | 189 | return P_2D, q_2D, c 190 | -------------------------------------------------------------------------------- /experiments/cartpole/visualize_trajectories.py: -------------------------------------------------------------------------------- 1 | # %% Specify test case 2 | import numpy as np 3 | 4 | # Initial position and reference position 5 | x0 = 0. 6 | x_ref = 1. 7 | 8 | # Controlling process noise and parametric uncertainty 9 | noise_level = 0 10 | parametric_uncertainty = True 11 | parameter_randomization_seed = 42 12 | 13 | # %% Set up test bench 14 | import sys 15 | import os 16 | file_path = os.path.dirname(__file__) 17 | sys.path.append(os.path.join(file_path, "../..")) 18 | 19 | from envs.env_creators import sys_param, env_creators 20 | from envs.mpc_baseline_parameters import get_mpc_baseline_parameters 21 | from modules.qp_unrolled_network import QPUnrolledNetwork 22 | import torch 23 | from matplotlib import pyplot as plt 24 | 25 | 26 | # Utilities 27 | 28 | def obs_to_state(obs): 29 | # Convert obs in batch size 1 in form (x, x_ref, x_dot, sin(theta), cos(theta), theta_dot) to state (x, x_dot, theta, theta_dot) 30 | x, x_ref, x_dot, sin_theta, cos_theta, theta_dot = obs[:, 0], obs[:, 1], obs[:, 2], obs[:, 3], obs[:, 4], obs[:, 5] 31 | theta = torch.atan2(sin_theta, cos_theta) 32 | return torch.stack([x, x_dot, theta, theta_dot], dim=1).squeeze(0) 33 | 34 | def make_obs(state, x_ref, running_mean, running_std, normalize): 35 | x, x_dot, theta, theta_dot = state 36 | raw_obs = torch.tensor(np.array([x, x_ref, x_dot, np.sin(theta), np.cos(theta), theta_dot]), device=device, dtype=torch.float) 37 | if not normalize: 38 | return raw_obs.unsqueeze(0) 39 | else: 40 | return ((raw_obs - running_mean) / running_std).unsqueeze(0) 41 | 42 | def get_state_dict(checkpoint_path): 43 | checkpoint = torch.load(checkpoint_path) 44 | model = checkpoint["model"] 45 | prefix = "a2c_network.policy_net." 46 | policy_net_state_dict = {k[len(prefix):]: v for (k, v) in model.items() if k.startswith(prefix)}  # slice off the prefix exactly; str.lstrip(prefix) strips any leading characters in the set and can mangle keys 47 | running_mean = model["running_mean_std.running_mean"].to(dtype=torch.float) 48 | running_std = model["running_mean_std.running_var"].sqrt().to(dtype=torch.float) 49 | return policy_net_state_dict, running_mean, running_std 50 | 51 | def rescale_action(action, low=-1., high=8.): 52 | action = action.clamp(-1., 1.) 
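# Affine map from the policy's normalized output range [-1, 1] to [low, high]:
# u = low + (high - low) * (a + 1) / 2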
53 | return low + (high - low) * (action + 1) / 2 54 | 55 | t = lambda arr: torch.tensor(arr, device=device, dtype=torch.float).unsqueeze(0) 56 | a = lambda t: t.detach().cpu().numpy() 57 | 58 | # Constants and options 59 | n_sys = 4 60 | m_sys = 1 61 | input_size = 6 62 | n = 16 63 | m = 32 64 | qp_iter = 10 65 | device = "cuda:0" 66 | 67 | 68 | # # Learned QP 69 | # net = QPUnrolledNetwork(device, input_size, n, m, qp_iter, None, True, True) 70 | exp_name = f"shared_affine_noise{noise_level}_n{n}_m{m}" 71 | # if parametric_uncertainty: 72 | # exp_name += "+rand" 73 | # checkpoint_path = f"runs/tank_{exp_name}/nn/tank.pth" 74 | # policy_net_state_dict, running_mean, running_std = get_state_dict(checkpoint_path) 75 | # net.load_state_dict(policy_net_state_dict) 76 | # running_mean, running_std = running_mean.to(device=device), running_std.to(device=device) 77 | # net.to(device) 78 | 79 | # MPC module 80 | mpc_module = QPUnrolledNetwork( 81 | device, input_size, n, m, qp_iter, None, True, True, 82 | mpc_baseline=get_mpc_baseline_parameters("cartpole", 8), 83 | use_osqp_for_mpc=True, 84 | ) 85 | 86 | # Environment 87 | env = env_creators["cartpole"]( 88 | noise_level=noise_level, 89 | bs=1, 90 | max_steps=300, 91 | keep_stats=True, 92 | run_name=exp_name, 93 | exp_name=exp_name, 94 | randomize=parametric_uncertainty, 95 | ) 96 | 97 | # %% MLP Policy 98 | import sys 99 | mlp_exp_name = f"mlp_noise{noise_level}" 100 | if parametric_uncertainty: 101 | mlp_exp_name += "+rand" 102 | sys.argv = [""] + f"""test tank --num-parallel 1 \ 103 | --noise-level {noise_level} \ 104 | --exp-name {mlp_exp_name}""".split() 105 | import run 106 | mlp_checkpoint_path = f"runs/tank_{mlp_exp_name}/nn/tank.pth" 107 | mlp_player = run.runner.create_player() 108 | mlp_player.restore(mlp_checkpoint_path) 109 | 110 | # %% Test for MPC 111 | raw_obs = env.reset(t(x0), t(x_ref), randomize_seed=parameter_randomization_seed) 112 | done = False 113 | 114 | 115 | xs_mpc = [obs_to_state(raw_obs)] 116 | us_mpc = [] 117 | 118 | while not done: 119 | u_all, problem_params = mpc_module(raw_obs, return_problem_params=True) 120 | u = u_all[:, :m_sys] 121 | raw_obs, reward, done_t, info = env.step(u) 122 | xs_mpc.append(obs_to_state(raw_obs)) 123 | us_mpc.append(u[0, :]) 124 | obs = raw_obs 125 | done = done_t.item() 126 | 127 | # %% Test for learned QP 128 | xs_qp = [t(x0).squeeze(0)] 129 | us_qp = [] 130 | done = False 131 | env.reset(t(x0), t(x_ref), randomize_seed=parameter_randomization_seed) 132 | x = x0 133 | obs = make_obs(x, x_ref, running_mean, running_std, True) 134 | while not done: 135 | action_all, problem_params = net(obs, return_problem_params=True) 136 | u = rescale_action(action_all[:, :m_sys]) 137 | raw_obs, reward, done_t, info = env.step(u) 138 | xs_qp.append(raw_obs[0, :4]) 139 | us_qp.append(u[0, :]) 140 | obs = (raw_obs - running_mean) / running_std 141 | done = done_t.item() 142 | 143 | # %% Test for MLP 144 | xs_mlp = [t(x0).squeeze(0)] 145 | us_mlp = [] 146 | done = False 147 | env.reset(t(x0), t(x_ref), randomize_seed=parameter_randomization_seed) 148 | x = x0 149 | obs = make_obs(x, x_ref, running_mean, running_std, False) 150 | while not done: 151 | action = mlp_player.get_action(obs.squeeze(0), is_deterministic=True) 152 | obs, reward, done_t, info = env.step(action.unsqueeze(0)) 153 | xs_mlp.append(obs[0, :4]) 154 | us_mlp.append(action) 155 | done = done_t.item() 156 | 157 | # %% Plot 1: cost curve 158 | cost_mpc = [env.cost(x - t(x_ref), u.unsqueeze(0)).item() for (x, u) in zip(xs_mpc, us_mpc)] 159 
| cost_qp = [env.cost(x - t(x_ref), u.unsqueeze(0)).item() for (x, u) in zip(xs_qp, us_qp)] 160 | cost_mlp = [env.cost(x - t(x_ref), u.unsqueeze(0)).item() for (x, u) in zip(xs_mlp, us_mlp)] 161 | 162 | # Compute the baseline 163 | baseline = min(min(cost_mpc), min(cost_qp), min(cost_mlp)) - 1e-2 164 | 165 | # Deduct the baseline from each data series 166 | cost_mpc_baseline = np.array(cost_mpc) - baseline 167 | cost_qp_baseline = np.array(cost_qp) - baseline 168 | cost_mlp_baseline = np.array(cost_mlp) - baseline 169 | 170 | # Plotting 171 | plt.title("Per-step LQ cost") 172 | plt.plot(cost_mpc_baseline, label="MPC") 173 | plt.plot(cost_qp_baseline, label="QP") 174 | plt.plot(cost_mlp_baseline, label="MLP") 175 | 176 | # Set y-axis to log scale 177 | plt.yscale('log') 178 | 179 | # Modify tick labels to show the true value 180 | yticks = plt.yticks()[0] 181 | plt.yticks(yticks, [f"{y + baseline:.0e}" for y in yticks]) 182 | 183 | plt.legend() 184 | 185 | # %% Plot 2: Trajectory 186 | # Create a 3-row, 2-column matrix of subplots 187 | fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(10, 12)) 188 | 189 | # Example to populate the subplots 190 | for i in range(2): 191 | for j in range(2): 192 | ax = axes[i, j] 193 | subscript = 2 * i + j 194 | ax.plot([a(xs_mpc[k][subscript]) for k in range(len(xs_mpc))], label="MPC") 195 | # ax.plot([a(xs_qp[k][subscript]) for k in range(len(xs_qp))], label="QP") 196 | # ax.plot([a(xs_mlp[k][subscript]) for k in range(len(xs_mlp))], label="MLP") 197 | if subscript == 0: 198 | ax.axhline(y=x_ref, color='r', linestyle='--', label='Ref') 199 | ax.legend() 200 | ax.set_title(['x', 'x_dot', 'theta', 'theta_dot'][subscript]) 201 | 202 | i = 2 203 | for j in range(1): 204 | ax = axes[i, j] 205 | ax.plot([a(us_mpc[k][j]) for k in range(len(us_mpc))], label="MPC") 206 | # ax.plot([a(us_qp[k][j]) for k in range(len(us_qp))], label="QP") 207 | # ax.plot([a(us_mlp[k][j]) for k in range(len(us_mlp))], label="MLP") 208 | ax.legend() 209 | ax.set_title(f'f') 210 | 211 | plt.tight_layout() 212 | plt.show() 213 | 214 | # %% 215 | -------------------------------------------------------------------------------- /experiments/tank/visualize_trajectories.py: -------------------------------------------------------------------------------- 1 | # %% Specify test case 2 | import numpy as np 3 | 4 | # # Case where MPC is better 5 | x0 = np.array([10., 10., 10., 10.]) 6 | # x_ref = np.array([19, 19, 2., 2.]) 7 | x_ref = np.array([13, 17, 3, 2.]) 8 | 9 | x0 = np.array([ 1.5112903, 5.738173, 10.417226, 4.5608387]) 10 | x_ref = np.array([1.1293532, 1.9881264, 1. , 1. ]) 11 | 12 | # Case where MPC fails 13 | # x0 = np.array([ 5.4963946, 10.947876, 1.034516, 18.08066 ]) 14 | # x_ref = np.array([7.522859, 8.169776, 1.1107684, 1. 
]) 15 | 16 | # Controlling process noise and parametric uncertainty 17 | noise_level = 0 18 | parametric_uncertainty = False 19 | parameter_randomization_seed = 2 20 | 21 | # %% Set up test bench 22 | import sys 23 | import os 24 | file_path = os.path.dirname(__file__) 25 | sys.path.append(os.path.join(file_path, "../..")) 26 | 27 | from src.envs.env_creators import sys_param, env_creators 28 | from src.envs.mpc_baseline_parameters import get_mpc_baseline_parameters 29 | from src.modules.qp_unrolled_network import QPUnrolledNetwork 30 | import torch 31 | from matplotlib import pyplot as plt 32 | 33 | 34 | # Utilities 35 | 36 | def make_obs(x, x_ref, running_mean, running_std, normalize): 37 | raw_obs = torch.tensor(np.concatenate([x, x_ref]), device=device, dtype=torch.float) 38 | if not normalize: 39 | return raw_obs.unsqueeze(0) 40 | else: 41 | return ((raw_obs - running_mean) / running_std).unsqueeze(0) 42 | 43 | def get_state_dict(checkpoint_path): 44 | checkpoint = torch.load(checkpoint_path) 45 | model = checkpoint["model"] 46 | prefix = "a2c_network.policy_net." 47 | policy_net_state_dict = {k[len(prefix):]: v for (k, v) in model.items() if k.startswith(prefix)}  # slice off the prefix exactly; str.lstrip(prefix) strips any leading characters in the set and can mangle keys 48 | if "running_mean_std.running_mean" in model: 49 | running_mean = model["running_mean_std.running_mean"].to(dtype=torch.float) 50 | running_std = model["running_mean_std.running_var"].sqrt().to(dtype=torch.float) 51 | else: 52 | running_mean = torch.tensor([0.]) 53 | running_std = torch.tensor([1.]) 54 | return policy_net_state_dict, running_mean, running_std 55 | 56 | def rescale_action(action, low=-1., high=8.): 57 | action = action.clamp(-1., 1.) 58 | return low + (high - low) * (action + 1) / 2 59 | 60 | t = lambda arr: torch.tensor(arr, device=device, dtype=torch.float).unsqueeze(0) 61 | a = lambda t: t.detach().cpu().numpy() 62 | 63 | # Constants and options 64 | n_sys = 4 65 | m_sys = 2 66 | input_size = 8 # 4 for x, 4 for x_ref 67 | n = 2 68 | m = 64 69 | qp_iter = 10 70 | device = "cuda:0" 71 | 72 | 73 | # Learned QP 74 | net = QPUnrolledNetwork(device, input_size, n, m, qp_iter, None, True, True) 75 | # exp_name = f"shared_affine_noise{noise_level}_n{n}_m{m}-norm" 76 | exp_name = "residual_loss_on" 77 | if parametric_uncertainty: 78 | exp_name += "+rand" 79 | checkpoint_path = f"runs/tank_{exp_name}/nn/tank.pth" 80 | policy_net_state_dict, running_mean, running_std = get_state_dict(checkpoint_path) 81 | net.load_state_dict(policy_net_state_dict) 82 | running_mean, running_std = running_mean.to(device=device), running_std.to(device=device) 83 | net.to(device) 84 | 85 | # MPC module 86 | mpc_module = QPUnrolledNetwork( 87 | device, input_size, n, m, qp_iter, None, True, True, 88 | mpc_baseline=get_mpc_baseline_parameters("tank", 10), 89 | use_osqp_for_mpc=False, 90 | ) 91 | 92 | # Environment 93 | env = env_creators["tank"]( 94 | noise_level=noise_level, 95 | bs=1, 96 | max_steps=300, 97 | keep_stats=True, 98 | run_name=exp_name, 99 | exp_name=exp_name, 100 | randomize=parametric_uncertainty, 101 | ) 102 | 103 | # %% MLP Policy 104 | import sys 105 | mlp_exp_name = f"mlp_noise{noise_level}" 106 | if parametric_uncertainty: 107 | mlp_exp_name += "+rand" 108 | sys.argv = [""] + f"""test tank --num-parallel 1 \ 109 | --noise-level {noise_level} \ 110 | --exp-name {mlp_exp_name}""".split() 111 | import run 112 | mlp_checkpoint_path = f"runs/tank_{mlp_exp_name}/nn/tank.pth" 113 | mlp_player = run.runner.create_player() 114 | mlp_player.restore(mlp_checkpoint_path) 115 | 116 | # %% Test for MPC 117 | env.reset(t(x0), 
t(x_ref), randomize_seed=parameter_randomization_seed) 118 | done = False 119 | x = x0 120 | obs = make_obs(x, x_ref, running_mean, running_std, False) 121 | 122 | 123 | xs_mpc = [obs[0, :4]] 124 | us_mpc = [] 125 | 126 | while not done: 127 | u_all, problem_params = mpc_module(obs, return_problem_params=True) 128 | u = u_all[:, :m_sys] 129 | raw_obs, reward, done_t, info = env.step(u) 130 | xs_mpc.append(raw_obs[0, :4]) 131 | us_mpc.append(u[0, :]) 132 | obs = raw_obs 133 | done = done_t.item() 134 | 135 | # %% Test for learned QP 136 | xs_qp = [t(x0).squeeze(0)] 137 | us_qp = [] 138 | done = False 139 | env.reset(t(x0), t(x_ref), randomize_seed=parameter_randomization_seed) 140 | x = x0 141 | obs = make_obs(x, x_ref, running_mean, running_std, True) 142 | while not done: 143 | action_all, problem_params = net(obs, return_problem_params=True) 144 | u = rescale_action(action_all[:, :m_sys]) 145 | raw_obs, reward, done_t, info = env.step(u) 146 | xs_qp.append(raw_obs[0, :4]) 147 | us_qp.append(u[0, :]) 148 | obs = (raw_obs - running_mean) / running_std 149 | done = done_t.item() 150 | 151 | # %% Test for MLP 152 | xs_mlp = [t(x0).squeeze(0)] 153 | us_mlp = [] 154 | done = False 155 | env.reset(t(x0), t(x_ref), randomize_seed=parameter_randomization_seed) 156 | x = x0 157 | obs = make_obs(x, x_ref, running_mean, running_std, False) 158 | while not done: 159 | action = mlp_player.get_action(obs.squeeze(0), is_deterministic=True) 160 | obs, reward, done_t, info = env.step(action.unsqueeze(0)) 161 | xs_mlp.append(obs[0, :4]) 162 | us_mlp.append(action) 163 | done = done_t.item() 164 | 165 | # %% Plot 1: cost curve 166 | cost_mpc = [env.cost(x - t(x_ref), u.unsqueeze(0)).item() for (x, u) in zip(xs_mpc, us_mpc)] 167 | cost_qp = [env.cost(x - t(x_ref), u.unsqueeze(0)).item() for (x, u) in zip(xs_qp, us_qp)] 168 | cost_mlp = [env.cost(x - t(x_ref), u.unsqueeze(0)).item() for (x, u) in zip(xs_mlp, us_mlp)] 169 | 170 | # Compute the baseline 171 | baseline = min(min(cost_mpc), min(cost_qp), min(cost_mlp)) - 1e-2 172 | 173 | # Deduct the baseline from each data series 174 | cost_mpc_baseline = np.array(cost_mpc) - baseline 175 | cost_qp_baseline = np.array(cost_qp) - baseline 176 | cost_mlp_baseline = np.array(cost_mlp) - baseline 177 | 178 | # Plotting 179 | plt.title("Per-step LQ cost") 180 | plt.plot(cost_mpc_baseline, label="MPC") 181 | plt.plot(cost_qp_baseline, label="QP") 182 | plt.plot(cost_mlp_baseline, label="MLP") 183 | 184 | # Set y-axis to log scale 185 | plt.yscale('log') 186 | 187 | # Modify tick labels to show the true value 188 | yticks = plt.yticks()[0] 189 | plt.yticks(yticks, [f"{y + baseline:.0e}" for y in yticks]) 190 | 191 | plt.legend() 192 | 193 | # %% Plot 2: Trajectory 194 | # Create a 3-row, 2-column matrix of subplots 195 | fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(10, 12)) 196 | 197 | # Example to populate the subplots 198 | for i in range(2): 199 | for j in range(2): 200 | ax = axes[i, j] 201 | subscript = 2 * i + j 202 | ax.plot([a(xs_mpc[k][subscript]) for k in range(len(xs_mpc))], label="MPC") 203 | ax.plot([a(xs_qp[k][subscript]) for k in range(len(xs_qp))], label="Learned QP") 204 | # ax.plot([a(xs_mlp[k][subscript]) for k in range(len(xs_mlp))], label="MLP") 205 | ax.axhline(y=x_ref[subscript], color='r', linestyle='--', label='Ref') 206 | ax.legend() 207 | ax.set_title(f'x_{subscript+1}') 208 | 209 | i = 2 210 | for j in range(2): 211 | ax = axes[i, j] 212 | ax.plot([a(us_mpc[k][j]) for k in range(len(us_mpc))], label="MPC") 213 | 
ax.plot([a(us_qp[k][j]) for k in range(len(us_qp))], label="QP") 214 | ax.plot([a(us_mlp[k][j]) for k in range(len(us_mlp))], label="MLP") 215 | ax.legend() 216 | ax.set_title(f'u_{j+1}') 217 | 218 | plt.tight_layout() 219 | plt.show() 220 | 221 | # %% 222 | -------------------------------------------------------------------------------- /experiments/double_integrator/verify_stability.jl: -------------------------------------------------------------------------------- 1 | ## Read parameters 2 | 3 | using NPZ 4 | 5 | data = npzread("parameters.npz") 6 | W_q = data["Wq"] 7 | P = data["P"] 8 | A_MCI = data["A_MCI"] 9 | b_MCI = data["b_MCI"] 10 | H = data["H"] 11 | A = data["A"] 12 | B = data["B"] 13 | m_mci, n_sys = size(A_MCI) 14 | n_qp = size(P, 1) 15 | m_qp = size(H, 1) 16 | m_sys = size(B, 2) 17 | 18 | norm_factor = 0.5 19 | 20 | ## Define the candidate invariant set to be tested 21 | relax = 0.2 22 | G = A_MCI 23 | c = b_MCI .- relax 24 | 25 | ## Formulate the problem of verifying whether a set is invariant into a bilevel optimization problem, and try solving using BilevelJuMP 26 | 27 | using JuMP 28 | using BilevelJuMP 29 | using Ipopt 30 | 31 | # Define the bilevel model 32 | blmodel = BilevelModel(Ipopt.Optimizer; mode = BilevelJuMP.ProductMode(1e-9)) 33 | 34 | # Upper level 35 | @variable(Upper(blmodel), x[1:n_sys], start = 1) 36 | @variable(Upper(blmodel), λ[1:m_mci] >= 0) 37 | @variable(Lower(blmodel), u[1:n_qp]) 38 | @constraint(Upper(blmodel), sum(λ) == 1) 39 | @constraint(Upper(blmodel), G * x .<= c) 40 | @objective(Upper(blmodel), Min, -λ' * (G * (A * x + norm_factor * B * u[1:m_sys]) - c)) 41 | 42 | 43 | # Lower level 44 | @constraint(Lower(blmodel), H*u .<= 1.) 45 | @constraint(Lower(blmodel), -1. .<= H*u) 46 | @constraint(Lower(blmodel), u[1:m_sys] .<= 1.) 47 | @constraint(Lower(blmodel), -1. .<= u[1:m_sys]) 48 | @objective(Lower(blmodel), Min, 0.5 * u' * P * u + x' * W_q' * u) 49 | 50 | # Solve the bilevel problem 51 | optimize!(blmodel) 52 | 53 | # Extract results 54 | optimal_value = objective_value(blmodel) # Is it correct? 
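# Note on the sign convention: the upper level minimizes -λ' * (G * (A x + norm_factor * B u) - c),
# i.e. it maximizes the worst-case successor-state constraint value over the candidate set {x : G x <= c}.
# Invariance of the candidate set under the learned QP policy therefore corresponds to the *global*
# minimum of this problem being nonnegative. Since Ipopt only returns local solutions, a nonnegative
# value here is evidence rather than a certificate, which motivates the lower-bounding attempts
# (SDP and SOS relaxations) further below; the value is also recomputed explicitly from
# value.(x), value.(u), value.(λ) rather than relying on objective_value alone.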
55 | # @show optimal_value 56 | @show value.(x) 57 | @show value.(u) 58 | @show value.(λ) 59 | optimal_value = -value.(λ)' * (G * (A * value.(x) + norm_factor * B * value.(u)[1:m_sys]) - c) 60 | @show optimal_value 61 | 62 | ## Visualize 63 | using Polyhedra, CDDLib, Plots, Statistics 64 | 65 | function sort_vertices(vertices) 66 | # Calculate centroid 67 | centroid = mean(vertices, dims=1) 68 | 69 | vertices = [vertices[i, :] for i in 1:size(vertices, 1)] 70 | 71 | # Sort vertices based on polar angle from centroid 72 | sorted_vertices = sort(vertices, by = p -> atan(p[2] - centroid[2], p[1] - centroid[1])) 73 | 74 | return sorted_vertices 75 | end 76 | 77 | function plot_polytope(A, b, fig, label) 78 | poly = polyhedron(hrep(A, b), CDDLib.Library()) 79 | v = sort_vertices(hcat(points(vrep(poly))...)') 80 | x_coords = [x[1] for x in v] 81 | y_coords = [x[2] for x in v] 82 | 83 | Plots.scatter!(fig, x_coords, y_coords, label = label) 84 | 85 | for i = 1:length(v) 86 | Plots.plot!(fig, [x_coords[i], x_coords[(i % length(v)) + 1]], [y_coords[i], y_coords[(i % length(v)) + 1]], color="black", label="") 87 | end 88 | fig 89 | end 90 | 91 | fig = Plots.plot() 92 | plot_polytope(A_MCI, b_MCI, fig, "MCI") 93 | plot_polytope(G, c, fig, "Verified") 94 | Plots.scatter!(fig, [value.(x)[1]], [value.(x)[2]], label = "Worst case", color = "green") 95 | 96 | ## Try SDP lower bound with Lagrangian relaxation 97 | using JuMP, SCS 98 | using LinearAlgebra 99 | 100 | my = 2 * m_mci + 2 # Number of constraints for outer problem 101 | mx = 2 * m_qp + 2 * m_sys # Number of constraints for inner problem 102 | ny = n_sys + m_mci # Number of variables for outer problem 103 | nx = n_qp # Number of variables for inner problem 104 | 105 | E = [Matrix(1.0I, m_sys, m_sys) zeros(m_sys, n_qp - m_sys)] # Matrix for extracting u from QP solution 106 | Rxx = zeros(n_qp, n_qp) 107 | Rxy = [zeros(n_qp, n_sys) E' * B' * G'] 108 | Ryx = Rxy' 109 | Ryy = [zeros(n_sys, n_sys) -A' * G'; -G * A zeros(m_mci, m_mci)] 110 | sx = zeros(n_qp) 111 | sy = [zeros(n_sys); c] 112 | Gx = zeros(my, nx) 113 | Gy = [-G zeros(m_mci, m_mci); 114 | zeros(m_mci, n_sys) Matrix(1.0I, m_mci, m_mci); 115 | zeros(1, n_sys) ones(1, m_mci) 116 | zeros(1, n_sys) -ones(1, m_mci) 117 | ] 118 | c̃ = [c; zeros(m_mci); -1; 1] 119 | bq = zeros(n_qp) 120 | H̃ = [H; -H; E; -E] 121 | b̃ = ones(mx) 122 | Wb = zeros(mx, ny) 123 | Wq = [W_q zeros(n_qp, m_mci)] 124 | 125 | 126 | # Initialize the model with SCS solver 127 | model = Model(optimizer_with_attributes(SCS.Optimizer)) 128 | 129 | # Define variables 130 | @variable(model, γ) 131 | @variable(model, μ1[1:my] >= 0) 132 | @variable(model, μ2[1:mx] >= 0) 133 | @variable(model, μ3[1:mx] >= 0) 134 | @variable(model, η1[1:nx]) 135 | @variable(model, η2) 136 | @variable(model, η3) 137 | @variable(model, η4) 138 | 139 | # Objective function 140 | @objective(model, Max, γ) 141 | 142 | # SDP constraint 143 | function make_M(γ, μ1, μ2, μ3, η1, η2, η3, η4) 144 | up = [ 145 | Rxx + 2*η4*P Rxy + η4*Wq η2*H̃' sx - Gx'*μ1 - H̃'*μ2 + P*η1 + η4*bq; 146 | zeros(ny, nx) Ryy η2*Wb' + η3*(Wb' - Wq'*inv(P)*H̃') + η4*Wb' sy - Gy'*μ1 - Wb'*μ2 + Wq'*η1; 147 | zeros(mx, nx) zeros(mx, ny) 2*η3*H̃*inv(P)*H̃' -μ3 - H̃*η1 + η2*b̃ + η3*(b̃ - H̃*inv(P)*bq) + η4*b̃; 148 | zeros(1, nx) zeros(1, ny) zeros(1, mx) -2*c̃'*μ1 - 2*b̃'*μ2 + 2*bq'*η1 - γ 149 | ] 150 | return (up + up') / 2 151 | end 152 | 153 | @expression(model, M, make_M(γ, μ1, μ2, μ3, η1, η2, η3, η4)) 154 | @constraint(model, M in PSDCone()) 155 | 156 | # Solve the problem 157 | 
optimize!(model) 158 | 159 | # It will be infeasible 160 | 161 | ## Formulate nonconvex QCQP and try solving using local solver 162 | 163 | using JuMP, Ipopt 164 | 165 | model = Model(Ipopt.Optimizer) 166 | 167 | @variable(model, x[1:n_sys], start = 1) 168 | @variable(model, λ[1:m_mci] >= 0) 169 | @variable(model, u[1:n_qp]) 170 | @variable(model, μ1[1:m_qp] >= 0) 171 | @variable(model, μ2[1:m_qp] >= 0) 172 | @variable(model, μ3[1:m_sys] >= 0) 173 | @variable(model, μ4[1:m_sys] >= 0) 174 | 175 | p = -λ' * (G * (A * x + norm_factor * B * u[1:m_sys]) - c) 176 | @NLobjective(model, Min, p) 177 | @constraint(model, G * x .<= c) 178 | @constraint(model, sum(λ) == 1) 179 | @constraint(model, P * u + W_q * x + H' * (μ1 - μ2) .== 0) 180 | @constraint(model, H * u .<= 1) 181 | @constraint(model, -1 .<= H * u) 182 | @constraint(model, u[1:m_sys] .<= 1) 183 | @constraint(model, -1 .<= u[1:m_sys]) 184 | @constraint(model, μ1' * (H * u .- 1) == 0) 185 | @constraint(model, μ2' * (-H * u .- 1) == 0) 186 | @constraint(model, μ3' * (u[1:m_sys] .- 1) == 0) 187 | @constraint(model, μ4' * (-u[1:m_sys] .- 1) == 0) 188 | optimize!(model) 189 | # @show objective_value(model) 190 | @show value.(x) 191 | @show value.(u) 192 | @show value.(λ) 193 | optimal_value = -value.(λ)' * (G * (A * value.(x) + norm_factor * B * value.(u)[1:m_sys]) - c) 194 | @show optimal_value 195 | 196 | ## Try lower bound with SOS solver 197 | 198 | using DynamicPolynomials, SumOfSquares 199 | # import SCS 200 | # scs = SCS.Optimizer 201 | import MosekTools 202 | mosek = MosekTools.Optimizer 203 | import Dualization 204 | # dual_scs = Dualization.dual_optimizer(scs) 205 | # model = SOSModel(dual_scs) 206 | dual_mosek = Dualization.dual_optimizer(mosek) 207 | model = SOSModel(dual_mosek) 208 | 209 | @polyvar x[1:n_sys] 210 | @polyvar λ[1:m_mci] 211 | @polyvar u[1:n_qp] 212 | @polyvar μ1[1:m_qp] 213 | @polyvar μ2[1:m_qp] 214 | @polyvar μ3[1:m_sys] 215 | @polyvar μ4[1:m_sys] 216 | 217 | p = -λ' * (G * (A * x + norm_factor * B * u[1:m_sys]) - c) 218 | S = BasicSemialgebraicSet{Float64,Polynomial{true,Float64}}() 219 | 220 | invariance_constraint = - (G * x - c) 221 | for i in 1:m_mci 222 | addinequality!(S, invariance_constraint[i]) 223 | end 224 | for i in 1:m_mci 225 | addinequality!(S, λ[i]) 226 | end 227 | addequality!(S, sum(λ) - 1) 228 | stationarity = P * u + W_q * x + H' * (μ1 - μ2) 229 | for i in 1:n_qp 230 | addequality!(S, stationarity[i]) 231 | end 232 | p_feasibility_1 = -(H * u .- 1) 233 | for i in 1:m_qp 234 | addinequality!(S, p_feasibility_1[i]) 235 | end 236 | p_feasibility_2 = H * u .+ 1 237 | for i in 1:m_qp 238 | addinequality!(S, p_feasibility_2[i]) 239 | end 240 | p_feasibility_3 = 1. .- u[1:m_sys] 241 | for i in 1:m_sys 242 | addinequality!(S, p_feasibility_3[i]) 243 | end 244 | p_feasibility_4 = u[1:m_sys] .+ 1. 
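# The loops below complete the semialgebraic set S with the remaining primal feasibility
# inequalities (p_feasibility_4), the multiplier sign constraints (μ1, ..., μ4 >= 0), and the
# complementary-slackness equalities. Together with the stationarity equalities and feasibility
# inequalities already added, S encodes the KKT conditions of the convex lower-level QP, so the
# SOS constraint p >= σ over S certifies σ as a lower bound on the bilevel optimal value.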
245 | for i in 1:m_sys 246 | addinequality!(S, p_feasibility_4[i]) 247 | end 248 | for i in 1:m_qp 249 | addinequality!(S, μ1[i]) 250 | end 251 | for i in 1:m_qp 252 | addinequality!(S, μ2[i]) 253 | end 254 | for i in 1:m_sys 255 | addinequality!(S, μ3[i]) 256 | end 257 | for i in 1:m_sys 258 | addinequality!(S, μ4[i]) 259 | end 260 | addequality!(S, μ1' * (H * u .- 1)) 261 | addequality!(S, μ2' * (-H * u .- 1)) 262 | addequality!(S, μ3' * (u[1:m_sys] .- 1)) 263 | addequality!(S, μ4' * (-u[1:m_sys] .- 1)) 264 | 265 | @variable(model, σ >= 0) 266 | @objective(model, Max, σ) 267 | @constraint(model, p >= σ, domain = S, maxdegree = 3) 268 | optimize!(model) 269 | @show solution_summary(model) 270 | @show objective_value(model) 271 | 272 | ## Some toy examples that exemplify the solver usage 273 | 274 | ## 275 | using DynamicPolynomials, SumOfSquares 276 | import MosekTools 277 | mosek = MosekTools.Optimizer 278 | import Dualization 279 | dual_mosek = Dualization.dual_optimizer(mosek) 280 | 281 | # Create JuMP model 282 | model = SOSModel(dual_mosek) 283 | 284 | @polyvar x y 285 | p = x * y 286 | @variable(model, σ) 287 | @objective(model, Max, σ) 288 | S = @set x + y <= 1 && x - y <= 1 && -x + y <= 1 && -x - y <= 1 289 | # @constraint(model, x + y <= 1) 290 | # @constraint(model, x - y <= 1) 291 | # @constraint(model, -x + y <= 1) 292 | # @constraint(model, -x - y <= 1) 293 | @constraint(model, p >= σ, domain = S, maxdegree = 3) 294 | optimize!(model) 295 | solution_summary(model) 296 | 297 | ## 298 | using DynamicPolynomials 299 | @polyvar x y 300 | p = x^3 - x^2 + 2x*y -y^2 + y^3 301 | using SumOfSquares 302 | S = @set x >= 0 && y >= 0 && x + y >= 1 303 | import Ipopt 304 | model = Model(Ipopt.Optimizer) 305 | @variable(model, a >= 0) 306 | @variable(model, b >= 0) 307 | @constraint(model, a + b >= 1) 308 | @NLobjective(model, Min, a^3 - a^2 + 2a*b - b^2 + b^3) 309 | optimize!(model) 310 | solution_summary(model) 311 | 312 | ## 313 | import MosekTools 314 | mosek = MosekTools.Optimizer 315 | import Dualization 316 | dual_mosek = Dualization.dual_optimizer(mosek) 317 | model = SOSModel(dual_mosek) 318 | @variable(model, α) 319 | @objective(model, Max, α) 320 | @constraint(model, c3, p >= α, domain = S) 321 | optimize!(model) 322 | solution_summary(model) 323 | 324 | ## 325 | model = SOSModel(dual_mosek) 326 | @variable(model, α) 327 | @objective(model, Max, α) 328 | @constraint(model, c4, p >= α, domain = S, maxdegree = 4) 329 | optimize!(model) 330 | solution_summary(model) 331 | 332 | -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import sys 4 | file_path = os.path.dirname(__file__) 5 | sys.path.insert(0, os.path.join(file_path, "rl_games")) 6 | import yaml 7 | import torch 8 | import glob 9 | import copy 10 | from contextlib import contextmanager, redirect_stderr, redirect_stdout 11 | import numpy as np 12 | 13 | from rl_games.common import env_configurations, vecenv 14 | from rl_games.torch_runner import Runner 15 | from rl_games.algos_torch import model_builder 16 | 17 | from src.envs.env_creators import env_creators, sys_param 18 | from src.envs.mpc_baseline_parameters import get_mpc_baseline_parameters 19 | from src.utils.rlgame_utils import RLGPUEnv, RLGPUAlgoObserver 20 | from src.networks.a2c_qp_unrolled import A2CQPUnrolledBuilder 21 | 22 | model_builder.register_network('qp_unrolled', A2CQPUnrolledBuilder) 23 | 24 | 
@contextmanager 25 | def suppress_stdout_stderr(): 26 | """A context manager that redirects stdout and stderr to devnull""" 27 | with open(os.devnull, 'w') as fnull: 28 | with redirect_stderr(fnull) as err, redirect_stdout(fnull) as out: 29 | yield (err, out) 30 | 31 | def float_list(string): 32 | """Convert a string into a list of floats.""" 33 | try: 34 | return [float(item) for item in string.split(',')] 35 | except ValueError: 36 | raise argparse.ArgumentTypeError("Argument must be a comma-separated list of floats") 37 | 38 | 39 | parser = argparse.ArgumentParser() 40 | parser.add_argument("train_or_test", type=str, help="Train or test") 41 | parser.add_argument("env", type=str) 42 | parser.add_argument("--noise-level", type=float, default=0.5) 43 | parser.add_argument("--seed", type=int, default=42) 44 | parser.add_argument("--exp-name", type=str, default="default") 45 | parser.add_argument("--epochs", type=int, default=1000) 46 | parser.add_argument("--num-parallel", type=int, default=100000) 47 | parser.add_argument("--mini-epochs", type=int, default=5) 48 | parser.add_argument("--mlp-size-last", type=int, default=64) 49 | parser.add_argument("--gamma", type=float, default=0.99) 50 | parser.add_argument("--horizon", type=int, default=200) 51 | parser.add_argument("--max-steps-per-episode", type=int, default=500) 52 | parser.add_argument("--score-to-win", type=int, default=int(1e9)) 53 | parser.add_argument("--save-freq", type=int, default=10) 54 | parser.add_argument("--epoch-index", type=int, default=-1, help="For test only, -1 for using latest") 55 | parser.add_argument("--quiet", action='store_true') 56 | parser.add_argument("--device", type=str, default='cuda:0') 57 | parser.add_argument("--qp-unrolled", action='store_true') 58 | parser.add_argument("--n-qp", type=int, default=5) 59 | parser.add_argument("--m-qp", type=int, default=4) 60 | parser.add_argument("--qp-iter", type=int, default=10) 61 | parser.add_argument("--shared-PH", action="store_true") 62 | parser.add_argument("--affine-qb", action="store_true") 63 | parser.add_argument("--strict-affine-layer", action="store_true") 64 | parser.add_argument("--obs-has-half-ref", action="store_true") 65 | parser.add_argument("--symmetric", action="store_true") 66 | parser.add_argument("--no-b", action="store_true") 67 | parser.add_argument("--warm-start", action="store_true") 68 | parser.add_argument("--ws-loss-coef", type=float, default=10.) 
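# The --warm-start / --ws-* options configure the learned warm starter (src/modules/warm_starter.py);
# they only take effect when --qp-unrolled is also set (see the custom network config below).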
69 | parser.add_argument("--ws-update-rate", type=float, default=0.1) 70 | parser.add_argument("--batch-test", action="store_true") 71 | parser.add_argument("--run-name", type=str, default="") 72 | parser.add_argument("--randomize", action="store_true") 73 | parser.add_argument("--use-residual-loss", action="store_true") 74 | parser.add_argument("--no-obs-normalization", action="store_true") 75 | parser.add_argument("--imitate-mpc-N", type=int, default=0) 76 | parser.add_argument("--initialize-from-experiment", type=str, default="") 77 | parser.add_argument("--force-feasible", action="store_true") 78 | parser.add_argument("--skip-to-steady-state", action="store_true") 79 | parser.add_argument("--initial-lr", type=float, default=3e-4) 80 | parser.add_argument("--lr-schedule", type=str, default="adaptive") 81 | parser.add_argument("--reward-shaping", type=float_list, default=[0., 1., 0.]) 82 | 83 | parser.add_argument("--mpc-baseline-N", type=int, default=0) 84 | parser.add_argument("--use-osqp-for-mpc", action="store_true") 85 | parser.add_argument("--mpc-terminal-cost-coef", type=float, default=0.) 86 | parser.add_argument("--robust-mpc-method", type=str, default="none", choices=["none", "scenario", "tube"]) 87 | parser.add_argument("--tube-mpc-tube-size", type=float, default=0.) 88 | args = parser.parse_args() 89 | 90 | 91 | def get_num_parallel(): 92 | if args.train_or_test == "train": 93 | return args.num_parallel 94 | elif args.train_or_test == "test": 95 | if args.batch_test: 96 | return args.num_parallel 97 | else: 98 | return 1 99 | 100 | default_env_config = { 101 | "random_seed": args.seed, 102 | "quiet": args.quiet, 103 | "device": args.device, 104 | "bs": get_num_parallel(), 105 | "noise_level": args.noise_level, 106 | "max_steps": args.max_steps_per_episode, 107 | "keep_stats": (args.train_or_test == "test"), 108 | "run_name": args.run_name or args.exp_name, 109 | "exp_name": args.exp_name, 110 | "randomize": args.randomize, 111 | "skip_to_steady_state": args.skip_to_steady_state, 112 | "reward_shaping": args.reward_shaping, 113 | } 114 | 115 | blacklist_keys = lambda d, blacklist: {k: d[k] for k in d if not (k in blacklist)} 116 | vecenv.register('RLGPU', 117 | lambda config_name, num_actors, **kwargs: RLGPUEnv(config_name, num_actors, **kwargs)) 118 | env_configurations.register('rlgpu', { 119 | 'vecenv_type': 'RLGPU', 120 | 'env_creator': lambda **env_config: env_creators[args.env]( 121 | **blacklist_keys(default_env_config, env_config.keys()), 122 | **env_config, 123 | ), 124 | }) 125 | 126 | runner = Runner(RLGPUAlgoObserver()) 127 | file_path = os.path.dirname(__file__) 128 | with open(os.path.join(file_path, "runner_config.yaml")) as f: 129 | runner_config = yaml.safe_load(f) 130 | full_experiment_name = args.env + "_" + args.exp_name 131 | runner_config["params"]["seed"] = args.seed 132 | runner_config["params"]["config"]["train_or_test"] = args.train_or_test 133 | runner_config["params"]["config"]["num_actors"] = args.num_parallel 134 | runner_config["params"]["config"]["max_epochs"] = args.epochs 135 | runner_config["params"]["config"]["minibatch_size"] = args.num_parallel 136 | runner_config["params"]["config"]["games_to_track"] = args.num_parallel 137 | runner_config["params"]["config"]["steps_to_track_per_game"] = args.max_steps_per_episode 138 | runner_config["params"]["config"]["mini_epochs"] = args.mini_epochs 139 | runner_config["params"]["config"]["gamma"] = args.gamma 140 | runner_config["params"]["config"]["horizon_length"] = args.horizon 141 | 
runner_config["params"]["config"]["score_to_win"] = args.score_to_win 142 | runner_config["params"]["config"]["name"] = args.env 143 | runner_config["params"]["config"]["full_experiment_name"] = full_experiment_name 144 | runner_config["params"]["network"]["mlp"]["units"] = [args.mlp_size_last * i for i in (4, 2, 1)] 145 | runner_config["params"]["config"]["save_frequency"] = args.save_freq 146 | runner_config["params"]["config"]["device"] = args.device 147 | runner_config["params"]["network"].pop("rnn") 148 | runner_config["params"]["config"]["learning_rate"] = args.initial_lr 149 | runner_config["params"]["config"]["lr_schedule"] = args.lr_schedule 150 | if args.no_obs_normalization: 151 | runner_config["params"]["config"]["normalize_input"] = False 152 | 153 | if args.batch_test: 154 | runner_config["params"]["config"]["player"]["games_num"] = args.num_parallel 155 | 156 | if args.qp_unrolled: 157 | runner_config["params"]["network"]["name"] = "qp_unrolled" 158 | runner_config["params"]["network"]["custom"] = { 159 | "device": args.device, 160 | "n_qp": args.n_qp, 161 | "m_qp": args.m_qp, 162 | "qp_iter": args.qp_iter, 163 | "shared_PH": args.shared_PH, 164 | "affine_qb": args.affine_qb, 165 | "strict_affine_layer": args.strict_affine_layer, 166 | "obs_has_half_ref": args.obs_has_half_ref, 167 | "use_warm_starter": args.warm_start, 168 | "train_warm_starter": args.warm_start and args.train_or_test == "train", 169 | "ws_loss_coef": args.ws_loss_coef, 170 | "ws_update_rate": args.ws_update_rate, 171 | "mpc_baseline": None if (not args.mpc_baseline_N and not args.imitate_mpc_N) else {**get_mpc_baseline_parameters(args.env, args.mpc_baseline_N or args.imitate_mpc_N, noise_std=args.noise_level), "terminal_coef": args.mpc_terminal_cost_coef}, 172 | "imitate_mpc": args.imitate_mpc_N > 0, 173 | "use_osqp_for_mpc": args.use_osqp_for_mpc, 174 | "use_residual_loss": args.use_residual_loss, 175 | "symmetric": args.symmetric, 176 | "no_b": args.no_b, 177 | "force_feasible": args.force_feasible, 178 | "feasible_lambda": 10., 179 | "train_or_test": args.train_or_test, 180 | "run_name": args.run_name, 181 | } 182 | 183 | if args.mpc_baseline_N: 184 | # Unset observation and action normalization 185 | runner_config["params"]["config"]["clip_actions"] = False 186 | runner_config["params"]["config"]["normalize_input"] = False 187 | 188 | if args.imitate_mpc_N: 189 | # Unset observation normalization 190 | runner_config["params"]["config"]["normalize_input"] = False 191 | # Make MPC output normalized action 192 | runner_config["params"]["network"]["custom"]["mpc_baseline"]["normalize"] = True 193 | 194 | if args.robust_mpc_method != "none": 195 | runner_config["params"]["network"]["custom"]["mpc_baseline"]["robust_method"] = args.robust_mpc_method 196 | runner_config["params"]["network"]["custom"]["mpc_baseline"]["max_disturbance_per_dim"] = args.tube_mpc_tube_size 197 | 198 | if args.quiet: 199 | with suppress_stdout_stderr(): 200 | runner.load(runner_config) 201 | else: 202 | runner.load(runner_config) 203 | 204 | if __name__ == "__main__": 205 | if args.train_or_test == "train": 206 | runner_arg = { 207 | 'train': True, 208 | 'play': False, 209 | } 210 | if args.initialize_from_experiment: 211 | full_checkpoint_name = args.env + "_" + args.initialize_from_experiment 212 | checkpoint_dir = f"runs/{full_checkpoint_name}/nn" 213 | checkpoint_name = f"{checkpoint_dir}/{args.env}.pth" 214 | runner_arg['checkpoint'] = checkpoint_name 215 | runner.run(runner_arg) 216 | elif args.train_or_test == "test": 217 | 
if not args.mpc_baseline_N: 218 | checkpoint_dir = f"runs/{full_experiment_name}/nn" 219 | if args.epoch_index == -1: 220 | checkpoint_name = f"{checkpoint_dir}/{args.env}.pth" 221 | else: 222 | list_of_files = glob.glob(f"{checkpoint_dir}/last_{args.env}_ep_{args.epoch_index}_rew_*.pth") 223 | checkpoint_name = max(list_of_files, key=os.path.getctime) 224 | else: 225 | checkpoint_name = None 226 | runner.run({ 227 | 'train': False, 228 | 'play': True, 229 | 'checkpoint' : checkpoint_name, 230 | }) 231 | -------------------------------------------------------------------------------- /experiments/tank/visualize_feasible_sets.py: -------------------------------------------------------------------------------- 1 | # %% Specify test case 2 | import numpy as np 3 | 4 | # Case where MPC is better 5 | x0 = np.array([10., 10., 10., 10.]) 6 | x_ref = np.array([19, 19, 2.4, 2.4]) 7 | 8 | # # Case where MPC fails 9 | # x0 = np.array([ 5.4963946, 10.947876, 1.034516, 18.08066 ]) 10 | # x_ref = np.array([7.522859, 8.169776, 1.1107684, 1. ]) 11 | 12 | # Controlling process noise and parametric uncertainty 13 | noise_level = 0 14 | parametric_uncertainty = False 15 | parameter_randomization_seed = 2 16 | 17 | # %% Set up test bench 18 | import sys 19 | import os 20 | file_path = os.path.dirname(__file__) 21 | sys.path.append(os.path.join(file_path, "../..")) 22 | 23 | from src.envs.env_creators import sys_param, env_creators 24 | from src.envs.mpc_baseline_parameters import get_mpc_baseline_parameters 25 | from src.modules.qp_unrolled_network import QPUnrolledNetwork 26 | import torch 27 | from matplotlib import pyplot as plt 28 | from icecream import ic 29 | 30 | 31 | # Utilities 32 | 33 | def make_obs(x, x_ref, running_mean, running_std, normalize): 34 | raw_obs = torch.tensor(np.concatenate([x, x_ref]), device=device, dtype=torch.float) 35 | if not normalize: 36 | return raw_obs.unsqueeze(0) 37 | else: 38 | return ((raw_obs - running_mean) / running_std).unsqueeze(0) 39 | 40 | def get_state_dict(checkpoint_path): 41 | checkpoint = torch.load(checkpoint_path) 42 | model = checkpoint["model"] 43 | prefix = "a2c_network.policy_net." 44 | policy_net_state_dict = {k[len(prefix):]: v for (k, v) in model.items() if k.startswith(prefix)} 45 | if "running_mean_std.running_mean" in model: 46 | running_mean = model["running_mean_std.running_mean"].to(dtype=torch.float) 47 | running_std = model["running_mean_std.running_var"].sqrt().to(dtype=torch.float) 48 | else: 49 | running_mean = torch.tensor([0.]) 50 | running_std = torch.tensor([1.]) 51 | return policy_net_state_dict, running_mean, running_std 52 | 53 | def rescale_action(action, low=-1., high=8.): 54 | action = action.clamp(-1., 1.)
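# affine map from the clamped range [-1, 1] onto [low, high]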
55 | return low + (high - low) * (action + 1) / 2 56 | 57 | t = lambda arr: torch.tensor(arr, device=device, dtype=torch.float).unsqueeze(0) 58 | a = lambda t: t.detach().cpu().numpy() 59 | 60 | # Constants and options 61 | n_sys = 4 62 | m_sys = 2 63 | input_size = 8 # 4 for x, 4 for x_ref 64 | n = 2 65 | m = 64 66 | qp_iter = 10 67 | device = "cuda:0" 68 | 69 | 70 | # MPC module 71 | mpc_baseline = get_mpc_baseline_parameters("tank", 1) 72 | mpc_baseline["normalize"] = True # Solve for normalized action, to be consistent with learned QP 73 | mpc_module = QPUnrolledNetwork( 74 | device, input_size, n, m, qp_iter, None, True, True, 75 | mpc_baseline=mpc_baseline, 76 | use_osqp_for_mpc=True, 77 | ) 78 | 79 | # Environment 80 | env = env_creators["tank"]( 81 | noise_level=noise_level, 82 | bs=1, 83 | max_steps=300, 84 | keep_stats=True, 85 | run_name="", 86 | exp_name="", 87 | randomize=parametric_uncertainty, 88 | ) 89 | 90 | # %% Compare learned QPs learned with / without residual loss, and compare degree of constraint violation 91 | from src.utils.torch_utils import bmv 92 | 93 | def get_qp_net(trained_with_residual_loss, forced_feasibility=False): 94 | exp_name = f"residual_loss_{'on' if trained_with_residual_loss else 'off'}" 95 | if forced_feasibility: 96 | exp_name = "force_feasible_on" 97 | net = QPUnrolledNetwork(device, input_size, n, m, qp_iter, None, True, True, force_feasible=forced_feasibility) 98 | if parametric_uncertainty: 99 | exp_name += "+rand" 100 | checkpoint_path = f"runs/tank_{exp_name}/nn/tank.pth" 101 | policy_net_state_dict, running_mean, running_std = get_state_dict(checkpoint_path) 102 | net.load_state_dict(policy_net_state_dict) 103 | running_mean, running_std = running_mean.to(device=device), running_std.to(device=device) 104 | net.to(device) 105 | return net, running_mean, running_std 106 | 107 | def compute_violation(H, action_all, b): 108 | """ 109 | Number of violated constraints, as well as magnitude of constraint violation. 
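Feasibility is encoded elementwise as H @ z + b >= 0; returns a (violation_count, violation_magnitude) pair per batch element, where the magnitude is the Euclidean norm of the negative part of H @ z + b.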
110 | """ 111 | z_recovered = bmv(H, action_all) + b 112 | violation_count = (z_recovered < 0.).sum(dim=-1) 113 | violation_magnitude = torch.norm(z_recovered.clamp(-torch.inf, 0.), dim=-1) 114 | return violation_count, violation_magnitude 115 | 116 | def rollout(trained_with_residual_loss, is_mpc, steps, forced_feasibility=False): 117 | net, running_mean, running_std = get_qp_net(trained_with_residual_loss, forced_feasibility) 118 | if is_mpc: 119 | net = mpc_module 120 | results = [] 121 | env.reset(t(x0), t(x_ref), randomize_seed=parameter_randomization_seed) 122 | x = x0 123 | obs = make_obs(x, x_ref, running_mean, running_std, not is_mpc) 124 | for i in range(steps): 125 | action_all, problem_params = net(obs, return_problem_params=True) 126 | u = rescale_action(action_all[:, :m_sys]) 127 | raw_obs, reward, done_t, info = env.step(u) 128 | if not is_mpc: 129 | obs = (raw_obs - running_mean) / running_std 130 | else: 131 | obs = raw_obs 132 | done = done_t.item() 133 | P, q, H, b = problem_params 134 | results.append((P, q, H, b, action_all)) 135 | return results 136 | 137 | def evaluate_constraint_violation(trained_with_residual_loss, steps=10, forced_feasibility=False): 138 | """Rollout for multiple steps, and compute average (number of violated constraints, magnitude of violation).""" 139 | rollout_results = rollout(trained_with_residual_loss, False, steps, forced_feasibility) 140 | constraint_violation_indices = [] 141 | for i in range(steps): 142 | H = rollout_results[i][2] 143 | action_all = rollout_results[i][4] 144 | b = rollout_results[i][3] 145 | constraint_violation_indices.append(compute_violation(H, action_all, b)) 146 | average_violation_count = torch.stack([v[0] for v in constraint_violation_indices], dim=0).to(dtype=torch.float).mean(dim=0) 147 | average_violation_magnitude = torch.stack([v[1] for v in constraint_violation_indices], dim=0).mean(dim=0) 148 | return average_violation_count, average_violation_magnitude 149 | 150 | violation_count_with_residual_loss, violation_magnitude_with_residual_loss = evaluate_constraint_violation(True) 151 | violation_count_without_residual_loss, violation_magnitude_without_residual_loss = evaluate_constraint_violation(False) 152 | 153 | ic(violation_count_with_residual_loss, violation_count_without_residual_loss) 154 | ic(violation_magnitude_with_residual_loss, violation_magnitude_without_residual_loss) 155 | 156 | # %% Visualize the feasible set and objective function at a certain step, ignoring constraints that are violated 157 | at_step = 10 158 | 159 | from src.utils.visualization import plot_multiple_2d_polytopes_with_contour 160 | 161 | def get_violated_mask(H, action_all, b): 162 | z_recovered = bmv(H, action_all) + b 163 | return torch.where(z_recovered < 0., torch.ones_like(z_recovered), torch.zeros_like(z_recovered)) 164 | 165 | def get_step_parameters(at_step, trained_with_residual_loss, is_mpc, forced_feasibility=False): 166 | rollout_results = rollout(trained_with_residual_loss, is_mpc, at_step, forced_feasibility) 167 | result_last_step = rollout_results[-1] 168 | P, q, H, b, action_all = result_last_step 169 | violated_mask = get_violated_mask(H, action_all, b) 170 | return P, q, H, b, violated_mask, action_all 171 | 172 | def get_plot_parameters(trained_with_residual_loss, is_mpc, color, label, is_forced_feasibility=False): 173 | a = lambda t: t.squeeze(0).detach().cpu().numpy() 174 | global P, q, H, b, violated_mask, action_all 175 | P, q, H, b, violated_mask, action_all = get_step_parameters(at_step, 
trained_with_residual_loss, is_mpc, is_forced_feasibility) 176 | if not is_forced_feasibility: 177 | # Filter out violated constraints 178 | satisfied_mask = torch.logical_not(violated_mask) 179 | plot_params = { 180 | "A": a(-H[satisfied_mask, :]), 181 | "b": a(b[satisfied_mask]), 182 | "optimal_solution": a(action_all[:, :m_sys]), 183 | "P": a(P), 184 | "q": a(q), 185 | "color": color, 186 | "label": label, 187 | } 188 | else: 189 | # Learned problem with forced feasibility; recover original P, q, H, b from augmented P, q, H, b 190 | y = action_all[:, -1].item() 191 | P0 = P[:, :n, :n] 192 | q0 = q[:, :n] 193 | H0 = H[:, :m, :n] 194 | b0 = b[:, :m] + y 195 | plot_params = { 196 | "A": a(-H0), 197 | "b": a(b0), 198 | "optimal_solution": a(action_all[:, :m_sys]), 199 | "P": a(P0), 200 | "q": a(q0), 201 | "color": color, 202 | "label": label, 203 | } 204 | return plot_params 205 | 206 | fig, ax = plot_multiple_2d_polytopes_with_contour([ 207 | get_plot_parameters(True, False, "blue", "Learned QP (with residual loss)"), 208 | get_plot_parameters(False, False, "red", "Learned QP (w/o residual loss)"), 209 | get_plot_parameters(False, True, "green", "MPC") 210 | ]) 211 | ax.set_xlabel("$u_1$") 212 | ax.set_ylabel("$u_2$") 213 | ax.set_title(f"Feasible sets and objective functions at step {at_step}") 214 | 215 | # %% Visualize the feasible set and objective function at a certain step, forcing feasibility 216 | fig, ax = plot_multiple_2d_polytopes_with_contour([ 217 | get_plot_parameters(True, False, "blue", "Learned QP (forced feasibility, n=2)", True), 218 | get_plot_parameters(False, True, "green", "MPC (N=1)", True) 219 | ]) 220 | ax.set_xlabel("$u_1$") 221 | ax.set_ylabel("$u_2$") 222 | ax.set_title(f"Feasible sets and objective functions at step {at_step}") 223 | 224 | 225 | # %% Visualize feasible set vs. MPC; Now 226 | # 1. The learned QP is guaranteed to be feasible; no need to ignore violated constraints 227 | # 2. 
The variables are allowed to be high-dimensional; we project the constraint polytope and the quadratic objective to 2D 228 | from src.utils.geometry import high_dim_to_2D_sampling, partial_minimization_2D 229 | 230 | n = 8 231 | m = 32 232 | mpc_N = 4 233 | at_step = 50 234 | 235 | mpc_baseline = get_mpc_baseline_parameters("tank", mpc_N) 236 | mpc_baseline["normalize"] = True # Solve for normalized action, to be consistent with learned QP 237 | mpc_module = QPUnrolledNetwork( 238 | device, input_size, n, m, qp_iter, None, True, True, 239 | mpc_baseline=mpc_baseline, 240 | use_osqp_for_mpc=True, 241 | ) 242 | 243 | def get_plot_parameters_proj(is_mpc, color, label): 244 | a = lambda t: t.squeeze(0).detach().cpu().numpy() 245 | P, q, H, b, violated_mask, action_all = get_step_parameters(at_step, False, is_mpc, True) 246 | if not is_mpc: 247 | # Learned problem with forced feasibility; recover original P, q, H, b from augmented P, q, H, b 248 | y = action_all[:, -1].item() 249 | P0 = P[:, :n, :n] 250 | q0 = q[:, :n] 251 | H0 = H[:, :m, :n] 252 | b0 = b[:, :m] + y 253 | else: 254 | P0, q0, H0, b0 = P, q, H, b 255 | 256 | A_proj, b_proj = high_dim_to_2D_sampling(-a(H0), a(b0)) 257 | P_proj, q_proj, _ = partial_minimization_2D(a(P0), a(q0)) 258 | plot_params = { 259 | "A": A_proj, 260 | "b": b_proj, 261 | "optimal_solution": a(action_all[:, :m_sys]), 262 | "P": P_proj, 263 | "q": q_proj, 264 | "color": color, 265 | "label": label, 266 | } 267 | return plot_params 268 | 269 | 270 | fig, ax = plot_multiple_2d_polytopes_with_contour([ 271 | get_plot_parameters_proj(True, "green", "MPC"), 272 | get_plot_parameters_proj(False, "blue", "Learned"), 273 | ]) 274 | ax.set_xlabel("$u_1$") 275 | ax.set_ylabel("$u_2$") 276 | ax.set_title(f"Feasible sets and objective functions at step {at_step}") 277 | 278 | # %% 279 | -------------------------------------------------------------------------------- /experiments/double_integrator/visualize.py: -------------------------------------------------------------------------------- 1 | # %% Load system and compute maximal invariant set 2 | import numpy as np 3 | import sys 4 | import os 5 | file_path = os.path.dirname(__file__) 6 | sys.path.append(os.path.join(file_path, "../..")) 7 | 8 | from src.envs.env_creators import sys_param, env_creators 9 | from src.utils.sets import compute_MCI 10 | from matplotlib import pyplot as plt 11 | 12 | A = sys_param["double_integrator"]["A"] 13 | B = sys_param["double_integrator"]["B"] 14 | Q = sys_param["double_integrator"]["Q"] 15 | R = sys_param["double_integrator"]["R"] 16 | x_min_scalar = sys_param["double_integrator"]["x_min"] 17 | x_max_scalar = sys_param["double_integrator"]["x_max"] 18 | u_min_scalar = sys_param["double_integrator"]["u_min"] 19 | u_max_scalar = sys_param["double_integrator"]["u_max"] 20 | x_min = x_min_scalar * np.ones(2) 21 | x_max = x_max_scalar * np.ones(2) 22 | u_min = u_min_scalar * np.ones(1) 23 | u_max = u_max_scalar * np.ones(1) 24 | 25 | MCI = compute_MCI(A, B, x_min, x_max, u_min, u_max, iterations=100) 26 | 27 | fig, ax = plt.subplots() 28 | # ax.fill(X0_vertices[:, 0], X0_vertices[:, 1], alpha=0.3, label='Initial Set $X_0$', color='g') 29 | ax.fill(MCI[:, 0], MCI[:, 1], 30 | alpha=0.7, label='Maximal Control Invariant Set', color='r') 31 | ax.grid() 32 | 33 | # %% Define MPC on the system 34 | from src.utils.mpc_utils import mpc2qp_np 35 | from src.utils.osqp_utils import osqp_oracle 36 | 37 | N_mpc = 3 # The short horizon will make naive MPC fail, as shown in 
http://cse.lab.imtlucca.it/~bemporad/publications/papers/BBMbook.pdf, p. 247 38 | 39 | def mpc_controller(x, Qf=None): 40 | """ 41 | MPC controller for the double integrator system. 42 | """ 43 | _, _, P, q, H, b = mpc2qp_np( 44 | n_mpc=2, m_mpc=1, N=N_mpc, A=A, B=B, Q=Q, R=R, 45 | x_min=x_min_scalar, x_max=x_max_scalar, u_min=u_min_scalar, u_max=u_max_scalar, 46 | x0=x, x_ref=np.zeros(2), normalize=False, Qf=Qf 47 | ) 48 | sol = osqp_oracle(q, b, P, H) 49 | return np.clip(sol[:1], u_min_scalar, u_max_scalar) 50 | 51 | def mpc_with_predicted_trajectory(x): 52 | """ 53 | Return predicted trajectory (list of (x, u) pairs) of the MPC controller. 54 | """ 55 | _, _, P, q, H, b = mpc2qp_np( 56 | n_mpc=2, m_mpc=1, N=N_mpc, A=A, B=B, Q=Q, R=R, 57 | x_min=x_min_scalar, x_max=x_max_scalar, u_min=u_min_scalar, u_max=u_max_scalar, 58 | x0=x, x_ref=np.zeros(2), normalize=False, 59 | ) 60 | sol = osqp_oracle(q, b, P, H) 61 | sol = np.clip(sol, u_min_scalar, u_max_scalar) 62 | trajectory = [] 63 | for i in range(N_mpc): 64 | u = sol[i:i + 1] 65 | x = A @ x + B @ u 66 | if not (x_min_scalar - 0.01 <= x).all() or not (x <= x_max_scalar + 0.01).all(): 67 | break 68 | trajectory.append((x, u)) 69 | return trajectory 70 | 71 | # %% Define learned controller on the system 72 | from src.modules.qp_unrolled_network import QPUnrolledNetwork 73 | import torch 74 | 75 | def get_state_dict(checkpoint_path): 76 | checkpoint = torch.load(checkpoint_path) 77 | model = checkpoint["model"] 78 | prefix = "a2c_network.policy_net." 79 | policy_net_state_dict = {k[len(prefix):]: v for (k, v) in model.items() if k.startswith(prefix)} 80 | if "running_mean_std.running_mean" in model: 81 | running_mean = model["running_mean_std.running_mean"].to(dtype=torch.float) 82 | running_std = model["running_mean_std.running_var"].sqrt().to(dtype=torch.float) 83 | else: 84 | running_mean = torch.tensor([0.]) 85 | running_std = torch.tensor([1.]) 86 | return policy_net_state_dict, running_mean, running_std 87 | 88 | device = "cuda:0" 89 | n_qp = 3 90 | m_qp = 9 91 | qp_iter = 10 92 | symmetric = True 93 | no_b = True 94 | net = QPUnrolledNetwork(device, 2, n_qp, m_qp, qp_iter, None, True, True, force_feasible=True, symmetric=symmetric, no_b=no_b) 95 | if not symmetric: 96 | exp_name = "default" 97 | elif not no_b: 98 | exp_name = "symmetric" 99 | else: 100 | exp_name = "symmetric_no_b" 101 | checkpoint_path = f"runs/double_integrator_{exp_name}/nn/double_integrator.pth" 102 | policy_net_state_dict, running_mean, running_std = get_state_dict(checkpoint_path) 103 | net.load_state_dict(policy_net_state_dict) 104 | running_mean, running_std = running_mean.to(device=device), running_std.to(device=device) 105 | net.to(device) 106 | 107 | t = lambda arr: torch.tensor(arr, device=device, dtype=torch.float).unsqueeze(0) 108 | a = lambda t: t.squeeze(0).detach().cpu().numpy() 109 | 110 | def learned_controller(x): 111 | sol = a(net(t(x))) 112 | sol *= 0.5 # Denormalize 113 | return np.clip(sol[:1], u_min_scalar, u_max_scalar) 114 | 115 | # %% Define closed-loop dynamics 116 | 117 | def get_cl_dynamics(controller): 118 | def g(x): 119 | return A @ x + B @ controller(x) 120 | return g 121 | 122 | g_mpc = get_cl_dynamics(mpc_controller) 123 | g_mpc_term = get_cl_dynamics(lambda x: mpc_controller(x, Qf=10 * Q)) 124 | g_learned = get_cl_dynamics(learned_controller) 125 | 126 | 127 | # %% Compute one-step reachable sets starting from MCI 128 | from src.utils.sets import one_step_forward_reachable_set 129 | 130 | reachable_mpc = 
one_step_forward_reachable_set(g_mpc, MCI, x_min, x_max) 131 | reachable_mpc_term = one_step_forward_reachable_set(g_mpc_term, MCI, x_min, x_max) 132 | reachable_learned = one_step_forward_reachable_set(g_learned, MCI, x_min, x_max) 133 | 134 | fig, ax = plt.subplots() 135 | ax.fill(MCI[:, 0], MCI[:, 1], 136 | alpha=0.3, label='Maximal Control Invariant Set', color='r') 137 | ax.fill(reachable_mpc[:, 0], reachable_mpc[:, 1], 138 | alpha=0.3, label='One-step reachable set (MPC)', color='g') 139 | ax.fill(reachable_mpc_term[:, 0], reachable_mpc_term[:, 1], 140 | alpha=0.3, label='One-step reachable set (MPC with terminal cost)', color='purple') 141 | ax.fill(reachable_learned[:, 0], reachable_learned[:, 1], 142 | alpha=0.3, label='One-step reachable set (Learned)', color='b') 143 | ax.set_xlabel("$x_1$") 144 | ax.set_ylabel("$x_2$") 145 | 146 | ax.legend() 147 | 148 | # %% Compute positive invariant sets under closed-loop dynamics 149 | from src.utils.sets import compute_positive_invariant_set_from_origin 150 | 151 | pis_mpc = compute_positive_invariant_set_from_origin(g_mpc, x_min, x_max, initial_radius=1.5, iterations=150) 152 | pis_mpc_term = compute_positive_invariant_set_from_origin(g_mpc_term, x_min, x_max, initial_radius=1.8, iterations=20) 153 | pis_learned = compute_positive_invariant_set_from_origin(g_learned, x_min, x_max, initial_radius=1.8, iterations=20) 154 | 155 | 156 | # %% 157 | fig, ax = plt.subplots() 158 | ax.fill(MCI[:, 0], MCI[:, 1], 159 | alpha=1.0, label='Maximal Control Invariant Set', color='r') 160 | ax.fill(pis_learned[:, 0], pis_learned[:, 1], 161 | alpha=1.0, label='Positive invariant set (Learned)', color='b') 162 | ax.fill(pis_mpc[:, 0], pis_mpc[:, 1], 163 | alpha=1.0, label='Positive invariant set (MPC)', color='g') 164 | ax.fill(pis_mpc_term[:, 0], pis_mpc_term[:, 1], 165 | alpha=1.0, label='Positive invariant set (MPC with terminal cost)', color='purple') 166 | ax.set_xlabel("$x_1$") 167 | ax.set_ylabel("$x_2$") 168 | ax.grid() 169 | ax.legend() 170 | 171 | 172 | # %% Case study 173 | from matplotlib.patches import Rectangle 174 | 175 | def get_trajectory(controller, x0, max_steps=200): 176 | g = get_cl_dynamics(controller) 177 | x = x0 178 | xs = [x] 179 | total_cost = 0. 
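# Roll out the closed-loop dynamics for up to max_steps, accumulating the LQ stage cost
# x' Q x + u' R u; assign infinite cost if the state constraints are violated, and stop
# early once the state is close to the origin.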
180 | for _ in range(max_steps): 181 | u = controller(x) 182 | total_cost += x.T @ Q @ x + u.T @ R @ u 183 | x = g(x) 184 | xs.append(x) 185 | if not (x_min <= x).all() or not (x <= x_max).all(): 186 | total_cost += np.inf 187 | break 188 | if np.linalg.norm(x) < 0.05: 189 | break 190 | average_cost = total_cost / len(xs) 191 | return np.array(xs), average_cost, total_cost 192 | 193 | 194 | def plot_comparison(x0, mark='^'): 195 | traj_mpc, cost_mpc, total_cost_mpc = get_trajectory(mpc_controller, x0) 196 | traj_mpc_term, cost_mpc_term, total_cost_mpc_term = get_trajectory(lambda x: mpc_controller(x, Qf=10000 * Q), x0) 197 | traj_learned, cost_learned, total_cost_learned = get_trajectory(learned_controller, x0) 198 | 199 | fig, ax = plt.subplots() 200 | ax.fill(MCI[:, 0], MCI[:, 1], 201 | alpha=0.1, label='Maximal Control Invariant Set', color='r') 202 | ax.fill(pis_mpc[:, 0], pis_mpc[:, 1], 203 | alpha=0.3, label='Positive invariant set (MPC)', color='g') 204 | ax.fill(pis_learned[:, 0], pis_learned[:, 1], 205 | alpha=0.3, label='Positive invariant set (Learned)', color='b') 206 | ax.plot(traj_mpc[:, 0], traj_mpc[:, 1], f'-{mark}', color='g', label=f"Trajectory (MPC) - Total Cost: {total_cost_mpc:.2f}") 207 | ax.plot(traj_mpc_term[:, 0], traj_mpc_term[:, 1], f'-{mark}', color='purple', label=f"Trajectory (MPC with term.) - Total Cost: {total_cost_mpc_term:.2f}") 208 | ax.plot(traj_learned[:, 0], traj_learned[:, 1], f'-{mark}', color='b', label="Trajectory (Learned) - Total Cost: {:.2f}".format(total_cost_learned)) 209 | ax.grid() 210 | ax.set_xlabel("$x_1$") 211 | ax.set_ylabel("$x_2$") 212 | 213 | # Plot the box constraint 214 | rect = Rectangle((x_min[0], x_min[1]), x_max[0] - x_min[0], x_max[1] - x_min[1], linewidth=1, edgecolor='r', facecolor='none') 215 | ax.add_patch(rect) 216 | ax.set_ylim(-3, 3) 217 | 218 | ax.legend() 219 | return fig, ax 220 | 221 | # %% Plot functions for manuscript 222 | import tikzplotlib 223 | 224 | def tikzplotlib_fix_ncols(obj): 225 | """ 226 | workaround for matplotlib 3.6 renamed legend's _ncol to _ncols, which breaks tikzplotlib 227 | """ 228 | if hasattr(obj, "_ncols"): 229 | obj._ncol = obj._ncols 230 | for child in obj.get_children(): 231 | tikzplotlib_fix_ncols(child) 232 | 233 | def plot_comparison_tex(x0): 234 | traj_mpc, cost_mpc, total_cost_mpc = get_trajectory(mpc_controller, x0) 235 | traj_learned, cost_learned, total_cost_learned = get_trajectory(learned_controller, x0) 236 | traj_mpc_term, cost_mpc_term, total_cost_mpc_term = get_trajectory(lambda x: mpc_controller(x, Qf=np.array([[-0.99, 0.], [0., 50.]])), x0) 237 | # traj_mpc_term_2, cost_mpc_term_2, total_cost_mpc_term_2 = get_trajectory(lambda x: mpc_controller(x, Qf=10000 * Q), x0) 238 | 239 | # Set up canvas 240 | fig, ax = plt.subplots() 241 | ax.fill(MCI[:, 0], MCI[:, 1], 242 | alpha=0.1, color='g') 243 | ax.grid() 244 | ax.set_xlabel("$x_{[1]}$") 245 | ax.set_ylabel("$x_{[2]}$") 246 | 247 | # Plot MPC trajectory 248 | mark = 'o' 249 | ax.plot(traj_mpc[:, 0], traj_mpc[:, 1], f'-{mark}', color='darkorange', label=f"\\makebox[3.2em][l]{{MPC:}} Cost=$\infty$", alpha=0.7, linewidth=2, zorder=0) 250 | 251 | # Plot MPC trajectory with terminal cost 252 | ax.plot(traj_mpc_term[:, 0], traj_mpc_term[:, 1], f'-{mark}', color='purple', label=f"\\makebox[3.2em][l]{{MPC-T:}} Cost={total_cost_mpc_term:.0f}", alpha=0.7, zorder=1, linewidth=2) 253 | 254 | # Plot learned trajectory 255 | ax.plot(traj_learned[:, 0], traj_learned[:, 1], f'-{mark}', color='b', label="\\makebox[3.2em][l]{{LQP:}} 
Cost={:.0f}".format(total_cost_learned), alpha=0.7, zorder=2, linewidth=2) 256 | 257 | # Plot predicted trajectories 258 | mark = '*' 259 | for i in range(3): 260 | traj = mpc_with_predicted_trajectory(traj_mpc[i]) 261 | xs = np.array([t[0] for t in traj]) 262 | # print(xs) 263 | ax.plot(xs[:, 0], xs[:, 1], f'-{mark}', color='darkorange', alpha=0.5, linewidth=2) 264 | 265 | # Plot the box constraint 266 | # rect = Rectangle((x_min[0], x_min[1]), x_max[0] - x_min[0], x_max[1] - x_min[1], linewidth=2, edgecolor='r', facecolor='none') 267 | # ax.add_patch(rect) 268 | ax.plot([x_min[0], x_max[0]], [x_min[1], x_min[1]], 'r', linewidth=3) 269 | ax.plot([x_max[0], x_max[0]], [x_min[1], x_max[1]], 'r', linewidth=3) 270 | ax.plot([x_max[0], x_min[0]], [x_max[1], x_max[1]], 'r', linewidth=3) 271 | ax.plot([x_min[0], x_min[0]], [x_max[1], x_min[1]], 'r', linewidth=3) 272 | 273 | ax.set_ylim(-0.3, 2.25) 274 | 275 | ax.legend(loc="lower left") 276 | 277 | tikzplotlib_fix_ncols(fig) 278 | 279 | tikzplotlib.save('toy_example.tex', axis_height='\\figureheight', 280 | axis_width='\\figurewidth',) 281 | 282 | return fig, ax 283 | 284 | # %% 285 | 286 | # %% 287 | fig, ax = plot_comparison(np.array([-4, 2]), '') 288 | # ax.set_xlim(-1, 1) 289 | # ax.set_ylim(-0.2, 0.2) 290 | 291 | # %% 292 | fig, ax = plot_comparison_tex(np.array([-4, 2.1])); 293 | 294 | # %% 295 | 296 | # %% 297 | --------------------------------------------------------------------------------