├── examples
│   ├── docs
│   │   ├── NL-example.jpg
│   │   └── basic-example.py
│   └── 2. pHNN SUBNET demo.ipynb
├── .gitignore
├── deepSI
│   ├── __init__.py
│   ├── normalization.py
│   ├── fitting.py
│   ├── networks.py
│   └── models.py
├── pyproject.toml
├── LICENSE
└── README.md

/examples/docs/NL-example.jpg:
--------------------------------------------------------------------------------
 https://raw.githubusercontent.com/MaartenSchoukens/deepSI/HEAD/examples/docs/NL-example.jpg

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
__pycache__/
deepSI.egg-info/
testing/
.ipynb_checkpoints/
examples/.ipynb_checkpoints/
examples/f.onnx
examples/model*
.vscode/
build/
dist/
--------------------------------------------------------------------------------
/deepSI/__init__.py:
--------------------------------------------------------------------------------
import deepSI.models
import deepSI.fitting
import deepSI.networks
import deepSI.normalization
from nonlinear_benchmarks import Input_output_data

# default imports
from deepSI.models import SUBNET, SUBNET_CT, Custom_SUBNET, Custom_SUBNET_CT
from deepSI.fitting import fit
from deepSI.networks import MLP_res_net
from deepSI.normalization import Norm, get_nu_ny_and_auto_norm
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------

[tool.poetry]
name = "deepSI"
version = "0.4.4"
description = "Data-driven learning of dynamical systems in the SUBNET structure with ANNs"
authors = ["Gerben I. Beintema"]
license = "BSD 3-Clause License"
readme = "README.md"
packages = [{include = "deepSI"}]

[tool.poetry.dependencies]
python = "^3.10"
numpy = ">=1.25"
tqdm = "^4.0.0"
nonlinear_benchmarks = "^0.1.2"
torch = "^2.0.0"
cloudpickle = "*"

[tool.poetry.urls]
Homepage = "https://github.com/GerbenBeintema/deepSI"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
BSD 3-Clause License

Copyright (c) 2019-2025, Gerben I. Beintema & Maarten Schoukens
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of the Eindhoven University of Technology nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL GERBEN I. BEINTEMA BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
/examples/docs/basic-example.py:
--------------------------------------------------------------------------------
# This script is used to generate the example that is shown in the README.md.

import deepSI as dsi
import numpy as np

# Generate data
np.random.seed(0)
ulist = np.random.randn(10_000)  # input sequence
x = [0, 0]  # initial state
ylist = []  # output sequence
for uk in ulist:
    ylist.append(x[1]*x[0]*0.1 + x[0] + np.random.randn()*1e-3)  # compute output
    x = x[0]/(1.2+x[1]**2) + x[1]*0.4, \
        x[1]/(1.2+x[0]**2) + x[0]*0.4 + uk*(1+x[0]**2/10)  # advance state

# Put the inputs and outputs in an Input_output_data object
data = dsi.Input_output_data(u=ulist, y=np.array(ylist))

# Split dataset
train, val, test = data[:8000], data[8000:9000], data[9000:]

# Create model
nu, ny, norm = dsi.get_nu_ny_and_auto_norm(data)  # Characterize data
model = dsi.SUBNET(nu, ny, norm, nx=2, nb=20, na=20)  # Creates encoder, f and h as MLPs

# Train model on data (set to True to retrain; otherwise a previously saved checkpoint is loaded)
if False:
    train_dict = dsi.fit(model, train, val, n_its=10_000, T=20, batch_size=256, val_freq=100)  # Adam
else:
    import cloudpickle
    folder = dsi.fitting.get_checkpoint_dir()
    train_dict = cloudpickle.load(open(folder + '/SUBNET-GENrrw.pth', 'rb'))
    model = train_dict['best_model']

# Simulate model on the test input sequence
test_p = model.simulate(test)

import matplotlib.pyplot as plt

# Improved plot settings
plt.figure(figsize=(10, 4), dpi=120)
plt.plot(test.y, label='Real Data', color='dodgerblue', linewidth=1.5)
plt.plot(test_p.y, label=f'Model (NRMSE = {((test.y - test_p.y)**2).mean()**0.5 / test.y.std():.2%})', color='darkorange', linestyle='--', linewidth=1.5)

# Add labels, legend, and grid
plt.xlabel('Time Index', fontsize=12)
plt.ylabel('y', fontsize=12)
plt.title('Comparison of Real Data and Model Simulation', fontsize=14, fontweight='bold')
plt.legend(fontsize=10, loc='upper right')
plt.grid(visible=True, linestyle='--', alpha=0.7)

# Adjust layout
plt.tight_layout(pad=0.8)
plt.savefig('NL-example.jpg')
plt.show()
--------------------------------------------------------------------------------
/deepSI/normalization.py:
--------------------------------------------------------------------------------

import nonlinear_benchmarks as nlb
import numpy as np
import torch

C = lambda x: torch.as_tensor(x, dtype=torch.float32) if x is not None else None
class IO_normalization_f(torch.nn.Module):
    def __init__(self, fun, umean, ustd):
        super().__init__()
        self.fun, self.umean, self.ustd = fun, C(umean), C(ustd)
    def forward(self, x, u):
        return self.fun(x, (u-self.umean)/self.ustd)

class IO_normalization_f_CT(torch.nn.Module):
    def __init__(self, fun, umean, ustd, tau):
        super().__init__()
        self.fun, self.umean, self.ustd, self.tau = fun, C(umean), C(ustd), C(tau)
    def forward(self, x, u):
        return self.fun(x, (u-self.umean)/self.ustd)/self.tau

class IO_normalization_h(torch.nn.Module):
    def __init__(self, fun, umean, ustd, ymean, ystd):
        super().__init__()
        self.fun, self.umean, self.ustd, self.ymean, self.ystd = fun, C(umean), C(ustd), C(ymean), C(ystd)
    def forward(self, x, u=None):
        if u is None:
            y_normed = self.fun(x)
        else:
            y_normed = self.fun(x, (u-self.umean)/self.ustd)
        return y_normed*self.ystd + self.ymean

class IO_normalization_encoder(torch.nn.Module):
    def __init__(self, fun, umean, ustd, ymean, ystd):
        super().__init__()
        self.fun, self.umean, self.ustd, self.ymean, self.ystd = fun, C(umean), C(ustd), C(ymean), C(ystd)
    def forward(self, upast, ypast):
        return self.fun((upast-self.umean)/self.ustd, (ypast-self.ymean)/self.ystd)

class Norm:
    def __init__(self, umean, ustd, ymean, ystd, sampling_time=1):
        self.umean, self.ustd, self.ymean, self.ystd = C(umean), C(ustd), C(ymean), C(ystd)
        self.sampling_time = C(sampling_time)

    def f(self, fun):
        return IO_normalization_f(fun, self.umean, self.ustd)
    def h(self, fun):
        return IO_normalization_h(fun, self.umean, self.ustd, self.ymean, self.ystd)
    def encoder(self, fun):
        return IO_normalization_encoder(fun, self.umean, self.ustd, self.ymean, self.ystd)
    def f_CT(self, fun, tau):
        return IO_normalization_f_CT(fun, self.umean, self.ustd, tau)

    def transform(self, dataset : nlb.Input_output_data | list):
        if isinstance(dataset, (list, tuple)):
            return [self.transform(d) for d in dataset]
        u = (dataset.u - self.umean.numpy())/self.ustd.numpy()
        y = (dataset.y - self.ymean.numpy())/self.ystd.numpy()
        sampling_time = None if dataset.sampling_time is None else dataset.sampling_time/self.sampling_time.item()
        return nlb.Input_output_data(u, y, sampling_time=sampling_time, name=f'{dataset.name}-normed', \
            state_initialization_window_length=dataset.state_initialization_window_length)

    def __repr__(self):
        return (f"Norm(umean={self.umean.numpy()}, ustd={self.ustd.numpy()}, "
                f"ymean={self.ymean.numpy()}, ystd={self.ystd.numpy()}, "
                f"sampling_time={self.sampling_time.numpy()})")

def get_nu_ny_and_auto_norm(data: nlb.Input_output_data | list):
    if not isinstance(data, (tuple, list)):
        data = [data]
    u = np.concatenate([d.u for d in data], axis=0)
    y = np.concatenate([d.y for d in data], axis=0)
    assert u.ndim<=2 and y.ndim<=2, f'auto norm is only defined for scalar or vector outputs y and inputs u {y.shape=} {u.shape=}'
    sampling_time = data[0].sampling_time
    assert all(sampling_time==d.sampling_time for d in data), f"the given datasets do not all have the same sampling_time {[d.sampling_time for d in data]=}"
    umean, ustd = u.mean(0), u.std(0)
    ymean, ystd = y.mean(0), y.std(0)
    norm = Norm(umean, ustd, ymean, ystd, sampling_time)
    nu = 'scalar' if u.ndim==1 else u.shape[1]
    ny = 'scalar' if y.ndim==1 else y.shape[1]
    return nu, ny, norm
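
# Usage sketch of the functions above (illustrative only; data values are hypothetical):
#   import numpy as np, nonlinear_benchmarks as nlb
#   data = nlb.Input_output_data(u=np.random.randn(1000), y=np.random.randn(1000))
#   nu, ny, norm = get_nu_ny_and_auto_norm(data)   # nu == ny == 'scalar' here
#   f_wrapped = norm.f(my_f_network)               # my_f_network is any callable f(x, u_normalized)
#   data_n = norm.transform(data)                  # returns a normalized copy named '<name>-normed'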

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
## deepSI

deepSI provides a lightweight PyTorch-based framework for data-driven learning of dynamical systems (i.e., system identification). It focuses on the SUBNET method, which has been shown to produce robust models for a wide range of nonlinear systems.

### ⚠️ deepSI has been refactored without backward compatibility (5 December 2024) ⚠️

If you need to install the legacy version, you can do so with:
```bash
pip install git+https://github.com/GerbenBeintema/deepSI@legacy
```
and by using the legacy documentation available at [https://github.com/MaartenSchoukens/deepSI/tree/legacy](https://github.com/MaartenSchoukens/deepSI/tree/legacy).

## Example usage

```python
import numpy as np
import deepSI as dsi

# Generate or load data
np.random.seed(0)
ulist = np.random.randn(10_000)  # Input sequence
x = [0, 0]  # Initial state
ylist = []  # Output sequence
for uk in ulist:
    ylist.append(x[1]*x[0]*0.1 + x[0] + np.random.randn()*1e-3)  # Compute output
    x = x[0]/(1.2+x[1]**2) + x[1]*0.4, \
        x[1]/(1.2+x[0]**2) + x[0]*0.4 + uk*(1+x[0]**2/10)  # Advance state

# Put the input and output sequences in the Input_output_data format
data = dsi.Input_output_data(u=ulist, y=np.array(ylist))

# Split dataset
train, val, test = data[:8000], data[8000:9000], data[9000:]

# Create model
nu, ny, norm = dsi.get_nu_ny_and_auto_norm(data)  # Characterize data
model = dsi.SUBNET(nu, ny, norm, nx=2, nb=20, na=20)  # Creates encoder, f and h as MLPs

# Train model on data using Adam
train_dict = dsi.fit(model, train, val, n_its=10_000, T=20, batch_size=256, val_freq=100)

# Simulate model on the test input sequence (using the encoder to initialize the state)
test_p = model.simulate(test)

# Visualize simulation of the model
from matplotlib import pyplot as plt
plt.figure(figsize=(7,3))
plt.plot(test.y, label='Real Data')
plt.plot(test_p.y, label=f'Model Sim. (NRMS = {((test.y-test_p.y)**2).mean()**0.5/test.y.std():.2%})', linestyle='--')
plt.title('Comparison of Real Data and Model Simulation', fontsize=14, fontweight='bold')
plt.legend(); plt.xlabel('Time Index'); plt.ylabel('y'); plt.grid(); plt.tight_layout(pad=0.5)
plt.show()
```

![dsi SUBNET result on example](examples/docs/NL-example.jpg)

## Installation

```bash
pip install deepSI
```

## Features

* A number of popular SUBNET model structures
  * SUBNET encoder structure (`deepSI.models.SUBNET`). Featured in: [\[1\]](https://proceedings.mlr.press/v144/beintema21a), [\[2\]](https://www.sciencedirect.com/science/article/pii/S2405896321012167), [\[3\]](https://www.sciencedirect.com/science/article/pii/S2405896321012180), [\[4\]](https://arxiv.org/abs/2303.17305), [\[5\]](https://arxiv.org/abs/2304.02119)
  * Continuous-time SUBNET encoder structure (`deepSI.models.SUBNET_CT`). Featured in: [\[6\]](https://arxiv.org/abs/2204.09405), [\[7\]](https://www.sciencedirect.com/science/article/pii/S2405896324013223), [\[8\]](https://www.sciencedirect.com/science/article/pii/S240589632401317X)
  * Base class for fully custom SUBNET structures with shared parameters between `f`, `h` or `encoder` (`deepSI.models.Custom_SUBNET`), as used in:
    * CNN SUBNET (`CNN_SUBNET`). Featured in: [\[9\]](https://research.tue.nl/files/318935789/20240321_Beintema_hf.pdf) Chapter 4, [\[10\]](https://www.sciencedirect.com/science/article/pii/S2405896321012167)
    * LPV SUBNET (`SUBNET_LPV` and `SUBNET_LPV_ext_scheduled`). Featured in: [\[11\]](https://arxiv.org/abs/2204.04060)
    * port-HNN SUBNET (`pHNN_SUBNET`). Featured in: [\[12\]](https://arxiv.org/abs/2305.01338)
    * Koopman SUBNET (`Koopman_SUBNET`). Featured in: [\[13\]](https://ieeexplore.ieee.org/abstract/document/9682946)
* Connection to [`nonlinear_benchmarks`](https://github.com/GerbenBeintema/nonlinear_benchmarks) to easily load and evaluate on benchmarks (see the sketch below).
* A small codebase that can be easily forked and edited to add missing features.
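Benchmark systems can be loaded through `nonlinear_benchmarks` and identified in a few lines. The snippet below is a minimal sketch: the loader name `WienerHammerBenchMark` and the split sizes are illustrative, so check the `nonlinear_benchmarks` documentation for the exact loaders and their return values.

```python
import nonlinear_benchmarks
import deepSI as dsi

# Load a benchmark as Input_output_data (loader name and split sizes are illustrative)
train_val, test = nonlinear_benchmarks.WienerHammerBenchMark()
train, val = train_val[:80_000], train_val[80_000:]

nu, ny, norm = dsi.get_nu_ny_and_auto_norm(train)
model = dsi.SUBNET(nu, ny, norm, nx=6, nb=20, na=20)
train_dict = dsi.fit(model, train, val, n_its=10_000, T=50, batch_size=256)

test_p = model.simulate(test)  # simulation with encoder-based state initialization
```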
## Further documentation

Check out [`examples/1. Overview deepSI.ipynb`](examples/1.%20Overview%20deepSI.ipynb).

## Contributing

deepSI is under ongoing development, and contributions to any part of the module are welcome.

## Todo list and known issues

* Expand the demonstration notebook with pHNN examples.
* Issue where the discrete time index is printed in `Input_output_data` when `torch.Tensor` or `np.array` sample times are used.
* General documentation improvements.
* Known issue: CT SUBNET and DT SUBNET do not produce the correct initial state when the sampling time is altered (the encoder assumes the sampling time does not change).
* Possible improvement: improve speed by copying the data if sufficient memory is available; also consider pre-transfer to the GPU and asynchronous array retrieval.
* Known issue: using `torch.compile` (via the `compile_mode` option of `fit`) may result in a memory leak.

--------------------------------------------------------------------------------
/examples/2. pHNN SUBNET demo.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "81d2f263-ab66-4a60-8c1e-a2c2ab2da548",
   "metadata": {},
   "source": [
    "## Port Hamiltonian Neural Networks in `deepSI`\n",
    "\n",
    "Model structure given by `pHNN_SUBNET`\n",
    "\n",
    "$$ \\frac{dx}{dt} = \\frac{1}{\\tau} \\left (\\left ( J(x) - R(x) \\right ) \\frac{dH}{dx} + G(x) (u - u_\\text{mean})/u_\\text{std} \\right)$$\n",
    "\n",
    "$$ (y - y_\\text{mean})/y_\\text{std} = G(x)^T \\frac{dH}{dx} $$\n",
    "\n",
    "where \n",
    "$$ \\tau \\text{ is a constant selected as } 10\\ T_s \\text{ by default}$$\n",
    "$$G (\\text{Gnet}) : n_\\text{x} \\rightarrow n_\\text{x} \\times n_\\text{u}$$\n",
    "$$J (\\text{Jnet}) : n_\\text{x} \\rightarrow n_\\text{x} \\times n_\\text{x}\\ \\text{(skew-symmetric)}$$\n",
    "$$R (\\text{Rnet}) : n_\\text{x} \\rightarrow n_\\text{x} \\times n_\\text{x}\\ \\text{(positive semi-definite)}$$\n",
    "$$H (\\text{Hnet}) : n_\\text{x} \\rightarrow\\ \\text{scalar}$$\n",
    "$$u_\\text{mean},\\ u_\\text{std},\\ y_\\text{mean},\\ y_\\text{std}\\ \\text{given by `norm.umean`, `norm.ustd`, etc.}$$\n",
    "Also, `model.integrator(f, x, u, dt)` is a function that integrates the state given a state derivative $f$ and input $u$ over a time step $dt$. \n",
    "\n",
    "These functions are constructed by default by using `MLP_res_net` as a base and then converting the output such that it adheres to the constraints. 
" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 3, 31 | "id": "31124692-0273-4630-94be-c9c181f0359d", 32 | "metadata": {}, 33 | "outputs": [ 34 | { 35 | "data": { 36 | "text/plain": [ 37 | "tensor([[ 0.6436, 0.6736, 0.6847, 0.6754, 0.6843, 0.6743, 0.6747, 0.6841,\n", 38 | " 0.6996, 0.6891],\n", 39 | " [-0.1671, -0.1697, -0.1727, -0.1782, -0.1766, -0.1738, -0.1774, -0.1789,\n", 40 | " -0.1820, -0.1827]], grad_fn=)" 41 | ] 42 | }, 43 | "execution_count": 3, 44 | "metadata": {}, 45 | "output_type": "execute_result" 46 | } 47 | ], 48 | "source": [ 49 | "import deepSI as dsi\n", 50 | "import torch\n", 51 | "\n", 52 | "norm = dsi.normalization.Norm(0,1,0,1)\n", 53 | "na = nb = 3\n", 54 | "nx = 4\n", 55 | "nu = ny = 'scalar'\n", 56 | "model = dsi.models.pHNN_SUBNET(nu, ny, norm, nx, na, nb)\n", 57 | "\n", 58 | "# net(torch.randn(3,4)).shape\n", 59 | "r = torch.randn\n", 60 | "b = 2\n", 61 | "T = 10\n", 62 | "upast, ypast, ufuture, yfuture = r(b, nb), r(b, na), r(b, T), r(b, T)\n", 63 | "sampling_time = r(b)\n", 64 | "model(upast, ypast, ufuture, sampling_time=sampling_time)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "id": "5b1fdca4-c7b8-4b15-815f-68941dd4aa41", 70 | "metadata": {}, 71 | "source": [ 72 | "## Customized function for each element: \n", 73 | "\n", 74 | "Example (this will get expanded later)" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "id": "5b02f337-ab26-48cc-ac3f-d19ce046d846", 81 | "metadata": {}, 82 | "outputs": [ 83 | { 84 | "data": { 85 | "text/plain": [ 86 | "torch.Size([2, 10])" 87 | ] 88 | }, 89 | "execution_count": 5, 90 | "metadata": {}, 91 | "output_type": "execute_result" 92 | } 93 | ], 94 | "source": [ 95 | "import deepSI as dsi\n", 96 | "import torch\n", 97 | "from torch import nn\n", 98 | "from deepSI.networks import Quadratic_net\n", 99 | "\n", 100 | "nx = 4\n", 101 | "\n", 102 | "#Jnet\n", 103 | "Jnet_bias = dsi.networks.Bias_net(nx*nx)\n", 104 | "Jnet_constant = dsi.networks.Contant_net(torch.randn(nx*nx))\n", 105 | "Jnet_mlp = dsi.networks.MLP_res_net(input_size=nx, output_size=nx*nx) #simple \n", 106 | "Jnet = dsi.networks.Sum_net([Jnet_bias, Jnet_constant, Jnet_mlp]) #add these three networks together\n", 107 | "Jnet = dsi.networks.Skew_sym_converter(Jnet) #x -> nx x nx \n", 108 | "\n", 109 | "#Rnet\n", 110 | "Rnet = dsi.networks.Bias_net(nx*nx)\n", 111 | "Rnet = dsi.networks.Sym_pos_semidef_converter(Rnet)\n", 112 | "\n", 113 | "#Hnet\n", 114 | "Hnet_depend = dsi.networks.ELU_lower_bound(dsi.networks.MLP_res_net(nx, 'scalar'), lower_bound=-100)\n", 115 | "Hnet_qaudratic = Quadratic_net(nx)\n", 116 | "Hnet = dsi.networks.Ham_converter(dsi.networks.Sum_net([Hnet_depend,Hnet_qaudratic]))\n", 117 | "\n", 118 | "nu = 'scalar'\n", 119 | "ny = 'scalar'\n", 120 | "\n", 121 | "norm = dsi.normalization.Norm(0,1,0,1)\n", 122 | "na = nb = 3\n", 123 | "model = dsi.models.pHNN_SUBNET(nu, ny, norm, nx, na, nb, Jnet=Jnet, Rnet=Rnet, Hnet=Hnet)\n", 124 | "\n", 125 | "# net(torch.randn(3,4)).shape\n", 126 | "r = torch.randn\n", 127 | "b = 2\n", 128 | "T = 10\n", 129 | "upast, ypast, ufuture, yfuture = r(b, nb), r(b, na), r(b, T), r(b, T)\n", 130 | "sampling_time = r(b)\n", 131 | "model(upast, ypast, ufuture, sampling_time=sampling_time).shape" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "id": "a407f7cb", 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [] 141 | } 142 | ], 143 | "metadata": { 144 | "kernelspec": { 145 | "display_name": "noodle", 
146 | "language": "python", 147 | "name": "python3" 148 | }, 149 | "language_info": { 150 | "codemirror_mode": { 151 | "name": "ipython", 152 | "version": 3 153 | }, 154 | "file_extension": ".py", 155 | "mimetype": "text/x-python", 156 | "name": "python", 157 | "nbconvert_exporter": "python", 158 | "pygments_lexer": "ipython3", 159 | "version": "3.11.10" 160 | } 161 | }, 162 | "nbformat": 4, 163 | "nbformat_minor": 5 164 | } 165 | -------------------------------------------------------------------------------- /deepSI/fitting.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import torch 4 | import cloudpickle, os 5 | from secrets import token_urlsafe 6 | from copy import deepcopy 7 | from tqdm.auto import tqdm 8 | from torch import nn, optim 9 | from nonlinear_benchmarks import Input_output_data 10 | import time 11 | 12 | def compute_NMSE(*A) -> torch.Tensor: 13 | '''Computes the Normalized Mean Squared Error. 14 | Example usage: compute_NMSE(model, *xarrays, yarray) or compute_NMSE(model, upast, ypast, ufuture, yfuture)''' 15 | model, *xarrays, yarray = A 16 | yout = model(*xarrays, yarray) 17 | return torch.mean((yout-yarray)**2/model.norm.ystd**2) 18 | 19 | def data_batcher(*arrays, batch_size=256, seed=0, device=None, indices=None): 20 | rng = np.random.default_rng(seed=seed) 21 | if indices is None: 22 | indices = np.arange(arrays[0].shape[0]) 23 | dataset_size = len(indices) 24 | assert all(array.shape[0] == arrays[0].shape[0] for array in arrays) 25 | assert batch_size <= dataset_size 26 | while True: 27 | perm = rng.permutation(indices) 28 | start, end = 0, batch_size 29 | while end <= dataset_size: 30 | batch_perm = perm[start:end] 31 | yield tuple(array[batch_perm].to(device) for array in arrays) #arrays are already torch arrays 32 | start, end = start + batch_size, end + batch_size 33 | 34 | def fit(model: nn.Module, train:Input_output_data, val:Input_output_data, n_its:int, T:int=50, \ 35 | batch_size:int=256, stride:int=1, val_freq:int=250, optimizer:optim.Optimizer=None, \ 36 | device=None, compile_mode=None, loss_fun=compute_NMSE, val_fun=compute_NMSE): 37 | """ 38 | Trains a PyTorch model, saving the best model and tracking training/validation progress. 39 | 40 | Args: 41 | model (nn.Module): Neural network model to be trained. The model must implement a 42 | `.create_arrays(train, T, stride)` method to generate training arrays. 43 | train (Input_output_data): Training dataset. 44 | val (Input_output_data): Validation dataset. 45 | n_its (int): Number of training iterations (i.e., batch updates). 46 | T (int, optional): Sequence length considered in the loss (unroll length). Default is 50. 47 | batch_size (int, optional): Number of samples per batch during training. Default is 256. 48 | stride (int, optional): Step size for generating batches from the data. Default is 1. 49 | val_freq (int, optional): Frequency of validation checks (in iterations). Default is 250. 50 | optimizer (optim.Optimizer, optional): Optimizer for training. Default is Adam if not provided. 51 | device (torch.device, optional): Device to move the model and data to (e.g., 'cpu', 'cuda'). 52 | compile_mode (optional): Optional mode for torch.compile to optimize the training step. 53 | loss_fun (callable, optional): Loss function used for training. Default is `compute_NMSE`. 54 | val_fun (callable, optional): Function used to compute validation loss. Default is `compute_NMSE`. 
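
    Example (a minimal usage sketch; `model`, `train` and `val` as described above):
        train_dict = fit(model, train, val, n_its=10_000, T=20, batch_size=256, val_freq=100)
        model = train_dict['best_model']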

    Returns:
        dict: Contains the following keys:
            - 'best_model': The best model (with the lowest validation loss).
            - 'best_optimizer_state': Optimizer state when the best model was found.
            - 'last_model': The model at the end of training.
            - 'last_optimizer_state': The optimizer state at the end of training.
            - 'NRMS_train': Training loss history (normalized root mean square error).
            - 'NRMS_val': Validation loss history (normalized root mean square error).
            - 'samples/sec': Number of data samples processed per second.
            - 'val_freq': Validation frequency.
            - 'batch_size': Batch size used during training.
            - 'it_counter': List of iteration counts corresponding to each validation point.
    """

    def train_step(model, batch, optimizer):
        def closure(backward=True):
            loss = loss_fun(model, *batch)
            if backward:
                optimizer.zero_grad()
                loss.backward()
            return loss
        loss = optimizer.step(closure) # a closure is used such that LBFGS can also be employed
        return loss.item()
    if compile_mode is not None:
        train_step = torch.compile(train_step, mode=compile_mode)

    code = token_urlsafe(4).replace('_','0').replace('-','a')
    save_filename = os.path.join(get_checkpoint_dir(), f'{model.__class__.__name__}-{code}.pth')
    fit_info = {'val_freq': val_freq, 'batch_size': batch_size}

    # Create optimizer
    model.to(device); model.train()
    optimizer = torch.optim.Adam(model.parameters()) if optimizer is None else optimizer
    if device is not None and optimizer is not None:
        try: # check that the optimizer and the model are on the same device (a failed check should not halt the process, hence this try/except structure)
            assert optimizer.param_groups[0]['params'][0].device == next(model.parameters()).device, \
                'Model and optimizer are on different devices, make sure to do the following: \n1. Create model, 2. Move model to device with .to(device), 3. Create desired optimizer, 4. Call .fit'
        except AssertionError:
            raise
        except: print('### Warning: could not check whether the optimizer and model are on the same device ###')

    # Create training arrays
    arrays, indices = model.create_arrays(train, T=T, stride=stride)
    print(f'Number of samples to train on = {len(indices)}')
    itter = data_batcher(*arrays, batch_size=batch_size, indices=indices, device=device)

    # Create validation arrays
    arrays_val, indices = model.create_arrays(val, T='sim')
    arrays_val = [array_val[indices].to(device) for array_val in arrays_val]

    # Initialize all the monitors and the best found model
    best_val, best_model, best_optimizer_state, loss_acc = float('inf'), deepcopy(model), deepcopy(optimizer.state_dict()), []
    NRMS_val, NRMS_train, time_usage_train = [], [], 0. # initialize the train and val monitors
    try:
        progress_bar = tqdm(range(n_its + 1), total=n_its)
        for it_count, batch in zip(progress_bar, itter):
            ### Validation and printing step ###
            if it_count%val_freq==0: # TODO: also run validation on the final iteration?
                with torch.no_grad(): NRMS_val.append((val_fun(model, *arrays_val)).cpu().numpy()**0.5)
                NRMS_train.append((np.mean(loss_acc) if len(loss_acc)>0 else float('nan'))**0.5)
                loss_acc = []

                if NRMS_val[-1]<=best_val:
                    best_val, best_model, best_optimizer_state = NRMS_val[-1], deepcopy(model).cpu(), deepcopy(optimizer.state_dict()) # TODO: check that this interacts correctly with device placement
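                # Everything below is checkpointed to disk every `val_freq` iterations; a run can
                # later be recovered from the checkpoint directory, e.g. (sketch, filename is run-specific):
                #   d = cloudpickle.load(open(save_filename, 'rb')); model = d['best_model']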

                # saving fit results
                samps_per_sec = it_count*batch_size/time_usage_train if time_usage_train>0 else None
                cloudpickle.dump({'best_model': best_model, 'best_optimizer_state': best_optimizer_state,\
                    'last_model': deepcopy(model).cpu(), 'last_optimizer_state': optimizer.state_dict(),\
                    'NRMS_train': np.array(NRMS_train), 'NRMS_val': np.array(NRMS_val),\
                    'samples/sec': samps_per_sec, **fit_info, 'it_counter': np.arange(len(NRMS_val))*val_freq},\
                    open(save_filename,'wb'))
                print(f'it {it_count:7,} NRMS loss {NRMS_train[-1]:.5f} NRMS val {NRMS_val[-1]:.5f}{"!!" if NRMS_val[-1]==best_val else "  "} {(it_count*batch_size/time_usage_train if time_usage_train>0 else float("nan")):.2f} samps/sec')

            if it_count==n_its: break # break upon the final iteration so as to skip the added extra iteration

            ### Train Step ###
            start_t = time.time()
            loss = train_step(model, batch, optimizer)
            time_usage_train += time.time() - start_t

            ### Post Train Step ###
            if np.isnan(loss):
                raise ValueError('Loss became NaN and training will be terminated (see: "10.1 Recovering from a crash" from the example notebook)')
            loss_acc.append(loss) # add the loss to the loss accumulator
            progress_bar.set_description(f'Sqrt loss: {loss**0.5:.5f}', refresh=False)
    except KeyboardInterrupt:
        print('Stopping early due to KeyboardInterrupt')
    d = cloudpickle.load(open(save_filename,'rb')) # load the saved fit results and update them with the last model
    d['last_model'], d['last_optimizer_state'] = deepcopy(model).cpu(), optimizer.state_dict()
    cloudpickle.dump(d, open(save_filename,'wb'))
    model.load_state_dict(best_model.state_dict()); model.cpu()
    return d


def get_checkpoint_dir():
    '''A utility function which gets the checkpoint directory for each OS

    It creates a working directory called deepSI-checkpoints
      in LOCALAPPDATA/deepSI-checkpoints/ for Windows
      in ~/.deepSI-checkpoints/ for unix-like
      in ~/Library/Application Support/deepSI-checkpoints/ for darwin

    Returns
    -------
    checkpoints_dir
    '''
    import os
    from sys import platform
    if platform == "darwin": # not tested, but here it goes
        checkpoints_dir = os.path.expanduser('~/Library/Application Support/deepSI-checkpoints/')
    elif platform == "win32":
        checkpoints_dir = os.path.join(os.getenv('LOCALAPPDATA'), 'deepSI-checkpoints/')
    else: # unix-like; might be problematic for some uncommon operating systems
        checkpoints_dir = os.path.expanduser('~/.deepSI-checkpoints/')
    if os.path.isdir(checkpoints_dir) is False:
        os.mkdir(checkpoints_dir)
    return checkpoints_dir
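
# Usage sketch of the checkpoint directory (the '<run-code>' suffix is generated per fit call):
#   import cloudpickle, os, deepSI as dsi
#   folder = dsi.fitting.get_checkpoint_dir()
#   train_dict = cloudpickle.load(open(os.path.join(folder, 'SUBNET-<run-code>.pth'), 'rb'))
#   model = train_dict['best_model']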

def fit_minimal_implementation(model: nn.Module, train: Input_output_data,
        val: Input_output_data, n_its: int, T: int = 50, stride: int = 1, batch_size: int = 256,
        val_freq: int = 250, optimizer: optim.Optimizer = None, loss_fun=compute_NMSE):

    optimizer = optimizer or torch.optim.Adam(model.parameters())
    arrays, indices = model.create_arrays(train, T=T, stride=stride)
    itter = data_batcher(*arrays, batch_size=batch_size, indices=indices)
    arrays_val, val_indices = model.create_arrays(val, T=T)
    arrays_val = [a[val_indices] for a in arrays_val]

    best_val, best_model = float('inf'), deepcopy(model.state_dict())

    for it_count, batch in zip(tqdm(range(n_its)), itter):
        if it_count % val_freq == 0:  # Validation step
            val_loss = loss_fun(model, *arrays_val).sqrt().item()
            if val_loss <= best_val:
                best_val, best_model = val_loss, deepcopy(model.state_dict())
            print(f'Iter {it_count:7,}, Val Loss: {val_loss:.5f}')

        # Training step
        loss = loss_fun(model, *batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    model.load_state_dict(best_model)
    return best_model
--------------------------------------------------------------------------------
/deepSI/networks.py:
--------------------------------------------------------------------------------
import torch
from torch.nn import Sequential
from torch import nn

###########################################################
### Multi-layer perceptron/feed-forward neural network ####
###########################################################

class MLP_res_net(nn.Module):
    '''Multi-Layer Perceptron with Residual Connection (MLP_res_net) as follows:
        y_pred = net(input) = net_MLP(input) + A * input
    where net_MLP(input) is a simple Multi-Layer Perceptron, e.g.:
        h_1 = input
        h_2 = activation(A_1 h_1 + b_1) # A_1.shape = n_hidden_nodes x input_size
        h_3 = activation(A_2 h_2 + b_2) # A_2.shape = n_hidden_nodes x n_hidden_nodes
        ...
17 | h_n_hidden_layers = activation(A_n-1 h_n-1 + b_n-1) 18 | return h_n_hidden_layers 19 | ''' 20 | def __init__(self, input_size: str | int | list, output_size: str | int | list, n_hidden_layers = 2, n_hidden_nodes = 64, \ 21 | activation=nn.Tanh, zero_bias=True): 22 | self.input_size = input_size 23 | self.output_size = output_size 24 | super().__init__() 25 | self.scalar_output = output_size=='scalar' 26 | #convert input shape: 27 | def to_num(s): 28 | if isinstance(s, int): 29 | return s 30 | if s=='scalar': 31 | return 1 32 | a = 1 33 | for si in s: 34 | a = a*(1 if si=='scalar' else si) 35 | return a 36 | if isinstance(input_size, list): 37 | input_size = sum(to_num(s) for s in input_size) 38 | 39 | output_size = 1 if self.scalar_output else output_size 40 | self.net_res = nn.Linear(input_size, output_size) 41 | 42 | seq = [nn.Linear(input_size,n_hidden_nodes),activation()] 43 | for i in range(n_hidden_layers-1): 44 | seq.append(nn.Linear(n_hidden_nodes,n_hidden_nodes)) 45 | seq.append(activation()) 46 | seq.append(nn.Linear(n_hidden_nodes,output_size)) 47 | self.net_nonlin = nn.Sequential(*seq) 48 | 49 | if zero_bias: 50 | for m in self.modules(): 51 | if isinstance(m, nn.Linear): 52 | nn.init.constant_(m.bias, val=0) #bias 53 | 54 | def forward(self, *ars): 55 | if len(ars)==1: 56 | net_in = ars[0] 57 | net_in = net_in.view(net_in.shape[0], -1) #adds a dim when needed 58 | else: 59 | net_in = torch.cat([a.view(a.shape[0], -1) for a in ars],dim=1) #flattens everything 60 | out = self.net_nonlin(net_in) + self.net_res(net_in) 61 | return out[:,0] if self.scalar_output else out 62 | 63 | ########################### 64 | ###### Integrators ######## 65 | ########################### 66 | 67 | def euler_integrator(f, x, u, dt, n_steps=1): 68 | dtp = (dt/n_steps)[:,None] 69 | for _ in range(n_steps): #f(x,u) has shape (nbatch, nx) 70 | x = x + f(x,u)*dtp 71 | return x 72 | 73 | def rk4_integrator(f, x, u, dt, n_steps=1): 74 | dtp = (dt/n_steps)[:,None] 75 | for _ in range(n_steps): #f(x,u) has shape (nbatch, nx) 76 | k1 = dtp * f(x,u) 77 | k2 = dtp * f(x+k1*0.5,u) 78 | k3 = dtp * f(x+k2*0.5,u) 79 | k4 = dtp * f(x+k3,u) 80 | x = x + (k1+2*k2+2*k3+k4)/6 81 | return x 82 | 83 | def rk45_integrator(f, x, u, dt, n_steps=1): 84 | dtp = (dt/n_steps)[:,None] 85 | for _ in range(n_steps): #f(x,u) has shape (nbatch, nx) 86 | k1 = dtp * f(x, u) 87 | k2 = dtp * f(x + k1 / 4, u) 88 | k3 = dtp * f(x + 3 * k1 / 32 + 9 * k2 / 32, u) 89 | k4 = dtp * f(x + 1932 * k1 / 2197 - 7200 * k2 / 2197 + 7296 * k3 / 2197, u) 90 | k5 = dtp * f(x + 439 * k1 / 216 - 8 * k2 + 3680 * k3 / 513 - 845 * k4 / 4104, u) 91 | k6 = dtp * f(x - 8 * k1 / 27 + 2 * k2 - 3544 * k3 / 2565 + 1859 * k4 / 4104 - 11 * k5 / 40, u) 92 | 93 | x = x + (16 * k1 / 135 + 6656 * k3 / 12825 + 28561 * k4 / 56430 - 9 * k5 / 50 + 2 * k6 / 55) 94 | return x 95 | 96 | ################################## 97 | ##### LPV SUBNET networks ######## 98 | ################################## 99 | 100 | import numpy as np 101 | class Bilinear(nn.Module): 102 | '''A(p) = A_0 + A_1 p_1 + A_2 p_2 + ... 
+ A_n_schedual p_n_schedual''' 103 | def __init__(self, n_in, n_out, n_schedual, std_output=None, std_input=None, scale_fac=None): 104 | super().__init__() 105 | scale_fac = (n_in*(n_schedual+1))**0.5*10 if scale_fac is None else scale_fac 106 | self.Alin = nn.Parameter(torch.randn((n_out, n_in))/scale_fac) 107 | self.Anlin = nn.Parameter(torch.randn((n_schedual, n_out, n_in))/scale_fac) 108 | self.std_output = torch.as_tensor(std_output,dtype=torch.float32) if std_output is not None else torch.ones((n_out,), dtype=torch.float32) 109 | assert self.std_output.shape == (n_out,) 110 | self.std_input = torch.as_tensor(std_input,dtype=torch.float32) if std_input is not None else torch.ones((n_in,), dtype=torch.float32) 111 | assert self.std_input.shape == (n_in,), f'{self.std_input.shape} == {(n_in,)}' 112 | 113 | def forward(self, p): 114 | #p (Nb, np) 115 | #Anlin (np, n_out, n_in) -> (1, np, n_out, n_in) 116 | #self.Alin (n_out, n_int) -> (None, n_out, n_in) 117 | A = (self.Alin[None] + (self.Anlin[None]*p[:,:,None,None]).sum(1)) #nbatch, n_out, n_in 118 | return self.std_output[:,None]*A/self.std_input[None,:] 119 | 120 | #################### 121 | ### CNN SUBNET #### 122 | #################### 123 | 124 | class ConvShuffle(nn.Module): 125 | def __init__(self, in_channels, out_channels, kernel_size, padding='same', upscale_factor=2, \ 126 | padding_mode='zeros'): 127 | super(ConvShuffle, self).__init__() 128 | self.upscale_factor = upscale_factor 129 | self.conv = nn.Conv2d(in_channels, out_channels*upscale_factor**2, kernel_size, padding=padding, \ 130 | padding_mode=padding_mode) 131 | 132 | def forward(self, X): 133 | X = self.conv(X) #(N, Cout*upscale**2, H, W) 134 | return nn.functional.pixel_shuffle(X, self.upscale_factor) #(N, Cin, H*r, W*r) 135 | 136 | 137 | class Upscale_Conv_block(nn.Module): 138 | def __init__(self, in_channels, out_channels, kernel_size, padding='same', \ 139 | upscale_factor=2, main_upscale=ConvShuffle, shortcut=ConvShuffle, \ 140 | padding_mode='zeros', activation=nn.functional.relu, Ch=0, Cw=0): 141 | assert isinstance(upscale_factor, int) 142 | super(Upscale_Conv_block, self).__init__() 143 | #padding='valid' is weird???? 
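        # Residual upscale block: a `shortcut` path that directly upscales the input, and a main
        # path (activation -> upscale -> activation -> conv); the two are summed in forward()
        # and the result is cropped by Ch/Cw when needed.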
144 | self.shortcut = shortcut(in_channels, out_channels, kernel_size, padding=padding, padding_mode=padding_mode, upscale_factor=upscale_factor) 145 | self.activation = activation 146 | self.upscale = main_upscale(in_channels, out_channels, kernel_size, padding=padding, padding_mode=padding_mode, upscale_factor=upscale_factor) 147 | self.conv = nn.Conv2d(out_channels, out_channels, kernel_size, padding=padding, padding_mode=padding_mode) 148 | self.Ch = Ch 149 | self.Cw = Cw 150 | 151 | def forward(self, X): 152 | #shortcut 153 | X_shortcut = self.shortcut(X) # (N, Cout, H*r, W*r) 154 | 155 | #main line 156 | X = self.activation(X) # (N, Cin, H, W) 157 | X = self.upscale(X) # (N, Cout, H*r, W*r) 158 | X = self.activation(X) # (N, Cout, H*r, W*r) 159 | X = self.conv(X) # (N, Cout, H*r, W*r) 160 | 161 | #combine 162 | # X.shape[:,Cout,H,W] 163 | H,W = X.shape[2:] 164 | H2,W2 = X_shortcut.shape[2:] 165 | if H2>H or W2>W: 166 | padding_height = (H2-H)//2 167 | padding_width = (W2-W)//2 168 | X = X + X_shortcut[:,:,padding_height:padding_height+H,padding_width:padding_width+W] 169 | else: 170 | X = X + X_shortcut 171 | return X[:,:,self.Ch:,self.Cw:] #slice if needed 172 | #Nnodes = W*H*N(Cout*4*r**2 + Cin) 173 | 174 | class CNN_vec_to_image(nn.Module): 175 | def __init__(self, nx, ny, nu=-1, features_out = 1, kernel_size=3, padding='same', \ 176 | upscale_factor=2, feature_scale_factor=2, final_padding=4, main_upscale=ConvShuffle, shortcut=ConvShuffle, \ 177 | padding_mode='zeros', activation=nn.functional.relu): 178 | super(CNN_vec_to_image, self).__init__() 179 | self.feedthrough = nu!=-1 180 | if self.feedthrough: 181 | self.nu = tuple() if nu is None else ((nu,) if isinstance(nu,int) else nu) 182 | FCnet_in = nx + np.prod(self.nu, dtype=int) 183 | else: 184 | FCnet_in = nx 185 | 186 | self.activation = activation 187 | assert isinstance(ny,(list,tuple)) and (len(ny)==2 or len(ny)==3), 'ny should have 2 or 3 dimentions in the form (nchannels, height, width) or (height, width)' 188 | if len(ny)==2: 189 | self.nchannels = 1 190 | self.None_nchannels = True 191 | self.height_target, self.width_target = ny 192 | else: 193 | self.None_nchannels = False 194 | self.nchannels, self.height_target, self.width_target = ny 195 | 196 | if self.nchannels>self.width_target or self.nchannels>self.height_target: 197 | import warnings 198 | text = f"Interpreting shape of data as (Nnchannels={self.nchannels}, Nheight={self.height_target}, Nwidth={self.width_target}), This might not be what you intended!" 
199 | warnings.warn(text) 200 | 201 | #work backwards 202 | features_out = int(features_out*self.nchannels) 203 | self.final_padding = final_padding 204 | height_now = self.height_target + 2*self.final_padding 205 | width_now = self.width_target + 2*self.final_padding 206 | features_now = features_out 207 | 208 | self.upblocks = [] 209 | while height_now>=2*upscale_factor+1 and width_now>=2*upscale_factor+1: 210 | 211 | Ch = (-height_now)%upscale_factor 212 | Cw = (-width_now)%upscale_factor 213 | # print(height_now, width_now, features_now, Ch, Cw) 214 | B = Upscale_Conv_block(int(features_now*feature_scale_factor), int(features_now), kernel_size, padding=padding, \ 215 | upscale_factor=upscale_factor, main_upscale=main_upscale, shortcut=shortcut, \ 216 | padding_mode=padding_mode, activation=activation, Cw=Cw, Ch=Ch) 217 | self.upblocks.append(B) 218 | features_now *= feature_scale_factor 219 | #implement slicing 220 | 221 | height_now += Ch 222 | width_now += Cw 223 | height_now //= upscale_factor 224 | width_now //= upscale_factor 225 | # print(height_now, width_now, features_now) 226 | self.width0 = width_now 227 | self.height0 = height_now 228 | self.features0 = int(features_now) 229 | 230 | self.upblocks = nn.Sequential(*list(reversed(self.upblocks))) 231 | self.FC = MLP_res_net(input_size=FCnet_in,output_size=self.width0*self.height0*self.features0, n_hidden_layers=1) 232 | self.final_conv = nn.Conv2d(features_out, self.nchannels, kernel_size=3, padding=padding, padding_mode='zeros') 233 | 234 | def forward(self, x, u=None): 235 | if self.feedthrough: 236 | xu = torch.cat([x,u.view(u.shape[0],-1)],dim=1) 237 | else: 238 | xu = x 239 | X = self.FC(xu).view(-1, self.features0, self.height0, self.width0) 240 | X = self.upblocks(X) 241 | X = self.activation(X) 242 | Xout = self.final_conv(X) 243 | if self.final_padding>0: 244 | Xout = Xout[:,:,self.final_padding:-self.final_padding,self.final_padding:-self.final_padding] 245 | return Xout[:,0,:,:] if self.None_nchannels else Xout 246 | 247 | class ShuffleConv(nn.Module): 248 | def __init__(self, in_channels, out_channels, kernel_size, padding='same', upscale_factor=2, padding_mode='zeros'): 249 | super(ShuffleConv, self).__init__() 250 | self.upscale_factor = upscale_factor 251 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, padding=padding, padding_mode=padding_mode) #kernal larger? 252 | 253 | def forward(self, X): 254 | X = torch.cat([X]*self.upscale_factor**2,dim=1) #(N, Cin*r**2, H, W) 255 | X = nn.functional.pixel_shuffle(X, self.upscale_factor) #(N, Cin, H*r, W*r) 256 | return self.conv(X) 257 | 258 | class ClassicUpConv(nn.Module): 259 | def __init__(self, in_channels, out_channels, kernel_size, padding='same', upscale_factor=2, padding_mode='zeros'): 260 | super(ClassicUpConv, self).__init__() 261 | self.upscale_factor = upscale_factor 262 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, padding=padding, padding_mode=padding_mode) #kernal larger? 263 | self.up = nn.Upsample(size=None,scale_factor=upscale_factor,mode='bicubic',align_corners=False) 264 | 265 | def forward(self, X): 266 | X = self.up(X) #(N, Cin, H*r, W*r) 267 | return self.conv(X) 268 | 269 | class Down_Conv_block(nn.Module): 270 | def __init__(self, in_channels, out_channels, kernel_size, padding='same', \ 271 | downscale_factor=2, padding_mode='zeros', activation=nn.functional.relu): 272 | assert isinstance(downscale_factor, int) 273 | super(Down_Conv_block, self).__init__() 274 | #padding='valid' is weird???? 
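        # Residual downscale block: a strided-conv `shortcut` path and a main path
        # (activation -> conv -> activation -> strided downscale conv); the two are summed in forward().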
275 | self.shortcut = nn.Conv2d(in_channels, out_channels, kernel_size, padding=padding, padding_mode=padding_mode, stride=downscale_factor) 276 | self.activation = activation 277 | self.conv = nn.Conv2d(in_channels, in_channels, kernel_size, padding='same', padding_mode='zeros') 278 | self.downscale = nn.Conv2d(in_channels, out_channels, kernel_size, padding=padding, padding_mode=padding_mode, stride=downscale_factor) 279 | 280 | def forward(self, X): 281 | #shortcut 282 | X_shortcut = self.shortcut(X) # (N, Cout, H/r, W/r) 283 | 284 | #main line 285 | X = self.activation(X) # (N, Cin, H, W) 286 | X = self.conv(X) # (N, Cout, H, W) 287 | X = self.activation(X) # (N, Cout, H, W) 288 | X = self.downscale(X) # (N, Cout, H/r, W/r) 289 | 290 | #combine 291 | X = X + X_shortcut 292 | return X 293 | 294 | class CNN_chained_downscales(nn.Module): 295 | def __init__(self, ny, kernel_size=3, padding='valid', features_ups_factor=1.5, \ 296 | downscale_factor=2, padding_mode='zeros', activation=nn.functional.relu): 297 | 298 | super(CNN_chained_downscales, self).__init__() 299 | self.activation = activation 300 | assert isinstance(ny,(list,tuple)) and (len(ny)==2 or len(ny)==3), 'ny should have 2 or 3 dimentions in the form (nchannels, height, width) or (height, width)' 301 | if len(ny)==2: 302 | self.nchannels = 1 303 | self.None_nchannels = True 304 | self.height, self.width = ny 305 | else: 306 | self.None_nchannels = False 307 | self.nchannels, self.height, self.width = ny 308 | 309 | #work backwards 310 | Y = torch.randn((1,self.nchannels,self.height,self.width)) 311 | _, features_now, height_now, width_now = Y.shape 312 | 313 | self.downblocks = [] 314 | features_now_base = features_now 315 | while height_now>=2*downscale_factor+1 and width_now>=2*downscale_factor+1: 316 | features_now_base *= features_ups_factor 317 | B = Down_Conv_block(features_now, int(features_now_base), kernel_size, padding=padding, \ 318 | downscale_factor=downscale_factor, padding_mode=padding_mode, activation=activation) 319 | 320 | self.downblocks.append(B) 321 | with torch.no_grad(): 322 | Y = B(Y) 323 | _, features_now, height_now, width_now = Y.shape #i'm lazy sorry 324 | 325 | self.width0 = width_now 326 | self.height0 = height_now 327 | self.features0 = features_now 328 | self.nout = self.width0*self.height0*self.features0 329 | # print('CNN output size=',self.nout) 330 | self.downblocks = nn.Sequential(*self.downblocks) 331 | 332 | def forward(self, Y): 333 | if self.None_nchannels: 334 | Y = Y[:,None,:,:] 335 | return self.downblocks(Y).view(Y.shape[0],-1) 336 | 337 | class CNN_encoder(nn.Module): 338 | def __init__(self, nb, nu, na, ny, nx, n_hidden_nodes=64, n_hidden_layers=2, activation=nn.Tanh, features_ups_factor=1.5): 339 | super(CNN_encoder, self).__init__() 340 | self.nx = nx 341 | self.nu = tuple() if nu=='scalar' else ((nu,) if isinstance(nu,int) else nu) 342 | assert isinstance(ny,(list,tuple)) and (len(ny)==2 or len(ny)==3), 'ny should have 2 or 3 dimentions in the form (nchannels, height, width) or (height, width)' 343 | ny = (ny[0]*na, ny[1], ny[2]) if len(ny)==3 else (na, ny[0], ny[1]) 344 | # print('ny=',ny) 345 | 346 | self.CNN = CNN_chained_downscales(ny, features_ups_factor=features_ups_factor) 347 | self.net = MLP_res_net(input_size=nb*np.prod(self.nu,dtype=int) + self.CNN.nout, \ 348 | output_size=nx, n_hidden_nodes=n_hidden_nodes, n_hidden_layers=n_hidden_layers, activation=activation) 349 | 350 | 351 | def forward(self, upast, ypast): 352 | #ypast = (samples, na, W, H) or (samples, na, C, 
W, H) to (samples, na*C, W, H)
        ypast = ypast.view(ypast.shape[0], -1, ypast.shape[-2], ypast.shape[-1])
        # print('ypast.shape=', ypast.shape)
        ypast_encode = self.CNN(ypast)
        # print('ypast_encode.shape=', ypast_encode.shape)
        net_in = torch.cat([upast.view(upast.shape[0],-1), ypast_encode.view(ypast.shape[0],-1)], axis=1)
        return self.net(net_in)


############################################################
################ HNN SUBNET functions ######################
############################################################

class ELU_lower_bound(nn.Module):
    '''Set a lower bound on a function using an ELU as:
        torch.nn.functional.elu(y - b) + b'''
    def __init__(self, net, lower_bound=-10): # -10 such that the gradient is not suppressed near zero
        super(ELU_lower_bound, self).__init__()
        self.net = net
        self.lower_bound = lower_bound

    def forward(self, *args, **kwargs):
        y = self.net(*args, **kwargs)
        b = self.lower_bound + 1
        return torch.nn.functional.elu(y - b) + b

class Ham_converter(nn.Module): # rescales the output such that the std of dH/dx is approximately 1
    '''Converts a net H(x) to a Hamiltonian by multiplying the output by sqrt(nx), which gives approximately dH/dx_i = 1'''
    def __init__(self, net, norm='auto'):
        super().__init__()
        self.net = net
        self.norm = norm

    def forward(self, x):
        if self.norm=='auto':
            return self.net(x)*x.shape[1]**0.5
        else:
            return self.net(x)*self.norm

class Matrix_converter(nn.Module):
    '''
    Converts a net(x) vector to a matrix using a reshape
    '''
    def __init__(self, net, nrows, ncols, norm='auto'):
        super().__init__()
        self.net = net
        self.norm = norm
        self.nrows = nrows
        self.ncols = ncols

    def forward(self, *x):
        A = self.net(*x).view(x[0].shape[0], self.nrows, self.ncols)
        if self.norm=='auto':
            A = A/(self.ncols**0.5) # this can be improved with some additional math
        else:
            A = A/self.norm
        return A

class Skew_sym_converter(nn.Module):
    '''Converts a net(x) vector to a skew-symmetric matrix (J = -J^T) using
        A = shape_to_matrix(net(x))
        return A - A^T
    '''
    def __init__(self, net, norm='auto'):
        super().__init__()
        self.net = net
        self.norm = norm

    def forward(self, x):
        z = self.net(x)
        # z.shape = (Nbatch, nx*nx)
        nx = int(round(z.shape[1]**0.5))
        assert nx*nx==z.shape[1], 'the output of net needs to have a square number of elements to be reshaped to a square matrix'
        J = z.view(z.shape[0], nx, nx)
        if self.norm=='auto':
            J = J/(((nx-1)*2)**0.5) # this can be improved with some additional math
        else:
            J = J/self.norm
        return J - J.permute(0,2,1)
class Sym_pos_semidef_converter(nn.Module):
    '''Converts a net(x) vector to a symmetric positive semi-definite matrix using
        A = shape_to_matrix(net(x))
        return A A^T
    '''
    def __init__(self, net, norm='auto'):
        super().__init__()
        self.norm = norm
        self.net = net

    def forward(self, x):
        z = self.net(x)
        nx = int(round(z.shape[1]**0.5))
        assert nx*nx==z.shape[1], 'the output of net needs to have a square number of elements to be reshaped to a square matrix'
        A = z.view(z.shape[0], nx, nx)
        if self.norm=='auto':
            A = A/(((nx+2)*nx**2)**0.25) # this might not be entirely correct
        else:
            A = A/self.norm
        R = torch.einsum('bik,bjk->bij', A, A) # R = A A^T, which is symmetric positive semi-definite
        return R

class Bias_net(nn.Module):
    '''f(x) = b where b is a (trainable) bias vector'''
    def __init__(self, num_pars, requires_grad=True):
        super().__init__()
        self.pars = nn.Parameter(torch.randn(num_pars), requires_grad=requires_grad)

    def forward(self, *args, **kwargs):
        return torch.broadcast_to(self.pars, (args[0].shape[0], self.pars.shape[0]))

class Contant_net(nn.Module):
    '''f(x) = c where c is the fixed constant tensor given at construction'''
    def __init__(self, c):
        super().__init__()
        assert isinstance(c, torch.Tensor)
        self.c = c

    def forward(self, *args, **kwargs):
        return torch.broadcast_to(self.c, (args[0].shape[0],) + self.c.shape)


class Sum_net(nn.Module):
    '''f_1(x) + f_2(x) + f_3(x) + ... + f_n(x)'''
    def __init__(self, nets, scaling_factors='auto'):
        super().__init__()
        self.nets = nn.ParameterList(nets)
        self.scaling_factors = [1/len(nets)**0.5]*len(nets) if scaling_factors=='auto' else scaling_factors

    def forward(self, *args, **kwargs):
        outputs = [scaling*net(*args, **kwargs) for scaling, net in zip(self.scaling_factors, self.nets)]
        return torch.stack(outputs, dim=0).sum(0)


class Quadratic_net(nn.Module):
    '''x^T Q x'''
    def __init__(self, nx):
        super().__init__()
        # Q is parameterized as a symmetric positive semi-definite matrix;
        # a skew-symmetric Q would make x^T Q x identically zero.
        self.net = Sym_pos_semidef_converter(Bias_net(nx*nx))

    def forward(self, x):
        Q = self.net(x)
        return torch.einsum('bi,bij,bj->b', x, Q, x)
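
# Composition sketch of the converters above (mirrors the pHNN demo notebook; `nx` is hypothetical):
#   nx = 4
#   Jnet = Skew_sym_converter(Sum_net([Bias_net(nx*nx), MLP_res_net(input_size=nx, output_size=nx*nx)]))
#   Rnet = Sym_pos_semidef_converter(Bias_net(nx*nx))
#   Hnet = Ham_converter(Sum_net([ELU_lower_bound(MLP_res_net(nx, 'scalar'), lower_bound=-100), Quadratic_net(nx)]))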
--------------------------------------------------------------------------------
/deepSI/models.py:
--------------------------------------------------------------------------------
import os
os.environ['FOR_DISABLE_CONSOLE_CTRL_HANDLER'] = '1'

from torch import nn
import torch
from deepSI.networks import MLP_res_net, rk4_integrator
from nonlinear_benchmarks import Input_output_data
import numpy as np
from deepSI.normalization import Norm
from warnings import warn


#######################
### Helper Function ###
#######################

def past_future_arrays(data : Input_output_data | list, na : int, nb : int, T : int | str, stride : int=1, add_sampling_time : bool=False):
    '''
    This function extracts sections from the given data to be used in the SUBNET structure in the format (upast, ypast, ufuture, yfuture), ids.

    For example, for a sample [t] you will find that:
        npast = max(na, nb)
        upast[t]   = data.u[t-nb + npast : t + npast]
        ypast[t]   = data.y[t-na + npast : t + npast]
        ufuture[t] = data.u[t + npast : t+T + npast]
        yfuture[t] = data.y[t + npast : t+T + npast]

    such that the arrays can be used as:
        net(upast, ypast, ufuture) = yfuture_sim

    Parameters:
    - data (Input_output_data | list): Input-output data object or a list of such objects, each containing input `u` and output `y` arrays.
    - na (int): Number of past output time steps to include in the `ypast` array.
    - nb (int): Number of past input time steps to include in the `upast` array.
    - T (int or str): Length of the future time window (`ufuture`, `yfuture`). If 'sim', uses the full length of the input data.
    - stride (int, optional): Step size for moving the window across the data (default is 1).
    - add_sampling_time (bool, optional): If True, includes a `sampling_time` array, representing sampling intervals (default is False).

    Returns:
    - Tuple of Tensors: `(upast, ypast, ufuture, yfuture, [optional sampling_time])` where each array is shaped for efficient batch training.
    - ids (np.ndarray): Indices for valid data samples, adjusted to avoid overlap when data is a list of datasets.
    '''

    if T=='sim':
        if isinstance(data, (tuple, list)):
            assert all(len(data[0])==len(d) for d in data), "if T='sim' then all the given datasets need to have the same length (otherwise create the arrays in a for loop instead)"
            T = len(data[0]) - max(na, nb)
        else:
            T = len(data) - max(na, nb)

    if isinstance(data, (tuple, list)):
        u, y = np.concatenate([di.u for di in data], dtype=np.float32), np.concatenate([di.y for di in data], dtype=np.float32) # this always creates a copy
    else:
        u, y = data.u.astype(np.float32, copy=False), data.y.astype(np.float32, copy=False)

    def window(x, window_shape=T):
        x = np.lib.stride_tricks.sliding_window_view(x, window_shape=window_shape, axis=0, writeable=True) # this windowing function does not increase the amount of memory used
        s = (0, len(x.shape)-1) + tuple(range(1, len(x.shape)-1))
        return x.transpose(s)

    npast = max(na, nb)
    ufuture = window(u[npast:len(u)], window_shape=T)
    yfuture = window(y[npast:len(y)], window_shape=T)
    upast = window(u[npast-nb:len(u)-T], window_shape=nb)
    ypast = window(y[npast-na:len(y)-T], window_shape=na)

    if isinstance(data, (tuple, list)):
        acc_L, ids = 0, []
        for d in data:
            assert len(d.u)>=npast+T, f'some dataset was shorter than the length required by {max(na,nb)+T=} {len(d.u)=}'
            ids.append(np.arange(0, len(d.u)-npast-T+1, stride) + acc_L) # only add ids which are valid for training (no overlap between the different datasets)
            acc_L += len(d.u)
        ids = np.concatenate(ids)
    else:
        ids = np.arange(0, len(data)-npast-T+1, stride)

    s = torch.as_tensor
    if not add_sampling_time:
        return (s(upast), s(ypast), s(ufuture), s(yfuture)), ids # this could return all the valid indices
    else:
        if isinstance(data, (tuple, list)):
            sampling_time = torch.cat([torch.as_tensor(d.sampling_time, dtype=torch.float32)*torch.ones(len(d)) for d in data])[:len(upast)]
        else:
            sampling_time = torch.as_tensor(data.sampling_time, dtype=torch.float32)*torch.ones(len(upast))
        return (s(upast), s(ypast), s(ufuture), sampling_time, s(yfuture)), ids
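
# Shape sketch (hypothetical scalar u and y, N = len(data), stride=1):
#   (upast, ypast, ufuture, yfuture), ids = past_future_arrays(data, na=20, nb=20, T=50)
#   upast.shape   == (N - max(na,nb) - T + 1, nb)
#   ypast.shape   == (N - max(na,nb) - T + 1, na)
#   ufuture.shape == yfuture.shape == (N - max(na,nb) - T + 1, T)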
87 | def validate_SUBNET_structure(model):
88 |     nx, nu, ny, na, nb = model.nx, model.nu, model.ny, model.na, model.nb
89 |     v = lambda *size: torch.randn(size)
90 |     xtest = v(1,nx)
91 |     utest = v(1) if nu=='scalar' else v(1,nu)
92 |     upast_test = v(1, nb) if nu=='scalar' else v(1, nb, nu)
93 |     ypast_test = v(1, na) if ny=='scalar' else v(1, na, ny)
94 | 
95 |     with torch.no_grad():
96 |         if isinstance(model, (SUBNET, SUBNET_CT)):
97 |             f = model.f if isinstance(model, SUBNET) else model.f_CT
98 |             xnext_test = f(xtest, utest)
99 |             assert xnext_test.shape==(1,nx), f'f returned an incorrect shape; it should be f(x, u).shape==(nbatch=1, nx) but got {xnext_test.shape}'
100 |             x_encoded = model.encoder(upast_test, ypast_test)
101 |             assert x_encoded.shape==(1,nx), f'encoder returned an incorrect shape; it should be model.encoder(upast, ypast).shape==(nbatch=1, nx) but got {x_encoded.shape}'
102 |             y_pred = model.h(xtest, utest) if model.feedthrough else model.h(xtest)
103 |             assert (y_pred.shape==(1,)) if ny=='scalar' else (y_pred.shape==(1,ny)), f'h returned an incorrect shape; it should be model.h(x{", u" if model.feedthrough else ""}).shape==(nbatch=1{"" if ny=="scalar" else ", ny"}) but got {y_pred.shape}'
104 |             if isinstance(model, SUBNET_CT):
105 |                 xnext_test = model.integrator(model.f_CT, xtest, utest, torch.ones((1,)))
106 |                 assert xnext_test.shape==(1,nx), f'integrator returned an incorrect shape; it should be model.integrator(model.f_CT, x, u, Ts).shape==(nbatch=1, nx) but got {xnext_test.shape}'
107 |         else:
108 |             raise NotImplementedError(f'validation of models of type {type(model)} is not implemented yet')
109 | 
110 | ##############################
111 | #### Discrete-Time SUBNET ####
112 | ##############################
113 | # see: https://proceedings.mlr.press/v144/beintema21a/beintema21a.pdf or
114 | # Beintema, Gerben, Roland Toth, and Maarten Schoukens. "Nonlinear state-space identification using deep encoder networks." Learning for dynamics and control. PMLR, 2021.
115 | # see: https://www.sciencedirect.com/science/article/pii/S0005109823003710
116 | # Beintema, Gerben I., Maarten Schoukens, and Roland Tóth. "Deep subspace encoders for nonlinear system identification." Automatica 156 (2023): 111210.
117 | 
118 | class SUBNET(nn.Module):
119 |     def __init__(self, nu:int|str, ny:int|str, norm : Norm, nx:int=10, nb:int=20, na:int=20, \
120 |                  f=None, h=None, encoder=None, feedthrough=False, validate=True) -> None:
121 |         super().__init__()
122 |         self.nu, self.ny, self.norm, self.nx, self.nb, self.na, self.feedthrough = nu, ny, norm, nx, nb, na, feedthrough
123 |         self.f = f if f is not None else norm.f(MLP_res_net(input_size = [nx, nu], output_size = nx))
124 |         self.h = h if h is not None else norm.h(MLP_res_net(input_size = [nx, nu] if feedthrough else nx, output_size = ny))
125 |         self.encoder = encoder if encoder is not None else norm.encoder(MLP_res_net(input_size = [(nb,nu), (na,ny)], output_size = nx))
126 |         if validate:
127 |             validate_SUBNET_structure(self)
128 | 
129 |     def create_arrays(self, data: Input_output_data | list, T : int=50, stride: int=1):
130 |         return past_future_arrays(data, self.na, self.nb, T=T, stride=stride)
131 | 
132 |     def forward_simple(self, upast: torch.Tensor, ypast: torch.Tensor, ufuture: torch.Tensor, yfuture: torch.Tensor=None):
133 |         #a lot simpler than forward, but also about 50% slower
134 |         yfuture_sim = []
135 |         x = self.encoder(upast, ypast)
136 |         for u in ufuture.swapaxes(0,1):
137 |             y = self.h(x,u) if self.feedthrough else self.h(x)
138 |             yfuture_sim.append(y)
139 |             x = self.f(x,u)
140 |         return torch.stack(yfuture_sim, dim=1)
141 | 
142 |     def forward(self, upast: torch.Tensor, ypast: torch.Tensor, ufuture: torch.Tensor, yfuture: torch.Tensor=None):
143 |         B, T = ufuture.shape[:2]
144 |         x = self.encoder(upast, ypast)
145 |         xfuture = []
146 |         for u in ufuture.swapaxes(0,1): #unroll over the time dim
147 |             xfuture.append(x)
148 |             x = self.f(x,u)
149 |         xfuture = torch.stack(xfuture, dim=1) #has shape (Nbatch, Ntime=T, nx)
150 | 
151 |         #compute the output at all future time indices at once by combining the time and batch dims
152 |         fl = lambda ar: torch.flatten(ar, start_dim=0, end_dim=1) #combine batch dim and time dim (Nbatch, Ntime, ...) -> (Nbatch*Ntime, ...)
153 |         yfuture_sim_flat = self.h(fl(xfuture), fl(ufuture)) if self.feedthrough else self.h(fl(xfuture)) #compute the output for all times and batches in one go
154 |         return torch.unflatten(yfuture_sim_flat, dim=0, sizes=(B,T)) #(Nbatch*T, ...) -> (Nbatch, T, ...)
155 | 
156 |     def simulate(self, data: Input_output_data | list):
157 |         if isinstance(data, (list, tuple)):
158 |             return [self.simulate(d) for d in data]
159 |         if data.sampling_time!=self.norm.sampling_time:
160 |             warn('It seems that the model is being simulated at a different sampling time than it was trained on.')
161 |         ysim = self(*past_future_arrays(data, self.na, self.nb, T='sim', add_sampling_time=False)[0])[0].detach().numpy()
162 |         return Input_output_data(u=data.u, y=np.concatenate([data.y[:max(self.na, self.nb)], ysim], axis=0), state_initialization_window_length=max(self.na, self.nb))
163 | 
164 |     def f_unbached(self, x, u):
165 |         return self.f(x[None], u[None])[0]
166 |     def h_unbached(self, x, u=None):
167 |         return self.h(x[None], u[None])[0] if self.feedthrough else self.h(x[None])[0]
168 |     def encoder_unbached(self, upast, ypast):
169 |         return self.encoder(upast[None], ypast[None])[0]
170 | 
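
# Illustrative sketch (an assumption, not library code): any nn.Module with the
# batched signature f(x, u) -> x_next of shape (Nbatch, nx) can be passed to
# SUBNET in place of the default MLP_res_net state transition. The class below is
# a hypothetical example; note that a custom f bypasses the input normalization
# that norm.f applies to the defaults.
class _ExampleCustomF(nn.Module):
    def __init__(self, nx, nu):
        super().__init__()
        nu_val = 1 if nu=='scalar' else nu
        self.net = nn.Sequential(nn.Linear(nx + nu_val, 64), nn.Tanh(), nn.Linear(64, nx))
        self.nu = nu
    def forward(self, x, u):
        u = u[:, None] if self.nu=='scalar' else u #give scalar inputs a vector dim
        return x + self.net(torch.cat([x, u], dim=1)) #residual update keeps x_next close to x
# usage (hypothetical): model = SUBNET(nu, ny, norm, nx=4, f=_ExampleCustomF(nx=4, nu=nu))
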
171 | ################################
172 | #### Continuous-Time SUBNET ####
173 | ################################
174 | # see: https://arxiv.org/abs/2204.09405
175 | # Beintema, G. I., Schoukens, M., & Tóth, R. (2022). Continuous-time identification of dynamic state-space models by deep subspace encoding. Presented at the 11th International Conference on Learning Representations (ICLR).
176 | 
177 | class SUBNET_CT(nn.Module):
178 |     #norm carries the sampling time, which sets the default time constant tau of f_CT
179 |     def __init__(self, nu, ny, norm:Norm, nx=10, nb=20, na=20, f_CT=None, h=None, encoder=None, integrator=None, feedthrough=False, validate=True) -> None:
180 |         super().__init__()
181 |         self.nu, self.ny, self.norm, self.nx, self.nb, self.na, self.feedthrough = nu, ny, norm, nx, nb, na, feedthrough
182 |         self.f_CT = f_CT if f_CT is not None else norm.f_CT(MLP_res_net(input_size = [nx, nu], output_size = nx), tau=norm.sampling_time*50)
183 |         self.h = h if h is not None else norm.h(MLP_res_net(input_size = [nx, nu] if feedthrough else nx, output_size = ny))
184 |         self.encoder = encoder if encoder is not None else norm.encoder(MLP_res_net(input_size = [(nb,nu), (na,ny)], output_size = nx))
185 |         self.integrator = integrator if integrator is not None else rk4_integrator
186 |         if validate:
187 |             validate_SUBNET_structure(self)
188 | 
189 |     def create_arrays(self, data: Input_output_data | list, T : int=50, stride: int=1):
190 |         return past_future_arrays(data, self.na, self.nb, T=T, stride=stride, add_sampling_time=True)
191 | 
192 |     def forward(self, upast: torch.Tensor, ypast: torch.Tensor, ufuture: torch.Tensor, sampling_time : float | torch.Tensor, yfuture: torch.Tensor=None):
193 |         B, T = ufuture.shape[:2]
194 |         x = self.encoder(upast, ypast)
195 |         xfuture = []
196 |         for u in ufuture.swapaxes(0,1): #unroll over the time dim
197 |             xfuture.append(x)
198 |             x = self.integrator(self.f_CT, x, u, sampling_time)
199 |         xfuture = torch.stack(xfuture, dim=1) #has shape (Nbatch, Ntime=T, nx)
200 | 
201 |         #compute the output at all future time indices at once by combining the time and batch dims
202 |         fl = lambda ar: torch.flatten(ar, start_dim=0, end_dim=1) #combine batch dim and time dim
203 |         yfuture_sim_flat = self.h(fl(xfuture), fl(ufuture)) if self.feedthrough else self.h(fl(xfuture)) #compute the output for all times and batches in one go
204 |         return torch.unflatten(yfuture_sim_flat, dim=0, sizes=(B,T)) #(Nbatch*T) -> (Nbatch, T)
205 | 
206 |     def simulate(self, data: Input_output_data | list):
207 |         if isinstance(data, (list, tuple)):
208 |             return [self.simulate(d) for d in data]
209 |         if data.sampling_time!=self.norm.sampling_time:
210 |             warn('It seems that the model is being simulated at a different sampling time than it was trained on. The encoder currently assumes that the sampling_time is kept constant.')
211 |         ysim = self(*past_future_arrays(data, self.na, self.nb, T='sim', add_sampling_time=True)[0])[0].detach().numpy()
212 |         return Input_output_data(u=data.u, y=np.concatenate([data.y[:max(self.na, self.nb)], ysim], axis=0), state_initialization_window_length=max(self.na, self.nb))
213 | 
214 |     def f_CT_unbached(self, x, u):
215 |         return self.f_CT(x[None], u[None])[0]
216 |     def integrator_unbached(self, f_CT, x, u, sampling_time):
217 |         return self.integrator(f_CT, x[None], u[None], sampling_time[None])[0]
218 |     def h_unbached(self, x, u=None):
219 |         return self.h(x[None], u[None])[0] if self.feedthrough else self.h(x[None])[0]
220 |     def encoder_unbached(self, upast, ypast):
221 |         return self.encoder(upast[None], ypast[None])[0]
222 | 
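
# Illustrative sketch (an assumption, not the library's rk4_integrator): any
# callable with the signature integrator(f_CT, x, u, Ts) -> x_next of shape
# (Nbatch, nx) can be passed to SUBNET_CT. Below, a classical fourth-order
# Runge-Kutta step with a zero-order hold on u, as one way such an integrator
# could look.
def _example_rk4_step(f_CT, x, u, Ts):
    if isinstance(Ts, torch.Tensor) and Ts.ndim==1:
        Ts = Ts[:, None] #broadcast (Nbatch,) -> (Nbatch, 1) over the state dim
    k1 = f_CT(x, u)
    k2 = f_CT(x + Ts*k1/2, u)
    k3 = f_CT(x + Ts*k2/2, u)
    k4 = f_CT(x + Ts*k3, u)
    return x + Ts*(k1 + 2*k2 + 2*k3 + k4)/6
# usage (hypothetical): model = SUBNET_CT(nu, ny, norm, nx=4, integrator=_example_rk4_step)
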
223 | ###############################################
224 | ### Helper Function for Fully Custom SUBNET ###
225 | ###############################################
226 | 
227 | class Custom_SUBNET(nn.Module):
228 |     def create_arrays(self, data: Input_output_data | list, T : int=50, stride: int=1):
229 |         return past_future_arrays(data, self.na, self.nb, T=T, stride=stride, add_sampling_time=False)
230 | 
231 |     def simulate(self, data: Input_output_data | list):
232 |         if isinstance(data, (list, tuple)):
233 |             return [self.simulate(d) for d in data]
234 |         ysim = self(*past_future_arrays(data, self.na, self.nb, T='sim', add_sampling_time=False)[0])[0].detach().numpy()
235 |         return Input_output_data(u=data.u, y=np.concatenate([data.y[:max(self.na, self.nb)], ysim], axis=0), state_initialization_window_length=max(self.na, self.nb))
236 | 
237 | class Custom_SUBNET_CT(nn.Module):
238 |     def create_arrays(self, data: Input_output_data | list, T : int=50, stride: int=1):
239 |         return past_future_arrays(data, self.na, self.nb, T=T, stride=stride, add_sampling_time=True)
240 | 
241 |     def simulate(self, data: Input_output_data | list):
242 |         if isinstance(data, (list, tuple)):
243 |             return [self.simulate(d) for d in data]
244 |         ysim = self(*past_future_arrays(data, self.na, self.nb, T='sim', add_sampling_time=True)[0])[0].detach().numpy()
245 |         return Input_output_data(u=data.u, y=np.concatenate([data.y[:max(self.na, self.nb)], ysim], axis=0), state_initialization_window_length=max(self.na, self.nb))
246 | 
247 | def validate_custom_SUBNET_structure(model):
248 |     nu, ny, na, nb = model.nu, model.ny, model.na, model.nb
249 |     for batch_size in [1,2]:
250 |         T = 10
251 |         v = lambda *size: torch.randn(size)
252 |         upast_test = v(batch_size, nb) if nu=='scalar' else v(batch_size, nb, nu)
253 |         ypast_test = v(batch_size, na) if ny=='scalar' else v(batch_size, na, ny)
254 |         ufuture_test = v(batch_size, T) if nu=='scalar' else v(batch_size, T, nu)
255 |         yfuture_test = v(batch_size, T) if ny=='scalar' else v(batch_size, T, ny)
256 | 
257 |         with torch.no_grad():
258 |             if isinstance(model, Custom_SUBNET):
259 |                 yfuture_pred = model(upast_test, ypast_test, ufuture_test, yfuture_test)
260 |             else:
261 |                 yfuture_pred = model(upast_test, ypast_test, ufuture_test, v(batch_size)) #Custom_SUBNET_CT also receives a sampling_time argument
262 |             assert yfuture_pred.shape==((batch_size,T) if ny=='scalar' else (batch_size,T,ny)), f'the forward of {type(model)} returned an incorrect shape: {yfuture_pred.shape}'
263 | 
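
# Illustrative sketch (an assumption, not library code): a minimal Custom_SUBNET
# subclass. A subclass only needs to set nu, ny, na, nb and define a forward with
# the signature forward(upast, ypast, ufuture, yfuture=None) -> yfuture_sim;
# create_arrays and simulate are then inherited. All names below are hypothetical.
class _Example_Custom_SUBNET(Custom_SUBNET):
    def __init__(self, nu, ny, norm: Norm, nx, na, nb):
        super().__init__()
        self.nu, self.ny, self.norm, self.nx, self.na, self.nb = nu, ny, norm, nx, na, nb
        self.f = norm.f(MLP_res_net(input_size=[nx, nu], output_size=nx))
        self.h = norm.h(MLP_res_net(input_size=nx, output_size=ny))
        self.encoder = norm.encoder(MLP_res_net(input_size=[(nb,nu), (na,ny)], output_size=nx))
        validate_custom_SUBNET_structure(self) #checks that forward returns the expected shapes
    def forward(self, upast, ypast, ufuture, yfuture=None):
        x, yfuture_sim = self.encoder(upast, ypast), []
        for u in ufuture.swapaxes(0,1): #iterate over time
            yfuture_sim.append(self.h(x))
            x = self.f(x, u)
        return torch.stack(yfuture_sim, dim=1)
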
264 | #########################
265 | ####### SUBNET_LPV ######
266 | #########################
267 | # See: https://arxiv.org/abs/2204.04060
268 | # Verhoek, Chris, et al. "Deep-learning-based identification of LPV models for nonlinear systems." 2022 IEEE 61st Conference on Decision and Control (CDC). IEEE, 2022.
269 | 
270 | from deepSI.networks import Bilinear
271 | class SUBNET_LPV(Custom_SUBNET):
272 |     def __init__(self, nu, ny, norm:Norm, nx, n_schedual, na, nb, scheduling_net=None, A=None, B=None, C=None, D=None, encoder=None, feedthrough=True):
273 |         if np.any(10*abs(norm.ymean.numpy())>norm.ystd.numpy()) or np.any(10*abs(norm.umean.numpy())>norm.ustd.numpy()):
274 |             from warnings import warn
275 |             warn('SUBNET_LPV assumes that the data is approximately zero-mean. Using data with a significant offset can lead to unintended behaviour.')
276 |         assert isinstance(nu, int) and isinstance(ny, int) and isinstance(n_schedual, int) and feedthrough, 'SUBNET_LPV requires the input, output and scheduling parameter to be vectors and feedthrough to be present'
277 |         super().__init__()
278 |         self.nu, self.ny, self.norm, self.nx, self.n_schedual, self.na, self.nb, self.feedthrough = nu, ny, norm, nx, n_schedual, na, nb, feedthrough
279 |         self.A = A if A is not None else Bilinear(n_in=nx, n_out=nx, n_schedual=n_schedual)
280 |         self.B = B if B is not None else Bilinear(n_in=nu, n_out=nx, n_schedual=n_schedual, std_input=norm.ustd)
281 |         self.C = C if C is not None else Bilinear(n_in=nx, n_out=ny, n_schedual=n_schedual, std_output=norm.ystd)
282 |         self.D = D if D is not None else Bilinear(n_in=nu, n_out=ny, n_schedual=n_schedual, std_output=norm.ystd, std_input=norm.ustd)
283 |         self.encoder = encoder if encoder is not None else norm.encoder(MLP_res_net(input_size = [(nb,nu), (na,ny)], output_size = nx))
284 |         self.scheduling_net = scheduling_net if scheduling_net is not None else norm.f(MLP_res_net(input_size = [nx, nu], output_size = n_schedual))
285 |         validate_custom_SUBNET_structure(self) #checks that forward works as intended
286 | 
287 |     def forward(self, upast: torch.Tensor, ypast: torch.Tensor, ufuture: torch.Tensor, yfuture: torch.Tensor=None):
288 |         mv = lambda A, x: torch.bmm(A, x[:, :, None])[:,:,0] #batched matrix-vector multiply
289 |         yfuture_sim = []
290 |         x = self.encoder(upast, ypast)
291 |         for u in ufuture.swapaxes(0,1): #iterate over time
292 |             p = self.scheduling_net(x, u)
293 |             A, B, C, D = self.A(p), self.B(p), self.C(p), self.D(p)
294 |             y = mv(C, x) + mv(D, u)
295 |             x = mv(A, x) + mv(B, u)
296 |             yfuture_sim.append(y)
297 |         return torch.stack(yfuture_sim, dim=1)
298 | 
299 | 
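# For reference (restated from the forward above), SUBNET_LPV simulates the
# scheduled linear recursion
#   p_k     = scheduling_net(x_k, u_k)
#   y_k     = C(p_k) x_k + D(p_k) u_k
#   x_{k+1} = A(p_k) x_k + B(p_k) u_k
# A minimal instantiation with hypothetical sizes (norm as obtained from
# get_nu_ny_and_auto_norm on vector-valued data):
# lpv_model = SUBNET_LPV(nu=2, ny=1, norm=norm, nx=4, n_schedual=2, na=20, nb=20)
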
300 | class SUBNET_LPV_ext_scheduled(SUBNET_LPV):
301 |     '''LPV system identification with external scheduling (LPV-SUBNET, Fig. 2 in https://arxiv.org/pdf/2204.04060)'''
302 |     def forward(self, upast: torch.Tensor, ypast: torch.Tensor, ufuture: torch.Tensor, yfuture: torch.Tensor):
303 |         Nbatch, T = ufuture.shape[:2]
304 |         #upasts = [upast_k, upast_k+1, upast_k+2, ...] etc.
305 |         upasts = torch.cat([upast, ufuture[:,:-1]], dim=1).unfold(1,self.nb,1).permute(0,1,3,2).flatten(start_dim=0, end_dim=1) #(Nbatch * T, nb, nu)
306 |         ypasts = torch.cat([ypast, yfuture[:,:-1]], dim=1).unfold(1,self.na,1).permute(0,1,3,2).flatten(start_dim=0, end_dim=1) #(Nbatch * T, na, ny)
307 | 
308 |         x_long = torch.unflatten(self.encoder(upasts, ypasts), dim=0, sizes=(Nbatch, T)) #use the encoder to estimate the initial states at all future time indices
309 |         pfuture = torch.unflatten(self.scheduling_net(x_long.flatten(0,1), ufuture.flatten(0,1)), dim=0, sizes=(Nbatch, T)) #construct the scheduling parameters
310 |         x = x_long[:,0] #set the initial state equal to the first of the initial states computed
311 | 
312 |         mv = lambda A, x: torch.bmm(A, x[:, :, None])[:,:,0] #batched matrix-vector multiply
313 |         yfuture_sim = []
314 |         for p, u in zip(pfuture.swapaxes(0,1), ufuture.swapaxes(0,1)): #iterate over time
315 |             A, B, C, D = self.A(p), self.B(p), self.C(p), self.D(p)
316 |             y = mv(C, x) + mv(D, u)
317 |             x = mv(A, x) + mv(B, u)
318 |             yfuture_sim.append(y)
319 |         return torch.stack(yfuture_sim, dim=1)
320 | 
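# Shape walkthrough for the unfold windowing in SUBNET_LPV_ext_scheduled.forward
# (illustrative, with hypothetical sizes Nbatch=2, T=3, nb=2, nu=1):
#   torch.cat([upast, ufuture[:,:-1]], dim=1)  -> (Nbatch, nb+T-1, nu) = (2, 4, 1)
#   .unfold(1, nb, 1)                          -> (Nbatch, T, nu, nb) = (2, 3, 1, 2), one window per future index
#   .permute(0,1,3,2)                          -> (Nbatch, T, nb, nu), windows ordered as (time, lag, channel)
#   .flatten(start_dim=0, end_dim=1)           -> (Nbatch*T, nb, nu), so one encoder call evaluates all windows at once
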
321 | ##########################
322 | ####### CNN_SUBNET #######
323 | ##########################
324 | # see: https://pure.tue.nl/ws/portalfiles/portal/318935789/20240321_Beintema_hf.pdf Chapter 4
325 | # Beintema, Gerben Izaak. "Data-driven Learning of Nonlinear Dynamic Systems: A Deep Neural State-Space Approach." PhD Thesis (2024), Chapter 4.
326 | 
327 | class CNN_SUBNET(SUBNET):
328 |     def __init__(self, nu, ny, norm, nx, nb, na):
329 |         from deepSI.networks import CNN_vec_to_image, CNN_encoder, MLP_res_net
330 |         h = norm.h(CNN_vec_to_image(nx, ny=ny))
331 |         f = norm.f(MLP_res_net(input_size=[nx, nu], output_size=nx))
332 |         encoder = norm.encoder(CNN_encoder(nb, nu, na, ny, nx))
333 |         super().__init__(nu, ny, norm, nx, nb, na, f, h, encoder, validate=False)
334 | 
335 | ###########################
336 | ####### pHNN_SUBNET #######
337 | ###########################
338 | # see: https://arxiv.org/abs/2305.01338
339 | # Moradi, Sarvin, et al. "Physics-Informed Learning Using Hamiltonian Neural Networks with Output Error Noise Models." IFAC-PapersOnLine 56.2 (2023): 5152-5157.
340 | 
341 | from deepSI.networks import Ham_converter, ELU_lower_bound, Skew_sym_converter, Sym_pos_semidef_converter, Matrix_converter
342 | class pHNN_SUBNET(Custom_SUBNET_CT):
343 |     def __init__(self, nu : int | str, ny: int | str, norm : Norm, nx : int, na : int, nb : int, Hnet : None | nn.Module =None, Jnet : None | nn.Module =None, \
344 |                  Rnet : None | nn.Module =None, Gnet : None | nn.Module =None, encoder : None | nn.Module =None, integrator=None, tau : float =None):
345 |         super().__init__()
346 |         assert nu==ny, 'pHNN_SUBNET requires the number of inputs and outputs to be equal'
347 |         self.nu, self.ny, self.norm, self.nx, self.na, self.nb = nu, ny, norm, nx, na, nb
348 |         self.Hnet = Ham_converter(ELU_lower_bound(MLP_res_net(nx, 'scalar'))) if Hnet is None else Hnet
349 |         self.Jnet = Skew_sym_converter(MLP_res_net(nx, nx*nx)) if Jnet is None else Jnet
350 |         self.Rnet = Sym_pos_semidef_converter(MLP_res_net(nx, nx*nx)) if Rnet is None else Rnet
351 |         nu_val = 1 if nu=='scalar' else nu
352 |         self.Gnet = Matrix_converter(MLP_res_net(nx, nx*nu_val), nrows=nx, ncols=nu_val) if Gnet is None else Gnet
353 |         self.integrator = rk4_integrator if integrator is None else integrator
354 |         self.encoder = norm.encoder(MLP_res_net(input_size = [(nb,nu), (na,ny)], output_size = nx)) if encoder is None else encoder
355 |         self.norm = norm
356 |         self.tau = norm.sampling_time*10 if tau is None else tau
357 | 
358 |         #validate the structure
359 |         for Nbatch in [1,2]:
360 |             xtest = torch.randn(Nbatch, nx)
361 |             J_x, R_x, G_x, dHdx, H = self.get_matricies(xtest)
362 |             nu_val = 1 if nu=='scalar' else nu
363 |             assert J_x.shape == (Nbatch, nx, nx), f'Jnet(x) has an incorrect shape, expected (Nbatch={Nbatch}, nx={nx}, nx={nx}) but got Jnet(x).shape={J_x.shape}'
364 |             assert R_x.shape == (Nbatch, nx, nx), f'Rnet(x) has an incorrect shape, expected (Nbatch={Nbatch}, nx={nx}, nx={nx}) but got Rnet(x).shape={R_x.shape}'
365 |             assert H.shape == (Nbatch,), f'Hnet(x) has an incorrect shape, expected (Nbatch={Nbatch},) but got Hnet(x).shape={H.shape}'
366 |             assert G_x.shape == (Nbatch, nx, nu_val), f'Gnet(x) has an incorrect shape, expected (Nbatch={Nbatch}, nx={nx}, nu_val={nu_val}) but got Gnet(x).shape={G_x.shape}'
367 |             assert dHdx.shape == (Nbatch, nx), f'dHnet(x)/dx has an incorrect shape, expected (Nbatch={Nbatch}, nx={nx}) but got dHdx.shape={dHdx.shape}'
368 |         validate_custom_SUBNET_structure(self)
369 | 
370 |     def get_matricies(self, x):
371 |         with torch.enable_grad():
372 |             if not x.requires_grad:
373 |                 x.requires_grad = True
374 |             H = self.Hnet(x)
375 |             Hsum = H.sum()
376 |             dHdx = torch.autograd.grad(Hsum, x, create_graph=True)[0] #dH/dx for every sample in the batch
377 | 
378 |         J_x = self.Jnet(x)
379 |         R_x = self.Rnet(x)
380 |         G_x = self.Gnet(x)
381 |         return J_x, R_x, G_x, dHdx, H
382 | 
383 |     def forward(self, upast, ypast, ufuture, sampling_time, yfuture=None):
384 |         x = self.encoder(upast, ypast)
385 |         ufuture = (ufuture.view(ufuture.shape[0],ufuture.shape[1],-1)-self.norm.umean)/self.norm.ustd #normalize the inputs
386 |         yfuture_sim = []
387 |         for u in ufuture.swapaxes(0,1): #with a 1-step Euler integrator, the matrix evaluations here and inside f_CT could be merged
388 |             J_x, R_x, G_x, dHdx, H = self.get_matricies(x) #note: get_matricies is evaluated again inside f_CT for the same x; merging these calls would give a speedup
389 |             y_hat = torch.einsum('bij,bi->bj', G_x, dHdx) #y = G(x)^T dH/dx
390 |             yfuture_sim.append(y_hat)
391 |             def f_CT(x, u):
392 |                 J_x, R_x, G_x, dHdx, H = self.get_matricies(x)
393 |                 Gu = torch.einsum('bij,bj->bi', G_x, u) # G_x (Nb, nx, nu) times u (Nb, nu) = (Nb, nx)
394 |                 return (torch.einsum('bij,bj->bi', J_x - R_x, dHdx) + Gu)/self.tau
395 | 
396 |             x = self.integrator(f_CT, x, u, sampling_time)
397 | 
398 |         yfuture_sim = torch.stack(yfuture_sim, dim=1)
399 |         yfuture_sim = yfuture_sim[:,:,0] if self.ny=='scalar' else yfuture_sim
400 |         return yfuture_sim*self.norm.ystd + self.norm.ymean
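
# For reference, the continuous-time port-Hamiltonian structure that
# pHNN_SUBNET.forward implements (restated from the class above; tau is the
# time-constant scaling applied in f_CT):
#   dx/dt = ((J(x) - R(x)) dH/dx + G(x) u) / tau
#   y     = G(x)^T dH/dx
# with J(x) skew-symmetric, R(x) symmetric positive semidefinite, and H(x) a
# lower-bounded Hamiltonian, as enforced by the network converters above.
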
401 | 
402 | ##############################
403 | ####### Koopman SUBNET #######
404 | ##############################
405 | # see: https://ieeexplore.ieee.org/abstract/document/9682946
406 | # Iacob, Lucian Cristian, et al. "Deep identification of nonlinear systems in Koopman form." 2021 60th IEEE Conference on Decision and Control (CDC). IEEE, 2021.
407 | 
408 | class Koopman_SUBNET(Custom_SUBNET):
409 |     '''Implements the following structure
410 |     x_next = A@x + B(x)@(u - umean)/ustd
411 |     y = (C@x) * ystd + ymean
412 | 
413 |     if feedthrough: y = (C@x + D@(u - umean)/ustd) * ystd + ymean
414 |     if B_depends_on_u: x_next = A@x + B(x, u)@(u - umean)/ustd
415 | 
416 |     '''
417 |     def __init__(self, nu, ny, norm : Norm, nx, nb, na, encoder=None, A=None, Bnet=None, C=None, D=None, B_depends_on_u=False, feedthrough=False):
418 |         super().__init__()
419 |         from deepSI.networks import Matrix_converter, MLP_res_net
420 |         self.nu, self.ny, self.norm, self.nx, self.na, self.nb, self.feedthrough, self.B_depends_on_u = nu, ny, norm, nx, na, nb, feedthrough, B_depends_on_u
421 |         self.nu_vals = 1 if nu=='scalar' else nu
422 |         self.ny_vals = 1 if ny=='scalar' else ny
423 |         self.encoder = norm.encoder(MLP_res_net(input_size = [(nb,nu), (na,ny)], output_size = nx)) if encoder is None else encoder
424 | 
425 |         self.A = nn.Parameter(torch.randn((nx,nx))/(2*nx**0.5)) if A is None else A
426 |         self.Bnet = Matrix_converter(MLP_res_net([nx, nu] if B_depends_on_u else nx, nx*self.nu_vals), nrows=nx, ncols=self.nu_vals) if Bnet is None else Bnet
427 |         self.C = nn.Parameter(torch.randn((self.ny_vals,nx))/(2*nx**0.5)) if C is None else C
428 |         if feedthrough:
429 |             self.D = nn.Parameter(torch.randn((self.ny_vals,self.nu_vals))/(2*self.nu_vals**0.5)) if D is None else D
430 |         else:
431 |             self.D = None
432 | 
433 |     def forward(self, upast: torch.Tensor, ypast: torch.Tensor, ufuture: torch.Tensor, yfuture: torch.Tensor=None):
434 |         mv = lambda A, x: torch.bmm(A, x[:, :, None])[:,:,0] #batched matrix-vector multiply
435 |         yfuture_sim = []
436 |         x = self.encoder(upast, ypast) #initial state
437 |         Nbatch = upast.shape[0]
438 |         ufuture = (ufuture - self.norm.umean)/self.norm.ustd # Normalize the input
439 |         ufuture = ufuture.view(Nbatch, ufuture.shape[1], -1) # Convert the u from scalars to vectors if needed
440 |         # Add a batch dimension to the matrices
441 |         A = torch.broadcast_to(self.A, (Nbatch, self.nx, self.nx))
442 |         C = torch.broadcast_to(self.C, (Nbatch, self.ny_vals, self.nx))
443 |         D = None if self.feedthrough==False else torch.broadcast_to(self.D, (Nbatch, self.ny_vals, self.nu_vals))
444 |         for u in ufuture.swapaxes(0,1): #iterate over time
445 |             y = mv(C, x) + (0 if self.feedthrough==False else mv(D, u))
446 |             yfuture_sim.append(y)
447 |             B = self.Bnet(x) if self.B_depends_on_u==False else self.Bnet(x, u[:,0] if self.nu=='scalar' else u) #the vector dim of u is removed if the input is scalar
448 |             x = mv(A,x) + mv(B, u)
449 |         yfuture_sim = torch.stack(yfuture_sim, dim=1)
450 |         if self.ny=='scalar':
451 |             yfuture_sim = yfuture_sim[:,:,0]
452 |         return yfuture_sim*self.norm.ystd + self.norm.ymean
453 | 
--------------------------------------------------------------------------------