├── .gitattributes ├── engression-python ├── engression │ ├── data │ │ ├── __init__.py │ │ ├── loader.py │ │ └── simulator.py │ ├── __init__.py │ ├── utils.py │ ├── loss_func.py │ ├── engression.py │ └── models.py ├── requirements.txt ├── examples │ ├── .DS_Store │ └── example_air.ipynb ├── setup.py ├── LICENSE └── README.md ├── engression-r ├── LICENSE ├── NAMESPACE ├── man │ ├── dftomat.Rd │ ├── energyloss.Rd │ ├── print.engression.Rd │ ├── energylossbeta.Rd │ ├── energylossall.Rd │ ├── print.engressionBagged.Rd │ ├── engressionfit.Rd │ ├── predict.engression.Rd │ ├── predict.engressionBagged.Rd │ ├── engression.Rd │ └── engressionBagged.Rd ├── R │ ├── dftomat.R │ ├── energyloss.R │ ├── energylossbeta.R │ ├── energylossall.R │ ├── print.engression.R │ ├── print.engressionBagged.R │ ├── predict.engression.R │ ├── predict.engressionBagged.R │ ├── engressionfit.R │ ├── engression.R │ └── engressionBagged.R └── DESCRIPTION ├── LICENSE ├── .gitignore └── README.md /.gitattributes: -------------------------------------------------------------------------------- 1 | *.ipynb linguist-detectable=false -------------------------------------------------------------------------------- /engression-python/engression/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /engression-python/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | numpy 3 | matplotlib -------------------------------------------------------------------------------- /engression-r/LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2023 2 | COPYRIGHT HOLDER: Xinwei Shen and Nicolai Meinshausen 3 | -------------------------------------------------------------------------------- /engression-python/examples/.DS_Store: 
"""Package entry point for engression; exposes the ``engression`` function."""

# Check for PyTorch *before* importing any submodule.  The submodules
# presumably import torch themselves (TODO confirm), in which case a guard
# placed after the submodule import could never show its friendlier message.
try:
    # pylint: disable=unused-import
    import torch
except ModuleNotFoundError as err:
    # The two literals are concatenated, so the first must end with a space.
    raise ModuleNotFoundError(
        "No module named 'torch', and engression depends on PyTorch (aka 'torch'). "
        "Visit https://pytorch.org/ for installation instructions.") from err

# pylint: disable=wrong-import-position
from .engression import engression
"""Packaging script for the ``engression`` Python distribution."""
from setuptools import setup, find_packages


# Read with an explicit encoding so the build does not depend on the
# platform's default locale encoding.
with open('README.md', encoding='utf-8') as f:
    long_description = f.read()

# One requirement per line.  Blank lines and comment lines are skipped so
# that no empty requirement strings are handed to setuptools.
with open('requirements.txt', encoding='utf-8') as f:
    install_requires = [
        line.strip()
        for line in f
        if line.strip() and not line.lstrip().startswith('#')
    ]


setup(
    name='engression',
    version='0.1.14',
    description='Engression Modelling',
    url='https://github.com/xwshen51/engression',
    author='Xinwei Shen and Nicolai Meinshausen',
    author_email='xinwei.shen@stat.math.ethz.ch',
    install_requires=install_requires,
    long_description=long_description,
    long_description_content_type="text/markdown",
    packages=find_packages(),
    license="BSD 3-Clause License",
)
#'
#' @details Factors whose levels all parse as numbers are mapped to those
#' numbers. Any other factor, and every character column, is mapped to its
#' integer level codes, so categorical columns never turn into `NA`.
#'
#' @keywords internal
#'
dftomat <- function(X){
  # Convert a single column to a numeric vector.
  to_numeric <- function(x){
    if (is.factor(x)){
      # Try to read the level labels as numbers (e.g. factor(c("10","20"))).
      vals <- suppressWarnings(as.numeric(levels(x)))
      if (anyNA(vals)){
        # Non-numeric labels: as.numeric(as.character(x)) would yield NA here,
        # so fall back to the integer level codes (same as the character case).
        as.numeric(x)
      }else{
        # Indexing by a factor uses its integer codes, giving the numeric label
        # of every observation; missing observations stay NA.
        vals[x]
      }
    }else if(is.character(x)){
      as.numeric(as.factor(x))
    }else{
      as.numeric(x)
    }
  }
  X <- data.frame(lapply(X, to_numeric))
  X = as.matrix(X)
  return(X)
}
#'
#'
#' @keywords internal
#'
energyloss <- function(yt,mxt,mxpt){
  # Mean over samples of the L2 norm (taken along dimension 2) of a - b.
  mean_dist <- function(a, b) torch_mean(torch_norm(a - b, 2, dim=2))

  # Prediction term: average distance from the targets to each of the two draws.
  pred_term <- mean_dist(yt, mxt) / 2 + mean_dist(yt, mxpt) / 2
  # Variance term: distance between the two draws of the model output.
  var_term <- mean_dist(mxt, mxpt)

  pred_term - var_term / 2
}
8 | } 9 | \arguments{ 10 | \item{x}{A trained engression model returned from the engressionfit function.} 11 | 12 | \item{...}{additional arguments (currently ignored)} 13 | } 14 | \value{ 15 | This function does not return anything. It prints a summary of the model, 16 | including information about its architecture and training process, and the loss 17 | values achieved at several epochs during training. 18 | } 19 | \description{ 20 | This function is a utility that displays a summary of a fitted Engression model object. 21 | } 22 | \examples{ 23 | \donttest{ 24 | n = 1000 25 | p = 5 26 | 27 | X = matrix(rnorm(n*p),ncol=p) 28 | Y = (X[,1]+rnorm(n)*0.1)^2 + (X[,2]+rnorm(n)*0.1) + rnorm(n)*0.1 29 | 30 | ## fit engression object 31 | engr = engression(X,Y) 32 | print(engr) 33 | } 34 | 35 | } 36 | -------------------------------------------------------------------------------- /engression-r/man/energylossbeta.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/energylossbeta.R 3 | \name{energylossbeta} 4 | \alias{energylossbeta} 5 | \title{Energy Loss Calculation with Beta Scaling} 6 | \usage{ 7 | energylossbeta(yt, mxt, mxpt, beta) 8 | } 9 | \arguments{ 10 | \item{yt}{A tensor representing the target values.} 11 | 12 | \item{mxt}{A tensor representing the model's stochastic predictions.} 13 | 14 | \item{mxpt}{A tensor representing another draw of the model's stochastic predictions.} 15 | 16 | \item{beta}{A numeric value for scaling the energy loss.} 17 | } 18 | \value{ 19 | A scalar representing the calculated energy loss. 20 | } 21 | \description{ 22 | This function calculates the energy loss for given tensors. 
#' Energy Loss Calculation with Beta Scaling
#'
#' This function calculates the energy loss for given tensors. The L2 norms between
#' `yt` and `mxt`, between `yt` and `mxpt`, and between `mxt` and `mxpt` are each
#' raised to the power of `beta` per sample and then averaged. The loss is the
#' average of the first two means minus half of the third. For `beta = 1` this
#' coincides with `energyloss()`.
#'
#' @param yt A tensor representing the target values.
#' @param mxt A tensor representing the model's stochastic predictions.
#' @param mxpt A tensor representing another draw of the model's stochastic predictions.
#' @param beta A numeric value for scaling the energy loss.
#'
#' @return A scalar representing the calculated energy loss.
#'
#' @keywords internal
#'
energylossbeta <- function(yt,mxt,mxpt,beta){
  # Mean of the per-sample L2 norm (along dimension 2) raised to `beta`.
  # The power is applied BEFORE averaging: the loss is E(|a-b|^beta), not
  # (E|a-b|)^beta, which differ whenever beta != 1.
  mean_pow_dist <- function(a, b){
    torch_mean(torch_pow(torch_norm(a - b, 2, dim=2), beta))
  }
  s1 = mean_pow_dist(yt, mxt) / 2 + mean_pow_dist(yt, mxpt) / 2
  s2 = mean_pow_dist(mxt, mxpt)
  return (s1 - s2/2)
}
#' The loss is calculated
#' as the mean of the L2 norms between `yt` and `mxt` and between `yt` and `mxpt`,
#' subtracted by half the mean of the L2 norm between `mxt` and `mxpt`. Unlike `energyloss()`, this function
#' also returns the prediction loss s1 = E(|yt-mxt|) and variance loss s2 = E(|mxt-mxpt'|) as part of the output.
#'
#' @param yt A tensor representing the target values.
#' @param mxt A tensor representing the model's stochastic predictions.
#' @param mxpt A tensor representing another draw of the model's stochastic predictions.
#'
#' @return A vector containing the calculated energy loss, `s1`, and `s2`.
#'
#'
#' @keywords internal
energylossall <- function(yt,mxt,mxpt){
  # Mean over samples of the L2 norm (taken along dimension 2) of a - b.
  avg_dist <- function(a, b) torch_mean(torch_norm(a - b, 2, dim=2))

  # Prediction loss: targets against each of the two stochastic draws.
  pred_loss <- avg_dist(yt, mxt) / 2 + avg_dist(yt, mxpt) / 2
  # Variance loss: the two draws against each other.
  var_loss <- avg_dist(mxt, mxpt)

  # Combined energy loss first, then its two components.
  c(pred_loss - var_loss/2, pred_loss, var_loss)
}
The loss is calculated 21 | as the mean of the L2 norms between \code{yt} and \code{mxt} and between \code{yt} and \code{mxpt}, 22 | subtracted by half the mean of the L2 norm between \code{mxt} and \code{mxpt}. Unlike \code{energyloss()}, this function 23 | also returns the prediction loss s1 = E(|yt-mxt|) and variance loss s2 = E(|mxt-mxpt'|) as part of the output. 24 | } 25 | \keyword{internal} 26 | -------------------------------------------------------------------------------- /engression-r/DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: engression 2 | Title: Engression Modelling 3 | Version: 0.1.3 4 | Authors@R: c(person("Xinwei", "Shen", role = c("aut"), email = "xinwei.shen@stat.math.ethz.ch"), person("Nicolai", "Meinshausen", role = c("aut", "cre"), email = "meinshausen@stat.math.ethz.ch")) 5 | Description: Fits engression models for nonlinear distributional regression. Predictors and targets can be univariate or multivariate. Functionality includes estimation of conditional mean, estimation of conditional quantiles, or sampling from the fitted distribution. Training is done full-batch on CPU (the python version offers GPU-accelerated stochastic gradient descent). Based on "Engression: Extrapolation for nonlinear regression?" by Xinwei Shen and Nicolai Meinshausen (2023) . 
6 | URL: https://github.com/xwshen51/engression/ 7 | BugReports: https://github.com/xwshen51/engression/issues 8 | License: MIT + file LICENSE 9 | Encoding: UTF-8 10 | Roxygen: list(markdown = TRUE) 11 | RoxygenNote: 7.2.3 12 | Imports: torch 13 | NeedsCompilation: no 14 | Packaged: 2023-09-15 12:19:37 UTC; nicolai 15 | Author: Xinwei Shen [aut], 16 | Nicolai Meinshausen [aut, cre] 17 | Maintainer: Nicolai Meinshausen 18 | -------------------------------------------------------------------------------- /engression-r/man/print.engressionBagged.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/print.engressionBagged.R 3 | \name{print.engressionBagged} 4 | \alias{print.engressionBagged} 5 | \title{Print a Bagged Engression Model Object} 6 | \usage{ 7 | \method{print}{engressionBagged}(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{A trained bagged engression model object returned from 11 | the engressionBagged function.} 12 | 13 | \item{...}{additional arguments (currently ignored)} 14 | } 15 | \value{ 16 | This function does not return anything. It prints a summary of the 17 | model, including the architecture of the individual models, the number 18 | of models in the bagged ensemble, and the loss values achieved at several 19 | epochs during training. 20 | } 21 | \description{ 22 | This function displays a summary of a bagged Engression model object. The 23 | summary includes details about the individual models as well as the overall 24 | ensemble. 
25 | } 26 | \examples{ 27 | \donttest{ 28 | n = 1000 29 | p = 5 30 | X = matrix(rnorm(n*p),ncol=p) 31 | Y = (X[,1]+rnorm(n)*0.1)^2 + (X[,2]+rnorm(n)*0.1) + rnorm(n)*0.1 32 | 33 | ## fit bagged engression object 34 | engb = engressionBagged(X,Y,K=3) 35 | print(engb) 36 | 37 | } 38 | 39 | } 40 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2023, Xinwei Shen and Nicolai Meinshausen 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | -------------------------------------------------------------------------------- /engression-python/LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2023, Xinwei Shen and Nicolai Meinshausen 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | -------------------------------------------------------------------------------- /engression-r/R/print.engression.R: -------------------------------------------------------------------------------- 1 | #' Print an Engression Model Object 2 | #' 3 | #' This function is a utility that displays a summary of a fitted Engression model object. 4 | #' 5 | #' @param x A trained engression model returned from the engressionfit function. 6 | #' @param ... additional arguments (currently ignored) 7 | #' 8 | #' @return This function does not return anything. It prints a summary of the model, 9 | #' including information about its architecture and training process, and the loss 10 | #' values achieved at several epochs during training. 
#'
#' @examples
#' \donttest{
#' n = 1000
#' p = 5
#'
#' X = matrix(rnorm(n*p),ncol=p)
#' Y = (X[,1]+rnorm(n)*0.1)^2 + (X[,2]+rnorm(n)*0.1) + rnorm(n)*0.1
#'
#' ## fit engression object
#' engr = engression(X,Y)
#' print(engr)
#' }
#'
#' @export
print.engression <- function(x, ...){
  # Settings stored on the fitted object.
  cat("\n engression object with ")
  cat("\n \t noise dimensions: ",x$noise_dim)
  cat("\n \t hidden dimensions: ",x$hidden_dim)
  cat("\n \t number of layers: ",x$num_layer)
  cat("\n \t dropout rate: ",x$dropout)
  cat("\n \t batch normalization: ",x$batch_norm)
  cat("\n \t number of epochs: ",x$num_epochs)
  cat("\n \t learning rate: ",x$lr)
  cat("\n \t standardization: ",x$standardize)

  m = nrow(x$lossvec)
  # Up to 11 evenly spaced rows of the loss table. unique() prevents the same
  # row from being printed several times when fewer than 11 rows are available.
  printat = unique(pmax(1, floor(seq(1,m, length.out=11))))
  # drop=FALSE keeps the matrix shape even if only one row is selected.
  pr = cbind(printat, x$lossvec[printat, , drop=FALSE])
  colnames(pr) = c("epoch", colnames(x$lossvec))
  cat("\n training loss: \n")
  print(pr)
  cat("\n prediction-loss E(|Y-Yhat|) and variance-loss E(|Yhat-Yhat'|) should ideally be equally large --\n consider training for more epochs if there is a mismatch \n\n")

}
\item{noise_dim}{The dimension of the noise introduced in the model (default: 100).} 27 | 28 | \item{hidden_dim}{The size of the hidden layer in the model (default: 100).} 29 | 30 | \item{num_layer}{The number of layers in the model (default: 3).} 31 | 32 | \item{dropout}{The dropout rate to be used in the model in case no batch normalization is used (default: 0.01)} 33 | 34 | \item{batch_norm}{A boolean indicating whether to use batch-normalization (default: TRUE).} 35 | 36 | \item{num_epochs}{The number of epochs to be used in training (default: 200).} 37 | 38 | \item{lr}{The learning rate to be used in training (default: 10^-3).} 39 | 40 | \item{beta}{The beta scaling factor for energy loss (default: 1).} 41 | 42 | \item{silent}{A boolean indicating whether to suppress output during model training (default: FALSE).} 43 | } 44 | \value{ 45 | A list containing the trained engression model and a vector of loss values. 46 | } 47 | \description{ 48 | This function fits an Engression model to the provided data. It allows for the tuning of 49 | several parameters related to model complexity and training. The function is not meant to 50 | be exported but can be used within the package or for internal testing purposes. 51 | } 52 | \keyword{internal} 53 | -------------------------------------------------------------------------------- /engression-r/R/print.engressionBagged.R: -------------------------------------------------------------------------------- 1 | #' Print a Bagged Engression Model Object 2 | #' 3 | #' This function displays a summary of a bagged Engression model object. The 4 | #' summary includes details about the individual models as well as the overall 5 | #' ensemble. 6 | #' 7 | #' @param x A trained bagged engression model object returned from 8 | #' the engressionBagged function. 9 | #' @param ... additional arguments (currently ignored) 10 | #' 11 | #' @return This function does not return anything. 
#' It prints a summary of the
#' model, including the architecture of the individual models, the number
#' of models in the bagged ensemble, and the loss values achieved at several
#' epochs during training.
#'
#' @examples
#' \donttest{
#' n = 1000
#' p = 5
#' X = matrix(rnorm(n*p),ncol=p)
#' Y = (X[,1]+rnorm(n)*0.1)^2 + (X[,2]+rnorm(n)*0.1) + rnorm(n)*0.1
#'
#' ## fit bagged engression object
#' engb = engressionBagged(X,Y,K=3)
#' print(engb)
#'
#' }
#'
#' @export
print.engressionBagged <- function(x, ...){
  # Settings stored on the bagged object.
  cat("\n bagged engression object with", length(x$models), "models")
  cat("\n \t noise dimensions: ",x$noise_dim)
  cat("\n \t hidden dimensions: ",x$hidden_dim)
  cat("\n \t number of layers: ",x$num_layer)
  cat("\n \t dropout rate: ",x$dropout)
  cat("\n \t batch normalization: ",x$batch_norm)
  cat("\n \t number of epochs: ",x$num_epochs)
  cat("\n \t learning rate: ",x$lr)
  cat("\n \t standardization: ",x$standardize)

  # Element-wise average of the loss tables of all models in the ensemble.
  avloss = Reduce("+",lapply(x$models, function(mod) mod$lossvec))/length(x$models)
  m = nrow(avloss)
  # Up to 11 evenly spaced rows of the loss table. unique() prevents the same
  # row from being printed several times when fewer than 11 rows are available.
  printat = unique(pmax(1,floor((seq(1,m, length.out=11)))))
  # drop=FALSE keeps the matrix shape even if only one row is selected.
  pr = cbind(printat, avloss[printat, , drop=FALSE])
  colnames(pr) = c("epoch", colnames(avloss))
  cat("\n average training loss : \n")
  print(pr)
  cat("\n prediction-loss E(|Y-Yhat|) and variance-loss E(|Yhat-Yhat'|) should ideally be equally large --\n consider training for more epochs if there is a mismatch \n\n")

}
def make_folder(name):
    """Make a folder (including missing parent folders) if it does not exist.

    Args:
        name (str): folder name.
    """
    if not os.path.exists(name):
        print('Creating folder: {}'.format(name))
        # exist_ok=True closes the window between the existence check above
        # and the creation: if the folder appears in between (e.g. created by
        # another process), this no longer raises FileExistsError.
        os.makedirs(name, exist_ok=True)
def partition_data(x_full, y_full, cut_quantile=0.3, split_train="smaller"):
    """Partition data into training and test sets and standardize them.

    The data are split at the ``cut_quantile`` quantile of ``x_full``; all
    returned tensors are standardized with the mean and standard deviation of
    the training part.

    Args:
        x_full (torch.Tensor): full data of x. Presumably 1-D, since the
            selected entries are flattened by the boolean mask and then
            unsqueezed to a column -- TODO confirm with callers.
        y_full (torch.Tensor): full data of y, indexed by the same mask as x.
        cut_quantile (float, optional): quantile of the cutting point of x. Defaults to 0.3.
        split_train (str, optional): which subset is used for training.
            choices=["smaller", "larger"]. Defaults to "smaller".

    Returns:
        tuple of torch.Tensors: ``(x_tr, y_tr, x_te, y_te, x_full_normal)``,
            i.e. standardized training and test data (each with an extra
            dimension added at position 1) and the standardized full x.

    Raises:
        ValueError: if ``split_train`` is not "smaller" or "larger".
    """
    # Reject unknown options instead of silently treating them as "larger".
    if split_train not in ("smaller", "larger"):
        raise ValueError(
            "split_train must be 'smaller' or 'larger', got {!r}".format(split_train))

    # Split data into training and test sets.
    x_cut = torch.quantile(x_full, cut_quantile)
    train_idx = x_full <= x_cut if split_train == "smaller" else x_full >= x_cut
    x_tr, y_tr = x_full[train_idx], y_full[train_idx]
    x_te, y_te = x_full[~train_idx], y_full[~train_idx]

    # Standardize data based on training statistics.
    x_tr_mean, x_tr_std = x_tr.mean(), x_tr.std()
    y_tr_mean, y_tr_std = y_tr.mean(), y_tr.std()
    x_tr = (x_tr - x_tr_mean)/x_tr_std
    y_tr = (y_tr - y_tr_mean)/y_tr_std
    x_te = (x_te - x_tr_mean)/x_tr_std
    y_te = (y_te - y_tr_mean)/y_tr_std
    x_full_normal = (x_full - x_tr_mean)/x_tr_std
    return x_tr.unsqueeze(1), y_tr.unsqueeze(1), x_te.unsqueeze(1), y_te.unsqueeze(1), x_full_normal
"mean" for point estimates, "sample" for samples from the estimated distribution, 24 | or "quantile" for quantiles of the estimated distribution (default: "mean").} 25 | 26 | \item{trim}{The proportion of extreme values to trim when calculating the mean (default: 0.05).} 27 | 28 | \item{quantiles}{The quantiles to estimate if type is "quantile" (default: 0.1*(1:9)).} 29 | 30 | \item{nsample}{The number of samples to draw if type is "sample" (default: 200).} 31 | 32 | \item{drop}{A boolean indicating whether to drop dimensions of length 1 from the output (default: TRUE).} 33 | 34 | \item{...}{additional arguments (currently ignored)} 35 | } 36 | \value{ 37 | A matrix or array of predictions. 38 | } 39 | \description{ 40 | This function computes predictions from a trained engression model. It allows for the generation of point estimates, quantiles, 41 | or samples from the estimated distribution. 42 | } 43 | \examples{ 44 | \donttest{ 45 | n = 1000 46 | p = 5 47 | 48 | X = matrix(rnorm(n*p),ncol=p) 49 | Y = (X[,1]+rnorm(n)*0.1)^2 + (X[,2]+rnorm(n)*0.1) + rnorm(n)*0.1 50 | Xtest = matrix(rnorm(n*p),ncol=p) 51 | Ytest = (Xtest[,1]+rnorm(n)*0.1)^2 + (Xtest[,2]+rnorm(n)*0.1) + rnorm(n)*0.1 52 | 53 | ## fit engression object 54 | engr = engression(X,Y) 55 | print(engr) 56 | 57 | ## prediction on test data 58 | Yhat = predict(engr,Xtest,type="mean") 59 | cat("\n correlation between predicted and realized values: ", signif(cor(Yhat, Ytest),3)) 60 | plot(Yhat, Ytest,xlab="prediction", ylab="observation") 61 | 62 | ## quantile prediction 63 | Yhatquant = predict(engr,Xtest,type="quantiles") 64 | ord = order(Yhat) 65 | matplot(Yhat[ord], Yhatquant[ord,], type="l", col=2,lty=1,xlab="prediction", ylab="observation") 66 | points(Yhat[ord],Ytest[ord],pch=20,cex=0.5) 67 | 68 | ## sampling from estimated model 69 | Ysample = predict(engr,Xtest,type="sample",nsample=1) 70 | 71 | } 72 | 73 | 74 | } 75 | -------------------------------------------------------------------------------- 
import torch
import torch.nn as nn
import numpy as np


# Named choices for the true function of the pre-ANM.
_TRUE_FUNCTIONS = {
    "softplus": lambda x: nn.Softplus()(x),
    "cubic": lambda x: x.pow(3)/3,
    "square": lambda x: (nn.functional.relu(x)).pow(2)/2,
    "log": lambda x: (x/3 + np.log(3) - 2/3)*(x <= 2) + (torch.log(1 + x*(x > 2)))*(x > 2),
}


def _sample_noise(num, noise_std, noise_dist):
    """Draw a (num, 1) tensor of zero-mean noise with standard deviation `noise_std`."""
    if noise_dist == "gaussian":
        return torch.randn(num, 1)*noise_std
    if noise_dist == "uniform":
        # A unit-width uniform has standard deviation 1/sqrt(12); rescale it
        # so that the noise has standard deviation `noise_std`.
        return (torch.rand(num, 1) - 0.5)*noise_std*np.sqrt(12)
    raise ValueError(
        "noise_dist must be 'gaussian' or 'uniform', got {!r}".format(noise_dist))


def preanm_simulator(true_function="softplus", n=10000, x_lower=0, x_upper=2, noise_std=1, noise_dist="gaussian", train=True, device=torch.device("cpu")):
    r"""Data simulator for a pre-additive noise model (pre-ANM).

    The response is generated as `y = g(x + eps)`, where `g` is the true
    function and `eps` is the noise.

    Args:
        true_function (str or callable, optional): true function g^\star. Defaults to "softplus". Choices: ["softplus", "cubic","square", "log"],
            or any callable mapping a tensor to a tensor.
        n (int, optional): sample size. Defaults to 10000.
        x_lower (int, optional): lower bound of the training support. Defaults to 0.
        x_upper (int, optional): upper bound of the training support. Defaults to 2.
        noise_std (int, optional): standard deviation of the noise. Defaults to 1.
        noise_dist (str, optional): noise distribution. Defaults to "gaussian". Choices: ["gaussian", "uniform"].
        train (bool, optional): generate data for training. Defaults to True.
        device (str or torch.device, optional): device. Defaults to torch.device("cpu").

    Returns:
        tuple of torch.Tensors: data simulated from a pre-ANM.
            - If `train` is True: `(x, y)`, both of shape (n, 1), with `x` drawn
              uniformly from [x_lower, x_upper).
            - Otherwise: `(x_eval, y_eval_med, y_eval_mean)`, each of shape
              (n, 1), where `x_eval` is an equally spaced grid on
              [x_lower, x_upper], `y_eval_med` is the true function evaluated
              on the grid, and `y_eval_mean` is a Monte Carlo estimate of the
              mean of `g(x + eps)` at each grid point.

    Raises:
        ValueError: if `true_function` is an unknown name or `noise_dist` is
            not one of the supported choices.
    """
    if isinstance(true_function, str):
        try:
            true_function = _TRUE_FUNCTIONS[true_function]
        except KeyError:
            raise ValueError(
                "true_function must be one of {} or a callable, got {!r}".format(
                    sorted(_TRUE_FUNCTIONS), true_function)) from None

    if isinstance(device, str):
        device = torch.device(device)

    if train:
        x = torch.rand(n, 1)*(x_upper - x_lower) + x_lower
        eps = _sample_noise(n, noise_std, noise_dist)
        y = true_function(x + eps)
        return x.to(device), y.to(device)

    x_eval = torch.linspace(x_lower, x_upper, n).unsqueeze(1)
    y_eval_med = true_function(x_eval)
    # Number of noise draws per grid point for the Monte Carlo mean.
    gen_sample_size = 10000
    x_rep = torch.repeat_interleave(x_eval, gen_sample_size, dim=0)
    # Use the same noise distribution as in training mode so that the
    # estimated mean matches the model that generated the training data.
    x_rep = x_rep + _sample_noise(x_rep.size(0), noise_std, noise_dist)
    # Rows are grouped by grid point, so each row of the reshaped tensor
    # holds all draws for one grid point.
    y_eval_mean = true_function(x_rep).reshape(n, -1).mean(dim=1, keepdim=True)
    return x_eval.to(device), y_eval_med.to(device), y_eval_mean.to(device)
| regression_test/*/new_log 20 | output_dir/ 21 | 22 | # data files 23 | data/ 24 | 25 | # output models 26 | output_models/ 27 | 28 | # Distribution / packaging 29 | .Python 30 | build/ 31 | develop-eggs/ 32 | dist/ 33 | downloads/ 34 | eggs/ 35 | .eggs/ 36 | lib/ 37 | lib64/ 38 | parts/ 39 | sdist/ 40 | var/ 41 | wheels/ 42 | *.egg-info/ 43 | .installed.cfg 44 | *.egg 45 | MANIFEST 46 | 47 | # PyInstaller 48 | # Usually these files are written by a python script from a template 49 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 50 | *.manifest 51 | *.spec 52 | 53 | # Installer logs 54 | pip-log.txt 55 | pip-delete-this-directory.txt 56 | 57 | # Unit test / coverage reports 58 | htmlcov/ 59 | .tox/ 60 | .nox/ 61 | .coverage 62 | .coverage.* 63 | .cache 64 | nosetests.xml 65 | coverage.xml 66 | *.cover 67 | .hypothesis/ 68 | .pytest_cache/ 69 | 70 | # Translations 71 | *.mo 72 | *.pot 73 | 74 | # Django stuff: 75 | *.log 76 | local_settings.py 77 | db.sqlite3 78 | 79 | # Flask stuff: 80 | instance/ 81 | .webassets-cache 82 | 83 | # Scrapy stuff: 84 | .scrapy 85 | 86 | # Sphinx documentation 87 | docs/_build/ 88 | 89 | # PyBuilder 90 | target/ 91 | 92 | # Jupyter Notebook 93 | .ipynb_checkpoints 94 | 95 | # IPython 96 | profile_default/ 97 | ipython_config.py 98 | 99 | # pyenv 100 | .python-version 101 | 102 | # celery beat schedule file 103 | celerybeat-schedule 104 | 105 | # SageMath parsed files 106 | *.sage.py 107 | 108 | # Environments 109 | .env 110 | .venv 111 | env/ 112 | venv/ 113 | ENV/ 114 | env.bak/ 115 | venv.bak/ 116 | 117 | # Spyder project settings 118 | .spyderproject 119 | .spyproject 120 | 121 | # Rope project settings 122 | .ropeproject 123 | 124 | # mkdocs documentation 125 | /site 126 | 127 | # mypy 128 | .mypy_cache/ 129 | .dmypy.json 130 | dmypy.json 131 | 132 | # Pyre type checker 133 | .pyre/ 134 | 135 | # vscode 136 | .vs 137 | .vscode 138 | 139 | # Pycharm 140 | .idea 141 | 142 | # TF code 143 | 
tensorflow_code 144 | 145 | # Models 146 | proc_data 147 | 148 | # examples 149 | runs 150 | /runs_old 151 | /wandb 152 | /examples/runs 153 | /examples/**/*.args 154 | /examples/rag/sweep 155 | 156 | # data 157 | # /data 158 | serialization_dir 159 | 160 | # emacs 161 | *.*~ 162 | debug.env 163 | 164 | # vim 165 | .*.swp 166 | 167 | #ctags 168 | tags 169 | 170 | # pre-commit 171 | .pre-commit* 172 | 173 | # .lock 174 | *.lock 175 | 176 | # DS_Store (MacOS) 177 | .DS_Store 178 | 179 | # ruff 180 | .ruff_cache 181 | 182 | # lm_evaluation cache 183 | lm_cache/ 184 | 185 | 186 | .codegpt -------------------------------------------------------------------------------- /engression-r/man/predict.engressionBagged.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/predict.engressionBagged.R 3 | \name{predict.engressionBagged} 4 | \alias{predict.engressionBagged} 5 | \title{Prediction Function for Bagged Engression Models} 6 | \usage{ 7 | \method{predict}{engressionBagged}( 8 | object, 9 | Xtest = NULL, 10 | type = c("mean", "sample", "quantile")[1], 11 | trim = 0.05, 12 | quantiles = 0.1 * (1:9), 13 | nsample = 200, 14 | drop = TRUE, 15 | ... 16 | ) 17 | } 18 | \arguments{ 19 | \item{object}{A trained bagged engression model returned from the engressionBagged function.} 20 | 21 | \item{Xtest}{A matrix or data frame representing the predictors in the test set. If NULL, out-of-bag samples from the training 22 | set are used for prediction (default: NULL).} 23 | 24 | \item{type}{The type of prediction to make. 
"mean" for point estimates, "sample" for samples from the estimated distribution, 25 | or "quantile" for quantiles of the estimated distribution (default: "mean").} 26 | 27 | \item{trim}{The proportion of extreme values to trim when calculating the mean (default: 0.05).} 28 | 29 | \item{quantiles}{The quantiles to estimate if type is "quantile" (default: 0.1*(1:9)).} 30 | 31 | \item{nsample}{The number of samples to draw if type is "sample" (default: 200).} 32 | 33 | \item{drop}{A boolean indicating whether to drop dimensions of length 1 from the output (default: TRUE).} 34 | 35 | \item{...}{additional arguments (currently ignored)} 36 | } 37 | \value{ 38 | A matrix or array of predictions. 39 | #' 40 | } 41 | \description{ 42 | This function computes predictions from a trained bagged Engression model. It allows for the generation of point estimates, 43 | quantiles, or samples from the estimated distribution. 44 | } 45 | \examples{ 46 | \donttest{ 47 | n = 1000 48 | p = 5 49 | X = matrix(rnorm(n*p),ncol=p) 50 | Y = (X[,1]+rnorm(n)*0.1)^2 + (X[,2]+rnorm(n)*0.1) + rnorm(n)*0.1 51 | Xtest = matrix(rnorm(n*p),ncol=p) 52 | Ytest = (Xtest[,1]+rnorm(n)*0.1)^2 + (Xtest[,2]+rnorm(n)*0.1) + rnorm(n)*0.1 53 | 54 | ## fit bagged engression object 55 | engb = engressionBagged(X,Y,K=3) 56 | print(engb) 57 | 58 | ## prediction on test data 59 | Yhat = predict(engb,Xtest,type="mean") 60 | cat("\n correlation between predicted and realized values: ", signif(cor(Yhat, Ytest),3)) 61 | plot(Yhat, Ytest,xlab="estimated conditional mean", ylab="observation") 62 | 63 | ## out-of-bag prediction 64 | Yhat_oob = predict(engb,type="mean") 65 | cat("\n correlation between predicted and realized values on oob data: ") 66 | print(signif(cor(Yhat_oob, Y),3)) 67 | plot(Yhat_oob, Y,xlab="estimated conditional mean", ylab="observation") 68 | 69 | ## quantile prediction 70 | Yhatquant = predict(engb,Xtest,type="quantiles") 71 | ord = order(Yhat) 72 | matplot(Yhat[ord], Yhatquant[ord,], type="l", 
col=2,lty=1,xlab="prediction", ylab="observation") 73 | points(Yhat[ord],Ytest[ord],pch=20,cex=0.5) 74 | 75 | ## sampling from estimated model 76 | Ysample = predict(engb,Xtest,type="sample",nsample=1) 77 | 78 | } 79 | 80 | } 81 | -------------------------------------------------------------------------------- /engression-r/man/engression.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/engression.R 3 | \name{engression} 4 | \alias{engression} 5 | \title{Engression Function} 6 | \usage{ 7 | engression( 8 | X, 9 | Y, 10 | noise_dim = 5, 11 | hidden_dim = 100, 12 | num_layer = 3, 13 | dropout = 0.05, 14 | batch_norm = TRUE, 15 | num_epochs = 1000, 16 | lr = 10^(-3), 17 | beta = 1, 18 | silent = FALSE, 19 | standardize = TRUE 20 | ) 21 | } 22 | \arguments{ 23 | \item{X}{A matrix or data frame representing the predictors.} 24 | 25 | \item{Y}{A matrix or vector representing the target variable(s).} 26 | 27 | \item{noise_dim}{The dimension of the noise introduced in the model (default: 5).} 28 | 29 | \item{hidden_dim}{The size of the hidden layer in the model (default: 100).} 30 | 31 | \item{num_layer}{The number of layers in the model (default: 3).} 32 | 33 | \item{dropout}{The dropout rate to be used in the model in case no batch normalization is used (default: 0.01)} 34 | 35 | \item{batch_norm}{A boolean indicating whether to use batch-normalization (default: TRUE).} 36 | 37 | \item{num_epochs}{The number of epochs to be used in training (default: 1000).} 38 | 39 | \item{lr}{The learning rate to be used in training (default: 10^-3).} 40 | 41 | \item{beta}{The beta scaling factor for energy loss (default: 1).} 42 | 43 | \item{silent}{A boolean indicating whether to suppress output during model training (default: FALSE).} 44 | 45 | \item{standardize}{A boolean indicating whether to standardize the input data (default: TRUE).} 46 | } 47 | \value{ 48 | 
import torch
from .utils import vectorize
from torch.linalg import vector_norm


def energy_loss(x_true, x_est, beta=1, verbose=True):
    """Loss function based on the energy score.

    Args:
        x_true (torch.Tensor): iid samples from the true distribution of shape (data_size, data_dim)
        x_est (list of torch.Tensor or torch.Tensor):
            - a list of length sample_size, where each element is a tensor of shape (data_size, data_dim) that contains one sample for each data point from the estimated distribution, or
            - a tensor of shape (data_size*sample_size, data_dim) such that x_est[data_size*(i-1):data_size*i,:] contains one sample for each data point, for i = 1, ..., sample_size.
        beta (float): power parameter in the energy score.
        verbose (bool): if True, return the loss together with its two terms.

    Returns:
        loss (torch.Tensor): if `verbose`, a tensor of three elements
            `[s1 - s2 / 2, s1, s2]`; otherwise only `s1 - s2 / 2`.

    Raises:
        ValueError: if fewer than two samples per data point are given.
    """
    # A small offset is added to the distances only for non-integer beta.
    EPS = 0 if float(beta).is_integer() else 1e-5
    # NOTE(review): `vectorize` is defined in utils and is not visible here;
    # presumably it flattens each data point into a vector.
    x_true = vectorize(x_true).unsqueeze(1)
    if not isinstance(x_est, list):
        x_est = list(torch.split(x_est, x_true.shape[0], dim=0))
    m = len(x_est)
    if m < 2:
        # The pairwise term is rescaled by m / (m - 1) below, which is
        # undefined for a single sample and used to produce NaN silently.
        raise ValueError(
            "energy_loss needs at least two samples per data point, got {}".format(m))
    # Stack the samples along a new dimension 1.
    x_est = torch.cat([vectorize(sample).unsqueeze(1) for sample in x_est], dim=1)

    s1 = (vector_norm(x_est - x_true, 2, dim=2) + EPS).pow(beta).mean()
    # The mean runs over all m*m sample pairs including the pairs of a sample
    # with itself; the factor m / (m - 1) rescales it to the m*(m - 1)
    # pairs of distinct samples.
    s2 = (torch.cdist(x_est, x_est, 2) + EPS).pow(beta).mean() * m / (m - 1)
    if verbose:
        return torch.cat([(s1 - s2 / 2).reshape(1), s1.reshape(1), s2.reshape(1)], dim=0)
    else:
        return (s1 - s2 / 2)


def energy_loss_two_sample(x0, x, xp, x0p=None, beta=1, verbose=True, weights=None):
    """Loss function based on the energy score (estimated based on two samples).

    Args:
        x0 (torch.Tensor): an iid sample from the true distribution.
        x (torch.Tensor): an iid sample from the estimated distribution.
        xp (torch.Tensor): another iid sample from the estimated distribution.
        x0p (torch.Tensor, optional): another iid sample from the true distribution. Defaults to None.
        beta (float): power parameter in the energy score.
        verbose (bool): if True, return the loss together with the terms `s1` and `s2`.
        weights (float or torch.Tensor, optional): weights multiplied with the
            per-data-point terms before summing. Defaults to None, in which
            case every data point gets weight `1 / x0.size(0)`. Only used
            when `x0p` is None.

    Returns:
        loss (torch.Tensor): if `verbose`, a tensor of three elements
            `[loss, s1, s2]`; otherwise only `loss`.
    """
    # A small offset is added to the distances only for non-integer beta.
    EPS = 0 if float(beta).is_integer() else 1e-5
    x0 = vectorize(x0)
    x = vectorize(x)
    xp = vectorize(xp)
    if weights is None:
        weights = 1 / x0.size(0)
    if x0p is None:
        s1 = ((vector_norm(x - x0, 2, dim=1) + EPS).pow(beta) * weights).sum() / 2 + ((vector_norm(xp - x0, 2, dim=1) + EPS).pow(beta) * weights).sum() / 2
        s2 = ((vector_norm(x - xp, 2, dim=1) + EPS).pow(beta) * weights).sum()
        loss = s1 - s2/2
    else:
        # NOTE(review): this branch ignores `weights` and sums over data
        # points, so its scale grows with the batch size, unlike the branch
        # above -- confirm whether this is intended.
        x0p = vectorize(x0p)
        s1 = ((vector_norm(x - x0, 2, dim=1) + EPS).pow(beta).sum() + (vector_norm(xp - x0, 2, dim=1) + EPS).pow(beta).sum() +
              (vector_norm(x - x0p, 2, dim=1) + EPS).pow(beta).sum() + (vector_norm(xp - x0p, 2, dim=1) + EPS).pow(beta).sum()) / 4
        s2 = (vector_norm(x - xp, 2, dim=1) + EPS).pow(beta).sum()
        s3 = (vector_norm(x0 - x0p, 2, dim=1) + EPS).pow(beta).sum()
        loss = s1 - s2/2 - s3/2
    if verbose:
        # `s3` is part of `loss` in the x0p branch but is not returned separately.
        return torch.cat([loss.reshape(1), s1.reshape(1), s2.reshape(1)], dim=0)
    else:
        return loss
#' @param trim The proportion of extreme values to trim when calculating the mean (default: 0.05).
#' @param quantiles The quantiles to estimate if type is "quantile" (default: 0.1*(1:9)).
#' @param nsample The number of samples to draw from the fitted model; the mean and the quantiles are
#' computed from these samples (default: 200).
#' @param drop A boolean indicating whether to drop dimensions of length 1 from the output (default: TRUE).
#' @param ... additional arguments (currently ignored)
#'
#' @return A matrix or array of predictions.
#'
#' @examples
#' \donttest{
#'   n = 1000
#'   p = 5
#'
#'   X = matrix(rnorm(n*p),ncol=p)
#'   Y = (X[,1]+rnorm(n)*0.1)^2 + (X[,2]+rnorm(n)*0.1) + rnorm(n)*0.1
#'   Xtest = matrix(rnorm(n*p),ncol=p)
#'   Ytest = (Xtest[,1]+rnorm(n)*0.1)^2 + (Xtest[,2]+rnorm(n)*0.1) + rnorm(n)*0.1
#'
#'   ## fit engression object
#'   engr = engression(X,Y)
#'   print(engr)
#'
#'   ## prediction on test data
#'   Yhat = predict(engr,Xtest,type="mean")
#'   cat("\n correlation between predicted and realized values: ", signif(cor(Yhat, Ytest),3))
#'   plot(Yhat, Ytest,xlab="prediction", ylab="observation")
#'
#'   ## quantile prediction
#'   Yhatquant = predict(engr,Xtest,type="quantiles")
#'   ord = order(Yhat)
#'   matplot(Yhat[ord], Yhatquant[ord,], type="l", col=2,lty=1,xlab="prediction", ylab="observation")
#'   points(Yhat[ord],Ytest[ord],pch=20,cex=0.5)
#'
#'   ## sampling from estimated model
#'   Ysample = predict(engr,Xtest,type="sample",nsample=1)
#'
#' }
#'
#'
#' @export
predict.engression <- function(object, Xtest, type=c("mean","sample","quantile")[1],trim=0.05, quantiles=0.1*(1:9), nsample=200, drop=TRUE, ...){

    if (is.data.frame(Xtest)) Xtest = dftomat(Xtest)
    if (is.vector(Xtest) && is.numeric(Xtest)) Xtest <- matrix(Xtest, ncol = 1)

    ## apply the standardization stored in the fitted object to the test predictors
    if(object$standardize){
        Xtest = sweep(sweep(Xtest,2,object$muX,FUN="-"),2,object$sddX,FUN="/")
    }

    ## one call of the engressor is used only to get the output dimensions
    Yhat1 = object$engressor(Xtest)
    Yhat = array(dim=c(dim(Yhat1)[1], dim(Yhat1)[2], nsample))
    ## every call of the engressor fills one slice of the third dimension;
    ## seq_len() makes the loop empty for nsample = 0 (1:0 would run twice)
    for (sam in seq_len(nsample)){
        Ysam = object$engressor(Xtest)
        ## map the draw back to the original scale of Y
        if(object$standardize) Ysam = sweep(sweep(Ysam,2,object$sddY,FUN="*"),2,object$muY,FUN="+")
        Yhat[, ,sam] = Ysam
    }

    if(type=="sample") dimnames(Yhat)[[3]] = paste("sample_",seq_len(nsample),sep="")
    if(type=="mean") Yhat = apply(Yhat,1:(length(dim(Yhat))-1), mean,trim=trim)
    if(type %in% c("quantile","quantiles")){
        if(length(quantiles)==1){
            Yhat = apply(Yhat,1:(length(dim(Yhat))-1), quantile, quantiles)
        }else{
            ## apply() puts the quantiles into the first dimension; move them to the last.
            ## The closing parenthesis of dim() used to be misplaced, which made the
            ## condition test the length of a logical vector.
            Yhat = aperm( apply(Yhat,1:(length(dim(Yhat))-1), quantile, quantiles), if(length(dim(Yhat))==3) c(2,3,1) else c(2,1) )
        }
    }

    return(if(drop) drop(Yhat) else Yhat)

}
introduced in the model (default: 10).} 34 | 35 | \item{hidden_dim}{The size of the hidden layer in the model (default: 100).} 36 | 37 | \item{num_layer}{The number of layers in the model (default: 3).} 38 | 39 | \item{dropout}{The dropout rate to be used in the model (default: 0.05).} 40 | 41 | \item{batch_norm}{A boolean indicating whether to use batch-normalization (default: TRUE).} 42 | 43 | \item{num_epochs}{The number of epochs to be used in training (default: 1000).} 44 | 45 | \item{lr}{The learning rate to be used in training (default: 10^-3).} 46 | 47 | \item{beta}{The beta scaling factor for energy loss (default: 1).} 48 | 49 | \item{silent}{A boolean indicating whether to suppress output during model training (default: FALSE).} 50 | 51 | \item{standardize}{A boolean indicating whether to standardize the input data (default: TRUE).} 52 | } 53 | \value{ 54 | A bagged engression model object with class "engressionBagged". 55 | } 56 | \description{ 57 | This function fits a bagged engression model to the data by fitting multiple 58 | engression models to subsamples of the data. It allows for the tuning of several parameters 59 | related to model complexity. 
60 | } 61 | \examples{ 62 | \donttest{ 63 | n = 1000 64 | p = 5 65 | X = matrix(rnorm(n*p),ncol=p) 66 | Y = (X[,1]+rnorm(n)*0.1)^2 + (X[,2]+rnorm(n)*0.1) + rnorm(n)*0.1 67 | Xtest = matrix(rnorm(n*p),ncol=p) 68 | Ytest = (Xtest[,1]+rnorm(n)*0.1)^2 + (Xtest[,2]+rnorm(n)*0.1) + rnorm(n)*0.1 69 | 70 | ## fit bagged engression object 71 | engb = engressionBagged(X,Y,K=3) 72 | print(engb) 73 | 74 | ## prediction on test data 75 | Yhat = predict(engb,Xtest,type="mean") 76 | cat("\n correlation between predicted and realized values: ", signif(cor(Yhat, Ytest),3)) 77 | plot(Yhat, Ytest,xlab="estimated conditional mean", ylab="observation") 78 | 79 | ## out-of-bag prediction 80 | Yhat_oob = predict(engb,type="mean") 81 | cat("\n correlation between predicted and realized values on oob data: ") 82 | print(signif(cor(Yhat_oob, Y),3)) 83 | plot(Yhat_oob, Y,xlab="prediction", ylab="observation") 84 | 85 | ## quantile prediction 86 | Yhatquant = predict(engb,Xtest,type="quantiles") 87 | ord = order(Yhat) 88 | matplot(Yhat[ord], Yhatquant[ord,], type="l", col=2,lty=1,xlab="prediction", ylab="observation") 89 | points(Yhat[ord],Ytest[ord],pch=20,cex=0.5) 90 | 91 | ## sampling from estimated model 92 | Ysample = predict(engb,Xtest,type="sample",nsample=1) 93 | 94 | ## plot of realized values against first variable 95 | oldpar <- par() 96 | par(mfrow=c(1,2)) 97 | plot(Xtest[,1], Ytest, xlab="Variable 1", ylab="Observation") 98 | ## plot of sampled values against first variable 99 | plot(Xtest[,1], Ysample[,1], xlab="Variable 1", ylab="Sample from engression model") 100 | par(oldpar) 101 | } 102 | 103 | } 104 | -------------------------------------------------------------------------------- /engression-python/README.md: -------------------------------------------------------------------------------- 1 | # Engression 2 | 3 | Engression is a neural network-based distributional regression method proposed in the paper "[*Engression: Extrapolation through the Lens of Distributional 
Regression?*](https://arxiv.org/abs/2307.00835)" by Xinwei Shen and Nicolai Meinshausen (2023). This repository contains the software implementations of engression in both R and Python. 4 | 5 | Consider targets $Y\in\mathbb{R}^k$ and predictors $X\in\mathbb{R}^d$; both variables can be univariate or multivariate, continuous or discrete. Engression can be used to 6 | * estimate the conditional mean $\mathbb{E}[Y|X=x]$ (as in least-squares regression), 7 | * estimate the conditional quantiles of $Y$ given $X=x$ (as in quantile regression), and 8 | * sample from the fitted conditional distribution of $Y$ given $X=x$ (as a generative model). 9 | 10 | The results in the paper show the advantages of engression over existing regression approaches in terms of extrapolation. 11 | 12 | 13 | ## Installation 14 | The latest release of the Python package can be installed through pip: 15 | ```sh 16 | pip install engression 17 | ``` 18 | 19 | The development version can be installed from github: 20 | 21 | ```sh 22 | pip install -e "git+https://github.com/xwshen51/engression#egg=engression&subdirectory=engression-python" 23 | ``` 24 | 25 | 26 | ## Usage Example 27 | 28 | ### Python 29 | 30 | Below is one simple demonstration. See [this tutorial](https://github.com/xwshen51/engression/blob/main/engression-python/examples/example_simu.ipynb) for more details on simulated data and [this tutorial](https://github.com/xwshen51/engression/blob/main/engression-python/examples/example_air.ipynb) for a real data example. We demonstrate in [another tutorial](https://github.com/xwshen51/engression/blob/main/engression-python/examples/example_bag.ipynb) how to fit a bagged engression model, which also helps with hyperparameter tuning. 
31 | ```python 32 | from engression import engression 33 | from engression.data.simulator import preanm_simulator 34 | 35 | ## Simulate data 36 | x, y = preanm_simulator("square", n=10000, x_lower=0, x_upper=2, noise_std=1, train=True, device="cuda") 37 | x_eval, y_eval_med, y_eval_mean = preanm_simulator("square", n=1000, x_lower=0, x_upper=4, noise_std=1, train=False, device="cuda") 38 | 39 | ## Fit an engression model 40 | engressor = engression(x, y, lr=0.01, num_epochs=500, batch_size=1000, device="cuda") 41 | ## Summarize model information 42 | engressor.summary() 43 | 44 | ## Evaluation 45 | print("L2 loss:", engressor.eval_loss(x_eval, y_eval_mean, loss_type="l2")) 46 | print("correlation between predicted and true means:", engressor.eval_loss(x_eval, y_eval_mean, loss_type="cor")) 47 | 48 | ## Predictions 49 | y_pred_mean = engressor.predict(x_eval, target="mean") ## for the conditional mean 50 | y_pred_med = engressor.predict(x_eval, target="median") ## for the conditional median 51 | y_pred_quant = engressor.predict(x_eval, target=[0.025, 0.5, 0.975]) ## for the conditional 2.5%, 50% and 97.5% quantiles 52 | ``` 53 | 54 | 55 | ## Contact information 56 | If you encounter any problems with the code, please submit an issue or contact [Xinwei Shen](mailto:xinwei.shen@stat.math.ethz.ch).
57 | 58 | 59 | ## Citation 60 | If you refer to or extend our work, please cite the following paper: 61 | ``` 62 | @article{10.1093/jrsssb/qkae108, 63 | author = {Shen, Xinwei and Meinshausen, Nicolai}, 64 | title = {Engression: extrapolation through the lens of distributional regression}, 65 | journal = {Journal of the Royal Statistical Society Series B: Statistical Methodology}, 66 | pages = {qkae108}, 67 | year = {2024}, 68 | month = {11}, 69 | issn = {1369-7412}, 70 | doi = {10.1093/jrsssb/qkae108}, 71 | url = {https://doi.org/10.1093/jrsssb/qkae108}, 72 | eprint = {https://academic.oup.com/jrsssb/advance-article-pdf/doi/10.1093/jrsssb/qkae108/60827977/qkae108.pdf}, 73 | } 74 | ``` -------------------------------------------------------------------------------- /engression-r/R/predict.engressionBagged.R: -------------------------------------------------------------------------------- 1 | #' Prediction Function for Bagged Engression Models 2 | #' 3 | #' This function computes predictions from a trained bagged Engression model. It allows for the generation of point estimates, 4 | #' quantiles, or samples from the estimated distribution. 5 | #' 6 | #' @param object A trained bagged engression model returned from the engressionBagged function. 7 | #' @param Xtest A matrix or data frame representing the predictors in the test set. If NULL, out-of-bag samples from the training 8 | #' set are used for prediction (default: NULL). 9 | #' @param type The type of prediction to make. "mean" for point estimates, "sample" for samples from the estimated distribution, 10 | #' or "quantile" for quantiles of the estimated distribution (default: "mean"). 11 | #' @param trim The proportion of extreme values to trim when calculating the mean (default: 0.05). 12 | #' @param quantiles The quantiles to estimate if type is "quantile" (default: 0.1*(1:9)). 13 | #' @param nsample The number of samples to draw if type is "sample" (default: 200).
#' @param drop A boolean indicating whether to drop dimensions of length 1 from the output (default: TRUE).
#' @param ... additional arguments (currently ignored)
#'
#' @return A matrix or array of predictions.
#'
#' @examples
#' \donttest{
#'   n = 1000
#'   p = 5
#'   X = matrix(rnorm(n*p),ncol=p)
#'   Y = (X[,1]+rnorm(n)*0.1)^2 + (X[,2]+rnorm(n)*0.1) + rnorm(n)*0.1
#'   Xtest = matrix(rnorm(n*p),ncol=p)
#'   Ytest = (Xtest[,1]+rnorm(n)*0.1)^2 + (Xtest[,2]+rnorm(n)*0.1) + rnorm(n)*0.1
#'
#'   ## fit bagged engression object
#'   engb = engressionBagged(X,Y,K=3)
#'   print(engb)
#'
#'   ## prediction on test data
#'   Yhat = predict(engb,Xtest,type="mean")
#'   cat("\n correlation between predicted and realized values: ", signif(cor(Yhat, Ytest),3))
#'   plot(Yhat, Ytest,xlab="estimated conditional mean", ylab="observation")
#'
#'   ## out-of-bag prediction
#'   Yhat_oob = predict(engb,type="mean")
#'   cat("\n correlation between predicted and realized values on oob data: ")
#'   print(signif(cor(Yhat_oob, Y),3))
#'   plot(Yhat_oob, Y,xlab="estimated conditional mean", ylab="observation")
#'
#'   ## quantile prediction
#'   Yhatquant = predict(engb,Xtest,type="quantiles")
#'   ord = order(Yhat)
#'   matplot(Yhat[ord], Yhatquant[ord,], type="l", col=2,lty=1,xlab="prediction", ylab="observation")
#'   points(Yhat[ord],Ytest[ord],pch=20,cex=0.5)
#'
#'   ## sampling from estimated model
#'   Ysample = predict(engb,Xtest,type="sample",nsample=1)
#'
#' }
#'
#' @export
predict.engressionBagged <- function(object, Xtest=NULL, type=c("mean","sample","quantile")[1],trim=0.05, quantiles=0.1*(1:9), nsample=200, drop=TRUE, ...){
    ## without test data, predict on the stored training data using out-of-bag models only
    useoob=FALSE
    if(is.null(Xtest)){
        useoob = TRUE
        if(!is.null(object$Xtrain)) Xtest = object$Xtrain else stop("if Xtest is not provided, need to set keepoutbag=TRUE when fitting bagged engression model")
    }
    if (is.data.frame(Xtest)) Xtest = dftomat(Xtest)
    if (is.vector(Xtest) && is.numeric(Xtest)) Xtest <- matrix(Xtest, ncol = 1)

    K = length(object$models)

    ## samples per model; five times as many are drawn in the out-of-bag case
    nsam = if(useoob) 5*ceiling(nsample/K) else ceiling(nsample/K)
    ## the prediction of the first model is used only to get the output dimensions
    Yhat1 = predict.engression(object$models[[1]],Xtest, type="sample", nsample=nsam, drop=FALSE)

    Yhat = array(dim=c(dim(Yhat1),K))
    for (k in seq_len(K)){
        if(!useoob){
            Yhat[,,,k] = predict.engression(object$models[[k]],Xtest, type="sample", nsample=nsam, drop=FALSE)
        }else{
            ## rows whose entries of object$inbag are all different from k
            ## (presumably the observations not used to fit model k -- verify against engressionBagged)
            usesam = which(apply(object$inbag!=k,1,all))
            if(length(usesam)>0){
                ## drop=FALSE keeps a single selected row as a one-row matrix; as a plain
                ## vector it would be turned into a one-column matrix by predict.engression
                Yhat[usesam,,,k] = predict.engression(object$models[[k]],Xtest[usesam,,drop=FALSE], type="sample", nsample=nsam, drop=FALSE)
            }
        }
    }
    ## merge the sample and model dimensions into one dimension of samples
    Yhat = aperm( apply(Yhat,c(1,2),as.vector ),c(2,3,1))
    ## entries of models that were not out-of-bag for an observation stayed NA; remove them
    if(useoob) Yhat = aperm(apply(Yhat,1:2,function(x) x[which(!is.na(x))]),c(2,3,1))
    if(type=="sample") dimnames(Yhat)[[length(dim(Yhat))]] = paste("sample_",seq_len(dim(Yhat)[length(dim(Yhat))]),sep="")
    if(type=="mean") Yhat = apply(Yhat,1:(length(dim(Yhat))-1), mean, trim=trim)
    if(type %in% c("quantile","quantiles")){
        if(length(quantiles)==1){
            Yhat = apply(Yhat,1:(length(dim(Yhat))-1), quantile, quantiles)
        }else{
            ## apply() puts the quantiles into the first dimension; move them to the last.
            ## The closing parenthesis of dim() used to be misplaced, which made the
            ## condition test the length of a logical vector.
            Yhat = aperm( apply(Yhat,1:(length(dim(Yhat))-1), quantile, quantiles), if(length(dim(Yhat))==3) c(2,3,1) else c(2,1) )
        }
    }
    return(if(drop) drop(Yhat) else Yhat)
}
6 | #' 7 | #' @param X A matrix or data frame representing the predictors. 8 | #' @param Y A matrix representing the target variable(s). 9 | #' @param noise_dim The dimension of the noise introduced in the model (default: 100). 10 | #' @param hidden_dim The size of the hidden layer in the model (default: 100). 11 | #' @param num_layer The number of layers in the model (default: 3). 12 | #' @param dropout The dropout rate to be used in the model in case no batch normalization is used (default: 0.01) 13 | #' @param batch_norm A boolean indicating whether to use batch-normalization (default: TRUE). 14 | #' @param num_epochs The number of epochs to be used in training (default: 200). 15 | #' @param lr The learning rate to be used in training (default: 10^-3). 16 | #' @param beta The beta scaling factor for energy loss (default: 1). 17 | #' @param silent A boolean indicating whether to suppress output during model training (default: FALSE). 18 | #' 19 | #' @return A list containing the trained engression model and a vector of loss values. 
#' Engression Fit Function
#'
#' This function fits an Engression model to the provided data. It allows for the tuning of
#' several parameters related to model complexity and training. The function is not meant to
#' be exported but can be used within the package or for internal testing purposes.
#'
#' @param X A matrix or data frame representing the predictors.
#' @param Y A matrix representing the target variable(s).
#' @param noise_dim The dimension of the noise introduced in the model (default: 100).
#' @param hidden_dim The size of the hidden layer in the model (default: 100).
#' @param num_layer The number of (linear) layers in the model; values below 2 are treated as 2 (default: 3).
#' @param dropout The dropout rate to be used in the model in case no batch normalization is used (default: 0.01)
#' @param batch_norm A boolean indicating whether to use batch-normalization (default: TRUE).
#' @param num_epochs The number of epochs to be used in training (default: 200).
#' @param lr The learning rate to be used in training (default: 10^-3).
#' @param beta The beta scaling factor for energy loss (default: 1).
#' @param silent A boolean indicating whether to suppress output during model training (default: FALSE).
#'
#' @return A list containing the trained engression model and a vector of loss values.
#'
#' @keywords internal
#'
engressionfit <- function(X,Y, noise_dim=100, hidden_dim=100, num_layer=3, dropout=0.01,batch_norm=TRUE, num_epochs=200,lr=10^(-3), beta=1, silent=FALSE){
  in_dim = dim(X)[2]
  out_dim = dim(Y)[2]

  ## first block; dropout is only used when batch normalization is switched off
  input_block = function(){
    if(batch_norm){
      nn_sequential(nn_linear(in_dim+noise_dim,hidden_dim), nn_elu(), nn_batch_norm1d(hidden_dim))
    }else{
      nn_sequential(nn_linear(in_dim+noise_dim,hidden_dim), nn_dropout(dropout), nn_elu())
    }
  }
  ## one hidden block: linear layer, ELU and optional batch normalization
  hidden_block = function(){
    if(batch_norm){
      nn_sequential(nn_linear(hidden_dim, hidden_dim), nn_elu(), nn_batch_norm1d(hidden_dim))
    }else{
      nn_sequential(nn_linear(hidden_dim, hidden_dim), nn_elu())
    }
  }

  if(num_layer<=2){
    model = nn_sequential(input_block(), nn_linear(hidden_dim,out_dim))
  }else{
    hid = hidden_block()
    ## input and output layer plus (num_layer-2) hidden blocks give num_layer linear layers;
    ## the loop has to start at 4 (starting at 3 added one hidden block too many)
    if(num_layer>3) for (lay in 4:num_layer) hid = nn_sequential(hid, hidden_block())
    model = nn_sequential(input_block(), hid, nn_linear(hidden_dim,out_dim))
  }
  model$train()

  optimizer = optim_adam(model$parameters,lr=lr)

  n = dim(X)[1]
  lossvec = matrix(nrow=num_epochs, ncol=3)
  colnames(lossvec) = c("energy-loss","E(|Y-Yhat|)","E(|Yhat-Yhat'|)")
  ## epochs at which the current losses are printed
  printat = pmax(1,floor(seq(1,num_epochs, length.out=11)))

  ## the targets do not change across epochs; only the model parameters need gradients
  yt = torch_tensor(Y, dtype=torch_float())

  for (iter in seq_len(num_epochs)){
    optimizer$zero_grad()
    if(noise_dim>0){
      ## two independent noise draws yield two samples per observation
      xt = torch_tensor(cbind(X, matrix(rnorm(n*noise_dim),ncol=noise_dim) ), dtype=torch_float())
      xpt = torch_tensor(cbind(X, matrix(rnorm(n*noise_dim),ncol=noise_dim) ), dtype=torch_float())
    }else{
      ## without input noise the two samples differ only through dropout
      xt = torch_tensor(X, dtype=torch_float())
      xpt = xt
    }
    ## evaluate the network once per sample and use the same outputs for logging and optimisation,
    ## so that the recorded loss is the loss that is actually minimised
    yhat1 = model(xt)
    yhat2 = model(xpt)
    la = energylossall(yt,yhat1,yhat2)
    lossvec[iter, ] = signif(c(sapply(la, as.numeric)),3 )
    if(beta==1) loss = energyloss(yt,yhat1,yhat2) else loss = energylossbeta(yt,yhat1,yhat2,beta)
    loss$backward()
    optimizer$step()
    if(!silent){
      cat("\r ", round(100*iter/num_epochs), "% complete, epoch: ", iter)
      if(iter %in% printat){cat("\n"); print(lossvec[iter,])}
    }
  }
  ## with batch normalization switch to evaluation mode; without it the model stays in
  ## training mode so that dropout remains active when sampling
  if(batch_norm) model$train(mode=FALSE)

  if(noise_dim>0){
    engressor = function(x) as.matrix(model( torch_tensor(cbind(x, matrix(rnorm(nrow(x)*noise_dim),ncol=noise_dim) ), dtype=torch_float())),ncol=out_dim)
  }else{
    engressor = function(x) as.matrix(model( torch_tensor(x, dtype=torch_float())),ncol=out_dim)
  }
  return(list(engressor=engressor, lossvec=lossvec))
}
11 | #' @param num_layer The number of layers in the model (default: 3). 12 | #' @param dropout The dropout rate to be used in the model in case no batch normalization is used (default: 0.01) 13 | #' @param batch_norm A boolean indicating whether to use batch-normalization (default: TRUE). 14 | #' @param num_epochs The number of epochs to be used in training (default: 1000). 15 | #' @param lr The learning rate to be used in training (default: 10^-3). 16 | #' @param beta The beta scaling factor for energy loss (default: 1). 17 | #' @param silent A boolean indicating whether to suppress output during model training (default: FALSE). 18 | #' @param standardize A boolean indicating whether to standardize the input data (default: TRUE). 19 | #' 20 | #' @return An engression model object with class "engression". 21 | #' 22 | #' @examples 23 | #' \donttest{ 24 | #' n = 1000 25 | #' p = 5 26 | #' 27 | #' X = matrix(rnorm(n*p),ncol=p) 28 | #' Y = (X[,1]+rnorm(n)*0.1)^2 + (X[,2]+rnorm(n)*0.1) + rnorm(n)*0.1 29 | #' Xtest = matrix(rnorm(n*p),ncol=p) 30 | #' Ytest = (Xtest[,1]+rnorm(n)*0.1)^2 + (Xtest[,2]+rnorm(n)*0.1) + rnorm(n)*0.1 31 | #' 32 | #' ## fit engression object 33 | #' engr = engression(X,Y) 34 | #' print(engr) 35 | #' 36 | #' ## prediction on test data 37 | #' Yhat = predict(engr,Xtest,type="mean") 38 | #' cat("\n correlation between predicted and realized values: ", signif(cor(Yhat, Ytest),3)) 39 | #' plot(Yhat, Ytest,xlab="prediction", ylab="observation") 40 | #' 41 | #' ## quantile prediction 42 | #' Yhatquant = predict(engr,Xtest,type="quantiles") 43 | #' ord = order(Yhat) 44 | #' matplot(Yhat[ord], Yhatquant[ord,], type="l", col=2,lty=1,xlab="prediction", ylab="observation") 45 | #' points(Yhat[ord],Ytest[ord],pch=20,cex=0.5) 46 | #' 47 | #' ## sampling from estimated model 48 | #' Ysample = predict(engr,Xtest,type="sample",nsample=1) 49 | #' 50 | #' ## plot of realized values against first variable 51 | #' oldpar <- par() 52 | #' par(mfrow=c(1,2)) 53 | #' 
#' Engression Function
#'
#' This function fits an engression model to the data. It allows for
#' the tuning of several parameters related to model complexity.
#' Variables are per default internally standardized (predictions are on original scale).
#'
#' @param X A matrix or data frame representing the predictors.
#' @param Y A matrix or vector representing the target variable(s).
#' @param noise_dim The dimension of the noise introduced in the model (default: 5).
#' @param hidden_dim The size of the hidden layer in the model (default: 100).
#' @param num_layer The number of layers in the model (default: 3).
#' @param dropout The dropout rate to be used in the model in case no batch normalization is used (default: 0.05)
#' @param batch_norm A boolean indicating whether to use batch-normalization (default: TRUE).
#' @param num_epochs The number of epochs to be used in training (default: 1000).
#' @param lr The learning rate to be used in training (default: 10^-3).
#' @param beta The beta scaling factor for energy loss (default: 1).
#' @param silent A boolean indicating whether to suppress output during model training (default: FALSE).
#' @param standardize A boolean indicating whether to standardize the input data (default: TRUE).
#'
#' @return An engression model object with class "engression".
#'
#' @examples
#' \donttest{
#'   n = 1000
#'   p = 5
#'
#'   X = matrix(rnorm(n*p),ncol=p)
#'   Y = (X[,1]+rnorm(n)*0.1)^2 + (X[,2]+rnorm(n)*0.1) + rnorm(n)*0.1
#'   Xtest = matrix(rnorm(n*p),ncol=p)
#'   Ytest = (Xtest[,1]+rnorm(n)*0.1)^2 + (Xtest[,2]+rnorm(n)*0.1) + rnorm(n)*0.1
#'
#'   ## fit engression object
#'   engr = engression(X,Y)
#'   print(engr)
#'
#'   ## prediction on test data
#'   Yhat = predict(engr,Xtest,type="mean")
#'   cat("\n correlation between predicted and realized values:  ", signif(cor(Yhat, Ytest),3))
#'   plot(Yhat, Ytest,xlab="prediction", ylab="observation")
#'
#'   ## quantile prediction
#'   Yhatquant = predict(engr,Xtest,type="quantiles")
#'   ord = order(Yhat)
#'   matplot(Yhat[ord], Yhatquant[ord,], type="l", col=2,lty=1,xlab="prediction", ylab="observation")
#'   points(Yhat[ord],Ytest[ord],pch=20,cex=0.5)
#'
#'   ## sampling from estimated model
#'   Ysample = predict(engr,Xtest,type="sample",nsample=1)
#'
#'   ## plot of realized values against first variable
#'   oldpar <- par(no.readonly = TRUE)
#'   par(mfrow=c(1,2))
#'   plot(Xtest[,1], Ytest, xlab="Variable 1", ylab="Observation")
#'   ## plot of sampled values against first variable
#'   plot(Xtest[,1], Ysample, xlab="Variable 1", ylab="Sample from engression model")
#'   par(oldpar)
#' }
#'
#' @export

engression <- function(X,Y, noise_dim=5, hidden_dim=100, num_layer=3, dropout=0.05, batch_norm=TRUE, num_epochs=1000,lr=10^(-3),beta=1, silent=FALSE, standardize=TRUE){

  if (is.data.frame(X)) {
    if (any(sapply(X, is.factor))) warning("Data frame contains factor variables. Mapping to numeric values. Dummy variables would need to be created explicitly by the user.")
    X = dftomat(X)
  }

  if (is.vector(X) && !is.numeric(X)) X <- as.numeric(X)
  if (is.vector(X) && is.numeric(X)) X <- matrix(X, ncol = 1)
  if(is.vector(Y)) Y= matrix(Y, ncol=1)
  for (k in seq_len(ncol(Y))) Y[,k] = as.numeric(Y[,k])

  ## both arguments are scalars, hence the short-circuit operator
  if(dropout<=0 && noise_dim==0){
    warning("dropout and noise_dim cannot both be equal to 0 as model needs to be stochastic. setting dropout to 0.5")
    dropout = 0.5
  }

  ## readable label for the affected columns: column names if present, otherwise column indices
  collabel = function(M, idx) paste(if(is.null(colnames(M))) idx else colnames(M)[idx], collapse=", ")
  ## lower bound for standard deviations so that constant variables do not lead to a division by zero
  minsd = 10^(-3)

  muX = apply(X,2,mean)
  sddX = apply(X,2,sd)
  if(any(sddX<=0)){
    warning("predictor variable(s) ", collabel(X, which(sddX<=0))," are constant on training data -- results might be unreliable")
    ## the previous bound 10^(03) = 1000 rescaled every predictor by at least 1000
    sddX = pmax(sddX, minsd)
  }
  muY = apply(Y,2,mean)
  sddY = apply(Y,2,sd)
  if(any(sddY<=0)){
    warning("target variable(s) ", collabel(Y, which(sddY<=0))," are constant on training data -- results might be unreliable")
    ## without a bound a constant target is standardized to 0/0 = NaN
    sddY = pmax(sddY, minsd)
  }

  if(standardize){
    X = sweep(sweep(X,2,muX,FUN="-"),2,sddX,FUN="/")
    Y = sweep(sweep(Y,2,muY,FUN="-"),2,sddY,FUN="/")
  }
  eng = engressionfit(X,Y, noise_dim=noise_dim,hidden_dim=hidden_dim,num_layer=num_layer,dropout=dropout, batch_norm=batch_norm, num_epochs=num_epochs,lr=lr,beta=beta, silent=silent)
  ## centering and scaling constants are stored so that predictions can be mapped back to the original scale
  engressor = list(engressor = eng$engressor, lossvec= eng$lossvec, muX=muX, sddX=sddX,muY=muY, sddY=sddY, standardize=standardize, noise_dim=noise_dim,hidden_dim=hidden_dim,num_layer=num_layer,dropout=dropout, batch_norm=batch_norm, num_epochs=num_epochs,lr=lr)
  class(engressor) = "engression"
  return(engressor)
}
11 | #' @param noise_dim The dimension of the noise introduced in the model (default: 10). 12 | #' @param hidden_dim The size of the hidden layer in the model (default: 100). 13 | #' @param num_layer The number of layers in the model (default: 3). 14 | #' @param dropout The dropout rate to be used in the model (default: 0.05). 15 | #' @param batch_norm A boolean indicating whether to use batch-normalization (default: TRUE). 16 | #' @param num_epochs The number of epochs to be used in training (default: 1000). 17 | #' @param lr The learning rate to be used in training (default: 10^-3). 18 | #' @param beta The beta scaling factor for energy loss (default: 1). 19 | #' @param silent A boolean indicating whether to suppress output during model training (default: FALSE). 20 | #' @param standardize A boolean indicating whether to standardize the input data (default: TRUE). 21 | #' 22 | #' @return A bagged engression model object with class "engressionBagged". 23 | #' 24 | #' @examples 25 | #' \donttest{ 26 | #' n = 1000 27 | #' p = 5 28 | #' X = matrix(rnorm(n*p),ncol=p) 29 | #' Y = (X[,1]+rnorm(n)*0.1)^2 + (X[,2]+rnorm(n)*0.1) + rnorm(n)*0.1 30 | #' Xtest = matrix(rnorm(n*p),ncol=p) 31 | #' Ytest = (Xtest[,1]+rnorm(n)*0.1)^2 + (Xtest[,2]+rnorm(n)*0.1) + rnorm(n)*0.1 32 | #' 33 | #' ## fit bagged engression object 34 | #' engb = engressionBagged(X,Y,K=3) 35 | #' print(engb) 36 | #' 37 | #' ## prediction on test data 38 | #' Yhat = predict(engb,Xtest,type="mean") 39 | #' cat("\n correlation between predicted and realized values: ", signif(cor(Yhat, Ytest),3)) 40 | #' plot(Yhat, Ytest,xlab="estimated conditional mean", ylab="observation") 41 | #' 42 | #' ## out-of-bag prediction 43 | #' Yhat_oob = predict(engb,type="mean") 44 | #' cat("\n correlation between predicted and realized values on oob data: ") 45 | #' print(signif(cor(Yhat_oob, Y),3)) 46 | #' plot(Yhat_oob, Y,xlab="prediction", ylab="observation") 47 | #' 48 | #' ## quantile prediction 49 | #' Yhatquant = 
#' Bagged Engression Function
#'
#' This function fits a bagged engression model to the data by fitting multiple
#' engression models to subsamples of the data. It allows for the tuning of several parameters
#' related to model complexity.
#'
#' @param X A matrix or data frame representing the predictors.
#' @param Y A matrix or vector representing the target variable(s).
#' @param K The number of bagged models to fit; needs to be at least 2 (default: 5).
#' @param keepoutbag A boolean indicating whether to keep the out-of-bag samples and training data (default: TRUE).
#' @param noise_dim The dimension of the noise introduced in the model (default: 10).
#' @param hidden_dim The size of the hidden layer in the model (default: 100).
#' @param num_layer The number of layers in the model (default: 3).
#' @param dropout The dropout rate to be used in the model (default: 0.05).
#' @param batch_norm A boolean indicating whether to use batch-normalization (default: TRUE).
#' @param num_epochs The number of epochs to be used in training (default: 1000).
#' @param lr The learning rate to be used in training (default: 10^-3).
#' @param beta The beta scaling factor for energy loss (default: 1).
#' @param silent A boolean indicating whether to suppress output during model training (default: FALSE).
#' @param standardize A boolean indicating whether to standardize the input data (default: TRUE).
#'
#' @return A bagged engression model object with class "engressionBagged".
#'
#' @examples
#' \donttest{
#'   n = 1000
#'   p = 5
#'   X = matrix(rnorm(n*p),ncol=p)
#'   Y = (X[,1]+rnorm(n)*0.1)^2 + (X[,2]+rnorm(n)*0.1) + rnorm(n)*0.1
#'   Xtest = matrix(rnorm(n*p),ncol=p)
#'   Ytest = (Xtest[,1]+rnorm(n)*0.1)^2 + (Xtest[,2]+rnorm(n)*0.1) + rnorm(n)*0.1
#'
#'   ## fit bagged engression object
#'   engb = engressionBagged(X,Y,K=3)
#'   print(engb)
#'
#'   ## prediction on test data
#'   Yhat = predict(engb,Xtest,type="mean")
#'   cat("\n correlation between predicted and realized values:  ", signif(cor(Yhat, Ytest),3))
#'   plot(Yhat, Ytest,xlab="estimated conditional mean", ylab="observation")
#'
#'   ## out-of-bag prediction
#'   Yhat_oob = predict(engb,type="mean")
#'   cat("\n correlation between predicted and realized values on oob data:  ")
#'   print(signif(cor(Yhat_oob, Y),3))
#'   plot(Yhat_oob, Y,xlab="prediction", ylab="observation")
#'
#'   ## quantile prediction
#'   Yhatquant = predict(engb,Xtest,type="quantiles")
#'   ord = order(Yhat)
#'   matplot(Yhat[ord], Yhatquant[ord,], type="l", col=2,lty=1,xlab="prediction", ylab="observation")
#'   points(Yhat[ord],Ytest[ord],pch=20,cex=0.5)
#'
#'   ## sampling from estimated model
#'   Ysample = predict(engb,Xtest,type="sample",nsample=1)
#'
#'   ## plot of realized values against first variable
#'   oldpar <- par(no.readonly = TRUE)
#'   par(mfrow=c(1,2))
#'   plot(Xtest[,1], Ytest, xlab="Variable 1", ylab="Observation")
#'   ## plot of sampled values against first variable
#'   plot(Xtest[,1], Ysample[,1], xlab="Variable 1", ylab="Sample from engression model")
#'   par(oldpar)
#' }
#'
#' @export
#'
engressionBagged <- function(X,Y, K=5, keepoutbag=TRUE, noise_dim=10, hidden_dim=100, num_layer=3, dropout=0.05, batch_norm=TRUE, num_epochs=1000,lr=10^(-3),beta=1, silent=FALSE, standardize=TRUE){

  ## every observation is left out of at least one model, which requires two or more models
  if(K<2) stop("K needs to be at least 2 to fit a bagged engression model")

  if (is.data.frame(X)) {
    if (any(sapply(X, is.factor))) warning("Data frame contains factor variables. Mapping to numeric values. Dummy variables would need to be created explicitly by the user.")
    X <- dftomat(X)
  }
  ## same treatment of vector input as in engression()
  if (is.vector(X) && !is.numeric(X)) X <- as.numeric(X)
  if (is.vector(X) && is.numeric(X)) X <- matrix(X, ncol = 1)
  if(is.vector(Y)) Y= matrix(Y, ncol=1)
  for (k in seq_len(ncol(Y))) Y[,k] = as.numeric(Y[,k])

  if(dropout<=0 && noise_dim==0){
    warning("dropout and noise_dim cannot both be equal to 0 as model needs to be stochastic. setting dropout to 0.5")
    dropout = 0.5
  }

  ## each observation is in-bag for roughly 80% of the models, but for at most K-1 of them
  inbagno = min(K-1,ceiling(K*0.8))
  inbag = matrix(nrow=nrow(X), ncol=inbagno)
  for (i in seq_len(nrow(X))) inbag[i,] = sort(sample(1:K,inbagno))

  ## English ordinal suffix of k (1st, 2nd, 3rd, 4th, ..., 11th, ..., 21st)
  ordsuffix = function(k){
    if((k %% 100) %in% 11:13) return("th")
    switch(as.character(k %% 10), "1"="st", "2"="nd", "3"="rd", "th")
  }

  models = list()
  for (k in seq_len(K)){
    if(!silent) cat(paste("\n fitting ",k,"-", ordsuffix(k)," out of ",K," engression models \n",sep=""))
    useinbag = which(apply(inbag==k,1,any))
    ## drop=FALSE keeps X and Y matrices; Y[useinbag] would only pick entries of the first target column
    models[[k]] = engression(X[useinbag,,drop=FALSE],Y[useinbag,,drop=FALSE], noise_dim=noise_dim, hidden_dim=hidden_dim, num_layer=num_layer, dropout=dropout, batch_norm=batch_norm, num_epochs=num_epochs,lr=lr,beta=beta, silent=silent, standardize=standardize)
  }

  engBagged = list(models= models, inbag=if(keepoutbag) inbag else NULL, Xtrain=if(keepoutbag) X else NULL, noise_dim=noise_dim,hidden_dim=hidden_dim,num_layer=num_layer,dropout=dropout, batch_norm=batch_norm, num_epochs=num_epochs,lr=lr, standardize=standardize)
  class(engBagged) = "engressionBagged"
  ## respect silent=TRUE: the summary is only printed when output is requested
  if(!silent) print(engBagged)
  return(engBagged)
}
Engression can be used to 6 | * estimate the conditional mean $\mathbb{E}[Y|X=x]$ (as in least-squares regression), 7 | * estimate the conditional quantiles of $Y$ given $X=x$ (as in quantile regression), and 8 | * sample from the fitted conditional distribution of $Y$ given $X=x$ (as a generative model). 9 | 10 | The results in the paper show the advantages of engression over existing regression approaches in terms of extrapolation. 11 | 12 | 13 | ## Installation 14 | 15 | ### Python package 16 | The latest release of the Python package can be installed via pip: 17 | ```sh 18 | pip install engression 19 | ``` 20 | 21 | The development version can be installed from github: 22 | 23 | ```sh 24 | pip install -e "git+https://github.com/xwshen51/engression#egg=engression&subdirectory=engression-python" 25 | ``` 26 | 27 | ### R package 28 | 29 | The latest release of the R package can be installed through CRAN: 30 | 31 | ```R 32 | install.packages("engression") 33 | ``` 34 | 35 | The development version can be installed from github: 36 | 37 | ```R 38 | devtools::install_github("xwshen51/engression", subdir = "engression-r") 39 | ``` 40 | 41 | 42 | ## Usage Example 43 | 44 | ### Python 45 | Below is one simple demonstration. See [this tutorial](https://github.com/xwshen51/engression/blob/main/engression-python/examples/example_simu.ipynb) for more details on simulated data and [this tutorial](https://github.com/xwshen51/engression/blob/main/engression-python/examples/example_air.ipynb) for a real data example. We demonstrate in [another tutorial](https://github.com/xwshen51/engression/blob/main/engression-python/examples/example_bag.ipynb) how to fit a bagged engression model, which also helps with hyperparameter tuning. 
46 | ```python 47 | from engression import engression 48 | from engression.data.simulator import preanm_simulator 49 | 50 | ## Simulate data 51 | x, y = preanm_simulator("square", n=10000, x_lower=0, x_upper=2, noise_std=1, train=True, device=device) 52 | x_eval, y_eval_med, y_eval_mean = preanm_simulator("square", n=1000, x_lower=0, x_upper=4, noise_std=1, train=False, device=device) 53 | 54 | ## Fit an engression model 55 | engressor = engression(x, y, lr=0.01, num_epochs=500, batch_size=1000, device="cuda") 56 | ## Summarize model information 57 | engressor.summary() 58 | 59 | ## Evaluation 60 | print("L2 loss:", engressor.eval_loss(x_eval, y_eval_mean, loss_type="l2")) 61 | print("correlation between predicted and true means:", engressor.eval_loss(x_eval, y_eval_mean, loss_type="cor")) 62 | 63 | ## Predictions 64 | y_pred_mean = engressor.predict(x_eval, target="mean") ## for the conditional mean 65 | y_pred_med = engressor.predict(x_eval, target="median") ## for the conditional median 66 | y_pred_quant = engressor.predict(x_eval, target=[0.025, 0.5, 0.975]) ## for the conditional 2.5%, 50% and 97.5% quantiles 67 | ``` 68 | 69 | ### R 70 | ```R 71 | require(engression) 72 | n = 1000 73 | p = 5 74 | 75 | X = matrix(rnorm(n*p),ncol=p) 76 | Y = (X[,1]+rnorm(n)*0.1)^2 + (X[,2]+rnorm(n)*0.1) + rnorm(n)*0.1 77 | Xtest = matrix(rnorm(n*p),ncol=p) 78 | Ytest = (Xtest[,1]+rnorm(n)*0.1)^2 + (Xtest[,2]+rnorm(n)*0.1) + rnorm(n)*0.1 79 | 80 | ## fit engression object 81 | engr = engression(X,Y) 82 | print(engr) 83 | 84 | ## prediction on test data 85 | Yhat = predict(engr,Xtest,type="mean") 86 | cat("\n correlation between predicted and realized values: ", signif(cor(Yhat, Ytest),3)) 87 | plot(Yhat, Ytest,xlab="prediction", ylab="observation") 88 | 89 | ## quantile prediction 90 | Yhatquant = predict(engr,Xtest,type="quantiles") 91 | ord = order(Yhat) 92 | matplot(Yhat[ord], Yhatquant[ord,], type="l", col=2,lty=1,xlab="prediction", ylab="observation") 93 | 
def engression(x, y, classification=False,
               num_layer=2, hidden_dim=100, noise_dim=100, out_act=None,
               add_bn=True, resblock=False, beta=1,
               lr=0.0001, num_epochs=500, batch_size=None,
               print_every_nepoch=100, print_times_per_epoch=1,
               device="cpu", standardize=True, verbose=True):
    """Fit an engression model to the data and return the fitted model.

    Multivariate predictors and response variables are allowed. Variables are
    per default internally standardized (training with standardized data, while
    predictions and evaluations are on original scale).

    Args:
        x (torch.Tensor): training data of predictors.
        y (torch.Tensor): training data of responses.
        classification (bool, optional): classification or not.
        num_layer (int, optional): number of (linear) layers. Defaults to 2.
        hidden_dim (int, optional): number of neurons per layer. Defaults to 100.
        noise_dim (int, optional): noise dimension. Defaults to 100.
        out_act (str, optional): output activation function. Defaults to None.
        add_bn (bool, optional): whether to add BN layer. Defaults to True.
        resblock (bool, optional): whether to use residual blocks (skip connections). Defaults to False.
        beta (float, optional): power parameter in the energy loss.
        lr (float, optional): learning rate. Defaults to 0.0001.
        num_epochs (int, optional): number of epochs. Defaults to 500.
        batch_size (int, optional): batch size. Defaults to None.
        print_every_nepoch (int, optional): print losses every print_every_nepoch number of epochs. Defaults to 100.
        print_times_per_epoch (int, optional): print losses for print_times_per_epoch times per epoch. Defaults to 1.
        device (str, torch.device, optional): device. Defaults to "cpu". Choices = ["cpu", "gpu", "cuda"].
        standardize (bool, optional): whether to standardize data during training. Defaults to True.
        verbose (bool, optional): whether to print losses and info; also controls the device check. Defaults to True.

    Returns:
        Engressor object: a fitted engression model.

    Raises:
        ValueError: if x and y do not contain the same number of samples.
    """
    if x.shape[0] != y.shape[0]:
        # ValueError is a subclass of Exception, so existing handlers keep working.
        raise ValueError("The sample sizes for the covariates and response do not match. Please check.")

    # A 1-D tensor is a sample of a univariate variable and has no shape[1].
    # Engressor.train() passes the data through vectorize(), presumably turning
    # such input into a single column -- TODO confirm against utils.vectorize.
    in_dim = x.shape[1] if len(x.shape) > 1 else 1
    out_dim = y.shape[1] if len(y.shape) > 1 else 1

    engressor = Engressor(in_dim=in_dim, out_dim=out_dim, classification=classification,
                          num_layer=num_layer, hidden_dim=hidden_dim, noise_dim=noise_dim,
                          out_act=out_act, resblock=resblock, add_bn=add_bn, beta=beta,
                          lr=lr, num_epochs=num_epochs, batch_size=batch_size,
                          standardize=standardize, device=device, check_device=verbose, verbose=verbose)
    engressor.train(x, y, num_epochs=num_epochs, batch_size=batch_size,
                    print_every_nepoch=print_every_nepoch, print_times_per_epoch=print_times_per_epoch,
                    standardize=standardize, verbose=verbose)
    return engressor
74 | """ 75 | def __init__(self, 76 | in_dim, out_dim, classification=False, 77 | num_layer=2, hidden_dim=100, noise_dim=100, 78 | out_act=False, resblock=False, add_bn=True, beta=1, 79 | lr=0.0001, num_epochs=500, batch_size=None, standardize=True, 80 | device="cpu", check_device=True, verbose=True): 81 | super().__init__() 82 | self.classification = classification 83 | if classification: 84 | out_act = "softmax" 85 | self.num_layer = num_layer 86 | self.hidden_dim = hidden_dim 87 | self.noise_dim = noise_dim 88 | self.out_act = out_act 89 | self.resblock = resblock 90 | self.add_bn = add_bn 91 | self.beta = beta 92 | self.lr = lr 93 | self.num_epochs = num_epochs 94 | self.batch_size = batch_size 95 | if isinstance(device, str): 96 | if device == "gpu" or device == "cuda": 97 | device = torch.device("cuda") 98 | else: 99 | device = torch.device(device) 100 | self.device = device 101 | if check_device: 102 | check_for_gpu(self.device) 103 | self.standardize = standardize 104 | self.x_mean = None 105 | self.x_std = None 106 | self.y_mean = None 107 | self.y_std = None 108 | 109 | self.model = StoNet(in_dim, out_dim, num_layer, hidden_dim, noise_dim, add_bn, out_act, resblock).to(self.device) 110 | self.optimizer = torch.optim.Adam(self.model.parameters(), lr=lr) 111 | self.verbose = verbose 112 | 113 | self.tr_loss = None 114 | 115 | def train_mode(self): 116 | self.model.train() 117 | 118 | def eval_mode(self): 119 | self.model.eval() 120 | 121 | def summary(self): 122 | """Print the model architecture and hyperparameters.""" 123 | print("Engression model with\n" + 124 | "\t number of layers: {}\n".format(self.num_layer) + 125 | "\t hidden dimensions: {}\n".format(self.hidden_dim) + 126 | "\t noise dimensions: {}\n".format(self.noise_dim) + 127 | "\t residual blocks: {}\n".format(self.resblock) + 128 | "\t number of epochs: {}\n".format(self.num_epochs) + 129 | "\t batch size: {}\n".format(self.batch_size) + 130 | "\t learning rate: {}\n".format(self.lr) + 131 | 
"\t standardization: {}\n".format(self.standardize) + 132 | "\t training mode: {}\n".format(self.model.training) + 133 | "\t device: {}\n".format(self.device)) 134 | print("Training loss (original scale):\n" + 135 | "\t energy-loss: {:.2f}, \n\tE(|Y-Yhat|): {:.2f}, \n\tE(|Yhat-Yhat'|): {:.2f}".format( 136 | self.tr_loss[0], self.tr_loss[1], self.tr_loss[2])) 137 | 138 | def _standardize_data_and_record_stats(self, x, y): 139 | """Standardize the data and record the mean and standard deviation of the training data. 140 | 141 | Args: 142 | x (torch.Tensor): training data of predictors. 143 | y (torch.Tensor): training data of responses. 144 | 145 | Returns: 146 | torch.Tensor: standardized data. 147 | """ 148 | self.x_mean = torch.mean(x, dim=0) 149 | self.x_std = torch.std(x, dim=0) 150 | self.x_std[self.x_std == 0] += 1e-5 151 | if not self.classification: 152 | self.y_mean = torch.mean(y, dim=0) 153 | self.y_std = torch.std(y, dim=0) 154 | self.y_std[self.y_std == 0] += 1e-5 155 | else: 156 | self.y_mean = torch.zeros(y.shape[1:], device=y.device).unsqueeze(0) 157 | self.y_std = torch.ones(y.shape[1:], device=y.device).unsqueeze(0) 158 | x_standardized = (x - self.x_mean) / self.x_std 159 | y_standardized = (y - self.y_mean) / self.y_std 160 | self.x_mean = self.x_mean.to(self.device) 161 | self.x_std = self.x_std.to(self.device) 162 | self.y_mean = self.y_mean.to(self.device) 163 | self.y_std = self.y_std.to(self.device) 164 | return x_standardized, y_standardized 165 | 166 | def standardize_data(self, x, y=None): 167 | """Standardize the data, if self.standardize is True. 168 | 169 | Args: 170 | x (torch.Tensor): training data of predictors. 171 | y (torch.Tensor, optional): training data of responses. Defaults to None. 172 | 173 | Returns: 174 | torch.Tensor: standardized or original data. 
def train(self, x, y, num_epochs=None, batch_size=None, lr=None, print_every_nepoch=100, print_times_per_epoch=1, standardize=None, verbose=True):
    """Fit the model by minimizing the two-sample energy loss.

    Args:
        x (torch.Tensor): training data of predictors.
        y (torch.Tensor): training data of responses.
        num_epochs (int, optional): number of training epochs. Defaults to None,
            which keeps the current ``self.num_epochs``.
        batch_size (int, optional): batch size for mini-batch SGD. Defaults to None,
            which falls back to ``self.batch_size`` and, if that is also None, to
            the full sample size.
        lr (float, optional): learning rate. When given and different from
            ``self.lr``, a new Adam optimizer is created. Defaults to None.
        print_every_nepoch (int, optional): print losses every print_every_nepoch
            number of epochs. Defaults to 100.
        print_times_per_epoch (int, optional): print losses for
            print_times_per_epoch times per epoch (mini-batch mode only).
            Defaults to 1.
        standardize (bool, optional): whether to standardize the data. Defaults to
            None, which keeps the current ``self.standardize``.
        verbose (bool, optional): whether to print losses and info. Defaults to True.
    """
    self.train_mode()
    if num_epochs is not None:
        self.num_epochs = num_epochs
    if batch_size is None:
        batch_size = self.batch_size if self.batch_size is not None else x.size(0)
    if lr is not None and lr != self.lr:
        # A changed learning rate needs a fresh optimizer.
        self.lr = lr
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)
    if standardize is not None:
        self.standardize = standardize

    # `vectorize` is defined elsewhere; presumably it reshapes inputs to 2-D -- confirm.
    x = vectorize(x)
    y = vectorize(y)
    if self.standardize:
        if verbose:
            print("Data is standardized for training only; the printed training losses are on the standardized scale. \n" +
                  "However during evaluation, the predictions, evaluation metrics, and plots will be on the original scale.\n")
        x, y = self._standardize_data_and_record_stats(x, y)
    x = x.to(self.device)
    y = y.to(self.device)

    if batch_size >= x.size(0) // 2:
        if verbose:
            print("Batch is larger than half of the sample size. Training based on full-batch gradient descent.")
        self.batch_size = x.size(0)
        for epoch_idx in range(self.num_epochs):
            self.model.zero_grad()
            # Two independent forward passes give the two samples the loss compares.
            y_sample1 = self.model(x)
            y_sample2 = self.model(x)
            loss, loss1, loss2 = energy_loss_two_sample(y, y_sample1, y_sample2, beta=self.beta, verbose=True)
            loss.backward()
            self.optimizer.step()
            if (epoch_idx == 0 or (epoch_idx + 1) % print_every_nepoch == 0) and verbose:
                print("[Epoch {} ({:.0f}%)] energy-loss: {:.4f},  E(|Y-Yhat|): {:.4f},  E(|Yhat-Yhat'|): {:.4f}".format(
                    epoch_idx + 1, 100 * epoch_idx / self.num_epochs, loss.item(), loss1.item(), loss2.item()))
    else:
        train_loader = make_dataloader(x, y, batch_size=batch_size, shuffle=True)
        if verbose:
            print("Training based on mini-batch gradient descent with a batch size of {}.".format(batch_size))
        # The interval used to be (len(train_loader) - 1) // print_times_per_epoch,
        # which is 0 when print_times_per_epoch exceeds the number of batches minus
        # one, making the modulo below raise ZeroDivisionError. Clamp it to >= 1.
        print_interval = max(1, (len(train_loader) - 1) // max(1, print_times_per_epoch))
        for epoch_idx in range(self.num_epochs):
            self.zero_loss()
            log_this_epoch = verbose and (epoch_idx == 0 or (epoch_idx + 1) % print_every_nepoch == 0)
            for batch_idx, (x_batch, y_batch) in enumerate(train_loader):
                self.train_one_iter(x_batch, y_batch)
                if log_this_epoch and (batch_idx + 1) % print_interval == 0:
                    self.print_loss(epoch_idx, batch_idx)

    # Evaluate performance on the training data (on the original scale).
    self.model.eval()
    x, y = self.unstandardize_data(y, x)
    self.tr_loss = self.eval_loss(x, y, loss_type="energy", verbose=True)

    if verbose:
        print("\nTraining loss on the original (non-standardized) scale:\n" +
              "\tEnergy-loss: {:.4f},  E(|Y-Yhat|): {:.4f},  E(|Yhat-Yhat'|): {:.4f}".format(
                  self.tr_loss[0], self.tr_loss[1], self.tr_loss[2]))
        print("\nPrediction-loss E(|Y-Yhat|) and variance-loss E(|Yhat-Yhat'|) should ideally be equally large" +
              "\n-- consider training for more epochs or adjusting hyperparameters if there is a mismatch ")
@torch.no_grad()
def eval_loss(self, x, y, loss_type="l2", sample_size=None, beta=1, verbose=False):
    """Compute the loss for evaluation.

    Args:
        x (torch.Tensor): data of predictors.
        y (torch.Tensor): data of responses.
        loss_type (str, optional): loss type. Defaults to "l2".
            Choices: ["l2", "l1", "energy", "cor"].
        sample_size (int, optional): generated sample sizes for each x.
            Defaults to None, meaning 2 for the energy loss and 100 otherwise.
        beta (float, optional): beta in energy score. Defaults to 1.
        verbose (bool, optional): for ``loss_type="energy"`` only, also return
            the two loss terms. Ignored for the other loss types. Defaults to False.

    Returns:
        float: evaluation loss; or, when ``loss_type="energy"`` and ``verbose``
        is True, a tuple of three floats (energy loss and its two terms, which
        `train` prints as E(|Y-Yhat|) and E(|Yhat-Yhat'|)).
    """
    if sample_size is None:
        sample_size = 2 if loss_type == "energy" else 100
    self.eval_mode()
    x = vectorize(x).to(self.device)
    y = vectorize(y).to(self.device)

    # Only the energy loss yields a three-part result. Unpacking used to be
    # keyed on `verbose` alone, so verbose=True with "l2"/"l1"/"cor" tried to
    # unpack a scalar loss and crashed.
    has_terms = False
    if loss_type == "l2":
        y_pred = self.predict(x, target="mean", sample_size=sample_size)
        loss = (y - y_pred).pow(2).mean()
    elif loss_type == "cor":
        y_pred = self.predict(x, target="mean", sample_size=sample_size)
        loss = cor(y, y_pred)
    elif loss_type == "l1":
        y_pred = self.predict(x, target=0.5, sample_size=sample_size)
        loss = (y - y_pred).abs().mean()
    else:
        assert loss_type == "energy"
        y_samples = self.sample(x, sample_size=sample_size, expand_dim=False)
        loss = energy_loss(y, y_samples, beta=beta, verbose=verbose)
        has_terms = bool(verbose)

    if has_terms:
        total, term1, term2 = loss
        return total.item(), term1.item(), term2.item()
    return loss.item()
412 | target (str or float, optional): target quantity. Defaults to "mean". Choice: ["mean", "median", "sample", float]. 413 | sample_size (int, optional): generated sample sizes for each x. Defaults to 100. 414 | save_dir (str, optional): directory to save the plot. Defaults to None. 415 | alpha (float, optional): transparency of the sampled data points. Defaults to 0.8. 416 | ymin (float, optional): minimum value of y in the plot. Defaults to None. 417 | ymax (float, optional): maximum value of y in the plot. Defaults to None. 418 | """ 419 | if x_tr is not None and y_tr is not None: 420 | # Plot training data as well. 421 | x_tr = vectorize(x_tr) 422 | y_tr = vectorize(y_tr) 423 | plt.scatter(x_tr[:,x_idx].cpu(), y_tr[:,y_idx].cpu(), s=1, label="training data", color="silver") 424 | plt.scatter(x_te[:,x_idx].cpu(), y_te[:,y_idx].cpu(), s=1, label="test data", color="gold") 425 | x = torch.cat((x_tr, x_te), dim=0) 426 | y = torch.cat((y_tr, y_te), dim=0) 427 | else: 428 | # Plot only the test data. 
429 | x_te = vectorize(x_te) 430 | y_te = vectorize(y_te) 431 | plt.scatter(x_te[:,x_idx].cpu(), y_te[:,y_idx].cpu(), s=1, label="true data", color="silver") 432 | x = x_te 433 | y = y_te 434 | x = x.to(self.device) 435 | y = y.to(self.device) 436 | 437 | if target != "sample": 438 | y_pred = self.predict(x, target=target, sample_size=sample_size) 439 | plt.scatter(x[:,x_idx].cpu(), y_pred[:,y_idx].cpu(), s=1, label="predictions", color="lightskyblue") 440 | else: 441 | y_sample = self.sample(x, sample_size=sample_size, expand_dim=False) 442 | x_rep = x.repeat(sample_size, 1) 443 | plt.scatter(x_rep[:,x_idx].cpu(), y_sample[:,y_idx].cpu(), s=1, label="samples", color="lightskyblue", alpha=alpha) 444 | plt.legend(markerscale=2) 445 | plt.ylim(ymin, ymax) 446 | if x.shape[1] == 1: 447 | plt.xlabel(r"$x$") 448 | else: 449 | plt.xlabel(r"$x_{}$".format(x_idx)) 450 | if y.shape[1] == 1: 451 | plt.ylabel(r"$y$") 452 | else: 453 | plt.ylabel(r"$y_{}$".format(y_idx)) 454 | if save_dir is not None: 455 | make_folder(save_dir) 456 | plt.savefig(save_dir, bbox_inches="tight") 457 | plt.close() 458 | else: 459 | plt.show() 460 | -------------------------------------------------------------------------------- /engression-python/engression/models.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from .data.loader import make_dataloader 4 | 5 | 6 | class StoLayer(nn.Module): 7 | """A stochastic layer. 8 | 9 | Args: 10 | in_dim (int): input dimension 11 | out_dim (int): output dimension 12 | noise_dim (int, optional): noise dimension. Defaults to 100. 13 | add_bn (bool, optional): whether to add BN layer. Defaults to True. 
def get_act_func(name):
    """Build the activation module registered under ``name``.

    Args:
        name (str or None): one of "relu", "sigmoid", "tanh", "softmax",
            "elu", "softplus".

    Returns:
        nn.Module or None: a newly constructed activation, or None when the
        name is not recognized.
    """
    if not isinstance(name, str):
        return None
    # Factories, not instances: every call must hand out a fresh module.
    builders = {
        "relu": lambda: nn.ReLU(inplace=True),
        "sigmoid": nn.Sigmoid,
        "tanh": nn.Tanh,
        "softmax": lambda: nn.Softmax(dim=1),
        "elu": lambda: nn.ELU(inplace=True),
        "softplus": nn.Softplus,
    }
    build = builders.get(name)
    return None if build is None else build()
class FiLMBlock(nn.Module):
    """A stochastic layer or residual block modulated by a condition vector (FiLM).

    Args:
        in_dim (int): input dimension of the wrapped network.
        out_dim (int): output dimension of the wrapped network.
        condition_dim (int): dimension of the conditioning vector.
        hidden_dim (int, optional): hidden dimension of the residual block
            (only used when ``resblock`` is True). Defaults to 512.
        noise_dim (int, optional): noise dimension. Defaults to 0.
        add_bn (bool, optional): whether to add batch normalization. Defaults to False.
        resblock (bool, optional): wrap a ``StoResBlock`` if True, otherwise a
            single ``StoLayer``. Defaults to False.
        out_act (str, optional): output activation function. Defaults to None.
        film_pos (str, optional): 'out' modulates the output of the wrapped
            network; 'in' modulates its input. Defaults to 'out'.
        film_level (int, optional): > 1 applies scale and shift
            (``gamma * h + beta``), 1 applies shift only, anything else
            disables conditioning. Defaults to 1.
    """
    def __init__(self, in_dim, out_dim, condition_dim,
                 hidden_dim=512, noise_dim=0, add_bn=False, resblock=False,
                 out_act=None, film_pos='out', film_level=1):
        super().__init__()
        self.film_pos = film_pos
        self.film_level = film_level
        # The modulation acts on whichever tensor it is applied to.
        film_out_dim = out_dim if film_pos == 'out' else in_dim
        if film_level > 1:
            self.condition_layer = nn.Linear(condition_dim, film_out_dim * 2)
        elif film_level == 1:
            self.condition_layer = nn.Linear(condition_dim, film_out_dim)
        # These two branches used to be swapped: resblock=True built a plain
        # StoLayer and resblock=False built a StoResBlock.
        # NOTE: this changes the parameter layout of models built by the old code.
        if resblock:
            self.net = StoResBlock(in_dim, hidden_dim, out_dim, noise_dim, add_bn, out_act)
        else:
            self.net = StoLayer(in_dim, out_dim, noise_dim, add_bn, out_act)

    def forward(self, x, condition):
        """Apply the wrapped network and the condition-dependent modulation."""
        out = self.net(x) if self.film_pos == 'out' else x
        if self.film_level > 1:
            gamma, beta = self.condition_layer(condition).chunk(2, dim=1)
            out = gamma * out + beta
        elif self.film_level == 1:
            beta = self.condition_layer(condition)
            out = out + beta
        if self.film_pos == 'in':
            out = self.net(out)
        return out
def sample_onebatch(self, x, sample_size=100, expand_dim=True, require_grad=False):
    """Draw ``sample_size`` responses for every row of one batch of predictors.

    Args:
        x (torch.Tensor): new data of predictors of shape [data_size, covariate_dim]
        sample_size (int, optional): new sample size. Defaults to 100.
        expand_dim (bool, optional): whether to expand the sample dimension. Defaults to True.
        require_grad (bool, optional): if False, sampling runs under
            ``torch.no_grad()`` and the result is detached. Defaults to False.

    Returns:
        torch.Tensor of shape (data_size, response_dim, sample_size) if expand_dim
        else (data_size*sample_size, response_dim).
    """
    n_points = x.size(0)
    if require_grad:
        # Stack sample_size copies of the batch: [x, x, ..., x].
        tiled = x.repeat(sample_size, 1)
        draws = self.sampling_func(tiled)
    else:
        with torch.no_grad():
            tiled = x.repeat(sample_size, 1)
            draws = self.sampling_func(tiled).detach()
    if not expand_dim:
        return draws
    # Rows [i*n_points:(i+1)*n_points] hold the i-th draw for every data point;
    # stacking those chunks on a new trailing axis puts the sample index last.
    return torch.stack(torch.split(draws, n_points), dim=draws.dim())
def sample(self, x, sample_size=100, expand_dim=True, verbose=True):
    """Sampling that adaptively adjusts the batch size according to the available memory.

    Starts with the whole input as one batch and halves the batch size each
    time ``sample_batch`` raises an out-of-memory ``RuntimeError``.

    Args:
        x (torch.Tensor): new data of predictors.
        sample_size (int, optional): new sample size. Defaults to 100.
        expand_dim (bool, optional): whether to expand the sample dimension. Defaults to True.
        verbose (bool, optional): whether to report batch-size reductions. Defaults to True.

    Returns:
        Whatever ``sample_batch`` returns for the first batch size that fits.

    Raises:
        RuntimeError: any error that is not an out-of-memory error, or the
            out-of-memory error itself once the batch size cannot be halved further.
    """
    batch_size = x.shape[0]
    while True:
        try:
            return self.sample_batch(x, sample_size, expand_dim, batch_size)
        except RuntimeError as e:
            # Every RuntimeError used to be caught, and non-OOM ones were retried
            # unchanged forever; repeated OOM also drove batch_size to 0 without
            # ever terminating. Re-raise in both cases.
            if "out of memory" not in str(e) or batch_size <= 1:
                raise
            batch_size = batch_size // 2
            if verbose:
                print("Out of memory; reduce the batch size to {}".format(batch_size))
295 | """ 296 | def __init__(self, in_dim, out_dim, num_layer=2, hidden_dim=100, 297 | noise_dim=100, add_bn=False, out_act=None, resblock=False, 298 | noise_all_layer=True, out_bias=True, verbose=True, forward_sampling=True): 299 | super().__init__(forward_sampling=forward_sampling) 300 | self.in_dim = in_dim 301 | self.out_dim = out_dim 302 | self.hidden_dim = hidden_dim 303 | self.noise_dim = noise_dim 304 | self.add_bn = add_bn 305 | self.noise_all_layer = noise_all_layer 306 | self.out_bias = out_bias 307 | if out_act == "softmax" and out_dim == 1: 308 | out_act = "sigmoid" 309 | self.out_act = get_act_func(out_act) 310 | 311 | self.num_blocks = None 312 | if resblock: 313 | if num_layer % 2 != 0: 314 | num_layer += 1 315 | print("The number of layers must be an even number for residual blocks. Changed to {}".format(str(num_layer))) 316 | num_blocks = num_layer // 2 317 | self.num_blocks = num_blocks 318 | self.resblock = resblock 319 | self.num_layer = num_layer 320 | 321 | if self.resblock: 322 | if self.num_blocks == 1: 323 | self.net = StoResBlock(dim=in_dim, hidden_dim=hidden_dim, out_dim=out_dim, 324 | noise_dim=noise_dim, add_bn=add_bn, out_act=out_act) 325 | else: 326 | self.input_layer = StoResBlock(dim=in_dim, hidden_dim=hidden_dim, out_dim=hidden_dim, 327 | noise_dim=noise_dim, add_bn=add_bn, out_act="relu") 328 | if not noise_all_layer: 329 | noise_dim = 0 330 | self.inter_layer = nn.Sequential(*[StoResBlock(dim=hidden_dim, noise_dim=noise_dim, add_bn=add_bn, out_act="relu")]*(self.num_blocks - 2)) 331 | self.out_layer = StoResBlock(dim=hidden_dim, hidden_dim=hidden_dim, out_dim=out_dim, 332 | noise_dim=noise_dim, add_bn=add_bn, out_act=out_act) # output layer with concatinated noise 333 | else: 334 | self.input_layer = StoLayer(in_dim=in_dim, out_dim=hidden_dim, noise_dim=noise_dim, add_bn=add_bn, out_act="relu", verbose=verbose) 335 | if not noise_all_layer: 336 | noise_dim = 0 337 | self.inter_layer = nn.Sequential(*[StoLayer(in_dim=hidden_dim, 
class CondStoNet(StoNetBase):
    """Conditional stochastic neural network built from a chain of FiLM blocks.

    Args:
        in_dim (int): input dimension
        out_dim (int): output dimension
        condition_dim (int): dimension of the conditioning vector fed to every block
        num_layer (int, optional): number of layers. Defaults to 2.
        hidden_dim (int, optional): number of neurons per layer. Defaults to 100.
        noise_dim (int, optional): noise dimension. Defaults to 100.
        add_bn (bool, optional): whether to add BN layer. Defaults to False.
        out_act (str, optional): output activation function. Defaults to None.
        resblock (bool, optional): whether to use residual blocks. Defaults to False.
        noise_all_layer (bool, optional): if False, only the first block receives noise. Defaults to True.
        film_pos (str, optional): passed through to every ``FiLMBlock``. Defaults to 'out'.
        film_level (int, optional): passed through to every ``FiLMBlock``. Defaults to 1.
    """
    def __init__(self, in_dim, out_dim, condition_dim, num_layer=2, hidden_dim=100,
                 noise_dim=100, add_bn=False, out_act=None, resblock=False,
                 noise_all_layer=True, film_pos='out', film_level=1):
        super().__init__()
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.condition_dim = condition_dim
        self.hidden_dim = hidden_dim
        self.noise_dim = noise_dim
        self.add_bn = add_bn
        self.noise_all_layer = noise_all_layer

        self.num_blocks = None
        if resblock:
            if num_layer % 2 != 0:
                num_layer += 1
                print("The number of layers must be an even number for residual blocks. Changed to {}".format(str(num_layer)))
            self.num_blocks = num_layer // 2
        self.resblock = resblock
        self.num_layer = num_layer

        # With residual blocks the chain length is counted in blocks, not layers.
        depth = self.num_blocks if resblock else num_layer
        # Keyword arguments common to every block in the chain.
        shared = dict(condition_dim=condition_dim, add_bn=add_bn, resblock=resblock,
                      film_pos=film_pos, film_level=film_level)
        if self.num_blocks == 1:
            blocks = [FiLMBlock(in_dim=in_dim, out_dim=out_dim, hidden_dim=hidden_dim,
                                noise_dim=noise_dim, out_act=out_act, **shared)]
        else:
            blocks = [FiLMBlock(in_dim=in_dim, out_dim=hidden_dim, hidden_dim=hidden_dim,
                                noise_dim=noise_dim, out_act="relu", **shared)]
            later_noise_dim = noise_dim if noise_all_layer else 0
            # Middle blocks do not pass hidden_dim, so FiLMBlock's own default applies.
            blocks.extend(
                FiLMBlock(in_dim=hidden_dim, out_dim=hidden_dim, noise_dim=later_noise_dim,
                          out_act="relu", **shared)
                for _ in range(depth - 2)
            )
            blocks.append(FiLMBlock(in_dim=hidden_dim, out_dim=out_dim, hidden_dim=hidden_dim,
                                    noise_dim=later_noise_dim, out_act=out_act, **shared))
        self.net = nn.ModuleList(blocks)

    def forward(self, x, condition):
        """Pass ``x`` through every block, giving each the same ``condition``."""
        hidden = x
        for block in self.net:
            hidden = block(hidden, condition)
        return hidden
class ResMLP(nn.Module):
    """Residual MLP built from noise-free ``StoResBlock`` units.

    Args:
        in_dim (int, optional): input dimension. Defaults to 1.
        out_dim (int, optional): output dimension. Defaults to 1.
        num_layer (int, optional): number of layers; odd values are rounded up
            to the next even number. Defaults to 2.
        hidden_dim (int, optional): number of neurons per layer. Defaults to 100.
        add_bn (bool, optional): whether to add batch normalization. Defaults to False.
        sigmoid (bool, optional): whether to apply a sigmoid at the output. Defaults to False.
    """
    def __init__(self, in_dim=1, out_dim=1, num_layer=2, hidden_dim=100, add_bn=False, sigmoid=False):
        super().__init__()
        out_act = "sigmoid" if sigmoid else None
        if num_layer % 2 != 0:
            num_layer += 1
            print("The number of layers must be an even number for residual blocks. Added one layer.")
        num_blocks = num_layer // 2
        self.num_blocks = num_blocks
        if num_blocks == 1:
            self.net = StoResBlock(dim=in_dim, hidden_dim=hidden_dim, out_dim=out_dim,
                                   noise_dim=0, add_bn=add_bn, out_act=out_act)
        else:
            self.input_layer = StoResBlock(dim=in_dim, hidden_dim=hidden_dim, out_dim=hidden_dim,
                                           noise_dim=0, add_bn=add_bn, out_act="relu")
            # Build a separate block per position. The earlier `[block] * k`
            # repeated one module instance, so every intermediate block shared
            # the same weights.
            self.inter_layer = nn.Sequential(*[
                StoResBlock(dim=hidden_dim, noise_dim=0, add_bn=add_bn, out_act="relu")
                for _ in range(num_blocks - 2)
            ])
            self.out_layer = StoResBlock(dim=hidden_dim, hidden_dim=hidden_dim, out_dim=out_dim,
                                         noise_dim=0, add_bn=add_bn, out_act=out_act)

    def forward(self, x):
        """Run the single block, or the input / intermediate / output chain."""
        if self.num_blocks == 1:
            return self.net(x)
        else:
            return self.out_layer(self.inter_layer(self.input_layer(x)))
torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 3, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "# load data\n", 37 | "data = pd.read_csv(\"../engression/data/resources/air_quality.csv\")\n", 38 | "x_full = torch.Tensor(data[\"PT08.S2.NMHC.\"]).unsqueeze(1).to(device)\n", 39 | "y_full = torch.Tensor(data[\"PT08.S3.NOx.\"]).unsqueeze(1).to(device)\n", 40 | "# partition training/test\n", 41 | "x_tr, y_tr, x_te, y_te, x_full_normal = partition_data(x_full, y_full, 0.3, \"smaller\")" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 4, 47 | "metadata": {}, 48 | "outputs": [ 49 | { 50 | "name": "stdout", 51 | "output_type": "stream", 52 | "text": [ 53 | "Running on CPU.\n", 54 | "\n", 55 | "Data is standardized for training only; the printed training losses are on the standardized scale. \n", 56 | "However during evaluation, the predictions, evaluation metrics, and plots will be on the original scale.\n", 57 | "\n", 58 | "Batch is larger than half of the sample size. 
Training based on full-batch gradient descent.\n", 59 | "[Epoch 1 (0%)] energy-loss: 0.6088, E(|Y-Yhat|): 0.9915, E(|Yhat-Yhat'|): 0.7653\n", 60 | "[Epoch 500 (25%)] energy-loss: 0.3914, E(|Y-Yhat|): 0.7620, E(|Yhat-Yhat'|): 0.7412\n", 61 | "[Epoch 1000 (50%)] energy-loss: 0.3842, E(|Y-Yhat|): 0.7734, E(|Yhat-Yhat'|): 0.7784\n", 62 | "[Epoch 1500 (75%)] energy-loss: 0.3809, E(|Y-Yhat|): 0.7390, E(|Yhat-Yhat'|): 0.7161\n", 63 | "[Epoch 2000 (100%)] energy-loss: 0.3861, E(|Y-Yhat|): 0.7765, E(|Yhat-Yhat'|): 0.7806\n", 64 | "\n", 65 | "Training loss on the original (non-standardized) scale:\n", 66 | "\tEnergy-loss: 0.3782, E(|Y-Yhat|): 0.7618, E(|Yhat-Yhat'|): 0.7672\n", 67 | "\n", 68 | "Prediction-loss E(|Y-Yhat|) and variance-loss E(|Yhat-Yhat'|) should ideally be equally large\n", 69 | "-- consider training for more epochs or adjusting hyperparameters if there is a mismatch \n" 70 | ] 71 | } 72 | ], 73 | "source": [ 74 | "# Fit an engression model\n", 75 | "engressor = engression(x_tr, y_tr, resblock=True, num_layer=6, hidden_dim=100, noise_dim=100, \n", 76 | " lr=0.001, num_epochs=2000, print_every_nepoch=500, device=device)" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 5, 82 | "metadata": {}, 83 | "outputs": [ 84 | { 85 | "name": "stdout", 86 | "output_type": "stream", 87 | "text": [ 88 | "Engression model with\n", 89 | "\t number of layers: 6\n", 90 | "\t hidden dimensions: 100\n", 91 | "\t noise dimensions: 100\n", 92 | "\t residual blocks: True\n", 93 | "\t number of epochs: 2000\n", 94 | "\t batch size: 2707\n", 95 | "\t learning rate: 0.001\n", 96 | "\t standardization: True\n", 97 | "\t training mode: False\n", 98 | "\t device: cpu\n", 99 | "\n", 100 | "Training loss (original scale):\n", 101 | "\t energy-loss: 0.38, \n", 102 | "\tE(|Y-Yhat|): 0.76, \n", 103 | "\tE(|Yhat-Yhat'|): 0.77\n" 104 | ] 105 | } 106 | ], 107 | "source": [ 108 | "# Summarize model information\n", 109 | "engressor.summary()" 110 | ] 111 | }, 112 | { 113 | 
"cell_type": "code", 114 | "execution_count": 6, 115 | "metadata": {}, 116 | "outputs": [ 117 | { 118 | "name": "stdout", 119 | "output_type": "stream", 120 | "text": [ 121 | "L2 loss: 0.27850815653800964\n", 122 | "correlation between predicted and true means: 0.6972660422325134\n", 123 | "energy score: 0.27972766757011414\n" 124 | ] 125 | } 126 | ], 127 | "source": [ 128 | "# Evaluation\n", 129 | "print(\"L2 loss:\", engressor.eval_loss(x_te, y_te, loss_type=\"l2\"))\n", 130 | "print(\"correlation between predicted and true means:\", engressor.eval_loss(x_te, y_te, loss_type=\"cor\"))\n", 131 | "print(\"energy score:\", engressor.eval_loss(x_te, y_te, loss_type=\"energy\"))" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 7, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "# prediction\n", 141 | "y_pred = engressor.predict(x_full_normal)" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 8, 147 | "metadata": {}, 148 | "outputs": [ 149 | { 150 | "data": { 151 | "image/png": "", 152 | "text/plain": [ 153 | "
" 154 | ] 155 | }, 156 | "metadata": {}, 157 | "output_type": "display_data" 158 | } 159 | ], 160 | "source": [ 161 | "engressor.plot(x_te, y_te, x_tr, y_tr)" 162 | ] 163 | }, 164 | { 165 | "attachments": {}, 166 | "cell_type": "markdown", 167 | "metadata": {}, 168 | "source": [ 169 | "## Baseline: $L_2$ regression" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": 9, 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": [ 178 | "from engression.models import ResMLP" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": 10, 184 | "metadata": {}, 185 | "outputs": [ 186 | { 187 | "data": { 188 | "image/png": "", 189 | "text/plain": [ 190 | "
" 191 | ] 192 | }, 193 | "metadata": {}, 194 | "output_type": "display_data" 195 | } 196 | ], 197 | "source": [ 198 | "# Build a model with the same architecture\n", 199 | "model = ResMLP(num_layer=6, hidden_dim=100) # NN with the same architecture\n", 200 | "opt = torch.optim.Adam(model.parameters(), lr=0.001) # same optimizer\n", 201 | "\n", 202 | "# L2 regression training\n", 203 | "model.train()\n", 204 | "for i in range(2000):\n", 205 | " model.zero_grad()\n", 206 | " y_pred = model(x_tr)\n", 207 | " loss = (y_pred - y_tr).pow(2).mean()\n", 208 | " loss.backward()\n", 209 | " opt.step()\n", 210 | "\n", 211 | "# Evaluation\n", 212 | "model.eval()\n", 213 | "y_pred = model(x_full_normal).detach()\n", 214 | "plt.scatter(x_tr.cpu(), y_tr.cpu(), label=\"training data\", s=1, color=\"silver\")\n", 215 | "plt.scatter(x_te.cpu(), y_te.cpu(), label=\"test data\", s=1, color=\"goldenrod\")\n", 216 | "plt.scatter(x_full_normal.cpu(), y_pred.cpu(), label=\"predictions\", s=1, color=\"lightskyblue\")\n", 217 | "plt.legend(); plt.show()" 218 | ] 219 | } 220 | ], 221 | "metadata": { 222 | "kernelspec": { 223 | "display_name": "Python 3", 224 | "language": "python", 225 | "name": "python3" 226 | }, 227 | "language_info": { 228 | "codemirror_mode": { 229 | "name": "ipython", 230 | "version": 3 231 | }, 232 | "file_extension": ".py", 233 | "mimetype": "text/x-python", 234 | "name": "python", 235 | "nbconvert_exporter": "python", 236 | "pygments_lexer": "ipython3", 237 | "version": "3.11.2" 238 | }, 239 | "orig_nbformat": 4 240 | }, 241 | "nbformat": 4, 242 | "nbformat_minor": 2 243 | } 244 | --------------------------------------------------------------------------------