├── vignettes ├── .gitignore └── residuals.Rmd ├── LICENSE ├── data └── .gitignore ├── man ├── omp.check.Rd ├── matrix.deviance.Rd ├── matrix.penalty.Rd ├── pointwise.deviance.Rd ├── make.pos.diag.Rd ├── set.mat.Y.Rd ├── set.mat.X.Rd ├── set.mat.Z.Rd ├── set.mat.offset.Rd ├── norm.procrustes.Rd ├── set.mat.weights.Rd ├── reexports.Rd ├── simulate.Rd ├── set.family.Rd ├── normalize.uv.Rd ├── storedata.Rd ├── procrustes.Rd ├── ols.fit.coef.Rd ├── set.penalty.Rd ├── eigengap.evr.Rd ├── orthogonalize.Rd ├── partition.Rd ├── eigengap.onatski.Rd ├── eigengap.oht.Rd ├── eigengap.act.Rd ├── print.sgdgmf.Rd ├── print.initgmf.Rd ├── simulate.sgdgmf.Rd ├── whitening.matrix.Rd ├── cpp.airwls.glmstep.Rd ├── deviance.sgdgmf.Rd ├── vglm.fit.coef.Rd ├── cpp.airwls.glmfit.Rd ├── fitted.sgdgmf.Rd ├── deviance.initgmf.Rd ├── fitted.initgmf.Rd ├── orthogonalize.uv.Rd ├── coefficients.sgdgmf.Rd ├── cpp.airwls.update.Rd ├── set.control.cv.Rd ├── biplot.sgdgmf.Rd ├── coefficients.initgmf.Rd ├── biplot.initgmf.Rd ├── set.control.alg.Rd ├── sgdGMF-package.Rd ├── storedata.sgdgmf.Rd ├── screeplot.sgdgmf.Rd ├── set.control.init.Rd ├── image.sgdgmf.Rd ├── screeplot.initgmf.Rd ├── image.initgmf.Rd ├── refit.sgdgmf.Rd ├── set.control.newton.Rd ├── plot.sgdgmf.Rd ├── cpp.fit.newton.Rd ├── plot.initgmf.Rd ├── cpp.fit.airwls.Rd ├── set.control.airwls.Rd ├── set.control.block.sgd.Rd ├── set.control.coord.sgd.Rd ├── sgdgmf.cv.step.Rd ├── sim.gmf.data.Rd ├── predict.sgdgmf.Rd ├── cpp.fit.block.sgd.Rd ├── residuals.initgmf.Rd ├── cpp.fit.coord.sgd.Rd ├── cpp.fit.random.block.sgd.Rd ├── residuals.sgdgmf.Rd └── sgdgmf.rank.Rd ├── sgdGMF.Rproj ├── tests ├── testthat.R ├── testcpp │ ├── test-deviance.cpp │ ├── test-minibatch.cpp │ ├── test-misc.cpp │ ├── test-link.cpp │ ├── test-family.cpp │ └── test-utils.cpp ├── testthat │ ├── test-eigengap.R │ ├── test-fit.R │ ├── test-control.R │ ├── test-init.R │ ├── test-vglmfit.R │ └── test-deviance.R └── testrcpp │ ├── test-minibatch.R │ ├── test-newton.R │ ├── 
test-link.R │ ├── test-misc.R │ └── test-family.R ├── src ├── deviance.h ├── Makevars ├── Makevars.win ├── deviance.cpp ├── minibatch.h ├── variance.cpp ├── variance.h ├── misc.h ├── minibatch.cpp ├── link.cpp ├── family.cpp ├── link.h └── utils.h ├── .Rbuildignore ├── R ├── genericfun.R ├── deviance.R ├── sgdGMF-package.R └── vglmfit.R ├── README.md ├── .github └── workflows │ └── R-CMD-check.yaml ├── NEWS.md ├── .gitignore ├── DESCRIPTION └── NAMESPACE /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2025 2 | COPYRIGHT HOLDER: Cristian Castiglione 3 | -------------------------------------------------------------------------------- /data/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Data folder 3 | splatter 4 | splatter/ 5 | splatter/** 6 | bubble 7 | bubble/ 8 | bubble/** 9 | bubble2 10 | bubble2/ 11 | bubble2/** 12 | 13 | BE1.RData 14 | mixology.RData 15 | PBMC.RData 16 | -------------------------------------------------------------------------------- /man/omp.check.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{omp.check} 4 | \alias{omp.check} 5 | \title{Check if OpenMP is enabled} 6 | \usage{ 7 | omp.check() 8 | } 9 | \description{ 10 | Internal function to check if OpenMP is enabled 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/matrix.deviance.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/deviance.R 3 | 
\name{matrix.deviance} 4 | \alias{matrix.deviance} 5 | \title{Model deviance of a GMF model} 6 | \usage{ 7 | matrix.deviance(mu, y, family = gaussian()) 8 | } 9 | \description{ 10 | Compute the overall deviance averaging the contributions of all data 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/matrix.penalty.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/deviance.R 3 | \name{matrix.penalty} 4 | \alias{matrix.penalty} 5 | \title{Frobenius penalty for the parameters of a GMF model} 6 | \usage{ 7 | matrix.penalty(U, penalty) 8 | } 9 | \description{ 10 | Compute the Frobenius penalty for all the parameters in the model 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/pointwise.deviance.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/deviance.R 3 | \name{pointwise.deviance} 4 | \alias{pointwise.deviance} 5 | \title{Pointwise deviance of a GMF model} 6 | \usage{ 7 | pointwise.deviance(mu, y, family = gaussian()) 8 | } 9 | \description{ 10 | Compute the pointwise deviance for all the observations in the sample 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/make.pos.diag.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{make.pos.diag} 4 | \alias{make.pos.diag} 5 | \title{Fix sign ambiguity of eigen-vectors} 6 | \usage{ 7 | make.pos.diag(U) 8 | } 9 | \arguments{ 10 | \item{U}{target matrix} 11 | } 12 | \description{ 13 | Fix sign ambiguity of eigen-vectors by making U 
positive diagonal 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/set.mat.Y.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/control.R 3 | \name{set.mat.Y} 4 | \alias{set.mat.Y} 5 | \title{Check and set the response matrix Y} 6 | \usage{ 7 | set.mat.Y(Y) 8 | } 9 | \description{ 10 | Check if the input response matrix is well-defined and return the same 11 | matrix without attributes such as row and column names. 12 | } 13 | \keyword{internal} 14 | -------------------------------------------------------------------------------- /man/set.mat.X.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/control.R 3 | \name{set.mat.X} 4 | \alias{set.mat.X} 5 | \title{Check and set the covariate matrix X} 6 | \usage{ 7 | set.mat.X(X, n, m) 8 | } 9 | \description{ 10 | Check if the input covariate matrix X is well-defined and return the same 11 | matrix without attributes such as row and column names. 12 | } 13 | \keyword{internal} 14 | -------------------------------------------------------------------------------- /man/set.mat.Z.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/control.R 3 | \name{set.mat.Z} 4 | \alias{set.mat.Z} 5 | \title{Check and set the covariate matrix Z} 6 | \usage{ 7 | set.mat.Z(Z, n, m) 8 | } 9 | \description{ 10 | Check if the input covariate matrix Z is well-defined and return the same 11 | matrix without attributes such as row and column names. 
12 | } 13 | \keyword{internal} 14 | -------------------------------------------------------------------------------- /man/set.mat.offset.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/control.R 3 | \name{set.mat.offset} 4 | \alias{set.mat.offset} 5 | \title{Check and set the offset matrix} 6 | \usage{ 7 | set.mat.offset(O, n, m) 8 | } 9 | \description{ 10 | Check if the input offset matrix is well-defined and return the same 11 | matrix without attributes such as row and column names. 12 | } 13 | \keyword{internal} 14 | -------------------------------------------------------------------------------- /man/norm.procrustes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{norm.procrustes} 4 | \alias{norm.procrustes} 5 | \title{Procrustes distance} 6 | \usage{ 7 | norm.procrustes(A, B) 8 | } 9 | \arguments{ 10 | \item{A}{target matrix} 11 | 12 | \item{B}{matrix to be rotated} 13 | } 14 | \description{ 15 | Compute the Procrustes distance between two matrices 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /man/set.mat.weights.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/control.R 3 | \name{set.mat.weights} 4 | \alias{set.mat.weights} 5 | \title{Check and set the weighting matrix} 6 | \usage{ 7 | set.mat.weights(W, n, m) 8 | } 9 | \description{ 10 | Check if the input weighting matrix is well-defined and return the same 11 | matrix without attributes such as row and column names. 
12 | } 13 | \keyword{internal} 14 | -------------------------------------------------------------------------------- /sgdGMF.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | # This file is part of the standard setup for testthat. 2 | # It is recommended that you do not modify it. 3 | # 4 | # Where should you do additional test configuration? 5 | # Learn more about the roles of various files in: 6 | # * https://r-pkgs.org/testing-design.html#sec-tests-files-overview 7 | # * https://testthat.r-lib.org/articles/special-files.html 8 | 9 | library(testthat) 10 | library(sgdGMF) 11 | 12 | test_check("sgdGMF") 13 | -------------------------------------------------------------------------------- /man/reexports.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/genericfun.R 3 | \docType{import} 4 | \name{reexports} 5 | \alias{reexports} 6 | \alias{refit} 7 | \title{Objects exported from other packages} 8 | \keyword{internal} 9 | \description{ 10 | These objects are imported from other packages. Follow the links 11 | below to see their documentation. 
12 | 13 | \describe{ 14 | \item{generics}{\code{\link[generics]{refit}}} 15 | }} 16 | 17 | -------------------------------------------------------------------------------- /man/simulate.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/genericfun.R 3 | \name{simulate} 4 | \alias{simulate} 5 | \title{Simulate new data} 6 | \usage{ 7 | simulate(object, ...) 8 | } 9 | \arguments{ 10 | \item{object}{an object from which simulate new data} 11 | 12 | \item{...}{additional arguments passed to or from other methods} 13 | } 14 | \value{ 15 | An array containing the simulated data. 16 | } 17 | \description{ 18 | Generic function to simulate new data from a statistical model 19 | } 20 | -------------------------------------------------------------------------------- /man/set.family.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/control.R 3 | \name{set.family} 4 | \alias{set.family} 5 | \title{Check and set the model family} 6 | \usage{ 7 | set.family(family) 8 | } 9 | \arguments{ 10 | \item{family}{a \code{glm} family (see \code{\link{family}} for more details)} 11 | } 12 | \description{ 13 | Check if the model family is allowed and return it eventually with a 14 | different family name for compatibility with the \code{C++} implementation 15 | } 16 | \keyword{internal} 17 | -------------------------------------------------------------------------------- /man/normalize.uv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{normalize.uv} 4 | \alias{normalize.uv} 5 | \title{Normalize the matrices U and V} 6 | \usage{ 7 | normalize.uv(U, V, method = c("qr", "svd")) 8 | } 9 | \description{ 10 | Rotate U 
and V using either QR or SVD decompositions. 11 | The QR method rotates U and V in such a way to obtain an orthogonal U 12 | and a lower triangular V. The SVD method rotates U and V in such a way 13 | to obtain an orthogonal U and a scaled orthogonal V. 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/storedata.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/genericfun.R 3 | \name{storedata} 4 | \alias{storedata} 5 | \title{Store data into an object} 6 | \usage{ 7 | storedata(object, ...) 8 | } 9 | \arguments{ 10 | \item{object}{an object in which to store new data} 11 | 12 | \item{...}{additional arguments passed to or from other methods} 13 | } 14 | \value{ 15 | An object of the same class as the input containing new data 16 | } 17 | \description{ 18 | Generic function to store data into an object, typically a statistical model 19 | } 20 | -------------------------------------------------------------------------------- /man/procrustes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{procrustes} 4 | \alias{procrustes} 5 | \title{Procrustes rotation of two configurations} 6 | \usage{ 7 | procrustes(X, Y, scale = TRUE, symmetric = FALSE) 8 | } 9 | \arguments{ 10 | \item{X}{target matrix} 11 | 12 | \item{Y}{matrix to be rotated} 13 | 14 | \item{scale}{allow scaling of axes of Y} 15 | 16 | \item{symmetric}{if \code{TRUE}, use symmetric Procrustes statistic} 17 | } 18 | \description{ 19 | Rotates a configuration to maximum similarity with another configuration 20 | } 21 | \keyword{internal} 22 | -------------------------------------------------------------------------------- /man/ols.fit.coef.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/vglmfit.R 3 | \name{ols.fit.coef} 4 | \alias{ols.fit.coef} 5 | \title{Estimate the coefficients of a multivariate linear model} 6 | \usage{ 7 | ols.fit.coef(Y, X, offset = NULL) 8 | } 9 | \arguments{ 10 | \item{Y}{\eqn{n \times m} matrix of response variables} 11 | 12 | \item{X}{\eqn{n \times p} matrix of covariates} 13 | 14 | \item{offset}{\eqn{n \times m} matrix of offset values} 15 | } 16 | \description{ 17 | Estimate the coefficients of a multivariate linear model via ordinary least squares. 18 | } 19 | \keyword{internal} 20 | -------------------------------------------------------------------------------- /man/set.penalty.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/control.R 3 | \name{set.penalty} 4 | \alias{set.penalty} 5 | \title{Check and set the penalty parameters} 6 | \usage{ 7 | set.penalty(B = 0, A = 0, U = 1, V = 0) 8 | } 9 | \arguments{ 10 | \item{B}{penalty parameter of \code{B}} 11 | 12 | \item{A}{penalty parameter of \code{A}} 13 | 14 | \item{U}{penalty parameter of \code{U}} 15 | 16 | \item{V}{penalty parameter of \code{V}} 17 | } 18 | \description{ 19 | Check if the input penalty parameters are allowed and set them to default 20 | values if they are not. Returns a list of well-defined penalty parameters. 
21 | } 22 | \keyword{internal} 23 | -------------------------------------------------------------------------------- /tests/testcpp/test-deviance.cpp: -------------------------------------------------------------------------------- 1 | // test-deviance.h 2 | // author: Cristian Castiglione 3 | // creation: 02/10/2023 4 | // last change: 02/10/2023 5 | 6 | #include "deviance.h" 7 | #include "misc.h" 8 | #include 9 | 10 | using namespace glm; 11 | 12 | //' @keywords internal 13 | // [[Rcpp::export("cpp.deviance")]] 14 | arma::mat cpp_deviance (const arma::mat & y, const arma::mat & mu, const std::string & familyname) { 15 | std::unique_ptr family = make_family(familyname, std::string("identity")); 16 | return deviance(y, mu, family); 17 | } 18 | 19 | //' @keywords internal 20 | // [[Rcpp::export("cpp.penalty")]] 21 | double cpp_penalty (const arma::mat & u, const arma::vec & p) { 22 | return penalty(u, p); 23 | } 24 | -------------------------------------------------------------------------------- /man/eigengap.evr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/eigengap.R 3 | \name{eigengap.evr} 4 | \alias{eigengap.evr} 5 | \title{Rank selection via eigenvalue ratio maximization} 6 | \usage{ 7 | eigengap.evr(covmat, maxcomp = 50, thr = 0.95) 8 | } 9 | \arguments{ 10 | \item{covmat}{matrix to be decomposed} 11 | 12 | \item{maxcomp}{maximum number of eigenvalues to compute} 13 | } 14 | \description{ 15 | Select the number of significant principal components of a matrix via the 16 | eigenvalue ratio (EVR) maximization method 17 | } 18 | \references{ 19 | Ahn, S.C., Horenstein, A.R. (2013). 
20 | \emph{Eigenvalue ratio test for the number of factors.} 21 | Econometrica, 81, 1203-1227 22 | } 23 | \keyword{internal} 24 | -------------------------------------------------------------------------------- /src/deviance.h: -------------------------------------------------------------------------------- 1 | // deviance.h 2 | // author: Cristian Castiglione 3 | // creation: 28/09/2023 4 | // last change: 28/09/2023 5 | 6 | #ifndef DEVIANCE_H 7 | #define DEVIANCE_H 8 | 9 | #include 10 | #include 11 | #include "family.h" 12 | 13 | using namespace glm; 14 | 15 | // Pointwise deviance 16 | void deviance ( 17 | arma::mat & dev, const arma::mat & y, const arma::mat & mu, 18 | const std::unique_ptr & family); 19 | arma::mat deviance ( 20 | const arma::mat & y, const arma::mat & mu, 21 | const std::unique_ptr & family); 22 | 23 | // Penalty function 24 | void penalty (double & pen, const arma::mat & u, const arma::vec & p); 25 | double penalty (const arma::mat & u, const arma::vec & p); 26 | 27 | #endif -------------------------------------------------------------------------------- /man/orthogonalize.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{orthogonalize} 4 | \alias{orthogonalize} 5 | \title{Orthogonalize the matrices U and V with respect to X and Z} 6 | \usage{ 7 | orthogonalize( 8 | X, 9 | Z, 10 | B, 11 | A, 12 | U, 13 | V, 14 | method = c("QR", "SVD", "ZCA", "ZCA-cor", "PCA", "PCA-cor", "Cholesky") 15 | ) 16 | } 17 | \description{ 18 | Orthogonalize \code{[A, U]} and \code{V} with respect to \code{X} and \code{Z}, 19 | respectively, sequentially applying multivariate least squares and residual 20 | whitening on U. The result must satisfy the following constraints: 21 | \eqn{X^\top A = 0}, \eqn{X^\top U = 0}, \eqn{Z^\top V = 0}, \eqn{U^\top U = 0}. 
22 | } 23 | \keyword{internal} 24 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | #---------------------------- 2 | # VS-Code related 3 | #---------------------------- 4 | ^\.vscode$ 5 | 6 | #---------------------------- 7 | # R related 8 | #---------------------------- 9 | ^.*\.Rproj$ 10 | ^\.Rproj\.user$ 11 | ^\.Rhistory$ 12 | ^\.RData$ 13 | ^\.Rproj$ 14 | 15 | #---------------------------- 16 | # C++ related 17 | #---------------------------- 18 | ^tests/testcpp/ 19 | ^tests/testrcpp/ 20 | 21 | #---------------------------- 22 | # GitHub related 23 | #---------------------------- 24 | ^\.git$ 25 | ^\.github$ 26 | ^\.gitignore$ 27 | ^\.gitattributes$ 28 | ^\.svn$ 29 | 30 | #---------------------------- 31 | # Other 32 | #---------------------------- 33 | ^data/ 34 | ^examples/ 35 | ^img/ 36 | ^old/ 37 | ^sandbox/ 38 | ^sim/ 39 | ^zip/ 40 | ^cran/ 41 | -------------------------------------------------------------------------------- /man/partition.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{partition} 4 | \alias{partition} 5 | \title{Split the data matrix in train and test sets} 6 | \usage{ 7 | partition(y, p = 0.3) 8 | } 9 | \arguments{ 10 | \item{y}{input matrix to be split into train and test sets} 11 | 12 | \item{p}{fraction of observations to be used for the test set} 13 | } 14 | \description{ 15 | Returns a list of two matrices \code{train} and \code{test}. 16 | \code{train} corresponds to the input matrix with a fixed percentage of 17 | entries masked by NA values. \code{test} is the complement of \code{train} 18 | and contains the values of the input matrix in the cells where \code{train} 19 | is NA, while all the other entries are filled by NA values. 
20 | } 21 | \keyword{internal} 22 | -------------------------------------------------------------------------------- /man/eigengap.onatski.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/eigengap.R 3 | \name{eigengap.onatski} 4 | \alias{eigengap.onatski} 5 | \title{Rank selection via the Onatski method} 6 | \usage{ 7 | eigengap.onatski(covmat, maxcomp = 50, maxiter = 100) 8 | } 9 | \arguments{ 10 | \item{covmat}{matrix to be decomposed} 11 | 12 | \item{maxcomp}{maximum number of eigenvalues to compute} 13 | 14 | \item{maxiter}{maximum number of iterations} 15 | } 16 | \description{ 17 | Select the number of significant principal components of a matrix via the 18 | Onatski method 19 | } 20 | \references{ 21 | Onatski, A. (2010). 22 | \emph{Determining the number of factors from empirical distribution of eigenvalues.} 23 | Review of Economics and Statistics, 92(4): 1004-1016 24 | } 25 | \keyword{internal} 26 | -------------------------------------------------------------------------------- /man/eigengap.oht.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/eigengap.R 3 | \name{eigengap.oht} 4 | \alias{eigengap.oht} 5 | \title{Rank selection via optimal hard thresholding} 6 | \usage{ 7 | eigengap.oht(covmat, nobs, maxcomp = NULL) 8 | } 9 | \arguments{ 10 | \item{covmat}{matrix to be decomposed} 11 | 12 | \item{nobs}{number of observations used to compute the covariance matrix} 13 | 14 | \item{maxcomp}{maximum number of eigenvalues to compute} 15 | } 16 | \description{ 17 | Select the number of significant principal components of a matrix via optimal 18 | hard thresholding (OHT) 19 | } 20 | \references{ 21 | Gavish, M., Donoho, D.L. 
(2014) 22 | \emph{The optimal hard thresholding for singular values is 4/sqrt(3).} 23 | IEEE Transactions on Information Theory, 60(8): 5040--5053 24 | } 25 | \keyword{internal} 26 | -------------------------------------------------------------------------------- /man/eigengap.act.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/eigengap.R 3 | \name{eigengap.act} 4 | \alias{eigengap.act} 5 | \title{Rank selection via adjusted correlation thresholding} 6 | \usage{ 7 | eigengap.act(covmat, nobs, maxcomp = NULL) 8 | } 9 | \arguments{ 10 | \item{covmat}{matrix to be decomposed} 11 | 12 | \item{nobs}{number of observations used to compute the covariance matrix} 13 | 14 | \item{maxcomp}{maximum number of eigenvalues to compute} 15 | } 16 | \description{ 17 | Select the number of significant principal components of a matrix via adjusted 18 | correlation thresholding (ACT) 19 | } 20 | \references{ 21 | Fan, J., Guo, J. and Zheng, S. (2020). 22 | \emph{Estimating number of factors by adjusted eigenvalues thresholding.} 23 | Journal of the American Statistical Association, 117(538): 852--861 24 | } 25 | \keyword{internal} 26 | -------------------------------------------------------------------------------- /man/print.sgdgmf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sgdGMF-class.R 3 | \name{print.sgdgmf} 4 | \alias{print.sgdgmf} 5 | \title{Print the fundamental characteristics of a GMF} 6 | \usage{ 7 | \method{print}{sgdgmf}(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{an object of class \code{sgdgmf}} 11 | 12 | \item{...}{further arguments passed to or from other methods} 13 | } 14 | \value{ 15 | No return value, called only for printing. 16 | } 17 | \description{ 18 | Print some summary information of a GMF model. 
19 | } 20 | \examples{ 21 | # Load the sgdGMF package 22 | library(sgdGMF) 23 | 24 | # Generate data from a Poisson model 25 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson()) 26 | 27 | # Fit a GMF model with 3 latent factors 28 | gmf = sgdgmf.fit(data$Y, ncomp = 3, family = poisson()) 29 | 30 | # Print the GMF object 31 | print(gmf) 32 | 33 | } 34 | -------------------------------------------------------------------------------- /R/genericfun.R: -------------------------------------------------------------------------------- 1 | 2 | #' @export 3 | generics::refit 4 | 5 | #' @title Simulate new data 6 | #' 7 | #' @description 8 | #' Generic function to simulate new data from a statistical model 9 | #' 10 | #' @param object an object from which simulate new data 11 | #' @param ... additional arguments passed to or from other methods 12 | #' 13 | #' @return An array containing the simulated data. 14 | #' 15 | #' @export 16 | simulate = function (object, ...) UseMethod("simulate") 17 | 18 | 19 | #' @title Store data into an object 20 | #' 21 | #' @description 22 | #' Generic function to store data into an object, typically a statistical model 23 | #' 24 | #' @param object an object in which to store new data 25 | #' @param ... additional arguments passed to or from other methods 26 | #' 27 | #' @return An object of the same class as the input containing new data 28 | #' 29 | #' @export 30 | storedata = function (object, ...) UseMethod("storedata") 31 | -------------------------------------------------------------------------------- /man/print.initgmf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/initGMF-class.R 3 | \name{print.initgmf} 4 | \alias{print.initgmf} 5 | \title{Print the fundamental characteristics of an initialized GMF} 6 | \usage{ 7 | \method{print}{initgmf}(x, ...) 
8 | } 9 | \arguments{ 10 | \item{x}{an object of class \code{initgmf}} 11 | 12 | \item{...}{further arguments passed to or from other methods} 13 | } 14 | \value{ 15 | No return value, called only for printing. 16 | } 17 | \description{ 18 | Print some summary information of an initialized GMF model. 19 | } 20 | \examples{ 21 | # Load the sgdGMF package 22 | library(sgdGMF) 23 | 24 | # Generate data from a Poisson model 25 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson()) 26 | 27 | # Fit a GMF model with 3 latent factors 28 | init = sgdgmf.init(data$Y, ncomp = 3, family = poisson()) 29 | 30 | # Print the GMF object 31 | print(init) 32 | 33 | } 34 | -------------------------------------------------------------------------------- /src/Makevars: -------------------------------------------------------------------------------- 1 | 2 | ## With R 3.1.0 or later, you can uncomment the following line to tell R to 3 | ## enable compilation with C++11 (where available) 4 | ## 5 | ## Also, OpenMP support in Armadillo prefers C++11 support. However, for wider 6 | ## availability of the package we do not yet enforce this here. It is however 7 | ## recommended for client packages to set it. 8 | ## 9 | ## And with R 3.4.0, and RcppArmadillo 0.7.960.*, we turn C++11 on as OpenMP 10 | ## support within Armadillo prefers / requires it 11 | ## 12 | ## R 4.0.0 made C++11 the default, R 4.1.0 switched to C++14, R 4.3.0 to C++17 13 | ## _In general_ we should no longer need to set a standard as any recent R 14 | ## installation will do the right thing. Should you need it, uncomment it and 15 | ## set the appropriate value, possibly CXX17. 
16 | #CXX_STD = CXX11 17 | 18 | CXXFLAGS = $(CXXFLAGS) -Os # -Wall 19 | PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) 20 | PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 21 | -------------------------------------------------------------------------------- /man/simulate.sgdgmf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sgdGMF-class.R 3 | \name{simulate.sgdgmf} 4 | \alias{simulate.sgdgmf} 5 | \title{Simulate method for GMF models} 6 | \usage{ 7 | \method{simulate}{sgdgmf}(object, ..., nsim = 1) 8 | } 9 | \arguments{ 10 | \item{object}{an object of class \code{sgdgmf}} 11 | 12 | \item{...}{further arguments passed to or from other methods} 13 | 14 | \item{nsim}{number of samples} 15 | } 16 | \value{ 17 | A 3-fold array containing the simulated data. 18 | } 19 | \description{ 20 | Simulate new data from a fitted generalized matrix factorization model 21 | } 22 | \examples{ 23 | # Load the sgdGMF package 24 | library(sgdGMF) 25 | 26 | # Generate data from a Poisson model 27 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson()) 28 | 29 | # Fit a GMF model 30 | gmf = sgdgmf.fit(data$Y, ncomp = 3, family = poisson()) 31 | 32 | # Simulate new data from a GMF model 33 | str(simulate(gmf)) 34 | 35 | } 36 | -------------------------------------------------------------------------------- /src/Makevars.win: -------------------------------------------------------------------------------- 1 | 2 | ## With R 3.1.0 or later, you can uncomment the following line to tell R to 3 | ## enable compilation with C++11 (where available) 4 | ## 5 | ## Also, OpenMP support in Armadillo prefers C++11 support. However, for wider 6 | ## availability of the package we do not yet enforce this here. It is however 7 | ## recommended for client packages to set it. 
8 | ## 9 | ## And with R 3.4.0, and RcppArmadillo 0.7.960.*, we turn C++11 on as OpenMP 10 | ## support within Armadillo prefers / requires it 11 | ## 12 | ## R 4.0.0 made C++11 the default, R 4.1.0 switched to C++14, R 4.3.0 to C++17 13 | ## _In general_ we should no longer need to set a standard as any recent R 14 | ## installation will do the right thing. Should you need it, uncomment it and 15 | ## set the appropriate value, possibly CXX17. 16 | #CXX_STD = CXX11 17 | 18 | CXXFLAGS = $(CXXFLAGS) -Os # -Wall 19 | PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) 20 | PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 21 | -------------------------------------------------------------------------------- /man/whitening.matrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{whitening.matrix} 4 | \alias{whitening.matrix} 5 | \alias{whitening.zca} 6 | \alias{whitening.zca.cor} 7 | \alias{whitening.pca} 8 | \alias{whitening.pca.cor} 9 | \alias{whitening.chol} 10 | \title{Compute the whitening matrix from a given covariance matrix} 11 | \usage{ 12 | whitening.matrix( 13 | sigma, 14 | method = c("ZCA", "ZCA-cor", "PCA", "PCA-cor", "Cholesky") 15 | ) 16 | 17 | whitening.zca(sigma) 18 | 19 | whitening.zca.cor(sigma) 20 | 21 | whitening.pca(sigma) 22 | 23 | whitening.pca.cor(sigma) 24 | 25 | whitening.chol(sigma) 26 | } 27 | \arguments{ 28 | \item{sigma}{covariance matrix.} 29 | 30 | \item{method}{determines the type of whitening transformation.} 31 | } 32 | \description{ 33 | Compute the whitening matrix from a given covariance matrix 34 | } 35 | \details{ 36 | This function is an internal re-implementation of the function \code{whiteningMatrix} 37 | in the \code{whitening} package. See the original documentation to get more details. 
38 | } 39 | \keyword{internal} 40 | -------------------------------------------------------------------------------- /R/deviance.R: -------------------------------------------------------------------------------- 1 | 2 | #' @title Pointwise deviance of a GMF model 3 | #' @description Compute the pointwise deviance for all the observations in the sample 4 | #' @keywords internal 5 | pointwise.deviance = function (mu, y, family = gaussian()) { 6 | if (length(mu) == 1) { 7 | mut = y 8 | mut[] = mu 9 | mu = mut 10 | } 11 | nona = !is.na(y) 12 | dev = y 13 | dev[] = NA 14 | dev[nona] = family$dev.resids(y[nona], mu[nona], 1) 15 | return(dev) 16 | } 17 | 18 | #' @title Model deviance of a GMF model 19 | #' @description Compute the overall deviance summing the contributions of all data 20 | #' @keywords internal 21 | matrix.deviance = function (mu, y, family = gaussian()) { 22 | dev = pointwise.deviance(mu, y, family) 23 | dev = sum(dev, na.rm = TRUE) 24 | # dev = mean(dev, na.rm = TRUE) 25 | return (dev) 26 | } 27 | 28 | #' @title Frobenius penalty for the parameters of a GMF model 29 | #' @description Compute the Frobenius penalty for all the parameters in the model 30 | #' @keywords internal 31 | matrix.penalty = function (U, penalty) { 32 | pen = sum(sweep(U**2, 2, penalty, "*")) 33 | return (pen) 34 | } 35 | 36 | -------------------------------------------------------------------------------- /man/cpp.airwls.glmstep.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{cpp.airwls.glmstep} 4 | \alias{cpp.airwls.glmstep} 5 | \title{Compute one Fisher scoring step for GLMs} 6 | \usage{ 7 | cpp.airwls.glmstep( 8 | beta, 9 | y, 10 | X, 11 | familyname, 12 | linkname, 13 | varfname, 14 | offset, 15 | weights, 16 | penalty 17 | ) 18 | } 19 | \arguments{ 20 | \item{beta}{current value of the regression coefficients to be updated} 21 |
22 | \item{y}{response vector} 23 | 24 | \item{X}{design matrix} 25 | 26 | \item{familyname}{model family name} 27 | 28 | \item{linkname}{link function name} 29 | 30 | \item{varfname}{variance function name} 31 | 32 | \item{offset}{vector of constants to be added to the linear predictor} 33 | 34 | \item{weights}{vector of constants non-negative weights} 35 | 36 | \item{penalty}{penalty parameter of a ridge-type penalty} 37 | } 38 | \description{ 39 | Internal function to compute one Fisher scoring step for GLMs. 40 | It constitutes the building block of the AIRWLS algorithm for the 41 | estimation of GMF models. 42 | } 43 | \keyword{internal} 44 | -------------------------------------------------------------------------------- /src/deviance.cpp: -------------------------------------------------------------------------------- 1 | // deviance.h 2 | // author: Cristian Castiglione 3 | // creation: 28/09/2023 4 | // last change: 28/09/2023 5 | 6 | #include "deviance.h" 7 | 8 | using namespace glm; 9 | 10 | // Pointwise deviance 11 | void deviance ( 12 | arma::mat & dev, const arma::mat & y, const arma::mat & mu, 13 | const std::unique_ptr & family 14 | ) { 15 | bool anyna = !y.is_finite(); 16 | if (anyna) { 17 | arma::uvec notna = arma::find_finite(y); 18 | dev.elem(notna) = family->devresid(y.elem(notna), mu.elem(notna)); 19 | } else { 20 | dev = family->devresid(y, mu); 21 | } 22 | }; 23 | 24 | arma::mat deviance ( 25 | const arma::mat & y, const arma::mat & mu, 26 | const std::unique_ptr & family 27 | ) { 28 | arma::mat dev(arma::size(y)); 29 | deviance(dev, y, mu, family); 30 | return dev; 31 | } 32 | 33 | // Penalty matrix 34 | void penalty (double & pen, const arma::mat & u, const arma::vec & p) { 35 | pen = arma::accu((u % u) * arma::diagmat(p)); 36 | }; 37 | 38 | double penalty (const arma::mat & u, const arma::vec & p) { 39 | double pen; 40 | penalty(pen, u, p); 41 | return pen; 42 | }; 
-------------------------------------------------------------------------------- /tests/testcpp/test-minibatch.cpp: -------------------------------------------------------------------------------- 1 | // minibatch.cpp 2 | // author: Cristian Castiglione 3 | // creation: 06/10/2023 4 | // last change: 06/10/2023 5 | 6 | #include "minibatch.h" 7 | 8 | //' @keywords internal 9 | // [[Rcpp::export("cpp.get.chunk")]] 10 | arma::uvec cpp_get_chunk ( 11 | const int & iter, const int & n, 12 | const int & size, const bool & randomize 13 | ) { 14 | Chunks chunks; 15 | chunks.set_chunks(n, size, randomize); 16 | return chunks.get_chunk(iter); 17 | } 18 | 19 | //' @keywords internal 20 | // [[Rcpp::export("cpp.get.chunks")]] 21 | std::list cpp_get_chunks ( 22 | const arma::uvec & iters, const int & n, 23 | const int & size, const bool & randomize 24 | ) { 25 | Chunks chunks; 26 | chunks.set_chunks(n, size, randomize); 27 | return chunks.get_chunks(iters); 28 | } 29 | 30 | //' @keywords internal 31 | // [[Rcpp::export("cpp.get.next")]] 32 | Rcpp::List cpp_get_next ( 33 | const int & iter, const int & n, const bool & rnd 34 | ) { 35 | ChunkPile pile(n, rnd); 36 | for (int h = 0; h < iter; h++) { 37 | pile.update(); 38 | } 39 | Rcpp::List output; 40 | output["idx"] = pile.idx; 41 | output["tovisit"] = pile.tovisit; 42 | output["visited"] = pile.visited; 43 | return output; 44 | } -------------------------------------------------------------------------------- /tests/testthat/test-eigengap.R: -------------------------------------------------------------------------------- 1 | # file: test-eigengap.R 2 | # author: Cristian Castiglione 3 | # creation: 23/03/2024 4 | # last change: 04/10/2024 5 | 6 | testthat::test_that("Rank selecion", { 7 | n = 100; m = 20; d = 5 8 | 9 | # Generate data using Poisson, Binomial and Gamma models 10 | data_pois = sim.gmf.data(n = n, m = m, ncomp = d, family = poisson()) 11 | data_bin = sim.gmf.data(n = n, m = m, ncomp = d, family = binomial()) 12 | 
data_gam = sim.gmf.data(n = n, m = m, ncomp = d, family = Gamma(link = "log"), dispersion = 0.25) 13 | 14 | # Initialize the GMF parameters assuming 3 latent factors 15 | ncomp_pois = sgdgmf.rank(data_pois$Y, family = poisson(), normalize = TRUE) 16 | ncomp_bin = sgdgmf.rank(data_bin$Y, family = binomial(), normalize = TRUE) 17 | ncomp_gam = sgdgmf.rank(data_gam$Y, family = Gamma(link = "log"), normalize = TRUE) 18 | 19 | # Output class 20 | testthat::expect_true(is.numeric(ncomp_pois$ncomp)) 21 | testthat::expect_true(is.numeric(ncomp_bin$ncomp)) 22 | testthat::expect_true(is.numeric(ncomp_gam$ncomp)) 23 | 24 | # Output bounds 25 | testthat::expect_true(ncomp_pois$ncomp > 0 & ncomp_pois$ncomp <= m) 26 | testthat::expect_true(ncomp_bin$ncomp > 0 & ncomp_bin$ncomp <= m) 27 | testthat::expect_true(ncomp_gam$ncomp > 0 & ncomp_gam$ncomp <= m) 28 | }) 29 | -------------------------------------------------------------------------------- /man/deviance.sgdgmf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sgdGMF-class.R 3 | \name{deviance.sgdgmf} 4 | \alias{deviance.sgdgmf} 5 | \alias{AIC.sgdgmf} 6 | \alias{BIC.sgdgmf} 7 | \title{Compute deviance, AIC and BIC of a GMF model} 8 | \usage{ 9 | \method{deviance}{sgdgmf}(object, ..., normalize = FALSE) 10 | 11 | \method{AIC}{sgdgmf}(object, ..., k = 2) 12 | 13 | \method{BIC}{sgdgmf}(object, ...) 14 | } 15 | \arguments{ 16 | \item{object}{an object of class \code{sgdgmf}} 17 | 18 | \item{...}{further arguments passed to or from other methods} 19 | 20 | \item{normalize}{if \code{TRUE}, normalize the result using the null-deviance} 21 | 22 | \item{k}{the penalty parameter to be used for AIC; the default is \code{k = 2}} 23 | } 24 | \value{ 25 | The value of the deviance extracted from a \code{sgdgmf} object. 
26 | } 27 | \description{ 28 | Compute deviance, AIC and BIC of a GMF object 29 | } 30 | \examples{ 31 | # Load the sgdGMF package 32 | library(sgdGMF) 33 | 34 | # Generate data from a Poisson model 35 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson()) 36 | 37 | # Fit a GMF model with 3 latent factors 38 | gmf = sgdgmf.fit(data$Y, ncomp = 3, family = poisson()) 39 | 40 | # Get the GMF deviance, AIC and BIC 41 | deviance(gmf) 42 | AIC(gmf) 43 | BIC(gmf) 44 | 45 | } 46 | -------------------------------------------------------------------------------- /man/vglm.fit.coef.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/vglmfit.R 3 | \name{vglm.fit.coef} 4 | \alias{vglm.fit.coef} 5 | \title{Estimate the coefficients of a vector generalized linear model} 6 | \usage{ 7 | vglm.fit.coef( 8 | Y, 9 | X, 10 | family = gaussian(), 11 | weights = NULL, 12 | offset = NULL, 13 | parallel = FALSE, 14 | nthreads = 1, 15 | clust = NULL 16 | ) 17 | } 18 | \arguments{ 19 | \item{Y}{\eqn{n \times m} matrix of response variables} 20 | 21 | \item{X}{\eqn{n \times p} matrix of covariates} 22 | 23 | \item{family}{a \code{glm} family (see \code{\link{family}} for more details)} 24 | 25 | \item{weights}{\eqn{n \times m} matrix of weighting values} 26 | 27 | \item{offset}{\eqn{n \times m} matrix of offset values} 28 | 29 | \item{parallel}{if \code{TRUE}, allows for parallel computing using the \code{foreach} package} 30 | 31 | \item{nthreads}{number of cores to be used in parallel (only if \code{parallel=TRUE})} 32 | 33 | \item{clust}{registered cluster to be used for distributing the computations (only if \code{parallel=TRUE})} 34 | } 35 | \description{ 36 | Estimate the coefficients of a vector generalized linear model via parallel 37 | iterative re-weighted least squares. Computations can be performed in parallel 38 | to speed up the execution. 
39 | } 40 | \keyword{internal} 41 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # sgdGMF 2 | An R package for efficient estimation of generalized matrix factorization (GMF) models [[1,2,3]](#1,#2,#3). 3 | The package implements the adaptive stochastic gradient descent with block- and coordinate-wise sub-sampling strategies proposed in [[4]](#4). 4 | Additionally, sgdGMF implements the alternated iterative re-weighted least squares [[1,3]](#1,#3) and diagonal-Hessian quasi-Newton [[1]](#1) algorithms. 5 | 6 | ## References 7 | [1] 8 | Collins, M., Dasgupta, S., Schapire, R.E. (2001). 9 | A generalization of principal components analysis to the exponential family. 10 | Advances in neural information processing systems, 14. 11 | 12 | [2] 13 | Kidzinski, L., Hui, F.K.C., Warton, D.I., Hastie, T.J. (2022). 14 | Generalized Matrix Factorization: efficient algorithms for fitting generalized linear latent variable models to large data arrays. 15 | Journal of Machine Learning Research, 23(291): 1--29. 16 | 17 | [3] 18 | Wang, L., Carvalho, L. (2023). 19 | Deviance matrix factorization. 20 | Electronic Journal of Statistics, 17(2): 3762--3810. 21 | 22 | [4] 23 | Castiglione, C., Segers, A., Clement, L., Risso, D. (2024). 24 | Stochastic gradient descent estimation of generalized matrix factorization models with application to single-cell RNA sequencing data. 25 | arXiv preprint: arXiv:2412.20509.
26 | 27 | -------------------------------------------------------------------------------- /man/cpp.airwls.glmfit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{cpp.airwls.glmfit} 4 | \alias{cpp.airwls.glmfit} 5 | \title{Fisher scoring algorithm for GLMs} 6 | \usage{ 7 | cpp.airwls.glmfit( 8 | beta, 9 | y, 10 | X, 11 | familyname, 12 | linkname, 13 | varfname, 14 | offset, 15 | weights, 16 | penalty, 17 | nsteps = 100L, 18 | stepsize = 0.1, 19 | print = FALSE 20 | ) 21 | } 22 | \arguments{ 23 | \item{beta}{initial value of the regression coefficients to be estimated} 24 | 25 | \item{y}{response vector} 26 | 27 | \item{X}{design matrix} 28 | 29 | \item{familyname}{model family name} 30 | 31 | \item{linkname}{link function name} 32 | 33 | \item{varfname}{variance function name} 34 | 35 | \item{offset}{vector of constants to be added to the linear predictor} 36 | 37 | \item{weights}{vector of constants non-negative weights} 38 | 39 | \item{penalty}{penalty parameter of a ridge-type penalty} 40 | 41 | \item{nsteps}{number of iterations} 42 | 43 | \item{stepsize}{stepsize parameter of the Fisher scoring algorithm} 44 | 45 | \item{print}{if \code{TRUE}, print the algorithm history} 46 | } 47 | \description{ 48 | Internal function implementing the Fisher scoring algorithms for the 49 | estimation of GLMs. It is used in the AIRWLS algorithm for the 50 | estimation of GMF models. 
51 | } 52 | \keyword{internal} 53 | -------------------------------------------------------------------------------- /tests/testrcpp/test-minibatch.R: -------------------------------------------------------------------------------- 1 | # test-minibatch.R 2 | # author: Cristian Castiglione 3 | # creation: 07/10/2023 4 | # last change: 07/10/2023 5 | 6 | ## Workspace setup ---- 7 | rm(list = ls()) 8 | graphics.off() 9 | 10 | # Package compilation and import 11 | devtools::load_all() 12 | 13 | 14 | r_get_next = function (iter, n, rnd) { 15 | idx = -1 16 | tovisit = seq(from = 0, to = n-1, by = 1) 17 | visited = c() 18 | if (iter > 0) { 19 | for (i in 1:iter) { 20 | if (length(tovisit) == 0) { 21 | tovisit = visited 22 | visited = c() 23 | } 24 | if (rnd) { 25 | idx = sample(tovisit, 1, replace = FALSE) 26 | } else { 27 | idx = tovisit[1] 28 | } 29 | j = which(tovisit == idx) 30 | tovisit = tovisit[-j] 31 | visited = c(visited, idx) 32 | } 33 | } 34 | list(idx = idx, tovisit = tovisit, visited = visited) 35 | } 36 | 37 | 38 | ## Test: get_chunk() ---- 39 | sgdGMF::c_get_chunk(3, 10, 3, FALSE) 40 | 41 | ## Test: get_chunks() ---- 42 | sgdGMF::c_get_chunks(0:5, 10, 3, TRUE) 43 | 44 | ## Test: get_next() ---- 45 | print.pile = function (pile) { 46 | cat("idx =", pile$idx, "\n") 47 | cat("tovisit =", drop(pile$tovisit), "\n") 48 | cat("visited =", drop(pile$visited), "\n") 49 | } 50 | 51 | k = 0 52 | print.pile( r_get_next(k, 5, FALSE)) 53 | print.pile(sgdGMF::c_get_next(k, 5, FALSE)) 54 | k = k+1 55 | 56 | ## End of file ---- 57 | -------------------------------------------------------------------------------- /man/fitted.sgdgmf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sgdGMF-class.R 3 | \name{fitted.sgdgmf} 4 | \alias{fitted.sgdgmf} 5 | \title{Extract the fitted values of a GMF model} 6 | \usage{ 7 | \method{fitted}{sgdgmf}(object, ..., type
= c("link", "response", "terms"), partial = FALSE) 8 | } 9 | \arguments{ 10 | \item{object}{an object of class \code{sgdgmf}} 11 | 12 | \item{...}{further arguments passed to or from other methods} 13 | 14 | \item{type}{the type of fitted values which should be returned} 15 | 16 | \item{partial}{if \code{TRUE}, returns the partial fitted values} 17 | } 18 | \value{ 19 | If \code{type="terms"}, a list of fitted values containing the fields \code{XB}, 20 | \code{AZ} and \code{UV}. Otherwise, a matrix of fitted values in the link or 21 | response scale, depending on the selected \code{type}. 22 | } 23 | \description{ 24 | Computes the fitted values of a GMF model. 25 | } 26 | \examples{ 27 | # Load the sgdGMF package 28 | library(sgdGMF) 29 | 30 | # Generate data from a Poisson model 31 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson()) 32 | 33 | # Fit a GMF model with 3 latent factors 34 | gmf = sgdgmf.fit(data$Y, ncomp = 3, family = poisson()) 35 | 36 | # Get the fitted values of a GMF model 37 | str(fitted(gmf)) # returns the overall fitted values in link scale 38 | str(fitted(gmf, type = "response")) # returns the overall fitted values in response scale 39 | 40 | } 41 | -------------------------------------------------------------------------------- /man/deviance.initgmf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/initGMF-class.R 3 | \name{deviance.initgmf} 4 | \alias{deviance.initgmf} 5 | \alias{AIC.initgmf} 6 | \alias{BIC.initgmf} 7 | \title{Compute deviance, AIC and BIC of an initialized GMF model} 8 | \usage{ 9 | \method{deviance}{initgmf}(object, ..., normalize = FALSE) 10 | 11 | \method{AIC}{initgmf}(object, ..., k = 2) 12 | 13 | \method{BIC}{initgmf}(object, ...) 
14 | } 15 | \arguments{ 16 | \item{object}{an object of class \code{initgmf}} 17 | 18 | \item{...}{further arguments passed to or from other methods} 19 | 20 | \item{normalize}{if \code{TRUE}, normalize the result using the null-deviance} 21 | 22 | \item{k}{the penalty parameter to be used for AIC; the default is \code{k = 2}} 23 | } 24 | \value{ 25 | The value of the deviance extracted from an \code{initgmf} object. 26 | } 27 | \description{ 28 | Compute deviance, AIC and BIC of an initialized GMF object 29 | } 30 | \examples{ 31 | # Load the sgdGMF package 32 | library(sgdGMF) 33 | 34 | # Generate data from a Poisson model 35 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson()) 36 | 37 | # Initialize a GMF model with 3 latent factors 38 | init = sgdgmf.init(data$Y, ncomp = 3, family = poisson()) 39 | 40 | # Get the GMF deviance, AIC and BIC 41 | deviance(init) 42 | AIC(init) 43 | BIC(init) 44 | 45 | } 46 | \seealso{ 47 | \code{\link{deviance.sgdgmf}}, \code{\link{AIC.sgdgmf}} and \code{\link{BIC.sgdgmf}}. 48 | } 49 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures?
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: R-CMD-check 10 | 11 | jobs: 12 | R-CMD-check: 13 | runs-on: ${{ matrix.config.os }} 14 | 15 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 16 | 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | config: 21 | - {os: macOS-latest, r: 'release'} 22 | - {os: windows-latest, r: 'release'} 23 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 24 | - {os: ubuntu-latest, r: 'release'} 25 | - {os: ubuntu-latest, r: 'oldrel-1'} 26 | 27 | env: 28 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 29 | R_KEEP_PKG_SOURCE: yes 30 | 31 | steps: 32 | - uses: actions/checkout@v2 33 | 34 | - uses: r-lib/actions/setup-pandoc@v2 35 | 36 | - uses: r-lib/actions/setup-r@v2 37 | with: 38 | r-version: ${{ matrix.config.r }} 39 | http-user-agent: ${{ matrix.config.http-user-agent }} 40 | use-public-rspm: true 41 | 42 | - uses: r-lib/actions/setup-r-dependencies@v2 43 | with: 44 | extra-packages: any::rcmdcheck 45 | needs: check 46 | 47 | - uses: r-lib/actions/check-r-package@v2 48 | with: 49 | upload-snapshots: true 50 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # sgdGMF 1.0.2 2 | * `sgdgmf.fit` : implemented orthogonality between covariates and latent variables 3 | * `orthogonalize` (new function) : implemented orthogonality between covariates and latent variables 4 | * `sgdgmf.fit` : implemented the possibility to not save a copy of the data and fitted values 5 | * `set.control.airwls` : introduced new argument `savedata` to specify if store a copy of the data or not 6 | * `set.control.newton` : introduced new argument `savedata` to specify if store a copy of the data or not 7 | * `set.control.coord.sgd` : introduced new argument `savedata` to specify if store a copy of
the data or not 8 | * `set.control.block.sgd` : introduced new argument `savedata` to specify if store a copy of the data or not 9 | * `storedata` (new function) : implemented ex-post inclusion of data in a generic object 10 | * `storedata.sgdgmf` (new method) : implemented ex-post inclusion of data in a fitted `sgdgmf` object 11 | * `sgdgmf.init` : implemented `method = "light"` and improved the memory usage 12 | * `sgdgmf.init.light` (new function) : implemented a memory efficient version of `sgdgmf.init.ols` with `type = "link"` 13 | 14 | # sgdGMF 1.0.1 15 | 16 | * `sgdgmf.rank` : changed default method from `"onatski"` to `"evr"` method 17 | * `eigengap.evr` (new function) : implemented the eigenvalue ratio method for rank selection 18 | * `eigengap.onatski` : fixed bug occurring when no optimal rank can be selected 19 | * added option `CXXFLAGS = $(CXXFLAGS) -Os` to `Makevars` and `Makevars.win` files to optimize the memory space used by compiled C++ files 20 | -------------------------------------------------------------------------------- /man/fitted.initgmf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/initGMF-class.R 3 | \name{fitted.initgmf} 4 | \alias{fitted.initgmf} 5 | \title{Extract the fitted values of an initialized GMF model} 6 | \usage{ 7 | \method{fitted}{initgmf}(object, ..., type = c("link", "response", "terms"), partial = FALSE) 8 | } 9 | \arguments{ 10 | \item{object}{an object of class \code{initgmf}} 11 | 12 | \item{...}{further arguments passed to or from other methods} 13 | 14 | \item{type}{the type of fitted values which should be returned} 15 | 16 | \item{partial}{if \code{TRUE}, returns the partial fitted values} 17 | } 18 | \value{ 19 | If \code{type="terms"}, a list of fitted values containing the fields \code{XB}, 20 | \code{AZ} and \code{UV}. 
Otherwise, a matrix of fitted values in the link or 21 | response scale, depending on the selected \code{type}. 22 | } 23 | \description{ 24 | Computes the fitted values of an initialized GMF model. 25 | } 26 | \examples{ 27 | # Load the sgdGMF package 28 | library(sgdGMF) 29 | 30 | # Generate data from a Poisson model 31 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson()) 32 | 33 | # Fit a GMF model with 3 latent factors 34 | init = sgdgmf.init(data$Y, ncomp = 3, family = poisson()) 35 | 36 | # Get the fitted values of a GMF model 37 | str(fitted(init)) # returns the overall fitted values in link scale 38 | str(fitted(init, type = "response")) # returns the overall fitted values in response scale 39 | str(fitted(init, partial = TRUE)) # returns the partial fitted values in link scale 40 | 41 | } 42 | \seealso{ 43 | \code{\link{fitted.sgdgmf}}. 44 | } 45 | -------------------------------------------------------------------------------- /man/orthogonalize.uv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{orthogonalize.uv} 4 | \alias{orthogonalize.uv} 5 | \alias{orthogonalize.svd} 6 | \alias{orthogonalize.qr} 7 | \alias{orthogonalize.std} 8 | \title{Normalize the matrices U and V} 9 | \usage{ 10 | orthogonalize.uv( 11 | U, 12 | V, 13 | method = c("QR", "SVD", "ZCA", "ZCA-cor", "PCA", "PCA-cor", "Cholesky") 14 | ) 15 | 16 | orthogonalize.svd(U, V) 17 | 18 | orthogonalize.qr(U, V) 19 | 20 | orthogonalize.std(U, V, method) 21 | } 22 | \description{ 23 | Rotate U and V using either QR or SVD decompositions. 
24 | } 25 | \details{ 26 | Orthogonalization is implemented using the following methods: 27 | \itemize{ 28 | \item \code{method = "SVD"}: orthogonal \eqn{U} and scaled orthogonal \eqn{V} based on SVD decomposition; 29 | \item \code{method = "QR"}: orthogonal \eqn{U} and lower triangular \eqn{V} based on QR decomposition; 30 | \item \code{method = "ZCA"}: standardized \eqn{U} and lower triangular \eqn{V} based on ZCA whitening and QR decomposition; 31 | \item \code{method = "ZCA-cor"}: uncorrelated \eqn{U} and lower triangular \eqn{V} based on ZCA whitening and QR decomposition; 32 | \item \code{method = "PCA"}: standardized \eqn{U} and lower triangular \eqn{V} based on PCA whitening and QR decomposition; 33 | \item \code{method = "PCA-cor"}: uncorrelated \eqn{U} and lower triangular \eqn{V} based on PCA whitening and QR decomposition; 34 | \item \code{method = "Cholesky"}: standardized \eqn{U} and lower triangular \eqn{V} based on Cholesky whitening and QR decomposition. 35 | } 36 | } 37 | \keyword{internal} 38 | -------------------------------------------------------------------------------- /man/coefficients.sgdgmf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sgdGMF-class.R 3 | \name{coefficients.sgdgmf} 4 | \alias{coefficients.sgdgmf} 5 | \alias{coef.sgdgmf} 6 | \title{Extract the coefficient of a GMF model} 7 | \usage{ 8 | \method{coefficients}{sgdgmf}( 9 | object, 10 | ..., 11 | type = c("all", "colreg", "rowreg", "scores", "loadings") 12 | ) 13 | 14 | \method{coef}{sgdgmf}(object, ..., type = c("all", "colreg", "rowreg", "scores", "loadings")) 15 | } 16 | \arguments{ 17 | \item{object}{an object of class \code{sgdgmf}} 18 | 19 | \item{...}{further arguments passed to or from other methods} 20 | 21 | \item{type}{the type of coefficients which should be returned} 22 | } 23 | \value{ 24 | If \code{type="all"}, a list of coefficients 
containing the fields \code{B}, \code{A}, \code{U} and \code{V}. 25 | Otherwise, a matrix of coefficients, corresponding to the selected \code{type}. 26 | } 27 | \description{ 28 | Return the estimated coefficients of a GMF model, i.e., the row- and column-specific 29 | regression effects, the latent scores and loadings. 30 | } 31 | \examples{ 32 | # Load the sgdGMF package 33 | library(sgdGMF) 34 | 35 | # Generate data from a Poisson model 36 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson()) 37 | 38 | # Fit a GMF model with 3 latent factors 39 | gmf = sgdgmf.fit(data$Y, ncomp = 3, family = poisson()) 40 | 41 | # Get the estimated coefficients of a GMF model 42 | str(coefficients(gmf)) # returns all the coefficients 43 | str(coefficients(gmf, type = "scores")) # returns only the scores, say U 44 | str(coefficients(gmf, type = "loadings")) # returns only the loadings, say V 45 | 46 | } 47 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .Rapp.history 4 | 5 | # Session Data files 6 | .RData 7 | .RDataTmp 8 | 9 | # User-specific files 10 | .Ruserdata 11 | 12 | # Example code in package build process 13 | *-Ex.R 14 | 15 | # Output files from R CMD build 16 | /*.tar.gz 17 | 18 | # Output files from R CMD check 19 | /*.Rcheck/ 20 | 21 | # RStudio files 22 | .Rproj.user 23 | .Rproj.user/ 24 | .Rproj.user/** 25 | 26 | # produced vignettes 27 | vignettes/*.html 28 | vignettes/*.pdf 29 | 30 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 31 | .httr-oauth 32 | 33 | # knitr and R markdown default cache directories 34 | *_cache/ 35 | /cache/ 36 | 37 | # Temporary files created by R markdown 38 | *.utf8.md 39 | *.knit.md 40 | 41 | # R Environment Variables 42 | .Renviron 43 | 44 | # pkgdown site 45 | docs/ 46 | 47 | # translation temp files 48 | po/*~ 49 | 50 | # RStudio 
Connect folder 51 | rsconnect/ 52 | 53 | # R package: bookdown caching files 54 | /*_files/ 55 | 56 | # C++ 57 | *.o 58 | *.so 59 | *.dll 60 | src/*.o 61 | src/*.so 62 | src/*.dll 63 | 64 | # Rcpp 65 | Rcpp.pro 66 | Rcpp.pro.user 67 | *.autosave 68 | 69 | # VS-Code 70 | .vscode 71 | .vscode/ 72 | 73 | # Image folder 74 | img 75 | img/ 76 | img/** 77 | 78 | # Old folder 79 | old 80 | old/ 81 | old/** 82 | 83 | # Simulation folder 84 | sim 85 | sim/ 86 | sim/** 87 | 88 | # Sandbox folder 89 | sandbox 90 | sandbox/ 91 | sandbox/** 92 | 93 | # Zip archive folder 94 | zip 95 | zip/ 96 | zip/** 97 | 98 | # CRAN related folder 99 | cran 100 | cran/ 101 | cran/** 102 | 103 | # Compressed files 104 | *.zip 105 | inst/doc 106 | -------------------------------------------------------------------------------- /man/cpp.airwls.update.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{cpp.airwls.update} 4 | \alias{cpp.airwls.update} 5 | \title{AIRWLS update for GMF models} 6 | \usage{ 7 | cpp.airwls.update( 8 | beta, 9 | Y, 10 | X, 11 | familyname, 12 | linkname, 13 | varfname, 14 | idx, 15 | offset, 16 | weights, 17 | penalty, 18 | transp = FALSE, 19 | nsteps = 100L, 20 | stepsize = 0.1, 21 | print = FALSE, 22 | parallel = FALSE, 23 | nthreads = 1L 24 | ) 25 | } 26 | \arguments{ 27 | \item{beta}{initial value of the regression coefficients to be estimated} 28 | 29 | \item{Y}{response vector} 30 | 31 | \item{X}{design matrix} 32 | 33 | \item{familyname}{model family name} 34 | 35 | \item{linkname}{link function name} 36 | 37 | \item{varfname}{variance function name} 38 | 39 | \item{idx}{index identifying the parameters to be updated in \code{beta}} 40 | 41 | \item{offset}{vector of constants to be added to the linear predictor} 42 | 43 | \item{weights}{vector of constants non-negative weights} 44 | 45 | \item{penalty}{penalty parameter of 
a ridge-type penalty} 46 | 47 | \item{transp}{if \code{TRUE}, transpose the data} 48 | 49 | \item{nsteps}{number of iterations} 50 | 51 | \item{stepsize}{stepsize parameter of the Fisher scoring algorithm} 52 | 53 | \item{print}{if \code{TRUE}, print the algorithm history} 54 | 55 | \item{parallel}{if \code{TRUE}, run the updates in parallel using \code{openMP}} 56 | 57 | \item{nthreads}{number of threads to be run in parallel (only if \code{parallel=TRUE})} 58 | } 59 | \description{ 60 | Internal function implementing one step of AIRWLS for the 61 | estimation of GMF models. 62 | } 63 | \keyword{internal} 64 | -------------------------------------------------------------------------------- /man/set.control.cv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/control.R 3 | \name{set.control.cv} 4 | \alias{set.control.cv} 5 | \title{Check and set the cross-validation parameters} 6 | \usage{ 7 | set.control.cv( 8 | criterion = c("dev", "mae", "mse", "aic", "bic"), 9 | refit = TRUE, 10 | nfolds = 5, 11 | proportion = 0.3, 12 | init = c("common", "separate"), 13 | verbose = FALSE, 14 | parallel = FALSE, 15 | nthreads = 1 16 | ) 17 | } 18 | \arguments{ 19 | \item{criterion}{information criterion to minimize for selecting the matrix rank} 20 | 21 | \item{refit}{if \code{TRUE}, refit the model with the selected rank and return the fitted model} 22 | 23 | \item{nfolds}{number of cross-validation folds} 24 | 25 | \item{proportion}{proportion of the data to be used as test set in each fold} 26 | 27 | \item{init}{initialization approach to use} 28 | 29 | \item{verbose}{if \code{TRUE}, print the cross-validation status} 30 | 31 | \item{parallel}{if \code{TRUE}, allows for parallel computing} 32 | 33 | \item{nthreads}{number of cores to use in parallel (only if \code{parallel=TRUE})} 34 | } 35 | \value{ 36 | A \code{list} of control parameters for the 
cross-validation algorithm 37 | } 38 | \description{ 39 | Check if the input cross-validation parameters are allowed and set them to default 40 | values if they are not. Returns a list of well-defined cross-validation parameters. 41 | } 42 | \examples{ 43 | library(sgdGMF) 44 | 45 | # Empty call 46 | set.control.cv() 47 | 48 | # Parametrized call 49 | set.control.cv(criterion = "bic", proportion = 0.2) 50 | 51 | } 52 | \seealso{ 53 | \code{\link{set.control.init}}, \code{\link{set.control.alg}}, \code{\link{sgdgmf.cv}} 54 | } 55 | -------------------------------------------------------------------------------- /man/biplot.sgdgmf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sgdGMF-class.R 3 | \name{biplot.sgdgmf} 4 | \alias{biplot.sgdgmf} 5 | \title{Biplot of a GMF model} 6 | \usage{ 7 | \method{biplot}{sgdgmf}( 8 | x, 9 | ..., 10 | choices = 1:2, 11 | arrange = TRUE, 12 | byrow = FALSE, 13 | normalize = FALSE, 14 | labels = NULL, 15 | palette = NULL, 16 | titles = c(NULL, NULL) 17 | ) 18 | } 19 | \arguments{ 20 | \item{x}{an object of class \code{sgdgmf}} 21 | 22 | \item{...}{further arguments passed to or from other methods} 23 | 24 | \item{choices}{a length 2 vector specifying the components to plot} 25 | 26 | \item{arrange}{if \code{TRUE}, return a single plot with two panels} 27 | 28 | \item{byrow}{if \code{TRUE}, the panels are arranged row-wise (if \code{arrange=TRUE})} 29 | 30 | \item{normalize}{if \code{TRUE}, orthogonalizes the scores using SVD} 31 | 32 | \item{labels}{a vector of labels which should be plotted} 33 | 34 | \item{palette}{the color-palette which should be used} 35 | 36 | \item{titles}{a 2-dimensional string vector containing the plot titles} 37 | } 38 | \value{ 39 | If \code{arrange=TRUE}, a single ggplot object with the selected biplots, 40 | otherwise, a list of two ggplot objects showing the row and column latent 
variables. 41 | } 42 | \description{ 43 | Plot the observations on a two-dimensional projection determined by the 44 | estimated score matrix 45 | } 46 | \examples{ 47 | \donttest{# Load the sgdGMF package 48 | library(sgdGMF) 49 | 50 | # Generate data from a Poisson model 51 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson()) 52 | 53 | # Fit a GMF model 54 | gmf = sgdgmf.fit(data$Y, ncomp = 3, family = poisson()) 55 | 56 | # Get the biplot of a GMF model 57 | biplot(gmf) 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /man/coefficients.initgmf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/initGMF-class.R 3 | \name{coefficients.initgmf} 4 | \alias{coefficients.initgmf} 5 | \alias{coef.initgmf} 6 | \title{Extract the coefficient of an initialized GMF model} 7 | \usage{ 8 | \method{coefficients}{initgmf}( 9 | object, 10 | ..., 11 | type = c("all", "colreg", "rowreg", "scores", "loadings") 12 | ) 13 | 14 | \method{coef}{initgmf}(object, ..., type = c("all", "colreg", "rowreg", "scores", "loadings")) 15 | } 16 | \arguments{ 17 | \item{object}{an object of class \code{initgmf}} 18 | 19 | \item{...}{further arguments passed to or from other methods} 20 | 21 | \item{type}{the type of coefficients which should be returned} 22 | } 23 | \value{ 24 | If \code{type="all"}, a list of coefficients containing the fields \code{B}, \code{A}, \code{U} and \code{V}. 25 | Otherwise, a matrix of coefficients, corresponding to the selected \code{type}. 26 | } 27 | \description{ 28 | Return the initialized coefficients of a GMF model, i.e., the row- and column-specific 29 | regression effects, the latent scores and loadings. 
30 | } 31 | \examples{ 32 | # Load the sgdGMF package 33 | library(sgdGMF) 34 | 35 | # Generate data from a Poisson model 36 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson()) 37 | 38 | # Fit a GMF model with 3 latent factors 39 | init = sgdgmf.init(data$Y, ncomp = 3, family = poisson()) 40 | 41 | # Get the estimated coefficients of a GMF model 42 | str(coefficients(init)) # returns all the coefficients 43 | str(coefficients(init, type = "scores")) # returns only the scores, say U 44 | str(coefficients(init, type = "loadings")) # returns only the loadings, say V 45 | 46 | } 47 | \seealso{ 48 | \code{\link{coefficients.sgdgmf}} and \code{\link{coef.sgdgmf}}. 49 | } 50 | -------------------------------------------------------------------------------- /man/biplot.initgmf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/initGMF-class.R 3 | \name{biplot.initgmf} 4 | \alias{biplot.initgmf} 5 | \title{Biplot of an initialized GMF model} 6 | \usage{ 7 | \method{biplot}{initgmf}( 8 | x, 9 | ..., 10 | choices = 1:2, 11 | arrange = TRUE, 12 | byrow = FALSE, 13 | normalize = FALSE, 14 | labels = NULL, 15 | palette = NULL 16 | ) 17 | } 18 | \arguments{ 19 | \item{x}{an object of class \code{initgmf}} 20 | 21 | \item{...}{further arguments passed to or from other methods} 22 | 23 | \item{choices}{a length 2 vector specifying the components to plot} 24 | 25 | \item{arrange}{if \code{TRUE}, return a single plot with two panels} 26 | 27 | \item{byrow}{if \code{TRUE}, the panels are arranged row-wise (if \code{arrange=TRUE})} 28 | 29 | \item{normalize}{if \code{TRUE}, orthogonalizes the scores using SVD} 30 | 31 | \item{labels}{a vector of labels which should be plotted} 32 | 33 | \item{palette}{the color-palette which should be used} 34 | } 35 | \value{ 36 | If \code{arrange=TRUE}, a single ggplot object with the selected biplots, 37 | 
otherwise, a list of two ggplot objects showing the row and column latent variables. 38 | } 39 | \description{ 40 | Plot the observations on a two-dimensional projection determined by the 41 | estimated score matrix 42 | } 43 | \examples{ 44 | \donttest{# Load the sgdGMF package 45 | library(sgdGMF) 46 | 47 | # Generate data from a Poisson model 48 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson()) 49 | 50 | # Fit a GMF model 51 | init = sgdgmf.init(data$Y, ncomp = 3, family = poisson()) 52 | 53 | # Get the biplot of a GMF model 54 | biplot(init) # 1st vs 2nd principal components 55 | biplot(init, choices = 2:3) #2nd vs 3rd principal components 56 | } 57 | } 58 | \seealso{ 59 | \code{\link{biplot.sgdgmf}}. 60 | } 61 | -------------------------------------------------------------------------------- /man/set.control.alg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/control.R 3 | \name{set.control.alg} 4 | \alias{set.control.alg} 5 | \title{Check and set the control parameters for the select optimization algorithm} 6 | \usage{ 7 | set.control.alg( 8 | method = c("airwls", "newton", "sgd"), 9 | sampling = c("block", "coord", "rnd-block"), 10 | control = list() 11 | ) 12 | } 13 | \arguments{ 14 | \item{method}{optimization method to use} 15 | 16 | \item{sampling}{sub-sampling method to use} 17 | 18 | \item{control}{list of algorithm-specific control parameters} 19 | } 20 | \value{ 21 | A \code{list} of control parameters for the selected estimation algorithm 22 | } 23 | \description{ 24 | Check if the input control parameters are allowed and set them to default 25 | values if they are not. Returns a list of well-defined control parameters. 
26 | } 27 | \details{ 28 | It is not necessary to provide a complete list of control parameters; one can 29 | just specify a list containing the parameters that need to be changed from the 30 | default values. Wrongly specified parameters are ignored or set to default values. 31 | For a detailed description of all the algorithm-specific control parameters, 32 | please refer to 33 | \code{\link{set.control.airwls}} (\code{method="airwls"}), 34 | \code{\link{set.control.newton}} (\code{method="newton"}), 35 | \code{\link{set.control.block.sgd}} (\code{method="sgd"}, \code{sampling="block"}), 36 | \code{\link{set.control.coord.sgd}} (\code{method="sgd"}, \code{sampling="coord"}). 37 | } 38 | \examples{ 39 | library(sgdGMF) 40 | 41 | # Empty call 42 | set.control.alg() 43 | 44 | # Parametrized call 45 | set.control.alg(method = "airwls", control = list(maxiter = 200, stepsize = 0.3)) 46 | 47 | 48 | } 49 | \seealso{ 50 | \code{\link{set.control.init}}, \code{\link{set.control.cv}}, \code{\link{sgdgmf.fit}} 51 | } 52 | -------------------------------------------------------------------------------- /man/sgdGMF-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sgdGMF-package.R 3 | \docType{package} 4 | \name{sgdGMF-package} 5 | \alias{sgdGMF} 6 | \alias{sgdGMF-package} 7 | \title{sgdGMF: Estimation of Generalized Matrix Factorization Models via Stochastic Gradient Descent} 8 | \description{ 9 | Efficient framework to estimate high-dimensional generalized matrix factorization models using penalized maximum likelihood under a dispersion exponential family specification. Both deterministic and stochastic methods are implemented for the numerical maximization. 
In particular, the package implements the stochastic gradient descent algorithm with a block-wise mini-batch strategy to speed up the computations and an efficient adaptive learning rate schedule to stabilize the convergence. All the theoretical details can be found in Castiglione et al. (2024, \doi{10.48550/arXiv.2412.20509}). Other methods considered for the optimization are the alternated iterative re-weighted least squares and the quasi-Newton method with diagonal approximation of the Fisher information matrix discussed in Kidzinski et al. (2022, \url{http://jmlr.org/papers/v23/20-1104.html}). 10 | } 11 | \seealso{ 12 | Useful links: 13 | \itemize{ 14 | \item \url{https://github.com/CristianCastiglione/sgdGMF} 15 | \item Report bugs at \url{https://github.com/CristianCastiglione/sgdGMF/issues} 16 | } 17 | 18 | } 19 | \author{ 20 | \strong{Maintainer}: Cristian Castiglione \email{cristian_castiglione@libero.it} (\href{https://orcid.org/0000-0001-5883-4890}{ORCID}) 21 | 22 | Other contributors: 23 | \itemize{ 24 | \item Davide Risso \email{davide.risso@unipd.it} (\href{https://orcid.org/0000-0001-8508-5012}{ORCID}) [contributor] 25 | \item Alexandre Segers \email{alexandre.segers@ugent.be} (\href{https://orcid.org/0009-0004-2028-7595}{ORCID}) [contributor] 26 | } 27 | 28 | } 29 | \keyword{internal} 30 | -------------------------------------------------------------------------------- /man/storedata.sgdgmf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sgdGMF-class.R 3 | \name{storedata.sgdgmf} 4 | \alias{storedata.sgdgmf} 5 | \title{Save response and covariate data into an empty sgdGMF object} 6 | \usage{ 7 | \method{storedata}{sgdgmf}(object, ..., Y = NULL, X = NULL, Z = NULL) 8 | } 9 | \arguments{ 10 | \item{object}{an object of class \code{sgdgmf}} 11 | 12 | \item{...}{further arguments passed to or from other methods} 13 | 14 | 
\item{Y}{matrix of responses (\eqn{n \times m})} 15 | 16 | \item{X}{matrix of row fixed effects (\eqn{n \times p})} 17 | 18 | \item{Z}{matrix of column fixed effects (\eqn{q \times m})} 19 | } 20 | \value{ 21 | A \code{sgdgmf} object containing a copy of the data 22 | } 23 | \description{ 24 | Save response and covariate data into an empty sgdGMF object 25 | } 26 | \examples{ 27 | \donttest{# Load the sgdGMF package 28 | library(sgdGMF) 29 | 30 | # Generate data from a Poisson model 31 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson()) 32 | 33 | # Fit a GMF model without storing a copy of the data 34 | gmf = sgdgmf.fit(data$Y, ncomp = 3, family = poisson(), 35 | control.alg = list(savedata = FALSE)) 36 | 37 | cat("savedata:", gmf$control.alg$savedata, "\n") 38 | cat("Is Y null?", is.null(gmf$Y), "\n") 39 | cat("Is X null?", is.null(gmf$X), "\n") 40 | cat("Is Z null?", is.null(gmf$Z), "\n") 41 | cat("Is eta null?", is.null(gmf$eta), "\n") 42 | cat("Is mu null?", is.null(gmf$mu), "\n") 43 | cat("Is var null?", is.null(gmf$var), "\n") 44 | 45 | # Store the data in the GMF object a posteriori 46 | gmf = storedata(gmf, Y = data$Y) 47 | 48 | cat("savedata:", gmf$control.alg$savedata, "\n") 49 | cat("Y:", dim(gmf$Y), "\n") 50 | cat("X:", dim(gmf$X), "\n") 51 | cat("Z:", dim(gmf$Z), "\n") 52 | cat("eta:", dim(gmf$eta), "\n") 53 | cat("mu:", dim(gmf$mu), "\n") 54 | cat("var:", dim(gmf$var), "\n") 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /man/screeplot.sgdgmf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sgdGMF-class.R 3 | \name{screeplot.sgdgmf} 4 | \alias{screeplot.sgdgmf} 5 | \title{Screeplot for the residuals of a GMF model} 6 | \usage{ 7 | \method{screeplot}{sgdgmf}( 8 | x, 9 | ..., 10 | ncomp = 20, 11 | type = c("deviance", "pearson", "working", "response", "link"), 12 
| partial = FALSE, 13 | normalize = FALSE, 14 | cumulative = FALSE, 15 | proportion = FALSE 16 | ) 17 | } 18 | \arguments{ 19 | \item{x}{an object of class \code{sgdgmf}} 20 | 21 | \item{...}{further arguments passed to or from other methods} 22 | 23 | \item{ncomp}{number of components to be plotted} 24 | 25 | \item{type}{the type of residuals which should be used} 26 | 27 | \item{partial}{if \code{TRUE}, plots the eigenvalues of the partial residuals} 28 | 29 | \item{normalize}{if \code{TRUE}, plots the eigenvalues of the standardized residuals} 30 | 31 | \item{cumulative}{if \code{TRUE}, plots the cumulative sum of the eigenvalues} 32 | 33 | \item{proportion}{if \code{TRUE}, plots the fractions of explained variance} 34 | } 35 | \value{ 36 | A ggplot object showing the residual screeplot of the model. 37 | } 38 | \description{ 39 | Plots the variances of the principal components of the residuals against the 40 | index of the principal component. 41 | } 42 | \examples{ 43 | \donttest{# Load the sgdGMF package 44 | library(sgdGMF) 45 | 46 | # Generate data from a Poisson model 47 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson()) 48 | 49 | # Fit a GMF model 50 | gmf = sgdgmf.fit(data$Y, ncomp = 3, family = poisson()) 51 | 52 | # Get the partial residual spectrum of a GMF model 53 | screeplot(gmf) # screeplot of the var-cov matrix of the deviance residuals 54 | screeplot(gmf, partial = TRUE) # screeplot of the partial residuals 55 | screeplot(gmf, cumulative = TRUE) # cumulative screeplot 56 | screeplot(gmf, proportion = TRUE) # proportion of explained residual variance 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /tests/testrcpp/test-newton.R: -------------------------------------------------------------------------------- 1 | # test-newton.R 2 | # author: Cristian Castiglione 3 | # creation: 02/10/2023 4 | # last change: 04/10/2023 5 | 6 | ## Workspace setup ---- 7 | rm(list = ls()) 8 | 
graphics.off() 9 | 10 | # Package compilation and import 11 | devtools::load_all() 12 | 13 | ## Test: synthetic data ---- 14 | n = 100 15 | m = 20 16 | d = 3 17 | p = 3 18 | q = 4 19 | 20 | 21 | 22 | family = poisson() 23 | 24 | X = matrix(rnorm(n*p), nrow = n, ncol = p) / sqrt(3) 25 | B = matrix(rnorm(m*p), nrow = m, ncol = p) / sqrt(3) 26 | A = matrix(rnorm(n*q), nrow = n, ncol = q) / sqrt(3) 27 | Z = matrix(rnorm(m*q), nrow = m, ncol = q) / sqrt(3) 28 | U = matrix(rnorm(n*d), nrow = n, ncol = d) / sqrt(3) 29 | V = matrix(rnorm(m*d), nrow = m, ncol = d) / sqrt(3) 30 | 31 | eta = tcrossprod(cbind(X, A, U), cbind(B, Z, V)) 32 | mu = family$linkinv(eta) 33 | 34 | Y = matrix(rpois(n*m, mu), nrow = n, ncol = m) 35 | 36 | plot3D::image2D(log1p(Y)) 37 | 38 | logY = log(Y + 0.1) 39 | B0 = t(solve(crossprod(X), crossprod(X, logY))) 40 | A0 = t(solve(crossprod(Z), crossprod(Z, t(logY - tcrossprod(X, B0))))) 41 | UV = svd::propack.svd(logY - tcrossprod(cbind(X, A0), cbind(B0, Z)), neig = d) 42 | U0 = UV$u %*% diag(sqrt(UV$d)) 43 | V0 = UV$v %*% diag(sqrt(UV$d)) 44 | 45 | cfit = sgdGMF::c_fit_newton( 46 | Y = Y, X = X, B = B0, A = A0, Z = Z, U = U0, V = V0, 47 | familyname = "poisson", linkname = "log", ncomp = d, 48 | lambda = c(0, 0, 1, 0), maxiter = 500, stepsize = 0.1, 49 | tol = 1e-05, frequency = 50) 50 | 51 | rfit = sgdGMF::sgdgmf(Y, X, Z, family = poisson(), ncomp = d, 52 | init = list(niter = 0), 53 | control = list(maxiter = 500, stepsize = 0.1)) 54 | 55 | fit$mu 56 | fit$eta 57 | fit$U 58 | fit$V 59 | tcrossprod(fit$U, fit$V) 60 | 61 | plot(c(rfit$pred$mu), c(cfit$mu)) 62 | plot(c(Y), c(cfit$mu)) 63 | cor(c(rfit$pred$mu), c(cfit$mu)) 64 | 65 | plot3D::image2D(rfit$pred$mu) 66 | plot3D::image2D(cfit$mu) 67 | plot3D::image2D(Y) 68 | 69 | all.equal(c(rfit$pred$mu), c(cfit$mu)) 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /man/set.control.init.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/control.R 3 | \name{set.control.init} 4 | \alias{set.control.init} 5 | \title{Check and set the initialization parameters for a GMF model} 6 | \usage{ 7 | set.control.init( 8 | method = c("ols", "glm", "light", "random", "values"), 9 | type = c("deviance", "pearson", "working", "link"), 10 | values = list(), 11 | niter = 5, 12 | normalize = TRUE, 13 | verbose = FALSE, 14 | parallel = FALSE, 15 | nthreads = 1 16 | ) 17 | } 18 | \arguments{ 19 | \item{method}{initialization method (see \code{\link{sgdgmf.init}} for more details on the initialization methods used)} 20 | 21 | \item{type}{residual type to be decomposed (see \code{\link{sgdgmf.init}} for more details on the residuals used)} 22 | 23 | \item{values}{list of custom initialization parameters fixed by the user} 24 | 25 | \item{niter}{number of refinement iterations in the \code{"svd"} method} 26 | 27 | \item{normalize}{if \code{TRUE}, normalize \code{U} and \code{V} to orthogonal \code{U} and lower triangular \code{V}} 28 | 29 | \item{verbose}{if \code{TRUE}, print the initialization state} 30 | 31 | \item{parallel}{if \code{TRUE}, use parallel computing for the \code{"glm"} method} 32 | 33 | \item{nthreads}{number of cores to be used in the \code{"glm"} method} 34 | } 35 | \value{ 36 | A \code{list} of control parameters for the initialization 37 | } 38 | \description{ 39 | Check if the input initialization parameters are allowed and set them to default 40 | values if they are not. Returns a list of well-defined options which specify how 41 | to initialize a GMF model. See \code{\link{sgdgmf.init}} for more details on the methods used for initialization. 
42 | } 43 | \examples{ 44 | library(sgdGMF) 45 | 46 | # Empty call 47 | set.control.init() 48 | 49 | # Parametrized call 50 | set.control.init(method = "glm", type = "deviance", niter = 10) 51 | 52 | } 53 | \seealso{ 54 | \code{\link{set.control.alg}}, \code{\link{set.control.cv}}, \code{\link{sgdgmf.init}} 55 | } 56 | -------------------------------------------------------------------------------- /man/image.sgdgmf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sgdGMF-class.R 3 | \name{image.sgdgmf} 4 | \alias{image.sgdgmf} 5 | \title{Heatmap of a GMF model} 6 | \usage{ 7 | \method{image}{sgdgmf}( 8 | x, 9 | ..., 10 | type = c("data", "response", "link", "scores", "loadings", "deviance", "pearson", 11 | "working"), 12 | resid = FALSE, 13 | symmetric = FALSE, 14 | transpose = FALSE, 15 | limits = NULL, 16 | palette = NULL 17 | ) 18 | } 19 | \arguments{ 20 | \item{x}{an object of class \code{sgdgmf}} 21 | 22 | \item{...}{further arguments passed to or from other methods} 23 | 24 | \item{type}{the type of data/predictions/residuals which should be returned} 25 | 26 | \item{resid}{if \code{TRUE}, plots the residual values} 27 | 28 | \item{symmetric}{if \code{TRUE}, symmetrizes the color limits} 29 | 30 | \item{transpose}{if \code{TRUE}, transposes the matrix before plotting it} 31 | 32 | \item{limits}{the color limits which should be used} 33 | 34 | \item{palette}{the color-palette which should be used} 35 | } 36 | \value{ 37 | A ggplot object showing the selected heatmap. 38 | } 39 | \description{ 40 | Plots a heatmap of either the data, the fitted values, or the residual values 41 | of a GMF model allowing for different types of transformations and normalizations. 42 | Moreover, it also permits to plot the latent score and loading matrices. 
43 | } 44 | \examples{ 45 | \donttest{# Load the sgdGMF package 46 | library(sgdGMF) 47 | 48 | # Generate data from a Poisson model 49 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson()) 50 | 51 | # Fit a GMF model 52 | gmf = sgdgmf.fit(data$Y, ncomp = 3, family = poisson()) 53 | 54 | # Get the heatmap of a GMF model 55 | image(gmf, type = "data") # original data 56 | image(gmf, type = "response") # fitted values in response scale 57 | image(gmf, type = "scores") # estimated score matrix 58 | image(gmf, type = "loadings") # estimated loading matrix 59 | image(gmf, type = "deviance", resid = TRUE) # deviance residual matrix 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /man/screeplot.initgmf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/initGMF-class.R 3 | \name{screeplot.initgmf} 4 | \alias{screeplot.initgmf} 5 | \title{Screeplot for the residuals of an initialized GMF model} 6 | \usage{ 7 | \method{screeplot}{initgmf}( 8 | x, 9 | ..., 10 | ncomp = 20, 11 | type = c("deviance", "pearson", "working", "response", "link"), 12 | partial = FALSE, 13 | normalize = FALSE, 14 | cumulative = FALSE, 15 | proportion = FALSE 16 | ) 17 | } 18 | \arguments{ 19 | \item{x}{an object of class \code{initgmf}} 20 | 21 | \item{...}{further arguments passed to or from other methods} 22 | 23 | \item{ncomp}{number of components to be plotted} 24 | 25 | \item{type}{the type of residuals which should be used} 26 | 27 | \item{partial}{if \code{TRUE}, plots the eigenvalues of the partial residuals} 28 | 29 | \item{normalize}{if \code{TRUE}, plots the eigenvalues of the standardized residuals} 30 | 31 | \item{cumulative}{if \code{TRUE}, plots the cumulative sum of the eigenvalues} 32 | 33 | \item{proportion}{if \code{TRUE}, plots the fractions of explained variance} 34 | } 35 | \value{ 36 | A ggplot 
object showing the residual screeplot of the model. 37 | } 38 | \description{ 39 | Plots the variances of the principal components of the residuals against the 40 | index of the principal component. 41 | } 42 | \examples{ 43 | \donttest{# Load the sgdGMF package 44 | library(sgdGMF) 45 | 46 | # Generate data from a Poisson model 47 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson()) 48 | 49 | # Fit a GMF model 50 | init = sgdgmf.init(data$Y, ncomp = 3, family = poisson()) 51 | 52 | # Get the partial residual spectrum of a GMF model 53 | screeplot(init) # screeplot of the var-cov matrix of the deviance residuals 54 | screeplot(init, partial = TRUE) # screeplot of the partial residuals 55 | screeplot(init, cumulative = TRUE) # cumulative screeplot 56 | screeplot(init, proportion = TRUE) # proportion of explained residual variance 57 | } 58 | } 59 | \seealso{ 60 | \code{\link{screeplot.sgdgmf}}. 61 | } 62 | -------------------------------------------------------------------------------- /man/image.initgmf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/initGMF-class.R 3 | \name{image.initgmf} 4 | \alias{image.initgmf} 5 | \title{Heatmap of an initialized GMF model} 6 | \usage{ 7 | \method{image}{initgmf}( 8 | x, 9 | ..., 10 | type = c("data", "response", "link", "scores", "loadings", "deviance", "pearson", 11 | "working"), 12 | resid = FALSE, 13 | symmetric = FALSE, 14 | transpose = FALSE, 15 | limits = NULL, 16 | palette = NULL 17 | ) 18 | } 19 | \arguments{ 20 | \item{x}{an object of class \code{initgmf}} 21 | 22 | \item{...}{further arguments passed to or from other methods} 23 | 24 | \item{type}{the type of data/predictions/residuals which should be returned} 25 | 26 | \item{resid}{if \code{TRUE}, plots the residual values} 27 | 28 | \item{symmetric}{if \code{TRUE}, symmetrizes the color limits} 29 | 30 | \item{transpose}{if 
\code{TRUE}, transposes the matrix before plotting it} 31 | 32 | \item{limits}{the color limits which should be used} 33 | 34 | \item{palette}{the color-palette which should be used} 35 | } 36 | \value{ 37 | A ggplot object showing the selected heatmap. 38 | } 39 | \description{ 40 | Plots a heatmap of either the data, the fitted values, or the residual values 41 | of a GMF model allowing for different types of transformations and normalizations. 42 | Moreover, it also permits to plot the latent score and loading matrices. 43 | } 44 | \examples{ 45 | \donttest{# Load the sgdGMF package 46 | library(sgdGMF) 47 | 48 | # Generate data from a Poisson model 49 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson()) 50 | 51 | # Fit a GMF model 52 | init = sgdgmf.init(data$Y, ncomp = 3, family = poisson()) 53 | 54 | # Get the heatmap of a GMF model 55 | image(init, type = "data") # original data 56 | image(init, type = "response") # fitted values in response scale 57 | image(init, type = "scores") # estimated score matrix 58 | image(init, type = "loadings") # estimated loading matrix 59 | image(init, type = "deviance", resid = TRUE) # deviance residual matrix 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /R/sgdGMF-package.R: -------------------------------------------------------------------------------- 1 | #' @keywords internal 2 | "_PACKAGE" 3 | 4 | #' @useDynLib sgdGMF, .registration=TRUE 5 | #' @importFrom Rcpp evalCpp 6 | #' @import Rcpp 7 | #' @import RcppArmadillo 8 | #' @importFrom stats glm.fit 9 | #' @importFrom stats family 10 | #' @importFrom stats gaussian 11 | #' @importFrom stats binomial 12 | #' @importFrom stats poisson 13 | #' @importFrom stats Gamma 14 | #' @importFrom stats inverse.gaussian 15 | #' @importFrom stats quasi 16 | #' @importFrom stats quasibinomial 17 | #' @importFrom stats quasipoisson 18 | #' @importFrom MASS neg.bin 19 | #' @importFrom MASS negative.binomial 20 | #' 
@importFrom RSpectra svds 21 | #' @importFrom RSpectra eigs 22 | #' @importFrom RSpectra eigs_sym 23 | #' @importFrom parallel detectCores 24 | #' @importFrom parallel makeCluster 25 | #' @importFrom parallel stopCluster 26 | #' @importFrom doParallel registerDoParallel 27 | #' @importFrom foreach %do% 28 | #' @importFrom foreach %dopar% 29 | #' @importFrom foreach foreach 30 | #' @importFrom stats var sd 31 | #' @importFrom stats cov cor cov2cor 32 | #' @importFrom stats ecdf density 33 | #' @importFrom stats median quantile 34 | #' @importFrom stats dnorm pnorm qnorm rnorm 35 | #' @importFrom stats dexp pexp qexp rexp 36 | #' @importFrom stats dgamma pgamma qgamma rgamma 37 | #' @importFrom stats dbeta pbeta qbeta rbeta 38 | #' @importFrom stats dunif punif qunif runif 39 | #' @importFrom stats dpois ppois qpois rpois 40 | #' @importFrom stats dbinom pbinom qbinom rbinom 41 | #' @importFrom stats fitted 42 | #' @importFrom stats predict 43 | #' @importFrom stats coef coefficients 44 | #' @importFrom stats resid residuals 45 | #' @importFrom stats deviance 46 | #' @importFrom stats BIC 47 | #' @importFrom stats deviance 48 | #' @importFrom stats qqplot qqnorm qqline 49 | #' @importFrom stats biplot 50 | #' @importFrom stats screeplot 51 | #' @importFrom utils head tail 52 | #' @importFrom graphics image 53 | #' @importFrom generics refit 54 | #' @importFrom methods is 55 | #' @import ggplot2 56 | #' @importFrom reshape2 melt 57 | #' @importFrom viridisLite viridis 58 | NULL 59 | -------------------------------------------------------------------------------- /man/refit.sgdgmf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sgdGMF-class.R 3 | \name{refit.sgdgmf} 4 | \alias{refit.sgdgmf} 5 | \title{Refine the final estimate of a GMF model} 6 | \usage{ 7 | \method{refit}{sgdgmf}( 8 | object, 9 | ..., 10 | normalize = TRUE, 11 | verbose = FALSE, 
12 | parallel = FALSE, 13 | nthreads = 1 14 | ) 15 | } 16 | \arguments{ 17 | \item{object}{an object of class \code{sgdgmf}} 18 | 19 | \item{...}{further arguments passed to or from other methods} 20 | 21 | \item{normalize}{if \code{TRUE}, normalize \code{U} and \code{V} to uncorrelated Gaussian \code{U} and upper triangular \code{V} with positive diagonal} 22 | 23 | \item{verbose}{if \code{TRUE}, print the optimization status} 24 | 25 | \item{parallel}{if \code{TRUE}, use parallel computing using the \code{foreach} package} 26 | 27 | \item{nthreads}{number of cores to be used in the \code{"glm"} method} 28 | } 29 | \value{ 30 | An \code{sgdgmf} object containing the re-fitted model. 31 | } 32 | \description{ 33 | Refine the estimated latent scores of a GMF model via IRWLS 34 | } 35 | \examples{ 36 | \donttest{# Load the sgdGMF package 37 | library(sgdGMF) 38 | 39 | # Generate data from a Poisson model 40 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson()) 41 | 42 | # Fit a GMF model using SGD 43 | gmf_old = sgdgmf.fit(data$Y, ncomp = 3, family = poisson(), method = "sgd") 44 | 45 | # Refine the score matrix estimate 46 | gmf_new = refit(gmf_old) 47 | 48 | # Get the fitted values in the link and response scales 49 | mu_hat_old = fitted(gmf_old, type = "response") 50 | mu_hat_new = fitted(gmf_new, type = "response") 51 | 52 | # Compare the results 53 | oldpar = par(no.readonly = TRUE) 54 | par(mfrow = c(2,2), mar = c(1,1,3,1)) 55 | image(data$Y, axes = FALSE, main = expression(Y)) 56 | image(data$mu, axes = FALSE, main = expression(mu)) 57 | image(mu_hat_old, axes = FALSE, main = expression(hat(mu)[old])) 58 | image(mu_hat_new, axes = FALSE, main = expression(hat(mu)[new])) 59 | par(oldpar) 60 | } 61 | } 62 | \seealso{ 63 | \code{\link{sgdgmf.fit}} 64 | } 65 | -------------------------------------------------------------------------------- /src/minibatch.h: -------------------------------------------------------------------------------- 1 | // 
minibatch.h 2 | // author: Cristian Castiglione 3 | // creation: 06/10/2023 4 | // last change: 06/10/2023 5 | 6 | #ifndef MINIBATCH_H 7 | #define MINIBATCH_H 8 | 9 | #include <RcppArmadillo.h> 10 | 11 | class Chunks { 12 | public: 13 | int nidx; // number of observations 14 | int nchunks; // number of chunks 15 | bool randomize; // should we reshuffle the indices? 16 | arma::uvec idx; // data index vector 17 | arma::uvec start; // vector of starting indices (of idx) for each chunk 18 | arma::uvec end; // vector of ending indices (of idx) for each chunk 19 | arma::uvec range; // vector of lengths for each chunk 20 | 21 | // Get the data indices corresponding to the chunk at iteration 'iter' 22 | arma::uvec get_chunk (const int & iter); 23 | 24 | // Get the list of data indices corresponding to each chunk in the partition 25 | std::list<arma::uvec> get_chunks (const arma::uvec & iters); 26 | 27 | // Set all the chunks via index partition 28 | void set_chunks (const int & n, const int & size, const bool & randomize); 29 | 30 | // Class constructor 31 | Chunks () {} 32 | Chunks (const int & n, const int & size, const bool & randomize) { 33 | this->set_chunks(n, size, randomize); 34 | } 35 | }; 36 | 37 | class ChunkPile { 38 | public: 39 | int idx; // current index, set to -1 by the parametrized constructor 40 | bool random; // flag copied from the constructor argument 'rnd' 41 | arma::uvec tovisit; // initialized by the constructor to 0, ..., n-1 42 | arma::uvec visited; // initialized empty by the constructor 43 | 44 | void fill_tovisit (); 45 | void empty_visited (); 46 | void pop_tovisit (const int & id); 47 | void push_visited (const int & id); 48 | void sample_idx (); 49 | void update (); 50 | 51 | // Class constructor 52 | ChunkPile () {} 53 | ChunkPile (const int & n, const bool & rnd) { 54 | this->idx = -1; 55 | this->random = rnd; 56 | this->tovisit = arma::linspace<arma::uvec>(0, n-1, n); 57 | this->visited = {}; 58 | } 59 | }; 60 | 61 | 62 | #endif 63 | -------------------------------------------------------------------------------- /man/set.control.newton.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | %
Please edit documentation in R/control.R 3 | \name{set.control.newton} 4 | \alias{set.control.newton} 5 | \title{Check and set the control parameters for the Newton algorithm} 6 | \usage{ 7 | set.control.newton( 8 | normalize = TRUE, 9 | maxiter = 500, 10 | stepsize = 0.01, 11 | eps = 1e-08, 12 | nafill = 1, 13 | tol = 1e-05, 14 | damping = 0.001, 15 | verbose = FALSE, 16 | frequency = 50, 17 | parallel = FALSE, 18 | nthreads = 1, 19 | savedata = TRUE 20 | ) 21 | } 22 | \arguments{ 23 | \item{normalize}{if \code{TRUE}, normalize \code{U} and \code{V} to uncorrelated Gaussian \code{U} and upper triangular \code{V} with positive diagonal} 24 | 25 | \item{maxiter}{maximum number of iterations} 26 | 27 | \item{stepsize}{step-size parameter scaling each IRWLS step} 28 | 29 | \item{eps}{how much shrinkage has to be introduced on extreme predictions lying outside of the data range} 30 | 31 | \item{nafill}{how frequently the \code{NA} values are filled, by default \code{NA} values are filled at each iteration of the algorithm} 32 | 33 | \item{tol}{tolerance threshold for the stopping criterion} 34 | 35 | \item{damping}{regularization parameter which is added to the Hessian to ensure numerical stability} 36 | 37 | \item{verbose}{if \code{TRUE}, print the optimization status} 38 | 39 | \item{frequency}{how often the optimization status is printed (only if \code{verbose=TRUE})} 40 | 41 | \item{parallel}{if \code{TRUE}, allows for parallel computing using the \code{C++} library \code{OpenMP}} 42 | 43 | \item{nthreads}{number of cores to be used in parallel (only if \code{parallel=TRUE})} 44 | 45 | \item{savedata}{if \code{TRUE}, saves a copy of the data and fitted values} 46 | } 47 | \value{ 48 | A \code{list} of control parameters for the quasi-Newton algorithm 49 | } 50 | \description{ 51 | Check if the input control parameters of the quasi-Newton algorithm are 52 | allowed and set them to default values if they are not. Returns a list of 53 | well-defined control parameters.
54 | } 55 | \examples{ 56 | library(sgdGMF) 57 | 58 | # Empty call 59 | set.control.newton() 60 | 61 | # Parametrized call 62 | set.control.newton(maxiter = 1000, stepsize = 0.01, tol = 1e-04) 63 | 64 | } 65 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: sgdGMF 2 | Type: Package 3 | Title: Estimation of Generalized Matrix Factorization Models via Stochastic Gradient Descent 4 | Version: 1.0.1 5 | Date: 2025-05-17 6 | Authors@R: 7 | c(person("Cristian", "Castiglione", email = "cristian_castiglione@libero.it", 8 | role = c("aut","cre"), comment = c(ORCID = "0000-0001-5883-4890")), 9 | person("Davide", "Risso", email = "davide.risso@unipd.it", 10 | role = c("ctb"), comment = c(ORCID = "0000-0001-8508-5012")), 11 | person("Alexandre", "Segers", email = "alexandre.segers@ugent.be", 12 | role = c("ctb"), comment = c(ORCID = "0009-0004-2028-7595"))) 13 | Description: Efficient framework to estimate high-dimensional generalized matrix factorization models using penalized maximum likelihood under a dispersion exponential family specification. Both deterministic and stochastic methods are implemented for the numerical maximization. In particular, the package implements the stochastic gradient descent algorithm with a block-wise mini-batch strategy to speed up the computations and an efficient adaptive learning rate schedule to stabilize the convergence. All the theoretical details can be found in Castiglione et al. (2024, ). Other methods considered for the optimization are the alternated iterative re-weighted least squares and the quasi-Newton method with diagonal approximation of the Fisher information matrix discussed in Kidzinski et al. (2022, ).
14 | License: MIT + file LICENSE 15 | Imports: 16 | Rcpp (>= 1.0.10), 17 | RcppArmadillo, 18 | RSpectra, 19 | parallel, 20 | doParallel, 21 | foreach, 22 | MASS, 23 | SuppDists, 24 | methods, 25 | generics, 26 | reshape2, 27 | ggpubr, 28 | viridisLite 29 | LinkingTo: 30 | Rcpp, 31 | RcppArmadillo 32 | Depends: 33 | R (>= 4.0.0), 34 | ggplot2 35 | Suggests: 36 | testthat (>= 3.0.0), 37 | Rtsne, 38 | dplyr, 39 | knitr, 40 | rmarkdown 41 | Config/testthat/edition: 3 42 | Encoding: UTF-8 43 | URL: https://github.com/CristianCastiglione/sgdGMF 44 | BugReports: https://github.com/CristianCastiglione/sgdGMF/issues 45 | RoxygenNote: 7.2.3 46 | VignetteBuilder: knitr 47 | -------------------------------------------------------------------------------- /man/plot.sgdgmf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sgdGMF-class.R 3 | \name{plot.sgdgmf} 4 | \alias{plot.sgdgmf} 5 | \title{Plot diagnostics for a GMF model} 6 | \usage{ 7 | \method{plot}{sgdgmf}( 8 | x, 9 | ..., 10 | type = c("res-idx", "res-fit", "std-fit", "hist", "qq", "ecdf"), 11 | resid = c("deviance", "pearson", "working", "response", "link"), 12 | subsample = FALSE, 13 | sample.size = 500, 14 | partial = FALSE, 15 | normalize = FALSE, 16 | fillna = FALSE 17 | ) 18 | } 19 | \arguments{ 20 | \item{x}{an object of class \code{sgdgmf}} 21 | 22 | \item{...}{further arguments passed to or from other methods} 23 | 24 | \item{type}{the type of plot which should be returned} 25 | 26 | \item{resid}{the type of residuals which should be used} 27 | 28 | \item{subsample}{if \code{TRUE}, computes the residuals over a small fraction of the data} 29 | 30 | \item{sample.size}{the dimension of the sub-sample which should be used} 31 | 32 | \item{partial}{if \code{TRUE}, computes the partial residuals} 33 | 34 | \item{normalize}{if \code{TRUE}, standardizes the residuals column-by-column} 35 | 36 |
\item{fillna}{if \code{TRUE}, fills the \code{NA} values with \code{0}} 37 | } 38 | \value{ 39 | A ggplot object showing the selected diagnostic plot. 40 | } 41 | \description{ 42 | Plots (one of) six diagnostics to graphically analyze the marginal and conditional 43 | distribution of the residuals of a GMF model. Currently, the following plots are 44 | available: residuals against observation indices, residuals against fitted values, 45 | absolute square-root residuals against fitted values, histogram of the residuals, 46 | residual QQ-plot, residual ECDF-plot. 47 | } 48 | \examples{ 49 | \donttest{# Load the sgdGMF package 50 | library(sgdGMF) 51 | 52 | # Generate data from a Poisson model 53 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson()) 54 | 55 | # Fit a GMF model 56 | gmf = sgdgmf.fit(data$Y, ncomp = 3, family = poisson()) 57 | 58 | # Plot the residual-based GMF diagnostics 59 | plot(gmf, type = "res-fit") # Residuals vs fitted values 60 | plot(gmf, type = "std-fit") # Abs-sqrt-transformed residuals vs fitted values 61 | plot(gmf, type = "qq") # Residual QQ-plot 62 | plot(gmf, type = "hist") # Residual histogram 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /man/cpp.fit.newton.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{cpp.fit.newton} 4 | \alias{cpp.fit.newton} 5 | \title{Fit a GMF model using the diagonal quasi-Newton algorithm} 6 | \usage{ 7 | cpp.fit.newton( 8 | Y, 9 | X, 10 | B, 11 | A, 12 | Z, 13 | U, 14 | V, 15 | O, 16 | W, 17 | familyname, 18 | linkname, 19 | varfname, 20 | ncomp, 21 | lambda, 22 | maxiter = 500L, 23 | stepsize = 0.1, 24 | eps = 1e-08, 25 | nafill = 1L, 26 | tol = 1e-05, 27 | damping = 0.001, 28 | verbose = TRUE, 29 | frequency = 10L, 30 | parallel = FALSE, 31 | nthreads = 1L 32 | ) 33 | } 34 | \arguments{ 35 |
\item{Y}{matrix of responses (\eqn{n \times m})} 36 | 37 | \item{X}{matrix of row fixed effects (\eqn{n \times p})} 38 | 39 | \item{B}{initial row-effect matrix (\eqn{n \times p})} 40 | 41 | \item{A}{initial column-effect matrix (\eqn{n \times q})} 42 | 43 | \item{Z}{matrix of column fixed effects (\eqn{m \times q})} 44 | 45 | \item{U}{initial factor matrix (\eqn{n \times d})} 46 | 47 | \item{V}{initial loading matrix (\eqn{m \times d})} 48 | 49 | \item{O}{matrix of constant offset (\eqn{n \times m})} 50 | 51 | \item{W}{matrix of constant weights (\eqn{n \times m})} 52 | 53 | \item{familyname}{a \code{glm} model family name} 54 | 55 | \item{linkname}{a \code{glm} link function name} 56 | 57 | \item{varfname}{variance function name} 58 | 59 | \item{ncomp}{rank of the latent matrix factorization} 60 | 61 | \item{lambda}{penalization parameters} 62 | 63 | \item{maxiter}{maximum number of iterations} 64 | 65 | \item{stepsize}{stepsize of the quasi-Newton update} 66 | 67 | \item{eps}{shrinkage factor for extreme predictions} 68 | 69 | \item{nafill}{how often the missing values are updated} 70 | 71 | \item{tol}{tolerance threshold for the stopping criterion} 72 | 73 | \item{damping}{diagonal damping factor for the Hessian matrix} 74 | 75 | \item{verbose}{if \code{TRUE}, print the optimization status} 76 | 77 | \item{frequency}{how often the optimization status is printed} 78 | 79 | \item{parallel}{if \code{TRUE}, allows for parallel computing} 80 | 81 | \item{nthreads}{number of cores to be used in parallel} 82 | } 83 | \description{ 84 | Fit a GMF model using the diagonal quasi-Newton algorithm 85 | } 86 | \keyword{internal} 87 | -------------------------------------------------------------------------------- /man/plot.initgmf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/initGMF-class.R 3 | \name{plot.initgmf} 4 | \alias{plot.initgmf} 5 |
\title{Plot diagnostics for an initialized GMF model} 6 | \usage{ 7 | \method{plot}{initgmf}( 8 | x, 9 | ..., 10 | type = c("res-idx", "res-fit", "std-fit", "hist", "qq", "ecdf"), 11 | resid = c("deviance", "pearson", "working", "response", "link"), 12 | subsample = FALSE, 13 | sample.size = 500, 14 | partial = FALSE, 15 | normalize = FALSE, 16 | fillna = FALSE 17 | ) 18 | } 19 | \arguments{ 20 | \item{x}{an object of class \code{initgmf}} 21 | 22 | \item{...}{further arguments passed to or from other methods} 23 | 24 | \item{type}{the type of plot which should be returned} 25 | 26 | \item{resid}{the type of residuals which should be used} 27 | 28 | \item{subsample}{if \code{TRUE}, computes the residuals over a small fraction of the data} 29 | 30 | \item{sample.size}{the dimension of the sub-sample which should be used} 31 | 32 | \item{partial}{if \code{TRUE}, computes the partial residuals} 33 | 34 | \item{normalize}{if \code{TRUE}, standardizes the residuals column-by-column} 35 | 36 | \item{fillna}{if \code{TRUE}, fills the \code{NA} values with \code{0}} 37 | } 38 | \value{ 39 | A ggplot object showing the selected diagnostic plot. 40 | } 41 | \description{ 42 | Plots (one of) six diagnostics to graphically analyze the marginal and conditional 43 | distribution of the residuals of a GMF model. Currently, the following plots are 44 | available: residuals against observation indices, residuals against fitted values, 45 | absolute square-root residuals against fitted values, histogram of the residuals, 46 | residual QQ-plot, residual ECDF-plot.
47 | } 48 | \examples{ 49 | \donttest{# Load the sgdGMF package 50 | library(sgdGMF) 51 | 52 | # Generate data from a Poisson model 53 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson()) 54 | 55 | # Fit a GMF model 56 | init = sgdgmf.init(data$Y, ncomp = 3, family = poisson()) 57 | 58 | # Plot the residual-based GMF diagnostics 59 | plot(init, type = "res-fit") # Residuals vs fitted values 60 | plot(init, type = "std-fit") # Abs-sqrt-transformed residuals vs fitted values 61 | plot(init, type = "qq") # Residual QQ-plot 62 | plot(init, type = "hist") # Residual histogram 63 | } 64 | } 65 | \seealso{ 66 | \code{\link{plot.sgdgmf}}. 67 | } 68 | -------------------------------------------------------------------------------- /man/cpp.fit.airwls.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{cpp.fit.airwls} 4 | \alias{cpp.fit.airwls} 5 | \title{Fit a GMF model using the AIRWLS algorithm} 6 | \usage{ 7 | cpp.fit.airwls( 8 | Y, 9 | X, 10 | B, 11 | A, 12 | Z, 13 | U, 14 | V, 15 | O, 16 | W, 17 | familyname, 18 | linkname, 19 | varfname, 20 | ncomp, 21 | lambda, 22 | maxiter = 500L, 23 | nsteps = 1L, 24 | stepsize = 0.1, 25 | eps = 1e-08, 26 | nafill = 1L, 27 | tol = 1e-05, 28 | damping = 0.001, 29 | verbose = TRUE, 30 | frequency = 10L, 31 | parallel = FALSE, 32 | nthreads = 1L 33 | ) 34 | } 35 | \arguments{ 36 | \item{Y}{matrix of responses (\eqn{n \times m})} 37 | 38 | \item{X}{matrix of row fixed effects (\eqn{n \times p})} 39 | 40 | \item{B}{initial row-effect matrix (\eqn{n \times p})} 41 | 42 | \item{A}{initial column-effect matrix (\eqn{n \times q})} 43 | 44 | \item{Z}{matrix of column fixed effects (\eqn{m \times q})} 45 | 46 | \item{U}{initial factor matrix (\eqn{n \times d})} 47 | 48 | \item{V}{initial loading matrix (\eqn{m \times d})} 49 | 50 | \item{O}{matrix of constant offset (\eqn{n \times m})} 
51 | 52 | \item{W}{matrix of constant weights (\eqn{n \times m})} 53 | 54 | \item{familyname}{a \code{glm} model family name} 55 | 56 | \item{linkname}{a \code{glm} link function name} 57 | 58 | \item{varfname}{variance function name} 59 | 60 | \item{ncomp}{rank of the latent matrix factorization} 61 | 62 | \item{lambda}{penalization parameters} 63 | 64 | \item{maxiter}{maximum number of iterations} 65 | 66 | \item{nsteps}{number of inner Fisher scoring iterations} 67 | 68 | \item{stepsize}{stepsize of the inner Fisher scoring algorithm} 69 | 70 | \item{eps}{shrinkage factor for extreme predictions} 71 | 72 | \item{nafill}{how often the missing values are updated} 73 | 74 | \item{tol}{tolerance threshold for the stopping criterion} 75 | 76 | \item{damping}{diagonal damping factor for the Hessian matrix} 77 | 78 | \item{verbose}{if \code{TRUE}, print the optimization status} 79 | 80 | \item{frequency}{how often the optimization status is printed} 81 | 82 | \item{parallel}{if \code{TRUE}, allows for parallel computing} 83 | 84 | \item{nthreads}{number of cores to be used in parallel} 85 | } 86 | \description{ 87 | Fit a GMF model using the AIRWLS algorithm 88 | } 89 | \keyword{internal} 90 | -------------------------------------------------------------------------------- /man/set.control.airwls.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/control.R 3 | \name{set.control.airwls} 4 | \alias{set.control.airwls} 5 | \title{Check and set the control parameters for the AIRWLS algorithm} 6 | \usage{ 7 | set.control.airwls( 8 | normalize = TRUE, 9 | maxiter = 100, 10 | nstep = 1, 11 | stepsize = 0.1, 12 | eps = 1e-08, 13 | nafill = 1, 14 | tol = 1e-05, 15 | damping = 0.001, 16 | verbose = FALSE, 17 | frequency = 10, 18 | parallel = FALSE, 19 | nthreads = 1, 20 | savedata = TRUE 21 | ) 22 | } 23 | \arguments{ 24 | \item{normalize}{if \code{TRUE},
normalize \code{U} and \code{V} to uncorrelated Gaussian \code{U} and upper triangular \code{V} with positive diagonal} 25 | 26 | \item{maxiter}{maximum number of iterations} 27 | 28 | \item{nstep}{number of IRWLS steps in each inner loop of AIRWLS} 29 | 30 | \item{stepsize}{step-size parameter scaling each IRWLS step} 31 | 32 | \item{eps}{how much shrinkage has to be introduced on extreme predictions lying outside of the data range} 33 | 34 | \item{nafill}{how frequently the \code{NA} values are filled, by default \code{NA} values are filled at each iteration of the algorithm} 35 | 36 | \item{tol}{tolerance threshold for the stopping criterion} 37 | 38 | \item{damping}{regularization parameter which is added to the diagonal of the Hessian to ensure numerical stability} 39 | 40 | \item{verbose}{if \code{TRUE}, print the optimization status (default \code{FALSE})} 41 | 42 | \item{frequency}{how often the optimization status is printed (only if \code{verbose=TRUE})} 43 | 44 | \item{parallel}{if \code{TRUE}, allows for parallel computing using the \code{C++} library \code{OpenMP}} 45 | 46 | \item{nthreads}{number of cores to be used in parallel (only if \code{parallel=TRUE})} 47 | 48 | \item{savedata}{if \code{TRUE}, saves a copy of the data and fitted values} 49 | } 50 | \value{ 51 | A \code{list} of control parameters for the AIRWLS algorithm 52 | } 53 | \description{ 54 | Check if the input control parameters of the AIRWLS algorithm are allowed 55 | and set them to default values if they are not. Returns a list of 56 | well-defined control parameters.
57 | } 58 | \examples{ 59 | library(sgdGMF) 60 | 61 | # Empty call 62 | set.control.airwls() 63 | 64 | # Parametrized call 65 | set.control.airwls(maxiter = 100, nstep = 5, stepsize = 0.3) 66 | 67 | 68 | } 69 | -------------------------------------------------------------------------------- /tests/testrcpp/test-link.R: -------------------------------------------------------------------------------- 1 | # test-link.R 2 | # author: Cristian Castiglione 3 | # creation: 29/09/2023 4 | # last change: 29/09/2023 5 | 6 | ## Workspace setup ---- 7 | rm(list = ls()) 8 | graphics.off() 9 | 10 | # Package compilation and import 11 | devtools::load_all() 12 | 13 | plot.link <- function (x, y, main = "") { 14 | plot(x, y, type = "l", xlab = "x", ylab = "link", main = main) 15 | } 16 | 17 | ## Test data ---- 18 | x = seq(from = -3, to = +3, length = 201) 19 | y = seq(from = 0.1, to = +5, length = 201) 20 | z = seq(from = 0.001, to = 0.999, length = 201) 21 | 22 | par(mfrow = c(1, 3)) 23 | 24 | ## Test: identity ---- 25 | { 26 | plot.link(x, sgdGMF::cpp.link.identity.linkfun(x), main = "linkfun") 27 | plot.link(x, sgdGMF::cpp.link.identity.linkinv(x), main = "linkinv") 28 | plot.link(x, sgdGMF::cpp.link.identity.mueta(x), main = "linkmueta") 29 | } 30 | 31 | ## Test: logit ---- 32 | { 33 | plot.link(x, sgdGMF::cpp.link.logit.linkfun(z), main = "linkfun") 34 | plot.link(x, sgdGMF::cpp.link.logit.linkinv(x), main = "linkinv") 35 | plot.link(x, sgdGMF::cpp.link.logit.mueta(x), main = "linkmueta") 36 | } 37 | 38 | ## Test: probit ---- 39 | { 40 | plot.link(x, sgdGMF::cpp.link.probit.linkfun(z), main = "linkfun") 41 | plot.link(x, sgdGMF::cpp.link.probit.linkinv(x), main = "linkinv") 42 | plot.link(x, sgdGMF::cpp.link.probit.mueta(x), main = "linkmueta") 43 | } 44 | 45 | ## Test: cauchy ---- 46 | { 47 | plot.link(x, sgdGMF::cpp.link.cauchy.linkfun(z), main = "linkfun") 48 | plot.link(x, sgdGMF::cpp.link.cauchy.linkinv(x), main = "linkinv") 49 | plot.link(x, 
sgdGMF::cpp.link.cauchy.mueta(x), main = "linkmueta") 50 | } 51 | 52 | ## Test: cloglog ---- 53 | { 54 | plot.link(x, sgdGMF::cpp.link.cloglog.linkfun(z), main = "linkfun") 55 | plot.link(x, sgdGMF::cpp.link.cloglog.linkinv(x), main = "linkinv") 56 | plot.link(x, sgdGMF::cpp.link.cloglog.mueta(x), main = "linkmueta") 57 | } 58 | 59 | ## Test: log ---- 60 | { 61 | plot.link(x, sgdGMF::cpp.link.log.linkfun(y), main = "linkfun") 62 | plot.link(x, sgdGMF::cpp.link.log.linkinv(x), main = "linkinv") 63 | plot.link(x, sgdGMF::cpp.link.log.mueta(x), main = "linkmueta") 64 | } 65 | 66 | ## Test: inverse ---- 67 | { 68 | plot.link(x, sgdGMF::cpp.link.inverse.linkfun(z), main = "linkfun") 69 | plot.link(x, sgdGMF::cpp.link.inverse.linkinv(z), main = "linkinv") 70 | plot.link(x, sgdGMF::cpp.link.inverse.mueta(z), main = "linkmueta") 71 | } 72 | 73 | ## Test: sqrt ---- 74 | { 75 | plot.link(x, sgdGMF::cpp.link.sqrt.linkfun(z), main = "linkfun") 76 | plot.link(x, sgdGMF::cpp.link.sqrt.linkinv(z), main = "linkinv") 77 | plot.link(x, sgdGMF::cpp.link.sqrt.mueta(z), main = "linkmueta") 78 | } 79 | 80 | -------------------------------------------------------------------------------- /man/set.control.block.sgd.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/control.R 3 | \name{set.control.block.sgd} 4 | \alias{set.control.block.sgd} 5 | \title{Check and set the control parameters for the blockwise-SGD algorithm} 6 | \usage{ 7 | set.control.block.sgd( 8 | normalize = TRUE, 9 | maxiter = 1000, 10 | eps = 1e-08, 11 | nafill = 10, 12 | tol = 1e-08, 13 | size = c(100, 100), 14 | burn = 1, 15 | rate0 = 0.01, 16 | decay = 0.01, 17 | damping = 0.001, 18 | rate1 = 0.1, 19 | rate2 = 0.01, 20 | verbose = FALSE, 21 | frequency = 250, 22 | progress = FALSE, 23 | savedata = TRUE 24 | ) 25 | } 26 | \arguments{ 27 | \item{normalize}{if \code{TRUE}, normalize \code{U} and 
\code{V} to uncorrelated Gaussian \code{U} and upper triangular \code{V} with positive diagonal} 28 | 29 | \item{maxiter}{maximum number of iterations} 30 | 31 | \item{eps}{how much shrinkage has to be introduced on extreme predictions lying outside of the data range} 32 | 33 | \item{nafill}{how frequently the \code{NA} values are filled, by default \code{NA} values are filled at each iteration of the algorithm} 34 | 35 | \item{tol}{tolerance threshold for the stopping criterion} 36 | 37 | \item{size}{mini-batch size, the first value is for row sub-sample, the second value is for column sub-sample} 38 | 39 | \item{burn}{percentage of iterations to ignore before performing Polyak averaging} 40 | 41 | \item{rate0}{initial learning rate} 42 | 43 | \item{decay}{learning rate decay} 44 | 45 | \item{damping}{regularization parameter which is added to the Hessian to ensure numerical stability} 46 | 47 | \item{rate1}{exponential decay rate for the moment estimate of the gradient} 48 | 49 | \item{rate2}{exponential decay rate for the moment estimate of the Hessian} 50 | 51 | \item{verbose}{if \code{TRUE}, print the optimization status} 52 | 53 | \item{frequency}{how often the optimization status is printed (only if \code{verbose=TRUE})} 54 | 55 | \item{progress}{if \code{TRUE}, print a compact progress-bar instead of a full-report of the optimization status (only if \code{verbose=TRUE})} 56 | 57 | \item{savedata}{if \code{TRUE}, saves a copy of the data and fitted values} 58 | } 59 | \value{ 60 | A \code{list} of control parameters for the adaptive SGD algorithm with block-wise sub-sampling 61 | } 62 | \description{ 63 | Check if the input control parameters are allowed and set them to default 64 | values if they are not. Returns a list of well-defined control parameters. 
65 | } 66 | \examples{ 67 | library(sgdGMF) 68 | 69 | # Empty call 70 | set.control.block.sgd() 71 | 72 | # Parametrized call 73 | set.control.block.sgd(maxiter = 2000, rate0 = 0.01, decay = 0.01) 74 | 75 | 76 | } 77 | -------------------------------------------------------------------------------- /man/set.control.coord.sgd.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/control.R 3 | \name{set.control.coord.sgd} 4 | \alias{set.control.coord.sgd} 5 | \title{Check and set the control parameters for the coordinate-SGD algorithm} 6 | \usage{ 7 | set.control.coord.sgd( 8 | normalize = TRUE, 9 | maxiter = 1000, 10 | eps = 1e-08, 11 | nafill = 10, 12 | tol = 1e-08, 13 | size = c(100, 100), 14 | burn = 1, 15 | rate0 = 0.01, 16 | decay = 0.01, 17 | damping = 0.001, 18 | rate1 = 0.1, 19 | rate2 = 0.01, 20 | verbose = FALSE, 21 | frequency = 250, 22 | progress = FALSE, 23 | savedata = TRUE 24 | ) 25 | } 26 | \arguments{ 27 | \item{normalize}{if \code{TRUE}, normalize \code{U} and \code{V} to uncorrelated Gaussian \code{U} and upper triangular \code{V} with positive diagonal} 28 | 29 | \item{maxiter}{maximum number of iterations} 30 | 31 | \item{eps}{how much shrinkage has to be introduced on extreme predictions lying outside of the data range} 32 | 33 | \item{nafill}{how frequently the \code{NA} values are filled, by default \code{NA} values are filled at each iteration of the algorithm} 34 | 35 | \item{tol}{tolerance threshold for the stopping criterion} 36 | 37 | \item{size}{mini-batch size, the first value is for row sub-sample, the second value is for column sub-sample} 38 | 39 | \item{burn}{percentage of iterations to ignore before performing Polyak averaging} 40 | 41 | \item{rate0}{initial learning rate} 42 | 43 | \item{decay}{learning rate decay} 44 | 45 | \item{damping}{regularization parameter which is added to the Hessian to ensure numerical 
stability} 46 | 47 | \item{rate1}{exponential decay rate for the moment estimate of the gradient} 48 | 49 | \item{rate2}{exponential decay rate for the moment estimate of the Hessian} 50 | 51 | \item{verbose}{if \code{TRUE}, print the optimization status} 52 | 53 | \item{frequency}{how often the optimization status is printed (only if \code{verbose=TRUE})} 54 | 55 | \item{progress}{if \code{TRUE}, print a compact progress-bar instead of a full-report of the optimization status (only if \code{verbose=TRUE})} 56 | 57 | \item{savedata}{if \code{TRUE}, saves a copy of the data and fitted values} 58 | } 59 | \value{ 60 | A \code{list} of control parameters for the adaptive SGD algorithm with coordinate-wise sub-sampling 61 | } 62 | \description{ 63 | Check if the input control parameters are allowed and set them to default 64 | values if they are not. Returns a list of well-defined control parameters. 65 | } 66 | \examples{ 67 | library(sgdGMF) 68 | 69 | # Empty call 70 | set.control.coord.sgd() 71 | 72 | # Parametrized call 73 | set.control.coord.sgd(maxiter = 2000, rate0 = 0.01, decay = 0.01) 74 | 75 | } 76 | -------------------------------------------------------------------------------- /tests/testthat/test-fit.R: -------------------------------------------------------------------------------- 1 | # file: test-fit.R 2 | # author: Cristian Castiglione 3 | # creation: 05/02/2024 4 | # last change: 04/10/2024 5 | 6 | testthat::test_that("GMF fit", { 7 | n = 100; m = 20; d = 5 8 | 9 | # Generate data using Poisson, Binomial and Gamma models 10 | data_pois = sim.gmf.data(n = n, m = m, ncomp = d, family = poisson()) 11 | data_bin = sim.gmf.data(n = n, m = m, ncomp = d, family = binomial()) 12 | data_gam = sim.gmf.data(n = n, m = m, ncomp = d, family = Gamma(link = "log"), dispersion = 0.25) 13 | 14 | # Initialize the GMF parameters assuming 3 latent factors 15 | gmf_pois = sgdgmf.fit(data_pois$Y, ncomp = 3, family = poisson()) 16 | gmf_bin = sgdgmf.fit(data_bin$Y, ncomp = 
3, family = binomial()) 17 | gmf_gam = sgdgmf.fit(data_gam$Y, ncomp = 3, family = Gamma(link = "log")) 18 | 19 | # Output class 20 | testthat::expect_true(is.list(gmf_pois)) 21 | testthat::expect_true(is.list(gmf_bin)) 22 | testthat::expect_true(is.list(gmf_gam)) 23 | 24 | testthat::expect_s3_class(gmf_pois, "sgdgmf") 25 | testthat::expect_s3_class(gmf_bin, "sgdgmf") 26 | testthat::expect_s3_class(gmf_gam, "sgdgmf") 27 | 28 | # Sub-output checks 29 | testthat::expect_true(is.matrix(gmf_pois$U) && is.numeric(gmf_pois$U)) 30 | testthat::expect_true(is.matrix(gmf_pois$V) && is.numeric(gmf_pois$V)) 31 | testthat::expect_true(is.matrix(gmf_pois$A) && is.numeric(gmf_pois$A)) 32 | testthat::expect_true(is.matrix(gmf_pois$B) && is.numeric(gmf_pois$B)) 33 | testthat::expect_true(is.matrix(gmf_pois$eta) && is.numeric(gmf_pois$eta)) 34 | testthat::expect_true(is.matrix(gmf_pois$mu) && is.numeric(gmf_pois$mu)) 35 | testthat::expect_true(all(gmf_pois$mu >= 0)) 36 | 37 | testthat::expect_true(is.matrix(gmf_bin$U) && is.numeric(gmf_bin$U)) 38 | testthat::expect_true(is.matrix(gmf_bin$V) && is.numeric(gmf_bin$V)) 39 | testthat::expect_true(is.matrix(gmf_bin$A) && is.numeric(gmf_bin$A)) 40 | testthat::expect_true(is.matrix(gmf_bin$B) && is.numeric(gmf_bin$B)) 41 | testthat::expect_true(is.matrix(gmf_bin$eta) && is.numeric(gmf_bin$eta)) 42 | testthat::expect_true(is.matrix(gmf_bin$mu) && is.numeric(gmf_bin$mu)) 43 | testthat::expect_true(all(gmf_bin$mu >= 0 & gmf_bin$mu <= 1)) 44 | 45 | testthat::expect_true(is.matrix(gmf_gam$U) && is.numeric(gmf_gam$U)) 46 | testthat::expect_true(is.matrix(gmf_gam$V) && is.numeric(gmf_gam$V)) 47 | testthat::expect_true(is.matrix(gmf_gam$A) && is.numeric(gmf_gam$A)) 48 | testthat::expect_true(is.matrix(gmf_gam$B) && is.numeric(gmf_gam$B)) 49 | testthat::expect_true(is.matrix(gmf_gam$eta) && is.numeric(gmf_gam$eta)) 50 | testthat::expect_true(is.matrix(gmf_gam$mu) && is.numeric(gmf_gam$mu)) 51 | testthat::expect_true(all(gmf_gam$mu >= 0)) 52 | }) 53 
| -------------------------------------------------------------------------------- /man/sgdgmf.cv.step.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/crossval.R 3 | \name{sgdgmf.cv.step} 4 | \alias{sgdgmf.cv.step} 5 | \title{Single step of cross-validation for generalized matrix factorization models} 6 | \usage{ 7 | sgdgmf.cv.step( 8 | train, 9 | test, 10 | X, 11 | Z, 12 | family, 13 | ncomp, 14 | maxcomp, 15 | fold, 16 | nfolds, 17 | weights, 18 | offset, 19 | method, 20 | sampling, 21 | penalty, 22 | control.init, 23 | control.alg, 24 | control.cv 25 | ) 26 | } 27 | \arguments{ 28 | \item{train}{train-set matrix of responses (\eqn{n \times m})} 29 | 30 | \item{test}{test-set matrix of responses (\eqn{n \times m})} 31 | 32 | \item{X}{matrix of row fixed effects (\eqn{n \times p})} 33 | 34 | \item{Z}{matrix of column fixed effects (\eqn{q \times m})} 35 | 36 | \item{family}{a \code{glm} family (see \code{\link{family}} for more details)} 37 | 38 | \item{ncomp}{ranks of the latent matrix factorization used in cross-validation (default 1 to 10)} 39 | 40 | \item{maxcomp}{maximum rank allowed in the cross-validation exploration} 41 | 42 | \item{fold}{integer number identifying the current fold} 43 | 44 | \item{nfolds}{maximum number of folds in the cross-validation} 45 | 46 | \item{weights}{an optional matrix of weights (\eqn{n \times m})} 47 | 48 | \item{offset}{an optional matrix of offset values (\eqn{n \times m}), that specify a known component to be included in the linear predictor.} 49 | 50 | \item{method}{estimation method to minimize the negative penalized log-likelihood} 51 | 52 | \item{sampling}{sub-sampling strategy to use if \code{method = "sgd"}} 53 | 54 | \item{penalty}{list of penalty parameters (see \code{\link{set.penalty}} for more details)} 55 | 56 | \item{control.init}{list of control parameters for the initialization (see 
\code{\link{set.control.init}} for more details)} 57 | 58 | \item{control.alg}{list of control parameters for the optimization (see \code{\link{set.control.alg}} for more details)} 59 | 60 | \item{control.cv}{list of control parameters for the cross-validation (see \code{\link{set.control.cv}} for more details)} 61 | } 62 | \value{ 63 | Returns a \code{data.frame} containing the current number of latent factors 64 | in the model (\code{ncomp}), the fold identifier (\code{fold}), the degrees of 65 | freedom, i.e. the number of parameters, of the model (\code{df}), the AIC, BIC 66 | and deviance (respectively, \code{aic}, \code{bic}, \code{dev}) 67 | calculated on the train and test sets. 68 | } 69 | \description{ 70 | Internal function running a single step of cross-validation for generalized 71 | matrix factorization (GMF) models and calculating some goodness-of-fit measures 72 | on the train and test sets. 73 | } 74 | \keyword{internal} 75 | -------------------------------------------------------------------------------- /man/sim.gmf.data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{sim.gmf.data} 4 | \alias{sim.gmf.data} 5 | \title{Simulate non-Gaussian data from a GMF model} 6 | \usage{ 7 | sim.gmf.data(n = 100, m = 20, ncomp = 5, family = gaussian(), dispersion = 1) 8 | } 9 | \arguments{ 10 | \item{n}{number of observations} 11 | 12 | \item{m}{number of variables} 13 | 14 | \item{ncomp}{rank of the latent matrix factorization} 15 | 16 | \item{family}{a \code{glm} family (see \code{\link{family}} for more details)} 17 | 18 | \item{dispersion}{a positive dispersion parameter} 19 | } 20 | \value{ 21 | A list containing the following objects: 22 | \itemize{ 23 | \item \code{Y}: simulated response matrix 24 | \item \code{U}: simulated factor matrix 25 | \item \code{V}: simulated loading matrix 26 | \item \code{eta}: linear 
predictor matrix 27 | \item \code{mu}: conditional mean matrix 28 | \item \code{phi}: scalar dispersion parameter 29 | \item \code{family}: model family 30 | \item \code{ncomp}: rank of the latent matrix factorization 31 | \item \code{param}: a list containing time, phase, frequency and amplitude vectors used to generate \code{U} 32 | } 33 | } 34 | \description{ 35 | Simulate synthetic non-Gaussian data from a generalized matrix factorization (GMF) model. 36 | } 37 | \details{ 38 | The loadings, \code{V}, are independently sampled from a standard normal distribution. 39 | The scores, \code{U}, are simulated according to sinusoidal signals evaluated at different 40 | phases, frequencies and amplitudes. These parameters are randomly sampled from independent 41 | uniform distributions. 42 | } 43 | \examples{ 44 | library(sgdGMF) 45 | 46 | # Set the data dimensions 47 | n = 100; m = 20; d = 5 48 | 49 | # Generate data using Poisson, Binomial and Gamma models 50 | data_pois = sim.gmf.data(n = n, m = m, ncomp = d, family = poisson()) 51 | data_bin = sim.gmf.data(n = n, m = m, ncomp = d, family = binomial()) 52 | data_gam = sim.gmf.data(n = n, m = m, ncomp = d, family = Gamma(link = "log"), dispersion = 0.25) 53 | 54 | # Compare the results 55 | oldpar = par(no.readonly = TRUE) 56 | par(mfrow = c(3,3), mar = c(1,1,3,1)) 57 | image(data_pois$Y, axes = FALSE, main = expression(Y[Pois])) 58 | image(data_pois$mu, axes = FALSE, main = expression(mu[Pois])) 59 | image(data_pois$U, axes = FALSE, main = expression(U[Pois])) 60 | image(data_bin$Y, axes = FALSE, main = expression(Y[Bin])) 61 | image(data_bin$mu, axes = FALSE, main = expression(mu[Bin])) 62 | image(data_bin$U, axes = FALSE, main = expression(U[Bin])) 63 | image(data_gam$Y, axes = FALSE, main = expression(Y[Gam])) 64 | image(data_gam$mu, axes = FALSE, main = expression(mu[Gam])) 65 | image(data_gam$U, axes = FALSE, main = expression(U[Gam])) 66 | par(oldpar) 67 | 68 | } 69 | 
-------------------------------------------------------------------------------- /man/predict.sgdgmf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sgdGMF-class.R 3 | \name{predict.sgdgmf} 4 | \alias{predict.sgdgmf} 5 | \title{Predict method for GMF models} 6 | \usage{ 7 | \method{predict}{sgdgmf}( 8 | object, 9 | ..., 10 | newY = NULL, 11 | newX = NULL, 12 | type = c("link", "response", "terms", "coef"), 13 | parallel = FALSE, 14 | nthreads = 1 15 | ) 16 | } 17 | \arguments{ 18 | \item{object}{an object of class \code{sgdgmf}} 19 | 20 | \item{...}{further arguments passed to or from other methods} 21 | 22 | \item{newY}{optionally, a matrix of new response variable} 23 | 24 | \item{newX}{optionally, a matrix of new covariate values} 25 | 26 | \item{type}{the type of prediction which should be returned} 27 | 28 | \item{parallel}{if \code{TRUE}, allows for parallel computing using the package \code{foreach}} 29 | 30 | \item{nthreads}{number of cores to be used in parallel (only if \code{parallel=TRUE})} 31 | } 32 | \value{ 33 | If \code{type="link"} or \code{type="response"}, a matrix of predictions. 34 | If \code{type="terms"}, a list of matrices containing the fields \code{XB}, \code{AZ} and \code{UV}. 35 | If \code{type="coef"}, a list of matrices containing the fields \code{B}, \code{A}, \code{U} and \code{V}. 36 | } 37 | \description{ 38 | Computes the predictions of a GMF model. Out-of-sample predictions for a new 39 | set of responses and covariates are computed via MLE, by keeping fixed the values 40 | of the estimated \code{B} and \code{V} and maximizing the likelihood with respect 41 | to \code{A} and \code{U}. 42 | } 43 | \details{ 44 | If \code{newY} and \code{newX} are omitted, the predictions are based on the data 45 | used for the fit. In that case, the predictions correspond to the fitted values.
46 | If \code{newY} and \code{newX} are provided, a corresponding set of \code{A} and 47 | \code{U} are estimated via maximum likelihood using the \code{glm.fit} function. 48 | By doing so, \code{B} and \code{V} are kept fixed. 49 | } 50 | \examples{ 51 | # Load the sgdGMF package 52 | library(sgdGMF) 53 | 54 | # Generate data from a Poisson model 55 | data = sim.gmf.data(n = 120, m = 20, ncomp = 5, family = poisson()) 56 | train = sample(1:120, size = 100) 57 | test = setdiff(1:120, train) 58 | 59 | Y = data$Y[train, ] 60 | newY = data$Y[test, ] 61 | 62 | # Fit a GMF model with 3 latent factors 63 | gmf = sgdgmf.fit(Y, ncomp = 3, family = poisson()) 64 | 65 | # Get the fitted values of a GMF model 66 | str(predict(gmf)) # returns the overall fitted values in link scale 67 | str(predict(gmf, type = "response")) # returns the overall fitted values in response scale 68 | str(predict(gmf, type = "terms")) # returns the partial fitted values in link scale 69 | str(predict(gmf, newY = newY)) # returns the predictions for the new set of responses 70 | 71 | } 72 | -------------------------------------------------------------------------------- /tests/testcpp/test-misc.cpp: -------------------------------------------------------------------------------- 1 | // test-misc.cpp 2 | // author: Cristian Castiglione 3 | // creation: 01/10/2023 4 | // last change: 13/10/2023 5 | 6 | #include "misc.h" 7 | 8 | using namespace glm; 9 | 10 | void cpp_print_link_family (const std::unique_ptr & family) { 11 | Rcpp::Rcout << "Family: " << family->getfamily() << "\n"; 12 | Rcpp::Rcout << "Link: " << family->getlink() << "\n"; 13 | Rcpp::Rcout << "Mu: " << arma::vec{0.25, 0.5, 0.75} << "\n"; 14 | Rcpp::Rcout << "Eta: " << family->linkfun(arma::vec{0.25, 0.5, 0.75}) << "\n"; 15 | } 16 | 17 | //' @keywords internal 18 | // [[Rcpp::export("cpp.make.link.family")]] 19 | void cpp_make_link_family (const std::string & familyname, const std::string & linkname) { 20 | std::unique_ptr family = 
make_family(familyname, linkname); 21 | cpp_print_link_family(family); 22 | } 23 | 24 | //' @keywords internal 25 | // [[Rcpp::export("cpp.get.data.bounds")]] 26 | Rcpp::List cpp_get_data_bounds ( 27 | const double & eps, const double & ymin, const double & ymax, 28 | const std::string & familyname, const std::string & linkname 29 | ) { 30 | std::unique_ptr family = make_family(familyname, linkname); 31 | 32 | double mulo, muup, etalo, etaup; 33 | set_data_bounds(mulo, muup, etalo, etaup, eps, ymin, ymax, family); 34 | 35 | Rcpp::List out; 36 | out["family"] = family->getfamily(); 37 | out["link"] = family->getlink(); 38 | out["ylim"] = arma::vec{ymin, ymax}; 39 | out["mulim"] = arma::vec{mulo, muup}; 40 | out["etalim"] = arma::vec{etalo, etaup}; 41 | 42 | return out; 43 | } 44 | 45 | //' @keywords internal 46 | // [[Rcpp::export("cpp.get.uv.penalty")]] 47 | Rcpp::List cpp_get_uv_penalty ( 48 | const arma::vec & pen, 49 | const int & p, const int & q, const int & d 50 | ) { 51 | arma::vec penu(p+q+d), penv(p+q+d); 52 | set_uv_penalty(penu, penv, pen, p, q, d); 53 | 54 | Rcpp::List out; 55 | out["penu"] = penu; 56 | out["penv"] = penv; 57 | 58 | return out; 59 | } 60 | 61 | //' @keywords internal 62 | // [[Rcpp::export("cpp.get.uv.indices")]] 63 | Rcpp::List cpp_get_uv_indices ( 64 | const int & p, const int & q, const int & d 65 | ) { 66 | arma::uvec idu, idv; 67 | set_uv_indices(idu, idv, p, q, d); 68 | 69 | Rcpp::List out; 70 | out["idu"] = idu; 71 | out["idv"] = idv; 72 | 73 | return out; 74 | } 75 | 76 | //' @keywords internal 77 | // [[Rcpp::export("cpp.sample.minibatch")]] 78 | std::list cpp_sample_minibatch ( 79 | const int & n, const int & size, const bool & randomize 80 | ) { 81 | return sample_chunks(n, size, randomize); 82 | } 83 | 84 | //' @keywords internal 85 | // [[Rcpp::export("cpp.select.minibatch")]] 86 | int cpp_select_minibatch (const int & iter, const int & nchunks) { 87 | return select_chunk(iter, nchunks); 88 | } 
-------------------------------------------------------------------------------- /src/variance.cpp: -------------------------------------------------------------------------------- 1 | // variance.cpp 2 | // author: Cristian Castiglione 3 | // creation: 08/11/2023 4 | // last change: 21/11/2024 5 | 6 | #include "variance.h" 7 | 8 | using namespace glm; 9 | 10 | // Constant variance 11 | bool Constant::validmu (const arma::mat & mu) {return true;} 12 | arma::mat Constant::initfun (const arma::mat & y) {return y;} 13 | arma::mat Constant::varfun (const arma::mat & mu, const double & phi) {return arma::ones(arma::size(mu));} 14 | arma::mat Constant::devfun (const arma::mat & y, const arma::mat & mu, const double & phi) { 15 | return arma::square(y - mu); 16 | } 17 | 18 | // Linear variance 19 | bool Linear::validmu (const arma::mat & mu) {return utils::all(mu > 0);} 20 | arma::mat Linear::initfun (const arma::mat & y) {return arma::clamp(y, 0.1, infty);} 21 | arma::mat Linear::varfun (const arma::mat & mu, const double & phi) {return mu;} 22 | arma::mat Linear::devfun (const arma::mat & y, const arma::mat & mu, const double & phi) { 23 | return 2 * (utils::xlogx(y) - y % arma::log(mu) - (y - mu)); 24 | } 25 | 26 | // Squared variance 27 | bool Squared::validmu (const arma::mat & mu) {return utils::all(mu > 0);} 28 | arma::mat Squared::initfun (const arma::mat & y) {return arma::clamp(y, 0.1, infty);} 29 | arma::mat Squared::varfun (const arma::mat & mu, const double & phi) {return mu % mu;} 30 | arma::mat Squared::devfun (const arma::mat & y, const arma::mat & mu, const double & phi) { 31 | return - 2 * (arma::log(y / mu) - (y - mu) / mu); 32 | } 33 | 34 | // Cubic variance 35 | bool Cubic::validmu (const arma::mat & mu) {return utils::all(mu > 0);} 36 | arma::mat Cubic::initfun (const arma::mat & y) {return arma::clamp(y, 0.1, infty);} 37 | arma::mat Cubic::varfun (const arma::mat & mu, const double & phi) {return mu % mu % mu;} 38 | arma::mat Cubic::devfun (const 
arma::mat & y, const arma::mat & mu, const double & phi) { 39 | return arma::square(y - mu) / (y % mu % mu); 40 | } 41 | 42 | // cSquared variance 43 | bool cSquared::validmu (const arma::mat & mu) {return utils::all(mu > 0) && utils::all(mu < 1);} 44 | arma::mat cSquared::initfun (const arma::mat & y) {return 0.90 * (y - 0.5) + 0.5;} 45 | arma::mat cSquared::varfun (const arma::mat & mu, const double & phi) {return mu % (1 - mu);} 46 | arma::mat cSquared::devfun (const arma::mat & y, const arma::mat & mu, const double & phi) { 47 | return - 2 * (y % arma::log(mu) + (1 - y) % arma::log1p(-mu)); 48 | } 49 | 50 | // Negative-Binomial variance 51 | bool NBVariance::validmu (const arma::mat & mu) {return utils::all(mu > 0);} 52 | arma::mat NBVariance::initfun (const arma::mat & y) {return arma::clamp(y, 0.1, infty);} 53 | arma::mat NBVariance::varfun (const arma::mat & mu, const double & phi) {return mu % (1 + mu / phi);} 54 | arma::mat NBVariance::devfun (const arma::mat & y, const arma::mat & mu, const double & phi) { 55 | return 2 * (utils::xlogx(y) - y % arma::log(mu) - (y + phi) % (arma::log(y + phi) - arma::log(mu + phi))); 56 | } 57 | -------------------------------------------------------------------------------- /tests/testthat/test-control.R: -------------------------------------------------------------------------------- 1 | # file: test-control.R 2 | # author: Cristian Castiglione 3 | # creation: 05/02/2024 4 | # last change: 04/10/2024 5 | 6 | testthat::test_that("Set AIRWLS control parameters", { 7 | # Empty call 8 | testthat::expect_true(is.list(set.control.airwls())) 9 | testthat::expect_true(is.list(set.control.airwls(maxiter = 200, stepsize = 0.5))) 10 | # Wrongly parametrized call I: right parameter, but wrong value 11 | testthat::expect_warning(set.control.airwls(stepsize = -1)) 12 | testthat::expect_warning(set.control.airwls(stepsize = TRUE)) 13 | # Wrongly parametrixed call II: inexistent parameter 14 | 
testthat::expect_error(set.control.airwls(foo = TRUE)) 15 | }) 16 | 17 | testthat::test_that("Set Newton control parameters", { 18 | # Empty call 19 | testthat::expect_true(is.list(set.control.newton())) 20 | testthat::expect_true(is.list(set.control.newton(maxiter = 200, stepsize = 0.5))) 21 | # Wrongly parametrized call I: right parameter, but wrong value 22 | testthat::expect_warning(set.control.newton(stepsize = -1)) 23 | testthat::expect_warning(set.control.newton(stepsize = TRUE)) 24 | # Wrongly parametrixed call II: inexistent parameter 25 | testthat::expect_error(set.control.newton(foo = TRUE)) 26 | }) 27 | 28 | testthat::test_that("Set C-SGD control parameters", { 29 | # Empty call 30 | testthat::expect_true(is.list(set.control.coord.sgd())) 31 | testthat::expect_true(is.list(set.control.coord.sgd(maxiter = 500, rate0 = 0.5))) 32 | # Wrongly parametrized call I: right parameter, but wrong value 33 | testthat::expect_warning(set.control.coord.sgd(rate0 = -1)) 34 | testthat::expect_warning(set.control.coord.sgd(rate0 = TRUE)) 35 | # Wrongly parametrixed call II: inexistent parameter 36 | testthat::expect_error(set.control.coord.sgd(foo = TRUE)) 37 | }) 38 | 39 | testthat::test_that("Set B-SGD control parameters", { 40 | # Empty call 41 | testthat::expect_true(is.list(set.control.block.sgd())) 42 | testthat::expect_true(is.list(set.control.block.sgd(maxiter = 500, rate0 = 0.5))) 43 | # Wrongly parametrized call I: right parameter, but wrong value 44 | testthat::expect_warning(set.control.block.sgd(rate0 = -1)) 45 | testthat::expect_warning(set.control.block.sgd(rate0 = TRUE)) 46 | # Wrongly parametrixed call II: inexistent parameter 47 | testthat::expect_error(set.control.block.sgd(foo = TRUE)) 48 | }) 49 | 50 | testthat::test_that("Set generic control parameters", { 51 | ctr.airwls = set.control.alg(method = "airwls", control = list()) 52 | ctr.newton = set.control.alg(method = "newton", control = list()) 53 | ctr.csgd = set.control.alg(method = "sgd", 
sampling = "coord", control = list()) 54 | ctr.bsgd = set.control.alg(method = "sgd", sampling = "block", control = list()) 55 | 56 | testthat::expect_true(is.list(ctr.airwls)) 57 | testthat::expect_true(is.list(ctr.newton)) 58 | testthat::expect_true(is.list(ctr.csgd)) 59 | testthat::expect_true(is.list(ctr.bsgd)) 60 | }) 61 | -------------------------------------------------------------------------------- /src/variance.h: -------------------------------------------------------------------------------- 1 | // variance.h 2 | // author: Cristian Castiglione 3 | // creation: 08/11/2023 4 | // last change: 21/11/2024 5 | 6 | #ifndef VARIANCE_H 7 | #define VARIANCE_H 8 | 9 | #include 10 | #include "utils.h" 11 | 12 | namespace glm { 13 | 14 | class Variance { 15 | public: 16 | std::string varf = "Variance"; 17 | virtual bool validmu (const arma::mat & mu) = 0; 18 | virtual arma::mat varfun (const arma::mat & mu, const double & phi) = 0; 19 | virtual arma::mat initfun (const arma::mat & y) = 0; 20 | virtual arma::mat devfun (const arma::mat & y, const arma::mat & mu, const double & phi) = 0; 21 | virtual ~Variance () {} 22 | }; 23 | 24 | class Constant : public Variance { 25 | public: 26 | bool validmu (const arma::mat & mu); 27 | arma::mat initfun (const arma::mat & y); 28 | arma::mat varfun (const arma::mat & mu, const double & phi); 29 | arma::mat devfun (const arma::mat & y, const arma::mat & mu, const double & phi); 30 | Constant () {this->varf = "const";} 31 | }; 32 | 33 | class Linear : public Variance { 34 | public: 35 | bool validmu (const arma::mat & mu); 36 | arma::mat initfun (const arma::mat & y); 37 | arma::mat varfun (const arma::mat & mu, const double & phi); 38 | arma::mat devfun (const arma::mat & y, const arma::mat & mu, const double & phi); 39 | Linear () {this->varf = "mu";} 40 | }; 41 | 42 | class Squared : public Variance { 43 | public: 44 | bool validmu (const arma::mat & mu); 45 | arma::mat initfun (const arma::mat & y); 46 | arma::mat varfun 
(const arma::mat & mu, const double & phi); 47 | arma::mat devfun (const arma::mat & y, const arma::mat & mu, const double & phi); 48 | Squared () {this->varf = "mu^2";} 49 | }; 50 | 51 | class Cubic : public Variance { 52 | public: 53 | bool validmu (const arma::mat & mu); 54 | arma::mat initfun (const arma::mat & y); 55 | arma::mat varfun (const arma::mat & mu, const double & phi); 56 | arma::mat devfun (const arma::mat & y, const arma::mat & mu, const double & phi); 57 | Cubic () {this->varf = "mu^3";} 58 | }; 59 | 60 | class cSquared : public Variance { 61 | public: 62 | bool validmu (const arma::mat & mu); 63 | arma::mat initfun (const arma::mat & y); 64 | arma::mat varfun (const arma::mat & mu, const double & phi); 65 | arma::mat devfun (const arma::mat & y, const arma::mat & mu, const double & phi); 66 | cSquared () {this->varf = "mu(1-mu)";} 67 | }; 68 | 69 | class NBVariance : public Variance { 70 | public: 71 | bool validmu (const arma::mat & mu); 72 | arma::mat initfun (const arma::mat & y); 73 | arma::mat varfun (const arma::mat & mu, const double & phi); 74 | arma::mat devfun (const arma::mat & y, const arma::mat & mu, const double & phi); 75 | NBVariance () {this->varf = "mu(1+t*mu)";} 76 | }; 77 | 78 | } 79 | 80 | #endif -------------------------------------------------------------------------------- /man/cpp.fit.block.sgd.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{cpp.fit.block.sgd} 4 | \alias{cpp.fit.block.sgd} 5 | \title{Fit a GMF model using the adaptive SGD with block-wise minibatch subsampling} 6 | \usage{ 7 | cpp.fit.block.sgd( 8 | Y, 9 | X, 10 | B, 11 | A, 12 | Z, 13 | U, 14 | V, 15 | O, 16 | W, 17 | familyname, 18 | linkname, 19 | varfname, 20 | ncomp, 21 | lambda, 22 | maxiter = 1000L, 23 | eps = 0.01, 24 | nafill = 10L, 25 | tol = 1e-08, 26 | size1 = 100L, 27 | size2 = 100L, 28 | burn = 
0.75, 29 | rate0 = 0.01, 30 | decay = 0.01, 31 | damping = 0.001, 32 | rate1 = 0.95, 33 | rate2 = 0.99, 34 | parallel = FALSE, 35 | nthreads = 1L, 36 | verbose = TRUE, 37 | frequency = 250L, 38 | progress = FALSE 39 | ) 40 | } 41 | \arguments{ 42 | \item{Y}{matrix of responses (\eqn{n \times m})} 43 | 44 | \item{X}{matrix of row fixed effects (\eqn{n \times p})} 45 | 46 | \item{B}{initial row-effect matrix (\eqn{n \times p})} 47 | 48 | \item{A}{initial column-effect matrix (\eqn{n \times q})} 49 | 50 | \item{Z}{matrix of column fixed effects (\eqn{m \times q})} 51 | 52 | \item{U}{initial factor matrix (\eqn{n \times d})} 53 | 54 | \item{V}{initial loading matrix (\eqn{m \times d})} 55 | 56 | \item{O}{matrix of constant offset (\eqn{n \times m})} 57 | 58 | \item{W}{matrix of constant weights (\eqn{n \times m})} 59 | 60 | \item{familyname}{a \code{glm} model family name} 61 | 62 | \item{linkname}{a \code{glm} link function name} 63 | 64 | \item{varfname}{variance function name} 65 | 66 | \item{ncomp}{rank of the latent matrix factorization} 67 | 68 | \item{lambda}{penalization parameters} 69 | 70 | \item{maxiter}{maximum number of iterations} 71 | 72 | \item{eps}{shrinkage factor for extreme predictions} 73 | 74 | \item{nafill}{how often the missing values are updated} 75 | 76 | \item{tol}{tolerance threshold for the stopping criterion} 77 | 78 | \item{size1}{row-minibatch dimension} 79 | 80 | \item{size2}{column-minibatch dimension} 81 | 82 | \item{burn}{burn-in period in which the learning rate is not decreased} 83 | 84 | \item{rate0}{initial learning rate} 85 | 86 | \item{decay}{decay rate of the learning rate} 87 | 88 | \item{damping}{diagonal damping factor for the Hessian matrix} 89 | 90 | \item{rate1}{decay rate of the first moment estimate of the gradient} 91 | 92 | \item{rate2}{decay rate of the second moment estimate of the gradient} 93 | 94 | \item{parallel}{if \code{TRUE}, allows for parallel computing} 95 | 96 | \item{nthreads}{number of cores to be used
in parallel} 97 | 98 | \item{verbose}{if \code{TRUE}, print the optimization status} 99 | 100 | \item{frequency}{how often the optimization status is printed} 101 | 102 | \item{progress}{if \code{TRUE}, print a progress bar} 103 | } 104 | \description{ 105 | Fit a GMF model using the adaptive SGD with block-wise minibatch subsampling 106 | } 107 | \keyword{internal} 108 | -------------------------------------------------------------------------------- /man/residuals.initgmf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/initGMF-class.R 3 | \name{residuals.initgmf} 4 | \alias{residuals.initgmf} 5 | \alias{resid.initgmf} 6 | \title{Extract the residuals of an initialized GMF model} 7 | \usage{ 8 | \method{residuals}{initgmf}( 9 | object, 10 | ..., 11 | type = c("deviance", "pearson", "working", "response", "link"), 12 | partial = FALSE, 13 | normalize = FALSE, 14 | fillna = FALSE, 15 | spectrum = FALSE, 16 | ncomp = 50 17 | ) 18 | 19 | \method{resid}{initgmf}( 20 | object, 21 | ..., 22 | type = c("deviance", "pearson", "working", "response", "link"), 23 | partial = FALSE, 24 | normalize = FALSE, 25 | fillna = FALSE, 26 | spectrum = FALSE, 27 | ncomp = 50 28 | ) 29 | } 30 | \arguments{ 31 | \item{object}{an object of class \code{initgmf}} 32 | 33 | \item{...}{further arguments passed to or from other methods} 34 | 35 | \item{type}{the type of residuals which should be returned} 36 | 37 | \item{partial}{if \code{TRUE}, computes the residuals excluding the matrix factorization from the linear predictor} 38 | 39 | \item{normalize}{if \code{TRUE}, standardize the residuals column-by-column} 40 | 41 | \item{fillna}{if \code{TRUE}, fills \code{NA} values column-by-column} 42 | 43 | \item{spectrum}{if \code{TRUE}, returns the eigenvalues of the residual covariance matrix} 44 | 45 | \item{ncomp}{number of eigenvalues to be calculated (only if
\code{spectrum=TRUE})} 46 | } 47 | \value{ 48 | If \code{spectrum=FALSE}, a matrix containing the selected residuals. 49 | If \code{spectrum=TRUE}, a list containing the residuals (\code{res}), the first \code{ncomp} 50 | eigenvalues of the residual covariance matrix (\code{lambdas}), the variance explained by the first 51 | \code{ncomp} principal components of the residuals (\code{explained.var}), the variance not 52 | explained by the first \code{ncomp} principal components of the residuals (\code{residual.var}), 53 | the total variance of the residuals (\code{total.var}). 54 | } 55 | \description{ 56 | Extract the residuals of an initialized GMF model and, if required, compute 57 | the eigenvalues of the residual covariance/correlation matrix. 58 | Moreover, if required, return the partial residuals of the model obtained by 59 | excluding the matrix decomposition from the linear predictor. 60 | } 61 | \examples{ 62 | # Load the sgdGMF package 63 | library(sgdGMF) 64 | 65 | # Generate data from a Poisson model 66 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson()) 67 | 68 | # Fit a GMF model with 3 latent factors 69 | init = sgdgmf.init(data$Y, ncomp = 3, family = poisson()) 70 | 71 | # Get the deviance residuals of a GMF model 72 | str(residuals(init)) # returns the overall deviance residuals 73 | str(residuals(init, partial = TRUE)) # returns the partial residuals 74 | str(residuals(init, spectrum = TRUE)) # returns the eigenvalues of the residual var-cov matrix 75 | 76 | } 77 | \seealso{ 78 | \code{\link{residuals.sgdgmf}} and \code{\link{resid.sgdgmf}} for more details on the residual computation.
79 | } 80 | -------------------------------------------------------------------------------- /man/cpp.fit.coord.sgd.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{cpp.fit.coord.sgd} 4 | \alias{cpp.fit.coord.sgd} 5 | \title{Fit a GMF model using the adaptive SGD with coordinate-wise minibatch subsampling algorithm} 6 | \usage{ 7 | cpp.fit.coord.sgd( 8 | Y, 9 | X, 10 | B, 11 | A, 12 | Z, 13 | U, 14 | V, 15 | O, 16 | W, 17 | familyname, 18 | linkname, 19 | varfname, 20 | ncomp, 21 | lambda, 22 | maxiter = 1000L, 23 | eps = 0.01, 24 | nafill = 10L, 25 | tol = 1e-08, 26 | size1 = 100L, 27 | size2 = 100L, 28 | burn = 0.75, 29 | rate0 = 0.01, 30 | decay = 0.01, 31 | damping = 0.001, 32 | rate1 = 0.95, 33 | rate2 = 0.99, 34 | parallel = FALSE, 35 | nthreads = 1L, 36 | verbose = TRUE, 37 | frequency = 250L, 38 | progress = FALSE 39 | ) 40 | } 41 | \arguments{ 42 | \item{Y}{matrix of responses (\eqn{n \times m})} 43 | 44 | \item{X}{matrix of row fixed effects (\eqn{n \times p})} 45 | 46 | \item{B}{initial row-effect matrix (\eqn{n \times p})} 47 | 48 | \item{A}{initial column-effect matrix (\eqn{n \times q})} 49 | 50 | \item{Z}{matrix of column fixed effects (\eqn{m \times q})} 51 | 52 | \item{U}{initial factor matrix (\eqn{n \times d})} 53 | 54 | \item{V}{initial loading matrix (\eqn{m \times d})} 55 | 56 | \item{O}{matrix of constant offset (\eqn{n \times m})} 57 | 58 | \item{W}{matrix of constant weights (\eqn{n \times m})} 59 | 60 | \item{familyname}{a \code{glm} model family name} 61 | 62 | \item{linkname}{a \code{glm} link function name} 63 | 64 | \item{varfname}{variance function name} 65 | 66 | \item{ncomp}{rank of the latent matrix factorization} 67 | 68 | \item{lambda}{penalization parameters} 69 | 70 | \item{maxiter}{maximum number of iterations} 71 | 72 | \item{eps}{shrinkage factor for extreme predictions} 73 | 74 | 
\item{nafill}{how often the missing values are updated} 75 | 76 | \item{tol}{tolerance threshold for the stopping criterion} 77 | 78 | \item{size1}{row-minibatch dimension} 79 | 80 | \item{size2}{column-minibatch dimension} 81 | 82 | \item{burn}{burn-in period in which the learning rate is not decreased} 83 | 84 | \item{rate0}{initial learning rate} 85 | 86 | \item{decay}{decay rate of the learning rate} 87 | 88 | \item{damping}{diagonal damping factor for the Hessian matrix} 89 | 90 | \item{rate1}{decay rate of the first moment estimate of the gradient} 91 | 92 | \item{rate2}{decay rate of the second moment estimate of the gradient} 93 | 94 | \item{parallel}{if \code{TRUE}, allows for parallel computing} 95 | 96 | \item{nthreads}{number of cores to be used in parallel} 97 | 98 | \item{verbose}{if \code{TRUE}, print the optimization status} 99 | 100 | \item{frequency}{how often the optimization status is printed} 101 | 102 | \item{progress}{if \code{TRUE}, print a progress bar} 103 | } 104 | \description{ 105 | Fit a GMF model using the adaptive SGD with coordinate-wise minibatch subsampling algorithm 106 | } 107 | \keyword{internal} 108 | -------------------------------------------------------------------------------- /man/cpp.fit.random.block.sgd.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{cpp.fit.random.block.sgd} 4 | \alias{cpp.fit.random.block.sgd} 5 | \title{Fit a GMF model using the adaptive SGD with block-wise minibatch subsampling} 6 | \usage{ 7 | cpp.fit.random.block.sgd( 8 | Y, 9 | X, 10 | B, 11 | A, 12 | Z, 13 | U, 14 | V, 15 | O, 16 | W, 17 | familyname, 18 | linkname, 19 | varfname, 20 | ncomp, 21 | lambda, 22 | maxiter = 1000L, 23 | eps = 0.01, 24 | nafill = 10L, 25 | tol = 1e-08, 26 | size1 = 100L, 27 | size2 = 100L, 28 | burn = 0.75, 29 | rate0 = 0.01, 30 | decay = 0.01, 31 | damping = 0.001, 32 |
rate1 = 0.95, 33 | rate2 = 0.99, 34 | parallel = FALSE, 35 | nthreads = 1L, 36 | verbose = TRUE, 37 | frequency = 250L, 38 | progress = FALSE 39 | ) 40 | } 41 | \arguments{ 42 | \item{Y}{matrix of responses (\eqn{n \times m})} 43 | 44 | \item{X}{matrix of row fixed effects (\eqn{n \times p})} 45 | 46 | \item{B}{initial row-effect matrix (\eqn{n \times p})} 47 | 48 | \item{A}{initial column-effect matrix (\eqn{n \times q})} 49 | 50 | \item{Z}{matrix of column fixed effects (\eqn{m \times q})} 51 | 52 | \item{U}{initial factor matrix (\eqn{n \times d})} 53 | 54 | \item{V}{initial loading matrix (\eqn{m \times d})} 55 | 56 | \item{O}{matrix of constant offset (\eqn{n \times m})} 57 | 58 | \item{W}{matrix of constant weights (\eqn{n \times m})} 59 | 60 | \item{familyname}{a \code{glm} model family name} 61 | 62 | \item{linkname}{a \code{glm} link function name} 63 | 64 | \item{varfname}{variance function name} 65 | 66 | \item{ncomp}{rank of the latent matrix factorization} 67 | 68 | \item{lambda}{penalization parameters} 69 | 70 | \item{maxiter}{maximum number of iterations} 71 | 72 | \item{eps}{shrinkage factor for extreme predictions} 73 | 74 | \item{nafill}{how often the missing values are updated} 75 | 76 | \item{tol}{tolerance threshold for the stopping criterion} 77 | 78 | \item{size1}{row-minibatch dimension} 79 | 80 | \item{size2}{column-minibatch dimension} 81 | 82 | \item{burn}{burn-in period in which the learning rate is not decreased} 83 | 84 | \item{rate0}{initial learning rate} 85 | 86 | \item{decay}{decay rate of the learning rate} 87 | 88 | \item{damping}{diagonal damping factor for the Hessian matrix} 89 | 90 | \item{rate1}{decay rate of the first moment estimate of the gradient} 91 | 92 | \item{rate2}{decay rate of the second moment estimate of the gradient} 93 | 94 | \item{parallel}{if \code{TRUE}, allows for parallel computing} 95 | 96 | \item{nthreads}{number of cores to be used in parallel} 97 | 98 | \item{verbose}{if \code{TRUE}, print the 
optimization status} 99 | 100 | \item{frequency}{how often the optimization status is printed} 101 | 102 | \item{progress}{if \code{TRUE}, print a progress bar} 103 | } 104 | \description{ 105 | Fit a GMF model using the adaptive SGD with block-wise minibatch subsampling 106 | } 107 | \keyword{internal} 108 | -------------------------------------------------------------------------------- /src/misc.h: -------------------------------------------------------------------------------- 1 | // misc.h 2 | // author: Cristian Castiglione 3 | // creation: 30/09/2023 4 | // last change: 16/11/2024 5 | 6 | #include 7 | #include 8 | #include "utils.h" 9 | #include "link.h" 10 | #include "variance.h" 11 | #include "family.h" 12 | #include 13 | 14 | using namespace glm; 15 | 16 | // Create a dynamic pointer to an appropriate link/family class starting 17 | // from a string identifying the correct link/family to choose 18 | std::unique_ptr make_link (const std::string & linkname); 19 | std::unique_ptr make_varf (const std::string & varname); 20 | std::unique_ptr make_family ( 21 | const std::string & familyname, 22 | const std::string & linkname, 23 | const std::string & varfname); 24 | 25 | // Set the lower and upper bounds for mu and eta based on the observed data range 26 | // so as to avoid producing predictions with too extreme values 27 | // template 28 | void set_data_bounds ( 29 | double & mulo, double & muup, double & etalo, double & etaup, 30 | const double & eps, const double & ymin, const double & ymax, 31 | const std::unique_ptr & family); 32 | 33 | // Set the linear predictor trimming the extreme values 34 | void set_eta ( 35 | arma::mat & eta, const arma::mat & offset, 36 | const arma::mat & u, const arma::mat & v, 37 | const double & etamin, const double & etamax); 38 | 39 | // Get the linear predictor trimming the extreme values 40 | arma::mat get_eta ( 41 | const arma::mat & offset, 42 | const arma::mat & u, const arma::mat & v, 43 | const double & etamin, const 
double & etamax); 44 | 45 | // Set the augmented u and v matrices merging by column the fixed and latent effect matrices 46 | void set_uv_matrices ( 47 | arma::mat & u, arma::mat & v, 48 | const arma::mat & A, const arma::mat & Z, 49 | const arma::mat & X, const arma::mat & B, 50 | const arma::mat & U, const arma::mat & V); 51 | 52 | // Set the indices of the parameters to be updated during the optimization in u and v 53 | void set_uv_indices ( 54 | arma::uvec & idu, arma::uvec & idv, 55 | const int & p, const int & q, const int & d); 56 | 57 | // Set the vectors of penalty parameters to be multiplied columnwise to u and v 58 | void set_uv_penalty ( 59 | arma::vec & penu, arma::vec & penv, const arma::vec & pen, 60 | const int & p, const int & q, const int & d); 61 | 62 | // Convert the cpu clock-time into the elapsed execution time (seconds) 63 | double exetime (const clock_t & start, const clock_t & end); 64 | 65 | // Print the optimization state 66 | void print_state ( 67 | const int & iter, const double & div, 68 | const double & change, const double & time); 69 | 70 | // Print the optimization state (overload taking the additional `scanned` argument) 71 | void print_state ( 72 | const int & iter, const double & div, 73 | const double & change, const double & time, 74 | const double & scanned); 75 | 76 | // Divide the data indices into random chunks 77 | std::list sample_chunks ( 78 | const int & n, const int & size, const bool & randomize); 79 | 80 | // Select the appropriate chunk for the current iteration 81 | int select_chunk (const int & iter, const int & nchunks); -------------------------------------------------------------------------------- /tests/testthat/test-init.R: -------------------------------------------------------------------------------- 1 | # file: test-init.R 2 | # author: Cristian Castiglione 3 | # creation: 05/02/2024 4 | # last change: 04/10/2024 5 | 6 | testthat::test_that("OLS initialization", { 7 | n = 100; m = 10; d = 5; f = Gamma(link = "log") 8 | dat = sim.gmf.data(n = n, m = m, ncomp = 
d, family = f, dispersion = 0.5) 9 | init = sgdgmf.init.ols(dat$Y, ncomp = d, family = f) 10 | 11 | # Output class 12 | testthat::expect_true(is.list(init)) 13 | # Sub-output classes 14 | testthat::expect_true(is.matrix(init$U) && is.numeric(init$U)) 15 | testthat::expect_true(is.matrix(init$V) && is.numeric(init$V)) 16 | testthat::expect_true(is.matrix(init$A) && is.numeric(init$A)) 17 | testthat::expect_true(is.matrix(init$B) && is.numeric(init$B)) 18 | # Output dimensions 19 | testthat::expect_equal(dim(init$U), c(n,d)) 20 | testthat::expect_equal(dim(init$V), c(m,d)) 21 | testthat::expect_equal(dim(init$A), c(n,1)) 22 | testthat::expect_equal(dim(init$B), c(m,1)) 23 | }) 24 | 25 | testthat::test_that("GLM initialization", { 26 | n = 100; m = 10; d = 5; f = Gamma(link = "log") 27 | dat = sim.gmf.data(n = n, m = m, ncomp = d, family = f, dispersion = 0.5) 28 | init = sgdgmf.init.glm(dat$Y, ncomp = d, family = f) 29 | 30 | # Output class 31 | testthat::expect_true(is.list(init)) 32 | # Sub-output classes 33 | testthat::expect_true(is.matrix(init$U) && is.numeric(init$U)) 34 | testthat::expect_true(is.matrix(init$V) && is.numeric(init$V)) 35 | testthat::expect_true(is.matrix(init$A) && is.numeric(init$A)) 36 | testthat::expect_true(is.matrix(init$B) && is.numeric(init$B)) 37 | # Output dimensions 38 | testthat::expect_equal(dim(init$U), c(n,d)) 39 | testthat::expect_equal(dim(init$V), c(m,d)) 40 | testthat::expect_equal(dim(init$A), c(n,1)) 41 | testthat::expect_equal(dim(init$B), c(m,1)) 42 | }) 43 | 44 | testthat::test_that("Random initialization", { 45 | n = 100; m = 10; d = 5; f = Gamma(link = "log") 46 | dat = sim.gmf.data(n = n, m = m, ncomp = d, family = f, dispersion = 0.5) 47 | init = sgdgmf.init.random(dat$Y, ncomp = d, family = f) 48 | 49 | # Output class 50 | testthat::expect_true(is.list(init)) 51 | # Sub-output classes 52 | testthat::expect_true(is.matrix(init$U) && is.numeric(init$U)) 53 | testthat::expect_true(is.matrix(init$V) && 
is.numeric(init$V)) 54 | testthat::expect_true(is.matrix(init$A) && is.numeric(init$A)) 55 | testthat::expect_true(is.matrix(init$B) && is.numeric(init$B)) 56 | # Output dimensions 57 | testthat::expect_equal(dim(init$U), c(n,d)) 58 | testthat::expect_equal(dim(init$V), c(m,d)) 59 | testthat::expect_equal(dim(init$A), c(n,1)) 60 | testthat::expect_equal(dim(init$B), c(m,1)) 61 | }) 62 | 63 | 64 | testthat::test_that("Generic initialization", { 65 | n = 100; m = 10; d = 5; f = Gamma(link = "log") 66 | dat = sim.gmf.data(n = n, m = m, ncomp = d, family = f, dispersion = 0.5) 67 | 68 | init.ols = sgdgmf.init(dat$Y, ncomp = d, family = f, method = "ols") 69 | init.glm = sgdgmf.init(dat$Y, ncomp = d, family = f, method = "glm") 70 | init.rnd = sgdgmf.init(dat$Y, ncomp = d, family = f, method = "random") 71 | 72 | # Output class 73 | testthat::expect_true(is.list(init.ols)) 74 | testthat::expect_true(is.list(init.glm)) 75 | testthat::expect_true(is.list(init.rnd)) 76 | }) 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /src/minibatch.cpp: -------------------------------------------------------------------------------- 1 | // minibatch.cpp 2 | // author: Cristian Castiglione 3 | // creation: 06/10/2023 4 | // last change: 06/10/2023 5 | 6 | #include "minibatch.h" 7 | 8 | void Chunks::set_chunks (const int & n, const int & size, const bool & randomize) { 9 | this->nidx = n; 10 | this->nchunks = ceil(double(n) / size); 11 | this->randomize = randomize; 12 | this->idx = arma::linspace(0, n-1, n); 13 | this->start = arma::zeros(this->nchunks); 14 | this->end = arma::zeros(this->nchunks); 15 | this->range = arma::zeros(this->nchunks); 16 | if (this->randomize) { 17 | this->idx = arma::shuffle(this->idx); 18 | } 19 | for (int i = 0; i < this->nchunks; i++) { 20 | this->start(i) = i * size; 21 | this->end(i) = std::min((i + 1) * size, n); 22 | this->range(i) = this->end(i) - this->start(i); 23 | } 24 | } 25 | 26 | arma::uvec 
Chunks::get_chunk (const int & iter) { 27 | int mod = iter % this->nchunks; 28 | // int i = (mod == 0) ? this->nchunks : mod; 29 | int i; 30 | if (iter == 0 && mod == 0) {i = iter;} 31 | if (iter != 0 && mod != 0) {i = mod;} 32 | if (iter != 0 && mod == 0) {i = 0;} 33 | int a = this->start(i); 34 | int b = this->end(i)-1; 35 | int c = this->range(i); 36 | arma::uvec which = arma::linspace(a, b, c); 37 | arma::uvec chunk = this->idx(which); 38 | return chunk; 39 | } 40 | 41 | std::list Chunks::get_chunks (const arma::uvec & iters) { 42 | arma::uvec chunk; 43 | std::list chunks; 44 | for (int iter : iters) { 45 | chunk = this->get_chunk(iter); 46 | chunks.push_back(chunk); 47 | } 48 | return chunks; 49 | } 50 | 51 | 52 | void ChunkPile::fill_tovisit () { 53 | this->tovisit = this->visited; 54 | } 55 | 56 | void ChunkPile::empty_visited () { 57 | this->visited = {}; 58 | } 59 | 60 | void ChunkPile::pop_tovisit (const int & id) { 61 | int n = this->tovisit.n_elem; 62 | arma::uvec h = arma::find(this->tovisit == id); 63 | int i = h(0); 64 | if (i == 0) {this->tovisit = this->tovisit.tail(n-1);} 65 | if (i == n-1) {this->tovisit = this->tovisit.head(n-1);} 66 | if (i > 0 && i < n-1) { 67 | arma::uvec head = this->tovisit.head(i); 68 | arma::uvec tail = this->tovisit.tail(n-i-1); 69 | this->tovisit = arma::join_cols(head, tail); 70 | } 71 | } 72 | 73 | void ChunkPile::push_visited (const int & id) { 74 | arma::uword i = id; 75 | this->visited = arma::join_cols(this->visited, arma::uvec{i}); 76 | } 77 | 78 | void ChunkPile::sample_idx () { 79 | int n = this->tovisit.n_elem; 80 | int which; 81 | if (this->random) { 82 | which = arma::randi(arma::distr_param(0, n-1)); 83 | } else { 84 | which = 0; 85 | } 86 | this->idx = this->tovisit(which); 87 | } 88 | 89 | void ChunkPile::update () { 90 | // If tovisit is empty, fill it using visited and empty the latter 91 | int n = this->tovisit.n_elem; 92 | if (n == 0) { 93 | this->fill_tovisit(); 94 | this->empty_visited(); 95 | } 96 | 
97 | // Sample a random index, pop it from to visit and push it to visited 98 | this->sample_idx(); 99 | this->pop_tovisit(idx); 100 | this->push_visited(idx); 101 | } -------------------------------------------------------------------------------- /src/link.cpp: -------------------------------------------------------------------------------- 1 | // link.cpp 2 | // author: Cristian Castiglione 3 | // creation: 28/09/2023 4 | // last change: 21/11/2024 5 | 6 | #include "link.h" 7 | 8 | using namespace glm; 9 | 10 | // Identity link 11 | bool Identity::valideta (const arma::mat & eta){return true;} 12 | arma::mat Identity::linkfun (const arma::mat & mu) {return mu;} 13 | arma::mat Identity::linkinv (const arma::mat & eta) {return eta;} 14 | arma::mat Identity::mueta (const arma::mat & eta) {return arma::ones(arma::size(eta));} 15 | 16 | // Logit link 17 | bool Logit::valideta (const arma::mat & eta){return true;} 18 | arma::mat Logit::linkfun (const arma::mat & mu) {return arma::log(mu) - arma::log1p(-mu);} 19 | arma::mat Logit::linkinv (const arma::mat & eta) {return arma::exp(eta - arma::log1p(arma::exp(eta)));} 20 | arma::mat Logit::mueta (const arma::mat & eta) {return arma::exp(eta - 2 * arma::log1p(arma::exp(eta)));} 21 | 22 | // Probit link 23 | bool Probit::valideta (const arma::mat & eta){return true;} 24 | arma::mat Probit::linkfun (const arma::mat & mu) { 25 | // This code should be replaced with a vectorized implementation 26 | arma::mat eta = mu; 27 | eta.transform([](double & x) {return R::qnorm(x, 0, 1, true, false);}); 28 | return eta; 29 | } 30 | arma::mat Probit::linkinv (const arma::mat & eta) {return arma::normcdf(eta);} 31 | arma::mat Probit::mueta (const arma::mat & eta) {return arma::normpdf(eta);} 32 | 33 | // Cauchit link 34 | bool Cauchit::valideta (const arma::mat & eta){return true;} 35 | arma::mat Cauchit::linkfun (const arma::mat & mu) {return arma::tan(pi * (mu - 0.5));} 36 | arma::mat Cauchit::linkinv (const arma::mat & eta) {return 
0.5 + arma::atan(eta) / pi;} 37 | arma::mat Cauchit::mueta (const arma::mat & eta) {return invpi / (eta % eta + 1);} 38 | 39 | // cLogLog link: mu = 1 - exp(-exp(eta)), hence dmu/deta = exp(eta - exp(eta)) 40 | bool cLogLog::valideta (const arma::mat & eta){return true;} 41 | arma::mat cLogLog::linkfun (const arma::mat & mu) {return arma::log(- arma::log1p(-mu));} 42 | arma::mat cLogLog::linkinv (const arma::mat & eta) {return 1 - arma::exp(- arma::exp(eta));} 43 | arma::mat cLogLog::mueta (const arma::mat & eta) {return arma::exp(eta - arma::exp(eta));} 44 | 45 | // Log link 46 | bool Log::valideta (const arma::mat & eta){return true;} 47 | arma::mat Log::linkfun (const arma::mat & mu) {return arma::log(mu);} 48 | arma::mat Log::linkinv (const arma::mat & eta) {return arma::exp(eta);} 49 | arma::mat Log::mueta (const arma::mat & eta) {return arma::exp(eta);} 50 | 51 | // Inverse link 52 | bool Inverse::valideta (const arma::mat & eta){return utils::all(eta > 0);} 53 | arma::mat Inverse::linkfun (const arma::mat & mu) {return 1 / mu;} 54 | arma::mat Inverse::linkinv (const arma::mat & eta) {return 1 / eta;} 55 | arma::mat Inverse::mueta (const arma::mat & eta) {return - 1 / (eta % eta);} 56 | 57 | // Squared inverse link 58 | bool SquaredInverse::valideta (const arma::mat & eta){return utils::all(eta > 0);} 59 | arma::mat SquaredInverse::linkfun (const arma::mat & mu) {return 1 / arma::square(mu);} 60 | arma::mat SquaredInverse::linkinv (const arma::mat & eta) {return 1 / arma::sqrt(eta);} 61 | arma::mat SquaredInverse::mueta (const arma::mat & eta) {return - 1 / (2 * arma::pow(eta, 1.5));} 62 | 63 | // Sqrt link 64 | bool Sqrt::valideta (const arma::mat & eta){return utils::all(eta > 0);} 65 | arma::mat Sqrt::linkfun (const arma::mat & mu) {return arma::sqrt(mu);} 66 | arma::mat Sqrt::linkinv (const arma::mat & eta) {return arma::square(eta);} 67 | arma::mat Sqrt::mueta (const arma::mat & eta) {return 2 * eta;} 68 | 69 | -------------------------------------------------------------------------------- /src/family.cpp: 
-------------------------------------------------------------------------------- 1 | // family.cpp 2 | // author: Cristian Castiglione 3 | // creation: 28/09/2023 4 | // last change: 21/11/2024 5 | 6 | #include "family.h" 7 | 8 | using namespace glm; 9 | 10 | // Gaussian family 11 | arma::mat Gaussian::variance (const arma::mat & mu) const {return arma::ones(size(mu));} 12 | arma::mat Gaussian::initialize (const arma::mat & y) const {return y;} 13 | arma::mat Gaussian::devresid (const arma::mat & y, const arma::mat & mu) const {return arma::square(y - mu);} 14 | 15 | // Binomial family 16 | arma::mat Binomial::variance (const arma::mat & mu) const {return mu % (1 - mu);} 17 | arma::mat Binomial::initialize (const arma::mat & y) const {return 2 * y - 1;} 18 | arma::mat Binomial::devresid (const arma::mat & y, const arma::mat & mu) const { 19 | return - 2 * (y % arma::log(mu) + (1 - y) % arma::log1p(-mu)); 20 | } 21 | 22 | // Poisson family 23 | arma::mat Poisson::variance (const arma::mat & mu) const {return mu;} 24 | arma::mat Poisson::initialize (const arma::mat & y) const {return this->linkfun(arma::clamp(y, 0.1, infty));} 25 | arma::mat Poisson::devresid (const arma::mat & y, const arma::mat & mu) const { 26 | return 2 * (utils::xlogx(y) - y % arma::log(mu) - (y - mu)); 27 | } 28 | 29 | // Gamma family 30 | arma::mat Gamma::variance (const arma::mat & mu) const {return arma::square(mu);} 31 | arma::mat Gamma::initialize (const arma::mat & y) const {return this->linkfun(y);} 32 | arma::mat Gamma::devresid (const arma::mat & y, const arma::mat & mu) const { 33 | return - 2 * (arma::log(y / mu) - (y - mu) / mu); 34 | } 35 | 36 | // Inverse-Gaussian family 37 | arma::mat InverseGaussian::variance (const arma::mat & mu) const {return mu % mu % mu;} 38 | arma::mat InverseGaussian::initialize (const arma::mat & y) const {return this->linkfun(y);} 39 | arma::mat InverseGaussian::devresid (const arma::mat & y, const arma::mat & mu) const { 40 | return arma::square(y - 
mu) / (y % mu % mu); 41 | } 42 | 43 | // Negative-Binomial family 44 | arma::mat NegativeBinomial::variance (const arma::mat & mu) const {return mu + (mu % mu) / this->dispersion;} 45 | arma::mat NegativeBinomial::initialize (const arma::mat & y) const {return this->linkfun(arma::clamp(y, 0.1, infty));} 46 | arma::mat NegativeBinomial::devresid (const arma::mat & y, const arma::mat & mu) const { 47 | const double phi = this->dispersion; 48 | return 2 * (utils::xlogx(y) - y % arma::log(mu) - (y + phi) % (arma::log(y + phi) - arma::log(mu + phi))); 49 | } 50 | 51 | // Quasi-Binomial family 52 | arma::mat QuasiBinomial::variance (const arma::mat & mu) const {return mu % (1 - mu);} 53 | arma::mat QuasiBinomial::initialize (const arma::mat & y) const {return 2 * y - 1;} 54 | arma::mat QuasiBinomial::devresid (const arma::mat & y, const arma::mat & mu) const { 55 | return - 2 * (y % arma::log(mu) + (1 - y) % arma::log1p(-mu)); 56 | } 57 | 58 | // Quasi-Poisson family 59 | arma::mat QuasiPoisson::variance (const arma::mat & mu) const {return mu;} 60 | arma::mat QuasiPoisson::initialize (const arma::mat & y) const {return this->linkfun(arma::clamp(y, 0.1, infty));} 61 | arma::mat QuasiPoisson::devresid (const arma::mat & y, const arma::mat & mu) const { 62 | return 2 * (utils::xlogx(y) - y % arma::log(mu) - (y - mu)); 63 | } 64 | 65 | // Quasi family 66 | arma::mat Quasi::variance (const arma::mat & mu) const {return this->varfun(mu);} 67 | arma::mat Quasi::initialize (const arma::mat & y) const {return this->linkfun(this->initfun(y));} 68 | arma::mat Quasi::devresid (const arma::mat & y, const arma::mat & mu) const {return this->devfun(y, mu);} 69 | -------------------------------------------------------------------------------- /src/link.h: -------------------------------------------------------------------------------- 1 | // link.h 2 | // author: Cristian Castiglione 3 | // creation: 28/09/2023 4 | // last change: 21/10/2024 5 | 6 | #ifndef LINK_H 7 | #define LINK_H 8 
| 9 | #include 10 | #include "utils.h" 11 | 12 | namespace glm { 13 | 14 | class Link { 15 | public: 16 | std::string link = "Link"; 17 | virtual bool valideta (const arma::mat & eta) = 0; 18 | virtual arma::mat linkfun (const arma::mat & mu) = 0; 19 | virtual arma::mat linkinv (const arma::mat & eta) = 0; 20 | virtual arma::mat mueta (const arma::mat & eta) = 0; 21 | virtual ~Link () {} 22 | }; 23 | 24 | class Identity : public Link { 25 | public: 26 | bool valideta (const arma::mat & eta); 27 | arma::mat linkfun (const arma::mat & mu); 28 | arma::mat linkinv (const arma::mat & eta); 29 | arma::mat mueta (const arma::mat & eta); 30 | Identity () {this->link = "Identity";} 31 | }; 32 | 33 | class Logit : public Link { 34 | public: 35 | bool valideta (const arma::mat & eta); 36 | arma::mat linkfun (const arma::mat & mu); 37 | arma::mat linkinv (const arma::mat & eta); 38 | arma::mat mueta (const arma::mat & eta); 39 | Logit () {this->link = "Logit";} 40 | }; 41 | 42 | class Probit : public Link { 43 | public: 44 | bool valideta (const arma::mat & eta); 45 | arma::mat linkfun (const arma::mat & mu); 46 | arma::mat linkinv (const arma::mat & eta); 47 | arma::mat mueta (const arma::mat & eta); 48 | Probit () {this->link = "Probit";} 49 | }; 50 | 51 | class Cauchit : public Link { 52 | public: 53 | bool valideta (const arma::mat & eta); 54 | arma::mat linkfun (const arma::mat & mu); 55 | arma::mat linkinv (const arma::mat & eta); 56 | arma::mat mueta (const arma::mat & eta); 57 | Cauchit () {this->link = "Cauchit";} 58 | }; 59 | 60 | class cLogLog : public Link { 61 | public: 62 | bool valideta (const arma::mat & eta); 63 | arma::mat linkfun (const arma::mat & mu); 64 | arma::mat linkinv (const arma::mat & eta); 65 | arma::mat mueta (const arma::mat & eta); 66 | cLogLog () {this->link = "cLogLog";} 67 | }; 68 | 69 | class Log : public Link { 70 | public: 71 | bool valideta (const arma::mat & eta); 72 | arma::mat linkfun (const arma::mat & mu); 73 | arma::mat linkinv 
(const arma::mat & eta); 74 | arma::mat mueta (const arma::mat & eta); 75 | Log () {this->link = "Log";} 76 | }; 77 | 78 | class Inverse : public Link { 79 | public: 80 | bool valideta (const arma::mat & eta); 81 | arma::mat linkfun (const arma::mat & mu); 82 | arma::mat linkinv (const arma::mat & eta); 83 | arma::mat mueta (const arma::mat & eta); 84 | Inverse () {this->link = "Inverse";} 85 | }; 86 | 87 | class SquaredInverse : public Link { 88 | public: 89 | bool valideta (const arma::mat & eta); 90 | arma::mat linkfun (const arma::mat & mu); 91 | arma::mat linkinv (const arma::mat & eta); 92 | arma::mat mueta (const arma::mat & eta); 93 | SquaredInverse () {this->link = "1/mu^2";} 94 | }; 95 | 96 | class Sqrt : public Link { 97 | public: 98 | bool valideta (const arma::mat & eta); 99 | arma::mat linkfun (const arma::mat & mu); 100 | arma::mat linkinv (const arma::mat & eta); 101 | arma::mat mueta (const arma::mat & eta); 102 | Sqrt () {this->link = "Sqrt";} 103 | }; 104 | 105 | } 106 | 107 | #endif -------------------------------------------------------------------------------- /tests/testrcpp/test-misc.R: -------------------------------------------------------------------------------- 1 | # test-misc.R 2 | # author: Cristian Castiglione 3 | # creation: 02/10/2023 4 | # last change: 06/10/2023 5 | 6 | ## Workspace setup ---- 7 | rm(list = ls()) 8 | graphics.off() 9 | 10 | # Package compilation and import 11 | devtools::load_all() 12 | 13 | 14 | ## Test: get_data_bounds() ---- 15 | { 16 | ymin = 0; ymax = 1; eps = 0.01 17 | r.bounds = binomial(link = "probit")$linkfun(c(ymin+eps*(ymax-ymin), ymax-eps*(ymax-ymin))) 18 | c.bounds = drop(sgdGMF::c_get_data_bounds(eps, ymin, ymax, "binomial", "probit")$etalim) 19 | print(all.equal(r.bounds, c.bounds)) 20 | } 21 | 22 | { 23 | ymin = 0; ymax = 1; eps = 0.01 24 | r.bounds = binomial(link = "logit")$linkfun(c(ymin+eps*(ymax-ymin), ymax-eps*(ymax-ymin))) 25 | c.bounds = drop(sgdGMF::c_get_data_bounds(eps, ymin, ymax, 
"binomial", "logit")$etalim) 26 | print(all.equal(r.bounds, c.bounds)) 27 | } 28 | 29 | ## Test: get_uv_indices() ---- 30 | { 31 | p = 3; q = 1; d = 2 32 | r.idx = list(idu = c(p:(p+q-1), (p+q):(p+q+d-1)), idv = c(0:(p-1), (p+q):(p+q+d-1))) 33 | c.idx = sgdGMF::c_get_uv_indices(p, q, d) 34 | print(all.equal(r.idx$idu, drop(c.idx$idu))) 35 | print(all.equal(r.idx$idv, drop(c.idx$idv))) 36 | } 37 | 38 | ## Test: get_uv_penalty() ---- 39 | { 40 | p = 3; q = 1; d = 2; pen = c(1:4) 41 | r.pen = list(penu = c(rep(0,p), rep(pen[1],q), rep(pen[3],d)), 42 | penv = c(rep(pen[2],p), rep(0,q), rep(pen[4],d))) 43 | c.pen = sgdGMF::c_get_uv_penalty(pen, p, q, d) 44 | print(all.equal(r.pen$penu, drop(c.pen$penu))) 45 | print(all.equal(r.pen$penv, drop(c.pen$penv))) 46 | } 47 | 48 | ## Test: sample_minibatch() ---- 49 | { 50 | n = 9; size = 3; randomize = FALSE 51 | r.chunks = sgdGMF::sample.minibatch(n, size, randomize) 52 | c.chunks = sgdGMF::c_sample_minibatch(n, size, randomize) 53 | 54 | flag = TRUE 55 | for (h in 1:ceiling(n / size)) { 56 | flagh = all.equal(r.chunks[[h]], c.chunks[[h]]+1) 57 | flag = flag && flagh 58 | } 59 | print(flag) 60 | } 61 | 62 | 63 | { 64 | n = 11; size = 3; randomize = FALSE 65 | r.chunks = sgdGMF::sample.minibatch(n, size, randomize) 66 | c.chunks = sgdGMF::c_sample_minibatch(n, size, randomize) 67 | 68 | flag = TRUE 69 | for (h in 1:ceiling(n / size)) { 70 | flagh = all.equal(r.chunks[[h]], c.chunks[[h]]+1) 71 | flag = flag && flagh 72 | } 73 | print(flag) 74 | } 75 | 76 | ## Test: select_chunk ---- 77 | { 78 | iter = 10; nchunks = 3 79 | r.idx = sgdGMF::select.minibatch(iter, nchunks) 80 | c.idx = sgdGMF::c_select_minibatch(iter, nchunks) 81 | print(all.equal(r.idx, c.idx+1)) 82 | } 83 | 84 | ## Test: get_chunks ---- 85 | { 86 | n = 9; size = 3; randomize = FALSE 87 | r.chunks = sgdGMF::sample.minibatch(n, size, randomize) 88 | c.chunks = sgdGMF::c_get_chunks(0:2, n, size, randomize) 89 | 90 | flag = TRUE 91 | for (h in 1:ceiling(n / size)) { 
92 | flagh = all.equal(r.chunks[[h]], c.chunks[[h]]+1) 93 | flag = flag && flagh 94 | } 95 | print(flag) 96 | } 97 | 98 | { 99 | n = 10; size = 3; randomize = FALSE 100 | r.chunks = sgdGMF::sample.minibatch(n, size, randomize) 101 | c.chunks = sgdGMF::c_get_chunks(0:3, n, size, randomize) 102 | 103 | flag = TRUE 104 | for (h in 1:ceiling(n / size)) { 105 | flagh = all.equal(r.chunks[[h]], c.chunks[[h]]+1) 106 | flag = flag && flagh 107 | } 108 | print(flag) 109 | } 110 | 111 | { 112 | n = 11; size = 3; randomize = FALSE 113 | r.chunks = sgdGMF::sample.minibatch(n, size, randomize) 114 | c.chunks = sgdGMF::c_get_chunks(0:8, n, size, randomize) 115 | 116 | flag = TRUE 117 | for (h in 1:ceiling(n / size)) { 118 | flagh = all.equal(r.chunks[[h]], c.chunks[[h]]+1) 119 | flag = flag && flagh 120 | } 121 | print(flag) 122 | } 123 | 124 | ## End of file ---- 125 | 126 | -------------------------------------------------------------------------------- /tests/testthat/test-vglmfit.R: -------------------------------------------------------------------------------- 1 | # file: test-vglmfit.R 2 | # author: Cristian Castiglione 3 | # creation: 23/03/2024 4 | # last change: 04/10/2024 5 | 6 | testthat::test_that("Multivariate OLS fitting", { 7 | n = 100; m = 10; p = 5; q = p+1 8 | 9 | O = matrix(rexp(n*m, rate = 2.0), nrow = n, ncol = m) 10 | X = cbind(1, matrix(rnorm(n*p, mean = 0.0, sd = 1.0), nrow = n, ncol = p)) 11 | B = matrix(rnorm(m*q, mean = 0.1, sd = 0.25), nrow = m, ncol = q) 12 | E = matrix(rnorm(n*m, mean = 0.0, sd = 0.1), nrow = n, ncol = m) 13 | Y = O + tcrossprod(X, B) + E 14 | 15 | B.hat = ols.fit.coef(Y, X, offset = O) 16 | mu.hat = O + tcrossprod(X, B.hat) 17 | res.hat = Y - mu.hat 18 | 19 | # Check the dimension and the basic properties of the estimates 20 | testthat::expect_equal(c(m, q), dim(B.hat)) 21 | testthat::expect_equal(crossprod(X, mu.hat), crossprod(X, Y)) 22 | testthat::expect_equal(matrix(0, q, m), crossprod(X, res.hat)) 23 | 
testthat::expect_equal(0, mean(res.hat)) 24 | }) 25 | 26 | 27 | testthat::test_that("Binomial VGLM fitting", { 28 | n = 100; m = 10; p = 5; q = p+1 29 | family = binomial(link = "probit") 30 | 31 | O = matrix(rexp(n*m, rate = 2.0), nrow = n, ncol = m) 32 | X = cbind(1, matrix(rnorm(n*p, mean = 0.0, sd = 1.0), nrow = n, ncol = p)) 33 | B = matrix(rnorm(m*q, mean = 0.1, sd = 0.25), nrow = m, ncol = q) 34 | eta = O + tcrossprod(X, B) 35 | mu = family$linkinv(eta) 36 | Y = matrix(rbinom(n*m, size = 1, prob = mu), nrow = n, ncol = m) 37 | 38 | B.hat = vglm.fit.coef(Y, X, family, offset = O, parallel = FALSE) 39 | eta.hat = O + tcrossprod(X, B.hat) 40 | mu.hat = family$linkinv(eta.hat) 41 | dmu.hat = family$mu.eta(eta.hat) 42 | var.hat = family$variance(mu.hat) 43 | res.hat = (Y - mu.hat) * dmu.hat / var.hat 44 | 45 | # Check the dimension and the basic properties of the estimates 46 | testthat::expect_equal(c(m, q), dim(B.hat)) 47 | testthat::expect_true(mean(crossprod(X, res.hat)) < 1e-04) 48 | }) 49 | 50 | testthat::test_that("Poisson VGLM fitting", { 51 | n = 100; m = 10; p = 5; q = p+1 52 | family = poisson(link = "log") 53 | 54 | O = matrix(rexp(n*m, rate = 2.0), nrow = n, ncol = m) 55 | X = cbind(1, matrix(rnorm(n*p, mean = 0.0, sd = 1.0), nrow = n, ncol = p)) 56 | B = matrix(rnorm(m*q, mean = 0.1, sd = 0.25), nrow = m, ncol = q) 57 | eta = O + tcrossprod(X, B) 58 | mu = family$linkinv(eta) 59 | Y = matrix(rpois(n*m, lambda = mu), nrow = n, ncol = m) 60 | 61 | B.hat = vglm.fit.coef(Y, X, family, offset = O, parallel = FALSE) 62 | eta.hat = O + tcrossprod(X, B.hat) 63 | mu.hat = family$linkinv(eta.hat) 64 | dmu.hat = family$mu.eta(eta.hat) 65 | var.hat = family$variance(mu.hat) 66 | res.hat = (Y - mu.hat) * dmu.hat / var.hat 67 | 68 | # Check the dimension and the basic properties of the estimates 69 | testthat::expect_equal(c(m, q), dim(B.hat)) 70 | testthat::expect_true(mean(crossprod(X, res.hat)) < 1e-04) 71 | }) 72 | 73 | testthat::test_that("Gamma VGLM 
fitting", { 74 | n = 100; m = 10; p = 5; q = p+1 75 | family = Gamma(link = "log") 76 | 77 | O = matrix(rexp(n*m, rate = 2.0), nrow = n, ncol = m) 78 | X = cbind(1, matrix(rnorm(n*p, mean = 0.0, sd = 1.0), nrow = n, ncol = p)) 79 | B = matrix(rnorm(m*q, mean = 0.1, sd = 0.25), nrow = m, ncol = q) 80 | eta = O + tcrossprod(X, B) 81 | mu = family$linkinv(eta) 82 | Y = matrix(rgamma(n*m, shape = 2, rate = 2 / mu), nrow = n, ncol = m) 83 | 84 | B.hat = vglm.fit.coef(Y, X, family, offset = O, parallel = FALSE) 85 | eta.hat = O + tcrossprod(X, B.hat) 86 | mu.hat = family$linkinv(eta.hat) 87 | dmu.hat = family$mu.eta(eta.hat) 88 | var.hat = family$variance(mu.hat) 89 | res.hat = (Y - mu.hat) * dmu.hat / var.hat 90 | 91 | # Check the dimension and the basic properties of the estimates 92 | testthat::expect_equal(c(m, q), dim(B.hat)) 93 | testthat::expect_true(mean(crossprod(X, res.hat)) < 1e-03) 94 | }) 95 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(AIC,initgmf) 4 | S3method(AIC,sgdgmf) 5 | S3method(BIC,initgmf) 6 | S3method(BIC,sgdgmf) 7 | S3method(biplot,initgmf) 8 | S3method(biplot,sgdgmf) 9 | S3method(coef,initgmf) 10 | S3method(coef,sgdgmf) 11 | S3method(coefficients,initgmf) 12 | S3method(coefficients,sgdgmf) 13 | S3method(deviance,initgmf) 14 | S3method(deviance,sgdgmf) 15 | S3method(fitted,initgmf) 16 | S3method(fitted,sgdgmf) 17 | S3method(image,initgmf) 18 | S3method(image,sgdgmf) 19 | S3method(plot,initgmf) 20 | S3method(plot,sgdgmf) 21 | S3method(predict,sgdgmf) 22 | S3method(print,initgmf) 23 | S3method(print,sgdgmf) 24 | S3method(refit,sgdgmf) 25 | S3method(resid,initgmf) 26 | S3method(resid,sgdgmf) 27 | S3method(residuals,initgmf) 28 | S3method(residuals,sgdgmf) 29 | S3method(screeplot,initgmf) 30 | S3method(screeplot,sgdgmf) 31 | S3method(simulate,sgdgmf) 32 | 
S3method(storedata,sgdgmf) 33 | export(refit) 34 | export(set.control.airwls) 35 | export(set.control.alg) 36 | export(set.control.block.sgd) 37 | export(set.control.coord.sgd) 38 | export(set.control.cv) 39 | export(set.control.init) 40 | export(set.control.newton) 41 | export(sgdgmf.cv) 42 | export(sgdgmf.fit) 43 | export(sgdgmf.init) 44 | export(sgdgmf.rank) 45 | export(sim.gmf.data) 46 | export(simulate) 47 | export(storedata) 48 | import(Rcpp) 49 | import(RcppArmadillo) 50 | import(ggplot2) 51 | importFrom(MASS,neg.bin) 52 | importFrom(MASS,negative.binomial) 53 | importFrom(RSpectra,eigs) 54 | importFrom(RSpectra,eigs_sym) 55 | importFrom(RSpectra,svds) 56 | importFrom(Rcpp,evalCpp) 57 | importFrom(doParallel,registerDoParallel) 58 | importFrom(foreach,"%do%") 59 | importFrom(foreach,"%dopar%") 60 | importFrom(foreach,foreach) 61 | importFrom(generics,refit) 62 | importFrom(graphics,image) 63 | importFrom(methods,is) 64 | importFrom(parallel,detectCores) 65 | importFrom(parallel,makeCluster) 66 | importFrom(parallel,stopCluster) 67 | importFrom(reshape2,melt) 68 | importFrom(stats,BIC) 69 | importFrom(stats,Gamma) 70 | importFrom(stats,binomial) 71 | importFrom(stats,biplot) 72 | importFrom(stats,coef) 73 | importFrom(stats,coefficients) 74 | importFrom(stats,cor) 75 | importFrom(stats,cov) 76 | importFrom(stats,cov2cor) 77 | importFrom(stats,dbeta) 78 | importFrom(stats,dbinom) 79 | importFrom(stats,density) 80 | importFrom(stats,deviance) 81 | importFrom(stats,dexp) 82 | importFrom(stats,dgamma) 83 | importFrom(stats,dnorm) 84 | importFrom(stats,dpois) 85 | importFrom(stats,dunif) 86 | importFrom(stats,ecdf) 87 | importFrom(stats,family) 88 | importFrom(stats,fitted) 89 | importFrom(stats,gaussian) 90 | importFrom(stats,glm.fit) 91 | importFrom(stats,inverse.gaussian) 92 | importFrom(stats,median) 93 | importFrom(stats,pbeta) 94 | importFrom(stats,pbinom) 95 | importFrom(stats,pexp) 96 | importFrom(stats,pgamma) 97 | importFrom(stats,pnorm) 98 | 
importFrom(stats,poisson) 99 | importFrom(stats,ppois) 100 | importFrom(stats,predict) 101 | importFrom(stats,punif) 102 | importFrom(stats,qbeta) 103 | importFrom(stats,qbinom) 104 | importFrom(stats,qexp) 105 | importFrom(stats,qgamma) 106 | importFrom(stats,qnorm) 107 | importFrom(stats,qpois) 108 | importFrom(stats,qqline) 109 | importFrom(stats,qqnorm) 110 | importFrom(stats,qqplot) 111 | importFrom(stats,quantile) 112 | importFrom(stats,quasi) 113 | importFrom(stats,quasibinomial) 114 | importFrom(stats,quasipoisson) 115 | importFrom(stats,qunif) 116 | importFrom(stats,rbeta) 117 | importFrom(stats,rbinom) 118 | importFrom(stats,resid) 119 | importFrom(stats,residuals) 120 | importFrom(stats,rexp) 121 | importFrom(stats,rgamma) 122 | importFrom(stats,rnorm) 123 | importFrom(stats,rpois) 124 | importFrom(stats,runif) 125 | importFrom(stats,screeplot) 126 | importFrom(stats,sd) 127 | importFrom(stats,var) 128 | importFrom(utils,head) 129 | importFrom(utils,tail) 130 | importFrom(viridisLite,viridis) 131 | useDynLib(sgdGMF, .registration=TRUE) 132 | -------------------------------------------------------------------------------- /R/vglmfit.R: -------------------------------------------------------------------------------- 1 | 2 | #' @title Estimate the coefficients of a multivariate linear model 3 | #' 4 | #' @description 5 | #' Estimate the coefficients of a multivariate linear model via ordinary least squares. 
6 | #' 7 | #' @param Y \eqn{n \times m} matrix of response variables 8 | #' @param X \eqn{n \times p} matrix of covariates 9 | #' @param offset \eqn{n \times m} matrix of offset values 10 | #' 11 | #' @keywords internal 12 | ols.fit.coef = function ( 13 | Y, X, offset = NULL 14 | ) { 15 | # Set the offset matrix 16 | if (is.null(offset)) offset = 0 17 | 18 | # Parameter estimation 19 | XtX = crossprod(X) 20 | XtY = crossprod(X, Y - offset) 21 | coefs = t(solve(XtX, XtY)) 22 | 23 | # Return the parameter estimates 24 | return (coefs) 25 | } 26 | 27 | #' @title Estimate the coefficients of a vector generalized linear model 28 | #' 29 | #' @description 30 | #' Estimate the coefficients of a vector generalized linear model via parallel 31 | #' iterative re-weighted least squares. Computations can be performed in parallel 32 | #' to speed up the execution. 33 | #' 34 | #' @param Y \eqn{n \times m} matrix of response variables 35 | #' @param X \eqn{n \times p} matrix of covariates 36 | #' @param family a \code{glm} family (see \code{\link{family}} for more details) 37 | #' @param weights \eqn{n \times m} matrix of weighting values 38 | #' @param offset \eqn{n \times m} matrix of offset values 39 | #' @param parallel if \code{TRUE}, allows for parallel computing using the \code{foreach} package 40 | #' @param nthreads number of cores to be used in parallel (only if \code{parallel=TRUE}) 41 | #' @param clust registered cluster to be used for distributing the computations (only if \code{parallel=TRUE}) 42 | #' 43 | #' @keywords internal 44 | vglm.fit.coef = function ( 45 | Y, X, family = gaussian(), weights = NULL, offset = NULL, 46 | parallel = FALSE, nthreads = 1, clust = NULL 47 | ) { 48 | # Set the model dimensions 49 | n = nrow(Y) 50 | m = ncol(Y) 51 | 52 | # Set the offset matrix 53 | if (is.null(weights)) weights = matrix(1, nrow = n, ncol = m) 54 | if (is.null(offset)) offset = matrix(0, nrow = n, ncol = m) 55 | 56 | # Register the clusters 57 | if (parallel) { 58 | 
nullclust = is.null(clust) 59 | if (nullclust) { 60 | ncores = parallel::detectCores() - 1 61 | ncores = max(1, min(nthreads, ncores)) 62 | clust = parallel::makeCluster(ncores) 63 | doParallel::registerDoParallel(clust) 64 | } 65 | } 66 | 67 | j = NULL 68 | if (!parallel) { 69 | # Sequential parameter estimation 70 | coefs = foreach(j = 1:m, .combine = "rbind") %do% { 71 | yj = as.vector(Y[,j]) 72 | oj = as.vector(offset[,j]) 73 | wj = as.vector(weights[,j]) 74 | fit = stats::glm.fit(x = X, y = yj, family = family, weights = wj, offset = oj) 75 | t(fit$coefficients) 76 | } 77 | 78 | ## # As an alternative, we may use the following R code, 79 | ## # which does not depend on the foreach package 80 | ## coefs = matrix(NA, nrow = m, ncol = p) 81 | ## for (j in 1:m) { 82 | ## yj = as.vector(Y[,j]) 83 | ## oj = as.vector(offset[,j]) 84 | ## fit = stats::glm.fit(x = X, y = yj, family = family, weights = wj, offset = oj) 85 | ## coefs[j, ] = as.vector(fit$coefficients) 86 | ## } 87 | } else { 88 | # Parallel parameter estimation 89 | coefs = foreach(j = 1:m, .combine = "rbind") %dopar% { 90 | yj = as.vector(Y[,j]) 91 | oj = as.vector(offset[,j]) 92 | wj = as.vector(weights[,j]) 93 | fit = stats::glm.fit(x = X, y = yj, family = family, weights = wj, offset = oj) 94 | t(fit$coefficients) 95 | } 96 | } 97 | 98 | # Close the connection to the clusters 99 | if (parallel) { 100 | if (nullclust) { 101 | parallel::stopCluster(clust) 102 | } 103 | } 104 | 105 | # Return the parameter estimates 106 | return (coefs) 107 | } 108 | -------------------------------------------------------------------------------- /vignettes/residuals.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Analysis of the residuals" 3 | author: "Cristian Castiglione" 4 | output: rmarkdown::html_vignette 5 | vignette: > 6 | %\VignetteIndexEntry{residuals} 7 | %\VignetteEngine{knitr::rmarkdown} 8 | %\VignetteEncoding{UTF-8} 9 | --- 10 | 11 | ## Workspace 
setup 12 | 13 | ```{r setup, include = FALSE} 14 | options(rmarkdown.html_vignette.check_title = FALSE) 15 | 16 | knitr::opts_chunk$set( 17 | collapse = TRUE, 18 | comment = "#>" 19 | ) 20 | ``` 21 | 22 | Load the `sgdGMF` package in the workspace. 23 | 24 | ```{r sgdgmf} 25 | library(sgdGMF) 26 | ``` 27 | 28 | Load other useful packages in the workspace. 29 | 30 | ```{r libraries} 31 | library(ggplot2) 32 | library(ggpubr) 33 | library(reshape2) 34 | ``` 35 | 36 | ## Ant traits data 37 | 38 | Load the ant traits data in the workspace and define the response matrix `Y` and covariate matrices `X` and `Z`. 39 | 40 | ```{r data} 41 | # install.packages("mvabund") 42 | # data(antTraits, package = "mvabund") 43 | 44 | load(url("https://raw.githubusercontent.com/cran/mvabund/master/data/antTraits.RData")) 45 | 46 | Y = as.matrix(antTraits$abund) 47 | X = as.matrix(antTraits$env[,-3]) 48 | Z = matrix(1, nrow = ncol(Y), ncol = 1) 49 | 50 | n = nrow(Y) 51 | m = ncol(Y) 52 | ``` 53 | 54 | ## Model specification 55 | 56 | Set the model family to Poisson since the response matrix contains count data. 57 | 58 | ```{r family} 59 | family = poisson() 60 | ``` 61 | 62 | Select the optimal number of latent factors using the function `sgdgmf.rank`, 63 | which employs an adjusted eigenvalue thresholding method to identify the optimal 64 | elbow point of a screeplot. 65 | 66 | ```{r rank} 67 | ncomp = sgdgmf.rank(Y = Y, X = X, Z = Z, family = family)$ncomp 68 | cat("Selected rank: ", ncomp) 69 | ``` 70 | 71 | ## Model estimation 72 | 73 | Estimate a Poisson GMF model using iterated least squares. 74 | 75 | ```{r fit} 76 | gmf = sgdgmf.fit(Y, X, Z, ncomp = ncomp, family = family, method = "airwls") 77 | ``` 78 | 79 | ## Model validation 80 | 81 | Compute the deviance residuals of the estimated matrix factorization model. 82 | Additionally, compute the spectrum of such a residual matrix. 
83 | 84 | ```{r resid} 85 | res = residuals(gmf, spectrum = TRUE, ncomp = 20) 86 | ``` 87 | 88 | Compare the residuals of two competing models: VGLM and GMF. 89 | Notice that VGLM is a particular case of GMF which only includes the regression 90 | effects and does not include a residual matrix factorization in the linear predictor. 91 | 92 | ```{r plot, fig.width = 7, fig.height = 5} 93 | ggpubr::ggarrange( 94 | plot(gmf, type = "res-idx"), 95 | plot(gmf, type = "res-fit"), 96 | plot(gmf, type = "hist"), 97 | plot(gmf, type = "qq"), 98 | nrow = 2, ncol = 2, align = "hv") 99 | ``` 100 | 101 | We now have a look at the spectrum of the residual matrices, i.e., the eigenvalues 102 | of the corresponding covariance matrix. However, instead of analyzing the actual 103 | values of the eigenvalues, we normalize them so as to plot the percentage of 104 | variance explained by each principal component. 105 | 106 | ```{r spectrum, fig.width = 7, fig.height = 3} 107 | ggpubr::ggarrange( 108 | screeplot(gmf, cumulative = FALSE, proportion = TRUE), 109 | screeplot(gmf, cumulative = TRUE, proportion = TRUE), 110 | nrow = 1, ncol = 2, align = "hv") 111 | ``` 112 | 113 | ## Observations vs fitted values 114 | 115 | Plot the deviance and Pearson residuals using a heatmap. This could be helpful to 116 | graphically detect if there are some structured patterns in the matrix that have not 117 | been captured by the model. 
118 | 119 | ```{r resid2, fig.width = 7, fig.height = 3.5} 120 | plt.dev = image(gmf, type = "deviance", resid = TRUE, symmetric = TRUE) 121 | plt.prs = image(gmf, type = "pearson", resid = TRUE, symmetric = TRUE) 122 | 123 | ggpubr::ggarrange( 124 | plt.dev + labs(x = "Species", y = "Environments", title = "Deviance residuals"), 125 | plt.prs + labs(x = "Species", y = "Environments", title = "Pearson residuals"), 126 | nrow = 1, ncol = 2, common.legend = FALSE, legend = "bottom", align = "hv") 127 | ``` 128 | 129 | 130 | 131 | -------------------------------------------------------------------------------- /tests/testrcpp/test-family.R: -------------------------------------------------------------------------------- 1 | # test-family.R 2 | # author: Cristian Castiglione 3 | # creation: 29/09/2023 4 | # last change: 29/09/2023 5 | 6 | ## Workspace setup ---- 7 | rm(list = ls()) 8 | graphics.off() 9 | 10 | # Package compilation and import 11 | devtools::load_all() 12 | 13 | plot.link <- function (x, y, main = "") { 14 | plot(x, y, type = "l", xlab = "x", ylab = "link", main = main) 15 | } 16 | 17 | par(mfrow = c(1, 3)) 18 | 19 | ## Test: gaussian ---- 20 | { 21 | n = 100 22 | x = seq(from = -3, to = +3, length = n) 23 | y = seq(from = -3, to = +3, length = n) 24 | z = rep(0, length = n) 25 | plot.link(x, sgdGMF::cpp.gaussian.variance(x), main = "Gaussian \n variance") 26 | plot.link(x, sgdGMF::cpp.gaussian.initialize(y), main = "Gaussian \n initialize") 27 | plot.link(x, sgdGMF::cpp.gaussian.devresid(z, x), main = "Gaussian \n devresid") 28 | 29 | r.variance = gaussian()$variance(x) 30 | c.variance = drop(sgdGMF::cpp.gaussian.variance(x)) 31 | print(all.equal(r.variance, c.variance)) 32 | 33 | r.devresid = gaussian()$dev.resid(z, x, 1) 34 | c.devresid = drop(sgdGMF::cpp.gaussian.devresid(z, x)) 35 | print(all.equal(r.devresid, c.devresid)) 36 | } 37 | 38 | ## Test: binomial ---- 39 | { 40 | n = 100 41 | x = seq(from = +0.001, to = +0.999, length = n) 42 | y = 
c(rep(0, length = n/2), rep(1, length = n/2)) 43 | z = rep(0, length = n) 44 | plot.link(x, sgdGMF::cpp.binomial.variance(x), main = "Binomial \n variance") 45 | plot.link(x, sgdGMF::cpp.binomial.initialize(y), main = "Binomial \n initialize") 46 | plot.link(x, sgdGMF::cpp.binomial.devresid(z, x), main = "Binomial \n devresid") 47 | 48 | r.variance = binomial()$variance(x) 49 | c.variance = drop(sgdGMF::cpp.binomial.variance(x)) 50 | print(all.equal(r.variance, c.variance)) 51 | 52 | r.devresid = binomial()$dev.resid(z, x, 1) 53 | c.devresid = drop(sgdGMF::cpp.binomial.devresid(z, x)) 54 | print(all.equal(r.devresid, c.devresid)) 55 | } 56 | 57 | ## Test: poisson ---- 58 | { 59 | n = 100 60 | x = seq(from = 1, to = 10, length = n) 61 | y = seq(from = 1, to = 20, by = 1) 62 | z = rep(3, length = n) 63 | plot.link(x, sgdGMF::cpp.poisson.variance(x), main = "Poisson \n variance") 64 | plot.link(y, sgdGMF::cpp.poisson.initialize(y), main = "Poisson \n initialize") 65 | plot.link(x, sgdGMF::cpp.poisson.devresid(z, x), main = "Poisson \n devresid") 66 | 67 | r.variance = poisson()$variance(x) 68 | c.variance = drop(sgdGMF::cpp.poisson.variance(x)) 69 | print(all.equal(r.variance, c.variance)) 70 | 71 | r.devresid = poisson()$dev.resid(z, x, 1) 72 | c.devresid = drop(sgdGMF::cpp.poisson.devresid(z, x)) 73 | print(all.equal(r.devresid, c.devresid)) 74 | } 75 | 76 | ## Test: gamma ---- 77 | { 78 | n = 100 79 | x = seq(from = 0.1, to = 5, length = n) 80 | y = seq(from = 0.1, to = 5, length = n) 81 | z = rep(1, length = n) 82 | plot.link(x, sgdGMF::cpp.gamma.variance(x), main = "Gamma \n variance") 83 | plot.link(x, sgdGMF::cpp.gamma.initialize(y), main = "Gamma \n initialize") 84 | plot.link(x, sgdGMF::cpp.gamma.devresid(z, x), main = "Gamma \n devresid") 85 | 86 | r.variance = Gamma()$variance(x) 87 | c.variance = drop(sgdGMF::cpp.gamma.variance(x)) 88 | print(all.equal(r.variance, c.variance)) 89 | 90 | r.devresid = Gamma()$dev.resid(z, x, 1) 91 | c.devresid = 
drop(sgdGMF::cpp.gamma.devresid(z, x)) 92 | print(all.equal(r.devresid, c.devresid)) 93 | } 94 | 95 | ## Test: negative binomial ---- 96 | { 97 | n = 100 98 | x = seq(from = 0.1, to = 5, length = n) 99 | y = seq(from = 0.1, to = 5, length = n) 100 | z = rep(1, length = n) 101 | plot.link(x, sgdGMF::cpp.negbinom.variance(x), main = "Negative Binomial \n variance") 102 | plot.link(x, sgdGMF::cpp.negbinom.initialize(y), main = "Negative Binomial \n initialize") 103 | plot.link(x, sgdGMF::cpp.negbinom.devresid(z, x), main = "Negative Binomial \n devresid") 104 | 105 | r.variance = MASS::neg.bin(10)$variance(x) 106 | c.variance = drop(sgdGMF::cpp.negbinom.variance(x)) 107 | print(all.equal(r.variance, c.variance)) 108 | 109 | r.devresid = MASS::neg.bin(10)$dev.resid(z, x, 1) 110 | c.devresid = drop(sgdGMF::cpp.negbinom.devresid(z, x)) 111 | print(all.equal(r.devresid, c.devresid)) 112 | } 113 | 114 | 115 | -------------------------------------------------------------------------------- /src/utils.h: -------------------------------------------------------------------------------- 1 | // utils.h 2 | // author: Cristian Castiglione 3 | // creation: 28/09/2023 4 | // last change: 19/11/2024 5 | 6 | #ifndef UTILS_H 7 | #define UTILS_H 8 | 9 | #include 10 | 11 | // Standard constants used in numerical computations 12 | const double pi = M_PI; 13 | const double invpi = 1.0 / M_PI; 14 | const double log2pi = std::log(2.0 * M_PI); 15 | const double sqrt2 = std::sqrt(2.0); 16 | const double sqrtpi = std::sqrt(M_PI); 17 | const double sqrt2pi = std::sqrt(2.0 * M_PI); 18 | const double infty = arma::datum::inf; 19 | 20 | // The following vectors of coefficients serves to approximate the 21 | // quantile function of a standard normal distribution. 
22 | // For more details see: https://ar5iv.labs.arxiv.org/html/1002.0567 23 | const arma::vec qn_inner_coef = { 24 | + 0.195740115269792, - 0.652871358365296, + 1.246899760652504, 25 | + 0.155331081623168, - 0.839293158122257}; 26 | const arma::vec qn_tails_coef = { 27 | +16.682320830719986527, + 4.120411523939115059, + 0.029814187308200211, 28 | - 1.000182518730158122, + 7.173787663925508066, + 8.759693508958633869}; 29 | 30 | namespace utils { 31 | 32 | // Maximum relative difference between two scalars/vectors 33 | double absmax (const double & u, const double & v); 34 | double absmax (const arma::vec & u, const arma::vec & v); 35 | 36 | // Truncated representation of a vector/matrix x, such that a <= x[i,j] <= b 37 | void trim (arma::mat & x, const double & a, const double & b); 38 | void trim (arma::mat & x, const double & a, const double & b, const arma::uvec & idx); 39 | void trim (arma::mat & x, const double & a, const double & b, const arma::uvec & idx, const arma::uvec & idy); 40 | 41 | // All and any operator for boolean matrices 42 | bool all(const arma::umat & x); 43 | bool any(const arma::umat & x); 44 | 45 | // Lp norm of a vector/matrix 46 | double norm (const arma::mat & x); 47 | double norm (const arma::mat & x, const double & p); 48 | 49 | // Pointwise maximum between 0 and x (to the power of p) 50 | arma::mat max0 (const arma::mat & x); 51 | arma::mat max0 (const arma::mat & x, const double & p); 52 | 53 | // Stable calculation of x*log(x), with 0*log(0) = 0; 54 | arma::mat xlogx (const arma::mat & x); 55 | 56 | // Stable calculation of log(1 + exp(x)) 57 | arma::mat log1pexp (const arma::mat & x); 58 | 59 | // Stable calculation of log(1 - exp(-x)) 60 | arma::mat log1mexp (const arma::mat & x); 61 | 62 | // Logistic transformation 63 | arma::mat logit (const arma::mat & x); 64 | 65 | // Inverse of the logistic transformation 66 | arma::mat expit (const arma::mat & x); 67 | arma::mat expit2 (const arma::mat & x); 68 | arma::mat expitn (const 
arma::mat & x, const double & n); 69 | 70 | // Complementary log-log and exp-exp transformation 71 | arma::mat cloglog (const arma::mat & x); 72 | arma::mat cexpexp (const arma::mat & x); 73 | 74 | // Log-log and exp-exp transformations 75 | arma::mat loglog (const arma::mat & x); 76 | arma::mat expexp (const arma::mat & x); 77 | 78 | // Standard Gaussian probability and cumulative density function 79 | arma::mat pdfn (const arma::mat & x); 80 | arma::mat cdfn (const arma::mat & x); 81 | arma::mat qdfn (const arma::mat & p); 82 | 83 | // Standard Gaussian log-probability and cumulative density function 84 | arma::mat logpdfn (const arma::mat & x); 85 | arma::mat logcdfn (const arma::mat & x); 86 | 87 | // Gamma function 88 | arma::mat gamma (const arma::mat & x); 89 | arma::mat loggamma (const arma::mat & x); 90 | arma::mat digamma (const arma::mat & x); 91 | arma::mat trigamma (const arma::mat & x); 92 | 93 | // Beta function 94 | arma::mat beta (const arma::mat & x, const arma::mat & y); 95 | arma::mat logbeta (const arma::mat & x, const arma::mat & y); 96 | arma::mat dibeta (const arma::mat & x, const arma::mat & y); 97 | arma::mat tribeta (const arma::mat & x, const arma::mat & y); 98 | 99 | // Hinge loss function 100 | arma::mat hinge (const arma::mat & x); 101 | 102 | // Delta function 103 | arma::mat dirac (const arma::mat & x, const double & a); 104 | 105 | // Step function 106 | arma::mat step (const arma::mat & x, const double & a, const bool & lower); 107 | 108 | // Extract the half-vectorization of the square matrix M 109 | arma::vec vech (const arma::mat & A); 110 | 111 | } 112 | 113 | #endif 114 | -------------------------------------------------------------------------------- /tests/testcpp/test-link.cpp: -------------------------------------------------------------------------------- 1 | // test-link.cpp 2 | // author: Cristian Castiglione 3 | // creation: 29/09/2023 4 | // last change: 29/09/2023 5 | 6 | #include "link.h" 7 | 8 | using namespace 
glm; 9 | 10 | //' @keywords internal 11 | // [[Rcpp::export("cpp.link.identity.linkfun")]] 12 | arma::vec cpp_link_identity_linkfun (const arma::vec & mu) {Identity link; return link.linkfun(mu);} 13 | //' @keywords internal 14 | // [[Rcpp::export("cpp.link.identity.linkinv")]] 15 | arma::vec cpp_link_identity_linkinv (const arma::vec & eta) {Identity link; return link.linkinv(eta);} 16 | //' @keywords internal 17 | // [[Rcpp::export("cpp.link.identity.mueta")]] 18 | arma::vec cpp_link_identity_mueta (const arma::vec & eta) {Identity link; return link.mueta(eta);} 19 | 20 | //' @keywords internal 21 | // [[Rcpp::export("cpp.link.logit.linkfun")]] 22 | arma::vec cpp_link_logit_linkfun (const arma::vec & mu) {Logit link; return link.linkfun(mu);} 23 | //' @keywords internal 24 | // [[Rcpp::export("cpp.link.logit.linkinv")]] 25 | arma::vec cpp_link_logit_linkinv (const arma::vec & eta) {Logit link; return link.linkinv(eta);} 26 | //' @keywords internal 27 | // [[Rcpp::export("cpp.link.logit.mueta")]] 28 | arma::vec cpp_link_logit_mueta (const arma::vec & eta) {Logit link; return link.mueta(eta);} 29 | 30 | //' @keywords internal 31 | // [[Rcpp::export("cpp.link.probit.linkfun")]] 32 | arma::vec cpp_link_probit_linkfun (const arma::vec & mu) {Probit link; return link.linkfun(mu);} 33 | //' @keywords internal 34 | // [[Rcpp::export("cpp.link.probit.linkinv")]] 35 | arma::vec cpp_link_probit_linkinv (const arma::vec & eta) {Probit link; return link.linkinv(eta);} 36 | //' @keywords internal 37 | // [[Rcpp::export("cpp.link.probit.mueta")]] 38 | arma::vec cpp_link_probit_mueta (const arma::vec & eta) {Probit link; return link.mueta(eta);} 39 | 40 | //' @keywords internal 41 | // [[Rcpp::export("cpp.link.cauchy.linkfun")]] 42 | arma::vec cpp_link_cauchy_linkfun (const arma::vec & mu) {Cauchy link; return link.linkfun(mu);} 43 | //' @keywords internal 44 | // [[Rcpp::export("cpp.link.cauchy.linkinv")]] 45 | arma::vec cpp_link_cauchy_linkinv (const arma::vec & eta) {Cauchy 
link; return link.linkinv(eta);} 46 | //' @keywords internal 47 | // [[Rcpp::export("cpp.link.cauchy.mueta")]] 48 | arma::vec cpp_link_cauchy_mueta (const arma::vec & eta) {Cauchy link; return link.mueta(eta);} 49 | 50 | //' @keywords internal 51 | // [[Rcpp::export("cpp.link.cloglog.linkfun")]] 52 | arma::vec cpp_link_cloglog_linkfun (const arma::vec & mu) {cLogLog link; return link.linkfun(mu);} 53 | //' @keywords internal 54 | // [[Rcpp::export("cpp.link.cloglog.linkinv")]] 55 | arma::vec cpp_link_cloglog_linkinv (const arma::vec & eta) {cLogLog link; return link.linkinv(eta);} 56 | //' @keywords internal 57 | // [[Rcpp::export("cpp.link.cloglog.mueta")]] 58 | arma::vec cpp_link_cloglog_mueta (const arma::vec & eta) {cLogLog link; return link.mueta(eta);} 59 | 60 | //' @keywords internal 61 | // [[Rcpp::export("cpp.link.log.linkfun")]] 62 | arma::vec cpp_link_log_linkfun (const arma::vec & mu) {Log link; return link.linkfun(mu);} 63 | //' @keywords internal 64 | // [[Rcpp::export("cpp.link.log.linkinv")]] 65 | arma::vec cpp_link_log_linkinv (const arma::vec & eta) {Log link; return link.linkinv(eta);} 66 | //' @keywords internal 67 | // [[Rcpp::export("cpp.link.log.mueta")]] 68 | arma::vec cpp_link_log_mueta (const arma::vec & eta) {Log link; return link.mueta(eta);} 69 | 70 | //' @keywords internal 71 | // [[Rcpp::export("cpp.link.inverse.linkfun")]] 72 | arma::vec cpp_link_inverse_linkfun (const arma::vec & mu) {Inverse link; return link.linkfun(mu);} 73 | //' @keywords internal 74 | // [[Rcpp::export("cpp.link.inverse.linkinv")]] 75 | arma::vec cpp_link_inverse_linkinv (const arma::vec & eta) {Inverse link; return link.linkinv(eta);} 76 | //' @keywords internal 77 | // [[Rcpp::export("cpp.link.inverse.mueta")]] 78 | arma::vec cpp_link_inverse_mueta (const arma::vec & eta) {Inverse link; return link.mueta(eta);} 79 | 80 | //' @keywords internal 81 | // [[Rcpp::export("cpp.link.sqrt.linkfun")]] 82 | arma::vec cpp_link_sqrt_linkfun (const arma::vec & mu) {Sqrt 
link; return link.linkfun(mu);} 83 | //' @keywords internal 84 | // [[Rcpp::export("cpp.link.sqrt.linkinv")]] 85 | arma::vec cpp_link_sqrt_linkinv (const arma::vec & eta) {Sqrt link; return link.linkinv(eta);} 86 | //' @keywords internal 87 | // [[Rcpp::export("cpp.link.sqrt.mueta")]] 88 | arma::vec cpp_link_sqrt_mueta (const arma::vec & eta) {Sqrt link; return link.mueta(eta);} 89 | -------------------------------------------------------------------------------- /man/residuals.sgdgmf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sgdGMF-class.R 3 | \name{residuals.sgdgmf} 4 | \alias{residuals.sgdgmf} 5 | \alias{resid.sgdgmf} 6 | \title{Extract the residuals of a GMF model} 7 | \usage{ 8 | \method{residuals}{sgdgmf}( 9 | object, 10 | ..., 11 | type = c("deviance", "pearson", "working", "response", "link"), 12 | partial = FALSE, 13 | normalize = FALSE, 14 | fillna = FALSE, 15 | spectrum = FALSE, 16 | ncomp = 50 17 | ) 18 | 19 | \method{resid}{sgdgmf}( 20 | object, 21 | ..., 22 | type = c("deviance", "pearson", "working", "response", "link"), 23 | partial = FALSE, 24 | normalize = FALSE, 25 | fillna = FALSE, 26 | spectrum = FALSE, 27 | ncomp = 50 28 | ) 29 | } 30 | \arguments{ 31 | \item{object}{an object of class \code{sgdgmf}} 32 | 33 | \item{...}{further arguments passed to or from other methods} 34 | 35 | \item{type}{the type of residuals which should be returned} 36 | 37 | \item{partial}{if \code{TRUE}, computes the residuals excluding the matrix factorization from the linear predictor} 38 | 39 | \item{normalize}{if \code{TRUE}, standardize the residuals column-by-column} 40 | 41 | \item{fillna}{if \code{TRUE}, fills \code{NA} values column-by-column} 42 | 43 | \item{spectrum}{if \code{TRUE}, returns the eigenvalues of the residual covariance matrix} 44 | 45 | \item{ncomp}{number of eigenvalues to be calculated (only if 
\code{spectrum=TRUE})} 46 | } 47 | \value{ 48 | If \code{spectrum=FALSE}, a matrix containing the selected residuals. 49 | If \code{spectrum=TRUE}, a list containing the residuals (\code{res}), the first \code{ncomp} 50 | eigenvalues of the residual covariance matrix (\code{lambdas}), the variance explained by the first 51 | \code{ncomp} principal components of the residuals (\code{explained.var}), the variance not 52 | explained by the first \code{ncomp} principal components of the residuals (\code{residual.var}), 53 | the total variance of the residuals (\code{total.var}). 54 | } 55 | \description{ 56 | Extract the residuals of a GMF model and, if required, compute the eigenvalues 57 | of the residuals covariance/correlation matrix. 58 | Moreover, if required, return the partial residuals of the model obtained by 59 | excluding the matrix decomposition from the linear predictor. 60 | } 61 | \details{ 62 | Let \eqn{g(\mu) = \eta = X B^\top + \Gamma Z^\top + U V^\top} be the linear predictor of a 63 | GMF model. Let \eqn{R = (r_{ij})} be the corresponding residual matrix. 64 | The following residuals can be considered: 65 | \itemize{ 66 | \item deviance: \eqn{r_{ij}^{_D} = \textrm{sign}(y_{ij} - \mu_{ij}) \sqrt{D(y_{ij}, \mu_{ij})}}; 67 | \item Pearson: \eqn{r_{ij}^{_P} = (y_{ij} - \mu_{ij}) / \sqrt{\nu(\mu_{ij})}}; 68 | \item working: \eqn{r_{ij}^{_W} = (y_{ij} - \mu_{ij}) / \{g'(\mu_{ij}) \,\nu(\mu_{ij})\}}; 69 | \item response: \eqn{r_{ij}^{_R} = y_{ij} - \mu_{ij}}; 70 | \item link: \eqn{r_{ij}^{_G} = g(y_{ij}) - \eta_{ij}}. 71 | } 72 | If \code{partial=TRUE}, \eqn{\mu} is computed excluding the latent matrix decomposition 73 | from the linear predictor, so as to obtain the partial residuals. 74 | 75 | Let \eqn{\Sigma} be the empirical variance-covariance matrix of \eqn{R}, being 76 | \eqn{\sigma_{ij} = \textrm{Cov}(r_{:i}, r_{:j})}. Then, the latent spectrum of 77 | the model is the collection of eigenvalues of \eqn{\Sigma}. 
78 | 79 | Notice that, in case of Gaussian data, the latent spectrum corresponds to the principal 80 | component analysis on the regression residuals, whose eigenvalues can be used to 81 | infer the amount of variance explained by each principal component. Similarly, 82 | we can use the (partial) latent spectrum in non-Gaussian data settings to infer 83 | the correct number of principal components to include into the GMF model or to 84 | detect some residual dependence structures not already explained by the model. 85 | } 86 | \examples{ 87 | # Load the sgdGMF package 88 | library(sgdGMF) 89 | 90 | # Generate data from a Poisson model 91 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson()) 92 | 93 | # Fit a GMF model with 3 latent factors 94 | gmf = sgdgmf.fit(data$Y, ncomp = 3, family = poisson()) 95 | 96 | # Get the deviance residuals of a GMF model 97 | str(residuals(gmf)) # returns the overall deviance residuals 98 | str(residuals(gmf, partial = TRUE)) # returns the partial residuals 99 | str(residuals(gmf, spectrum = TRUE)) # returns the eigenvalues of the residual var-cov matrix 100 | 101 | } 102 | -------------------------------------------------------------------------------- /tests/testcpp/test-family.cpp: -------------------------------------------------------------------------------- 1 | // test-family.cpp 2 | // author: Cristian Castiglione 3 | // creation: 28/09/2023 4 | // last change: 30/09/2023 5 | 6 | #include "family.h" 7 | 8 | using namespace glm; 9 | 10 | //' @keywords internal 11 | // [[Rcpp::export("cpp.family.gaussian.variance")]] 12 | arma::vec cpp_gaussian_variance (const arma::vec & mu) { 13 | std::unique_ptr ptr = std::make_unique(); 14 | Gaussian f(ptr); 15 | return f.variance(mu); 16 | } 17 | 18 | //' @keywords internal 19 | // [[Rcpp::export("cpp.family.gaussian.initialize")]] 20 | arma::vec cpp_gaussian_initialize (const arma::vec & y) { 21 | std::unique_ptr ptr = std::make_unique(); 22 | Gaussian f(ptr); 23 | return 
f.initialize(y); 24 | } 25 | 26 | //' @keywords internal 27 | // [[Rcpp::export("cpp.family.gaussian.devresid")]] 28 | arma::vec cpp_gaussian_devresid (const arma::vec & y, const arma::vec & mu) { 29 | std::unique_ptr ptr = std::make_unique(); 30 | Gaussian f(ptr); 31 | return f.devresid(y, mu); 32 | } 33 | 34 | //' @keywords internal 35 | // [[Rcpp::export("cpp.family.binomial.variance")]] 36 | arma::vec cpp_binomial_variance (const arma::vec & mu) { 37 | std::unique_ptr ptr = std::make_unique(); 38 | Binomial f(ptr); 39 | return f.variance(mu); 40 | } 41 | 42 | //' @keywords internal 43 | // [[Rcpp::export("cpp.family.binomial.initialize")]] 44 | arma::vec cpp_binomial_initialize (const arma::vec & y) { 45 | std::unique_ptr ptr = std::make_unique(); 46 | Binomial f(ptr); 47 | return f.initialize(y); 48 | } 49 | 50 | //' @keywords internal 51 | // [[Rcpp::export("cpp.family.binomial.devresid")]] 52 | arma::vec cpp_binomial_devresid (const arma::vec & y, const arma::vec & mu) { 53 | std::unique_ptr ptr = std::make_unique(); 54 | Binomial f(ptr); 55 | return f.devresid(y, mu); 56 | } 57 | 58 | //' @keywords internal 59 | // [[Rcpp::export("cpp.family.poisson.variance")]] 60 | arma::vec cpp_poisson_variance (const arma::vec & mu) { 61 | std::unique_ptr ptr = std::make_unique(); 62 | Poisson f(ptr); 63 | return f.variance(mu); 64 | } 65 | 66 | //' @keywords internal 67 | // [[Rcpp::export("cpp.family.poisson.initialize")]] 68 | arma::vec cpp_poisson_initialize (const arma::vec & y) { 69 | std::unique_ptr ptr = std::make_unique(); 70 | Poisson f(ptr); 71 | return f.initialize(y); 72 | } 73 | 74 | //' @keywords internal 75 | // [[Rcpp::export("cpp.family.poisson.devresid")]] 76 | arma::vec cpp_poisson_devresid (const arma::vec & y, const arma::vec & mu) { 77 | std::unique_ptr ptr = std::make_unique(); 78 | Poisson f(ptr); 79 | return f.devresid(y, mu); 80 | } 81 | 82 | //' @keywords internal 83 | // [[Rcpp::export("cpp.family.gamma.variance")]] 84 | arma::vec 
cpp_gamma_variance (const arma::vec & mu) { 85 | std::unique_ptr ptr = std::make_unique(); 86 | Gamma f(ptr); 87 | return f.variance(mu); 88 | } 89 | 90 | //' @keywords internal 91 | // [[Rcpp::export("cpp.family.gamma.initialize")]] 92 | arma::vec cpp_gamma_initialize (const arma::vec & y) { 93 | std::unique_ptr ptr = std::make_unique(); 94 | Gamma f(ptr); 95 | return f.initialize(y); 96 | } 97 | 98 | //' @keywords internal 99 | // [[Rcpp::export("cpp.family.gamma.devresid")]] 100 | arma::vec cpp_gamma_devresid (const arma::vec & y, const arma::vec & mu) { 101 | std::unique_ptr ptr = std::make_unique(); 102 | Gamma f(ptr); 103 | return f.devresid(y, mu); 104 | } 105 | 106 | //' @keywords internal 107 | // [[Rcpp::export("cpp.family.negbinom.variance")]] 108 | arma::vec cpp_negbinom_variance (const arma::vec & mu) { 109 | std::unique_ptr ptr = std::make_unique(); 110 | NegativeBinomial f(ptr); 111 | return f.variance(mu); 112 | } 113 | 114 | //' @keywords internal 115 | // [[Rcpp::export("cpp.family.negbinom.initialize")]] 116 | arma::vec cpp_negbinom_initialize (const arma::vec & y) { 117 | std::unique_ptr ptr = std::make_unique(); 118 | NegativeBinomial f(ptr); 119 | return f.initialize(y); 120 | } 121 | 122 | //' @keywords internal 123 | // [[Rcpp::export("cpp.family.negbinom.devresid")]] 124 | arma::vec cpp_negbinom_devresid (const arma::vec & y, const arma::vec & mu) { 125 | std::unique_ptr ptr = std::make_unique(); 126 | NegativeBinomial f(ptr); 127 | return f.devresid(y, mu); 128 | } -------------------------------------------------------------------------------- /man/sgdgmf.rank.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/eigengap.R 3 | \name{sgdgmf.rank} 4 | \alias{sgdgmf.rank} 5 | \title{Rank selection via eigenvalue-gap methods} 6 | \usage{ 7 | sgdgmf.rank( 8 | Y, 9 | X = NULL, 10 | Z = NULL, 11 | maxcomp = ncol(Y), 12 | family = 
gaussian(), 13 | weights = NULL, 14 | offset = NULL, 15 | method = c("evr", "onatski", "act", "oht"), 16 | type.reg = c("ols", "glm"), 17 | type.res = c("deviance", "pearson", "working", "link"), 18 | normalize = FALSE, 19 | maxiter = 10, 20 | parallel = FALSE, 21 | nthreads = 1, 22 | return.eta = FALSE, 23 | return.mu = FALSE, 24 | return.res = FALSE, 25 | return.cov = FALSE 26 | ) 27 | } 28 | \arguments{ 29 | \item{Y}{matrix of responses (\eqn{n \times m})} 30 | 31 | \item{X}{matrix of row-specific fixed effects (\eqn{n \times p})} 32 | 33 | \item{Z}{matrix of column-specific fixed effects (\eqn{q \times m})} 34 | 35 | \item{maxcomp}{maximum number of eigenvalues to compute} 36 | 37 | \item{family}{a family as in the \code{\link{glm}} interface (default \code{gaussian()})} 38 | 39 | \item{weights}{matrix of optional weights (\eqn{n \times m})} 40 | 41 | \item{offset}{matrix of optional offsets (\eqn{n \times m})} 42 | 43 | \item{method}{rank selection method} 44 | 45 | \item{type.reg}{regression method to be used to profile out the covariate effects} 46 | 47 | \item{type.res}{residual type to be decomposed} 48 | 49 | \item{normalize}{if \code{TRUE}, standardize column-by-column the residual matrix} 50 | 51 | \item{maxiter}{maximum number of iterations} 52 | 53 | \item{parallel}{if \code{TRUE}, allows for parallel computing using \code{foreach}} 54 | 55 | \item{nthreads}{number of cores to be used in parallel (only if \code{parallel=TRUE})} 56 | 57 | \item{return.eta}{if \code{TRUE}, return the linear predictor matrix} 58 | 59 | \item{return.mu}{if \code{TRUE}, return the fitted value matrix} 60 | 61 | \item{return.res}{if \code{TRUE}, return the residual matrix} 62 | 63 | \item{return.cov}{if \code{TRUE}, return the covariance matrix of the residuals} 64 | } 65 | \value{ 66 | A list containing the \code{method}, the selected latent rank \code{ncomp}, 67 | and the eigenvalues used to select the latent rank \code{lambdas}. 
68 | Additionally, if requested, the output list will also provide the linear predictor 69 | \code{eta}, the predicted mean matrix \code{mu}, the residual matrix \code{res}, and 70 | the implied residual covariance matrix \code{covmat}. 71 | } 72 | \description{ 73 | Select the number of significant principal components of a GMF model via 74 | exploitation of eigenvalue-gap methods 75 | } 76 | \examples{ 77 | library(sgdGMF) 78 | 79 | # Set the data dimensions 80 | n = 100; m = 20; d = 5 81 | 82 | # Generate data using Poisson, Binomial and Gamma models 83 | data_pois = sim.gmf.data(n = n, m = m, ncomp = d, family = poisson()) 84 | data_bin = sim.gmf.data(n = n, m = m, ncomp = d, family = binomial()) 85 | data_gam = sim.gmf.data(n = n, m = m, ncomp = d, family = Gamma(link = "log"), dispersion = 0.25) 86 | 87 | # Select the latent rank of each simulated data set 88 | ncomp_pois = sgdgmf.rank(data_pois$Y, family = poisson(), normalize = TRUE) 89 | ncomp_bin = sgdgmf.rank(data_bin$Y, family = binomial(), normalize = TRUE) 90 | ncomp_gam = sgdgmf.rank(data_gam$Y, family = Gamma(link = "log"), normalize = TRUE) 91 | 92 | # Get the selected number of components 93 | print(paste("Poisson:", ncomp_pois$ncomp)) 94 | print(paste("Binomial:", ncomp_bin$ncomp)) 95 | print(paste("Gamma:", ncomp_gam$ncomp)) 96 | 97 | # Plot the screeplot used for the component determination 98 | oldpar = par(no.readonly = TRUE) 99 | par(mfrow = c(3,1)) 100 | barplot(ncomp_pois$lambdas, main = "Poisson screeplot") 101 | barplot(ncomp_bin$lambdas, main = "Binomial screeplot") 102 | barplot(ncomp_gam$lambdas, main = "Gamma screeplot") 103 | par(oldpar) 104 | 105 | } 106 | \references{ 107 | Onatski, A. (2010). 108 | \emph{Determining the number of factors from empirical distribution of eigenvalues.} 109 | Review of Economics and Statistics, 92(4): 1004-1016 110 | 111 | Ahn, S.C., Horenstein, A.R. (2013). 
112 | \emph{Eigenvalue ratio test for the number of factors.} 113 | Econometrica, 81, 1203-1227 114 | 115 | Gavish, M., Donoho, D.L. (2014) 116 | \emph{The optimal hard thresholding for singular values is 4/sqrt(3).} 117 | IEEE Transactions on Information Theory, 60(8): 5040--5053 118 | 119 | Fan, J., Guo, J. and Zheng, S. (2020). 120 | \emph{Estimating number of factors by adjusted eigenvalues thresholding.} 121 | Journal of the American Statistical Association, 117(538): 852--861 122 | 123 | Wang, L. and Carvalho, L. (2023). 124 | \emph{Deviance matrix factorization.} 125 | Electronic Journal of Statistics, 17(2): 3762-3810 126 | } 127 | -------------------------------------------------------------------------------- /tests/testthat/test-deviance.R: -------------------------------------------------------------------------------- 1 | # file: test-deviance.R 2 | # author: Cristian Castiglione 3 | # creation: 05/02/2024 4 | # last change: 25/02/2024 5 | 6 | testthat::test_that("Elementwise Gaussian deviance", { 7 | n = 100; m = 10 8 | 9 | mu = matrix(rnorm(n*m), nrow = n, ncol = m) 10 | y = matrix(rnorm(n*m, mean = mu, sd = .1), nrow = n, ncol = m) 11 | dev = pointwise.deviance(mu, y, gaussian()) 12 | 13 | testthat::expect_equal(dim(dev), c(n, m)) 14 | testthat::expect_true(all(dev >= 0)) 15 | testthat::expect_true(all(is.finite(dev))) 16 | testthat::expect_false(anyNA(dev)) 17 | }) 18 | 19 | testthat::test_that("Elementwise Poisson deviance", { 20 | n = 100; m = 10 21 | 22 | mu = matrix(exp(rnorm(n*m)), nrow = n, ncol = m) 23 | y = matrix(rpois(n*m, lambda = mu), nrow = n, ncol = m) 24 | dev = pointwise.deviance(mu, y, poisson()) 25 | 26 | testthat::expect_equal(dim(dev), c(n, m)) 27 | testthat::expect_true(all(dev >= 0)) 28 | testthat::expect_true(all(is.finite(dev))) 29 | testthat::expect_false(anyNA(dev)) 30 | }) 31 | 32 | testthat::test_that("Elementwise Binomial deviance", { 33 | n = 100; m = 10 34 | 35 | mu = matrix(plogis(rnorm(n*m)), nrow = n, ncol = m) 36 
| y = matrix(rbinom(n*m, size = 1, prob = mu), nrow = n, ncol = m) 37 | dev = pointwise.deviance(mu, y, binomial()) 38 | 39 | testthat::expect_equal(dim(dev), c(n, m)) 40 | testthat::expect_true(all(dev >= 0)) 41 | testthat::expect_true(all(is.finite(dev))) 42 | testthat::expect_false(anyNA(dev)) 43 | }) 44 | 45 | testthat::test_that("Elementwise Gamma deviance", { 46 | n = 100; m = 10 47 | 48 | mu = matrix(exp(rnorm(n*m)), nrow = n, ncol = m) 49 | y = matrix(rgamma(n*m, shape = 1, rate = mu), nrow = n, ncol = m) 50 | dev = pointwise.deviance(mu, y, Gamma()) 51 | 52 | testthat::expect_equal(dim(dev), c(n, m)) 53 | testthat::expect_true(all(dev >= 0)) 54 | testthat::expect_true(all(is.finite(dev))) 55 | testthat::expect_false(anyNA(dev)) 56 | }) 57 | 58 | testthat::test_that("Elementwise deviance with missing", { 59 | n = 100; m = 10; f = floor(.3 * n * m) 60 | 61 | mask = unique(cbind( 62 | sample(1:n, size = f, replace = TRUE), 63 | sample(1:m, size = f, replace = TRUE))) 64 | 65 | mu = matrix(exp(rnorm(n*m)), nrow = n, ncol = m) 66 | y = matrix(rgamma(n*m, shape = 1, rate = mu), nrow = n, ncol = m) 67 | y[mask] = NA 68 | 69 | dev = pointwise.deviance(mu, y, Gamma()) 70 | # Check the observed cells only: dev[-rows, -cols] drops whole rows/columns and is almost surely empty here 71 | testthat::expect_equal(dim(dev), c(n, m)) 72 | testthat::expect_true(all(dev[!is.na(y)] >= 0)) 73 | testthat::expect_true(all(is.finite(dev[!is.na(y)]))) 74 | testthat::expect_equal(sum(is.na(dev)), nrow(mask)) 75 | }) 76 | 77 | testthat::test_that("Matrix Gaussian deviance", { 78 | n = 100; m = 10 79 | 80 | mu = matrix(rnorm(n*m), nrow = n, ncol = m) 81 | y = matrix(rnorm(n*m, mean = mu, sd = .1), nrow = n, ncol = m) 82 | dev = matrix.deviance(mu, y, gaussian()) 83 | 84 | testthat::expect_true(is.finite(dev)) 85 | testthat::expect_true(dev >= 0) 86 | }) 87 | 88 | testthat::test_that("Matrix Poisson deviance", { 89 | n = 100; m = 10 90 | 91 | mu = matrix(exp(rnorm(n*m)), nrow = n, ncol = m) 92 | y = matrix(rpois(n*m, lambda = mu), nrow = n, ncol = m) 93 | dev = 
matrix.deviance(mu, y, poisson()) 94 | 95 | testthat::expect_true(is.finite(dev)) 96 | testthat::expect_true(dev >= 0) 97 | }) 98 | 99 | testthat::test_that("Matrix Binomial deviance", { 100 | n = 100; m = 10 101 | 102 | mu = matrix(plogis(rnorm(n*m)), nrow = n, ncol = m) 103 | y = matrix(rbinom(n*m, size = 1, prob = mu), nrow = n, ncol = m) 104 | dev = matrix.deviance(mu, y, binomial()) 105 | 106 | testthat::expect_true(is.finite(dev)) 107 | testthat::expect_true(dev >= 0) 108 | }) 109 | 110 | testthat::test_that("Matrix Gamma deviance", { 111 | n = 100; m = 10 112 | 113 | mu = matrix(exp(rnorm(n*m)), nrow = n, ncol = m) 114 | y = matrix(rgamma(n*m, shape = 1, rate = mu), nrow = n, ncol = m) 115 | dev = matrix.deviance(mu, y, Gamma()) 116 | 117 | testthat::expect_true(is.finite(dev)) 118 | testthat::expect_true(dev >= 0) 119 | }) 120 | 121 | 122 | testthat::test_that("Matrix deviance with missing", { 123 | n = 100; m = 10; f = floor(.3 * n * m) 124 | 125 | mask = unique(cbind( 126 | sample(1:n, size = f, replace = TRUE), 127 | sample(1:m, size = f, replace = TRUE))) 128 | 129 | mu = matrix(exp(rnorm(n*m)), nrow = n, ncol = m) 130 | y = matrix(rgamma(n*m, shape = 1, rate = mu), nrow = n, ncol = m) 131 | y[mask] = NA 132 | 133 | dev = matrix.deviance(mu, y, Gamma()) 134 | 135 | testthat::expect_true(is.finite(dev)) 136 | testthat::expect_false(is.na(dev)) 137 | testthat::expect_true(dev >= 0) 138 | }) 139 | 140 | testthat::test_that("Frobenious matrix penalty", { 141 | n = 100; m = 3 142 | 143 | U = matrix(rnorm(n*m), nrow = n, ncol = m) 144 | lambda = rexp(m) 145 | pen = matrix.penalty(U, lambda) 146 | 147 | testthat::expect_equal(pen, sum((U * U) %*% diag(lambda))) 148 | }) 149 | 150 | -------------------------------------------------------------------------------- /tests/testcpp/test-utils.cpp: -------------------------------------------------------------------------------- 1 | // test-utils.cpp 2 | // author: Cristian Castiglione 3 | // creation: 29/09/2023 4 | 
// last change: 29/09/2023 5 | 6 | #include "utils.h" 7 | 8 | //' @keywords internal 9 | // [[Rcpp::export("cpp.utils.dabsmax")]] 10 | double cpp_dabsmax (const double & u, const double & v) {return utils::absmax(u, v);} 11 | 12 | //' @keywords internal 13 | // [[Rcpp::export("cpp.utils.vabsmax")]] 14 | double cpp_vabsmax (const arma::vec & u, const arma::vec & v) {return utils::absmax(u, v);} 15 | 16 | //' @keywords internal 17 | // [[Rcpp::export("cpp.utils.trim")]] 18 | arma::vec cpp_trim (const arma::vec & x, double a, double b) { 19 | arma::vec y = x; 20 | utils::trim(y, a, b); 21 | return y; 22 | } 23 | 24 | //' @keywords internal 25 | // [[Rcpp::export("cpp.utils.xlogx")]] 26 | arma::vec cpp_xlogx (const arma::vec & x) {return utils::xlogx(x);} 27 | 28 | //' @keywords internal 29 | // [[Rcpp::export("cpp.utils.log1pexp")]] 30 | arma::vec cpp_log1pexp (const arma::vec & x) {return utils::log1pexp(x);} 31 | 32 | //' @keywords internal 33 | // [[Rcpp::export("cpp.utils.log1mexp")]] 34 | arma::vec cpp_log1mexp (const arma::vec & x) {return utils::log1mexp(x);} 35 | 36 | //' @keywords internal 37 | // [[Rcpp::export("cpp.utils.logit")]] 38 | arma::vec cpp_logit (const arma::vec & x) {return utils::logit(x);} 39 | 40 | //' @keywords internal 41 | // [[Rcpp::export("cpp.utils.expit")]] 42 | arma::vec cpp_expit (const arma::vec & x) {return utils::expit(x);} 43 | 44 | //' @keywords internal 45 | // [[Rcpp::export("cpp.utils.expit2")]] 46 | arma::vec cpp_expit2 (const arma::vec & x) {return utils::expit2(x);} 47 | 48 | //' @keywords internal 49 | // [[Rcpp::export("cpp.utils.expitn")]] 50 | arma::vec cpp_expitn (const arma::vec & x, double n = 1) {return utils::expitn(x, n);} 51 | 52 | //' @keywords internal 53 | // [[Rcpp::export("cpp.utils.cloglog")]] 54 | arma::vec cpp_cloglog (const arma::vec & x) {return utils::cloglog(x);} 55 | 56 | //' @keywords internal 57 | // [[Rcpp::export("cpp.utils.cexpexp")]] 58 | arma::vec cpp_cexpexp (const arma::vec & x) {return 
utils::cexpexp(x);} 59 | 60 | //' @keywords internal 61 | // [[Rcpp::export("cpp.utils.loglog")]] 62 | arma::vec cpp_loglog (const arma::vec & x) {return utils::loglog(x);} 63 | 64 | //' @keywords internal 65 | // [[Rcpp::export("cpp.utils.expexp")]] 66 | arma::vec cpp_expexp (const arma::vec & x) {return utils::expexp(x);} 67 | 68 | //' @keywords internal 69 | // [[Rcpp::export("cpp.utils.pdfn")]] 70 | arma::vec cpp_pdfn (const arma::vec & x) {return utils::pdfn(x);} 71 | 72 | //' @keywords internal 73 | // [[Rcpp::export("cpp.utils.cdfn")]] 74 | arma::vec cpp_cdfn (const arma::vec & x) {return utils::cdfn(x);} 75 | 76 | //' @keywords internal 77 | // [[Rcpp::export("cpp.utils.logpdfn")]] 78 | arma::vec cpp_logpdfn (const arma::vec & x) {return utils::logpdfn(x);} 79 | 80 | //' @keywords internal 81 | // [[Rcpp::export("cpp.utils.logcdfn")]] 82 | arma::vec cpp_logcdfn (const arma::vec & x) {return utils::logcdfn(x);} 83 | 84 | //' @keywords internal 85 | // [[Rcpp::export("cpp.utils.gamma")]] 86 | arma::vec cpp_gamma (const arma::vec & x) {return utils::gamma(x);} 87 | 88 | //' @keywords internal 89 | // [[Rcpp::export("cpp.utils.loggamma")]] 90 | arma::vec cpp_loggamma (const arma::vec & x) {return utils::loggamma(x);} 91 | 92 | //' @keywords internal 93 | // [[Rcpp::export("cpp.utils.digamma")]] 94 | arma::vec cpp_digamma (const arma::vec & x) {return utils::digamma(x);} 95 | 96 | //' @keywords internal 97 | // [[Rcpp::export("cpp.utils.trigamma")]] 98 | arma::vec cpp_trigamma (const arma::vec & x) {return utils::trigamma(x);} 99 | 100 | //' @keywords internal 101 | // [[Rcpp::export("cpp.utils.beta")]] 102 | arma::vec cpp_beta (const arma::vec & x, const arma::vec & y) {return utils::beta(x, y);} 103 | 104 | //' @keywords internal 105 | // [[Rcpp::export("cpp.utils.logbeta")]] 106 | arma::vec cpp_logbeta (const arma::vec & x, const arma::vec & y) {return utils::logbeta(x, y);} 107 | 108 | //' @keywords internal 109 | // [[Rcpp::export("cpp.utils.dibeta")]] 110 
| arma::vec cpp_dibeta (const arma::vec & x, const arma::vec & y) {return utils::dibeta(x, y);} 111 | 112 | //' @keywords internal 113 | // [[Rcpp::export("cpp.utils.tribeta")]] 114 | arma::vec cpp_tribeta (const arma::vec & x, const arma::vec & y) {return utils::tribeta(x, y);} 115 | 116 | //' @keywords internal 117 | // [[Rcpp::export("cpp.utils.hinge")]] 118 | arma::vec cpp_hinge (const arma::vec & x) {return utils::hinge(x);} 119 | 120 | //' @keywords internal 121 | // [[Rcpp::export("cpp.utils.dirac")]] 122 | arma::vec cpp_dirac (const arma::vec & x, double a = 0) {return utils::dirac(x, a);} 123 | 124 | //' @keywords internal 125 | // [[Rcpp::export("cpp.utils.step")]] 126 | arma::vec cpp_step (const arma::vec & x, double a = 0, bool lower = true) {return utils::step(x, a, lower);} 127 | 128 | //' @keywords internal 129 | // [[Rcpp::export("cpp.utils.vech")]] 130 | arma::vec cpp_vech(const arma::mat & A) {return utils::vech(A);} 131 | --------------------------------------------------------------------------------