├── vignettes
    ├── .gitignore
    └── residuals.Rmd
├── LICENSE
├── data
    └── .gitignore
├── man
    ├── omp.check.Rd
    ├── matrix.deviance.Rd
    ├── matrix.penalty.Rd
    ├── pointwise.deviance.Rd
    ├── make.pos.diag.Rd
    ├── set.mat.Y.Rd
    ├── set.mat.X.Rd
    ├── set.mat.Z.Rd
    ├── set.mat.offset.Rd
    ├── norm.procrustes.Rd
    ├── set.mat.weights.Rd
    ├── reexports.Rd
    ├── simulate.Rd
    ├── set.family.Rd
    ├── normalize.uv.Rd
    ├── storedata.Rd
    ├── procrustes.Rd
    ├── ols.fit.coef.Rd
    ├── set.penalty.Rd
    ├── eigengap.evr.Rd
    ├── orthogonalize.Rd
    ├── partition.Rd
    ├── eigengap.onatski.Rd
    ├── eigengap.oht.Rd
    ├── eigengap.act.Rd
    ├── print.sgdgmf.Rd
    ├── print.initgmf.Rd
    ├── simulate.sgdgmf.Rd
    ├── whitening.matrix.Rd
    ├── cpp.airwls.glmstep.Rd
    ├── deviance.sgdgmf.Rd
    ├── vglm.fit.coef.Rd
    ├── cpp.airwls.glmfit.Rd
    ├── fitted.sgdgmf.Rd
    ├── deviance.initgmf.Rd
    ├── fitted.initgmf.Rd
    ├── orthogonalize.uv.Rd
    ├── coefficients.sgdgmf.Rd
    ├── cpp.airwls.update.Rd
    ├── set.control.cv.Rd
    ├── biplot.sgdgmf.Rd
    ├── coefficients.initgmf.Rd
    ├── biplot.initgmf.Rd
    ├── set.control.alg.Rd
    ├── sgdGMF-package.Rd
    ├── storedata.sgdgmf.Rd
    ├── screeplot.sgdgmf.Rd
    ├── set.control.init.Rd
    ├── image.sgdgmf.Rd
    ├── screeplot.initgmf.Rd
    ├── image.initgmf.Rd
    ├── refit.sgdgmf.Rd
    ├── set.control.newton.Rd
    ├── plot.sgdgmf.Rd
    ├── cpp.fit.newton.Rd
    ├── plot.initgmf.Rd
    ├── cpp.fit.airwls.Rd
    ├── set.control.airwls.Rd
    ├── set.control.block.sgd.Rd
    ├── set.control.coord.sgd.Rd
    ├── sgdgmf.cv.step.Rd
    ├── sim.gmf.data.Rd
    ├── predict.sgdgmf.Rd
    ├── cpp.fit.block.sgd.Rd
    ├── residuals.initgmf.Rd
    ├── cpp.fit.coord.sgd.Rd
    ├── cpp.fit.random.block.sgd.Rd
    ├── residuals.sgdgmf.Rd
    └── sgdgmf.rank.Rd
├── sgdGMF.Rproj
├── tests
    ├── testthat.R
    ├── testcpp
    │   ├── test-deviance.cpp
    │   ├── test-minibatch.cpp
    │   ├── test-misc.cpp
    │   ├── test-link.cpp
    │   ├── test-family.cpp
    │   └── test-utils.cpp
    ├── testthat
    │   ├── test-eigengap.R
    │   ├── test-fit.R
    │   ├── test-control.R
    │   ├── test-init.R
    │   ├── test-vglmfit.R
    │   └── test-deviance.R
    └── testrcpp
    │   ├── test-minibatch.R
    │   ├── test-newton.R
    │   ├── test-link.R
    │   ├── test-misc.R
    │   └── test-family.R
├── src
    ├── deviance.h
    ├── Makevars
    ├── Makevars.win
    ├── deviance.cpp
    ├── minibatch.h
    ├── variance.cpp
    ├── variance.h
    ├── misc.h
    ├── minibatch.cpp
    ├── link.cpp
    ├── family.cpp
    ├── link.h
    └── utils.h
├── .Rbuildignore
├── R
    ├── genericfun.R
    ├── deviance.R
    ├── sgdGMF-package.R
    └── vglmfit.R
├── README.md
├── .github
    └── workflows
    │   └── R-CMD-check.yaml
├── NEWS.md
├── .gitignore
├── DESCRIPTION
└── NAMESPACE


/vignettes/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | *.R
3 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2025
2 | COPYRIGHT HOLDER: Cristian Castiglione
3 | 


--------------------------------------------------------------------------------
/data/.gitignore:
--------------------------------------------------------------------------------
 1 | 
 2 | # Data folder
 3 | splatter
 4 | splatter/
 5 | splatter/**
 6 | bubble
 7 | bubble/
 8 | bubble/**
 9 | bubble2
10 | bubble2/
11 | bubble2/**
12 | 
13 | BE1.RData
14 | mixology.RData
15 | PBMC.RData
16 | 


--------------------------------------------------------------------------------
/man/omp.check.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/RcppExports.R
 3 | \name{omp.check}
 4 | \alias{omp.check}
 5 | \title{Check if OpenMP is enabled}
 6 | \usage{
 7 | omp.check()
 8 | }
 9 | \description{
10 | Internal function to check if OpenMP is enabled
11 | }
12 | \keyword{internal}
13 | 


--------------------------------------------------------------------------------
/man/matrix.deviance.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/deviance.R
 3 | \name{matrix.deviance}
 4 | \alias{matrix.deviance}
 5 | \title{Model deviance of a GMF model}
 6 | \usage{
 7 | matrix.deviance(mu, y, family = gaussian())
 8 | }
 9 | \description{
10 | Compute the overall deviance averaging the contributions of all data
11 | }
12 | \keyword{internal}
13 | 


--------------------------------------------------------------------------------
/man/matrix.penalty.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/deviance.R
 3 | \name{matrix.penalty}
 4 | \alias{matrix.penalty}
 5 | \title{Frobenius penalty for the parameters of a GMF model}
 6 | \usage{
 7 | matrix.penalty(U, penalty)
 8 | }
 9 | \description{
10 | Compute the Frobenius penalty for all the parameters in the model
11 | }
12 | \keyword{internal}
13 | 


--------------------------------------------------------------------------------
/man/pointwise.deviance.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/deviance.R
 3 | \name{pointwise.deviance}
 4 | \alias{pointwise.deviance}
 5 | \title{Pointwise deviance of a GMF model}
 6 | \usage{
 7 | pointwise.deviance(mu, y, family = gaussian())
 8 | }
 9 | \description{
10 | Compute the pointwise deviance for all the observations in the sample
11 | }
12 | \keyword{internal}
13 | 


--------------------------------------------------------------------------------
/man/make.pos.diag.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{make.pos.diag}
 4 | \alias{make.pos.diag}
 5 | \title{Fix sign ambiguity of eigen-vectors}
 6 | \usage{
 7 | make.pos.diag(U)
 8 | }
 9 | \arguments{
10 | \item{U}{target matrix}
11 | }
12 | \description{
13 | Fix sign ambiguity of eigen-vectors by making U positive diagonal
14 | }
15 | \keyword{internal}
16 | 


--------------------------------------------------------------------------------
/man/set.mat.Y.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/control.R
 3 | \name{set.mat.Y}
 4 | \alias{set.mat.Y}
 5 | \title{Check and set the response matrix Y}
 6 | \usage{
 7 | set.mat.Y(Y)
 8 | }
 9 | \description{
10 | Check if the input response matrix is well-defined and return the same
11 | matrix without attributes such as row and column names.
12 | }
13 | \keyword{internal}
14 | 


--------------------------------------------------------------------------------
/man/set.mat.X.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/control.R
 3 | \name{set.mat.X}
 4 | \alias{set.mat.X}
 5 | \title{Check and set the covariate matrix X}
 6 | \usage{
 7 | set.mat.X(X, n, m)
 8 | }
 9 | \description{
10 | Check if the input covariate matrix X is well-defined and return the same
11 | matrix without attributes such as row and column names.
12 | }
13 | \keyword{internal}
14 | 


--------------------------------------------------------------------------------
/man/set.mat.Z.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/control.R
 3 | \name{set.mat.Z}
 4 | \alias{set.mat.Z}
 5 | \title{Check and set the covariate matrix Z}
 6 | \usage{
 7 | set.mat.Z(Z, n, m)
 8 | }
 9 | \description{
10 | Check if the input covariate matrix Z is well-defined and return the same
11 | matrix without attributes such as row and column names.
12 | }
13 | \keyword{internal}
14 | 


--------------------------------------------------------------------------------
/man/set.mat.offset.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/control.R
 3 | \name{set.mat.offset}
 4 | \alias{set.mat.offset}
 5 | \title{Check and set the offset matrix}
 6 | \usage{
 7 | set.mat.offset(O, n, m)
 8 | }
 9 | \description{
10 | Check if the input offset matrix is well-defined and return the same
11 | matrix without attributes such as row and column names.
12 | }
13 | \keyword{internal}
14 | 


--------------------------------------------------------------------------------
/man/norm.procrustes.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{norm.procrustes}
 4 | \alias{norm.procrustes}
 5 | \title{Procrustes distance}
 6 | \usage{
 7 | norm.procrustes(A, B)
 8 | }
 9 | \arguments{
10 | \item{A}{target matrix}
11 | 
12 | \item{B}{matrix to be rotated}
13 | }
14 | \description{
15 | Compute the Procrustes distance between two matrices
16 | }
17 | \keyword{internal}
18 | 


--------------------------------------------------------------------------------
/man/set.mat.weights.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/control.R
 3 | \name{set.mat.weights}
 4 | \alias{set.mat.weights}
 5 | \title{Check and set the weighting matrix}
 6 | \usage{
 7 | set.mat.weights(W, n, m)
 8 | }
 9 | \description{
10 | Check if the input weighting matrix is well-defined and return the same
11 | matrix without attributes such as row and column names.
12 | }
13 | \keyword{internal}
14 | 


--------------------------------------------------------------------------------
/sgdGMF.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
 1 | # This file is part of the standard setup for testthat.
 2 | # It is recommended that you do not modify it.
 3 | #
 4 | # Where should you do additional test configuration?
 5 | # Learn more about the roles of various files in:
 6 | # * https://r-pkgs.org/testing-design.html#sec-tests-files-overview
 7 | # * https://testthat.r-lib.org/articles/special-files.html
 8 | 
 9 | library(testthat)
10 | library(sgdGMF)
11 | 
12 | test_check("sgdGMF") # run the testthat suite of the sgdGMF package
13 | 


--------------------------------------------------------------------------------
/man/reexports.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/genericfun.R
 3 | \docType{import}
 4 | \name{reexports}
 5 | \alias{reexports}
 6 | \alias{refit}
 7 | \title{Objects exported from other packages}
 8 | \keyword{internal}
 9 | \description{
10 | These objects are imported from other packages. Follow the links
11 | below to see their documentation.
12 | 
13 | \describe{
14 |   \item{generics}{\code{\link[generics]{refit}}}
15 | }}
16 | 
17 | 


--------------------------------------------------------------------------------
/man/simulate.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/genericfun.R
 3 | \name{simulate}
 4 | \alias{simulate}
 5 | \title{Simulate new data}
 6 | \usage{
 7 | simulate(object, ...)
 8 | }
 9 | \arguments{
10 | \item{object}{an object from which to simulate new data}
11 | 
12 | \item{...}{additional arguments passed to or from other methods}
13 | }
14 | \value{
15 | An array containing the simulated data.
16 | }
17 | \description{
18 | Generic function to simulate new data from a statistical model
19 | }
20 | 


--------------------------------------------------------------------------------
/man/set.family.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/control.R
 3 | \name{set.family}
 4 | \alias{set.family}
 5 | \title{Check and set the model family}
 6 | \usage{
 7 | set.family(family)
 8 | }
 9 | \arguments{
10 | \item{family}{a \code{glm} family (see \code{\link{family}} for more details)}
11 | }
12 | \description{
13 | Check if the model family is allowed and return it eventually with a
14 | different family name for compatibility with the \code{C++} implementation
15 | }
16 | \keyword{internal}
17 | 


--------------------------------------------------------------------------------
/man/normalize.uv.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{normalize.uv}
 4 | \alias{normalize.uv}
 5 | \title{Normalize the matrices U and V}
 6 | \usage{
 7 | normalize.uv(U, V, method = c("qr", "svd"))
 8 | }
 9 | \description{
10 | Rotate U and V using either QR or SVD decompositions.
11 | The QR methods rotate U and V in such a way to obtain an orthogonal U
12 | and a lower triangular V.  The SVD method rotate U and V in such a way
13 | to obtain an orthogonal U and a scaled orthogonal V.
14 | }
15 | \keyword{internal}
16 | 


--------------------------------------------------------------------------------
/man/storedata.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/genericfun.R
 3 | \name{storedata}
 4 | \alias{storedata}
 5 | \title{Store data into an object}
 6 | \usage{
 7 | storedata(object, ...)
 8 | }
 9 | \arguments{
10 | \item{object}{an object in which to store the data}
11 | 
12 | \item{...}{additional arguments passed to or from other methods}
13 | }
14 | \value{
15 | An object of the same class as the input containing new data
16 | }
17 | \description{
18 | Generic function to store data into an object, typically a statistical model
19 | }
20 | 


--------------------------------------------------------------------------------
/man/procrustes.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{procrustes}
 4 | \alias{procrustes}
 5 | \title{Procrustes rotation of two configurations}
 6 | \usage{
 7 | procrustes(X, Y, scale = TRUE, symmetric = FALSE)
 8 | }
 9 | \arguments{
10 | \item{X}{target matrix}
11 | 
12 | \item{Y}{matrix to be rotated}
13 | 
14 | \item{scale}{allow scaling of axes of Y}
15 | 
16 | \item{symmetric}{if \code{TRUE}, use symmetric Procrustes statistic}
17 | }
18 | \description{
19 | Rotates a configuration to maximum similarity with another configuration
20 | }
21 | \keyword{internal}
22 | 


--------------------------------------------------------------------------------
/man/ols.fit.coef.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/vglmfit.R
 3 | \name{ols.fit.coef}
 4 | \alias{ols.fit.coef}
 5 | \title{Estimate the coefficients of a multivariate linear model}
 6 | \usage{
 7 | ols.fit.coef(Y, X, offset = NULL)
 8 | }
 9 | \arguments{
10 | \item{Y}{\eqn{n \times m} matrix of response variables}
11 | 
12 | \item{X}{\eqn{n \times p} matrix of covariates}
13 | 
14 | \item{offset}{\eqn{n \times m} matrix of offset values}
15 | }
16 | \description{
17 | Estimate the coefficients of a multivariate linear model via ordinary least squares.
18 | }
19 | \keyword{internal}
20 | 


--------------------------------------------------------------------------------
/man/set.penalty.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/control.R
 3 | \name{set.penalty}
 4 | \alias{set.penalty}
 5 | \title{Check and set the penalty parameters}
 6 | \usage{
 7 | set.penalty(B = 0, A = 0, U = 1, V = 0)
 8 | }
 9 | \arguments{
10 | \item{B}{penalty parameter of \code{B}}
11 | 
12 | \item{A}{penalty parameter of \code{A}}
13 | 
14 | \item{U}{penalty parameter of \code{U}}
15 | 
16 | \item{V}{penalty parameter of \code{V}}
17 | }
18 | \description{
19 | Check if the input penalty parameters are allowed and set them to default
20 | values if they are not. Returns a list of well-defined penalty parameters.
21 | }
22 | \keyword{internal}
23 | 


--------------------------------------------------------------------------------
/tests/testcpp/test-deviance.cpp:
--------------------------------------------------------------------------------
 1 | // test-deviance.cpp
 2 | // author: Cristian Castiglione
 3 | // creation: 02/10/2023
 4 | // last change: 02/10/2023
 5 | 
 6 | #include "deviance.h"
 7 | #include "misc.h"
 8 | #include <memory>
 9 | 
10 | using namespace glm;
11 | 
12 | //' @keywords internal
13 | // [[Rcpp::export("cpp.deviance")]]
14 | arma::mat cpp_deviance (const arma::mat & y, const arma::mat & mu, const std::string & familyname) {
15 |     std::unique_ptr<Family> family = make_family(familyname, std::string("identity")); // family is built by name; the link name is always "identity" here
16 |     return deviance(y, mu, family); // calls the value-returning deviance() overload
17 | }
18 | 
19 | //' @keywords internal
20 | // [[Rcpp::export("cpp.penalty")]]
21 | double cpp_penalty (const arma::mat & u, const arma::vec & p) {
22 |     return penalty(u, p); // forwards to the value-returning penalty() overload
23 | }
24 | 


--------------------------------------------------------------------------------
/man/eigengap.evr.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/eigengap.R
 3 | \name{eigengap.evr}
 4 | \alias{eigengap.evr}
 5 | \title{Rank selection via eigenvalue ratio maximization}
 6 | \usage{
 7 | eigengap.evr(covmat, maxcomp = 50, thr = 0.95)
 8 | }
 9 | \arguments{
10 | \item{covmat}{matrix to be decomposed}
11 | 
12 | \item{maxcomp}{maximum number of eigenvalues to compute}
13 | }
14 | \description{
15 | Select the number of significant principal components of a matrix via the
16 | eigenvalue ratio (EVR) maximization method
17 | }
18 | \references{
19 | Ahn, S.C., Horenstein, A.R. (2013).
20 | \emph{Eigenvalue ratio test for the number of factors.}
21 | Econometrica, 81, 1203-1227
22 | }
23 | \keyword{internal}
24 | 


--------------------------------------------------------------------------------
/src/deviance.h:
--------------------------------------------------------------------------------
 1 | // deviance.h
 2 | // author: Cristian Castiglione
 3 | // creation: 28/09/2023
 4 | // last change: 28/09/2023
 5 | 
 6 | #ifndef DEVIANCE_H
 7 | #define DEVIANCE_H
 8 | 
 9 | #include <RcppArmadillo.h>
10 | #include <memory>
11 | #include "family.h"
12 | 
13 | using namespace glm;
14 | 
15 | // Pointwise deviance: one overload takes an output matrix `dev` (presumably filled in place -- confirm in deviance.cpp), the other returns a matrix
16 | void deviance (
17 |     arma::mat & dev, const arma::mat & y, const arma::mat & mu, 
18 |     const std::unique_ptr<Family> & family);
19 | arma::mat deviance (
20 |     const arma::mat & y, const arma::mat & mu, 
21 |     const std::unique_ptr<Family> & family);
22 | 
23 | // Penalty function: one overload takes an output scalar `pen` (presumably written in place -- confirm in deviance.cpp), the other returns the value
24 | void penalty (double & pen, const arma::mat & u, const arma::vec & p);
25 | double penalty (const arma::mat & u, const arma::vec & p);
26 | 
27 | #endif


--------------------------------------------------------------------------------
/man/orthogonalize.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{orthogonalize}
 4 | \alias{orthogonalize}
 5 | \title{Orthogonalize the matrices U and V with respect to X and Z}
 6 | \usage{
 7 | orthogonalize(
 8 |   X,
 9 |   Z,
10 |   B,
11 |   A,
12 |   U,
13 |   V,
14 |   method = c("QR", "SVD", "ZCA", "ZCA-cor", "PCA", "PCA-cor", "Cholesky")
15 | )
16 | }
17 | \description{
18 | Orthogonalize \code{[A, U]} and \code{V} with respect to \code{X} and \code{Z},
19 | respectively, sequentially applying multivariate least squares and residual
20 | whitening on U. The result must satisfy the following constraints:
21 | \eqn{X^\top A = 0}, \eqn{X^\top U = 0}, \eqn{Z^\top V = 0}, \eqn{U^\top U \propto I}.
22 | }
23 | \keyword{internal}
24 | 


--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | #----------------------------
 2 | # VS-Code related
 3 | #----------------------------
 4 | ^\.vscode$
 5 | 
 6 | #----------------------------
 7 | # R related
 8 | #----------------------------
 9 | ^.*\.Rproj$
10 | ^\.Rproj\.user$
11 | ^\.Rhistory$
12 | ^\.RData$
13 | ^\.Rproj$
14 | 
15 | #----------------------------
16 | # C++ related
17 | #----------------------------
18 | ^tests/testcpp/
19 | ^tests/testrcpp/
20 | 
21 | #----------------------------
22 | # GitHub related
23 | #----------------------------
24 | ^\.git$
25 | ^\.github$
26 | ^\.gitignore$
27 | ^\.gitattributes$
28 | ^\.svn$
29 | 
30 | #----------------------------
31 | # Other
32 | #----------------------------
33 | ^data/
34 | ^examples/
35 | ^img/
36 | ^old/
37 | ^sandbox/
38 | ^sim/
39 | ^zip/
40 | ^cran/
41 | 


--------------------------------------------------------------------------------
/man/partition.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{partition}
 4 | \alias{partition}
 5 | \title{Split the data matrix in train and test sets}
 6 | \usage{
 7 | partition(y, p = 0.3)
 8 | }
 9 | \arguments{
10 | \item{y}{input matrix to be split into train and test sets}
11 | 
12 | \item{p}{fraction of observations to be used for the test set}
13 | }
14 | \description{
15 | Returns a list of two matrices \code{train} and \code{test}.
16 | \code{train} corresponds to the input matrix with a fixed percentage of
17 | entries masked by NA values. \code{test} is the complement of \code{train}
18 | and contains the values of the input matrix in the cells where \code{train}
19 | is NA, while all the other entries are filled by NA values.
20 | }
21 | \keyword{internal}
22 | 


--------------------------------------------------------------------------------
/man/eigengap.onatski.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/eigengap.R
 3 | \name{eigengap.onatski}
 4 | \alias{eigengap.onatski}
 5 | \title{Rank selection via the Onatski method}
 6 | \usage{
 7 | eigengap.onatski(covmat, maxcomp = 50, maxiter = 100)
 8 | }
 9 | \arguments{
10 | \item{covmat}{matrix to be decomposed}
11 | 
12 | \item{maxcomp}{maximum number of eigenvalues to compute}
13 | 
14 | \item{maxiter}{maximum number of iterations}
15 | }
16 | \description{
17 | Select the number of significant principal components of a matrix via the
18 | Onatski method
19 | }
20 | \references{
21 | Onatski, A. (2010).
22 | \emph{Determining the number of factors from empirical distribution of eigenvalues.}
23 | Review of Economics and Statistics, 92(4): 1004-1016
24 | }
25 | \keyword{internal}
26 | 


--------------------------------------------------------------------------------
/man/eigengap.oht.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/eigengap.R
 3 | \name{eigengap.oht}
 4 | \alias{eigengap.oht}
 5 | \title{Rank selection via optimal hard thresholding}
 6 | \usage{
 7 | eigengap.oht(covmat, nobs, maxcomp = NULL)
 8 | }
 9 | \arguments{
10 | \item{covmat}{matrix to be decomposed}
11 | 
12 | \item{nobs}{number of observations used to compute the covariance matrix}
13 | 
14 | \item{maxcomp}{maximum number of eigenvalues to compute}
15 | }
16 | \description{
17 | Select the number of significant principal components of a matrix via optimal
18 | hard thresholding (OHT)
19 | }
20 | \references{
21 | Gavish, M., Donoho, D.L. (2014).
22 | \emph{The optimal hard threshold for singular values is 4/sqrt(3).}
23 | IEEE Transactions on Information Theory, 60(8): 5040--5053
24 | }
25 | \keyword{internal}
26 | 


--------------------------------------------------------------------------------
/man/eigengap.act.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/eigengap.R
 3 | \name{eigengap.act}
 4 | \alias{eigengap.act}
 5 | \title{Rank selection via adjusted correlation thresholding}
 6 | \usage{
 7 | eigengap.act(covmat, nobs, maxcomp = NULL)
 8 | }
 9 | \arguments{
10 | \item{covmat}{matrix to be decomposed}
11 | 
12 | \item{nobs}{number of observations used to compute the covariance matrix}
13 | 
14 | \item{maxcomp}{maximum number of eigenvalues to compute}
15 | }
16 | \description{
17 | Select the number of significant principal components of a matrix via adjusted
18 | correlation thresholding (ACT)
19 | }
20 | \references{
21 | Fan, J., Guo, J. and Zheng, S. (2020).
22 | \emph{Estimating number of factors by adjusted eigenvalues thresholding.}
23 | Journal of the American Statistical Association, 117(538): 852--861
24 | }
25 | \keyword{internal}
26 | 


--------------------------------------------------------------------------------
/man/print.sgdgmf.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sgdGMF-class.R
 3 | \name{print.sgdgmf}
 4 | \alias{print.sgdgmf}
 5 | \title{Print the fundamental characteristics of a GMF}
 6 | \usage{
 7 | \method{print}{sgdgmf}(x, ...)
 8 | }
 9 | \arguments{
10 | \item{x}{an object of class \code{sgdgmf}}
11 | 
12 | \item{...}{further arguments passed to or from other methods}
13 | }
14 | \value{
15 | No return value, called only for printing.
16 | }
17 | \description{
18 | Print some summary information of a GMF model.
19 | }
20 | \examples{
21 | # Load the sgdGMF package
22 | library(sgdGMF)
23 | 
24 | # Generate data from a Poisson model
25 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson())
26 | 
27 | # Fit a GMF model with 3 latent factors
28 | gmf = sgdgmf.fit(data$Y, ncomp = 3, family = poisson())
29 | 
30 | # Print the GMF object
31 | print(gmf)
32 | 
33 | }
34 | 


--------------------------------------------------------------------------------
/R/genericfun.R:
--------------------------------------------------------------------------------
 1 | 
 2 | #' @export
 3 | generics::refit
 4 | 
 5 | #' @title Simulate new data
 6 | #'
 7 | #' @description
 8 | #' S3 generic for simulating new data from a statistical model
 9 | #'
10 | #' @param object an object from which to simulate new data
11 | #' @param ... additional arguments passed to or from other methods
12 | #'
13 | #' @return An array containing the simulated data.
14 | #'
15 | #' @export
16 | simulate <- function(object, ...) UseMethod("simulate")
17 | 
18 | 
19 | #' @title Store data into an object
20 | #'
21 | #' @description
22 | #' S3 generic for storing data into an object, typically a statistical model
23 | #'
24 | #' @param object an object in which to store the data
25 | #' @param ... additional arguments passed to or from other methods
26 | #'
27 | #' @return An object of the same class as the input containing new data
28 | #'
29 | #' @export
30 | storedata <- function(object, ...) UseMethod("storedata")
31 | 


--------------------------------------------------------------------------------
/man/print.initgmf.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/initGMF-class.R
 3 | \name{print.initgmf}
 4 | \alias{print.initgmf}
 5 | \title{Print the fundamental characteristics of an initialized GMF}
 6 | \usage{
 7 | \method{print}{initgmf}(x, ...)
 8 | }
 9 | \arguments{
10 | \item{x}{an object of class \code{initgmf}}
11 | 
12 | \item{...}{further arguments passed to or from other methods}
13 | }
14 | \value{
15 | No return value, called only for printing.
16 | }
17 | \description{
18 | Print some summary information of an initialized GMF model.
19 | }
20 | \examples{
21 | # Load the sgdGMF package
22 | library(sgdGMF)
23 | 
24 | # Generate data from a Poisson model
25 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson())
26 | 
27 | # Initialize a GMF model with 3 latent factors
28 | init = sgdgmf.init(data$Y, ncomp = 3, family = poisson())
29 | 
30 | # Print the GMF object
31 | print(init)
32 | 
33 | }
34 | 


--------------------------------------------------------------------------------
/src/Makevars:
--------------------------------------------------------------------------------
 1 | 
 2 | ## With R 3.1.0 or later, you can uncomment the following line to tell R to
 3 | ## enable compilation with C++11 (where available)
 4 | ##
 5 | ## Also, OpenMP support in Armadillo prefers C++11 support. However, for wider
 6 | ## availability of the package we do not yet enforce this here.  It is however
 7 | ## recommended for client packages to set it.
 8 | ##
 9 | ## And with R 3.4.0, and RcppArmadillo 0.7.960.*, we turn C++11 on as OpenMP
10 | ## support within Armadillo prefers / requires it
11 | ##
12 | ## R 4.0.0 made C++11 the default, R 4.1.0 switched to C++14, R 4.3.0 to C++17
13 | ## _In general_ we should no longer need to set a standard as any recent R
14 | ## installation will do the right thing. Should you need it, uncomment it and
15 | ## set the appropriate value, possibly CXX17.
16 | #CXX_STD = CXX11
17 | 
18 | ## CXXFLAGS is not set here: R's Makeconf is read after this file and overrides it; package-specific flags go in PKG_CXXFLAGS
19 | PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS)
20 | PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS)
21 | 


--------------------------------------------------------------------------------
/man/simulate.sgdgmf.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sgdGMF-class.R
 3 | \name{simulate.sgdgmf}
 4 | \alias{simulate.sgdgmf}
 5 | \title{Simulate method for GMF models}
 6 | \usage{
 7 | \method{simulate}{sgdgmf}(object, ..., nsim = 1)
 8 | }
 9 | \arguments{
10 | \item{object}{an object of class \code{sgdgmf}}
11 | 
12 | \item{...}{further arguments passed to or from other methods}
13 | 
14 | \item{nsim}{number of samples}
15 | }
16 | \value{
17 | A 3-fold array containing the simulated data.
18 | }
19 | \description{
20 | Simulate new data from a fitted generalized matrix factorization model
21 | }
22 | \examples{
23 | # Load the sgdGMF package
24 | library(sgdGMF)
25 | 
26 | # Generate data from a Poisson model
27 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson())
28 | 
29 | # Fit a GMF model
30 | gmf = sgdgmf.fit(data$Y, ncomp = 3, family = poisson())
31 | 
32 | # Simulate new data from a GMF model
33 | str(simulate(gmf))
34 | 
35 | }
36 | 


--------------------------------------------------------------------------------
/src/Makevars.win:
--------------------------------------------------------------------------------
 1 | 
 2 | ## With R 3.1.0 or later, you can uncomment the following line to tell R to
 3 | ## enable compilation with C++11 (where available)
 4 | ##
 5 | ## Also, OpenMP support in Armadillo prefers C++11 support. However, for wider
 6 | ## availability of the package we do not yet enforce this here.  It is however
 7 | ## recommended for client packages to set it.
 8 | ##
 9 | ## And with R 3.4.0, and RcppArmadillo 0.7.960.*, we turn C++11 on as OpenMP
10 | ## support within Armadillo prefers / requires it
11 | ##
12 | ## R 4.0.0 made C++11 the default, R 4.1.0 switched to C++14, R 4.3.0 to C++17
13 | ## _In general_ we should no longer need to set a standard as any recent R
14 | ## installation will do the right thing. Should you need it, uncomment it and
15 | ## set the appropriate value, possibly CXX17.
16 | #CXX_STD = CXX11
17 | 
18 | CXXFLAGS = $(CXXFLAGS) -Os # -Wall -- NOTE(review): R's own Makeconf is presumably read after this file and redefines CXXFLAGS; confirm that -Os actually reaches the compiler
19 | PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS)
20 | PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS)
21 | 


--------------------------------------------------------------------------------
/man/whitening.matrix.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{whitening.matrix}
 4 | \alias{whitening.matrix}
 5 | \alias{whitening.zca}
 6 | \alias{whitening.zca.cor}
 7 | \alias{whitening.pca}
 8 | \alias{whitening.pca.cor}
 9 | \alias{whitening.chol}
10 | \title{Compute the whitening matrix from a given covariance matrix}
11 | \usage{
12 | whitening.matrix(
13 |   sigma,
14 |   method = c("ZCA", "ZCA-cor", "PCA", "PCA-cor", "Cholesky")
15 | )
16 | 
17 | whitening.zca(sigma)
18 | 
19 | whitening.zca.cor(sigma)
20 | 
21 | whitening.pca(sigma)
22 | 
23 | whitening.pca.cor(sigma)
24 | 
25 | whitening.chol(sigma)
26 | }
27 | \arguments{
28 | \item{sigma}{covariance matrix.}
29 | 
30 | \item{method}{determines the type of whitening transformation.}
31 | }
32 | \description{
33 | Compute the whitening matrix from a given covariance matrix
34 | }
35 | \details{
36 | This function is an internal re-implementation of the function \code{whiteningMatrix}
37 | in the \code{whitening} package. See the original documentation to get more details.
38 | }
39 | \keyword{internal}
40 | 


--------------------------------------------------------------------------------
/R/deviance.R:
--------------------------------------------------------------------------------
 1 | 
 2 | #' @title Pointwise deviance of a GMF model
 3 | #' @description Compute the unit-weight deviance of every non-missing entry of y; missing entries are returned as NA
 4 | #' @keywords internal
 5 | pointwise.deviance = function (mu, y, family = gaussian()) {
 6 |   if (length(mu) == 1) {  # a scalar mean is recycled over the shape of y
 7 |     filled = y
 8 |     filled[] = mu
 9 |     mu = filled
10 |   }
11 |   observed = !is.na(y)  # mask of the entries on which the deviance is evaluated
12 |   dev = y
13 |   dev[] = NA
14 |   dev[observed] = family$dev.resids(y[observed], mu[observed], 1)
15 |   return(dev)
16 | }
17 | 
18 | #' @title Model deviance of a GMF model
19 | #' @description Compute the overall deviance by summing the pointwise contributions of all the non-missing data
20 | #' @keywords internal
21 | matrix.deviance = function (mu, y, family = gaussian()) {
22 |   # Total (not average) deviance: the NA contributions are dropped from the sum
23 |   pointwise = pointwise.deviance(mu, y, family)
24 |   total = sum(pointwise, na.rm = TRUE)
25 |   return (total)
26 | }
27 | 
28 | #' @title Frobenius penalty for the parameters of a GMF model
29 | #' @description Compute the sum of the squared entries of U, weighting each column by the matching element of penalty
30 | #' @keywords internal
31 | matrix.penalty = function (U, penalty) {
32 |   weighted = sweep(U^2, MARGIN = 2, STATS = penalty, FUN = "*")
33 |   return (sum(weighted))
34 | }
35 | 
36 | 


--------------------------------------------------------------------------------
/man/cpp.airwls.glmstep.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/RcppExports.R
 3 | \name{cpp.airwls.glmstep}
 4 | \alias{cpp.airwls.glmstep}
 5 | \title{Compute one Fisher scoring step for GLMs}
 6 | \usage{
 7 | cpp.airwls.glmstep(
 8 |   beta,
 9 |   y,
10 |   X,
11 |   familyname,
12 |   linkname,
13 |   varfname,
14 |   offset,
15 |   weights,
16 |   penalty
17 | )
18 | }
19 | \arguments{
20 | \item{beta}{current value of the regression coefficients to be updated}
21 | 
22 | \item{y}{response vector}
23 | 
24 | \item{X}{design matrix}
25 | 
26 | \item{familyname}{model family name}
27 | 
28 | \item{linkname}{link function name}
29 | 
30 | \item{varfname}{variance function name}
31 | 
32 | \item{offset}{vector of constants to be added to the linear predictor}
33 | 
34 | \item{weights}{vector of constant non-negative weights}
35 | 
36 | \item{penalty}{penalty parameter of a ridge-type penalty}
37 | }
38 | \description{
39 | Internal function to compute one Fisher scoring step for GLMs.
40 | It constitutes the building block of the AIRWLS algorithm for the
41 | estimation of GMF models.
42 | }
43 | \keyword{internal}
44 | 


--------------------------------------------------------------------------------
/src/deviance.cpp:
--------------------------------------------------------------------------------
 1 | // deviance.cpp
 2 | // author: Cristian Castiglione
 3 | // creation: 28/09/2023
 4 | // last change: 28/09/2023
 5 | 
 6 | #include "deviance.h"
 7 | 
 8 | using namespace glm;
 9 | 
10 | // Pointwise deviance: fills `dev` with the deviance residuals of `y` given `mu`
11 | void deviance (
12 |     arma::mat & dev, const arma::mat & y, const arma::mat & mu,
13 |     const std::unique_ptr<Family> & family
14 | ) {
15 |     if (y.is_finite()) {
16 |         dev = family->devresid(y, mu);
17 |     } else {
18 |         // Missing data: only the finite cells of y are evaluated and written
19 |         const arma::uvec observed = arma::find_finite(y);
20 |         dev.elem(observed) = family->devresid(y.elem(observed), mu.elem(observed));
21 |     }
22 | }
23 | 
24 | // Pointwise deviance returned by value (same size as y)
25 | arma::mat deviance (const arma::mat & y, const arma::mat & mu, const std::unique_ptr<Family> & family) {
26 |     // Zero-fill explicitly: the in-place overload only writes the cells where y
27 |     // is finite, so the cells of missing data must not be left uninitialized
28 |     arma::mat dev(arma::size(y), arma::fill::zeros);
29 |     deviance(dev, y, mu, family);
30 |     return dev;
31 | }
32 | 
33 | // Penalty: sum of the squared entries of u, with column j weighted by p(j)
34 | void penalty (double & pen, const arma::mat & u, const arma::vec & p) {
35 |     pen = arma::accu(arma::square(u) * arma::diagmat(p));
36 | }
37 | 
38 | double penalty (const arma::mat & u, const arma::vec & p) {
39 |     double value = 0.0;    // overwritten by the in-place overload below
40 |     penalty(value, u, p);
41 |     return value;
42 | }


--------------------------------------------------------------------------------
/tests/testcpp/test-minibatch.cpp:
--------------------------------------------------------------------------------
 1 | // minibatch.cpp
 2 | // author: Cristian Castiglione
 3 | // creation: 06/10/2023
 4 | // last change: 06/10/2023
 5 | 
 6 | #include "minibatch.h"
 7 | 
 8 | //' @keywords internal
 9 | // [[Rcpp::export("cpp.get.chunk")]]
10 | arma::uvec cpp_get_chunk (
11 |     const int & iter, const int & n, const int & size, const bool & randomize
12 | ) {
13 |     // Configure the chunks from (n, size, randomize), then return the one selected by iter
14 |     Chunks partition;
15 |     partition.set_chunks(n, size, randomize);
16 |     return partition.get_chunk(iter);
17 | }
18 | 
19 | //' @keywords internal
20 | // [[Rcpp::export("cpp.get.chunks")]]
21 | std::list<arma::uvec> cpp_get_chunks (
22 |     const arma::uvec & iters, const int & n, const int & size, const bool & randomize
23 | ) {
24 |     // Configure the chunks from (n, size, randomize), then return those requested through iters
25 |     Chunks partition;
26 |     partition.set_chunks(n, size, randomize);
27 |     return partition.get_chunks(iters);
28 | }
29 | 
30 | //' @keywords internal
31 | // [[Rcpp::export("cpp.get.next")]]
32 | Rcpp::List cpp_get_next (const int & iter, const int & n, const bool & rnd) {
33 |     // Advance a freshly built pile `iter` times (no update when iter <= 0)
34 |     ChunkPile pile(n, rnd);
35 |     for (int left = iter; left > 0; --left) {
36 |         pile.update();
37 |     }
38 |     // Collect the state of the pile after the updates in a named list
39 |     Rcpp::List state;
40 |     state["idx"] = pile.idx;
41 |     state["tovisit"] = pile.tovisit;
42 |     state["visited"] = pile.visited;
43 |     return state;
44 | }


--------------------------------------------------------------------------------
/tests/testthat/test-eigengap.R:
--------------------------------------------------------------------------------
 1 | # file: test-eigengap.R
 2 | # author: Cristian Castiglione
 3 | # creation: 23/03/2024
 4 | # last change: 04/10/2024
 5 | 
 6 | testthat::test_that("Rank selection", {
 7 |   n = 100; m = 20; d = 5
 8 | 
 9 |   # Generate data using Poisson, Binomial and Gamma models
10 |   data_pois = sim.gmf.data(n = n, m = m, ncomp = d, family = poisson())
11 |   data_bin = sim.gmf.data(n = n, m = m, ncomp = d, family = binomial())
12 |   data_gam = sim.gmf.data(n = n, m = m, ncomp = d, family = Gamma(link = "log"), dispersion = 0.25)
13 | 
14 |   # Select the number of latent factors on each simulated data matrix
15 |   ncomp_pois = sgdgmf.rank(data_pois$Y, family = poisson(), normalize = TRUE)
16 |   ncomp_bin = sgdgmf.rank(data_bin$Y, family = binomial(), normalize = TRUE)
17 |   ncomp_gam = sgdgmf.rank(data_gam$Y, family = Gamma(link = "log"), normalize = TRUE)
18 | 
19 |   # Output class
20 |   testthat::expect_true(is.numeric(ncomp_pois$ncomp))
21 |   testthat::expect_true(is.numeric(ncomp_bin$ncomp))
22 |   testthat::expect_true(is.numeric(ncomp_gam$ncomp))
23 | 
24 |   # Output bounds: the selected rank is a single value in 1, ..., m (scalar && on purpose)
25 |   testthat::expect_true(ncomp_pois$ncomp > 0 && ncomp_pois$ncomp <= m)
26 |   testthat::expect_true(ncomp_bin$ncomp > 0 && ncomp_bin$ncomp <= m)
27 |   testthat::expect_true(ncomp_gam$ncomp > 0 && ncomp_gam$ncomp <= m)
28 | })
29 | 


--------------------------------------------------------------------------------
/man/deviance.sgdgmf.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sgdGMF-class.R
 3 | \name{deviance.sgdgmf}
 4 | \alias{deviance.sgdgmf}
 5 | \alias{AIC.sgdgmf}
 6 | \alias{BIC.sgdgmf}
 7 | \title{Compute deviance, AIC and BIC of a GMF model}
 8 | \usage{
 9 | \method{deviance}{sgdgmf}(object, ..., normalize = FALSE)
10 | 
11 | \method{AIC}{sgdgmf}(object, ..., k = 2)
12 | 
13 | \method{BIC}{sgdgmf}(object, ...)
14 | }
15 | \arguments{
16 | \item{object}{an object of class \code{sgdgmf}}
17 | 
18 | \item{...}{further arguments passed to or from other methods}
19 | 
20 | \item{normalize}{if \code{TRUE}, normalize the result using the null-deviance}
21 | 
22 | \item{k}{the penalty parameter to be used for AIC; the default is \code{k = 2}}
23 | }
24 | \value{
25 | The value of the deviance extracted from a \code{sgdgmf} object.
26 | }
27 | \description{
28 | Compute deviance, AIC and BIC of a GMF object
29 | }
30 | \examples{
31 | # Load the sgdGMF package
32 | library(sgdGMF)
33 | 
34 | # Generate data from a Poisson model
35 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson())
36 | 
37 | # Fit a GMF model with 3 latent factors
38 | gmf = sgdgmf.fit(data$Y, ncomp = 3, family = poisson())
39 | 
40 | # Get the GMF deviance, AIC and BIC
41 | deviance(gmf)
42 | AIC(gmf)
43 | BIC(gmf)
44 | 
45 | }
46 | 


--------------------------------------------------------------------------------
/man/vglm.fit.coef.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/vglmfit.R
 3 | \name{vglm.fit.coef}
 4 | \alias{vglm.fit.coef}
 5 | \title{Estimate the coefficients of a vector generalized linear model}
 6 | \usage{
 7 | vglm.fit.coef(
 8 |   Y,
 9 |   X,
10 |   family = gaussian(),
11 |   weights = NULL,
12 |   offset = NULL,
13 |   parallel = FALSE,
14 |   nthreads = 1,
15 |   clust = NULL
16 | )
17 | }
18 | \arguments{
19 | \item{Y}{\eqn{n \times m} matrix of response variables}
20 | 
21 | \item{X}{\eqn{n \times p} matrix of covariates}
22 | 
23 | \item{family}{a \code{glm} family (see \code{\link{family}} for more details)}
24 | 
25 | \item{weights}{\eqn{n \times m} matrix of weighting values}
26 | 
27 | \item{offset}{\eqn{n \times m} matrix of offset values}
28 | 
29 | \item{parallel}{if \code{TRUE}, allows for parallel computing using the \code{foreach} package}
30 | 
31 | \item{nthreads}{number of cores to be used in parallel (only if \code{parallel=TRUE})}
32 | 
33 | \item{clust}{registered cluster to be used for distributing the computations (only if \code{parallel=TRUE})}
34 | }
35 | \description{
36 | Estimate the coefficients of a vector generalized linear model via parallel
37 | iterative re-weighted least squares. Computations can be performed in parallel
38 | to speed up the execution.
39 | }
40 | \keyword{internal}
41 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # sgdGMF
 2 | An R package for efficient estimation of generalized matrix factorization (GMF) models [[1](#1), [2](#2), [3](#3)].
 3 | The package implements the adaptive stochastic gradient descent with block- and coordinate-wise sub-sampling strategies proposed in [[4]](#4).
 4 | Additionally, sgdGMF implements the alternated iterative re-weighted least squares [[1](#1), [3](#3)] and diagonal-Hessian quasi-Newton [[1]](#1) algorithms.
 5 | 
 6 | ## References
 7 | <a id="1">[1]</a>
 8 | Collins, M., Dasgupta, S., Schapire, R.E. (2001).
 9 | A generalization of principal components analysis to the exponential family.
10 | Advances in neural information processing systems, 14.
11 | 
12 | <a id="2">[2]</a>
13 | Kidzinski, L., Hui, F.K.C., Warton, D.I., Hastie, T.J. (2022).
14 | Generalized Matrix Factorization: efficient algorithms for fitting generalized linear latent variable models to large data arrays.
15 | Journal of Machine Learning Research, 23(291): 1--29.
16 | 
17 | <a id="3">[3]</a>
18 | Wang, L., Carvalho, L. (2023).
19 | Deviance matrix factorization.
20 | Electronic Journal of Statistics, 17(2): 3762--3810.
21 | 
22 | <a id="4">[4]</a>
23 | Castiglione, C., Segers, A., Clement, L., Risso, D. (2024).
24 | Stochastic gradient descent estimation of generalized matrix factorization models with application to single-cell RNA sequencing data.
25 | arXiv preprint: arXiv:2412.20509.
26 | 
27 | 


--------------------------------------------------------------------------------
/man/cpp.airwls.glmfit.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/RcppExports.R
 3 | \name{cpp.airwls.glmfit}
 4 | \alias{cpp.airwls.glmfit}
 5 | \title{Fisher scoring algorithm for GLMs}
 6 | \usage{
 7 | cpp.airwls.glmfit(
 8 |   beta,
 9 |   y,
10 |   X,
11 |   familyname,
12 |   linkname,
13 |   varfname,
14 |   offset,
15 |   weights,
16 |   penalty,
17 |   nsteps = 100L,
18 |   stepsize = 0.1,
19 |   print = FALSE
20 | )
21 | }
22 | \arguments{
23 | \item{beta}{initial value of the regression coefficients to be estimated}
24 | 
25 | \item{y}{response vector}
26 | 
27 | \item{X}{design matrix}
28 | 
29 | \item{familyname}{model family name}
30 | 
31 | \item{linkname}{link function name}
32 | 
33 | \item{varfname}{variance function name}
34 | 
35 | \item{offset}{vector of constants to be added to the linear predictor}
36 | 
37 | \item{weights}{vector of constant non-negative weights}
38 | 
39 | \item{penalty}{penalty parameter of a ridge-type penalty}
40 | 
41 | \item{nsteps}{number of iterations}
42 | 
43 | \item{stepsize}{stepsize parameter of the Fisher scoring algorithm}
44 | 
45 | \item{print}{if \code{TRUE}, print the algorithm history}
46 | }
47 | \description{
48 | Internal function implementing the Fisher scoring algorithms for the
49 | estimation of GLMs. It is used in the AIRWLS algorithm for the 
50 | estimation of GMF models.
51 | }
52 | \keyword{internal}
53 | 


--------------------------------------------------------------------------------
/tests/testrcpp/test-minibatch.R:
--------------------------------------------------------------------------------
 1 | # test-minibatch.R
 2 | # author: Cristian Castiglione
 3 | # creation: 07/10/2023
 4 | # last change: 07/10/2023
 5 | 
 6 | ## Workspace setup ----
 7 | rm(list = ls()) # remove every object returned by ls() in the current environment
 8 | graphics.off() # close all the open graphics devices
 9 | 
10 | # Package compilation and import (development version loaded through devtools)
11 | devtools::load_all()
12 | 
13 | 
14 | r_get_next = function (iter, n, rnd) {
15 |   # R counterpart of get_next(): visit the indices 0, ..., n-1 one per iteration
16 |   # (at random if rnd = TRUE), starting a new pass once all have been visited
17 |   idx = -1
18 |   tovisit = seq(from = 0, to = n-1, by = 1)
19 |   visited = c()
20 |   for (i in seq_len(max(iter, 0))) {
21 |     # All the indices have been visited: refill the pile and start again
22 |     if (length(tovisit) == 0) {
23 |       tovisit = visited
24 |       visited = c()
25 |     }
26 |     # Draw a position, not a value: sample(x, 1) with a single number x >= 1
27 |     # would sample from 1:x and could return an index which is not in tovisit
28 |     pos = if (rnd) sample.int(length(tovisit), 1) else 1
29 |     idx = tovisit[pos]
30 |     tovisit = tovisit[-pos]
31 |     visited = c(visited, idx)
32 |   }
33 |   # idx = -1 means that no update has been performed (iter <= 0)
34 |   list(idx = idx, tovisit = tovisit, visited = visited)
35 | }
36 | 
37 | 
38 | ## Test: get_chunk() ----
39 | sgdGMF::c_get_chunk(3, 10, 3, FALSE) # NOTE(review): tests/testcpp/test-minibatch.cpp exports "cpp.get.chunk"; confirm the name
40 | 
41 | ## Test: get_chunks() ----
42 | sgdGMF::c_get_chunks(0:5, 10, 3, TRUE) # NOTE(review): tests/testcpp/test-minibatch.cpp exports "cpp.get.chunks"; confirm the name
43 | 
44 | ## Test: get_next() ----
45 | print.pile = function (pile) {
46 |   # Print one "name = values" line for each component of the pile
47 |   for (field in c("idx", "tovisit", "visited"))
48 |     cat(paste(field, "="), drop(pile[[field]]), "\n")
49 | }
50 | 
51 | k = 0 # number of updates applied before the two states are printed
52 | print.pile(        r_get_next(k, 5, FALSE))
53 | print.pile(sgdGMF::c_get_next(k, 5, FALSE)) # NOTE(review): tests/testcpp/test-minibatch.cpp exports "cpp.get.next"; confirm the name
54 | k = k+1 # presumably incremented before re-running the two lines above by hand
55 | 
56 | ## End of file ----
57 | 


--------------------------------------------------------------------------------
/man/fitted.sgdgmf.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sgdGMF-class.R
 3 | \name{fitted.sgdgmf}
 4 | \alias{fitted.sgdgmf}
 5 | \title{Extract the fitted values of a GMF model}
 6 | \usage{
 7 | \method{fitted}{sgdgmf}(object, ..., type = c("link", "response", "terms"), partial = FALSE)
 8 | }
 9 | \arguments{
10 | \item{object}{an object of class \code{sgdgmf}}
11 | 
12 | \item{...}{further arguments passed to or from other methods}
13 | 
14 | \item{type}{the type of fitted values which should be returned}
15 | 
16 | \item{partial}{if \code{TRUE}, returns the partial fitted values}
17 | }
18 | \value{
19 | If \code{type="terms"}, a list of fitted values containing the fields \code{XB},
20 | \code{AZ} and \code{UV}. Otherwise, a matrix of fitted values in the link or
21 | response scale, depending on the selected \code{type}.
22 | }
23 | \description{
24 | Computes the fitted values of a GMF model.
25 | }
26 | \examples{
27 | # Load the sgdGMF package
28 | library(sgdGMF)
29 | 
30 | # Generate data from a Poisson model
31 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson())
32 | 
33 | # Fit a GMF model with 3 latent factors
34 | gmf = sgdgmf.fit(data$Y, ncomp = 3, family = poisson())
35 | 
36 | # Get the fitted values of a GMF model
37 | str(fitted(gmf)) # returns the overall fitted values in link scale
38 | str(fitted(gmf, type = "response")) # returns the overall fitted values in response scale
39 | 
40 | }
41 | 


--------------------------------------------------------------------------------
/man/deviance.initgmf.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/initGMF-class.R
 3 | \name{deviance.initgmf}
 4 | \alias{deviance.initgmf}
 5 | \alias{AIC.initgmf}
 6 | \alias{BIC.initgmf}
 7 | \title{Compute deviance, AIC and BIC of an initialized GMF model}
 8 | \usage{
 9 | \method{deviance}{initgmf}(object, ..., normalize = FALSE)
10 | 
11 | \method{AIC}{initgmf}(object, ..., k = 2)
12 | 
13 | \method{BIC}{initgmf}(object, ...)
14 | }
15 | \arguments{
16 | \item{object}{an object of class \code{initgmf}}
17 | 
18 | \item{...}{further arguments passed to or from other methods}
19 | 
20 | \item{normalize}{if \code{TRUE}, normalize the result using the null-deviance}
21 | 
22 | \item{k}{the penalty parameter to be used for AIC; the default is \code{k = 2}}
23 | }
24 | \value{
25 | The value of the deviance extracted from a \code{initgmf} object.
26 | }
27 | \description{
28 | Compute deviance, AIC and BIC of an initialized GMF object
29 | }
30 | \examples{
31 | # Load the sgdGMF package
32 | library(sgdGMF)
33 | 
34 | # Generate data from a Poisson model
35 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson())
36 | 
37 | # Initialize a GMF model with 3 latent factors
38 | init = sgdgmf.init(data$Y, ncomp = 3, family = poisson())
39 | 
40 | # Get the GMF deviance, AIC and BIC
41 | deviance(init)
42 | AIC(init)
43 | BIC(init)
44 | 
45 | }
46 | \seealso{
47 | \code{\link{deviance.sgdgmf}}, \code{\link{AIC.sgdgmf}} and \code{\link{BIC.sgdgmf}}.
48 | }
49 | 


--------------------------------------------------------------------------------
/.github/workflows/R-CMD-check.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   push:
 5 |     branches: [main, master]
 6 |   pull_request:
 7 |     branches: [main, master]
 8 | 
 9 | name: R-CMD-check
10 | 
11 | jobs:
12 |   R-CMD-check:
13 |     runs-on: ${{ matrix.config.os }}
14 | 
15 |     name: ${{ matrix.config.os }} (${{ matrix.config.r }})
16 | 
17 |     strategy:
18 |       fail-fast: false
19 |       matrix:
20 |         config:
21 |           - {os: macOS-latest,   r: 'release'}
22 |           - {os: windows-latest, r: 'release'}
23 |           - {os: ubuntu-latest,   r: 'devel', http-user-agent: 'release'}
24 |           - {os: ubuntu-latest,   r: 'release'}
25 |           - {os: ubuntu-latest,   r: 'oldrel-1'}
26 | 
27 |     env:
28 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
29 |       R_KEEP_PKG_SOURCE: yes
30 | 
31 |     steps:
32 |       - uses: actions/checkout@v4 # v2 runs on the deprecated Node 12 runtime
33 | 
34 |       - uses: r-lib/actions/setup-pandoc@v2
35 | 
36 |       - uses: r-lib/actions/setup-r@v2
37 |         with:
38 |           r-version: ${{ matrix.config.r }} # one R version per entry of matrix.config
39 |           http-user-agent: ${{ matrix.config.http-user-agent }}
40 |           use-public-rspm: true
41 | 
42 |       - uses: r-lib/actions/setup-r-dependencies@v2
43 |         with:
44 |           extra-packages: any::rcmdcheck
45 |           needs: check
46 | 
47 |       - uses: r-lib/actions/check-r-package@v2
48 |         with:
49 |           upload-snapshots: true
50 | 


--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
 1 | # sgdGMF 1.0.2
 2 | * `sgdgmf.fit` : implemented orthogonality between covariates and latent variables
 3 | * `orthogonalize` (new function) : implemented orthogonality between covariates and latent variables
 4 | * `sgdgmf.fit` : implemented the possibility to not save a copy of the data and fitted values
 5 | * `set.control.airwls` : introduced new argument `savedata` to specify whether to store a copy of the data or not
 6 | * `set.control.newton` : introduced new argument `savedata` to specify whether to store a copy of the data or not
 7 | * `set.control.coord.sgd` : introduced new argument `savedata` to specify whether to store a copy of the data or not
 8 | * `set.control.block.sgd` : introduced new argument `savedata` to specify whether to store a copy of the data or not
 9 | * `storedata` (new function) : implemented ex-post inclusion of data in a generic object
10 | * `storedata.sgdgmf` (new method) : implemented ex-post inclusion of data in a fitted `sgdgmf` object
11 | * `sgdgmf.init` : implemented `method = "light"` and improved the memory usage
12 | * `sgdgmf.init.light` (new function) : implemented a memory efficient version of `sgdgmf.init.ols` with `type = "link"`
13 | 
14 | # sgdGMF 1.0.1
15 | 
16 | * `sgdgmf.rank` : changed default method from `"onatski"` to `"evr"` method
17 | * `eigengap.evr` (new function) : implemented the eigenvalue ratio method for rank selection
18 | * `eigengap.onatski` : fixed bug occurring when no optimal rank can be selected
19 | * added option `CXXFLAGS = $(CXXFLAGS) -Os` to `Makevars` and `Makevars.win` files to optimize the memory space used by compiled C++ files
20 | 


--------------------------------------------------------------------------------
/man/fitted.initgmf.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/initGMF-class.R
 3 | \name{fitted.initgmf}
 4 | \alias{fitted.initgmf}
 5 | \title{Extract the fitted values of an initialized GMF model}
 6 | \usage{
 7 | \method{fitted}{initgmf}(object, ..., type = c("link", "response", "terms"), partial = FALSE)
 8 | }
 9 | \arguments{
10 | \item{object}{an object of class \code{initgmf}}
11 | 
12 | \item{...}{further arguments passed to or from other methods}
13 | 
14 | \item{type}{the type of fitted values which should be returned}
15 | 
16 | \item{partial}{if \code{TRUE}, returns the partial fitted values}
17 | }
18 | \value{
19 | If \code{type="terms"}, a list of fitted values containing the fields \code{XB},
20 | \code{AZ} and \code{UV}. Otherwise, a matrix of fitted values in the link or
21 | response scale, depending on the selected \code{type}.
22 | }
23 | \description{
24 | Computes the fitted values of an initialized GMF model.
25 | }
26 | \examples{
27 | # Load the sgdGMF package
28 | library(sgdGMF)
29 | 
30 | # Generate data from a Poisson model
31 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson())
32 | 
33 | # Initialize a GMF model with 3 latent factors
34 | init = sgdgmf.init(data$Y, ncomp = 3, family = poisson())
35 | 
36 | # Get the fitted values of a GMF model
37 | str(fitted(init)) # returns the overall fitted values in link scale
38 | str(fitted(init, type = "response")) # returns the overall fitted values in response scale
39 | str(fitted(init, partial = TRUE)) # returns the partial fitted values in link scale
40 | 
41 | }
42 | \seealso{
43 | \code{\link{fitted.sgdgmf}}.
44 | }
45 | 


--------------------------------------------------------------------------------
/man/orthogonalize.uv.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{orthogonalize.uv}
 4 | \alias{orthogonalize.uv}
 5 | \alias{orthogonalize.svd}
 6 | \alias{orthogonalize.qr}
 7 | \alias{orthogonalize.std}
 8 | \title{Orthogonalize the matrices U and V}
 9 | \usage{
10 | orthogonalize.uv(
11 |   U,
12 |   V,
13 |   method = c("QR", "SVD", "ZCA", "ZCA-cor", "PCA", "PCA-cor", "Cholesky")
14 | )
15 | 
16 | orthogonalize.svd(U, V)
17 | 
18 | orthogonalize.qr(U, V)
19 | 
20 | orthogonalize.std(U, V, method)
21 | }
22 | \description{
23 | Rotate U and V using either QR or SVD decompositions.
24 | }
25 | \details{
26 | Orthogonalization is implemented using the following methods:
27 | \itemize{
28 |   \item \code{method = "SVD"}: orthogonal \eqn{U} and scaled orthogonal \eqn{V} based on SVD decomposition;
29 |   \item \code{method = "QR"}: orthogonal \eqn{U} and lower triangular \eqn{V} based on QR decomposition;
30 |   \item \code{method = "ZCA"}: standardized \eqn{U} and lower triangular \eqn{V} based on ZCA whitening and QR decomposition;
31 |   \item \code{method = "ZCA-cor"}: uncorrelated \eqn{U} and lower triangular \eqn{V} based on ZCA whitening and QR decomposition;
32 |   \item \code{method = "PCA"}: standardized \eqn{U} and lower triangular \eqn{V} based on PCA whitening and QR decomposition;
33 |   \item \code{method = "PCA-cor"}: uncorrelated \eqn{U} and lower triangular \eqn{V} based on PCA whitening and QR decomposition;
34 |   \item \code{method = "Cholesky"}: standardized \eqn{U} and lower triangular \eqn{V} based on Cholesky whitening and QR decomposition.
35 | }
36 | }
37 | \keyword{internal}
38 | 


--------------------------------------------------------------------------------
/man/coefficients.sgdgmf.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sgdGMF-class.R
 3 | \name{coefficients.sgdgmf}
 4 | \alias{coefficients.sgdgmf}
 5 | \alias{coef.sgdgmf}
 6 | \title{Extract the coefficients of a GMF model}
 7 | \usage{
 8 | \method{coefficients}{sgdgmf}(
 9 |   object,
10 |   ...,
11 |   type = c("all", "colreg", "rowreg", "scores", "loadings")
12 | )
13 | 
14 | \method{coef}{sgdgmf}(object, ..., type = c("all", "colreg", "rowreg", "scores", "loadings"))
15 | }
16 | \arguments{
17 | \item{object}{an object of class \code{sgdgmf}}
18 | 
19 | \item{...}{further arguments passed to or from other methods}
20 | 
21 | \item{type}{the type of coefficients which should be returned}
22 | }
23 | \value{
24 | If \code{type="all"}, a list of coefficients containing the fields \code{B}, \code{A}, \code{U} and \code{V}.
25 | Otherwise, a matrix of coefficients, corresponding to the selected \code{type}.
26 | }
27 | \description{
28 | Return the estimated coefficients of a GMF model, i.e., the row- and column-specific
29 | regression effects, the latent scores and loadings.
30 | }
31 | \examples{
32 | # Load the sgdGMF package
33 | library(sgdGMF)
34 | 
35 | # Generate data from a Poisson model
36 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson())
37 | 
38 | # Fit a GMF model with 3 latent factors
39 | gmf = sgdgmf.fit(data$Y, ncomp = 3, family = poisson())
40 | 
41 | # Get the estimated coefficients of a GMF model
42 | str(coefficients(gmf)) # returns all the coefficients
43 | str(coefficients(gmf, type = "scores")) # returns only the scores, say U
44 | str(coefficients(gmf, type = "loadings")) # returns only the loadings, say V
45 | 
46 | }
47 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # History files
  2 | .Rhistory
  3 | .Rapp.history
  4 | 
  5 | # Session Data files
  6 | .RData
  7 | .RDataTmp
  8 | 
  9 | # User-specific files
 10 | .Ruserdata
 11 | 
 12 | # Example code in package build process
 13 | *-Ex.R
 14 | 
 15 | # Output files from R CMD build
 16 | /*.tar.gz
 17 | 
 18 | # Output files from R CMD check
 19 | /*.Rcheck/
 20 | 
 21 | # RStudio files
 22 | .Rproj.user
 23 | .Rproj.user/
 24 | .Rproj.user/**
 25 | 
 26 | # produced vignettes
 27 | vignettes/*.html
 28 | vignettes/*.pdf
 29 | 
 30 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
 31 | .httr-oauth
 32 | 
 33 | # knitr and R markdown default cache directories
 34 | *_cache/
 35 | /cache/
 36 | 
 37 | # Temporary files created by R markdown
 38 | *.utf8.md
 39 | *.knit.md
 40 | 
 41 | # R Environment Variables
 42 | .Renviron
 43 | 
 44 | # pkgdown site
 45 | docs/
 46 | 
 47 | # translation temp files
 48 | po/*~
 49 | 
 50 | # RStudio Connect folder
 51 | rsconnect/
 52 | 
 53 | # R package: bookdown caching files
 54 | /*_files/
 55 | 
 56 | # C++
 57 | *.o
 58 | *.so
 59 | *.dll
 60 | src/*.o
 61 | src/*.so
 62 | src/*.dll
 63 | 
 64 | # Rcpp
 65 | Rcpp.pro
 66 | Rcpp.pro.user
 67 | *.autosave
 68 | 
 69 | # VS-Code
 70 | .vscode
 71 | .vscode/
 72 | 
 73 | # Image folder
 74 | img
 75 | img/
 76 | img/**
 77 | 
 78 | # Old folder
 79 | old
 80 | old/
 81 | old/**
 82 | 
 83 | # Simulation folder
 84 | sim
 85 | sim/
 86 | sim/**
 87 | 
 88 | # Sandbox folder
 89 | sandbox
 90 | sandbox/
 91 | sandbox/**
 92 | 
 93 | # Zip archive folder
 94 | zip
 95 | zip/
 96 | zip/**
 97 | 
 98 | # CRAN related folder
 99 | cran
100 | cran/
101 | cran/**
102 | 
103 | # Compressed files
104 | *.zip
105 | inst/doc
106 | 


--------------------------------------------------------------------------------
/man/cpp.airwls.update.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/RcppExports.R
 3 | \name{cpp.airwls.update}
 4 | \alias{cpp.airwls.update}
 5 | \title{AIRWLS update for GMF models}
 6 | \usage{
 7 | cpp.airwls.update(
 8 |   beta,
 9 |   Y,
10 |   X,
11 |   familyname,
12 |   linkname,
13 |   varfname,
14 |   idx,
15 |   offset,
16 |   weights,
17 |   penalty,
18 |   transp = FALSE,
19 |   nsteps = 100L,
20 |   stepsize = 0.1,
21 |   print = FALSE,
22 |   parallel = FALSE,
23 |   nthreads = 1L
24 | )
25 | }
26 | \arguments{
27 | \item{beta}{initial value of the regression coefficients to be estimated}
28 | 
29 | \item{Y}{response vector}
30 | 
31 | \item{X}{design matrix}
32 | 
33 | \item{familyname}{model family name}
34 | 
35 | \item{linkname}{link function name}
36 | 
37 | \item{varfname}{variance function name}
38 | 
39 | \item{idx}{index identifying the parameters to be updated in \code{beta}}
40 | 
41 | \item{offset}{vector of constants to be added to the linear predictor}
42 | 
43 | \item{weights}{vector of constant non-negative weights}
44 | 
45 | \item{penalty}{penalty parameter of a ridge-type penalty}
46 | 
47 | \item{transp}{if \code{TRUE}, transpose the data}
48 | 
49 | \item{nsteps}{number of iterations}
50 | 
51 | \item{stepsize}{stepsize parameter of the Fisher scoring algorithm}
52 | 
53 | \item{print}{if \code{TRUE}, print the algorithm history}
54 | 
55 | \item{parallel}{if \code{TRUE}, run the updates in parallel using \code{openMP}}
56 | 
57 | \item{nthreads}{number of threads to be run in parallel (only if \code{parallel=TRUE})}
58 | }
59 | \description{
60 | Internal function implementing one step of AIRWLS for the
61 | estimation of GMF models.
62 | }
63 | \keyword{internal}
64 | 


--------------------------------------------------------------------------------
/man/set.control.cv.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/control.R
 3 | \name{set.control.cv}
 4 | \alias{set.control.cv}
 5 | \title{Check and set the cross-validation parameters}
 6 | \usage{
 7 | set.control.cv(
 8 |   criterion = c("dev", "mae", "mse", "aic", "bic"),
 9 |   refit = TRUE,
10 |   nfolds = 5,
11 |   proportion = 0.3,
12 |   init = c("common", "separate"),
13 |   verbose = FALSE,
14 |   parallel = FALSE,
15 |   nthreads = 1
16 | )
17 | }
18 | \arguments{
19 | \item{criterion}{information criterion to minimize for selecting the matrix rank}
20 | 
21 | \item{refit}{if \code{TRUE}, refit the model with the selected rank and return the fitted model}
22 | 
23 | \item{nfolds}{number of cross-validation folds}
24 | 
25 | \item{proportion}{proportion of the data to be used as test set in each fold}
26 | 
27 | \item{init}{initialization approach to use}
28 | 
29 | \item{verbose}{if \code{TRUE}, print the cross-validation status}
30 | 
31 | \item{parallel}{if \code{TRUE}, allows for parallel computing}
32 | 
33 | \item{nthreads}{number of cores to use in parallel (only if \code{parallel=TRUE})}
34 | }
35 | \value{
36 | A \code{list} of control parameters for the cross-validation algorithm
37 | }
38 | \description{
39 | Check if the input cross-validation parameters are allowed and set them to default
40 | values if they are not. Returns a list of well-defined cross-validation parameters.
41 | }
42 | \examples{
43 | library(sgdGMF)
44 | 
45 | # Empty call
46 | set.control.cv()
47 | 
48 | # Parametrized call
49 | set.control.cv(criterion = "bic", proportion = 0.2)
50 | 
51 | }
52 | \seealso{
53 | \code{\link{set.control.init}}, \code{\link{set.control.alg}}, \code{\link{sgdgmf.cv}}
54 | }
55 | 


--------------------------------------------------------------------------------
/man/biplot.sgdgmf.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sgdGMF-class.R
 3 | \name{biplot.sgdgmf}
 4 | \alias{biplot.sgdgmf}
 5 | \title{Biplot of a GMF model}
 6 | \usage{
 7 | \method{biplot}{sgdgmf}(
 8 |   x,
 9 |   ...,
10 |   choices = 1:2,
11 |   arrange = TRUE,
12 |   byrow = FALSE,
13 |   normalize = FALSE,
14 |   labels = NULL,
15 |   palette = NULL,
16 |   titles = c(NULL, NULL)
17 | )
18 | }
19 | \arguments{
20 | \item{x}{an object of class \code{sgdgmf}}
21 | 
22 | \item{...}{further arguments passed to or from other methods}
23 | 
24 | \item{choices}{a length 2 vector specifying the components to plot}
25 | 
26 | \item{arrange}{if \code{TRUE}, return a single plot with two panels}
27 | 
28 | \item{byrow}{if \code{TRUE}, the panels are arranged row-wise (if \code{arrange=TRUE})}
29 | 
30 | \item{normalize}{if \code{TRUE}, orthogonalizes the scores using SVD}
31 | 
32 | \item{labels}{a vector of labels which should be plotted}
33 | 
34 | \item{palette}{the color-palette which should be used}
35 | 
36 | \item{titles}{a 2-dimensional string vector containing the plot titles}
37 | }
38 | \value{
39 | If \code{arrange=TRUE}, a single ggplot object with the selected biplots,
40 | otherwise, a list of two ggplot objects showing the row and column latent variables.
41 | }
42 | \description{
43 | Plot the observations on a two-dimensional projection determined by the
44 | estimated score matrix
45 | }
46 | \examples{
47 | \donttest{# Load the sgdGMF package
48 | library(sgdGMF)
49 | 
50 | # Generate data from a Poisson model
51 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson())
52 | 
53 | # Fit a GMF model
54 | gmf = sgdgmf.fit(data$Y, ncomp = 3, family = poisson())
55 | 
56 | # Get the biplot of a GMF model
57 | biplot(gmf)
58 | }
59 | }
60 | 


--------------------------------------------------------------------------------
/man/coefficients.initgmf.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/initGMF-class.R
 3 | \name{coefficients.initgmf}
 4 | \alias{coefficients.initgmf}
 5 | \alias{coef.initgmf}
 6 | \title{Extract the coefficients of an initialized GMF model}
 7 | \usage{
 8 | \method{coefficients}{initgmf}(
 9 |   object,
10 |   ...,
11 |   type = c("all", "colreg", "rowreg", "scores", "loadings")
12 | )
13 | 
14 | \method{coef}{initgmf}(object, ..., type = c("all", "colreg", "rowreg", "scores", "loadings"))
15 | }
16 | \arguments{
17 | \item{object}{an object of class \code{initgmf}}
18 | 
19 | \item{...}{further arguments passed to or from other methods}
20 | 
21 | \item{type}{the type of coefficients which should be returned}
22 | }
23 | \value{
24 | If \code{type="all"}, a list of coefficients containing the fields \code{B}, \code{A}, \code{U} and \code{V}.
25 | Otherwise, a matrix of coefficients, corresponding to the selected \code{type}.
26 | }
27 | \description{
28 | Return the initialized coefficients of a GMF model, i.e., the row- and column-specific
29 | regression effects, the latent scores and loadings.
30 | }
31 | \examples{
32 | # Load the sgdGMF package
33 | library(sgdGMF)
34 | 
35 | # Generate data from a Poisson model
36 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson())
37 | 
38 | # Initialize a GMF model with 3 latent factors
39 | init = sgdgmf.init(data$Y, ncomp = 3, family = poisson())
40 | 
41 | # Get the initialized coefficients of a GMF model
42 | str(coefficients(init)) # returns all the coefficients
43 | str(coefficients(init, type = "scores")) # returns only the scores, say U
44 | str(coefficients(init, type = "loadings")) # returns only the loadings, say V
45 | 
46 | }
47 | \seealso{
48 | \code{\link{coefficients.sgdgmf}} and \code{\link{coef.sgdgmf}}.
49 | }
50 | 


--------------------------------------------------------------------------------
/man/biplot.initgmf.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/initGMF-class.R
 3 | \name{biplot.initgmf}
 4 | \alias{biplot.initgmf}
 5 | \title{Biplot of an initialized GMF model}
 6 | \usage{
 7 | \method{biplot}{initgmf}(
 8 |   x,
 9 |   ...,
10 |   choices = 1:2,
11 |   arrange = TRUE,
12 |   byrow = FALSE,
13 |   normalize = FALSE,
14 |   labels = NULL,
15 |   palette = NULL
16 | )
17 | }
18 | \arguments{
19 | \item{x}{an object of class \code{initgmf}}
20 | 
21 | \item{...}{further arguments passed to or from other methods}
22 | 
23 | \item{choices}{a length 2 vector specifying the components to plot}
24 | 
25 | \item{arrange}{if \code{TRUE}, return a single plot with two panels}
26 | 
27 | \item{byrow}{if \code{TRUE}, the panels are arranged row-wise (if \code{arrange=TRUE})}
28 | 
29 | \item{normalize}{if \code{TRUE}, orthogonalizes the scores using SVD}
30 | 
31 | \item{labels}{a vector of labels which should be plotted}
32 | 
33 | \item{palette}{the color-palette which should be used}
34 | }
35 | \value{
36 | If \code{arrange=TRUE}, a single ggplot object with the selected biplots,
37 | otherwise, a list of two ggplot objects showing the row and column latent variables.
38 | }
39 | \description{
40 | Plot the observations on a two-dimensional projection determined by the
41 | estimated score matrix
42 | }
43 | \examples{
44 | \donttest{# Load the sgdGMF package
45 | library(sgdGMF)
46 | 
47 | # Generate data from a Poisson model
48 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson())
49 | 
50 | # Initialize a GMF model
51 | init = sgdgmf.init(data$Y, ncomp = 3, family = poisson())
52 | 
53 | # Get the biplot of a GMF model
54 | biplot(init) # 1st vs 2nd principal components
55 | biplot(init, choices = 2:3) #2nd vs 3rd principal components
56 | }
57 | }
58 | \seealso{
59 | \code{\link{biplot.sgdgmf}}.
60 | }
61 | 


--------------------------------------------------------------------------------
/man/set.control.alg.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/control.R
 3 | \name{set.control.alg}
 4 | \alias{set.control.alg}
 5 | \title{Check and set the control parameters for the selected optimization algorithm}
 6 | \usage{
 7 | set.control.alg(
 8 |   method = c("airwls", "newton", "sgd"),
 9 |   sampling = c("block", "coord", "rnd-block"),
10 |   control = list()
11 | )
12 | }
13 | \arguments{
14 | \item{method}{optimization method to use}
15 | 
16 | \item{sampling}{sub-sampling method to use}
17 | 
18 | \item{control}{list of algorithm-specific control parameters}
19 | }
20 | \value{
21 | A \code{list} of control parameters for the selected estimation algorithm
22 | }
23 | \description{
24 | Check if the input control parameters are allowed and set them to default
25 | values if they are not. Returns a list of well-defined control parameters.
26 | }
27 | \details{
28 | It is not necessary to provide a complete list of control parameters; one can
29 | just specify a list containing the parameters that need to be changed from the
30 | default values. Wrongly specified parameters are ignored or set to default values.
31 | For a detailed description of all the algorithm-specific control parameters,
32 | please refer to
33 | \code{\link{set.control.airwls}} (\code{method="airwls"}),
34 | \code{\link{set.control.newton}} (\code{method="newton"}),
35 | \code{\link{set.control.block.sgd}} (\code{method="sgd"}, \code{sampling="block"}),
36 | \code{\link{set.control.coord.sgd}} (\code{method="sgd"}, \code{sampling="coord"}).
37 | }
38 | \examples{
39 | library(sgdGMF)
40 | 
41 | # Empty call
42 | set.control.alg()
43 | 
44 | # Parametrized call
45 | set.control.alg(method = "airwls", control = list(maxiter = 200, stepsize = 0.3))
46 | 
47 | 
48 | }
49 | \seealso{
50 | \code{\link{set.control.init}}, \code{\link{set.control.cv}}, \code{\link{sgdgmf.fit}}
51 | }
52 | 


--------------------------------------------------------------------------------
/man/sgdGMF-package.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sgdGMF-package.R
 3 | \docType{package}
 4 | \name{sgdGMF-package}
 5 | \alias{sgdGMF}
 6 | \alias{sgdGMF-package}
 7 | \title{sgdGMF: Estimation of Generalized Matrix Factorization Models via Stochastic Gradient Descent}
 8 | \description{
 9 | Efficient framework to estimate high-dimensional generalized matrix factorization models using penalized maximum likelihood under a dispersion exponential family specification. Both deterministic and stochastic methods are implemented for the numerical maximization. In particular, the package implements the stochastic gradient descent algorithm with a block-wise mini-batch strategy to speed up the computations and an efficient adaptive learning rate schedule to stabilize the convergence. All the theoretical details can be found in Castiglione et al. (2024, \doi{10.48550/arXiv.2412.20509}). Other methods considered for the optimization are the alternated iterative re-weighted least squares and the quasi-Newton method with diagonal approximation of the Fisher information matrix discussed in Kidzinski et al. (2022, \url{http://jmlr.org/papers/v23/20-1104.html}).
10 | }
11 | \seealso{
12 | Useful links:
13 | \itemize{
14 |   \item \url{https://github.com/CristianCastiglione/sgdGMF}
15 |   \item Report bugs at \url{https://github.com/CristianCastiglione/sgdGMF/issues}
16 | }
17 | 
18 | }
19 | \author{
20 | \strong{Maintainer}: Cristian Castiglione \email{cristian_castiglione@libero.it} (\href{https://orcid.org/0000-0001-5883-4890}{ORCID})
21 | 
22 | Other contributors:
23 | \itemize{
24 |   \item Davide Risso \email{davide.risso@unipd.it} (\href{https://orcid.org/0000-0001-8508-5012}{ORCID}) [contributor]
25 |   \item Alexandre Segers \email{alexandre.segers@ugent.be} (\href{https://orcid.org/0009-0004-2028-7595}{ORCID}) [contributor]
26 | }
27 | 
28 | }
29 | \keyword{internal}
30 | 


--------------------------------------------------------------------------------
/man/storedata.sgdgmf.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sgdGMF-class.R
 3 | \name{storedata.sgdgmf}
 4 | \alias{storedata.sgdgmf}
 5 | \title{Save response and covariate data into an empty sgdGMF object}
 6 | \usage{
 7 | \method{storedata}{sgdgmf}(object, ..., Y = NULL, X = NULL, Z = NULL)
 8 | }
 9 | \arguments{
10 | \item{object}{an object of class \code{sgdgmf}}
11 | 
12 | \item{...}{further arguments passed to or from other methods}
13 | 
14 | \item{Y}{matrix of responses (\eqn{n \times m})}
15 | 
16 | \item{X}{matrix of row fixed effects (\eqn{n \times p})}
17 | 
18 | \item{Z}{matrix of column fixed effects (\eqn{q \times m})}
19 | }
20 | \value{
21 | A \code{sgdgmf} object containing a copy of the data
22 | }
23 | \description{
24 | Save response and covariate data into an empty sgdGMF object
25 | }
26 | \examples{
27 | \donttest{# Load the sgdGMF package
28 | library(sgdGMF)
29 | 
30 | # Generate data from a Poisson model
31 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson())
32 | 
33 | # Fit a GMF model without storing a copy of the data
34 | gmf = sgdgmf.fit(data$Y, ncomp = 3, family = poisson(),
35 |                  control.alg = list(savedata = FALSE))
36 | 
37 | cat("savedata:", gmf$control.alg$savedata, "\n")
38 | cat("Is Y null?", is.null(gmf$Y), "\n")
39 | cat("Is X null?", is.null(gmf$X), "\n")
40 | cat("Is Z null?", is.null(gmf$Z), "\n")
41 | cat("Is eta null?", is.null(gmf$eta), "\n")
42 | cat("Is mu null?", is.null(gmf$mu), "\n")
43 | cat("Is var null?", is.null(gmf$var), "\n")
44 | 
45 | # Store the data in the GMF object a posteriori
46 | gmf = storedata(gmf, Y = data$Y)
47 | 
48 | cat("savedata:", gmf$control.alg$savedata, "\n")
49 | cat("Y:", dim(gmf$Y), "\n")
50 | cat("X:", dim(gmf$X), "\n")
51 | cat("Z:", dim(gmf$Z), "\n")
52 | cat("eta:", dim(gmf$eta), "\n")
53 | cat("mu:", dim(gmf$mu), "\n")
54 | cat("var:", dim(gmf$var), "\n")
55 | }
56 | }
57 | 


--------------------------------------------------------------------------------
/man/screeplot.sgdgmf.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sgdGMF-class.R
 3 | \name{screeplot.sgdgmf}
 4 | \alias{screeplot.sgdgmf}
 5 | \title{Screeplot for the residuals of a GMF model}
 6 | \usage{
 7 | \method{screeplot}{sgdgmf}(
 8 |   x,
 9 |   ...,
10 |   ncomp = 20,
11 |   type = c("deviance", "pearson", "working", "response", "link"),
12 |   partial = FALSE,
13 |   normalize = FALSE,
14 |   cumulative = FALSE,
15 |   proportion = FALSE
16 | )
17 | }
18 | \arguments{
19 | \item{x}{an object of class \code{sgdgmf}}
20 | 
21 | \item{...}{further arguments passed to or from other methods}
22 | 
23 | \item{ncomp}{number of components to be plotted}
24 | 
25 | \item{type}{the type of residuals which should be used}
26 | 
27 | \item{partial}{if \code{TRUE}, plots the eigenvalues of the partial residuals}
28 | 
29 | \item{normalize}{if \code{TRUE}, plots the eigenvalues of the standardized residuals}
30 | 
31 | \item{cumulative}{if \code{TRUE}, plots the cumulative sum of the eigenvalues}
32 | 
33 | \item{proportion}{if \code{TRUE}, plots the fractions of explained variance}
34 | }
35 | \value{
36 | A ggplot object showing the residual screeplot of the model.
37 | }
38 | \description{
39 | Plots the variances of the principal components of the residuals against the
40 | number of the principal component.
41 | }
42 | \examples{
43 | \donttest{# Load the sgdGMF package
44 | library(sgdGMF)
45 | 
46 | # Generate data from a Poisson model
47 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson())
48 | 
49 | # Fit a GMF model
50 | gmf = sgdgmf.fit(data$Y, ncomp = 3, family = poisson())
51 | 
52 | # Get the partial residual spectrum of a GMF model
53 | screeplot(gmf) # screeplot of the var-cov matrix of the deviance residuals
54 | screeplot(gmf, partial = TRUE) # screeplot of the partial residuals
55 | screeplot(gmf, cumulative = TRUE) # cumulative screeplot
56 | screeplot(gmf, proportion = TRUE) # proportion of explained residual variance
57 | }
58 | }
59 | 


--------------------------------------------------------------------------------
/tests/testrcpp/test-newton.R:
--------------------------------------------------------------------------------
 1 | # test-newton.R
 2 | # author: Cristian Castiglione
 3 | # creation: 02/10/2023
 4 | # last change: 04/10/2023
 5 | 
 6 | ## Workspace setup ----
 7 | rm(list = ls())
 8 | graphics.off()
 9 | 
10 | # Package compilation and import
11 | devtools::load_all()
12 | 
13 | ## Test: synthetic data ----
14 | n = 100 # number of rows of Y
15 | m = 20  # number of columns of Y
16 | d = 3   # number of latent factors (columns of U and V)
17 | p = 3   # number of columns of X and B
18 | q = 4   # number of columns of A and Z
19 | 
20 | # Simulate Y from a Poisson model with linear predictor
21 | # eta = X B' + A Z' + U V'
22 | family = poisson()
23 | 
24 | X = matrix(rnorm(n*p), nrow = n, ncol = p) / sqrt(3)
25 | B = matrix(rnorm(m*p), nrow = m, ncol = p) / sqrt(3)
26 | A = matrix(rnorm(n*q), nrow = n, ncol = q) / sqrt(3)
27 | Z = matrix(rnorm(m*q), nrow = m, ncol = q) / sqrt(3)
28 | U = matrix(rnorm(n*d), nrow = n, ncol = d) / sqrt(3)
29 | V = matrix(rnorm(m*d), nrow = m, ncol = d) / sqrt(3)
30 | 
31 | eta = tcrossprod(cbind(X, A, U), cbind(B, Z, V))
32 | mu = family$linkinv(eta)
33 | 
34 | Y = matrix(rpois(n*m, mu), nrow = n, ncol = m)
35 | 
36 | plot3D::image2D(log1p(Y))
37 | 
38 | logY = log(Y + 0.1) # shifted log-transform, finite also where Y = 0
39 | B0 = t(solve(crossprod(X), crossprod(X, logY))) # least squares of logY on X
40 | A0 = t(solve(crossprod(Z), crossprod(Z, t(logY - tcrossprod(X, B0))))) # least squares of the residual on Z
41 | UV = svd::propack.svd(logY - tcrossprod(cbind(X, A0), cbind(B0, Z)), neig = d) # SVD of the remaining residual with neig = d
42 | U0 = UV$u %*% diag(sqrt(UV$d)) # split the singular values evenly between U0 and V0
43 | V0 = UV$v %*% diag(sqrt(UV$d))
44 | 
45 | cfit = sgdGMF::c_fit_newton(
46 |   Y = Y, X = X, B = B0, A = A0, Z = Z, U = U0, V = V0,
47 |   familyname = "poisson", linkname = "log", ncomp = d,
48 |   lambda = c(0, 0, 1, 0), maxiter = 500, stepsize = 0.1,
49 |   tol = 1e-05, frequency = 50)
50 | 
51 | rfit = sgdGMF::sgdgmf(Y, X, Z, family = poisson(), ncomp = d,
52 |                       init = list(niter = 0),
53 |                       control = list(maxiter = 500, stepsize = 0.1))
54 | 
55 | cfit$mu # inspect the output of the c_fit_newton call
56 | cfit$eta
57 | cfit$U
58 | cfit$V
59 | tcrossprod(cfit$U, cfit$V)
60 | 
61 | plot(c(rfit$pred$mu), c(cfit$mu)) # compare the fitted means of rfit and cfit
62 | plot(c(Y), c(cfit$mu))
63 | cor(c(rfit$pred$mu), c(cfit$mu))
64 | 
65 | plot3D::image2D(rfit$pred$mu)
66 | plot3D::image2D(cfit$mu)
67 | plot3D::image2D(Y)
68 | 
69 | all.equal(c(rfit$pred$mu), c(cfit$mu))
70 | 
71 | 
72 | 
73 | 


--------------------------------------------------------------------------------
/man/set.control.init.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/control.R
 3 | \name{set.control.init}
 4 | \alias{set.control.init}
 5 | \title{Check and set the initialization parameters for a GMF model}
 6 | \usage{
 7 | set.control.init(
 8 |   method = c("ols", "glm", "light", "random", "values"),
 9 |   type = c("deviance", "pearson", "working", "link"),
10 |   values = list(),
11 |   niter = 5,
12 |   normalize = TRUE,
13 |   verbose = FALSE,
14 |   parallel = FALSE,
15 |   nthreads = 1
16 | )
17 | }
18 | \arguments{
19 | \item{method}{initialization method (see \code{\link{sgdgmf.init}} for more details upon the initialization methods used)}
20 | 
21 | \item{type}{residual type to be decomposed (see \code{\link{sgdgmf.init}} for more details upon the residuals used)}
22 | 
23 | \item{values}{list of custom initialization parameters fixed by the user}
24 | 
25 | \item{niter}{number of refinement iterations in the \code{"svd"} method}
26 | 
27 | \item{normalize}{if \code{TRUE}, normalize \code{U} and \code{V} to orthogonal \code{U} and lower triangular \code{V}}
28 | 
29 | \item{verbose}{if \code{TRUE}, print the initialization state}
30 | 
31 | \item{parallel}{if \code{TRUE}, use parallel computing for the \code{"glm"} method}
32 | 
33 | \item{nthreads}{number of cores to be used in the \code{"glm"} method}
34 | }
35 | \value{
36 | A \code{list} of control parameters for the initialization
37 | }
38 | \description{
39 | Check if the input initialization parameters are allowed and set them to default
40 | values if they are not. Returns a list of well-defined options which specify how
41 | to initialize a GMF model. See \code{\link{sgdgmf.init}} for more details on the methods used for initialization.
42 | }
43 | \examples{
44 | library(sgdGMF)
45 | 
46 | # Empty call
47 | set.control.init()
48 | 
49 | # Parametrized call
50 | set.control.init(method = "glm", type = "deviance", niter = 10)
51 | 
52 | }
53 | \seealso{
54 | \code{\link{set.control.alg}}, \code{\link{set.control.cv}}, \code{\link{sgdgmf.init}}
55 | }
56 | 


--------------------------------------------------------------------------------
/man/image.sgdgmf.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sgdGMF-class.R
 3 | \name{image.sgdgmf}
 4 | \alias{image.sgdgmf}
 5 | \title{Heatmap of a GMF model}
 6 | \usage{
 7 | \method{image}{sgdgmf}(
 8 |   x,
 9 |   ...,
10 |   type = c("data", "response", "link", "scores", "loadings", "deviance", "pearson",
11 |     "working"),
12 |   resid = FALSE,
13 |   symmetric = FALSE,
14 |   transpose = FALSE,
15 |   limits = NULL,
16 |   palette = NULL
17 | )
18 | }
19 | \arguments{
20 | \item{x}{an object of class \code{sgdgmf}}
21 | 
22 | \item{...}{further arguments passed to or from other methods}
23 | 
24 | \item{type}{the type of data/predictions/residuals which should be returned}
25 | 
26 | \item{resid}{if \code{TRUE}, plots the residual values}
27 | 
28 | \item{symmetric}{if \code{TRUE}, symmetrizes the color limits}
29 | 
30 | \item{transpose}{if \code{TRUE}, transposes the matrix before plotting it}
31 | 
32 | \item{limits}{the color limits which should be used}
33 | 
34 | \item{palette}{the color-palette which should be used}
35 | }
36 | \value{
37 | A ggplot object showing the selected heatmap.
38 | }
39 | \description{
40 | Plots a heatmap of either the data, the fitted values, or the residual values
41 | of a GMF model allowing for different types of transformations and normalizations.
42 | Moreover, it can also plot the latent score and loading matrices.
43 | }
44 | \examples{
45 | \donttest{# Load the sgdGMF package
46 | library(sgdGMF)
47 | 
48 | # Generate data from a Poisson model
49 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson())
50 | 
51 | # Fit a GMF model
52 | gmf = sgdgmf.fit(data$Y, ncomp = 3, family = poisson())
53 | 
54 | # Get the heatmap of a GMF model
55 | image(gmf, type = "data") # original data
56 | image(gmf, type = "response") # fitted values in response scale
57 | image(gmf, type = "scores") # estimated score matrix
58 | image(gmf, type = "loadings") # estimated loading matrix
59 | image(gmf, type = "deviance", resid = TRUE) # deviance residual matrix
60 | }
61 | }
62 | 


--------------------------------------------------------------------------------
/man/screeplot.initgmf.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/initGMF-class.R
 3 | \name{screeplot.initgmf}
 4 | \alias{screeplot.initgmf}
 5 | \title{Screeplot for the residuals of an initialized GMF model}
 6 | \usage{
 7 | \method{screeplot}{initgmf}(
 8 |   x,
 9 |   ...,
10 |   ncomp = 20,
11 |   type = c("deviance", "pearson", "working", "response", "link"),
12 |   partial = FALSE,
13 |   normalize = FALSE,
14 |   cumulative = FALSE,
15 |   proportion = FALSE
16 | )
17 | }
18 | \arguments{
19 | \item{x}{an object of class \code{initgmf}}
20 | 
21 | \item{...}{further arguments passed to or from other methods}
22 | 
23 | \item{ncomp}{number of components to be plotted}
24 | 
25 | \item{type}{the type of residuals which should be used}
26 | 
27 | \item{partial}{if \code{TRUE}, plots the eigenvalues of the partial residuals}
28 | 
29 | \item{normalize}{if \code{TRUE}, plots the eigenvalues of the standardized residuals}
30 | 
31 | \item{cumulative}{if \code{TRUE}, plots the cumulative sum of the eigenvalues}
32 | 
33 | \item{proportion}{if \code{TRUE}, plots the fractions of explained variance}
34 | }
35 | \value{
36 | A ggplot object showing the residual screeplot of the model.
37 | }
38 | \description{
39 | Plots the variances of the principal components of the residuals against the
40 | number of the principal component.
41 | }
42 | \examples{
43 | \donttest{# Load the sgdGMF package
44 | library(sgdGMF)
45 | 
46 | # Generate data from a Poisson model
47 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson())
48 | 
49 | # Fit a GMF model
50 | init = sgdgmf.init(data$Y, ncomp = 3, family = poisson())
51 | 
52 | # Get the partial residual spectrum of a GMF model
53 | screeplot(init) # screeplot of the var-cov matrix of the deviance residuals
54 | screeplot(init, partial = TRUE) # screeplot of the partial residuals
55 | screeplot(init, cumulative = TRUE) # cumulative screeplot
56 | screeplot(init, proportion = TRUE) # proportion of explained residual variance
57 | }
58 | }
59 | \seealso{
60 | \code{\link{screeplot.sgdgmf}}.
61 | }
62 | 


--------------------------------------------------------------------------------
/man/image.initgmf.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/initGMF-class.R
 3 | \name{image.initgmf}
 4 | \alias{image.initgmf}
 5 | \title{Heatmap of an initialized GMF model}
 6 | \usage{
 7 | \method{image}{initgmf}(
 8 |   x,
 9 |   ...,
10 |   type = c("data", "response", "link", "scores", "loadings", "deviance", "pearson",
11 |     "working"),
12 |   resid = FALSE,
13 |   symmetric = FALSE,
14 |   transpose = FALSE,
15 |   limits = NULL,
16 |   palette = NULL
17 | )
18 | }
19 | \arguments{
20 | \item{x}{an object of class \code{initgmf}}
21 | 
22 | \item{...}{further arguments passed to or from other methods}
23 | 
24 | \item{type}{the type of data/predictions/residuals which should be returned}
25 | 
26 | \item{resid}{if \code{TRUE}, plots the residual values}
27 | 
28 | \item{symmetric}{if \code{TRUE}, symmetrizes the color limits}
29 | 
30 | \item{transpose}{if \code{TRUE}, transposes the matrix before plotting it}
31 | 
32 | \item{limits}{the color limits which should be used}
33 | 
34 | \item{palette}{the color-palette which should be used}
35 | }
36 | \value{
37 | A ggplot object showing the selected heatmap.
38 | }
39 | \description{
40 | Plots a heatmap of either the data, the fitted values, or the residual values
41 | of a GMF model allowing for different types of transformations and normalizations.
42 | Moreover, it can also plot the latent score and loading matrices.
43 | }
44 | \examples{
45 | \donttest{# Load the sgdGMF package
46 | library(sgdGMF)
47 | 
48 | # Generate data from a Poisson model
49 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson())
50 | 
51 | # Fit a GMF model
52 | init = sgdgmf.init(data$Y, ncomp = 3, family = poisson())
53 | 
54 | # Get the heatmap of a GMF model
55 | image(init, type = "data") # original data
56 | image(init, type = "response") # fitted values in response scale
57 | image(init, type = "scores") # estimated score matrix
58 | image(init, type = "loadings") # estimated loading matrix
59 | image(init, type = "deviance", resid = TRUE) # deviance residual matrix
60 | }
61 | }
62 | 


--------------------------------------------------------------------------------
/R/sgdGMF-package.R:
--------------------------------------------------------------------------------
 1 | #' @keywords internal
 2 | "_PACKAGE"
 3 | 
 4 | #' @useDynLib sgdGMF, .registration=TRUE
 5 | #' @importFrom Rcpp evalCpp
 6 | #' @import Rcpp
 7 | #' @import RcppArmadillo
 8 | #' @importFrom stats glm.fit
 9 | #' @importFrom stats family
10 | #' @importFrom stats gaussian
11 | #' @importFrom stats binomial
12 | #' @importFrom stats poisson
13 | #' @importFrom stats Gamma
14 | #' @importFrom stats inverse.gaussian
15 | #' @importFrom stats quasi
16 | #' @importFrom stats quasibinomial
17 | #' @importFrom stats quasipoisson
18 | #' @importFrom MASS neg.bin
19 | #' @importFrom MASS negative.binomial
20 | #' @importFrom RSpectra svds
21 | #' @importFrom RSpectra eigs
22 | #' @importFrom RSpectra eigs_sym
23 | #' @importFrom parallel detectCores
24 | #' @importFrom parallel makeCluster
25 | #' @importFrom parallel stopCluster
26 | #' @importFrom doParallel registerDoParallel
27 | #' @importFrom foreach %do%
28 | #' @importFrom foreach %dopar%
29 | #' @importFrom foreach foreach
30 | #' @importFrom stats var sd
31 | #' @importFrom stats cov cor cov2cor
32 | #' @importFrom stats ecdf density
33 | #' @importFrom stats median quantile
34 | #' @importFrom stats dnorm pnorm qnorm rnorm
35 | #' @importFrom stats dexp pexp qexp rexp
36 | #' @importFrom stats dgamma pgamma qgamma rgamma
37 | #' @importFrom stats dbeta pbeta qbeta rbeta
38 | #' @importFrom stats dunif punif qunif runif
39 | #' @importFrom stats dpois ppois qpois rpois
40 | #' @importFrom stats dbinom pbinom qbinom rbinom
41 | #' @importFrom stats fitted
42 | #' @importFrom stats predict
43 | #' @importFrom stats coef coefficients
44 | #' @importFrom stats resid residuals
45 | #' @importFrom stats deviance
46 | #' @importFrom stats BIC
47 | #' @importFrom stats deviance
48 | #' @importFrom stats qqplot qqnorm qqline
49 | #' @importFrom stats biplot
50 | #' @importFrom stats screeplot
51 | #' @importFrom utils head tail
52 | #' @importFrom graphics image
53 | #' @importFrom generics refit
54 | #' @importFrom methods is
55 | #' @import ggplot2
56 | #' @importFrom reshape2 melt
57 | #' @importFrom viridisLite viridis
58 | NULL
59 | 


--------------------------------------------------------------------------------
/man/refit.sgdgmf.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sgdGMF-class.R
 3 | \name{refit.sgdgmf}
 4 | \alias{refit.sgdgmf}
 5 | \title{Refine the final estimate of a GMF model}
 6 | \usage{
 7 | \method{refit}{sgdgmf}(
 8 |   object,
 9 |   ...,
10 |   normalize = TRUE,
11 |   verbose = FALSE,
12 |   parallel = FALSE,
13 |   nthreads = 1
14 | )
15 | }
16 | \arguments{
17 | \item{object}{an object of class \code{sgdgmf}}
18 | 
19 | \item{...}{further arguments passed to or from other methods}
20 | 
21 | \item{normalize}{if \code{TRUE}, normalize \code{U} and \code{V} to uncorrelated Gaussian \code{U} and upper triangular \code{V} with positive diagonal}
22 | 
23 | \item{verbose}{if \code{TRUE}, print the optimization status}
24 | 
25 | \item{parallel}{if \code{TRUE}, use parallel computing using the \code{foreach} package}
26 | 
27 | \item{nthreads}{number of cores to be used in the \code{"glm"} method}
28 | }
29 | \value{
30 | An \code{sgdgmf} object containing the re-fitted model.
31 | }
32 | \description{
33 | Refine the estimated latent scores of a GMF model via IRWLS
34 | }
35 | \examples{
36 | \donttest{# Load the sgdGMF package
37 | library(sgdGMF)
38 | 
39 | # Generate data from a Poisson model
40 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson())
41 | 
42 | # Fit a GMF model using SGD
43 | gmf_old = sgdgmf.fit(data$Y, ncomp = 3, family = poisson(), method = "sgd")
44 | 
45 | # Refine the score matrix estimate
46 | gmf_new = refit(gmf_old)
47 | 
48 | # Get the fitted values in the response scale
49 | mu_hat_old = fitted(gmf_old, type = "response")
50 | mu_hat_new = fitted(gmf_new, type = "response")
51 | 
52 | # Compare the results
53 | oldpar = par(no.readonly = TRUE)
54 | par(mfrow = c(2,2), mar = c(1,1,3,1))
55 | image(data$Y, axes = FALSE, main = expression(Y))
56 | image(data$mu, axes = FALSE, main = expression(mu))
57 | image(mu_hat_old, axes = FALSE, main = expression(hat(mu)[old]))
58 | image(mu_hat_new, axes = FALSE, main = expression(hat(mu)[new]))
59 | par(oldpar)
60 | }
61 | }
62 | \seealso{
63 | \code{\link{sgdgmf.fit}}
64 | }
65 | 


--------------------------------------------------------------------------------
/src/minibatch.h:
--------------------------------------------------------------------------------
 1 | // minibatch.h
 2 | // author: Cristian Castiglione
 3 | // creation: 06/10/2023
 4 | // last change: 06/10/2023
 5 | 
 6 | #ifndef MINIBATCH_H
 7 | #define MINIBATCH_H
 8 | 
 9 | #include <RcppArmadillo.h>
10 | 
11 | class Chunks { // index partition into chunks; member functions are only declared in this header
12 |     public:
13 |         int nidx;   // number of observations
14 |         int nchunks;    // number of chunks
15 |         bool randomize; // should we reshuffle the indices?
16 |         arma::uvec idx;     // data index vector
17 |         arma::uvec start;   // vector of starting indices (of idx) for each chunk
18 |         arma::uvec end;     // vector of ending indices (of idx) for each chunk
19 |         arma::uvec range;   // vector of lengths for each chunk
20 | 
21 |         // Get the data indices corresponding to the chunk at iteration 'iter'
22 |         arma::uvec get_chunk (const int & iter);
23 | 
24 |         // Get the list of data indices corresponding to each chunk listed in 'iters'
25 |         std::list<arma::uvec> get_chunks (const arma::uvec & iters);
26 | 
27 |         // Set all the chunks via index partition (n indices, chunks of length 'size' — TODO confirm against the definition)
28 |         void set_chunks (const int & n, const int & size, const bool & randomize);
29 | 
30 |         // Class constructors: the default one builds an empty object with well-defined scalar members
31 |         Chunks () : nidx(0), nchunks(0), randomize(false) {} // scalars were previously left indeterminate
32 |         Chunks (const int & n, const int & size, const bool & randomize) {
33 |             this->set_chunks(n, size, randomize); // all the setup is delegated to set_chunks
34 |         }
35 | };
36 | 
37 | class ChunkPile { // bookkeeping of chunk ids split between 'tovisit' and 'visited'; methods are only declared here
38 |     public:
39 |         int idx; // current chunk id; -1 right after construction
40 |         bool random; // copy of the 'rnd' constructor flag (presumably toggles random sampling — confirm in the definition)
41 |         arma::uvec tovisit; // initialized to 0, 1, ..., n-1 by the two-argument constructor
42 |         arma::uvec visited; // empty right after construction
43 | 
44 |         void fill_tovisit ();
45 |         void empty_visited ();
46 |         void pop_tovisit (const int & id);
47 |         void push_visited (const int & id);
48 |         void sample_idx ();
49 |         void update ();
50 | 
51 |         // Class constructors: the default one mirrors the idx = -1 convention of the two-argument one
52 |         ChunkPile () : idx(-1), random(false) {} // scalars were previously left indeterminate
53 |         ChunkPile (const int & n, const bool & rnd)  {
54 |             this->idx = -1;
55 |             this->random = rnd;
56 |             this->tovisit = arma::linspace<arma::uvec>(0, n-1, n); // n equally spaced ids from 0 to n-1
57 |             this->visited = {};
58 |         }
59 | };
60 | 
61 | 
62 | #endif
63 | 


--------------------------------------------------------------------------------
/man/set.control.newton.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/control.R
 3 | \name{set.control.newton}
 4 | \alias{set.control.newton}
 5 | \title{Check and set the control parameters for the Newton algorithm}
 6 | \usage{
 7 | set.control.newton(
 8 |   normalize = TRUE,
 9 |   maxiter = 500,
10 |   stepsize = 0.01,
11 |   eps = 1e-08,
12 |   nafill = 1,
13 |   tol = 1e-05,
14 |   damping = 0.001,
15 |   verbose = FALSE,
16 |   frequency = 50,
17 |   parallel = FALSE,
18 |   nthreads = 1,
19 |   savedata = TRUE
20 | )
21 | }
22 | \arguments{
23 | \item{normalize}{if \code{TRUE}, normalize \code{U} and \code{V} to uncorrelated Gaussian \code{U} and upper triangular \code{V} with positive diagonal}
24 | 
25 | \item{maxiter}{maximum number of iterations}
26 | 
27 | \item{stepsize}{step-size parameter scaling each IRWLS step}
28 | 
29 | \item{eps}{how much shrinkage has to be introduced on extreme predictions lying outside of the data range}
30 | 
31 | \item{nafill}{how frequently the \code{NA} values are filled, by default \code{NA} values are filled at each iteration of the algorithm}
32 | 
33 | \item{tol}{tolerance threshold for the stopping criterion}
34 | 
35 | \item{damping}{regularization parameter which is added to the Hessian to ensure numerical stability}
36 | 
37 | \item{verbose}{if \code{TRUE}, print the optimization status}
38 | 
39 | \item{frequency}{how often the optimization status is printed (only if \code{verbose=TRUE})}
40 | 
41 | \item{parallel}{if \code{TRUE}, allows for parallel computing using the \code{C++} library \code{OpenMP}}
42 | 
43 | \item{nthreads}{number of cores to be used in parallel (only if \code{parallel=TRUE})}
44 | 
45 | \item{savedata}{if \code{TRUE}, saves a copy of the data and fitted values}
46 | }
47 | \value{
48 | A \code{list} of control parameters for the quasi-Newton algorithm
49 | }
50 | \description{
51 | Check if the input control parameters of the quasi-Newton algorithm are
52 | allowed and set them to default values if they are not. Returns a list of
53 | well-defined control parameters.
54 | }
55 | \examples{
56 | library(sgdGMF)
57 | 
58 | # Empty call
59 | set.control.newton()
60 | 
61 | # Parametrized call
62 | set.control.newton(maxiter = 1000, stepsize = 0.01, tol = 1e-04)
63 | 
64 | }
65 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: sgdGMF
 2 | Type: Package
 3 | Title: Estimation of Generalized Matrix Factorization Models via Stochastic Gradient Descent
 4 | Version: 1.0.1
 5 | Date: 2025-05-17
 6 | Authors@R: 
 7 |     c(person("Cristian", "Castiglione", email = "cristian_castiglione@libero.it", 
 8 |              role = c("aut","cre"), comment = c(ORCID = "0000-0001-5883-4890")),
 9 |       person("Davide", "Risso", email = "davide.risso@unipd.it", 
10 |              role = c("ctb"), comment = c(ORCID = "0000-0001-8508-5012")),
11 |       person("Alexandre", "Segers", email = "alexandre.segers@ugent.be", 
12 |              role = c("ctb"), comment = c(ORCID = "0009-0004-2028-7595")))
13 | Description: Efficient framework to estimate high-dimensional generalized matrix factorization models using penalized maximum likelihood under a dispersion exponential family specification. Both deterministic and stochastic methods are implemented for the numerical maximization. In particular, the package implements the stochastic gradient descent algorithm with a block-wise mini-batch strategy to speed up the computations and an efficient adaptive learning rate schedule to stabilize the convergence. All the theoretical details can be found in Castiglione et al. (2024, <doi:10.48550/arXiv.2412.20509>). Other methods considered for the optimization are the alternated iterative re-weighted least squares and the quasi-Newton method with diagonal approximation of the Fisher information matrix discussed in Kidzinski et al. (2022, <http://jmlr.org/papers/v23/20-1104.html>).
14 | License: MIT + file LICENSE
15 | Imports: 
16 |     Rcpp (>= 1.0.10), 
17 |     RcppArmadillo,
18 |     RSpectra, 
19 |     parallel, 
20 |     doParallel, 
21 |     foreach,
22 |     MASS,
23 |     SuppDists,
24 |     methods,
25 |     generics,
26 |     reshape2,
27 |     ggpubr,
28 |     viridisLite
29 | LinkingTo: 
30 |     Rcpp, 
31 |     RcppArmadillo
32 | Depends: 
33 |     R (>= 4.0.0),
34 |     ggplot2
35 | Suggests: 
36 |     testthat (>= 3.0.0),
37 |     Rtsne,
38 |     dplyr,
39 |     knitr,
40 |     rmarkdown
41 | Config/testthat/edition: 3
42 | Encoding: UTF-8
43 | URL: https://github.com/CristianCastiglione/sgdGMF
44 | BugReports: https://github.com/CristianCastiglione/sgdGMF/issues
45 | RoxygenNote: 7.2.3
46 | VignetteBuilder: knitr
47 | 


--------------------------------------------------------------------------------
/man/plot.sgdgmf.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sgdGMF-class.R
 3 | \name{plot.sgdgmf}
 4 | \alias{plot.sgdgmf}
 5 | \title{Plot diagnostics for a GMF model}
 6 | \usage{
 7 | \method{plot}{sgdgmf}(
 8 |   x,
 9 |   ...,
10 |   type = c("res-idx", "res-fit", "std-fit", "hist", "qq", "ecdf"),
11 |   resid = c("deviance", "pearson", "working", "response", "link"),
12 |   subsample = FALSE,
13 |   sample.size = 500,
14 |   partial = FALSE,
15 |   normalize = FALSE,
16 |   fillna = FALSE
17 | )
18 | }
19 | \arguments{
20 | \item{x}{an object of class \code{sgdgmf}}
21 | 
22 | \item{...}{further arguments passed to or from other methods}
23 | 
24 | \item{type}{the type of plot which should be returned}
25 | 
26 | \item{resid}{the type of residuals which should be used}
27 | 
28 | \item{subsample}{if \code{TRUE}, computes the residuals over a small fraction of the data}
29 | 
30 | \item{sample.size}{the dimension of the sub-sample which should be used}
31 | 
32 | \item{partial}{if \code{TRUE}, computes the partial residuals}
33 | 
34 | \item{normalize}{if \code{TRUE}, standardizes the residuals column-by-column}
35 | 
36 | \item{fillna}{if \code{TRUE}, fills the \code{NA} values with \code{0}}
37 | }
38 | \value{
39 | A ggplot object showing the selected diagnostic plot.
40 | }
41 | \description{
42 | Plots (one of) six diagnostics to graphically analyze the marginal and conditional
43 | distribution of the residuals of a GMF model. Currently, the following plots are
44 | available: residuals against observation indices, residuals against fitted values,
45 | absolute square-root residuals against fitted values, histogram of the residuals,
46 | residual QQ-plot, residual ECDF-plot.
47 | }
48 | \examples{
49 | \donttest{# Load the sgdGMF package
50 | library(sgdGMF)
51 | 
52 | # Generate data from a Poisson model
53 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson())
54 | 
55 | # Fit a GMF model
56 | gmf = sgdgmf.fit(data$Y, ncomp = 3, family = poisson())
57 | 
58 | # Plot the residual-based GMF diagnostics
59 | plot(gmf, type = "res-fit") # Residuals vs fitted values
60 | plot(gmf, type = "std-fit") # Abs-sqrt-transformed residuals vs fitted values
61 | plot(gmf, type = "qq") # Residual QQ-plot
62 | plot(gmf, type = "hist") # Residual histogram
63 | }
64 | }
65 | 


--------------------------------------------------------------------------------
/man/cpp.fit.newton.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/RcppExports.R
 3 | \name{cpp.fit.newton}
 4 | \alias{cpp.fit.newton}
 5 | \title{Fit a GMF model using the diagonal quasi-Newton algorithm}
 6 | \usage{
 7 | cpp.fit.newton(
 8 |   Y,
 9 |   X,
10 |   B,
11 |   A,
12 |   Z,
13 |   U,
14 |   V,
15 |   O,
16 |   W,
17 |   familyname,
18 |   linkname,
19 |   varfname,
20 |   ncomp,
21 |   lambda,
22 |   maxiter = 500L,
23 |   stepsize = 0.1,
24 |   eps = 1e-08,
25 |   nafill = 1L,
26 |   tol = 1e-05,
27 |   damping = 0.001,
28 |   verbose = TRUE,
29 |   frequency = 10L,
30 |   parallel = FALSE,
31 |   nthreads = 1L
32 | )
33 | }
34 | \arguments{
35 | \item{Y}{matrix of responses (\eqn{n \times m})}
36 | 
37 | \item{X}{matrix of row fixed effects (\eqn{n \times p})}
38 | 
39 | \item{B}{initial row-effect matrix (\eqn{m \times p})}
40 | 
41 | \item{A}{initial column-effect matrix (\eqn{n \times q})}
42 | 
43 | \item{Z}{matrix of column fixed effects (\eqn{m \times q})}
44 | 
45 | \item{U}{initial factor matrix (\eqn{n \times d})}
46 | 
47 | \item{V}{initial loading matrix (\eqn{m \times d})}
48 | 
49 | \item{O}{matrix of constant offset (\eqn{n \times m})}
50 | 
51 | \item{W}{matrix of constant weights (\eqn{n \times m})}
52 | 
53 | \item{familyname}{a \code{glm} model family name}
54 | 
55 | \item{linkname}{a \code{glm} link function name}
56 | 
57 | \item{varfname}{variance function name}
58 | 
59 | \item{ncomp}{rank of the latent matrix factorization}
60 | 
61 | \item{lambda}{penalization parameters}
62 | 
63 | \item{maxiter}{maximum number of iterations}
64 | 
65 | \item{stepsize}{stepsize of the quasi-Newton update}
66 | 
67 | \item{eps}{shrinkage factor for extreme predictions}
68 | 
69 | \item{nafill}{how often the missing values are updated}
70 | 
71 | \item{tol}{tolerance threshold for the stopping criterion}
72 | 
73 | \item{damping}{diagonal damping factor for the Hessian matrix}
74 | 
75 | \item{verbose}{if \code{TRUE}, print the optimization status}
76 | 
77 | \item{frequency}{how often the optimization status is printed}
78 | 
79 | \item{parallel}{if \code{TRUE}, allows for parallel computing}
80 | 
81 | \item{nthreads}{number of cores to be used in parallel}
82 | }
83 | \description{
84 | Fit a GMF model using the diagonal quasi-Newton algorithm
85 | }
86 | \keyword{internal}
87 | 


--------------------------------------------------------------------------------
/man/plot.initgmf.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/initGMF-class.R
 3 | \name{plot.initgmf}
 4 | \alias{plot.initgmf}
 5 | \title{Plot diagnostics for an initialized GMF model}
 6 | \usage{
 7 | \method{plot}{initgmf}(
 8 |   x,
 9 |   ...,
10 |   type = c("res-idx", "res-fit", "std-fit", "hist", "qq", "ecdf"),
11 |   resid = c("deviance", "pearson", "working", "response", "link"),
12 |   subsample = FALSE,
13 |   sample.size = 500,
14 |   partial = FALSE,
15 |   normalize = FALSE,
16 |   fillna = FALSE
17 | )
18 | }
19 | \arguments{
20 | \item{x}{an object of class \code{initgmf}}
21 | 
22 | \item{...}{further arguments passed to or from other methods}
23 | 
24 | \item{type}{the type of plot which should be returned}
25 | 
26 | \item{resid}{the type of residuals which should be used}
27 | 
28 | \item{subsample}{if \code{TRUE}, computes the residuals over a small fraction of the data}
29 | 
30 | \item{sample.size}{the dimension of the sub-sample which should be used}
31 | 
32 | \item{partial}{if \code{TRUE}, computes the partial residuals}
33 | 
34 | \item{normalize}{if \code{TRUE}, standardizes the residuals column-by-column}
35 | 
36 | \item{fillna}{if \code{TRUE}, fills the \code{NA} values with \code{0}}
37 | }
38 | \value{
39 | A ggplot object showing the selected diagnostic plot.
40 | }
41 | \description{
42 | Plots (one of) six diagnostics to graphically analyze the marginal and conditional
43 | distribution of the residuals of a GMF model. Currently, the following plots are
44 | available: residuals against observation indices, residuals against fitted values,
45 | absolute square-root residuals against fitted values, histogram of the residuals,
46 | residual QQ-plot, residual ECDF-plot.
47 | }
48 | \examples{
49 | \donttest{# Load the sgdGMF package
50 | library(sgdGMF)
51 | 
52 | # Generate data from a Poisson model
53 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson())
54 | 
55 | # Initialize a GMF model
56 | init = sgdgmf.init(data$Y, ncomp = 3, family = poisson())
57 | 
58 | # Plot the residual-based GMF diagnostics
59 | plot(init, type = "res-fit") # Residuals vs fitted values
60 | plot(init, type = "std-fit") # Abs-sqrt-transformed residuals vs fitted values
61 | plot(init, type = "qq") # Residual QQ-plot
62 | plot(init, type = "hist") # Residual histogram
63 | }
64 | }
65 | \seealso{
66 | \code{\link{plot.sgdgmf}}.
67 | }
68 | 


--------------------------------------------------------------------------------
/man/cpp.fit.airwls.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/RcppExports.R
 3 | \name{cpp.fit.airwls}
 4 | \alias{cpp.fit.airwls}
 5 | \title{Fit a GMF model using the AIRWLS algorithm}
 6 | \usage{
 7 | cpp.fit.airwls(
 8 |   Y,
 9 |   X,
10 |   B,
11 |   A,
12 |   Z,
13 |   U,
14 |   V,
15 |   O,
16 |   W,
17 |   familyname,
18 |   linkname,
19 |   varfname,
20 |   ncomp,
21 |   lambda,
22 |   maxiter = 500L,
23 |   nsteps = 1L,
24 |   stepsize = 0.1,
25 |   eps = 1e-08,
26 |   nafill = 1L,
27 |   tol = 1e-05,
28 |   damping = 0.001,
29 |   verbose = TRUE,
30 |   frequency = 10L,
31 |   parallel = FALSE,
32 |   nthreads = 1L
33 | )
34 | }
35 | \arguments{
36 | \item{Y}{matrix of responses (\eqn{n \times m})}
37 | 
38 | \item{X}{matrix of row fixed effects (\eqn{n \times p})}
39 | 
40 | \item{B}{initial row-effect matrix (\eqn{m \times p})}
41 | 
42 | \item{A}{initial column-effect matrix (\eqn{n \times q})}
43 | 
44 | \item{Z}{matrix of column fixed effects (\eqn{m \times q})}
45 | 
46 | \item{U}{initial factor matrix (\eqn{n \times d})}
47 | 
48 | \item{V}{initial loading matrix (\eqn{m \times d})}
49 | 
50 | \item{O}{matrix of constant offset (\eqn{n \times m})}
51 | 
52 | \item{W}{matrix of constant weights (\eqn{n \times m})}
53 | 
54 | \item{familyname}{a \code{glm} model family name}
55 | 
56 | \item{linkname}{a \code{glm} link function name}
57 | 
58 | \item{varfname}{variance function name}
59 | 
60 | \item{ncomp}{rank of the latent matrix factorization}
61 | 
62 | \item{lambda}{penalization parameters}
63 | 
64 | \item{maxiter}{maximum number of iterations}
65 | 
66 | \item{nsteps}{number of inner Fisher scoring iterations}
67 | 
68 | \item{stepsize}{stepsize of the inner Fisher scoring algorithm}
69 | 
70 | \item{eps}{shrinkage factor for extreme predictions}
71 | 
72 | \item{nafill}{how often the missing values are updated}
73 | 
74 | \item{tol}{tolerance threshold for the stopping criterion}
75 | 
76 | \item{damping}{diagonal damping factor for the Hessian matrix}
77 | 
78 | \item{verbose}{if \code{TRUE}, print the optimization status}
79 | 
80 | \item{frequency}{how often the optimization status is printed}
81 | 
82 | \item{parallel}{if \code{TRUE}, allows for parallel computing}
83 | 
84 | \item{nthreads}{number of cores to be used in parallel}
85 | }
86 | \description{
87 | Fit a GMF model using the AIRWLS algorithm
88 | }
89 | \keyword{internal}
90 | 


--------------------------------------------------------------------------------
/man/set.control.airwls.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/control.R
 3 | \name{set.control.airwls}
 4 | \alias{set.control.airwls}
 5 | \title{Check and set the control parameters for the AIRWLS algorithm}
 6 | \usage{
 7 | set.control.airwls(
 8 |   normalize = TRUE,
 9 |   maxiter = 100,
10 |   nstep = 1,
11 |   stepsize = 0.1,
12 |   eps = 1e-08,
13 |   nafill = 1,
14 |   tol = 1e-05,
15 |   damping = 0.001,
16 |   verbose = FALSE,
17 |   frequency = 10,
18 |   parallel = FALSE,
19 |   nthreads = 1,
20 |   savedata = TRUE
21 | )
22 | }
23 | \arguments{
24 | \item{normalize}{if \code{TRUE}, normalize \code{U} and \code{V} to uncorrelated Gaussian \code{U} and upper triangular \code{V} with positive diagonal}
25 | 
26 | \item{maxiter}{maximum number of iterations}
27 | 
28 | \item{nstep}{number of IRWLS steps in each inner loop of AIRWLS}
29 | 
30 | \item{stepsize}{step-size parameter scaling each IRWLS step}
31 | 
32 | \item{eps}{how much shrinkage has to be introduced on extreme predictions lying outside of the data range}
33 | 
34 | \item{nafill}{how frequently the \code{NA} values are filled, by default \code{NA} values are filled at each iteration of the algorithm}
35 | 
36 | \item{tol}{tolerance threshold for the stopping criterion}
37 | 
38 | \item{damping}{regularization parameter which is added to the diagonal of the Hessian to ensure numerical stability}
39 | 
40 | \item{verbose}{if \code{TRUE}, print the optimization status (default \code{FALSE})}
41 | 
42 | \item{frequency}{how often the optimization status is printed (only if \code{verbose=TRUE})}
43 | 
44 | \item{parallel}{if \code{TRUE}, allows for parallel computing using the \code{C++} library \code{OpenMP}}
45 | 
46 | \item{nthreads}{number of cores to be used in parallel (only if \code{parallel=TRUE})}
47 | 
48 | \item{savedata}{if \code{TRUE}, saves a copy of the data and fitted values}
49 | }
50 | \value{
51 | A \code{list} of control parameters for the AIRWLS algorithm
52 | }
53 | \description{
54 | Check if the input control parameters of the AIRWLS algorithm are allowed
55 | and set them to default values if they are not. Returns a list of
56 | well-defined control parameters.
57 | }
58 | \examples{
59 | library(sgdGMF)
60 | 
61 | # Empty call
62 | set.control.airwls()
63 | 
64 | # Parametrized call
65 | set.control.airwls(maxiter = 100, nstep = 5, stepsize = 0.3)
66 | 
67 | 
68 | }
69 | 


--------------------------------------------------------------------------------
/tests/testrcpp/test-link.R:
--------------------------------------------------------------------------------
 1 | # test-link.R
 2 | # author: Cristian Castiglione
 3 | # creation: 29/09/2023
 4 | # last change: 29/09/2023
 5 | 
 6 | ## Workspace setup ----
 7 | rm(list = ls())
 8 | graphics.off()
 9 | 
10 | # Package compilation and import
11 | devtools::load_all()
12 | 
13 | plot.link <- function (x, y, main = "") { # helper: line plot of y against x, titled with `main`
14 |   plot(x, y, type = "l", xlab = "x", ylab = "link", main = main) # axis labels are fixed to "x" and "link"
15 | }
16 | 
17 | ## Test data ----
18 | x = seq(from = -3, to = +3, length = 201)
19 | y = seq(from = 0.1, to = +5, length = 201)
20 | z = seq(from = 0.001, to = 0.999, length = 201)
21 | 
22 | par(mfrow = c(1, 3))
23 | 
24 | ## Test: identity ----
25 | {
26 |   plot.link(x, sgdGMF::cpp.link.identity.linkfun(x), main = "linkfun")
27 |   plot.link(x, sgdGMF::cpp.link.identity.linkinv(x), main = "linkinv")
28 |   plot.link(x, sgdGMF::cpp.link.identity.mueta(x), main = "linkmueta")
29 | }
30 | 
31 | ## Test: logit ----
32 | {
33 |   plot.link(x, sgdGMF::cpp.link.logit.linkfun(z), main = "linkfun")
34 |   plot.link(x, sgdGMF::cpp.link.logit.linkinv(x), main = "linkinv")
35 |   plot.link(x, sgdGMF::cpp.link.logit.mueta(x), main = "linkmueta")
36 | }
37 | 
38 | ## Test: probit ----
39 | {
40 |   plot.link(x, sgdGMF::cpp.link.probit.linkfun(z), main = "linkfun")
41 |   plot.link(x, sgdGMF::cpp.link.probit.linkinv(x), main = "linkinv")
42 |   plot.link(x, sgdGMF::cpp.link.probit.mueta(x), main = "linkmueta")
43 | }
44 | 
45 | ## Test: cauchy ----
46 | {
47 |   plot.link(x, sgdGMF::cpp.link.cauchy.linkfun(z), main = "linkfun")
48 |   plot.link(x, sgdGMF::cpp.link.cauchy.linkinv(x), main = "linkinv")
49 |   plot.link(x, sgdGMF::cpp.link.cauchy.mueta(x), main = "linkmueta")
50 | }
51 | 
52 | ## Test: cloglog ----
53 | {
54 |   plot.link(x, sgdGMF::cpp.link.cloglog.linkfun(z), main = "linkfun")
55 |   plot.link(x, sgdGMF::cpp.link.cloglog.linkinv(x), main = "linkinv")
56 |   plot.link(x, sgdGMF::cpp.link.cloglog.mueta(x), main = "linkmueta")
57 | }
58 | 
59 | ## Test: log ----
60 | {
61 |   plot.link(x, sgdGMF::cpp.link.log.linkfun(y), main = "linkfun")
62 |   plot.link(x, sgdGMF::cpp.link.log.linkinv(x), main = "linkinv")
63 |   plot.link(x, sgdGMF::cpp.link.log.mueta(x), main = "linkmueta")
64 | }
65 | 
66 | ## Test: inverse ----
67 | {
68 |   plot.link(x, sgdGMF::cpp.link.inverse.linkfun(z), main = "linkfun")
69 |   plot.link(x, sgdGMF::cpp.link.inverse.linkinv(z), main = "linkinv")
70 |   plot.link(x, sgdGMF::cpp.link.inverse.mueta(z), main = "linkmueta")
71 | }
72 | 
73 | ## Test: sqrt ----
74 | {
75 |   plot.link(x, sgdGMF::cpp.link.sqrt.linkfun(z), main = "linkfun")
76 |   plot.link(x, sgdGMF::cpp.link.sqrt.linkinv(z), main = "linkinv")
77 |   plot.link(x, sgdGMF::cpp.link.sqrt.mueta(z), main = "linkmueta")
78 | }
79 | 
80 | 


--------------------------------------------------------------------------------
/man/set.control.block.sgd.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/control.R
 3 | \name{set.control.block.sgd}
 4 | \alias{set.control.block.sgd}
 5 | \title{Check and set the control parameters for the blockwise-SGD algorithm}
 6 | \usage{
 7 | set.control.block.sgd(
 8 |   normalize = TRUE,
 9 |   maxiter = 1000,
10 |   eps = 1e-08,
11 |   nafill = 10,
12 |   tol = 1e-08,
13 |   size = c(100, 100),
14 |   burn = 1,
15 |   rate0 = 0.01,
16 |   decay = 0.01,
17 |   damping = 0.001,
18 |   rate1 = 0.1,
19 |   rate2 = 0.01,
20 |   verbose = FALSE,
21 |   frequency = 250,
22 |   progress = FALSE,
23 |   savedata = TRUE
24 | )
25 | }
26 | \arguments{
27 | \item{normalize}{if \code{TRUE}, normalize \code{U} and \code{V} to uncorrelated Gaussian \code{U} and upper triangular \code{V} with positive diagonal}
28 | 
29 | \item{maxiter}{maximum number of iterations}
30 | 
31 | \item{eps}{how much shrinkage has to be introduced on extreme predictions lying outside of the data range}
32 | 
33 | \item{nafill}{how frequently the \code{NA} values are filled, by default \code{NA} values are filled every 10 iterations of the algorithm}
34 | 
35 | \item{tol}{tolerance threshold for the stopping criterion}
36 | 
37 | \item{size}{mini-batch size, the first value is for row sub-sample, the second value is for column sub-sample}
38 | 
39 | \item{burn}{percentage of iterations to ignore before performing Polyak averaging}
40 | 
41 | \item{rate0}{initial learning rate}
42 | 
43 | \item{decay}{learning rate decay}
44 | 
45 | \item{damping}{regularization parameter which is added to the Hessian to ensure numerical stability}
46 | 
47 | \item{rate1}{exponential decay rate for the moment estimate of the gradient}
48 | 
49 | \item{rate2}{exponential decay rate for the moment estimate of the Hessian}
50 | 
51 | \item{verbose}{if \code{TRUE}, print the optimization status}
52 | 
53 | \item{frequency}{how often the optimization status is printed (only if \code{verbose=TRUE})}
54 | 
55 | \item{progress}{if \code{TRUE}, print a compact progress-bar instead of a full-report of the optimization status (only if \code{verbose=TRUE})}
56 | 
57 | \item{savedata}{if \code{TRUE}, saves a copy of the data and fitted values}
58 | }
59 | \value{
60 | A \code{list} of control parameters for the adaptive SGD algorithm with block-wise sub-sampling
61 | }
62 | \description{
63 | Check if the input control parameters are allowed and set them to default
64 | values if they are not. Returns a list of well-defined control parameters.
65 | }
66 | \examples{
67 | library(sgdGMF)
68 | 
69 | # Empty call
70 | set.control.block.sgd()
71 | 
72 | # Parametrized call
73 | set.control.block.sgd(maxiter = 2000, rate0 = 0.01, decay = 0.01)
74 | 
75 | 
76 | }
77 | 


--------------------------------------------------------------------------------
/man/set.control.coord.sgd.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/control.R
 3 | \name{set.control.coord.sgd}
 4 | \alias{set.control.coord.sgd}
 5 | \title{Check and set the control parameters for the coordinate-SGD algorithm}
 6 | \usage{
 7 | set.control.coord.sgd(
 8 |   normalize = TRUE,
 9 |   maxiter = 1000,
10 |   eps = 1e-08,
11 |   nafill = 10,
12 |   tol = 1e-08,
13 |   size = c(100, 100),
14 |   burn = 1,
15 |   rate0 = 0.01,
16 |   decay = 0.01,
17 |   damping = 0.001,
18 |   rate1 = 0.1,
19 |   rate2 = 0.01,
20 |   verbose = FALSE,
21 |   frequency = 250,
22 |   progress = FALSE,
23 |   savedata = TRUE
24 | )
25 | }
26 | \arguments{
27 | \item{normalize}{if \code{TRUE}, normalize \code{U} and \code{V} to uncorrelated Gaussian \code{U} and upper triangular \code{V} with positive diagonal}
28 | 
29 | \item{maxiter}{maximum number of iterations}
30 | 
31 | \item{eps}{how much shrinkage has to be introduced on extreme predictions lying outside of the data range}
32 | 
33 | \item{nafill}{how frequently the \code{NA} values are filled, by default \code{NA} values are filled every 10 iterations of the algorithm}
34 | 
35 | \item{tol}{tolerance threshold for the stopping criterion}
36 | 
37 | \item{size}{mini-batch size, the first value is for row sub-sample, the second value is for column sub-sample}
38 | 
39 | \item{burn}{percentage of iterations to ignore before performing Polyak averaging}
40 | 
41 | \item{rate0}{initial learning rate}
42 | 
43 | \item{decay}{learning rate decay}
44 | 
45 | \item{damping}{regularization parameter which is added to the Hessian to ensure numerical stability}
46 | 
47 | \item{rate1}{exponential decay rate for the moment estimate of the gradient}
48 | 
49 | \item{rate2}{exponential decay rate for the moment estimate of the Hessian}
50 | 
51 | \item{verbose}{if \code{TRUE}, print the optimization status}
52 | 
53 | \item{frequency}{how often the optimization status is printed (only if \code{verbose=TRUE})}
54 | 
55 | \item{progress}{if \code{TRUE}, print a compact progress-bar instead of a full-report of the optimization status (only if \code{verbose=TRUE})}
56 | 
57 | \item{savedata}{if \code{TRUE}, saves a copy of the data and fitted values}
58 | }
59 | \value{
60 | A \code{list} of control parameters for the adaptive SGD algorithm with coordinate-wise sub-sampling
61 | }
62 | \description{
63 | Check if the input control parameters are allowed and set them to default
64 | values if they are not. Returns a list of well-defined control parameters.
65 | }
66 | \examples{
67 | library(sgdGMF)
68 | 
69 | # Empty call
70 | set.control.coord.sgd()
71 | 
72 | # Parametrized call
73 | set.control.coord.sgd(maxiter = 2000, rate0 = 0.01, decay = 0.01)
74 | 
75 | }
76 | 


--------------------------------------------------------------------------------
/tests/testthat/test-fit.R:
--------------------------------------------------------------------------------
 1 | # file: test-fit.R
 2 | # author: Cristian Castiglione
 3 | # creation: 05/02/2024
 4 | # last change: 04/10/2024
 5 | 
 6 | testthat::test_that("GMF fit", { # Smoke test: sgdgmf.fit output structure for Poisson, Binomial and Gamma data
 7 |   n = 100; m = 20; d = 5 # passed below as n, m and ncomp to sim.gmf.data
 8 | 
 9 |   # Generate data using Poisson, Binomial and Gamma models
10 |   data_pois = sim.gmf.data(n = n, m = m, ncomp = d, family = poisson())
11 |   data_bin = sim.gmf.data(n = n, m = m, ncomp = d, family = binomial())
12 |   data_gam = sim.gmf.data(n = n, m = m, ncomp = d, family = Gamma(link = "log"), dispersion = 0.25)
13 | 
14 |   # Fit the GMF models assuming 3 latent factors (fewer than the d = 5 used in the simulation)
15 |   gmf_pois = sgdgmf.fit(data_pois$Y, ncomp = 3, family = poisson())
16 |   gmf_bin = sgdgmf.fit(data_bin$Y, ncomp = 3, family = binomial())
17 |   gmf_gam = sgdgmf.fit(data_gam$Y, ncomp = 3, family = Gamma(link = "log"))
18 | 
19 |   # Output class: each fit must be a list carrying the S3 class "sgdgmf"
20 |   testthat::expect_true(is.list(gmf_pois))
21 |   testthat::expect_true(is.list(gmf_bin))
22 |   testthat::expect_true(is.list(gmf_gam))
23 | 
24 |   testthat::expect_s3_class(gmf_pois, "sgdgmf")
25 |   testthat::expect_s3_class(gmf_bin, "sgdgmf")
26 |   testthat::expect_s3_class(gmf_gam, "sgdgmf")
27 | 
28 |   # Sub-output checks: U, V, A, B, eta and mu must be numeric matrices; mu must be >= 0 (Poisson, Gamma) or within [0, 1] (Binomial)
29 |   testthat::expect_true(is.matrix(gmf_pois$U) && is.numeric(gmf_pois$U))
30 |   testthat::expect_true(is.matrix(gmf_pois$V) && is.numeric(gmf_pois$V))
31 |   testthat::expect_true(is.matrix(gmf_pois$A) && is.numeric(gmf_pois$A))
32 |   testthat::expect_true(is.matrix(gmf_pois$B) && is.numeric(gmf_pois$B))
33 |   testthat::expect_true(is.matrix(gmf_pois$eta) && is.numeric(gmf_pois$eta))
34 |   testthat::expect_true(is.matrix(gmf_pois$mu) && is.numeric(gmf_pois$mu))
35 |   testthat::expect_true(all(gmf_pois$mu >= 0))
36 | 
37 |   testthat::expect_true(is.matrix(gmf_bin$U) && is.numeric(gmf_bin$U))
38 |   testthat::expect_true(is.matrix(gmf_bin$V) && is.numeric(gmf_bin$V))
39 |   testthat::expect_true(is.matrix(gmf_bin$A) && is.numeric(gmf_bin$A))
40 |   testthat::expect_true(is.matrix(gmf_bin$B) && is.numeric(gmf_bin$B))
41 |   testthat::expect_true(is.matrix(gmf_bin$eta) && is.numeric(gmf_bin$eta))
42 |   testthat::expect_true(is.matrix(gmf_bin$mu) && is.numeric(gmf_bin$mu))
43 |   testthat::expect_true(all(gmf_bin$mu >= 0 & gmf_bin$mu <= 1))
44 | 
45 |   testthat::expect_true(is.matrix(gmf_gam$U) && is.numeric(gmf_gam$U))
46 |   testthat::expect_true(is.matrix(gmf_gam$V) && is.numeric(gmf_gam$V))
47 |   testthat::expect_true(is.matrix(gmf_gam$A) && is.numeric(gmf_gam$A))
48 |   testthat::expect_true(is.matrix(gmf_gam$B) && is.numeric(gmf_gam$B))
49 |   testthat::expect_true(is.matrix(gmf_gam$eta) && is.numeric(gmf_gam$eta))
50 |   testthat::expect_true(is.matrix(gmf_gam$mu) && is.numeric(gmf_gam$mu))
51 |   testthat::expect_true(all(gmf_gam$mu >= 0))
52 | })
53 | 


--------------------------------------------------------------------------------
/man/sgdgmf.cv.step.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/crossval.R
 3 | \name{sgdgmf.cv.step}
 4 | \alias{sgdgmf.cv.step}
 5 | \title{Single step of cross-validation for generalized matrix factorization models}
 6 | \usage{
 7 | sgdgmf.cv.step(
 8 |   train,
 9 |   test,
10 |   X,
11 |   Z,
12 |   family,
13 |   ncomp,
14 |   maxcomp,
15 |   fold,
16 |   nfolds,
17 |   weights,
18 |   offset,
19 |   method,
20 |   sampling,
21 |   penalty,
22 |   control.init,
23 |   control.alg,
24 |   control.cv
25 | )
26 | }
27 | \arguments{
28 | \item{train}{train-set matrix of responses (\eqn{n \times m})}
29 | 
30 | \item{test}{test-set matrix of responses (\eqn{n \times m})}
31 | 
32 | \item{X}{matrix of row fixed effects (\eqn{n \times p})}
33 | 
34 | \item{Z}{matrix of column fixed effects (\eqn{m \times q})}
35 | 
36 | \item{family}{a \code{glm} family (see \code{\link{family}} for more details)}
37 | 
38 | \item{ncomp}{ranks of the latent matrix factorization used in cross-validation (default 1 to 10)}
39 | 
40 | \item{maxcomp}{maximum rank allowed in the cross-validation exploration}
41 | 
42 | \item{fold}{integer number identifying the current fold}
43 | 
44 | \item{nfolds}{maximum number of folds in the cross-validation}
45 | 
46 | \item{weights}{an optional matrix of weights (\eqn{n \times m})}
47 | 
48 | \item{offset}{an optional matrix of offset values (\eqn{n \times m}), that specify a known component to be included in the linear predictor.}
49 | 
50 | \item{method}{estimation method to minimize the negative penalized log-likelihood}
51 | 
52 | \item{sampling}{sub-sampling strategy to use if \code{method = "sgd"}}
53 | 
54 | \item{penalty}{list of penalty parameters (see \code{\link{set.penalty}} for more details)}
55 | 
56 | \item{control.init}{list of control parameters for the initialization (see \code{\link{set.control.init}} for more details)}
57 | 
58 | \item{control.alg}{list of control parameters for the optimization (see \code{\link{set.control.alg}} for more details)}
59 | 
60 | \item{control.cv}{list of control parameters for the cross-validation (see \code{\link{set.control.cv}} for more details)}
61 | }
62 | \value{
63 | Returns a \code{data.frame} containing the current number of latent factors
64 | in the model (\code{ncomp}), the fold identifier (\code{fold}), the degrees of
65 | freedom, i.e. the number of parameters, of the model (\code{df}), the AIC, BIC
66 | and deviance (respectively, \code{aic}, \code{bic}, \code{dev})
67 | calculated on the train and test sets.
68 | }
69 | \description{
70 | Internal function running a single step of cross-validation for generalized
71 | matrix factorization (GMF) models and calculating some goodness-of-fit measures
72 | on the train and test sets.
73 | }
74 | \keyword{internal}
75 | 


--------------------------------------------------------------------------------
/man/sim.gmf.data.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{sim.gmf.data}
 4 | \alias{sim.gmf.data}
 5 | \title{Simulate non-Gaussian data from a GMF model}
 6 | \usage{
 7 | sim.gmf.data(n = 100, m = 20, ncomp = 5, family = gaussian(), dispersion = 1)
 8 | }
 9 | \arguments{
10 | \item{n}{number of observations}
11 | 
12 | \item{m}{number of variables}
13 | 
14 | \item{ncomp}{rank of the latent matrix factorization}
15 | 
16 | \item{family}{a \code{glm} family (see \code{\link{family}} for more details)}
17 | 
18 | \item{dispersion}{a positive dispersion parameter}
19 | }
20 | \value{
21 | A list containing the following objects:
22 | \itemize{
23 |   \item \code{Y}: simulated response matrix
24 |   \item \code{U}: simulated factor matrix
25 |   \item \code{V}: simulated loading matrix
26 |   \item \code{eta}: linear predictor matrix
27 |   \item \code{mu}: conditional mean matrix
28 |   \item \code{phi}: scalar dispersion parameter
29 |   \item \code{family}: model family
30 |   \item \code{ncomp}: rank of the latent matrix factorization
31 |   \item \code{param}: a list containing time, phase, frequency and amplitude vectors used to generate \code{U}
32 | }
33 | }
34 | \description{
35 | Simulate synthetic non-Gaussian data from a generalized matrix factorization (GMF) model.
36 | }
37 | \details{
38 | The loadings, \code{V}, are independently sampled from a standard normal distribution.
39 | The scores, \code{U}, are simulated according to sinusoidal signals evaluated at different
40 | phases, frequencies and amplitudes. These parameters are randomly sampled from independent
41 | uniform distributions.
42 | }
43 | \examples{
44 | library(sgdGMF)
45 | 
46 | # Set the data dimensions
47 | n = 100; m = 20; d = 5
48 | 
49 | # Generate data using Poisson, Binomial and Gamma models
50 | data_pois = sim.gmf.data(n = n, m = m, ncomp = d, family = poisson())
51 | data_bin = sim.gmf.data(n = n, m = m, ncomp = d, family = binomial())
52 | data_gam = sim.gmf.data(n = n, m = m, ncomp = d, family = Gamma(link = "log"), dispersion = 0.25)
53 | 
54 | # Compare the results
55 | oldpar = par(no.readonly = TRUE)
56 | par(mfrow = c(3,3), mar = c(1,1,3,1))
57 | image(data_pois$Y, axes = FALSE, main = expression(Y[Pois]))
58 | image(data_pois$mu, axes = FALSE, main = expression(mu[Pois]))
59 | image(data_pois$U, axes = FALSE, main = expression(U[Pois]))
60 | image(data_bin$Y, axes = FALSE, main = expression(Y[Bin]))
61 | image(data_bin$mu, axes = FALSE, main = expression(mu[Bin]))
62 | image(data_bin$U, axes = FALSE, main = expression(U[Bin]))
63 | image(data_gam$Y, axes = FALSE, main = expression(Y[Gam]))
64 | image(data_gam$mu, axes = FALSE, main = expression(mu[Gam]))
65 | image(data_gam$U, axes = FALSE, main = expression(U[Gam]))
66 | par(oldpar)
67 | 
68 | }
69 | 


--------------------------------------------------------------------------------
/man/predict.sgdgmf.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sgdGMF-class.R
 3 | \name{predict.sgdgmf}
 4 | \alias{predict.sgdgmf}
 5 | \title{Predict method for GMF models}
 6 | \usage{
 7 | \method{predict}{sgdgmf}(
 8 |   object,
 9 |   ...,
10 |   newY = NULL,
11 |   newX = NULL,
12 |   type = c("link", "response", "terms", "coef"),
13 |   parallel = FALSE,
14 |   nthreads = 1
15 | )
16 | }
17 | \arguments{
18 | \item{object}{an object of class \code{sgdgmf}}
19 | 
20 | \item{...}{further arguments passed to or from other methods}
21 | 
22 | \item{newY}{optionally, a matrix of new response variables}
23 | 
24 | \item{newX}{optionally, a matrix of new covariate values}
25 | 
26 | \item{type}{the type of prediction which should be returned}
27 | 
28 | \item{parallel}{if \code{TRUE}, allows for parallel computing using the package \code{foreach}}
29 | 
30 | \item{nthreads}{number of cores to be used in parallel (only if \code{parallel=TRUE})}
31 | }
32 | \value{
33 | If \code{type="link"} or \code{type="response"}, a matrix of predictions.
34 | If \code{type="terms"}, a list of matrices containing the fields \code{XB}, \code{AZ} and \code{UV}.
35 | If \code{type="coef"}, a list of matrices containing the fields \code{B}, \code{A}, \code{U} and \code{V}.
36 | }
37 | \description{
38 | Computes the predictions of a GMF model. Out-of-sample predictions for a new
39 | set of responses and covariates are computed via MLE, by keeping fixed the values
40 | of the estimated \code{B} and \code{V} and maximizing the likelihood with respect
41 | to \code{A} and \code{U}.
42 | }
43 | \details{
44 | If \code{newY} and \code{newX} are omitted, the predictions are based on the data
45 | used for the fit. In that case, the predictions correspond to the fitted values.
46 | If \code{newY} and \code{newX} are provided, a corresponding set of \code{A} and
47 | \code{U} are estimated via maximum likelihood using the \code{glm.fit} function.
48 | By doing so, \code{B} and \code{V} are kept fixed.
49 | }
50 | \examples{
51 | # Load the sgdGMF package
52 | library(sgdGMF)
53 | 
54 | # Generate data from a Poisson model
55 | data = sim.gmf.data(n = 120, m = 20, ncomp = 5, family = poisson())
56 | train = sample(1:120, size = 100)
57 | test = setdiff(1:120, train)
58 | 
59 | Y = data$Y[train, ]
60 | newY = data$Y[test, ]
61 | 
62 | # Fit a GMF model with 3 latent factors
63 | gmf = sgdgmf.fit(Y, ncomp = 3, family = poisson())
64 | 
65 | # Get the fitted values of a GMF model
66 | str(predict(gmf)) # returns the overall fitted values in link scale
67 | str(predict(gmf, type = "response")) # returns the overall fitted values in response scale
68 | str(predict(gmf, type = "terms")) # returns the partial fitted values in link scale
69 | str(predict(gmf, newY = newY)) # returns the predictions for the new set of responses
70 | 
71 | }
72 | 


--------------------------------------------------------------------------------
/tests/testcpp/test-misc.cpp:
--------------------------------------------------------------------------------
 1 | // test-misc.cpp
 2 | // author: Cristian Castiglione
 3 | // creation: 01/10/2023
 4 | // last change: 13/10/2023
 5 | 
 6 | #include "misc.h"
 7 | 
 8 | using namespace glm;
 9 | 
10 | void cpp_print_link_family (const std::unique_ptr<Family> & family) { // Print the family and link names, plus the link evaluated at three fixed means, to Rcpp::Rcout
11 |     Rcpp::Rcout << "Family: " << family->getfamily() << "\n";
12 |     Rcpp::Rcout << "Link: " << family->getlink() << "\n";
13 |     Rcpp::Rcout << "Mu: " << arma::vec{0.25, 0.5, 0.75} << "\n"; // fixed probe values for the mean
14 |     Rcpp::Rcout << "Eta: " << family->linkfun(arma::vec{0.25, 0.5, 0.75}) << "\n"; // link function applied to the same probe values
15 | }
16 | 
17 | //' @keywords internal
18 | // [[Rcpp::export("cpp.make.link.family")]]
19 | void cpp_make_link_family (const std::string & familyname, const std::string & linkname) {
20 |     std::unique_ptr<Family> family = make_family(familyname, linkname); // build the Family object from its family and link names
21 |     cpp_print_link_family(family); // print its names and a sample link evaluation
22 | }
23 | 
24 | //' @keywords internal
25 | // [[Rcpp::export("cpp.get.data.bounds")]]
26 | Rcpp::List cpp_get_data_bounds (
27 |     const double & eps, const double & ymin, const double & ymax, 
28 |     const std::string & familyname, const std::string & linkname
29 | ) { // Test wrapper around set_data_bounds: returns the family/link names and the y, mu and eta limits as a named list
30 |     std::unique_ptr<Family> family = make_family(familyname, linkname);
31 | 
32 |     double mulo, muup, etalo, etaup; // left uninitialized here; presumably filled by set_data_bounds through reference parameters (confirm in misc.h)
33 |     set_data_bounds(mulo, muup, etalo, etaup, eps, ymin, ymax, family);
34 | 
35 |     Rcpp::List out; // collect the inputs and the computed bounds
36 |     out["family"] = family->getfamily();
37 |     out["link"] = family->getlink();
38 |     out["ylim"] = arma::vec{ymin, ymax}; // the data range exactly as passed in
39 |     out["mulim"] = arma::vec{mulo, muup};
40 |     out["etalim"] = arma::vec{etalo, etaup};
41 | 
42 |     return out;
43 | }
44 | 
45 | //' @keywords internal
46 | // [[Rcpp::export("cpp.get.uv.penalty")]]
47 | Rcpp::List cpp_get_uv_penalty (
48 |     const arma::vec & pen, 
49 |     const int & p, const int & q, const int & d
50 | ) { // Test wrapper around set_uv_penalty: returns the two penalty vectors as a named list
51 |     arma::vec penu(p+q+d), penv(p+q+d); // both vectors have length p + q + d
52 |     set_uv_penalty(penu, penv, pen, p, q, d); // presumably fills penu and penv in place (confirm in misc.h)
53 |     
54 |     Rcpp::List out;
55 |     out["penu"] = penu;
56 |     out["penv"] = penv;
57 | 
58 |     return out;
59 | }
60 | 
61 | //' @keywords internal
62 | // [[Rcpp::export("cpp.get.uv.indices")]]
63 | Rcpp::List cpp_get_uv_indices (
64 |     const int & p, const int & q, const int & d
65 | ) { // Test wrapper around set_uv_indices: returns the two index vectors as a named list
66 |     arma::uvec idu, idv; // start empty; presumably sized and filled by set_uv_indices (confirm in misc.h)
67 |     set_uv_indices(idu, idv, p, q, d);
68 | 
69 |     Rcpp::List out;
70 |     out["idu"] = idu;
71 |     out["idv"] = idv;
72 | 
73 |     return out;
74 | }
75 | 
76 | //' @keywords internal
77 | // [[Rcpp::export("cpp.sample.minibatch")]]
78 | std::list<arma::uvec> cpp_sample_minibatch (
79 |     const int & n, const int & size, const bool & randomize
80 | ) { // Thin test wrapper: forwards its arguments unchanged to sample_chunks and returns the result
81 |     return sample_chunks(n, size, randomize);
82 | }
83 | 
84 | //' @keywords internal
85 | // [[Rcpp::export("cpp.select.minibatch")]]
86 | int cpp_select_minibatch (const int & iter, const int & nchunks) { // Thin test wrapper: forwards its arguments unchanged to select_chunk
87 |     return select_chunk(iter, nchunks);
88 | }


--------------------------------------------------------------------------------
/src/variance.cpp:
--------------------------------------------------------------------------------
 1 | // variance.cpp
 2 | // author: Cristian Castiglione
 3 | // creation: 08/11/2023
 4 | // last change: 21/11/2024
 5 | 
 6 | #include "variance.h"
 7 | 
 8 | using namespace glm;
 9 | 
10 | // Constant variance: V(mu) = 1 for every entry; phi is accepted but not used by any method below
11 | bool Constant::validmu (const arma::mat & mu) {return true;} // every mean value is accepted
12 | arma::mat Constant::initfun (const arma::mat & y) {return y;} // returns y unchanged
13 | arma::mat Constant::varfun (const arma::mat & mu, const double & phi) {return arma::ones(arma::size(mu));} // matrix of ones with the same size as mu
14 | arma::mat Constant::devfun (const arma::mat & y, const arma::mat & mu, const double & phi) { // elementwise squared error (y - mu)^2
15 |     return arma::square(y - mu);
16 | }
17 | 
18 | // Linear variance: V(mu) = mu; devfun has the form of the Poisson unit deviance; phi is not used
19 | bool Linear::validmu (const arma::mat & mu) {return utils::all(mu > 0);} // valid only when utils::all(mu > 0) holds
20 | arma::mat Linear::initfun (const arma::mat & y) {return arma::clamp(y, 0.1, infty);} // entries of y below 0.1 are raised to 0.1 (infty is defined outside this file)
21 | arma::mat Linear::varfun (const arma::mat & mu, const double & phi) {return mu;}
22 | arma::mat Linear::devfun (const arma::mat & y, const arma::mat & mu, const double & phi) { // 2 * (xlogx(y) - y*log(mu) - (y - mu)), elementwise
23 |     return 2 * (utils::xlogx(y) - y % arma::log(mu) - (y - mu)); // utils::xlogx is presumably y*log(y) with a safe value at y = 0 (confirm in utils)
24 | }
25 | 
26 | // Squared variance: V(mu) = mu^2; devfun has the form of the Gamma unit deviance; phi is not used
27 | bool Squared::validmu (const arma::mat & mu) {return utils::all(mu > 0);} // valid only when utils::all(mu > 0) holds
28 | arma::mat Squared::initfun (const arma::mat & y) {return arma::clamp(y, 0.1, infty);} // entries of y below 0.1 are raised to 0.1 (infty is defined outside this file)
29 | arma::mat Squared::varfun (const arma::mat & mu, const double & phi) {return mu % mu;} // % is the Armadillo elementwise product
30 | arma::mat Squared::devfun (const arma::mat & y, const arma::mat & mu, const double & phi) { // -2 * (log(y/mu) - (y - mu)/mu), elementwise
31 |     return - 2 * (arma::log(y / mu) - (y - mu) / mu);
32 | }
33 | 
34 | // Cubic variance: V(mu) = mu^3; devfun has the form of the inverse-Gaussian unit deviance; phi is not used
35 | bool Cubic::validmu (const arma::mat & mu) {return utils::all(mu > 0);} // valid only when utils::all(mu > 0) holds
36 | arma::mat Cubic::initfun (const arma::mat & y) {return arma::clamp(y, 0.1, infty);} // entries of y below 0.1 are raised to 0.1 (infty is defined outside this file)
37 | arma::mat Cubic::varfun (const arma::mat & mu, const double & phi) {return mu % mu % mu;} // % is the Armadillo elementwise product
38 | arma::mat Cubic::devfun (const arma::mat & y, const arma::mat & mu, const double & phi) { // (y - mu)^2 / (y * mu^2), elementwise
39 |     return arma::square(y - mu) / (y % mu % mu);
40 | }
41 | 
42 | // cSquared variance: V(mu) = mu * (1 - mu); devfun has the form of the Bernoulli/binomial deviance; phi is not used
43 | bool cSquared::validmu (const arma::mat & mu) {return utils::all(mu > 0) && utils::all(mu < 1);} // valid only when mu lies strictly between 0 and 1
44 | arma::mat cSquared::initfun (const arma::mat & y) {return 0.90 * (y - 0.5) + 0.5;} // shrinks y linearly toward 0.5 (0 maps to 0.05, 1 maps to 0.95)
45 | arma::mat cSquared::varfun (const arma::mat & mu, const double & phi) {return mu % (1 - mu);} // % is the Armadillo elementwise product
46 | arma::mat cSquared::devfun (const arma::mat & y, const arma::mat & mu, const double & phi) { // -2 * (y*log(mu) + (1 - y)*log(1 - mu)), elementwise
47 |     return - 2 * (y % arma::log(mu) + (1 - y) % arma::log1p(-mu)); // log1p(-mu) evaluates log(1 - mu)
48 | }
49 | 
50 | // Negative-Binomial variance: V(mu) = mu * (1 + mu / phi); the only variance class in this file whose methods use phi
51 | bool NBVariance::validmu (const arma::mat & mu) {return utils::all(mu > 0);} // valid only when utils::all(mu > 0) holds
52 | arma::mat NBVariance::initfun (const arma::mat & y) {return arma::clamp(y, 0.1, infty);} // entries of y below 0.1 are raised to 0.1 (infty is defined outside this file)
53 | arma::mat NBVariance::varfun (const arma::mat & mu, const double & phi) {return mu % (1 + mu / phi);} // phi enters as a divisor, so larger phi brings V(mu) closer to mu
54 | arma::mat NBVariance::devfun (const arma::mat & y, const arma::mat & mu, const double & phi) { // 2 * (xlogx(y) - y*log(mu) - (y + phi) * (log(y + phi) - log(mu + phi))), elementwise
55 |     return 2 * (utils::xlogx(y) - y % arma::log(mu) - (y + phi) % (arma::log(y + phi) - arma::log(mu + phi)));
56 | }
57 | 


--------------------------------------------------------------------------------
/tests/testthat/test-control.R:
--------------------------------------------------------------------------------
 1 | # file: test-control.R
 2 | # author: Cristian Castiglione
 3 | # creation: 05/02/2024
 4 | # last change: 04/10/2024
 5 | 
 6 | testthat::test_that("Set AIRWLS control parameters", {
 7 |   # Empty call, then a valid parametrized call: both must return a list
 8 |   testthat::expect_true(is.list(set.control.airwls()))
 9 |   testthat::expect_true(is.list(set.control.airwls(maxiter = 200, stepsize = 0.5)))
10 |   # Wrongly parametrized call I: right parameter, but wrong value (a warning is expected)
11 |   testthat::expect_warning(set.control.airwls(stepsize = -1))
12 |   testthat::expect_warning(set.control.airwls(stepsize = TRUE))
13 |   # Wrongly parametrized call II: nonexistent parameter (an error is expected)
14 |   testthat::expect_error(set.control.airwls(foo = TRUE))
15 | })
16 | 
17 | testthat::test_that("Set Newton control parameters", {
18 |   # Empty call, then a valid parametrized call: both must return a list
19 |   testthat::expect_true(is.list(set.control.newton()))
20 |   testthat::expect_true(is.list(set.control.newton(maxiter = 200, stepsize = 0.5)))
21 |   # Wrongly parametrized call I: right parameter, but wrong value (a warning is expected)
22 |   testthat::expect_warning(set.control.newton(stepsize = -1))
23 |   testthat::expect_warning(set.control.newton(stepsize = TRUE))
24 |   # Wrongly parametrized call II: nonexistent parameter (an error is expected)
25 |   testthat::expect_error(set.control.newton(foo = TRUE))
26 | })
27 | 
28 | testthat::test_that("Set C-SGD control parameters", {
29 |   # Empty call, then a valid parametrized call: both must return a list
30 |   testthat::expect_true(is.list(set.control.coord.sgd()))
31 |   testthat::expect_true(is.list(set.control.coord.sgd(maxiter = 500, rate0 = 0.5)))
32 |   # Wrongly parametrized call I: right parameter, but wrong value (a warning is expected)
33 |   testthat::expect_warning(set.control.coord.sgd(rate0 = -1))
34 |   testthat::expect_warning(set.control.coord.sgd(rate0 = TRUE))
35 |   # Wrongly parametrized call II: nonexistent parameter (an error is expected)
36 |   testthat::expect_error(set.control.coord.sgd(foo = TRUE))
37 | })
38 | 
39 | testthat::test_that("Set B-SGD control parameters", {
40 |   # Empty call, then a valid parametrized call: both must return a list
41 |   testthat::expect_true(is.list(set.control.block.sgd()))
42 |   testthat::expect_true(is.list(set.control.block.sgd(maxiter = 500, rate0 = 0.5)))
43 |   # Wrongly parametrized call I: right parameter, but wrong value (a warning is expected)
44 |   testthat::expect_warning(set.control.block.sgd(rate0 = -1))
45 |   testthat::expect_warning(set.control.block.sgd(rate0 = TRUE))
46 |   # Wrongly parametrized call II: nonexistent parameter (an error is expected)
47 |   testthat::expect_error(set.control.block.sgd(foo = TRUE))
48 | })
49 | 
50 | testthat::test_that("Set generic control parameters", { # set.control.alg called with an empty control list for each method/sampling pair
51 |   ctr.airwls = set.control.alg(method = "airwls", control = list())
52 |   ctr.newton = set.control.alg(method = "newton", control = list())
53 |   ctr.csgd = set.control.alg(method = "sgd", sampling = "coord", control = list())
54 |   ctr.bsgd = set.control.alg(method = "sgd", sampling = "block", control = list())
55 | 
56 |   testthat::expect_true(is.list(ctr.airwls)) # every call above must return a list
57 |   testthat::expect_true(is.list(ctr.newton))
58 |   testthat::expect_true(is.list(ctr.csgd))
59 |   testthat::expect_true(is.list(ctr.bsgd))
60 | })
61 | 


--------------------------------------------------------------------------------
/src/variance.h:
--------------------------------------------------------------------------------
 1 | // variance.h
 2 | // author: Cristian Castiglione
 3 | // creation: 08/11/2023
 4 | // last change: 21/11/2024
 5 | 
 6 | #ifndef VARIANCE_H
 7 | #define VARIANCE_H
 8 | 
 9 | #include <RcppArmadillo.h>
10 | #include "utils.h"
11 | 
12 | namespace glm {
13 | 
14 | class Variance { // Abstract interface for a variance function: the four member functions below are pure virtual
15 |     public:
16 |         std::string varf = "Variance"; // label of the variance function; each subclass constructor overwrites it
17 |         virtual bool validmu (const arma::mat & mu) = 0; // whether the mean matrix mu is admissible
18 |         virtual arma::mat varfun (const arma::mat & mu, const double & phi) = 0; // variance function evaluated at mu, with parameter phi
19 |         virtual arma::mat initfun (const arma::mat & y) = 0; // maps the data y to a matrix; presumably a starting value for mu (confirm against callers)
20 |         virtual arma::mat devfun (const arma::mat & y, const arma::mat & mu, const double & phi) = 0; // deviance contributions of y given mu
21 |         virtual ~Variance () {} // virtual destructor, so subclasses can be destroyed through a Variance pointer
22 | };
23 | 
24 | class Constant : public Variance { // Variance implementation labelled "const"; member functions are only declared here
25 |     public:
26 |         bool validmu (const arma::mat & mu);
27 |         arma::mat initfun (const arma::mat & y);
28 |         arma::mat varfun (const arma::mat & mu, const double & phi);
29 |         arma::mat devfun (const arma::mat & y, const arma::mat & mu, const double & phi);
30 |         Constant () {this->varf = "const";} // overwrite the base-class label
31 | };
32 | 
33 | class Linear : public Variance { // Variance implementation labelled "mu"; member functions are only declared here
34 |     public:
35 |         bool validmu (const arma::mat & mu);
36 |         arma::mat initfun (const arma::mat & y);
37 |         arma::mat varfun (const arma::mat & mu, const double & phi);
38 |         arma::mat devfun (const arma::mat & y, const arma::mat & mu, const double & phi);
39 |         Linear () {this->varf = "mu";} // overwrite the base-class label
40 | };
41 | 
42 | class Squared : public Variance { // Variance implementation labelled "mu^2"; member functions are only declared here
43 |     public:
44 |         bool validmu (const arma::mat & mu);
45 |         arma::mat initfun (const arma::mat & y);
46 |         arma::mat varfun (const arma::mat & mu, const double & phi);
47 |         arma::mat devfun (const arma::mat & y, const arma::mat & mu, const double & phi);
48 |         Squared () {this->varf = "mu^2";} // overwrite the base-class label
49 | };
50 | 
51 | class Cubic : public Variance { // Variance implementation labelled "mu^3"; member functions are only declared here
52 |     public:
53 |         bool validmu (const arma::mat & mu);
54 |         arma::mat initfun (const arma::mat & y);
55 |         arma::mat varfun (const arma::mat & mu, const double & phi);
56 |         arma::mat devfun (const arma::mat & y, const arma::mat & mu, const double & phi);
57 |         Cubic () {this->varf = "mu^3";} // overwrite the base-class label
58 | };
59 | 
60 | class cSquared : public Variance { // Variance implementation labelled "mu(1-mu)"; member functions are only declared here
61 |     public:
62 |         bool validmu (const arma::mat & mu);
63 |         arma::mat initfun (const arma::mat & y);
64 |         arma::mat varfun (const arma::mat & mu, const double & phi);
65 |         arma::mat devfun (const arma::mat & y, const arma::mat & mu, const double & phi);
66 |         cSquared () {this->varf = "mu(1-mu)";} // overwrite the base-class label
67 | };
68 | 
69 | class NBVariance : public Variance { // Variance implementation labelled "mu(1+t*mu)"; member functions are only declared here
70 |     public:
71 |         bool validmu (const arma::mat & mu);
72 |         arma::mat initfun (const arma::mat & y);
73 |         arma::mat varfun (const arma::mat & mu, const double & phi);
74 |         arma::mat devfun (const arma::mat & y, const arma::mat & mu, const double & phi);
75 |         NBVariance () {this->varf = "mu(1+t*mu)";} // overwrite the base-class label
76 | };
77 | 
78 | }
79 | 
80 | #endif


--------------------------------------------------------------------------------
/man/cpp.fit.block.sgd.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/RcppExports.R
  3 | \name{cpp.fit.block.sgd}
  4 | \alias{cpp.fit.block.sgd}
  5 | \title{Fit a GMF model using the adaptive SGD with block-wise minibatch subsampling}
  6 | \usage{
  7 | cpp.fit.block.sgd(
  8 |   Y,
  9 |   X,
 10 |   B,
 11 |   A,
 12 |   Z,
 13 |   U,
 14 |   V,
 15 |   O,
 16 |   W,
 17 |   familyname,
 18 |   linkname,
 19 |   varfname,
 20 |   ncomp,
 21 |   lambda,
 22 |   maxiter = 1000L,
 23 |   eps = 0.01,
 24 |   nafill = 10L,
 25 |   tol = 1e-08,
 26 |   size1 = 100L,
 27 |   size2 = 100L,
 28 |   burn = 0.75,
 29 |   rate0 = 0.01,
 30 |   decay = 0.01,
 31 |   damping = 0.001,
 32 |   rate1 = 0.95,
 33 |   rate2 = 0.99,
 34 |   parallel = FALSE,
 35 |   nthreads = 1L,
 36 |   verbose = TRUE,
 37 |   frequency = 250L,
 38 |   progress = FALSE
 39 | )
 40 | }
 41 | \arguments{
 42 | \item{Y}{matrix of responses (\eqn{n \times m})}
 43 | 
 44 | \item{X}{matrix of row fixed effects (\eqn{n \times p})}
 45 | 
 46 | \item{B}{initial row-effect matrix (\eqn{m \times p})}
 47 | 
 48 | \item{A}{initial column-effect matrix (\eqn{n \times q})}
 49 | 
 50 | \item{Z}{matrix of column fixed effects (\eqn{m \times q})}
 51 | 
 52 | \item{U}{initial factor matrix (\eqn{n \times d})}
 53 | 
 54 | \item{V}{initial loading matrix (\eqn{m \times d})}
 55 | 
 56 | \item{O}{matrix of constant offset (\eqn{n \times m})}
 57 | 
 58 | \item{W}{matrix of constant weights (\eqn{n \times m})}
 59 | 
 60 | \item{familyname}{a \code{glm} model family name}
 61 | 
 62 | \item{linkname}{a \code{glm} link function name}
 63 | 
 64 | \item{varfname}{variance function name}
 65 | 
 66 | \item{ncomp}{rank of the latent matrix factorization}
 67 | 
 68 | \item{lambda}{penalization parameters}
 69 | 
 70 | \item{maxiter}{maximum number of iterations}
 71 | 
 72 | \item{eps}{shrinkage factor for extreme predictions}
 73 | 
 74 | \item{nafill}{how often the missing values are updated}
 75 | 
 76 | \item{tol}{tolerance threshold for the stopping criterion}
 77 | 
 78 | \item{size1}{row-minibatch dimension}
 79 | 
 80 | \item{size2}{column-minibatch dimension}
 81 | 
 82 | \item{burn}{burn-in period in which the learning rate is not decreased}
 83 | 
 84 | \item{rate0}{initial learning rate}
 85 | 
 86 | \item{decay}{decay rate of the learning rate}
 87 | 
 88 | \item{damping}{diagonal damping factor for the Hessian matrix}
 89 | 
 90 | \item{rate1}{decay rate of the first moment estimate of the gradient}
 91 | 
 92 | \item{rate2}{decay rate of the second moment estimate of the gradient}
 93 | 
 94 | \item{parallel}{if \code{TRUE}, allows for parallel computing}
 95 | 
 96 | \item{nthreads}{number of cores to be used in parallel}
 97 | 
 98 | \item{verbose}{if \code{TRUE}, print the optimization status}
 99 | 
100 | \item{frequency}{how often the optimization status is printed}
101 | 
102 | \item{progress}{if \code{TRUE}, print a progress bar}
103 | }
104 | \description{
105 | Fit a GMF model using the adaptive SGD with block-wise minibatch subsampling
106 | }
107 | \keyword{internal}
108 | 


--------------------------------------------------------------------------------
/man/residuals.initgmf.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/initGMF-class.R
 3 | \name{residuals.initgmf}
 4 | \alias{residuals.initgmf}
 5 | \alias{resid.initgmf}
 6 | \title{Extract the residuals of an initialized GMF model}
 7 | \usage{
 8 | \method{residuals}{initgmf}(
 9 |   object,
10 |   ...,
11 |   type = c("deviance", "pearson", "working", "response", "link"),
12 |   partial = FALSE,
13 |   normalize = FALSE,
14 |   fillna = FALSE,
15 |   spectrum = FALSE,
16 |   ncomp = 50
17 | )
18 | 
19 | \method{resid}{initgmf}(
20 |   object,
21 |   ...,
22 |   type = c("deviance", "pearson", "working", "response", "link"),
23 |   partial = FALSE,
24 |   normalize = FALSE,
25 |   fillna = FALSE,
26 |   spectrum = FALSE,
27 |   ncomp = 50
28 | )
29 | }
30 | \arguments{
31 | \item{object}{an object of class \code{initgmf}}
32 | 
33 | \item{...}{further arguments passed to or from other methods}
34 | 
35 | \item{type}{the type of residuals which should be returned}
36 | 
37 | \item{partial}{if \code{TRUE}, computes the residuals excluding the matrix factorization from the linear predictor}
38 | 
39 | \item{normalize}{if \code{TRUE}, standardize the residuals column-by-column}
40 | 
41 | \item{fillna}{if \code{TRUE}, fills \code{NA} values column-by-column}
42 | 
43 | \item{spectrum}{if \code{TRUE}, returns the eigenvalues of the residual covariance matrix}
44 | 
45 | \item{ncomp}{number of eigenvalues to be calculated (only if \code{spectrum=TRUE})}
46 | }
47 | \value{
48 | If \code{spectrum=FALSE}, a matrix containing the selected residuals.
49 | If \code{spectrum=TRUE}, a list containing the residuals (\code{res}), the first \code{ncomp}
50 | eigenvalues of the residual covariance matrix (\code{lambdas}), the variance explained by the first
51 | \code{ncomp} principal components of the residuals (\code{explained.var}), the variance not
52 | explained by the first \code{ncomp} principal components of the residuals (\code{residual.var}),
53 | the total variance of the residuals (\code{total.var}).
54 | }
55 | \description{
56 | Extract the residuals of an initialized GMF model and, if required, compute
57 | the eigenvalues of the residuals covariance/correlation matrix.
58 | Moreover, if required, return the partial residual of the model obtained by
59 | excluding the matrix decomposition from the linear predictor.
60 | }
61 | \examples{
62 | # Load the sgdGMF package
63 | library(sgdGMF)
64 | 
65 | # Generate data from a Poisson model
66 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson())
67 | 
68 | # Initialize a GMF model with 3 latent factors
69 | init = sgdgmf.init(data$Y, ncomp = 3, family = poisson())
70 | 
71 | # Get the deviance residuals of a GMF model
72 | str(residuals(init)) # returns the overall deviance residuals
73 | str(residuals(init, partial = TRUE)) # returns the partial residuals
74 | str(residuals(init, spectrum = TRUE)) # returns the eigenvalues of the residual var-cov matrix
75 | 
76 | }
77 | \seealso{
78 | \code{\link{residuals.sgdgmf}} and \code{\link{resid.sgdgmf}} for more details on the residual computation.
79 | }
80 | 


--------------------------------------------------------------------------------
/man/cpp.fit.coord.sgd.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/RcppExports.R
  3 | \name{cpp.fit.coord.sgd}
  4 | \alias{cpp.fit.coord.sgd}
  5 | \title{Fit a GMF model using the adaptive SGD with coordinate-wise minibatch subsampling algorithm}
  6 | \usage{
  7 | cpp.fit.coord.sgd(
  8 |   Y,
  9 |   X,
 10 |   B,
 11 |   A,
 12 |   Z,
 13 |   U,
 14 |   V,
 15 |   O,
 16 |   W,
 17 |   familyname,
 18 |   linkname,
 19 |   varfname,
 20 |   ncomp,
 21 |   lambda,
 22 |   maxiter = 1000L,
 23 |   eps = 0.01,
 24 |   nafill = 10L,
 25 |   tol = 1e-08,
 26 |   size1 = 100L,
 27 |   size2 = 100L,
 28 |   burn = 0.75,
 29 |   rate0 = 0.01,
 30 |   decay = 0.01,
 31 |   damping = 0.001,
 32 |   rate1 = 0.95,
 33 |   rate2 = 0.99,
 34 |   parallel = FALSE,
 35 |   nthreads = 1L,
 36 |   verbose = TRUE,
 37 |   frequency = 250L,
 38 |   progress = FALSE
 39 | )
 40 | }
 41 | \arguments{
 42 | \item{Y}{matrix of responses (\eqn{n \times m})}
 43 | 
 44 | \item{X}{matrix of row fixed effects (\eqn{n \times p})}
 45 | 
 46 | \item{B}{initial row-effect matrix (\eqn{m \times p})}
 47 | 
 48 | \item{A}{initial column-effect matrix (\eqn{n \times q})}
 49 | 
 50 | \item{Z}{matrix of column fixed effects (\eqn{m \times q})}
 51 | 
 52 | \item{U}{initial factor matrix (\eqn{n \times d})}
 53 | 
 54 | \item{V}{initial loading matrix (\eqn{m \times d})}
 55 | 
 56 | \item{O}{matrix of constant offset (\eqn{n \times m})}
 57 | 
 58 | \item{W}{matrix of constant weights (\eqn{n \times m})}
 59 | 
 60 | \item{familyname}{a \code{glm} model family name}
 61 | 
 62 | \item{linkname}{a \code{glm} link function name}
 63 | 
 64 | \item{varfname}{variance function name}
 65 | 
 66 | \item{ncomp}{rank of the latent matrix factorization}
 67 | 
 68 | \item{lambda}{penalization parameters}
 69 | 
 70 | \item{maxiter}{maximum number of iterations}
 71 | 
 72 | \item{eps}{shrinkage factor for extreme predictions}
 73 | 
 74 | \item{nafill}{how often the missing values are updated}
 75 | 
 76 | \item{tol}{tolerance threshold for the stopping criterion}
 77 | 
 78 | \item{size1}{row-minibatch dimension}
 79 | 
 80 | \item{size2}{column-minibatch dimension}
 81 | 
 82 | \item{burn}{burn-in period in which the learning rate is not decreased}
 83 | 
 84 | \item{rate0}{initial learning rate}
 85 | 
 86 | \item{decay}{decay rate of the learning rate}
 87 | 
 88 | \item{damping}{diagonal damping factor for the Hessian matrix}
 89 | 
 90 | \item{rate1}{decay rate of the first moment estimate of the gradient}
 91 | 
 92 | \item{rate2}{decay rate of the second moment estimate of the gradient}
 93 | 
 94 | \item{parallel}{if \code{TRUE}, allows for parallel computing}
 95 | 
 96 | \item{nthreads}{number of cores to be used in parallel}
 97 | 
 98 | \item{verbose}{if \code{TRUE}, print the optimization status}
 99 | 
100 | \item{frequency}{how often the optimization status is printed}
101 | 
102 | \item{progress}{if \code{TRUE}, print a progress bar}
103 | }
104 | \description{
105 | Fit a GMF model using the adaptive SGD with coordinate-wise minibatch subsampling algorithm
106 | }
107 | \keyword{internal}
108 | 


--------------------------------------------------------------------------------
/man/cpp.fit.random.block.sgd.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/RcppExports.R
  3 | \name{cpp.fit.random.block.sgd}
  4 | \alias{cpp.fit.random.block.sgd}
  5 | \title{Fit a GMF model using the adaptive SGD with block-wise minibatch subsampling}
  6 | \usage{
  7 | cpp.fit.random.block.sgd(
  8 |   Y,
  9 |   X,
 10 |   B,
 11 |   A,
 12 |   Z,
 13 |   U,
 14 |   V,
 15 |   O,
 16 |   W,
 17 |   familyname,
 18 |   linkname,
 19 |   varfname,
 20 |   ncomp,
 21 |   lambda,
 22 |   maxiter = 1000L,
 23 |   eps = 0.01,
 24 |   nafill = 10L,
 25 |   tol = 1e-08,
 26 |   size1 = 100L,
 27 |   size2 = 100L,
 28 |   burn = 0.75,
 29 |   rate0 = 0.01,
 30 |   decay = 0.01,
 31 |   damping = 0.001,
 32 |   rate1 = 0.95,
 33 |   rate2 = 0.99,
 34 |   parallel = FALSE,
 35 |   nthreads = 1L,
 36 |   verbose = TRUE,
 37 |   frequency = 250L,
 38 |   progress = FALSE
 39 | )
 40 | }
 41 | \arguments{
 42 | \item{Y}{matrix of responses (\eqn{n \times m})}
 43 | 
 44 | \item{X}{matrix of row fixed effects (\eqn{n \times p})}
 45 | 
 46 | \item{B}{initial row-effect matrix (\eqn{m \times p})}
 47 | 
 48 | \item{A}{initial column-effect matrix (\eqn{n \times q})}
 49 | 
 50 | \item{Z}{matrix of column fixed effects (\eqn{m \times q})}
 51 | 
 52 | \item{U}{initial factor matrix (\eqn{n \times d})}
 53 | 
 54 | \item{V}{initial loading matrix (\eqn{m \times d})}
 55 | 
 56 | \item{O}{matrix of constant offset (\eqn{n \times m})}
 57 | 
 58 | \item{W}{matrix of constant weights (\eqn{n \times m})}
 59 | 
 60 | \item{familyname}{a \code{glm} model family name}
 61 | 
 62 | \item{linkname}{a \code{glm} link function name}
 63 | 
 64 | \item{varfname}{variance function name}
 65 | 
 66 | \item{ncomp}{rank of the latent matrix factorization}
 67 | 
 68 | \item{lambda}{penalization parameters}
 69 | 
 70 | \item{maxiter}{maximum number of iterations}
 71 | 
 72 | \item{eps}{shrinkage factor for extreme predictions}
 73 | 
 74 | \item{nafill}{how often the missing values are updated}
 75 | 
 76 | \item{tol}{tolerance threshold for the stopping criterion}
 77 | 
 78 | \item{size1}{row-minibatch dimension}
 79 | 
 80 | \item{size2}{column-minibatch dimension}
 81 | 
 82 | \item{burn}{burn-in period in which the learning rate is not decreased}
 83 | 
 84 | \item{rate0}{initial learning rate}
 85 | 
 86 | \item{decay}{decay rate of the learning rate}
 87 | 
 88 | \item{damping}{diagonal damping factor for the Hessian matrix}
 89 | 
 90 | \item{rate1}{decay rate of the first moment estimate of the gradient}
 91 | 
 92 | \item{rate2}{decay rate of the second moment estimate of the gradient}
 93 | 
 94 | \item{parallel}{if \code{TRUE}, allows for parallel computing}
 95 | 
 96 | \item{nthreads}{number of cores to be used in parallel}
 97 | 
 98 | \item{verbose}{if \code{TRUE}, print the optimization status}
 99 | 
100 | \item{frequency}{how often the optimization status is printed}
101 | 
102 | \item{progress}{if \code{TRUE}, print a progress bar}
103 | }
104 | \description{
105 | Fit a GMF model using the adaptive SGD with block-wise minibatch subsampling
106 | }
107 | \keyword{internal}
108 | 


--------------------------------------------------------------------------------
/src/misc.h:
--------------------------------------------------------------------------------
 1 | // misc.h
 2 | // author: Cristian Castiglione
 3 | // creation: 30/09/2023
 4 | // last change: 16/11/2024
 5 | 
 6 | #include <RcppArmadillo.h>
 7 | #include <time.h>
 8 | #include "utils.h"
 9 | #include "link.h"
10 | #include "variance.h"
11 | #include "family.h"
12 | #include <memory>
13 | 
14 | using namespace glm;
15 | 
16 | // Create a smart pointer to the appropriate link/variance/family class starting
17 | // from a string identifying the link/variance/family to choose
18 | std::unique_ptr<Link> make_link (const std::string & linkname);
19 | std::unique_ptr<Variance> make_varf (const std::string & varname);
20 | std::unique_ptr<Family> make_family (
21 |     const std::string & familyname, 
22 |     const std::string & linkname, 
23 |     const std::string & varfname);
24 | 
25 | // Set the lower and upper bounds for mu and eta based on the observed data range
26 | // so as to avoid producing predictions with too extreme values
27 | // template<class F, class L>
28 | void set_data_bounds (
29 |     double & mulo, double & muup, double & etalo, double & etaup, 
30 |     const double & eps, const double & ymin, const double & ymax, 
31 |     const std::unique_ptr<Family> & family);
32 | 
33 | // Set the linear predictor trimming the extreme values
34 | void set_eta (
35 |     arma::mat & eta, const arma::mat & offset,
36 |     const arma::mat & u, const arma::mat & v, 
37 |     const double & etamin, const double & etamax);
38 | 
39 | // Get the linear predictor trimming the extreme values
40 | arma::mat get_eta (
41 |     const arma::mat & offset,
42 |     const arma::mat & u, const arma::mat & v, 
43 |     const double & etamin, const double & etamax);
44 | 
45 | // Set the augmented u and v matrices merging by column the fixed and latent effect matrices
46 | void set_uv_matrices (
47 |     arma::mat & u, arma::mat & v,
48 |     const arma::mat & A, const arma::mat & Z,
49 |     const arma::mat & X, const arma::mat & B,
50 |     const arma::mat & U, const arma::mat & V);
51 | 
52 | // Set the indices of the parameters to be updated along the optimization in u and v
53 | void set_uv_indices (
54 |     arma::uvec & idu, arma::uvec & idv, 
55 |     const int & p, const int & q, const int & d);
56 | 
57 | // Set the vectors of penalty parameters to be multiplied columnwise to u and v
58 | void set_uv_penalty (
59 |     arma::vec & penu, arma::vec & penv, const arma::vec & pen,
60 |     const int & p, const int & q, const int & d);
61 | 
62 | // Convert the CPU clock time into the elapsed execution time (seconds)
63 | double exetime (const clock_t & start, const clock_t & end);
64 | 
65 | // Print the optimization state
66 | void print_state (
67 |     const int & iter, const double & div, 
68 |     const double & change, const double & time);
69 | 
70 | // Print the optimization state
71 | void print_state (
72 |     const int & iter, const double & div, 
73 |     const double & change, const double & time,
74 |     const double & scanned);
75 | 
76 | // Divide the data indices in random chunks
77 | std::list<arma::uvec> sample_chunks (
78 |     const int & n, const int & size, const bool & randomize);
79 | 
80 | // Select the appropriate chunk for the current iteration
81 | int select_chunk (const int & iter, const int & nchunks);


--------------------------------------------------------------------------------
/tests/testthat/test-init.R:
--------------------------------------------------------------------------------
 1 | # file: test-init.R
 2 | # author: Cristian Castiglione
 3 | # creation: 05/02/2024
 4 | # last change: 04/10/2024
 5 | 
 6 | testthat::test_that("OLS initialization", {
 7 |   n <- 100; m <- 10; d <- 5; f <- Gamma(link = "log")
 8 |   dat <- sim.gmf.data(n = n, m = m, ncomp = d, family = f, dispersion = 0.5)
 9 |   init <- sgdgmf.init.ols(dat$Y, ncomp = d, family = f)
10 | 
11 |   # The OLS initializer must return a list
12 |   testthat::expect_true(is.list(init))
13 |   # U, V, A and B must all be numeric matrices
14 |   testthat::expect_true(is.numeric(init$U) && is.matrix(init$U))
15 |   testthat::expect_true(is.numeric(init$V) && is.matrix(init$V))
16 |   testthat::expect_true(is.numeric(init$A) && is.matrix(init$A))
17 |   testthat::expect_true(is.numeric(init$B) && is.matrix(init$B))
18 |   # Expected shapes: U is n x d, V is m x d, A is n x 1 and B is m x 1
19 |   testthat::expect_equal(dim(init$U), c(n, d))
20 |   testthat::expect_equal(dim(init$V), c(m, d))
21 |   testthat::expect_equal(dim(init$A), c(n, 1))
22 |   testthat::expect_equal(dim(init$B), c(m, 1))
23 | })
24 | 
25 | testthat::test_that("GLM initialization", {
26 |   n <- 100; m <- 10; d <- 5; f <- Gamma(link = "log")
27 |   dat <- sim.gmf.data(n = n, m = m, ncomp = d, family = f, dispersion = 0.5)
28 |   init <- sgdgmf.init.glm(dat$Y, ncomp = d, family = f)
29 | 
30 |   # The GLM initializer must return a list
31 |   testthat::expect_true(is.list(init))
32 |   # U, V, A and B must all be numeric matrices
33 |   testthat::expect_true(is.numeric(init$U) && is.matrix(init$U))
34 |   testthat::expect_true(is.numeric(init$V) && is.matrix(init$V))
35 |   testthat::expect_true(is.numeric(init$A) && is.matrix(init$A))
36 |   testthat::expect_true(is.numeric(init$B) && is.matrix(init$B))
37 |   # Expected shapes: U is n x d, V is m x d, A is n x 1 and B is m x 1
38 |   testthat::expect_equal(dim(init$U), c(n, d))
39 |   testthat::expect_equal(dim(init$V), c(m, d))
40 |   testthat::expect_equal(dim(init$A), c(n, 1))
41 |   testthat::expect_equal(dim(init$B), c(m, 1))
42 | })
43 | 
44 | testthat::test_that("Random initialization", {
45 |   n <- 100; m <- 10; d <- 5; f <- Gamma(link = "log")
46 |   dat <- sim.gmf.data(n = n, m = m, ncomp = d, family = f, dispersion = 0.5)
47 |   init <- sgdgmf.init.random(dat$Y, ncomp = d, family = f)
48 | 
49 |   # The random initializer must return a list
50 |   testthat::expect_true(is.list(init))
51 |   # U, V, A and B must all be numeric matrices
52 |   testthat::expect_true(is.numeric(init$U) && is.matrix(init$U))
53 |   testthat::expect_true(is.numeric(init$V) && is.matrix(init$V))
54 |   testthat::expect_true(is.numeric(init$A) && is.matrix(init$A))
55 |   testthat::expect_true(is.numeric(init$B) && is.matrix(init$B))
56 |   # Expected shapes: U is n x d, V is m x d, A is n x 1 and B is m x 1
57 |   testthat::expect_equal(dim(init$U), c(n, d))
58 |   testthat::expect_equal(dim(init$V), c(m, d))
59 |   testthat::expect_equal(dim(init$A), c(n, 1))
60 |   testthat::expect_equal(dim(init$B), c(m, 1))
61 | })
62 | 
63 | 
64 | testthat::test_that("Generic initialization", {
65 |   n = 100; m = 10; d = 5; f = Gamma(link = "log")
66 |   dat = sim.gmf.data(n = n, m = m, ncomp = d, family = f, dispersion = 0.5)
67 |   # Call the generic sgdgmf.init() wrapper once per supported method
68 |   init.ols = sgdgmf.init(dat$Y, ncomp = d, family = f, method = "ols")
69 |   init.glm = sgdgmf.init(dat$Y, ncomp = d, family = f, method = "glm")
70 |   init.rnd = sgdgmf.init(dat$Y, ncomp = d, family = f, method = "random")
71 | 
72 |   # Output class: the wrapper must return a list whatever the method
73 |   testthat::expect_true(is.list(init.ols))
74 |   testthat::expect_true(is.list(init.glm))
75 |   testthat::expect_true(is.list(init.rnd))
76 | })
77 | 
78 | 
79 | 
80 | 


--------------------------------------------------------------------------------
/src/minibatch.cpp:
--------------------------------------------------------------------------------
  1 | // minibatch.cpp
  2 | // author: Cristian Castiglione
  3 | // creation: 06/10/2023
  4 | // last change: 06/10/2023
  5 | 
  6 | #include "minibatch.h"
  7 | 
  8 | void Chunks::set_chunks (const int & n, const int & size, const bool & randomize) {
  9 |     // Number of indices, shuffle flag and number of chunks (the last chunk may be shorter)
 10 |     this->nidx = n;
 11 |     this->randomize = randomize;
 12 |     this->nchunks = ceil(double(n) / size);
 13 |     // Index pool 0, ..., n-1, permuted at random only when requested
 14 |     this->idx = arma::linspace<arma::uvec>(0, n-1, n);
 15 |     if (randomize) {this->idx = arma::shuffle(this->idx);}
 16 |     // Per-chunk bookkeeping: first position, one-past-last position and length
 17 |     this->start = arma::zeros<arma::uvec>(this->nchunks);
 18 |     this->end = arma::zeros<arma::uvec>(this->nchunks);
 19 |     this->range = arma::zeros<arma::uvec>(this->nchunks);
 20 |     for (int h = 0; h < this->nchunks; h++) {
 21 |         this->start(h) = h * size; this->end(h) = std::min((h + 1) * size, n);
 22 |         this->range(h) = this->end(h) - this->start(h);
 23 |     }
 24 | }
 25 | 
 26 | arma::uvec Chunks::get_chunk (const int & iter) {
 27 |     // Map the iteration counter onto a chunk id in [0, nchunks), cycling through
 28 |     // the chunks; the extra "+ nchunks" wraps negative counters instead of
 29 |     // producing a negative (out-of-range) id. For iter >= 0 this is iter % nchunks
 30 |     const int i = ((iter % this->nchunks) + this->nchunks) % this->nchunks;
 31 |     // Positions start(i), ..., end(i)-1 of the (possibly shuffled) index pool
 32 |     const int a = this->start(i);
 33 |     const int b = this->end(i)-1;
 34 |     const int c = this->range(i);
 35 |     // Grid of c equally spaced points between a and b, i.e. a, a+1, ..., b
 36 |     arma::uvec which = arma::linspace<arma::uvec>(a, b, c);
 37 |     arma::uvec chunk = this->idx(which);
 38 |     return chunk;
 39 | }
 40 | 
 41 | std::list<arma::uvec> Chunks::get_chunks (const arma::uvec & iters) {
 42 |     // Collect, in the order given, the chunk associated with every iteration
 43 |     std::list<arma::uvec> out;
 44 |     for (arma::uword k = 0; k < iters.n_elem; k++) {
 45 |         const int iter = iters(k);
 46 |         out.push_back(this->get_chunk(iter));
 47 |     }
 48 |     return out;
 49 | }
 50 | 
 51 | 
 52 | void ChunkPile::fill_tovisit () {
 53 |     this->tovisit = this->visited; // refill the to-visit pile with a copy of the visited ids
 54 | }
 55 | 
 56 | void ChunkPile::empty_visited () {
 57 |     this->visited = {}; // discard every id recorded as visited
 58 | }
 59 | 
 60 | void ChunkPile::pop_tovisit (const int & id) {
 61 |     // Position of the first occurrence of id in the to-visit pile
 62 |     const arma::uvec hits = arma::find(this->tovisit == id);
 63 |     const int pos = hits(0);
 64 |     const int len = this->tovisit.n_elem;
 65 |     // Entries before and after the removed position (either piece may be empty)
 66 |     const arma::uvec before = this->tovisit.head(pos);
 67 |     const arma::uvec after = this->tovisit.tail(len-pos-1);
 68 |     // Glue the two pieces back together, which drops the entry at pos
 69 |     // while preserving the relative order of the remaining ids
 70 |     this->tovisit = arma::join_cols(before, after);
 71 | }
 72 | 
 73 | void ChunkPile::push_visited (const int & id) {
 74 |     const arma::uvec last{static_cast<arma::uword>(id)}; // one-element vector holding id
 75 |     this->visited = arma::join_cols(this->visited, last); // append id to the visited pile
 76 | }
 77 | 
 78 | void ChunkPile::sample_idx () {
 79 |     // Pick a position in the to-visit pile: drawn at random from 0, ..., n-1
 80 |     // when random sampling is enabled, the first position otherwise
 81 |     const int n = this->tovisit.n_elem;
 82 |     int pos = 0;
 83 |     if (this->random) {
 84 |         pos = arma::randi<int>(arma::distr_param(0, n-1));
 85 |     }
 86 |     this->idx = this->tovisit(pos);
 87 | }
 88 | 
 89 | void ChunkPile::update () {
 90 |     // When the to-visit pile is exhausted, refill it from the visited pile
 91 |     // and then clear the latter
 92 |     if (this->tovisit.n_elem == 0) {
 93 |         this->fill_tovisit();
 94 |         this->empty_visited();
 95 |     }
 96 | 
 97 |     // Draw the next id, then move it from the to-visit pile to the visited one
 98 |     this->sample_idx();
 99 |     this->pop_tovisit(this->idx);
100 |     this->push_visited(this->idx);
101 | }


--------------------------------------------------------------------------------
/src/link.cpp:
--------------------------------------------------------------------------------
 1 | // link.cpp
 2 | // author: Cristian Castiglione
 3 | // creation: 28/09/2023
 4 | // last change: 21/11/2024
 5 | 
 6 | #include "link.h"
 7 | 
 8 | using namespace glm;
 9 | 
10 | // Identity link: eta = mu, mu = eta, d(mu)/d(eta) = 1; every eta is accepted
11 | bool Identity::valideta (const arma::mat & eta){return true;}
12 | arma::mat Identity::linkfun (const arma::mat & mu) {return mu;}
13 | arma::mat Identity::linkinv (const arma::mat & eta) {return eta;}
14 | arma::mat Identity::mueta (const arma::mat & eta) {return arma::ones(arma::size(eta));}
15 | 
16 | // Logit link: eta = log(mu) - log(1 - mu), mu = 1 / (1 + exp(-eta)); linkinv saturates at 0/1 rather than collapsing to 0 when exp(eta) overflows
17 | bool Logit::valideta (const arma::mat & eta){return true;}
18 | arma::mat Logit::linkfun (const arma::mat & mu) {return arma::log(mu) - arma::log1p(-mu);}
19 | arma::mat Logit::linkinv (const arma::mat & eta) {return 1 / (1 + arma::exp(-eta));}
20 | arma::mat Logit::mueta (const arma::mat & eta) {return arma::exp(eta - 2 * arma::log1p(arma::exp(eta)));}
21 | 
22 | // Probit link: eta is the standard normal quantile of mu, mu the normal cdf of eta
23 | bool Probit::valideta (const arma::mat & eta){return true;}
24 | arma::mat Probit::linkfun (const arma::mat & mu) {
25 |     // Element-wise quantile evaluation; a vectorized implementation would be preferable
26 |     arma::mat eta(arma::size(mu));
27 |     for (arma::uword k = 0; k < mu.n_elem; k++) {eta(k) = R::qnorm(mu(k), 0, 1, true, false);}
28 |     return eta;
29 | }
30 | arma::mat Probit::linkinv (const arma::mat & eta) {return arma::normcdf(eta);}
31 | arma::mat Probit::mueta (const arma::mat & eta) {return arma::normpdf(eta);}
32 | 
33 | // Cauchit link: eta = tan(pi (mu - 0.5)), mu = 0.5 + atan(eta) / pi, d(mu)/d(eta) = invpi / (eta^2 + 1) (invpi presumably 1/pi, defined outside this file)
34 | bool Cauchit::valideta (const arma::mat & eta){return true;}
35 | arma::mat Cauchit::linkfun (const arma::mat & mu) {return arma::tan(pi * (mu - 0.5));}
36 | arma::mat Cauchit::linkinv (const arma::mat & eta) {return 0.5 + arma::atan(eta) / pi;}
37 | arma::mat Cauchit::mueta (const arma::mat & eta) {return invpi / (eta % eta + 1);}
38 | 
39 | // cLogLog link: eta = log(-log(1 - mu)), mu = 1 - exp(-exp(eta)), d(mu)/d(eta) = exp(eta - exp(eta))
40 | bool cLogLog::valideta (const arma::mat & eta){return true;}
41 | arma::mat cLogLog::linkfun (const arma::mat & mu) {return arma::log(- arma::log1p(-mu));}
42 | arma::mat cLogLog::linkinv (const arma::mat & eta) {return 1 - arma::exp(- arma::exp(eta));}
43 | arma::mat cLogLog::mueta (const arma::mat & eta) {return arma::exp(eta - arma::exp(eta));}
44 | 
45 | // Log link: eta = log(mu), mu = exp(eta), d(mu)/d(eta) = exp(eta); every eta is accepted
46 | bool Log::valideta (const arma::mat & eta){return true;}
47 | arma::mat Log::linkfun (const arma::mat & mu) {return arma::log(mu);}
48 | arma::mat Log::linkinv (const arma::mat & eta) {return arma::exp(eta);}
49 | arma::mat Log::mueta (const arma::mat & eta) {return arma::exp(eta);}
50 | 
51 | // Inverse link: eta = 1 / mu, mu = 1 / eta, d(mu)/d(eta) = -1 / eta^2; eta is valid only when all entries are positive
52 | bool Inverse::valideta (const arma::mat & eta){return utils::all(eta > 0);}
53 | arma::mat Inverse::linkfun (const arma::mat & mu) {return 1 / mu;}
54 | arma::mat Inverse::linkinv (const arma::mat & eta) {return 1 / eta;}
55 | arma::mat Inverse::mueta (const arma::mat & eta) {return - 1 / (eta % eta);}
56 | 
57 | // Squared inverse link: eta = 1 / mu^2, mu = 1 / sqrt(eta), d(mu)/d(eta) = -1 / (2 eta^1.5); eta is valid only when all entries are positive
58 | bool SquaredInverse::valideta (const arma::mat & eta){return utils::all(eta > 0);}
59 | arma::mat SquaredInverse::linkfun (const arma::mat & mu) {return 1 / arma::square(mu);}
60 | arma::mat SquaredInverse::linkinv (const arma::mat & eta) {return 1 / arma::sqrt(eta);}
61 | arma::mat SquaredInverse::mueta (const arma::mat & eta) {return - 1 / (2 * arma::pow(eta, 1.5));}
62 | 
63 | // Sqrt link: eta = sqrt(mu), mu = eta^2, d(mu)/d(eta) = 2 eta; eta is valid only when all entries are positive
64 | bool Sqrt::valideta (const arma::mat & eta){return utils::all(eta > 0);}
65 | arma::mat Sqrt::linkfun (const arma::mat & mu) {return arma::sqrt(mu);}
66 | arma::mat Sqrt::linkinv (const arma::mat & eta) {return arma::square(eta);}
67 | arma::mat Sqrt::mueta (const arma::mat & eta) {return 2 * eta;}
68 | 
69 | 


--------------------------------------------------------------------------------
/src/family.cpp:
--------------------------------------------------------------------------------
 1 | // family.cpp
 2 | // author: Cristian Castiglione
 3 | // creation: 28/09/2023
 4 | // last change: 21/11/2024
 5 | 
 6 | #include "family.h"
 7 | 
 8 | using namespace glm;
 9 | 
10 | // Gaussian family: V(mu) = 1, initial values equal to y, deviance residuals (y - mu)^2
11 | arma::mat Gaussian::variance (const arma::mat & mu) const {return arma::ones(size(mu));}
12 | arma::mat Gaussian::initialize (const arma::mat & y) const {return y;}
13 | arma::mat Gaussian::devresid (const arma::mat & y, const arma::mat & mu) const {return arma::square(y - mu);}
14 | 
15 | // Binomial family: V(mu) = mu (1 - mu), initial values 2 y - 1, deviance residuals -2 [y log(mu) + (1 - y) log(1 - mu)]
16 | arma::mat Binomial::variance (const arma::mat & mu) const {return mu % (1 - mu);}
17 | arma::mat Binomial::initialize (const arma::mat & y) const {return 2 * y - 1;}
18 | arma::mat Binomial::devresid (const arma::mat & y, const arma::mat & mu) const {
19 |     return - 2 * (y % arma::log(mu) + (1 - y) % arma::log1p(-mu));
20 | }
21 | 
22 | // Poisson family: V(mu) = mu, initial values linkfun(y clamped below at 0.1), deviance residuals 2 [xlogx(y) - y log(mu) - (y - mu)] (xlogx presumably y log(y), defined in utils)
23 | arma::mat Poisson::variance (const arma::mat & mu) const {return mu;}
24 | arma::mat Poisson::initialize (const arma::mat & y) const {return this->linkfun(arma::clamp(y, 0.1, infty));}
25 | arma::mat Poisson::devresid (const arma::mat & y, const arma::mat & mu) const {
26 |     return 2 * (utils::xlogx(y) - y % arma::log(mu) - (y - mu));
27 | }
28 | 
29 | // Gamma family: V(mu) = mu^2, initial values linkfun(y), deviance residuals -2 [log(y / mu) - (y - mu) / mu]
30 | arma::mat Gamma::variance (const arma::mat & mu) const {return arma::square(mu);}
31 | arma::mat Gamma::initialize (const arma::mat & y) const {return this->linkfun(y);}
32 | arma::mat Gamma::devresid (const arma::mat & y, const arma::mat & mu) const {
33 |     return - 2 * (arma::log(y / mu) - (y - mu) / mu);
34 | }
35 | 
36 | // Inverse-Gaussian family: V(mu) = mu^3, initial values linkfun(y), deviance residuals (y - mu)^2 / (y mu^2)
37 | arma::mat InverseGaussian::variance (const arma::mat & mu) const {return mu % mu % mu;}
38 | arma::mat InverseGaussian::initialize (const arma::mat & y) const {return this->linkfun(y);}
39 | arma::mat InverseGaussian::devresid (const arma::mat & y, const arma::mat & mu) const {
40 |     return arma::square(y - mu) / (y % mu % mu);
41 | }
42 | 
43 | // Negative-Binomial family: V(mu) = mu + mu^2 / phi with phi = this->dispersion, initial values linkfun(y clamped below at 0.1)
44 | arma::mat NegativeBinomial::variance (const arma::mat & mu) const {return mu + (mu % mu) / this->dispersion;}
45 | arma::mat NegativeBinomial::initialize (const arma::mat & y) const {return this->linkfun(arma::clamp(y, 0.1, infty));}
46 | arma::mat NegativeBinomial::devresid (const arma::mat & y, const arma::mat & mu) const {
47 |     const double phi = this->dispersion; // deviance: 2 [xlogx(y) - y log(mu) - (y + phi) (log(y + phi) - log(mu + phi))]
48 |     return 2 * (utils::xlogx(y) - y % arma::log(mu) - (y + phi) % (arma::log(y + phi) - arma::log(mu + phi)));
49 | }
50 | 
51 | // Quasi-Binomial family: same variance, initial values and deviance residuals as the Binomial family in this file
52 | arma::mat QuasiBinomial::variance (const arma::mat & mu) const {return mu % (1 - mu);}
53 | arma::mat QuasiBinomial::initialize (const arma::mat & y) const {return 2 * y - 1;}
54 | arma::mat QuasiBinomial::devresid (const arma::mat & y, const arma::mat & mu) const {
55 |     return - 2 * (y % arma::log(mu) + (1 - y) % arma::log1p(-mu));
56 | }
57 | 
58 | // Quasi-Poisson family: same variance, initial values and deviance residuals as the Poisson family in this file
59 | arma::mat QuasiPoisson::variance (const arma::mat & mu) const {return mu;}
60 | arma::mat QuasiPoisson::initialize (const arma::mat & y) const {return this->linkfun(arma::clamp(y, 0.1, infty));}
61 | arma::mat QuasiPoisson::devresid (const arma::mat & y, const arma::mat & mu) const {
62 |     return 2 * (utils::xlogx(y) - y % arma::log(mu) - (y - mu));
63 | }
64 | 
65 | // Quasi family: variance, initial values and deviance residuals are delegated to the varfun, initfun and devfun members
66 | arma::mat Quasi::variance (const arma::mat & mu) const {return this->varfun(mu);}
67 | arma::mat Quasi::initialize (const arma::mat & y) const {return this->linkfun(this->initfun(y));}
68 | arma::mat Quasi::devresid (const arma::mat & y, const arma::mat & mu) const {return this->devfun(y, mu);}
69 | 


--------------------------------------------------------------------------------
/src/link.h:
--------------------------------------------------------------------------------
  1 | // link.h
  2 | // author: Cristian Castiglione
  3 | // creation: 28/09/2023
  4 | // last change: 21/10/2024
  5 | 
  6 | #ifndef LINK_H
  7 | #define LINK_H
  8 | 
  9 | #include <RcppArmadillo.h>
 10 | #include "utils.h"
 11 | 
 12 | namespace glm {
 13 | 
 14 | class Link {
 15 |     public:
 16 |         std::string link = "Link"; // label of the link, overwritten by each subclass constructor
 17 |         virtual bool valideta (const arma::mat & eta) = 0; // whether eta is admissible for this link
 18 |         virtual arma::mat linkfun (const arma::mat & mu) = 0; // link function: maps mu to eta
 19 |         virtual arma::mat linkinv (const arma::mat & eta) = 0; // inverse link: maps eta to mu
 20 |         virtual arma::mat mueta (const arma::mat & eta) = 0; // derivative of mu with respect to eta
 21 |         virtual ~Link () {} // virtual destructor, so subclasses can be destroyed through a Link pointer
 22 | };
 23 | 
 24 | class Identity : public Link {
 25 |     public:
 26 |         Identity () {this->link = "Identity";} // set the link label
 27 |         bool valideta (const arma::mat & eta) override;
 28 |         arma::mat linkfun (const arma::mat & mu) override;
 29 |         arma::mat linkinv (const arma::mat & eta) override;
 30 |         arma::mat mueta (const arma::mat & eta) override;
 31 | };
 32 | 
 33 | class Logit : public Link {
 34 |     public:
 35 |         Logit () {this->link = "Logit";} // set the link label
 36 |         bool valideta (const arma::mat & eta) override;
 37 |         arma::mat linkfun (const arma::mat & mu) override;
 38 |         arma::mat linkinv (const arma::mat & eta) override;
 39 |         arma::mat mueta (const arma::mat & eta) override;
 40 | };
 41 | 
 42 | class Probit : public Link {
 43 |     public:
 44 |         Probit () {this->link = "Probit";} // set the link label
 45 |         bool valideta (const arma::mat & eta) override;
 46 |         arma::mat linkfun (const arma::mat & mu) override;
 47 |         arma::mat linkinv (const arma::mat & eta) override;
 48 |         arma::mat mueta (const arma::mat & eta) override;
 49 | };
 50 | 
 51 | class Cauchit : public Link {
 52 |     public:
 53 |         Cauchit () {this->link = "Cauchit";} // set the link label
 54 |         bool valideta (const arma::mat & eta) override;
 55 |         arma::mat linkfun (const arma::mat & mu) override;
 56 |         arma::mat linkinv (const arma::mat & eta) override;
 57 |         arma::mat mueta (const arma::mat & eta) override;
 58 | };
 59 | 
 60 | class cLogLog : public Link {
 61 |     public:
 62 |         cLogLog () {this->link = "cLogLog";} // set the link label
 63 |         bool valideta (const arma::mat & eta) override;
 64 |         arma::mat linkfun (const arma::mat & mu) override;
 65 |         arma::mat linkinv (const arma::mat & eta) override;
 66 |         arma::mat mueta (const arma::mat & eta) override;
 67 | };
 68 | 
 69 | class Log : public Link {
 70 |     public:
 71 |         Log () {this->link = "Log";} // set the link label
 72 |         bool valideta (const arma::mat & eta) override;
 73 |         arma::mat linkfun (const arma::mat & mu) override;
 74 |         arma::mat linkinv (const arma::mat & eta) override;
 75 |         arma::mat mueta (const arma::mat & eta) override;
 76 | };
 77 | 
 78 | class Inverse : public Link {
 79 |     public:
 80 |         Inverse () {this->link = "Inverse";} // set the link label
 81 |         bool valideta (const arma::mat & eta) override;
 82 |         arma::mat linkfun (const arma::mat & mu) override;
 83 |         arma::mat linkinv (const arma::mat & eta) override;
 84 |         arma::mat mueta (const arma::mat & eta) override;
 85 | };
 86 | 
 87 | class SquaredInverse : public Link {
 88 |     public:
 89 |         SquaredInverse () {this->link = "1/mu^2";} // set the link label
 90 |         bool valideta (const arma::mat & eta) override;
 91 |         arma::mat linkfun (const arma::mat & mu) override;
 92 |         arma::mat linkinv (const arma::mat & eta) override;
 93 |         arma::mat mueta (const arma::mat & eta) override;
 94 | };
 95 | 
 96 | class Sqrt : public Link {
 97 |     public:
 98 |         Sqrt () {this->link = "Sqrt";} // set the link label
 99 |         bool valideta (const arma::mat & eta) override;
100 |         arma::mat linkfun (const arma::mat & mu) override;
101 |         arma::mat linkinv (const arma::mat & eta) override;
102 |         arma::mat mueta (const arma::mat & eta) override;
103 | };
104 | 
105 | }
106 | 
107 | #endif


--------------------------------------------------------------------------------
/tests/testrcpp/test-misc.R:
--------------------------------------------------------------------------------
  1 | # test-misc.R
  2 | # author: Cristian Castiglione
  3 | # creation: 02/10/2023
  4 | # last change: 06/10/2023
  5 | 
  6 | ## Workspace setup ----
  7 | rm(list = ls())
  8 | graphics.off()
  9 | 
 10 | # Package compilation and import
 11 | devtools::load_all()
 12 | 
 13 | 
 14 | ## Test: get_data_bounds() ----
 15 | {
 16 |   ymin <- 0; ymax <- 1; eps <- 0.01 # probit link: compare the eta limits from R and from C++
 17 |   r.bounds <- binomial(link = "probit")$linkfun(c(ymin + eps * (ymax - ymin), ymax - eps * (ymax - ymin)))
 18 |   c.bounds <- drop(sgdGMF::c_get_data_bounds(eps, ymin, ymax, "binomial", "probit")$etalim)
 19 |   print(all.equal(r.bounds, c.bounds))
 20 | }
 21 | 
 22 | {
 23 |   ymin <- 0; ymax <- 1; eps <- 0.01 # same comparison with the logit link
 24 |   r.bounds <- binomial(link = "logit")$linkfun(c(ymin + eps * (ymax - ymin), ymax - eps * (ymax - ymin)))
 25 |   c.bounds <- drop(sgdGMF::c_get_data_bounds(eps, ymin, ymax, "binomial", "logit")$etalim)
 26 |   print(all.equal(r.bounds, c.bounds))
 27 | }
 28 | 
 29 | ## Test: get_uv_indices() ----
 30 | {
 31 |   p <- 3; q <- 1; d <- 2 # expected 0-based column indices, built by hand in R
 32 |   r.idx <- list(idu = c(p:(p + q - 1), (p + q):(p + q + d - 1)), idv = c(0:(p - 1), (p + q):(p + q + d - 1)))
 33 |   c.idx <- sgdGMF::c_get_uv_indices(p, q, d)
 34 |   print(all.equal(r.idx$idu, drop(c.idx$idu)))
 35 |   print(all.equal(r.idx$idv, drop(c.idx$idv)))
 36 | }
 37 | 
 38 | ## Test: get_uv_penalty() ----
 39 | {
 40 |   p <- 3; q <- 1; d <- 2; pen <- c(1:4) # expected penalty vectors, built by hand in R
 41 |   r.pen <- list(penu = c(rep(0, p), rep(pen[1], q), rep(pen[3], d)),
 42 |                 penv = c(rep(pen[2], p), rep(0, q), rep(pen[4], d)))
 43 |   c.pen <- sgdGMF::c_get_uv_penalty(pen, p, q, d)
 44 |   print(all.equal(r.pen$penu, drop(c.pen$penu)))
 45 |   print(all.equal(r.pen$penv, drop(c.pen$penv)))
 46 | }
 47 | 
 48 | ## Test: sample_minibatch() ----
 49 | {
 50 |   n = 9; size = 3; randomize = FALSE # n is a multiple of the chunk size
 51 |   r.chunks = sgdGMF::sample.minibatch(n, size, randomize)
 52 |   c.chunks = sgdGMF::c_sample_minibatch(n, size, randomize)
 53 | 
 54 |   flag = TRUE
 55 |   for (h in 1:ceiling(n / size)) {
 56 |     flagh = isTRUE(all.equal(r.chunks[[h]], c.chunks[[h]]+1)) # all.equal() returns a string on mismatch; +1 shifts the C++ indices
 57 |     flag = flag && flagh
 58 |   }
 59 |   print(flag)
 60 | }
 61 | 
 62 | 
 63 | {
 64 |   n = 11; size = 3; randomize = FALSE # n is not a multiple of the chunk size
 65 |   r.chunks = sgdGMF::sample.minibatch(n, size, randomize)
 66 |   c.chunks = sgdGMF::c_sample_minibatch(n, size, randomize)
 67 | 
 68 |   flag = TRUE
 69 |   for (h in 1:ceiling(n / size)) {
 70 |     flagh = isTRUE(all.equal(r.chunks[[h]], c.chunks[[h]]+1)) # all.equal() returns a string on mismatch; +1 shifts the C++ indices
 71 |     flag = flag && flagh
 72 |   }
 73 |   print(flag)
 74 | }
 75 | 
 76 | ## Test: select_chunk ----
 77 | {
 78 |   iter <- 10; nchunks <- 3 # the C++ result is shifted by +1 before the comparison
 79 |   r.idx <- sgdGMF::select.minibatch(iter, nchunks)
 80 |   c.idx <- sgdGMF::c_select_minibatch(iter, nchunks)
 81 |   print(all.equal(r.idx, c.idx + 1))
 82 | }
 83 | 
 84 | ## Test: get_chunks ----
 85 | {
 86 |   n = 9; size = 3; randomize = FALSE
 87 |   r.chunks = sgdGMF::sample.minibatch(n, size, randomize)
 88 |   c.chunks = sgdGMF::c_get_chunks(0:2, n, size, randomize)
 89 |   # Compare the R partition with the chunks returned by c_get_chunks(), one by one
 90 |   flag = TRUE
 91 |   for (h in 1:ceiling(n / size)) {
 92 |     flagh = isTRUE(all.equal(r.chunks[[h]], c.chunks[[h]]+1)) # all.equal() returns text on mismatch
 93 |     flag = flag && flagh
 94 |   }
 95 |   print(flag)
 96 | }
 97 | 
 98 | {
 99 |   n = 10; size = 3; randomize = FALSE
100 |   r.chunks = sgdGMF::sample.minibatch(n, size, randomize)
101 |   c.chunks = sgdGMF::c_get_chunks(0:3, n, size, randomize)
102 |   # Compare the R partition with the chunks returned by c_get_chunks() (last chunk is shorter)
103 |   flag = TRUE
104 |   for (h in 1:ceiling(n / size)) {
105 |     flagh = isTRUE(all.equal(r.chunks[[h]], c.chunks[[h]]+1)) # all.equal() returns text on mismatch
106 |     flag = flag && flagh
107 |   }
108 |   print(flag)
109 | }
110 | 
111 | {
112 |   n = 11; size = 3; randomize = FALSE
113 |   r.chunks = sgdGMF::sample.minibatch(n, size, randomize)
114 |   c.chunks = sgdGMF::c_get_chunks(0:8, n, size, randomize)
115 |   # NOTE(review): only ceiling(11/3) = 4 chunks are compared below, yet 0:8 is requested - confirm
116 |   flag = TRUE
117 |   for (h in 1:ceiling(n / size)) {
118 |     flagh = isTRUE(all.equal(r.chunks[[h]], c.chunks[[h]]+1)) # all.equal() returns text on mismatch
119 |     flag = flag && flagh
120 |   }
121 |   print(flag)
122 | }
123 | 
124 | ## End of file ----
125 | 
126 | 


--------------------------------------------------------------------------------
/tests/testthat/test-vglmfit.R:
--------------------------------------------------------------------------------
 1 | # file: test-vglmfit.R
 2 | # author: Cristian Castiglione
 3 | # creation: 23/03/2024
 4 | # last change: 04/10/2024
 5 | 
 6 | testthat::test_that("Multivariate OLS fitting", {
 7 |   n = 100; m = 10; p = 5; q = p+1
 8 | 
 9 |   O = matrix(rexp(n*m, rate = 2.0), nrow = n, ncol = m)
10 |   X = cbind(1, matrix(rnorm(n*p, mean = 0.0, sd = 1.0),  nrow = n, ncol = p))
11 |   B = matrix(rnorm(m*q, mean = 0.1, sd = 0.25), nrow = m, ncol = q)
12 |   E = matrix(rnorm(n*m, mean = 0.0, sd = 0.1), nrow = n, ncol = m)
13 |   Y = O + tcrossprod(X, B) + E
14 | 
15 |   B.hat = ols.fit.coef(Y, X, offset = O)
16 |   mu.hat = O + tcrossprod(X, B.hat)
17 |   res.hat = Y - mu.hat
18 | 
19 |   # Check the dimension and the basic properties of the estimates
20 |   testthat::expect_equal(c(m, q), dim(B.hat))
21 |   testthat::expect_equal(crossprod(X, mu.hat), crossprod(X, Y))
22 |   testthat::expect_equal(matrix(0, q, m), crossprod(X, res.hat))
23 |   testthat::expect_equal(0, mean(res.hat))
24 | })
25 | 
26 | 
27 | testthat::test_that("Binomial VGLM fitting", {
28 |   n = 100; m = 10; p = 5; q = p+1
29 |   family = binomial(link = "probit")
30 | 
31 |   O = matrix(rexp(n*m, rate = 2.0), nrow = n, ncol = m)
32 |   X = cbind(1, matrix(rnorm(n*p, mean = 0.0, sd = 1.0),  nrow = n, ncol = p))
33 |   B = matrix(rnorm(m*q, mean = 0.1, sd = 0.25), nrow = m, ncol = q)
34 |   eta = O + tcrossprod(X, B)
35 |   mu = family$linkinv(eta)
36 |   Y = matrix(rbinom(n*m, size = 1, prob = mu), nrow = n, ncol = m)
37 | 
38 |   B.hat = vglm.fit.coef(Y, X, family, offset = O, parallel = FALSE)
39 |   eta.hat = O + tcrossprod(X, B.hat)
40 |   mu.hat = family$linkinv(eta.hat)
41 |   dmu.hat = family$mu.eta(eta.hat)
42 |   var.hat = family$variance(mu.hat)
43 |   res.hat = (Y - mu.hat) * dmu.hat / var.hat
44 |   # X'res.hat is the score at the estimate: it must be close to zero from both sides
45 |   # Check the dimension and the basic properties of the estimates
46 |   testthat::expect_equal(c(m, q), dim(B.hat))
47 |   testthat::expect_true(abs(mean(crossprod(X, res.hat))) < 1e-04)
48 | })
49 | 
50 | testthat::test_that("Poisson VGLM fitting", {
51 |   n = 100; m = 10; p = 5; q = p+1
52 |   family = poisson(link = "log")
53 | 
54 |   O = matrix(rexp(n*m, rate = 2.0), nrow = n, ncol = m)
55 |   X = cbind(1, matrix(rnorm(n*p, mean = 0.0, sd = 1.0),  nrow = n, ncol = p))
56 |   B = matrix(rnorm(m*q, mean = 0.1, sd = 0.25), nrow = m, ncol = q)
57 |   eta = O + tcrossprod(X, B)
58 |   mu = family$linkinv(eta)
59 |   Y = matrix(rpois(n*m, lambda = mu), nrow = n, ncol = m)
60 | 
61 |   B.hat = vglm.fit.coef(Y, X, family, offset = O, parallel = FALSE)
62 |   eta.hat = O + tcrossprod(X, B.hat)
63 |   mu.hat = family$linkinv(eta.hat)
64 |   dmu.hat = family$mu.eta(eta.hat)
65 |   var.hat = family$variance(mu.hat)
66 |   res.hat = (Y - mu.hat) * dmu.hat / var.hat
67 |   # X'res.hat is the score at the estimate: it must be close to zero from both sides
68 |   # Check the dimension and the basic properties of the estimates
69 |   testthat::expect_equal(c(m, q), dim(B.hat))
70 |   testthat::expect_true(abs(mean(crossprod(X, res.hat))) < 1e-04)
71 | })
72 | 
73 | testthat::test_that("Gamma VGLM fitting", {
74 |   n = 100; m = 10; p = 5; q = p+1
75 |   family = Gamma(link = "log")
76 | 
77 |   O = matrix(rexp(n*m, rate = 2.0), nrow = n, ncol = m)
78 |   X = cbind(1, matrix(rnorm(n*p, mean = 0.0, sd = 1.0),  nrow = n, ncol = p))
79 |   B = matrix(rnorm(m*q, mean = 0.1, sd = 0.25), nrow = m, ncol = q)
80 |   eta = O + tcrossprod(X, B)
81 |   mu = family$linkinv(eta)
82 |   Y = matrix(rgamma(n*m, shape = 2, rate = 2 / mu), nrow = n, ncol = m)
83 | 
84 |   B.hat = vglm.fit.coef(Y, X, family, offset = O, parallel = FALSE)
85 |   eta.hat = O + tcrossprod(X, B.hat)
86 |   mu.hat = family$linkinv(eta.hat)
87 |   dmu.hat = family$mu.eta(eta.hat)
88 |   var.hat = family$variance(mu.hat)
89 |   res.hat = (Y - mu.hat) * dmu.hat / var.hat
90 |   # X'res.hat is the score at the estimate: it must be close to zero from both sides
91 |   # Check the dimension and the basic properties of the estimates
92 |   testthat::expect_equal(c(m, q), dim(B.hat))
93 |   testthat::expect_true(abs(mean(crossprod(X, res.hat))) < 1e-03)
94 | })
95 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
  1 | # Generated by roxygen2: do not edit by hand
  2 | 
  3 | S3method(AIC,initgmf)
  4 | S3method(AIC,sgdgmf)
  5 | S3method(BIC,initgmf)
  6 | S3method(BIC,sgdgmf)
  7 | S3method(biplot,initgmf)
  8 | S3method(biplot,sgdgmf)
  9 | S3method(coef,initgmf)
 10 | S3method(coef,sgdgmf)
 11 | S3method(coefficients,initgmf)
 12 | S3method(coefficients,sgdgmf)
 13 | S3method(deviance,initgmf)
 14 | S3method(deviance,sgdgmf)
 15 | S3method(fitted,initgmf)
 16 | S3method(fitted,sgdgmf)
 17 | S3method(image,initgmf)
 18 | S3method(image,sgdgmf)
 19 | S3method(plot,initgmf)
 20 | S3method(plot,sgdgmf)
 21 | S3method(predict,sgdgmf)
 22 | S3method(print,initgmf)
 23 | S3method(print,sgdgmf)
 24 | S3method(refit,sgdgmf)
 25 | S3method(resid,initgmf)
 26 | S3method(resid,sgdgmf)
 27 | S3method(residuals,initgmf)
 28 | S3method(residuals,sgdgmf)
 29 | S3method(screeplot,initgmf)
 30 | S3method(screeplot,sgdgmf)
 31 | S3method(simulate,sgdgmf)
 32 | S3method(storedata,sgdgmf)
 33 | export(refit)
 34 | export(set.control.airwls)
 35 | export(set.control.alg)
 36 | export(set.control.block.sgd)
 37 | export(set.control.coord.sgd)
 38 | export(set.control.cv)
 39 | export(set.control.init)
 40 | export(set.control.newton)
 41 | export(sgdgmf.cv)
 42 | export(sgdgmf.fit)
 43 | export(sgdgmf.init)
 44 | export(sgdgmf.rank)
 45 | export(sim.gmf.data)
 46 | export(simulate)
 47 | export(storedata)
 48 | import(Rcpp)
 49 | import(RcppArmadillo)
 50 | import(ggplot2)
 51 | importFrom(MASS,neg.bin)
 52 | importFrom(MASS,negative.binomial)
 53 | importFrom(RSpectra,eigs)
 54 | importFrom(RSpectra,eigs_sym)
 55 | importFrom(RSpectra,svds)
 56 | importFrom(Rcpp,evalCpp)
 57 | importFrom(doParallel,registerDoParallel)
 58 | importFrom(foreach,"%do%")
 59 | importFrom(foreach,"%dopar%")
 60 | importFrom(foreach,foreach)
 61 | importFrom(generics,refit)
 62 | importFrom(graphics,image)
 63 | importFrom(methods,is)
 64 | importFrom(parallel,detectCores)
 65 | importFrom(parallel,makeCluster)
 66 | importFrom(parallel,stopCluster)
 67 | importFrom(reshape2,melt)
 68 | importFrom(stats,BIC)
 69 | importFrom(stats,Gamma)
 70 | importFrom(stats,binomial)
 71 | importFrom(stats,biplot)
 72 | importFrom(stats,coef)
 73 | importFrom(stats,coefficients)
 74 | importFrom(stats,cor)
 75 | importFrom(stats,cov)
 76 | importFrom(stats,cov2cor)
 77 | importFrom(stats,dbeta)
 78 | importFrom(stats,dbinom)
 79 | importFrom(stats,density)
 80 | importFrom(stats,deviance)
 81 | importFrom(stats,dexp)
 82 | importFrom(stats,dgamma)
 83 | importFrom(stats,dnorm)
 84 | importFrom(stats,dpois)
 85 | importFrom(stats,dunif)
 86 | importFrom(stats,ecdf)
 87 | importFrom(stats,family)
 88 | importFrom(stats,fitted)
 89 | importFrom(stats,gaussian)
 90 | importFrom(stats,glm.fit)
 91 | importFrom(stats,inverse.gaussian)
 92 | importFrom(stats,median)
 93 | importFrom(stats,pbeta)
 94 | importFrom(stats,pbinom)
 95 | importFrom(stats,pexp)
 96 | importFrom(stats,pgamma)
 97 | importFrom(stats,pnorm)
 98 | importFrom(stats,poisson)
 99 | importFrom(stats,ppois)
100 | importFrom(stats,predict)
101 | importFrom(stats,punif)
102 | importFrom(stats,qbeta)
103 | importFrom(stats,qbinom)
104 | importFrom(stats,qexp)
105 | importFrom(stats,qgamma)
106 | importFrom(stats,qnorm)
107 | importFrom(stats,qpois)
108 | importFrom(stats,qqline)
109 | importFrom(stats,qqnorm)
110 | importFrom(stats,qqplot)
111 | importFrom(stats,quantile)
112 | importFrom(stats,quasi)
113 | importFrom(stats,quasibinomial)
114 | importFrom(stats,quasipoisson)
115 | importFrom(stats,qunif)
116 | importFrom(stats,rbeta)
117 | importFrom(stats,rbinom)
118 | importFrom(stats,resid)
119 | importFrom(stats,residuals)
120 | importFrom(stats,rexp)
121 | importFrom(stats,rgamma)
122 | importFrom(stats,rnorm)
123 | importFrom(stats,rpois)
124 | importFrom(stats,runif)
125 | importFrom(stats,screeplot)
126 | importFrom(stats,sd)
127 | importFrom(stats,var)
128 | importFrom(utils,head)
129 | importFrom(utils,tail)
130 | importFrom(viridisLite,viridis)
131 | useDynLib(sgdGMF, .registration=TRUE)
132 | 


--------------------------------------------------------------------------------
/R/vglmfit.R:
--------------------------------------------------------------------------------
  1 | 
  2 | #' @title Estimate the coefficients of a multivariate linear model
  3 | #'
  4 | #' @description
  5 | #' Estimate the coefficients of a multivariate linear model via ordinary least squares.
  6 | #'
  7 | #' @param Y \eqn{n \times m} matrix of response variables
  8 | #' @param X \eqn{n \times p} matrix of covariates
  9 | #' @param offset \eqn{n \times m} matrix of offset values
 10 | #'
 11 | #' @keywords internal
 12 | ols.fit.coef = function (
 13 |     Y, X, offset = NULL
 14 | ) {
 15 |   # A missing offset is treated as a zero shift of the responses
 16 |   shift = if (is.null(offset)) 0 else offset
 17 | 
 18 |   # Solve the normal equations (X'X) B' = X'(Y - offset)
 19 |   gram = crossprod(X)
 20 |   rhs = crossprod(X, Y - shift)
 21 |   beta = solve(gram, rhs)
 22 | 
 23 |   # Transpose so that each row holds the coefficients of one response column
 24 |   return (t(beta))
 25 | }
 26 | 
 27 | #' @title Estimate the coefficients of a vector generalized linear model
 28 | #'
 29 | #' @description
 30 | #' Estimate the coefficients of a vector generalized linear model via parallel
 31 | #' iterative re-weighted least squares. Computations can be performed in parallel
 32 | #' to speed up the execution.
 33 | #'
 34 | #' @param Y \eqn{n \times m} matrix of response variables
 35 | #' @param X \eqn{n \times p} matrix of covariates
 36 | #' @param family a \code{glm} family (see \code{\link{family}} for more details)
 37 | #' @param weights \eqn{n \times m} matrix of weighting values
 38 | #' @param offset \eqn{n \times m} matrix of offset values
 39 | #' @param parallel if \code{TRUE}, allows for parallel computing using the \code{foreach} package
 40 | #' @param nthreads number of cores to be used in parallel (only if \code{parallel=TRUE})
 41 | #' @param clust registered cluster to be used for distributing the computations (only if \code{parallel=TRUE})
 42 | #'
 43 | #' @keywords internal
 44 | vglm.fit.coef = function (
 45 |     Y, X, family = gaussian(), weights = NULL, offset = NULL,
 46 |     parallel = FALSE, nthreads = 1, clust = NULL
 47 | ) {
 48 |   # Set the model dimensions
 49 |   n = nrow(Y)
 50 |   m = ncol(Y)
 51 | 
 52 |   # Set the default weight (all ones) and offset (all zeros) matrices
 53 |   if (is.null(weights)) weights = matrix(1, nrow = n, ncol = m)
 54 |   if (is.null(offset)) offset = matrix(0, nrow = n, ncol = m)
 55 | 
 56 |   # Create and register a cluster only if the caller did not supply one
 57 |   if (parallel && is.null(clust)) {
 58 |     # detectCores() may return NA: fall back to a single worker in that case
 59 |     ncores = parallel::detectCores() - 1
 60 |     if (is.na(ncores)) ncores = 1
 61 |     # Use at most nthreads workers, but never fewer than one
 62 |     ncores = max(1, min(nthreads, ncores))
 63 |     clust = parallel::makeCluster(ncores)
 64 | 
 65 |     # Stop the locally created cluster on every exit path, so that the
 66 |     # workers are not leaked when one of the column-wise fits fails
 67 |     on.exit(parallel::stopCluster(clust), add = TRUE)
 68 |     doParallel::registerDoParallel(clust)
 69 |   }
 70 | 
 71 |   # Each column of Y is fitted independently via stats::glm.fit and the
 72 |   # coefficient vectors are stacked by row, one row per response variable
 73 |   j = NULL
 74 |   if (!parallel) {
 75 |     # Sequential parameter estimation
 76 |     coefs = foreach(j = seq_len(m), .combine = "rbind") %do% {
 77 |       yj = as.vector(Y[,j])
 78 |       oj = as.vector(offset[,j])
 79 |       wj = as.vector(weights[,j])
 80 |       fit = stats::glm.fit(x = X, y = yj, family = family, weights = wj, offset = oj)
 81 |       t(fit$coefficients)
 82 |     }
 83 | 
 84 |     ## # As an alternative, we may use the following R code,
 85 |     ## # which does not depend on the foreach package
 86 |     ## coefs = matrix(NA, nrow = m, ncol = ncol(X))
 87 |     ## for (j in seq_len(m)) {
 88 |     ##   yj = as.vector(Y[,j])
 89 |     ##   oj = as.vector(offset[,j])
 90 |     ##   wj = as.vector(weights[,j])
 91 |     ##   fit = stats::glm.fit(x = X, y = yj, family = family, weights = wj, offset = oj)
 92 |     ##   coefs[j, ] = as.vector(fit$coefficients)
 93 |     ## }
 94 |   } else {
 95 |     # Parallel parameter estimation
 96 |     coefs = foreach(j = seq_len(m), .combine = "rbind") %dopar% {
 97 |       yj = as.vector(Y[,j])
 98 |       oj = as.vector(offset[,j])
 99 |       wj = as.vector(weights[,j])
100 |       fit = stats::glm.fit(x = X, y = yj, family = family, weights = wj, offset = oj)
101 |       t(fit$coefficients)
102 |     }
103 |   }
104 | 
105 |   # Return the parameter estimates
106 |   return (coefs)
107 | }
108 | 


--------------------------------------------------------------------------------
/vignettes/residuals.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Analysis of the residuals"
  3 | author: "Cristian Castiglione"
  4 | output: rmarkdown::html_vignette
  5 | vignette: >
  6 |   %\VignetteIndexEntry{residuals}
  7 |   %\VignetteEngine{knitr::rmarkdown}
  8 |   %\VignetteEncoding{UTF-8}
  9 | ---
 10 | 
 11 | ## Workspace setup
 12 | 
 13 | ```{r setup, include = FALSE}
 14 | options(rmarkdown.html_vignette.check_title = FALSE)
 15 | 
 16 | knitr::opts_chunk$set(
 17 |   collapse = TRUE,
 18 |   comment = "#>"
 19 | )
 20 | ```
 21 | 
 22 | Load the `sgdGMF` package in the workspace.
 23 | 
 24 | ```{r sgdgmf}
 25 | library(sgdGMF)
 26 | ```
 27 | 
 28 | Load other useful packages in the workspace.
 29 | 
 30 | ```{r libraries}
 31 | library(ggplot2)
 32 | library(ggpubr)
 33 | library(reshape2)
 34 | ```
 35 | 
 36 | ## Ant traits data
 37 | 
 38 | Load the ant traits data in the workspace and define the response matrix `Y` and covariate matrices `X` and `Z`.
 39 | 
 40 | ```{r data}
 41 | # install.packages("mvabund")
 42 | # data(antTraits, package = "mvabund")
 43 | 
 44 | load(url("https://raw.githubusercontent.com/cran/mvabund/master/data/antTraits.RData"))
 45 | 
 46 | Y = as.matrix(antTraits$abund)
 47 | X = as.matrix(antTraits$env[,-3])
 48 | Z = matrix(1, nrow = ncol(Y), ncol = 1)
 49 | 
 50 | n = nrow(Y)
 51 | m = ncol(Y)
 52 | ```
 53 | 
 54 | ## Model specification
 55 | 
 56 | Set the model family to Poisson since the response matrix contains count data.
 57 | 
 58 | ```{r family}
 59 | family = poisson()
 60 | ```
 61 | 
 62 | Select the optimal number of latent factors using the function `sgdgmf.rank`,
 63 | which employs an adjusted eigenvalue thresholding method to identify the optimal
 64 | elbow point of a screeplot.
 65 | 
 66 | ```{r rank}
 67 | ncomp = sgdgmf.rank(Y = Y, X = X, Z = Z, family = family)$ncomp
 68 | cat("Selected rank: ", ncomp)
 69 | ```
 70 | 
 71 | ## Model estimation
 72 | 
 73 | Estimate a Poisson GMF model using iterated least squares.
 74 | 
 75 | ```{r fit}
 76 | gmf = sgdgmf.fit(Y, X, Z, ncomp = ncomp, family = family, method = "airwls")
 77 | ```
 78 | 
 79 | ## Model validation
 80 | 
 81 | Compute the deviance residuals of the estimated matrix factorization model.
 82 | Additionally, compute the spectrum of the resulting residual matrix.
 83 | 
 84 | ```{r resid}
 85 | res = residuals(gmf, spectrum = TRUE, ncomp = 20)
 86 | ```
 87 | 
 88 | Compare the residuals of two competing models: VGLM and GMF.
 89 | Notice that VGLM is a particular case of GMF which only includes the regression
 90 | effects and does not include a residual matrix factorization in the linear predictor.
 91 | 
 92 | ```{r plot, fig.width = 7, fig.height = 5}
 93 | ggpubr::ggarrange(
 94 |   plot(gmf, type = "res-idx"),
 95 |   plot(gmf, type = "res-fit"),
 96 |   plot(gmf, type = "hist"),
 97 |   plot(gmf, type = "qq"),
 98 |   nrow = 2, ncol = 2, align = "hv")
 99 | ```
100 | 
101 | We now have a look at the spectrum of the residual matrices, i.e., the eigenvalues
102 | of the corresponding covariance matrix. However, instead of analyzing the actual
103 | values of the eigenvalues, we normalize them in such a way to plot the percentage of
104 | variance explained by each principal component.
105 | 
106 | ```{r spectrum, fig.width = 7, fig.height = 3}
107 | ggpubr::ggarrange(
108 |   screeplot(gmf, cumulative = FALSE, proportion = TRUE),
109 |   screeplot(gmf, cumulative = TRUE, proportion = TRUE),
110 |   nrow = 1, ncol = 2, align = "hv")
111 | ```
112 | 
113 | ## Observations vs fitted values
114 | 
115 | Plot the deviance and Pearson residuals using a heatmap. This could be helpful to
116 | graphically detect if there are some structured patterns in the matrix that have not 
117 | been captured by the model.
118 | 
119 | ```{r resid2, fig.width = 7, fig.height = 3.5}
120 | plt.dev = image(gmf, type = "deviance", resid = TRUE, symmetric = TRUE)
121 | plt.prs = image(gmf, type = "pearson", resid = TRUE, symmetric = TRUE)
122 | 
123 | ggpubr::ggarrange(
124 |   plt.dev + labs(x = "Species", y = "Environments", title = "Deviance residuals"), 
125 |   plt.prs + labs(x = "Species", y = "Environments", title = "Pearson residuals"),
126 |   nrow = 1, ncol = 2, common.legend = FALSE, legend = "bottom", align = "hv")
127 | ```
128 | 
129 | 
130 | 
131 | 


--------------------------------------------------------------------------------
/tests/testrcpp/test-family.R:
--------------------------------------------------------------------------------
  1 | # test-family.R
  2 | # author: Cristian Castiglione
  3 | # creation: 29/09/2023
  4 | # last change: 29/09/2023
  5 | 
  6 | ## Workspace setup ----
  7 | rm(list = ls())
  8 | graphics.off()
  9 | 
 10 | # Package compilation and import
 11 | devtools::load_all()
 12 | 
 13 | plot.link <- function (x, y, main = "") {
 14 |   plot(x, y, type = "l", xlab = "x", ylab = "link", main = main)
 15 | }
 16 | 
 17 | par(mfrow = c(1, 3))
 18 | 
 19 | ## Test: gaussian ----
 20 | {
 21 |   n = 100
 22 |   x = seq(from = -3, to = +3, length = n)
 23 |   y = seq(from = -3, to = +3, length = n)
 24 |   z = rep(0, length = n)
 25 |   plot.link(x, sgdGMF::cpp.gaussian.variance(x), main = "Gaussian \n variance")
 26 |   plot.link(x, sgdGMF::cpp.gaussian.initialize(y), main = "Gaussian \n initialize")
 27 |   plot.link(x, sgdGMF::cpp.gaussian.devresid(z, x), main = "Gaussian \n devresid")
 28 | 
 29 |   r.variance = gaussian()$variance(x)
 30 |   c.variance = drop(sgdGMF::cpp.gaussian.variance(x))
 31 |   print(all.equal(r.variance, c.variance))
 32 | 
 33 |   r.devresid = gaussian()$dev.resid(z, x, 1)
 34 |   c.devresid = drop(sgdGMF::cpp.gaussian.devresid(z, x))
 35 |   print(all.equal(r.devresid, c.devresid))
 36 | }
 37 | 
 38 | ## Test: binomial ----
 39 | {
 40 |   n = 100
 41 |   x = seq(from = +0.001, to = +0.999, length = n)
 42 |   y = c(rep(0, length = n/2), rep(1, length = n/2))
 43 |   z = rep(0, length = n)
 44 |   plot.link(x, sgdGMF::cpp.binomial.variance(x), main = "Binomial \n variance")
 45 |   plot.link(x, sgdGMF::cpp.binomial.initialize(y), main = "Binomial \n initialize")
 46 |   plot.link(x, sgdGMF::cpp.binomial.devresid(z, x), main = "Binomial \n devresid")
 47 | 
 48 |   r.variance = binomial()$variance(x)
 49 |   c.variance = drop(sgdGMF::cpp.binomial.variance(x))
 50 |   print(all.equal(r.variance, c.variance))
 51 | 
 52 |   r.devresid = binomial()$dev.resid(z, x, 1)
 53 |   c.devresid = drop(sgdGMF::cpp.binomial.devresid(z, x))
 54 |   print(all.equal(r.devresid, c.devresid))
 55 | }
 56 | 
 57 | ## Test: poisson ----
 58 | {
 59 |   n = 100
 60 |   x = seq(from = 1, to = 10, length = n)
 61 |   y = seq(from = 1, to = 20, by = 1)
 62 |   z = rep(3, length = n)
 63 |   plot.link(x, sgdGMF::cpp.poisson.variance(x), main = "Poisson \n variance")
 64 |   plot.link(y, sgdGMF::cpp.poisson.initialize(y), main = "Poisson \n initialize")
 65 |   plot.link(x, sgdGMF::cpp.poisson.devresid(z, x), main = "Poisson \n devresid")
 66 | 
 67 |   r.variance = poisson()$variance(x)
 68 |   c.variance = drop(sgdGMF::cpp.poisson.variance(x))
 69 |   print(all.equal(r.variance, c.variance))
 70 | 
 71 |   r.devresid = poisson()$dev.resid(z, x, 1)
 72 |   c.devresid = drop(sgdGMF::cpp.poisson.devresid(z, x))
 73 |   print(all.equal(r.devresid, c.devresid))
 74 | }
 75 | 
 76 | ## Test: gamma ----
 77 | {
 78 |   n = 100
 79 |   x = seq(from = 0.1, to = 5, length = n)
 80 |   y = seq(from = 0.1, to = 5, length = n)
 81 |   z = rep(1, length = n)
 82 |   plot.link(x, sgdGMF::cpp.gamma.variance(x), main = "Gamma \n variance")
 83 |   plot.link(x, sgdGMF::cpp.gamma.initialize(y), main = "Gamma \n initialize")
 84 |   plot.link(x, sgdGMF::cpp.gamma.devresid(z, x), main = "Gamma \n devresid")
 85 | 
 86 |   r.variance = Gamma()$variance(x)
 87 |   c.variance = drop(sgdGMF::cpp.gamma.variance(x))
 88 |   print(all.equal(r.variance, c.variance))
 89 | 
 90 |   r.devresid = Gamma()$dev.resid(z, x, 1)
 91 |   c.devresid = drop(sgdGMF::cpp.gamma.devresid(z, x))
 92 |   print(all.equal(r.devresid, c.devresid))
 93 | }
 94 | 
 95 | ## Test: negative binomial ----
 96 | {
 97 |   n = 100
 98 |   x = seq(from = 0.1, to = 5, length = n)
 99 |   y = seq(from = 0.1, to = 5, length = n)
100 |   z = rep(1, length = n)
101 |   plot.link(x, sgdGMF::cpp.negbinom.variance(x), main = "Negative Binomial \n variance")
102 |   plot.link(x, sgdGMF::cpp.negbinom.initialize(y), main = "Negative Binomial \n initialize")
103 |   plot.link(x, sgdGMF::cpp.negbinom.devresid(z, x), main = "Negative Binomial \n devresid")
104 | 
105 |   r.variance = MASS::neg.bin(10)$variance(x)
106 |   c.variance = drop(sgdGMF::cpp.negbinom.variance(x))
107 |   print(all.equal(r.variance, c.variance))
108 | 
109 |   r.devresid = MASS::neg.bin(10)$dev.resid(z, x, 1)
110 |   c.devresid = drop(sgdGMF::cpp.negbinom.devresid(z, x))
111 |   print(all.equal(r.devresid, c.devresid))
112 | }
113 | 
114 | 
115 | 


--------------------------------------------------------------------------------
/src/utils.h:
--------------------------------------------------------------------------------
  1 | // utils.h
  2 | // author: Cristian Castiglione
  3 | // creation: 28/09/2023
  4 | // last change: 19/11/2024
  5 | 
  6 | #ifndef UTILS_H
  7 | #define UTILS_H
  8 | 
  9 | #include <RcppArmadillo.h>
 10 | 
 11 | // Standard constants used in numerical computations
 12 | const double pi = M_PI;
 13 | const double invpi = 1.0 / M_PI;
 14 | const double log2pi = std::log(2.0 * M_PI);
 15 | const double sqrt2 = std::sqrt(2.0);
 16 | const double sqrtpi = std::sqrt(M_PI);
 17 | const double sqrt2pi = std::sqrt(2.0 * M_PI);
 18 | const double infty = arma::datum::inf;
 19 | 
 20 | // The following vectors of coefficients serve to approximate the
 21 | // quantile function of a standard normal distribution.
 22 | // For more details see: https://ar5iv.labs.arxiv.org/html/1002.0567
 23 | const arma::vec qn_inner_coef = {
 24 |     + 0.195740115269792, - 0.652871358365296, + 1.246899760652504,
 25 |     + 0.155331081623168, - 0.839293158122257};
 26 | const arma::vec qn_tails_coef = {
 27 |     +16.682320830719986527, + 4.120411523939115059, + 0.029814187308200211, 
 28 |     - 1.000182518730158122, + 7.173787663925508066, + 8.759693508958633869};
 29 | 
 30 | namespace utils {
 31 | 
 32 | // Maximum relative difference between two scalars/vectors
 33 | double absmax (const double & u, const double & v);
 34 | double absmax (const arma::vec & u, const arma::vec & v);
 35 | 
 36 | // Truncated representation of a vector/matrix x, such that a <= x[i,j] <= b
 37 | void trim (arma::mat & x, const double & a, const double & b);
 38 | void trim (arma::mat & x, const double & a, const double & b, const arma::uvec & idx);
 39 | void trim (arma::mat & x, const double & a, const double & b, const arma::uvec & idx, const arma::uvec & idy);
 40 | 
 41 | // All and any operator for boolean matrices
 42 | bool all(const arma::umat & x);
 43 | bool any(const arma::umat & x);
 44 | 
 45 | // Lp norm of a vector/matrix
 46 | double norm (const arma::mat & x);
 47 | double norm (const arma::mat & x, const double & p);
 48 | 
 49 | // Pointwise maximum between 0 and x (to the power of p)
 50 | arma::mat max0 (const arma::mat & x);
 51 | arma::mat max0 (const arma::mat & x, const double & p);
 52 | 
 53 | // Stable calculation of x*log(x), with 0*log(0) = 0;
 54 | arma::mat xlogx (const arma::mat & x);
 55 | 
 56 | // Stable calculation of log(1 + exp(x))
 57 | arma::mat log1pexp (const arma::mat & x);
 58 | 
 59 | // Stable calculation of log(1 - exp(-x))
 60 | arma::mat log1mexp (const arma::mat & x);
 61 | 
 62 | // Logistic transformation
 63 | arma::mat logit (const arma::mat & x);
 64 | 
 65 | // Inverse of the logistic transformation
 66 | arma::mat expit (const arma::mat & x);
 67 | arma::mat expit2 (const arma::mat & x);
 68 | arma::mat expitn (const arma::mat & x, const double & n);
 69 | 
 70 | // Complementary log-log and exp-exp transformation
 71 | arma::mat cloglog (const arma::mat & x);
 72 | arma::mat cexpexp (const arma::mat & x);
 73 | 
 74 | // Log-log and exp-exp transformations
 75 | arma::mat loglog (const arma::mat & x);
 76 | arma::mat expexp (const arma::mat & x);
 77 | 
 78 | // Standard Gaussian probability density (pdfn), cumulative distribution (cdfn) and quantile (qdfn) functions
 79 | arma::mat pdfn (const arma::mat & x);
 80 | arma::mat cdfn (const arma::mat & x);
 81 | arma::mat qdfn (const arma::mat & p);
 82 | 
 83 | // Logarithm of the standard Gaussian probability density and cumulative distribution functions
 84 | arma::mat logpdfn (const arma::mat & x);
 85 | arma::mat logcdfn (const arma::mat & x);
 86 | 
 87 | // Gamma function
 88 | arma::mat gamma (const arma::mat & x);
 89 | arma::mat loggamma (const arma::mat & x);
 90 | arma::mat digamma (const arma::mat & x);
 91 | arma::mat trigamma (const arma::mat & x);
 92 | 
 93 | // Beta function
 94 | arma::mat beta (const arma::mat & x, const arma::mat & y);
 95 | arma::mat logbeta (const arma::mat & x, const arma::mat & y);
 96 | arma::mat dibeta (const arma::mat & x, const arma::mat & y);
 97 | arma::mat tribeta (const arma::mat & x, const arma::mat & y);
 98 | 
 99 | // Hinge loss function
100 | arma::mat hinge (const arma::mat & x);
101 | 
102 | // Delta function
103 | arma::mat dirac (const arma::mat & x, const double & a);
104 | 
105 | // Step function
106 | arma::mat step (const arma::mat & x, const double & a, const bool & lower);
107 | 
108 | // Extract the half-vectorization of the square matrix A
109 | arma::vec vech (const arma::mat & A);
110 | 
111 | }
112 | 
113 | #endif
114 | 


--------------------------------------------------------------------------------
/tests/testcpp/test-link.cpp:
--------------------------------------------------------------------------------
 1 | // test-link.cpp
 2 | // author: Cristian Castiglione
 3 | // creation: 29/09/2023
 4 | // last change: 29/09/2023
 5 | 
 6 | #include "link.h"
 7 | 
 8 | using namespace glm;
 9 | 
10 | //' @keywords internal
11 | // [[Rcpp::export("cpp.link.identity.linkfun")]]
12 | arma::vec cpp_link_identity_linkfun (const arma::vec & mu) {Identity link; return link.linkfun(mu);}
13 | //' @keywords internal
14 | // [[Rcpp::export("cpp.link.identity.linkinv")]]
15 | arma::vec cpp_link_identity_linkinv (const arma::vec & eta) {Identity link; return link.linkinv(eta);}
16 | //' @keywords internal
17 | // [[Rcpp::export("cpp.link.identity.mueta")]]
18 | arma::vec cpp_link_identity_mueta (const arma::vec & eta) {Identity link; return link.mueta(eta);}
19 | 
20 | //' @keywords internal
21 | // [[Rcpp::export("cpp.link.logit.linkfun")]]
22 | arma::vec cpp_link_logit_linkfun (const arma::vec & mu) {Logit link; return link.linkfun(mu);}
23 | //' @keywords internal
24 | // [[Rcpp::export("cpp.link.logit.linkinv")]]
25 | arma::vec cpp_link_logit_linkinv (const arma::vec & eta) {Logit link; return link.linkinv(eta);}
26 | //' @keywords internal
27 | // [[Rcpp::export("cpp.link.logit.mueta")]]
28 | arma::vec cpp_link_logit_mueta (const arma::vec & eta) {Logit link; return link.mueta(eta);}
29 | 
30 | //' @keywords internal
31 | // [[Rcpp::export("cpp.link.probit.linkfun")]]
32 | arma::vec cpp_link_probit_linkfun (const arma::vec & mu) {Probit link; return link.linkfun(mu);}
33 | //' @keywords internal
34 | // [[Rcpp::export("cpp.link.probit.linkinv")]]
35 | arma::vec cpp_link_probit_linkinv (const arma::vec & eta) {Probit link; return link.linkinv(eta);}
36 | //' @keywords internal
37 | // [[Rcpp::export("cpp.link.probit.mueta")]]
38 | arma::vec cpp_link_probit_mueta (const arma::vec & eta) {Probit link; return link.mueta(eta);}
39 | 
40 | //' @keywords internal
41 | // [[Rcpp::export("cpp.link.cauchy.linkfun")]]
42 | arma::vec cpp_link_cauchy_linkfun (const arma::vec & mu) {Cauchy link; return link.linkfun(mu);}
43 | //' @keywords internal
44 | // [[Rcpp::export("cpp.link.cauchy.linkinv")]]
45 | arma::vec cpp_link_cauchy_linkinv (const arma::vec & eta) {Cauchy link; return link.linkinv(eta);}
46 | //' @keywords internal
47 | // [[Rcpp::export("cpp.link.cauchy.mueta")]]
48 | arma::vec cpp_link_cauchy_mueta (const arma::vec & eta) {Cauchy link; return link.mueta(eta);}
49 | 
50 | //' @keywords internal
51 | // [[Rcpp::export("cpp.link.cloglog.linkfun")]]
52 | arma::vec cpp_link_cloglog_linkfun (const arma::vec & mu) {cLogLog link; return link.linkfun(mu);}
53 | //' @keywords internal
54 | // [[Rcpp::export("cpp.link.cloglog.linkinv")]]
55 | arma::vec cpp_link_cloglog_linkinv (const arma::vec & eta) {cLogLog link; return link.linkinv(eta);}
56 | //' @keywords internal
57 | // [[Rcpp::export("cpp.link.cloglog.mueta")]]
58 | arma::vec cpp_link_cloglog_mueta (const arma::vec & eta) {cLogLog link; return link.mueta(eta);}
59 | 
60 | //' @keywords internal
61 | // [[Rcpp::export("cpp.link.log.linkfun")]]
62 | arma::vec cpp_link_log_linkfun (const arma::vec & mu) {Log link; return link.linkfun(mu);}
63 | //' @keywords internal
64 | // [[Rcpp::export("cpp.link.log.linkinv")]]
65 | arma::vec cpp_link_log_linkinv (const arma::vec & eta) {Log link; return link.linkinv(eta);}
66 | //' @keywords internal
67 | // [[Rcpp::export("cpp.link.log.mueta")]]
68 | arma::vec cpp_link_log_mueta (const arma::vec & eta) {Log link; return link.mueta(eta);}
69 | 
70 | //' @keywords internal
71 | // [[Rcpp::export("cpp.link.inverse.linkfun")]]
72 | arma::vec cpp_link_inverse_linkfun (const arma::vec & mu) {Inverse link; return link.linkfun(mu);}
73 | //' @keywords internal
74 | // [[Rcpp::export("cpp.link.inverse.linkinv")]]
75 | arma::vec cpp_link_inverse_linkinv (const arma::vec & eta) {Inverse link; return link.linkinv(eta);}
76 | //' @keywords internal
77 | // [[Rcpp::export("cpp.link.inverse.mueta")]]
78 | arma::vec cpp_link_inverse_mueta (const arma::vec & eta) {Inverse link; return link.mueta(eta);}
79 | 
80 | //' @keywords internal
81 | // [[Rcpp::export("cpp.link.sqrt.linkfun")]]
82 | arma::vec cpp_link_sqrt_linkfun (const arma::vec & mu) {Sqrt link; return link.linkfun(mu);}
83 | //' @keywords internal
84 | // [[Rcpp::export("cpp.link.sqrt.linkinv")]]
85 | arma::vec cpp_link_sqrt_linkinv (const arma::vec & eta) {Sqrt link; return link.linkinv(eta);}
86 | //' @keywords internal
87 | // [[Rcpp::export("cpp.link.sqrt.mueta")]]
88 | arma::vec cpp_link_sqrt_mueta (const arma::vec & eta) {Sqrt link; return link.mueta(eta);}
89 | 


--------------------------------------------------------------------------------
/man/residuals.sgdgmf.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/sgdGMF-class.R
  3 | \name{residuals.sgdgmf}
  4 | \alias{residuals.sgdgmf}
  5 | \alias{resid.sgdgmf}
  6 | \title{Extract the residuals of a GMF model}
  7 | \usage{
  8 | \method{residuals}{sgdgmf}(
  9 |   object,
 10 |   ...,
 11 |   type = c("deviance", "pearson", "working", "response", "link"),
 12 |   partial = FALSE,
 13 |   normalize = FALSE,
 14 |   fillna = FALSE,
 15 |   spectrum = FALSE,
 16 |   ncomp = 50
 17 | )
 18 | 
 19 | \method{resid}{sgdgmf}(
 20 |   object,
 21 |   ...,
 22 |   type = c("deviance", "pearson", "working", "response", "link"),
 23 |   partial = FALSE,
 24 |   normalize = FALSE,
 25 |   fillna = FALSE,
 26 |   spectrum = FALSE,
 27 |   ncomp = 50
 28 | )
 29 | }
 30 | \arguments{
 31 | \item{object}{an object of class \code{sgdgmf}}
 32 | 
 33 | \item{...}{further arguments passed to or from other methods}
 34 | 
 35 | \item{type}{the type of residuals which should be returned}
 36 | 
 37 | \item{partial}{if \code{TRUE}, computes the residuals excluding the matrix factorization from the linear predictor}
 38 | 
 39 | \item{normalize}{if \code{TRUE}, standardize the residuals column-by-column}
 40 | 
 41 | \item{fillna}{if \code{TRUE}, fills \code{NA} values column-by-column}
 42 | 
 43 | \item{spectrum}{if \code{TRUE}, returns the eigenvalues of the residual covariance matrix}
 44 | 
 45 | \item{ncomp}{number of eigenvalues to be calculated (only if \code{spectrum=TRUE})}
 46 | }
 47 | \value{
 48 | If \code{spectrum=FALSE}, a matrix containing the selected residuals.
 49 | If \code{spectrum=TRUE}, a list containing the residuals (\code{res}), the first \code{ncomp}
 50 | eigenvalues of the residual covariance matrix (\code{lambdas}), the variance explained by the first
 51 | \code{ncomp} principal components of the residuals (\code{explained.var}), the variance not
 52 | explained by the first \code{ncomp} principal components of the residuals (\code{residual.var}),
 53 | and the total variance of the residuals (\code{total.var}).
 54 | }
 55 | \description{
 56 | Extract the residuals of a GMF model and, if required, compute the eigenvalues
 57 | of the residuals covariance/correlation matrix.
 58 | Moreover, if required, return the partial residual of the model obtained by
 59 | excluding the matrix decomposition from the linear predictor.
 60 | }
 61 | \details{
 62 | Let \eqn{g(\mu) = \eta = X B^\top + \Gamma Z^\top + U V^\top} be the linear predictor of a
 63 | GMF model. Let \eqn{R = (r_{ij})} be the corresponding residual matrix.
 64 | The following residuals can be considered:
 65 | \itemize{
 66 | \item deviance: \eqn{r_{ij}^{_D} = \textrm{sign}(y_{ij} - \mu_{ij}) \sqrt{D(y_{ij}, \mu_{ij})}};
 67 | \item Pearson: \eqn{r_{ij}^{_P} = (y_{ij} - \mu_{ij}) / \sqrt{\nu(\mu_{ij})}};
 68 | \item working: \eqn{r_{ij}^{_W} = (y_{ij} - \mu_{ij}) / \{g'(\mu_{ij}) \,\nu(\mu_{ij})\}};
 69 | \item response: \eqn{r_{ij}^{_R} = y_{ij} - \mu_{ij}};
 70 | \item link: \eqn{r_{ij}^{_G} = g(y_{ij}) - \eta_{ij}}.
 71 | }
 72 | If \code{partial=TRUE}, \eqn{\mu} is computed excluding the latent matrix decomposition
 73 | from the linear predictor, so as to obtain the partial residuals.
 74 | 
 75 | Let \eqn{\Sigma} be the empirical variance-covariance matrix of \eqn{R}, being
 76 | \eqn{\sigma_{ij} = \textrm{Cov}(r_{:i}, r_{:j})}. Then, the latent spectrum of
 77 | the model is the collection of eigenvalues of \eqn{\Sigma}.
 78 | 
 79 | Notice that, in case of Gaussian data, the latent spectrum corresponds to the principal
 80 | component analysis on the regression residuals, whose eigenvalues can be used to
 81 | infer the amount of variance explained by each principal component. Similarly,
 82 | we can use the (partial) latent spectrum in non-Gaussian data settings to infer
 83 | the correct number of principal components to include into the GMF model or to
 84 | detect some residual dependence structures not already explained by the model.
 85 | }
 86 | \examples{
 87 | # Load the sgdGMF package
 88 | library(sgdGMF)
 89 | 
 90 | # Generate data from a Poisson model
 91 | data = sim.gmf.data(n = 100, m = 20, ncomp = 5, family = poisson())
 92 | 
 93 | # Fit a GMF model with 3 latent factors
 94 | gmf = sgdgmf.fit(data$Y, ncomp = 3, family = poisson())
 95 | 
 96 | # Get the deviance residuals of a GMF model
 97 | str(residuals(gmf)) # returns the overall deviance residuals
 98 | str(residuals(gmf, partial = TRUE)) # returns the partial residuals
 99 | str(residuals(gmf, spectrum = TRUE)) # returns the eigenvalues of the residual var-cov matrix
100 | 
101 | }
102 | 


--------------------------------------------------------------------------------
/tests/testcpp/test-family.cpp:
--------------------------------------------------------------------------------
  1 | // test-family.cpp
  2 | // author: Cristian Castiglione
  3 | // creation: 28/09/2023
  4 | // last change: 30/09/2023
  5 | 
  6 | #include "family.h"
  7 | 
  8 | using namespace glm;
  9 | 
 10 | //' @keywords internal
 11 | // [[Rcpp::export("cpp.family.gaussian.variance")]]
 12 | arma::vec cpp_gaussian_variance (const arma::vec & mu) {
 13 |     std::unique_ptr<Link> ptr = std::make_unique<Identity>();
 14 |     Gaussian f(ptr);
 15 |     return f.variance(mu);
 16 | }
 17 | 
 18 | //' @keywords internal
 19 | // [[Rcpp::export("cpp.family.gaussian.initialize")]]
 20 | arma::vec cpp_gaussian_initialize (const arma::vec & y) {
 21 |     std::unique_ptr<Link> ptr = std::make_unique<Identity>();
 22 |     Gaussian f(ptr);
 23 |     return f.initialize(y);
 24 | }
 25 | 
 26 | //' @keywords internal
 27 | // [[Rcpp::export("cpp.family.gaussian.devresid")]]
 28 | arma::vec cpp_gaussian_devresid (const arma::vec & y, const arma::vec & mu) {
 29 |     std::unique_ptr<Link> ptr = std::make_unique<Identity>();
 30 |     Gaussian f(ptr);
 31 |     return f.devresid(y, mu);
 32 | }
 33 | 
 34 | //' @keywords internal
 35 | // [[Rcpp::export("cpp.family.binomial.variance")]]
 36 | arma::vec cpp_binomial_variance (const arma::vec & mu) {
 37 |     std::unique_ptr<Link> ptr = std::make_unique<Logit>();
 38 |     Binomial f(ptr);
 39 |     return f.variance(mu);
 40 | }
 41 | 
 42 | //' @keywords internal
 43 | // [[Rcpp::export("cpp.family.binomial.initialize")]]
 44 | arma::vec cpp_binomial_initialize (const arma::vec & y) {
 45 |     std::unique_ptr<Link> ptr = std::make_unique<Logit>();
 46 |     Binomial f(ptr);
 47 |     return f.initialize(y);
 48 | }
 49 | 
 50 | //' @keywords internal
 51 | // [[Rcpp::export("cpp.family.binomial.devresid")]]
 52 | arma::vec cpp_binomial_devresid (const arma::vec & y, const arma::vec & mu) {
 53 |     std::unique_ptr<Link> ptr = std::make_unique<Logit>();
 54 |     Binomial f(ptr);
 55 |     return f.devresid(y, mu);
 56 | }
 57 | 
 58 | //' @keywords internal
 59 | // [[Rcpp::export("cpp.family.poisson.variance")]]
 60 | arma::vec cpp_poisson_variance (const arma::vec & mu) {
 61 |     std::unique_ptr<Link> ptr = std::make_unique<Log>();
 62 |     Poisson f(ptr);
 63 |     return f.variance(mu);
 64 | }
 65 | 
 66 | //' @keywords internal
 67 | // [[Rcpp::export("cpp.family.poisson.initialize")]]
 68 | arma::vec cpp_poisson_initialize (const arma::vec & y) {
 69 |     std::unique_ptr<Link> ptr = std::make_unique<Log>();
 70 |     Poisson f(ptr);
 71 |     return f.initialize(y);
 72 | }
 73 | 
 74 | //' @keywords internal
 75 | // [[Rcpp::export("cpp.family.poisson.devresid")]]
 76 | arma::vec cpp_poisson_devresid (const arma::vec & y, const arma::vec & mu) {
 77 |     std::unique_ptr<Link> ptr = std::make_unique<Log>();
 78 |     Poisson f(ptr);
 79 |     return f.devresid(y, mu);
 80 | }
 81 | 
 82 | //' @keywords internal
 83 | // [[Rcpp::export("cpp.family.gamma.variance")]]
 84 | arma::vec cpp_gamma_variance (const arma::vec & mu) {
 85 |     std::unique_ptr<Link> ptr = std::make_unique<Log>();
 86 |     Gamma f(ptr);
 87 |     return f.variance(mu);
 88 | }
 89 | 
 90 | //' @keywords internal
 91 | // [[Rcpp::export("cpp.family.gamma.initialize")]]
 92 | arma::vec cpp_gamma_initialize (const arma::vec & y) {
 93 |     std::unique_ptr<Link> ptr = std::make_unique<Log>();
 94 |     Gamma f(ptr);
 95 |     return f.initialize(y);
 96 | }
 97 | 
 98 | //' @keywords internal
 99 | // [[Rcpp::export("cpp.family.gamma.devresid")]]
100 | arma::vec cpp_gamma_devresid (const arma::vec & y, const arma::vec & mu) {
101 |     std::unique_ptr<Link> ptr = std::make_unique<Log>();
102 |     Gamma f(ptr);
103 |     return f.devresid(y, mu);
104 | }
105 | 
106 | //' @keywords internal
107 | // [[Rcpp::export("cpp.family.negbinom.variance")]]
108 | arma::vec cpp_negbinom_variance (const arma::vec & mu) {
109 |     std::unique_ptr<Link> ptr = std::make_unique<Log>();
110 |     NegativeBinomial f(ptr);
111 |     return f.variance(mu);
112 | }
113 | 
114 | //' @keywords internal
115 | // [[Rcpp::export("cpp.family.negbinom.initialize")]]
116 | arma::vec cpp_negbinom_initialize (const arma::vec & y) {
117 |     std::unique_ptr<Link> ptr = std::make_unique<Log>();
118 |     NegativeBinomial f(ptr);
119 |     return f.initialize(y);
120 | }
121 | 
122 | //' @keywords internal
123 | // [[Rcpp::export("cpp.family.negbinom.devresid")]]
124 | arma::vec cpp_negbinom_devresid (const arma::vec & y, const arma::vec & mu) {
125 |     std::unique_ptr<Link> ptr = std::make_unique<Log>();
126 |     NegativeBinomial f(ptr);
127 |     return f.devresid(y, mu);
128 | }


--------------------------------------------------------------------------------
/man/sgdgmf.rank.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/eigengap.R
  3 | \name{sgdgmf.rank}
  4 | \alias{sgdgmf.rank}
  5 | \title{Rank selection via eigenvalue-gap methods}
  6 | \usage{
  7 | sgdgmf.rank(
  8 |   Y,
  9 |   X = NULL,
 10 |   Z = NULL,
 11 |   maxcomp = ncol(Y),
 12 |   family = gaussian(),
 13 |   weights = NULL,
 14 |   offset = NULL,
 15 |   method = c("evr", "onatski", "act", "oht"),
 16 |   type.reg = c("ols", "glm"),
 17 |   type.res = c("deviance", "pearson", "working", "link"),
 18 |   normalize = FALSE,
 19 |   maxiter = 10,
 20 |   parallel = FALSE,
 21 |   nthreads = 1,
 22 |   return.eta = FALSE,
 23 |   return.mu = FALSE,
 24 |   return.res = FALSE,
 25 |   return.cov = FALSE
 26 | )
 27 | }
 28 | \arguments{
 29 | \item{Y}{matrix of responses (\eqn{n \times m})}
 30 | 
 31 | \item{X}{matrix of row-specific fixed effects (\eqn{n \times p})}
 32 | 
 33 | \item{Z}{matrix of column-specific fixed effects (\eqn{q \times m})}
 34 | 
 35 | \item{maxcomp}{maximum number of eigenvalues to compute}
 36 | 
 37 | \item{family}{a family as in the \code{\link{glm}} interface (default \code{gaussian()})}
 38 | 
 39 | \item{weights}{matrix of optional weights (\eqn{n \times m})}
 40 | 
 41 | \item{offset}{matrix of optional offsets (\eqn{n \times m})}
 42 | 
 43 | \item{method}{rank selection method}
 44 | 
 45 | \item{type.reg}{regression method to be used to profile out the covariate effects}
 46 | 
 47 | \item{type.res}{residual type to be decomposed}
 48 | 
 49 | \item{normalize}{if \code{TRUE}, standardize column-by-column the residual matrix}
 50 | 
 51 | \item{maxiter}{maximum number of iterations}
 52 | 
 53 | \item{parallel}{if \code{TRUE}, allows for parallel computing using \code{foreach}}
 54 | 
 55 | \item{nthreads}{number of cores to be used in parallel (only if \code{parallel=TRUE})}
 56 | 
 57 | \item{return.eta}{if \code{TRUE}, return the linear predictor matrix}
 58 | 
 59 | \item{return.mu}{if \code{TRUE}, return the fitted value matrix}
 60 | 
 61 | \item{return.res}{if \code{TRUE}, return the residual matrix}
 62 | 
 63 | \item{return.cov}{if \code{TRUE}, return the covariance matrix of the residuals}
 64 | }
 65 | \value{
 66 | A list containing the \code{method}, the selected latent rank \code{ncomp},
 67 | and the eigenvalues used to select the latent rank \code{lambdas}.
 68 | Additionally, if required, the output list will also provide the linear predictor
 69 | \code{eta}, the predicted mean matrix \code{mu}, the residual matrix \code{res}, and
 70 | the implied residual covariance matrix \code{covmat}.
 71 | }
 72 | \description{
 73 | Select the number of significant principal components of a GMF model via
 74 | exploitation of eigenvalue-gap methods
 75 | }
 76 | \examples{
 77 | library(sgdGMF)
 78 | 
 79 | # Set the data dimensions
 80 | n = 100; m = 20; d = 5
 81 | 
 82 | # Generate data using Poisson, Binomial and Gamma models
 83 | data_pois = sim.gmf.data(n = n, m = m, ncomp = d, family = poisson())
 84 | data_bin = sim.gmf.data(n = n, m = m, ncomp = d, family = binomial())
 85 | data_gam = sim.gmf.data(n = n, m = m, ncomp = d, family = Gamma(link = "log"), dispersion = 0.25)
 86 | 
 87 | # Select the number of latent factors for each model via eigenvalue-gap methods
 88 | ncomp_pois = sgdgmf.rank(data_pois$Y, family = poisson(), normalize = TRUE)
 89 | ncomp_bin = sgdgmf.rank(data_bin$Y, family = binomial(), normalize = TRUE)
 90 | ncomp_gam = sgdgmf.rank(data_gam$Y, family = Gamma(link = "log"), normalize = TRUE)
 91 | 
 92 | # Get the selected number of components
 93 | print(paste("Poisson:", ncomp_pois$ncomp))
 94 | print(paste("Binomial:", ncomp_bin$ncomp))
 95 | print(paste("Gamma:", ncomp_gam$ncomp))
 96 | 
 97 | # Plot the screeplot used for the component determination
 98 | oldpar = par(no.readonly = TRUE)
 99 | par(mfrow = c(3,1))
100 | barplot(ncomp_pois$lambdas, main = "Poisson screeplot")
101 | barplot(ncomp_bin$lambdas, main = "Binomial screeplot")
102 | barplot(ncomp_gam$lambdas, main = "Gamma screeplot")
103 | par(oldpar)
104 | 
105 | }
106 | \references{
107 | Onatski, A. (2010).
108 | \emph{Determining the number of factors from empirical distribution of eigenvalues.}
109 | Review of Economics and Statistics, 92(4): 1004-1016
110 | 
111 | Ahn, S.C., Horenstein, A.R. (2013).
112 | \emph{Eigenvalue ratio test for the number of factors.}
113 | Econometrica, 81, 1203-1227
114 | 
115 | Gavish, M., Donoho, D.L. (2014)
116 | \emph{The optimal hard thresholding for singular values is 4/sqrt(3).}
117 | IEEE Transactions on Information Theory, 60(8): 5040--5053
118 | 
119 | Fan, J., Guo, J. and Zheng, S. (2020).
120 | \emph{Estimating number of factors by adjusted eigenvalues thresholding.}
121 | Journal of the American Statistical Association, 117(538): 852--861
122 | 
123 | Wang, L. and Carvalho, L. (2023).
124 | \emph{Deviance matrix factorization.}
125 | Electronic Journal of Statistics, 17(2): 3762-3810
126 | }
127 | 


--------------------------------------------------------------------------------
/tests/testthat/test-deviance.R:
--------------------------------------------------------------------------------
  1 | # file: test-deviance.R
  2 | # author: Cristian Castiglione
  3 | # creation: 05/02/2024
  4 | # last change: 25/02/2024
  5 | 
  6 | testthat::test_that("Elementwise Gaussian deviance", {
  7 |   n = 100; m = 10
  8 | 
  9 |   mu = matrix(rnorm(n*m), nrow = n, ncol = m)
 10 |   y = matrix(rnorm(n*m, mean = mu, sd = .1), nrow = n, ncol = m)
 11 |   dev = pointwise.deviance(mu, y, gaussian())
 12 | 
 13 |   testthat::expect_equal(dim(dev), c(n, m))
 14 |   testthat::expect_true(all(dev >= 0))
 15 |   testthat::expect_true(all(is.finite(dev)))
 16 |   testthat::expect_false(anyNA(dev))
 17 | })
 18 | 
 19 | testthat::test_that("Elementwise Poisson deviance", {
 20 |   n = 100; m = 10
 21 | 
 22 |   mu = matrix(exp(rnorm(n*m)), nrow = n, ncol = m)
 23 |   y = matrix(rpois(n*m, lambda = mu), nrow = n, ncol = m)
 24 |   dev = pointwise.deviance(mu, y, poisson())
 25 | 
 26 |   testthat::expect_equal(dim(dev), c(n, m))
 27 |   testthat::expect_true(all(dev >= 0))
 28 |   testthat::expect_true(all(is.finite(dev)))
 29 |   testthat::expect_false(anyNA(dev))
 30 | })
 31 | 
 32 | testthat::test_that("Elementwise Binomial deviance", {
 33 |   n = 100; m = 10
 34 | 
 35 |   mu = matrix(plogis(rnorm(n*m)), nrow = n, ncol = m)
 36 |   y = matrix(rbinom(n*m, size = 1, prob = mu), nrow = n, ncol = m)
 37 |   dev = pointwise.deviance(mu, y, binomial())
 38 | 
 39 |   testthat::expect_equal(dim(dev), c(n, m))
 40 |   testthat::expect_true(all(dev >= 0))
 41 |   testthat::expect_true(all(is.finite(dev)))
 42 |   testthat::expect_false(anyNA(dev))
 43 | })
 44 | 
 45 | testthat::test_that("Elementwise Gamma deviance", {
 46 |   n = 100; m = 10
 47 | 
 48 |   mu = matrix(exp(rnorm(n*m)), nrow = n, ncol = m)
 49 |   y = matrix(rgamma(n*m, shape = 1, rate = mu), nrow = n, ncol = m)
 50 |   dev = pointwise.deviance(mu, y, Gamma())
 51 | 
 52 |   testthat::expect_equal(dim(dev), c(n, m))
 53 |   testthat::expect_true(all(dev >= 0))
 54 |   testthat::expect_true(all(is.finite(dev)))
 55 |   testthat::expect_false(anyNA(dev))
 56 | })
 57 | 
 58 | testthat::test_that("Elementwise deviance with missing", {
 59 |   n = 100; m = 10; f = floor(.3 * n * m)
 60 |   # Draw up to 30% of the (row, col) cells, duplicates removed, to be set as missing
 61 |   mask = unique(cbind(
 62 |     sample(1:n, size = f, replace = TRUE),
 63 |     sample(1:m, size = f, replace = TRUE)))
 64 |   # Simulate a positive mean matrix and Gamma responses, then blank out the masked cells
 65 |   mu = matrix(exp(rnorm(n*m)), nrow = n, ncol = m)
 66 |   y = matrix(rgamma(n*m, shape = 1, rate = mu), nrow = n, ncol = m)
 67 |   y[mask] = NA
 68 | 
 69 |   dev = pointwise.deviance(mu, y, Gamma())
 70 |   # Check observed cells only: dev[-rows, -cols] drops whole rows/columns, leaving an empty selection
 71 |   testthat::expect_equal(dim(dev), c(n, m))
 72 |   testthat::expect_true(all(dev[!is.na(y)] >= 0))
 73 |   testthat::expect_true(all(is.finite(dev[!is.na(y)])))
 74 |   testthat::expect_equal(sum(is.na(dev)), nrow(mask))
 75 | })
 76 | 
 77 | testthat::test_that("Matrix Gaussian deviance", {
 78 |   n = 100; m = 10
 79 | 
 80 |   mu = matrix(rnorm(n*m), nrow = n, ncol = m)
 81 |   y = matrix(rnorm(n*m, mean = mu, sd = .1), nrow = n, ncol = m)
 82 |   dev = matrix.deviance(mu, y, gaussian())
 83 | 
 84 |   testthat::expect_true(is.finite(dev))
 85 |   testthat::expect_true(dev >= 0)
 86 | })
 87 | 
 88 | testthat::test_that("Matrix Poisson deviance", {
 89 |   n = 100; m = 10
 90 | 
 91 |   mu = matrix(exp(rnorm(n*m)), nrow = n, ncol = m)
 92 |   y = matrix(rpois(n*m, lambda = mu), nrow = n, ncol = m)
 93 |   dev = matrix.deviance(mu, y, poisson())
 94 | 
 95 |   testthat::expect_true(is.finite(dev))
 96 |   testthat::expect_true(dev >= 0)
 97 | })
 98 | 
 99 | testthat::test_that("Matrix Binomial deviance", {
100 |   n = 100; m = 10
101 | 
102 |   mu = matrix(plogis(rnorm(n*m)), nrow = n, ncol = m)
103 |   y = matrix(rbinom(n*m, size = 1, prob = mu), nrow = n, ncol = m)
104 |   dev = matrix.deviance(mu, y, binomial())
105 | 
106 |   testthat::expect_true(is.finite(dev))
107 |   testthat::expect_true(dev >= 0)
108 | })
109 | 
110 | testthat::test_that("Matrix Gamma deviance", {
111 |   n = 100; m = 10
112 | 
113 |   mu = matrix(exp(rnorm(n*m)), nrow = n, ncol = m)
114 |   y = matrix(rgamma(n*m, shape = 1, rate = mu), nrow = n, ncol = m)
115 |   dev = matrix.deviance(mu, y, Gamma())
116 | 
117 |   testthat::expect_true(is.finite(dev))
118 |   testthat::expect_true(dev >= 0)
119 | })
120 | 
121 | 
122 | testthat::test_that("Matrix deviance with missing", {
123 |   n = 100; m = 10; f = floor(.3 * n * m)
124 | 
125 |   mask = unique(cbind(
126 |     sample(1:n, size = f, replace = TRUE),
127 |     sample(1:m, size = f, replace = TRUE)))
128 | 
129 |   mu = matrix(exp(rnorm(n*m)), nrow = n, ncol = m)
130 |   y = matrix(rgamma(n*m, shape = 1, rate = mu), nrow = n, ncol = m)
131 |   y[mask] = NA
132 | 
133 |   dev = matrix.deviance(mu, y, Gamma())
134 | 
135 |   testthat::expect_true(is.finite(dev))
136 |   testthat::expect_false(is.na(dev))
137 |   testthat::expect_true(dev >= 0)
138 | })
139 | 
140 | testthat::test_that("Frobenious matrix penalty", {
141 |   n = 100; m = 3
142 | 
143 |   U = matrix(rnorm(n*m), nrow = n, ncol = m)
144 |   lambda = rexp(m)
145 |   pen = matrix.penalty(U, lambda)
146 | 
147 |   testthat::expect_equal(pen, sum((U * U) %*% diag(lambda)))
148 | })
149 | 
150 | 


--------------------------------------------------------------------------------
/tests/testcpp/test-utils.cpp:
--------------------------------------------------------------------------------
  1 | // test-utils.cpp
  2 | // author: Cristian Castiglione
  3 | // creation: 29/09/2023
  4 | // last change: 29/09/2023
  5 | 
  6 | #include "utils.h"
  7 | 
  8 | //' @keywords internal
  9 | // [[Rcpp::export("cpp.utils.dabsmax")]]
 10 | double cpp_dabsmax (const double & u, const double & v) {return utils::absmax(u, v);}
 11 | 
 12 | //' @keywords internal
 13 | // [[Rcpp::export("cpp.utils.vabsmax")]]
 14 | double cpp_vabsmax (const arma::vec & u, const arma::vec & v) {return utils::absmax(u, v);}
 15 | 
 16 | //' @keywords internal
 17 | // [[Rcpp::export("cpp.utils.trim")]]
 18 | arma::vec cpp_trim (const arma::vec & x, double a, double b) {
 19 |     arma::vec y = x;
 20 |     utils::trim(y, a, b);
 21 |     return y;
 22 | }
 23 | 
 24 | //' @keywords internal
 25 | // [[Rcpp::export("cpp.utils.xlogx")]]
 26 | arma::vec cpp_xlogx (const arma::vec & x) {return utils::xlogx(x);}
 27 | 
 28 | //' @keywords internal
 29 | // [[Rcpp::export("cpp.utils.log1pexp")]]
 30 | arma::vec cpp_log1pexp (const arma::vec & x) {return utils::log1pexp(x);}
 31 | 
 32 | //' @keywords internal
 33 | // [[Rcpp::export("cpp.utils.log1mexp")]]
 34 | arma::vec cpp_log1mexp (const arma::vec & x) {return utils::log1mexp(x);}
 35 | 
 36 | //' @keywords internal
 37 | // [[Rcpp::export("cpp.utils.logit")]]
 38 | arma::vec cpp_logit (const arma::vec & x) {return utils::logit(x);}
 39 | 
 40 | //' @keywords internal
 41 | // [[Rcpp::export("cpp.utils.expit")]]
 42 | arma::vec cpp_expit (const arma::vec & x) {return utils::expit(x);}
 43 | 
 44 | //' @keywords internal
 45 | // [[Rcpp::export("cpp.utils.expit2")]]
 46 | arma::vec cpp_expit2 (const arma::vec & x) {return utils::expit2(x);}
 47 | 
 48 | //' @keywords internal
 49 | // [[Rcpp::export("cpp.utils.expitn")]]
 50 | arma::vec cpp_expitn (const arma::vec & x, double n = 1) {return utils::expitn(x, n);}
 51 | 
 52 | //' @keywords internal
 53 | // [[Rcpp::export("cpp.utils.cloglog")]]
 54 | arma::vec cpp_cloglog (const arma::vec & x) {return utils::cloglog(x);}
 55 | 
 56 | //' @keywords internal
 57 | // [[Rcpp::export("cpp.utils.cexpexp")]]
 58 | arma::vec cpp_cexpexp (const arma::vec & x) {return utils::cexpexp(x);}
 59 | 
 60 | //' @keywords internal
 61 | // [[Rcpp::export("cpp.utils.loglog")]]
 62 | arma::vec cpp_loglog (const arma::vec & x) {return utils::loglog(x);}
 63 | 
 64 | //' @keywords internal
 65 | // [[Rcpp::export("cpp.utils.expexp")]]
 66 | arma::vec cpp_expexp (const arma::vec & x) {return utils::expexp(x);}
 67 | 
 68 | //' @keywords internal
 69 | // [[Rcpp::export("cpp.utils.pdfn")]]
 70 | arma::vec cpp_pdfn (const arma::vec & x) {return utils::pdfn(x);}
 71 | 
 72 | //' @keywords internal
 73 | // [[Rcpp::export("cpp.utils.cdfn")]]
 74 | arma::vec cpp_cdfn (const arma::vec & x) {return utils::cdfn(x);}
 75 | 
 76 | //' @keywords internal
 77 | // [[Rcpp::export("cpp.utils.logpdfn")]]
 78 | arma::vec cpp_logpdfn (const arma::vec & x) {return utils::logpdfn(x);}
 79 | 
 80 | //' @keywords internal
 81 | // [[Rcpp::export("cpp.utils.logcdfn")]]
 82 | arma::vec cpp_logcdfn (const arma::vec & x) {return utils::logcdfn(x);}
 83 | 
 84 | //' @keywords internal
 85 | // [[Rcpp::export("cpp.utils.gamma")]]
 86 | arma::vec cpp_gamma (const arma::vec & x) {return utils::gamma(x);}
 87 | 
 88 | //' @keywords internal
 89 | // [[Rcpp::export("cpp.utils.loggamma")]]
 90 | arma::vec cpp_loggamma (const arma::vec & x) {return utils::loggamma(x);}
 91 | 
 92 | //' @keywords internal
 93 | // [[Rcpp::export("cpp.utils.digamma")]]
 94 | arma::vec cpp_digamma (const arma::vec & x) {return utils::digamma(x);}
 95 | 
 96 | //' @keywords internal
 97 | // [[Rcpp::export("cpp.utils.trigamma")]]
 98 | arma::vec cpp_trigamma (const arma::vec & x) {return utils::trigamma(x);}
 99 | 
100 | //' @keywords internal
101 | // [[Rcpp::export("cpp.utils.beta")]]
102 | arma::vec cpp_beta (const arma::vec & x, const arma::vec & y) {return utils::beta(x, y);}
103 | 
104 | //' @keywords internal
105 | // [[Rcpp::export("cpp.utils.logbeta")]]
106 | arma::vec cpp_logbeta (const arma::vec & x, const arma::vec & y) {return utils::logbeta(x, y);}
107 | 
108 | //' @keywords internal
109 | // [[Rcpp::export("cpp.utils.dibeta")]]
110 | arma::vec cpp_dibeta (const arma::vec & x, const arma::vec & y) {return utils::dibeta(x, y);}
111 | 
112 | //' @keywords internal
113 | // [[Rcpp::export("cpp.utils.tribeta")]]
114 | arma::vec cpp_tribeta (const arma::vec & x, const arma::vec & y) {return utils::tribeta(x, y);}
115 | 
116 | //' @keywords internal
117 | // [[Rcpp::export("cpp.utils.hinge")]]
118 | arma::vec cpp_hinge (const arma::vec & x) {return utils::hinge(x);}
119 | 
120 | //' @keywords internal
121 | // [[Rcpp::export("cpp.utils.dirac")]]
122 | arma::vec cpp_dirac (const arma::vec & x, double a = 0) {return utils::dirac(x, a);}
123 | 
124 | //' @keywords internal
125 | // [[Rcpp::export("cpp.utils.step")]]
126 | arma::vec cpp_step (const arma::vec & x, double a = 0, bool lower = true) {return utils::step(x, a, lower);}
127 | 
128 | //' @keywords internal
129 | // [[Rcpp::export("cpp.utils.vech")]]
130 | arma::vec cpp_vech(const arma::mat & A) {return utils::vech(A);}
131 | 


--------------------------------------------------------------------------------