├── .Rbuildignore
├── .Renviron
├── .circleci
    └── config.yml
├── .github
    ├── .gitignore
    └── workflows
    │   ├── R-CMD-check.yaml
    │   └── rhub.yaml
├── .gitignore
├── .travis.yml
├── DESCRIPTION
├── LICENSE
├── NAMESPACE
├── NOTES.txt
├── R
    ├── RcppExports.R
    ├── annotation_heatmap.R
    ├── betanmf.R
    ├── ccd.R
    ├── colors.R
    ├── datasim.R
    ├── de_analysis.R
    ├── embedding_plots.R
    ├── embeddings.R
    ├── fit_multinom_model.R
    ├── fit_poisson_nmf.R
    ├── fit_topic_model.R
    ├── homer.R
    ├── init_poisson_nmf.R
    ├── lfc.R
    ├── likelihood.R
    ├── merge_topics.R
    ├── misc.R
    ├── mixem.R
    ├── multinom2poisson.R
    ├── newsgroups.R
    ├── other_plots.R
    ├── pbmc_facs.R
    ├── pnmfem.R
    ├── poismix.R
    ├── poisson.R
    ├── poisson2multinom.R
    ├── predict.R
    ├── scd.R
    ├── select.R
    ├── structure_plot.R
    ├── summary.R
    ├── sysdata.rda
    ├── topicscore.R
    ├── verify_args.R
    ├── volcano_plots.R
    └── zzz.R
├── README.md
├── TODO.txt
├── _pkgdown.yml
├── appveyor.yml
├── data
    ├── newsgroups.RData
    └── pbmc_facs.RData
├── docs
    ├── 404.html
    ├── LICENSE-text.html
    ├── articles
    │   ├── index.html
    │   ├── relationship.html
    │   ├── relationship_files
    │   │   └── figure-html
    │   │   │   ├── loglik-poisson-vs-multinom-1.png
    │   │   │   ├── multinom2poisson-1-1.png
    │   │   │   ├── multinom2poisson-2-1.png
    │   │   │   └── plot-loglik-1.png
    │   ├── single_cell_rnaseq_basic.html
    │   ├── single_cell_rnaseq_basic_files
    │   │   └── figure-html
    │   │   │   ├── structure-plot-test-1.png
    │   │   │   ├── structure-plot-with-celltype-labels-1.png
    │   │   │   ├── volcano-plot-b-1.png
    │   │   │   ├── volcano-plot-bcells-1.png
    │   │   │   ├── volcano-plot-nk-1.png
    │   │   │   ├── volcano-plot-t-1.png
    │   │   │   └── volcano-plot-tcells-1.png
    │   ├── single_cell_rnaseq_practical.html
    │   ├── single_cell_rnaseq_practical_files
    │   │   ├── crosstalk-1.0.0
    │   │   │   ├── css
    │   │   │   │   └── crosstalk.css
    │   │   │   └── js
    │   │   │   │   ├── crosstalk.js
    │   │   │   │   ├── crosstalk.js.map
    │   │   │   │   ├── crosstalk.min.js
    │   │   │   │   └── crosstalk.min.js.map
    │   │   ├── figure-html
    │   │   │   ├── loglik-2-1.png
    │   │   │   ├── loglik-3-1.png
    │   │   │   ├── pca-plot-1-1.png
    │   │   │   ├── pca-plot-2-1.png
    │   │   │   ├── plot-loglik-1.png
    │   │   │   ├── structure-plot-by-cluster-1-1.png
    │   │   │   ├── structure-plot-by-cluster-2-1.png
    │   │   │   ├── structure-plot-by-cluster-3-1.png
    │   │   │   ├── structure-plot-without-labels-1.png
    │   │   │   ├── volcano-plot-cd4-1.png
    │   │   │   ├── volcano-plot-cd8-1.png
    │   │   │   └── volcano-plot-t-1.png
    │   │   ├── htmlwidgets-1.5.1
    │   │   │   └── htmlwidgets.js
    │   │   ├── jquery-1.11.3
    │   │   │   ├── jquery-AUTHORS.txt
    │   │   │   ├── jquery.js
    │   │   │   ├── jquery.min.js
    │   │   │   └── jquery.min.map
    │   │   ├── plotly-binding-4.9.2
    │   │   │   └── plotly.js
    │   │   ├── plotly-htmlwidgets-css-1.52.2
    │   │   │   └── plotly-htmlwidgets.css
    │   │   ├── plotly-main-1.52.2
    │   │   │   └── plotly-latest.min.js
    │   │   └── typedarray-0.1
    │   │   │   └── typedarray.min.js
    │   ├── topics_vs_clusters.html
    │   ├── topics_vs_clusters_files
    │   │   └── figure-html
    │   │   │   ├── pca-from-loadings-1.png
    │   │   │   ├── plot-topic-proportions-1.png
    │   │   │   ├── tsne-from-counts-1-1.png
    │   │   │   ├── tsne-from-counts-2-1.png
    │   │   │   └── tsne-from-loadings-1.png
    │   └── volcano_plot_t_cells.html
    ├── authors.html
    ├── bootstrap-toc.css
    ├── bootstrap-toc.js
    ├── docsearch.css
    ├── docsearch.js
    ├── index.html
    ├── link.svg
    ├── pbmc_de_analysis.html
    ├── pbmc_facs.RData
    ├── pkgdown.css
    ├── pkgdown.js
    ├── pkgdown.yml
    ├── reference
    │   ├── Rplot001.png
    │   ├── Rplot002.png
    │   ├── Rplot003.png
    │   ├── Rplot004.png
    │   ├── Rplot005.png
    │   ├── Rplot006.png
    │   ├── compare_fits.html
    │   ├── compare_poisson_nmf_fits.html
    │   ├── de_analysis-1.png
    │   ├── de_analysis.html
    │   ├── diff_count_analysis.html
    │   ├── embedding_plots.html
    │   ├── embeddings_from_topics-1.png
    │   ├── embeddings_from_topics-2.png
    │   ├── embeddings_from_topics-3.png
    │   ├── embeddings_from_topics-4.png
    │   ├── embeddings_from_topics-5.png
    │   ├── embeddings_from_topics-6.png
    │   ├── embeddings_from_topics.html
    │   ├── fit_multinom_model.html
    │   ├── fit_poisson_nmf-1.png
    │   ├── fit_poisson_nmf-2.png
    │   ├── fit_poisson_nmf.html
    │   ├── fit_topic_model.html
    │   ├── index.html
    │   ├── likelihood.html
    │   ├── loadings_plot.html
    │   ├── merge_topics.html
    │   ├── multinom2poisson.html
    │   ├── pbmc_4k.html
    │   ├── pbmc_facs.html
    │   ├── pca_plot.html
    │   ├── plot_loglik_vs_rank.html
    │   ├── plot_progress.html
    │   ├── plot_progress_poisson_nmf.html
    │   ├── poisson2multinom.html
    │   ├── predict-1.png
    │   ├── predict-2.png
    │   ├── predict-3.png
    │   ├── predict-4.png
    │   ├── predict.html
    │   ├── run_homer.html
    │   ├── select_loadings.html
    │   ├── simulate_count_data.html
    │   ├── simulate_gene_data.html
    │   ├── simulate_toy_gene_data.html
    │   ├── structure_plot.html
    │   ├── summary.poisson_nmf_fit.html
    │   ├── tsne_from_topics.html
    │   ├── tsne_plot.html
    │   └── volcano_plot.html
    └── sitemap.xml
├── inst
    ├── CITATION
    ├── COPYRIGHTS
    ├── code
    │   ├── altsqp_original.R
    │   ├── check_map.R
    │   ├── check_poisson_hessian.R
    │   ├── compile_newsgroups_results_for_annotation.R
    │   ├── compute_newsgroups_topics.R
    │   ├── droplet.R
    │   ├── lda.R
    │   ├── multinom_demo.R
    │   ├── pbmc_de_analysis.Rmd
    │   ├── pbmc_demo.R
    │   ├── plsi.R
    │   ├── pois_vs_binom.R
    │   ├── pois_vs_multinom.R
    │   ├── poisson_demo.R
    │   ├── postfit_motif_analysis_Buenrostro2018.R
    │   ├── pseudocounts.R
    │   ├── scd.R
    │   ├── simulate_data_for_sfa.R
    │   ├── test_hpd.R
    │   ├── test_poisson_fit.R
    │   └── test_poisson_fit_basic.R
    └── datafiles
    │   ├── newsgroups.RData
    │   └── newsgroups_topics.RData
├── man
    ├── annotation_heatmap.Rd
    ├── compare_fits.Rd
    ├── de_analysis.Rd
    ├── embedding_plots.Rd
    ├── embeddings_from_topics.Rd
    ├── fit_multinom_model.Rd
    ├── fit_poisson_nmf.Rd
    ├── fit_topic_model.Rd
    ├── likelihood.Rd
    ├── loadings_plot.Rd
    ├── merge_topics.Rd
    ├── multinom2poisson.Rd
    ├── newsgroups.Rd
    ├── pbmc_facs.Rd
    ├── plot_loglik_vs_rank.Rd
    ├── plot_progress.Rd
    ├── poisson2multinom.Rd
    ├── predict.Rd
    ├── run_homer.Rd
    ├── select_loadings.Rd
    ├── simulate_count_data.Rd
    ├── simulate_gene_data.Rd
    ├── simulate_toy_gene_data.Rd
    ├── structure_plot.Rd
    ├── summary.poisson_nmf_fit.Rd
    └── volcano_plot.Rd
├── src
    ├── Makevars
    ├── Makevars.win
    ├── RcppExports.cpp
    ├── ccd.cpp
    ├── cost.cpp
    ├── cost.h
    ├── misc.cpp
    ├── misc.h
    ├── mixem.cpp
    ├── mixem.h
    ├── pnmfem.cpp
    ├── poismix.cpp
    ├── poismix.h
    ├── poisson.cpp
    └── scd.cpp
├── tests
    ├── testthat.R
    └── testthat
    │   ├── helper_functions.R
    │   ├── test_de_analysis.R
    │   ├── test_fit_multinom_model.R
    │   ├── test_fit_poisson_nmf.R
    │   ├── test_fit_topic_model.R
    │   ├── test_likelihood.R
    │   ├── test_mixem.R
    │   ├── test_plots.R
    │   ├── test_poismix.R
    │   ├── test_poisson2multinom.R
    │   ├── test_select.R
    │   └── test_summary.R
└── vignettes
    ├── relationship.Rmd
    ├── single_cell_rnaseq_basic.Rmd
    ├── single_cell_rnaseq_practical.Rmd
    └── topics_vs_clusters.Rmd


/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^TODO\.txt$
 2 | ^NOTES\.txt$
 3 | ^docs$
 4 | ^_pkgdown\.yml$
 5 | ^\.travis\.yml$
 6 | ^appveyor\.yml$
 7 | ^\.circleci$
 8 | ^\.circleci/config\.yml$
 9 | ^\.github$
10 | ^inst/code$
11 | ^vignettes/single\_cell\_rnaseq\_practical\.Rmd$
12 | ^\.Renviron$
13 | 


--------------------------------------------------------------------------------
/.Renviron:
--------------------------------------------------------------------------------
1 | R_LIBS_USER = ~/R_libs
2 | 


--------------------------------------------------------------------------------
/.circleci/config.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | jobs:
 3 |   build:
 4 |     docker:
 5 |       - image: rocker/verse:latest
 6 |     environment:
 7 |       R_LIBS: ~/R/Library
 8 |       R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
 9 |       _R_CHECK_FORCE_SUGGESTS_: false
10 |       NOT_CRAN: true
11 |     steps:
12 |       - restore_cache:
13 |           keys:
14 |             - r-pkg-cache-{{ arch }}-{{ .Branch }}
15 |             - r-pkg-cache-{{ arch }}-
16 |       - checkout
17 |       - run:
18 |           name: Install package dependencies
19 |           command: |
20 |             mkdir -p ~/R/Library
21 |             Rscript -e 'update.packages(oldPkgs="Matrix",ask=FALSE)'  # oldPkgs must be named: the first positional argument is lib.loc
22 |             Rscript -e 'install.packages(c("devtools","remotes","quadprog","gtools","irlba","Rtsne","uwot","dplyr","rlang","tidyr","Rcpp","RcppArmadillo","RcppParallel","progress","pbapply","ggplot2","ggrepel","cowplot","plotly","htmlwidgets","testthat","Ternary","RhpcBLASctl"))'
23 |             Rscript -e 'devtools::install_github("slowkow/ggrepel",upgrade="never",force=TRUE)'
24 |             Rscript -e 'devtools::install_github("stephens999/ashr",upgrade="never",force=TRUE)'
25 |             Rscript -e 'devtools::install_github("linxihui/NNLM",upgrade="never",force=TRUE)'
26 |       - run:
27 |           name: Session information and installed package versions
28 |           command: |
29 |             Rscript -e 'sessionInfo()'
30 |             Rscript -e 'installed.packages()[, c("Package", "Version")]'
31 |             Rscript -e 'rmarkdown::pandoc_version()'
32 |       - run:
33 |           name: Build package
34 |           command: R CMD build --no-build-vignettes --no-manual .
35 |       - run:
36 |           name: Check package
37 |           no_output_timeout: 55m
38 |           command: R CMD check --ignore-vignettes --no-manual --no-examples *tar.gz
39 |       - store_artifacts:
40 |           path: fastTopics.Rcheck/
41 |       - save_cache:
42 |           key: r-pkg-cache-{{ arch }}-{{ .Branch }}
43 |           paths:
44 |             - "~/R/Library"
45 | 


--------------------------------------------------------------------------------
/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | 


--------------------------------------------------------------------------------
/.github/workflows/R-CMD-check.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | on:
 3 |   push:
 4 |     branches: [main, master]
 5 |   pull_request:
 6 |     branches: [main, master]
 7 | 
 8 | name: R-CMD-check
 9 | 
10 | jobs:
11 |   R-CMD-check:
12 |     runs-on: macos-latest
13 |     env:
14 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
15 |       R_KEEP_PKG_SOURCE: yes
16 |       _R_CHECK_FORCE_SUGGESTS_: false
17 |     steps:
18 |       - uses: actions/checkout@v3
19 |       - uses: r-lib/actions/setup-r@v2
20 |         with:
21 |           use-public-rspm: true
22 |           
23 |       - name: Install dependencies
24 |         run: |
25 |           update.packages(oldPkgs="Matrix",ask=FALSE) # oldPkgs must be named: the first positional argument is lib.loc
26 |           install.packages(c("remotes","rcmdcheck"))
27 |           install.packages(c("devtools","remotes","quadprog","gtools"))
28 |           install.packages(c("irlba","Rtsne","uwot","dplyr","tidyr","rlang"))
29 |           install.packages(c("Rcpp","RcppArmadillo","RcppParallel","pbapply"))
30 |           install.packages(c("progress","ggplot2","ggrepel","cowplot"))
31 |           install.packages(c("plotly","htmlwidgets","testthat","Ternary"))
32 |           install.packages("RhpcBLASctl")
33 |           remotes::install_github("slowkow/ggrepel",upgrade="never",force=TRUE)
34 |           remotes::install_github("stephens999/ashr",upgrade="never",force=TRUE)
35 |           remotes::install_github("linxihui/NNLM",upgrade="never",force=TRUE)
36 |         shell: Rscript {0}
37 | 
38 |       - name: Check
39 |         run: |
40 |           options(crayon.enabled = TRUE)
41 |           rcmdcheck::rcmdcheck(args = c("--no-manual","--ignore-vignettes"),
42 |                                error_on = "error",build_args = "--no-build-vignettes")
43 |         shell: Rscript {0}
44 | 
45 | 
46 | 


--------------------------------------------------------------------------------
/.github/workflows/rhub.yaml:
--------------------------------------------------------------------------------
 1 | # R-hub's generic GitHub Actions workflow file. Its canonical location is at
 2 | # https://github.com/r-hub/actions/blob/v1/workflows/rhub.yaml
 3 | # You can update this file to a newer version using the rhub2 package:
 4 | #
 5 | # rhub::rhub_setup()
 6 | #
 7 | # It is unlikely that you need to modify this file manually.
 8 | 
 9 | name: R-hub
10 | run-name: "${{ github.event.inputs.id }}: ${{ github.event.inputs.name || format('Manually run by {0}', github.triggering_actor) }}"
11 | 
12 | on:
13 |   workflow_dispatch:
14 |     inputs:
15 |       config:
16 |         description: 'A comma separated list of R-hub platforms to use.'
17 |         type: string
18 |         default: 'linux,windows,macos'
19 |       name:
20 |         description: 'Run name. You can leave this empty now.'
21 |         type: string
22 |       id:
23 |         description: 'Unique ID. You can leave this empty now.'
24 |         type: string
25 | 
26 | jobs:
27 | 
28 |   setup:
29 |     runs-on: ubuntu-latest
30 |     outputs:
31 |       containers: ${{ steps.rhub-setup.outputs.containers }}
32 |       platforms: ${{ steps.rhub-setup.outputs.platforms }}
33 | 
34 |     steps:
35 |     # NO NEED TO CHECKOUT HERE
36 |     - uses: r-hub/actions/setup@v1
37 |       with:
38 |         config: ${{ github.event.inputs.config }}
39 |       id: rhub-setup
40 | 
41 |   linux-containers:
42 |     needs: setup
43 |     if: ${{ needs.setup.outputs.containers != '[]' }}
44 |     runs-on: ubuntu-latest
45 |     name: ${{ matrix.config.label }}
46 |     strategy:
47 |       fail-fast: false
48 |       matrix:
49 |         config: ${{ fromJson(needs.setup.outputs.containers) }}
50 |     container:
51 |       image: ${{ matrix.config.container }}
52 | 
53 |     steps:
54 |       - uses: r-hub/actions/checkout@v1
55 |       - uses: r-hub/actions/platform-info@v1
56 |         with:
57 |           token: ${{ secrets.RHUB_TOKEN }}
58 |           job-config: ${{ matrix.config.job-config }}
59 |       - uses: r-hub/actions/setup-deps@v1
60 |         with:
61 |           token: ${{ secrets.RHUB_TOKEN }}
62 |           job-config: ${{ matrix.config.job-config }}
63 |       - uses: r-hub/actions/run-check@v1
64 |         with:
65 |           token: ${{ secrets.RHUB_TOKEN }}
66 |           job-config: ${{ matrix.config.job-config }}
67 | 
68 |   other-platforms:
69 |     needs: setup
70 |     if: ${{ needs.setup.outputs.platforms != '[]' }}
71 |     runs-on: ${{ matrix.config.os }}
72 |     name: ${{ matrix.config.label }}
73 |     strategy:
74 |       fail-fast: false
75 |       matrix:
76 |         config: ${{ fromJson(needs.setup.outputs.platforms) }}
77 | 
78 |     steps:
79 |       - uses: r-hub/actions/checkout@v1
80 |       - uses: r-hub/actions/setup-r@v1
81 |         with:
82 |           job-config: ${{ matrix.config.job-config }}
83 |           token: ${{ secrets.RHUB_TOKEN }}
84 |       - uses: r-hub/actions/platform-info@v1
85 |         with:
86 |           token: ${{ secrets.RHUB_TOKEN }}
87 |           job-config: ${{ matrix.config.job-config }}
88 |       - uses: r-hub/actions/setup-deps@v1
89 |         with:
90 |           job-config: ${{ matrix.config.job-config }}
91 |           token: ${{ secrets.RHUB_TOKEN }}
92 |       - uses: r-hub/actions/run-check@v1
93 |         with:
94 |           job-config: ${{ matrix.config.job-config }}
95 |           token: ${{ secrets.RHUB_TOKEN }}
96 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | src/*.o
 2 | src/*.so
 3 | inst/derivations/algorithms/algorithms.aux
 4 | inst/derivations/algorithms/algorithms.bbl
 5 | inst/derivations/algorithms/algorithms.blg
 6 | inst/derivations/algorithms/algorithms.log
 7 | inst/derivations/algorithms/algorithms.out
 8 | inst/derivations/algorithms/algorithms.thm
 9 | inst/derivations/altsqp/altsqp.aux
10 | inst/derivations/altsqp/altsqp.bbl
11 | inst/derivations/altsqp/altsqp.blg
12 | inst/derivations/altsqp/altsqp.log
13 | inst/derivations/altsqp/altsqp.out
14 | inst/derivations/altsqp/altsqp.thm
15 | inst/derivations/diffcount/diffcount.aux
16 | inst/derivations/diffcount/diffcount.bbl
17 | inst/derivations/diffcount/diffcount.blg
18 | inst/derivations/diffcount/diffcount.log
19 | inst/derivations/diffcount/diffcount.out
20 | inst/derivations/diffcount/diffcount.thm
21 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: r
 2 | cache: packages
 3 | latex: false
 4 | warnings_are_errors: false
 5 | r_build_args: 
 6 | r_check_args: --as-cran
 7 | 
 8 | # This is the minimal set of R packages needed to run "R CMD check" on
 9 | # the package.
10 | install:
11 |   - R -e 'install.packages(c("devtools","covr","testthat","knitr","rmarkdown","quadprog","gtools","irlba","Rtsne","uwot","dplyr","rlang","tidyr","Rcpp","RcppArmadillo","RcppParallel","progress","pbapply","ggplot2","ggrepel","cowplot","plotly","htmlwidgets","Ternary","RhpcBLASctl"))'
12 |   - R -e 'devtools::install_github("linxihui/NNLM",upgrade="never",force=TRUE)'
13 |   - R -e 'devtools::install_github("slowkow/ggrepel",upgrade="never",force=TRUE)'
14 |   - R -e 'devtools::install_github("stephens999/ashr",upgrade="never",force=TRUE)'
15 | 
16 | env:
17 |   global:
18 |     - _R_CHECK_FORCE_SUGGESTS_: false
19 |     - R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
20 | 
21 | after_success:
22 |   - Rscript -e 'library(covr); codecov()'
23 | 
24 | branches:
25 |   only:
26 |     - master
27 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Encoding: UTF-8
 2 | Type: Package
 3 | Package: fastTopics
 4 | Version: 0.7-25
 5 | Date: 2025-06-03
 6 | Title: Fast Algorithms for Fitting Topic Models and Non-Negative
 7 |     Matrix Factorizations to Count Data
 8 | Authors@R: c(person("Peter","Carbonetto",role=c("aut","cre"),
 9 |                     email="peter.carbonetto@gmail.com"),
10 |              person("Kevin","Luo",role="aut"),
11 |              person("Kushal","Dey",role="aut"),
12 |              person("Joyce","Hsiao",role="ctb"),
13 |              person("Abhishek","Sarkar",role="ctb"),
14 |              person("Anthony","Hung",role="ctb"),
15 | 	     person("Xihui","Lin",role="ctb"),
16 | 	     person("Paul C.","Boutros",role="ctb"),
17 | 	     person("Minzhe","Wang",role="ctb"),
18 | 	     person("Tracy","Ke",role="ctb"),
19 | 	     person("Eric","Weine",role="ctb"),
20 |              person("Matthew","Stephens",role="aut"))
21 | URL: https://stephenslab.github.io/fastTopics/, https://github.com/stephenslab/fastTopics
22 | BugReports: https://github.com/stephenslab/fastTopics/issues
23 | Depends: R (>= 3.3.0)
24 | Description: Implements fast, scalable optimization algorithms for
25 |     fitting topic models ("grade of membership" models) and
26 |     non-negative matrix factorizations to count data. The methods
27 |     exploit the special relationship between the multinomial topic
28 |     model (also, "probabilistic latent semantic indexing") and Poisson
29 |     non-negative matrix factorization. The package provides tools to
30 |     compare, annotate and visualize model fits, including functions to
31 |     efficiently create "structure plots" and identify key features in
32 |     topics. The 'fastTopics' package is a successor to the
33 |     'CountClust' package. For more information, see
34 |     <doi:10.48550/arXiv.2105.13440> and
35 |     <doi:10.1186/s13059-023-03067-9>. Please also see the GitHub
36 |     repository for additional vignettes not included in the package on
37 |     CRAN.
38 | License: BSD_2_clause + file LICENSE
39 | Copyright: inst/COPYRIGHTS
40 | SystemRequirements: GNU make
41 | Imports:
42 |     graphics,
43 |     utils,
44 |     methods,
45 |     stats,
46 |     Matrix,
47 |     gtools,
48 |     quadprog,
49 |     irlba,
50 |     dplyr,
51 |     Rtsne,
52 |     uwot,
53 |     ashr,
54 |     Rcpp (>= 1.0.12),
55 |     RcppParallel (>= 5.1.7),
56 |     RhpcBLASctl,
57 |     parallel,
58 |     progress,
59 |     pbapply,
60 |     ggplot2 (>= 3.3.0),
61 |     ggrepel (>= 0.9.0),
62 |     cowplot,
63 |     plotly,
64 |     reshape2,
65 |     htmlwidgets
66 | Suggests:
67 |     Ternary,
68 |     testthat,
69 |     knitr,
70 |     rmarkdown
71 | LinkingTo:
72 |     Rcpp,
73 |     RcppParallel,
74 |     RcppArmadillo
75 | LazyData: true
76 | LazyDataCompression: xz
77 | NeedsCompilation: yes
78 | RoxygenNote: 7.3.1
79 | VignetteBuilder: knitr
80 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2019-2025
2 | COPYRIGHT HOLDER: Peter Carbonetto and Matthew Stephens
3 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
  1 | # Generated by roxygen2: do not edit by hand
  2 | 
  3 | S3method(plot,multinom_topic_model_fit)
  4 | S3method(plot,poisson_nmf_fit)
  5 | S3method(plot,topic_model_de_analysis)
  6 | S3method(predict,multinom_topic_model_fit)
  7 | S3method(predict,poisson_nmf_fit)
  8 | S3method(print,summary.multinom_topic_model_fit)
  9 | S3method(print,summary.poisson_nmf_fit)
 10 | S3method(select,multinom_topic_model_fit)
 11 | S3method(select,poisson_nmf_fit)
 12 | S3method(summary,multinom_topic_model_fit)
 13 | S3method(summary,poisson_nmf_fit)
 14 | export(annotation_heatmap)
 15 | export(compare_fits)
 16 | export(cost)
 17 | export(de_analysis)
 18 | export(de_analysis_control_default)
 19 | export(deviance_poisson_nmf)
 20 | export(embedding_plot_2d)
 21 | export(embedding_plot_2d_ggplot_call)
 22 | export(fit_multinom_model)
 23 | export(fit_poisson_nmf)
 24 | export(fit_poisson_nmf_control_default)
 25 | export(fit_topic_model)
 26 | export(init_poisson_nmf)
 27 | export(init_poisson_nmf_from_clustering)
 28 | export(loadings_plot)
 29 | export(loadings_plot_ggplot_call)
 30 | export(loglik_multinom_topic_model)
 31 | export(loglik_poisson_nmf)
 32 | export(loglik_vs_rank_ggplot_call)
 33 | export(merge_topics)
 34 | export(multinom2poisson)
 35 | export(pca_from_topics)
 36 | export(pca_hexbin_plot)
 37 | export(pca_hexbin_plot_ggplot_call)
 38 | export(pca_plot)
 39 | export(plot_loglik_vs_rank)
 40 | export(plot_progress)
 41 | export(poisson2multinom)
 42 | export(run_homer)
 43 | export(select_loadings)
 44 | export(simulate_count_data)
 45 | export(simulate_multinom_gene_data)
 46 | export(simulate_poisson_gene_data)
 47 | export(simulate_toy_gene_data)
 48 | export(structure_plot)
 49 | export(structure_plot_default_embed_method)
 50 | export(structure_plot_ggplot_call)
 51 | export(tsne_from_topics)
 52 | export(tsne_plot)
 53 | export(umap_from_topics)
 54 | export(umap_plot)
 55 | export(volcano_plot)
 56 | export(volcano_plot_do_label_default)
 57 | export(volcano_plot_ggplot_call)
 58 | export(volcano_plot_ly_call)
 59 | export(volcano_plotly)
 60 | import(Matrix)
 61 | importFrom(Matrix,colMeans)
 62 | importFrom(Matrix,colSums)
 63 | importFrom(Matrix,rowMeans)
 64 | importFrom(Matrix,rowSums)
 65 | importFrom(Matrix,sparseMatrix)
 66 | importFrom(Rcpp,evalCpp)
 67 | importFrom(RcppParallel,RcppParallelLibs)
 68 | importFrom(RcppParallel,defaultNumThreads)
 69 | importFrom(RcppParallel,setThreadOptions)
 70 | importFrom(RhpcBLASctl,blas_get_num_procs)
 71 | importFrom(RhpcBLASctl,blas_set_num_threads)
 72 | importFrom(Rtsne,Rtsne)
 73 | importFrom(ashr,ash)
 74 | importFrom(cowplot,plot_grid)
 75 | importFrom(cowplot,theme_cowplot)
 76 | importFrom(dplyr,select)
 77 | importFrom(ggplot2,aes)
 78 | importFrom(ggplot2,aes_q)
 79 | importFrom(ggplot2,aes_string)
 80 | importFrom(ggplot2,after_stat)
 81 | importFrom(ggplot2,element_blank)
 82 | importFrom(ggplot2,element_text)
 83 | importFrom(ggplot2,geom_boxplot)
 84 | importFrom(ggplot2,geom_col)
 85 | importFrom(ggplot2,geom_line)
 86 | importFrom(ggplot2,geom_point)
 87 | importFrom(ggplot2,ggplot)
 88 | importFrom(ggplot2,guide_legend)
 89 | importFrom(ggplot2,guides)
 90 | importFrom(ggplot2,labs)
 91 | importFrom(ggplot2,scale_color_manual)
 92 | importFrom(ggplot2,scale_fill_gradient2)
 93 | importFrom(ggplot2,scale_fill_gradientn)
 94 | importFrom(ggplot2,scale_fill_manual)
 95 | importFrom(ggplot2,scale_linetype_manual)
 96 | importFrom(ggplot2,scale_shape_manual)
 97 | importFrom(ggplot2,scale_size)
 98 | importFrom(ggplot2,scale_size_manual)
 99 | importFrom(ggplot2,scale_x_continuous)
100 | importFrom(ggplot2,scale_y_continuous)
101 | importFrom(ggplot2,stat_bin_hex)
102 | importFrom(ggplot2,theme)
103 | importFrom(ggplot2,waiver)
104 | importFrom(ggrepel,geom_text_repel)
105 | importFrom(graphics,plot)
106 | importFrom(gtools,rdirichlet)
107 | importFrom(htmlwidgets,saveWidget)
108 | importFrom(irlba,irlba)
109 | importFrom(methods,as)
110 | importFrom(parallel,splitIndices)
111 | importFrom(pbapply,pblapply)
112 | importFrom(pbapply,pboptions)
113 | importFrom(plotly,hide_colorbar)
114 | importFrom(plotly,layout)
115 | importFrom(plotly,plot_ly)
116 | importFrom(progress,progress_bar)
117 | importFrom(quadprog,solve.QP)
118 | importFrom(reshape2,melt)
119 | importFrom(stats,dpois)
120 | importFrom(stats,formula)
121 | importFrom(stats,glm)
122 | importFrom(stats,glm.control)
123 | importFrom(stats,kmeans)
124 | importFrom(stats,pnorm)
125 | importFrom(stats,poisson)
126 | importFrom(stats,prcomp)
127 | importFrom(stats,predict)
128 | importFrom(stats,quantile)
129 | importFrom(stats,rmultinom)
130 | importFrom(stats,rnorm)
131 | importFrom(stats,rpois)
132 | importFrom(stats,runif)
133 | importFrom(stats,summary.glm)
134 | importFrom(utils,combn)
135 | importFrom(utils,modifyList)
136 | importFrom(utils,read.table)
137 | importFrom(utils,write.table)
138 | importFrom(uwot,umap)
139 | useDynLib(fastTopics)
140 | 


--------------------------------------------------------------------------------
/NOTES.txt:
--------------------------------------------------------------------------------
1 | # For Matrix version 1.4-2.
2 | export R_BUILD_ENVIRON=$HOME/git/fastTopics/.Renviron
3 | export R_CHECK_ENVIRON=$HOME/git/fastTopics/.Renviron
4 | R CMD build fastTopics
5 | R CMD check fastTopics_0.6.138.tar.gz
6 | 


--------------------------------------------------------------------------------
/R/RcppExports.R:
--------------------------------------------------------------------------------
  1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand
  2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
  3 | 
  4 | ccd_update_factors_rcpp <- function(V, W, H, e) {
  5 |     .Call('_fastTopics_ccd_update_factors_rcpp', PACKAGE = 'fastTopics', V, W, H, e)
  6 | }
  7 | 
  8 | ccd_update_factors_sparse_rcpp <- function(V, W, H, e) {
  9 |     .Call('_fastTopics_ccd_update_factors_sparse_rcpp', PACKAGE = 'fastTopics', V, W, H, e)
 10 | }
 11 | 
 12 | ccd_update_factors_parallel_rcpp <- function(V, W, H, e) {
 13 |     .Call('_fastTopics_ccd_update_factors_parallel_rcpp', PACKAGE = 'fastTopics', V, W, H, e)
 14 | }
 15 | 
 16 | ccd_update_factors_sparse_parallel_rcpp <- function(V, W, H, e) {
 17 |     .Call('_fastTopics_ccd_update_factors_sparse_parallel_rcpp', PACKAGE = 'fastTopics', V, W, H, e)
 18 | }
 19 | 
 20 | cost_rcpp <- function(X, A, B, e, poisson) {
 21 |     .Call('_fastTopics_cost_rcpp', PACKAGE = 'fastTopics', X, A, B, e, poisson)
 22 | }
 23 | 
 24 | cost_sparse_rcpp <- function(X, A, B, e, poisson) {
 25 |     .Call('_fastTopics_cost_sparse_rcpp', PACKAGE = 'fastTopics', X, A, B, e, poisson)
 26 | }
 27 | 
 28 | le_diff_rcpp <- function(X) {
 29 |     .Call('_fastTopics_le_diff_rcpp', PACKAGE = 'fastTopics', X)
 30 | }
 31 | 
 32 | x_over_crossprod_rcpp <- function(i, j, x, A, B, e) {
 33 |     .Call('_fastTopics_x_over_crossprod_rcpp', PACKAGE = 'fastTopics', i, j, x, A, B, e)
 34 | }
 35 | 
 36 | mixem_rcpp <- function(L, w, x0, numiter) {
 37 |     .Call('_fastTopics_mixem_rcpp', PACKAGE = 'fastTopics', L, w, x0, numiter)
 38 | }
 39 | 
 40 | pnmfem_update_factors_rcpp <- function(X, F, L, j, numiter) {
 41 |     .Call('_fastTopics_pnmfem_update_factors_rcpp', PACKAGE = 'fastTopics', X, F, L, j, numiter)
 42 | }
 43 | 
 44 | pnmfem_update_factors_sparse_rcpp <- function(X, F, L, j, numiter) {
 45 |     .Call('_fastTopics_pnmfem_update_factors_sparse_rcpp', PACKAGE = 'fastTopics', X, F, L, j, numiter)
 46 | }
 47 | 
 48 | pnmfem_update_factors_parallel_rcpp <- function(X, F, L, j, numiter) {
 49 |     .Call('_fastTopics_pnmfem_update_factors_parallel_rcpp', PACKAGE = 'fastTopics', X, F, L, j, numiter)
 50 | }
 51 | 
 52 | pnmfem_update_factors_sparse_parallel_rcpp <- function(X, F, L, j, numiter) {
 53 |     .Call('_fastTopics_pnmfem_update_factors_sparse_parallel_rcpp', PACKAGE = 'fastTopics', X, F, L, j, numiter)
 54 | }
 55 | 
 56 | poismixem_rcpp <- function(L, w, x0, numiter) {
 57 |     .Call('_fastTopics_poismixem_rcpp', PACKAGE = 'fastTopics', L, w, x0, numiter)
 58 | }
 59 | 
 60 | poismixem2_rcpp <- function(L1, w, u, x0, numiter) {
 61 |     .Call('_fastTopics_poismixem2_rcpp', PACKAGE = 'fastTopics', L1, w, u, x0, numiter)
 62 | }
 63 | 
 64 | poismixem3_rcpp <- function(L1, w, u, i, x0, numiter) {
 65 |     .Call('_fastTopics_poismixem3_rcpp', PACKAGE = 'fastTopics', L1, w, u, i, x0, numiter)
 66 | }
 67 | 
 68 | scd_kl_update_rcpp <- function(L, w, x0, numiter, e) {
 69 |     .Call('_fastTopics_scd_kl_update_rcpp', PACKAGE = 'fastTopics', L, w, x0, numiter, e)
 70 | }
 71 | 
 72 | scd_kl_update2_rcpp <- function(L, u, w, x0, numiter, e) {
 73 |     .Call('_fastTopics_scd_kl_update2_rcpp', PACKAGE = 'fastTopics', L, u, w, x0, numiter, e)
 74 | }
 75 | 
 76 | ccd_kl_update_rcpp <- function(L, w, x0, numiter, e) {
 77 |     .Call('_fastTopics_ccd_kl_update_rcpp', PACKAGE = 'fastTopics', L, w, x0, numiter, e)
 78 | }
 79 | 
 80 | ccd_kl_update2_rcpp <- function(L, u, w, x0, numiter, e) {
 81 |     .Call('_fastTopics_ccd_kl_update2_rcpp', PACKAGE = 'fastTopics', L, u, w, x0, numiter, e)
 82 | }
 83 | 
 84 | simulate_posterior_poisson_rcpp <- function(x, L, f, D, U, M, s, e) {
 85 |     .Call('_fastTopics_simulate_posterior_poisson_rcpp', PACKAGE = 'fastTopics', x, L, f, D, U, M, s, e)
 86 | }
 87 | 
 88 | simulate_posterior_poisson_sparse_rcpp <- function(x, L, w, f, D, U, M, s, e) {
 89 |     .Call('_fastTopics_simulate_posterior_poisson_sparse_rcpp', PACKAGE = 'fastTopics', x, L, w, f, D, U, M, s, e)
 90 | }
 91 | 
 92 | scd_update_factors_rcpp <- function(A, W, H, j, numiter, e) {
 93 |     .Call('_fastTopics_scd_update_factors_rcpp', PACKAGE = 'fastTopics', A, W, H, j, numiter, e)
 94 | }
 95 | 
 96 | scd_update_factors_sparse_rcpp <- function(A, W, H, j, numiter, e) {
 97 |     .Call('_fastTopics_scd_update_factors_sparse_rcpp', PACKAGE = 'fastTopics', A, W, H, j, numiter, e)
 98 | }
 99 | 
# Hands A, W, H, j, numiter and e, in that order, to the native
# routine "_fastTopics_scd_update_factors_parallel_rcpp" looked up in
# package "fastTopics", and returns whatever that routine returns.
scd_update_factors_parallel_rcpp <- function(A, W, H, j, numiter, e) {
  .Call("_fastTopics_scd_update_factors_parallel_rcpp",
        A, W, H, j, numiter, e,
        PACKAGE = "fastTopics")
}
103 | 
# Hands A, W, H, j, numiter and e, in that order, to the native
# routine "_fastTopics_scd_update_factors_sparse_parallel_rcpp" looked
# up in package "fastTopics", and returns whatever that routine
# returns.
scd_update_factors_sparse_parallel_rcpp <- function(A, W, H, j, numiter, e) {
  .Call("_fastTopics_scd_update_factors_sparse_parallel_rcpp",
        A, W, H, j, numiter, e,
        PACKAGE = "fastTopics")
}
107 | 
108 | 


--------------------------------------------------------------------------------
/R/betanmf.R:
--------------------------------------------------------------------------------
 1 | # This function implements the multiplicative update rule for the
 2 | # loadings matrix (the "activations"), A, in which the matrix X is
 3 | # approximated by the matrix product A*B. Inputs X, A and B should
 4 | # not be sparse matrices ("is.matrix" should return TRUE).
 5 | betanmf_update_loadings <- function (X, A, B)
 6 |   scale.cols(A * tcrossprod(X / (A %*% B),B),1/rowSums(B))
 7 | 
 8 | # This function implements the multiplicative update rule for the
 9 | # factors matrix (the "basis vectors"), B, in which the matrix X is
10 | # approximated by the matrix product A*B. Inputs X, A and B should not
11 | # be sparse matrices ("is.matrix" should return TRUE).
12 | betanmf_update_factors <- function (X, A, B)
13 |   B * crossprod(A,X / (A %*% B)) / colSums(A)
14 | 


--------------------------------------------------------------------------------
/R/ccd.R:
--------------------------------------------------------------------------------
 1 | # This function implements the cyclic co-ordinate descent (CCD) update
 2 | # for the factors matrix (the "basis vectors"), B, in which the matrix
 3 | # V is approximated by the matrix product W*H. Inputs W and H should
 4 | # be dense matrices ("is.matrix" should return TRUE). Input argument
 5 | # "e" is a non-negative scalar specifying the minimum value of the
 6 | # updated factors.
 7 | #
 8 | # Note that a single EM update of each factor is performed before
 9 | # running the CCD updates.
10 | #
11 | # Also note that the RcppParallel multithreading (specified by
12 | # argument "nc") will only work correctly if the number of threads is
13 | # set beforehand using RcppParallel::setThreadOptions.
14 | #
15 | #' @importFrom Rcpp evalCpp
16 | #' @importFrom RcppParallel RcppParallelLibs
17 | #'
18 | ccd_update_factors <- function (V, W, H, nc = 1, e = 1e-15) {
19 |   m <- ncol(V)
20 |   j <- 1:m
21 |   if (nc == 1) {
22 |     if (is.matrix(V)) {
23 |       H <- pnmfem_update_factors_rcpp(V,H,W,j-1,1)
24 |       H <- ccd_update_factors_rcpp(V,W,H,e)
25 |     } else if (is.sparse.matrix(V)) {
26 |       H <- pnmfem_update_factors_sparse_rcpp(V,H,W,j-1,1)
27 |       H <- ccd_update_factors_sparse_rcpp(V,W,H,e)
28 |     }
29 |   } else if (nc > 1) {
30 |     if (is.matrix(V)) {
31 |       H <- pnmfem_update_factors_parallel_rcpp(V,H,W,j-1,1)
32 |       H <- ccd_update_factors_parallel_rcpp(V,W,H,e)
33 |     } else if (is.sparse.matrix(V)) {
34 |       H <- pnmfem_update_factors_sparse_parallel_rcpp(V,H,W,j-1,1)
35 |       H <- ccd_update_factors_sparse_parallel_rcpp(V,W,H,e)
36 |     }
37 |   }  
38 |   return(H)
39 | }
40 | 
# This function implements the cyclic co-ordinate descent (CCD) update
# for the loadings matrix (the "activations"), W, in which the matrix
# V is approximated by the matrix product W*H. Inputs W and H should
# not be sparse matrices ("is.matrix" should return TRUE). V may be a
# dense matrix or a sparse matrix (as judged by "is.sparse.matrix");
# if it is neither, or if nc < 1, W is returned unchanged. Input
# argument "e" is a non-negative scalar specifying the minimum value
# of the updated loadings.
#
# The update is carried out by transposing V, W and H and reusing the
# compiled routines that update the factors.
#
# Note that a single EM update of the loadings is performed before
# running the CCD updates.
#
# Also note that the RcppParallel multithreading (specified by
# argument "nc") will only work correctly if the number of threads is
# set beforehand using RcppParallel::setThreadOptions.
#
#' @importFrom Rcpp evalCpp
#' @importFrom RcppParallel RcppParallelLibs
#'
ccd_update_loadings <- function (V, W, H, nc = 1, e = 1e-15) {

  # Zero-based row indices of V, computed before V is transposed.
  # Using seq_len gives an empty index set when V has no rows, whereas
  # 1:n would produce c(1,0) and hence an index of -1.
  i <- seq_len(nrow(V)) - 1
  V <- t(V)
  W <- t(W)
  H <- t(H)
  if (nc == 1) {
    if (is.matrix(V)) {
      W <- pnmfem_update_factors_rcpp(V,W,H,i,1)
      W <- ccd_update_factors_rcpp(V,H,W,e)
    } else if (is.sparse.matrix(V)) {
      W <- pnmfem_update_factors_sparse_rcpp(V,W,H,i,1)
      W <- ccd_update_factors_sparse_rcpp(V,H,W,e)
    }
  } else if (nc > 1) {
    if (is.matrix(V)) {
      W <- pnmfem_update_factors_parallel_rcpp(V,W,H,i,1)
      W <- ccd_update_factors_parallel_rcpp(V,H,W,e)
    } else if (is.sparse.matrix(V)) {
      W <- pnmfem_update_factors_sparse_parallel_rcpp(V,W,H,i,1)
      W <- ccd_update_factors_sparse_parallel_rcpp(V,H,W,e)
    }
  }
  return(t(W))
}
84 | 


--------------------------------------------------------------------------------
/R/colors.R:
--------------------------------------------------------------------------------
 1 | # Code in this file is adapted from
 2 | # https://github.com/btupper/catecolors
 3 | # by Ben Tupper.
 4 | 
 5 | # K. L. Kelly. Twenty two colors of maximum contrast. Color
 6 | # Engineering, 3:26-27, 1965.
 7 | # http://www.iscc.org/pdf/PC54_1724_001.pdf
 8 | kelly <- function (index, ...) {
 9 |   if (missing(index))
10 |     index <- seq_len(nrow(KELLYLUT))
11 |   return(get_lut(KELLYLUT,index,...))
12 | }
13 | 
# Retrieve one or more of the Glasbey et al color specifications from
# the GLASBEYLUT look-up table; all rows are returned when "index" is
# not given. Additional arguments are passed on to get_lut.
glasbey <- function (index, ...) {
  if (missing(index))
    return(get_lut(GLASBEYLUT,seq_len(nrow(GLASBEYLUT)),...))
  get_lut(GLASBEYLUT,index,...)
}
20 | 
# Retrieve one or more color specifications from look-up table LUT,
# as hex strings (form = "hex"), as a matrix of red/green/blue values
# (form = "rgb"), or as the selected rows of LUT itself (any other
# value of "form"). LUT is expected to have columns "hex", "red",
# "green" and "blue". Input "index" selects the rows of LUT; by
# default all rows are used. When name = TRUE, the result is labeled
# with the row names of LUT: as element names for "hex", and as row
# names for "rgb".
get_lut <- function (LUT, index, 
                     form = c("hex", "rgb", "data.frame")[1],
                     name = FALSE) {
  if (missing(LUT))
    stop("LUT is required")
  if (missing(index))
    index <- seq_len(nrow(LUT))
  form <- tolower(form[1])
  if (form == "hex") {
    x <- LUT[index,"hex"]
    if (name)
      names(x) <- rownames(LUT)[index]
  } else if (form == "rgb") {
    x <- as.matrix(LUT[index,c("red","green","blue")])

    # x is a matrix, so the labels belong on its rows; "names<-" would
    # instead attach element names padded with NA.
    if (name)
      rownames(x) <- rownames(LUT)[index]
  } else
    x <- LUT[index,]
  return(x)
}
45 | 


--------------------------------------------------------------------------------
/R/fit_multinom_model.R:
--------------------------------------------------------------------------------
 1 | #' @title Fit Simple Multinomial Model
 2 | #'
 3 | #' @description Fit a simple multinomial model for count data, in
 4 | #'   which each sample (\emph{i.e.}, a row of the data matrix \code{X})
 5 | #'   is assigned to a cluster. Under this simple multinomial model,
 6 | #'   \eqn{x_{ij}} assigned to cluster \eqn{k} is multinomial with sample
 7 | #'   size \eqn{s_i = x_{i1} + ... + x_{im}} and multinomial
 8 | #'   probabilities \eqn{p_{1k}, ..., p_{mk}}. This is a special case of
 9 | #'   the multinomial topic model in which all the mixture proportions
10 | #'   are either 0 or 1. The maximum-likelihood estimates (MLEs) of the
11 | #'   multinomial probabilities have a closed-form solution; no
12 | #'   iterative algorithm is needed to fit this simple model.
13 | #' 
14 | #' @param cluster A factor specifying a grouping, or clustering, of
15 | #'   the rows of \code{X}; e.g., the \dQuote{cluster} output from
16 | #'   \code{\link[stats]{kmeans}}.
17 | #'
18 | #' @param X The n x m matrix of counts; all entries of X should be
19 | #'   non-negative. It can be a sparse matrix (class \code{"dgCMatrix"})
20 | #'   or dense matrix (class \code{"matrix"}), with some exceptions (see
21 | #'   \sQuote{Details}).
22 | #'
23 | #' @param verbose This is passed as the \dQuote{verbose} argument in
24 | #'   the call to \code{\link{init_poisson_nmf}}.
25 | #' 
26 | #' @param \dots Additional arguments passed to
27 | #'   \code{\link{init_poisson_nmf}}.
28 | #' 
29 | #' @return A multinomial topic model fit.
30 | #' 
31 | #' @seealso \code{\link{fit_topic_model}}
32 | #' 
33 | #' @importFrom Matrix colSums
34 | #' 
35 | #' @export
36 | #'
37 | fit_multinom_model <- function (cluster, X,
38 |                                 verbose = c("none","detailed"), ...) {
39 | 
40 |   # Check the input data matrix.
41 |   verify.count.matrix(X)
42 | 
43 |   # Check and process input argument "verbose"
44 |   verbose <- match.arg(verbose)
45 |   
46 |   # If necessary, remove all-zero columns from the counts matrix.
47 |   if (any_allzero_cols(X)) {
48 |     X <- remove.allzero.cols(X)
49 |     warning(sprintf(paste("One or more columns of X are all zero; after",
50 |                           "removing all-zero columns, %d columns will be",
51 |                           "used for model fitting"),ncol(X)))
52 |   }
53 | 
54 |   # Get the number of rows (n) and columns (m) of the data matrix,
55 |   n <- nrow(X)
56 |   m <- ncol(X)
57 |   
58 |   # Check the "cluster" input.
59 |   if (!is.factor(cluster))
60 |     cluster <- factor(cluster)
61 |   if (length(cluster) != n)
62 |     stop("Input argument \"cluster\" should have one entry for each row of ",
63 |          "\"X\"")
64 |   if (any(table(cluster) == 0))
65 |     stop("Each level must appear at least once in factor \"cluster\"")
66 |   
67 |   # Initialize the loadings and factors matrices from the clustering:
68 |   # L[i,j] = 1 if row i is assigned to cluster j, and L[i,j] = 0
69 |   # otherwise. The maximum-likelihood estimates of the factors have a
70 |   # closed-form solution in this case.
71 |   k <- nlevels(cluster)
72 |   F <- matrix(0,m,k)
73 |   L <- matrix(0,n,k)
74 |   rownames(L) <- rownames(X)
75 |   rownames(F) <- colnames(X)
76 |   colnames(L) <- levels(cluster)
77 |   colnames(F) <- levels(cluster)
78 |   for (j in levels(cluster)) {
79 |     i      <- which(cluster == j)
80 |     L[i,j] <- 1
81 |     F[,j]  <- colSums(X[i,])/sum(L[i,j])
82 |   }
83 | 
84 |   # Return a multinomial topic model fit.
85 |   return(poisson2multinom(init_poisson_nmf(X,F = F,L = L,
86 |                                            verbose = verbose,...)))
87 | }
88 | 


--------------------------------------------------------------------------------
/R/homer.R:
--------------------------------------------------------------------------------
  1 | #' @title Perform HOMER Motif Enrichment Analysis using DE Genomic Positions
  2 | #'
  3 | #' @description Run HOMER motif finding algorithm
  4 | #'   (\code{findMotifsGenome.pl}) to identify motifs enriched for
  5 | #'   differentially expressed (DE) genomic positions. See
  6 | #'   \url{http://homer.ucsd.edu} for more information.
  7 | #'
  8 | #' @param de An object of class \dQuote{topic_model_de_analysis},
  9 | #'   usually the result of running \code{\link{de_analysis}}.
 10 | #'
 11 | #' @param k Use the DE analysis results for this topic.
 12 | #'
 13 | #' @param positions A table of genomic positions corresponding to rows
 14 | #'   of the \code{de_analysis} results. Specifically, it should a data
 15 | #'   frame with four columns: \dQuote{chr}, chromosome name or number;
 16 | #'   \dQuote{start}, start position of genomic feature; \dQuote{end},
 17 | #'   end position of genomic feature; and \dQuote{name}, the name of the
 18 | #'   genomic feature. If not specified, the genomic positions will be
 19 | #'   extracted from the row names of \code{de$postmean}, in which the
 20 | #'   row names are expected to be of the form \code{chr_start_end}. The
 21 | #'   genomic positions will be written to a BED file (see
 22 | #'   \url{https://genome.ucsc.edu/FAQ/FAQformat.html} for more
 23 | #'   information about BED files).
 24 | #'  
 25 | #' @param genome The genome parameter passed to
 26 | #'   \code{findMotifsGenome.pl}.
 27 | #' 
 28 | #' @param subset Describe input argument "subset" here.
 29 | #'
 30 | #' @param homer.exec The name or file path of the HOMER
 31 | #'   \code{findMotifsGenome.pl} excutable.
 32 | #' 
 33 | #' @param out.dir The positions BED file and HOMER results are written
 34 | #'   to this directory.
 35 | #'
 36 | #' @param homer.options Character string used to override default
 37 | #'   \code{findMotifsGenome.pl} options.
 38 | #'
 39 | #' @param verbose When \code{verbose = TRUE}, progress information is
 40 | #'   printed to the console.
 41 | #' 
 42 | #' @return A data frame containing the motif enrichment results. It
 43 | #'   is created from the \code{knownResults.txt} HOMER output.
 44 | #'
 45 | #' @importFrom utils read.table
 46 | #' @importFrom utils write.table
 47 | #' 
 48 | #' @references
 49 | #' Heinz, S., Benner, C., Spann, N., Bertolino, E., Lin, Y. C., Laslo,
 50 | #' P., Cheng, J. X., Murre, C., Singh, H. and Glass, C. K. (2010).
 51 | #' Simple combinations of lineage-determining transcription factors
 52 | #' prime cis-regulatory elements required for macrophage and B cell
 53 | #' identities. \emph{Molecular Cell} \bold{38}, 576-589.
 54 | #' 
 55 | #' @export
 56 | #' 
 57 | run_homer <-
 58 |   function (de, k, positions, genome = "hg19",
 59 |             subset = function (postmean, lpval, lfsr, rank, quantile)
 60 |               lfsr < 0.05,
 61 |             homer.exec = "findMotifsGenome.pl",
 62 |             out.dir = tempdir(),
 63 |             homer.options = "-len 8,10,12 -size 200 -mis 2 -S 25 -p 1 -h",
 64 |             verbose = TRUE) {
 65 | 
 66 |   # Get the positions if they are not provided.
 67 |   if (missing(positions)) {
 68 |     feature_names <- rownames(de$postmean)
 69 |     out           <- strsplit(feature_names,"_")
 70 |     positions     <- data.frame(chr   = sapply(out,"[[",1),
 71 |                                 start = sapply(out,"[[",2),
 72 |                                 end   = sapply(out,"[[",3),
 73 |                                 name  = feature_names,
 74 |                                 stringsAsFactors = FALSE)
 75 |   }
 76 | 
 77 |   # Select the differentially expressed positions.
 78 |   rows <- select_de_genes(de,k,subset)
 79 |   if (verbose)
 80 |     cat(sprintf("%d out of %d positions selected\n",
 81 |                 length(rows),nrow(de$postmean)))
 82 | 
 83 |   # Write the selected positions to a BED file.
 84 |   pos.file <- file.path(out.dir,"positions.bed")
 85 |   if (verbose)
 86 |     cat("Writing selected positions to",pos.file,"\n")
 87 |   write.table(positions[rows,],pos.file,sep = "\t",quote = FALSE,
 88 |               row.names = FALSE,col.names = FALSE)
 89 | 
 90 |   # Run the HOMER motif enrichment analysis.
 91 |   homer.dir <- file.path(out.dir,"homer")
 92 |   homer.command <- paste(homer.exec,pos.file,genome,homer.dir,homer.options)
 93 |   if (verbose) {
 94 |     cat("Performing HOMER motif enrichment analysis:\n")
 95 |     cat(homer.command,"\n")
 96 |   }
 97 |   system.out <- system(homer.command,ignore.stderr = TRUE,
 98 |                        ignore.stdout = TRUE,intern = TRUE)
 99 |   res <- read.table(file.path(homer.dir,"knownResults.txt"),
100 |                     sep = "\t",comment.char = "",header = TRUE,
101 |                     check.names = FALSE,stringsAsFactors = FALSE)
102 |   return(res)
103 | }
104 | 
105 | 


--------------------------------------------------------------------------------
/R/merge_topics.R:
--------------------------------------------------------------------------------
 1 | #' @title Combine Topics in Multinomial Topic Model
 2 | #'
 3 | #' @description Combine two or more topics in a multinomial topic
 4 | #'   model fit.
 5 | #'
 6 | #' @details Mixture proportions are combined by summation, and factors
 7 | #'   are combined by averaging.
 8 | #' 
 9 | #' @param fit A multinomial topic model fit.
10 | #'
11 | #' @param k The names or numbers of the topics to be combined. Two or
12 | #'   more topics should be chosen.
13 | #'
14 | #' @return A multinomial topic model fit.
15 | #' 
16 | #' @export
17 | #' 
18 | merge_topics <- function (fit, k) {
19 | 
20 |   # Verify input "fit".
21 |   if (!inherits(fit,"multinom_topic_model_fit"))
22 |     stop("Input argument \"fit\" should be an object of class ",
23 |          "\"multinom_topic_model_fit\"")
24 |   verify.fit(fit)
25 |   
26 |   # Verify and process input "k".
27 |   msg <- paste("Input argument \"k\" should contain valid topic names or",
28 |                "numbers (column indices of F and L)")
29 |   if (!((is.numeric(k) | is.character(k)) & length(k) >= 2))
30 |     stop(msg)
31 |   if (is.numeric(k)) {
32 |     if (!all(k >= 1 & k <= ncol(fit$F)))
33 |       stop(msg)
34 |   } else {
35 |     if (!all(is.element(k,colnames(fit$F))))
36 |       stop(msg)
37 |     k <- match(k,colnames(fit$F))
38 |   }
39 |   
40 |   # Combine the selected topics.
41 |   out1   <- combine_factors(fit$F,fit$L,k)
42 |   out2   <- combine_factors(fit$Fn,fit$Ln,k)
43 |   out3   <- combine_factors(fit$Fy,fit$Ly,k)
44 |   fit$F  <- out1$F
45 |   fit$L  <- out1$L
46 |   fit$Fn <- out2$F
47 |   fit$Ln <- out2$L
48 |   fit$Fy <- out3$F
49 |   fit$Ly <- out3$L
50 |   return(fit)
51 | }
52 | 
# Merge the columns k of the factors matrix (F) and of the loadings
# matrix (L) into a single new column, which is placed after the
# remaining columns. The merged loadings are the sum of the selected
# columns, and the merged factors are their average. When F has column
# names, the new column is named by joining the selected names with
# "+", and the same names are used for L.
combine_factors <- function (F, L, k) {
  ids    <- colnames(F)
  merged <- NULL
  if (!is.null(ids))
    merged <- c(ids[-k],paste(ids[k],collapse = "+"))
  Fnew <- cbind(F[,-k],rowMeans(F[,k]))
  Lnew <- cbind(L[,-k],rowSums(L[,k]))
  colnames(Fnew) <- merged
  colnames(Lnew) <- merged
  list(F = Fnew,L = Lnew)
}
69 | 


--------------------------------------------------------------------------------
/R/mixem.R:
--------------------------------------------------------------------------------
 1 | # Compute a maximum-likelihood estimate (MLE) of the mixture
 2 | # proportions in the multinomial mixture model by iterating the EM
 3 | # updates for a fixed number of iterations. This is mainly used for
 4 | # testing the C++ implementation. See the comments attached to the
 5 | # "mixem" C++ function for an explanation of the inputs.
 6 | mixem <- function (L, w, x0, numiter) {
 7 |   L1 <- normalize.cols(L)
 8 |   x  <- x0
 9 |   for (i in 1:numiter)
10 |     x <- mixem.update(L1,w,x)
11 |   return(x)
12 | }
13 | 
# Run one EM update for the multinomial mixture model, returning the
# updated mixture weights. This is mainly used for testing the C++
# implementation.
mixem.update <- function (L1, w, x) {

  # Both the current estimate and the weights are rescaled to sum to
  # one.
  x <- x/sum(x)
  w <- w/sum(w)

  # E step: compute the posterior mixture assignment probabilities. A
  # small constant is added to the probabilities to guard against
  # divisions by zero.
  P <- normalize.rows(normalize.rows.by.max(scale.cols(L1,x)) + 1e-15)

  # M step: update the mixture weights.
  drop(w %*% P)
}
31 | 
# Maximum-likelihood estimate (MLE) for the special case in which
# only one of the counts is positive: the returned vector has one
# entry per column of L, all zero except for a 1 at the column that
# maximizes w %*% normalize.cols(L).
mixture.one.nonzero <- function (L, w) {
  scores <- w %*% normalize.cols(L)
  replace(rep(0,ncol(L)),which.max(scores),1)
}
40 | 


--------------------------------------------------------------------------------
/R/multinom2poisson.R:
--------------------------------------------------------------------------------
 1 | #' @title Recover Poisson NMF Fit from Multinomial Topic Model Fit
 2 | #'
 3 | #' @description This function recovers parameter estimates of the
 4 | #'   Poisson non-negative matrix factorization (NMF) given parameter
 5 | #'   estimates for a multinomial topic model.
 6 | #'
 7 | #' @param fit An object of class \dQuote{multinom_topic_model_fit},
 8 | #'   such as an output from \code{poisson2multinom}. If a Poisson NMF
 9 | #'   fit is provided (that is, an object of class
10 | #'   \dQuote{poisson_nmf_fit}), the fit object is immediately returned
11 | #'   \dQuote{as is}.
12 | #'
13 | #' @param X Optional n x m matrix of counts, or pseudocounts. It can
14 | #'   be a sparse matrix (class \code{"dgCMatrix"}) or dense matrix
15 | #'   (class \code{"matrix"}). This only needs to be provided if the
16 | #'   document sizes \code{fit$s} are not available.
17 | #'
18 | #' @return The return value is the list \code{fit}, in which matrices
19 | #'   \code{fit$F} and \code{fit$L} specify the factors and loadings in
20 | #'   the Poisson non-negative matrix factorization; specifically,
21 | #'   the counts matrix is modeled by the low-rank matrix product
22 | #'   \code{tcrossprod(fit$L,fit$F)}.
23 | #'
24 | #' @importFrom Matrix rowSums
25 | #' 
26 | #' @export
27 | #'
28 | multinom2poisson <- function (fit, X) {
29 | 
30 |   # Check input argument "fit".
31 |   if (inherits(fit,"poisson_nmf_fit"))
32 |     return(fit)
33 |   if (!inherits(fit,"multinom_topic_model_fit"))
34 |     stop("Input argument \"fit\" should be an object of class ",
35 |          "\"multinom_topic_model_fit\"")
36 |   verify.fit(fit)
37 |   F <- fit$F
38 |   L <- fit$L
39 | 
40 |   # Check input argument "X".
41 |   if (!missing(X))
42 |     verify.fit.and.count.matrix(X,fit)    
43 |   
44 |   # Exactly one of X and fit$s should be provided.
45 |   if (sum(c(!missing(X),is.element("s",names(fit)))) != 1)
46 |     stop("Exactly one of \"X\" and \"fit$s\" should be specified")
47 |   
48 |   if (missing(X))
49 |       
50 |     # Process the "scale factors", s.
51 |     s <- as.double(fit$s)
52 |   else
53 | 
54 |     # Compute maximum-likelihood estimates of the "document sizes", s,
55 |     # from the counts matrix, X.
56 |     s <- as.double(rowSums(X))
57 | 
58 |   # Recover F and L for the Poisson non-negative matrix factorization.
59 |   out <- rescale.factors(F,s*L)
60 |   
61 |   # Update the "fit" object, and return it.
62 |   fit$F <- out$F
63 |   fit$L <- out$L
64 |   fit$s <- NULL
65 |   class(fit) <- c("poisson_nmf_fit","list")
66 |   return(fit)
67 | }
68 | 


--------------------------------------------------------------------------------
/R/newsgroups.R:
--------------------------------------------------------------------------------
 1 | #' @name newsgroups
 2 | #'
 3 | #' @title Topic modeling results from the \dQuote{20 Newsgroups} data
 4 | #'   set.
 5 | #'
 6 | #' @docType data
 7 | #' 
 8 | #' @description These are topic modeling results from the \dQuote{20
 9 | #' Newsgroups} data, with k = 10 topics. The data were originally
10 | #' downloaded from \url{http://qwone.com/~jason/20Newsgroups} and
 11 | #' prepared by running code found in an R Markdown file in this
12 | #' GitHub repository:
13 | #' \url{https://github.com/stephenslab/fastTopics-experiments}. See
14 | #' the \dQuote{inst} directory of this package for the scripts used to
15 | #' generate these results.
16 | #'
17 | #' @format \code{newsgroups} is a list with the following elements:
18 | #' 
19 | #' \describe{
20 | #'
21 | #'   \item{topics}{Original labeling of the documents: each document
22 | #'     is from one of 20 \dQuote{newsgroups}.}
23 | #'
24 | #'   \item{L}{Estimated topic proportions matrix; rows are
25 | #'     documents and columns are topics.}
26 | #'
27 | #'   \item{F}{Matrix containing posterior mean estimates of log-fold
28 | #'   changes (in base-2 logarithm). These were computed using
 29 | #'   \code{\link{de_analysis}} with \code{lfc.stat = "vsnull"}. Rows
 30 | #'   are words and columns are topics.}}
31 | #' 
32 | #' @keywords data
33 | #'
34 | #' @examples
35 | #' data(newsgroups)
36 | #' table(newsgroups$topics)
37 | #' dim(newsgroups$L)
38 | #' dim(newsgroups$F)
39 | #' 
40 | NULL
41 | 


--------------------------------------------------------------------------------
/R/pbmc_facs.R:
--------------------------------------------------------------------------------
 1 | #' @name pbmc_facs
 2 | #'
 3 | #' @title Mixture of 10 FACS-purified PBMC Single-Cell RNA-seq data
 4 | #'
 5 | #' @docType data
 6 | #' 
 7 | #' @description These data are a selection of the reference
 8 | #'   transcriptome profiles generated via single-cell RNA sequencing
 9 | #'   (RNA-seq) of 10 bead-enriched subpopulations of PBMCs (Donor A),
10 | #'   described in Zheng \emph{et al} (2017). The data are unique
11 | #'   molecular identifier (UMI) counts for 16,791 genes in 3,774 cells.
12 | #'   (Genes with no expression in any of the cells were removed.) Since
13 | #'   the majority of the UMI counts are zero, they are efficiently
14 | #'   stored as a 3,774 x 16,791 sparse matrix. These data are used in
 15 | #'   the vignette illustrating how 'fastTopics' can be used to analyze
 16 | #'   single-cell RNA-seq data. Data for a separate set of 100 cells is
 17 | #'   provided as a \dQuote{test set} to evaluate out-of-sample predictions.
18 | #'
19 | #' @format \code{pbmc_facs} is a list with the following elements:
20 | #' 
21 | #' \describe{
22 | #'
23 | #'   \item{counts}{3,774 x 16,791 sparse matrix of UMI counts, with
24 | #'      rows corresponding to samples (cells) and columns corresponding to
 25 | #'      genes. It is an object of class \code{"dgCMatrix"}.}
26 | #'
27 | #'   \item{counts_test}{UMI counts for an additional test set of 100
28 | #'     cells.}
29 | #'
30 | #'   \item{samples}{Data frame containing information about the
31 | #'     samples, including cell barcode and source FACS population
32 | #'     (\dQuote{celltype} and \dQuote{facs_subpop}).}
33 | #'
34 | #'   \item{samples_test}{Sample information for the additional test
35 | #'      set of 100 cells.}
36 | #' 
 37 | #'   \item{genes}{Data frame containing information about the genes,
38 | #'     including gene symbol and Ensembl identifier.}
39 | #'
40 | #'   \item{fit}{Poisson non-negative matrix factorization (NMF) fitted
 41 | #'     to the UMI count data \code{counts}, with rank \code{k = 6}. See
 42 | #'     the vignette for how the Poisson NMF model fitting was performed.}}
43 | #'
 44 | #' @source \url{https://www.10xgenomics.com/resources/datasets}
45 | #' 
46 | #' @references
47 | #' G. X. Y. Zheng \emph{et al} (2017). Massively parallel digital
48 | #' transcriptional profiling of single cells. \emph{Nature Communications}
49 | #' \bold{8}, 14049. \doi{10.1038/ncomms14049}
50 | #' 
51 | #' @keywords data
52 | #'
53 | #' @examples
54 | #' library(Matrix)
55 | #' data(pbmc_facs)
56 | #' cat(sprintf("Number of cells: %d\n",nrow(pbmc_facs$counts)))
57 | #' cat(sprintf("Number of genes: %d\n",ncol(pbmc_facs$counts)))
58 | #' cat(sprintf("Proportion of counts that are non-zero: %0.1f%%.\n",
59 | #'             100*mean(pbmc_facs$counts > 0)))
60 | #' 
61 | NULL
62 | 


--------------------------------------------------------------------------------
/R/pnmfem.R:
--------------------------------------------------------------------------------
 1 | # This function implements the EM updates for the factors matrix, F,
 2 | # in which the matrix X is approximated by tcrossprod(L,F). The EM
 3 | # updates are equivalent to multiplicative updates, but computation is
 4 | # implemented differently. Inputs F and L should be dense matrices
 5 | # ("is.matrix" should return TRUE), but for X both dense matrices and
 6 | # sparse matrices are supported ("matrix" and "dgCMatrix" classes).
 7 | # Input "j" specifies which rows of F to update; by default, all rows
 8 | # are updated. Input "numiter" specifies the number of EM updates to
 9 | # perform. Input argument "e" is a non-negative scalar specifying the
10 | # minimum value of the updated loadings. A positive value of "e"
11 | # promotes better convergence of the EM updates.
12 | #
13 | # Note that the RcppParallel multithreading (specified by argument
14 | # "nc") will only work correctly if the number of threads is set
15 | # beforehand using RcppParallel::setThreadOptions.
16 | #
17 | #' @importFrom Rcpp evalCpp
18 | #' @importFrom RcppParallel RcppParallelLibs
19 | #'
20 | pnmfem_update_factors <- function (X, F, L, j = seq(1,ncol(X)),
21 |                                    numiter = 1, nc = 1) {
22 |   F <- t(F)
23 |   if (nc == 1) {
24 |     if (is.matrix(X))
25 |       F <- pnmfem_update_factors_rcpp(X,F,L,j-1,numiter)
26 |     else if (is.sparse.matrix(X))
27 |       F <- pnmfem_update_factors_sparse_rcpp(X,F,L,j-1,numiter)
28 |   } else if (nc > 1) {
29 |     if (is.matrix(X))
30 |       F <- pnmfem_update_factors_parallel_rcpp(X,F,L,j-1,numiter)
31 |     else if (is.sparse.matrix(X))
32 |       F <- pnmfem_update_factors_sparse_parallel_rcpp(X,F,L,j-1,numiter)
33 |   }
34 |   return(t(F))
35 | }
36 | 
# This function implements the EM updates for the loadings matrix, L,
# in which the matrix X is approximated by tcrossprod(L,F). The EM
# updates are equivalent to multiplicative updates, but computation is
# implemented differently. Inputs F and L should be dense matrices
# ("is.matrix" should return TRUE), but for X both dense matrices and
# sparse matrices are supported ("matrix" and "dgCMatrix" classes); if
# X is neither, or if nc < 1, L is returned unchanged. Input "i"
# specifies which rows of L to update; by default, all rows are
# updated. Input "numiter" specifies the number of EM updates to
# perform, and the multithreaded routines are used when nc > 1.
#
# The update is carried out by transposing X and L and reusing the
# compiled routines that update the factors.
#
# Note that the RcppParallel multithreading (specified by argument
# "nc") will only work correctly if the number of threads is set
# beforehand using RcppParallel::setThreadOptions.
#
#' @importFrom Rcpp evalCpp
#' @importFrom RcppParallel RcppParallelLibs
#'
pnmfem_update_loadings <- function (X, F, L, i = seq(1,nrow(X)),
                                    numiter = 1, nc = 1) {

  # Default arguments are evaluated lazily, so "i" must be evaluated
  # now, while X is still n x m. After X is transposed below,
  # nrow(X) would be the number of columns of the original matrix.
  force(i)
  X <- t(X)  
  L <- t(L)
  if (nc == 1) {
    if (is.matrix(X))
      L <- pnmfem_update_factors_rcpp(X,L,F,i-1,numiter)
    else if (is.sparse.matrix(X))
      L <- pnmfem_update_factors_sparse_rcpp(X,L,F,i-1,numiter)
  } else if (nc > 1) {
    if (is.matrix(X))
      L <- pnmfem_update_factors_parallel_rcpp(X,L,F,i-1,numiter)
    else if (is.sparse.matrix(X))
      L <- pnmfem_update_factors_sparse_parallel_rcpp(X,L,F,i-1,numiter)
  }
  return(t(L))
}


--------------------------------------------------------------------------------
/R/poismix.R:
--------------------------------------------------------------------------------
 1 | # Compute a maximum-likelihood estimate (MLE) of the mixture weights
 2 | # in a Poisson mixture model by iterating the multinomial mixture
 3 | # model EM updates for a fixed number of iterations. This is mainly
 4 | # used for testing the C++ implementation. See the comments attached
 5 | # to the "poismixem" C++ function for an explanation of the inputs.
 6 | poismixem <- function (L, w, x0, numiter) {
 7 |   x <- x0
 8 | 
 9 |   # Recover the mixture weights of the multinomial mixture model from
10 |   # the mixture weights of the Poisson mixture model. Here, s is the
11 |   # "scale factor".
12 |   s <- sum(L %*% x)
13 |   u <- colSums(L)
14 |   L <- normalize.cols(L)
15 |   x <- x*u/s
16 | 
17 |   # Perform one or more EM updates for the multinomial mixture model.
18 |   x <- mixem(L,w,x,numiter)
19 | 
20 |   # Recover the mixture weights of the Poisson mixture model from the
21 |   # mixture weights of the multinomial mixture model.
22 |   s <- sum(w)
23 |   return(s*x/u)
24 | }
25 | 
# Compute the maximum-likelihood estimate (MLE) for the special case
# in which only one of the counts is positive.
poismix.one.nonzero <- function (L, w) {
  est <- mixture.one.nonzero(L,w)

  # Replace the largest entry of the estimate (the first one, if
  # there are ties) with the sum of the counts divided by the sum of
  # the corresponding column of L.
  k <- which.max(est)
  est[k] <- sum(w)/sum(L[,k])
  return(est)
}
34 | 


--------------------------------------------------------------------------------
/R/poisson2multinom.R:
--------------------------------------------------------------------------------
 1 | #' @title Recover Multinomial Topic Model Fit from Poisson NMF fit
 2 | #'
 3 | #' @description This function recovers parameter estimates of the
 4 | #'   multinomial topic model given parameter estimates for a Poisson
 5 | #'   non-negative matrix factorization (NMF).
 6 | #'
 7 | #' @param fit An object of class \dQuote{poisson_nmf_fit}, such as an
 8 | #'   output from \code{fit_poisson_nmf}. It does not make sense for a
 9 | #'   multinomial topic model to have less than two topics, so an error
10 | #'   will be reported when k < 2, where k is the rank of the matrix
11 | #'   factorization. If a multinomial topic model fit is provided (that
12 | #'   is, an object of class \dQuote{multinom_topic_model_fit}), the fit
13 | #'   object is immediately returned \dQuote{as is}.
14 | #'
15 | #' @return The return value is the list \code{fit}, in which
16 | #'   \code{fit$F} and \code{fit$L} are the parameters of the multinomial
17 | #'   topic model; specifically, \code{fit$L[i,]} gives the topic
18 | #'   probabilities for sample or document i, and \code{fit$F[,k]} gives
19 | #'   the term probabilities for topic k. An additional vector
20 | #'   \code{fit$s} of length n is returned giving the "size factors".
21 | #' 
22 | #' @export
23 | #' 
poisson2multinom <- function (fit) {

  # A multinomial topic model fit needs no conversion, so it is
  # returned as is.
  if (inherits(fit,"multinom_topic_model_fit"))
    return(fit)

  # Check input argument "fit". A topic model with fewer than two
  # topics is not allowed, so both F and L must have at least two
  # columns.
  if (!inherits(fit,"poisson_nmf_fit"))
    stop("Input argument \"fit\" should be an object of class ",
         "\"poisson_nmf_fit\"")
  verify.fit(fit)
  if (ncol(fit$F) < 2 || ncol(fit$L) < 2)
    stop("Input matrices \"fit$F\" and \"fit$L\" should have 2 or more ",
         "columns")
  
  # Recover F and L for the multinomial model. Here, s gives the
  # Poisson rates for generating the "document sizes".
  out   <- get_multinom_from_pnmf(fit$F,fit$L)
  fit$F <- out$F
  fit$L <- out$L
  fit$s <- out$s
  
  # Return the updated fit, now labeled as a multinomial topic model
  # fit. All other elements of the fit are left unchanged.
  class(fit) <- c("multinom_topic_model_fit","list")
  return(fit)
}
48 | 
# Compute the parameters of the multinomial topic model (F, L and the
# vector s of row sums used to normalize L) from the parameters of
# the Poisson NMF model.
get_multinom_from_pnmf <- function (F, L) {

  # Move the column sums of F over to the columns of L.
  colsums <- colSums(F)
  Lscaled <- scale.cols(L,colsums)

  # Each row of the rescaled L is divided by its sum, which is
  # returned as s.
  sizes <- rowSums(Lscaled)
  return(list(F = scale.cols(F,1/colsums),
              L = Lscaled/sizes,
              s = sizes))
}
59 | 


--------------------------------------------------------------------------------
/R/scd.R:
--------------------------------------------------------------------------------
 1 | # This function implements a sequential co-ordinate descent (SCD)
 2 | # update for the factors matrix (the "basis vectors"), H, in which the
 3 | # matrix A is approximated by the matrix product W*H. Inputs W and H
 4 | # should be dense matrices ("is.matrix" should return TRUE). Input "j"
 5 | # specifies which columns of H to update; by default, all columns are
 6 | # updated. Input "numiter" specifies the number of inner-loop
 7 | # iterations to perform.  Input argument "e" a non-negative scalar
 8 | # included in the computations to prevent NaNs due to division by
 9 | # zero.
10 | #
11 | # Note that a single EM update of each factor is performed before
12 | # running the CCD updates (unless runem = FALSE).
13 | #
14 | # Also note that the RcppParallel multithreading (specified by
15 | # argument "nc") will only work correctly if the number of threads is
16 | # set beforehand using RcppParallel::setThreadOptions.
17 | #
18 | #' @importFrom Rcpp evalCpp
19 | #' @importFrom RcppParallel RcppParallelLibs
20 | #'
21 | scd_update_factors <- function (A, W, H, j = seq(1,ncol(A)), numiter = 1,
22 |                                 nc = 1, e = 1e-16, runem = TRUE) {
23 |   if (!is.numeric(j))
24 |     stop("Input argument \"j\" should be a numeric vector")
25 |   if (nc == 1) {
26 |     if (is.matrix(A)) {
27 |       if (runem)
28 |         H <- pnmfem_update_factors_rcpp(A,H,W,j-1,1)
29 |       H <- scd_update_factors_rcpp(A,W,H,j-1,numiter,e)
30 |     } else if (is.sparse.matrix(A)) {
31 |       if (runem)
32 |         H <- pnmfem_update_factors_sparse_rcpp(A,H,W,j-1,1)
33 |       H <- scd_update_factors_sparse_rcpp(A,W,H,j-1,numiter,e)
34 |     }
35 |   } else if (nc > 1) {
36 |     if (is.matrix(A)) {
37 |       if (runem)
38 |         H <- pnmfem_update_factors_parallel_rcpp(A,H,W,j-1,1)
39 |       H <- scd_update_factors_parallel_rcpp(A,W,H,j-1,numiter,e)
40 |     } else if (is.sparse.matrix(A)) {
41 |       if (runem)
42 |         H <- pnmfem_update_factors_sparse_parallel_rcpp(A,H,W,j-1,1)
43 |       H <- scd_update_factors_sparse_parallel_rcpp(A,W,H,j-1,numiter,e)
44 |     }
45 |   }  
46 |   return(H)
47 | }
48 | 
# This function implements a sequential co-ordinate descent (SCD)
# update for the loadings matrix (the "activations"), W, in which the
# matrix A is approximated by the matrix product W*H. Inputs W and H
# should be dense matrices ("is.matrix" should return TRUE); A may be
# a dense matrix or a sparse matrix. Input "i" specifies which rows
# of W to update; by default, all rows are updated. Input "numiter"
# specifies the number of inner-loop iterations to perform. Input
# argument "e" is a non-negative scalar included in the computations
# to prevent NaNs due to division by zero.
#
# The update is carried out by applying the factors update routines
# to the transposed matrices, then transposing the result.
#
# Note that a single EM update of the loadings is performed before
# running the SCD updates (unless runem = FALSE).
#
# Also note that the RcppParallel multithreading (specified by
# argument "nc") will only work correctly if the number of threads is
# set beforehand using RcppParallel::setThreadOptions.
#
#' @importFrom Rcpp evalCpp
#' @importFrom RcppParallel RcppParallelLibs
#'
scd_update_loadings <- function (A, W, H, i = seq(1,nrow(A)), numiter = 1,
                                 nc = 1, e = 1e-16, runem = TRUE) {
  if (!is.numeric(i))
    stop("Input argument \"i\" should be a numeric vector")

  # Set up the transposed problem, in which the rows of W become the
  # columns of Wt.
  At <- t(A)
  Wt <- t(W)
  Ht <- t(H)

  # Choose the pair of routines (EM update, SCD update) according to
  # the number of threads and whether the transposed A is dense or
  # sparse. The list remains NULL when none of the cases apply.
  routines <- NULL
  if (nc == 1) {
    if (is.matrix(At))
      routines <- list(em  = pnmfem_update_factors_rcpp,
                       scd = scd_update_factors_rcpp)
    else if (is.sparse.matrix(At))
      routines <- list(em  = pnmfem_update_factors_sparse_rcpp,
                       scd = scd_update_factors_sparse_rcpp)
  } else if (nc > 1) {
    if (is.matrix(At))
      routines <- list(em  = pnmfem_update_factors_parallel_rcpp,
                       scd = scd_update_factors_parallel_rcpp)
    else if (is.sparse.matrix(At))
      routines <- list(em  = pnmfem_update_factors_sparse_parallel_rcpp,
                       scd = scd_update_factors_sparse_parallel_rcpp)
  }

  # Run the optional single EM update, then the SCD updates. The
  # indices are shifted down by one before being passed to the
  # routines.
  if (!is.null(routines)) {
    rows <- i - 1
    if (runem)
      Wt <- routines$em(At,Wt,Ht,rows,1)
    Wt <- routines$scd(At,Ht,Wt,rows,numiter,e)
  }
  return(t(Wt))
}
99 | 


--------------------------------------------------------------------------------
/R/select.R:
--------------------------------------------------------------------------------
 1 | #' @rdname select_loadings
 2 | #'
 3 | #' @title Extract or Re-order Data Rows in Poisson NMF or Multinomial Topic Model Fit
 4 | #'
 5 | #' @description This function can be used to extract estimates for a
 6 | #'   subset of the count data, or to re-order the rows of the loadings
 7 | #'   matrix.
 8 | #'
 9 | #' @param .data Poisson NMF or Multinomial Topic Model fit; that is,
10 | #'   an object of class \dQuote{poisson_nmf_fit} or
11 | #'   \dQuote{multinom_topic_model_fit}, such as an output from
12 | #'   \code{\link{fit_poisson_nmf}} or \code{\link{fit_topic_model}}.
13 | #'
14 | #' @param loadings Indices (names or numbers) giving data rows to
15 | #'   keep. If not specified, all rows are kept.
16 | #' 
17 | #' @param \dots Other arguments passed to the generic select function.
18 | #' 
19 | #' @return A Poisson NMF or multinomial topic model fit containing the
20 | #'   selected data rows only.
21 | #'
22 | #' @importFrom dplyr select
23 | #'
24 | #' @aliases select
25 | #' 
26 | #' @method select poisson_nmf_fit
27 | #'
28 | #' @export
29 | #' 
select.poisson_nmf_fit <- function (.data, loadings, ...) {

  # All the work is done by select_loadings. When "loadings" is not
  # supplied, it is passed along as a missing argument.
  return(select_loadings(.data,loadings,...))
}
32 | 
33 | #' @rdname select_loadings
34 | #' 
35 | #' @method select multinom_topic_model_fit
36 | #'
37 | #' @export
38 | #' 
select.multinom_topic_model_fit <- function (.data, loadings, ...) {

  # All the work is done by select_loadings. When "loadings" is not
  # supplied, it is passed along as a missing argument.
  return(select_loadings(.data,loadings,...))
}
41 | 
42 | #' @rdname select_loadings
43 | #'
44 | #' @export
45 | #'
select_loadings <- function (.data, loadings, ...) {

  # Check that the input is a Poisson NMF or multinomial topic model
  # fit.
  if (!inherits(.data,c("poisson_nmf_fit","multinom_topic_model_fit")))
    stop("Input \"fit\" should be an object of class \"poisson_nmf_fit\" or ",
         "\"multinom_topic_model_fit\"")
  verify.fit(.data)

  # By default, all rows are kept. Here seq_len is used instead of
  # 1:n so that a fit with no rows gives an empty selection rather
  # than the invalid indices c(1,0).
  if (missing(loadings))
    loadings <- seq_len(nrow(.data$L))

  # Extract the selected rows of L and, when they are present, the
  # selected rows of Ln and Ly and the selected entries of s.
  # Subsetting an element that is absent (NULL) gives NULL, so absent
  # elements stay absent. Any subsetting error is reported as an
  # invalid selection.
  tryCatch({  
    .data$L  <- .data$L[loadings,,drop = FALSE]
    .data$Ln <- .data$Ln[loadings,,drop = FALSE]
    .data$Ly <- .data$Ly[loadings,,drop = FALSE]
    .data$s  <- .data$s[loadings]
  },error = function (e) stop("Invalid selection of loadings"))
  return(.data)
}
63 | 


--------------------------------------------------------------------------------
/R/sysdata.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/R/sysdata.rda


--------------------------------------------------------------------------------
/R/topicscore.R:
--------------------------------------------------------------------------------
  1 | # Much of the code contained here is based on the TopicScore package
  2 | # source code developed by Minzhe Wang and Tracy Ke, distributed under
  3 | # the MIT license.
  4 | 
  5 | # Estimates the word-topic matrix (A) from the word-document matrix
  6 | # (X) using the Topic SCORE algorithm.
  7 | #
  8 | # The inputs are: X, the n x m counts matrix (may be sparse or dense);
  9 | # k, the number of topics; k0, the number of greedy search steps to
 10 | # use in Vertex Hunting; m, the number of centers in the k-means step
 11 | # of Vertex Hunting; and Mquantile, the percentage of the quantile of
 12 | # the diagonal entries of matrix M, which is used to upper truncate
 13 | # the diagonal entries of matrix M.  When it is zero, it will
 14 | # degenerate the case when there is no normalization. When it's 1,
 15 | # there is no truncation.
 16 | #
 17 | # The return value is an m x k word-topic matrix.
 18 | #
 19 | #' @importFrom stats quantile
 20 | #' @importFrom irlba irlba
 21 | #' 
 22 | topic_score <- function (X, k, k0 = ifelse(k < 10,ceiling(1.5*k),k + 2),
 23 |                          m = 3*k, nstart = 4, Mquantile = 0) {
 24 | 
 25 |   # M0 = D0*1/n, where D0(j,i) is the expected frequency of word j
 26 |   # in document i.
 27 |   M0 <- colMeans(X)
 28 |   M0 <- pmin(M0,quantile(M0,Mquantile))
 29 | 
 30 |   # Compute the k right singular vectors of the normalized counts matrix.
 31 |   X <- scale.cols(X,1/sqrt(M0))
 32 |   V <- irlba(X,k)$v
 33 |   
 34 |   # Step 1: Recover the left-scaling matrix (LSM).
 35 |   v1 <- abs(V[,1])
 36 |   R  <- V[,-1,drop = FALSE]/v1
 37 |   
 38 |   # Step 2: Perform "Vertex Hunting".
 39 |   V <- vertex_hunting(R,k0,m,nstart)
 40 |   
 41 |   # Step 3: Recover the normalized topic matrix (NTM).
 42 |   P <- cbind(R,1) %*% solve(cbind(V,1))
 43 |   P <- pmax(P,0)
 44 |   P <- P / rowSums(P)
 45 |   
 46 |   # Step 4: Recover the unscaled topic matrix.
 47 |   A <- sqrt(M0)*v1*P
 48 | 
 49 |   # Step 5: Return the scaled topic matrix.
 50 |   return(normalize.cols(A))
 51 | }
 52 | 
 53 | # The Vertex Hunting algorithm for Topic-SCORE. It finds a simplex
 54 | # with k vertices that best approximates the given p data points in a
 55 | # (k-1) dimensional space.
 56 | #
 57 | # The inputs are: R, the p x k-1 data matrix, with each row being a
 58 | # data point; k0, the number of greedy search steps; and m, the number of
 59 | # centers in the k-means step.
 60 | #
 61 | # The output is the k x k-1 vertices matrix, with each row being a
 62 | # vertex in the found simplex.
 63 | #
 64 | #' @importFrom utils combn
 65 | #' @importFrom stats kmeans
 66 | #' 
 67 | vertex_hunting <- function (R, k0, m, nstart) {
 68 |   k <- ncol(R) + 1
 69 |   
 70 |   # Step 2a.
 71 |   X <- kmeans(R,m,iter.max = 100,nstart = nstart)$centers
 72 | 
 73 |   # Step 2b'.
 74 |   Y  <- tcrossprod(X)
 75 |   D  <- matrix(diag(Y),m,m)
 76 |   D  <- D + t(D) - 2*Y
 77 |   i  <- drop(arrayInd(which.max(D),dim(D)))
 78 |   X0 <- X[i,,drop = FALSE]
 79 |   X  <- X[-i,,drop = FALSE]
 80 |   if (k0 > 2) {
 81 |     for (j in 3:k0) {
 82 |       D  <- matrix(diag(tcrossprod(X)),j-1,nrow(X),byrow = TRUE)
 83 |       D  <- D - 2*tcrossprod(X0,X)
 84 |       i  <- which.max(colMeans(D))
 85 |       X0 <- rbind(X0,X[i,])
 86 |       X  <- X[-i,,drop = FALSE]
 87 |     }
 88 |     X <- X0
 89 |   }
 90 |   
 91 |   # Step 2b.
 92 |   B <- combn(1:k0,k)
 93 |   n <- ncol(B)
 94 |   v <- rep(0,n)
 95 |   for (i in 1:n)
 96 |     for (j in 1:k0) {
 97 |       u <- tryCatch(simplex_dist(X[j,],X[B[,i],,drop = FALSE]),
 98 |                     error = function (e) Inf)
 99 |       v[i] <- max(u,v[i])
100 |     }
101 |   i <- which.min(v)
102 |   return(X[B[,i],])
103 | }
104 | 
# Compute the shortest (Euclidean) distance between the point x and
# any point in the simplex whose vertices are the rows of V.
#
# A point in the simplex is written as the last vertex plus a
# weighted sum of the differences between the other vertices and the
# last vertex, in which the weights are non-negative and sum to at
# most 1. The squared distance is then a quadratic function of the
# weights, which is minimized using solve.QP.
#
#' @importFrom quadprog solve.QP
#' 
simplex_dist <- function (x, V) {
  nv   <- nrow(V)
  last <- V[nv,]

  # The first nv - 1 columns of Amat say that the weights are
  # non-negative; the final column says that their sum is at most 1.
  # The same matrix also gives the differences between the first
  # nv - 1 vertices and the last vertex.
  Amat  <- cbind(diag(nv - 1),-1)
  bvec  <- c(rep(0,nv - 1),-1)
  edges <- Amat %*% V

  # Solve the quadratic program. The squared distance is recovered by
  # adding twice the optimal value to the squared distance between x
  # and the last vertex; it is truncated at zero before taking the
  # square root.
  r  <- x - last
  qp <- solve.QP(tcrossprod(edges),drop(edges %*% r),Amat,bvec)
  return(sqrt(max(sum(r^2) + 2*qp$value,0)))
}
122 | 


--------------------------------------------------------------------------------
/R/verify_args.R:
--------------------------------------------------------------------------------
 1 | # Verify that x is a vector with positive entries.
 2 | verify.positive.vector <- function (x, arg.name = deparse(substitute(x))) {
 3 |   arg.name <- sprintf("\"%s\"",arg.name)
 4 |   msg <- paste("Input argument",arg.name,"should be a numeric vector in",
 5 |                "which all entries are finite, non-missing and positive")
 6 |   if (!is.numeric(x))
 7 |     stop(msg)
 8 |   else if (any(x <= 0) | any(is.infinite(x)) | anyNA(x))
 9 |     stop(msg)
10 |   return(TRUE)
11 | }
12 | 
# Check that x is a non-negative, numeric matrix (dense or sparse) in
# which all entries are finite and non-missing. An error is reported
# if this is not the case; otherwise, TRUE is returned. Input
# "arg.name" is the name used to refer to x in the error message.
verify.nonnegative.matrix <- function (x, arg.name = deparse(substitute(x))) {
  right.type <- (is.matrix(x) && is.numeric(x)) || is.sparse.matrix(x)

  # The entries are only examined when x is the right type of object.
  if (!right.type || anyNA(x) || any(is.infinite(x)) || any(x < 0))
    stop(paste("Input argument",sprintf("\"%s\"",arg.name),
               "should be a non-negative,",
               "numeric matrix (a \"matrix\" or a \"dgCMatrix\"), and",
               "all entries should be finite and non-missing"))
  return(TRUE)
}
25 | 
# Check that x is a valid count matrix; that is, a non-negative
# matrix with at least 2 rows and at least 2 columns. An error is
# reported if this is not the case; otherwise, TRUE is returned.
verify.count.matrix <- function (x, arg.name = deparse(substitute(x))) {
  verify.nonnegative.matrix(x,arg.name)
  if (nrow(x) < 2 || ncol(x) < 2)
    stop(paste("Input matrix",sprintf("\"%s\"",arg.name),
               "should have at least 2 rows",
               "and 2 columns"))
  return(TRUE)
}
35 | 
# Check that x is a valid multinomial topic model fit or Poisson
# non-negative matrix factorization; that is, a list containing
# non-negative matrices F and L with the same number of columns and,
# optionally, a vector s of positive "scale factors" with one entry
# for each row of L. An error is reported if this is not the case;
# otherwise, TRUE is returned.
verify.fit <- function (x, arg.name = deparse(substitute(x))) {
  name.F <- paste0(arg.name,"$F")
  name.L <- paste0(arg.name,"$L")
  name.s <- paste0(arg.name,"$s")

  # Check that x is a list containing elements F and L.
  if (!(is.list(x) && all(c("F","L") %in% names(x))))
    stop(paste("Input argument",sprintf("\"%s\"",arg.name),
               "should be a list containing",
               "non-negative matrices \"F\" and \"L\""))

  # Check the matrices F and L.
  verify.nonnegative.matrix(x$F,name.F)
  verify.nonnegative.matrix(x$L,name.L)
  quoted.F <- sprintf("\"%s\"",name.F)
  quoted.L <- sprintf("\"%s\"",name.L)
  if (ncol(x$F) != ncol(x$L))
    stop(paste("Input matrices",quoted.F,"and",quoted.L,"should have",
               "the same number of columns"))

  # Check the vector of "scale factors", s, if it is provided.
  if ("s" %in% names(x)) {
    verify.positive.vector(x$s,name.s)
    if (length(x$s) != nrow(x$L))
      stop(paste("The length of input vector",sprintf("\"%s\"",name.s),
                 "should equal the","number of rows in",quoted.L))
  }
  return(TRUE)
}
67 | 
# Check that x is a valid count matrix, that "fit" is a valid topic
# model fit or non-negative matrix factorization, and that the two
# are consistent with each other: the rows of fit$L should match the
# rows of x, and the rows of fit$F should match the columns of x,
# both in number and in name. An error is reported if this is not the
# case; otherwise, TRUE is returned.
verify.fit.and.count.matrix <-
    function (x, fit,
              arg.name.x   = deparse(substitute(x)),
              arg.name.fit = deparse(substitute(fit))) {

  # Check each of the two inputs separately.
  verify.count.matrix(x,arg.name.x)
  verify.fit(fit,arg.name.fit)

  # These are the quoted names used in the error messages.
  quoted.x <- sprintf("\"%s\"",arg.name.x)
  quoted.F <- sprintf("\"%s$F\"",arg.name.fit)
  quoted.L <- sprintf("\"%s$L\"",arg.name.fit)

  # Check that the dimensions agree.
  if (nrow(fit$L) != nrow(x) || nrow(fit$F) != ncol(x))
    stop(paste("Dimensions of input matrices",quoted.x,",",quoted.F,
               "and",quoted.L,"do not agree"))

  # Check that the row and column names agree.
  if (!identical(rownames(fit$L),rownames(x)) ||
      !identical(rownames(fit$F),colnames(x)))
    stop(paste("Dimnames of input matrices",quoted.x,",",quoted.F,
               "and",quoted.L,"are not consistent"))
  return(TRUE)
}
88 | 
# Return TRUE if x is a finite, non-missing numeric scalar, and FALSE
# otherwise.
#
# The checks are combined with the short-circuiting "&&" operator so
# that the later checks are only made once the earlier ones have
# passed. In particular, is.finite is never called on an input that
# is not numeric (it reports an error for some non-numeric inputs,
# such as lists), and it is only called on an input of length 1.
# Since is.finite is FALSE for NA and NaN, a separate check for
# missing values is not needed.
is.scalar <- function (x)
  is.numeric(x) &&
  length(x) == 1 &&
  is.finite(x)
95 | 


--------------------------------------------------------------------------------
/R/zzz.R:
--------------------------------------------------------------------------------
.onLoad <- function (lib, pkg) {

  # Set the global option "Matrix.warnDeprecatedCoerce" to 2 when the
  # package is loaded.
  options(list(Matrix.warnDeprecatedCoerce = 2))
}
4 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # fastTopics
  2 | 
  3 | [![R-CMD-check](https://github.com/stephenslab/fastTopics/workflows/R-CMD-check/badge.svg)](https://github.com/stephenslab/fastTopics/actions)
  4 | [![CircleCI](https://dl.circleci.com/status-badge/img/gh/stephenslab/fastTopics/tree/master.svg?style=svg)](https://app.circleci.com/pipelines/github/stephenslab/fastTopics?branch=master)
  5 | [![codecov](https://codecov.io/gh/stephenslab/fastTopics/branch/master/graph/badge.svg)](https://app.codecov.io/gh/stephenslab/fastTopics)
  6 | 
  7 | fastTopics is an R package implementing fast, scalable optimization
  8 | algorithms for fitting topic models and non-negative matrix 
  9 | factorizations to count data. The methods exploit the
 10 | [close relationship][vignette-close-relationship] between the topic
 11 | model and Poisson non-negative matrix factorization. The package also
 12 | provides tools to compare, annotate and visualize model fits,
 13 | including functions to create "structure plots" and functions to
 14 | identify distinctive features of topics. The fastTopics package is a
 15 | successor to the [CountClust package][countclust].
 16 | 
 17 | If you find a bug, or you have a question or feedback on this software,
 18 | please post an [issue][issues].
 19 | 
 20 | ## Citing this work
 21 | 
 22 | If you find the fastTopics package or any of the source code in this
 23 | repository useful for your work, please cite:
 24 | 
 25 | > K. K. Dey, C. J. Hsiao and M. Stephens (2017). [Visualizing the
 26 | > structure of RNA-seq expression data using grade of membership 
 27 | > models.][countclust-paper] PLoS Genetics 13, e1006599.
 28 | >
 29 | > P. Carbonetto, A. Sarkar, Z. Wang and M. Stephens (2021).
 30 | > [Non-negative matrix factorization algorithms greatly improve topic
 31 | > model fits.][fasttopics-paper] arXiv 2105.13440.
 32 | 
 33 | If you used the `de_analysis` function in fastTopics, please cite:
 34 | 
 35 | > P. Carbonetto, K. Luo, A. Sarkar, A. Hung, K. Tayeb, S. Pott and
 36 | > M. Stephens (2023). [GoM DE: interpreting structure in sequence
 37 | > count data with differential expression analysis allowing for
 38 | > grades of membership.][singlecell-topics-paper]
 39 | > Genome Biology 24, 236.
 40 | 
 41 | ## License
 42 | 
 43 | Copyright (c) 2019-2025, Peter Carbonetto and Matthew Stephens.
 44 | 
 45 | All source code and software in this repository are made available
 46 | under the terms of the [MIT license][mit-license].
 47 | 
 48 | ## Quick Start
 49 | 
 50 | Install and load the package from CRAN:
 51 | 
 52 | ```R
 53 | install.packages("fastTopics")
 54 | library(fastTopics)
 55 | ```
 56 | 
 57 | Alternatively, install the latest version from GitHub:
 58 | 
 59 | ```R
 60 | remotes::install_github("stephenslab/fastTopics")
 61 | library(fastTopics)
 62 | ```
 63 | 
 64 | Note that installing the package will require a C++ compiler setup
 65 | that is appropriate for the version of R installed on your
 66 | computer. For details, refer to the documentation on the
 67 | [CRAN website][cran].
 68 | 
 69 | For guidance on using fastTopics to analyze gene expression data, see
 70 | the [single-cell RNA-seq vignette, part 1][vignette-scrnaseq-1] and
 71 | [part 2][vignette-scrnaseq-2].
 72 | 
 73 | Also, try running the small example that illustrates the fast model
 74 | fitting algorithms:
 75 | 
 76 | ```R
 77 | example("fit_poisson_nmf")
 78 | ```
 79 | 
 80 | See the [package documentation][pkgdown] for more information.
 81 | 
 82 | ## Developer notes
 83 | 
 84 | To prepare the package for CRAN, remove both single-cell vignettes,
 85 | then run `R CMD build fastTopics` to build the source package.
 86 | 
 87 | This is the command used to check the package before submitting to
 88 | CRAN:
 89 | 
 90 | ```r
 91 | library(rhub)
 92 | check_for_cran(".",show_status = TRUE,
 93 |   env_vars = c(`_R_CHECK_FORCE_SUGGESTS_` = "false",
 94 |                `_R_CHECK_CRAN_INCOMING_USE_ASPELL_` = "true"))
 95 | ```
 96 | 
 97 | ## Credits
 98 | 
 99 | The fastTopics R package was developed by [Peter Carbonetto][peter],
100 | [Matthew Stephens][matthew] and others.
101 | 
102 | [fasttopics]:  https://github.com/stephenslab/fastTopics
103 | [mit-license]: https://opensource.org/license/mit
104 | [issues]: https://github.com/stephenslab/fastTopics/issues
105 | [peter]: https://pcarbo.github.io
106 | [kevin]: https://github.com/kevinlkx
107 | [matthew]: http://stephenslab.uchicago.edu
108 | [uchicago]: https://www.uchicago.edu
109 | [cran]: https://cran.r-project.org
110 | [countclust]: https://github.com/kkdey/CountClust
111 | [countclust-paper]: https://doi.org/10.1371/journal.pgen.1006599
112 | [fasttopics-paper]: https://arxiv.org/abs/2105.13440
113 | [singlecell-topics-paper]: https://doi.org/10.1186/s13059-023-03067-9
114 | [pkgdown]: https://stephenslab.github.io/fastTopics/
115 | [vignette-close-relationship]: https://stephenslab.github.io/fastTopics/articles/relationship.html
116 | [vignette-scrnaseq-1]: https://stephenslab.github.io/fastTopics/articles/single_cell_rnaseq_basic.html
117 | [vignette-scrnaseq-2]: https://stephenslab.github.io/fastTopics/articles/single_cell_rnaseq_practical.html
118 | 


--------------------------------------------------------------------------------
/TODO.txt:
--------------------------------------------------------------------------------
 1 | to do
 2 | =====
 3 | 
 4 | + Implement function to fit LDA model by initializing with MLEs for
 5 |   Poisson NMF or multinomial topic model.
 6 | 
 7 | + Update the vignettes.
 8 | 
 9 | + Update pkgdown site.
10 | 
11 | + Implement KKT-based stopping criterion for fit_poisson_nmf.
12 | 
13 | + Implement "t" S3 method to transpose the rows and columns of a
14 |   Poisson NMF fit.
15 | 
16 | + Implement integrations for Seurat, Signac and/or ArchR.
17 | 
18 | + Implement backtracking line search option for SCD algorithm.
19 | 
20 | + Create vignette giving more details about the differential
21 |   expression analysis.
22 | 
23 | + Create vignette giving more details about the different optimization
24 |   algorithms. (Then simplify the fit_poisson_nmf example.)
25 | 
26 | + Create vignette illustrating application of fastTopics to text data,
27 |   with a focus on the different ways to visualize the results.
28 | 


--------------------------------------------------------------------------------
/_pkgdown.yml:
--------------------------------------------------------------------------------
 1 | navbar:
 2 |   left:
 3 |     - text: "Home"
 4 |       href: index.html
 5 |     - text: "Vignettes"
 6 |       href: articles/index.html
 7 |     - text: "Functions"
 8 |       href: reference/index.html
 9 |   right:
10 |     - text: "Source"
11 |       href: https://github.com/stephenslab/fastTopics
12 | 


--------------------------------------------------------------------------------
/appveyor.yml:
--------------------------------------------------------------------------------
 1 | init:
 2 |   ps: |
 3 |         $ErrorActionPreference = "Stop"
 4 |         Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "..\appveyor-tool.ps1"
 5 |         Import-Module '..\appveyor-tool.ps1'
 6 | 
 7 | branches:
 8 |   only:
 9 |     - master
10 | 
11 | install:
12 |   ps: Bootstrap
13 | 
14 | environment:
15 |   global:
16 |     R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
17 |     _R_CHECK_FORCE_SUGGESTS_: false
18 |     USE_RTOOLS: yes
19 |     WARNINGS_ARE_ERRORS:
20 |     R_VERSION: release
21 |     R_ARCH: x64
22 |     R_CHECK_ARGS: --no-manual --no-examples --as-cran
23 | 
24 | # This is the minimal set of R packages needed to run "R CMD check" on
25 | # the package.
26 | build_script:
27 |   - R -e install.packages(c('devtools','testthat','quadprog','gtools','irlba','Rtsne','uwot','dplyr','rlang','tidyr','Rcpp','RcppArmadillo','RcppParallel','RhpcBLASctl','progress','pbapply','ggplot2','cowplot','plotly','htmlwidgets'),head(.libPaths(),1),'http://cran.wustl.edu')
28 |   - R -e devtools::install_github('slowkow/ggrepel',upgrade='never',force=TRUE)
29 |   - R -e devtools::install_github('stephens999/ashr',upgrade='never',force=TRUE)
30 | 
31 | test_script:
32 |   - travis-tool.sh run_tests
33 | 
34 | on_failure:
35 |   - 7z a failure.zip *.Rcheck\*
36 |   - appveyor PushArtifact failure.zip
37 | 
38 | artifacts:
39 |   - path: '*.Rcheck\**\*.log'
40 |     name: Logs
41 | 
42 |   - path: '*.Rcheck\**\*.out'
43 |     name: Logs
44 | 
45 |   - path: '*.Rcheck\**\*.fail'
46 |     name: Logs
47 | 
48 |   - path: '*.Rcheck\**\*.Rout'
49 |     name: Logs
50 | 
51 |   - path: '\*_*.tar.gz'
52 |     name: Bits
53 | 
54 |   - path: '\*_*.zip'
55 |     name: Bits
56 | 


--------------------------------------------------------------------------------
/data/newsgroups.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/data/newsgroups.RData


--------------------------------------------------------------------------------
/data/pbmc_facs.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/data/pbmc_facs.RData


--------------------------------------------------------------------------------
/docs/LICENSE-text.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <!-- Generated by pkgdown: do not edit by hand --><html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta charset="utf-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width, initial-scale=1.0"><title>License • fastTopics</title><!-- jquery --><script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.4.1/jquery.min.js" integrity="sha256-CSXorXvZcTkaix6Yvo6HppcZGetbYMGWSFlBw8HfCJo=" crossorigin="anonymous"></script><!-- Bootstrap --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/css/bootstrap.min.css" integrity="sha256-bZLfwXAP04zRMK2BjiO8iu9pf4FbLqX6zitd+tIvLhE=" crossorigin="anonymous"><script src="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/js/bootstrap.min.js" integrity="sha256-nuL8/2cJ5NDSSwnKD8VqreErSWHtnEP9E7AySL+1ev4=" crossorigin="anonymous"></script><!-- bootstrap-toc --><link rel="stylesheet" href="bootstrap-toc.css"><script src="bootstrap-toc.js"></script><!-- Font Awesome icons --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/all.min.css" integrity="sha256-mmgLkCYLUQbXn0B1SRqzHar6dCnv9oZFPEC1g1cwlkk=" crossorigin="anonymous"><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/v4-shims.min.css" integrity="sha256-wZjR52fzng1pJHwx4aV2AO3yyTOXrcDW7jBpJtTwVxw=" crossorigin="anonymous"><!-- clipboard.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.6/clipboard.min.js" integrity="sha256-inc5kl9MA1hkeYUt+EC3BhlIgyp/2jDIyBLS6k3UxPI=" crossorigin="anonymous"></script><!-- headroom.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" 
integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script><!-- pkgdown --><link href="pkgdown.css" rel="stylesheet"><script src="pkgdown.js"></script><meta property="og:title" content="License"><!-- mathjax --><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script><!--[if lt IE 9]>
 3 | <script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
 4 | <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
 5 | <![endif]--></head><body data-spy="scroll" data-target="#toc">
 6 |     
 7 | 
 8 |     <div class="container template-title-body">
 9 |       <header><div class="navbar navbar-default navbar-fixed-top" role="navigation">
10 |   <div class="container">
11 |     <div class="navbar-header">
12 |       <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false">
13 |         <span class="sr-only">Toggle navigation</span>
14 |         <span class="icon-bar"></span>
15 |         <span class="icon-bar"></span>
16 |         <span class="icon-bar"></span>
17 |       </button>
18 |       <span class="navbar-brand">
19 |         <a class="navbar-link" href="index.html">fastTopics</a>
20 |         <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="">0.6-175</span>
21 |       </span>
22 |     </div>
23 | 
24 |     <div id="navbar" class="navbar-collapse collapse">
25 |       <ul class="nav navbar-nav"><li>
26 |   <a href="index.html">Home</a>
27 | </li>
28 | <li>
29 |   <a href="articles/index.html">Vignettes</a>
30 | </li>
31 | <li>
32 |   <a href="reference/index.html">Functions</a>
33 | </li>
34 |       </ul><ul class="nav navbar-nav navbar-right"><li>
35 |   <a href="https://github.com/stephenslab/fastTopics" class="external-link">Source</a>
36 | </li>
37 |       </ul></div><!--/.nav-collapse -->
38 |   </div><!--/.container -->
39 | </div><!--/.navbar -->
40 | 
41 |       
42 | 
43 |       </header><div class="row">
44 |   <div class="contents col-md-9">
45 |     <div class="page-header">
46 |       <h1>License</h1>
47 |     </div>
48 | 
49 | <pre>YEAR: 2019
50 | COPYRIGHT HOLDER: Peter Carbonetto and Matthew Stephens
51 | </pre>
52 | 
53 |   </div>
54 | 
55 |   <div class="col-md-3 hidden-xs hidden-sm" id="pkgdown-sidebar">
56 |     <nav id="toc" data-toggle="toc" class="sticky-top"><h2 data-toc-skip>Contents</h2>
57 |     </nav></div>
58 | 
59 | </div>
60 | 
61 | 
62 | 
63 |       <footer><div class="copyright">
64 |   <p></p><p>Developed by Peter Carbonetto, Kevin Luo, Kushal Dey, Matthew Stephens.</p>
65 | </div>
66 | 
67 | <div class="pkgdown">
68 |   <p></p><p>Site built with <a href="https://pkgdown.r-lib.org/" class="external-link">pkgdown</a> 2.0.7.</p>
69 | </div>
70 | 
71 |       </footer></div>
72 | 
73 |   
74 | 
75 | 
76 |   
77 | 
78 |   </body></html>
79 | 
80 | 


--------------------------------------------------------------------------------
/docs/articles/relationship_files/figure-html/loglik-poisson-vs-multinom-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/articles/relationship_files/figure-html/loglik-poisson-vs-multinom-1.png


--------------------------------------------------------------------------------
/docs/articles/relationship_files/figure-html/multinom2poisson-1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/articles/relationship_files/figure-html/multinom2poisson-1-1.png


--------------------------------------------------------------------------------
/docs/articles/relationship_files/figure-html/multinom2poisson-2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/articles/relationship_files/figure-html/multinom2poisson-2-1.png


--------------------------------------------------------------------------------
/docs/articles/relationship_files/figure-html/plot-loglik-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/articles/relationship_files/figure-html/plot-loglik-1.png


--------------------------------------------------------------------------------
/docs/articles/single_cell_rnaseq_basic_files/figure-html/structure-plot-test-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/articles/single_cell_rnaseq_basic_files/figure-html/structure-plot-test-1.png


--------------------------------------------------------------------------------
/docs/articles/single_cell_rnaseq_basic_files/figure-html/structure-plot-with-celltype-labels-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/articles/single_cell_rnaseq_basic_files/figure-html/structure-plot-with-celltype-labels-1.png


--------------------------------------------------------------------------------
/docs/articles/single_cell_rnaseq_basic_files/figure-html/volcano-plot-b-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/articles/single_cell_rnaseq_basic_files/figure-html/volcano-plot-b-1.png


--------------------------------------------------------------------------------
/docs/articles/single_cell_rnaseq_basic_files/figure-html/volcano-plot-bcells-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/articles/single_cell_rnaseq_basic_files/figure-html/volcano-plot-bcells-1.png


--------------------------------------------------------------------------------
/docs/articles/single_cell_rnaseq_basic_files/figure-html/volcano-plot-nk-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/articles/single_cell_rnaseq_basic_files/figure-html/volcano-plot-nk-1.png


--------------------------------------------------------------------------------
/docs/articles/single_cell_rnaseq_basic_files/figure-html/volcano-plot-t-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/articles/single_cell_rnaseq_basic_files/figure-html/volcano-plot-t-1.png


--------------------------------------------------------------------------------
/docs/articles/single_cell_rnaseq_basic_files/figure-html/volcano-plot-tcells-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/articles/single_cell_rnaseq_basic_files/figure-html/volcano-plot-tcells-1.png


--------------------------------------------------------------------------------
/docs/articles/single_cell_rnaseq_practical_files/crosstalk-1.0.0/css/crosstalk.css:
--------------------------------------------------------------------------------
/* Adjust margins outwards, so column contents line up with the edges of the
   parent of container-fluid. White-space handling is also reset to normal
   inside the column container. */
.container-fluid.crosstalk-bscols {
  margin-left: -30px;
  margin-right: -30px;
  white-space: normal;
}

/* But don't adjust the margins outwards if we're directly under the body,
   i.e. we were the top-level of something at the console. */
body > .container-fluid.crosstalk-bscols {
  margin-left: auto;
  margin-right: auto;
}

/* Option columns of a checkbox group sit side by side, aligned at the top,
   with 12px of padding on the right of each column. */
.crosstalk-input-checkboxgroup .crosstalk-options-group .crosstalk-options-column {
  display: inline-block;
  padding-right: 12px;
  vertical-align: top;
}

/* On screens up to 480px wide the option columns become block-level (one per
   row) and their right padding is inherited from the parent instead. */
@media only screen and (max-width:480px) {
  .crosstalk-input-checkboxgroup .crosstalk-options-group .crosstalk-options-column {
    display: block;
    padding-right: inherit;
  }
}
28 | 


--------------------------------------------------------------------------------
/docs/articles/single_cell_rnaseq_practical_files/figure-html/loglik-2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/articles/single_cell_rnaseq_practical_files/figure-html/loglik-2-1.png


--------------------------------------------------------------------------------
/docs/articles/single_cell_rnaseq_practical_files/figure-html/loglik-3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/articles/single_cell_rnaseq_practical_files/figure-html/loglik-3-1.png


--------------------------------------------------------------------------------
/docs/articles/single_cell_rnaseq_practical_files/figure-html/pca-plot-1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/articles/single_cell_rnaseq_practical_files/figure-html/pca-plot-1-1.png


--------------------------------------------------------------------------------
/docs/articles/single_cell_rnaseq_practical_files/figure-html/pca-plot-2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/articles/single_cell_rnaseq_practical_files/figure-html/pca-plot-2-1.png


--------------------------------------------------------------------------------
/docs/articles/single_cell_rnaseq_practical_files/figure-html/plot-loglik-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/articles/single_cell_rnaseq_practical_files/figure-html/plot-loglik-1.png


--------------------------------------------------------------------------------
/docs/articles/single_cell_rnaseq_practical_files/figure-html/structure-plot-by-cluster-1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/articles/single_cell_rnaseq_practical_files/figure-html/structure-plot-by-cluster-1-1.png


--------------------------------------------------------------------------------
/docs/articles/single_cell_rnaseq_practical_files/figure-html/structure-plot-by-cluster-2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/articles/single_cell_rnaseq_practical_files/figure-html/structure-plot-by-cluster-2-1.png


--------------------------------------------------------------------------------
/docs/articles/single_cell_rnaseq_practical_files/figure-html/structure-plot-by-cluster-3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/articles/single_cell_rnaseq_practical_files/figure-html/structure-plot-by-cluster-3-1.png


--------------------------------------------------------------------------------
/docs/articles/single_cell_rnaseq_practical_files/figure-html/structure-plot-without-labels-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/articles/single_cell_rnaseq_practical_files/figure-html/structure-plot-without-labels-1.png


--------------------------------------------------------------------------------
/docs/articles/single_cell_rnaseq_practical_files/figure-html/volcano-plot-cd4-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/articles/single_cell_rnaseq_practical_files/figure-html/volcano-plot-cd4-1.png


--------------------------------------------------------------------------------
/docs/articles/single_cell_rnaseq_practical_files/figure-html/volcano-plot-cd8-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/articles/single_cell_rnaseq_practical_files/figure-html/volcano-plot-cd8-1.png


--------------------------------------------------------------------------------
/docs/articles/single_cell_rnaseq_practical_files/figure-html/volcano-plot-t-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/articles/single_cell_rnaseq_practical_files/figure-html/volcano-plot-t-1.png


--------------------------------------------------------------------------------
/docs/articles/single_cell_rnaseq_practical_files/plotly-htmlwidgets-css-1.52.2/plotly-htmlwidgets.css:
--------------------------------------------------------------------------------
/*
Workaround so that plotly works correctly with ioslides: a plotly
htmlwidget nested in a `slide` element that does not carry the `current`
class is not rendered at all.
See https://github.com/ropensci/plotly/issues/463
*/

slide:not(.current) .plotly.html-widget{
  display: none;
}
10 | 


--------------------------------------------------------------------------------
/docs/articles/topics_vs_clusters_files/figure-html/pca-from-loadings-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/articles/topics_vs_clusters_files/figure-html/pca-from-loadings-1.png


--------------------------------------------------------------------------------
/docs/articles/topics_vs_clusters_files/figure-html/plot-topic-proportions-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/articles/topics_vs_clusters_files/figure-html/plot-topic-proportions-1.png


--------------------------------------------------------------------------------
/docs/articles/topics_vs_clusters_files/figure-html/tsne-from-counts-1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/articles/topics_vs_clusters_files/figure-html/tsne-from-counts-1-1.png


--------------------------------------------------------------------------------
/docs/articles/topics_vs_clusters_files/figure-html/tsne-from-counts-2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/articles/topics_vs_clusters_files/figure-html/tsne-from-counts-2-1.png


--------------------------------------------------------------------------------
/docs/articles/topics_vs_clusters_files/figure-html/tsne-from-loadings-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/articles/topics_vs_clusters_files/figure-html/tsne-from-loadings-1.png


--------------------------------------------------------------------------------
/docs/bootstrap-toc.css:
--------------------------------------------------------------------------------
 1 | /*!
 2 |  * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/)
 3 |  * Copyright 2015 Aidan Feldman
 4 |  * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */
 5 | 
 6 | /* modified from https://github.com/twbs/bootstrap/blob/94b4076dd2efba9af71f0b18d4ee4b163aa9e0dd/docs/assets/css/src/docs.css#L548-L601 */
 7 | 
/* All levels of nav */
/* Base look of every table-of-contents link: block-level, small grey text. */
nav[data-toggle='toc'] .nav > li > a {
  display: block;
  padding: 4px 20px;
  font-size: 13px;
  font-weight: 500;
  color: #767676;
}
/* Hover/focus: a 1px left border is added and the left padding shrinks from
   20px to 19px, so border + padding still add up to 20px. */
nav[data-toggle='toc'] .nav > li > a:hover,
nav[data-toggle='toc'] .nav > li > a:focus {
  padding-left: 19px;
  color: #563d7c;
  text-decoration: none;
  background-color: transparent;
  border-left: 1px solid #563d7c;
}
/* Active entry: bold text with a 2px left border; left padding is 18px so
   border + padding again add up to 20px. */
nav[data-toggle='toc'] .nav > .active > a,
nav[data-toggle='toc'] .nav > .active:hover > a,
nav[data-toggle='toc'] .nav > .active:focus > a {
  padding-left: 18px;
  font-weight: bold;
  color: #563d7c;
  background-color: transparent;
  border-left: 2px solid #563d7c;
}

/* Nav: second level (shown on .active) */
nav[data-toggle='toc'] .nav .nav {
  /* Hidden by default. The upstream note said "at >768px, show it", but this
     file contains no media query; here it is only re-shown by the
     `.active > ul` rule at the end of this file. */
  display: none;
  padding-bottom: 10px;
}
/* Second-level links are indented further (30px) and use smaller text. */
nav[data-toggle='toc'] .nav .nav > li > a {
  padding-top: 1px;
  padding-bottom: 1px;
  padding-left: 30px;
  font-size: 12px;
  font-weight: normal;
}
/* Same border compensation as the first level: 29px + 1px border. */
nav[data-toggle='toc'] .nav .nav > li > a:hover,
nav[data-toggle='toc'] .nav .nav > li > a:focus {
  padding-left: 29px;
}
/* Active second-level entry: 28px + 2px border, medium weight instead of bold. */
nav[data-toggle='toc'] .nav .nav > .active > a,
nav[data-toggle='toc'] .nav .nav > .active:hover > a,
nav[data-toggle='toc'] .nav .nav > .active:focus > a {
  padding-left: 28px;
  font-weight: 500;
}

/* from https://github.com/twbs/bootstrap/blob/e38f066d8c203c3e032da0ff23cd2d6098ee2dd6/docs/assets/css/src/docs.css#L631-L634 */
/* Reveal the nested list that sits directly under an active entry. */
nav[data-toggle='toc'] .nav > .active > ul {
  display: block;
}
61 | 


--------------------------------------------------------------------------------
/docs/docsearch.js:
--------------------------------------------------------------------------------
 1 | $(function() {
 2 | 
 3 |   // register a handler to move the focus to the search bar
 4 |   // upon pressing shift + "/" (i.e. "?")
 5 |   $(document).on('keydown', function(e) {
 6 |     if (e.shiftKey && e.keyCode == 191) {
 7 |       e.preventDefault();
 8 |       $("#search-input").focus();
 9 |     }
10 |   });
11 | 
12 |   $(document).ready(function() {
13 |     // do keyword highlighting
14 |     /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */
15 |     var mark = function() {
16 | 
17 |       var referrer = document.URL ;
18 |       var paramKey = "q" ;
19 | 
20 |       if (referrer.indexOf("?") !== -1) {
21 |         var qs = referrer.substr(referrer.indexOf('?') + 1);
22 |         var qs_noanchor = qs.split('#')[0];
23 |         var qsa = qs_noanchor.split('&');
24 |         var keyword = "";
25 | 
26 |         for (var i = 0; i < qsa.length; i++) {
27 |           var currentParam = qsa[i].split('=');
28 | 
29 |           if (currentParam.length !== 2) {
30 |             continue;
31 |           }
32 | 
33 |           if (currentParam[0] == paramKey) {
34 |             keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20"));
35 |           }
36 |         }
37 | 
38 |         if (keyword !== "") {
39 |           $(".contents").unmark({
40 |             done: function() {
41 |               $(".contents").mark(keyword);
42 |             }
43 |           });
44 |         }
45 |       }
46 |     };
47 | 
48 |     mark();
49 |   });
50 | });
51 | 
52 | /* Search term highlighting ------------------------------*/
53 | 
/**
 * Collects the distinct matched words of a search hit.
 *
 * Reads `matchedWords` from every entry of `hit._highlightResult.hierarchy`
 * and, when present, from `hit._highlightResult.content`.
 *
 * @param {Object} hit Search hit carrying a `_highlightResult` object.
 * @returns {Array} Matched words in first-seen order, without duplicates.
 */
function matchedWords(hit) {
  var highlight = hit._highlightResult;
  var collected = [];

  // One entry per hierarchy level (lvl0, lvl1, ...).
  for (var level in highlight.hierarchy) {
    collected = collected.concat(highlight.hierarchy[level].matchedWords);
  }

  if (highlight.content) {
    collected = collected.concat(highlight.content.matchedWords);
  }

  // A Set keeps insertion order, so this de-duplicates without reordering.
  return Array.from(new Set(collected));
}
72 | 
/**
 * Builds the URL for a search hit, adding a `q` query parameter that holds
 * the hit's matched words (space-separated) so the target page can
 * highlight them.
 *
 * The words are encoded with encodeURIComponent rather than the deprecated
 * escape(): escape() emits Latin-1 / "%uXXXX" escapes and leaves "+" bare,
 * neither of which survives being read back with decodeURIComponent.
 *
 * @param {Object} hit Search hit; reads `anchor`, `url_without_anchor`
 *   and `url`.
 * @returns {string} `url_without_anchor?q=...#anchor` when the hit has an
 *   anchor, otherwise `url?q=...`.
 */
function updateHitURL(hit) {

  var words = matchedWords(hit);
  var query = '?q=' + encodeURIComponent(words.join(" "));
  var url = "";

  if (hit.anchor) {
    // The query string has to come before the fragment.
    url = hit.url_without_anchor + query + '#' + hit.anchor;
  } else {
    url = hit.url + query;
  }

  return url;
}
86 | 


--------------------------------------------------------------------------------
/docs/link.svg:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <!-- Generator: Adobe Illustrator 19.2.1, SVG Export Plug-In . SVG Version: 6.00 Build 0)  -->
 3 | <svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
 4 | 	 viewBox="0 0 20 20" style="enable-background:new 0 0 20 20;" xml:space="preserve">
 5 | <style type="text/css">
 6 | 	.st0{fill:#75AADB;}
 7 | </style>
 8 | <path class="st0" d="M4,11.3h1.3v1.3H4c-2,0-4-2.3-4-4.7s2.1-4.7,4-4.7h5.3c1.9,0,4,2.3,4,4.7c0,1.9-1.2,3.6-2.7,4.3v-1.5
 9 | 	C11.4,10.2,12,9.1,12,8c0-1.7-1.4-3.3-2.7-3.3H4C2.7,4.7,1.3,6.3,1.3,8S2.7,11.3,4,11.3z M16,7.3h-1.3v1.3H16c1.3,0,2.7,1.6,2.7,3.3
10 | 	s-1.4,3.3-2.7,3.3h-5.3C9.4,15.3,8,13.7,8,12c0-1.1,0.6-2.2,1.3-2.8V7.7C7.9,8.4,6.7,10.1,6.7,12c0,2.4,2.1,4.7,4,4.7H16
11 | 	c1.9,0,4-2.3,4-4.7S18,7.3,16,7.3z"/>
12 | </svg>
13 | 


--------------------------------------------------------------------------------
/docs/pbmc_facs.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/pbmc_facs.RData


--------------------------------------------------------------------------------
/docs/pkgdown.js:
--------------------------------------------------------------------------------
  1 | /* http://gregfranko.com/blog/jquery-best-practices/ */
  2 | (function($) {
  $(function() {
    // Pads the body by the navbar height plus 10px.
    var padBodyForNavbar = function() {
      $('body').css('padding-top', $('.navbar').height() + 10);
    };

    $('.navbar-fixed-top').headroom();

    // Apply the padding now and again on every window resize.
    padBodyForNavbar();
    $(window).resize(padBodyForNavbar);

    $('[data-toggle="tooltip"]').tooltip();

    // Find the navbar link whose path scores highest against the current
    // page's path; the first link wins on ties.
    var here = paths(location.pathname);
    var navLinks = $("#navbar ul li a");
    var bestScore = -1;
    var bestIndex = -1;
    for (var k = 0; k < navLinks.length; k++) {
      var anchor = navLinks[k];
      // Skip "#" placeholders and links pointing at another host.
      if (anchor.getAttribute("href") === "#" || anchor.host !== location.host)
        continue;

      var score = prefix_length(paths(anchor.pathname), here);
      if (score > bestScore) {
        bestScore = score;
        bestIndex = k;
      }
    }

    // Mark the winning link's parent <li> as active, and the enclosing
    // dropdown <li> if the link lives inside one.
    if (bestIndex >= 0) {
      var winner = $(navLinks[bestIndex]);
      winner.parent().addClass("active");
      winner.closest("li.dropdown").addClass("active");
    }
  });
 41 | 
 42 |   function paths(pathname) {
 43 |     var pieces = pathname.split("/");
 44 |     pieces.shift(); // always starts with /
 45 | 
 46 |     var end = pieces[pieces.length - 1];
 47 |     if (end === "index.html" || end === "")
 48 |       pieces.pop();
 49 |     return(pieces);
 50 |   }
 51 | 
 52 |   // Returns -1 if not found
 53 |   function prefix_length(needle, haystack) {
 54 |     if (needle.length > haystack.length)
 55 |       return(-1);
 56 | 
 57 |     // Special case for length-0 haystack, since for loop won't run
 58 |     if (haystack.length === 0) {
 59 |       return(needle.length === 0 ? 0 : -1);
 60 |     }
 61 | 
 62 |     for (var i = 0; i < haystack.length; i++) {
 63 |       if (needle[i] != haystack[i])
 64 |         return(i);
 65 |     }
 66 | 
 67 |     return(haystack.length);
 68 |   }
 69 | 
 70 |   /* Clipboard --------------------------*/
 71 | 
 72 |   function changeTooltipMessage(element, msg) {
 73 |     var tooltipOriginalTitle=element.getAttribute('data-original-title');
 74 |     element.setAttribute('data-original-title', msg);
 75 |     $(element).tooltip('show');
 76 |     element.setAttribute('data-original-title', tooltipOriginalTitle);
 77 |   }
 78 | 
 79 |   if(ClipboardJS.isSupported()) {
 80 |     $(document).ready(function() {
 81 |       var copyButton = "<button type='button' class='btn btn-primary btn-copy-ex' type = 'submit' title='Copy to clipboard' aria-label='Copy to clipboard' data-toggle='tooltip' data-placement='left auto' data-trigger='hover' data-clipboard-copy><i class='fa fa-copy'></i></button>";
 82 | 
 83 |       $("div.sourceCode").addClass("hasCopyButton");
 84 | 
 85 |       // Insert copy buttons:
 86 |       $(copyButton).prependTo(".hasCopyButton");
 87 | 
 88 |       // Initialize tooltips:
 89 |       $('.btn-copy-ex').tooltip({container: 'body'});
 90 | 
 91 |       // Initialize clipboard:
 92 |       var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', {
 93 |         text: function(trigger) {
 94 |           return trigger.parentNode.textContent.replace(/\n#>[^\n]*/g, "");
 95 |         }
 96 |       });
 97 | 
 98 |       clipboardBtnCopies.on('success', function(e) {
 99 |         changeTooltipMessage(e.trigger, 'Copied!');
100 |         e.clearSelection();
101 |       });
102 | 
103 |       clipboardBtnCopies.on('error', function() {
104 |         changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy');
105 |       });
106 |     });
107 |   }
108 | })(window.jQuery || window.$)
109 | 


--------------------------------------------------------------------------------
/docs/pkgdown.yml:
--------------------------------------------------------------------------------
 1 | pandoc: 2.17.1.1
 2 | pkgdown: 2.0.2
 3 | pkgdown_sha: ~
 4 | articles:
 5 |   relationship: relationship.html
 6 |   single_cell_rnaseq_basic: single_cell_rnaseq_basic.html
 7 |   single_cell_rnaseq_practical: single_cell_rnaseq_practical.html
 8 |   topics_vs_clusters: topics_vs_clusters.html
 9 | last_built: 2023-03-13T20:41Z
10 | 
11 | 


--------------------------------------------------------------------------------
/docs/reference/Rplot001.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/reference/Rplot001.png


--------------------------------------------------------------------------------
/docs/reference/Rplot002.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/reference/Rplot002.png


--------------------------------------------------------------------------------
/docs/reference/Rplot003.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/reference/Rplot003.png


--------------------------------------------------------------------------------
/docs/reference/Rplot004.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/reference/Rplot004.png


--------------------------------------------------------------------------------
/docs/reference/Rplot005.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/reference/Rplot005.png


--------------------------------------------------------------------------------
/docs/reference/Rplot006.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/reference/Rplot006.png


--------------------------------------------------------------------------------
/docs/reference/de_analysis-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/reference/de_analysis-1.png


--------------------------------------------------------------------------------
/docs/reference/embeddings_from_topics-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/reference/embeddings_from_topics-1.png


--------------------------------------------------------------------------------
/docs/reference/embeddings_from_topics-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/reference/embeddings_from_topics-2.png


--------------------------------------------------------------------------------
/docs/reference/embeddings_from_topics-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/reference/embeddings_from_topics-3.png


--------------------------------------------------------------------------------
/docs/reference/embeddings_from_topics-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/reference/embeddings_from_topics-4.png


--------------------------------------------------------------------------------
/docs/reference/embeddings_from_topics-5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/reference/embeddings_from_topics-5.png


--------------------------------------------------------------------------------
/docs/reference/embeddings_from_topics-6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/reference/embeddings_from_topics-6.png


--------------------------------------------------------------------------------
/docs/reference/fit_poisson_nmf-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/reference/fit_poisson_nmf-1.png


--------------------------------------------------------------------------------
/docs/reference/fit_poisson_nmf-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/reference/fit_poisson_nmf-2.png


--------------------------------------------------------------------------------
/docs/reference/predict-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/reference/predict-1.png


--------------------------------------------------------------------------------
/docs/reference/predict-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/reference/predict-2.png


--------------------------------------------------------------------------------
/docs/reference/predict-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/reference/predict-3.png


--------------------------------------------------------------------------------
/docs/reference/predict-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/docs/reference/predict-4.png


--------------------------------------------------------------------------------
/docs/sitemap.xml:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="UTF-8"?>
  2 | <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
  3 |   <url>
  4 |     <loc>/404.html</loc>
  5 |   </url>
  6 |   <url>
  7 |     <loc>/LICENSE-text.html</loc>
  8 |   </url>
  9 |   <url>
 10 |     <loc>/articles/index.html</loc>
 11 |   </url>
 12 |   <url>
 13 |     <loc>/articles/relationship.html</loc>
 14 |   </url>
 15 |   <url>
 16 |     <loc>/articles/single_cell_rnaseq_basic.html</loc>
 17 |   </url>
 18 |   <url>
 19 |     <loc>/articles/single_cell_rnaseq_practical.html</loc>
 20 |   </url>
 21 |   <url>
 22 |     <loc>/articles/topics_vs_clusters.html</loc>
 23 |   </url>
 24 |   <url>
 25 |     <loc>/articles/volcano_plot_t_cells.html</loc>
 26 |   </url>
 27 |   <url>
 28 |     <loc>/authors.html</loc>
 29 |   </url>
 30 |   <url>
 31 |     <loc>/index.html</loc>
 32 |   </url>
 33 |   <url>
 34 |     <loc>/pbmc_de_analysis.html</loc>
 35 |   </url>
 36 |   <url>
 37 |     <loc>/reference/compare_fits.html</loc>
 38 |   </url>
 39 |   <url>
 40 |     <loc>/reference/compare_poisson_nmf_fits.html</loc>
 41 |   </url>
 42 |   <url>
 43 |     <loc>/reference/de_analysis.html</loc>
 44 |   </url>
 45 |   <url>
 46 |     <loc>/reference/diff_count_analysis.html</loc>
 47 |   </url>
 48 |   <url>
 49 |     <loc>/reference/embedding_plots.html</loc>
 50 |   </url>
 51 |   <url>
 52 |     <loc>/reference/embeddings_from_topics.html</loc>
 53 |   </url>
 54 |   <url>
 55 |     <loc>/reference/fit_multinom_model.html</loc>
 56 |   </url>
 57 |   <url>
 58 |     <loc>/reference/fit_poisson_nmf.html</loc>
 59 |   </url>
 60 |   <url>
 61 |     <loc>/reference/fit_topic_model.html</loc>
 62 |   </url>
 63 |   <url>
 64 |     <loc>/reference/index.html</loc>
 65 |   </url>
 66 |   <url>
 67 |     <loc>/reference/likelihood.html</loc>
 68 |   </url>
 69 |   <url>
 70 |     <loc>/reference/loadings_plot.html</loc>
 71 |   </url>
 72 |   <url>
 73 |     <loc>/reference/merge_topics.html</loc>
 74 |   </url>
 75 |   <url>
 76 |     <loc>/reference/multinom2poisson.html</loc>
 77 |   </url>
 78 |   <url>
 79 |     <loc>/reference/pbmc_4k.html</loc>
 80 |   </url>
 81 |   <url>
 82 |     <loc>/reference/pbmc_facs.html</loc>
 83 |   </url>
 84 |   <url>
 85 |     <loc>/reference/pca_plot.html</loc>
 86 |   </url>
 87 |   <url>
 88 |     <loc>/reference/plot_loglik_vs_rank.html</loc>
 89 |   </url>
 90 |   <url>
 91 |     <loc>/reference/plot_progress.html</loc>
 92 |   </url>
 93 |   <url>
 94 |     <loc>/reference/plot_progress_poisson_nmf.html</loc>
 95 |   </url>
 96 |   <url>
 97 |     <loc>/reference/poisson2multinom.html</loc>
 98 |   </url>
 99 |   <url>
100 |     <loc>/reference/predict.html</loc>
101 |   </url>
102 |   <url>
103 |     <loc>/reference/run_homer.html</loc>
104 |   </url>
105 |   <url>
106 |     <loc>/reference/select_loadings.html</loc>
107 |   </url>
108 |   <url>
109 |     <loc>/reference/simulate_count_data.html</loc>
110 |   </url>
111 |   <url>
112 |     <loc>/reference/simulate_gene_data.html</loc>
113 |   </url>
114 |   <url>
115 |     <loc>/reference/simulate_toy_gene_data.html</loc>
116 |   </url>
117 |   <url>
118 |     <loc>/reference/structure_plot.html</loc>
119 |   </url>
120 |   <url>
121 |     <loc>/reference/summary.poisson_nmf_fit.html</loc>
122 |   </url>
123 |   <url>
124 |     <loc>/reference/tsne_from_topics.html</loc>
125 |   </url>
126 |   <url>
127 |     <loc>/reference/tsne_plot.html</loc>
128 |   </url>
129 |   <url>
130 |     <loc>/reference/volcano_plot.html</loc>
131 |   </url>
132 | </urlset>
133 | 


--------------------------------------------------------------------------------
/inst/CITATION:
--------------------------------------------------------------------------------
 1 | citHeader("To cite the fastTopics package, please use:")
 2 | 
 3 | bibentry(bibtype = "Article",
 4 |          title = paste("Visualizing the structure of RNA-seq expression data",
 5 |                        "using grade of membership models"),
 6 |          author = c(person("Kushal K. Dey"),
 7 |                     person("Chiaowen Joyce Hsiao"),
 8 |                     person("Matthew Stephens")),
 9 |          journal = "PLoS Genetics",
10 |          volume  = 13,
11 |          number  = 3,
12 |          pages   = "e1006599",
13 |          year = "2017",
14 |          url = "https://doi.org/10.1371/journal.pgen.1006599",
15 |          textVersion =
16 |            paste("Kushal K. Dey, Chiaowen Joyce Hsiao and Matthew Stephens",
17 |                  "(2017). Visualizing the structure of RNA-seq expression",
18 |                  "data using grade of membership models. PLoS Genetics",
19 |                  "13(3), e1006599, doi:10.1371/journal.pgen.1006599"))
20 | 
21 | bibentry(bibtype = "Article",
22 |   title   = paste("Non-negative matrix factorization algorithms greatly",
23 |                   "improve topic model fits"),
24 |   author  = c(person("Peter Carbonetto"),
25 |               person("Abhishek Sarkar"),
26 |               person("Zihao Wang"),
27 |               person("Matthew Stephens")),
28 |   journal = "arXiv",
29 |   volume  = "2105.13440",
30 |   eprint  = "2105.13440",
31 |   year    = "2021",
32 |   archivePrefix = "arXiv",
33 |   url = "https://arxiv.org/abs/2105.13440",
34 |   textVersion = paste("Peter Carbonetto, Abhishek Sarkar, Zihao Wang",
35 |                       "and Matthew Stephens (2021). Non-negative matrix",
36 |                       "factorization algorithms greatly improve topic model",
37 |                       "fits. arXiv 2105.13440."))
38 | 
39 | bibentry(header = "If de_analysis is used, please also cite:",
40 |   bibtype = "Article",
41 |   title = paste("Interpreting structure in sequence count data with",
42 |                 "differential expression analysis allowing for grades of",
43 |                 "membership"),
44 |   author  = c(person("Peter Carbonetto"),
45 |               person("Kaixuan Luo"),
46 |               person("Abhishek Sarkar"),
47 |               person("Anthony Hung"),
48 |               person("Karl Tayeb"),
49 |               person("Sebastian Pott"),
50 |               person("Matthew Stephens")),
51 |   journal = "Genome Biology",
52 |   doi = "10.1186/s13059-023-03067-9",
53 |   volume = 24,
54 |   pages = 236,
55 |   year = 2023,
56 |   textVersion = paste("Peter Carbonetto, Kaixuan Luo, Abhishek Sarkar,",
57 |     "Anthony Hung, Karl Tayeb, Sebastian Pott and Matthew Stephens.",
58 |     "GoM DE: interpreting structure in sequence count data with differential",
59 |     "expression analysis allowing for grades of membership. Genome Biology",
60 |     "24: 236 (2023). https://doi.org/10.1186/s13059-023-03067-9"))
61 |          
62 | 


--------------------------------------------------------------------------------
/inst/COPYRIGHTS:
--------------------------------------------------------------------------------
 1 | This file contains additional copyright information about code adapted
 2 | from other R packages.
 3 | 
 4 | Portions of the C++ code in src/poismix.cpp and src/scd.cpp were
 5 | adapted from R and C++ code developed by Eric Xihui Lin and Paul
 6 | C. Boutros, which is available for download at
 7 | https://github.com/linxihui/NNLM. This code is distributed under the
 8 | 2-Clause BSD license and retains the following copyright:
 9 | 
10 | YEAR: 2015
11 | COPYRIGHT HOLDER: Eric Xihui Lin, the Boutros Lab and the Ontario Institute for Cancer Research
12 | 
13 | Portions of the R code in R/topicscore.R were adapted from R code
14 | developed by Minzhe Wang and Tracy Ke. This code is distributed under
15 | the MIT license and retains the following copyright:
16 | 
17 | YEAR: 2019
18 | COPYRIGHT HOLDER: Minzhe Wang
19 | 


--------------------------------------------------------------------------------
/inst/code/check_map.R:
--------------------------------------------------------------------------------
 1 | library(Compositional)
 2 | 
 3 | # Simulate an 80 x 100 data set.
 4 | set.seed(1)
 5 | n <- 80
 6 | m <- 100
 7 | k <- 3
 8 | dat <- simulate_count_data(n,m,k)
 9 | X <- dat$X
10 | L <- dat$L
11 | F <- dat$F
12 | a <- matrix(abs(rnorm(m*k)) + 1,m,k)
13 | b <- abs(rnorm(k))
14 | 
15 | N <- 100
16 | f0 <- rep(0,N)
17 | f1 <- rep(0,N)
18 | f2 <- rep(0,N)
19 | f3 <- rep(0,N)
20 | for (i in 1:N) {
21 |   
22 |   # Compute the penalized likelihood for the multinomial topic model
23 |   # with a Dirichlet prior.
24 |   fit <- list(L = L,F = F)
25 |   class(fit) <- c("poisson_nmf_fit","list")
26 |   f0[i]  <- sum(loglik_multinom_topic_model(X,poisson2multinom(fit),e = 0))
27 |   for (j in 1:k)
28 |     f0[i] <- f0[i] + ddiri(fit$F[,j],a[,j],logged = TRUE)
29 |   
30 |   # Compute the multinomial topic model likelihood with "pseudodata".
31 |   Y      <- rbind(X,t(a - 1))
32 |   fit2   <- fit
33 |   u      <- colSums(a - 1)/b
34 |   fit2$L <- rbind(fit$L,diag(k))
35 |   fit2$F <- scale.cols(fit2$F,u)
36 |   fit2$L <- scale.cols(fit2$L,1/u)
37 |   f1[i]  <- sum(loglik_multinom_topic_model(Y,poisson2multinom(fit2),e = 0))
38 | 
39 |   # Compute the penalized Poisson NMF likelihood with a gamma prior.
40 |   f2[i] <- sum(loglik_poisson_nmf(X,fit,e = 0))
41 |   for (j in 1:k)
42 |     f2[i] <- f2[i] + sum(dgamma(fit$F[,j],a[,j],b[j],log = TRUE))
43 |   
44 |   # Compute Poisson NMF likelihood with "pseudodata".
45 |   f3[i] <- sum(loglik_poisson_nmf(Y,fit2,e = 0))
46 | 
47 |   # Tweak the fit.
48 |   L <- L * matrix(exp(rnorm(n*k,sd = 0.1)),n,k)
49 |   F <- F * matrix(exp(rnorm(m*k,sd = 0.1)),m,k)
50 | }
51 | 
52 | # The multinomial penalized log-likelihoods and the multinomial
53 | # log-likelihoods with pseudodata should be equal up to a constant.
54 | plot(f0,f1,pch = 20)
55 | 
56 | # The Poisson NMF penalized log-likelihoods and the Poisson NMF
57 | # log-likelihoods with pseudodata should be equal up to a constant.
58 | plot(f2,f3,pch = 20)
59 | 


--------------------------------------------------------------------------------
/inst/code/check_poisson_hessian.R:
--------------------------------------------------------------------------------
 1 | # Verify gradient and Hessian calculations for the "single gene"
 2 | # Poisson model.
 3 | library(pracma)
 4 | 
 5 | # Simulate data x ~ Pois(u), with u = l0*f0 + l1*f1.
 6 | set.seed(1)
 7 | n  <- 40
 8 | f0 <- 0.1
 9 | f1 <- 1
10 | s  <- sample(10,n,replace = TRUE)
11 | u  <- runif(n)
12 | l0 <- s*(1-u)
13 | l1 <- s*u
14 | x  <- rpois(n,l0*f0 + l1*f1)
15 | 
16 | # Log-likelihood for x ~ Pois(u), with Poisson rates u = l0*f0 + l1*f1.
17 | loglik <- function (x, l0, l1, f0, f1) {
18 |   sum(dpois(x,lambda = l0*f0 + l1*f1,log = TRUE))
19 | }
20 | 
21 | # Gradient of the log-likelihood with respect to log(f0) and log(f1),
22 | # returned as a vector with one entry per parameter, in that order.
23 | loglik_grad <- function (x, l0, l1, f0, f1) {
24 |   # r is the derivative of the log-likelihood w.r.t. each Poisson rate.
25 |   r  <- x/(l0*f0 + l1*f1) - 1
26 |   g0 <- f0*sum(l0*r)
27 |   c(g0,f1*sum(l1*r))
28 | }
29 | 
30 | # Compute the MLEs of f0 and f1.
31 | control <- glm.control(epsilon = 1e-10, maxit = 100)
32 | dat <- data.frame(x = x,f0 = l0,f1 = l1)
33 | fit <- glm(x ~ f0 + f1 - 1,family = poisson(link = "identity"),
34 |            data = dat,start = c(0.5,0.5),control = control)
35 | f0 <- coef(fit)["f0"]
36 | f1 <- coef(fit)["f1"]
37 | 
38 | # Compare loglik_grad, and the Hessian obtained from
39 | # compute_poisson_covariance, against finite-difference estimates.
40 | cat("gradient:\n")
41 | print(grad(function (v) loglik(x,l0,l1,exp(v[1]),exp(v[2])),log(c(f0,f1))),
42 |       digits = 12)
43 | print(loglik_grad(x,l0,l1,f0,f1),digits = 12)
44 | cat("Hessian:\n")
45 | print(rbind(grad(function (v) loglik_grad(x,l0,l1,exp(v[1]),exp(v[2]))[1],
46 |                  log(c(f0,f1))),
47 |             grad(function (v) loglik_grad(x,l0,l1,exp(v[1]),exp(v[2]))[2],
48 |                  log(c(f0,f1)))),
49 |       digits = 12)
50 | print(-solve(compute_poisson_covariance(x,cbind(l0,l1),coef(fit))),digits = 12)
51 | 


--------------------------------------------------------------------------------
/inst/code/compile_newsgroups_results_for_annotation.R:
--------------------------------------------------------------------------------
 1 | # I run this after compute_newsgroups_topics.R to compile the key
 2 | # matrices I would like to keep for subsequent analyses.
 3 | library(tools)
 4 | library(fastTopics)
 5 | 
 6 | # Load the newsgroups data.
 7 | load("../data/newsgroups.RData")
 8 | 
 9 | # Load the output generated by the compute_newsgroups_topics.R script.
10 | load("../output/newsgroups_topics.RData")
11 | 
12 | # Get the topic proportions matrix.
13 | L <- poisson2multinom(pnmf)$L
14 | 
15 | # Get the posterior mean log-fold changes (compared to the mean word
16 | # frequencies), and call this the F matrix. Note that here we are
17 | # using the base-2 logarithm.
18 | F <- de_vsnull$postmean
19 | 
20 | # Save the compiled results to an .Rdata file.
21 | newsgroups <- list(topics = topics,L = L,F = F)
22 | save(list = "newsgroups",file = "newsgroups.RData")
23 | resaveRdaFiles("newsgroups.RData")
24 | 


--------------------------------------------------------------------------------
/inst/code/compute_newsgroups_topics.R:
--------------------------------------------------------------------------------
 1 | # Analyze the "20 Newsgroups" data using fastTopics.
 2 | #
 3 | # sinteractive --mem=24G -c 8 --time=24:00:00
 4 | # module load R/4.2.0
 5 | # .libPaths()[1]
 6 | # /home/pcarbo/R_libs_4_20
 7 | library(tools)
 8 | library(Matrix)
 9 | library(fastTopics)
10 | load("../datafiles/newsgroups.RData")
11 | set.seed(1)
12 | 
13 | # Remove words that appear in fewer than 10 documents.
14 | x <- colSums(counts > 0)
15 | j <- which(x > 9)
16 | counts <- counts[,j]
17 | 
18 | # Fit a Poisson NMF using fastTopics, with k = 10 factors/topics.
19 | pnmf <- fit_poisson_nmf(counts,k = 10,numiter = 100,method = "em",
20 |                         control = list(numiter = 4,nc = 8,extrapolate = FALSE),
21 |                         init.method = "random",verbose = "detailed")
22 | pnmf <- fit_poisson_nmf(counts,fit0 = pnmf,numiter = 100,method = "scd",
23 |                         control = list(numiter = 4,nc = 8,extrapolate = TRUE),
24 |                         verbose = "detailed")
25 | 
26 | # Perform the "grade of membership" differential expression analysis
27 | # using the fitted Poisson NMF model.
28 | de_le <- de_analysis(pnmf,counts,shrink.method = "ash",
29 |                      lfc.stat = "le",pseudocount = 0.1,
30 |                      control = list(ns = 1e4,nc = 8,nsplit = 1000))
31 | de_vsnull <- de_analysis(pnmf,counts,shrink.method = "ash",
32 |                          lfc.stat = "vsnull",pseudocount = 0.1,
33 |                          control = list(ns = 1e4,nc = 8,nsplit = 1000))
34 | 
35 | # Save the outputs to an .Rdata file.
36 | session_info <- sessionInfo()
37 | save(list = c("pnmf","de_le","de_vsnull","session_info"),
38 |      file = "newsgroups_topics.RData")
39 | resaveRdaFiles("newsgroups_topics.RData")
40 | 


--------------------------------------------------------------------------------
/inst/code/droplet.R:
--------------------------------------------------------------------------------
 1 | #' @name droplet
 2 | #'
 3 | #' @title Droplet single-cell RNA-seq read count data from Montoro
 4 | #'   \emph{et al} (2018)
 5 | #'
 6 | #' @docType data
 7 | #' 
 8 | #' @description These data are gene expression profiles of trachea
 9 | #' epithelial cells in C57BL/6 mice obtained using droplet-based 3'
10 | #' single-cell RNA-seq. They were prepared from file
11 | #' \code{GSE103354_Trachea_droplet_UMIcounts.txt.gz} downloaded from
12 | #' the Gene Expression Omnibus (GEO) website, accession GSE103354.
13 | #' 
14 | #' @format \code{droplet} is a 7,193 x 17,133 sparse matrix of read
15 | #' counts, with rows corresponding to samples (cells), and columns
16 | #' corresponding to genes.
17 | #' 
18 | #' @references
19 | #'
20 | #' D. T. Montoro \emph{et al} (2018). A revised airway epithelial
21 | #' hierarchy includes CFTR-expressing ionocytes. \emph{Nature} \bold{560},
22 | #' 319--324.
23 | #' 
24 | #' @keywords data
25 | #'
26 | #' @examples
27 | #'
28 | #' # Roughly 10% of the read counts are greater than zero.
29 | #' data(droplet)
30 | #' nnzero(droplet)/length(droplet)
31 | #' 
32 | NULL
33 | 


--------------------------------------------------------------------------------
/inst/code/lda.R:
--------------------------------------------------------------------------------
 1 | lda <- function (X, F, L, alpha = rep(1,ncol(F)), numiter = 1000) {
 2 |   # Iterate the E and M steps numiter times, starting from the initial
 3 |   # estimates F (m x k) and L (n x k), in which X is the n x m matrix of
 4 |   # counts, and alpha has one entry for each of the k topics.
 5 |   n <- nrow(X)
 6 |   m <- ncol(X)
 7 |   k <- ncol(F)
 8 | 
 9 |   # Expand alpha into an n x k matrix so that alpha[j] is added to
10 |   # column j of N in the M step. In general, "alpha + N" is incorrect
11 |   # because it recycles alpha along the columns of N.
12 |   A <- matrix(alpha,n,k,byrow = TRUE)
13 | 
14 |   # "value" stores the objective (the "ELBO") at each iteration.
15 |   value <- rep(0,numiter)
16 |   cat("iter --objective(ELBO)-- max.diff\n")
17 |   for (iter in seq_len(numiter)) {
18 |     L0 <- L
19 |     F0 <- F
20 | 
21 |     # E STEP
22 |     # ------
23 |     # Update the expected topic counts (N) and expected word counts (M).
24 |     N <- matrix(0,n,k)
25 |     M <- matrix(0,m,k)
26 |     for (i in seq_len(n)) {
27 |       P     <- scale.cols(F,exp(digamma(L[i,])))
28 |       P     <- P / rowSums(P)
29 |       N[i,] <- X[i,] %*% P
30 |       M     <- M + X[i,] * P
31 |     }
32 | 
33 |     # M STEP
34 |     # ------
35 |     # Update the topic proportions (loadings), then the word
36 |     # probabilities (factors). NOTE(review): scale.cols is called with
37 |     # two arguments in the E step; confirm the one-argument call is valid.
38 |     L <- A + N
39 |     F <- scale.cols(M + 1e-6)
40 | 
41 |     # Compute the variational lower bound at the current solution;
42 |     # elbo.lda returns one term per row of X, so these are summed.
43 |     value[iter] <- sum(elbo.lda(X,F,L,alpha))
44 |     cat(sprintf("%4d %+0.12e %0.2e\n",iter,value[iter],
45 |                 max(max(abs(L - L0)),max(abs(F - F0)))))
46 |   }
47 | 
48 |   # Return the estimates of L and F, and the objective ("value").
49 |   return(list(F = F,L = L,value = value))
50 | }
51 | 
52 | # Compute the terms of the variational lower bound ("ELBO") for LDA.
53 | # The return value is a vector with one entry for each row of X.
54 | #
55 | # NOTE(review): the rescaling of L[i,] inside the loop looks like it
56 | # assumes the rows of L sum to one, whereas lda passes the L computed
57 | # in its M step without normalizing it; confirm which is intended.
58 | elbo.lda <- function (X, F, L, alpha) {
59 |   n <- nrow(X)
60 |   f <- rep(0,n)
61 | 
62 |   # Use seq_len so that the loop is skipped when X has no rows.
63 |   for (i in seq_len(n)) {
64 |     a    <- L[i,] * (sum(alpha) + sum(X[i,]))
65 |     P    <- scale.cols(F,exp(digamma(a)))
66 |     P    <- P / rowSums(P)
67 |     u    <- digamma(a) - digamma(sum(a))
68 |     f[i] <- (lgamma(sum(alpha)) - lgamma(sum(a))
69 |              + sum(lgamma(a)) - sum(lgamma(alpha))
70 |              + sum((alpha - a) * u)
71 |              + sum(X[i,] %*% (scale.cols(P,u) + P*log(F) - P*log(P))))
72 |   }
73 |   return(f)
74 | }
75 | 


--------------------------------------------------------------------------------
/inst/code/multinom_demo.R:
--------------------------------------------------------------------------------
 1 | # Short script to verify implementation of the differential expression
 2 | # (DE) analysis methods applied to data simulated from a multinomial
 3 | # topic model.
 4 | library(Matrix)
 5 | library(ggplot2)
 6 | library(cowplot)
 7 | 
 8 | # Simulate data.
 9 | set.seed(1)
10 | n   <- 400
11 | m   <- 1000
12 | k   <- 4
13 | dat <- simulate_multinom_gene_data(n,m,k,sparse = TRUE)
14 | X   <- dat$X
15 | L   <- dat$L
16 | 
17 | # Fit a Poisson model (approximating a binomial model) to each gene
18 | # (row) j, and compute the log-fold change statistics.
19 | fit <- init_poisson_nmf(X,L = L,init.method = "random")
20 | de1 <- de_analysis(fit,X,fit.method = "glm")
21 | de2 <- de_analysis(fit,X,fit.method = "scd")
22 | de3 <- de_analysis(fit,X,fit.method = "em")
23 | 
24 | # Compare the glm and scd estimates of the model parameters.
25 | plot(de1$F + 1e-4,de2$F + 1e-4,pch = 4,cex = 0.5,log = "xy",xlab = "glm",
26 |      ylab = "scd")
27 | abline(a = 0,b = 1,col = "magenta",lty = "dotted")
28 | 
29 | # Compare the glm and EM estimates of the model parameters.
30 | plot(de1$F + 1e-4,de3$F + 1e-4,pch = 4,cex = 0.5,log = "xy",xlab = "glm",
31 |      ylab = "em")
32 | abline(a = 0,b = 1,col = "magenta",lty = "dotted")
33 | 
34 | # Compare the scd estimates against the probabilities used to simulate
35 | # the data.
36 | plot(dat$F + 1e-4,de2$F + 1e-4,pch = 4,cex = 0.5,log = "xy",xlab = "true",
37 |      ylab = "estimated")
38 | abline(a = 0,b = 1,col = "magenta",lty = "dotted")
39 | 
40 | # Here we show that the z-score varies (as expected) with the log-fold
41 | # change estimate and the average expression level.
42 | pdat <- data.frame(f0       = rep(de3$f0,4),
43 |                    postmean = as.vector(de3$postmean),
44 |                    z        = as.vector(de3$z))
45 | print(ggplot(pdat,aes(x = f0,y = postmean,fill = z)) +
46 |   geom_point(size = 2,shape = 21,color = "white") +
47 |   geom_abline(intercept = 0,slope = 0,color = "black",linetype = "dotted") +
48 |   scale_x_continuous(trans = "log10") +
49 |   scale_fill_gradient2(low = "darkblue",mid = "lightskyblue",
50 |                        high = "orangered",midpoint = 0) +
51 |   labs(x = "average expression",y = "log-fold change",fill = "z-score") +
52 |   theme_cowplot(12))
53 | 


--------------------------------------------------------------------------------
/inst/code/pbmc_demo.R:
--------------------------------------------------------------------------------
 1 | library(ggplot2)
 2 | library(ggrepel)
 3 | library(cowplot)
 4 | library(Matrix)
 5 | 
 6 | # Load the data.
 7 | set.seed(1)
 8 | data(pbmc_facs)
 9 | genes <- pbmc_facs$genes
10 | X     <- pbmc_facs$counts
11 | fit   <- pbmc_facs$fit
12 | 
13 | # Perform the differential expression analysis, with and without
14 | # shrinkage.
15 | set.seed(1)
16 | out1 <- de_analysis(fit,X,shrink.method = "none",
17 |                     control = list(nc = 4,nsplit = 400))
18 | set.seed(1)
19 | out2 <- de_analysis(fit,X,shrink.method = "ash",
20 |                     control = list(nc = 4,nsplit = 400))
21 | 
22 | # Plot the distribution of MCMC acceptance rates.
23 | hist(out2$ar,n = 64)
24 | 
25 | # Compare the LFC estimates with and without shrinkage.
26 | pdat <- data.frame(postmean1 = as.vector(out1$postmean),
27 |                    postmean2 = as.vector(out2$postmean))
28 | p1 <- ggplot(pdat,aes(x = postmean1,y = postmean2)) +
29 |   geom_point(shape = 21,color = "white",fill = "darkblue",na.rm = TRUE) +
30 |   geom_abline(intercept = 0,slope = 1,color = "magenta",linetype = "dotted") +
31 |   labs(x = "posterior mean estimate",y = "stabilized posterior estimate") +
32 |   theme_cowplot(font_size = 10)
33 | 
34 | # Create a volcano plot to visualize the DE results for topic k = 4.
35 | k <- "k4"
36 | pdat <- data.frame(gene     = genes$symbol,
37 |                    postmean = out2$postmean[,k],
38 |                    z        = pmin(40,abs(out2$z[,k])),
39 |                    lfsr     = cut(out2$lfsr[,k],c(-1,0.001,0.01,0.05,Inf)),
40 |                    stringsAsFactors = FALSE)
41 | rows <- which(with(pdat,!(postmean > 3 | (postmean > 0 & z > 10))))
42 | pdat[rows,"gene"] <- ""
43 | p2 <- ggplot(pdat,aes(x = postmean,y = z,fill = lfsr,label = gene)) +
44 |   geom_point(color = "white",stroke = 0.3,shape = 21,
45 |              na.rm = TRUE) +
46 |   geom_text_repel(color = "darkgray",size = 2.25,fontface = "italic",
47 |                   segment.color = "darkgray",segment.size = 0.25,
48 |                   min.segment.length = 0,max.overlaps = Inf,
49 |                   na.rm = TRUE) +
50 |   scale_y_continuous(trans = "sqrt",breaks = c(0,1,2,5,10,20,50)) +
51 |   scale_fill_manual(values = c("deepskyblue","gold","orange","tomato"),
52 |                     na.value = "gainsboro") +
53 |   labs(x = "log-fold change",y = "|z-score|") +
54 |   theme_cowplot(font_size = 10)
55 | 


--------------------------------------------------------------------------------
/inst/code/plsi.R:
--------------------------------------------------------------------------------
 1 | # Small script to verify that the Poisson NMF multiplicative updates
 2 | # are equivalent to the pLSI EM updates.
 3 | 
 4 | # Simulate a 100 x 200 counts matrix.
 5 | set.seed(1)
 6 | n   <- 100
 7 | m   <- 200
 8 | k   <- 3
 9 | out <- simulate_count_data(n,m,k)
10 | X   <- out$X
11 | A   <- out$L
12 | B   <- t(out$F)
13 | 
14 | # Apply the EM updates for pLSI and Poisson NMF in parallel.
15 | N <- rowSums(X)
16 | for (iter in 1:20) {
17 |   out <- get_multinom_from_pnmf(t(B),A)
18 |   L   <- out$L
19 |   F   <- out$F
20 |     
21 |   # Apply the multiplicative (EM) update for L.
22 |   A <- scale.cols(A * tcrossprod(X / (A %*% B),B),1/rowSums(B))
23 | 
24 |   # Apply the pLSI EM update for L.
25 |   P <- matrix(0,m,k)
26 |   for (i in 1:n) {
27 |     for (j in 1:m)
28 |       P[j,] <- F[j,]*L[i,]/sum(F[j,]*L[i,])
29 |     L[i,] <- (X[i,] %*% P)/N[i]
30 |   }
31 | 
32 |   # Compare the updated L matrices.
33 |   out <- get_multinom_from_pnmf(t(B),A)
34 |   cat(sprintf("%0.1e ",max(abs(out$L - L))))
35 |   
36 |   # Apply the multiplicative (EM) update for F.
37 |   B <- B * crossprod(A,X / (A %*% B)) / colSums(A)
38 |   
39 |   # Apply the pLSI EM update for F.
40 |   P <- matrix(0,n,k)
41 |   for (j in 1:m) {
42 |     for (i in 1:n)
43 |       P[i,] <- F[j,]*L[i,]/sum(F[j,]*L[i,])
44 |     F[j,] <- X[,j] %*% P
45 |   }
46 |   F <- normalize.cols(F)
47 | 
48 |   # Compare the updated F matrices
49 |   out <- get_multinom_from_pnmf(t(B),A)
50 |   cat(sprintf("%0.1e\n",max(abs(out$F - F))))
51 | }
52 | 


--------------------------------------------------------------------------------
/inst/code/pois_vs_binom.R:
--------------------------------------------------------------------------------
 1 | # Small script to illustrate the Poisson approximation to the binomial
 2 | # likelihood.
 3 | 
 4 | # Simulate binomial data.
 5 | set.seed(1)
 6 | n <- 1000
 7 | p <- 0.1
 8 | x <- rbinom(1,n,p)
 9 | 
10 | # Plot binomial and Poisson likelihoods.
11 | p  <- seq(0,0.25,length.out = 1000)
12 | f1 <- dbinom(x,n,p,log = TRUE)
13 | f2 <- dpois(x,n*p,log = TRUE)
14 | f1 <- exp(f1 - max(f1))
15 | f2 <- exp(f2 - max(f2))
16 | f1 <- f1/sum(f1)
17 | f2 <- f2/sum(f2)
18 | plot(p,f1,type = "l",col = "darkorange",lwd = 2)
19 | lines(p,f2,col = "darkblue",lwd = 2,lty = "dashed")
20 | 


--------------------------------------------------------------------------------
/inst/code/pois_vs_multinom.R:
--------------------------------------------------------------------------------
 1 | # A short script to verify the fit_poisson_models computations against
 2 | # fit_topic_model.
 3 | #
 4 | # Use the commented-out code to show that, in smaller data sets, the f
 5 | # parameters in the Poisson glm do not always come close to summing to
 6 | # 1; that is, the approximation is not as good for smaller samples.
 7 | library(Matrix)
 8 | 
 9 | # Simulate data.
10 | set.seed(1)
11 | n   <- 120  # 20
12 | m   <- 1000 # 8
13 | k   <- 4
14 | dat <- simulate_multinom_gene_data(n,m,k,sparse = TRUE)
15 | # dat <- simulate_poisson_gene_data(n,m,k,s = rep(10,n),sparse = FALSE)
16 | X   <- dat$X
17 | L   <- dat$L
18 | 
19 | # Fit a multinomial topic model, with k = 4 topics.
20 | fit <- fit_topic_model(X,k = 4,init.method = "random")
21 | 
22 | # Ensure that none of the topic proportions are exactly zero or
23 | # exactly one.
24 | L <- fit$L
25 | L <- pmax(L,1e-8)
26 | L <- pmin(L,1 - 1e-8)
27 | 
28 | s   <- rowSums(X)
29 | out <- add_pseudocounts(X,s*L,0.01)
30 | X   <- out$X
31 | L   <- out$L
32 | 
33 | # For each column j of the counts matrix, compute MLEs of the
34 | # parameters in the Poisson glm, x ~ Poisson(u), in which the
35 | # Poisson rates are u = sum(L*f), and f = F[j,].
36 | F <- fit_poisson_models(X,L,"scd",1e-8,100,1e-8,1)
37 | F <- pmax(F,1e-8)
38 | 
39 | # Compare the estimates obtained by computing MLEs under the
40 | # multinomial topic model against the estimates obtained by running
41 | # fit_poisson_models.
42 | plot(fit$F + 1e-6,F + 1e-6,pch = 4,cex = 0.5,log = "xy")
43 | abline(a = 0,b = 1,col = "skyblue",lty = "dotted")
44 | 
45 | # Note that the model parameters estimated in fit_poisson_models no
46 | # longer represent frequencies, but they come close.
47 | print(colSums(fit$F))
48 | print(colSums(F))
49 | 


--------------------------------------------------------------------------------
/inst/code/poisson_demo.R:
--------------------------------------------------------------------------------
 1 | # Short script to verify the implementation of the differential
 2 | # expression (DE) analysis methods, applied to data simulated from a
 3 | # Poisson NMF model.
 4 | library(Matrix)
 5 | library(ggplot2)
 6 | library(cowplot)
 7 | 
 8 | # Simulate data; s holds n values drawn log-uniformly between 0.1 and 10.
 9 | set.seed(1)
10 | n   <- 800
11 | m   <- 1000
12 | k   <- 4
13 | s   <- 10^runif(n,-1,1)
14 | dat <- simulate_poisson_gene_data(n,m,k,s)
15 | X   <- dat$X
16 | L   <- dat$L
17 | Y   <- as(X,"dgCMatrix") # NOTE(review): Y is not used again in this script.
18 | mu  <- colMeans(X) # Column means of X, taken before pseudocounts are added.
19 | f0  <- colSums(X)/sum(s) # Column totals of X divided by the total of s.
20 | 
21 | # Add "pseudocounts" to the data; X and L are replaced by the outputs.
22 | out <- add_pseudocounts(X,s*L,0.01)
23 | X <- out$X
24 | L <- out$L
25 | 
26 | # Fit a Poisson model for each gene by two methods, then compare them.
27 | F1 <- fit_poisson_models(X,L,method = "glm")
28 | F2 <- fit_poisson_models(X,L,method = "scd",nc = 4)
29 | print(range(F1 - F2)) # Smallest and largest difference between the fits.
30 | 
31 | # Compare the estimates against the Poisson rates used to simulate the
32 | # data. The offset e keeps zero values plottable on the log-scale axes.
33 | e <- 1e-4
34 | i <- 1 # NOTE(review): i is not used again in this script.
35 | plot(dat$F + e,F1 + e,pch = 20,log = "xy",xlab = "true frequency",
36 |      ylab = "estimated frequency")
37 | abline(a = 0,b = 1,col = "dodgerblue",lty = "dotted")
38 | 
39 | # Compute the log-fold change statistics for each gene j and topic k.
40 | out <- compute_lfc_stats(X,F1,L,f0)
41 | 
42 | # Show that the z-score varies, as expected, with the log-fold change
43 | # estimate and average expression; only column 1 of est and z is used.
44 | pdat <- data.frame(x = mu,lfc = out$est[,1],z = out$z[,1])
45 | print(ggplot(pdat,aes(x = x,y = lfc,fill = z)) +
46 |   geom_point(size = 2,shape = 21,color = "white") +
47 |   geom_abline(intercept = 0,slope = 0,color = "black",linetype = "dotted") +
48 |   scale_x_continuous(trans = "log10") +
49 |   scale_fill_gradient2(low = "darkblue",mid = "skyblue",
50 |                        high = "orangered",midpoint = 0) +
51 |   labs(x = "average expression",y = "log-fold change",fill = "z-score") +
52 |   theme_cowplot(font_size = 12))
53 | 
54 | # Create a volcano plot in which log-fold change is shown on the
55 | # x-axis and the absolute z-score is shown on the y-axis. To illustrate
56 | # the impact of (mean) gene expression level on the z-scores, the (log)
57 | # average expression level is shown by a colour gradient.
58 | print(ggplot(pdat,aes(x = lfc,y = abs(z),fill = log10(x))) +
59 |   geom_point(size = 2,shape = 21,color = "white") +
60 |   labs(x = "log-fold change",y = "|z-score|",fill = "log10(mean)") +
61 |   scale_y_continuous(trans = "sqrt") +
62 |   scale_fill_gradient2(low = "skyblue",mid = "gold",high = "orangered",
63 |                        midpoint = 0) +
64 |   theme_cowplot(font_size = 12))
65 | 


--------------------------------------------------------------------------------
/inst/code/postfit_motif_analysis_Buenrostro2018.R:
--------------------------------------------------------------------------------
 1 | # Load previously saved differential accessibility results for ATAC-seq
 2 | # regions (peaks), then run TF motif enrichment analysis using HOMER.
 3 | de <- readRDS("DA_regions_topics_noshrinkage_10000iters.rds")
 4 | 
 5 | # Run the HOMER motif enrichment analysis with k = 4, passing a subset
 6 | # function that selects the entries with lpval > 1.
 7 | select_small_pvals <- function (postmean,lpval,lfsr,rank,quantile)
 8 |   lpval > 1 # NOTE(review): presumably lpval is -log10(p-value); confirm.
 9 | res <- run_homer(de,k = 4,subset = select_small_pvals)
10 | 


--------------------------------------------------------------------------------
/inst/code/pseudocounts.R:
--------------------------------------------------------------------------------
 1 | # Simulate a 100 x 200 counts matrix.
 2 | set.seed(1)
 3 | n   <- 100
 4 | m   <- 200
 5 | k   <- 3
 6 | out <- simulate_count_data(n,m,k)
 7 | X   <- out$X
 8 | F   <- out$F
 9 | L   <- out$L
10 | 
11 | # Add pseudocounts by appending k rows and k columns to X.
12 | a <- 1.1
13 | b <- 1.1
14 | X <- rbind(X,matrix(a - 1,k,m)) # k new rows of X, every entry a - 1.
15 | L <- rbind(L,diag(k)) # Matching new rows of L: the k x k identity.
16 | X <- cbind(X,matrix(b - 1,n+k,k)) # k new columns of X, every entry b - 1.
17 | F <- rbind(F,1e-4 * diag(k)) # Matching new rows of F: 1e-4 on the diagonal.
18 | 
19 | # Fit a Poisson NMF with k = 3, then convert to a multinomial topic model.
20 | s   <- rowSums(L) # NOTE(review): s is not used again in this script.
21 | fit <- init_poisson_nmf(X,F = F,L = L)
22 | fit <- fit_poisson_nmf(X,fit0 = fit,numiter = 400,
23 |                        update.factors = 1:m,
24 |                        update.loadings = 1:n, # Indices exclude the appended rows.
25 |                        control = list(extrapolate = TRUE))
26 | fit.multinom <- poisson2multinom(fit)
27 | 
28 | # Apply the pLSI EM update for L, using only the original n x m data.
29 | X <- X[1:n,1:m]
30 | L <- fit.multinom$L[1:n,]
31 | F <- fit.multinom$F[1:m,]
32 | F <- normalize.cols(F)
33 | P <- matrix(0,m,k)
34 | for (i in 1:n) {
35 |   for (j in 1:m)
36 |     P[j,] <- F[j,]*L[i,]/sum(F[j,]*L[i,])
37 |   L[i,] <- X[i,] %*% P + b - 1 # The pseudocount enters as the b - 1 term.
38 | }
39 | L <- normalize.rows(L)
40 | print(range(L - fit.multinom$L[1:n,])) # Change in L relative to the fit.
41 | 
42 | # Apply the pLSI EM update for F.
43 | P <- matrix(0,n,k)
44 | for (j in 1:m) {
45 |   for (i in 1:n)
46 |     P[i,] <- F[j,]*L[i,]/sum(F[j,]*L[i,])
47 |   F[j,] <- X[,j] %*% P + a - 1 # The pseudocount enters as the a - 1 term.
48 | }
49 | F <- normalize.cols(F)
50 | print(range(F - normalize.cols(fit.multinom$F[1:m,]))) # Change in F.
51 | 


--------------------------------------------------------------------------------
/inst/code/scd.R:
--------------------------------------------------------------------------------
 1 | # A small script to illustrate the co-ordinate ascent updates in the
 2 | # sequential co-ordinate descent (SCD) algorithm described by Lin &
 3 | # Boutros (2018).
 4 | #
 5 | # Here I enhance the SCD algorithm with a simple backtracking line
 6 | # search to guarantee that the objective decreases at each iteration.
 7 | #
 8 | 
 9 | # SCRIPT PARAMETERS
10 | # -----------------
11 | n           <- 20 # Length of the simulated vectors w, a and b.
12 | numiter     <- 20 # Number of updates of x to perform.
13 | line.search <- TRUE # If FALSE, take the full step and clamp x at zero.
14 | 
15 | # SIMULATE DATA
16 | # -------------
17 | set.seed(49)
18 | w <- rpois(n,2)
19 | a <- abs(rnorm(n))
20 | b <- abs(rnorm(n))
21 | 
22 | # Solve the following 1-d optimization problem:
23 | #
24 | #   minimize    f(x) = sum(b*x - w*log(y))
25 | #   subject to  y = a + b*x,
26 | #               x >= 0.
27 | # 
28 | # using a simple sequential quadratic programming (SQP) method.
29 | x <- 1 # Initial estimate.
30 | e <- 1e-15 # Tolerance used in the line search below.
31 | f <- rep(0,numiter) # f[i] holds the objective before the i-th update.
32 | for (i in 1:numiter) {
33 | 
34 |   # Compute the value of the objective at x.
35 |   y    <- a + b*x;
36 |   f[i] <- sum(b*x - w*log(y))
37 |     
38 |   # Compute the gradient and Hessian at x.
39 |   u <- b/y
40 |   h <- sum(w*u^2) # Second derivative of f with respect to x.
41 |   g <- sum(b - w*u) # First derivative of f with respect to x.
42 | 
43 |   # Optionally, perform backtracking line search to determine a
44 |   # suitable step size. The search direction p is the Newton step.
45 |   p <- -g/h
46 |   if (line.search) {
47 |     if (p >= -e)
48 |       s <- 1 # x is not decreasing, so try the full step first.
49 |     else
50 |       s <- min(1,-x/p) # Largest step (at most 1) keeping x + s*p >= 0.
51 |     smin <- e # Give up once the step size falls below this.
52 |     while (TRUE) {
53 |       xnew <- x + s*p
54 |       ynew <- a + b*xnew
55 |       fnew <- sum(b*xnew - w*log(ynew))
56 |       if (s < smin) {
57 |         xnew <- x # No acceptable step was found; leave x unchanged.
58 |         s    <- 0
59 |         break
60 |       } else if (fnew < f[i])
61 |         break # Accept the step: the objective has decreased.
62 |       else
63 |         s <- s/2 # Otherwise halve the step size and try again.
64 |     }
65 |   } else
66 |     xnew <- max(0,x + p) # Full step, projected onto x >= 0.
67 |       
68 |   # Update x.
69 |   x <- xnew
70 | }
71 | cat(sprintf("solution: %0.6f\n",x))
72 | 
73 | # Plot the improvement in the solution over time.
74 | y <- f - min(f) + 1e-15 # Small offset keeps values positive for the log axis.
75 | plot(1:numiter,y,type = "l",col = "dodgerblue",lwd = 1,log = "y",
76 |      xlab = "iteration",ylab = "distance to minimum")
77 | points(1:numiter,y,pch = 20,col = "dodgerblue")
78 | 
79 | # This is a plot I created to compare the progression with and without
80 | # the backtracking line search.
81 | # 
82 | #   f <- min(c(f.nols,f.ls))
83 | #   y <- f.nols - f + 1e-8
84 | #   plot(1:numiter,y,type = "l",col = "dodgerblue",lwd = 1,log = "y",
85 | #        xlab = "iteration",ylab = "distance to minimum")
86 | #   points(1:numiter,y,pch = 20,col = "dodgerblue")
87 | #   y <- f.ls - f + 1e-8
88 | #   lines(1:numiter,y,col = "darkorange",lwd = 1)
89 | #   points(1:numiter,y,pch = 20,col = "darkorange")
90 | #
91 | 


--------------------------------------------------------------------------------
/inst/code/simulate_data_for_sfa.R:
--------------------------------------------------------------------------------
 1 | library(R.matlab) # NOTE(review): presumably the source of writeMat below.
 2 | set.seed(1)
 3 | n   <- 400
 4 | m   <- 1000
 5 | k   <- 6
 6 | dat <- simulate_multinom_gene_data(n,m,k) # Simulate data with these settings.
 7 | X   <- dat$X
 8 | L   <- dat$L
 9 | writeMat("sim_multinom.mat",X=X,L=L) # Save X and L to sim_multinom.mat.
10 | 


--------------------------------------------------------------------------------
/inst/code/test_hpd.R:
--------------------------------------------------------------------------------
1 | set.seed(1)
2 | n <- 1e4
3 | x <- rnorm(n) # n draws from the standard normal distribution.
4 | print(hpd(x)) # Print the output of hpd called with its default settings.
5 | 


--------------------------------------------------------------------------------
/inst/code/test_poisson_fit.R:
--------------------------------------------------------------------------------
 1 | # Verify the Poisson model computations on a small example with k = 2.
 2 | library(ggplot2)
 3 | library(cowplot)
 4 | 
 5 | # Simulate a Poisson data set, x ~ Pois(s*u), with u = (1-q)*f1 + q*f2.
 6 | set.seed(1)
 7 | n  <- 200
 8 | f1 <- 0.1
 9 | f2 <- 1
10 | s  <- sample(10,n,replace = TRUE)
11 | q  <- runif(n)
12 | u  <- (1-q)*f1 + q*f2
13 | x  <- rpois(n,s*u)
14 | 
15 | # Fit the generalized linear model directly with glm (identity link).
16 | control <- glm.control(epsilon = 1e-10, maxit = 100)
17 | L   <- cbind(s*(1-q),s*q) # The two columns multiply f1 and f2, respectively.
18 | dat <- data.frame(x = x,f1 = L[,1],f2 = L[,2])
19 | fit <- glm(x ~ f1 + f2 - 1,family = poisson(link = "identity"),
20 |            data = dat,start = c(0.5,0.5),control = control)
21 | print(log(coef(fit)))
22 | 
23 | # Fit the same model using fit_poisson_glm, and print the estimates on
24 | # the log scale for comparison with the glm estimates printed above.
25 | out <- fit_poisson_glm(x,L)
26 | print(log(out$coef))
27 | 
28 | # Compute the covariance of log(f).
29 | cat("Cov(log(f)) estimated via Laplace approximation:\n")
30 | print(compute_poisson_covariance(x,L,out$coef))
31 | 
32 | # Draw samples from the posterior using random-walk Metropolis.
33 | ns  <- 1e5
34 | D   <- matrix(rnorm(2*ns),ns,2) # ns x 2 standard normal draws.
35 | U   <- matrix(runif(2*ns),ns,2) # ns x 2 uniform draws on (0,1).
36 | M   <- matrix(sample(2,2*ns,replace = TRUE),ns,2) - 1 # ns x 2 draws of 0 or 1.
37 | sim <- simulate_posterior_poisson_rcpp(x,L,out$coef,D,U,M,0.3,1e-15)
38 | cat("Acceptance rates:\n")
39 | print(drop(sim$ar))
40 | 
41 | cat("MCMC estimate of Cov(log(f)):\n")
42 | print(cov(sim$samples))
43 | 
44 | # Get 90% HPD intervals.
45 | cat("MCMC estimates of 90% HPD intervals:\n")
46 | print(hpd(sim$samples[,1],0.9))
47 | print(hpd(sim$samples[,2],0.9))
48 | 
49 | # Plot the likelihood surface over a grid of (log f1, log f2) values.
50 | dat     <- expand.grid(t1 = seq(-4,1,0.05),t2 = seq(-4,1,0.02))
51 | n       <- nrow(dat) # n now counts the grid points, not the samples.
52 | dat$lik <- 0
53 | for (i in 1:n) {
54 |   f <- exp(c(dat[i,1],dat[i,2]))
55 |   u <- drop(L %*% f)
56 |   dat[i,"lik"] <- sum(dpois(x,u,log = TRUE))
57 | }
58 | dat$lik <- exp(dat$lik - max(dat$lik)) # Rescale so the largest value is 1.
59 | p1 <- ggplot(dat,aes(x = t1,y = t2,z = lik)) +
60 |   geom_contour(color = "black",bins = 16) +
61 |   geom_point(data = as.data.frame(t(log(out$coef))),
62 |              mapping = aes(x = f1,y = f2),
63 |              color = "red",shape = 4,
64 |              inherit.aes = FALSE) +
65 |   labs(x = "log(f1)",y = "log(f2)") +
66 |   theme_cowplot(font_size = 10)
67 | 
68 | # Plot the MCMC density estimate, with log(out$coef) marked in red.
69 | sim$samples <- as.data.frame(sim$samples)
70 | names(sim$samples) <- c("k1","k2")
71 | p2 <- ggplot(sim$samples,aes(x = k1,y = k2)) +
72 |   geom_density_2d(color = "black") +
73 |   geom_point(data = as.data.frame(t(log(out$coef))),
74 |              mapping = aes(x = f1,y = f2),
75 |              color = "red",shape = 4,
76 |              inherit.aes = FALSE) +
77 |   labs(x = "log(f1)",y = "log(f2)") +
78 |   theme_cowplot(font_size = 10)
79 | print(plot_grid(p1,p2))
80 | 


--------------------------------------------------------------------------------
/inst/code/test_poisson_fit_basic.R:
--------------------------------------------------------------------------------
 1 | # Simulate binomial data, x ~ Binom(s,p0).
 2 | set.seed(1)
 3 | n  <- 50
 4 | s  <- ceiling(10*runif(n))
 5 | p0 <- 0.05
 6 | q  <- runif(n) # NOTE(review): q is not used again in this script.
 7 | x  <- rbinom(n,s,p0)
 8 | 
 9 | # Fit the basic Poisson model x ~ Pois(s*f0) using glm.
10 | fit <- glm(x ~ f0 - 1,family = poisson(link = "identity"),
11 |            data = data.frame(x = x,f0 = s),start = 0.5, # Column f0 holds s.
12 |            control = glm.control(epsilon = 1e-10, maxit = 100))
13 | 
14 | # Compute the MLE of f0.
15 | f0 <- sum(x)/sum(s)
16 | 
17 | # The glm estimate should be the same as f0.
18 | cat(coef(fit),f0,"\n")
19 | 
20 | # Compute the s.e. of log(f0) using the Laplace approximation.
21 | se <- 1/sqrt(f0*sum(s)) # Equal to 1/sqrt(sum(x)).
22 | 
23 | # Compute the s.e. of log(f0) by numerical integration over a grid t.
24 | ns <- 1000
25 | t  <- seq(-6,0,length.out = ns)
26 | w  <- rep(0,ns)
27 | for (i in 1:ns)
28 |   w[i] <- sum(dpois(x,s*exp(t[i]),log = TRUE))
29 | w  <- exp(w - max(w)) # Subtract the maximum before exponentiating.
30 | w  <- w/sum(w) # Normalize the weights so that they sum to 1.
31 | mu <- sum(w*t) # Weighted mean of t.
32 | se_mc <- sqrt(sum(w*(t - mu)^2)) # Weighted standard deviation of t.
33 | 
34 | # The two s.e. calculations should be pretty close.
35 | cat(se,se_mc,"\n")
36 | 


--------------------------------------------------------------------------------
/inst/datafiles/newsgroups.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/inst/datafiles/newsgroups.RData


--------------------------------------------------------------------------------
/inst/datafiles/newsgroups_topics.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenslab/fastTopics/3a35d33d3e5ea0314d11965a1700e66035f1d18d/inst/datafiles/newsgroups_topics.RData


--------------------------------------------------------------------------------
/man/annotation_heatmap.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/annotation_heatmap.R
 3 | \name{annotation_heatmap}
 4 | \alias{annotation_heatmap}
 5 | \title{Annotation Heatmap}
 6 | \usage{
 7 | annotation_heatmap(
 8 |   effects_matrix,
 9 |   select_features = c("largest", "distinctive", "both", "all"),
10 |   feature_sign = c("both", "positive", "negative"),
11 |   dims = colnames(effects_matrix),
12 |   compare_dims = colnames(effects_matrix),
13 |   n = 2,
14 |   show_dims = colnames(effects_matrix),
15 |   zero_value = 0.01,
16 |   font_size = 10,
17 |   verbose = TRUE
18 | )
19 | }
20 | \arguments{
21 | \item{effects_matrix}{n x d numeric matrix, where n is the number
22 | of features and d is the number of dimensions. This could be for
23 | example the word frequencies matrix \code{F} from a multinomial
24 | topic model fitted using \code{\link{fit_topic_model}}. The rows
25 | and columns of this matrix must be named, otherwise this function
26 | will throw an error.}
27 | 
28 | \item{select_features}{This may be a character vector specifying
29 | the features to plot (rows of the effects matrix). Or it may be one
30 | of the following: \code{"largest"}, which automatically selects the
31 | largest effects for each chosen dimension; \code{"distinctive"},
32 | which automatically selects the \dQuote{most distinctive} effects
33 | for each chosen dimension; or \code{"both"}, which uses both
34 | criteria to select features. Distinctive features are defined as
35 | rows of the effects matrix that are much larger in magnitude than
36 | the effects in the other dimensions that also share the same sign.}
37 | 
38 | \item{feature_sign}{For automated selection of features, this
39 | option determines whether to consider positive effects only
40 | (\code{"positive"}), negative effects only (\code{"negative"}), or
41 | both (\code{"both"}).}
42 | 
43 | \item{dims}{The dimensions (columns of the effect matrix) to
44 | consider for automatic feature selection. This should be dimension
45 | names (not numbers).}
46 | 
47 | \item{compare_dims}{This should be dimension names (not numbers).}
48 | 
49 | \item{n}{For automated feature selection, the number of features to
50 | select of each type and for each dimension. (see arguments
51 | \code{select_features} and \code{feature_sign}).}
52 | 
53 | \item{show_dims}{The dimensions (columns) to include in the
54 | plot. This should be dimension names (not numbers).}
55 | 
56 | \item{zero_value}{Numbers smaller than \code{zero_value} (in
57 | magnitude) are not shown in the plot.}
58 | 
59 | \item{font_size}{Specifies the font size for the plot.}
60 | 
61 | \item{verbose}{When \code{verbose = TRUE}, the list of selected
62 | features (rows) is printed.}
63 | }
64 | \value{
65 | A \code{ggplot} object.
66 | }
67 | \description{
68 | This is a generic plotting utility (not specific to
69 | the topic model) for comparing \dQuote{effects} across multiple
70 | dimensions (e.g., topics). The function has several options for
71 | selecting the features to compare.
72 | }
73 | \examples{
74 | data(newsgroups)
75 | p1 <- annotation_heatmap(newsgroups$F,feature_sign = "positive")
76 | 
77 | }
78 | 


--------------------------------------------------------------------------------
/man/compare_fits.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/summary.R
 3 | \name{compare_fits}
 4 | \alias{compare_fits}
 5 | \title{Summarize and Compare Model Fits}
 6 | \usage{
 7 | compare_fits(fits)
 8 | }
 9 | \arguments{
10 | \item{fits}{An object of class \code{"poisson_nmf_fit"} or
11 | \code{"multinom_topic_model_fit"}, or a non-empty, named list in
12 | which all list elements are Poisson NMF model fits or all
13 | multinomial topic model fits.}
14 | }
15 | \value{
16 | A data frame with one row per element of \code{fits}, and
17 | with the following columns:
18 | 
19 | \item{k}{The rank of the matrix factorization.}
20 | 
21 | \item{loglik}{The log-likelihood (either Poisson NMF or multinomial topic
22 |   model likelihood) achieved at the last model fitting update.}
23 | 
24 | \item{dev}{For Poisson NMF model fits only, the deviance achieved
25 |   at the last model fitting update.}
26 | 
27 | \item{res}{The maximum residual of the Karush-Kuhn-Tucker (KKT)
28 |   system achieved at the last model fitting update; small values
29 |   indicate that the solution is close to a local maximum, or
30 |   stationary point, of the likelihood.}
31 | 
32 | \item{loglik.diff}{The improvement in the log-likelihood relative
33 |   to the model fit with the smallest log-likelihood.}
34 | 
35 | \item{dev.diff}{The improvement in the deviance relative to the
36 |   model fit with the largest deviance.}
37 | 
38 | \item{nonzeros.f}{The rate of nonzeros in the factors matrix, as
39 |   determined by \code{control$zero.threshold}.}
40 | 
41 | \item{nonzeros.l}{The rate of nonzeros in the loadings matrix, as
42 |   determined by \code{control$zero.threshold}.}
43 | 
44 | \item{numiter}{The number of loadings and/or factor updates
45 |   performed.}
46 | 
47 | \item{runtime}{The total runtime (in s) of the model fitting
48 |   updates.}
49 | }
50 | \description{
51 | Create a table summarizing the results of fitting one
52 |   or more Poisson non-negative matrix factorizations or multinomial
53 |   topic models.
54 | }
55 | \seealso{
56 | \code{\link{fit_poisson_nmf}},
57 |   \code{\link{fit_topic_model}}
58 | }
59 | 


--------------------------------------------------------------------------------
/man/fit_multinom_model.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/fit_multinom_model.R
 3 | \name{fit_multinom_model}
 4 | \alias{fit_multinom_model}
 5 | \title{Fit Simple Multinomial Model}
 6 | \usage{
 7 | fit_multinom_model(cluster, X, verbose = c("none", "detailed"), ...)
 8 | }
 9 | \arguments{
10 | \item{cluster}{A factor specifying a grouping, or clustering, of
11 | the rows of \code{X}; e.g., the \dQuote{cluster} output from
12 | \code{\link[stats]{kmeans}}.}
13 | 
14 | \item{X}{The n x m matrix of counts; all entries of X should be
15 | non-negative. It can be a sparse matrix (class \code{"dgCMatrix"})
16 | or dense matrix (class \code{"matrix"}), with some exceptions (see
17 | \sQuote{Details}).}
18 | 
19 | \item{verbose}{This is passed as the \dQuote{verbose} argument in
20 | the call to \code{\link{init_poisson_nmf}}.}
21 | 
22 | \item{\dots}{Additional arguments passed to
23 | \code{\link{init_poisson_nmf}}.}
24 | }
25 | \value{
26 | A multinomial topic model fit.
27 | }
28 | \description{
29 | Fit a simple multinomial model for count data, in
30 |   which each sample (\emph{i.e.}, a row of the data matrix \code{X})
31 |   is assigned to a cluster. Under this simple multinomial model,
32 |   \eqn{x_{ij}} assigned to cluster \eqn{k} is multinomial with sample
33 |   size \eqn{s_i = x_{i1} + ... + x_{im}} and multinomial
34 |   probabilities \eqn{p_{1k}, ..., p_{mk}}. This is a special case of
35 |   the multinomial topic model in which all the mixture proportions
36 |   are either 0 or 1. The maximum-likelihood estimates (MLEs) of the
37 |   multinomial probabilities have a closed-form solution; no
38 |   iterative algorithm is needed to fit this simple model.
39 | }
40 | \seealso{
41 | \code{\link{fit_topic_model}}
42 | }
43 | 


--------------------------------------------------------------------------------
/man/likelihood.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/likelihood.R
 3 | \name{loglik_poisson_nmf}
 4 | \alias{loglik_poisson_nmf}
 5 | \alias{loglik_multinom_topic_model}
 6 | \alias{deviance_poisson_nmf}
 7 | \alias{cost}
 8 | \title{NMF and Topic Model Likelihoods and Deviances}
 9 | \usage{
10 | loglik_poisson_nmf(X, fit, e = 1e-08)
11 | 
12 | loglik_multinom_topic_model(X, fit, e = 1e-08)
13 | 
14 | deviance_poisson_nmf(X, fit, e = 1e-08)
15 | 
16 | cost(X, A, B, e = 1e-08, family = c("poisson", "multinom"), version)
17 | }
18 | \arguments{
19 | \item{X}{The n x m matrix of counts or pseudocounts. It can be a
20 | sparse matrix (class \code{"dgCMatrix"}) or dense matrix (class
21 | \code{"matrix"}).}
22 | 
23 | \item{fit}{A Poisson NMF or multinomial topic model fit, such as an
24 | output from \code{\link{fit_poisson_nmf}} or
25 | \code{\link{fit_topic_model}}.}
26 | 
27 | \item{e}{A small, non-negative number added to the terms inside the
28 | logarithms to avoid computing logarithms of zero. This prevents
29 | numerical problems at the cost of introducing a very small
30 | inaccuracy in the computation.}
31 | 
32 | \item{A}{The n x k matrix of loadings. It should be a dense matrix.}
33 | 
34 | \item{B}{The k x m matrix of factors. It should be a dense matrix.}
35 | 
36 | \item{family}{If \code{family = "poisson"}, the loss function values
37 | corresponding to the Poisson non-negative matrix factorization are
38 | computed; if \code{family = "multinom"}, the multinomial topic model
39 | loss function values are returned.}
40 | 
41 | \item{version}{When \code{version == "R"}, the computations are
42 | performed entirely in R; when \code{version == "Rcpp"}, an Rcpp
43 | implementation is used. The R version is typically faster when
44 | \code{X} is a dense matrix, whereas the Rcpp version is faster and
45 | more memory-efficient when \code{X} is a large, sparse matrix. When
46 | not specified, the most suitable version is called depending on
47 | whether \code{X} is dense or sparse.}
48 | }
49 | \value{
50 | A numeric vector with one entry per row of \code{X}.
51 | }
52 | \description{
53 | Compute log-likelihoods and deviances for assessing
54 |   fit of a topic model or a non-negative matrix factorization (NMF).
55 | }
56 | \details{
57 | Function \code{cost} computes loss functions proportional
58 |   to the negative log-likelihoods, and is mainly for internal use to
59 |   quickly compute log-likelihoods and deviances; it should not be
60 |   used directly unless you know what you are doing. In particular,
61 |   little argument checking is performed by \code{cost}.
62 | }
63 | \examples{
64 | 
65 | # Generate a small counts matrix.
66 | set.seed(1)
67 | out <- simulate_count_data(10,20,3)
68 | X   <- out$X
69 | fit <- out[c("F","L")]
70 | class(fit) <- c("poisson_nmf_fit","list")
71 | 
72 | # Compute the Poisson log-likelihoods and deviances.
73 | data.frame(loglik   = loglik_poisson_nmf(X,fit),
74 |            deviance = deviance_poisson_nmf(X,fit))
75 | 
76 | # Compute multinomial log-likelihoods.
77 | loglik_multinom_topic_model(X,fit)
78 | 
79 | }
80 | 


--------------------------------------------------------------------------------
/man/loadings_plot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/other_plots.R
 3 | \name{loadings_plot}
 4 | \alias{loadings_plot}
 5 | \alias{loadings_plot_ggplot_call}
 6 | \title{Loadings Plot}
 7 | \usage{
 8 | loadings_plot(
 9 |   fit,
10 |   x,
11 |   k,
12 |   ggplot_call = loadings_plot_ggplot_call,
13 |   plot_grid_call = function(plots) do.call(plot_grid, plots)
14 | )
15 | 
16 | loadings_plot_ggplot_call(dat, topic.label, font.size = 9)
17 | }
18 | \arguments{
19 | \item{fit}{An object of class \dQuote{poisson_nmf_fit} or
20 | \dQuote{multinom_topic_model_fit}.}
21 | 
22 | \item{x}{A categorical variable represented as a
23 | \code{\link{factor}}. It should have the same number of elements as
24 | the number of rows in \code{fit$L}.}
25 | 
26 | \item{k}{The topic, or topics, selected by number or name. When not
27 | specified, all topics are plotted.}
28 | 
29 | \item{ggplot_call}{The function used to create the plot. Replace
30 | \code{loadings_plot_ggplot_call} with your own function to
31 | customize the appearance of the plot.}
32 | 
33 | \item{plot_grid_call}{When multiple topics are selected, this is
34 | the function used to arrange the plots into a grid using
35 | \code{\link[cowplot]{plot_grid}}. It should be a function accepting
36 | a single argument, \code{plots}, a list of \code{ggplot} objects.}
37 | 
38 | \item{dat}{A data frame passed as input to
39 | \code{\link[ggplot2]{ggplot}}, containing, at a minimum, columns
40 | \dQuote{x} and \dQuote{loading}.}
41 | 
42 | \item{topic.label}{The name or number of the topic being plotted.
43 | Only used to determine the plot title.}
44 | 
45 | \item{font.size}{Font size used in plot.}
46 | }
47 | \value{
48 | A \code{ggplot} object.
49 | }
50 | \description{
51 | Generate one or more boxplots to visualize the
52 |   relationship between the loadings or mixture proportions and a
53 |   selected categorical variable (a factor).
54 | }
55 | \details{
56 | This is a lightweight interface primarily intended to
57 |   expedite creation of boxplots for investigating relationships
58 |   between topics and a categorical variable of interest without
59 |   having to spend a great deal of time worrying about the plotting
60 |   settings; most of the \dQuote{heavy lifting} is done by
61 |   \sQuote{ggplot2} (specifically, function
62 |   \code{\link[ggplot2]{geom_boxplot}} in the \sQuote{ggplot2}
63 |   package). For more control over the plot's appearance, the plot can
64 |   be customized by modifying the \code{ggplot_call} and
65 |   \code{plot_grid_call} arguments.
66 | }
67 | 


--------------------------------------------------------------------------------
/man/merge_topics.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/merge_topics.R
 3 | \name{merge_topics}
 4 | \alias{merge_topics}
 5 | \title{Combine Topics in Multinomial Topic Model}
 6 | \usage{
 7 | merge_topics(fit, k)
 8 | }
 9 | \arguments{
10 | \item{fit}{A multinomial topic model fit.}
11 | 
12 | \item{k}{The names or numbers of the topics to be combined. Two or
13 | more topics should be chosen.}
14 | }
15 | \value{
16 | A multinomial topic model fit.
17 | }
18 | \description{
19 | Combine two or more topics in a multinomial topic
20 |   model fit.
21 | }
22 | \details{
23 | Mixture proportions are combined by summation, and factors
24 |   are combined by averaging.
25 | }
26 | 


--------------------------------------------------------------------------------
/man/multinom2poisson.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/multinom2poisson.R
 3 | \name{multinom2poisson}
 4 | \alias{multinom2poisson}
 5 | \title{Recover Poisson NMF Fit from Multinomial Topic Model Fit}
 6 | \usage{
 7 | multinom2poisson(fit, X)
 8 | }
 9 | \arguments{
10 | \item{fit}{An object of class \dQuote{multinom_topic_model_fit},
11 | such as an output from \code{poisson2multinom}. If a Poisson NMF
12 | fit is provided (that is, an object of class
13 | \dQuote{poisson_nmf_fit}), the fit object is immediately returned
14 | \dQuote{as is}.}
15 | 
16 | \item{X}{Optional n x m matrix of counts, or pseudocounts. It can
17 | be a sparse matrix (class \code{"dgCMatrix"}) or dense matrix
18 | (class \code{"matrix"}). This only needs to be provided if the
19 | document sizes \code{fit$s} are not available.}
20 | }
21 | \value{
22 | The return value is the list \code{fit}, in which matrices
23 |   \code{fit$F} and \code{fit$L} specify the factors and loadings in
24 |   the Poisson non-negative matrix factorization; specifically,
25 |   the counts matrix is modeled by the low-rank matrix product
26 |   \code{tcrossprod(fit$L,fit$F)}.
27 | }
28 | \description{
29 | This function recovers parameter estimates of the
30 |   Poisson non-negative matrix factorization (NMF) given parameter
31 |   estimates for a multinomial topic model.
32 | }
33 | 


--------------------------------------------------------------------------------
/man/newsgroups.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/newsgroups.R
 3 | \docType{data}
 4 | \name{newsgroups}
 5 | \alias{newsgroups}
 6 | \title{Topic modeling results from the \dQuote{20 Newsgroups} data
 7 |   set.}
 8 | \format{
 9 | \code{newsgroups} is a list with the following elements:
10 | 
11 | \describe{
12 | 
13 |   \item{topics}{Original labeling of the documents: each document
14 |     is from one of 20 \dQuote{newsgroups}.}
15 | 
16 |   \item{L}{Estimated topic proportions matrix; rows are
17 |     documents and columns are topics.}
18 | 
19 |   \item{F}{Matrix containing posterior mean estimates of log-fold
20 |   changes (in base-2 logarithm). These were computed using
21 |   \code{\link{de_analysis}} with \code{lfc.stat = "vsnull"}. Rows
22 |   are words and columns are topics.}}
23 | }
24 | \description{
25 | These are topic modeling results from the \dQuote{20
26 | Newsgroups} data, with k = 10 topics. The data were originally
27 | downloaded from \url{http://qwone.com/~jason/20Newsgroups} and
28 | prepared by running code found in an R Markdown file in this
29 | GitHub repository:
30 | \url{https://github.com/stephenslab/fastTopics-experiments}. See
31 | the \dQuote{inst} directory of this package for the scripts used to
32 | generate these results.
33 | }
34 | \examples{
35 | data(newsgroups)
36 | table(newsgroups$topics)
37 | dim(newsgroups$L)
38 | dim(newsgroups$F)
39 | 
40 | }
41 | \keyword{data}
42 | 


--------------------------------------------------------------------------------
/man/pbmc_facs.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/pbmc_facs.R
 3 | \docType{data}
 4 | \name{pbmc_facs}
 5 | \alias{pbmc_facs}
 6 | \title{Mixture of 10 FACS-purified PBMC Single-Cell RNA-seq data}
 7 | \format{
 8 | \code{pbmc_facs} is a list with the following elements:
 9 | 
10 | \describe{
11 | 
12 |   \item{counts}{3,774 x 16,791 sparse matrix of UMI counts, with
13 |      rows corresponding to samples (cells) and columns corresponding to
14 |      genes. It is an object of class \code{"dgCMatrix"}.}
15 | 
16 |   \item{counts_test}{UMI counts for an additional test set of 100
17 |     cells.}
18 | 
19 |   \item{samples}{Data frame containing information about the
20 |     samples, including cell barcode and source FACS population
21 |     (\dQuote{celltype} and \dQuote{facs_subpop}).}
22 | 
23 |   \item{samples_test}{Sample information for the additional test
24 |      set of 100 cells.}
25 | 
26 |   \item{genes}{Data frame containing information about the genes,
27 |     including gene symbol and Ensembl identifier.}
28 | 
29 |   \item{fit}{Poisson non-negative matrix factorization (NMF) fitted
30 |     to the UMI count data \code{counts}, with rank \code{k = 6}. See
31 |     the vignette for how the Poisson NMF model fitting was performed.}}
32 | 
33 | \url{https://www.10xgenomics.com/resources/datasets}
34 | }
35 | \description{
36 | These data are a selection of the reference
37 |   transcriptome profiles generated via single-cell RNA sequencing
38 |   (RNA-seq) of 10 bead-enriched subpopulations of PBMCs (Donor A),
39 |   described in Zheng \emph{et al} (2017). The data are unique
40 |   molecular identifier (UMI) counts for 16,791 genes in 3,774 cells.
41 |   (Genes with no expression in any of the cells were removed.) Since
42 |   the majority of the UMI counts are zero, they are efficiently
43 |   stored as a 3,774 x 16,791 sparse matrix. These data are used in
44 |   the vignette illustrating how 'fastTopics' can be used to analyze
45 |   single-cell RNA-seq data. Data for a separate set of 1,000 cells is
46 |   provided as a \dQuote{test set} to evaluate out-of-sample predictions.
47 | }
48 | \examples{
49 | library(Matrix)
50 | data(pbmc_facs)
51 | cat(sprintf("Number of cells: \%d\n",nrow(pbmc_facs$counts)))
52 | cat(sprintf("Number of genes: \%d\n",ncol(pbmc_facs$counts)))
53 | cat(sprintf("Proportion of counts that are non-zero: \%0.1f\%\%.\n",
54 |             100*mean(pbmc_facs$counts > 0)))
55 | 
56 | }
57 | \references{
58 | G. X. Y. Zheng \emph{et al} (2017). Massively parallel digital
59 | transcriptional profiling of single cells. \emph{Nature Communications}
60 | \bold{8}, 14049. \doi{10.1038/ncomms14049}
61 | }
62 | \keyword{data}
63 | 


--------------------------------------------------------------------------------
/man/plot_loglik_vs_rank.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/other_plots.R
 3 | \name{plot_loglik_vs_rank}
 4 | \alias{plot_loglik_vs_rank}
 5 | \alias{loglik_vs_rank_ggplot_call}
 6 | \title{Plot Log-Likelihood Versus Rank}
 7 | \usage{
 8 | plot_loglik_vs_rank(fits, ggplot_call = loglik_vs_rank_ggplot_call)
 9 | 
10 | loglik_vs_rank_ggplot_call(dat, font.size = 9)
11 | }
12 | \arguments{
13 | \item{fits}{A list with 2 or more list elements, in which each list
14 | element is an object of class \code{"poisson_nmf_fit"} or
15 | \code{"multinom_topic_model_fit"}. If two or more fits share the
16 | same rank, or number of topics, the largest log-likelihood is
17 | plotted.}
18 | 
19 | \item{ggplot_call}{The function used to create the plot. Replace
20 | \code{loglik_vs_rank_ggplot_call} with your own function to
21 | customize the appearance of the plot.}
22 | 
23 | \item{dat}{A data frame passed as input to
24 | \code{\link[ggplot2]{ggplot}}, containing, at a minimum, columns
25 | \dQuote{x} and \dQuote{y}.}
26 | 
27 | \item{font.size}{Font size used in plot.}
28 | }
29 | \value{
30 | A \code{ggplot} object.
31 | }
32 | \description{
33 | Create a plot showing the improvement in the
34 |   log-likelihood as the rank of the matrix factorization or the
35 |   number of topics (\dQuote{k}) increases.
36 | }
37 | 


--------------------------------------------------------------------------------
/man/plot_progress.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/other_plots.R
 3 | \name{plot_progress}
 4 | \alias{plot_progress}
 5 | \title{Plot Progress of Model Fitting Over Time}
 6 | \usage{
 7 | plot_progress(
 8 |   fits,
 9 |   x = c("timing", "iter"),
10 |   y = c("loglik", "dev", "res"),
11 |   add.point.every = 20,
12 |   colors = c("#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7"),
13 |   linetypes = "solid",
14 |   linesizes = 0.5,
15 |   shapes = 19,
16 |   fills = "white",
17 |   e = 0.01,
18 |   theme = function() theme_cowplot(12)
19 | )
20 | }
21 | \arguments{
22 | \item{fits}{An object of class \code{"poisson_nmf_fit"} or
23 | \code{"multinom_topic_model_fit"}, or a non-empty, named list in
24 | which all list elements are objects of class
25 | \code{"poisson_nmf_fit"} or all objects of class
26 | \code{"multinom_topic_model_fit"}.}
27 | 
28 | \item{x}{Choose \code{"timing"} to plot improvement in the solution
29 | over time, or choose \code{"iter"} to plot improvement in the
30 | solution per iteration.}
31 | 
32 | \item{y}{Column of the "progress" data frame used to assess
33 | progress of the Poisson NMF optimization method(s). Should be one
34 | of \code{"loglik"} (Poisson NMF or multinomial topic model
35 | log-likelihood), \code{"dev"} (deviance) or \code{"res"} (maximum
36 | residual of KKT conditions). The deviance is only valid for Poisson
37 | NMF model fits.}
38 | 
39 | \item{add.point.every}{A positive integer giving the iteration
40 | interval for drawing points on the progress curves. Set to
41 | \code{Inf} to prevent points from being drawn on the plot.}
42 | 
43 | \item{colors}{Colours used to draw progress curves; passed as the
44 | \code{values} input to \code{\link[ggplot2]{scale_color_manual}}.
45 | If fewer colours than "fits" are given, the colours are recycled.}
46 | 
47 | \item{linetypes}{Line types used to draw progress curves; passed as
48 | the \code{values} input to \code{\link[ggplot2]{scale_linetype_manual}}.
49 | If fewer line types than \dQuote{fits} are given, the line types are
50 | recycled.}
51 | 
52 | \item{linesizes}{Line sizes used to draw progress curves; passed as
53 | the \code{values} input to \code{\link[ggplot2]{scale_size_manual}}.
54 | If fewer line sizes than \dQuote{fits} are given, the line sizes are
55 |  recycled.}
56 | 
57 | \item{shapes}{Shapes used to draw points at the selected
58 | iterations; passed as the \code{values} input to
59 | \code{\link[ggplot2]{scale_shape_manual}}. If fewer shapes than
60 | \dQuote{fits} are given, the shapes are recycled.}
61 | 
62 | \item{fills}{Fill colours used to draw points at the selected
63 | iterations; passed as the \code{values} input to
64 | \code{\link[ggplot2]{scale_fill_manual}}. If fewer fill colours
65 | than \dQuote{fits} are given, the fill colours are recycled.}
66 | 
67 | \item{e}{A small, positive number added to the vertical axis (for
68 | \code{y = "loglik"} and \code{y = "dev"} only) so that the
69 | logarithmic scale does not over-emphasize very small differences.}
70 | 
71 | \item{theme}{The \sQuote{ggplot2} \dQuote{theme}.}
72 | }
73 | \value{
74 | A \code{ggplot} object.
75 | }
76 | \description{
77 | Create a plot showing improvement in one or more
78 |   Poisson NMF or multinomial topic model fits over time.
79 | }
80 | \details{
81 | The horizontal axis shows the recorded runtime (in s), and
82 | the vertical axis shows some quantity measuring the quality of the
83 | fit: the log-likelihood, deviance or maximum residual of the
84 | Karush-Kuhn-Tucker (KKT) first-order optimality conditions. To
85 | better visualize log-likelihoods and deviances, log-likelihood and
86 | deviance differences are shown on the logarithmic scale.
87 | Differences are calculated with respect to the best value achieved
88 | over all the fits compared.
89 | 
90 | Note that only minimal argument checking is performed.
91 | }
92 | \seealso{
93 | \code{\link{fit_poisson_nmf}}
94 | }
95 | 


--------------------------------------------------------------------------------
/man/poisson2multinom.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/poisson2multinom.R
 3 | \name{poisson2multinom}
 4 | \alias{poisson2multinom}
 5 | \title{Recover Multinomial Topic Model Fit from Poisson NMF fit}
 6 | \usage{
 7 | poisson2multinom(fit)
 8 | }
 9 | \arguments{
10 | \item{fit}{An object of class \dQuote{poisson_nmf_fit}, such as an
11 | output from \code{fit_poisson_nmf}. It does not make sense for a
12 | multinomial topic model to have less than two topics, so an error
13 | will be reported when k < 2, where k is the rank of the matrix
14 | factorization. If a multinomial topic model fit is provided (that
15 | is, an object of class \dQuote{multinom_topic_model_fit}), the fit
16 | object is immediately returned \dQuote{as is}.}
17 | }
18 | \value{
19 | The return value is the list \code{fit}, in which
20 |   \code{fit$F} and \code{fit$L} are the parameters of the multinomial
21 |   topic model; specifically, \code{fit$L[i,]} gives the topic
22 |   probabilities for sample or document i, and \code{fit$F[,k]} gives
23 |   the term probabilities for topic k. An additional vector
24 |   \code{fit$s} of length n is returned giving the "size factors".
25 | }
26 | \description{
27 | This function recovers parameter estimates of the
28 |   multinomial topic model given parameter estimates for a Poisson
29 |   non-negative matrix factorization (NMF).
30 | }
31 | 


--------------------------------------------------------------------------------
/man/predict.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/predict.R
  3 | \name{predict.poisson_nmf_fit}
  4 | \alias{predict.poisson_nmf_fit}
  5 | \alias{predict.multinom_topic_model_fit}
  6 | \title{Predict Methods for Poisson NMF and Multinomial Topic Model}
  7 | \usage{
  8 | \method{predict}{poisson_nmf_fit}(object, newdata, numiter = 20, ...)
  9 | 
 10 | \method{predict}{multinom_topic_model_fit}(object, newdata, numiter = 20, ...)
 11 | }
 12 | \arguments{
 13 | \item{object}{An object of class \dQuote{poisson_nmf_fit} or
 14 | \dQuote{multinom_topic_model_fit}.}
 15 | 
 16 | \item{newdata}{An optional counts matrix. If omitted, the loadings
 17 | estimated in the original data are returned.}
 18 | 
 19 | \item{numiter}{The number of updates to perform.}
 20 | 
 21 | \item{\dots}{Additional arguments passed to
 22 | \code{\link{fit_poisson_nmf}}.}
 23 | }
 24 | \value{
 25 | A loadings matrix with one row for each data point and one
 26 |   column for each topic or factor. For
 27 |   \code{predict.multinom_topic_model_fit}, the output can also be
 28 |   interpreted as a matrix of estimated topic proportions, in which
 29 |   \code{L[i,j]} is the proportional contribution of topic j to data
 30 |   point i.
 31 | }
 32 | \description{
 33 | Predict loadings based on a previously fit Poisson NMF,
 34 |   or predict topic proportions based on a previously fit multinomial
 35 |   topic model. This can be thought of as projecting data points onto
 36 |   a previously estimated set of factors \code{fit$F}.
 37 | }
 38 | \examples{
 39 | \donttest{
 40 | # Simulate a 175 x 1,200 counts matrix.
 41 | set.seed(1)
 42 | dat <- simulate_count_data(175,1200,k = 3)
 43 | 
 44 | # Split the data into training and test sets.
 45 | train <- dat$X[1:100,]
 46 | test <- dat$X[101:175,]
 47 | 
 48 | # Fit a Poisson non-negative matrix factorization using the
 49 | # training data.
 50 | fit <- init_poisson_nmf(train,F = dat$F,init.method = "random")
 51 | fit <- fit_poisson_nmf(train,fit0 = fit)
 52 | 
 53 | # Compare the estimated loadings in the training data against the
 54 | # loadings used to simulate these data.
 55 | Ltrain <- predict(fit)
 56 | plot(dat$L[1:100,],Ltrain,pch = 20,col = "darkblue",
 57 |      xlab = "true",ylab = "estimated")
 58 | abline(a = 0,b = 1,col = "magenta",lty = "dotted")
 59 | 
 60 | # Next, predict loadings in unseen (test) data points, and compare
 61 | # these predictions against the loadings that were used to simulate
 62 | # the test data.
 63 | Ltest <- predict(fit,test)
 64 | plot(dat$L[101:175,],Ltest,pch = 20,col = "darkblue",
 65 |      xlab = "true",ylab = "estimated")
 66 | abline(a = 0,b = 1,col = "magenta",lty = "dotted")
 67 | 
 68 | # Simulate a 175 x 1,200 counts matrix.
 69 | set.seed(1)
 70 | dat <- simulate_multinom_gene_data(175,1200,k = 3)
 71 | 
 72 | # Split the data into training and test sets.
 73 | train <- dat$X[1:100,]
 74 | test <- dat$X[101:175,]
 75 | 
 76 | # Fit a topic model using the training data.
 77 | fit <- init_poisson_nmf(train,F = dat$F,init.method = "random")
 78 | fit <- fit_poisson_nmf(train,fit0 = fit)
 79 | fit <- poisson2multinom(fit)
 80 | 
 81 | # Compare the estimated topic proportions in the training data against
 82 | # the topic proportions used to simulate these data.
 83 | Ltrain <- predict(fit)
 84 | plot(dat$L[1:100,],Ltrain,pch = 20,col = "darkblue",
 85 |      xlab = "true",ylab = "estimated")
 86 | abline(a = 0,b = 1,col = "magenta",lty = "dotted")
 87 | 
 88 | # Next, predict loadings in unseen (test) data points, and compare
 89 | # these predictions against the loadings that were used to simulate
 90 | # the test data.
 91 | Ltest <- predict(fit,test)
 92 | plot(dat$L[101:175,],Ltest,pch = 20,col = "darkblue",
 93 |      xlab = "true",ylab = "estimated")
 94 | abline(a = 0,b = 1,col = "magenta",lty = "dotted")
 95 | }
 96 | 
 97 | }
 98 | \seealso{
 99 | \code{\link{fit_poisson_nmf}}
100 | }
101 | 


--------------------------------------------------------------------------------
/man/run_homer.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/homer.R
 3 | \name{run_homer}
 4 | \alias{run_homer}
 5 | \title{Perform HOMER Motif Enrichment Analysis using DE Genomic Positions}
 6 | \usage{
 7 | run_homer(
 8 |   de,
 9 |   k,
10 |   positions,
11 |   genome = "hg19",
12 |   subset = function(postmean, lpval, lfsr, rank, quantile) lfsr < 0.05,
13 |   homer.exec = "findMotifsGenome.pl",
14 |   out.dir = tempdir(),
15 |   homer.options = "-len 8,10,12 -size 200 -mis 2 -S 25 -p 1 -h",
16 |   verbose = TRUE
17 | )
18 | }
19 | \arguments{
20 | \item{de}{An object of class \dQuote{topic_model_de_analysis},
21 | usually the result of running \code{\link{de_analysis}}.}
22 | 
23 | \item{k}{Use the DE analysis results for this topic.}
24 | 
25 | \item{positions}{A table of genomic positions corresponding to rows
26 | of the \code{de_analysis} results. Specifically, it should be a data
27 | frame with four columns: \dQuote{chr}, chromosome name or number;
28 | \dQuote{start}, start position of genomic feature; \dQuote{end},
29 | end position of genomic feature; and \dQuote{name}, the name of the
30 | genomic feature. If not specified, the genomic positions will be
31 | extracted from the row names of \code{de$postmean}, in which the
32 | row names are expected to be of the form \code{chr_start_end}. The
33 | genomic positions will be written to a BED file (see
34 | \url{https://genome.ucsc.edu/FAQ/FAQformat.html} for more
35 | information about BED files).}
36 | 
37 | \item{genome}{The genome parameter passed to
38 | \code{findMotifsGenome.pl}.}
39 | 
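40 | \item{subset}{A function of \code{postmean}, \code{lpval}, \code{lfsr}, \code{rank} and \code{quantile} used to choose which DE results are included in the analysis; by default, those with \code{lfsr < 0.05}.}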
41 | 
42 | \item{homer.exec}{The name or file path of the HOMER
43 | \code{findMotifsGenome.pl} executable.}
44 | 
45 | \item{out.dir}{The positions BED file and HOMER results are written
46 | to this directory.}
47 | 
48 | \item{homer.options}{Character string used to override default
49 | \code{findMotifsGenome.pl} options.}
50 | 
51 | \item{verbose}{When \code{verbose = TRUE}, progress information is
52 | printed to the console.}
53 | }
54 | \value{
55 | A data frame containing the motif enrichment results. It
56 |   is created from the \code{knownResults.txt} HOMER output.
57 | }
58 | \description{
59 | Run HOMER motif finding algorithm
60 |   (\code{findMotifsGenome.pl}) to identify motifs enriched for
61 |   differentially expressed (DE) genomic positions. See
62 |   \url{http://homer.ucsd.edu} for more information.
63 | }
64 | \references{
65 | Heinz, S., Benner, C., Spann, N., Bertolino, E., Lin, Y. C., Laslo,
66 | P., Cheng, J. X., Murre, C., Singh, H. and Glass, C. K. (2010).
67 | Simple combinations of lineage-determining transcription factors
68 | prime cis-regulatory elements required for macrophage and B cell
69 | identities. \emph{Molecular Cell} \bold{38}, 576-589.
70 | }
71 | 


--------------------------------------------------------------------------------
/man/select_loadings.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/select.R
 3 | \name{select.poisson_nmf_fit}
 4 | \alias{select.poisson_nmf_fit}
 5 | \alias{select}
 6 | \alias{select.multinom_topic_model_fit}
 7 | \alias{select_loadings}
 8 | \title{Extract or Re-order Data Rows in Poisson NMF or Multinomial Topic Model Fit}
 9 | \usage{
10 | \method{select}{poisson_nmf_fit}(.data, loadings, ...)
11 | 
12 | \method{select}{multinom_topic_model_fit}(.data, loadings, ...)
13 | 
14 | select_loadings(.data, loadings, ...)
15 | }
16 | \arguments{
17 | \item{.data}{Poisson NMF or Multinomial Topic Model fit; that is,
18 | an object of class \dQuote{poisson_nmf_fit} or
19 | \dQuote{multinom_topic_model_fit}, such as an output from
20 | \code{\link{fit_poisson_nmf}} or \code{\link{fit_topic_model}}.}
21 | 
22 | \item{loadings}{Indices (names or numbers) giving data rows to
23 | keep. If not specified, all rows are kept.}
24 | 
25 | \item{\dots}{Other arguments passed to the generic select function.}
26 | }
27 | \value{
28 | A Poisson NMF or multinomial topic model fit containing the
29 |   selected data rows only.
30 | }
31 | \description{
32 | This function can be used to extract estimates for a
33 |   subset of the count data, or to re-order the rows of the loadings
34 |   matrix.
35 | }
36 | 


--------------------------------------------------------------------------------
/man/simulate_count_data.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/datasim.R
 3 | \name{simulate_count_data}
 4 | \alias{simulate_count_data}
 5 | \title{Simulate Count Data from Poisson NMF Model}
 6 | \usage{
 7 | simulate_count_data(n, m, k, fmax = 1, lmax = 1, sparse = FALSE)
 8 | }
 9 | \arguments{
10 | \item{n}{Number of rows in simulated count matrix. The number of
11 | rows should be at least 2.}
12 | 
13 | \item{m}{Number of columns in simulated count matrix. The number of
14 | columns should be at least 2.}
15 | 
16 | \item{k}{Number of factors, or \dQuote{topics}, used to determine
17 | Poisson rates. The number of topics should be 1 or more.}
18 | 
19 | \item{fmax}{Factors are drawn uniformly at random between zero and
20 | \code{fmax}.}
21 | 
22 | \item{lmax}{Loadings are drawn uniformly at random between zero and
23 | \code{lmax}.}
24 | 
25 | \item{sparse}{If \code{sparse = TRUE}, convert the counts matrix to
26 | a sparse matrix in compressed, column-oriented format; see
27 | \code{\link[Matrix]{sparseMatrix}}.}
28 | }
29 | \value{
30 | The return value is a list containing the counts matrix
31 |   \code{X} and the factorization, \code{F} and \code{L}, used to
32 |   generate the counts.
33 | }
34 | \description{
35 | Simulate a counts matrix \code{X} such that
36 |   \code{X[i,j]} is Poisson with rate (mean) \code{Y[i,j]}, where
37 |   \code{Y = tcrossprod(L,F)}, \code{L} is an n x k loadings
38 |   (\dQuote{activations}) matrix, and \code{F} is an m x k factors
39 |   (\dQuote{basis vectors}) matrix. The entries of matrix \code{L} are
40 |   drawn uniformly at random between zero and \code{lmax}, and the
41 |   entries of matrix \code{F} are drawn uniformly at random between 0
42 |   and \code{fmax}.
43 | }
44 | \details{
45 | Note that only minimal argument checking is performed. This
46 |   function is mainly used to simulate small data sets for the examples
47 |   and package tests.
48 | }
49 | 


--------------------------------------------------------------------------------
/man/simulate_gene_data.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/datasim.R
 3 | \name{simulate_poisson_gene_data}
 4 | \alias{simulate_poisson_gene_data}
 5 | \alias{simulate_multinom_gene_data}
 6 | \title{Simulate Gene Expression Data from Poisson NMF or Multinomial
 7 |   Topic Model}
 8 | \usage{
 9 | simulate_poisson_gene_data(n, m, k, s, p = 1, sparse = FALSE)
10 | 
11 | simulate_multinom_gene_data(n, m, k, sparse = FALSE)
12 | }
13 | \arguments{
14 | \item{n}{Number of rows in the simulated count matrix. Should be at
15 | least 2.}
16 | 
17 | \item{m}{Number of columns in the simulated count matrix. Should be
18 | at least 2.}
19 | 
20 | \item{k}{Number of factors, or \dQuote{topics}, used to generate
21 | the data. Should be 2 or more.}
22 | 
23 | \item{s}{Vector of \dQuote{size factors}; each row of the loadings
24 | matrix \code{L} is scaled by the entries of \code{s} before
25 | generating the counts. This should be a vector of length n
26 | containing only positive values.}
27 | 
28 | \item{p}{Probability that \code{F[i,j]} is equal to the mean rate.
29 | Smaller values of \code{p} will result in more factors that are the
30 | same across topics.}
31 | 
32 | \item{sparse}{If \code{sparse = TRUE}, convert the counts matrix to
33 | a sparse matrix in compressed, column-oriented format; see
34 | \code{\link[Matrix]{sparseMatrix}}.}
35 | }
36 | \value{
37 | \code{simulate_poisson_gene_data} returns a list containing
38 |   the counts matrix \code{X}, and the size factors \code{s} and
39 |   factorization, \code{F}, \code{L}, used to generate the counts.
40 |   \code{simulate_multinom_gene_data} returns a list containing the
41 |   counts matrix \code{X}, and the mixture proportions \code{L} and
42 |   factors (gene probabilities, or relative gene expression levels)
43 |   \code{F} used to generate the counts.
44 | }
45 | \description{
46 | Simulate count data from a Poisson NMF model or
47 |   multinomial topic model, in which topics represent \dQuote{gene
48 |   expression programs}, and gene expression programs are
49 |   characterized by different rates of expression. The way in which
50 |   the counts are simulated is modeled after gene expression studies
51 |   in which expression is measured by single-cell RNA sequencing
52 |   (\dQuote{RNA-seq}) techniques: each row of the counts matrix
53 |   corresponds to a gene expression profile, each column corresponds to a
54 |   gene, and each matrix element is a \dQuote{read count}, or
55 |   \dQuote{UMI count}, measuring expression level. Factors are
56 |   simulated so as to capture realistic changes in gene expression
57 |   across different cell types. See \dQuote{Details} for the procedure
58 |   used to simulate factors, loadings and counts.
59 | }
60 | \details{
61 | Here we describe the process for generating the n x k
62 |   loadings matrix \code{L} and the m x k factors matrix \code{F}.
63 | 
64 |   Each row of the \code{L} matrix is generated in the following
65 |   manner: (1) the number of nonzero mixture proportions is \eqn{1
66 |   \le n \le k}, with probability proportional to \eqn{2^{-n}};
67 |   (2) the indices of the nonzero mixture proportions are sampled
68 |   uniformly at random; and (3) the nonzero mixture proportions are
69 |   sampled from the Dirichlet distribution with \eqn{\alpha = 1} (so
70 |   that all topics are equally likely).
71 | 
72 |   Each row of the factors matrix is generated according to the
73 |   following procedure: (1) generate \eqn{u = |r| - 5}, where \eqn{r ~
74 |   N(0,2)}; (2) for each topic \eqn{k}, generate the Poisson rates as
75 |   \eqn{exp(max(t,-5))}, where \eqn{t ~ 0.95 * N(u,s/10) + 0.05 *
76 |   N(u,s)}, and \eqn{s = exp(-u/8)}. Factors can be interpreted as
77 |   Poisson rates or multinomial probabilities, so that individual
78 |   counts can be viewed as being generated from a weighted mixture
79 |   of \dQuote{topics} with different rates or probabilities.
80 | 
81 |   Once the loadings and factors have been generated, the counts are
82 |   simulated from either the Poisson NMF or multinomial topic model:
83 |   for the former, \code{X[i,j]} is Poisson with rate \code{Y[i,j]},
84 |   where \code{Y = tcrossprod(L,F)}; for the latter, \code{X[i,]} is
85 |   multinomial with size \code{s[i]} and with class probabilities
86 |   \code{P[i,]}, where \code{P = tcrossprod(L,F)}. For the multinomial
87 |   model only, the sizes \code{s} are randomly generated as \code{s =
88 |   10^rnorm(n,3,0.2)}.
89 | 
90 |   Note that only minimal argument checking is performed;
91 |   the function is mainly used to test implementation of the
92 |   topic-model-based differential count analysis.
93 | }
94 | 


--------------------------------------------------------------------------------
/man/simulate_toy_gene_data.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/datasim.R
 3 | \name{simulate_toy_gene_data}
 4 | \alias{simulate_toy_gene_data}
 5 | \title{Simulate Toy Gene Expression Data}
 6 | \usage{
 7 | simulate_toy_gene_data(n, m, k, s)
 8 | }
 9 | \arguments{
10 | \item{n}{The number of samples (gene expression profiles) to
11 | simulate.}
12 | 
13 | \item{m}{The number of counts (genes) to simulate.}
14 | 
15 | \item{k}{The number of topics ("gene programs") used to simulate
16 | the data.}
17 | 
18 | \item{s}{A scalar specifying the total expression of each sample;
19 | it specifies the "size" parameter in the calls to
20 | \code{\link[stats]{rmultinom}}.}
21 | }
22 | \value{
23 | The return value is a list containing the counts matrix
24 |   \code{X}, and the gene frequencies \code{F} and mixture proportions
25 |   \code{L} used to generate the counts.
26 | }
27 | \description{
28 | Simulate gene expression data (UMI counts) under a
29 |   toy expression model. Samples (expression profiles) are drawn
30 |   from a multinomial topic model in which topics are "gene programs".
31 | }
32 | \details{
33 | The mixture proportions are generated as follows. With
34 | probability 0.9, one proportion is one, or close to one, and the
35 | remaining are zero, or close to zero; that is, the counts are
36 | primarily generated from a single gene program. Otherwise (with
37 | probability 0.1), the mixture proportions are roughly equal.
38 | 
39 | Gene frequencies are drawn uniformly at random from [0,1].
40 | }
41 | 


--------------------------------------------------------------------------------
/man/summary.poisson_nmf_fit.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/summary.R
 3 | \name{summary.poisson_nmf_fit}
 4 | \alias{summary.poisson_nmf_fit}
 5 | \alias{summary.multinom_topic_model_fit}
 6 | \alias{print.summary.poisson_nmf_fit}
 7 | \alias{print.summary.multinom_topic_model_fit}
 8 | \title{Summarize Poisson NMF or Multinomial Topic Model Fit}
 9 | \usage{
10 | \method{summary}{poisson_nmf_fit}(object, ...)
11 | 
12 | \method{summary}{multinom_topic_model_fit}(object, ...)
13 | 
14 | \method{print}{summary.poisson_nmf_fit}(x, show.mixprops = FALSE, show.topic.reps = FALSE, ...)
15 | 
16 | \method{print}{summary.multinom_topic_model_fit}(
17 |   x,
18 |   show.size.factors = FALSE,
19 |   show.mixprops = FALSE,
20 |   show.topic.reps = FALSE,
21 |   ...
22 | )
23 | }
24 | \arguments{
25 | \item{object}{An object of class \dQuote{poisson_nmf_fit} or
26 | \dQuote{multinom_topic_model_fit}. The former is usually the result
27 | of calling \code{\link{fit_poisson_nmf}}; the latter is usually the
28 | result of calling \code{\link{fit_topic_model}} or
29 | \code{\link{poisson2multinom}}.}
30 | 
31 | \item{\dots}{Additional arguments passed to the generic \code{summary}
32 | or \code{print.summary} method.}
33 | 
34 | \item{x}{An object of class \dQuote{summary.poisson_nmf_fit},
35 | usually a result of a call to \code{summary.poisson_nmf_fit}.}
36 | 
37 | \item{show.mixprops}{If \code{TRUE}, print a summary of the mixture
38 | proportions.}
39 | 
40 | \item{show.topic.reps}{If \code{TRUE}, print a summary of the topic
41 | representatives.}
42 | 
43 | \item{show.size.factors}{If \code{TRUE}, print a summary of the
44 | size factors.}
45 | }
46 | \value{
47 | The functions \code{summary.poisson_nmf_fit} and
48 | \code{summary.multinom_topic_model_fit} compute and return a list
49 | of statistics summarizing the model fit. The returned list
50 | includes some or all of the following elements:
51 | 
52 | \item{n}{The number of rows in the counts matrix, typically the
53 |   number of samples.}
54 | 
55 | \item{m}{The number of columns in the counts matrix, typically the
56 |   number of observed counts per sample.}
57 | 
58 | \item{k}{The rank of the Poisson NMF or the number of topics.}
59 | 
60 | \item{s}{A vector of length n giving the "size factor" estimates;
61 |   these estimates should be equal, or close to, the total counts in
62 |   each row of the counts matrix.}
63 | 
64 | \item{numiter}{The number of loadings and/or factor updates
65 |   performed.}
66 | 
67 | \item{loglik}{The Poisson NMF log-likelihood.}
68 | 
69 | \item{loglik.multinom}{The multinomial topic model log-likelihood.}
70 | 
71 | \item{dev}{The Poisson NMF deviance.}
72 | 
73 | \item{res}{The maximum residual of the Karush-Kuhn-Tucker (KKT)
74 |   first-order optimality conditions. This can be used to assess
75 |   convergence of the updates to a (local) solution.}
76 | 
77 | \item{mixprops}{Matrix giving a high-level summary of the
78 |   mixture proportions, in which rows correspond to topics, and
79 |   columns are ranges of mixture proportions.}
80 | 
81 | \item{topic.reps}{A matrix in which the ith row gives the mixture
82 |   proportions for the sample "most representative" of topic i; by
83 |   "most representative", we mean the row (or sample) with the highest
84 |   proportion of counts drawn from the topic i.}
85 | }
86 | \description{
87 | \code{summary} method for the \dQuote{poisson_nmf_fit}
88 |   and \dQuote{multinom_topic_model_fit} classes.
89 | }
90 | 


--------------------------------------------------------------------------------
/src/Makevars:
--------------------------------------------------------------------------------
1 | PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) -DARMA_DONT_PRINT_ERRORS \
2 |                -DARMA_NO_DEBUG -DARMA_USE_BLAS -DARMA_DONT_USE_OPENMP \
3 |                -DARMA_USE_TBB_ALLOC -DRCPP_PARALLEL_USE_TBB=1 \
4 |                -DARMA_WARN_LEVEL=1 -DARMA_64BIT_WORD
5 | PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) \
6 |            $(shell ${R_HOME}/bin/Rscript -e "RcppParallel::RcppParallelLibs()")
7 | 
8 | 


--------------------------------------------------------------------------------
/src/Makevars.win:
--------------------------------------------------------------------------------
1 | PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) -DARMA_DONT_PRINT_ERRORS \
2 |                -DARMA_NO_DEBUG -DARMA_USE_BLAS -DARMA_DONT_USE_OPENMP \
3 |                -DARMA_USE_TBB_ALLOC -DRCPP_PARALLEL_USE_TBB=1 \
4 |                -DARMA_WARN_LEVEL=1 -DARMA_64BIT_WORD
5 | PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) \
6 |            $(shell "${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe" -e "RcppParallel::RcppParallelLibs()")
7 | 


--------------------------------------------------------------------------------
/src/cost.cpp:
--------------------------------------------------------------------------------
 1 | #include "misc.h"
 2 | #include "cost.h"
 3 | 
 4 | using namespace arma;
 5 | 
 6 | // FUNCTION DEFINITIONS
 7 | // --------------------
 8 | // Compute per-row cost (negative log-likelihood) terms for a dense
 9 | // matrix X approximated by the product A * B: one entry per row of X,
10 | // with the sum of A * B included only when poisson is true, and e added
11 | // inside the logarithm. This exported wrapper simply calls cost().
12 | // [[Rcpp::depends(RcppArmadillo)]]
13 | // [[Rcpp::export]]
14 | arma::vec cost_rcpp (const arma::mat& X, const arma::mat& A,
15 | 		     const arma::mat& B, double e, bool poisson) {
16 |   return cost(X,A,B,e,poisson);
17 | }
18 | 
19 | // This is the same as cost_rcpp, except that X must be sparse
20 | // (arma::sp_mat); this exported wrapper simply calls cost_sparse().
21 | // [[Rcpp::export]]
22 | arma::vec cost_sparse_rcpp (const arma::sp_mat& X, const arma::mat& A,
23 | 	  		    const arma::mat& B, double e, bool poisson) {
24 |   return cost_sparse(X,A,B,e,poisson);
25 | }
26 | 
27 | // Helper for cost_rcpp: returns one cost value per row of dense matrix X.
28 | arma::vec cost (const mat& X, const mat& A, const mat& B, double e, 
29 |              bool poisson) {
30 |   unsigned int n = X.n_rows;
31 |   unsigned int m = X.n_cols;
32 |   vec  f(n,fill::zeros);  // Per-row cost, accumulated over the columns.
33 |   vec  y(n);              // Column j of the product A * B.
34 |   
35 |   // Repeat for each column of X.
36 |   for (unsigned int j = 0; j < m; j++) {
37 | 
38 |     // This is equivalent to the following R code:
39 |     //
40 |     //   f = f + poisson*y - X[,j]*log(y + e)
41 |     //
42 |     // where 
43 |     // 
44 |     //   y = A %*% B[,j]
45 |     // (the "poisson*y" term is added only when poisson is true).
46 |     y  = A * B.col(j);
47 |     f -= X.col(j) % log(y + e);  // "%" is the element-wise product.
48 |     if (poisson)
49 |       f += y;
50 |   }
51 |   
52 |   return f;
53 | }
54 | 
55 | // Helper function for cost_sparse_rcpp.
56 | arma::vec cost_sparse (const sp_mat& X, const mat& A, const mat& B,
57 | 		       double e, bool poisson) {
58 |   unsigned int n = X.n_rows;
59 |   unsigned int m = X.n_cols;
60 |   unsigned int i;
61 |   vec  f(n,fill::zeros);
62 |   vec  y(n);
63 |   
64 |   // Repeat for each column of X.
65 |   for (unsigned int j = 0; j < m; j++) {
66 | 
67 |     // Initialize an iterator for the nonzero elements in the jth
68 |     // column of X.
69 |     sp_mat::const_col_iterator xj = X.begin_col(j);
70 |     sp_mat::const_col_iterator xm = X.end_col(j);
71 | 
72 |     // This is equivalent to the following R code:
73 |     //
74 |     //   f = f + poisson*y - X[,j]*log(y + e)
75 |     //
76 |     // where 
77 |     // 
78 |     //   y = A %*% B[,j]
79 |     //
80 |     y = A * B.col(j);
81 |     for(; xj != xm; ++xj) {
82 |       i     = xj.row();
83 |       f(i) -= (*xj) * log(y(i) + e);
84 |     }
85 |     if (poisson)
86 |       f += y;
87 |   }
88 |   
89 |   return f;
90 | }
91 | 


--------------------------------------------------------------------------------
/src/cost.h:
--------------------------------------------------------------------------------
 1 | #ifndef INCLUDE_COST
 2 | #define INCLUDE_COST
 3 | 
 4 | #include <RcppArmadillo.h>
 5 | 
 6 | // FUNCTION DECLARATIONS
 7 | // ---------------------
 8 | arma::vec cost        (const arma::mat& X, const arma::mat& A,
 9 | 		       const arma::mat& B, double e, bool poisson);
10 | arma::vec cost_sparse (const arma::sp_mat& X, const arma::mat& A,
11 | 	   	       const arma::mat& B, double e, bool poisson);
12 | 
13 | #endif
14 | 


--------------------------------------------------------------------------------
/src/misc.cpp:
--------------------------------------------------------------------------------
  1 | #include "misc.h"
  2 | 
  3 | using namespace arma;
  4 | 
  5 | // FUNCTION DECLARATIONS
  6 | // ---------------------
  7 | void le_diff (const vec& x, vec& y);
  8 | 
  9 | // FUNCTION DEFINITIONS
 10 | // --------------------
 11 | // Compute, for each row of X, the "least extreme" differences. This
 12 | // should output the same result as t(apply(X,1,le.diff)), but faster.
 13 | //
 14 | // [[Rcpp::depends(RcppArmadillo)]]
 15 | // [[Rcpp::export]]
 16 | arma::mat le_diff_rcpp (const arma::mat& X) {
 17 |   unsigned int n = X.n_rows;
 18 |   unsigned int m = X.n_cols;
 19 |   mat Y(n,m);
 20 |   vec x(m);
 21 |   vec y(m);
 22 |   for (unsigned int i = 0; i < n; i++) {
 23 |     x = trans(X.row(i));
 24 |     le_diff(x,y);
 25 |     Y.row(i) = trans(y);
 26 |   }
 27 |   return Y;
 28 | }
 29 | 
 30 | // This is used to implement x_over_tcrossprod.
 31 | // 
 32 | // [[Rcpp::export]]
 33 | arma::vec x_over_crossprod_rcpp (const arma::vec& i, const arma::vec& j,
 34 | 				 const arma::vec& x, const arma::mat& A, 
 35 | 				 const arma::mat& B, double e) {
 36 |   unsigned int n = x.n_elem;
 37 |   vec y = x;
 38 |   for (unsigned int t = 0; t < n; t++)
 39 |     y(t) /= (dot(A.col(i(t)),B.col(j(t))) + e);
 40 |   return y;
 41 | }
 42 | 
 43 | // For vector x, return a vector of the same length y containing the
 44 | // "least extreme" differences y(i) = x(i) - x(j), in which j is the
 45 | // index not equal to i such that abs(x(i) - x(j)) is the smallest
 46 | // possible.
 47 | void le_diff (const vec& x, vec& y) {
 48 |   unsigned int n = x.n_elem;
 49 |   if (n == 2) {
 50 |     y(0) = x(0) - x(1);
 51 |     y(1) = -y(0);
 52 |   } else {
 53 |     uvec indices = sort_index(x);
 54 |     unsigned int i, j, k;
 55 |     double a, b;
 56 |     i = indices(0);
 57 |     j = indices(1);
 58 |     y(i) = x(i) - x(j);
 59 |     i = indices(n-1);
 60 |     j = indices(n-2);
 61 |     y(i) = x(i) - x(j);
 62 |     for (unsigned int t = 1; t < n-1; t++) {
 63 |       i = indices(t-1);
 64 |       j = indices(t);
 65 |       k = indices(t+1);
 66 |       a = x(j) - x(i);
 67 |       b = x(k) - x(j);
 68 |       if (a <= b)
 69 |         y(j) = x(j) - x(i);
 70 |       else
 71 |         y(j) = x(j) - x(k);
 72 |     }
 73 |   }
 74 | }
 75 | 
 76 | // Return the row indices of the nonzeros in the jth column of sparse
 77 | // matrix A. This is the same as
 78 | //
 79 | //  i = find(A.col(j))
 80 | //
 81 | // but this code does not compile in some versions of gcc, so I
 82 | // re-implemented this code here. Vector i must already been
 83 | // initialized with the proper length, e.g., by doing
 84 | //
 85 | //   vec a = nonzeros(A.col(j));
 86 | //   unsigned int n = a.n_elem;
 87 | //   uvec i(n);
 88 | //   getcolnonzeros(A,i,j);
 89 | //
 90 | void getcolnonzeros (const sp_mat& A, uvec& i, unsigned int j) {
 91 |   sp_mat::const_col_iterator ai = A.begin_col(j);
 92 |   sp_mat::const_col_iterator an = A.end_col(j);
 93 |   for (unsigned int t = 0; ai != an; ++ai, ++t)
 94 |     i(t) = ai.row();
 95 | }
 96 | 
 97 | // Scale each column A[,i] by b[i].
 98 | void scalecols (mat& A, const vec& b) {
 99 |   rowvec c = trans(b);
100 |   A.each_row() %= c;
101 | }
102 | 
103 | // Normalize each row of A so that the entries in each row sum to 1.
104 | void normalizerows (mat& A) {
105 |   vec b = conv_to<vec>::from(sum(A,1));
106 |   A.each_col() /= b;
107 | }
108 | 
109 | // Normalize each column of A so that the entries in each column sum to 1.
110 | void normalizecols (mat& A) {
111 |   rowvec b = sum(A,0);
112 |   A.each_row() /= b;
113 | }
114 | 
115 | // Scale each row of A so that the largest entry in each row is 1.
116 | void normalizerowsbymax (mat& A) {
117 |   vec b = conv_to<vec>::from(max(A,1));
118 |   A.each_col() /= b;
119 | }
120 | 


--------------------------------------------------------------------------------
/src/misc.h:
--------------------------------------------------------------------------------
 1 | #ifndef INCLUDE_MISC
 2 | #define INCLUDE_MISC
 3 | 
 4 | #include <RcppArmadillo.h>
 5 | 
 6 | #define maximum(a,b) ((a) > (b) ? (a) : (b))
 7 | #define minimum(a,b) ((a) < (b) ? (a) : (b))
 8 | 
 9 | // FUNCTION DECLARATIONS
10 | // ---------------------
11 | void getcolnonzeros     (const arma::sp_mat& A, arma::uvec& i, unsigned int j);
12 | void scalecols          (arma::mat& A, const arma::vec& b);
13 | void normalizerows      (arma::mat& A);
14 | void normalizecols      (arma::mat& A);
15 | void normalizerowsbymax (arma::mat& A);
16 | 
17 | #endif
18 | 


--------------------------------------------------------------------------------
/src/mixem.cpp:
--------------------------------------------------------------------------------
  1 | #include "mixem.h"
  2 | #include "misc.h"
  3 | 
  4 | using namespace arma;
  5 | 
  6 | // FUNCTION DECLARATIONS
  7 | // ---------------------
  8 | void mixem_update (const arma::mat& L1, const arma::vec& w,
  9 | 		   arma::vec& x, arma::mat& P);
 10 | 
 11 | // FUNCTION DEFINITIONS
 12 | // --------------------
 13 | // This is mainly used for testing the mixem C++ function.
 14 | //
 15 | // [[Rcpp::depends(RcppArmadillo)]]
 16 | // [[Rcpp::export]]
 17 | arma::vec mixem_rcpp (const arma::mat& L, const arma::vec& w,
 18 | 		      const arma::vec& x0, unsigned int numiter) {
 19 |   return mixem(L,w,x0,numiter);
 20 | }
 21 | 
 22 | // Compute a maximum-likelihood estimate (MLE) of the mixture
 23 | // proportions in the multinomial mixture model by iterating the EM
 24 | // updates for a fixed number of iterations.
 25 | //
 26 | // Input argument L is an n x m matrix with non-negative entries;
 27 | // input w is a vector of length n containing a non-negative "weight"
 28 | // associated with each row of L; input argument x0 is the initial
 29 | // estimate of the mixture proportions; input P is a matrix of the
 30 | // same dimension as L, and is used to store the posterior mixture
 31 | // assignment probabilities; and input "numiter" specifies the number
 32 | // of EM updates to perform.
 33 | //
 34 | // The return value is a vector of length m containing the updated
 35 | // mixture proportions.
 36 | //
 37 | // Note that x0 and L need not be normalized; they will automatically
 38 | // be normalized inside this function.
 39 | //
 40 | // Also note that it does not make sense to compute a MLE of the
 41 | // mixture proportions when n < 2 and/or when m < 2; mixem will supply
 42 | // a result in such cases, but the result will not be valid.
 43 | vec mixem (const mat& L, const vec& w, const vec& x0, unsigned int numiter) {
 44 |   mat L1 = L;
 45 |   mat P  = L;
 46 |   vec x  = x0;
 47 |   normalizecols(L1);
 48 |   mixem(L1,w,x,P,numiter);
 49 |   return x;
 50 | }
 51 | 
 52 | // Use this variant of mixem if you plan on using the same L matrix
 53 | // multiple times, or for calling mixem multiple times with matrices
 54 | // of the same dimension. In the first case, you can reuse the L1 and
 55 | // P matrices; in the latter case, you can reuse the P matrix.
 56 | //
 57 | // For the result to be valid, the matrix L1 should be normalized
 58 | // beforehand so that each column sums to 1. P should be a matrix of
 59 | // the same size as L1.
 60 | //
 61 | // Note that x need not be normalized; it will automatically be
 62 | // normalized inside this function.
 63 | //
 64 | // Also note that in this mixem variant, L1 and w do not need to
 65 | // contain all the data; any rows of L1 associated with zero weights
 66 | // have no effect, so only the vector of nonzero weights w, and the
 67 | // rows of L1 associated with those weights, need to be supplied.
 68 | void mixem (const mat& L1, const vec& w, vec& x, mat& P, 
 69 |             unsigned int numiter) {
 70 |   for (unsigned int i = 0; i < numiter; i++)
 71 |     mixem_update(L1,w,x,P);
 72 | }
 73 | 
 74 | // Perform a single EM update. For this update to be valid, the matrix
 75 | // L1 should be normalized beforehand so that each column sums to 1.
 76 | // Note that x need not be normalized; it will automatically be
 77 | // normalized inside this function.
 78 | void mixem_update (const mat& L1, const vec& w, vec& x, mat& P) {
 79 |   double e = 1e-15;
 80 | 
 81 |   // Normalize the "weights".
 82 |   vec w1 = w/sum(w);
 83 | 
 84 |   // Normalize the mixture proportions.
 85 |   x /= sum(x);
 86 |   
 87 |   // Compute the posterior mixture assignment probabilities. A small
 88 |   // number is added to the posterior probabilities to prevent any
 89 |   // divisions by zero. This is the "E step".
 90 |   P = L1;
 91 |   scalecols(P,x);
 92 |   normalizerowsbymax(P);
 93 |   P += e;
 94 |   normalizerows(P);
 95 |     
 96 |   // Update the mixture weights. This is the "M step".
 97 |   x = trans(P) * w1;
 98 | }
 99 | 
100 | // Find the maximum-likelihood estimate (MLE) for the special case
101 | // when only one of the weights (w) is positive. Here, L1 should be
102 | // the column-normalized matrix, and i should be the index of the
103 | // nonzero weight.
104 | void mixture_one_nonzero (const mat& L1, unsigned int i, vec& x) {
105 |   unsigned int j = index_max(L1.row(i));
106 |   x.fill(0);
107 |   x(j) = 1;
108 | }
109 | 


--------------------------------------------------------------------------------
/src/mixem.h:
--------------------------------------------------------------------------------
 1 | #ifndef INCLUDE_MIXEM
 2 | #define INCLUDE_MIXEM
 3 | 
 4 | #include <RcppArmadillo.h>
 5 | 
 6 | // FUNCTION DECLARATIONS
 7 | // ---------------------
 8 | arma::vec mixem (const arma::mat& L, const arma::vec& w, const arma::vec& x0,
 9 | 		 unsigned int numiter);
10 | 
11 | void mixem (const arma::mat& L1, const arma::vec& w, arma::vec& x,
12 | 	    arma::mat& P, unsigned int numiter);
13 | 
14 | void mixture_one_nonzero (const arma::mat& L1, unsigned int i, arma::vec& x);
15 | 
16 | #endif
17 | 


--------------------------------------------------------------------------------
/src/poismix.h:
--------------------------------------------------------------------------------
 1 | #ifndef INCLUDE_POISMIXEM
 2 | #define INCLUDE_POISMIXEM
 3 | 
 4 | #include <RcppArmadillo.h>
 5 | 
 6 | // FUNCTION DECLARATIONS
 7 | // ---------------------
 8 | arma::vec poismixem (const arma::mat& L, const arma::vec& w, 
 9 | 		     const arma::vec& x0, unsigned int numiter);
10 | 
11 | void poismixem (const arma::mat& L1, const arma::vec& u, const arma::vec& w, 
12 | 		arma::vec& x, arma::mat& P, unsigned int numiter);
13 | 
14 | void poismixem (const arma::mat& L1, const arma::vec& u, const arma::vec& w,
15 | 		const arma::uvec& i, arma::vec& x, unsigned int numiter);
16 | 
17 | arma::vec scd_kl_update (const arma::mat& L, const arma::vec& w,
18 | 			 const arma::vec& x0, unsigned int numiter, 
19 | 			 double e);
20 | 
21 | arma::vec scd_kl_update (const arma::mat& L, const arma::vec& u,
22 | 			 const arma::vec& w, const arma::vec& x0,
23 | 			 unsigned int numiter, double e);
24 | 
25 | arma::vec ccd_kl_update (const arma::mat& L, const arma::vec& w,
26 | 			 const arma::vec& x0, double e);
27 | 
28 | arma::vec ccd_kl_update (const arma::mat& L, const arma::vec& u,
29 | 			 const arma::vec& w, const arma::vec& x0,
30 | 			 double e);
31 | 
32 | #endif
33 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(fastTopics)
3 | test_check("fastTopics")
4 | 


--------------------------------------------------------------------------------
/tests/testthat/test_fit_multinom_model.R:
--------------------------------------------------------------------------------
 1 | context("fit_multinom_model")
 2 | 
 3 | test_that("fit_multinom_model gives correct factor estimates",{
 4 | 
 5 |   # Simulate a "toy" gene expression data set.
 6 |   set.seed(1)
 7 |   n   <- 400
 8 |   m   <- 40
 9 |   k   <- 3
10 |   out <- simulate_toy_gene_data(n,m,k,s = 1000)
11 |   X   <- out$X
12 |   Y   <- as(X,"CsparseMatrix")
13 | 
14 |   # Force "hard" topic assignments.
15 |   cluster <- factor(apply(force_hard_topic_assignments(out$L),1,which.max))
16 |   levels(cluster) <- paste0("k",1:k)
17 | 
18 |   # Fit the simple multinomial model.
19 |   fit1 <- fit_multinom_model(cluster,X)
20 |   fit2 <- fit_multinom_model(cluster,Y)
21 |   
22 |   # Both calls to fit_multinom_model should result in nearly the same
23 |   # loadings.
24 |   expect_equal(fit1$L,fit2$L,scale = 1,tolerance = 1e-15)
25 | 
26 |   # Check that both calls to fit_multinom_model recover the
27 |   # maximum-likelihood estimates (MLEs) of the factors (F) and "size
28 |   # factors" (s).
29 |   s <- rowSums(X)
30 |   F <- matrix(0,m,k)
31 |   for (j in 1:k) {
32 |     i     <- which(cluster == levels(cluster)[j])
33 |     F[,j] <- colSums(X[i,])/sum(X[i,])
34 |   }
35 |   expect_equal(fit1$s,s,scale = 1,tolerance = 1e-6)
36 |   expect_equal(fit2$s,s,scale = 1,tolerance = 1e-6)
37 |   expect_equivalent(fit1$F,F,scale = 1,tolerance = 1e-15)
38 |   expect_equivalent(fit2$F,F,scale = 1,tolerance = 1e-15)
39 | })
40 | 
41 | 


--------------------------------------------------------------------------------
/tests/testthat/test_fit_topic_model.R:
--------------------------------------------------------------------------------
 1 | context("fit_poisson_nmf")
 2 | 
 3 | test_that("fit_topic_model successfully fits a multinomial topic model",{
 4 | 
 5 |   # Generate a 80 x 100 sparse count matrix to factorize.
 6 |   set.seed(1)
 7 |   out <- simulate_count_data(80,100,k = 3,sparse = TRUE)
 8 |   X   <- out$X
 9 | 
10 |   # Fit a multinomial topic model to these data.
11 |   capture.output(fit <- fit_topic_model(X,k = 3))
12 |   expect_s3_class(fit,"multinom_topic_model_fit")
13 |   expect_s3_class(summary(fit),"summary.multinom_topic_model_fit")
14 | })
15 | 


--------------------------------------------------------------------------------
/tests/testthat/test_mixem.R:
--------------------------------------------------------------------------------
 1 | context("mixem")
 2 | 
 3 | test_that("mixem and mixem_rcpp produce same result",{
 4 | 
 5 |   # Generate small data set.
 6 |   set.seed(1)
 7 |   out <- generate_poismix_data(100,c(1,2,0,0,0,4,0,0))
 8 |   L   <- out$L
 9 |   w   <- out$w
10 |   
11 |   # Run 100 EM updates for the multinomial mixture model. The R and
12 |   # C++ implementations should give nearly the same result.
13 |   m  <- ncol(L)
14 |   x0 <- runif(m)
15 |   x1 <- mixem(L,w,x0,100)
16 |   x2 <- drop(mixem_rcpp(L,w,x0,100))
17 |   expect_equal(x1,x2,tolerance = 1e-12,scale = 1)
18 | })
19 | 
test_that("mixem and mixem_rcpp produce correct result when sum(w > 0) = 1",{

  # Simulate the data set, then replace the weights so that only the
  # 8th one is nonzero.
  set.seed(1)
  n   <- 10
  dat <- generate_poismix_data(n,c(1,2,0,0))
  w   <- replace(rep(0,n),8,2)

  # Exact solution for the case of a single nonzero weight.
  x_exact <- mixture.one.nonzero(dat$L,w)

  # Run 20 EM updates for the multinomial mixture model, starting from
  # the exact solution.
  x_r   <- mixem(dat$L,w,x_exact,20)
  x_cpp <- drop(mixem_rcpp(dat$L,w,x_exact,20))

  # The EM updates should leave the solution essentially unchanged.
  expect_equal(x_exact,x_r,tolerance = 1e-12,scale = 1)
  expect_equal(x_exact,x_cpp,tolerance = 1e-12,scale = 1)
})
41 | 


--------------------------------------------------------------------------------
/tests/testthat/test_plots.R:
--------------------------------------------------------------------------------
  1 | context("plots")
  2 | 
  3 | test_that("Test that plot_loglik_vs_rank works",{
  4 |   set.seed(1)
  5 |   dat <- generate_test_data(80,100,3)
  6 |   X   <- dat$X
  7 | 
  8 |   # Fit matrix factorizations with rank k = 2, 3, 5, 10.
  9 |   capture.output(fit2 <- fit_poisson_nmf(X,k = 2,numiter = 100))
 10 |   capture.output(fit3 <- fit_poisson_nmf(X,k = 3,numiter = 100))
 11 |   capture.output(fit5 <- fit_poisson_nmf(X,k = 5,numiter = 100))
 12 |   capture.output(fit10 <- fit_poisson_nmf(X,k = 10,numiter = 100))
 13 | 
 14 |   # Plot log-likelihood vs. rank.
 15 |   p1 <- plot_loglik_vs_rank(list(fit2,fit3,fit5,fit10))
 16 |   p2 <- plot_loglik_vs_rank(lapply(list(fit2,fit3,fit5,fit10),
 17 |                                    poisson2multinom))
 18 |   expect_s3_class(p1,"ggplot")
 19 |   expect_s3_class(p2,"ggplot")
 20 | })
 21 | 
 22 | test_that("Test that pca_plot and pca_hexbin_plot work",{
 23 |   set.seed(1)
 24 |   k <- 3
 25 |   X <- simulate_toy_gene_data(n = 400,m = 40,k = k,s = 1000)$X
 26 |   capture.output(fit1 <- fit_poisson_nmf(X,k = k,numiter = 100,
 27 |                                          control = list(extrapolate = TRUE)))
 28 |   fit2 <- poisson2multinom(fit1)
 29 |   
 30 |   # Test pca_plot.
 31 |   p1 <- pca_plot(fit1)
 32 |   p2 <- pca_plot(fit2)
 33 |   p3 <- pca_hexbin_plot(fit2)
 34 |   p4 <- pca_hexbin_plot(fit2)
 35 |   expect_s3_class(p1,"ggplot")
 36 |   expect_s3_class(p2,"ggplot")
 37 |   expect_s3_class(p3,"ggplot")
 38 |   expect_s3_class(p4,"ggplot")
 39 | 
 40 |   # Test the other variants of pca_plot.
 41 |   y <- factor(apply(fit2$L,1,which.max))
 42 |   levels(y) <- paste0("k",1:k)
 43 |   p5 <- pca_plot(fit1,fill = "none")
 44 |   p6 <- pca_plot(fit1,fill = fit2$L[,1])
 45 |   p7 <- pca_plot(fit1,fill = y)
 46 |   expect_s3_class(p5,"ggplot")
 47 |   expect_s3_class(p6,"ggplot")
 48 |   expect_s3_class(p7,"ggplot")
 49 | })
 50 | 
 51 | test_that("Test that other plotting functions work",{
 52 |   set.seed(1)
 53 |   dat <- generate_test_data(200,100,3)
 54 |   X   <- dat$X
 55 |   capture.output(fit0 <- init_poisson_nmf(X,k = 3))
 56 |   capture.output(
 57 |     fit1 <- fit_poisson_nmf(X,fit0 = fit0,numiter = 50,method = "scd",
 58 |                             control = list(extrapolate = TRUE)))
 59 |   capture.output(
 60 |     fit2 <- fit_poisson_nmf(X,fit0 = fit0,numiter = 50,method = "em",
 61 |                             control = list(extrapolate = TRUE)))
 62 | 
 63 |   # Test plot_progress.
 64 |   plot_progress(list(scd = fit1,em = fit2),y = "loglik")
 65 |   plot_progress(list(scd = fit1,em = fit2),y = "dev")
 66 |   plot_progress(list(scd = fit1,em = fit2),y = "res")
 67 |   plot_progress(list(scd = poisson2multinom(fit1),em = poisson2multinom(fit2)))
 68 |   
 69 |   # Test loadings_plot.
 70 |   x  <- factor(sample(1:4,200,replace = TRUE))
 71 |   p1 <- loadings_plot(fit1,x)
 72 |   p2 <- loadings_plot(poisson2multinom(fit1),x)
 73 |   expect_s3_class(p1,"ggplot")
 74 |   expect_s3_class(p2,"ggplot")
 75 | 
 76 |   # Test structure_plot.
 77 |   grouping <- factor(apply(poisson2multinom(fit1)$L,1,which.max))
 78 |   capture.output(y <- drop(tsne_from_topics(poisson2multinom(fit1),dims = 1)))
 79 |   capture.output(p1 <- structure_plot(fit1))
 80 |   capture.output(p2 <- structure_plot(fit1,grouping = grouping,gap = 5))
 81 |   capture.output(p3 <- structure_plot(poisson2multinom(fit1)$L))
 82 |   capture.output(p4 <- structure_plot(fit1$L))
 83 |   capture.output(p5 <- structure_plot(fit1,loadings_order = order(y)))
 84 |   expect_s3_class(p1,"ggplot")
 85 |   expect_s3_class(p2,"ggplot")
 86 |   expect_s3_class(p3,"ggplot")
 87 |   expect_s3_class(p4,"ggplot")
 88 |   expect_s3_class(p5,"ggplot")
 89 | 
 90 |   # Test the "plot" S3 method (which creates a Structure plot).
 91 |   fit2 <- poisson2multinom(fit1)
 92 |   capture.output(p1 <- plot(fit1))
 93 |   capture.output(p2 <- plot(fit2))
 94 |   expect_s3_class(p1,"ggplot")
 95 |   expect_s3_class(p2,"ggplot")
 96 | 
 97 |   skip_if(on_cran)
 98 |   
 99 |   # Test tsne_plot and umap_plot.
100 |   capture.output(p1 <- tsne_plot(fit1,fill = "loading"))
101 |   capture.output(p2 <- tsne_plot(fit2,fill = "loading"))
102 |   capture.output(p3 <- umap_plot(fit1,fill = "loading",verbose = FALSE))
103 |   capture.output(p4 <- umap_plot(fit2,fill = "loading",verbose = FALSE))
104 |   expect_s3_class(p1,"ggplot")
105 |   expect_s3_class(p2,"ggplot")
106 |   expect_s3_class(p3,"ggplot")
107 |   expect_s3_class(p4,"ggplot")
108 | })
109 | 


--------------------------------------------------------------------------------
/tests/testthat/test_poismix.R:
--------------------------------------------------------------------------------
  1 | context("poismix")
  2 | 
  3 | test_that("poismixem and poismixem_rcpp produce same result",{
  4 | 
  5 |   # Generate small data set.
  6 |   set.seed(1)
  7 |   out <- generate_poismix_data(100,c(1,2,0,0,0,4,0,0))
  8 |   L   <- out$L
  9 |   w   <- out$w
 10 |   
 11 |   # Run 100 EM updates for the Poisson mixture model. The R
 12 |   # implementation, and all variations of the C++ implementation,
 13 |   # should give nearly the same result.
 14 |   numiter <- 100
 15 |   m  <- ncol(L)
 16 |   L1 <- normalize.cols(L)
 17 |   u  <- colSums(L)
 18 |   i  <- which(w > 0)
 19 |   x0 <- runif(m)
 20 |   x1 <- poismixem(L,w,x0,numiter)
 21 |   x2 <- drop(poismixem_rcpp(L,w,x0,numiter))
 22 |   x3 <- drop(poismixem2_rcpp(L1,w,u,x0,numiter))
 23 |   x4 <- drop(poismixem3_rcpp(L1,w[i],u,i-1,x0,numiter))
 24 |   expect_equal(x1,x2,tolerance = 1e-14,scale = 1)
 25 |   expect_equal(x1,x3,tolerance = 1e-14,scale = 1)
 26 |   expect_equal(x1,x4,tolerance = 1e-14,scale = 1)
 27 | })
 28 | 
 29 | test_that(paste("poismixem, scd_kl_update and ccd_kl_update give nearly the",
 30 |                 "same solution"),{
 31 | 
 32 |   # Generate small data set.
 33 |   set.seed(1)
 34 |   out <- generate_poismix_data(100,c(1,2,0,0,0,4,0,0))
 35 |   L   <- out$L
 36 |   w   <- out$w
 37 | 
 38 |   # Run 10,000 EM updates.
 39 |   m  <- ncol(L)
 40 |   x0 <- runif(m)
 41 |   x1 <- drop(poismixem_rcpp(L,w,x0,1e4))
 42 | 
 43 |   # Run 100 sequential coordinate descent (SCD) updates, using both
 44 |   # C++ interfaces.
 45 |   numiter <- 100
 46 |   L1 <- normalize.cols(L)
 47 |   u  <- colSums(L)
 48 |   i  <- which(w > 0)
 49 |   x2 <- drop(scd_kl_update_rcpp(L,w,x0,numiter,1e-15))
 50 |   x3 <- drop(scd_kl_update2_rcpp(L[i,],u,w[i],x0,numiter,1e-15))
 51 | 
 52 |   # Run 100 cyclic coordinate descent (CCD) updates, using both C++
 53 |   # interfaces.
 54 |   x4 <- drop(ccd_kl_update_rcpp(L,w,x0,numiter,1e-15))
 55 |   x5 <- drop(ccd_kl_update2_rcpp(L[i,],u,w[i],x0,numiter,1e-15))
 56 |   
 57 |   # The coordinatewise updates should recover nearly the same solution
 58 |   # as mix-SQP, and should give the same results whether the "dense"
 59 |   # or "sparse" updates are used.
 60 |   expect_equal(x1,x2,tolerance = 1e-5,scale = 1)
 61 |   expect_equal(x1,x4,tolerance = 1e-5,scale = 1)
 62 |   expect_equal(x2,x3,tolerance = 1e-14,scale = 1)
 63 |   expect_equal(x4,x5,tolerance = 1e-14,scale = 1)
 64 | })
 65 | 
 66 | test_that(paste("poismixem and poismixem_rcpp produce correct result",
 67 |                 "when sum(w > 0) = 1"),{
 68 | 
 69 |   # Generate the data set.
 70 |   set.seed(1)
 71 |   n    <- 10
 72 |   out  <- generate_poismix_data(n,c(1,2,0,0))
 73 |   L    <- out$L
 74 |   w    <- rep(0,n)
 75 |   i    <- 8
 76 |   w[i] <- 2
 77 | 
 78 |   # Run 100 EM updates for the multinomial mixture model.
 79 |   numiter <- 100
 80 |   m  <- ncol(L)
 81 |   x0 <- runif(m)
 82 |   x1 <- poismixem(L,w,x0,numiter)
 83 | 
 84 |   # Run 100 EM updates another a few times, using the different C++
 85 |   # interfaces.
 86 |   L1 <- normalize.cols(L)
 87 |   u  <- colSums(L)
 88 |   x2 <- drop(poismixem_rcpp(L,w,x0,numiter))
 89 |   x3 <- drop(poismixem2_rcpp(L1,w,u,x0,numiter))
 90 |   x4 <- drop(poismixem3_rcpp(L1,w[i],u,i-1,x0,numiter))
 91 |   
 92 |   # The R and C++ implementations should give nearly the same result,
 93 |   # and should be very close to the exact solution obtained by calling
 94 |   # poismix.one.nonzero.
 95 |   x5 <- poismix.one.nonzero(L,w)
 96 |   expect_equal(x1,x2,tolerance = 1e-12,scale = 1)
 97 |   expect_equal(x1,x3,tolerance = 1e-12,scale = 1)
 98 |   expect_equal(x1,x4,tolerance = 1e-12,scale = 1)
 99 |   expect_equal(x1,x5,tolerance = 1e-12,scale = 1)
100 | })
101 | 


--------------------------------------------------------------------------------
/tests/testthat/test_poisson2multinom.R:
--------------------------------------------------------------------------------
 1 | context("poisson2multinom")
 2 | 
 3 | test_that("poisson2multinom gives error when k = 1",{
 4 |   L   <- matrix(0:3,4,1)
 5 |   F   <- matrix(0:4,5,1)
 6 |   fit <- list(F = F,L = L)
 7 |   expect_error(poisson2multinom(fit))
 8 | })
 9 | 
test_that("poisson2multinom correctly scales factors and loadings",{

  # Build a tiny Poisson NMF fit by hand: 4 x 2 loadings and 5 x 2
  # factors, with row and column names.
  loadings <- matrix(0:7,4,2)
  factors  <- matrix(0:9,5,2)
  dimnames(loadings) <- list(paste0("i",1:4),paste0("k",1:2))
  dimnames(factors)  <- list(paste0("j",1:5),paste0("k",1:2))
  pnmf <- list(F = factors,L = loadings)
  class(pnmf) <- c("poisson_nmf_fit","list")

  # After the conversion, each column of F and each row of L should
  # sum to 1.
  multinom <- poisson2multinom(pnmf)
  expect_equivalent(colSums(multinom$F),c(1,1))
  expect_equivalent(rowSums(multinom$L),c(1,1,1,1))
})
23 | 
test_that("multinom2poisson recovers original Poisson NMF model fit",{

  # Simulate counts, then refine the simulated factors and loadings
  # with 100 rounds of the betanmf updates to get a Poisson NMF fit.
  set.seed(1)
  sim <- simulate_count_data(10,20,3)
  X   <- sim$X
  update_factors  <- function (X,F,L) t(betanmf_update_factors(X,L,t(F)))
  update_loadings <- function (X,F,L) betanmf_update_loadings(X,L,t(F))
  pnmf <- iterate_updates(X,sim$F,sim$L,100,update_factors,update_loadings)
  class(pnmf) <- c("poisson_nmf_fit","list")

  # Convert to a multinomial topic model fit, and also make a copy
  # without the "s" element.
  multinom        <- poisson2multinom(pnmf)
  multinom_no_s   <- multinom
  multinom_no_s$s <- NULL

  # Convert back in three ways: using the stored "s", and supplying
  # the counts as a dense or as a sparse matrix instead.
  recovered <- list(multinom2poisson(multinom),
                    multinom2poisson(multinom_no_s,X),
                    multinom2poisson(multinom_no_s,as(X,"CsparseMatrix")))

  # Each recovered fit should reproduce the original low-rank
  # reconstruction tcrossprod(L,F) and the original log-likelihood.
  Y_ref <- with(pnmf,tcrossprod(L,F))
  f_ref <- loglik_poisson_nmf(X,pnmf)
  Y_new <- lapply(recovered,function (fit) with(fit,tcrossprod(L,F)))
  f_new <- lapply(recovered,function (fit) loglik_poisson_nmf(X,fit))
  for (Y in Y_new)
    expect_equal(Y_ref,Y,tolerance = 1e-15,scale = 1)
  for (f in f_new)
    expect_equal(f_ref,f,tolerance = 1e-14,scale = 1)
})
53 | 


--------------------------------------------------------------------------------
/tests/testthat/test_select.R:
--------------------------------------------------------------------------------
 1 | context("select")
 2 | 
 3 | test_that(paste("Select S3 method correctly subsets and re-orders the",
 4 |                 "factors and loadings in a small example; also check ",
 5 |                 "merge_topics"),{
 6 | 
 7 |   # Generate a 80 x 100 data matrix to factorize.
 8 |   set.seed(1)
 9 |   n <- 80
10 |   m <- 100
11 |   k <- 3
12 |   X <- generate_test_data(n,m,k)$X
13 | 
14 |   # Run 20 EM updates.
15 |   capture.output(
16 |     fit <- poisson2multinom(fit_poisson_nmf(X,k = k,numiter = 20,
17 |                                             method = "em")))
18 |   
19 |   # Select and re-order factors and loadings by number (here, we use
20 |   # the "select_loadings" function).
21 |   n0   <- 40
22 |   rows <- sample(n,n0)
23 |   fit1 <- select_loadings(fit,rows)
24 | 
25 |   # Select and re-order factors and loadings by name (here, we use the
26 |   # "select" S3 method).
27 |   rows <- rownames(X)[rows]
28 |   fit2 <- select(fit,rows)
29 |   
30 |   # Check the outputted Poisson NMF fits.
31 |   expect_equal(dim(fit1$L),c(n0,k))
32 |   expect_equal(dim(fit2$L),c(n0,k))
33 |   expect_equal(length(fit1$s),n0)
34 |   expect_equal(length(fit2$s),n0)
35 |   expect_equal(rownames(fit2$L),rows)
36 |   expect_equal(names(fit2$s),rows)
37 | 
38 |   # An error is thrown when the selected loadings do not exist.
39 |   expect_error(select(fit,loadings = n + 1))
40 | 
41 |   # Check that merge_topics does the right thing.
42 |   fit3 <- merge_topics(fit2,k = 1:2)
43 |   fit4 <- merge_topics(fit2,k = c("k1","k2"))
44 |   expect_equal(dim(fit3$F),c(100,2))
45 |   expect_equal(dim(fit3$L),c(40,2))
46 |   expect_equal(colnames(fit3$F),c("k3","k1+k2"))
47 |   expect_equal(colnames(fit3$L),c("k3","k1+k2"))
48 |   expect_equal(fit3,fit4)
49 | })
50 | 
test_that(paste("select followed by poisson2multinom gives the same result",
                "as poisson2multinom followed by select"),{

  # Generate an 80 x 100 data matrix to factorize.
  set.seed(1)
  n <- 80
  m <- 100
  k <- 3
  X <- generate_test_data(n,m,k)$X

  # Run 20 EM updates, then pick 20 rows at random.
  capture.output(pnmf <- fit_poisson_nmf(X,k = k,numiter = 20,method = "em"))
  picked <- sample(n,20)

  # Selecting rows and converting to a multinomial topic model fit
  # should give the same result in either order.
  select_first  <- poisson2multinom(select(pnmf,picked))
  convert_first <- select(poisson2multinom(pnmf),picked)
  expect_equal(select_first,convert_first,scale = 1,tolerance = 1e-15)
})
71 | 


--------------------------------------------------------------------------------
/tests/testthat/test_summary.R:
--------------------------------------------------------------------------------
 1 | context("summary")
 2 | 
 3 | test_that("summary method and print.summary methods produce output",{
 4 | 
 5 |   # Generate a 80 x 100 data matrix to factorize.
 6 |   set.seed(1)
 7 |   out  <- generate_test_data(80,100,3)
 8 |   X    <- out$X
 9 | 
10 |   # Fit a Poisson non-negative factorization.
11 |   capture.output(fit <- fit_poisson_nmf(X,k = 3,numiter = 100))
12 | 
13 |   # Produce summaries of the model fit.
14 |   expect_output(print(summary(fit)))
15 |   expect_output(print(summary(poisson2multinom(fit))))
16 |   expect_output(print(summary(fit),show.mixprops = TRUE,
17 |                       show.topic.reps = TRUE))
18 |   expect_output(print(summary(poisson2multinom(fit)),
19 |                       show.size.factors = TRUE,
20 |                       show.mixprops = TRUE,
21 |                       show.topic.reps = TRUE))
22 | })
23 | 


--------------------------------------------------------------------------------