├── .Rbuildignore ├── .github ├── .gitignore.deploy └── workflows │ ├── check.yml │ ├── comment_master_pr.yml │ └── master_pr_msg.md ├── .gitignore ├── CONTRIBUTORS ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── R ├── RcppExports.R ├── accessors_carp.R ├── accessors_cbass.R ├── carp.R ├── cbass.R ├── clustRviz-package.R ├── data.R ├── logging.R ├── options.R ├── plot_carp.R ├── plot_cbass.R ├── solvers.R ├── util_hcs.R ├── util_plot.R ├── utils.R ├── weights.R └── zzz.R ├── README.Rmd ├── README.md ├── _pkgdown.yml ├── clustRviz.Rproj ├── codecov.yml ├── data ├── authors.rda ├── presidential_speech.rda └── tcga_breast.rda ├── docs ├── 404.html ├── LICENSE-text.html ├── articles │ ├── Acknowledgements.html │ ├── Acknowledgements_files │ │ └── accessible-code-block-0.0.1 │ │ │ └── empty-anchor.js │ ├── Algorithms.html │ ├── Algorithms_files │ │ └── accessible-code-block-0.0.1 │ │ │ └── empty-anchor.js │ ├── Tutorial.html │ ├── Tutorial_files │ │ ├── accessible-code-block-0.0.1 │ │ │ └── empty-anchor.js │ │ ├── colourpicker-1.1 │ │ │ ├── LICENSE │ │ │ ├── colourpicker.min.css │ │ │ └── colourpicker.min.js │ │ ├── crosstalk-1.1.0.1 │ │ │ ├── css │ │ │ │ └── crosstalk.css │ │ │ └── js │ │ │ │ ├── crosstalk.js │ │ │ │ ├── crosstalk.js.map │ │ │ │ ├── crosstalk.min.js │ │ │ │ └── crosstalk.min.js.map │ │ ├── figure-html │ │ │ ├── unnamed-chunk-11-1.png │ │ │ ├── unnamed-chunk-12-1.png │ │ │ ├── unnamed-chunk-13-1.png │ │ │ ├── unnamed-chunk-14-1.png │ │ │ ├── unnamed-chunk-15-1.gif │ │ │ ├── unnamed-chunk-16-1.png │ │ │ ├── unnamed-chunk-17-1.png │ │ │ ├── unnamed-chunk-18-1.gif │ │ │ ├── unnamed-chunk-19-1.png │ │ │ ├── unnamed-chunk-22-1.png │ │ │ ├── unnamed-chunk-23-1.png │ │ │ └── unnamed-chunk-24-1.gif │ │ ├── htmlwidgets-1.5.1 │ │ │ └── htmlwidgets.js │ │ ├── jquery-1.11.3 │ │ │ ├── jquery-AUTHORS.txt │ │ │ ├── jquery.js │ │ │ ├── jquery.min.js │ │ │ └── jquery.min.map │ │ ├── plotly-binding-4.9.2.1 │ │ │ └── plotly.js │ │ ├── plotly-htmlwidgets-css-1.52.2 │ │ │ └── plotly-htmlwidgets.css │ │ ├── plotly-main-1.52.2 │ │ │ └── plotly-latest.min.js │ │ └── typedarray-0.1 │ │ │ └── typedarray.min.js │ ├── Weights.html │ ├── Weights_files │ │ ├── accessible-code-block-0.0.1 │ │ │ └── empty-anchor.js │ │ └── figure-html │ │ │ ├── unnamed-chunk-15-1.png │ │ │ ├── unnamed-chunk-17-1.png │ │ │ ├── unnamed-chunk-20-1.png │ │ │ ├── unnamed-chunk-3-1.png │ │ │ └── unnamed-chunk-9-1.png │ ├── clustRviz.html │ ├── clustRviz_files │ │ ├── accessible-code-block-0.0.1 │ │ │ └── empty-anchor.js │ │ └── figure-html │ │ │ ├── unnamed-chunk-10-1.png │ │ │ ├── unnamed-chunk-11-1.png │ │ │ ├── unnamed-chunk-12-1.gif │ │ │ ├── unnamed-chunk-13-1.png │ │ │ ├── unnamed-chunk-2-1.png │ │ │ ├── unnamed-chunk-3-1.png │ │ │ ├── unnamed-chunk-4-1.png │ │ │ ├── unnamed-chunk-5-1.gif │ │ │ ├── unnamed-chunk-6-1.png │ │ │ ├── unnamed-chunk-8-1.png │ │ │ └── unnamed-chunk-9-1.png │ └── index.html ├── authors.html ├── bootstrap-toc.css ├── bootstrap-toc.js ├── docsearch.css ├── docsearch.js ├── index.html ├── link.svg ├── pkgdown.css ├── pkgdown.js ├── pkgdown.yml └── reference │ ├── CARP-1.png │ ├── CARP.html │ ├── CBASS.html │ ├── accessors_carp.html │ ├── accessors_cbass-1.png │ ├── accessors_cbass.html │ ├── authors.html │ ├── clustRviz.html │ ├── clustRviz_logging.html │ ├── convex_biclustering.html │ ├── convex_clustering.html │ ├── figures │ ├── README-carp_dendro-1.png │ ├── README-cbass-1.png │ ├── README-cbass_heatmap2-1.png │ └── README-cbass_rowdendro-1.png │ ├── index.html │ ├── options.html │ ├── plot_carp-1.png │ ├── plot_carp-2.png │ ├── plot_carp-3.png │ ├── plot_carp.html │ ├── plot_cbass-1.png │ ├── plot_cbass-2.png │ ├── plot_cbass-3.png │ ├── plot_cbass-4.png │ ├── plot_cbass.html │ ├── presidential_speech.html │ ├── print.ConvexBiClustering.html │ ├── print.ConvexClustering.html │ ├── print_carp.html │ ├── print_cbass.html │ ├── rbf_kernel_weights.html │ └── tcga_breast.html ├── inst └── CITATION ├── man ├── CARP.Rd ├── CBASS.Rd ├── accessors_carp.Rd ├── accessors_cbass.Rd ├── authors.Rd ├── clustRviz.Rd ├── clustRviz_logging.Rd ├── convex_biclustering.Rd ├── convex_clustering.Rd ├── figures │ ├── README-carp_dendro-1.png │ ├── README-cbass-1.png │ ├── README-cbass_heatmap2-1.png │ └── README-cbass_rowdendro-1.png ├── options.Rd ├── plot_carp.Rd ├── plot_cbass.Rd ├── presidential_speech.Rd ├── print.ConvexBiClustering.Rd ├── print.ConvexClustering.Rd ├── print_carp.Rd ├── print_cbass.Rd ├── rbf_kernel_weights.Rd └── tcga_breast.Rd ├── src ├── .gitignore ├── Makevars ├── Makevars.win ├── RcppExports.cpp ├── alg_reg_policies.h ├── biclustering_impl.h ├── clustRviz.cpp ├── clustRviz.h ├── clustRviz_base.h ├── clustRviz_logging.cpp ├── clustRviz_logging.h ├── clustering_impl.h ├── get_cluster_assignments.cpp ├── optim_policies.h ├── status.h └── utils.cpp ├── tests ├── test_admm.R ├── test_carp_accessors.R ├── test_carp_error_handling.R ├── test_carp_misc.R ├── test_carp_missing_data.R ├── test_carp_plot_static.R ├── test_carp_print.R ├── test_carp_smoke.R ├── test_carp_sparsity_pattern.R ├── test_carp_weights.R ├── test_cbass_accessors.R ├── test_cbass_error_handling.R ├── test_cbass_misc.R ├── test_cbass_missing_data.R ├── test_cbass_plot_static.R ├── test_cbass_print.R ├── test_cbass_smoke.R ├── test_cbass_sparsity_pattern.R ├── test_cbass_weights.R ├── test_clustering_assignments_cpp.R ├── test_logging.R ├── test_matrix_prox.R ├── test_options.R ├── test_solver_biclustering.R ├── test_solver_clustering.R ├── test_tensor_projection.R ├── test_utils.R ├── test_weights.R └── testthat │ ├── helper_clustrviz_tests.R │ ├── test_admm.R │ ├── test_carp_accessors.R │ ├── test_carp_error_handling.R │ ├── test_carp_misc.R │ ├── test_carp_missing_data.R │ ├── test_carp_plot_static.R │ ├── test_carp_print.R │ ├── test_carp_smoke.R │ ├── test_carp_sparsity_pattern.R │ ├── test_carp_weights.R │ ├── test_cbass_accessors.R │ ├── test_cbass_error_handling.R │ ├── test_cbass_misc.R │ ├── test_cbass_missing_data.R │ ├── test_cbass_plot_static.R │ ├── test_cbass_print.R │ ├── test_cbass_smoke.R │ ├── test_cbass_sparsity_pattern.R │ ├── test_cbass_weights.R │ ├── test_clustering_assignments_cpp.R │ ├── test_logging.R │ ├── test_matrix_prox.R │ ├── test_options.R │ ├── test_solver_biclustering.R │ ├── test_solver_clustering.R │ ├── test_tensor_projection.R │ ├── test_utils.R │ └── test_weights.R └── vignettes ├── Acknowledgements.Rmd ├── Algorithms.Rmd ├── Tutorial.Rmd ├── Weights.Rmd ├── clustRviz.Rmd └── vignettes.bib /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ## CI Files 2 | ^.github$ 3 | ^codecov\.yml$ 4 | ^appveyor\.yml$ 5 | 6 | ## RStudio Files 7 | ^.*\.Rproj$ 8 | ^\.Rproj\.user$ 9 | 10 | ## Pkgdown docs 11 | _pkgdown.yml 12 | ^docs$ 13 | 14 | ## GH files (not part of package) 15 | .gitignore.deploy 16 | README.Rmd 17 | ^data-raw$ 18 | 19 | ## Other autogenerated files 20 | ^README-.*\.png$ 21 | figs/* 22 | 23 | ## Misc other files 24 | LICENSE 25 | CONTRIBUTORS 26 | -------------------------------------------------------------------------------- /.github/.gitignore.deploy: -------------------------------------------------------------------------------- 1 | # Travis deploy file 2 | deployed* 3 | 4 | # RStudio and R working files 5 | .Rproj.user 6 | .Rhistory 7 | .RData 8 | .Ruserdata 9 | *.Rproj 10 | vignettes/*_cache/ 11 | vignettes/*_files/ 12 | vignettes/*.R 13 | 14 | # Platform specific compiled files 15 | src/*.gcno 16 | src/*.gcda 17 | src/*.o 18 | src/*.so 19 | src/*.dll 20 | -------------------------------------------------------------------------------- /.github/workflows/comment_master_pr.yml: -------------------------------------------------------------------------------- 1 | name: Tell Users to Re-Target PRs on Master 2 | on: 3 | pull_request: 4 | branches: 5 | - master 6 | 7 | jobs: 8 | comment: 9 | name: Leave Comments 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@master 13 | - uses: harupy/comment-on-pr@master 14 | env: 15 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 16 | with: 17 | filename: master_pr_msg.md 18 | -------------------------------------------------------------------------------- /.github/workflows/master_pr_msg.md: -------------------------------------------------------------------------------- 1 | Hi! 2 | 3 | Thank you for your Pull Request! 4 | 5 | It looks like you have set the target branch of your PR to `master.` 6 | In this repo, the `master` branch is used for stable versions only and 7 | development is done on the `develop` branch. 8 | 9 | In order to help us most efficiently review and merge your PR, would you 10 | please change the base branch to `develop`? This will activate our 11 | continuous integration testing and make it easier to merge your PR. 12 | 13 | Instructions for changing the base branch can be found at 14 | https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/changing-the-base-branch-of-a-pull-request 15 | 16 | Thanks! 17 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Travis deploy file 2 | deployed* 3 | 4 | # RStudio and R working files 5 | .Rproj.user 6 | .Rhistory 7 | .RData 8 | .Ruserdata 9 | *.Rproj 10 | vignettes/*_cache/ 11 | vignettes/*_files/ 12 | vignettes/*.R 13 | 14 | # Platform specific compiled files 15 | src/*.gcno 16 | src/*.gcda 17 | src/*.o 18 | src/*.so 19 | src/*.dll 20 | -------------------------------------------------------------------------------- /CONTRIBUTORS: -------------------------------------------------------------------------------- 1 | Bug Reports and Other Feedback 2 | ============================== 3 | Justin Bedő (@jbedo) 4 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: clustRviz 2 | Type: Package 3 | Title: Convex Clustering and Bi-Clustering via Algorithmic Regularization 4 | Version: 0.1.0 5 | Authors@R: c( 6 | person("Michael", "Weylandt", role = c("aut", "cre"), email = "michael.weylandt@rice.edu"), 7 | person("John", "Nagorski", role = "aut", email = "jn13@rice.edu"), 8 | person("Genevera", "Allen", role = c("ths", "aut"), email = "gallen@rice.edu"), 9 | person("Lewis", "Brian W.", role = "cph", comment = "R/util_hcs.R"), 10 | person("Daniel", "Englund", role = "aut", email = "dse1@rice.edu"), 11 | person("Yue", "Zhuo", role = "aut", email = "yz154@rice.edu")) 12 | Description: Fast computation and interactive for the convex clustering and 13 | bi-clustering problems. The CARP and CBASS algorithms use an 14 | algorithmic regularization scheme to obtain high-quality global 15 | approximations of the exact regularization paths in a fraction of 16 | the time required for exact solutions. The CARP-VIZ and CBASS-VIZ 17 | variants include a back-tracking scheme to ensure exact dendrogram 18 | recovery. For more details, see "Dynamic Visualization and Fast Computation 19 | for Convex Clustering and Bi-Clustering" by M. Weylandt, J. Nagorski, and G.I. Allen, 20 | ArXiv 1901.01477 . 21 | License: GPL-3 22 | Encoding: UTF-8 23 | LazyData: true 24 | Depends: R (>= 2.10) 25 | Imports: 26 | stats, 27 | graphics, 28 | dplyr, 29 | tibble, 30 | ggplot2, 31 | rlang, 32 | tidyr, 33 | purrr, 34 | Matrix, 35 | stringr, 36 | zoo, 37 | dendextend, 38 | gtools, 39 | ggrepel, 40 | RColorBrewer, 41 | heatmaply, 42 | gganimate, 43 | plotly, 44 | missForest, 45 | grid 46 | LinkingTo: Rcpp, RcppEigen 47 | Suggests: testthat, 48 | knitr, 49 | rmarkdown, 50 | MASS, 51 | covr, 52 | cvxclustr, 53 | cvxbiclustr 54 | VignetteBuilder: knitr 55 | BugReports: https://github.com/DataSlingers/clustRviz/issues 56 | URL: https://github.io/DataSlingers/clustRviz, https://github.com/DataSlingers/clustRviz 57 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(as.dendrogram,CARP) 4 | S3method(as.dendrogram,CBASS) 5 | S3method(as.hclust,CARP) 6 | S3method(get_cluster_centroids,CARP) 7 | S3method(get_cluster_centroids,CBASS) 8 | S3method(get_cluster_labels,CARP) 9 | S3method(get_cluster_labels,CBASS) 10 | S3method(get_clustered_data,CARP) 11 | S3method(get_clustered_data,CBASS) 12 | S3method(plot,CARP) 13 | S3method(plot,CBASS) 14 | S3method(print,CARP) 15 | S3method(print,CBASS) 16 | S3method(print,ClusteringWeights) 17 | S3method(print,ConvexBiClustering) 18 | S3method(print,ConvexClustering) 19 | S3method(print,RBFWeights) 20 | export(CARP) 21 | export(CBASS) 22 | export(clustRviz_logger_level) 23 | export(clustRviz_options) 24 | export(clustRviz_reset_options) 25 | export(convex_biclustering) 26 | export(convex_clustering) 27 | export(dense_rbf_kernel_weights) 28 | export(get_cluster_centroids) 29 | export(get_cluster_labels) 30 | export(get_clustered_data) 31 | export(sparse_rbf_kernel_weights) 32 | importFrom(Matrix,nnzero) 33 | importFrom(RColorBrewer,brewer.pal) 34 | importFrom(dendextend,as.ggdend) 35 | importFrom(dendextend,color_branches) 36 | importFrom(dendextend,get_nodes_xy) 37 | importFrom(dendextend,set) 38 | importFrom(dplyr,"%>%") 39 | importFrom(dplyr,arrange) 40 | importFrom(dplyr,as_tibble) 41 | importFrom(dplyr,bind_rows) 42 | importFrom(dplyr,desc) 43 | importFrom(dplyr,distinct) 44 | importFrom(dplyr,filter) 45 | importFrom(dplyr,full_join) 46 | importFrom(dplyr,group_by) 47 | importFrom(dplyr,lead) 48 | importFrom(dplyr,left_join) 49 | importFrom(dplyr,mutate) 50 | importFrom(dplyr,n) 51 | importFrom(dplyr,n_distinct) 52 | importFrom(dplyr,pull) 53 | importFrom(dplyr,rename) 54 | importFrom(dplyr,select) 55 | importFrom(dplyr,select_) 56 | importFrom(dplyr,slice) 57 | importFrom(dplyr,summarize) 58 | importFrom(dplyr,tibble) 59 | importFrom(dplyr,ungroup) 60 | importFrom(gganimate,transition_manual) 61 | importFrom(gganimate,transition_time) 62 | importFrom(ggplot2,aes) 63 | importFrom(ggplot2,element_blank) 64 | importFrom(ggplot2,element_text) 65 | importFrom(ggplot2,geom_path) 66 | importFrom(ggplot2,geom_point) 67 | importFrom(ggplot2,geom_segment) 68 | importFrom(ggplot2,geom_text) 69 | importFrom(ggplot2,geom_tile) 70 | importFrom(ggplot2,ggplot) 71 | importFrom(ggplot2,ggplot_build) 72 | importFrom(ggplot2,guides) 73 | importFrom(ggplot2,labs) 74 | importFrom(ggplot2,scale_color_manual) 75 | importFrom(ggplot2,scale_fill_gradient2) 76 | importFrom(ggplot2,scale_x_continuous) 77 | importFrom(ggplot2,scale_y_continuous) 78 | importFrom(ggplot2,theme) 79 | importFrom(ggplot2,xlab) 80 | importFrom(ggplot2,ylab) 81 | importFrom(ggrepel,geom_text_repel) 82 | importFrom(grDevices,adjustcolor) 83 | importFrom(grDevices,colorRampPalette) 84 | importFrom(grid,childNames) 85 | importFrom(grid,convertX) 86 | importFrom(grid,convertY) 87 | importFrom(grid,forceGrob) 88 | importFrom(grid,grid.force) 89 | importFrom(grid,grid.get) 90 | importFrom(heatmaply,heatmaply) 91 | importFrom(heatmaply,heatmapr) 92 | importFrom(missForest,missForest) 93 | importFrom(plotly,add_heatmap) 94 | importFrom(plotly,add_markers) 95 | importFrom(plotly,add_paths) 96 | importFrom(plotly,add_segments) 97 | importFrom(plotly,add_text) 98 | importFrom(plotly,animation_slider) 99 | importFrom(plotly,hide_legend) 100 | importFrom(plotly,highlight) 101 | importFrom(plotly,plot_ly) 102 | importFrom(plotly,style) 103 | importFrom(purrr,map) 104 | importFrom(purrr,map2) 105 | importFrom(rlang,"%||%") 106 | importFrom(rlang,.data) 107 | importFrom(stats,as.dendrogram) 108 | importFrom(stats,as.hclust) 109 | importFrom(stats,cutree) 110 | importFrom(stats,dist) 111 | importFrom(stats,is.leaf) 112 | importFrom(stats,median) 113 | importFrom(stats,na.omit) 114 | importFrom(stats,prcomp) 115 | importFrom(stats,quantile) 116 | importFrom(stats,setNames) 117 | importFrom(stats,var) 118 | importFrom(stringr,str_replace) 119 | importFrom(tibble,as_tibble) 120 | importFrom(tibble,tibble) 121 | importFrom(tidyr,gather) 122 | importFrom(tidyr,nest) 123 | importFrom(tidyr,unnest) 124 | importFrom(utils,data) 125 | importFrom(utils,modifyList) 126 | importFrom(zoo,na.locf) 127 | importMethodsFrom(Matrix,t) 128 | useDynLib(clustRviz) 129 | -------------------------------------------------------------------------------- /R/RcppExports.R: -------------------------------------------------------------------------------- 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | CARPcpp <- function(X, M, D, weights, epsilon, t, rho = 1, thresh, max_iter = 100000L, max_inner_iter = 2500L, burn_in = 50L, back = 0.5, keep = 10L, viz_max_inner_iter = 15L, viz_initial_step = 1.1, viz_small_step = 1.01, l1 = FALSE, show_progress = TRUE, back_track = FALSE, exact = FALSE) { 5 | .Call('_clustRviz_CARPcpp', PACKAGE = 'clustRviz', X, M, D, weights, epsilon, t, rho, thresh, max_iter, max_inner_iter, burn_in, back, keep, viz_max_inner_iter, viz_initial_step, viz_small_step, l1, show_progress, back_track, exact) 6 | } 7 | 8 | CBASScpp <- function(X, M, D_row, D_col, weights_row, weights_col, epsilon, t, thresh, rho = 1, max_iter = 100000L, max_inner_iter = 2500L, burn_in = 50L, back = 0.5, keep = 10L, viz_max_inner_iter = 15L, viz_initial_step = 1.1, viz_small_step = 1.01, l1 = FALSE, show_progress = TRUE, back_track = FALSE, exact = FALSE) { 9 | .Call('_clustRviz_CBASScpp', PACKAGE = 'clustRviz', X, M, D_row, D_col, weights_row, weights_col, epsilon, t, thresh, rho, max_iter, max_inner_iter, burn_in, back, keep, viz_max_inner_iter, viz_initial_step, viz_small_step, l1, show_progress, back_track, exact) 10 | } 11 | 12 | ConvexClusteringCPP <- function(X, M, D, weights, lambda_grid, rho = 1, thresh, max_iter = 100000L, max_inner_iter = 2500L, l1 = FALSE, show_progress = TRUE) { 13 | .Call('_clustRviz_ConvexClusteringCPP', PACKAGE = 'clustRviz', X, M, D, weights, lambda_grid, rho, thresh, max_iter, max_inner_iter, l1, show_progress) 14 | } 15 | 16 | ConvexBiClusteringCPP <- function(X, M, D_row, D_col, weights_row, weights_col, lambda_grid, rho = 1, thresh, max_iter = 100000L, max_inner_iter = 2500L, l1 = FALSE, show_progress = TRUE) { 17 | .Call('_clustRviz_ConvexBiClusteringCPP', PACKAGE = 'clustRviz', X, M, D_row, D_col, weights_row, weights_col, lambda_grid, rho, thresh, max_iter, max_inner_iter, l1, show_progress) 18 | } 19 | 20 | clustRviz_set_logger_level_cpp <- function(level) { 21 | invisible(.Call('_clustRviz_clustRviz_set_logger_level_cpp', PACKAGE = 'clustRviz', level)) 22 | } 23 | 24 | clustRviz_get_logger_level_cpp <- function() { 25 | .Call('_clustRviz_clustRviz_get_logger_level_cpp', PACKAGE = 'clustRviz') 26 | } 27 | 28 | clustRviz_log_cpp <- function(level, x) { 29 | invisible(.Call('_clustRviz_clustRviz_log_cpp', PACKAGE = 'clustRviz', level, x)) 30 | } 31 | 32 | get_cluster_assignments <- function(E, edge_ind, n) { 33 | .Call('_clustRviz_get_cluster_assignments', PACKAGE = 'clustRviz', E, edge_ind, n) 34 | } 35 | 36 | MatrixRowProx <- function(X, lambda, weights, l1 = TRUE) { 37 | .Call('_clustRviz_MatrixRowProx', PACKAGE = 'clustRviz', X, lambda, weights, l1) 38 | } 39 | 40 | MatrixColProx <- function(X, lambda, weights, l1 = TRUE) { 41 | .Call('_clustRviz_MatrixColProx', PACKAGE = 'clustRviz', X, lambda, weights, l1) 42 | } 43 | 44 | check_weight_matrix <- function(weight_matrix) { 45 | invisible(.Call('_clustRviz_check_weight_matrix', PACKAGE = 'clustRviz', weight_matrix)) 46 | } 47 | 48 | smooth_u_clustering <- function(U_old, cluster_info_list) { 49 | .Call('_clustRviz_smooth_u_clustering', PACKAGE = 'clustRviz', U_old, cluster_info_list) 50 | } 51 | 52 | tensor_projection <- function(X, Y) { 53 | .Call('_clustRviz_tensor_projection', PACKAGE = 'clustRviz', X, Y) 54 | } 55 | 56 | -------------------------------------------------------------------------------- /R/clustRviz-package.R: -------------------------------------------------------------------------------- 1 | #' clustRviz: Efficient Computation and Dynamic Visualizations for Convex Clustering and BiClustering 2 | #' 3 | #' Implementation of the CARP and CBASS algorithms for fast convex clustering. 4 | #' See the package vignettes and accompanying paper for details of the algorithm 5 | #' and comparisons to existing methods. 6 | #' 7 | #' @docType package 8 | #' @name clustRviz 9 | #' @useDynLib clustRviz 10 | NULL 11 | -------------------------------------------------------------------------------- /R/data.R: -------------------------------------------------------------------------------- 1 | #' Log transformed word count of presidential speeches 2 | #' 3 | #' A dataset of the top 75 most variable log-transformed word counts for 4 | #' each US president aggregated over several speeches 5 | #' (Inaugural, State of the Union, etc.). 6 | #' Stop words have been removed and words have been stemmed. 7 | #' 8 | #' @format A data.frame with 44 rows (one for each president) and 75 columns (log transformed word counts) 9 | #' @details Grover Cleveland was elected president twice (1892 and 1884). For our purposes his speeches are combined. 10 | #' @source \url{http://www.presidency.ucsb.edu} 11 | "presidential_speech" 12 | 13 | #' Word Count Data from Four English-Language Authors 14 | #' 15 | #' This data set (\eqn{n=841, p = 69}) consists of counts of common words 16 | #' appearing in texts written by four popular English-language authors 17 | #' (Jane Austen, Jack London, William Shakespeare, and John Milton). 18 | #' The row names are the authors (true cluster labels) and the column 19 | #' names are the words (slightly processed). 20 | "authors" 21 | 22 | #' Log-Transformed Level III RPKM Gene Expression Levels for 438 Breast-Cancer Patients 23 | #' 24 | #' This data set (\eqn{n = 438, p = 353}) contains log-transformed Level III RPKM gene 25 | #' expression levels for 438 breast-cancer patients collected by the Cancer Genome Atlas 26 | #' Network. The Luminal A and Luminal B subtypes have been combined. The row names give the 27 | #' clinically diagnosed subtype (true cluster labels) and the column names are the gene IDs. 28 | #' 29 | #' @references 30 | #' The Cancer Genome Atlas Network. "Comprehensive Molecular Portraits of Human Breast Tumours" 31 | #' Nature 490, p.61-70. 2012. \doi{10.1038/nature11412} 32 | "tcga_breast" 33 | -------------------------------------------------------------------------------- /R/logging.R: -------------------------------------------------------------------------------- 1 | # Logging infrastructure for clustRviz 2 | 3 | ## This must be kept consistent with src/clustRviz_logging.h::ClustRVizLoggerLevel 4 | LEVELS <- c(ERROR = 40, 5 | WARNING = 30, 6 | MESSAGE = 20, 7 | INFO = 10, 8 | DEBUG = 00) 9 | 10 | 11 | #' \code{ClustRViz} Package Logging Functionality 12 | #' 13 | #' Control the global logging level for the \code{clustRviz} package. 14 | #' 15 | #' @importFrom stats setNames 16 | #' @export 17 | #' @param level The desired new log level. Available levels are \itemize{ 18 | #' \item \code{ERROR} - corresponding to \code{base::stop}; 19 | #' \item \code{WARNING} - corresponding to \code{base::warning}; 20 | #' \item \code{MESSAGE} - corresponding to \code{base::message}; 21 | #' \item \code{INFO}; and 22 | #' \item \code{DEBUG.} 23 | #' } If omitted, the log level is not changed (and the current level is still 24 | #' returned invisibly.) See below for details about the different levels. 25 | #' @return The previous log level (invisibly). 26 | #' @rdname clustRviz_logging 27 | #' @aliases clustRviz_logging clustRviz_logger_level 28 | #' @details The \code{clustRviz} package has a multi-level logging system, with a single 29 | #' global log level; (which applies to both \code{R} and \code{C++} level 30 | #' functionality.) the levels are, in decreasing order, \code{ERROR}, 31 | #' \code{WARNING}, \code{MESSAGE} (default), \code{INFO}, \code{DEBUG}. 32 | #' 33 | #' To change the amount of output from the \code{clustRviz} package, the 34 | #' \code{clustRviz_logger_level} function can be used to adjust the global 35 | #' log level. The \code{INFO} and \code{DEBUG} levels can be quite verbose 36 | #' and may significantly slow down the package. 37 | #' @examples 38 | #' # Switch to INFO level and fit somewhat loudly 39 | #' clustRviz_logger_level("INFO") 40 | #' CARP(presidential_speech) 41 | #' 42 | #' # Return to default behavior 43 | #' clustRviz_logger_level("MESSAGE") 44 | #' # Will give a few messages about phases of fitting 45 | #' CARP(presidential_speech) 46 | #' 47 | #' # If we switch to ERRORs only, no progress message 48 | #' clustRviz_logger_level("ERROR") 49 | #' CARP(presidential_speech) 50 | #' 51 | #' # Return to default 52 | #' clustRviz_logger_level("MESSAGE") 53 | #' 54 | #' # Normally this prints messages as the paths are calculated 55 | #' CARP(presidential_speech) 56 | #' # But it can be suppressed using standard R functions 57 | #' suppressMessages(CARP(presidential_speech)) 58 | clustRviz_logger_level <- function(level=c("ERROR", 59 | "WARNING", 60 | "MESSAGE", 61 | "INFO", 62 | "DEBUG")){ 63 | 64 | LEVELS_REV <- setNames(names(LEVELS), LEVELS) 65 | 66 | old_level <- LEVELS_REV[as.character(clustRviz_get_logger_level_cpp())] 67 | names(old_level) <- NULL 68 | 69 | if(!missing(level)){ 70 | level <- match.arg(level); 71 | clustRviz_set_logger_level_cpp(LEVELS[level]) 72 | return(invisible(old_level)) 73 | } 74 | 75 | old_level 76 | } 77 | 78 | crv_error <- function(..., call=TRUE){ 79 | msg <- paste(list(...), collapse="") 80 | 81 | ## Try to add R level calling info 82 | if(identical(call, TRUE)){ 83 | tryCatch({ 84 | msg <- paste0(msg, " (Called from ", as.character(as.list(sys.call(-1))[[1]]), ")") 85 | }, error=function(e){}) 86 | } else if(is.character(call)){ 87 | msg <- paste0(msg, " (Called from ", call, ")") 88 | } 89 | 90 | clustRviz_log_cpp(LEVELS["ERROR"], msg) 91 | } 92 | 93 | crv_warning <- function(..., call=TRUE){ 94 | msg <- paste(list(...), collapse="") 95 | 96 | ## Try to add R level calling info 97 | if(identical(call, TRUE)){ 98 | tryCatch({ 99 | msg <- paste0(msg, " (Called from ", as.character(as.list(sys.call(-1))[[1]]), ")") 100 | }, error=function(e){}) 101 | } else if(is.character(call)){ 102 | msg <- paste0(msg, " (Called from ", call, ")") 103 | } 104 | 105 | clustRviz_log_cpp(LEVELS["WARNING"], msg) 106 | } 107 | 108 | crv_message <- function(...){ 109 | msg <- paste(list(...), collapse="") 110 | clustRviz_log_cpp(LEVELS["MESSAGE"], msg) 111 | } 112 | 113 | crv_info <- function(...){ 114 | msg <- paste(list(...), collapse="") 115 | clustRviz_log_cpp(LEVELS["INFO"], msg) 116 | } 117 | 118 | crv_debug <- function(...){ 119 | msg <- paste(list(...), collapse="") 120 | clustRviz_log_cpp(LEVELS["DEBUG"], msg) 121 | } 122 | -------------------------------------------------------------------------------- /R/options.R: -------------------------------------------------------------------------------- 1 | ## clustRviz options 2 | 3 | clustRviz_default_options <- list(rho = 1.0, 4 | stopping_threshold = 1e-10, 5 | max_iter = as.integer(5e6), 6 | max_inner_iter = 2500L, 7 | burn_in = 50L, 8 | viz_initial_step = 1.1, 9 | viz_small_step = 1.01, 10 | viz_max_inner_iter = 15L, 11 | keep = 10L, 12 | epsilon = 0.000001, 13 | keep_debug_info = FALSE) 14 | 15 | .clustRvizOptionsEnv <- list2env(clustRviz_default_options) 16 | 17 | #' \code{ClustRViz} Options 18 | #' 19 | #' Advanced control of algorithmic options for \code{\link{CARP}} and \code{\link{CBASS}}. 20 | #' The \code{clustRviz_reset_options} function returns options to "factory-fresh" 21 | #' settings. 22 | #' 23 | #' @param ... Options (to be passed by name). See below for available options. 24 | #' 25 | #' @details The following options can be set by name:\itemize{ 26 | #' \item \code{epsilon} The initial step size (fixed during the "burn-in" period) 27 | #' \item \code{stopping_threshold}: Stopping threshold to use for \code{convex_clustering}, 28 | #' \code{convex_biclustering}, \code{CARP(exact = TRUE)}, 29 | #' and \code{CBASS(exact = TRUE)}. By default, 30 | #' this is set to \code{1e-10} - a very conservative 31 | #' threshold: making it larger can significantly 32 | #' improve performance 33 | #' \item \code{max_iter} An integer: the maximum number of iterations to perform 34 | #' \item \code{max_inner_iter} An integer: the maximum number of iterations for 35 | #' the iterative solvers to perform at a single 36 | #' value of \eqn{\lambda}{\lambda}. 37 | #' \item \code{burn_in} An integer: the number of initial iterations at a fixed 38 | #' (small) value of \eqn{\gamma} 39 | #' \item \code{viz_initial_step} The initial (large) step size used in back-tracking 40 | #' (\code{CARP-VIZ} and \code{CBASS-VIZ}) algorithms. 41 | #' \item \code{viz_small_step} The secondary (small) step size used in back-tracking 42 | #' (\code{CARP-VIZ} and \code{CBASS-VIZ}) algorithms. 43 | #' \item \code{viz_max_inner_iter} The maximum number of iterations to perform 44 | #' in the inner loop of back-tracking (\code{CARP-VIZ} 45 | #' and \code{CBASS-VIZ}) algorithms. 46 | #' \item \code{keep} \code{\link{CARP}} and \code{\link{CBASS}} keep every 47 | #' \code{keep}-th iteration even if no fusions are detected. 48 | #' Increasing this parameter may improve performance, at 49 | #' the expense of returning a finer grid. 50 | #' \item \code{rho} For advanced users only (not advisable to change): the penalty 51 | #' parameter used for the augmented Lagrangian. 52 | #' \item \code{keep_debug_info}: Should additional debug info (currently only the V-path) 53 | #' be kept? 54 | #' } 55 | #' @rdname options 56 | #' @export 57 | clustRviz_options <- function(...){ 58 | dots <- list(...) 59 | 60 | if (length(dots) == 0){ 61 | return(as.list(.clustRvizOptionsEnv)) 62 | } 63 | 64 | if (is.list(dots[[1]])) { 65 | dots <- do.call(c, dots) 66 | } 67 | 68 | if ( (is.null(names(dots))) || (any(names(dots) == "")) ){ 69 | crv_error("All arguments to ", sQuote("clustRviz_options"), " must be named.") 70 | } 71 | 72 | known_names <- names(.clustRvizOptionsEnv) 73 | 74 | if ( any(names(dots) %not.in% known_names) ){ 75 | unknown_names <- which(names(dots) %not.in% known_names) 76 | crv_error("Unknown argument ", sQuote(names(dots)[unknown_names[1]]), " passed to ", sQuote("clustRviz_options.")) 77 | } 78 | 79 | old_opts <- as.list(.clustRvizOptionsEnv) 80 | 81 | for(ix in seq_along(dots)){ 82 | nm <- names(dots)[ix] 83 | opt <- dots[[ix]] 84 | 85 | ## Validate 86 | if (nm %in% c("rho", "epsilon", "stopping_threshold")) { 87 | if (!is_positive_scalar(opt)) { 88 | crv_error(sQuote(nm), " must be a positive scalar.") 89 | } 90 | } else if (nm %in% c("viz_initial_step", "viz_small_step")) { 91 | if ( (!is_positive_scalar(opt)) || (opt <= 1) ){ 92 | crv_error(sQuote(nm), " must be greater than one.") 93 | } 94 | } else if (nm %in% c("burn_in", "max_iter", "max_inner_iter", "viz_burn_in", "viz_max_inner_iter", "keep")) { 95 | if (!is_positive_integer_scalar(opt) ){ 96 | crv_error(sQuote(nm), " must be a positive integer.") 97 | } 98 | } else if (nm %in% "keep_debug_info") { 99 | if (!is_logical_scalar(opt)) { 100 | crv_error(sQuote(nm), " must be a logical scalar.") 101 | } 102 | } 103 | 104 | ## Assign 105 | assign(nm, opt, .clustRvizOptionsEnv) 106 | } 107 | 108 | ## Sanity checks 109 | if (.clustRvizOptionsEnv[["burn_in"]] + 100 >= .clustRvizOptionsEnv[["max_iter"]]) { 110 | crv_warning(sQuote("burn_in"), " should typically be at least 100 less than ", sQuote("max_iter.")) 111 | } 112 | 113 | if (.clustRvizOptionsEnv[["viz_small_step"]] >= .clustRvizOptionsEnv[["viz_initial_step"]]) { 114 | crv_warning(sQuote("viz_small_step"), " should be less than ", sQuote("viz_initial_step.")) 115 | } 116 | 117 | if(.clustRvizOptionsEnv[["max_inner_iter"]] >= 0.1 * .clustRvizOptionsEnv[["max_iter"]]) { 118 | crv_warning(sQuote("max_inner_iter"), " should typically be at most a tenth of ", sQuote("max_iter.")) 119 | } 120 | 121 | ## 122 | invisible(old_opts) 123 | } 124 | 125 | #' @rdname options 126 | #' @export 127 | clustRviz_reset_options <- function(){ 128 | do.call(clustRviz_options, clustRviz_default_options) 129 | } 130 | -------------------------------------------------------------------------------- /R/util_hcs.R: -------------------------------------------------------------------------------- 1 | ##### 2 | # These function modify the iorder and hc functions of the 3 | # hclust_in_R package: https://github.com/bwlewis/hclust_in_R 4 | 5 | # Modifications are custom naming of the heirarchical clustering 6 | # method. 7 | # 8 | # Author of Changes: John Nagorski 9 | # Date: 9-11-18 10 | iorder <- function(m) { 11 | N <- nrow(m) + 1 12 | iorder <- rep(0, N) 13 | iorder[1] <- m[N - 1, 1] 14 | iorder[2] <- m[N - 1, 2] 15 | loc <- 2 16 | for (i in seq(N - 2, 1)) 17 | { 18 | for (j in seq(1, loc)) 19 | { 20 | if (iorder[j] == i) { 21 | iorder[j] <- m[i, 1] 22 | if (j == loc) { 23 | loc <- loc + 1 24 | iorder[loc] <- m[i, 2] 25 | } else { 26 | loc <- loc + 1 27 | for (k in seq(loc, j + 2)) iorder[k] <- iorder[k - 1] 28 | iorder[j + 1] <- m[i, 2] 29 | } 30 | } 31 | } 32 | } 33 | -iorder 34 | } 35 | cvxhc <- function(clust.path, gamma.path, labels) { 36 | h <- sort(gamma.path, decreasing = FALSE) 37 | # h = log(h + 1) 38 | h <- h[-1] 39 | N <- length(clust.path) 40 | n <- -(1:N) # Tracks group membership 41 | m <- matrix(0, nrow = N - 1, ncol = 2) # hclust merge output 42 | for (j in seq(1, N - 1)) 43 | { 44 | 45 | # affected observation indicies 46 | old.clustering <- clust.path[[j]] 47 | new.clustering <- clust.path[[j + 1]] 48 | i <- new.clustering[!(new.clustering %in% old.clustering)][[1]] 49 | 50 | i 51 | p <- n[i] 52 | p 53 | n.neg <- sum(p < 0) 54 | n.pos <- sum(p > 0) 55 | n.neg 56 | n.pos 57 | # if n.neg > 0 and n.pos ==0, joining two singletons 58 | if ((n.neg > 0) & (n.pos == 0)) { 59 | i 60 | p <- n[i] 61 | p 62 | } else if ((n.neg > 0) & (n.pos > 0)) { 63 | i <- c(i[p < 0][1], i[p > 0][1]) 64 | i 65 | p <- n[i] 66 | } else if ((n.neg == 0) & (n.pos > 0)) { 67 | tmp.clusts <- unique(p) 68 | i <- c(i[p == tmp.clusts[1]][1], i[p == tmp.clusts[2]][1]) 69 | p <- n[i] 70 | } 71 | p 72 | 73 | # R's convention is to order each m[j,] pair as follows: 74 | p <- p[order(p)] 75 | 76 | m[j, ] <- p 77 | # Agglomerate this pair and all previous groups they belong to 78 | # into the current jth group: 79 | grp <- c(i, which(n %in% n[i[n[i] > 0]])) 80 | n[grp] <- j 81 | } 82 | structure(list( 83 | merge = m, height = h, order = iorder(m), 84 | labels = labels, method = "CVX", 85 | call = match.call(), dist.method = "euclidean" 86 | ), 87 | class = "hclust" 88 | ) 89 | } 90 | -------------------------------------------------------------------------------- /R/util_plot.R: -------------------------------------------------------------------------------- 1 | # Adjust the height of dendrograms 2 | #' @importFrom dendextend color_branches as.ggdend 3 | adjusted_dendrogram <- function(d, rev = FALSE, k, cluster = FALSE, adjust){ 4 | if (cluster == TRUE){ 5 | c <- color_branches(d, k = k) 6 | } else { 7 | c <- d 8 | } 9 | if (rev == TRUE) { 10 | c <- rev(c) 11 | } 12 | dend <- as.ggdend(c) 13 | segs <- dend$segments 14 | 15 | if (cluster == FALSE){ 16 | segs$col <- "black" 17 | } 18 | 19 | segs$y <- segs$y*adjust 20 | segs$yend <- segs$yend*adjust 21 | 22 | return(segs) 23 | } 24 | 25 | # Generate box for dendrograms 26 | #' @importFrom stats as.hclust cutree 27 | dendrogram_box <- function(x, rev = FALSE, k, type, percent, show_memnum = FALSE){ 28 | tree <- as.hclust(x, type = type) 29 | if (rev == TRUE) { 30 | tree <- rev(tree) 31 | } 32 | cluster <- cutree(tree, k = k) 33 | clustab <- table(cluster)[unique(cluster[tree$order])] 34 | clustsum <- cumsum(clustab) 35 | m <- c(0, clustsum) + 0.5 36 | line_x <- c(m[1],m[1],m) 37 | line_y <- c(0,percent,rep(percent,k+1)) 38 | line_xend <- c(m[k+1],m[k+1],m) 39 | line_yend <- c(0,percent,rep(0,k+1)) 40 | lines <- data.frame(x=line_x,y=line_y,xend=line_xend,yend=line_yend) 41 | 42 | if (show_memnum == FALSE){ 43 | return(lines) 44 | } else { 45 | return(list(lines, m)) 46 | } 47 | } 48 | 49 | 50 | # Manually "dodge" the labels for path plots 51 | # from https://stackoverflow.com/questions/45065567/getting-coordinates-for-the-label-locations-from-ggrepel 52 | #' @importFrom ggplot2 ggplot geom_point ggplot_build 53 | #' @importFrom ggrepel geom_text_repel 54 | #' @importFrom grid grid.force grid.get childNames forceGrob 55 | get_ggrepel_coordinates <- function(x, y, label){ 56 | df <- data.frame(x = x, y = y, label = label) 57 | p <- ggplot(data = df, aes(x = x, y = y)) + 58 | geom_text_repel(aes(label = label), size = 3) + 59 | geom_point() 60 | 61 | # Get x and y plot ranges 62 | xrg <- ggplot_build(p)$layout$panel_params[[1]]$x.range 63 | yrg <- ggplot_build(p)$layout$panel_params[[1]]$y.range 64 | 65 | forceGrob(p) 66 | grid.force() 67 | kids <- childNames(grid.get("textrepeltree", grep = TRUE)) 68 | 69 | # Get positions of all ggrepel labels 70 | dts <- do.call(rbind, lapply(kids, get.xy.pos.labs, xrg = xrg, yrg = yrg)) 71 | colnames(dts) <- c("x_adj", "y_adj") 72 | return(cbind(df,dts)) 73 | } 74 | 75 | # get the x and y positions of a single ggrepel label 76 | #' @importFrom grid grid.get convertX convertY 77 | get.xy.pos.labs <- function(n, xrg, yrg) { 78 | grb <- grid.get(n) 79 | data.frame( 80 | x = xrg[1]+diff(xrg)*convertX(grb$x, "native", valueOnly = TRUE), 81 | y = yrg[1]+diff(yrg)*convertY(grb$y, "native", valueOnly = TRUE) 82 | ) 83 | } 84 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | .onAttach <- function(...) { # nocov start 2 | if(interactive()){ 3 | msg <- c("Thank you for using clustRviz!", 4 | "The current logging level is", 5 | sQuote(paste0(clustRviz_logger_level(), ".")), 6 | "To change this, see ?clustRviz_logging.") 7 | 8 | packageStartupMessage(paste(msg, collapse=" ")) 9 | } 10 | } # nocov end 11 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | always_allow_html: true 4 | --- 5 | 6 | 7 | 8 | ```{r, echo = FALSE} 9 | knitr::opts_chunk$set( 10 | collapse = TRUE, 11 | comment = "#>", 12 | fig.path = "man/figures/README-" 13 | ) 14 | ``` 15 | [![GitHub Actions Build Status](https://github.com/DataSlingers/clustRviz/workflows/R-CMD-check and Deploy/badge.svg)](https://github.com/DataSlingers/clustRviz/actions?query=workflow%3A%22R-CMD-check+and+Deploy%22) 16 | [![codecov Coverage Status](https://codecov.io/gh/DataSlingers/clustRviz/branch/develop/graph/badge.svg)](https://codecov.io/gh/DataSlingers/clustRviz/branch/develop) 17 | [![License: GPL v3](https://img.shields.io/badge/License-GPL%20v3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0) 18 | [![CRAN\_Status\_Badge](http://www.r-pkg.org/badges/version/clustRviz)](https://cran.r-project.org/package=clustRviz) 19 | [![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](http://www.repostatus.org/badges/latest/active.svg)](http://www.repostatus.org/#active) 20 | 21 | # clustRviz 22 | 23 | `clustRviz` aims to enable fast computation and easy visualization of Convex Clustering 24 | solution paths. 25 | 26 | ## Installation 27 | 28 | You can install `clustRviz` from github with: 29 | 30 | ```{r gh-installation, eval = FALSE} 31 | # install.packages("devtools") 32 | devtools::install_github("DataSlingers/clustRviz") 33 | ``` 34 | 35 | Note that `RcppEigen` (which `clustRviz` internally) triggers many compiler warnings 36 | (which cannot be suppressed per 37 | [CRAN policies](http://cran.r-project.org/web/packages/policies.html#Source-packages)). 38 | Many of these warnings can be locally suppressed by adding the line `CXX11FLAGS+=-Wno-ignored-attributes` 39 | to your `~/.R/Makevars` file. To install an `R` package from source, you will need 40 | suitable development tools installed including a `C++` compiler and potentially 41 | a Fortran runtime. Details about these toolchains are available on CRAN for 42 | [Windows](https://cran.r-project.org/bin/windows/Rtools/) and [macOS](https://mac.r-project.org/tools/). 43 | 44 | ## Quick-Start 45 | 46 | There are two main entry points to the `clustRviz` package, the `CARP` and `CBASS` 47 | functions, which perform convex clustering and convex biclustering respectively. 48 | We demonstrate the use of these two functions on a text minining data set, 49 | `presidential_speech`, which measures how often the 44 U.S. presidents used certain 50 | words in their public addresses. 51 | 52 | ```{r load_data} 53 | library(clustRviz) 54 | data(presidential_speech) 55 | presidential_speech[1:6, 1:6] 56 | ``` 57 | 58 | ### Clustering 59 | 60 | We begin by clustering this data set, grouping the rows (presidents) into clusters: 61 | 62 | ```{r carp_example} 63 | carp_fit <- CARP(presidential_speech) 64 | print(carp_fit) 65 | ``` 66 | 67 | The algorithmic regularization technique employed by `CARP` makes computation of 68 | the whole solution path almost immediate. 69 | 70 | We can examine the result of `CARP` graphically. We begin with a standard dendrogram, 71 | with three clusters highlighted: 72 | 73 | ```{r carp_dendro} 74 | plot(carp_fit, type = "dendrogram", k = 3) 75 | ``` 76 | 77 | Examing the dendrogram, we see two clear clusters, consisting of pre-WWII and post-WWII 78 | presidents and Warren G. Harding as a possible outlier. Harding is generally considered 79 | one of the worst US presidents of all time, so this is perhaps not too surprising. 80 | 81 | A more interesting visualization is the dynamic path visualization, whereby we can 82 | watch the clusters fuse as the regularization level is increased: 83 | 84 | ```{r carp_dynamic, eval = FALSE} 85 | plot(carp_fit, type = "path", dynamic = TRUE) 86 | ``` 87 | 88 | ### BiClustering 89 | 90 | The use of `CBASS` for convex biclustering is similar, and we demonstrate it here 91 | with a cluster heatmap, with the regularization set to give 3 observation clusters: 92 | 93 | ```{r cbass} 94 | cbass_fit <- CBASS(presidential_speech) 95 | plot(cbass_fit, k.row = 3) 96 | ``` 97 | 98 | By default, plotting the result of CBASS gives the traditional cluster heatmap, 99 | but we can also get the row or column dendrograms as well: 100 | 101 | ```{r cbass_rowdendro} 102 | plot(cbass_fit, type = "row.dendrogram", k.row = 3) 103 | ``` 104 | 105 | By default, if a regularization level is specified, all plotting functions in `clustRviz` 106 | will plot the clustered data. If the regularization level is not specified, the 107 | raw data will be plotted instead: 108 | 109 | ```{r cbass_heatmap2} 110 | plot(cbass_fit, type = "heatmap") 111 | ``` 112 | 113 | More details about the use and mathematical formulation of `CARP` and `CBASS` 114 | may be found in the package documentation. 115 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | template: 2 | params: 3 | bootswatch: flatly 4 | 5 | reference: 6 | - title: "CARP - Convex Clustering" 7 | desc: > 8 | CARP (Convex Clustering via Algorithmic 9 | Regularization Paths) is a fast method 10 | for convex clustering. 11 | contents: 12 | - CARP 13 | - plot.CARP 14 | - get_cluster_labels 15 | - print.CARP 16 | - convex_clustering 17 | - title: "CBASS - Convex BiClustering" 18 | desc: > 19 | CBASS (Convex BiClustering via algorithmic 20 | regularization with Small Steps) is a fast 21 | method for convex bi-clustering. 22 | contents: 23 | - CBASS 24 | - plot.CBASS 25 | - get_cluster_labels.CBASS 26 | - print.CBASS 27 | - convex_biclustering 28 | - title: "Weights" 29 | desc: > 30 | Functions to calculate fusion weights 31 | for `CARP` and `CBASS` 32 | contents: 33 | - dense_rbf_kernel_weights 34 | - title: "Miscellaneous" 35 | desc: > 36 | Other functions and data provided 37 | by `clustRviz` 38 | contents: 39 | - presidential_speech 40 | - authors 41 | - tcga_breast 42 | - clustRviz_logging 43 | - clustRviz_options 44 | 45 | articles: 46 | - title: Vignettes 47 | navbar: ~ 48 | contents: 49 | - Tutorial 50 | - Weights 51 | - Algorithms 52 | - Acknowledgements 53 | 54 | nabvar: 55 | right: 56 | - icon: fa-github fa-lg 57 | href: https://github.com/DataSlingers/clustRviz 58 | 59 | development: 60 | mode: unreleased 61 | version_label: info 62 | version_tooltip: "clustRviz is not yet on CRAN" 63 | -------------------------------------------------------------------------------- /clustRviz.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: knitr 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | target: auto 8 | threshold: 1% 9 | patch: 10 | default: 11 | target: auto 12 | threshold: 1% 13 | -------------------------------------------------------------------------------- /data/authors.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/data/authors.rda -------------------------------------------------------------------------------- /data/presidential_speech.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/data/presidential_speech.rda -------------------------------------------------------------------------------- /data/tcga_breast.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/data/tcga_breast.rda -------------------------------------------------------------------------------- /docs/articles/Acknowledgements_files/accessible-code-block-0.0.1/empty-anchor.js: -------------------------------------------------------------------------------- 1 | // Hide empty tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) --> 2 | // v0.0.1 3 | // Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. 4 | 5 | document.addEventListener('DOMContentLoaded', function() { 6 | const codeList = document.getElementsByClassName("sourceCode"); 7 | for (var i = 0; i < codeList.length; i++) { 8 | var linkList = codeList[i].getElementsByTagName('a'); 9 | for (var j = 0; j < linkList.length; j++) { 10 | if (linkList[j].innerHTML === "") { 11 | linkList[j].setAttribute('aria-hidden', 'true'); 12 | } 13 | } 14 | } 15 | }); 16 | -------------------------------------------------------------------------------- /docs/articles/Algorithms_files/accessible-code-block-0.0.1/empty-anchor.js: -------------------------------------------------------------------------------- 1 | // Hide empty tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) --> 2 | // v0.0.1 3 | // Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. 4 | 5 | document.addEventListener('DOMContentLoaded', function() { 6 | const codeList = document.getElementsByClassName("sourceCode"); 7 | for (var i = 0; i < codeList.length; i++) { 8 | var linkList = codeList[i].getElementsByTagName('a'); 9 | for (var j = 0; j < linkList.length; j++) { 10 | if (linkList[j].innerHTML === "") { 11 | linkList[j].setAttribute('aria-hidden', 'true'); 12 | } 13 | } 14 | } 15 | }); 16 | -------------------------------------------------------------------------------- /docs/articles/Tutorial_files/accessible-code-block-0.0.1/empty-anchor.js: -------------------------------------------------------------------------------- 1 | // Hide empty tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) --> 2 | // v0.0.1 3 | // Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. 4 | 5 | document.addEventListener('DOMContentLoaded', function() { 6 | const codeList = document.getElementsByClassName("sourceCode"); 7 | for (var i = 0; i < codeList.length; i++) { 8 | var linkList = codeList[i].getElementsByTagName('a'); 9 | for (var j = 0; j < linkList.length; j++) { 10 | if (linkList[j].innerHTML === "") { 11 | linkList[j].setAttribute('aria-hidden', 'true'); 12 | } 13 | } 14 | } 15 | }); 16 | -------------------------------------------------------------------------------- /docs/articles/Tutorial_files/colourpicker-1.1/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2016 Dean Attali 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /docs/articles/Tutorial_files/crosstalk-1.1.0.1/css/crosstalk.css: -------------------------------------------------------------------------------- 1 | /* Adjust margins outwards, so column contents line up with the edges of the 2 | parent of container-fluid. */ 3 | .container-fluid.crosstalk-bscols { 4 | margin-left: -30px; 5 | margin-right: -30px; 6 | white-space: normal; 7 | } 8 | 9 | /* But don't adjust the margins outwards if we're directly under the body, 10 | i.e. we were the top-level of something at the console. */ 11 | body > .container-fluid.crosstalk-bscols { 12 | margin-left: auto; 13 | margin-right: auto; 14 | } 15 | 16 | .crosstalk-input-checkboxgroup .crosstalk-options-group .crosstalk-options-column { 17 | display: inline-block; 18 | padding-right: 12px; 19 | vertical-align: top; 20 | } 21 | 22 | @media only screen and (max-width:480px) { 23 | .crosstalk-input-checkboxgroup .crosstalk-options-group .crosstalk-options-column { 24 | display: block; 25 | padding-right: inherit; 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /docs/articles/Tutorial_files/figure-html/unnamed-chunk-11-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/docs/articles/Tutorial_files/figure-html/unnamed-chunk-11-1.png -------------------------------------------------------------------------------- /docs/articles/Tutorial_files/figure-html/unnamed-chunk-12-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/docs/articles/Tutorial_files/figure-html/unnamed-chunk-12-1.png -------------------------------------------------------------------------------- /docs/articles/Tutorial_files/figure-html/unnamed-chunk-13-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/docs/articles/Tutorial_files/figure-html/unnamed-chunk-13-1.png -------------------------------------------------------------------------------- /docs/articles/Tutorial_files/figure-html/unnamed-chunk-14-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/docs/articles/Tutorial_files/figure-html/unnamed-chunk-14-1.png -------------------------------------------------------------------------------- /docs/articles/Tutorial_files/figure-html/unnamed-chunk-15-1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/docs/articles/Tutorial_files/figure-html/unnamed-chunk-15-1.gif -------------------------------------------------------------------------------- /docs/articles/Tutorial_files/figure-html/unnamed-chunk-16-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/docs/articles/Tutorial_files/figure-html/unnamed-chunk-16-1.png -------------------------------------------------------------------------------- /docs/articles/Tutorial_files/figure-html/unnamed-chunk-17-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/docs/articles/Tutorial_files/figure-html/unnamed-chunk-17-1.png -------------------------------------------------------------------------------- /docs/articles/Tutorial_files/figure-html/unnamed-chunk-18-1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/docs/articles/Tutorial_files/figure-html/unnamed-chunk-18-1.gif -------------------------------------------------------------------------------- /docs/articles/Tutorial_files/figure-html/unnamed-chunk-19-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/docs/articles/Tutorial_files/figure-html/unnamed-chunk-19-1.png -------------------------------------------------------------------------------- /docs/articles/Tutorial_files/figure-html/unnamed-chunk-22-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/docs/articles/Tutorial_files/figure-html/unnamed-chunk-22-1.png -------------------------------------------------------------------------------- /docs/articles/Tutorial_files/figure-html/unnamed-chunk-23-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/docs/articles/Tutorial_files/figure-html/unnamed-chunk-23-1.png -------------------------------------------------------------------------------- /docs/articles/Tutorial_files/figure-html/unnamed-chunk-24-1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/docs/articles/Tutorial_files/figure-html/unnamed-chunk-24-1.gif -------------------------------------------------------------------------------- /docs/articles/Tutorial_files/plotly-htmlwidgets-css-1.52.2/plotly-htmlwidgets.css: -------------------------------------------------------------------------------- 1 | /* 2 | just here so that plotly works 3 | correctly with ioslides. 4 | see https://github.com/ropensci/plotly/issues/463 5 | */ 6 | 7 | slide:not(.current) .plotly.html-widget{ 8 | display: none; 9 | } 10 | -------------------------------------------------------------------------------- /docs/articles/Weights_files/accessible-code-block-0.0.1/empty-anchor.js: -------------------------------------------------------------------------------- 1 | // Hide empty tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) --> 2 | // v0.0.1 3 | // Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. 4 | 5 | document.addEventListener('DOMContentLoaded', function() { 6 | const codeList = document.getElementsByClassName("sourceCode"); 7 | for (var i = 0; i < codeList.length; i++) { 8 | var linkList = codeList[i].getElementsByTagName('a'); 9 | for (var j = 0; j < linkList.length; j++) { 10 | if (linkList[j].innerHTML === "") { 11 | linkList[j].setAttribute('aria-hidden', 'true'); 12 | } 13 | } 14 | } 15 | }); 16 | -------------------------------------------------------------------------------- /docs/articles/Weights_files/figure-html/unnamed-chunk-15-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/docs/articles/Weights_files/figure-html/unnamed-chunk-15-1.png -------------------------------------------------------------------------------- /docs/articles/Weights_files/figure-html/unnamed-chunk-17-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/docs/articles/Weights_files/figure-html/unnamed-chunk-17-1.png -------------------------------------------------------------------------------- /docs/articles/Weights_files/figure-html/unnamed-chunk-20-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/docs/articles/Weights_files/figure-html/unnamed-chunk-20-1.png -------------------------------------------------------------------------------- /docs/articles/Weights_files/figure-html/unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/docs/articles/Weights_files/figure-html/unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /docs/articles/Weights_files/figure-html/unnamed-chunk-9-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/docs/articles/Weights_files/figure-html/unnamed-chunk-9-1.png -------------------------------------------------------------------------------- /docs/articles/clustRviz_files/accessible-code-block-0.0.1/empty-anchor.js: -------------------------------------------------------------------------------- 1 | // Hide empty tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) --> 2 | // v0.0.1 3 | // Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. 4 | 5 | document.addEventListener('DOMContentLoaded', function() { 6 | const codeList = document.getElementsByClassName("sourceCode"); 7 | for (var i = 0; i < codeList.length; i++) { 8 | var linkList = codeList[i].getElementsByTagName('a'); 9 | for (var j = 0; j < linkList.length; j++) { 10 | if (linkList[j].innerHTML === "") { 11 | linkList[j].setAttribute('aria-hidden', 'true'); 12 | } 13 | } 14 | } 15 | }); 16 | -------------------------------------------------------------------------------- /docs/articles/clustRviz_files/figure-html/unnamed-chunk-10-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/docs/articles/clustRviz_files/figure-html/unnamed-chunk-10-1.png -------------------------------------------------------------------------------- /docs/articles/clustRviz_files/figure-html/unnamed-chunk-11-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/docs/articles/clustRviz_files/figure-html/unnamed-chunk-11-1.png -------------------------------------------------------------------------------- /docs/articles/clustRviz_files/figure-html/unnamed-chunk-12-1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/docs/articles/clustRviz_files/figure-html/unnamed-chunk-12-1.gif -------------------------------------------------------------------------------- /docs/articles/clustRviz_files/figure-html/unnamed-chunk-13-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/docs/articles/clustRviz_files/figure-html/unnamed-chunk-13-1.png -------------------------------------------------------------------------------- /docs/articles/clustRviz_files/figure-html/unnamed-chunk-2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/docs/articles/clustRviz_files/figure-html/unnamed-chunk-2-1.png -------------------------------------------------------------------------------- /docs/articles/clustRviz_files/figure-html/unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/docs/articles/clustRviz_files/figure-html/unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /docs/articles/clustRviz_files/figure-html/unnamed-chunk-4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/docs/articles/clustRviz_files/figure-html/unnamed-chunk-4-1.png -------------------------------------------------------------------------------- /docs/articles/clustRviz_files/figure-html/unnamed-chunk-5-1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/docs/articles/clustRviz_files/figure-html/unnamed-chunk-5-1.gif -------------------------------------------------------------------------------- /docs/articles/clustRviz_files/figure-html/unnamed-chunk-6-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/docs/articles/clustRviz_files/figure-html/unnamed-chunk-6-1.png -------------------------------------------------------------------------------- /docs/articles/clustRviz_files/figure-html/unnamed-chunk-8-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/docs/articles/clustRviz_files/figure-html/unnamed-chunk-8-1.png -------------------------------------------------------------------------------- /docs/articles/clustRviz_files/figure-html/unnamed-chunk-9-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/docs/articles/clustRviz_files/figure-html/unnamed-chunk-9-1.png -------------------------------------------------------------------------------- /docs/bootstrap-toc.css: -------------------------------------------------------------------------------- 1 | /*! 2 | * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) 3 | * Copyright 2015 Aidan Feldman 4 | * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ 5 | 6 | /* modified from https://github.com/twbs/bootstrap/blob/94b4076dd2efba9af71f0b18d4ee4b163aa9e0dd/docs/assets/css/src/docs.css#L548-L601 */ 7 | 8 | /* All levels of nav */ 9 | nav[data-toggle='toc'] .nav > li > a { 10 | display: block; 11 | padding: 4px 20px; 12 | font-size: 13px; 13 | font-weight: 500; 14 | color: #767676; 15 | } 16 | nav[data-toggle='toc'] .nav > li > a:hover, 17 | nav[data-toggle='toc'] .nav > li > a:focus { 18 | padding-left: 19px; 19 | color: #563d7c; 20 | text-decoration: none; 21 | background-color: transparent; 22 | border-left: 1px solid #563d7c; 23 | } 24 | nav[data-toggle='toc'] .nav > .active > a, 25 | nav[data-toggle='toc'] .nav > .active:hover > a, 26 | nav[data-toggle='toc'] .nav > .active:focus > a { 27 | padding-left: 18px; 28 | font-weight: bold; 29 | color: #563d7c; 30 | background-color: transparent; 31 | border-left: 2px solid #563d7c; 32 | } 33 | 34 | /* Nav: second level (shown on .active) */ 35 | nav[data-toggle='toc'] .nav .nav { 36 | display: none; /* Hide by default, but at >768px, show it */ 37 | padding-bottom: 10px; 38 | } 39 | nav[data-toggle='toc'] .nav .nav > li > a { 40 | padding-top: 1px; 41 | padding-bottom: 1px; 42 | padding-left: 30px; 43 | font-size: 12px; 44 | font-weight: normal; 45 | } 46 | nav[data-toggle='toc'] .nav .nav > li > a:hover, 47 | nav[data-toggle='toc'] .nav .nav > li > a:focus { 48 | padding-left: 29px; 49 | } 50 | nav[data-toggle='toc'] .nav .nav > .active > a, 51 | nav[data-toggle='toc'] .nav .nav > .active:hover > a, 52 | nav[data-toggle='toc'] .nav .nav > .active:focus > a { 53 | padding-left: 28px; 54 | font-weight: 500; 55 | } 56 | 57 | /* from https://github.com/twbs/bootstrap/blob/e38f066d8c203c3e032da0ff23cd2d6098ee2dd6/docs/assets/css/src/docs.css#L631-L634 */ 58 | nav[data-toggle='toc'] .nav > .active > ul { 59 | display: block; 60 | } 61 | -------------------------------------------------------------------------------- /docs/bootstrap-toc.js: -------------------------------------------------------------------------------- 1 | /*! 2 | * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) 3 | * Copyright 2015 Aidan Feldman 4 | * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ 5 | (function() { 6 | 'use strict'; 7 | 8 | window.Toc = { 9 | helpers: { 10 | // return all matching elements in the set, or their descendants 11 | findOrFilter: function($el, selector) { 12 | // http://danielnouri.org/notes/2011/03/14/a-jquery-find-that-also-finds-the-root-element/ 13 | // http://stackoverflow.com/a/12731439/358804 14 | var $descendants = $el.find(selector); 15 | return $el.filter(selector).add($descendants).filter(':not([data-toc-skip])'); 16 | }, 17 | 18 | generateUniqueIdBase: function(el) { 19 | var text = $(el).text(); 20 | var anchor = text.trim().toLowerCase().replace(/[^A-Za-z0-9]+/g, '-'); 21 | return anchor || el.tagName.toLowerCase(); 22 | }, 23 | 24 | generateUniqueId: function(el) { 25 | var anchorBase = this.generateUniqueIdBase(el); 26 | for (var i = 0; ; i++) { 27 | var anchor = anchorBase; 28 | if (i > 0) { 29 | // add suffix 30 | anchor += '-' + i; 31 | } 32 | // check if ID already exists 33 | if (!document.getElementById(anchor)) { 34 | return anchor; 35 | } 36 | } 37 | }, 38 | 39 | generateAnchor: function(el) { 40 | if (el.id) { 41 | return el.id; 42 | } else { 43 | var anchor = this.generateUniqueId(el); 44 | el.id = anchor; 45 | return anchor; 46 | } 47 | }, 48 | 49 | createNavList: function() { 50 | return $(''); 51 | }, 52 | 53 | createChildNavList: function($parent) { 54 | var $childList = this.createNavList(); 55 | $parent.append($childList); 56 | return $childList; 57 | }, 58 | 59 | generateNavEl: function(anchor, text) { 60 | var $a = $(''); 61 | $a.attr('href', '#' + anchor); 62 | $a.text(text); 63 | var $li = $('
  • '); 64 | $li.append($a); 65 | return $li; 66 | }, 67 | 68 | generateNavItem: function(headingEl) { 69 | var anchor = this.generateAnchor(headingEl); 70 | var $heading = $(headingEl); 71 | var text = $heading.data('toc-text') || $heading.text(); 72 | return this.generateNavEl(anchor, text); 73 | }, 74 | 75 | // Find the first heading level (`

    `, then `

    `, etc.) that has more than one element. Defaults to 1 (for `

    `). 76 | getTopLevel: function($scope) { 77 | for (var i = 1; i <= 6; i++) { 78 | var $headings = this.findOrFilter($scope, 'h' + i); 79 | if ($headings.length > 1) { 80 | return i; 81 | } 82 | } 83 | 84 | return 1; 85 | }, 86 | 87 | // returns the elements for the top level, and the next below it 88 | getHeadings: function($scope, topLevel) { 89 | var topSelector = 'h' + topLevel; 90 | 91 | var secondaryLevel = topLevel + 1; 92 | var secondarySelector = 'h' + secondaryLevel; 93 | 94 | return this.findOrFilter($scope, topSelector + ',' + secondarySelector); 95 | }, 96 | 97 | getNavLevel: function(el) { 98 | return parseInt(el.tagName.charAt(1), 10); 99 | }, 100 | 101 | populateNav: function($topContext, topLevel, $headings) { 102 | var $context = $topContext; 103 | var $prevNav; 104 | 105 | var helpers = this; 106 | $headings.each(function(i, el) { 107 | var $newNav = helpers.generateNavItem(el); 108 | var navLevel = helpers.getNavLevel(el); 109 | 110 | // determine the proper $context 111 | if (navLevel === topLevel) { 112 | // use top level 113 | $context = $topContext; 114 | } else if ($prevNav && $context === $topContext) { 115 | // create a new level of the tree and switch to it 116 | $context = helpers.createChildNavList($prevNav); 117 | } // else use the current $context 118 | 119 | $context.append($newNav); 120 | 121 | $prevNav = $newNav; 122 | }); 123 | }, 124 | 125 | parseOps: function(arg) { 126 | var opts; 127 | if (arg.jquery) { 128 | opts = { 129 | $nav: arg 130 | }; 131 | } else { 132 | opts = arg; 133 | } 134 | opts.$scope = opts.$scope || $(document.body); 135 | return opts; 136 | } 137 | }, 138 | 139 | // accepts a jQuery object, or an options object 140 | init: function(opts) { 141 | opts = this.helpers.parseOps(opts); 142 | 143 | // ensure that the data attribute is in place for styling 144 | opts.$nav.attr('data-toggle', 'toc'); 145 | 146 | var $topContext = this.helpers.createChildNavList(opts.$nav); 147 | var topLevel = this.helpers.getTopLevel(opts.$scope); 148 | var $headings = this.helpers.getHeadings(opts.$scope, topLevel); 149 | this.helpers.populateNav($topContext, topLevel, $headings); 150 | } 151 | }; 152 | 153 | $(function() { 154 | $('nav[data-toggle="toc"]').each(function(i, el) { 155 | var $nav = $(el); 156 | Toc.init($nav); 157 | }); 158 | }); 159 | })(); 160 | -------------------------------------------------------------------------------- /docs/docsearch.js: -------------------------------------------------------------------------------- 1 | $(function() { 2 | 3 | // register a handler to move the focus to the search bar 4 | // upon pressing shift + "/" (i.e. "?") 5 | $(document).on('keydown', function(e) { 6 | if (e.shiftKey && e.keyCode == 191) { 7 | e.preventDefault(); 8 | $("#search-input").focus(); 9 | } 10 | }); 11 | 12 | $(document).ready(function() { 13 | // do keyword highlighting 14 | /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */ 15 | var mark = function() { 16 | 17 | var referrer = document.URL ; 18 | var paramKey = "q" ; 19 | 20 | if (referrer.indexOf("?") !== -1) { 21 | var qs = referrer.substr(referrer.indexOf('?') + 1); 22 | var qs_noanchor = qs.split('#')[0]; 23 | var qsa = qs_noanchor.split('&'); 24 | var keyword = ""; 25 | 26 | for (var i = 0; i < qsa.length; i++) { 27 | var currentParam = qsa[i].split('='); 28 | 29 | if (currentParam.length !== 2) { 30 | continue; 31 | } 32 | 33 | if (currentParam[0] == paramKey) { 34 | keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20")); 35 | } 36 | } 37 | 38 | if (keyword !== "") { 39 | $(".contents").unmark({ 40 | done: function() { 41 | $(".contents").mark(keyword); 42 | } 43 | }); 44 | } 45 | } 46 | }; 47 | 48 | mark(); 49 | }); 50 | }); 51 | 52 | /* Search term highlighting ------------------------------*/ 53 | 54 | function matchedWords(hit) { 55 | var words = []; 56 | 57 | var hierarchy = hit._highlightResult.hierarchy; 58 | // loop to fetch from lvl0, lvl1, etc. 59 | for (var idx in hierarchy) { 60 | words = words.concat(hierarchy[idx].matchedWords); 61 | } 62 | 63 | var content = hit._highlightResult.content; 64 | if (content) { 65 | words = words.concat(content.matchedWords); 66 | } 67 | 68 | // return unique words 69 | var words_uniq = [...new Set(words)]; 70 | return words_uniq; 71 | } 72 | 73 | function updateHitURL(hit) { 74 | 75 | var words = matchedWords(hit); 76 | var url = ""; 77 | 78 | if (hit.anchor) { 79 | url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor; 80 | } else { 81 | url = hit.url + '?q=' + escape(words.join(" ")); 82 | } 83 | 84 | return url; 85 | } 86 | -------------------------------------------------------------------------------- /docs/link.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 8 | 12 | 13 | -------------------------------------------------------------------------------- /docs/pkgdown.js: -------------------------------------------------------------------------------- 1 | /* http://gregfranko.com/blog/jquery-best-practices/ */ 2 | (function($) { 3 | $(function() { 4 | 5 | $('.navbar-fixed-top').headroom(); 6 | 7 | $('body').css('padding-top', $('.navbar').height() + 10); 8 | $(window).resize(function(){ 9 | $('body').css('padding-top', $('.navbar').height() + 10); 10 | }); 11 | 12 | $('[data-toggle="tooltip"]').tooltip(); 13 | 14 | var cur_path = paths(location.pathname); 15 | var links = $("#navbar ul li a"); 16 | var max_length = -1; 17 | var pos = -1; 18 | for (var i = 0; i < links.length; i++) { 19 | if (links[i].getAttribute("href") === "#") 20 | continue; 21 | // Ignore external links 22 | if (links[i].host !== location.host) 23 | continue; 24 | 25 | var nav_path = paths(links[i].pathname); 26 | 27 | var length = prefix_length(nav_path, cur_path); 28 | if (length > max_length) { 29 | max_length = length; 30 | pos = i; 31 | } 32 | } 33 | 34 | // Add class to parent
  • , and enclosing
  • if in dropdown 35 | if (pos >= 0) { 36 | var menu_anchor = $(links[pos]); 37 | menu_anchor.parent().addClass("active"); 38 | menu_anchor.closest("li.dropdown").addClass("active"); 39 | } 40 | }); 41 | 42 | function paths(pathname) { 43 | var pieces = pathname.split("/"); 44 | pieces.shift(); // always starts with / 45 | 46 | var end = pieces[pieces.length - 1]; 47 | if (end === "index.html" || end === "") 48 | pieces.pop(); 49 | return(pieces); 50 | } 51 | 52 | // Returns -1 if not found 53 | function prefix_length(needle, haystack) { 54 | if (needle.length > haystack.length) 55 | return(-1); 56 | 57 | // Special case for length-0 haystack, since for loop won't run 58 | if (haystack.length === 0) { 59 | return(needle.length === 0 ? 0 : -1); 60 | } 61 | 62 | for (var i = 0; i < haystack.length; i++) { 63 | if (needle[i] != haystack[i]) 64 | return(i); 65 | } 66 | 67 | return(haystack.length); 68 | } 69 | 70 | /* Clipboard --------------------------*/ 71 | 72 | function changeTooltipMessage(element, msg) { 73 | var tooltipOriginalTitle=element.getAttribute('data-original-title'); 74 | element.setAttribute('data-original-title', msg); 75 | $(element).tooltip('show'); 76 | element.setAttribute('data-original-title', tooltipOriginalTitle); 77 | } 78 | 79 | if(ClipboardJS.isSupported()) { 80 | $(document).ready(function() { 81 | var copyButton = ""; 82 | 83 | $(".examples, div.sourceCode").addClass("hasCopyButton"); 84 | 85 | // Insert copy buttons: 86 | $(copyButton).prependTo(".hasCopyButton"); 87 | 88 | // Initialize tooltips: 89 | $('.btn-copy-ex').tooltip({container: 'body'}); 90 | 91 | // Initialize clipboard: 92 | var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', { 93 | text: function(trigger) { 94 | return trigger.parentNode.textContent; 95 | } 96 | }); 97 | 98 | clipboardBtnCopies.on('success', function(e) { 99 | changeTooltipMessage(e.trigger, 'Copied!'); 100 | e.clearSelection(); 101 | }); 102 | 103 | clipboardBtnCopies.on('error', function() { 104 | changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy'); 105 | }); 106 | }); 107 | } 108 | })(window.jQuery || window.$) 109 | -------------------------------------------------------------------------------- /docs/pkgdown.yml: -------------------------------------------------------------------------------- 1 | pandoc: 2.7.3 2 | pkgdown: 1.5.1 3 | pkgdown_sha: ~ 4 | articles: 5 | Acknowledgements: Acknowledgements.html 6 | Algorithms: Algorithms.html 7 | Tutorial: Tutorial.html 8 | Weights: Weights.html 9 | clustRviz: clustRviz.html 10 | last_built: 2020-08-30T20:42Z 11 | 12 | -------------------------------------------------------------------------------- /docs/reference/CARP-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/docs/reference/CARP-1.png -------------------------------------------------------------------------------- /docs/reference/accessors_cbass-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/docs/reference/accessors_cbass-1.png -------------------------------------------------------------------------------- /docs/reference/figures/README-carp_dendro-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/docs/reference/figures/README-carp_dendro-1.png -------------------------------------------------------------------------------- /docs/reference/figures/README-cbass-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/docs/reference/figures/README-cbass-1.png -------------------------------------------------------------------------------- /docs/reference/figures/README-cbass_heatmap2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/docs/reference/figures/README-cbass_heatmap2-1.png -------------------------------------------------------------------------------- /docs/reference/figures/README-cbass_rowdendro-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/docs/reference/figures/README-cbass_rowdendro-1.png -------------------------------------------------------------------------------- /docs/reference/plot_carp-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/docs/reference/plot_carp-1.png -------------------------------------------------------------------------------- /docs/reference/plot_carp-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/docs/reference/plot_carp-2.png -------------------------------------------------------------------------------- /docs/reference/plot_carp-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/docs/reference/plot_carp-3.png -------------------------------------------------------------------------------- /docs/reference/plot_cbass-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/docs/reference/plot_cbass-1.png -------------------------------------------------------------------------------- /docs/reference/plot_cbass-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/docs/reference/plot_cbass-2.png -------------------------------------------------------------------------------- /docs/reference/plot_cbass-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/docs/reference/plot_cbass-3.png -------------------------------------------------------------------------------- /docs/reference/plot_cbass-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/docs/reference/plot_cbass-4.png -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | citHeader("To cite clustRviz in publications, please use:") 2 | 3 | citEntry(entry = "ARTICLE", 4 | title = "Dynamic Visualization and Fast Computation for Convex Clustering via Algorithmic Regularization", 5 | author = personList(as.person("Michael Weylandt"), 6 | as.person("John Nagorski"), 7 | as.person("Genevera I. Allen")), 8 | journal = "Journal of Computational and Graphical Statistics", 9 | url = "https://arxiv.org/abs/1901.01477", 10 | year = 2020, 11 | doi = "10.1080/10618600.2019.1629943", 12 | volume = 29, 13 | number = 1, 14 | pages = "87-96", 15 | 16 | textVersion = 17 | paste("Michael Weylandt, John Nagorski, and Genevera I. Allen.", 18 | sQuote("Dynamic Visualization and Fast Computation for Convex Clustering via Algorithmic Regularization,"), 19 | " Journal of Computational and Graphical Statistics 29(1), pp. 87-96. 2020. DOI: 10.1080/10618600.2019.1629943.", 20 | "ArXiv Pre-Print 1901.01477 (https://arxiv.org/abs/1901.01477)") 21 | ) 22 | 23 | citEntry(entry = "INPROCEEDINGS", 24 | title = "Splitting Methods for Convex Bi-Clustering and Co-Clustering", 25 | author = personList(as.person("Michael Weylandt")), 26 | booktitle = "{DSW} 2019: Proceedings of the {IEEE} 2019 Data Science Workshop", 27 | url = "https://arxiv.org/abs/1901.06075", 28 | year = 2019, 29 | doi = "10.1109/DSW.2019.8755599", 30 | pages = "237-242", 31 | publisher = "IEEE", 32 | editor = "George Karypis and George Michailidis and Rebecca Willett", 33 | 34 | textVersion = 35 | paste("Michael Weylandt.", 36 | sQuote("Splitting Methods for Convex Bi-Clustering and Co-Clustering,"), 37 | " DSW 2019: Proceedings of the IEEE 2019 Data Science Workshop, pp. 237-242. 2019. DOI: 10.1109/DSW.2019.8755599.", 38 | "ArXiv Pre-Print 1901.06075 (https://arxiv.org/abs/1901.06075)"), 39 | 40 | header = 41 | "If you use the CBASS() or convex_biclustering() functions, please also cite:" 42 | ) 43 | 44 | citFooter("For more details and additional references, see the papers and the package vignettes. Thank you for using clustRviz!") 45 | -------------------------------------------------------------------------------- /man/CARP.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/carp.R 3 | \name{CARP} 4 | \alias{CARP} 5 | \title{Compute \code{CARP} (Convex Clustering) Solution Path} 6 | \usage{ 7 | CARP( 8 | X, 9 | ..., 10 | weights = sparse_rbf_kernel_weights(k = "auto", phi = "auto", dist.method = 11 | "euclidean", p = 2), 12 | labels = rownames(X), 13 | X.center = TRUE, 14 | X.scale = FALSE, 15 | back_track = FALSE, 16 | exact = FALSE, 17 | norm = 2, 18 | t = 1.05, 19 | npcs = min(4L, NCOL(X), NROW(X)), 20 | dendrogram.scale = NULL, 21 | impute_func = function(X) { if (anyNA(X)) missForest(X)$ximp else X 22 | }, 23 | status = (interactive() && (clustRviz_logger_level() \%in\% c("MESSAGE", "WARNING", 24 | "ERROR"))) 25 | ) 26 | } 27 | \arguments{ 28 | \item{X}{The data matrix (\eqn{X \in R^{n \times p}}{X}): rows correspond to 29 | the observations (to be clustered) and columns to the variables (which 30 | will not be clustered). If \code{X} has missing values - \code{NA} or 31 | \code{NaN} values - they will be automatically imputed.} 32 | 33 | \item{...}{Unused arguements. An error will be thrown if any unrecognized 34 | arguments as given. All arguments other than \code{X} must be given 35 | by name.} 36 | 37 | \item{weights}{One of the following: \itemize{ 38 | \item A function which, when called with argument \code{X}, 39 | returns an b-by-n matrix of fusion weights. 40 | \item A matrix of size n-by-n containing fusion weights 41 | }} 42 | 43 | \item{labels}{A character vector of length \eqn{n}: observations (row) labels} 44 | 45 | \item{X.center}{A logical: Should \code{X} be centered columnwise?} 46 | 47 | \item{X.scale}{A logical: Should \code{X} be scaled columnwise?} 48 | 49 | \item{back_track}{A logical: Should back-tracking be used to exactly identify fusions? 50 | By default, back-tracking is not used.} 51 | 52 | \item{exact}{A logical: Should the exact solution be computed using an iterative algorithm? 53 | By default, algorithmic regularization is applied and the exact solution 54 | is not computed. Setting \code{exact = TRUE} often significantly increases 55 | computation time.} 56 | 57 | \item{norm}{Which norm to use in the fusion penalty? Currently only \code{1} 58 | and \code{2} (default) are supported.} 59 | 60 | \item{t}{A number greater than 1: the size of the multiplicative update to 61 | the cluster fusion regularization parameter (not used by 62 | back-tracking variants). Typically on the scale of \code{1.005} to \code{1.1}.} 63 | 64 | \item{npcs}{An integer >= 2. The number of principal components to compute 65 | for path visualization.} 66 | 67 | \item{dendrogram.scale}{A character string denoting how the scale of dendrogram 68 | regularization proportions should be visualized. 69 | Choices are \code{'original'} or \code{'log'}; if not 70 | provided, a data-driven heuristic choice is used.} 71 | 72 | \item{impute_func}{A function used to impute missing data in \code{X}. By default, 73 | the \code{\link[missForest]{missForest}} function from the 74 | package of the same name is used. This provides a flexible 75 | potentially non-linear imputation function. This function 76 | has to return a data matrix with no \code{NA} values. 77 | Note that, consistent with base \code{R}, both \code{NaN} 78 | and \code{NA} are treaded as "missing values" for imputation.} 79 | 80 | \item{status}{Should a status message be printed to the console?} 81 | } 82 | \value{ 83 | An object of class \code{CARP} containing the following elements (among others): 84 | \itemize{ 85 | \item \code{X}: the original data matrix 86 | \item \code{n}: the number of observations (rows of \code{X}) 87 | \item \code{p}: the number of variables (columns of \code{X}) 88 | \item \code{alg.type}: the \code{CARP} variant used 89 | \item \code{X.center}: a logical indicating whether \code{X} was centered 90 | column-wise before clustering 91 | \item \code{X.scale}: a logical indicating whether \code{X} was scaled 92 | column-wise before centering 93 | \item \code{weight_type}: a record of the scheme used to create 94 | fusion weights 95 | } 96 | } 97 | \description{ 98 | \code{CARP} returns a fast approximation to the Convex Clustering 99 | solution path along with visualizations such as dendrograms and 100 | cluster paths. \code{CARP} solves the Convex Clustering problem via an efficient 101 | Algorithmic Regularization scheme. 102 | } 103 | \examples{ 104 | carp_fit <- CARP(presidential_speech[1:10,1:4]) 105 | print(carp_fit) 106 | plot(carp_fit) 107 | } 108 | -------------------------------------------------------------------------------- /man/CBASS.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cbass.R 3 | \name{CBASS} 4 | \alias{CBASS} 5 | \title{Compute \code{CBASS} (Convex BiClustering) Solution Path} 6 | \usage{ 7 | CBASS( 8 | X, 9 | ..., 10 | row_weights = sparse_rbf_kernel_weights(k = "auto", phi = "auto", dist.method = 11 | "euclidean", p = 2), 12 | col_weights = sparse_rbf_kernel_weights(k = "auto", phi = "auto", dist.method = 13 | "euclidean", p = 2), 14 | row_labels = rownames(X), 15 | col_labels = colnames(X), 16 | X.center.global = TRUE, 17 | t = 1.01, 18 | back_track = FALSE, 19 | exact = FALSE, 20 | norm = 2, 21 | npcs = min(4L, NCOL(X), NROW(X)), 22 | dendrogram.scale = NULL, 23 | status = (interactive() && (clustRviz_logger_level() \%in\% c("MESSAGE", "WARNING", 24 | "ERROR"))) 25 | ) 26 | } 27 | \arguments{ 28 | \item{X}{The data matrix (\eqn{X \in R^{n \times p}}{X}). 29 | If \code{X} has missing values - \code{NA} or 30 | \code{NaN} values - they will be automatically imputed.} 31 | 32 | \item{...}{Unused arguements. An error will be thrown if any unrecognized 33 | arguments as given.} 34 | 35 | \item{row_weights}{One of the following: \itemize{ 36 | \item A function which, when called with argument \code{X}, 37 | returns a n-by-n matrix of fusion weights. 38 | \item A matrix of size n-by-n containing fusion weights 39 | } 40 | Note that the weights will be renormalized to sum to 41 | \eqn{1/\sqrt{n}} internally.} 42 | 43 | \item{col_weights}{One of the following: \itemize{ 44 | \item A function which, when called with argument \code{t(X)}, 45 | returns a p-by-p matrix of fusion weights. (Note the 46 | transpose.) 47 | \item A matrix of size p-by-p containing fusion weights 48 | } 49 | Note that the weights will be renormalized to sum to 50 | \eqn{1/\sqrt{p}} internally.} 51 | 52 | \item{row_labels}{A character vector of length \eqn{n}: row (observation) labels} 53 | 54 | \item{col_labels}{A character vector of length \eqn{p}: column (variable) labels} 55 | 56 | \item{X.center.global}{A logical: Should \code{X} be centered globally? 57 | \emph{I.e.}, should the global mean of \code{X} be subtracted?} 58 | 59 | \item{t}{A number greater than 1: the size of the multiplicative update to 60 | the cluster fusion regularization parameter (not used by 61 | back-tracking variants). Typically on the scale of \code{1.005} to \code{1.1}.} 62 | 63 | \item{back_track}{A logical: Should back-tracking be used to exactly identify fusions? 64 | By default, back-tracking is not used.} 65 | 66 | \item{exact}{A logical: Should the exact solution be computed using an iterative algorithm? 67 | By default, algorithmic regularization is applied and the exact solution 68 | is not computed. Setting \code{exact = TRUE} often significantly increases 69 | computation time.} 70 | 71 | \item{norm}{Which norm to use in the fusion penalty? Currently only \code{1} 72 | and \code{2} (default) are supported.} 73 | 74 | \item{npcs}{An integer >= 2. The number of principal components to compute 75 | for path visualization.} 76 | 77 | \item{dendrogram.scale}{A character string denoting how the scale of dendrogram 78 | regularization proportions should be visualized. 79 | Choices are \code{'original'} or \code{'log'}; if not 80 | provided, a data-driven heuristic choice is used.} 81 | 82 | \item{status}{Should a status message be printed to the console?} 83 | } 84 | \value{ 85 | An object of class \code{CBASS} containing the following elements (among others): 86 | \itemize{ 87 | \item \code{X}: the original data matrix 88 | \item \code{n}: the number of observations (rows of \code{X}) 89 | \item \code{p}: the number of variables (columns of \code{X}) 90 | \item \code{alg.type}: the \code{CBASS} variant used 91 | \item \code{row_fusions}: A record of row fusions - see the documentation 92 | of \code{\link{CARP}} for details of what this 93 | may include. 94 | \item \code{col_fusions}: A record of column fusions - see the documentation 95 | of \code{\link{CARP}} for details of what this 96 | may include. 97 | } 98 | } 99 | \description{ 100 | \code{CBASS} returns a fast approximation to the Convex BiClustering 101 | solution path along with visualizations such as dendrograms and 102 | heatmaps. \code{CBASS} solves the Convex Biclustering problem using an efficient 103 | Algorithmic Regularization scheme. 104 | } 105 | \examples{ 106 | \dontrun{ 107 | cbass_fit <- CBASS(presidential_speech) 108 | print(cbass_fit) 109 | plot(cbass_fit) 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /man/accessors_carp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/accessors_carp.R 3 | \name{get_cluster_labels} 4 | \alias{get_cluster_labels} 5 | \alias{get_cluster_labels.CARP} 6 | \alias{get_cluster_centroids} 7 | \alias{get_cluster_centroids.CARP} 8 | \alias{get_clustered_data} 9 | \alias{get_clustered_data.CARP} 10 | \title{Get Clustering Results for \code{CARP}} 11 | \usage{ 12 | get_cluster_labels(x, ...) 13 | 14 | \method{get_cluster_labels}{CARP}(x, ..., percent, k) 15 | 16 | get_cluster_centroids(x, ...) 17 | 18 | \method{get_cluster_centroids}{CARP}(x, ..., percent, k, refit = TRUE) 19 | 20 | get_clustered_data(x, ...) 21 | 22 | \method{get_clustered_data}{CARP}(x, ..., percent, k, refit = TRUE) 23 | } 24 | \arguments{ 25 | \item{x}{An object of class \code{CARP} as produced by \code{\link{CARP}}} 26 | 27 | \item{...}{Additional arguments - if any are provided, an error is signalled.} 28 | 29 | \item{percent}{A number between 0 and 1, giving the regularization level (as 30 | a fraction of the final regularization level used) at which to 31 | get cluster labels.} 32 | 33 | \item{k}{The desired number of clusters. If no iteration with exactly this 34 | many clusters is found, the first iterate with fewer than \code{k} 35 | clusters is used.} 36 | 37 | \item{refit}{Should "naive" centroids (\code{TRUE}) or the actual centroids 38 | estimated by convex clustering be used? The default (\code{refit = TRUE}) 39 | centroids returned are actual centroids (mean) of all elements 40 | assigned to that cluster; if \code{refit = FALSE}, the \eqn{\hat{U}} 41 | from the convex clustering problem is used. Due to the global 42 | shrinkage imposed, these clusters are more "shrunk together" than 43 | the naive clusters.} 44 | } 45 | \description{ 46 | \code{get_cluster_labels} returns a factor vector of cluster labels. 47 | \code{get_clustered_data} returns a matrix (with the same dimensions and names 48 | as the original data), but with the values for each observation replaced by 49 | its "estimated" value (\emph{i.e.}, the appropriate cluster centroid). 50 | \code{get_cluster_centroids} returns a \code{k}-by-\code{p} matrix of cluster 51 | centroids, with the same column names as the original data. 52 | } 53 | \details{ 54 | \code{get_clustered_data} and \code{get_cluster_centroids} return 55 | centroids on the original scale of the data, independent of any pre-processing 56 | flags passed to \code{CARP}. Note that exactly one of \code{percent} and 57 | \code{k} must be supplied to each function. 58 | } 59 | \examples{ 60 | carp_fit <- CARP(presidential_speech) 61 | 62 | # Get clustering results from 50\% along the path 63 | get_cluster_labels(carp_fit, percent = 0.5) 64 | 65 | # Get labels corresponding to the 3 cluster solution 66 | get_cluster_labels(carp_fit, k = 3) 67 | 68 | # Get 3 cluster centroids 69 | get_cluster_centroids(carp_fit, k = 3) 70 | 71 | # Get the clustered estimates for k = 3 clusters 72 | get_clustered_data(carp_fit, k = 3) 73 | } 74 | -------------------------------------------------------------------------------- /man/accessors_cbass.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/accessors_cbass.R 3 | \name{get_cluster_labels.CBASS} 4 | \alias{get_cluster_labels.CBASS} 5 | \alias{get_cluster_centroids.CBASS} 6 | \alias{get_clustered_data.CBASS} 7 | \title{Get Clustering Results for \code{CBASS}} 8 | \usage{ 9 | \method{get_cluster_labels}{CBASS}(x, ..., percent, k.row, k.col, type = c("row", "col")) 10 | 11 | \method{get_cluster_centroids}{CBASS}(x, ..., percent, k.row, k.col, refit = TRUE) 12 | 13 | \method{get_clustered_data}{CBASS}(x, ..., percent, k.row, k.col, refit = TRUE) 14 | } 15 | \arguments{ 16 | \item{x}{An object of class \code{CARP} as produced by \code{\link{CBASS}}} 17 | 18 | \item{...}{Additional arguments - if any are provided, an error is signalled.} 19 | 20 | \item{percent}{A number between 0 and 1, giving the regularization level (as 21 | a fraction of the final regularization level used) at which to 22 | get cluster labels.} 23 | 24 | \item{k.row}{The desired number of row clusters} 25 | 26 | \item{k.col}{The desired number of column clusters} 27 | 28 | \item{type}{For \code{get_cluster_labels}, which set of labels to return - 29 | row (observation) or column (feature)} 30 | 31 | \item{refit}{Should "naive" centroids (\code{TRUE}) or the actual centroids 32 | estimated by convex clustering be used? The default (\code{refit = TRUE}) 33 | centroids returned are actual centroids (mean) of all elements 34 | assigned to that cluster; if \code{refit = FALSE}, the \eqn{\hat{U}} 35 | from the convex biclustering problem is used. Due to the global 36 | shrinkage imposed, these clusters are more "shrunk together" than 37 | the naive clusters.} 38 | } 39 | \description{ 40 | \code{get_cluster_labels} returns a factor vector of cluster labels. 41 | \code{get_cluster_centroids} returns a \code{k1}-by-\code{k2} matrix with the 42 | estimated centroid of the \code{k1}-th row cluster and the \code{k2}-th 43 | column cluster. 44 | \code{get_clustered_data} returns a matrix (with the same dimensions and names 45 | as the original data), but with the values for each row replaced by 46 | its "estimated" value (\emph{i.e.}, the appropriate cluster centroid). 47 | } 48 | \details{ 49 | \code{get_clustered_data} returns centroids on the original scale of 50 | the data, independent of any pre-processing flags passed to \code{CBASS}. 51 | Note that exactly one of \code{percent}, \code{k.row}, \code{k.col} 52 | must be supplied and that that \code{k.row} (if suppplied) will be 53 | used even if \code{type = "col"} and \emph{vice versa}. 54 | } 55 | \examples{ 56 | cbass_fit <- CBASS(presidential_speech) 57 | 58 | # Get row clustering results from 50\% along the path 59 | get_cluster_labels(cbass_fit, percent = 0.5) 60 | 61 | # Get column clustering corresponding to the 3 cluster solution 62 | get_cluster_labels(cbass_fit, k.col = 3, type = "col") 63 | 64 | # Get row clustering corresponding to the 3 column clusters 65 | get_cluster_labels(cbass_fit, k.col = 3, type = "row") 66 | 67 | # Get cluster centroids partially down the path 68 | get_cluster_centroids(cbass_fit, percent = 0.5) 69 | 70 | # Get clustered data 71 | image(get_clustered_data(cbass_fit, k.row = 2)) 72 | } 73 | -------------------------------------------------------------------------------- /man/authors.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{authors} 5 | \alias{authors} 6 | \title{Word Count Data from Four English-Language Authors} 7 | \format{ 8 | An object of class \code{matrix} (inherits from \code{array}) with 841 rows and 69 columns. 9 | } 10 | \usage{ 11 | authors 12 | } 13 | \description{ 14 | This data set (\eqn{n=841, p = 69}) consists of counts of common words 15 | appearing in texts written by four popular English-language authors 16 | (Jane Austen, Jack London, William Shakespeare, and John Milton). 17 | The row names are the authors (true cluster labels) and the column 18 | names are the words (slightly processed). 19 | } 20 | \keyword{datasets} 21 | -------------------------------------------------------------------------------- /man/clustRviz.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/clustRviz-package.R 3 | \docType{package} 4 | \name{clustRviz} 5 | \alias{clustRviz} 6 | \title{clustRviz: Efficient Computation and Dynamic Visualizations for Convex Clustering and BiClustering} 7 | \description{ 8 | Implementation of the CARP and CBASS algorithms for fast convex clustering. 9 | See the package vignettes and accompanying paper for details of the algorithm 10 | and comparisons to existing methods. 11 | } 12 | -------------------------------------------------------------------------------- /man/clustRviz_logging.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/logging.R 3 | \name{clustRviz_logger_level} 4 | \alias{clustRviz_logger_level} 5 | \alias{clustRviz_logging} 6 | \title{\code{ClustRViz} Package Logging Functionality} 7 | \usage{ 8 | clustRviz_logger_level( 9 | level = c("ERROR", "WARNING", "MESSAGE", "INFO", "DEBUG") 10 | ) 11 | } 12 | \arguments{ 13 | \item{level}{The desired new log level. Available levels are \itemize{ 14 | \item \code{ERROR} - corresponding to \code{base::stop}; 15 | \item \code{WARNING} - corresponding to \code{base::warning}; 16 | \item \code{MESSAGE} - corresponding to \code{base::message}; 17 | \item \code{INFO}; and 18 | \item \code{DEBUG.} 19 | } If omitted, the log level is not changed (and the current level is still 20 | returned invisibly.) See below for details about the different levels.} 21 | } 22 | \value{ 23 | The previous log level (invisibly). 24 | } 25 | \description{ 26 | Control the global logging level for the \code{clustRviz} package. 27 | } 28 | \details{ 29 | The \code{clustRviz} package has a multi-level logging system, with a single 30 | global log level; (which applies to both \code{R} and \code{C++} level 31 | functionality.) the levels are, in decreasing order, \code{ERROR}, 32 | \code{WARNING}, \code{MESSAGE} (default), \code{INFO}, \code{DEBUG}. 33 | 34 | To change the amount of output from the \code{clustRviz} package, the 35 | \code{clustRviz_logger_level} function can be used to adjust the global 36 | log level. The \code{INFO} and \code{DEBUG} levels can be quite verbose 37 | and may significantly slow down the package. 38 | } 39 | \examples{ 40 | # Switch to INFO level and fit somewhat loudly 41 | clustRviz_logger_level("INFO") 42 | CARP(presidential_speech) 43 | 44 | # Return to default behavior 45 | clustRviz_logger_level("MESSAGE") 46 | # Will give a few messages about phases of fitting 47 | CARP(presidential_speech) 48 | 49 | # If we switch to ERRORs only, no progress message 50 | clustRviz_logger_level("ERROR") 51 | CARP(presidential_speech) 52 | 53 | # Return to default 54 | clustRviz_logger_level("MESSAGE") 55 | 56 | # Normally this prints messages as the paths are calculated 57 | CARP(presidential_speech) 58 | # But it can be suppressed using standard R functions 59 | suppressMessages(CARP(presidential_speech)) 60 | } 61 | -------------------------------------------------------------------------------- /man/convex_biclustering.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/solvers.R 3 | \name{convex_biclustering} 4 | \alias{convex_biclustering} 5 | \title{Compute Convex BiClustering Solution Path on a User-Specified Grid} 6 | \usage{ 7 | convex_biclustering( 8 | X, 9 | ..., 10 | lambda_grid, 11 | row_weights = sparse_rbf_kernel_weights(k = "auto", phi = "auto", dist.method = 12 | "euclidean", p = 2), 13 | col_weights = sparse_rbf_kernel_weights(k = "auto", phi = "auto", dist.method = 14 | "euclidean", p = 2), 15 | X.center.global = TRUE, 16 | norm = 2, 17 | status = (interactive() && (clustRviz_logger_level() \%in\% c("MESSAGE", "WARNING", 18 | "ERROR"))) 19 | ) 20 | } 21 | \arguments{ 22 | \item{X}{The data matrix (\eqn{X \in R^{n \times p}}{X}). 23 | If \code{X} has missing values - \code{NA} or 24 | \code{NaN} values - they will be automatically imputed.} 25 | 26 | \item{...}{Unused arguements. An error will be thrown if any unrecognized 27 | arguments as given.} 28 | 29 | \item{lambda_grid}{A user-supplied set of \eqn{\lambda}{lambda} values at which 30 | to solve the convex biclustering problem. These must be strictly 31 | positive values and will be automatically sorted internally.} 32 | 33 | \item{row_weights}{One of the following: \itemize{ 34 | \item A function which, when called with argument \code{X}, 35 | returns a n-by-n matrix of fusion weights. 36 | \item A matrix of size n-by-n containing fusion weights 37 | } 38 | Note that the weights will be renormalized to sum to 39 | \eqn{1/\sqrt{n}} internally.} 40 | 41 | \item{col_weights}{One of the following: \itemize{ 42 | \item A function which, when called with argument \code{t(X)}, 43 | returns a p-by-p matrix of fusion weights. (Note the 44 | transpose.) 45 | \item A matrix of size p-by-p containing fusion weights 46 | } 47 | Note that the weights will be renormalized to sum to 48 | \eqn{1/\sqrt{p}} internally.} 49 | 50 | \item{X.center.global}{A logical: Should \code{X} be centered globally? 51 | \emph{I.e.}, should the global mean of \code{X} be subtracted?} 52 | 53 | \item{norm}{Which norm to use in the fusion penalty? Currently only \code{1} 54 | and \code{2} (default) are supported.} 55 | 56 | \item{status}{Should a status message be printed to the console?} 57 | } 58 | \value{ 59 | An object of class \code{convex_biclustering} containing the 60 | following elements (among others): 61 | \itemize{ 62 | \item \code{X}: the original data matrix 63 | \item \code{n}: the number of observations (rows of \code{X}) 64 | \item \code{p}: the number of variables (columns of \code{X}) 65 | \item \code{U}: a tensor (3-array) of clustering solutions 66 | } 67 | } 68 | \description{ 69 | \code{convex_biclustering} calculates the convex biclustering solution path 70 | at a user-specified grid of lambda values (or just a single value). It is, 71 | in general, difficult to know a useful set of lambda values \emph{a priori}, 72 | so this function is more useful for timing comparisons and methodological 73 | research than applied work. 74 | } 75 | \details{ 76 | Compared to the \code{\link{CBASS}} function, the returned object 77 | is much more "bare-bones," containing only the estimated \eqn{U} matrices, 78 | and no information used for dendrogram or path visualizations. 79 | } 80 | \examples{ 81 | \dontrun{ 82 | biclustering_fit <- convex_biclustering(presidential_speech, lambda_grid = 1:100) 83 | print(biclustering_fit) 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /man/convex_clustering.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/solvers.R 3 | \name{convex_clustering} 4 | \alias{convex_clustering} 5 | \title{Compute Convex Clustering Solution Path on a User-Specified Grid} 6 | \usage{ 7 | convex_clustering( 8 | X, 9 | ..., 10 | lambda_grid, 11 | weights = sparse_rbf_kernel_weights(k = "auto", phi = "auto", dist.method = 12 | "euclidean", p = 2), 13 | X.center = TRUE, 14 | X.scale = FALSE, 15 | norm = 2, 16 | impute_func = function(X) { if (anyNA(X)) missForest(X)$ximp else X 17 | }, 18 | status = (interactive() && (clustRviz_logger_level() \%in\% c("MESSAGE", "WARNING", 19 | "ERROR"))) 20 | ) 21 | } 22 | \arguments{ 23 | \item{X}{The data matrix (\eqn{X \in R^{n \times p}}{X}): rows correspond to 24 | the observations (to be clustered) and columns to the variables (which 25 | will not be clustered). If \code{X} has missing values - \code{NA} or 26 | \code{NaN} values - they will be automatically imputed.} 27 | 28 | \item{...}{Unused arguements. An error will be thrown if any unrecognized 29 | arguments as given. All arguments other than \code{X} must be given 30 | by name.} 31 | 32 | \item{lambda_grid}{A user-supplied set of \eqn{\lambda}{lambda} values at which 33 | to solve the convex clustering problem. These must be strictly 34 | positive values and will be automatically sorted internally.} 35 | 36 | \item{weights}{One of the following: \itemize{ 37 | \item A function which, when called with argument \code{X}, 38 | returns an b-by-n matrix of fusion weights. 39 | \item A matrix of size n-by-n containing fusion weights 40 | }} 41 | 42 | \item{X.center}{A logical: Should \code{X} be centered columnwise?} 43 | 44 | \item{X.scale}{A logical: Should \code{X} be scaled columnwise?} 45 | 46 | \item{norm}{Which norm to use in the fusion penalty? Currently only \code{1} 47 | and \code{2} (default) are supported.} 48 | 49 | \item{impute_func}{A function used to impute missing data in \code{X}. By default, 50 | the \code{\link[missForest]{missForest}} function from the 51 | package of the same name is used. This provides a flexible 52 | potentially non-linear imputation function. This function 53 | has to return a data matrix with no \code{NA} values. 54 | Note that, consistent with base \code{R}, both \code{NaN} 55 | and \code{NA} are treaded as "missing values" for imputation.} 56 | 57 | \item{status}{Should a status message be printed to the console?} 58 | } 59 | \value{ 60 | An object of class \code{convex_clustering} containing the following elements (among others): 61 | \itemize{ 62 | \item \code{X}: the original data matrix 63 | \item \code{n}: the number of observations (rows of \code{X}) 64 | \item \code{p}: the number of variables (columns of \code{X}) 65 | \item \code{X.center}: a logical indicating whether \code{X} was centered 66 | column-wise before clustering 67 | \item \code{X.scale}: a logical indicating whether \code{X} was scaled 68 | column-wise before centering 69 | \item \code{weight_type}: a record of the scheme used to create 70 | fusion weights 71 | \item \code{U}: a tensor (3-array) of clustering solutions 72 | } 73 | } 74 | \description{ 75 | \code{convex_clustering} calculates the convex clustering solution path 76 | at a user-specified grid of lambda values (or just a single value). It is, 77 | in general, difficult to know a useful set of lambda values \emph{a priori}, 78 | so this function is more useful for timing comparisons and methodological 79 | research than applied work. 80 | } 81 | \details{ 82 | Compared to the \code{\link{CARP}} function, the returned object 83 | is much more "bare-bones," containing only the estimated \eqn{U} matrices, 84 | and no information used for dendrogram or path visualizations. 85 | } 86 | \examples{ 87 | clustering_fit <- convex_clustering(presidential_speech[1:10,1:4], lambda_grid = 1:100) 88 | print(clustering_fit) 89 | } 90 | -------------------------------------------------------------------------------- /man/figures/README-carp_dendro-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/man/figures/README-carp_dendro-1.png -------------------------------------------------------------------------------- /man/figures/README-cbass-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/man/figures/README-cbass-1.png -------------------------------------------------------------------------------- /man/figures/README-cbass_heatmap2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/man/figures/README-cbass_heatmap2-1.png -------------------------------------------------------------------------------- /man/figures/README-cbass_rowdendro-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSlingers/clustRviz/0a91ff91571c559579e51431636f195814f032e2/man/figures/README-cbass_rowdendro-1.png -------------------------------------------------------------------------------- /man/options.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/options.R 3 | \name{clustRviz_options} 4 | \alias{clustRviz_options} 5 | \alias{clustRviz_reset_options} 6 | \title{\code{ClustRViz} Options} 7 | \usage{ 8 | clustRviz_options(...) 9 | 10 | clustRviz_reset_options() 11 | } 12 | \arguments{ 13 | \item{...}{Options (to be passed by name). See below for available options.} 14 | } 15 | \description{ 16 | Advanced control of algorithmic options for \code{\link{CARP}} and \code{\link{CBASS}}. 17 | The \code{clustRviz_reset_options} function returns options to "factory-fresh" 18 | settings. 19 | } 20 | \details{ 21 | The following options can be set by name:\itemize{ 22 | \item \code{epsilon} The initial step size (fixed during the "burn-in" period) 23 | \item \code{stopping_threshold}: Stopping threshold to use for \code{convex_clustering}, 24 | \code{convex_biclustering}, \code{CARP(exact = TRUE)}, 25 | and \code{CBASS(exact = TRUE)}. By default, 26 | this is set to \code{1e-10} - a very conservative 27 | threshold: making it larger can significantly 28 | improve performance 29 | \item \code{max_iter} An integer: the maximum number of iterations to perform 30 | \item \code{max_inner_iter} An integer: the maximum number of iterations for 31 | the iterative solvers to perform at a single 32 | value of \eqn{\lambda}{\lambda}. 33 | \item \code{burn_in} An integer: the number of initial iterations at a fixed 34 | (small) value of \eqn{\gamma} 35 | \item \code{viz_initial_step} The initial (large) step size used in back-tracking 36 | (\code{CARP-VIZ} and \code{CBASS-VIZ}) algorithms. 37 | \item \code{viz_small_step} The secondary (small) step size used in back-tracking 38 | (\code{CARP-VIZ} and \code{CBASS-VIZ}) algorithms. 39 | \item \code{viz_max_inner_iter} The maximum number of iterations to perform 40 | in the inner loop of back-tracking (\code{CARP-VIZ} 41 | and \code{CBASS-VIZ}) algorithms. 42 | \item \code{keep} \code{\link{CARP}} and \code{\link{CBASS}} keep every 43 | \code{keep}-th iteration even if no fusions are detected. 44 | Increasing this parameter may improve performance, at 45 | the expense of returning a finer grid. 46 | \item \code{rho} For advanced users only (not advisable to change): the penalty 47 | parameter used for the augmented Lagrangian. 48 | \item \code{keep_debug_info}: Should additional debug info (currently only the V-path) 49 | be kept? 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /man/plot_carp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_carp.R 3 | \name{plot.CARP} 4 | \alias{plot.CARP} 5 | \title{Visualize the results of Convex Clustering (\code{CARP})} 6 | \usage{ 7 | \method{plot}{CARP}( 8 | x, 9 | ..., 10 | type = c("dendrogram", "path", "heatmap"), 11 | dynamic = FALSE, 12 | interactive = FALSE, 13 | axis = c("PC1", "PC2"), 14 | percent, 15 | k, 16 | percent.seq = seq(0, 1, 0.01), 17 | slider_y = -0.3, 18 | refit = FALSE 19 | ) 20 | } 21 | \arguments{ 22 | \item{x}{An object of class \code{CARP} as returned by \code{\link{CARP}}} 23 | 24 | \item{...}{Additional arguments, which are handled differently for different 25 | values of \code{type}.\itemize{ 26 | \item When \code{type} is \code{"path"}, the presence of 27 | unknown arguments triggers an error; 28 | \item when \code{type == "dendrogram"} 29 | \code{...} is forwarded to \code{\link[stats]{dendrogram}}; and 30 | \item when \code{type == "heatmap"} and \code{interactive == TRUE}, 31 | \code{...} is forwarded to \code{\link[heatmaply]{heatmaply}}. 32 | } See the documentation of \code{\link[ggplot2]{ggplot}} (for \code{interactive == FALSE}) 33 | or \code{\link[plotly]{plotly}} (for \code{interactive == TRUE}) for details about additional 34 | supported arguments to the corresponding plot \code{type}.} 35 | 36 | \item{type}{A string indicating the type of visualization to show (see details above).} 37 | 38 | \item{dynamic}{A logical scalar.Should the resulting animation be dynamic (animated) or not? 39 | If \code{TRUE}, a dynamic visualization which varies along the CARP solution path at a 40 | grid given by \code{percent.seq} is produced. If \code{FALSE}, a fixed visualization at a single 41 | solution (determined by either \code{percent} or \code{k} if supplied) is produced.} 42 | 43 | \item{interactive}{A logical scalar. Should the resulting animation be interactive or not? 44 | If \code{TRUE}, an interactive visualization is produced by javascript(\code{\link{plotly}}). 45 | If \code{FALSE}, a non-interactive visualization is produced by \code{\link[ggplot2]{ggplot}}.} 46 | 47 | \item{axis}{A character vector of length two indicating which features or principal 48 | components to use as the axes in the \code{type = "path"} visualization. 49 | Currently only features like \code{"PC1"} or \code{"PC2"} (indicating 50 | the first principal component projections) are supported.} 51 | 52 | \item{percent}{A number between 0 and 1, giving the regularization level (as 53 | a fraction of the final regularization level used) at which to 54 | assign clusters in the static (\code{type = "dendrogram"} or \code{type = "path"}) 55 | plots.} 56 | 57 | \item{k}{An integer indicating the desired number of clusters to be displayed 58 | in the static plots. If no \code{CARP} iteration with exactly this 59 | many clusters is found, the first iterate with fewer than \code{k} 60 | clusters is used.} 61 | 62 | \item{percent.seq}{A grid of values of percent along which to generate dynamic 63 | visualizations (if dynamic == TRUE)} 64 | 65 | \item{slider_y}{A number to adjust the slider's vertical position for 66 | interactive dendrogram and interactive heatmap plots 67 | (ignored for other plot types).} 68 | 69 | \item{refit}{A logical scalar. Should "naive" centroids (TRUE) or the 70 | actual centroids estimated by convex clustering be used? 71 | When the default refit = FALSE, the estimated U from the convex 72 | clustering problem is used. The refit = TRUE returns actual 73 | centroids (mean) of all elements assigned to that cluster; 74 | Due to the global shrinkage imposed, these clusters are 75 | more "shrunk together" than the naive clusters. Only for the 76 | heatmap plots. (ignored for other plot types).} 77 | } 78 | \value{ 79 | The value of the return type depends on the \code{interactive} and \code{dynamic} arguments:\itemize{ 80 | \item if \code{interactive = FALSE} and \code{dynamic = FALSE}, an object of class \code{\link[ggplot2]{ggplot}} 81 | is returned; 82 | \item if \code{interactive = FALSE} and \code{dynamic = TRUE}, an object of class \code{\link[gganimate:gganimate-package]{gganim}} 83 | is returned; 84 | \item if \code{interactive = TRUE}, an object of class \code{\link[plotly]{plotly}} 85 | is returned. 86 | } All the plots can be plotted directly (by invoking its print method) or further 87 | manipulated by the user. 88 | } 89 | \description{ 90 | \code{plot.CARP} provides a range of ways to visualize the results of convex 91 | clustering, including: \itemize{ 92 | \item A dendrogram, illustrating the nested cluster hierarchy inferred from 93 | the convex clustering solution path (\code{type = "dendrogram"}); 94 | \item A static path plot, showing the coalescence of the estimated cluster centroids 95 | at a fixed value of the regularization parameter is increased (\code{type = "path"}); 96 | \item A \code{\link[gganimate]{gganimate}} plot, showing the coalescence of the 97 | estimated cluster centroids as the regularization parameter is increased 98 | (\code{dynamic = TRUE}) 99 | } 100 | } 101 | \examples{ 102 | carp_fit <- CARP(presidential_speech) 103 | plot(carp_fit, type = "path") 104 | plot(carp_fit, type = "dendrogram") 105 | plot(carp_fit, type = "heatmap") 106 | plot(carp_fit, type = "heatmap", dynamic = TRUE) 107 | \dontrun{ 108 | plot(carp_fit, type='heatmap', interactive=TRUE) 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /man/plot_cbass.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_cbass.R 3 | \name{plot.CBASS} 4 | \alias{plot.CBASS} 5 | \title{Visualize the results of Convex BiClustering (\code{CBASS})} 6 | \usage{ 7 | \method{plot}{CBASS}( 8 | x, 9 | ..., 10 | type = c("heatmap", "row.dendrogram", "col.dendrogram", "row.path", "col.path"), 11 | dynamic = FALSE, 12 | interactive = FALSE, 13 | percent, 14 | k.row, 15 | k.col, 16 | percent.seq = seq(0, 1, 0.01), 17 | axis = c("PC1", "PC2"), 18 | slider_y = -0.3, 19 | refit = FALSE 20 | ) 21 | } 22 | \arguments{ 23 | \item{x}{An object of class \code{CBASS} as returned by \code{\link{CBASS}}} 24 | 25 | \item{...}{Additional arguments, which are handled differently for different 26 | values of \code{type}.\itemize{ 27 | \item When \code{type} is \code{"heatmap"}, \code{"row.path"} or 28 | \code{"col.path"}, the presence of 29 | unknown arguments triggers an error; 30 | \item when \code{type == "row.dendrogram"} or \code{type == "col.dendrogram"}, 31 | \code{...} is forwarded to \code{\link[stats]{dendrogram}}; 32 | \item when \code{type == "heatmap"} and \code{interactive == TRUE}, \code{...} is forwarded to 33 | \code{\link[heatmaply]{heatmaply}}. 34 | } See the documentation of \code{\link[ggplot2]{ggplot}} (for \code{interactive == FALSE}) 35 | or \code{\link[plotly]{plotly}} (for \code{interactive == TRUE}) for details about additional 36 | supported arguments to the corresponding plot \code{type}.} 37 | 38 | \item{type}{A string indicating the type of visualization to show (see details above).} 39 | 40 | \item{dynamic}{A logical scalar.Should the resulting animation be dynamic (animated) or not? 41 | If \code{TRUE}, a dynamic visualization which varies along the CARP solution path at a 42 | grid given by \code{percent.seq} is produced. If \code{FALSE}, a fixed visualization at a single 43 | solution (determined by either \code{percent} or \code{k} if supplied) is produced.} 44 | 45 | \item{interactive}{A logical scalar. Should the resulting animation be interactive or not? 46 | If \code{TRUE}, an interactive visualization is produced by javascript(\code{\link{plotly}}). 47 | If \code{FALSE}, a non-interactive visualization is produced by \code{\link[ggplot2]{ggplot}}.} 48 | 49 | \item{percent}{A number between 0 and 1, giving the regularization level (as 50 | a fraction of the final regularization level used) at which to 51 | assign clusters in the static (\code{type = "dendrogram"} or \code{type = "path"}) 52 | plots.} 53 | 54 | \item{k.row}{An integer indicating the desired number of row clusters to be displayed 55 | in the static plots. (If plotting columns, the regularization level 56 | giving \code{k.row} rows clusters is used.)} 57 | 58 | \item{k.col}{An integer indicating the desired number of column clusters to be displayed 59 | in the static plots. (If plotting rows, the regularization level 60 | giving \code{k.col} column clusters is used.)} 61 | 62 | \item{percent.seq}{A grid of values of percent along which to generate dynamic 63 | visualizations (if dynamic == TRUE)} 64 | 65 | \item{axis}{A character vector of length two indicating which features or principal 66 | components to use as the axes in the path visualizations. 67 | Currently only features like \code{"PC1"} or \code{"PC2"} (indicating 68 | the first principal component projections) are supported.} 69 | 70 | \item{slider_y}{A number to adjust the slider's vertical position for 71 | interactive dendrogram plots (ignored for other plot types).} 72 | 73 | \item{refit}{A logical scalar. Should "naive" centroids (TRUE) or the 74 | actual centroids estimated by convex clustering be used? 75 | When the default refit = FALSE, the estimated U from the convex 76 | clustering problem is used. The refit = TRUE returns actual 77 | centroids (mean) of all elements assigned to that cluster; 78 | Due to the global shrinkage imposed, these clusters are 79 | more "shrunk together" than the naive clusters. Only for the 80 | heatmap plots. (ignored for other plot types).} 81 | } 82 | \value{ 83 | The value of the return type depends on the \code{interactive} and \code{dynamic}:\itemize{ 84 | \item if \code{interactive = FALSE} and \code{dynamic = FALSE}, an object of class 85 | \code{\link[ggplot2]{ggplot}} is returned; 86 | \item if \code{interactive = FALSE} and \code{dynamic = TRUE}, an object of class \code{\link[gganimate:gganimate-package]{gganim}} 87 | is returned; 88 | \item if \code{interactive = TRUE}, an object of class \code{\link[plotly]{plotly}} 89 | is returned. 90 | } All the plots can be plotted directly (by invoking its print method) or further 91 | manipulated by the user. 92 | } 93 | \description{ 94 | \code{plot.CBASS} provides a range of ways to visualize the results of convex 95 | clustering, including: \itemize{ 96 | \item Dendrograms, illustrating the nested cluster hierarchy inferred from 97 | the convex clustering solution path (\code{type = "row.dendrogram"} and 98 | \code{type = "col.dendrogram"} for the row (observation) and column 99 | (feature / variable) clusterings, respectively); 100 | \item Path plots, showing the coalescence of the estimated cluster centroids 101 | as the regularization parameter is increased (\code{type = "row.dendrogram"} 102 | and \code{type = "col.dendrogram"} for the row (observation) and column 103 | (feature / variable) clusterings, respectively); 104 | \item A cluster heatmap, displaying row and column histograms, as well 105 | as the clustered data matrix in a single graphic (\code{type = "heatmap"}); 106 | \item An interactive Javascript cluster (\code{interactive = TRUE}) 107 | } 108 | } 109 | \examples{ 110 | cbass_fit <- CBASS(presidential_speech) 111 | plot(cbass_fit) 112 | plot(cbass_fit, type = "col.path") 113 | plot(cbass_fit, type = "col.dendrogram") 114 | plot(cbass_fit, type = "heatmap") 115 | plot(cbass_fit, type = "heatmap", dynamic = TRUE) 116 | \dontrun{ 117 | plot(cbass_fit, type='heatmap', interactive=TRUE) 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /man/presidential_speech.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{presidential_speech} 5 | \alias{presidential_speech} 6 | \title{Log transformed word count of presidential speeches} 7 | \format{ 8 | A data.frame with 44 rows (one for each president) and 75 columns (log transformed word counts) 9 | } 10 | \source{ 11 | \url{http://www.presidency.ucsb.edu} 12 | } 13 | \usage{ 14 | presidential_speech 15 | } 16 | \description{ 17 | A dataset of the top 75 most variable log-transformed word counts for 18 | each US president aggregated over several speeches 19 | (Inaugural, State of the Union, etc.). 20 | Stop words have been removed and words have been stemmed. 21 | } 22 | \details{ 23 | Grover Cleveland was elected president twice (1892 and 1884). For our purposes his speeches are combined. 24 | } 25 | \keyword{datasets} 26 | -------------------------------------------------------------------------------- /man/print.ConvexBiClustering.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/solvers.R 3 | \name{print.ConvexBiClustering} 4 | \alias{print.ConvexBiClustering} 5 | \title{Print \code{\link{convex_biclustering}} Results} 6 | \usage{ 7 | \method{print}{ConvexBiClustering}(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{an object of class \code{convex_biclustering} as returned by 11 | \code{\link{convex_biclustering}}} 12 | 13 | \item{...}{Additional unused arguments} 14 | } 15 | \description{ 16 | Prints a brief descripton of a fitted \code{convex_biclustering} object. 17 | } 18 | \details{ 19 | Reports number of observations and variables of dataset, any preprocessing 20 | done by the \code{\link{convex_biclustering}} function, and regularization 21 | details. 22 | } 23 | \examples{ 24 | bicluster_fit <- convex_biclustering(presidential_speech, lambda_grid = 1:5) 25 | print(bicluster_fit) 26 | } 27 | -------------------------------------------------------------------------------- /man/print.ConvexClustering.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/solvers.R 3 | \name{print.ConvexClustering} 4 | \alias{print.ConvexClustering} 5 | \title{Print \code{\link{convex_clustering}} Results} 6 | \usage{ 7 | \method{print}{ConvexClustering}(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{an object of class \code{convex_clustering} as returned by 11 | \code{\link{convex_clustering}}} 12 | 13 | \item{...}{Additional unused arguments} 14 | } 15 | \description{ 16 | Prints a brief descripton of a fitted \code{convex_clustering} object. 17 | } 18 | \details{ 19 | Reports number of observations and variables of dataset, any preprocessing 20 | done by the \code{\link{convex_clustering}} function, and regularization 21 | details. 22 | } 23 | \examples{ 24 | cluster_fit <- convex_clustering(presidential_speech, lambda_grid = 1:5) 25 | print(cluster_fit) 26 | } 27 | -------------------------------------------------------------------------------- /man/print_carp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/carp.R 3 | \name{print.CARP} 4 | \alias{print.CARP} 5 | \alias{as.dendrogram.CARP} 6 | \alias{as.hclust.CARP} 7 | \title{Print \code{CARP} Results} 8 | \usage{ 9 | \method{print}{CARP}(x, ...) 10 | 11 | \method{as.dendrogram}{CARP}(object, ...) 12 | 13 | \method{as.hclust}{CARP}(x, ...) 14 | } 15 | \arguments{ 16 | \item{x}{an object of class \code{CARP} as returned by \code{\link{CARP}}} 17 | 18 | \item{...}{Additional unused arguments} 19 | 20 | \item{object}{an object of class \code{CARP} as returned by \code{\link{CARP}}} 21 | } 22 | \description{ 23 | Prints a brief descripton of a fitted \code{CARP} object. 24 | } 25 | \details{ 26 | Reports number of observations and variables of dataset, any preprocessing 27 | done by the \code{\link{CARP}} function, regularization weight information, 28 | and the variant of \code{CARP} used. 29 | 30 | The \code{as.dendrogram} and \code{as.hclust} methods convert the 31 | \code{CBASS} output to an object of class \code{dendrogram} or \code{hclust} 32 | respectively. 33 | } 34 | \examples{ 35 | carp_fit <- CARP(presidential_speech) 36 | print(carp_fit) 37 | } 38 | -------------------------------------------------------------------------------- /man/print_cbass.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cbass.R 3 | \name{print.CBASS} 4 | \alias{print.CBASS} 5 | \alias{as.dendrogram.CBASS} 6 | \alias{as.hclust.CBASS} 7 | \title{Print \code{CBASS} Results} 8 | \usage{ 9 | \method{print}{CBASS}(x, ...) 10 | 11 | \method{as.dendrogram}{CBASS}(object, ..., type = c("row", "col")) 12 | 13 | \method{as.hclust}{CBASS}(x, ..., type = c("row", "col")) 14 | } 15 | \arguments{ 16 | \item{x}{an object of class \code{CARP} as returned by \code{\link{CARP}}} 17 | 18 | \item{...}{Additional unused arguments} 19 | 20 | \item{object}{an object of class \code{CARP} as returned by \code{\link{CARP}}} 21 | 22 | \item{type}{Either \code{"row"} or \code{"col"}, indicating whether 23 | the row or column dendrogram should be returned.} 24 | } 25 | \description{ 26 | Prints a brief descripton of a fitted \code{CBASS} object. 27 | } 28 | \details{ 29 | The \code{as.dendrogram} and \code{as.hclust} methods convert the 30 | \code{CBASS} output to an object of class \code{dendrogram} or \code{hclust} 31 | respectively. 32 | } 33 | \examples{ 34 | cbass_fit <- CBASS(X=presidential_speech) 35 | print(cbass_fit) 36 | } 37 | -------------------------------------------------------------------------------- /man/rbf_kernel_weights.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/weights.R 3 | \name{RBF Kernel Weights} 4 | \alias{RBF Kernel Weights} 5 | \alias{dense_rbf_kernel_weights} 6 | \alias{sparse_rbf_kernel_weights} 7 | \title{Construct Clustering Weights Based on the Radial Basis Function (Gaussian/Euclidean) Kernel} 8 | \usage{ 9 | dense_rbf_kernel_weights( 10 | phi = "auto", 11 | dist.method = c("euclidean", "maximum", "manhattan", "canberra", "binary", 12 | "minkowski"), 13 | p = 2 14 | ) 15 | 16 | sparse_rbf_kernel_weights(..., k = "auto") 17 | } 18 | \arguments{ 19 | \item{phi}{The scale factor used for the RBF kernel} 20 | 21 | \item{dist.method}{The type of distance used to calculate distances between 22 | points. See the \code{method} argument to \code{\link[stats]{dist}}.} 23 | 24 | \item{p}{The power of the Minkowski distance (only relevant if \code{method == "minkowski"}). 25 | See the \code{p} argument to \code{\link[stats]{dist}}.} 26 | 27 | \item{...}{Arguments passed through from \code{sparse_rbf_kernel_weights} to 28 | \code{dense_rbf_kernel_weights}} 29 | 30 | \item{k}{The number of neighbors to use} 31 | } 32 | \value{ 33 | A function which, when called, returns a matrix of clustering weights. 34 | } 35 | \description{ 36 | This is a \emph{factory function} - it returns a \emph{function} which can be 37 | used to create a matrix of clustering weights. In particular, it returns a function 38 | which takes a n-by-p data matrix \eqn{X} and returns an n-by-n matrix whose 39 | \eqn{(i, j)}-th element is given by \eqn{e^{-phi * dist(x_i, x_j)}} where 40 | \eqn{x_i}, \eqn{x_j} are the \eqn{i}-th and \eqn{j}-th row of \eqn{X} 41 | respectively. The distance metric used is determined by the 42 | \code{dist.method} and \code{p} arguments, which are passed to 43 | \code{\link[stats]{dist}}. 44 | } 45 | \details{ 46 | The sparse weights (\code{sparse_rbf_kernel_weights}) are calculated by 47 | dropping all but the \code{k} largest weights for each row of the matrix 48 | (equivalent to taking the \code{k} nearest neighbors to each point). The 49 | weight matrix is symmetrized, so if \emph{a} is a neighbor of \emph{b}, but 50 | not vice versa, the edge is still included. If \code{k} is too small, resulting 51 | in a non-fully-connected graph, an error is thrown. 52 | 53 | If \code{phi == "auto"}, a grid of possible \eqn{phi} values are used and 54 | the \code{phi} which maximizes the variance of the resulting weights is taken. 55 | 56 | If \code{k == "auto"}, the smallest \code{k} that still yields a fully connected 57 | graph is used. 58 | } 59 | \examples{ 60 | weight_func <- dense_rbf_kernel_weights() 61 | weight_func(presidential_speech) 62 | 63 | weight_func <- dense_rbf_kernel_weights(phi=0.1, dist.method="canberra") 64 | weight_func(presidential_speech) 65 | 66 | weight_func <- sparse_rbf_kernel_weights() 67 | weight_func(presidential_speech) 68 | } 69 | -------------------------------------------------------------------------------- /man/tcga_breast.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{tcga_breast} 5 | \alias{tcga_breast} 6 | \title{Log-Transformed Level III RPKM Gene Expression Levels for 438 Breast-Cancer Patients} 7 | \format{ 8 | An object of class \code{matrix} (inherits from \code{array}) with 438 rows and 353 columns. 9 | } 10 | \usage{ 11 | tcga_breast 12 | } 13 | \description{ 14 | This data set (\eqn{n = 438, p = 353}) contains log-transformed Level III RPKM gene 15 | expression levels for 438 breast-cancer patients collected by the Cancer Genome Atlas 16 | Network. The Luminal A and Luminal B subtypes have been combined. The row names give the 17 | clinically diagnosed subtype (true cluster labels) and the column names are the gene IDs. 18 | } 19 | \references{ 20 | The Cancer Genome Atlas Network. "Comprehensive Molecular Portraits of Human Breast Tumours" 21 | Nature 490, p.61-70. 2012. \doi{10.1038/nature11412} 22 | } 23 | \keyword{datasets} 24 | -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.so 3 | *.dll 4 | -------------------------------------------------------------------------------- /src/Makevars: -------------------------------------------------------------------------------- 1 | CXX_STD = CXX11 2 | PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 3 | 4 | strip: $(SHLIB) 5 | ( [[ `uname` == "Darwin" ]] && test -e "/usr/bin/strip" && /usr/bin/strip -S *.o *.so ) || true 6 | ( [[ `uname` == "Linux" ]] && test -e "/usr/bin/strip" && /usr/bin/strip --strip-debug *.o *.so ) || true 7 | 8 | .phony: strip 9 | -------------------------------------------------------------------------------- /src/Makevars.win: -------------------------------------------------------------------------------- 1 | CXX_STD = CXX11 2 | PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 3 | -------------------------------------------------------------------------------- /src/clustRviz.h: -------------------------------------------------------------------------------- 1 | #include "clustRviz_base.h" 2 | #include "clustRviz_logging.h" 3 | #include "clustering_impl.h" 4 | #include "biclustering_impl.h" 5 | #include "alg_reg_policies.h" 6 | #include "optim_policies.h" 7 | 8 | typedef AlgorithmicRegularizationFixedStepSizePolicy CARP; 9 | typedef AlgorithmicRegularizationBacktrackingPolicy CARP_VIZ; 10 | typedef AlgorithmicRegularizationFixedStepSizePolicy CBASS; 11 | typedef AlgorithmicRegularizationBacktrackingPolicy CBASS_VIZ; 12 | typedef ADMMPolicy ConvexClusteringADMM; 13 | typedef ADMMPolicy ConvexBiClusteringADMM; 14 | typedef BackTrackingADMMPolicy ConvexClusteringADMM_VIZ; 15 | typedef BackTrackingADMMPolicy ConvexBiClusteringADMM_VIZ; 16 | typedef UserGridADMMPolicy UserGridConvexClusteringADMM; 17 | typedef UserGridADMMPolicy UserGridConvexBiClusteringADMM; 18 | -------------------------------------------------------------------------------- /src/clustRviz_base.h: -------------------------------------------------------------------------------- 1 | #ifndef CLUSTRVIZ_BASE_H 2 | #define CLUSTRVIZ_BASE_H 1 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #define CLUSTRVIZ_STATUS_UPDATE_TIME_SECS 0.1 // Print status to screen every 0.1s 9 | #define CLUSTRVIZ_STATUS_WIDTH_CHECK 20 // Every 20 status updates * 0.1s => every 2s 10 | #define CLUSTRVIZ_DEFAULT_STOP_PRECISION 1e-10 //Stop when cellwise diff between iters < val 11 | 12 | // Helper to determine if STL set contains an element 13 | // 14 | // In general, this is not efficient because one wants to do something 15 | // with the element and/or its location, but here we really only need containment 16 | template 17 | bool contains(const std::set& container, T element){ 18 | typename std::set::const_iterator it = container.find(element); 19 | return it != container.end(); 20 | } 21 | 22 | // Prototypes - utils.cpp 23 | Eigen::MatrixXd MatrixRowProx(const Eigen::MatrixXd&, 24 | double, 25 | const Eigen::VectorXd&, 26 | bool); 27 | 28 | Eigen::MatrixXd MatrixColProx(const Eigen::MatrixXd&, 29 | double, 30 | const Eigen::VectorXd&, 31 | bool); 32 | 33 | double scaled_squared_norm(const Eigen::MatrixXd&); 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /src/clustRviz_logging.cpp: -------------------------------------------------------------------------------- 1 | #include "clustRviz.h" 2 | 3 | // [[Rcpp::export(rng = false)]] 4 | void clustRviz_set_logger_level_cpp(int level){ 5 | auto logger_level = static_cast(level); 6 | ClustRVizLogger::set_level(logger_level); 7 | } 8 | 9 | // [[Rcpp::export(rng = false)]] 10 | int clustRviz_get_logger_level_cpp(){ 11 | auto logger_level = static_cast(ClustRVizLogger::get_level()); 12 | return logger_level; 13 | } 14 | 15 | // [[Rcpp::export(rng = false)]] 16 | void clustRviz_log_cpp(int level, Rcpp::StringVector x){ 17 | auto msg_level = static_cast(level); 18 | std::string msg = Rcpp::as(x[0]); 19 | if(msg_level >= ClustRVizLoggerLevel::ERRORS){ 20 | ClustRVizLogger::error(msg); 21 | } else if(msg_level >= ClustRVizLoggerLevel::WARNING){ 22 | ClustRVizLogger::warning(msg); 23 | } else if(msg_level >= ClustRVizLoggerLevel::MESSAGES){ 24 | ClustRVizLogger::message(msg); 25 | } else if(msg_level >= ClustRVizLoggerLevel::INFO){ 26 | ClustRVizLogger::info(msg); 27 | } else if(msg_level >= ClustRVizLoggerLevel::DEBUG){ 28 | ClustRVizLogger::debug(msg); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /tests/test_admm.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(clustRviz) 3 | 4 | test_check("clustRviz", filter="admm") 5 | -------------------------------------------------------------------------------- /tests/test_carp_accessors.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(clustRviz) 3 | 4 | test_check("clustRviz", filter="carp_accessors") 5 | -------------------------------------------------------------------------------- /tests/test_carp_error_handling.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(clustRviz) 3 | 4 | test_check("clustRviz", filter="carp_error_handling") 5 | -------------------------------------------------------------------------------- /tests/test_carp_misc.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(clustRviz) 3 | 4 | test_check("clustRviz", filter="carp_misc") 5 | -------------------------------------------------------------------------------- /tests/test_carp_missing_data.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(clustRviz) 3 | 4 | test_check("clustRviz", filter="carp_missing_data") 5 | -------------------------------------------------------------------------------- /tests/test_carp_plot_static.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(clustRviz) 3 | 4 | test_check("clustRviz", filter="carp_plot_static") 5 | -------------------------------------------------------------------------------- /tests/test_carp_print.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(clustRviz) 3 | 4 | test_check("clustRviz", filter="carp_print") 5 | -------------------------------------------------------------------------------- /tests/test_carp_smoke.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(clustRviz) 3 | 4 | test_check("clustRviz", filter="carp_smoke") 5 | -------------------------------------------------------------------------------- /tests/test_carp_sparsity_pattern.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(clustRviz) 3 | 4 | test_check("clustRviz", filter="carp_sparsity_pattern") 5 | -------------------------------------------------------------------------------- /tests/test_carp_weights.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(clustRviz) 3 | 4 | test_check("clustRviz", filter="carp_weights") 5 | -------------------------------------------------------------------------------- /tests/test_cbass_accessors.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(clustRviz) 3 | 4 | test_check("clustRviz", filter="cbass_accessors") 5 | -------------------------------------------------------------------------------- /tests/test_cbass_error_handling.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(clustRviz) 3 | 4 | test_check("clustRviz", filter="cbass_error_handling") 5 | -------------------------------------------------------------------------------- /tests/test_cbass_misc.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(clustRviz) 3 | 4 | test_check("clustRviz", filter="cbass_misc") 5 | -------------------------------------------------------------------------------- /tests/test_cbass_missing_data.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(clustRviz) 3 | 4 | test_check("clustRviz", filter="cbass_missing_data") 5 | -------------------------------------------------------------------------------- /tests/test_cbass_plot_static.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(clustRviz) 3 | 4 | test_check("clustRviz", filter="cbass_plot_static") 5 | -------------------------------------------------------------------------------- /tests/test_cbass_print.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(clustRviz) 3 | 4 | test_check("clustRviz", filter="cbass_print") 5 | -------------------------------------------------------------------------------- /tests/test_cbass_smoke.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(clustRviz) 3 | 4 | test_check("clustRviz", filter="cbass_smoke") 5 | -------------------------------------------------------------------------------- /tests/test_cbass_sparsity_pattern.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(clustRviz) 3 | 4 | test_check("clustRviz", filter="cbass_sparsity_pattern") 5 | -------------------------------------------------------------------------------- /tests/test_cbass_weights.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(clustRviz) 3 | 4 | test_check("clustRviz", filter="cbass_weights") 5 | -------------------------------------------------------------------------------- /tests/test_clustering_assignments_cpp.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(clustRviz) 3 | 4 | test_check("clustRviz", filter="clustering_assignments_cpp") 5 | -------------------------------------------------------------------------------- /tests/test_logging.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(clustRviz) 3 | 4 | test_check("clustRviz", filter="logging") 5 | -------------------------------------------------------------------------------- /tests/test_matrix_prox.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(clustRviz) 3 | 4 | test_check("clustRviz", filter="matrix_prox") 5 | -------------------------------------------------------------------------------- /tests/test_options.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(clustRviz) 3 | 4 | test_check("clustRviz", filter="options") 5 | -------------------------------------------------------------------------------- /tests/test_solver_biclustering.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(clustRviz) 3 | 4 | test_check("clustRviz", filter="solver_biclustering") 5 | -------------------------------------------------------------------------------- /tests/test_solver_clustering.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(clustRviz) 3 | 4 | test_check("clustRviz", filter="solver_clustering") 5 | -------------------------------------------------------------------------------- /tests/test_tensor_projection.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(clustRviz) 3 | 4 | test_check("clustRviz", filter="tensor_projection") 5 | -------------------------------------------------------------------------------- /tests/test_utils.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(clustRviz) 3 | 4 | test_check("clustRviz", filter="utils") 5 | -------------------------------------------------------------------------------- /tests/test_weights.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(clustRviz) 3 | 4 | test_check("clustRviz", filter="weights") 5 | -------------------------------------------------------------------------------- /tests/testthat/helper_clustrviz_tests.R: -------------------------------------------------------------------------------- 1 | library(stringr) 2 | 3 | expect_no_error <- function(object, ..., all=FALSE, info=NULL, label=NULL){ 4 | expect_error(object, regexp=NA, ..., all=all, info=info, label=label) 5 | } 6 | 7 | expect_no_warning <- function(object, ..., all=FALSE, info=NULL, label=NULL){ 8 | expect_warning(object, regexp=NA, ..., all=all, info=info, label=label) 9 | } 10 | 11 | expect_no_message <- function(object, ..., all=FALSE, info=NULL, label=NULL){ 12 | expect_message(object, regexp=NA, ..., all=all, info=info, label=label) 13 | } 14 | 15 | expect_str_contains <- function(object, expected, info=NULL, label=NULL){ 16 | if(!is.character(object)) object <- as.character(object) 17 | if(!is.character(expected)) expected <- as.character(expected) 18 | 19 | expect_true(all(str_detect(object, expected)), 20 | info=info, label=label) 21 | } 22 | 23 | expect_zero <- function(object, ..., info=NULL, label=NULL, expected.label=NULL){ 24 | expect_equal(object, 0, ..., info=info, label=label, expected.label=expected.label) 25 | } 26 | 27 | expect_zeros <- function(object, ..., info=NULL, label=NULL, expected.label=NULL){ 28 | expect_equal(object, rep(0, length(object)), ..., info=info, label=label, expected.label=expected.label) 29 | } 30 | 31 | expect_ones <- function(object, ..., info=NULL, label=NULL, expected.label=NULL){ 32 | expect_equal(object, rep(1, length(object)), ..., info=info, label=label, expected.label=expected.label) 33 | } 34 | 35 | capture_print <- function(x, ...){ 36 | paste(capture.output(print(x, ...)), collapse="\n") 37 | } 38 | 39 | list_all_equal <- function(x) { 40 | all(vapply(seq_len(length(x) - 1), function(i) isTRUE(all.equal(x[[i]], x[[i + 1]])), logical(1))) 41 | } 42 | 43 | num_unique <- clustRviz:::num_unique 44 | num_unique_rows <- clustRviz:::num_unique_rows 45 | num_unique_cols <- clustRviz:::num_unique_cols 46 | -------------------------------------------------------------------------------- /tests/testthat/test_admm.R: -------------------------------------------------------------------------------- 1 | context("Full ADMM Converges") 2 | 3 | test_that("Full ADMM converges for CARP", { 4 | skip_on_cran() 5 | skip_if_not_installed("cvxclustr") 6 | data(mammals, package = "cvxclustr") 7 | 8 | ## Example modified from help pages of cvxclustr 9 | X <- as.matrix(mammals[,-1]) 10 | carp_fit <- CARP(X, exact = TRUE, X.center = FALSE, X.scale = FALSE) 11 | 12 | ## Calculate matching `cvxclustr` solution 13 | Xt <- t(X) 14 | 15 | ## Match CARP() selected weights 16 | w <- clustRviz:::weight_mat_to_vec(carp_fit$weights) 17 | gamma <- unique(carp_fit$cluster_membership$Gamma) 18 | 19 | ## Perform clustering 20 | suppressWarnings(cvxclust_fit <- cvxclustr::cvxclust(Xt, w, gamma, tol = 1e-7)) 21 | 22 | ## cvxclustr seems to use a pretty loose stopping tolerance, so this is a loose check... 23 | for(i in seq_along(gamma)){ 24 | expect_equal(carp_fit$U[,,i], t(cvxclust_fit$U[[i]]), 25 | check.attributes = FALSE, tolerance = 1e-4) 26 | } 27 | }) 28 | 29 | test_that("Full Back-Tracking ADMM converges for CARP", { 30 | skip_on_cran() 31 | skip_if_not_installed("cvxclustr") 32 | data(mammals, package = "cvxclustr") 33 | 34 | ## Example modified from help pages of cvxclustr 35 | X <- as.matrix(mammals[,-1]) 36 | carp_fit <- CARP(X, exact = TRUE, back_track = TRUE, X.center = FALSE, X.scale = FALSE) 37 | 38 | ## Calculate matching `cvxclustr` solution 39 | Xt <- t(X) 40 | 41 | ## Match CARP() selected weights 42 | w <- clustRviz:::weight_mat_to_vec(carp_fit$weights) 43 | gamma <- unique(carp_fit$cluster_membership$Gamma) 44 | 45 | ## Perform clustering 46 | suppressWarnings(cvxclust_fit <- cvxclustr::cvxclust(Xt, w, gamma, tol = 1e-7)) 47 | 48 | ## cvxclustr seems to use a pretty loose stopping tolerance, so this is a loose check... 49 | for(i in seq_along(gamma)){ 50 | expect_equal(carp_fit$U[,,i], t(cvxclust_fit$U[[i]]), 51 | check.attributes = FALSE, tolerance = 1e-4) 52 | } 53 | }) 54 | 55 | test_that("Full ADMM converges for CBASS", { 56 | skip_on_cran() 57 | skip_if_not_installed("cvxclustr") 58 | skip_if_not_installed("cvxbiclustr") 59 | skip_if_not_installed("Matrix") 60 | data(mammals, package = "cvxclustr") 61 | library(Matrix) 62 | 63 | clustRviz_options(max_iter = 5e6, keep_debug_info = TRUE) 64 | on.exit(clustRviz_reset_options()) 65 | 66 | ## Example modified from help pages of cvxclustr 67 | X <- as.matrix(mammals[,-1]) 68 | cbass_fit <- CBASS(X, exact = TRUE, X.center.global = FALSE, t = 1.05) 69 | 70 | cbass_gamma <- cbass_fit$debug$path$gamma_path 71 | cbass_U <- array(cbass_fit$debug$path$u_path, c(NROW(X), NCOL(X), length(cbass_gamma))) 72 | 73 | ## Calculate matching `cvxbiclustr` solution 74 | 75 | ## Match CBASS() selected edges and weights 76 | D_row <- cbass_fit$row_fusions$D 77 | D_col <- t(cbass_fit$col_fusions$D) 78 | 79 | w_row <- cbass_fit$row_fusions$weights; w_row <- clustRviz:::weight_mat_to_vec(w_row); w_row <- w_row[w_row != 0] 80 | w_col <- cbass_fit$col_fusions$weights; w_col <- clustRviz:::weight_mat_to_vec(w_col); w_col <- w_col[w_col != 0] 81 | 82 | w_row <- w_row / sum(w_row); w_row <- w_row / sqrt(NROW(X)) 83 | w_col <- w_col / sum(w_col); w_col <- w_col / sqrt(NCOL(X)) 84 | 85 | ## Perform clustering 86 | capture.output(cobra_fit <- cvxbiclustr::cobra(X, 87 | E_row = Matrix(D_row, sparse = TRUE), 88 | E_col = Matrix(D_col, sparse = TRUE), 89 | w_row = w_row, 90 | w_col = w_col, 91 | gamma = cbass_gamma)) 92 | 93 | obj <- function(U, lambda, l1 = FALSE){ 94 | DU <- D_row %*% U; DTU <- D_col %*% t(U) 95 | if(l1){ 96 | sum((X - U)^2)/2 + lambda * sum(w_row * abs(DU)) + lambda * sum(w_col * abs(t(DTU))) 97 | } else { 98 | sum((X - U)^2)/2 + lambda * sum(w_row * sqrt(rowSums(DU^2))) + lambda * sum(w_col * sqrt(rowSums(DTU^2))) 99 | } 100 | } 101 | 102 | ## Check that the objectives are of (roughly) equal quality... 103 | for(ix in seq_along(cbass_gamma)){ 104 | lambda <- cbass_gamma[ix] 105 | 106 | my_U <- cbass_U[,,ix] 107 | ec_U <- cobra_fit$U[[ix]] 108 | 109 | expect_true(obj(my_U, lambda) <= 1.01 * obj(ec_U, lambda)) 110 | } 111 | }) 112 | -------------------------------------------------------------------------------- /tests/testthat/test_carp_error_handling.R: -------------------------------------------------------------------------------- 1 | context("CARP() Error Handling") 2 | 3 | test_that("CARP() fails with non-finite numerical input", { 4 | ps <- presidential_speech 5 | 6 | ps[1,1] <- Inf; expect_error(CARP(ps)) 7 | }) 8 | 9 | test_that("CARP() errors early with incorrect input", { 10 | # Pre-processing parameters must be boolean flags 11 | expect_error(CARP(presidential_speech, X.center = NA)) 12 | expect_error(CARP(presidential_speech, X.center = c(TRUE, FALSE))) 13 | 14 | expect_error(CARP(presidential_speech, X.scale = NA)) 15 | expect_error(CARP(presidential_speech, X.scale = c(TRUE, FALSE))) 16 | 17 | # Check `exact` argument 18 | expect_error(CARP(presidential_speech, exact = "unknown")) 19 | expect_error(CARP(presidential_speech, exact = NA)) 20 | expect_error(CARP(presidential_speech, exact = c(TRUE, FALSE))) 21 | expect_error(CARP(presidential_speech, exact = 1L)) 22 | 23 | # Check `back_track` argument 24 | expect_error(CARP(presidential_speech, back_track = "unknown")) 25 | expect_error(CARP(presidential_speech, back_track = NA)) 26 | expect_error(CARP(presidential_speech, back_track = c(TRUE, FALSE))) 27 | expect_error(CARP(presidential_speech, back_track = 1L)) 28 | 29 | # Must use a t > 1 30 | expect_error(CARP(presidential_speech, t = 1)) 31 | expect_error(CARP(presidential_speech, t = 0)) 32 | expect_error(CARP(presidential_speech, t = -3)) 33 | expect_error(CARP(presidential_speech, t = NA)) 34 | expect_error(CARP(presidential_speech, t = c(1.3, 1.2))) 35 | 36 | # Fail on unknown flags 37 | expect_error(CARP(presidential_speech, flag="unknown"), regexp = "flag") 38 | expect_error(CARP(presidential_speech, "value"), regexp = "Unknown") 39 | }) 40 | -------------------------------------------------------------------------------- /tests/testthat/test_carp_misc.R: -------------------------------------------------------------------------------- 1 | context("Test Miscellaneous Features of CARP") 2 | 3 | test_that("CARP creates unique names if needed", { 4 | ## Give "Grover the Good" due credit 5 | pres2 <- rbind(presidential_speech, 6 | jitter(presidential_speech["Grover Cleveland", , drop=FALSE])) 7 | 8 | carp_fit <- CARP(pres2) 9 | 10 | expect_equal(rownames(carp_fit$X)[45], "Grover Cleveland_1") 11 | }) 12 | 13 | test_that("CARP supports factor labels", { 14 | X <- matrix(rnorm(9), 3, 3) 15 | colnames(X) <- c("a", "b", "c") 16 | 17 | carp_fit <- CARP(X, labels = factor(c("a", "b", "c"))) 18 | 19 | expect_equal(rownames(carp_fit$X), c("a", "b", "c")) 20 | }) 21 | 22 | test_that("CARP stores scale factors", { 23 | carp_fit_std <- CARP(presidential_speech, X.center = TRUE, X.scale = TRUE) 24 | 25 | expect_equal(carp_fit_std$center_vector, colMeans(presidential_speech)) 26 | expect_equal(carp_fit_std$scale_vector, apply(presidential_speech, 2, sd)) 27 | 28 | carp_fit_no_std <- CARP(presidential_speech, X.center = FALSE, X.scale = FALSE) 29 | 30 | expect_equal(carp_fit_no_std$scale_vector, rep(1, NCOL(presidential_speech))) 31 | expect_equal(carp_fit_no_std$center_vector, rep(0, NCOL(presidential_speech))) 32 | }) 33 | -------------------------------------------------------------------------------- /tests/testthat/test_carp_missing_data.R: -------------------------------------------------------------------------------- 1 | context("CARP() Missing Data Imputation") 2 | 3 | test_that("CARP Imputes Missing Data", { 4 | test_dat <- matrix(c(NA, 1, 2, 5 | 3, 1, 2, 6 | 100, 100, 100), byrow = TRUE, ncol = 3) 7 | 8 | carp_fit <- suppressWarnings(CARP(test_dat)) ## Warns that missForest can't impute well... 9 | 10 | # We should impute 3 for the missing value when we refit 11 | expect_equal(get_clustered_data(carp_fit, k = 2)[1,1], 3, check.attributes = FALSE) 12 | 13 | # Check we impute correctly in "U-space" as well. 14 | expect_equal(get_clustered_data(carp_fit, k = 2, refit = FALSE)[1,1], 15 | get_clustered_data(carp_fit, k = 2, refit = FALSE)[2,1], 16 | check.attributes = FALSE) 17 | 18 | # If we want to refit, can't get an estimated centroid from all NA data 19 | expect_true(is.nan(get_clustered_data(carp_fit, k = 3, refit = TRUE)[1,1])) 20 | # But we can impute if we don't refit (note that it's a pretty bad imputation...) 21 | expect_false(is.nan(get_clustered_data(carp_fit, k = 3, refit = FALSE)[1,1])) 22 | 23 | # We should get an error when imputation fails to remove all NAs 24 | expect_error(CARP(test_dat, impute_func = identity)) 25 | 26 | # We should also get an error when imputation puts in non-finite or NaN values 27 | expect_error(CARP(test_dat, impute_func = function(X){X[is.na(X)] <- NaN})) 28 | expect_error(CARP(test_dat, impute_func = function(X){X[is.na(X)] <- Inf})) 29 | 30 | # We should get an error when we can't impute (too many NAs) 31 | test_dat[,1] <- NA 32 | expect_error(CARP(test_dat, impute_func = function(X){X[,1] <- mean(X[,1], na.rm = TRUE); X})) 33 | }) 34 | -------------------------------------------------------------------------------- /tests/testthat/test_carp_plot_static.R: -------------------------------------------------------------------------------- 1 | context("Test Static CARP Plots") 2 | 3 | ## Add tests for static CARP plots 4 | ## 5 | ## Right now these are just "smoke" tests (i.e., runs in the default way without error) 6 | ## but we will add actual tests later (see GH #44) 7 | 8 | test_that("CARP path plot works", { 9 | carp_fit <- CARP(presidential_speech) 10 | 11 | ## Main settings work 12 | expect_no_error(plot(carp_fit, type = "path")) 13 | expect_is(plot(carp_fit, type = "path"), "gg") 14 | expect_no_error(plot(carp_fit, type = "path", k = 3, axis = c("amount", "appropri"))) 15 | expect_no_error(plot(carp_fit, type = "path", k = 3, axis = c("PC1", "PC3"))) 16 | 17 | ## Must give at most one of `percent` or `k` 18 | expect_error(plot(carp_fit, type = "path", percent = 0.5, k = 3)) 19 | 20 | ## Error checking on `percent` and `k` 21 | expect_error(plot(carp_fit, type = "path", percent = 1.5)) 22 | expect_error(plot(carp_fit, type = "path", percent = -0.5)) 23 | expect_error(plot(carp_fit, type = "path", percent = NA)) 24 | expect_error(plot(carp_fit, type = "path", percent = c(0.25, 0.75))) 25 | 26 | expect_error(plot(carp_fit, type = "path", k = 3.5)) 27 | expect_error(plot(carp_fit, type = "path", k = 0)) 28 | expect_error(plot(carp_fit, type = "path", k = -1)) 29 | expect_error(plot(carp_fit, type = "path", k = NROW(presidential_speech) + 1)) 30 | 31 | ## Error on unknown arguments 32 | expect_error(plot(carp_fit, type = "path", 5)) 33 | expect_error(plot(carp_fit, type = "path", a = 5)) 34 | }) 35 | 36 | test_that("CARP dendrogram plot works", { 37 | carp_fit <- CARP(presidential_speech) 38 | 39 | ## Main settings work 40 | expect_no_error(plot(carp_fit, type = "dendrogram")) 41 | expect_s3_class(plot(carp_fit, type = "dendrogram"), 'ggplot') 42 | 43 | ## Must give at most one of `percent` or `k` 44 | expect_error(plot(carp_fit, type = "dendrogram", percent = 0.5, k = 3)) 45 | 46 | ## Error checking on `percent` and `k` 47 | expect_error(plot(carp_fit, type = "dendrogram", percent = 1.5)) 48 | expect_error(plot(carp_fit, type = "dendrogram", percent = -0.5)) 49 | expect_error(plot(carp_fit, type = "dendrogram", percent = NA)) 50 | expect_error(plot(carp_fit, type = "dendrogram", percent = c(0.25, 0.75))) 51 | 52 | expect_error(plot(carp_fit, type = "dendrogram", k = 3.5)) 53 | expect_error(plot(carp_fit, type = "dendrogram", k = 0)) 54 | expect_error(plot(carp_fit, type = "dendrogram", k = -1)) 55 | expect_error(plot(carp_fit, type = "dendrogram", k = NROW(presidential_speech) + 1)) 56 | }) 57 | 58 | test_that("CARP heatmaply (javascript) plot works", { 59 | carp_fit <- CARP(presidential_speech) 60 | 61 | ## Main settings work 62 | expect_no_error(plot(carp_fit, type = "heatmap", interactive = TRUE)) 63 | expect_is(plot(carp_fit, type = "heatmap", interactive = TRUE), "plotly") 64 | expect_no_error(plot(carp_fit, type = "heatmap", interactive = TRUE, k = 3)) 65 | expect_no_error(plot(carp_fit, type = "heatmap", interactive = TRUE, percent = 0.5)) 66 | 67 | ## Must give at most one of `percent` or `k` 68 | expect_error(plot(carp_fit, type = "heatmap", interactive = TRUE, percent = 0.5, k = 3)) 69 | 70 | ## Error checking on `percent` and `k` 71 | expect_error(plot(carp_fit, type = "heatmap", interactive = TRUE, percent = 1.5)) 72 | expect_error(plot(carp_fit, type = "heatmap", interactive = TRUE, percent = -0.5)) 73 | expect_error(plot(carp_fit, type = "heatmap", interactive = TRUE, percent = NA)) 74 | expect_error(plot(carp_fit, type = "heatmap", interactive = TRUE, percent = c(0.25, 0.75))) 75 | 76 | expect_error(plot(carp_fit, type = "heatmap", interactive = TRUE, k = 3.5)) 77 | expect_error(plot(carp_fit, type = "heatmap", interactive = TRUE, k = 0)) 78 | expect_error(plot(carp_fit, type = "heatmap", interactive = TRUE, k = -1)) 79 | expect_error(plot(carp_fit, type = "heatmap", interactive = TRUE, k = NROW(presidential_speech) + 1)) 80 | }) 81 | -------------------------------------------------------------------------------- /tests/testthat/test_carp_print.R: -------------------------------------------------------------------------------- 1 | context("test print.CARP") 2 | 3 | test_that("print.CARP works for default settings", { 4 | carp_fit <- CARP(presidential_speech) 5 | carp_print <- capture_print(carp_fit) 6 | 7 | expect_str_contains(carp_print, "Algorithm:[ ]+CARP") 8 | expect_str_contains(carp_print, "Number of Observations:[ ]+44") 9 | expect_str_contains(carp_print, "Number of Variables:[ ]+75") 10 | expect_str_contains(carp_print, "Columnwise centering:[ ]+TRUE") 11 | expect_str_contains(carp_print, "Columnwise scaling:[ ]+FALSE") 12 | expect_str_contains(carp_print, "Source: Radial Basis Function Kernel Weights") 13 | expect_str_contains(carp_print, "Distance Metric: Euclidean") 14 | expect_str_contains(carp_print, stringr::fixed("Scale parameter (phi): 0.01 [Data-Driven]")) 15 | expect_str_contains(carp_print, stringr::fixed("Sparsified: 4 Nearest Neighbors [Data-Driven]")) 16 | }) 17 | 18 | test_that("print.CARP works for other algorithms", { 19 | expect_str_contains(capture_print(CARP(presidential_speech, back_track = TRUE)), 20 | "Algorithm:[ ]+CARP-VIZ") 21 | 22 | expect_str_contains(capture_print(CARP(presidential_speech, back_track = TRUE, norm = 1)), 23 | "Algorithm:[ ]+CARP-VIZ \\[Back-Tracking Fusion Search\\] \\[L1\\]") 24 | 25 | expect_str_contains(capture_print(CARP(presidential_speech, back_track = FALSE, t = 1.5)), 26 | "Algorithm:[ ]+CARP \\(t = 1.5\\)") 27 | 28 | expect_str_contains(capture_print(CARP(presidential_speech, back_track = FALSE, t = 1.5, norm = 1)), 29 | "Algorithm:[ ]+CARP \\(t = 1.5\\) \\[L1\\]") 30 | }) 31 | -------------------------------------------------------------------------------- /tests/testthat/test_carp_smoke.R: -------------------------------------------------------------------------------- 1 | context("Smoke tests for CARP") 2 | 3 | test_that("CARP-VIZ [L2] works", { 4 | ## Also smoke test status printing here 5 | expect_no_error(carp_fit <- CARP(presidential_speech, back_track = TRUE, status = TRUE)) 6 | expect_no_error(print(carp_fit)) 7 | }) 8 | 9 | test_that("CARP-VIZ [L1] works", { 10 | expect_no_error(carp_fit <- CARP(presidential_speech, back_track = TRUE, norm = 1)) 11 | expect_no_error(print(carp_fit)) 12 | }) 13 | 14 | test_that("CARP [L2] works", { 15 | expect_no_error(carp_fit <- CARP(presidential_speech, back_track = FALSE, t = 1.2)) 16 | expect_no_error(CARP(presidential_speech, back_track = FALSE, t = 1.1)) 17 | expect_no_error(CARP(presidential_speech, back_track = FALSE, t = 1.05)) 18 | expect_no_error(print(carp_fit)) 19 | }) 20 | 21 | test_that("CARP [L1] works", { 22 | expect_no_error(carp_fit <- CARP(presidential_speech, back_track = FALSE, t = 1.2, norm = 1)) 23 | expect_no_error(CARP(presidential_speech, back_track = FALSE, t =1.1, norm = 1)) 24 | expect_no_error(CARP(presidential_speech, back_track = FALSE, t = 1.05, norm = 1)) 25 | expect_no_error(print(carp_fit)) 26 | }) 27 | 28 | test_that("CARP interactive dendrogram plot works", { 29 | carp_fit <- CARP(presidential_speech[1:5,1:5]) 30 | 31 | ## static 32 | expect_no_error(print(plot(carp_fit, type = "dendrogram", interactive = T, dynamic = F))) 33 | expect_no_error(print(plot(carp_fit, type = "dendrogram", interactive = T, dynamic = F, k = 3))) 34 | expect_no_error(print(plot(carp_fit, type = "dendrogram", interactive = T, dynamic = F, percent = 0.25))) 35 | 36 | ## dynamic 37 | expect_no_error(print(plot(carp_fit, type = "dendrogram", interactive = T, dynamic = T))) 38 | }) 39 | 40 | test_that("CARP interactive path plot works", { 41 | carp_fit <- CARP(presidential_speech[1:5,1:5]) 42 | 43 | ## static 44 | expect_no_error(print(plot(carp_fit, type = "path", interactive = T, dynamic = F))) 45 | expect_no_error(print(plot(carp_fit, type = "path", interactive = T, dynamic = F, k = 3))) 46 | expect_no_error(print(plot(carp_fit, type = "path", interactive = T, dynamic = F, percent = 0.25))) 47 | 48 | ## dynamic 49 | expect_no_error(print(plot(carp_fit, type = "path", interactive = T, dynamic = T))) 50 | }) 51 | 52 | test_that("CARP interactive heatmap works", { 53 | carp_fit <- CARP(presidential_speech[1:5,1:5]) 54 | 55 | ## dynamic 56 | expect_no_error(print(plot(carp_fit, type = "heatmap", interactive = T, dynamic = T))) 57 | }) 58 | 59 | test_that("CARP non-interactive dynamic path plot works", { 60 | carp_fit <- CARP(presidential_speech) 61 | 62 | expect_no_error(print(plot(carp_fit, type = "path", interactive = F, dynamic = T))) 63 | }) 64 | 65 | test_that("CARP non-interactive dynamic dendrogram plot works", { 66 | carp_fit <- CARP(presidential_speech) 67 | 68 | expect_no_error(print(plot(carp_fit, type = "dendrogram", interactive = F, dynamic = T))) 69 | }) 70 | 71 | test_that("CARP non-interactive heatmap plot works", { 72 | carp_fit <- CARP(presidential_speech) 73 | 74 | ## static 75 | expect_no_error(print(plot(carp_fit, type = "heatmap", interactive = F, dynamic = F))) 76 | expect_no_error(print(plot(carp_fit, type = "heatmap", interactive = F, dynamic = F, k = 3))) 77 | expect_no_error(print(plot(carp_fit, type = "heatmap", interactive = F, dynamic = F, percent = 0.25))) 78 | 79 | ## dynamic 80 | expect_no_error(print(plot(carp_fit, type = "heatmap", interactive = F, dynamic = T))) 81 | }) 82 | -------------------------------------------------------------------------------- /tests/testthat/test_carp_sparsity_pattern.R: -------------------------------------------------------------------------------- 1 | context("Test CARP Sparsity Invariants") 2 | 3 | test_that("CARP begins with no fusions", { 4 | clustRviz_options(keep_debug_info = TRUE) 5 | 6 | carp_fit <- CARP(presidential_speech, back_track = FALSE) 7 | expect_zeros(carp_fit$debug$row$v_zero_indices[, 1]) 8 | 9 | clustRviz_reset_options() 10 | }) 11 | 12 | test_that("CARP ends with all fusions", { 13 | clustRviz_options(keep_debug_info = TRUE) 14 | 15 | carp_fit <- CARP(presidential_speech, back_track = FALSE) 16 | expect_ones(carp_fit$debug$row$v_zero_indices[, NCOL(carp_fit$debug$row$v_zero_indices)]) 17 | 18 | clustRviz_reset_options() 19 | }) 20 | 21 | test_that("CARP-VIZ begins with no fusions", { 22 | clustRviz_options(keep_debug_info = TRUE) 23 | 24 | carp_fit <- CARP(presidential_speech, back_track = TRUE) 25 | expect_zeros(carp_fit$debug$row$v_zero_indices[, 1]) 26 | 27 | clustRviz_reset_options() 28 | }) 29 | 30 | test_that("CARP-VIZ ends with all fusions", { 31 | clustRviz_options(keep_debug_info = TRUE) 32 | 33 | carp_fit <- CARP(presidential_speech, back_track = TRUE) 34 | expect_ones(carp_fit$debug$row$v_zero_indices[, NCOL(carp_fit$debug$row$v_zero_indices)]) 35 | 36 | clustRviz_reset_options() 37 | }) 38 | 39 | test_that("CARP begins with no fusions (uniform weights)", { 40 | clustRviz_options(keep_debug_info = TRUE) 41 | 42 | weight_mat <- matrix(1, nrow=NROW(presidential_speech), ncol=NROW(presidential_speech)) 43 | carp_fit <- CARP(presidential_speech, weights = weight_mat, back_track = FALSE) 44 | expect_zeros(carp_fit$debug$row$v_zero_indices[, 1]) 45 | 46 | clustRviz_reset_options() 47 | }) 48 | 49 | test_that("CARP ends with all fusions (uniform weights)", { 50 | clustRviz_options(keep_debug_info = TRUE) 51 | 52 | weight_mat <- matrix(1, nrow=NROW(presidential_speech), ncol=NROW(presidential_speech)) 53 | carp_fit <- CARP(presidential_speech, weights = weight_mat, back_track = FALSE) 54 | expect_ones(carp_fit$debug$row$v_zero_indices[, NCOL(carp_fit$debug$row$v_zero_indices)]) 55 | 56 | clustRviz_reset_options() 57 | }) 58 | 59 | test_that("CARP-VIZ begins with no fusions (uniform weights)", { 60 | clustRviz_options(keep_debug_info = TRUE) 61 | 62 | weight_mat <- matrix(1, nrow=NROW(presidential_speech), ncol=NROW(presidential_speech)) 63 | carp_fit <- CARP(presidential_speech, weights = weight_mat, back_track = TRUE) 64 | expect_zeros(carp_fit$debug$row$v_zero_indices[, 1]) 65 | 66 | clustRviz_reset_options() 67 | }) 68 | 69 | test_that("CARP-VIZ ends with all fusions (uniform weights)", { 70 | clustRviz_options(keep_debug_info = TRUE) 71 | 72 | weight_mat <- matrix(1, nrow=NROW(presidential_speech), ncol=NROW(presidential_speech)) 73 | carp_fit <- CARP(presidential_speech, weights = weight_mat, back_track = TRUE) 74 | expect_ones(carp_fit$debug$row$v_zero_indices[, NCOL(carp_fit$debug$row$v_zero_indices)]) 75 | 76 | clustRviz_reset_options() 77 | }) 78 | -------------------------------------------------------------------------------- /tests/testthat/test_carp_weights.R: -------------------------------------------------------------------------------- 1 | context("Test CARP Weight Handling") 2 | 3 | test_that("CARP works with user weight function", { 4 | # FIXME: This should work - see GitHub #8 5 | ## uniform_weight_func <- function(X) matrix(1, nrow=NROW(X), ncol=NROW(X)) 6 | ## CARP(presidential_speech, weights = uniform_weight_func) 7 | 8 | # By manual testing, this is a good weight function / matrix 9 | my_weight_func <- function(X) { 10 | mat <- exp(-0.01 * as.matrix(dist(X))^2) 11 | diag(mat) <- 0 12 | mat[mat < quantile(mat, 0.73)] <- 0 13 | mat 14 | } 15 | 16 | expect_true(clustRviz:::is_connected_adj_mat(my_weight_func(presidential_speech))) 17 | expect_no_error(CARP(presidential_speech, weights = my_weight_func)) 18 | }) 19 | 20 | test_that("CARP works with user weight matrix", { 21 | mat <- exp(-0.01 * as.matrix(dist(presidential_speech))^2) 22 | diag(mat) <- 0 23 | mat[mat < quantile(mat, 0.73)] <- 0 24 | expect_no_error(CARP(presidential_speech, weights = mat)) 25 | }) 26 | 27 | test_that("CARP errors with negative weights", { 28 | mat <- -1 * exp(-0.01 * as.matrix(dist(presidential_speech))^2) 29 | diag(mat) <- 0 30 | mat[mat < quantile(mat, 0.73)] <- 0 31 | expect_error(CARP(presidential_speech, weights = mat)) 32 | }) 33 | 34 | test_that("CARP errors with unconnected graphs", { 35 | mat <- exp(-0.01 * as.matrix(dist(presidential_speech))^2) 36 | diag(mat) <- 0 37 | mat[mat < quantile(mat, 0.95)] <- 0 38 | expect_error(CARP(presidential_speech, weights = mat)) 39 | }) 40 | -------------------------------------------------------------------------------- /tests/testthat/test_cbass_error_handling.R: -------------------------------------------------------------------------------- 1 | context("CBASS() Error Handling") 2 | 3 | test_that("CBASS() fails with non-finite numerical input", { 4 | ps <- presidential_speech 5 | 6 | ps[1,1] <- Inf; expect_error(CBASS(ps)) 7 | }) 8 | 9 | test_that("CBASS() errors early with incorrect input", { 10 | # Pre-processing parameters must be boolean flags 11 | expect_error(CBASS(presidential_speech, X.center.global = NA)) 12 | expect_error(CBASS(presidential_speech, X.center.global = c(TRUE, FALSE))) 13 | 14 | # Check `exact` argument 15 | expect_error(CARP(presidential_speech, exact = "unknown")) 16 | expect_error(CARP(presidential_speech, exact = NA)) 17 | expect_error(CARP(presidential_speech, exact = c(TRUE, FALSE))) 18 | expect_error(CARP(presidential_speech, exact = 1L)) 19 | 20 | # Check `back_track` argument 21 | expect_error(CARP(presidential_speech, back_track = "unknown")) 22 | expect_error(CARP(presidential_speech, back_track = NA)) 23 | expect_error(CARP(presidential_speech, back_track = c(TRUE, FALSE))) 24 | expect_error(CARP(presidential_speech, back_track = 1L)) 25 | 26 | # Must use a t > 1 27 | expect_error(CBASS(presidential_speech, t = 1)) 28 | expect_error(CBASS(presidential_speech, t = 0)) 29 | expect_error(CBASS(presidential_speech, t = -3)) 30 | expect_error(CBASS(presidential_speech, t = NA)) 31 | expect_error(CBASS(presidential_speech, t = c(1.3, 1.2))) 32 | 33 | # Fail on unknown flags 34 | expect_error(CBASS(presidential_speech, flag="unknown"), regexp = "flag") 35 | expect_error(CBASS(presidential_speech, "value"), regexp = "Unknown") 36 | }) 37 | -------------------------------------------------------------------------------- /tests/testthat/test_cbass_misc.R: -------------------------------------------------------------------------------- 1 | context("Test Miscellaneous Features of CBASS") 2 | 3 | test_that("CBASS creates unique names if needed", { 4 | ## Give "Grover the Good" due credit 5 | pres2 <- rbind(presidential_speech, 6 | jitter(presidential_speech["Grover Cleveland", , drop=FALSE])) 7 | 8 | cbass_fit <- CBASS(pres2, X.center.global = FALSE) 9 | 10 | expect_equal(rownames(cbass_fit$X)[45], "Grover Cleveland_1") 11 | 12 | expect_equal(head(get_clustered_data(cbass_fit, percent = 0), -1), 13 | presidential_speech) 14 | }) 15 | 16 | test_that("CBASS supports factor labels", { 17 | X <- matrix(rnorm(9), 3, 3) 18 | 19 | cbass_fit <- CBASS(X, row_labels = factor(c("a", "b", "c")), 20 | col_labels = factor(c("d", "e", "f"))) 21 | 22 | expect_equal(rownames(cbass_fit$X), c("a", "b", "c")) 23 | expect_equal(colnames(cbass_fit$X), c("d", "e", "f")) 24 | }) 25 | 26 | test_that("CBASS stores mean of original data", { 27 | cbass_fit <- CBASS(presidential_speech, X.center.global = TRUE) 28 | expect_equal(mean(presidential_speech), cbass_fit$mean_adjust) 29 | 30 | cbass_fit <- CBASS(presidential_speech, X.center.global = FALSE) 31 | expect_equal(0, cbass_fit$mean_adjust) 32 | }) 33 | -------------------------------------------------------------------------------- /tests/testthat/test_cbass_missing_data.R: -------------------------------------------------------------------------------- 1 | context("CBASS() Missing Data Imputation") 2 | 3 | test_that("CBASS Imputes Missing Data", { 4 | test_dat <- matrix(c(1, 2, 3, 5 | 4, NA, 6, 6 | 7, 8, 9), byrow = TRUE, ncol = 3) 7 | 8 | cbass_fit <- CBASS(test_dat) 9 | 10 | # We should impute 5 for the missing value regardless of refit 11 | expect_equal(get_clustered_data(cbass_fit, k.row = 1)[2,2], 5) 12 | expect_equal(get_clustered_data(cbass_fit, k.row = 1, refit = FALSE)[2,2], 5) 13 | 14 | # If we want to refit, can't get an estimated centroid from all NA data 15 | expect_true(is.nan(get_clustered_data(cbass_fit, k.row = 3, refit = TRUE)[2,2])) 16 | # But we can impute if we don't refit (note that it's a pretty bad imputation...) 17 | expect_false(is.nan(get_clustered_data(cbass_fit, k.row = 3, refit = FALSE)[2,2])) 18 | 19 | # We should get an error when we can't succesfully impute 20 | expect_error(CBASS(presidential_speech * NA)) 21 | }) 22 | -------------------------------------------------------------------------------- /tests/testthat/test_cbass_print.R: -------------------------------------------------------------------------------- 1 | context("test print.CBASS") 2 | 3 | test_that("print.CBASS works for default settings", { 4 | cbass_fit <- CBASS(presidential_speech) 5 | cbass_print <- capture_print(cbass_fit) 6 | 7 | expect_str_contains(cbass_print, "Algorithm:[ ]+CBASS") 8 | expect_str_contains(cbass_print, "Number of Rows:[ ]+44") 9 | expect_str_contains(cbass_print, "Number of Columns:[ ]+75") 10 | expect_str_contains(cbass_print, "Global centering:[ ]+TRUE") 11 | expect_str_contains(cbass_print, "Source: Radial Basis Function Kernel Weights") 12 | expect_str_contains(cbass_print, "Distance Metric: Euclidean") 13 | expect_str_contains(cbass_print, stringr::fixed("Scale parameter (phi): 0.01 [Data-Driven]")) 14 | expect_str_contains(cbass_print, stringr::fixed("Sparsified: 4 Nearest Neighbors [Data-Driven]")) 15 | }) 16 | 17 | test_that("print.CBASS works for other algorithms", { 18 | expect_str_contains(capture_print(CBASS(presidential_speech, back_track = TRUE)), 19 | "Algorithm:[ ]+CBASS-VIZ") 20 | 21 | expect_str_contains(capture_print(CBASS(presidential_speech, back_track = TRUE, norm = 1)), 22 | "Algorithm:[ ]+CBASS-VIZ \\[Back-Tracking Fusion Search\\] \\[L1\\]") 23 | 24 | expect_str_contains(capture_print(CBASS(presidential_speech, back_track = FALSE, t = 1.5)), 25 | "Algorithm:[ ]+CBASS \\(t = 1.5\\)") 26 | 27 | expect_str_contains(capture_print(CBASS(presidential_speech, back_track = FALSE, t = 1.5, norm = 1)), 28 | "Algorithm:[ ]+CBASS \\(t = 1.5\\) \\[L1\\]") 29 | }) 30 | -------------------------------------------------------------------------------- /tests/testthat/test_cbass_smoke.R: -------------------------------------------------------------------------------- 1 | context("Smoke tests for CBASS") 2 | 3 | test_that("CBASS-VIZ [L2] works", { 4 | expect_no_error(cbass_fit <- CBASS(presidential_speech, back_track = TRUE)) 5 | expect_no_error(print(cbass_fit)) 6 | }) 7 | 8 | test_that("CBASS-VIZ [L1] works", { 9 | expect_no_error(cbass_fit <- CBASS(presidential_speech, back_track = TRUE, norm = 1)) 10 | expect_no_error(print(cbass_fit)) 11 | }) 12 | 13 | test_that("CBASS [L2] works", { 14 | expect_no_error(cbass_fit <- CBASS(presidential_speech, back_track = FALSE, t = 1.2)) 15 | expect_no_error(CBASS(presidential_speech, back_track = FALSE, t = 1.1)) 16 | expect_no_error(CBASS(presidential_speech, back_track = FALSE, t = 1.05)) 17 | expect_no_error(print(cbass_fit)) 18 | }) 19 | 20 | test_that("CBASS [L1] works", { 21 | expect_no_error(cbass_fit <- CBASS(presidential_speech, back_track = FALSE, t = 1.2, norm = 1)) 22 | expect_no_error(CBASS(presidential_speech, back_track = FALSE, t = 1.1, norm = 1)) 23 | expect_no_error(CBASS(presidential_speech, back_track = FALSE, t = 1.05, norm = 1)) 24 | expect_no_error(print(cbass_fit)) 25 | }) 26 | 27 | test_that("CBASS runs with X.center.global = FALSE & progress printing", { 28 | expect_no_error(CBASS(presidential_speech, X.center.global = FALSE, status = TRUE)) 29 | }) 30 | 31 | test_that("CBASS interactive dendrogram plot works", { 32 | cbass_fit <- CBASS(presidential_speech[1:5,1:5]) 33 | 34 | ## static row 35 | expect_no_error(print(plot(cbass_fit, type = "row.dendrogram", interactive = T, dynamic = F))) 36 | expect_no_error(print(plot(cbass_fit, type = "row.dendrogram", interactive = T, dynamic = F, k.row = 3))) 37 | expect_no_error(print(plot(cbass_fit, type = "row.dendrogram", interactive = T, dynamic = F, percent = 0.25))) 38 | 39 | ## static col 40 | expect_no_error(print(plot(cbass_fit, type = "col.dendrogram", interactive = T, dynamic = F))) 41 | expect_no_error(print(plot(cbass_fit, type = "col.dendrogram", interactive = T, dynamic = F, k.col = 3))) 42 | expect_no_error(print(plot(cbass_fit, type = "col.dendrogram", interactive = T, dynamic = F, percent = 0.25))) 43 | 44 | ## dynamic row 45 | expect_no_error(print(plot(cbass_fit, type = "row.dendrogram", interactive = T, dynamic = T))) 46 | 47 | ## dynamic col 48 | expect_no_error(print(plot(cbass_fit, type = "col.dendrogram", interactive = T, dynamic = T))) 49 | }) 50 | 51 | test_that("CBASS interactive path plot works", { 52 | cbass_fit <- CBASS(presidential_speech[1:5,1:5]) 53 | 54 | ## static row 55 | expect_no_error(print(plot(cbass_fit, type = "row.path", interactive = T, dynamic = F))) 56 | expect_no_error(print(plot(cbass_fit, type = "row.path", interactive = T, dynamic = F, k.row = 3))) 57 | expect_no_error(print(plot(cbass_fit, type = "row.path", interactive = T, dynamic = F, percent = 0.25))) 58 | 59 | ## static col 60 | expect_no_error(print(plot(cbass_fit, type = "col.path", interactive = T, dynamic = F))) 61 | expect_no_error(print(plot(cbass_fit, type = "col.path", interactive = T, dynamic = F, k.col = 3))) 62 | expect_no_error(print(plot(cbass_fit, type = "col.path", interactive = T, dynamic = F, percent = 0.25))) 63 | 64 | ## dynamic row 65 | expect_no_error(print(plot(cbass_fit, type = "row.path", interactive = T, dynamic = T))) 66 | 67 | ## dynamic col 68 | expect_no_error(print(plot(cbass_fit, type = "col.path", interactive = T, dynamic = T))) 69 | }) 70 | 71 | test_that("CBASS interactive dynamic heatmap works", { 72 | cbass_fit <- CBASS(presidential_speech[1:5,1:5]) 73 | 74 | ## dynamic 75 | expect_no_error(print(plot(cbass_fit, type = "heatmap", interactive = T, dynamic = T))) 76 | }) 77 | 78 | test_that("CBASS non-interactive dynamic dendrogram plot works", { 79 | cbass_fit <- CBASS(presidential_speech) 80 | 81 | ## row 82 | expect_no_error(print(plot(cbass_fit, type = "row.dendrogram", interactive = F, dynamic = T))) 83 | 84 | ## col 85 | expect_no_error(print(plot(cbass_fit, type = "col.dendrogram", interactive = F, dynamic = T))) 86 | }) 87 | 88 | test_that("CBASS non-interactive dynamic path plot works", { 89 | cbass_fit <- CBASS(presidential_speech) 90 | 91 | ## row 92 | expect_no_error(print(plot(cbass_fit, type = "row.path", interactive = F, dynamic = T))) 93 | 94 | ## col 95 | expect_no_error(print(plot(cbass_fit, type = "col.path", interactive = F, dynamic = T))) 96 | }) 97 | 98 | test_that("CBASS non-interactive heatmap plot works", { 99 | cbass_fit <- CBASS(presidential_speech) 100 | 101 | ## static 102 | expect_no_error(print(plot(cbass_fit, type = "heatmap", interactive = F, dynamic = F))) 103 | expect_no_error(print(plot(cbass_fit, type = "heatmap", interactive = F, dynamic = F, k.row = 3))) 104 | expect_no_error(print(plot(cbass_fit, type = "heatmap", interactive = F, dynamic = F, k.col = 3))) 105 | expect_no_error(print(plot(cbass_fit, type = "heatmap", interactive = F, dynamic = F, percent = 0.25))) 106 | 107 | ## dynamic 108 | expect_no_error(print(plot(cbass_fit, type = "heatmap", interactive = F, dynamic = T))) 109 | }) 110 | -------------------------------------------------------------------------------- /tests/testthat/test_cbass_sparsity_pattern.R: -------------------------------------------------------------------------------- 1 | context("Test CBASS Sparsity Invariants") 2 | 3 | test_that("CBASS begins with no fusions", { 4 | clustRviz_options(keep_debug_info = TRUE) 5 | 6 | cbass_fit <- CBASS(presidential_speech, back_track = FALSE) 7 | expect_zeros(cbass_fit$debug$row$v_zero_indices[, 1]) 8 | expect_zeros(cbass_fit$debug$col$v_zero_indices[, 1]) 9 | 10 | clustRviz_reset_options() 11 | }) 12 | 13 | test_that("CBASS ends with full fusions", { 14 | clustRviz_options(keep_debug_info = TRUE) 15 | 16 | cbass_fit <- CBASS(presidential_speech, back_track = FALSE) 17 | expect_ones(cbass_fit$debug$row$v_zero_indices[, NCOL(cbass_fit$debug$row$v_zero_indices)]) 18 | expect_ones(cbass_fit$debug$col$v_zero_indices[, NCOL(cbass_fit$debug$col$v_zero_indices)]) 19 | 20 | clustRviz_reset_options() 21 | }) 22 | 23 | test_that("CBASS-VIZ begins with no fusions", { 24 | clustRviz_options(keep_debug_info = TRUE) 25 | 26 | cbass_fit <- CBASS(presidential_speech, back_track = TRUE) 27 | expect_zeros(cbass_fit$debug$row$v_zero_indices[, 1]) 28 | expect_zeros(cbass_fit$debug$col$v_zero_indices[, 1]) 29 | 30 | clustRviz_reset_options() 31 | }) 32 | 33 | test_that("CBASS-VIZ ends with full fusions", { 34 | clustRviz_options(keep_debug_info = TRUE) 35 | 36 | cbass_fit <- CBASS(presidential_speech, back_track = TRUE) 37 | expect_ones(cbass_fit$debug$row$v_zero_indices[, NCOL(cbass_fit$debug$row$v_zero_indices)]) 38 | expect_ones(cbass_fit$debug$col$v_zero_indices[, NCOL(cbass_fit$debug$col$v_zero_indices)]) 39 | 40 | clustRviz_reset_options() 41 | }) 42 | 43 | test_that("CBASS begins with no fusions (uniform weights)", { 44 | clustRviz_options(keep_debug_info = TRUE) 45 | 46 | uniform_weights <- function(X) matrix(1, nrow = NROW(X), ncol = NROW(X)) 47 | cbass_fit <- CBASS(presidential_speech, back_track = FALSE, row_weights = uniform_weights, col_weights = uniform_weights) 48 | expect_zeros(cbass_fit$debug$row$v_zero_indices[, 1]) 49 | expect_zeros(cbass_fit$debug$col$v_zero_indices[, 1]) 50 | 51 | clustRviz_reset_options() 52 | }) 53 | 54 | test_that("CBASS ends with full fusions (uniform weights)", { 55 | clustRviz_options(keep_debug_info = TRUE) 56 | 57 | uniform_weights <- function(X) matrix(1, nrow = NROW(X), ncol = NROW(X)) 58 | cbass_fit <- CBASS(presidential_speech, back_track = FALSE, row_weights = uniform_weights, col_weights = uniform_weights) 59 | expect_ones(cbass_fit$debug$row$v_zero_indices[, NCOL(cbass_fit$debug$row$v_zero_indices)]) 60 | expect_ones(cbass_fit$debug$col$v_zero_indices[, NCOL(cbass_fit$debug$col$v_zero_indices)]) 61 | 62 | clustRviz_reset_options() 63 | }) 64 | 65 | test_that("CBASS-VIZ begins with no fusions (uniform weights)", { 66 | clustRviz_options(keep_debug_info = TRUE) 67 | 68 | uniform_weights <- function(X) matrix(1, nrow = NROW(X), ncol = NROW(X)) 69 | cbass_fit <- CBASS(presidential_speech, back_track = TRUE, row_weights = uniform_weights, col_weights = uniform_weights) 70 | expect_zeros(cbass_fit$debug$row$v_zero_indices[, 1]) 71 | expect_zeros(cbass_fit$debug$col$v_zero_indices[, 1]) 72 | 73 | clustRviz_reset_options() 74 | }) 75 | 76 | test_that("CBASS-VIZ ends with full fusions (uniform weights)", { 77 | clustRviz_options(keep_debug_info = TRUE) 78 | 79 | uniform_weights <- function(X) matrix(1, nrow = NROW(X), ncol = NROW(X)) 80 | cbass_fit <- CBASS(presidential_speech, back_track = TRUE, row_weights = uniform_weights, col_weights = uniform_weights) 81 | expect_ones(cbass_fit$debug$row$v_zero_indices[, NCOL(cbass_fit$debug$row$v_zero_indices)]) 82 | expect_ones(cbass_fit$debug$col$v_zero_indices[, NCOL(cbass_fit$debug$col$v_zero_indices)]) 83 | 84 | clustRviz_reset_options() 85 | }) 86 | -------------------------------------------------------------------------------- /tests/testthat/test_cbass_weights.R: -------------------------------------------------------------------------------- 1 | context("Test CBASS Weight Handling") 2 | 3 | ## Tests for row weights 4 | test_that("CBASS works with user weight function for row weights", { 5 | # FIXME: This should work - see GitHub #8 6 | ## uniform_weight_func <- function(X) matrix(1, nrow=NROW(X), ncol=NROW(X)) 7 | ## CBASS(presidential_speech, row_weights = uniform_weight_func) 8 | 9 | # By manual testing, this is a good weight function / matrix 10 | my_weight_func <- function(X) { 11 | mat <- exp(-0.01 * as.matrix(dist(X))^2) 12 | diag(mat) <- 0 13 | mat[mat < quantile(mat, 0.73)] <- 0 14 | mat 15 | } 16 | 17 | expect_true(clustRviz:::is_connected_adj_mat(my_weight_func(presidential_speech))) 18 | expect_no_error(CBASS(presidential_speech, row_weights = my_weight_func)) 19 | }) 20 | 21 | test_that("CBASS works with user weight matrix for row weights", { 22 | mat <- exp(-0.01 * as.matrix(dist(presidential_speech))^2) 23 | diag(mat) <- 0 24 | mat[mat < quantile(mat, 0.73)] <- 0 25 | expect_no_error(CBASS(presidential_speech, row_weights = mat)) 26 | }) 27 | 28 | test_that("CBASS errors with negative weights for row weights", { 29 | mat <- -1 * exp(-0.01 * as.matrix(dist(presidential_speech))^2) 30 | diag(mat) <- 0 31 | mat[mat < quantile(mat, 0.73)] <- 0 32 | expect_error(CBASS(presidential_speech, row_weights = mat)) 33 | }) 34 | 35 | test_that("CBASS errors with unconnected graphs for row weights", { 36 | mat <- exp(-0.01 * as.matrix(dist(presidential_speech))^2) 37 | diag(mat) <- 0 38 | mat[mat < quantile(mat, 0.95)] <- 0 39 | expect_error(CBASS(presidential_speech, row_weights = mat)) 40 | }) 41 | 42 | ## Tests for column / feature weights 43 | test_that("CBASS works with user weight function for column weights", { 44 | # FIXME: This should work - see GitHub #8 45 | ## uniform_weight_func <- function(X) matrix(1, nrow=NROW(X), ncol=NROW(X)) 46 | ## CBASS(presidential_speech, col_weights = uniform_weight_func) 47 | 48 | # By manual testing, this is a good weight function / matrix 49 | my_weight_func <- function(X) { 50 | mat <- exp(-0.01 * as.matrix(dist(X))^2) 51 | diag(mat) <- 0 52 | mat[mat < quantile(mat, 0.73)] <- 0 53 | mat 54 | } 55 | 56 | expect_true(clustRviz:::is_connected_adj_mat(my_weight_func(t(presidential_speech)))) 57 | expect_no_error(CBASS(presidential_speech, col_weights = my_weight_func)) 58 | }) 59 | 60 | test_that("CBASS works with user weight matrix for column weights", { 61 | mat <- exp(-0.01 * as.matrix(dist(t(presidential_speech)))^2) 62 | diag(mat) <- 0 63 | mat[mat < quantile(mat, 0.73)] <- 0 64 | expect_no_error(CBASS(presidential_speech, col_weights = mat)) 65 | }) 66 | 67 | test_that("CBASS errors with negative weights for column weights", { 68 | mat <- -1 * exp(-0.01 * as.matrix(dist(t(presidential_speech)))^2) 69 | diag(mat) <- 0 70 | mat[mat < quantile(mat, 0.73)] <- 0 71 | expect_error(CBASS(presidential_speech, col_weights = mat)) 72 | }) 73 | 74 | test_that("CBASS errors with unconnected graphs for column weights", { 75 | mat <- exp(-0.01 * as.matrix(dist(t(presidential_speech)))^2) 76 | diag(mat) <- 0 77 | mat[mat < quantile(mat, 0.95)] <- 0 78 | expect_error(CBASS(presidential_speech, col_weights = mat)) 79 | }) 80 | -------------------------------------------------------------------------------- /tests/testthat/test_logging.R: -------------------------------------------------------------------------------- 1 | context("Logging Tests") 2 | 3 | test_that("Logging controls work", { 4 | expect_error(clustRviz_logger_level("BAD LEVEL")) 5 | 6 | clustRviz_logger_level("INFO") 7 | expect_equal("INFO", clustRviz_logger_level()) 8 | 9 | clustRviz_logger_level("MESSAGE") 10 | }) 11 | 12 | test_that("INFO and DEBUG message print as expected", { 13 | clustRviz_logger_level("MESSAGE") 14 | 15 | expect_silent(clustRviz:::crv_info("A message")) 16 | expect_silent(clustRviz:::crv_debug("A message")) 17 | 18 | clustRviz_logger_level("DEBUG") 19 | 20 | expect_output(clustRviz:::crv_info("A message"), "[INFO]") 21 | expect_output(clustRviz:::crv_info("A message"), "A message") 22 | expect_output(clustRviz:::crv_debug("The message"), "[DEBUG]") 23 | expect_output(clustRviz:::crv_debug("The message"), "The message") 24 | 25 | clustRviz_logger_level("MESSAGE") 26 | }) 27 | 28 | test_that("Supressing messages works", { 29 | # At INFO level, everything is shown in R 30 | clustRviz_logger_level("INFO") 31 | 32 | expect_error(clustRviz:::crv_error("ERROR")) 33 | expect_warning(clustRviz:::crv_warning("WARNING")) 34 | expect_message(clustRviz:::crv_message("MESSAGE")) 35 | 36 | # At MESSAGE level, everything is shown in R 37 | clustRviz_logger_level("MESSAGE") 38 | 39 | expect_error(clustRviz:::crv_error("ERROR")) 40 | expect_warning(clustRviz:::crv_warning("WARNING")) 41 | expect_message(clustRviz:::crv_message("MESSAGE")) 42 | 43 | # At WARNING level, we don't get a message 44 | clustRviz_logger_level("WARNING") 45 | 46 | expect_error(clustRviz:::crv_error("ERROR")) 47 | expect_warning(clustRviz:::crv_warning("WARNING")) 48 | expect_no_message(clustRviz:::crv_message("MESSAGE")) 49 | 50 | # At ERROR level, we don't get a message or warning 51 | clustRviz_logger_level("ERROR") 52 | 53 | expect_error(clustRviz:::crv_error("ERROR")) 54 | expect_no_warning(clustRviz:::crv_warning("WARNING")) 55 | expect_no_message(clustRviz:::crv_message("MESSAGE")) 56 | 57 | clustRviz_logger_level("MESSAGE") 58 | }) 59 | 60 | test_that("No extra newlines", { 61 | clustRviz_logger_level("DEBUG") 62 | 63 | e <- tryCatch(clustRviz:::crv_error("MY ERROR"), error=identity) 64 | expect_equal(str_count(e$message, "\n"), 1) 65 | 66 | e <- tryCatch(clustRviz:::crv_warning("MY WARNING"), warning=identity) 67 | expect_equal(str_count(e$message, "\n"), 1) 68 | 69 | e <- tryCatch(clustRviz:::crv_message("MY MESSAGE"), message=identity) 70 | expect_equal(str_count(e$message, "\n"), 1) 71 | 72 | e <- tryCatch(clustRviz:::crv_error("MY ERROR\nON TWO LINES"), error=identity) 73 | expect_equal(str_count(e$message, "\n"), 2) 74 | 75 | clustRviz_logger_level("MESSAGE") 76 | }) 77 | 78 | test_that("Function capture works at R level", { 79 | clustRviz_logger_level("MESSAGE") 80 | 81 | f <- function(x){clustRviz:::crv_error("ERROR MESSAGE")} 82 | 83 | e <- tryCatch(f(), error=identity) 84 | 85 | expect_str_contains(e$message, "ERROR MESSAGE") 86 | expect_str_contains(e$message, "(Called from f)") 87 | expect_true(is.null(e$call)) 88 | expect_true(is.null(e$cppstack)) 89 | 90 | f <- function(x){clustRviz:::crv_error("ERROR MESSAGE", call=FALSE)} 91 | e <- tryCatch(f(), error=identity) 92 | 93 | expect_false(grepl("\\(Called from f\\)", e$message)) 94 | 95 | f <- function(x){clustRviz:::crv_error("ERROR MESSAGE", call="my func")} 96 | e <- tryCatch(f(), error=identity) 97 | 98 | expect_true(grepl("\\(Called from my func\\)", e$message)) 99 | 100 | f <- function(x){clustRviz:::crv_warning("WARNING MESSAGE", call=FALSE)} 101 | e <- tryCatch(f(), warning=identity) 102 | 103 | expect_false(grepl("\\(Called from f\\)", e$message)) 104 | 105 | f <- function(x){clustRviz:::crv_warning("WARNING MESSAGE", call="my func")} 106 | e <- tryCatch(f(), warning=identity) 107 | 108 | expect_true(grepl("\\(Called from my func\\)", e$message)) 109 | }) 110 | -------------------------------------------------------------------------------- /tests/testthat/test_matrix_prox.R: -------------------------------------------------------------------------------- 1 | context("Test C++ matrix prox") 2 | 3 | test_that("L1 matrix prox works", { 4 | set.seed(125) 5 | n <- 25 6 | p <- 50 7 | 8 | X <- matrix(rnorm(n * p), nrow = n, ncol = p) 9 | 10 | MatrixRowProx <- clustRviz:::MatrixRowProx 11 | weights <- rep(1, n) 12 | 13 | expect_equal(X, MatrixRowProx(X, lambda = 0, weights = weights, l1 = TRUE)) 14 | expect_equal(abs(X) + 4, MatrixRowProx(abs(X) + 5, lambda = 1, weights = weights, l1 = TRUE)) 15 | expect_equal(-abs(X) - 4, MatrixRowProx(-abs(X) - 5, lambda = 1, weights = weights, l1 = TRUE)) 16 | 17 | ## Now we check that weights work 18 | X <- matrix(1:25, nrow = 25, ncol = 1) 19 | weights <- 1:25 20 | expect_equal(matrix(0, nrow = 25, ncol = 1), 21 | MatrixRowProx(X, lambda = 1, weights = weights, l1 = TRUE)) 22 | 23 | X <- matrix(5, nrow = 6, ncol = 1) 24 | weights <- seq(0, 5) 25 | expect_equal(matrix(5 - weights, nrow = 6, ncol = 1), 26 | MatrixRowProx(X, lambda = 1, weights = weights, l1 = TRUE)) 27 | 28 | #Now check matrix_col_prox against row prox 29 | MatrixColProx <- clustRviz:::MatrixColProx 30 | expect_equal(t(MatrixColProx(t(X), lambda = 1, weights = weights, l1 = TRUE)), 31 | MatrixRowProx(X, lambda = 1, weights = weights, l1 = TRUE)) 32 | }) 33 | 34 | test_that("L2 prox works", { 35 | set.seed(125) 36 | MatrixRowProx <- clustRviz:::MatrixRowProx 37 | num_unique_cols <- clustRviz:::num_unique_cols 38 | n <- 25 39 | 40 | ## If X has a single column, same as L1 prox 41 | X <- matrix(rnorm(n, sd = 3), ncol = 1) 42 | weights <- rexp(n) 43 | 44 | expect_equal(MatrixRowProx(X, lambda = 1, weights = weights, l1 = TRUE), 45 | MatrixRowProx(X, lambda = 1, weights = weights, l1 = FALSE)) 46 | 47 | p <- 5 48 | X <- matrix(1, nrow = n, ncol = p) 49 | weights <- seq(0, 5, length.out = 25) 50 | 51 | expect_equal(1, num_unique_cols(MatrixRowProx(X, lambda = 1, weights = weights, l1 = FALSE))) 52 | 53 | y <- matrix(c(3, 4), nrow = 1) 54 | 55 | expect_equal(MatrixRowProx(y, 1, 1, l1 = FALSE), y * (1 - 1/5)) 56 | expect_equal(MatrixRowProx(y, 1, 3, l1 = FALSE), y * (1 - 3/5)) 57 | expect_equal(MatrixRowProx(y, 2, 1, l1 = FALSE), y * (1 - 2/5)) 58 | expect_equal(MatrixRowProx(y, 2, 3, l1 = FALSE), y * 0) 59 | 60 | y <- -1 * y 61 | expect_equal(MatrixRowProx(y, 1, 1, l1 = FALSE), y * (1 - 1/5)) 62 | expect_equal(MatrixRowProx(y, 1, 3, l1 = FALSE), y * (1 - 3/5)) 63 | expect_equal(MatrixRowProx(y, 2, 1, l1 = FALSE), y * (1 - 2/5)) 64 | expect_equal(MatrixRowProx(y, 2, 3, l1 = FALSE), y * 0) 65 | 66 | #Now check matrix_col_prox against row prox 67 | MatrixColProx <- clustRviz:::MatrixColProx 68 | expect_equal(t(MatrixColProx(t(X), lambda = 1, weights = weights, l1 = FALSE)), 69 | MatrixRowProx(X, lambda = 1, weights = weights, l1 = FALSE)) 70 | expect_equal(t(MatrixColProx(t(y), lambda = 1, weights = weights, l1 = TRUE)), 71 | MatrixRowProx(y, lambda = 1, weights = weights, l1 = TRUE)) 72 | }) 73 | -------------------------------------------------------------------------------- /tests/testthat/test_options.R: -------------------------------------------------------------------------------- 1 | context("Options Handling") 2 | 3 | test_that("Test option error handling", { 4 | ## ADMM relaxation parameter 5 | expect_error(clustRviz_options(rho = 0)) 6 | expect_error(clustRviz_options(rho = -1)) 7 | expect_error(clustRviz_options(rho = "a")) 8 | expect_error(clustRviz_options(rho = NA)) 9 | expect_error(clustRviz_options(rho = c(1, 2))) 10 | 11 | ## Initial (burn-in phase) regularization parameter 12 | expect_error(clustRviz_options(epsilon = 0)) 13 | expect_error(clustRviz_options(epsilon = -1)) 14 | expect_error(clustRviz_options(epsilon = "a")) 15 | expect_error(clustRviz_options(epsilon = NA)) 16 | expect_error(clustRviz_options(epsilon = c(1, 2))) 17 | 18 | ## Back-tracking parameters 19 | expect_error(clustRviz_options(viz_initial_step = 1)) 20 | expect_error(clustRviz_options(viz_initial_step = 0.5)) 21 | expect_error(clustRviz_options(viz_initial_step = 0)) 22 | expect_error(clustRviz_options(viz_initial_step = -1)) 23 | expect_error(clustRviz_options(viz_initial_step = -1.5)) 24 | expect_error(clustRviz_options(viz_initial_step = "a")) 25 | expect_error(clustRviz_options(viz_initial_step = NA)) 26 | expect_error(clustRviz_options(viz_initial_step = c(1.5, 2))) 27 | 28 | expect_error(clustRviz_options(viz_small_step = 1)) 29 | expect_error(clustRviz_options(viz_small_step = 0.5)) 30 | expect_error(clustRviz_options(viz_small_step = 0)) 31 | expect_error(clustRviz_options(viz_small_step = -1)) 32 | expect_error(clustRviz_options(viz_small_step = -1.5)) 33 | expect_error(clustRviz_options(viz_small_step = "a")) 34 | expect_error(clustRviz_options(viz_small_step = NA)) 35 | expect_error(clustRviz_options(viz_small_step = c(1.5, 2))) 36 | 37 | expect_error(clustRviz_options(viz_max_inner_iter = 0)) 38 | expect_error(clustRviz_options(viz_max_inner_iter = -5)) 39 | expect_error(clustRviz_options(viz_max_inner_iter = 35.5)) 40 | expect_error(clustRviz_options(viz_max_inner_iter = "a")) 41 | expect_error(clustRviz_options(viz_max_inner_iter = NA)) 42 | expect_error(clustRviz_options(viz_max_inner_iter = c(500, 600))) 43 | 44 | # Stopping and storage parameters 45 | expect_error(clustRviz_options(max_iter = 0)) 46 | expect_error(clustRviz_options(max_iter = -5)) 47 | expect_error(clustRviz_options(max_iter = 35.5)) 48 | expect_error(clustRviz_options(max_iter = "a")) 49 | expect_error(clustRviz_options(max_iter = NA)) 50 | expect_error(clustRviz_options(max_iter = c(500, 600))) 51 | 52 | expect_error(clustRviz_options(burn_in = 0)) 53 | expect_error(clustRviz_options(burn_in = -5)) 54 | expect_error(clustRviz_options(burn_in = 35.5)) 55 | expect_error(clustRviz_options(burn_in = "a")) 56 | expect_error(clustRviz_options(burn_in = NA)) 57 | expect_error(clustRviz_options(burn_in = c(500, 600))) 58 | 59 | expect_error(clustRviz_options(keep = 0)) 60 | expect_error(clustRviz_options(keep = -5)) 61 | expect_error(clustRviz_options(keep = 35.5)) 62 | expect_error(clustRviz_options(keep = "a")) 63 | expect_error(clustRviz_options(keep = NA)) 64 | expect_error(clustRviz_options(keep = c(500, 600))) 65 | 66 | expect_error(clustRviz_options(keep_debug_info = 0)) 67 | expect_error(clustRviz_options(keep_debug_info = 3)) 68 | expect_error(clustRviz_options(keep_debug_info = -5)) 69 | expect_error(clustRviz_options(keep_debug_info = 35.5)) 70 | expect_error(clustRviz_options(keep_debug_info = "a")) 71 | expect_error(clustRviz_options(keep_debug_info = NA)) 72 | expect_error(clustRviz_options(keep_debug_info = c(500, 600))) 73 | }) 74 | 75 | test_that("clustRviz_reset_options works", { 76 | base_opts <- clustRviz_options() 77 | 78 | clustRviz_options(rho = 2, keep = 5, max_iter = 500, burn_in = 20) 79 | expect_false(isTRUE(all.equal(base_opts, clustRviz_options()))) 80 | 81 | clustRviz_reset_options() 82 | expect_equal(base_opts, clustRviz_options()) 83 | }) 84 | 85 | test_that("Setting clustRviz options works", { 86 | clustRviz_options(rho = 2) 87 | expect_equal(clustRviz:::.clustRvizOptionsEnv[["rho"]], 2) 88 | 89 | clustRviz_options(viz_initial_step = 2) 90 | expect_equal(clustRviz:::.clustRvizOptionsEnv[["viz_initial_step"]], 2) 91 | 92 | clustRviz_options(viz_max_inner_iter = 20) 93 | expect_equal(clustRviz:::.clustRvizOptionsEnv[["viz_max_inner_iter"]], 20) 94 | 95 | clustRviz_options(viz_small_step = 1.2) 96 | expect_equal(clustRviz:::.clustRvizOptionsEnv[["viz_small_step"]], 1.2) 97 | 98 | clustRviz_reset_options() 99 | }) 100 | 101 | test_that("Options warnings work", { 102 | expect_warning(clustRviz_options(viz_initial_step = 1.2, viz_small_step = 1.2)) 103 | expect_warning(clustRviz_options(viz_initial_step = 1.2, viz_small_step = 2)) 104 | 105 | expect_warning(clustRviz_options(max_iter = 100, burn_in = 100)) 106 | expect_warning(clustRviz_options(max_iter = 100, burn_in = 150)) 107 | expect_warning(clustRviz_options(max_iter = 1000, max_inner_iter = 500)) 108 | 109 | clustRviz_reset_options() 110 | }) 111 | -------------------------------------------------------------------------------- /tests/testthat/test_solver_biclustering.R: -------------------------------------------------------------------------------- 1 | context("Test convex_biclustering() Solver") 2 | 3 | test_that("convex_biclustering() errors early with incorrect input", { 4 | # Pre-processing parameters must be boolean flags 5 | expect_error(convex_biclustering(presidential_speech, X.center.global = NA, lambda_grid = 1:5)) 6 | expect_error(convex_biclustering(presidential_speech, X.center.global = c(TRUE, FALSE), lambda_grid = 1:5)) 7 | 8 | # Fail on unknown flags 9 | expect_error(convex_biclustering(presidential_speech, flag="unknown", lambda_grid = 1:5), regexp = "flag") 10 | expect_error(convex_biclustering(presidential_speech, "value", lambda_grid = 1:5), regexp = "Unknown") 11 | 12 | ps <- presidential_speech 13 | ps[1,1] <- Inf; expect_error(convex_biclustering(ps, lambda_grid = 1:5)) 14 | 15 | ## lambda_grid must be supplied, strictly positive, and ordered (only gets a warning) 16 | expect_error(convex_biclustering(presidential_speech)) 17 | expect_error(convex_biclustering(presidential_speech), lambda_grid = numeric()) 18 | expect_error(convex_biclustering(presidential_speech, lambda_grid = 0)) 19 | expect_error(convex_biclustering(presidential_speech, lambda_grid = c(0, 3))) 20 | expect_warning(convex_biclustering(presidential_speech, lambda_grid = c(3, 2, 1))) 21 | }) 22 | 23 | test_that("convex_biclustering() matches cvxclustr", { 24 | skip_on_cran() 25 | skip_if_not_installed("cvxclustr") 26 | skip_if_not_installed("cvxbiclustr") 27 | skip_if_not_installed("Matrix") 28 | data(mammals, package = "cvxclustr") 29 | library(Matrix) 30 | 31 | ## Example modified from help pages of cvxclustr 32 | X <- as.matrix(mammals[,-1]) 33 | biclust_fit <- convex_biclustering(X, X.center.global = FALSE, lambda_grid = seq(4, 40, length.out = 10)) 34 | 35 | ## Calculate matching `cvxbiclustr` solution 36 | 37 | ## Match CBASS() selected edges and weights 38 | D_row <- biclust_fit$D_row 39 | D_col <- t(biclust_fit$D_col) 40 | 41 | w_row <- biclust_fit$row_weights; w_row <- clustRviz:::weight_mat_to_vec(w_row); w_row <- w_row[w_row != 0] 42 | w_col <- biclust_fit$col_weights; w_col <- clustRviz:::weight_mat_to_vec(w_col); w_col <- w_col[w_col != 0] 43 | 44 | w_row <- w_row / sum(w_row); w_row <- w_row / sqrt(NROW(X)) 45 | w_col <- w_col / sum(w_col); w_col <- w_col / sqrt(NCOL(X)) 46 | 47 | ## Perform clustering 48 | capture.output(cobra_fit <- cvxbiclustr::cobra(X, 49 | E_row = Matrix(D_row, sparse = TRUE), 50 | E_col = Matrix(D_col, sparse = TRUE), 51 | w_row = w_row, 52 | w_col = w_col, 53 | gamma = biclust_fit$lambda_grid)) 54 | 55 | obj <- function(U, lambda, l1 = FALSE){ 56 | DU <- D_row %*% U; DTU <- D_col %*% t(U) 57 | if(l1){ 58 | sum((X - U)^2)/2 + lambda * sum(w_row * abs(DU)) + lambda * sum(w_col * abs(t(DTU))) 59 | } else { 60 | sum((X - U)^2)/2 + lambda * sum(w_row * sqrt(rowSums(DU^2))) + lambda * sum(w_col * sqrt(rowSums(DTU^2))) 61 | } 62 | } 63 | 64 | ## Check that the objectives are of (roughly) equal quality... 65 | for(ix in seq_along(biclust_fit$lambda_grid)){ 66 | lambda <- biclust_fit$lambda_grid[ix] 67 | 68 | my_U <- biclust_fit$U[,,ix] 69 | ec_U <- cobra_fit$U[[ix]] 70 | 71 | expect_true(obj(my_U, lambda) <= 1.001 * obj(ec_U, lambda)) 72 | } 73 | }) 74 | -------------------------------------------------------------------------------- /tests/testthat/test_solver_clustering.R: -------------------------------------------------------------------------------- 1 | context("Test convex_clustering() Solver") 2 | 3 | test_that("convex_clustering() errors early with incorrect input", { 4 | # Pre-processing parameters must be boolean flags 5 | expect_error(convex_clustering(presidential_speech, X.center = NA, lambda_grid = 1:5)) 6 | expect_error(convex_clustering(presidential_speech, X.center = c(TRUE, FALSE), lambda_grid = 1:5)) 7 | 8 | expect_error(convex_clustering(presidential_speech, X.scale = NA, lambda_grid = 1:5)) 9 | expect_error(convex_clustering(presidential_speech, X.scale = c(TRUE, FALSE), lambda_grid = 1:5)) 10 | 11 | # Fail on unknown flags 12 | expect_error(convex_clustering(presidential_speech, flag="unknown", lambda_grid = 1:5), regexp = "flag") 13 | expect_error(convex_clustering(presidential_speech, "value", lambda_grid = 1:5), regexp = "Unknown") 14 | 15 | ps <- presidential_speech 16 | ps[1,1] <- Inf; expect_error(convex_clustering(ps, lambda_grid = 1:5)) 17 | 18 | ## lambda_grid must be supplied, strictly positive, and ordered (only gets a warning) 19 | expect_error(convex_clustering(presidential_speech)) 20 | expect_error(convex_clustering(presidential_speech), lambda_grid = numeric()) 21 | expect_error(convex_clustering(presidential_speech, lambda_grid = 0)) 22 | expect_error(convex_clustering(presidential_speech, lambda_grid = c(0, 3))) 23 | expect_warning(convex_clustering(presidential_speech, lambda_grid = c(3, 2, 1))) 24 | }) 25 | 26 | test_that("convex_clustering() matches cvxclustr", { 27 | skip_on_cran() 28 | skip_if_not_installed("cvxclustr") 29 | data(mammals, package = "cvxclustr") 30 | 31 | ## Example modified from help pages of cvxclustr 32 | X <- as.matrix(mammals[,-1]) 33 | clust_fit <- convex_clustering(X, X.center = FALSE, X.scale = FALSE, 34 | lambda_grid = seq(4, 40, length.out = 10)) 35 | 36 | ## Calculate matching `cvxclustr` solution 37 | Xt <- t(X) 38 | 39 | ## Match CARP() selected weights 40 | w <- clustRviz:::weight_mat_to_vec(clust_fit$weights) 41 | 42 | ## Perform clustering 43 | suppressWarnings(cvxclust_fit <- cvxclustr::cvxclust(Xt, w, tol = 1e-7, 44 | gamma = clust_fit$lambda_grid)) 45 | 46 | ## cvxclustr seems to use a pretty loose stopping tolerance, so this is a loose check... 47 | for(i in seq_along(clust_fit$lambda_grid)){ 48 | expect_equal(clust_fit$U[,,i], t(cvxclust_fit$U[[i]]), 49 | check.attributes = FALSE, tolerance = 1e-4) 50 | } 51 | }) 52 | -------------------------------------------------------------------------------- /tests/testthat/test_tensor_projection.R: -------------------------------------------------------------------------------- 1 | context("Test tensor PCA projection") 2 | 3 | test_that("tensor_projection works", { 4 | tensor_projection <- clustRviz:::tensor_projection 5 | n <- 50 6 | p <- 30 7 | q <- 100 8 | 9 | X <- array(rnorm(n * p * q), dim = c(n, p, q)) 10 | 11 | k <- 10 12 | P <- matrix(rnorm(p * k), nrow = p, ncol = k) 13 | 14 | XP <- array(NA, dim = c(n, k, q)) 15 | for(i in seq(1, q)){ 16 | XP[,,i] <- X[,,i] %*% P 17 | } 18 | 19 | expect_equal(tensor_projection(X, P), XP) 20 | }) 21 | -------------------------------------------------------------------------------- /tests/testthat/test_utils.R: -------------------------------------------------------------------------------- 1 | context("Test Utility Functions") 2 | 3 | capitalize_string <- function(x){ 4 | x <- gsub("_", " ", x) 5 | vapply(strsplit(x, " "), 6 | function(x) paste(paste0(toupper(substring(x, 1, 1)), substring(x, 2)), collapse = " "), 7 | character(1)) 8 | } 9 | 10 | test_that("Validators work", { 11 | is_logical_scalar <- clustRviz:::is_logical_scalar 12 | is_numeric_scalar <- clustRviz:::is_numeric_scalar 13 | is_integer_scalar <- clustRviz:::is_integer_scalar 14 | is_percent_scalar <- clustRviz:::is_percent_scalar 15 | is_positive_scalar <- clustRviz:::is_positive_scalar 16 | is_positive_integer_scalar <- clustRviz:::is_positive_integer_scalar 17 | is_character_scalar <- clustRviz:::is_character_scalar 18 | is_nonempty_character_scalar <- clustRviz:::is_nonempty_character_scalar 19 | 20 | expect_true(is_logical_scalar(TRUE)) 21 | expect_true(is_logical_scalar(FALSE)) 22 | expect_false(is_logical_scalar(NA)) 23 | expect_false(is_logical_scalar(0)) 24 | expect_false(is_logical_scalar("a")) 25 | expect_false(is_logical_scalar(c(TRUE, TRUE))) 26 | 27 | expect_true(is_numeric_scalar(3)) 28 | expect_true(is_numeric_scalar(3.5)) 29 | expect_true(is_numeric_scalar(0)) 30 | expect_true(is_numeric_scalar(-4)) 31 | expect_false(is_numeric_scalar(NA)) 32 | expect_false(is_numeric_scalar(c(2, 5))) 33 | expect_false(is_numeric_scalar("a")) 34 | 35 | expect_true(is_integer_scalar(3)) 36 | expect_false(is_integer_scalar(3.5)) 37 | expect_true(is_integer_scalar(0)) 38 | expect_true(is_integer_scalar(-4)) 39 | expect_false(is_integer_scalar(NA)) 40 | expect_false(is_integer_scalar(c(2, 5))) 41 | expect_false(is_integer_scalar("a")) 42 | 43 | expect_true(is_percent_scalar(0.3)) 44 | expect_true(is_percent_scalar(1)) 45 | expect_true(is_percent_scalar(0)) 46 | expect_false(is_percent_scalar(1.5)) 47 | expect_false(is_percent_scalar(-1.5)) 48 | expect_false(is_percent_scalar(NA)) 49 | expect_false(is_percent_scalar(c(0.2, 0.5))) 50 | expect_false(is_percent_scalar("a")) 51 | 52 | expect_true(is_positive_scalar(0.3)) 53 | expect_true(is_positive_scalar(1)) 54 | expect_false(is_positive_scalar(0)) 55 | expect_true(is_positive_scalar(1.5)) 56 | expect_false(is_positive_scalar(-1.5)) 57 | expect_false(is_positive_scalar(NA)) 58 | expect_false(is_positive_scalar(c(0.2, 0.5))) 59 | expect_false(is_positive_scalar("a")) 60 | 61 | expect_true(is_positive_integer_scalar(3)) 62 | expect_false(is_positive_integer_scalar(3.5)) 63 | expect_false(is_positive_integer_scalar(0)) 64 | expect_false(is_positive_integer_scalar(-4)) 65 | expect_false(is_positive_integer_scalar(NA)) 66 | expect_false(is_positive_integer_scalar(c(2, 5))) 67 | expect_false(is_positive_integer_scalar("a")) 68 | 69 | expect_false(is_character_scalar(3)) 70 | expect_false(is_character_scalar(3.5)) 71 | expect_false(is_character_scalar(0)) 72 | expect_false(is_character_scalar(-4)) 73 | expect_false(is_character_scalar(NA)) 74 | expect_false(is_character_scalar(c(2, 5))) 75 | expect_true(is_character_scalar("")) 76 | expect_true(is_character_scalar("a")) 77 | 78 | expect_false(is_nonempty_character_scalar(3)) 79 | expect_false(is_nonempty_character_scalar(3.5)) 80 | expect_false(is_nonempty_character_scalar(0)) 81 | expect_false(is_nonempty_character_scalar(-4)) 82 | expect_false(is_nonempty_character_scalar(NA)) 83 | expect_false(is_nonempty_character_scalar(c(2, 5))) 84 | expect_false(is_nonempty_character_scalar("")) 85 | expect_true(is_nonempty_character_scalar("a")) 86 | 87 | is_square <- clustRviz:::is_square 88 | expect_true(is_square(matrix(1, 5, 5))) 89 | expect_false(is_square(matrix(1, 5, 3))) 90 | }) 91 | 92 | test_that("Capitalization works", { 93 | capitalize_string <- clustRviz:::capitalize_string 94 | 95 | expect_equal("A", capitalize_string("a")) 96 | expect_equal("Abc", capitalize_string("abc")) 97 | expect_equal("ABc", capitalize_string("ABc")) 98 | expect_equal("A Fantastic Cow", capitalize_string("a fantastic cow")) 99 | }) 100 | 101 | test_that("Unscaling matrix works", { 102 | set.seed(5) 103 | n = 100; p = 400; 104 | 105 | X <- matrix(rnorm(n * p, sd = 1:25, mean = 1:50), ncol=p) 106 | X_std <- scale(X, center=TRUE, scale=TRUE) 107 | 108 | expect_equal(clustRviz:::unscale_matrix(X_std), X, check.attributes = FALSE) 109 | }) 110 | 111 | test_that("connectedness check works", { 112 | is_connected_adj_mat <- clustRviz:::is_connected_adj_mat 113 | 114 | eye <- function(n) diag(1, nrow = n, ncol = n) 115 | 116 | expect_true(is_connected_adj_mat(eye(1))) 117 | expect_false(is_connected_adj_mat(eye(5))) 118 | 119 | A <- eye(3); A[1,2] <- A[2,3] <- A[2,1] <- A[3,2] <- 1 120 | expect_true(is_connected_adj_mat(A)) 121 | 122 | A <- eye(3); A[1,2] <- A[2,1] <- 1 123 | expect_false(is_connected_adj_mat(A)) 124 | }) 125 | 126 | test_that("U smoothing for CARP works", { 127 | set.seed(200) 128 | 129 | N <- 50 130 | P <- 30 131 | 132 | U <- array(rnorm(N * P), c(N, P, 1)) 133 | 134 | # Fake cluster assignments 135 | K <- 5 136 | membership <- sample(K, N, replace = TRUE) 137 | cluster_info <- list(membership = membership, 138 | csize = table(membership), 139 | no = length(unique(membership))) 140 | 141 | U_smoothed <- smooth_u_clustering(U, list(cluster_info)) 142 | 143 | for(k in 1:K){ 144 | u_row_mean <- colMeans(U[membership == k,,1]) 145 | for(n in 1:N){ 146 | if(membership[n] == k){ 147 | expect_equal(U_smoothed[n,,1], u_row_mean) 148 | } 149 | } 150 | } 151 | }) 152 | -------------------------------------------------------------------------------- /tests/testthat/test_weights.R: -------------------------------------------------------------------------------- 1 | context("Test Built-In Weight Functions") 2 | 3 | test_that("Dense RBF works with fixed phi", { 4 | weight_func <- dense_rbf_kernel_weights(phi = 1) 5 | weight_results <- weight_func(presidential_speech) 6 | 7 | weight_mat_manual <- exp(-as.matrix(dist(presidential_speech))^2) 8 | diag(weight_mat_manual) <- 0 9 | 10 | expect_equal(weight_results$weight_mat, weight_mat_manual) 11 | }) 12 | 13 | test_that("Dense RBF works with learned phi", { 14 | weight_func <- dense_rbf_kernel_weights() 15 | weight_results <- weight_func(presidential_speech) 16 | phi <- weight_results$type$phi 17 | 18 | weight_mat_manual <- exp(-phi * as.matrix(dist(presidential_speech))^2) 19 | diag(weight_mat_manual) <- 0 20 | 21 | expect_equal(weight_results$weight_mat, weight_mat_manual) 22 | }) 23 | 24 | test_that("Sparse RBF with full k is a no-op", { 25 | sparse_weight_func <- sparse_rbf_kernel_weights(k = NROW(presidential_speech) - 1) 26 | dense_weight_func <- dense_rbf_kernel_weights() 27 | 28 | expect_equal(sparse_weight_func(presidential_speech)$weight_mat, 29 | dense_weight_func(presidential_speech)$weight_mat) 30 | }) 31 | 32 | test_that("Sparse RBF with learned k is same as if k were known a priori", { 33 | weight_func <- sparse_rbf_kernel_weights() 34 | weight_results <- weight_func(presidential_speech) 35 | k <- weight_results$type$k 36 | 37 | weight_func2 <- sparse_rbf_kernel_weights(k = k) 38 | weight_results2 <- weight_func2(presidential_speech) 39 | 40 | expect_equal(weight_results$weight_mat, 41 | weight_results2$weight_mat) 42 | }) 43 | 44 | test_that("Dense RBF works with Manhattan distance", { 45 | weight_func <- dense_rbf_kernel_weights(dist.method = "manhattan") 46 | weight_results <- weight_func(presidential_speech) 47 | phi <- weight_results$type$phi 48 | 49 | weight_mat_manual <- exp(-phi * as.matrix(dist(presidential_speech, method = "manhattan"))^2) 50 | diag(weight_mat_manual) <- 0 51 | 52 | expect_equal(weight_results$weight_mat, weight_mat_manual) 53 | }) 54 | 55 | test_that("Dense RBF checks inputs", { 56 | expect_error(dense_rbf_kernel_weights(dist.method = "London")) 57 | expect_error(dense_rbf_kernel_weights(dist.method = NA)) 58 | expect_error(dense_rbf_kernel_weights(p = 0)) 59 | expect_error(dense_rbf_kernel_weights(p = -3)) 60 | expect_error(dense_rbf_kernel_weights(p = NA)) 61 | expect_error(dense_rbf_kernel_weights(p = c(1, 2))) 62 | expect_error(dense_rbf_kernel_weights(phi = 0)(presidential_speech)) 63 | }) 64 | 65 | test_that("Sparse RBF checks inputs", { 66 | expect_error(sparse_rbf_kernel_weights(dist.method = "London")) 67 | expect_error(sparse_rbf_kernel_weights(dist.method = NA)) 68 | expect_error(sparse_rbf_kernel_weights(p = 0)) 69 | expect_error(sparse_rbf_kernel_weights(p = -3)) 70 | expect_error(sparse_rbf_kernel_weights(p = NA)) 71 | expect_error(sparse_rbf_kernel_weights(p = c(1, 2))) 72 | expect_error(sparse_rbf_kernel_weights(phi = 0)(presidential_speech)) 73 | expect_error(sparse_rbf_kernel_weights(k = 0)(presidential_speech)) 74 | expect_error(sparse_rbf_kernel_weights(k = -1)(presidential_speech)) 75 | }) 76 | 77 | test_that("Print method works - Dense RBF", { 78 | weight_func <- dense_rbf_kernel_weights(phi = 1) 79 | weight_fit_obj <- weight_func(presidential_speech)$type 80 | weight_print <- capture_print(weight_fit_obj) 81 | 82 | expect_str_contains(weight_print, "Radial Basis Function Kernel Weights") 83 | expect_str_contains(weight_print, "Distance Metric: Euclidean") 84 | expect_str_contains(weight_print, stringr::fixed("Scale parameter (phi): 1 [User-Supplied]")) 85 | 86 | 87 | weight_func <- dense_rbf_kernel_weights() 88 | weight_fit_obj <- weight_func(presidential_speech)$type 89 | weight_print <- capture_print(weight_fit_obj) 90 | 91 | expect_str_contains(weight_print, "Radial Basis Function Kernel Weights") 92 | expect_str_contains(weight_print, "Distance Metric: Euclidean") 93 | expect_str_contains(weight_print, stringr::fixed("Scale parameter (phi): 0.01 [Data-Driven]")) 94 | }) 95 | 96 | test_that("Print method works - Sparse RBF", { 97 | weight_func <- sparse_rbf_kernel_weights(k = 10) 98 | weight_fit_obj <- weight_func(presidential_speech)$type 99 | weight_print <- capture_print(weight_fit_obj) 100 | expect_str_contains(weight_print, stringr::fixed("Sparsified: 10 Nearest Neighbors [User-Supplied]")) 101 | 102 | weight_func <- sparse_rbf_kernel_weights() 103 | weight_fit_obj <- weight_func(presidential_speech)$type 104 | weight_print <- capture_print(weight_fit_obj) 105 | expect_str_contains(weight_print, stringr::fixed("Sparsified: 4 Nearest Neighbors [Data-Driven]")) 106 | }) 107 | 108 | test_that("Print method works - User-Function", { 109 | expect_str_contains(capture_print(clustRviz:::UserFunction()), 110 | "Source: User-Provided Function") 111 | }) 112 | 113 | 114 | test_that("Print method works - User-Matrix", { 115 | expect_str_contains(capture_print(clustRviz:::UserMatrix()), 116 | "Source: User-Provided Matrix") 117 | }) 118 | 119 | test_that("Dense weights connectedness check works", { 120 | check_weight_matrix <- clustRviz:::check_weight_matrix 121 | eye <- function(n) diag(1, nrow = n, ncol = n) 122 | 123 | W <- eye(3) 124 | expect_error(check_weight_matrix(W)) 125 | 126 | W <- matrix(1, 3, 3) 127 | expect_no_error(check_weight_matrix(W)) 128 | 129 | W <- matrix(1, 3, 3); W[1,2] <- W[2,1] <- W[1,3] <- W[3,1] <- 0 130 | ## No check for first ob since we are only checking lower triangle 131 | expect_error(check_weight_matrix(W), regexp = "observation 2") 132 | 133 | W <- matrix(1, 3, 4) 134 | expect_error(check_weight_matrix(W), regexp = "square") 135 | }) 136 | -------------------------------------------------------------------------------- /vignettes/Acknowledgements.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Acknowledgements" 3 | author: 4 | - name: Michael Weylandt 5 | affiliation: Department of Statistics, Rice University 6 | email: michael.weylandt@rice.edu 7 | - name: John Nagorski 8 | affiliation: Department of Statistics, Rice University 9 | - name: Genevera I. Allen 10 | affiliation: | 11 | | Departments of Statistics, Computer Science, and Electical and Computer Engineering, Rice University 12 | | Jan and Dan Duncan Neurological Research Institute, Baylor College of Medicine 13 | email: gallen@rice.edu 14 | date: "Last Updated: August 19th, 2020" 15 | output: 16 | html_document: 17 | toc: true 18 | toc_float: 19 | collapsed: false 20 | bibliography: vignettes.bib 21 | vignette: > 22 | %\VignetteIndexEntry{Acknowledgements} 23 | %\VignetteEngine{knitr::rmarkdown} 24 | %\VignetteEncoding{UTF-8} 25 | --- 26 | 27 | The computational core of `clustRviz` is written in standard modern `C++` [@CppStandard:11], 28 | using the `Eigen3` library for numerical linear algebra [@Eigen3] and the `Rcpp` [@Eddelbuettel:2011] 29 | and `RcppEigen` [@Bates:2013] libraries to interface with `R` [@R]. 30 | 31 | The graphical components of `clustRviz` depend on several publicly available `R` packages, including: 32 | 33 | - the `dendextend` package [@Galili:2015] which is used for dendrogram construction and manipulation; 34 | - the `ggplot2` [@Wickham:2016] and `gganimate` [@Pedersen:2019] packages which are used for path visualizations; 35 | - the `plotly` [@Sievert:2020] package which is used for interactive dendrograms and path plots; and 36 | - the `heatmaply` [@Galili:2018] package which is used for export of interactive cluster heatmaps based on `CARP` and `CBASS` results 37 | 38 | We are grateful to the authors of each of these packages for making their software freely available. 39 | 40 | ## References 41 | -------------------------------------------------------------------------------- /vignettes/clustRviz.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "clustRviz Quick Start" 3 | author: 4 | - name: Michael Weylandt 5 | affiliation: Department of Statistics, Rice University 6 | email: michael.weylandt@rice.edu 7 | - name: John Nagorski 8 | affiliation: Department of Statistics, Rice University 9 | - name: Genevera I. Allen 10 | affiliation: | 11 | | Departments of Statistics, Computer Science, and Electical and Computer Engineering, Rice University 12 | | Jan and Dan Duncan Neurological Research Institute, Baylor College of Medicine 13 | email: gallen@rice.edu 14 | date: "Last Updated: August 19th, 2020" 15 | output: 16 | html_document: 17 | toc: true 18 | toc_float: 19 | collapsed: false 20 | bibliography: vignettes.bib 21 | vignette: > 22 | %\VignetteIndexEntry{clustRviz Quick Start} 23 | %\VignetteEngine{knitr::rmarkdown} 24 | %\VignetteEncoding{UTF-8} 25 | --- 26 | 27 | ```{r setup, include = FALSE} 28 | knitr::opts_chunk$set( 29 | collapse = TRUE, 30 | comment = "#>", 31 | eval=TRUE, 32 | message = FALSE 33 | ) 34 | ``` 35 | \renewcommand{\vec}[1]{\boldsymbol{#1}} 36 | 37 | ## Introduction 38 | 39 | This vignette provides a brief introduction to the `clustRviz` package, 40 | describing how to use the main entry points `CARP` and `CBASS` and providing 41 | a quick overview of the rich built-in graphics functionality. For more details 42 | on graphics, weight selection, or the computational algorithms used, please 43 | see the other package vignettes. 44 | 45 | ## Clustering 46 | 47 | `clustRviz` implements the *convex* clustering formulation popularized by 48 | Hocking *et al.* [-@Hocking:2011] and uses the path-wise algorithms of 49 | Weylandt, Nagorski, and Allen [-@Weylandt:2019] to support full path 50 | computation and dendrogram construction. This allows convex clustering to 51 | produce `hclust`-style dendrograms while maintaining its statistical and computational 52 | advantages. 53 | 54 | The main entry point for clustering is the `CARP` function, which implements 55 | the **Clustering via Algorithmic Regularization Paths** proposed by 56 | Weylandt, Nagorski, and Allen [-@Weylandt:2019]. We can use it on the built-in 57 | `presidential_speech` data set: 58 | 59 | ```{r} 60 | library(clustRviz) 61 | carp_fit <- CARP(presidential_speech) 62 | print(carp_fit) 63 | ``` 64 | 65 | As can be seen, this provides a full path in only a few seconds. From this, 66 | we can produce a variety of attractive plots, including dendrograms 67 | 68 | ```{r} 69 | plot(carp_fit, type = "dendrogram") 70 | ``` 71 | 72 | one-way heatmaps 73 | 74 | ```{r} 75 | plot(carp_fit, type = "heatmap") 76 | ``` 77 | 78 | and regularization paths 79 | 80 | ```{r} 81 | plot(carp_fit, type = "path") 82 | ``` 83 | 84 | For each plot type, interactive and dynamic versions are also supported: for example, 85 | 86 | ```{r} 87 | plot(carp_fit, type = "dendrogram", dynamic = TRUE) 88 | ``` 89 | 90 | By default, the entire path is shown, but it is possible to obtain specific solutions 91 | by specifying the `k` or `percent` arguments to plot. 92 | 93 | ```{r} 94 | plot(carp_fit, k = 3) 95 | ``` 96 | 97 | To work with the clustering solutions directly, the `get_cluster_labels`, `get_clustered_data`, 98 | or `get_cluster_centroids` functions may be useful. 99 | 100 | ## Bi-Clustering 101 | 102 | Chi *et al* [-@Chi:2017] proposed a convex formulation of *biclustering* for which 103 | Weylandt [-@Weylandt:2019b] later proposed an efficient ADMM algorithm. This ADMM 104 | was adapted into the **CBASS** - *Convex Biclustering via Algorithmic Regularization 105 | with Small Steps* algorithm. `clustRviz` exposes an implementation of this algorithm 106 | via the function of the same name. 107 | 108 | ```{r} 109 | library(clustRviz) 110 | cbass_fit <- CBASS(presidential_speech) 111 | print(cbass_fit) 112 | ``` 113 | 114 | As can be seen, this provides a full path in only a few seconds. In general, the 115 | bi-clustering problem is a bit slower than the standard clustering problem but still 116 | highly efficient. From this, we can produce a variety of attractive plots, 117 | including row- and column-wise dendrograms 118 | 119 | ```{r} 120 | plot(cbass_fit, type = "row.dendrogram") 121 | ``` 122 | 123 | ```{r} 124 | plot(cbass_fit, type = "col.dendrogram") 125 | ``` 126 | 127 | row- and columnwise regularization paths 128 | 129 | ```{r} 130 | plot(cbass_fit, type = "row.path") 131 | ``` 132 | 133 | and the traditional two-way cluster heatmap 134 | 135 | ```{r} 136 | plot(cbass_fit, type = "heatmap") 137 | ``` 138 | 139 | As before, interactive and dynamic versions are also supported: for example, 140 | 141 | ```{r} 142 | plot(cbass_fit, type = "heatmap", dynamic = TRUE) 143 | ``` 144 | 145 | Because `CBASS` clusters rows and columns simultaneously, when specifying 146 | cluster numbers, it is necessary to distinguish between row and column clusters 147 | 148 | ```{r} 149 | plot(cbass_fit, k.row = 3) 150 | ``` 151 | 152 | This is only a brief demonstration of the capabilities of the `clustRviz` package 153 | - see the other vignettes for more! 154 | 155 | ## References 156 | --------------------------------------------------------------------------------