├── .Rbuildignore ├── .gitattributes ├── .github ├── .gitignore └── workflows │ └── pkgdown.yaml ├── .gitignore ├── DESCRIPTION ├── HuMMuS.Rproj ├── LICENSE.md ├── NAMESPACE ├── R ├── bipartites.R ├── explore_network.R ├── fetch_online.R ├── hummus_objet.R ├── layers.R ├── method_wrappers.R └── utils.R ├── README.md ├── data ├── chen_dataset.rda ├── chen_dataset_subset.rda ├── chen_subset_hummus.rda ├── human_pwms_v2.rda ├── mouse_pwms_v2.rda ├── real_example │ ├── hESC_Chen_TFs.tsv │ ├── hESC_Chen_scATAC_bin.tsv │ └── hESC_Chen_scRNA.tsv └── toy_example_multilayer │ ├── bipartites │ ├── atac_rna │ └── tf_atac │ ├── layers │ ├── layers_atac │ │ └── atac_network.tsv │ ├── layers_rna │ │ └── rna_network │ └── layers_tf │ │ └── tf_network │ └── seeds │ ├── '1'.txt │ └── 1 6.txt ├── hummuspy ├── LICENSE ├── README.md ├── examples │ └── Tutorial_example_hummuspy.ipynb ├── poetry.lock ├── pyproject.toml └── src │ └── hummuspy │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── __init__.cpython-311.pyc │ ├── __init__.cpython-312.pyc │ ├── explore_network.cpython-310.pyc │ ├── explore_network.cpython-311.pyc │ └── explore_network.cpython-312.pyc │ ├── config.py │ ├── core_grn.py │ ├── create_multilayer.py │ └── explore_network.py ├── man ├── DefaultAssay.Rd ├── Hummus_Object-class.Rd ├── Initiate_Hummus_Object.Rd ├── VariableFeatures.Rd ├── add_network.Rd ├── aggregate_matrix.Rd ├── bipartite-class.Rd ├── bipartite_peaks2genes.Rd ├── bipartite_tfs2peaks.Rd ├── compute_atac_peak_network.Rd ├── compute_gene_network.Rd ├── compute_tf_network.Rd ├── dMcast.Rd ├── define_binding_regions.Rd ├── define_enhancers.Rd ├── define_general_config.Rd ├── define_grn.Rd ├── define_output.Rd ├── define_target_genes.Rd ├── fast_aggregate.Rd ├── figures │ ├── Fig_0001.jpg │ ├── hummus_logo.png │ └── logo.png ├── find_peaks_near_genes.Rd ├── format_bipartites_names.Rd ├── format_multiplex_names.Rd ├── get_genome_annotations.Rd ├── get_tf2motifs.Rd ├── get_tfs.Rd ├── 
motifs_db-class.Rd ├── multilayer-class.Rd ├── multiplex-class.Rd ├── peaks_in_regulatory_elements.Rd ├── run_cicero_wrapper.Rd ├── run_tf_null_wrapper.Rd ├── save_multilayer.Rd ├── store_network.Rd └── sub-subset-Hummus_Object.Rd ├── pkgdown ├── _pkgdown.yml ├── extra.css └── favicon │ ├── android-chrome-192x192.png │ ├── android-chrome-512x512.png │ ├── apple-touch-icon-120x120.png │ ├── apple-touch-icon-152x152.png │ ├── apple-touch-icon-180x180.png │ ├── apple-touch-icon-60x60.png │ ├── apple-touch-icon-76x76.png │ ├── apple-touch-icon.png │ ├── favicon-16x16.png │ ├── favicon-32x32.png │ ├── favicon.ico │ └── safari-pinned-tab.svg └── vignettes ├── add_networks.Rmd ├── chen_multilayer ├── bipartite │ └── atac_rna.tsv └── multiplex │ └── RNA │ └── RNA_GENIE3.tsv ├── chen_vignette.Rmd ├── config.yml └── figures ├── 5_steps.png ├── build_multilayer.png ├── explore_multilayer.png ├── hummus_object_description.png └── schema_HuMMuS.png /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^LICENSE\.md$ 2 | ^_pkgdown\.yml$ 3 | ^docs$ 4 | ^pkgdown$ 5 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | hummuspy/dist/* linguist-detectable=false 2 | *.html linguist-detectable=false 3 | *.ipynb linguist-detectable=false 4 | hummuspy/src/hummuspy/config.py linguist-detectable=false 5 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master, dev_SeuratV5] 6 | pull_request: 7 | branches: [main, master, dev_SeuratV5] 8 | release: 9 | types: [published] 10 | workflow_dispatch: 11 | 12 | name: pkgdown 13 | 14 | jobs: 15 | pkgdown: 16 | runs-on: ubuntu-latest 17 | env: 18 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 19 | steps: 20 | - uses: actions/checkout@v2 21 | 22 | - uses: r-lib/actions/setup-pandoc@v2 23 | 24 | - uses: r-lib/actions/setup-r@v2 25 | with: 26 | use-public-rspm: true 27 | 28 | - uses: r-lib/actions/setup-r-dependencies@v2 29 | with: 30 | extra-packages: any::pkgdown, any::reticulate, local::., any::devtools 31 | needs: website 32 | 33 | - uses: actions/setup-python@v2 34 | with: 35 | python-version: '3.10' 36 | 37 | - name: Setup hummuspy env 38 | run: | 39 | reticulate::conda_create("r-reticulate", packages="python==3.10") 40 | reticulate::py_install("hummuspy", envname = "r-reticulate", pip=TRUE) 41 | shell: Rscript {0} 42 | 43 | - name: ubuntu setup for Monocle3 44 | run: sudo apt-get install libgdal-dev libgeos-dev libproj-dev 45 | 46 | - name: Install Monocle3 47 | run: devtools::install_github('cole-trapnell-lab/monocle3') 48 | shell: Rscript {0} 49 | 50 | - name: Install Cicero 51 | run: devtools::install_github("cole-trapnell-lab/cicero-release", ref = "monocle3") 52 | shell: Rscript {0} 53 | 54 | - name: Build site 55 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) 56 | shell: Rscript {0} 57 | 58 | - name: Deploy to GitHub pages 🚀 59 | if: github.event_name != 'pull_request' 60 | uses: JamesIves/github-pages-deploy-action@v4.4.1 61 | with: 62 | clean: false 63 | branch: gh-pages 64 | folder: docs 65 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .Rapp.history 4 | 5 | # Session Data 
files 6 | .RData 7 | .RDataTmp 8 | 9 | # User-specific files 10 | .Ruserdata 11 | 12 | # Example code in package build process 13 | *-Ex.R 14 | 15 | # Output files from R CMD build 16 | /*.tar.gz 17 | 18 | # Output files from R CMD check 19 | /*.Rcheck/ 20 | 21 | # RStudio files 22 | .Rproj.user/ 23 | 24 | # produced vignettes 25 | vignettes/*.html 26 | vignettes/*.pdf 27 | 28 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 29 | .httr-oauth 30 | 31 | # knitr and R markdown default cache directories 32 | *_cache/ 33 | /cache/ 34 | 35 | # Temporary files created by R markdown 36 | *.utf8.md 37 | *.knit.md 38 | 39 | # R Environment Variables 40 | .Renviron 41 | 42 | # pkgdown site 43 | docs/ 44 | 45 | # translation temp files 46 | po/*~ 47 | 48 | # RStudio Connect folder 49 | rsconnect/ 50 | 51 | omnipath-log/ 52 | b/ 53 | a/ 54 | test* 55 | RW_grn.tsv 56 | config* 57 | .ipynb_checkpoints 58 | test_hummuspy.ipynb 59 | temp 60 | chen_multilayer 61 | examples/* 62 | inst/doc 63 | docs 64 | hummuspy/dist/ 65 | Untitled.ipynb 66 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: HuMMuS 2 | Title: Heterogeneous Multilayer Network for Multi-Omics Single-Cell Data 3 | Version: 0.0.2 4 | Authors@R: person(given = "Rémi", 5 | family = "Trimbour", 6 | email = "remi.trimbour@pasteur.fr", 7 | role = c("aut", "cre"), 8 | comment = c(ORCID = "0000-0001-8770-8412")) 9 | Description: A general framework to infer regulatory mechanisms from multi-omics single-cell data. 10 | HuMMuS package provides functions to build individual networks 11 | from different single-cell modalities, then to integrate them 12 | into a single-cell heterogeneous multilayer network. The package 13 | also provides functions to infer regulatory mechanisms from the 14 | heterogeneous multilayer network. 
15 | HuMMuS is based on a flexible framework that can be adapted to any 16 | single-cell modalities. We provide study cases for scRNA+scATAC 17 | and scRNA+scATAC+snmC(methylation) that can be completed by PPI. 18 | Many regulatory mechanisms can be inferred with HuMMuS, notably 19 | classical gene regulatory networks (GRN), but also TF-target genes, 20 | enhancer-gene interactions, TF-enhancer bindings, or even research 21 | of heterogeneous communities of omics features (e.g. ensemble of 22 | peaks, genes and TFs cooperating to the same biological function). 23 | License: AGPL (>= 3) 24 | Encoding: UTF-8 25 | LazyData: true 26 | Roxygen: list(markdown = TRUE) 27 | RoxygenNote: 7.2.3 28 | Depends: 29 | R (>= 4.0.0) 30 | biocViews: 31 | Imports: 32 | grr, 33 | sparseMatrixStats, 34 | TFBSTools, 35 | stringr, 36 | JASPAR2020, 37 | chromVAR, 38 | IRanges, 39 | Matrix, 40 | S4Vectors, 41 | utils, 42 | tidyr, 43 | OmnipathR, 44 | GENIE3, 45 | reshape2, 46 | Signac, 47 | biovizBase, 48 | SingleCellExperiment, 49 | motifmatchr, 50 | reticulate 51 | Suggests: 52 | doParallel, 53 | doRNG, 54 | EnsDb.Hsapiens.v86, 55 | BSgenome.Hsapiens.UCSC.hg38, 56 | knitr, 57 | rmarkdown 58 | VignetteBuilder: knitr 59 | URL: https://cantinilab.github.io/HuMMuS/ 60 | -------------------------------------------------------------------------------- /HuMMuS.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | -------------------------------------------------------------------------------- /NAMESPACE: 
-------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method("[[",Hummus_Object) 4 | S3method(DefaultAssay,Hummus_Object) 5 | S3method(VariableFeatures,Hummus_Object) 6 | export("DefaultAssay<-") 7 | export("VariableFeatures<-") 8 | export(DefaultAssay) 9 | export(Hummus_Object) 10 | export(Initiate_Hummus_Object) 11 | export(VariableFeatures) 12 | export(add_network) 13 | export(aggregate_matrix) 14 | export(bipartite_peaks2genes) 15 | export(bipartite_tfs2peaks) 16 | export(compute_atac_peak_network) 17 | export(compute_gene_network) 18 | export(compute_tf_network) 19 | export(define_binding_regions) 20 | export(define_enhancers) 21 | export(define_general_config) 22 | export(define_grn) 23 | export(define_output) 24 | export(define_target_genes) 25 | export(find_peaks_near_genes) 26 | export(format_bipartites_names) 27 | export(format_multiplex_names) 28 | export(get_genome_annotations) 29 | export(get_tf2motifs) 30 | export(get_tfs) 31 | export(peaks_in_regulatory_elements) 32 | export(run_cicero_wrapper) 33 | export(save_multilayer) 34 | export(store_network) 35 | exportClasses(Hummus_Object) 36 | exportClasses(bipartite) 37 | exportClasses(motifs_db) 38 | exportClasses(multilayer) 39 | exportClasses(multiplex) 40 | import(sparseMatrixStats) 41 | importClassesFrom(SeuratObject,Seurat) 42 | importClassesFrom(Signac,Motif) 43 | importClassesFrom(TFBSTools,PWMatrixList) 44 | importFrom(methods,setClass) 45 | -------------------------------------------------------------------------------- /R/bipartites.R: -------------------------------------------------------------------------------- 1 | #' Compute links between TFs and DNA regions (ATAC peaks) 2 | #' 3 | #' Compute and add bipartite between TFs and DNA regions to hummus object. 4 | #' Links are computed based on the binding motifs of TFs and their locations 5 | #' on a reference genome. 
#' Currently based on Signac AddMotifs function (--> motifmatchr, itself based
#' on the MOODS algorithm).
#'
#' @param hummus_object (hummus_object) - Hummus object.
#' @param tf_expr_assay (character) - Name of assay containing the TF expression
#' data. If NULL, all TFs with a motif are used. Default: "RNA".
#' @param peak_assay (character) - Name of the assay containing the DNA regions
#' (ATAC peaks). Default: "peaks".
#' @param tf_multiplex_name (character) - Name of multiplex containing the TFs.
#' If NULL, the name of the TF assay is used; it must therefore be provided
#' when \code{tf_expr_assay} is NULL.
#' @param peak_multiplex_name (character) - Name of the multiplex containing the
#' DNA regions (ATAC peaks). If NULL, the name of the peak assay is used.
#' @param genome (BSgenome object) - Reference genome.
#' @param store_network (bool) - Save the bipartite on disk (\code{TRUE})
#' or return without saving on disk (\code{FALSE}, default).
#' @param output_file (character) - Name of the output_file
#' (if store_network == \code{TRUE}). Default: NULL.
#' @param verbose (integer) Display function messages.
#' Set to 0 for no message displayed, >= 1 for more details. Default: 1.
#' @param bipartite_name (character) - Name of bipartite. Default: "tf_peak".
#'
#' @return hummus_object (hummus_object) - Hummus object with TF-peak bipartite
#' added to the multilayer slot
#' @export
#'
#' @examples
#' \dontrun{
#' hummus <- bipartite_tfs2peaks(
#'   hummus_object = hummus,
#'   tf_expr_assay = "RNA",
#'   peak_assay = "peaks",
#'   tf_multiplex_name = "TF",
#'   peak_multiplex_name = "peaks",
#'   genome = BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38,
#'   store_network = FALSE,
#'   verbose = 1,
#'   bipartite_name = "tf_peak")
#' }

bipartite_tfs2peaks <- function(
  hummus_object,
  tf_expr_assay = "RNA",
  peak_assay = "peaks",
  tf_multiplex_name = NULL,
  peak_multiplex_name = NULL,
  genome,
  store_network = FALSE,
  output_file = NULL,
  verbose = 1,
  bipartite_name = "tf_peak"
) {

  if (verbose > 0) {
    cat("Computing TF-peak bipartite\n")
  }

  # The TF multiplex name falls back on the TF assay name, so at least one of
  # the two is needed. Fail here rather than after the costly motif scan.
  # NOTE(review): assumes the bipartite class requires a non-NULL multiplex
  # name - confirm against the class definition.
  if (is.null(tf_expr_assay) && is.null(tf_multiplex_name)) {
    stop("tf_multiplex_name must be provided when tf_expr_assay is NULL")
  }

  # Check if tf_expr_assay is NULL
  if (!is.null(tf_expr_assay)) {
    # Check if the gene assay is present in the seurat object
    if (!tf_expr_assay %in% names(hummus_object@assays)) {
      stop("The gene assay is not present in the seurat object")
    }
    # TFs selected through get_tfs() for the requested expression assay
    tfs_use <- get_tfs(hummus_object,
                       assay = tf_expr_assay,
                       store_tfs = FALSE,
                       verbose = verbose)
  } else { # No filtering on expression assay, use all TFs with a motif
    if (verbose > 0) {
      cat("No filtering on expression assay, using all TFs with a motif.\n")
    }
    tfs_use <- unique(hummus_object@motifs_db@tf2motifs$tf)
  }

  # Check if the peak assay is present in the seurat object
  if (!peak_assay %in% names(hummus_object@assays)) {
    stop("The peak assay is not present in the seurat object")
  }
  # Check if the peak assay is a ChromatinAssay object
  if (!inherits(hummus_object@assays[[peak_assay]],
                "ChromatinAssay")) {
    stop("The peak assay is not a ChromatinAssay object
    or does not have annotations (gene.range object))")
  }
  # Check if the peak assay has gene.range annotations
  if (is.null(Signac::Annotation(hummus_object[[peak_assay]]))) {
    stop("The peak assay does not have annotations (gene.range object)")
  }

  ## The motif-to-TF matrix construction is inspired from the Pando package :
  # https://github.com/quadbiolab/Pando/blob/main/R/regions.R
  # It is built before the motif scan because it is cheap and lets us stop
  # early when no TF can be used.
  tf2motifs <- hummus_object@motifs_db@tf2motifs
  # Select motif and tf columns (by position) and remove duplicates
  tf2motifs <- dplyr::select(tf2motifs, "motif" = 1, "tf" = 2)
  tf2motifs <- dplyr::distinct(tf2motifs)
  # Add value column, then spread TFs: one row per motif, one column per TF
  tf2motifs <- dplyr::mutate(tf2motifs, val = 1)
  tf2motifs <- tidyr::pivot_wider(tf2motifs,
                                  names_from = "tf",
                                  values_from = "val",
                                  values_fill = 0)
  # Set motif as rownames and convert to a sparse matrix
  tf2motifs <- tibble::column_to_rownames(tf2motifs, "motif")
  tf2motifs <- Matrix::Matrix(as.matrix(tf2motifs), sparse = TRUE)

  # Keep only the TFs that have a column in the motif-to-TF matrix; indexing
  # with an unknown TF would otherwise fail with "subscript out of bounds"
  tfs_use <- tfs_use[tfs_use %in% colnames(tf2motifs)]
  if (length(tfs_use) == 0) { # If no TFs are found in the dataset
    stop("None of the provided TFs were found in the dataset.
         Consider providing a custom motif-to-TF map as `motif_tfs`")
  }

  # Add motifs to the peaks (silenced when verbose == 0)
  motif_pos <- Signac::AddMotifs(
    object = hummus_object[[peak_assay]],
    genome = genome,
    pfm = hummus_object@motifs_db@motifs,
    verbose = verbose > 0
  )

  if (verbose > 0) {
    cat("\tAdding TF info\n")
  }

  # Get TF peak links (peaks x TFs).
  # drop = FALSE keeps a one-column matrix (with its column name) when a
  # single TF is selected.
  # NOTE(review): the product pairs motif columns and motif rows by position,
  # not by name - confirm both come in the same order.
  TFs_Peaks <- motif_pos@motifs@data %*% tf2motifs[, tfs_use, drop = FALSE]

  # Keep only the (peak, TF) pairs with at least one motif hit.
  # expand.grid enumerates pairs in column-major order, like as.vector()
  tfs2peaks <- expand.grid(rownames(TFs_Peaks),
                           colnames(TFs_Peaks))[as.vector(TFs_Peaks > 0), ]
  colnames(tfs2peaks) <- c("peak", "TF") # set column names

  # Save TF-peak links
  store_network(network = tfs2peaks,
                store_network = store_network,
                output_file = output_file,
                verbose = verbose)

  if (verbose > 0) {
    cat("\tReturning TF-peak links as bipartite object\n")
  }

  # Set default names for the networks if not provided
  if (is.null(tf_multiplex_name)) {
    if (verbose > 0) {
      cat("no TF layer name provided, using tf_expr_assay name\n")
    }
    tf_multiplex_name <- tf_expr_assay
  }
  if (is.null(peak_multiplex_name)) {
    if (verbose > 0) {
      cat("no peak layer name provided, using peak_assay name\n")
    }
    peak_multiplex_name <- peak_assay
  }

  # Add the tf-peak bipartite to the multilayer and return the object
  hummus_object@multilayer@bipartites[[bipartite_name]] <- methods::new(
    "bipartite",
    "network" = tfs2peaks,
    "multiplex_left" = peak_multiplex_name,
    "multiplex_right" = tf_multiplex_name)
  return(hummus_object)
}




#' Compute links between DNA regions and genes
#'
#' Compute and add bipartite between DNA regions and genes to hummus object.
#' Links are computed based on the distance between peaks and gene's TSS
#' location from gene.range annotations.
#' Call find_peaks_near_genes function, that can use different methods.
#'
#' @param hummus_object (hummus_object) - Hummus object.
#' @param gene_assay (character) - Name of assay containing the gene expression
#' data. Default: "RNA".
#' @param peak_assay (character) - Name of the assay containing the DNA regions
#' (ATAC peaks). Default: "peaks".
#' @param gene_multiplex_name (character) - Name of the multiplex containing the
#' genes.
#' If NULL, the name of the gene assay is used.
#' @param peak_multiplex_name (character) - Name of the multiplex containing the
#' DNA regions (ATAC peaks). If NULL, the name of the peak assay is used.
#' @param peak_to_gene_method (character) - Method to use to compute the links
#' between peaks and genes. Default: "Signac".
#' * \code{'Signac'} - Use Signac::Extend to extend genes.
#' * \code{'GREAT'} - Not implemented yet.
#' @param upstream (int) - Upstream distance from TSS
#' to consider as potential promoter.
#' @param downstream (int) - Downstream distance from TSS
#' to consider as potential promoter.
#' @param only_tss (logical) - If TRUE, only TSS will be considered.
#' @param store_network (bool) - Save the bipartite on disk (\code{TRUE})
#' or return without saving on disk (\code{FALSE}, default).
#' @param output_file (character) - Name of the output_file
#' (if store_network == \code{TRUE}). Default: NULL.
#' @param bipartite_name (character) - Name of bipartite. Default: "atac_rna".
#' @param verbose (integer) Display function messages.
#' Set to 0 for no message displayed, >= 1 for more details. Default: 1.
#'
#' @return hummus_object (hummus_object) - Hummus object w/ atac-rna bipartite
#' added to the multilayer slot
#' @export
#'
#' @examples
#' \dontrun{
#' hummus <- bipartite_peaks2genes(
#'   hummus_object = hummus,
#'   gene_assay = "RNA",
#'   peak_assay = "peaks",
#'   gene_multiplex_name = "RNA",
#'   peak_multiplex_name = "peaks",
#'   peak_to_gene_method = "Signac",
#'   upstream = 500,
#'   downstream = 500,
#'   only_tss = TRUE,
#'   store_network = FALSE,
#'   bipartite_name = "atac_rna")
#' }

bipartite_peaks2genes <- function(
  hummus_object,
  gene_assay = "RNA",
  peak_assay = "peaks",
  gene_multiplex_name = NULL,
  peak_multiplex_name = NULL,
  peak_to_gene_method = "Signac",
  upstream = 500,
  downstream = 500,
  only_tss = TRUE,
  store_network = FALSE,
  output_file = NULL,
  bipartite_name = "atac_rna",
  verbose = 1
) {
  # Check if the gene assay is present in the hummus object
  if (!gene_assay %in% names(hummus_object@assays)) {
    stop("The gene assay is not present in the hummus object")
  } else if (!peak_assay %in% names(hummus_object@assays)) {
    # Check if the peak assay is present in the hummus object
    stop("The peak assay is not present in the hummus object")
  } else if (!inherits(hummus_object@assays[[peak_assay]],
                       "ChromatinAssay")) {
    # Check if the peak assay is a ChromatinAssay object
    stop("The peak assay is not a ChromatinAssay object
    or does not have annotations (gene.range object))")
  } else if (is.null(Signac::Annotation(hummus_object[[peak_assay]]))) {
    # Check if the peak assay has gene.range annotations
    stop("The peak assay does not have annotations (gene.range object)")
  }

  # Find candidate regions near gene bodies (peaks x genes hit matrix)
  peaks_near_genes <- find_peaks_near_genes(
    peaks = hummus_object[[peak_assay]]@ranges,
    method = peak_to_gene_method,
    genes = Signac::Annotation(hummus_object[[peak_assay]]),
    upstream = upstream,
    downstream = downstream,
    only_tss = only_tss)

  # Aggregate candidate regions by gene name (gene x peak matrix): several
  # annotation entries can share the same gene name
  peaks2genes <- aggregate_matrix(Matrix::t(peaks_near_genes),
                                  groups = colnames(peaks_near_genes),
                                  fun = "sum")

  # Keep only the genes that are in our scRNA-seq dataset.
  # drop = FALSE: a single remaining gene must stay a one-row matrix,
  # otherwise row/column names are lost and the next steps fail
  genes_in_assay <- rownames(peaks2genes) %in%
    rownames(hummus_object@assays[[gene_assay]])
  peaks2genes <- peaks2genes[genes_in_assay, , drop = FALSE]

  # Remove rows/cols with only zeros (same reason for drop = FALSE)
  peaks2genes <- peaks2genes[Matrix::rowSums(peaks2genes) != 0,
                             Matrix::colSums(peaks2genes) != 0,
                             drop = FALSE]

  # peak-gene links: expand.grid enumerates (gene, peak) pairs in
  # column-major order, like as.vector()
  peaks2genes <- expand.grid(rownames(peaks2genes),
                             colnames(peaks2genes))[as.vector(peaks2genes > 0), ]
  colnames(peaks2genes) <- c("gene", "peak") # set column names

  # Save peak-gene links
  store_network(network = peaks2genes,
                store_network = store_network,
                output_file = output_file,
                verbose = verbose)

  # Set default names for the networks if not provided
  if (is.null(gene_multiplex_name)) {
    gene_multiplex_name <- gene_assay
  }
  if (is.null(peak_multiplex_name)) {
    peak_multiplex_name <- peak_assay
  }

  # Add the atac-rna bipartite to the multilayer and return the object
  hummus_object@multilayer@bipartites[[bipartite_name]] <- methods::new(
    "bipartite",
    "network" = peaks2genes,
    "multiplex_left" = gene_multiplex_name,
    "multiplex_right" = peak_multiplex_name)
  return(hummus_object)
}

#' @title Associate peaks to genes based on distance to TSS (or gene body)
#'
#' @param peaks (genomic ranges) - Peak ranges, as used by
#' IRanges::findOverlaps and Signac::GRangesToString
#' (e.g. the ranges of a ChromatinAssay).
#' @param genes (genomic ranges) - Gene annotations with a \code{gene_name}
#' column (e.g. the output of Signac::Annotation).
#' @param sep vector(character) - Separator between chromosome,
#' start and end position. Default: c('-', '-').
#' @param method (character) - Method to use.
#' Default: "Signac".
#' * \code{'Signac'} - Use Signac::Extend to extend genes.
#' * \code{'GREAT'} - Not implemented yet.
#' @param upstream (int) - Upstream distance from TSS
#' to consider as potential promoter.
#' @param downstream (int) - Downstream distance from TSS
#' to consider as potential promoter.
#' @param extend (int) - Integer defining the distance from the upstream
#' and downstream of the basal regulatory region. Used only by method 'GREAT'
#' (currently unused, since 'GREAT' is not implemented).
#' @param only_tss (logical) - If TRUE, only TSS will be considered.
#' @param verbose (logical) - If TRUE, print progress messages
#' (currently unused).
#'
#' @return (matrix) - Sparse matrix of peaks x genes with 1 if peak is near
#' gene.
#' @export
#'
#' @examples
#' \dontrun{
#' peaks_near_genes <- find_peaks_near_genes(
#'   peaks = hummus[["peaks"]]@ranges,
#'   genes = Signac::Annotation(hummus[["peaks"]]),
#'   method = "Signac",
#'   upstream = 500,
#'   downstream = 500,
#'   only_tss = TRUE)
#' }
find_peaks_near_genes <- function(
  peaks,
  genes,
  sep = c("-", "-"),
  method = c("Signac", "GREAT"),
  upstream = 100000,
  downstream = 0,
  extend = 1000000,
  only_tss = FALSE,
  verbose = TRUE
) {
  # Restrict method to the declared choices (first one when left to default)
  method <- match.arg(method)

  # Only the Signac strategy is available for now
  if (method != "Signac") {
    stop("method must be either 'Signac' or 'GREAT' ;
         please check that current version of HuMMuS
         already accepts GREAT as a method.")
  }

  # Optionally shrink every gene to a width-1 range anchored on its start
  if (only_tss) {
    genes <- IRanges::resize(x = genes, width = 1, fix = "start")
  }

  # Widen the gene ranges by the requested upstream/downstream distances
  gene_windows <- suppressWarnings(
    Signac::Extend(genes, upstream = upstream, downstream = downstream)
  )

  # Every (peak, gene window) pair that overlaps
  hits <- IRanges::findOverlaps(
    query = peaks,
    subject = gene_windows,
    type = "any",
    select = "all"
  )

  # Binary peaks x genes matrix: 1 where a peak overlaps a gene window
  peak_by_gene <- Matrix::sparseMatrix(
    i = S4Vectors::queryHits(hits),
    j = S4Vectors::subjectHits(hits),
    x = 1,
    dims = c(length(peaks), length(gene_windows))
  )
  colnames(peak_by_gene) <- gene_windows$gene_name
  rownames(peak_by_gene) <- Signac::GRangesToString(grange = peaks, sep = sep)

  return(peak_by_gene)
}


#' @title Filter peaks to those overlapping specific (regulatory) elements
#' @description Reduce a list of "Peaks" to the ones overlapping with a list
#' of "RegEl", e.g. regulatory elements, evolutionary conserved regions
#'
#' @param Peaks (character) vector of genomic coordinates of peaks
#' @param RegEl (character) vector of genomic coordinates of regulatory elements
#' @param sep_Peak1 (character) separator between chromosome and
#' start position of peak
#' @param sep_Peak2 (character) separator between start position
#' and end position of peak
#' @param sep_RegEl1 (character) separator between chromosome and
#' start position of regulatory element
#' @param sep_RegEl2 (character) separator between start position and
#' end position of regulatory element
#'
#' @return (character) vector of the (unique) genomic coordinates of peaks
#' overlapping at least one regulatory element
#' @export
#'
#' @examples peaks_in_regulatory_elements(peaks, RegEl)
peaks_in_regulatory_elements <- function(
  Peaks,
  RegEl,
  sep_Peak1 = "-",
  sep_Peak2 = "-",
  sep_RegEl1 = "-",
  sep_RegEl2 = "-"
) {
  # Work on de-duplicated coordinates
  unique_peaks <- unique(Peaks)
  unique_regel <- unique(RegEl)

  # Parse the coordinate strings into genomic ranges
  regel_ranges <- Signac::StringToGRanges(unique_regel,
                                          sep = c(sep_RegEl1, sep_RegEl2))
  peak_ranges <- Signac::StringToGRanges(unique_peaks,
                                         sep = c(sep_Peak1, sep_Peak2))

  # Overlaps with regulatory elements as query and peaks as subject
  overlaps <- IRanges::findOverlaps(query = regel_ranges,
                                    subject = peak_ranges)

  # Subject hits index the peaks; each overlapping peak is kept once
  overlapping_peaks <- unique(S4Vectors::subjectHits(overlaps))
  unique_peaks[overlapping_peaks]
}
--------------------------------------------------------------------------------
/R/explore_network.R:
--------------------------------------------------------------------------------
#' Format multiplex names for python hummuspy package config functions
#'
#' @param hummus_object A HuMMuS_Object
#' @param multiplex_names A vector of multiplex names considered. It must be
#' a subset of the names of the multiplexes in the hummus object; names that
#' are not found are reported and skipped. If NULL, all multiplexes are used.
#'
#' @return A named list of multiplexes formatted for hummuspy config functions:
#' each element is a python dictionary (reticulate::py_dict) whose keys are the
#' network names, as named in the hummus object, and whose values are the
#' network types (directed/weighted flags pasted together, e.g. "01")
#' @export
#'
#' @examples
#' \dontrun{
#' multiplexes_dictionary <- format_multiplex_names(
#'   hummus_object = hummus,
#'   multiplex_names = c("TF", "peaks"))
#' }
#'
format_multiplex_names <- function(
  hummus_object,
  multiplex_names = NULL
) {

  # Only Hummus_Object inputs are supported for now
  if (!inherits(hummus_object, "Hummus_Object")) {
    stop("Object is not a multilayer nor an hummus object.")
  }
  multiplex_list <- hummus_object@multilayer@multiplex

  # Use every multiplex when no name is provided
  if (is.null(multiplex_names)) {
    multiplex_names <- names(multiplex_list)
  }

  # Report and skip the requested multiplexes that do not exist. This has to
  # happen before reading any slot: indexing the list with an unknown name
  # yields NULL, and slot access on NULL stops with a cryptic error.
  is_known <- multiplex_names %in% names(multiplex_list)
  for (multiplex in multiplex_names[!is_known]) {
    cat("Multiplex ", multiplex, " is NULL\n")
  }
  multiplex_names <- multiplex_names[is_known]

  # Create a named list containing the multiplexes infos
  # formatted for hummuspy config functions
  multiplexes_dictionary <- lapply(
    multiplex_list[multiplex_names],
    function(x) {
      # One "<directed><weighted>" code per network of the multiplex
      network_types <- paste0(as.integer(x@directed), as.integer(x@weighted))
      # Name the codes with the networks as named in the hummus object
      names(network_types) <- names(x@networks)
      reticulate::py_dict(
        keys = names(network_types),
        values = network_types
      )
    })
  return(multiplexes_dictionary)
}

#' Format bipartites names for python hummuspy package config functions
#'
#' @param hummus_object A hummus object
#' @param bipartites_names A vector of bipartites names considered.
#' It must be a subset of the names of the bipartites in the hummus object.
#' @param suffix_bipartites A suffix to add to the bipartites names, so that
#' they match the bipartite file names. Default: ".tsv".
#'
#' @return A named list of bipartites formatted for hummuspy config functions:
#' names are the bipartite names followed by the suffix, and each element is
#' a list containing the right and left layer connected by the bipartite
#' @export
#'
#' @examples
#' \dontrun{
#' bipartites_dictionary <- format_bipartites_names(
#'   hummus_object = hummus,
#'   bipartites_names = c("atac_rna",
#'                        "tf_peak"))
#' }
#'
format_bipartites_names <- function(
  hummus_object,
  bipartites_names = NULL,
  suffix_bipartites = ".tsv"
) {

  # Only Hummus_Object inputs are supported for now
  if (!inherits(hummus_object, "Hummus_Object")) {
    stop("Object is not a multilayer nor an hummus object.")
  }
  bipartites_list <- hummus_object@multilayer@bipartites

  # Use every bipartite when no name is provided
  if (is.null(bipartites_names)) {
    bipartites_names <- names(bipartites_list)
  }

  # Unknown names would otherwise fail further down with a cryptic
  # slot-access error on NULL
  unknown_bipartites <- setdiff(bipartites_names, names(bipartites_list))
  if (length(unknown_bipartites) > 0) {
    stop("Bipartite(s) not found in the hummus object: ",
         paste(unknown_bipartites, collapse = ", "))
  }

  # Create a named list containing the bipartites infos
  # formatted for hummuspy config functions
  # each element of the list is a list containing
  # the right and left layer connected by the bipartite
  bipartites_dictionary <- lapply(
    bipartites_list[bipartites_names],
    function(x) {
      list("multiplex_right" = x@multiplex_right,
           "multiplex_left" = x@multiplex_left)
    })

  # Add the suffix to the names since they should indicate the exact file
  # name. Skipped for an empty list: pasting onto NULL names returns a
  # length-1 vector, which cannot be assigned as names of an empty list.
  if (length(bipartites_dictionary) > 0) {
    names(bipartites_dictionary) <- paste0(names(bipartites_dictionary),
                                           suffix_bipartites)
  }

  # return the list
  return(bipartites_dictionary)
}

#' Define GRN from hummus object
#'
#' Calling the define_output function with output_type = 'GRN'
#'
#' @param hummus_object A hummus object
#' @param multiplex_names A vector of multiplex names considered.
#' It must be a subset of the names of the multiplexes in the hummus object.
#' @param bipartites_names A vector of bipartites names considered.
#' It must be a subset of the names of the bipartites in the hummus object.
#' @param config_name The name of the config file to be created by hummuspy
#' @param config_folder The folder where the config file will be created
#' @param tf_multiplex The name of the multiplex containing the TFs
#' @param atac_multiplex The name of the multiplex containing the ATAC-seq peaks
#' @param rna_multiplex The name of the multiplex containing the RNA-seq genes
#' @param multilayer_f The folder where the multilayer is stored
#' @param gene_list A vector of genes to be considered for the final GRN
#' (filtering is done on the genes before inferring the GRN)
#' @param tf_list A vector of TFs to be considered for the final GRN (filtering
#' is done on the TFs after inferring the GRN)
#' @param save A boolean indicating if the GRN should be saved
#' @param output_f The name of the file where the GRN should be saved
#' (if save == TRUE)
#' @param return_df A boolean indicating if the GRN should be returned as a
#' dataframe
#' @param suffix_bipartites A suffix to add to the bipartites names (to indicate
#' the exact file location)
#' @param njobs The number of jobs to be used for the computation of the GRN
#'
#' @return A dataframe containing the GRN (if return_df == TRUE)
#' @export
#'
#' @examples grn <- define_grn(hummus_object = hummus,
#' multilayer_f = multilayer_folder,
#' njobs = 5)
#'
define_grn <- function(
164 | hummus_object, 165 | multiplex_names = NULL, 166 | bipartites_names = NULL, 167 | config_name = "grn_config.yml", 168 | config_folder = "config", 169 | tf_multiplex = "TF", 170 | atac_multiplex = "peaks", 171 | rna_multiplex = "RNA", 172 | multilayer_f = "multilayer", 173 | gene_list = NULL, 174 | tf_list = NULL, 175 | save = FALSE, 176 | output_f = NULL, 177 | return_df = TRUE, 178 | suffix_bipartites = ".tsv", 179 | njobs = 1 180 | ) { 181 | 182 | grn <- define_output( 183 | output_type = "grn", 184 | hummus_object = hummus_object, 185 | multiplex_names = multiplex_names, 186 | bipartites_names = bipartites_names, 187 | config_name = config_name, 188 | config_folder = config_folder, 189 | tf_multiplex = tf_multiplex, 190 | atac_multiplex = atac_multiplex, 191 | rna_multiplex = rna_multiplex, 192 | multilayer_f = multilayer_f, 193 | gene_list = gene_list, 194 | tf_list = tf_list, 195 | save = save, 196 | output_f = output_f, 197 | return_df = return_df, 198 | suffix_bipartites = suffix_bipartites, 199 | njobs = njobs 200 | ) 201 | 202 | # return grn 203 | return(grn) 204 | } 205 | 206 | #' Define enhancers from hummus object 207 | #' 208 | #' Calling the define_output function with output_type = 'enhancers' 209 | #' 210 | #' @param hummus_object A hummus object 211 | #' @param multiplex_names A vector of multiplex names considered. 212 | #' It must be a subset of the names of the multiplexes in the hummus object. 213 | #' @param bipartites_names A vector of bipartites names considered. 214 | #' It must be a subset of the names of the bipartites in the hummus object. 
#' @param config_name The name of the config file to be created by hummuspy
#' @param config_folder The folder where the config file will be created
#' @param tf_multiplex The name of the multiplex containing the TFs
#' @param atac_multiplex The name of the multiplex containing the ATAC-seq peaks
#' @param rna_multiplex The name of the multiplex containing the RNA-seq genes
#' @param multilayer_f The folder where the multilayer is stored
#' @param gene_list A vector of genes to be considered for the final enhancers
#' (filtering is done on the genes before inferring the enhancers)
#' @param tf_list A vector of TFs to be considered for the final enhancers
#' (filtering is done on the TFs after inferring the enhancers)
#' @param save A boolean indicating if the enhancers should be saved
#' @param output_f The name of the file where the enhancers should be saved
#' (if save == TRUE)
#' @param return_df A boolean indicating if the enhancers should be returned
#' as a dataframe
#' @param suffix_bipartites A suffix to add to the bipartites names (to indicate
#' the exact file location)
#' @param njobs The number of jobs to be used to compute the enhancers
#'
#' @return A dataframe containing the enhancers (if return_df == TRUE)
#' @export
#'
#' @examples enhancers <- define_enhancers(hummus_object = hummus,
#'                                        multilayer_f = multilayer_folder,
#'                                        njobs = 5)
#'
define_enhancers <- function(
  hummus_object,
  multiplex_names = NULL,
  bipartites_names = NULL,
  config_name = "enhancers_config.yml",
  config_folder = "config",
  tf_multiplex = "TF",
  atac_multiplex = "peaks",
  rna_multiplex = "RNA",
  multilayer_f = "multilayer",
  gene_list = NULL,
  tf_list = NULL,
  save = FALSE,
  output_f = NULL,
  return_df = TRUE,
  suffix_bipartites = ".tsv",
  njobs = 1
  ) {

  # Delegate to the generic entry point with the output type pinned to
  # "enhancers"; all other arguments are passed through untouched and the
  # result of define_output is returned as is.
  define_output(
    output_type = "enhancers",
    hummus_object = hummus_object,
    multiplex_names = multiplex_names,
    bipartites_names = bipartites_names,
    config_name = config_name,
    config_folder = config_folder,
    tf_multiplex = tf_multiplex,
    atac_multiplex = atac_multiplex,
    rna_multiplex = rna_multiplex,
    multilayer_f = multilayer_f,
    gene_list = gene_list,
    tf_list = tf_list,
    save = save,
    output_f = output_f,
    return_df = return_df,
    suffix_bipartites = suffix_bipartites,
    njobs = njobs
  )
}


#' Define binding_regions from hummus object
#'
#' Calling the define_output function with output_type = 'binding_regions'
#'
#' @param hummus_object A hummus object
#' @param multiplex_names A vector of multiplex names considered.
#' It must be a subset of the names of the multiplexes in the hummus object.
#' @param bipartites_names A vector of bipartites names considered.
#' It must be a subset of the names of the bipartites in the hummus object.
#' @param config_name The name of the config file to be created by hummuspy
#' @param config_folder The folder where the config file will be created
#' @param tf_multiplex The name of the multiplex containing the TFs
#' @param atac_multiplex The name of the multiplex containing the ATAC-seq peaks
#' @param rna_multiplex The name of the multiplex containing the RNA-seq genes
#' @param multilayer_f The folder where the multilayer is stored
#' @param gene_list A vector of genes to be considered for the final binding
#' regions (filtering is done on the genes before inferring the binding_regions)
#' @param tf_list A vector of TFs to be considered for the binding_regions
#' (filtering is done on the TFs after inferring the binding_regions)
#' @param save A boolean indicating if the binding_regions should be saved
#' @param output_f The name of the file where the binding_regions can be saved
#' (if save == TRUE)
#' @param return_df A boolean indicating if the binding_regions should be
#' returned as a dataframe
#' @param suffix_bipartites A suffix to add to the bipartites names (to indicate
#' the exact file location)
#' @param njobs The number of jobs to be used for the computation of the
#' binding_regions
#'
#' @return A dataframe containing the binding_regions (if return_df == TRUE)
#' @export
#'
#' @examples binding_regions <- define_binding_regions(hummus_object = hummus,
#'                                        multilayer_f = multilayer_folder,
#'                                        njobs = 5)
#'
define_binding_regions <- function(
  hummus_object,
  multiplex_names = NULL,
  bipartites_names = NULL,
  config_name = "binding_regions_config.yml",
  config_folder = "config",
  tf_multiplex = "TF",
  atac_multiplex = "peaks",
  rna_multiplex = "RNA",
  multilayer_f = "multilayer",
  gene_list = NULL,
  tf_list = NULL,
  save = FALSE,
  output_f = NULL,
  return_df = TRUE,
  suffix_bipartites = ".tsv",
  njobs = 1
  ) {

  # Delegate to the generic entry point with the output type pinned to
  # "binding_regions"; all other arguments are passed through untouched and
  # the result of define_output is returned as is.
  define_output(
    output_type = "binding_regions",
    hummus_object = hummus_object,
    multiplex_names = multiplex_names,
    bipartites_names = bipartites_names,
    config_name = config_name,
    config_folder = config_folder,
    tf_multiplex = tf_multiplex,
    atac_multiplex = atac_multiplex,
    rna_multiplex = rna_multiplex,
    multilayer_f = multilayer_f,
    gene_list = gene_list,
    tf_list = tf_list,
    save = save,
    output_f = output_f,
    return_df = return_df,
    suffix_bipartites = suffix_bipartites,
    njobs = njobs
  )
}


#' Define target genes from hummus object
#'
#' Calling the define_output function with output_type = 'target_genes'
#'
#' @param hummus_object A hummus object
#' @param multiplex_names A vector of multiplex names considered.
#' It must be a subset of the names of the multiplexes in the hummus object.
#' @param bipartites_names A vector of bipartites names considered.
#' It must be a subset of the names of the bipartites in the hummus object.
#' @param config_name The name of the config file to be created by hummuspy
#' @param config_folder The folder where the config file will be created
#' @param tf_multiplex The name of the multiplex containing the TFs
#' @param atac_multiplex The name of the multiplex containing the ATAC-seq peaks
#' @param rna_multiplex The name of the multiplex containing the RNA-seq genes
#' @param multilayer_f The folder where the multilayer is stored
#' @param gene_list A vector of genes to be considered for the target_genes
#' (filtering is done on the genes before inferring the target_genes)
#' @param tf_list A vector of TFs to be considered for the final target_genes
#' (filtering is done on the TFs after inferring the target_genes)
#' @param save A boolean indicating if the target_genes should be saved
#' @param output_f The name of the file where the target_genes should be saved
#' (if save == TRUE)
#' @param return_df A boolean indicating if the target_genes should be returned
#' as a dataframe
#' @param suffix_bipartites A suffix to add to the bipartites names (to indicate
#' the exact file location)
#' @param njobs The number of jobs to be used to compute the target_genes
#'
#' @return A dataframe containing the target_genes (if return_df == TRUE)
#' @export
#'
#' @examples target_genes <- define_target_genes(hummus_object = hummus,
#'                                        multilayer_f = multilayer_folder,
#'                                        njobs = 5)
#'
define_target_genes <- function(
  hummus_object,
  multiplex_names = NULL,
  bipartites_names = NULL,
  config_name = "target_genes_config.yml",
  config_folder = "config",
  tf_multiplex = "TF",
  atac_multiplex = "peaks",
  rna_multiplex = "RNA",
  multilayer_f = "multilayer",
  gene_list = NULL,
  tf_list = NULL,
  save = FALSE,
  output_f = NULL,
  return_df = TRUE,
  suffix_bipartites = ".tsv",
  njobs = 1
  ) {

  # Delegate to the generic entry point with the output type pinned to
  # "target_genes"; all other arguments are passed through untouched and the
  # result of define_output is returned as is.
  define_output(
    output_type = "target_genes",
    hummus_object = hummus_object,
    multiplex_names = multiplex_names,
    bipartites_names = bipartites_names,
    config_name = config_name,
    config_folder = config_folder,
    tf_multiplex = tf_multiplex,
    atac_multiplex = atac_multiplex,
    rna_multiplex = rna_multiplex,
    multilayer_f = multilayer_f,
    gene_list = gene_list,
    tf_list = tf_list,
    save = save,
    output_f = output_f,
    return_df = return_df,
    suffix_bipartites = suffix_bipartites,
    njobs = njobs
  )
}

#' @title Define output from hummus object
#'
#' @description Define output from hummus object
#'
#' @param output_type The type of output to be defined
#' @param hummus_object A hummus object
#' @param multiplex_names A vector of multiplex names considered.
#' It must be a subset of the names of the multiplexes in the hummus object.
#' @param bipartites_names A vector of bipartites names considered.
#' It must be a subset of the names of the bipartites in the hummus object.
#' @param config_name The name of the config file to be created by hummuspy
#' @param config_folder The folder where the config file will be created
#' @param tf_multiplex The name of the multiplex containing the TFs
#' @param atac_multiplex The name of the multiplex containing the ATAC-seq peaks
#' @param rna_multiplex The name of the multiplex containing the RNA-seq genes
#' @param multilayer_f The folder where the multilayer is stored
#' @param gene_list A vector of genes to be considered for the requested output
#' (filtering is done on the genes before inferring the output)
#' @param tf_list A vector of TFs to be considered for the requested output
#' (filtering is done on the TFs after inferring the output)
#' @param save A boolean indicating if the output should be saved
#' @param output_f The name of the file where the output should be saved
#' (if save == TRUE)
#' @param return_df A boolean indicating if the output should be returned
#' as a dataframe
#' @param suffix_bipartites A suffix to add to the bipartites names (to indicate
#' the exact file location)
#' @param njobs The number of jobs to be used to compute the output
#'
#' @return A dataframe containing the requested output (if return_df == TRUE)
#' @export
#'
#' @examples grn <- define_output('grn', hummus_object = hummus)
define_output <- function(
  output_type,
  hummus_object,
  multiplex_names = NULL,
  bipartites_names = NULL,
  config_name = "config.yml",
  config_folder = "config",
  tf_multiplex = "TF",
  atac_multiplex = "peaks",
  rna_multiplex = "RNA",
  multilayer_f = "multilayer",
  gene_list = NULL,
  tf_list = NULL,
  save = FALSE,
  output_f = NULL,
  return_df = TRUE,
  suffix_bipartites = ".tsv",
  njobs = 1
  ) {

  # Check if hummuspy is installed and import it
  hummuspy <- tryCatch({
    reticulate::import("hummuspy")
    }, error = function(err) {
      stop("hummuspy package not found. Make sure that Reticulate \
           is pointing to the right Python binary.")
    }
  )
  # Format multiplexes names
  multiplexes_dictionary <- format_multiplex_names(
    hummus_object,
    multiplex_names = multiplex_names)
  # Format bipartites names
  bipartites_dictionary <- format_bipartites_names(
    hummus_object,
    bipartites_names = bipartites_names,
    suffix_bipartites = suffix_bipartites)

  # reticulate converts a length-one R vector into a Python scalar, so a
  # single gene/TF would reach Python as a `str` instead of a list. Converting
  # to an unnamed R list guarantees a Python list whatever the length.
  if (!is.null(gene_list)) {
    gene_list <- as.list(as.character(unname(gene_list)))
  }
  if (!is.null(tf_list)) {
    tf_list <- as.list(as.character(unname(tf_list)))
  }
  # R numerics are doubles and would be received as Python floats;
  # a number of jobs is sent as an integer instead.
  if (!is.null(njobs)) {
    njobs <- as.integer(njobs)
  }

  # Compute the requested output with the hummuspy function
  output <- hummuspy$core_grn$get_output_from_dicts(
    output_request = output_type,
    multilayer_f = multilayer_f,
    multiplexes_list = multiplexes_dictionary,
    bipartites_list = bipartites_dictionary,
    gene_list = gene_list,
    tf_list = tf_list,
    config_filename = config_name,
    config_folder = config_folder,
    output_f = output_f,
    tf_multiplex = tf_multiplex,
    peak_multiplex = atac_multiplex, # hummuspy names the ATAC multiplex "peak"
    rna_multiplex = rna_multiplex,
    update_config = TRUE,
    save = save,
    return_df = return_df,
    njobs = njobs)

  # Return whatever hummuspy produced for the requested output type
  return(output)
}

#' @title Define general config file for hummuspy
#'
#' @description Define general config file for hummuspy
#'
#' @param hummus_object A hummus object
#' @param multiplex_names A vector of multiplex names considered.
#' It must be a subset of the names of the multiplexes in the hummus object, or NULL
#' if all multiplexes should be considered.
#' @param bipartites_names A vector of bipartites names considered.
#' It must be a subset of the names of the bipartites in the hummus object, or NULL
#' if all bipartites should be considered.
#' @param folder_multiplexes The folder where the multiplexes are stored
#' @param folder_bipartites The folder where the bipartites are stored
#' @param seed_path The path to the seed file
#' @param suffix Forwarded unchanged as the `suffix` argument of hummuspy's
#' `general_config` (presumably the file suffix of the multiplex layers -
#' to be confirmed against hummuspy)
#' @param suffix_bipartites A suffix to add to the bipartites names (to indicate
#' the exact file name)
#' @param self_loops A boolean indicating if self loops should be considered.
#' @param restart_proba The restart probability for the random walk (default = 0.7)
#' @param save_configfile A boolean indicating if the config file should be saved
#' @param config_name The name of the config file to be created by hummuspy
#' @param config_folder The folder where the config file will be created (inside multilayer_f)
#' @param multilayer_f The folder where the multilayer is stored
#'
#' @return A config file for hummuspy
#' @export
#'
define_general_config <- function(
  hummus_object,
  multiplex_names = NULL,
  bipartites_names = NULL,
  folder_multiplexes = "multiplex",
  folder_bipartites = "bipartites",
  seed_path = 'seed/seeds.txt',
  suffix = ".tsv",
  self_loops = FALSE,
  restart_proba = 0.7,
  save_configfile = FALSE,
  config_name = "config.yml",
  config_folder = "config",
  multilayer_f = "multilayer",
  suffix_bipartites = ".tsv"
  ) {

  # Check if hummuspy is installed and import it
  hummuspy <- tryCatch({
    reticulate::import("hummuspy")
    }, error = function(err) {
      stop("hummuspy package not found. Make sure that Reticulate \
           is pointing to the right Python binary.")
    }
  )
  # Format multiplexes names
  multiplexes_dictionary <- format_multiplex_names(
    hummus_object,
    multiplex_names = multiplex_names)
  # Format bipartites names
  bipartites_dictionary <- format_bipartites_names(
    hummus_object,
    bipartites_names = bipartites_names,
    suffix_bipartites = suffix_bipartites)

  # hummuspy receives the self-loop flag as an integer (0/1)
  self_loops <- as.integer(self_loops)

  # The config path is only built when the file is actually saved
  if (save_configfile == TRUE) {
    config_filename <- file.path(multilayer_f, config_folder, config_name)
  } else {
    config_filename <- NULL
  }

  # Build the general config with the hummuspy function
  config <- hummuspy$config$general_config(
    multiplexes = multiplexes_dictionary,
    bipartites = bipartites_dictionary,
    folder_multiplexes = folder_multiplexes,
    folder_bipartites = folder_bipartites,
    seed_path = seed_path,
    self_loops = self_loops,
    restart_prob = restart_proba,
    config_filename = config_filename,
    save_configfile = save_configfile,
    suffix = suffix)

  return(config)
}

--------------------------------------------------------------------------------
/R/fetch_online.R:
--------------------------------------------------------------------------------
#' Fetch online genome annotations from Ensembldb database
#'
#' @param ensdb_annotations (EnsDb object) - Ensembldb database
#' (default: EnsDb.Hsapiens.v86::EnsDb.Hsapiens.v86)
#'
#' @return gene_range (GRanges object) - Genome annotations
#' @export
#'
#' @examples gene_range = get_genome_annotations(EnsDb.Hsapiens.v86::EnsDb.Hsapiens.v86)
get_genome_annotations <- function(
  ensdb_annotations = EnsDb.Hsapiens.v86::EnsDb.Hsapiens.v86
) {
  # Get genome annotations from Ensembldb database
  gene_range <- Signac::GetGRangesFromEnsDb(ensdb_annotations)

  # Change chromosome names to UCSC format: prefix with "chr" and
  # rename the mitochondrial chromosome from "chrMT" to "chrM"
  ucsc.levels <- stringr::str_replace(
    string = paste("chr", Signac::seqlevels(gene_range), sep = ""),
    pattern = "chrMT",
    replacement = "chrM")

  Signac::seqlevels(gene_range) <- ucsc.levels
  # check if Signac is the good package

  return(gene_range) # Return genome annotations
}

#' Fetch online TF motifs from JASPAR2020 and chromVARmotifs
#'
#' @param species (character) - Species name, either "human" or "mouse"
#' (default: "human")
#'
#' @return motifs_db (motifs_db object) - TF2motifs + motifs PWMs
#' @export
#'
#' @examples motifs_db = get_tf2motifs(species = "human")
get_tf2motifs <- function(species = "human") {
  # TF motifs using the union of databases: JASPAR and cis-BP
  # included in chromVAR

  # Only two species have bundled motif data; reject anything else up front
  # instead of failing later on an undefined variable.
  if (!is.character(species) || length(species) != 1 ||
      !(species %in% c("human", "mouse"))) {
    stop("Unsupported species: choose between 'human' and 'mouse'.")
  }

  getMatrixSet <- TFBSTools::getMatrixSet

  if (species == "human") {
    # Parameters for JASPAR2020
    opts <- list(collection = "CORE",
                 species    = "Homo sapiens",
                 all_versions = FALSE)
    # Load data from JASPAR2020
    JASPAR_PWM <- TFBSTools::toPWM(getMatrixSet(JASPAR2020::JASPAR2020, opts))
    # Load data from chromVARmotifs
    # Original data accessible at https://github.com/GreenleafLab/chromVARmotifs
    data("human_pwms_v2")
    motifs <- human_pwms_v2
  } else {
    # Parameters for JASPAR2020
    opts <- list(collection = "CORE",
                 species    = "Mus musculus",
                 all_versions = FALSE)
    # Load data from JASPAR2020
    JASPAR_PWM <- TFBSTools::toPWM(getMatrixSet(JASPAR2020::JASPAR2020, opts))
    # Load data from chromVARmotifs
    # Original data accessible at https://github.com/GreenleafLab/chromVARmotifs
    data("mouse_pwms_v2")
    motifs <- mouse_pwms_v2
  }

  # Combine motifs of JASPAR2020 and chromVARmotifs
  # (a JASPAR motif replaces a chromVARmotifs entry with the same name)
  for (name in names(JASPAR_PWM)) {
    motifs[name] <- JASPAR_PWM[name]
  }

  # TFBSTools::name(motifs) returns names of TFs associated to each PWMatrix.
  # Names given as "name1::name2" are split into one TF per entry, and any
  # "(var.n)" suffix is dropped to keep only the TF identifier.
  tfs_per_motif <- lapply(
    strsplit(TFBSTools::name(motifs), "::"),
    function(tfs) {
      vapply(tfs,
             function(tf) strsplit(tf, "(", fixed = TRUE)[[1]][1],
             character(1),
             USE.NAMES = FALSE)
    })

  # Build the TF motif table in one go: one row per (motif, TF) pair
  tf2motifs <- data.frame(
    motif = rep(names(motifs), times = lengths(tfs_per_motif)),
    tf = as.character(unlist(tfs_per_motif, use.names = FALSE)),
    stringsAsFactors = FALSE)

  # Return motifs_db <- TF2motifs + motifs PWMs
  return(new("motifs_db",
             tf2motifs = tf2motifs,
             motifs = motifs,
             tfs = unique(tf2motifs$tf)))
}

--------------------------------------------------------------------------------
/R/layers.R:
--------------------------------------------------------------------------------
#' Compute TF network and add it to hummus object
#'
#' Compute a protein-protein interaction layer from Omnipath request that will represent tf cooperativity.
#' This network is the top-layer of HuMMuS multilayer.
#'
#' @param hummus (Hummus_Object) - Hummus object
#' @param organism (integer) - Specie identifier from Omnipath to fetch
#' specific interactions
#' @param tfs vector(character) - List of tfs consider. If NA, tfs are extracted
#' from the hummus object with get_tfs function.
#' @param gene_assay (character) - Name of the assay to get tfs from if tfs is
#' not provided. If NULL, all TFs with motifs in the hummus object are used.
#' @param method (character) - Method used to infer network edges.
#' * \code{'Omnipath'} - Use Omnipath to infer tf-tf networks.
#' * \code{'NULL'} - A fake connected network is computed.
#' * \code{'Other method'} - TO DO.
#' @param store_network (bool) - Save the network on disk (\code{TRUE}) or
#' return without saving on disk (\code{FALSE}, default).
#' @param output_file (character) - Name of the output_file
#' (if store_network == \code{TRUE}).
#' @param source_target ('AND'|'OR') - Fetch only the interactions involving
#' two considered tfs (\code{'AND', default}), or one considered tfs and any
#' other element (\code{'OR'})
#' @param multiplex_name (character) - Name of the multiplex to add the network
#' to. Default is \code{'TF'}.
#' @param tf_network_name (character) - Name of the network in the multiplex to
#' add the network to. Default is \code{'TF_network'}.
#' @param verbose (integer) - Display function messages. Set to 0 for no message
#' displayed, >= 1 for more details.
#'
#' @return (Hummus_Object) - Return hummus object with the new network added.
#' @export
#'
#' @examples hummus <- compute_tf_network(hummus,
#'                                        gene_assay = "RNA",
#'                                        verbose = 1)
compute_tf_network <- function(
  hummus, # Hummus object
  organism = 9606, # Human by default
  tfs = NA, # List of tfs considered.
  gene_assay = NULL, # Name of the assay to get tfs from
                     # if tfs is not provided
  method = NULL, # Method used to infer network edges.
                 # * 'Omnipath' - Use Omnipath to infer tf-tf networks.
                 # * 'NULL' - A fake connected network is computed.
                 # * 'Other method' - TO DO.
  store_network = FALSE, # Save the network on disk (FALSE by default)
  output_file = NULL, # Name of the output_file (if store_network == TRUE)
  source_target = "AND", # 'AND' | 'OR'
  multiplex_name = "TF", # Name of the multiplex to add the network to
  tf_network_name = "TF_network", # Name of the network in the multiplex
  verbose = 1
  ) {

  a <- Sys.time()
  # Check if method is implemented
  if (is.null(method)) {
    # No method requested: build the placeholder TF network
    # NOTE(review): verbose is passed positionally here, unlike in the Omnipath
    # call below - confirm it lands on the wrapper's verbose argument.
    tf_network <- run_tf_null_wrapper(
      hummus = hummus,
      organism = organism,
      tfs = tfs,
      gene_assay = gene_assay,
      verbose)
  } else if (method == "Omnipath") {
    if (!requireNamespace("OmnipathR", quietly = TRUE)) {
      stop("Please install Omnipath.\n",
           "github.com/saezlab/OmnipathR")
    } else {
      # infer network with Omnipath
      tf_network <- run_omnipath_wrapper(
        hummus = hummus,
        organism = organism,
        tfs = tfs,
        gene_assay = gene_assay,
        source_target = source_target,
        verbose = verbose)
    }
  } else {
    # The text must be given to stop() itself: cat() prints to stdout and
    # returns NULL, which would raise an error with an empty message.
    stop(paste("Method not implemented yet, choose between Omnipath and NULL..",
               "that's it for now.\n But you can always compute the network",
               "independently and add it to the hummus object manually !"))
  }
  if (verbose > 0) {
    # format() keeps the unit chosen by difftime (secs, mins, ...)
    cat("TF network construction time:", format(Sys.time() - a), "\n")
  }

  # Save TF network (only written to disk if store_network is TRUE)
  store_network(network = tf_network,
                store_network = store_network,
                output_file = output_file,
                verbose = verbose)

  # Add network to hummus object
  hummus <- add_network(hummus,
                        multiplex_name = multiplex_name,
                        network = tf_network,
                        network_name = tf_network_name,
                        weighted = FALSE, # PPI could be weighted,
                                          # could be added later
                        directed = FALSE, # PPI are not directed
                        verbose = verbose)

  return(hummus)
}


#' Compute gene network from scRNA-seq data
#'
#' This function will create a network
#' from rna data (or in theory any data
#' with genes as features).
#' Different method should be implemented at some point (any suggestion is welcomed ! :) ),
#' for now Genie3 is still the reference and only method available
#'
#' Method descriptions :
#'      1. Genie3
#'          Use tree random forest to infer regulatory networks :
#'          https://bioconductor.org/packages/release/bioc/html/GENIE3.html
#'
#' @param hummus (Hummus_Object) - Hummus object
#' @param gene_assay (character) - Name of the assay containing the gene
#' expression data.
#' @param tfs vector(character) - List of tfs considered. If NULL, the TFs are
#' fetched from the hummus object with \code{get_tfs()} (TFs having a motif
#' and present in \code{gene_assay}).
#' @param method (character) - Method used to infer network edges.
#' * \code{'GENIE3'} - Use tree random forest to infer regulatory networks.
#' * \code{'Other method'} - TO DO.
#' @param multiplex_name (character) - Name of the multiplex to add the network
#' to. Default is \code{NULL}, in which case \code{gene_assay} is used.
#' @param network_name (character) - Name of the network in the multiplex to
#' add the network to. Default is \code{NULL}, in which case
#' \code{<multiplex_name>_<method>} is used.
#' @param store_network (bool) - Save the network on disk (\code{TRUE}) or
#' return without saving on disk (\code{FALSE}, default).
#' @param output_file (character) - Name of the output_file
#' (if store_network == \code{TRUE}).
#' @param threshold (numeric, default 0) - Minimal threshold
#' to select tf-gene edges (edges with a weight > threshold are kept).
#' @param number_cores (integer, default 1) - Number of thread that should be
#' used for the parallelizable methods.
#' @param verbose (integer) - Display function messages. Set to 0 for no
#' message displayed, >= 1 for more details.
#'
#' @return (Hummus_Object) - Return hummus object with the new network added.
#' @export
#'
#' @examples hummus <- compute_gene_network(
#'                                 hummus,
#'                                 gene_assay = "RNA",
#'                                 method = "GENIE3",
#'                                 verbose = 1,
#'                                 number_cores = 8,
#'                                 store_network = FALSE)
#'
compute_gene_network <- function(
    hummus,
    gene_assay = "RNA",
    tfs = NULL,
    method = "GENIE3",
    multiplex_name = NULL,
    network_name = NULL,
    store_network = FALSE,
    output_file = NULL,
    threshold = 0.0,
    number_cores = 1,
    verbose = 1
    ) {

  start_time <- Sys.time()

  # Check if method is implemented
  if (identical(method, "GENIE3")) {
    if (!requireNamespace("GENIE3", quietly = TRUE)) {
      stop("Please install GENIE3.\n",
           "https://bioconductor.org/packages/release/bioc/html/GENIE3.html")
    }
    if (verbose > 0) {
      cat("Computing gene network with ", method, " ...\n")
    }

    # Get tfs list. The lookup must not depend on the verbosity level:
    # previously a silent run (verbose = 0) skipped it and GENIE3 then used
    # every gene as a candidate regulator.
    if (is.null(tfs)) {
      if (verbose > 0) {
        cat("\tNo TFs list provided, fetching from hummus object...\n")
      }
      tfs <- get_tfs(hummus = hummus,
                     assay = gene_assay,
                     store_tfs = FALSE,
                     output_file = NULL,
                     verbose = verbose)
    }

    # Infer network
    weight_matrix <- GENIE3::GENIE3(
      as.matrix(hummus@assays[[gene_assay]]$counts),
      regulators = tfs,
      nCores = number_cores)

    # Get edge list and keep edges above the threshold
    link_list <- GENIE3::getLinkList(weight_matrix)
    gene_network <- link_list[which(link_list$weight > threshold), ]

    # TODO : add other methods
  } else {
    # stop() builds the message itself; stop(cat(...)) raised an empty error.
    stop("Method not implemented yet, choose between GENIE3 and.. ",
         "that's it for now.\n but you can always compute the network ",
         "independently and add it to the hummus object.")
  }

  if (verbose > 0) {
    # format() keeps the time unit of the difftime (secs, mins, ...)
    cat("\tGene network construction time:",
        format(Sys.time() - start_time), "\n")
  }

  # Save gene network (only written to disk if store_network is TRUE)
  store_network(network = gene_network,
                store_network = store_network,
                output_file = output_file,
                verbose = verbose)

  # If no multiplex name provided, use assay name
  if (is.null(multiplex_name)) {
    multiplex_name <- gene_assay
  }
  # If no network name provided, use multiplex name + method name
  if (is.null(network_name)) {
    network_name <- paste(multiplex_name, method, sep = "_")
  }

  # Add network to hummus object
  hummus <- add_network(hummus,
                        multiplex_name = multiplex_name,
                        network = gene_network,
                        network_name = network_name,
                        weighted = TRUE,
                        directed = FALSE,
                        verbose = verbose)

  # Return hummus object
  return(hummus)
}

#' Compute peak network from scATAC-seq data
#'
#' This function will create a network from atac data (or in theory any data
#' with peaks coordinates as features).
#' Different method should be implemented at some point (e.g. RENIN),
#' for now Cicero is still the reference and only method available
#'
#' Method descriptions :
#'      1. Cicero
#'          Use partial correlation between peaks that are in a given window
#'          (e.g. : less distant than 500K base pairs)
#'
#' @param hummus (Hummus_Object) - Hummus object
#' @param atac_assay (character) - Name of the assay containing the atac
#' peaks data.
#' @param genome (BSgenome) - Genome used to compute the distance between peaks.
#' @param method (character) - Method used to infer network edges.
#' * \code{'cicero'} - Use cicero to infer regulatory networks.
#' * \code{'Other method'} - TO DO.
#' @param multiplex_name (character) - Name of the multiplex to add the network
#' to. Default is \code{NULL}, in which case \code{atac_assay} is used.
#' @param network_name (character) - Name of the network in the multiplex to
#' add the network to. Default is \code{NULL}, in which case
#' \code{peak_network_<method>} is used.
#' @param store_network (bool) - Save the network on disk (\code{TRUE}) or
#' return without saving on disk (\code{FALSE}, default).
#' @param output_file (character) - Name of the output_file
#' (if store_network == \code{TRUE}).
#' @param threshold (numeric, default 0) - Minimal threshold to select
#' peak-peak edges (passed to the cicero wrapper, which keeps edges with a
#' coaccess score > threshold).
#' @param number_cells_per_clusters (integer) - Number of cells grouped by
#' territory to define pseudocells
#' @param sample_num (integer | Cicero) - Number of pseudocells to sample from
#' each territory. Default is 100.
#' @param seed (integer | Cicero) - Seed used to sample pseudocells. Default is
#' 2025
#' @param verbose (integer) - Display function messages. Set to 0 for no
#' message displayed, >= 1 for more details.
#' @param window (integer) - Size of window to consider potential
#' cis-regulatory cooperations between peaks. Default is 500K base pairs.
#' @param reduction_method (character | Cicero) - Method used to reduce dimensionality
#' of the data to identify territories. Default is \code{'UMAP'}.
#'
#' @return (Hummus_Object) - Return hummus object with the new network added.
#' @export
#'
#' @examples hummus <- compute_atac_peak_network(hummus)
#'
compute_atac_peak_network <- function(
    hummus,
    atac_assay = "peaks",
    genome = BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38,
    method = "cicero",
    multiplex_name = NULL,
    network_name = NULL,
    store_network = FALSE,
    output_file = NULL,
    threshold = 0.0,
    number_cells_per_clusters = 50,
    sample_num = 100,
    seed = 2025,
    verbose = 1,
    window = 5e+05,
    reduction_method = "UMAP") {

  start_time <- Sys.time()

  # Check if method is implemented
  if (identical(method, "cicero")) {
    if (!requireNamespace("cicero", quietly = TRUE)) {
      stop("Please install cicero.\n",
           "https://cole-trapnell-lab.github.io/cicero-release/docs_m3/")
    }
    # Infer network with cicero (arguments named to avoid any ordering slip)
    atac_peak_network <- run_cicero_wrapper(
      hummus = hummus,
      atac_assay = atac_assay,
      genome = genome,
      window = window,
      number_cells_per_clusters = number_cells_per_clusters,
      sample_num = sample_num,
      seed = seed,
      verbose = verbose,
      threshold = threshold,
      reduction_method = reduction_method)
  } else {
    # stop() builds the message itself; stop(cat(...)) raised an empty error.
    stop("Method not implemented yet, choose between Cicero and.. ",
         "that's it for now.\n but you can always compute the network ",
         "independently and add it to the hummus object manually.")
  }

  if (verbose > 0) {
    # format() keeps the time unit of the difftime (secs, mins, ...)
    cat("Peak network construction time:",
        format(Sys.time() - start_time), "\n")
  }

  # Save peak network (only written to disk if store_network is TRUE)
  store_network(network = atac_peak_network,
                store_network = store_network,
                output_file = output_file,
                verbose = verbose)

  # If no multiplex name provided, use assay name
  if (is.null(multiplex_name)) {
    multiplex_name <- atac_assay
  }
  # If no network name provided, use "peak_network_" + method name
  if (is.null(network_name)) {
    network_name <- paste0("peak_network_", method)
  }

  # Add network to hummus object
  hummus <- add_network(
    object = hummus,
    network = atac_peak_network,
    network_name = network_name,
    multiplex_name = multiplex_name,
    weighted = TRUE,
    directed = FALSE,
    verbose = verbose)

  # Return the object explicitly (and visibly), like the other
  # compute_*_network functions.
  return(hummus)
}

-------------------------------------------------------------------------------- /R/method_wrappers.R: --------------------------------------------------------------------------------
# Description: This file contains the wrapper functions for the methods that
# are used to compute the different layers of the multilayer network. The
# functions are called from the compute_*_network functions in layers.R
# For now, compute_atac_peak_network (cicero) and compute_tf_network
# (Omnipath / fake network) rely on wrapper functions defined here.
# The gene network method is still directly implemented
# in compute_gene_network in layers.R

#' @title Cicero wrapper function for the compute_atac_peak_network function
#'
#' @description This function is a wrapper for the compute_atac_peak_network
#' function in layers.R. It computes the peak network from scATAC-seq data
#' using Cicero. It returns a data frame with the peak network. The data frame
#' also contains the coaccess score for each edge. The coaccess score is the
#' probability that two peaks are accessible in the same cell. The coaccess
#' score is computed by Cicero. Edges are filtered based on the coaccess score.
#' Only edges with a coaccess score > threshold are kept.
#'
#' @param hummus A hummus object
#' @param atac_assay The name of the assay containing the scATAC-seq data
#' @param genome The genome object
#' @param window The window size used by Cicero to compute the coaccess score
#' @param number_cells_per_clusters The number of cells per cluster used by
#' Cicero to compute the coaccess score
#' @param sample_num The number of samples used by Cicero to compute the
#' coaccess score
#' @param seed The seed used by Cicero to compute the coaccess score
#' @param verbose The verbosity level
#' @param threshold The threshold used to filter edges based on the coaccess
#' score
#' @param reduction_method The method used by monocle3 to reduce the dimension
#' of the scATAC-seq data before defining the pseudocells. The default is UMAP.
#'
#' @return A data frame containing the peak network
#' @export
#'
run_cicero_wrapper <- function(
    hummus,
    atac_assay,
    genome,
    window,
    number_cells_per_clusters,
    sample_num,
    seed,
    verbose,
    threshold,
    reduction_method = "UMAP"
    ) {

  # functions that need to be renamed
  # NOTE(review): these local aliases are never referenced in this function;
  # kept as-is in case they work around a cicero/monocle3 lookup issue.
  int_elementMetadata <- SingleCellExperiment::int_elementMetadata
  counts <- SingleCellExperiment::counts

  # Obtain chromosome sizes
  chromosome_sizes <- data.frame(V1 = genome@seqinfo@seqnames,
                                 V2 = genome@seqinfo@seqlengths)

  # Get scATAC-seq data
  # NOTE(review): compute_gene_network reads counts with `$counts` while this
  # uses the `@counts` slot - confirm which assay class is expected here.
  scATAC <- as.matrix(hummus@assays[[atac_assay]]@counts)
  # Matrix to edgelist
  acc <- reshape2::melt(scATAC)
  colnames(acc) <- c("V1", "V2", "V3")

  # Prepare cicero input
  input_cds <- cicero::make_atac_cds(acc, binarize = TRUE) # Create CDS object
  set.seed(seed)

  # It is required that there is no empty cell. Fail right away: the original
  # only printed a message and then crashed later on missing coordinates.
  if (any(colSums(as.matrix(monocle3::exprs(input_cds))) == 0)) {
    stop("There is at least one cell with no signal. ",
         "Please remove empty cells before computing the peak network.")
  }

  # Calculating size factors using default method = mean-geometric-mean-total
  input_cds <- monocle3::estimate_size_factors(input_cds)
  # Preprocessing using LSI
  input_cds <- monocle3::preprocess_cds(input_cds, method = "LSI")
  # Dimensionality reduction using the requested method
  input_cds <- monocle3::reduce_dimension(
    input_cds,
    reduction_method = reduction_method,
    preprocess_method = "LSI")

  # Get reduced coordinates for the method actually used (the previous
  # hard-coded `$UMAP` returned NULL for any other reduction_method)
  reduced_coords <-
    SingleCellExperiment::reducedDims(input_cds)[[reduction_method]]
  if (is.null(reduced_coords)) {
    stop("No reduced coordinates found for reduction_method '",
         reduction_method, "'.")
  }

  # Compute pseudocells
  cicero_cds <- cicero::make_cicero_cds(
    input_cds,                          # Create a Cicero CDS object
    reduced_coordinates = reduced_coords,
    k = number_cells_per_clusters,      # number neighbors / Default = 50
    summary_stats = NULL,               # Default
    size_factor_normalize = TRUE,       # Default
    silent = FALSE)                     # Default

  connections <- cicero::run_cicero(
    cds = cicero_cds,                   # Infer peak-links
    genomic_coords = chromosome_sizes,
    window = window,                    # Default = 5e+05
    silent = FALSE,                     # Default = FALSE
    sample_num = sample_num)            # Default = 100

  # Remove NAs, double edges, and edges with coaccess score <= threshold.
  # NA removal used to be gated on `number of NAs > threshold`, which mixed
  # up a count with the score threshold; NAs are now always dropped.
  connections <- connections[!is.na(connections$coaccess), ]

  # Each edge is expected twice (A-B and B-A): build an orientation-free key
  peak1 <- as.character(connections$Peak1)
  peak2 <- as.character(connections$Peak2)
  pair_key <- ifelse(peak1 <= peak2,
                     paste(peak1, peak2, sep = ";"),
                     paste(peak2, peak1, sep = ";"))

  # Sort on the key, then keep the first occurrence of each pair. Same result
  # as taking every other row when each edge appears exactly twice, but it
  # does not silently shift if an edge appears only once.
  key_order <- order(pair_key, decreasing = TRUE)
  connections <- connections[key_order, ]
  connections <- connections[!duplicated(pair_key[key_order]), ]

  # Sort by decreasing coaccess score
  connections <- connections[order(connections$coaccess, decreasing = TRUE), ]
  # seq_len() is safe when the table is empty (1:0 would yield c(1, 0))
  rownames(connections) <- seq_len(nrow(connections))

  # Peak names 2x"-" to match bipartites
  connections$Peak1 <- gsub("_", "-", connections$Peak1)
  connections$Peak2 <- gsub("_", "-", connections$Peak2)

  # Remove edges with coaccess score <= threshold
  peak_network <- connections[which(connections$coaccess > threshold), ]

  if (verbose > 0) {
    cat("\n", dim(peak_network)[1], "peak edges with a coaccess score >",
        threshold, "were found.\n")
  }

  # Return peak network including edges with coaccess score > threshold
  return(peak_network)
}


#' @title Omnipath wrapper function for the compute_tf_network function
#' @description This function is a wrapper for the compute_tf_network function
#' in layers.R. It computes the TF network using the Omnipath database.
#' It returns a data frame with the TF network. The data frame is not weighted
#' and does not contain scores for the edges.
#' @param hummus A hummus object
#' @param organism NCBI taxonomy id passed to Omnipath (default 9606)
#' @param tfs A character vector of TFs, or NA/NULL to fetch them from the
#' hummus object with \code{get_tfs()}
#' @param gene_assay Name of the assay used by \code{get_tfs()} when
#' \code{tfs} is not provided
#' @param source_target 'AND' to keep edges between two considered TFs,
#' 'OR' to keep edges involving at least one considered TF
#' @param verbose The verbosity level
#' @return A data frame with the source and target gene symbols of each edge
run_omnipath_wrapper <- function(
    hummus,
    organism = 9606,
    tfs = NA,
    gene_assay = NULL,
    source_target = "AND",
    verbose = 1) {
  # The former defaults (`tfs = tfs`, ...) were self-referential and could
  # never be evaluated; they now mirror the defaults of compute_tf_network.

  # Get TF-TF interactions from Omnipath
  # NOTE(review): the query is sent with the user-supplied `tfs`, i.e.
  # possibly NA, before the TF list is resolved below - confirm how
  # OmnipathR interprets `partners = NA`.
  TF_PPI <- OmnipathR::import_post_translational_interactions(
    organism = organism, partners = tfs, source_target = source_target
  )

  if (verbose > 0) {
    cat("\tNumber of edges from Omnipath:", nrow(TF_PPI),
        "\nWill now be filtered to only those corresponding to specified tfs\n")
  }

  # Get tfs list (fetched from the hummus object if not provided)
  tfs <- .resolve_tfs(hummus = hummus,
                      tfs = tfs,
                      gene_assay = gene_assay,
                      verbose = verbose)

  # Filter out elements that are not TFs expressed in the dataset
  if (source_target == "AND") {
    TF_PPI <- TF_PPI[which(TF_PPI$source_genesymbol %in% tfs &
                           TF_PPI$target_genesymbol %in% tfs), ]
  } else if (source_target == "OR") {
    TF_PPI <- TF_PPI[which(TF_PPI$source_genesymbol %in% tfs |
                           TF_PPI$target_genesymbol %in% tfs), ]
  }

  # Get only source and target columns. Selected by name (the same columns
  # the filter above relies on) rather than by position.
  tf_network <- TF_PPI[, c("source_genesymbol", "target_genesymbol")]

  # Convert to data.frame from tibble
  tf_network <- as.data.frame(tf_network)

  # Check if there is any TF-TF edges otherwise add a fake node
  # and connect all TFs to it (to allow HuMMuS to run without impacting result)
  if (nrow(tf_network) == 0) {
    cat("No TF-TF edges from Omnipath for the given parameters.\n",
        "You can try to change the source_target parameter to 'OR' to get ",
        "TF-other protein interactions. Or try to import a network ",
        "computed externally. Right now, a network with all TFs connected ",
        "to a fake node is created, for HuMMuS analysis.\n",
        "It has no biological meaning but will allow to run the pipeline ",
        "as if no edges were present.\n",
        sep = "")
    tf_network <- run_tf_null_wrapper(
      hummus = hummus,
      organism = organism,
      tfs = tfs,
      gene_assay = gene_assay,
      verbose = verbose)  # was `verbose = )`, which ignored the caller's level
  }
  return(tf_network)
}

# Internal helper: resolve the list of TFs to use.
# When `tfs` is not provided (NULL or a single NA) the list is fetched from
# the hummus object with get_tfs(); otherwise it must be a character vector.
# The length check matters: `if (is.na(tfs))` fails on vectors longer than
# one (R >= 4.2) and on NULL.
.resolve_tfs <- function(hummus, tfs, gene_assay, verbose) {
  if (is.null(tfs) || (length(tfs) == 1 && is.na(tfs))) {
    return(get_tfs(hummus = hummus,
                   assay = gene_assay,
                   store_tfs = FALSE,
                   output_file = NULL,
                   verbose = verbose))
  }
  if (!is.character(tfs)) {
    stop("'tfs' argument needs to be a vector of characters ",
         "(e.g.: c('MYC', 'JAK1')).")
  }
  tfs
}

#' @title tf_null wrapper function for the tf_network function
#' @description This function is a wrapper for the tf_network function.
#' It builds a fake TF network in which every TF is connected to a single
#' fake node.
#'
#' @param hummus A hummus object
#' @param organism Unused here, kept for a signature parallel to
#' \code{run_omnipath_wrapper}
#' @param tfs A character vector of TFs, or NA/NULL to fetch them from the
#' hummus object with \code{get_tfs()}
#' @param gene_assay Name of the assay used by \code{get_tfs()} when
#' \code{tfs} is not provided
#' @param verbose The verbosity level
#' @return A data frame with columns \code{source} (the TFs) and
#' \code{target} (always \code{"fake_node"})
#'
run_tf_null_wrapper <- function(
    hummus,
    organism = 9606,
    tfs = NA,
    gene_assay = NULL,
    verbose = 1) {

  if (verbose > 0) {
    cat("Creating a fake TF network with all TFs connected to a fake node.\n")
  }

  # Get tfs list (fetched from the hummus object if not provided)
  tfs <- .resolve_tfs(hummus = hummus,
                      tfs = tfs,
                      gene_assay = gene_assay,
                      verbose = verbose)

  # One edge per TF towards the fake node, built in one vectorised call
  # instead of growing a data frame with rbind() inside a loop.
  FAKE_NODE <- "fake_node"
  tf_network <- data.frame(source = tfs,
                           target = rep(FAKE_NODE, length(tfs)),
                           stringsAsFactors = FALSE)
  return(tf_network)
}
-------------------------------------------------------------------------------- /R/utils.R: --------------------------------------------------------------------------------
`%||%` <- rlang::`%||%`

#' @title Extract TF names from scRNA data and tf2motifs
#'
#' @param hummus (Hummus_Object) - Hummus object with a motifs_db slot.
#' @param assay (character) - Name of the gene assay used to restrict the TFs
#' to the genes present in it. If NULL, all TFs with motifs are returned.
#' @param store_tfs (bool) - Write the TFs to \code{output_file}.
#' Default: TRUE.
#' @param output_file (character) - Path to output file.
#' @param verbose (integer) - Verbosity level. Default: 0.
#'
#' @return TFs (vector(character)) - List of TFs with motifs (taken from
#' \code{hummus@motifs_db@tf2motifs$tf}), restricted to the genes of
#' \code{assay} when an assay is provided.
#' @export
#'
get_tfs <- function(
    hummus,
    assay = NULL,
    store_tfs = TRUE,
    output_file = NULL,
    verbose = 0
    ) {
  # Fail early if the TFs should be stored but no file was given
  if (store_tfs && is.null(output_file)) {
    stop("Please provide an output file name")
  }

  # Check if the hummus object has motifs_db slot
  if (is.null(hummus@motifs_db)) {
    stop("The hummus object does not have a motifs_db slot")
  }

  # All TFs that have at least one motif
  motif_tfs <- unique(as.character(hummus@motifs_db@tf2motifs$tf))

  if (!is.null(assay)) {
    # Check if the assay is present in the seurat object
    if (!assay %in% names(hummus@assays)) {
      stop("The gene assay is not present in the seurat object")
    }
    # Keep only TFs that are features of the assay
    expr_genes <- rownames(hummus@assays[[assay]])
    tfs <- intersect(motif_tfs, expr_genes)
    if (verbose > 0) {
      cat("\t", length(tfs), "TFs expressed\n")
    }
  } else { # If no assay is provided, get all TFs with motifs
    tfs <- motif_tfs
    if (verbose > 0) {
      cat("\t", length(tfs), "TFs with motif.",
          "No check if expressed or not.\n")
    }
  }

  # Store TFs in a file if specified
  if (store_tfs) {
    write.table(tfs, output_file, # Store TFs
                col.names = FALSE, row.names = FALSE, quote = FALSE, sep = "\t")
  }

  return(tfs)
}

# Code from Pando github.com/quadbiolab/Pando
# Lookup table from a summary name to the column-wise sparseMatrixStats
# function implementing it (used by aggregate_matrix).
#' @import sparseMatrixStats
summary_fun <- list(
  "mean" = sparseMatrixStats::colMeans2,
  "median" = sparseMatrixStats::colMedians,
  "max" = sparseMatrixStats::colMaxs,
  "min" = sparseMatrixStats::colMins,
  "count" = sparseMatrixStats::colCounts,
  "any" = sparseMatrixStats::colAnys,
  "all" = sparseMatrixStats::colAlls,
  "sd" = sparseMatrixStats::colSds,
  "mad" = sparseMatrixStats::colMads
)

#' Copy of the aggregate.Matrix function from the Matrix.utils package,
#' since this is off CRAN and does not seem to be maintained anymore
#' internally
#'
#' @param x A matrix-like object; converted to a sparse Matrix if needed.
#' @param groupings A vector or data.frame of grouping variables, one entry
#' (row) per row of \code{x}.
#' @param form Optional formula used to build the group mapping
#' (default \code{~0+.}).
#' @param fun One of "sum" (default), "count" or "mean".
#' @param ... Unused.
#'
fast_aggregate <- function(
    x,
    groupings = NULL,
    form = NULL,
    fun = "sum",
    ...
    ) {
  # Namespaces are spelled out (methods::, Matrix::, stats::) so the function
  # does not depend on which packages happen to be attached.
  if (!methods::is(x, "Matrix")) {
    x <- Matrix::Matrix(as.matrix(x), sparse = TRUE)
  }
  if (fun == "count") {
    x <- x != 0
  }
  groupings2 <- groupings
  if (!methods::is(groupings2, "data.frame")) {
    groupings2 <- as.data.frame(groupings2)
  }
  # Collapse all grouping variables into a single factor column "A"
  groupings2 <- data.frame(lapply(groupings2, as.factor))
  groupings2 <- data.frame(interaction(groupings2, sep = "_"))
  colnames(groupings2) <- "A"
  if (is.null(form)) {
    form <- stats::as.formula("~0+.")
  }
  form <- stats::as.formula(form)
  # Sparse indicator matrix (rows of x by groups)
  mapping <- dMcast(groupings2, form)
  # Drop the leading "A" that the column name adds to each level name
  colnames(mapping) <- substring(colnames(mapping), 2)
  result <- Matrix::t(mapping) %*% x
  if (fun == "mean") {
    result@x <- result@x / (fast_aggregate(x, groupings2, fun = "count"))@x
  }
  attr(result, "crosswalk") <- grr::extract(groupings, match(rownames(result),
                                                             groupings2$A))
  return(result)
}

#' Copy of the dMcast function from the Matrix.utils package,
#' since this is off CRAN and does not seem to be maintained anymore
#' internally
#'
#' @param data A data.frame.
#' @param formula A formula describing the variables to cast.
#' @param fun.aggregate Aggregation passed to \code{fast_aggregate} when the
#' formula has a response. Default "sum".
#' @param value.var Optional name of the column of \code{data} holding the
#' values; 1 is used when NULL.
#' @param as.factors Convert every column to a factor first.
#' @param factor.nas Turn NA into an explicit "NA" level.
#' @param drop.unused.levels Drop unused factor levels.
#'
dMcast <- function(
    data,
    formula,
    fun.aggregate = "sum",
    value.var = NULL,
    as.factors = FALSE,
    factor.nas = TRUE,
    drop.unused.levels = TRUE
    ) {
  values <- 1
  if (!is.null(value.var)) {
    values <- data[, value.var]
  }
  alltms <- stats::terms(formula, data = data)
  response <- rownames(attr(alltms, "factors"))[attr(alltms, "response")]
  tm <- attr(alltms, "term.labels")
  interactionsIndex <- grep(":", tm)
  interactions <- tm[interactionsIndex]
  simple <- setdiff(tm, interactions)
  i2 <- strsplit(interactions, ":")
  # Interaction terms a:b are rewritten as paste(a, b, sep = '_')
  newterms <- unlist(lapply(i2, function(x) {
    paste("paste(", paste(x, collapse = ","), ",", "sep='_'", ")")
  }))
  newterms <- c(simple, newterms)
  newformula <- stats::as.formula(paste("~0+", paste(newterms, collapse = "+")))
  allvars <- all.vars(alltms)
  data <- data[, c(allvars), drop = FALSE]
  if (as.factors)
    data <- data.frame(lapply(data, as.factor))
  characters <- unlist(lapply(data, is.character))
  data[, characters] <- lapply(data[, characters, drop = FALSE], as.factor)
  factors <- unlist(lapply(data, is.factor))
  # Prevents errors with 1 or fewer distinct levels
  data[, factors] <- lapply(data[, factors, drop = FALSE], function(x) {
    if (factor.nas) {
      if (any(is.na(x))) {
        levels(x) <- c(levels(x), "NA")
        x[is.na(x)] <- "NA"
      }
    }
    if (drop.unused.levels) {
      if (nlevels(x) != length(stats::na.omit(unique(x)))) {
        x <- factor(as.character(x))
      }
    }
    y <- stats::contrasts(x, contrasts = FALSE, sparse = TRUE)
    attr(x, "contrasts") <- y
    return(x)
  })
  # Allows NAs to pass
  attr(data, "na.action") <- stats::na.pass
  # `drop.unused.levels` is the documented argument name; the previous
  # `.unused.levels` did not match any formal. FALSE is also the default.
  result <- Matrix::sparse.model.matrix(newformula,
                                        data, drop.unused.levels = FALSE,
                                        row.names = FALSE)
  # Clean up the column names produced by the paste(...) interaction terms
  brokenNames <- grep("paste(", colnames(result), fixed = TRUE)
  colnames(result)[brokenNames] <- vapply(
    colnames(result)[brokenNames],
    function(x) {
      x <- gsub("paste(", replacement = "", x = x, fixed = TRUE)
      x <- gsub(pattern = ", ", replacement = "_", x = x, fixed = TRUE)
      x <- gsub(pattern = '_sep = \"_\")',
                replacement = "",
                x = x,
                fixed = TRUE)
      return(x)
    },
    character(1),
    USE.NAMES = FALSE)

  result <- result * values
  if (isTRUE(response > 0)) {
    responses <- all.vars(stats::terms(stats::as.formula(paste(response, "~0"))))
    result <- fast_aggregate(result,
                             data[, responses, drop = FALSE],
                             fun = fun.aggregate)
  }
  return(result)
}


#' Aggregate matrix over groups
#'
#' @import sparseMatrixStats
#'
#' @param x A matrix (dense or sparse) with one row per observation.
#' @param groups A character vector with the groups to aggregate over.
#' @param fun The summary function to be applied to each group.
#'
#' @return A summary matrix.
#'
#' @details \code{fun} is either a function applied column-wise to the rows
#' of each group, or one of the names "sum", "count", "mean", "median",
#' "max", "min", "any", "all", "sd", "mad". "sum" is only available when
#' \code{groups} has one entry per row of \code{x}.
#'
#' @export
aggregate_matrix <- function(
    x,
    groups = NULL,
    fun = "mean"
    ) {
  per_row_groups <- length(groups) == nrow(x)

  if (is.character(fun)) {
    # Fast sparse paths when there is one group label per row
    if (per_row_groups) {
      if (fun %in% c("count", "sum")) {
        agg_mat <- fast_aggregate(x = x, groupings = groups, fun = fun)
        return(agg_mat)
      }

      if (fun == "mean") {
        group_counts <- as.numeric(table(groups))
        agg_mat <- fast_aggregate(x = x, groupings = groups, fun = "sum")
        agg_mat <- agg_mat / group_counts
        return(agg_mat)
      }
    }

    # Otherwise resolve the name to a column-wise summary function. An
    # unknown name used to yield NULL and fail later with a cryptic
    # "attempt to apply non-function".
    fun_name <- fun
    fun <- summary_fun[[fun_name]]
    if (is.null(fun)) {
      stop("Unknown summary function '", fun_name, "'. Available: ",
           paste(names(summary_fun), collapse = ", "),
           " (plus 'sum' when groups has one entry per row of x).")
    }
  }

  if (per_row_groups) {
    # One column of summaries per group
    agg_mat <- sapply(levels(factor(groups)), function(g) {
      chunk <- x[which(groups == g), ]
      if (is.null(dim(chunk))) {
        # Single-row group: the row itself is returned
        return(chunk)
      } else {
        return(fun(chunk))
      }
    })
    agg_mat <- Matrix::Matrix(agg_mat, sparse = TRUE)
  } else if (length(groups) <= 1) {
    # A single group (or none): summarise all rows together
    agg_mat <- fun(x)
    agg_mat <- Matrix::Matrix(agg_mat, sparse = TRUE)
    colnames(agg_mat) <- groups
    rownames(agg_mat) <- colnames(x)
  } else {
    stop("Length of groups must be either nrow(x) or 1.")
  }
  return(Matrix::t(agg_mat))
}
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # HuMMuS 2 | 3 | ![pkgdown](https://github.com/cantinilab/HuMMuS/actions/workflows/pkgdown.yaml/badge.svg) 4 | [![doc-deployment](https://github.com/cantinilab/HuMMuS/actions/workflows/pages/pages-build-deployment/badge.svg)](https://github.com/cantinilab/HuMMuS/actions/workflows/pages/pages-build-deployment?theme=flickr) 5 | [![PyPI version](https://img.shields.io/pypi/v/hummuspy?color=blue)](https://pypi.org/project/hummuspy/) 6 | 7 | ### Heterogeneous Multilayer network for Multi-omics Single-cell data 8 | 9 | HuMMuS exploits
multi-omics single-cell measurements to infer numerous regulatory mechanisms. 10 | Inter-omics interactions (e.g. peak-gene, TF-peak) and intra-omics interactions (e.g. peak-peak, gene-gene, TF-TF) are considered to capture both regulatory interactions and macromolecule cooperations. 11 | 12 | ## Overview 13 | 14 | The current outputs available from HuMMuS are 15 | 16 | * gene regulatory networks (GRNs) 17 | * enhancers 18 | * TF - DNA binding regions 19 | * TF - target genes. 20 | 21 | #### [Read our publication](https://academic.oup.com/bioinformatics/advance-article/doi/10.1093/bioinformatics/btae143/7625061) for more details ! 22 | 23 | 24 | ### **scRNA + scATAC** 25 | Like most current state-of-the-art methods to infer GRN, we provide a standard version of HuMMuS based on scRNA-seq + scATAC-seq data (paired or **unpaired**). 26 | 27 | ### **Additional modalities** 28 | HuMMuS has been developed to be extendable to any additional biological modality of interest. 29 | It is then possible to add any additional network to an already **existing modality** (e.g. both prior-knowledge network and data-driven network of genes), or from a **new modality** (e.g. adding epigenetic or proteomic networks). 30 |
_For now, such personalisation requires directly using some hummuspy (Python package) functions at the end of the pipeline and writing some configuration files manually. It will be simplified soon !_ 31 | 32 | ## Tutorials/Vignettes 33 | 34 | * [**Infer a gene regulatory network and other outputs from unpaired/paired scRNA + scATAC data**](https://cantinilab.github.io/HuMMuS/articles/chen_vignette.html) shows the application of HuMMuS to the Chen dataset, used in the benchmark of the [HuMMuS publication](https://academic.oup.com/bioinformatics/advance-article/doi/10.1093/bioinformatics/btae143/7625061). 35 | 36 | ## Installation 37 | HuMMuS is (for now!) only available in R and requires the hummuspy Python library. Be sure to set up a virtual environment with `hummuspy` and make use of the `reticulate` R library to connect the two parts. 38 | 39 | ### HuMMuS Python dependency 40 | Python package **hummuspy** should preferably be installed using pip (from the terminal in a conda environment for example) 41 | ```bash 42 | conda create -n hummuspy_env python 43 | conda activate hummuspy_env 44 | pip install hummuspy 45 | ``` 46 | 47 | Alternatively, you can also install it directly from R using the reticulate package: 48 | ```r 49 | library(reticulate) 50 | py_install("hummuspy", envname = "r-reticulate", method="auto") 51 | ``` 52 | 53 | ### HuMMuS R package 54 | The core R package can be installed directly from R: 55 | ```r 56 | devtools::install_github("cantinilab/HuMMuS", ref="dev_SeuratV5") 57 | 58 | # If you only work with SeuratV4, you can also use the main branch that will soon be deprecated 59 | #devtools::install_github("cantinilab/HuMMuS") 60 | ``` 61 | 62 | Before running HuMMuS, if you're using multiple conda environments, you must ensure that the `reticulate` package points to the virtual environment where hummuspy is installed.
You can specify it at the beginning of your R script, e.g.: 63 | 64 | ```r 65 | library(reticulate) 66 | # Using a specific conda environment 67 | envname = "hummuspy_env" # or "r-reticulate" for, e.g.: 68 | use_condaenv(envname, required = TRUE) 69 | ``` 70 | 71 | For more details on how to set up the reticulate connection, 72 | see: https://rstudio.github.io/reticulate 73 | 74 | ### scATAC processing 75 | To compute the scATAC data with HuMMuS, we propose to use [Cicero](https://cole-trapnell-lab.github.io/cicero-release/docs_m3/). It requires the version running with [Monocle3](https://cole-trapnell-lab.github.io/monocle3/). 76 | You then need to install both [Monocle3](https://cole-trapnell-lab.github.io/monocle3/docs/installation/), and Cicero: 77 | 78 | ```r 79 | devtools::install_github("cole-trapnell-lab/monocle3") 80 | devtools::install_github("cole-trapnell-lab/cicero-release", ref = "monocle3") 81 | ``` 82 | *If you encounter some troubles with Monocle3 installation, on Ubuntu you can try to run: `sudo apt-get install libgdal-dev libgeos-dev libproj-dev`. You can also go on [their GitHub page](https://github.com/cole-trapnell-lab/monocle3/issues) for more help. Having Monocle version 1 or 2 still loaded in your R session may cause conflicts. If you encounter some even after restarting your R session, try to `remove.packages("monocle")` before reinstalling both Monocle**3** and Cicero* 83 | 84 | Alternatively, we recently developed [`Circe`](https://github.com/cantinilab/Circe/tree/main), a Python package that replaces Cicero and adds some functionality. 85 | `Circe` should be much faster but will require to generate first the network in python, before loading it in your R session. Since the preprocessing is different from Cicero, results will also differ. 
86 | 87 | ## Data accessibility 88 | 89 | To reproduce HuMMuS results presented in the manuscript, preprocessed data [are accessible here](https://figshare.com/projects/Molecular_mechanisms_reconstruction_from_single-cell_multi-omics_data_with_HuMMuS/168899) 90 |
For quick tests, the preprocessed Chen dataset is accessible directly through the package as a Seurat object: `data(chen_dataset)`, along with a subset version: `data(chen_dataset_subset)`. 91 | 92 | ## Recommendations for users, issues and bugfixes 93 | - We recommend using common gene symbols for defining gene names (e.g. human: MYC, NFKB2, mouse: Myc, Nfkb2). This is especially useful when using HuMMuS functions that query external tools, like [`compute_tf_network`](https://cantinilab.github.io/HuMMuS/reference/compute_tf_network.html), which can query [`Omnipath`](https://omnipathdb.org/). 94 | - Currently, HuMMuS supports only the double `-` separator for genomic coordinates, e.g. `chr1-13354210-27462910`. We strongly recommend always using this format for genomic coordinates to optimise the creation of the Hummus object and the execution of the tool. 95 | - We recommend installing the `hummuspy` Python library in a clean conda or virtual environment. 96 | - We use the [`dask`](https://www.dask.org/) library for parallelisation of some tasks in the Python side of HuMMuS. In case you encounter some issues with parallelisation, try the following: 97 | - If you are running HuMMuS through an HPC job scheduler (like SLURM), try assigning a specific amount of RAM to each core (for example, 10GB per core) rather than a global pool of memory, while reducing the total number of cores. 98 | - Try reducing the size of the networks, e.g. by retaining only the top % of edges in the GRN or the peak-peak network. 99 | - We are currently testing [this branch](https://github.com/cantinilab/HuMMuS/tree/dask_update) to improve memory management with HuMMuS. You can install this branch in a clean environment and run HuMMuS using that version through reticulate.
To do so, you can execute the following: 100 | - `pip install git+https://github.com/cantinilab/HuMMuS.git@17136df93101d84910dfa9297232132990ff965b#subdirectory=hummuspy` (**NOTE** here it is not necessary to specify the memory per core, just let the HPC scheduler allocate it) 101 | 102 | ## Cite us 103 | Trimbour R., Deutschmann I. M., Cantini L. Molecular mechanisms reconstruction from single-cell multi-omics data with HuMMuS. Bioinformatics (2024), btae143. doi: https://doi.org/10.1093/bioinformatics/btae143 104 | -------------------------------------------------------------------------------- /data/chen_dataset.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cantinilab/HuMMuS/c86df373ebd78cc33bf33a3236e3f4931d7a7c60/data/chen_dataset.rda -------------------------------------------------------------------------------- /data/chen_dataset_subset.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cantinilab/HuMMuS/c86df373ebd78cc33bf33a3236e3f4931d7a7c60/data/chen_dataset_subset.rda -------------------------------------------------------------------------------- /data/chen_subset_hummus.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cantinilab/HuMMuS/c86df373ebd78cc33bf33a3236e3f4931d7a7c60/data/chen_subset_hummus.rda -------------------------------------------------------------------------------- /data/human_pwms_v2.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cantinilab/HuMMuS/c86df373ebd78cc33bf33a3236e3f4931d7a7c60/data/human_pwms_v2.rda -------------------------------------------------------------------------------- /data/mouse_pwms_v2.rda: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cantinilab/HuMMuS/c86df373ebd78cc33bf33a3236e3f4931d7a7c60/data/mouse_pwms_v2.rda -------------------------------------------------------------------------------- /data/real_example/hESC_Chen_TFs.tsv: -------------------------------------------------------------------------------- 1 | ARID3A 2 | ARID3B 3 | ARID2 4 | ARNT2 5 | ATF2 6 | ATF6 7 | ATF1 8 | ATF4 9 | BACH1 10 | ATF3 11 | ATF6B 12 | AR 13 | BBX 14 | -------------------------------------------------------------------------------- /data/toy_example_multilayer/bipartites/atac_rna: -------------------------------------------------------------------------------- 1 | '6' '9' 2 | '8' '10' 3 | -------------------------------------------------------------------------------- /data/toy_example_multilayer/bipartites/tf_atac: -------------------------------------------------------------------------------- 1 | '1' '6' 2 | '1' '5' 3 | -------------------------------------------------------------------------------- /data/toy_example_multilayer/layers/layers_atac/atac_network.tsv: -------------------------------------------------------------------------------- 1 | '5' '6' 2 | '7' '8' 3 | '5' '8' 4 | -------------------------------------------------------------------------------- /data/toy_example_multilayer/layers/layers_rna/rna_network: -------------------------------------------------------------------------------- 1 | '9' '10' 2 | '11' '12' 3 | '10' '12' 4 | -------------------------------------------------------------------------------- /data/toy_example_multilayer/layers/layers_tf/tf_network: -------------------------------------------------------------------------------- 1 | '1' '2' 2 | '3' '4' 3 | '1' '4' 4 | -------------------------------------------------------------------------------- /data/toy_example_multilayer/seeds/'1'.txt: -------------------------------------------------------------------------------- 1 | '1' 2 | 
-------------------------------------------------------------------------------- /data/toy_example_multilayer/seeds/1 6.txt: -------------------------------------------------------------------------------- 1 | 1 6 2 | -------------------------------------------------------------------------------- /hummuspy/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cantinilab/HuMMuS/c86df373ebd78cc33bf33a3236e3f4931d7a7c60/hummuspy/LICENSE -------------------------------------------------------------------------------- /hummuspy/README.md: -------------------------------------------------------------------------------- 1 | Python framework of HuMMuS. 2 | 3 | It is the backbone code running when processing random walk exploration with the R HuMMuS package. -------------------------------------------------------------------------------- /hummuspy/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "hummuspy" 3 | version = "0.1.7.post3" 4 | description = "HuMMuS is a novel method for the inference of regulatory mechanisms from multi-omics data with any type and number of omics, through a heterogeneous multilayer network framework." 
5 | authors = ["Rémi Trimbour "] 6 | license = "GPL-3.0-only" 7 | readme = "README.md" 8 | 9 | [tool.poetry.dependencies] 10 | python = ">=3.8" 11 | multixrank = ">=0.1, <0.4" 12 | joblib = "^1.3.0" 13 | tqdm = ">=4.66.3" 14 | numpy = ">1.24.2, <2.0.0" 15 | pandas = "^2.0.0" 16 | pyyaml = "^6.0" 17 | matplotlib = "^3.4.3" 18 | scipy = "^1.8.0" 19 | distributed = ">=2023.0.0" 20 | dask = ">=2023.0.0" 21 | rich = ">=10.12.0" 22 | bokeh = ">=2.4.2,!=3.0.*" 23 | 24 | [build-system] 25 | requires = ["poetry-core"] 26 | build-backend = "poetry.core.masonry.api" 27 | -------------------------------------------------------------------------------- /hummuspy/src/hummuspy/__init__.py: -------------------------------------------------------------------------------- 1 | import hummuspy.config 2 | import hummuspy.explore_network 3 | import hummuspy.core_grn 4 | -------------------------------------------------------------------------------- /hummuspy/src/hummuspy/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cantinilab/HuMMuS/c86df373ebd78cc33bf33a3236e3f4931d7a7c60/hummuspy/src/hummuspy/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /hummuspy/src/hummuspy/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cantinilab/HuMMuS/c86df373ebd78cc33bf33a3236e3f4931d7a7c60/hummuspy/src/hummuspy/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /hummuspy/src/hummuspy/__pycache__/__init__.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cantinilab/HuMMuS/c86df373ebd78cc33bf33a3236e3f4931d7a7c60/hummuspy/src/hummuspy/__pycache__/__init__.cpython-312.pyc 
-------------------------------------------------------------------------------- /hummuspy/src/hummuspy/__pycache__/explore_network.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cantinilab/HuMMuS/c86df373ebd78cc33bf33a3236e3f4931d7a7c60/hummuspy/src/hummuspy/__pycache__/explore_network.cpython-310.pyc -------------------------------------------------------------------------------- /hummuspy/src/hummuspy/__pycache__/explore_network.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cantinilab/HuMMuS/c86df373ebd78cc33bf33a3236e3f4931d7a7c60/hummuspy/src/hummuspy/__pycache__/explore_network.cpython-311.pyc -------------------------------------------------------------------------------- /hummuspy/src/hummuspy/__pycache__/explore_network.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cantinilab/HuMMuS/c86df373ebd78cc33bf33a3236e3f4931d7a7c60/hummuspy/src/hummuspy/__pycache__/explore_network.cpython-312.pyc -------------------------------------------------------------------------------- /hummuspy/src/hummuspy/explore_network.py: -------------------------------------------------------------------------------- 1 | from hummuspy.create_multilayer import Multixrank 2 | import os 3 | import numpy 4 | import pandas 5 | 6 | 7 | def compute_RandomWalk( 8 | multiplex, 9 | bipartite, 10 | eta, 11 | lamb, 12 | seeds, 13 | self_loops=True, 14 | restart_proba=0.7, 15 | pr=None, 16 | save=True, 17 | output_f=None, 18 | return_df=True, 19 | spec_layer_result_saved='all', 20 | n_jobs=1): 21 | """Compute a random walk ranking from a list of seeds. 22 | 23 | Parameters 24 | ---------- 25 | multiplex, bipartite, eta, lamb, self_loops, restart_proba, pr 26 | Passed unchanged to the Multixrank constructor. 27 | save : bool, optional 28 | If True, write the result to output_f as TSV. The default is True. 29 | seeds : list 30 | List of seeds (joined with '_' to fill the seed column).
31 | output_f : str, optional 32 | Output file path, required if save is True. The default is None. 33 | spec_layer_result_saved : str or list, optional 34 | Layer name(s) to keep in the result. The default is 'all'. 35 | return_df : bool, optional 36 | If True, the ranking dataframe is returned. 37 | The default is True. 38 | n_jobs : int, optional 39 | Not used by this function. The default is 1. 40 | 41 | Returns 42 | ------- 43 | ranking_df : pd.DataFrame 44 | Ranking sorted by decreasing score, rows with score > 0 only. 45 | Structure: 46 | layer : str 47 | Name of the target layer. 48 | target : str 49 | Name of the target. 50 | path_layer : str 51 | Name of the layer of the path. 52 | score : float 53 | Score of the random walk. 54 | seed : str 55 | Name of the seed. 56 | 57 | Examples 58 | -------- 59 | >>> import hummuspy 60 | >>> # multiplex, bipartite, eta and lamb describe the multilayer; 61 | >>> # they are handed to Multixrank as given. 62 | >>> seeds = ['seed1'] 63 | >>> df = hummuspy.explore_network.compute_RandomWalk( 64 | ... multiplex, 65 | ... bipartite, 66 | ... eta, 67 | ... lamb, 68 | ... seeds, 69 | ... save=False, 70 | ... spec_layer_result_saved='all') # or 'TF' 71 | """ 72 | 73 | # build the Multixrank object and run the random walk ranking 74 | multixrank_obj = Multixrank( 75 | multiplex=multiplex, 76 | bipartite=bipartite, 77 | eta=eta, 78 | lamb=lamb, 79 | seeds=seeds, 80 | self_loops=self_loops, 81 | restart_proba=restart_proba, 82 | pr=pr) 83 | ranking_df = multixrank_obj.random_walk_rank().sort_values( 84 | by='score', 85 | ascending=False) 86 | 87 | # tag every row with the joined seed names, then filter the results 88 | ranking_df['seed'] = '_'.join(seeds) 89 | ranking_df = ranking_df[ranking_df.score > 0] # keep only rows with a positive score
90 | ranking_df.columns = ['layer', 'target', 'path_layer', 'score', 'seed'] 91 | if spec_layer_result_saved != 'all': 92 | if type(spec_layer_result_saved) is str: 93 | spec_layer_result_saved = [spec_layer_result_saved] 94 | ranking_df = ranking_df[ranking_df['layer'].isin( 95 | spec_layer_result_saved)] 96 | 97 | if save: 98 | assert output_f is not None, 'You need to provide an output_f name' +\ 99 | ' to save the random walks result' 100 | ranking_df.to_csv(output_f, sep='\t', index=False, header=True) 101 | if return_df: 102 | return ranking_df 103 | 104 | 105 | def compute_multiple_RandomWalk( 106 | multiplex, 107 | bipartite, 108 | eta, 109 | lamb, 110 | seeds, 111 | self_loops=True, 112 | restart_proba=0.7, 113 | pr=None, 114 | save=True, 115 | output_f=None, 116 | return_df=True, 117 | spec_layer_result_saved='all', 118 | n_jobs=1): 119 | """Compute per-seed random walks for a list of seeds. 120 | 121 | Parameters 122 | ---------- 123 | multiplex, bipartite, eta, lamb, self_loops, restart_proba, pr 124 | Passed unchanged to the Multixrank constructor. 125 | save : bool, optional 126 | If True, write the result to output_f as TSV. The default is True. 127 | seeds : list 128 | List of seeds. 129 | output_f : str, optional 130 | Output file path, required if save is True. The default is None. 131 | spec_layer_result_saved : str or list, optional 132 | Layer name(s) to keep in the result. The default is 'all'. 133 | return_df : bool, optional 134 | If True, the ranking dataframe is returned. 135 | The default is True. 136 | n_jobs : int, optional 137 | Passed to per_seed_random_walk_rank. The default is 1. 138 | 139 | Returns 140 | ------- 141 | ranking_df : pd.DataFrame 142 | Ranking sorted by decreasing score, rows with score > 0 only. 143 | Structure (reset_index() also adds the previous index as a column): 144 | layer : str 145 | Name of the target layer. 146 | target : str 147 | Name of the target. 148 | path_layer : str 149 | Name of the layer of the path. 150 | score : float 151 | Score of the random walk. 152 | seed : str 153 | Name of the seed.
154 | 155 | Examples 156 | -------- 157 | >>> import hummuspy 158 | >>> # multiplex, bipartite, eta and lamb describe the multilayer; 159 | >>> # they are handed to Multixrank as given. 160 | >>> seeds = ['seed1', 'seed2'] 161 | >>> df = hummuspy.explore_network.compute_multiple_RandomWalk( 162 | ... multiplex, 163 | ... bipartite, 164 | ... eta, 165 | ... lamb, 166 | ... seeds, 167 | ... save=False, 168 | ... n_jobs=5) 169 | """ 170 | 171 | # build the Multixrank object 172 | multixrank_obj = Multixrank( 173 | multiplex=multiplex, 174 | bipartite=bipartite, 175 | eta=eta, 176 | lamb=lamb, 177 | seeds=seeds, 178 | self_loops=self_loops, 179 | restart_proba=restart_proba, 180 | pr=pr) 181 | 182 | ranking_df = multixrank_obj.per_seed_random_walk_rank( 183 | n_jobs=n_jobs).sort_values(by='score', ascending=False) 184 | 185 | # filter df results (presumably the per-seed ranking already has a seed column - TODO confirm) 186 | ranking_df = ranking_df[ranking_df.score > 0] # keep only rows with a positive score 187 | ranking_df.columns = ['layer', 'target', 'path_layer', 'score', 'seed'] 188 | if spec_layer_result_saved != 'all': 189 | if type(spec_layer_result_saved) is str: 190 | spec_layer_result_saved = [spec_layer_result_saved] 191 | ranking_df = ranking_df[ranking_df['layer'].isin( 192 | spec_layer_result_saved)] 193 | 194 | ranking_df = ranking_df.reset_index() 195 | 196 | if save: 197 | assert output_f is not None, 'You need to provide an output_f name' +\ 198 | ' to save the random walks result' 199 | ranking_df.to_csv(output_f, sep='\t', index=False, header=True) 200 | if return_df: 201 | return ranking_df 202 | -------------------------------------------------------------------------------- /man/DefaultAssay.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hummus_objet.R 3 | \name{DefaultAssay} 4 | \alias{DefaultAssay} 5 | \alias{DefaultAssay.Hummus_Object} 6 | \alias{DefaultAssay<-} 7 | \title{Get Default assays of
Hummus_Object (based on Seurat)} 8 | \usage{ 9 | \method{DefaultAssay}{Hummus_Object}(object, ...) 10 | 11 | DefaultAssay(object, ...) 12 | 13 | DefaultAssay(object, ...) <- value 14 | } 15 | \arguments{ 16 | \item{object}{An object} 17 | 18 | \item{value}{Name of assay to set as default} 19 | } 20 | \value{ 21 | \code{DefaultAssay}: The name of the default assay 22 | 23 | \code{DefaultAssay<-}: An object with the default assay updated 24 | } 25 | \description{ 26 | Get and set the default assay 27 | } 28 | \examples{ 29 | # Get current default assay 30 | DefaultAssay(object = pbmc_small) 31 | 32 | } 33 | \concept{data-access} 34 | -------------------------------------------------------------------------------- /man/Hummus_Object-class.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hummus_objet.R 3 | \docType{class} 4 | \name{Hummus_Object-class} 5 | \alias{Hummus_Object-class} 6 | \alias{Hummus_Object} 7 | \title{The Hummus_Object class} 8 | \description{ 9 | The Hummus_Object object is an extended \code{Seurat} object 10 | for the storage and analysis of a heterogeneous multilayer network 11 | } 12 | \section{Slots}{ 13 | 14 | \describe{ 15 | \item{\code{multilayer}}{(multilayer) - Multilayer object} 16 | 17 | \item{\code{motifs_db}}{(motifs_db) - Motifs database} 18 | 19 | \item{\code{assay}}{(list) - List of assays} 20 | }} 21 | 22 | -------------------------------------------------------------------------------- /man/Initiate_Hummus_Object.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hummus_objet.R 3 | \name{Initiate_Hummus_Object} 4 | \alias{Initiate_Hummus_Object} 5 | \title{Initiate a hummus object} 6 | \usage{ 7 | Initiate_Hummus_Object( 8 | seurat_assays, 9 | active.assay = NULL, 10 | multilayer = NULL, 11 | 
motifs_db = NULL 12 | ) 13 | } 14 | \arguments{ 15 | \item{seurat_assays}{A Seurat object or a list of Seurat assays} 16 | 17 | \item{active.assay}{The name of the active assay. Default: NULL} 18 | 19 | \item{multilayer}{A multilayer object. Default: NULL} 20 | 21 | \item{motifs_db}{A motifs_db object. Default: NULL} 22 | } 23 | \value{ 24 | A hummus object 25 | } 26 | \description{ 27 | Initiate a hummus object 28 | } 29 | \examples{ 30 | seurat_object <- Seurat::CreateSeuratObject(counts = matrix(rnorm(1000), nrow = 100, ncol = 10)) 31 | hummus <- Initiate_Hummus_Object(seurat_object) 32 | hummus 33 | } 34 | -------------------------------------------------------------------------------- /man/VariableFeatures.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hummus_objet.R 3 | \name{VariableFeatures} 4 | \alias{VariableFeatures} 5 | \alias{VariableFeatures.Hummus_Object} 6 | \alias{VariableFeatures<-} 7 | \title{Variable features of assays in Hummus_Object (based on Seurat)} 8 | \usage{ 9 | \method{VariableFeatures}{Hummus_Object}( 10 | object, 11 | method = NULL, 12 | assay = NULL, 13 | nfeatures = NULL, 14 | layer = NA, 15 | simplify = TRUE, 16 | selection.method = lifecycle::deprecated(), 17 | ... 18 | ) 19 | 20 | VariableFeatures(object, method = NULL, ...) 21 | 22 | VariableFeatures(object, ...)
<- value 23 | } 24 | \arguments{ 25 | \item{value}{A character vector of variable features} 26 | } 27 | \value{ 28 | \code{VariableFeatures}: a vector of the variable features 29 | } 30 | \description{ 31 | Variable features of assays in Hummus_Object (based on Seurat) 32 | } 33 | -------------------------------------------------------------------------------- /man/add_network.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hummus_objet.R 3 | \name{add_network} 4 | \alias{add_network} 5 | \title{Add a network to a multiplex, a multilayer or an hummus object} 6 | \usage{ 7 | add_network( 8 | object, 9 | network, 10 | network_name, 11 | multiplex_name = NULL, 12 | directed = FALSE, 13 | weighted = FALSE, 14 | verbose = 1 15 | ) 16 | } 17 | \arguments{ 18 | \item{object}{A multiplex, a multilayer or an hummus object} 19 | 20 | \item{network}{A network (edge list)} 21 | 22 | \item{network_name}{The name of the network} 23 | 24 | \item{multiplex_name}{The name of the multiplex. Default: NULL if object is a 25 | multiplex already only} 26 | 27 | \item{directed}{Logical indicating if the network is directed. Default: FALSE} 28 | 29 | \item{weighted}{Logical indicating if the network is weighted. Default: FALSE} 30 | 31 | \item{verbose}{(integer) - Display function messages. 
Set to 0 for no 32 | message displayed, >= 1 for more details.} 33 | } 34 | \value{ 35 | A multiplex, a multilayer or an hummus object with the added network 36 | } 37 | \description{ 38 | Add a network to a multiplex, a multilayer or an hummus object 39 | } 40 | \examples{ 41 | hummus <- add_network( 42 | object = hummus, 43 | network = atac_peak_network, 44 | network_name = network_name, 45 | multiplex_name = multiplex_name, 46 | weighted = TRUE, 47 | directed = FALSE) 48 | 49 | } 50 | -------------------------------------------------------------------------------- /man/aggregate_matrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{aggregate_matrix} 4 | \alias{aggregate_matrix} 5 | \title{Aggregate matrix over groups} 6 | \usage{ 7 | aggregate_matrix(x, groups = NULL, fun = "mean") 8 | } 9 | \arguments{ 10 | \item{groups}{A character vector with the groups to aggregate over.} 11 | 12 | \item{fun}{The summary function to be applied to each group.} 13 | } 14 | \value{ 15 | A summary matrix. 16 | } 17 | \description{ 18 | Aggregate matrix over groups 19 | } 20 | -------------------------------------------------------------------------------- /man/bipartite-class.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hummus_objet.R 3 | \docType{class} 4 | \name{bipartite-class} 5 | \alias{bipartite-class} 6 | \alias{bipartite} 7 | \title{Bipartite class} 8 | \description{ 9 | Bipartite object stores a bipartite network (edge list) and the names of the 10 | left and right features' multiplexes. 
11 | } 12 | \section{Slots}{ 13 | 14 | \describe{ 15 | \item{\code{network}}{(data.frame) - Bipartite network (edge list)} 16 | 17 | \item{\code{multiplex_left}}{(character) - Left features' multiplex} 18 | 19 | \item{\code{multiplex_right}}{(character) - Right features' multiplex} 20 | }} 21 | 22 | \examples{ 23 | bipartite <- bipartite( 24 | network = bipartite_network, 25 | multiplex_left = "RNA", 26 | multiplex_right = "peaks") 27 | 28 | } 29 | -------------------------------------------------------------------------------- /man/bipartite_peaks2genes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bipartites.R 3 | \name{bipartite_peaks2genes} 4 | \alias{bipartite_peaks2genes} 5 | \title{Compute links between DNA regions and genenames} 6 | \usage{ 7 | bipartite_peaks2genes( 8 | hummus_object, 9 | gene_assay = "RNA", 10 | peak_assay = "peaks", 11 | gene_multiplex_name = NULL, 12 | peak_multiplex_name = NULL, 13 | peak_to_gene_method = "Signac", 14 | upstream = 500, 15 | downstream = 500, 16 | only_tss = TRUE, 17 | store_network = FALSE, 18 | output_file = NULL, 19 | bipartite_name = "atac_rna" 20 | ) 21 | } 22 | \arguments{ 23 | \item{hummus_object}{(hummus_object) - Hummus object.} 24 | 25 | \item{gene_assay}{(character) - Name of assay containing the gene expression 26 | data. Default: "RNA".} 27 | 28 | \item{peak_assay}{(character) - Name of the assay containing the DNA regions 29 | (ATAC peaks). Default: "peaks".} 30 | 31 | \item{gene_multiplex_name}{(character) - Name of the multiplex containing the 32 | genes. 33 | If NULL, the name of the gene assay is used.} 34 | 35 | \item{peak_multiplex_name}{(character) - Name of the multiplex containing the 36 | DNA regions (ATAC peaks). If NULL, the name of the peak assay is used.} 37 | 38 | \item{peak_to_gene_method}{(character) - Method to use to compute the links 39 | between peaks and genes. 
Default: "Signac". 40 | \itemize{ 41 | \item \code{'Signac'} - Use Signac::Extend to extend genes. 42 | \item \code{'GREAT'} - Not implemented yet. 43 | }} 44 | 45 | \item{upstream}{(int) - Upstream distance from TSS 46 | to consider as potential promoter.} 47 | 48 | \item{downstream}{(int) - Downstream distance from TSS 49 | to consider as potential promoter.} 50 | 51 | \item{only_tss}{(logical) - If TRUE, only TSS will be considered.} 52 | 53 | \item{store_network}{(bool) - Save the bipartite directly 54 | (\code{TRUE}, default) or return without saving on disk (\code{FALSE}).} 55 | 56 | \item{output_file}{(character) - Name of the output_file 57 | (if store_bipartite == \code{TRUE}). Default: NULL.} 58 | 59 | \item{bipartite_name}{(character) - Name of bipartite. Default: "atac_rna".} 60 | 61 | \item{verbose}{(integer) Display function messages. 62 | Set to 0 for no message displayed, >= 1 for more details. Default: 1.} 63 | } 64 | \value{ 65 | hummus_object (hummus_object) - Hummus object w/ atac-rna bipartite 66 | added to the multilayer slot 67 | } 68 | \description{ 69 | Compute and add bipartite between DNA regions and genenames to hummus object. 70 | Links are computed based on the distance between peaks and gene's TSS 71 | location from gene.range annotations. 72 | Call find_peaks_near_genes function, that can use different methods. 
73 | } 74 | \examples{ 75 | hummus <- bipartite_peaks2genes( 76 | hummus_object = hummus, 77 | gene_assay = "RNA", 78 | peak_assay = "peaks", 79 | gene_multiplex_name = "RNA", 80 | peak_multiplex_name = "peaks", 81 | peak_to_gene_method = "Signac", 82 | upstream = 500, 83 | downstream = 500, 84 | only_tss = TRUE, 85 | store_network = FALSE, 86 | bipartite_name = "atac_rna") 87 | } 88 | -------------------------------------------------------------------------------- /man/bipartite_tfs2peaks.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bipartites.R 3 | \name{bipartite_tfs2peaks} 4 | \alias{bipartite_tfs2peaks} 5 | \title{Compute links between TFs and DNA regions (ATAC peaks)} 6 | \usage{ 7 | bipartite_tfs2peaks( 8 | hummus_object, 9 | tf_expr_assay = "RNA", 10 | peak_assay = "peaks", 11 | tf_multiplex_name = NULL, 12 | peak_multiplex_name = NULL, 13 | genome, 14 | store_network = FALSE, 15 | output_file = NULL, 16 | verbose = 1, 17 | bipartite_name = "tf_peak" 18 | ) 19 | } 20 | \arguments{ 21 | \item{hummus_object}{(hummus_object) - Hummus object.} 22 | 23 | \item{tf_expr_assay}{(character) - Name of assay containing the TF expression 24 | data. If NULL, all TFs with a motif are used. Default: "RNA".} 25 | 26 | \item{peak_assay}{(character) - Name of the assay containing the DNA regions 27 | (ATAC peaks). Default: "peaks".} 28 | 29 | \item{tf_multiplex_name}{(character) - Name of multiplex containing the TFs. 30 | If NULL, the name of the TF assay is used.} 31 | 32 | \item{peak_multiplex_name}{(character) - Name of the multiplex containing the 33 | DNA regions (ATAC peaks). 
If NULL, the name of the peak assay is used.} 34 | 35 | \item{genome}{(BSgenome object) - Reference genome.} 36 | 37 | \item{store_network}{(bool) - Save the bipartite directly 38 | (\code{TRUE}, default) or return without saving on disk (\code{FALSE}).} 39 | 40 | \item{output_file}{(character) - Name of the output_file 41 | (if store_bipartite == \code{TRUE}). Default: NULL.} 42 | 43 | \item{verbose}{(integer) Display function messages. 44 | Set to 0 for no message displayed, >= 1 for more details. Default: 1.} 45 | 46 | \item{bipartite_name}{(character) - Name of bipartite. Default: "tf_peak".} 47 | } 48 | \value{ 49 | hummus_object (hummus_object) - Hummus object with TF-peak bipartite 50 | added to the multilayer slot 51 | } 52 | \description{ 53 | Compute and add bipartite between TFs and DNA regions to hummus object. 54 | Links are computed based on the binding motifs of TFs and their locations 55 | on a reference genome. 56 | Currently based on Signac AddMotifs function (--> motifmachR, itself based on 57 | MOODs algorithm). 
58 | } 59 | \examples{ 60 | hummus <- bipartite_tfs2peaks( 61 | hummus_object = hummus, 62 | tf_expr_assay = "RNA", 63 | peak_assay = "peaks", 64 | tf_multiplex_name = "TF", 65 | peak_multiplex_name = "peaks", 66 | genome = BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38, 67 | store_network = FALSE, 68 | verbose = 1, 69 | bipartite_name = "tf_peak") 70 | } 71 | -------------------------------------------------------------------------------- /man/compute_atac_peak_network.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/layers.R 3 | \name{compute_atac_peak_network} 4 | \alias{compute_atac_peak_network} 5 | \title{Compute peak network from scATAC-seq data} 6 | \usage{ 7 | compute_atac_peak_network( 8 | hummus, 9 | atac_assay = "peaks", 10 | genome = BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38, 11 | method = "cicero", 12 | multiplex_name = NULL, 13 | network_name = NULL, 14 | store_network = FALSE, 15 | output_file = NULL, 16 | threshold = 0, 17 | number_cells_per_clusters = 50, 18 | sample_num = 100, 19 | seed = 2025, 20 | verbose = 1, 21 | window = 5e+05, 22 | reduction_method = "UMAP" 23 | ) 24 | } 25 | \arguments{ 26 | \item{hummus}{(Hummus_Object) - Hummus object} 27 | 28 | \item{atac_assay}{(character) - Name of the assay containing the atac 29 | peaks data.} 30 | 31 | \item{genome}{(BSgenome) - Genome used to compute the distance between peaks.} 32 | 33 | \item{method}{(character) - Method used to infer network edges. 34 | \itemize{ 35 | \item \code{'cicero'} - Use cicero to infer regulatory networks. 36 | \item \code{'Other method'} - TO DO. 37 | }} 38 | 39 | \item{multiplex_name}{(character) - Name of the multiplex to add the network 40 | to. Default is \code{'peaks'}.} 41 | 42 | \item{network_name}{(character) - Name of the network in the multiplex to 43 | add the network to. 
Default is \code{'peak_network'}.} 44 | 45 | \item{store_network}{(bool) - Save the network directly (\code{TRUE}) 46 | or return without saving on disk (\code{FALSE}, default).} 47 | 48 | \item{output_file}{(character) - Name of the output_file 49 | (if store_network == \code{TRUE}).} 50 | 51 | \item{threshold}{(integer, default 0) - Minimal threshold to select peak-peak 52 | edges.} 53 | 54 | \item{number_cells_per_clusters}{(integer) - Number of cells grouped by 55 | territory to define pseudocells} 56 | 57 | \item{sample_num}{(integer | Cicero) - Number of pseudocells to sample from 58 | each territory. Default is 100.} 59 | 60 | \item{seed}{(integer | Cicero) - Seed used to sample pseudocells. Default is 61 | 2025} 62 | 63 | \item{verbose}{(integer) - Display function messages. Set to 0 for no 64 | message displayed, >= 1 for more details.} 65 | 66 | \item{window}{(integer) - Size of window to consider potential 67 | cis-regulatory cooperations between peaks. Default is 500K base pairs.} 68 | 69 | \item{reduction_method}{(character | Cicero) - Method used to reduce dimensionality 70 | of the data to identify territories. Default is \code{'UMAP'}.} 71 | } 72 | \value{ 73 | (data.frame) - Return list of network interactions between peaks 74 | } 75 | \description{ 76 | This function will create a network from atac data (or in theory any data 77 | with peak coordinates as features). 78 | Other methods should be implemented at some point (e.g. RENIN), 79 | for now Cicero is still the reference and only method available 80 | } 81 | \details{ 82 | Method descriptions : 83 | \enumerate{ 84 | \item Cicero 85 | Use partial correlation between peaks that are in a given window (e.g.
: 86 | less distant than 500K base pairs) 87 | } 88 | } 89 | \examples{ 90 | hummus <- compute_atac_peak_network(hummus) 91 | 92 | } 93 | -------------------------------------------------------------------------------- /man/compute_gene_network.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/layers.R 3 | \name{compute_gene_network} 4 | \alias{compute_gene_network} 5 | \title{Compute gene network from scRNA-seq data} 6 | \usage{ 7 | compute_gene_network( 8 | hummus, 9 | gene_assay = "RNA", 10 | tfs = NULL, 11 | method = "GENIE3", 12 | multiplex_name = NULL, 13 | network_name = NULL, 14 | store_network = FALSE, 15 | output_file = NULL, 16 | threshold = 0, 17 | number_cores = 1, 18 | verbose = 1 19 | ) 20 | } 21 | \arguments{ 22 | \item{hummus}{(Hummus_Object) - Hummus object} 23 | 24 | \item{gene_assay}{(character) - Name of the assay containing the gene 25 | expression data.} 26 | 27 | \item{tfs}{vector(character) - List of tfs considered. If NULL, all TFs with 28 | motifs in the hummus object are used.} 29 | 30 | \item{method}{(character) - Method used to infer network edges. 31 | \itemize{ 32 | \item \code{'GENIE3'} - Use tree random forest to infer regulatory networks. 33 | \item \code{'Other method'} - TO DO. 34 | }} 35 | 36 | \item{multiplex_name}{(character) - Name of the multiplex to add the network 37 | to. Default is \code{'RNA'}.} 38 | 39 | \item{network_name}{(character) - Name of the network in the multiplex to 40 | add the network to. 
Default is \code{'RNA_network'}.} 41 | 42 | \item{store_network}{(bool) - Save the network directly (\code{TRUE}) 43 | or return without saving on disk (\code{FALSE}, default).} 44 | 45 | \item{output_file}{(character) - Name of the output_file 46 | (if store_network == \code{TRUE}).} 47 | 48 | \item{threshold}{(integer, default 0) - Minimal threshold 49 | to select tf-gene edges.} 50 | 51 | \item{number_cores}{(integer, default 1) - Number of threads that should be 52 | used for the parallelizable methods.} 53 | 54 | \item{verbose}{(integer) - Display function messages. Set to 0 for no 55 | message displayed, >= 1 for more details.} 56 | } 57 | \value{ 58 | (data.frame) - Return list of network interactions between genes 59 | } 60 | \description{ 61 | This function will create a network from rna data (or in theory any data 62 | with genes as features). 63 | Other methods should be implemented at some point (any suggestion is welcome ! :) ), 64 | for now Genie3 is still the reference and only method available 65 | } 66 | \details{ 67 | Method descriptions : 68 | \enumerate{ 69 | \item Genie3 70 | Use tree random forest to infer regulatory networks : 71 | https://bioconductor.org/packages/release/bioc/html/GENIE3.html 72 | } 73 | } 74 | \examples{ 75 | hummus <- compute_gene_network( 76 | hummus, 77 | gene_assay = "RNA", 78 | method = "GENIE3", 79 | verbose = 1, 80 | number_cores = 8, 81 | store_network = FALSE) 82 | 83 | } 84 | -------------------------------------------------------------------------------- /man/compute_tf_network.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/layers.R 3 | \name{compute_tf_network} 4 | \alias{compute_tf_network} 5 | \title{Compute TF network and add it to hummus object} 6 | \usage{ 7 | compute_tf_network( 8 | hummus, 9 | organism = 9606, 10 | tfs = NA, 11 | gene_assay = NULL, 12 | method = NULL, 13 | 
store_network = FALSE, 14 | output_file = NULL, 15 | source_target = "AND", 16 | multiplex_name = "TF", 17 | tf_network_name = "TF_network", 18 | verbose = 1 19 | ) 20 | } 21 | \arguments{ 22 | \item{hummus}{(Hummus_Object) - Hummus object} 23 | 24 | \item{organism}{(integer) - Species identifier from Omnipath to fetch 25 | specific interactions} 26 | 27 | \item{tfs}{vector(character) - List of tfs considered. If NA, tfs are extracted 28 | from the hummus object with get_tfs function.} 29 | 30 | \item{gene_assay}{(character) - Name of the assay to get tfs from if tfs is 31 | not provided. If NULL, all TFs with motifs in the hummus object are used.} 32 | 33 | \item{method}{(character) - Method used to infer network edges. 34 | \itemize{ 35 | \item \code{'Omnipath'} - Use Omnipath to infer tf-tf networks. 36 | \item \code{'NULL'} - A fake connected network is computed. 37 | \item \code{'Other method'} - TO DO. 38 | }} 39 | 40 | \item{store_network}{(bool) - Save the network directly (\code{TRUE}) 41 | or return without saving on disk (\code{FALSE}, default).} 42 | 43 | \item{output_file}{(character) - Name of the output_file 44 | (if store_network == \code{TRUE}).} 45 | 46 | \item{source_target}{('AND'|'OR') - Fetch only the interactions involving 47 | two considered tfs (\code{'AND'}, default), or one considered tf and any 48 | other element (\code{'OR'})} 49 | 50 | \item{multiplex_name}{(character) - Name of the multiplex to add the network 51 | to. Default is \code{'TF'}.} 52 | 53 | \item{tf_network_name}{(character) - Name of the network in the multiplex to 54 | add the network to. Default is \code{'TF_network'}.} 55 | 56 | \item{verbose}{(integer) - Display function messages. Set to 0 for no message 57 | displayed, >= 1 for more details.} 58 | } 59 | \value{ 60 | (Hummus_Object) - Return hummus object with the new network added. 61 | } 62 | \description{ 63 | Compute a protein-protein interaction layer from an Omnipath request that will represent tf cooperativity. 
64 | This network is the top-layer of HuMMuS multilayer. 65 | } 66 | \examples{ 67 | hummus <- compute_tf_network(hummus, 68 | gene_assay = "RNA", 69 | verbose = 1) 70 | } 71 | -------------------------------------------------------------------------------- /man/dMcast.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{dMcast} 4 | \alias{dMcast} 5 | \title{Copy of the dMcast function from the Matrix.utils package, 6 | since this is off CRAN and does not seem to be maintained anymore 7 | internally} 8 | \usage{ 9 | dMcast( 10 | data, 11 | formula, 12 | fun.aggregate = "sum", 13 | value.var = NULL, 14 | as.factors = FALSE, 15 | factor.nas = TRUE, 16 | drop.unused.levels = TRUE 17 | ) 18 | } 19 | \description{ 20 | Copy of the dMcast function from the Matrix.utils package, 21 | since this is off CRAN and does not seem to be maintained anymore 22 | internally 23 | } 24 | -------------------------------------------------------------------------------- /man/define_binding_regions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/explore_network.R 3 | \name{define_binding_regions} 4 | \alias{define_binding_regions} 5 | \title{Define binding_regions from hummus object} 6 | \usage{ 7 | define_binding_regions( 8 | hummus_object, 9 | multiplex_names = NULL, 10 | bipartites_names = NULL, 11 | config_name = "binding_regions_config.yml", 12 | config_folder = "config", 13 | tf_multiplex = "TF", 14 | atac_multiplex = "peaks", 15 | rna_multiplex = "RNA", 16 | multilayer_f = "multilayer", 17 | gene_list = NULL, 18 | tf_list = NULL, 19 | save = FALSE, 20 | output_f = NULL, 21 | return_df = TRUE, 22 | suffix_bipartites = ".tsv", 23 | njobs = 1 24 | ) 25 | } 26 | \arguments{ 27 | \item{hummus_object}{A hummus object} 28 | 29 | 
\item{multiplex_names}{A vector of multiplex names considered. 30 | It must be a subset of the names of the multiplexes in the hummus object.} 31 | 32 | \item{bipartites_names}{A vector of bipartites names considered. 33 | It must be a subset of the names of the bipartites in the hummus object.} 34 | 35 | \item{config_name}{The name of the config file to be created by hummuspy} 36 | 37 | \item{config_folder}{The folder where the config file will be created} 38 | 39 | \item{tf_multiplex}{The name of the multiplex containing the TFs} 40 | 41 | \item{atac_multiplex}{The name of the multiplex containing the ATAC-seq peaks} 42 | 43 | \item{rna_multiplex}{The name of the multiplex containing the RNA-seq genes} 44 | 45 | \item{multilayer_f}{The folder where the multilayer is stored} 46 | 47 | \item{gene_list}{A vector of genes to be considered for the final binding 48 | regions (filtering is done on the genes before inferring the binding_regions)} 49 | 50 | \item{tf_list}{A vector of TFs to be considered for the binding_regions 51 | (filtering is done on the TFs after inferring the binding_regions)} 52 | 53 | \item{save}{A boolean indicating if the binding_regions should be saved} 54 | 55 | \item{output_f}{The name of the file where the binding_regions can be saved 56 | (if save == TRUE)} 57 | 58 | \item{return_df}{A boolean indicating if the binding_regions should be 59 | returned as a dataframe} 60 | 61 | \item{suffix_bipartites}{A suffix to add to the bipartites names (to indicate 62 | the exact file location)} 63 | 64 | \item{njobs}{The number of jobs to be used for the computation of the binding_regions} 65 | } 66 | \value{ 67 | A dataframe containing the binding_regions (if return_df == TRUE) 68 | } 69 | \description{ 70 | Calling the define_output function with output_type = 'binding_regions' 71 | } 72 | \examples{ 73 | binding_regions <- define_binding_regions(hummus_object = hummus, 74 | multilayer_f = multilayer_folder, 75 | njobs = 5) 76 | 77 | } 78 | 
-------------------------------------------------------------------------------- /man/define_enhancers.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/explore_network.R 3 | \name{define_enhancers} 4 | \alias{define_enhancers} 5 | \title{Define enhancers from hummus object} 6 | \usage{ 7 | define_enhancers( 8 | hummus_object, 9 | multiplex_names = NULL, 10 | bipartites_names = NULL, 11 | config_name = "enhancers_config.yml", 12 | config_folder = "config", 13 | tf_multiplex = "TF", 14 | atac_multiplex = "peaks", 15 | rna_multiplex = "RNA", 16 | multilayer_f = "multilayer", 17 | gene_list = NULL, 18 | tf_list = NULL, 19 | save = FALSE, 20 | output_f = NULL, 21 | return_df = TRUE, 22 | suffix_bipartites = ".tsv", 23 | njobs = 1 24 | ) 25 | } 26 | \arguments{ 27 | \item{hummus_object}{A hummus object} 28 | 29 | \item{multiplex_names}{A vector of multiplex names considered. 30 | It must be a subset of the names of the multiplexes in the hummus object.} 31 | 32 | \item{bipartites_names}{A vector of bipartites names considered. 
33 | It must be a subset of the names of the bipartites in the hummus object.} 34 | 35 | \item{config_name}{The name of the config file to be created by hummuspy} 36 | 37 | \item{config_folder}{The folder where the config file will be created} 38 | 39 | \item{tf_multiplex}{The name of the multiplex containing the TFs} 40 | 41 | \item{atac_multiplex}{The name of the multiplex containing the ATAC-seq peaks} 42 | 43 | \item{rna_multiplex}{The name of the multiplex containing the RNA-seq genes} 44 | 45 | \item{multilayer_f}{The folder where the multilayer is stored} 46 | 47 | \item{gene_list}{A vector of genes to be considered for the final enhancers 48 | (filtering is done on the genes before inferring the enhancers)} 49 | 50 | \item{tf_list}{A vector of TFs to be considered for the final enhancers 51 | (filtering is done on the TFs after inferring the enhancers)} 52 | 53 | \item{save}{A boolean indicating if the enhancers should be saved} 54 | 55 | \item{output_f}{The name of the file where the enhancers should be saved 56 | (if save == TRUE)} 57 | 58 | \item{return_df}{A boolean indicating if the enhancers should be returned 59 | as a dataframe} 60 | 61 | \item{suffix_bipartites}{A suffix to add to the bipartites names (to indicate 62 | the exact file location)} 63 | 64 | \item{njobs}{The number of jobs to be used for the computation of the enhancers} 65 | } 66 | \value{ 67 | A dataframe containing the enhancers (if return_df == TRUE) 68 | } 69 | \description{ 70 | Calling the define_output function with output_type = 'enhancers' 71 | } 72 | \examples{ 73 | enhancers <- define_enhancers(hummus_object = hummus, 74 | multilayer_f = multilayer_folder, 75 | njobs = 5) 76 | 77 | } 78 | -------------------------------------------------------------------------------- /man/define_general_config.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in 
R/explore_network.R 3 | \name{define_general_config} 4 | \alias{define_general_config} 5 | \title{Define general config file for hummuspy} 6 | \usage{ 7 | define_general_config( 8 | hummus_object, 9 | multiplex_names = NULL, 10 | bipartites_names = NULL, 11 | folder_multiplexes = "multiplex", 12 | folder_bipartites = "bipartites", 13 | seed_path = "seed/seeds.txt", 14 | suffix = ".tsv", 15 | self_loops = FALSE, 16 | restart_proba = 0.7, 17 | save_configfile = FALSE, 18 | config_name = "config.yml", 19 | config_folder = "config", 20 | multilayer_f = "multilayer", 21 | suffix_bipartites = ".tsv" 22 | ) 23 | } 24 | \arguments{ 25 | \item{hummus_object}{A hummus object} 26 | 27 | \item{multiplex_names}{A vector of multiplex names considered. 28 | It must be a subset of the names of the multiplexes in the hummus object, or NULL 29 | if all multiplexes should be considered.} 30 | 31 | \item{bipartites_names}{A vector of bipartites names considered. 32 | It must be a subset of the names of the bipartites in the hummus object, or NULL 33 | if all bipartites should be considered.} 34 | 35 | \item{folder_multiplexes}{The folder where the multiplexes are stored} 36 | 37 | \item{folder_bipartites}{The folder where the bipartites are stored} 38 | 39 | \item{seed_path}{The path to the seed file} 40 | 41 | \item{self_loops}{A boolean indicating if self loops should be considered.} 42 | 43 | \item{restart_proba}{The restart probability for the random walk (default = 0.7)} 44 | 45 | \item{save_configfile}{A boolean indicating if the config file should be saved} 46 | 47 | \item{config_name}{The name of the config file to be created by hummuspy} 48 | 49 | \item{config_folder}{The folder where the config file will be created (inside multilayer_f)} 50 | 51 | \item{multilayer_f}{The folder where the multilayer is stored} 52 | 53 | \item{suffix_bipartites}{A suffix to add to the bipartites names (to indicate 54 | the exact file name)} 55 | } 56 | \value{ 57 | A config file for hummuspy 
58 | } 59 | \description{ 60 | Define general config file for hummuspy 61 | } 62 | -------------------------------------------------------------------------------- /man/define_grn.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/explore_network.R 3 | \name{define_grn} 4 | \alias{define_grn} 5 | \title{Define GRN from hummus object} 6 | \usage{ 7 | define_grn( 8 | hummus_object, 9 | multiplex_names = NULL, 10 | bipartites_names = NULL, 11 | config_name = "grn_config.yml", 12 | config_folder = "config", 13 | tf_multiplex = "TF", 14 | atac_multiplex = "peaks", 15 | rna_multiplex = "RNA", 16 | multilayer_f = "multilayer", 17 | gene_list = NULL, 18 | tf_list = NULL, 19 | save = FALSE, 20 | output_f = NULL, 21 | return_df = TRUE, 22 | suffix_bipartites = ".tsv", 23 | njobs = 1 24 | ) 25 | } 26 | \arguments{ 27 | \item{hummus_object}{A hummus object} 28 | 29 | \item{multiplex_names}{A vector of multiplex names considered. 30 | It must be a subset of the names of the multiplexes in the hummus object.} 31 | 32 | \item{bipartites_names}{A vector of bipartites names considered. 
33 | It must be a subset of the names of the bipartites in the hummus object.} 34 | 35 | \item{config_name}{The name of the config file to be created by hummuspy} 36 | 37 | \item{config_folder}{The folder where the config file will be created} 38 | 39 | \item{tf_multiplex}{The name of the multiplex containing the TFs} 40 | 41 | \item{atac_multiplex}{The name of the multiplex containing the ATAC-seq peaks} 42 | 43 | \item{rna_multiplex}{The name of the multiplex containing the RNA-seq genes} 44 | 45 | \item{multilayer_f}{The folder where the multilayer is stored} 46 | 47 | \item{gene_list}{A vector of genes to be considered for the final GRN 48 | (filtering is done on the genes before inferring the GRN)} 49 | 50 | \item{tf_list}{A vector of TFs to be considered for the final GRN (filtering 51 | is done on the TFs after inferring the GRN)} 52 | 53 | \item{save}{A boolean indicating if the GRN should be saved} 54 | 55 | \item{output_f}{The name of the file where the GRN should be saved 56 | (if save == TRUE)} 57 | 58 | \item{return_df}{A boolean indicating if the GRN should be returned as a 59 | dataframe} 60 | 61 | \item{suffix_bipartites}{A suffix to add to the bipartites names (to indicate 62 | the exact file location)} 63 | 64 | \item{njobs}{The number of jobs to be used for the computation of the GRN} 65 | } 66 | \value{ 67 | A dataframe containing the GRN (if return_df == TRUE) 68 | } 69 | \description{ 70 | Calling the define_output function with output_type = 'GRN' 71 | } 72 | \examples{ 73 | grn <- define_grn(hummus_object = hummus, 74 | multilayer_f = multilayer_folder, 75 | njobs = 5) 76 | 77 | } 78 | -------------------------------------------------------------------------------- /man/define_output.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/explore_network.R 3 | \name{define_output} 4 | \alias{define_output} 5 | \title{Define 
output from hummus object} 6 | \usage{ 7 | define_output( 8 | output_type, 9 | hummus_object, 10 | multiplex_names = NULL, 11 | bipartites_names = NULL, 12 | config_name = "config.yml", 13 | config_folder = "config", 14 | tf_multiplex = "TF", 15 | atac_multiplex = "peaks", 16 | rna_multiplex = "RNA", 17 | multilayer_f = "multilayer", 18 | gene_list = NULL, 19 | tf_list = NULL, 20 | save = FALSE, 21 | output_f = NULL, 22 | return_df = TRUE, 23 | suffix_bipartites = ".tsv", 24 | njobs = 1 25 | ) 26 | } 27 | \arguments{ 28 | \item{output_type}{The type of output to be defined} 29 | 30 | \item{hummus_object}{A hummus object} 31 | 32 | \item{multiplex_names}{A vector of multiplex names considered. 33 | It must be a subset of the names of the multiplexes in the hummus object.} 34 | 35 | \item{bipartites_names}{A vector of bipartites names considered. 36 | It must be a subset of the names of the bipartites in the hummus object.} 37 | 38 | \item{config_name}{The name of the config file to be created by hummuspy} 39 | 40 | \item{config_folder}{The folder where the config file will be created} 41 | 42 | \item{tf_multiplex}{The name of the multiplex containing the TFs} 43 | 44 | \item{atac_multiplex}{The name of the multiplex containing the ATAC-seq peaks} 45 | 46 | \item{rna_multiplex}{The name of the multiplex containing the RNA-seq genes} 47 | 48 | \item{multilayer_f}{The folder where the multilayer is stored} 49 | 50 | \item{gene_list}{A vector of genes to be considered for the target_genes 51 | (filtering is done on the genes before inferring the target_genes)} 52 | 53 | \item{tf_list}{A vector of TFs to be considered for the final target_genes 54 | (filtering is done on the TFs after inferring the target_genes)} 55 | 56 | \item{save}{A boolean indicating if the target_genes should be saved} 57 | 58 | \item{output_f}{The name of the file where the target_genes should be saved 59 | (if save == TRUE)} 60 | 61 | \item{return_df}{A boolean indicating if the target_genes 
should be returned 62 | as a dataframe} 63 | 64 | \item{suffix_bipartites}{A suffix to add to the bipartites names (to indicate 65 | the exact file location)} 66 | 67 | \item{njobs}{The number of jobs to be used for the computation of the target_genes} 68 | } 69 | \value{ 70 | A dataframe containing the target_genes (if return_df == TRUE) 71 | } 72 | \description{ 73 | Define output from hummus object 74 | } 75 | \examples{ 76 | target_genes <- define_output('grn', hummus_object = hummus) 77 | } 78 | -------------------------------------------------------------------------------- /man/define_target_genes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/explore_network.R 3 | \name{define_target_genes} 4 | \alias{define_target_genes} 5 | \title{Define target genes from hummus object} 6 | \usage{ 7 | define_target_genes( 8 | hummus_object, 9 | multiplex_names = NULL, 10 | bipartites_names = NULL, 11 | config_name = "target_genes_config.yml", 12 | config_folder = "config", 13 | tf_multiplex = "TF", 14 | atac_multiplex = "peaks", 15 | rna_multiplex = "RNA", 16 | multilayer_f = "multilayer", 17 | gene_list = NULL, 18 | tf_list = NULL, 19 | save = FALSE, 20 | output_f = NULL, 21 | return_df = TRUE, 22 | suffix_bipartites = ".tsv", 23 | njobs = 1 24 | ) 25 | } 26 | \arguments{ 27 | \item{hummus_object}{A hummus object} 28 | 29 | \item{multiplex_names}{A vector of multiplex names considered. 30 | It must be a subset of the names of the multiplexes in the hummus object.} 31 | 32 | \item{bipartites_names}{A vector of bipartites names considered. 
33 | It must be a subset of the names of the bipartites in the hummus object.} 34 | 35 | \item{config_name}{The name of the config file to be created by hummuspy} 36 | 37 | \item{config_folder}{The folder where the config file will be created} 38 | 39 | \item{tf_multiplex}{The name of the multiplex containing the TFs} 40 | 41 | \item{atac_multiplex}{The name of the multiplex containing the ATAC-seq peaks} 42 | 43 | \item{rna_multiplex}{The name of the multiplex containing the RNA-seq genes} 44 | 45 | \item{multilayer_f}{The folder where the multilayer is stored} 46 | 47 | \item{gene_list}{A vector of genes to be considered for the target_genes 48 | (filtering is done on the genes before inferring the target_genes)} 49 | 50 | \item{tf_list}{A vector of TFs to be considered for the final target_genes 51 | (filtering is done on the TFs after inferring the target_genes)} 52 | 53 | \item{save}{A boolean indicating if the target_genes should be saved} 54 | 55 | \item{output_f}{The name of the file where the target_genes should be saved 56 | (if save == TRUE)} 57 | 58 | \item{return_df}{A boolean indicating if the target_genes should be returned 59 | as a dataframe} 60 | 61 | \item{suffix_bipartites}{A suffix to add to the bipartites names (to indicate 62 | the exact file location)} 63 | 64 | \item{njobs}{The number of jobs to be used for the computation of the target_genes} 65 | } 66 | \value{ 67 | A dataframe containing the target_genes (if return_df == TRUE) 68 | } 69 | \description{ 70 | Calling the define_output function with output_type = 'target_genes' 71 | } 72 | \examples{ 73 | target_genes <- define_target_genes(hummus_object = hummus, 74 | multilayer_f = multilayer_folder, 75 | njobs = 5) 76 | 77 | } 78 | -------------------------------------------------------------------------------- /man/fast_aggregate.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in 
R/utils.R 3 | \name{fast_aggregate} 4 | \alias{fast_aggregate} 5 | \title{Copy of the aggregate.Matrix function from the Matrix.utils package, 6 | since this is off CRAN and does not seem to be maintained anymore 7 | internally} 8 | \usage{ 9 | fast_aggregate(x, groupings = NULL, form = NULL, fun = "sum", ...) 10 | } 11 | \description{ 12 | Copy of the aggregate.Matrix function from the Matrix.utils package, 13 | since this is off CRAN and does not seem to be maintained anymore 14 | internally 15 | } 16 | -------------------------------------------------------------------------------- /man/figures/Fig_0001.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cantinilab/HuMMuS/c86df373ebd78cc33bf33a3236e3f4931d7a7c60/man/figures/Fig_0001.jpg -------------------------------------------------------------------------------- /man/figures/hummus_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cantinilab/HuMMuS/c86df373ebd78cc33bf33a3236e3f4931d7a7c60/man/figures/hummus_logo.png -------------------------------------------------------------------------------- /man/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cantinilab/HuMMuS/c86df373ebd78cc33bf33a3236e3f4931d7a7c60/man/figures/logo.png -------------------------------------------------------------------------------- /man/find_peaks_near_genes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bipartites.R 3 | \name{find_peaks_near_genes} 4 | \alias{find_peaks_near_genes} 5 | \title{Associate peaks to genes based on distance to TSS (or gene body)} 6 | \usage{ 7 | find_peaks_near_genes( 8 | peaks, 9 | genes, 10 | sep = c("-", "-"), 11 | method = c("Signac", "GREAT"), 12 | 
upstream = 1e+05, 13 | downstream = 0, 14 | extend = 1e+06, 15 | only_tss = FALSE, 16 | verbose = TRUE 17 | ) 18 | } 19 | \arguments{ 20 | \item{peaks}{vector(character) - List of peaks.} 21 | 22 | \item{genes}{vector(character) - List of genes.} 23 | 24 | \item{sep}{vector(character) - Separator between chromosome, 25 | start and end position. Default: c('-', '-').} 26 | 27 | \item{method}{(character) - Method to use. Default: "Signac". 28 | \itemize{ 29 | \item \code{'Signac'} - Use Signac::Extend to extend genes. 30 | \item \code{'GREAT'} - Not implemented yet. 31 | }} 32 | 33 | \item{upstream}{(int) - Upstream distance from TSS 34 | to consider as potential promoter.} 35 | 36 | \item{downstream}{(int) - Downstream distance from TSS 37 | to consider as potential promoter.} 38 | 39 | \item{extend}{(int) - Integer defining the distance from the upstream 40 | and downstream of the basal regulatory region. Used only by method 'GREAT'.} 41 | 42 | \item{only_tss}{(logical) - If TRUE, only TSS will be considered.} 43 | 44 | \item{verbose}{(logical) - If TRUE, print progress messages.} 45 | } 46 | \value{ 47 | (matrix) - Matrix of peaks x genes with 1 if peak is near gene. 
48 | } 49 | \description{ 50 | Associate peaks to genes based on distance to TSS (or gene body) 51 | } 52 | \examples{ 53 | TODO 54 | } 55 | -------------------------------------------------------------------------------- /man/format_bipartites_names.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/explore_network.R 3 | \name{format_bipartites_names} 4 | \alias{format_bipartites_names} 5 | \title{Format bipartites names for python hummuspy package config functions} 6 | \usage{ 7 | format_bipartites_names( 8 | hummus_object, 9 | bipartites_names = NULL, 10 | suffix_bipartites = ".tsv" 11 | ) 12 | } 13 | \arguments{ 14 | \item{hummus_object}{A hummus object} 15 | 16 | \item{bipartites_names}{A vector of bipartites names considered. 17 | It must be a subset of the names of the bipartites in the hummus object.} 18 | 19 | \item{suffix_bipartites}{A suffix to add to the bipartites location} 20 | } 21 | \value{ 22 | A list of bipartites names formatted for hummuspy config functions; 23 | each element of the list is a list containing the right and left layer 24 | connected by the bipartite 25 | } 26 | \description{ 27 | Format bipartites names for python hummuspy package config functions 28 | } 29 | \examples{ 30 | bipartites_dictionary <- format_bipartites_names( 31 | hummus_object = hummus, 32 | bipartites_names = c("atac_rna", 33 | "tf_peaks")) 34 | 35 | } 36 | -------------------------------------------------------------------------------- /man/format_multiplex_names.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/explore_network.R 3 | \name{format_multiplex_names} 4 | \alias{format_multiplex_names} 5 | \title{Format multiplex names for python hummuspy package config functions} 6 | \usage{ 7 | format_multiplex_names(hummus_object, 
multiplex_names = NULL) 8 | } 9 | \arguments{ 10 | \item{hummus_object}{A HuMMuS_Object} 11 | 12 | \item{multiplex_names}{A vector of multiplex names considered. It must be 13 | a subset of the names of the multiplexes in the hummus object.} 14 | } 15 | \value{ 16 | A list of multiplexes names formatted for hummuspy config functions; 17 | each element of the list is a list of the network types (directed/weighted) 18 | and the name of the networks as named in the hummus object 19 | } 20 | \description{ 21 | Format multiplex names for python hummuspy package config functions 22 | } 23 | \examples{ 24 | multiplexes_dictionary <- format_multiplex_names( 25 | hummus_object = hummus, 26 | multiplex_names = c("TF", "peaks")) 27 | 28 | } 29 | -------------------------------------------------------------------------------- /man/get_genome_annotations.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fetch_online.R 3 | \name{get_genome_annotations} 4 | \alias{get_genome_annotations} 5 | \title{Fetch online genome annotations from Ensembldb database} 6 | \usage{ 7 | get_genome_annotations( 8 | ensdb_annotations = EnsDb.Hsapiens.v86::EnsDb.Hsapiens.v86 9 | ) 10 | } 11 | \arguments{ 12 | \item{ensdb_annotations}{(EnsDb object) - Ensembldb database (default: EnsDb.Hsapiens.v86::EnsDb.Hsapiens.v86)} 13 | } 14 | \value{ 15 | gene_range (GRanges object) - Genome annotations 16 | } 17 | \description{ 18 | Fetch online genome annotations from Ensembldb database 19 | } 20 | \examples{ 21 | gene_range = get_genome_annotations(EnsDb.Hsapiens.v86::EnsDb.Hsapiens.v86) 22 | } 23 | -------------------------------------------------------------------------------- /man/get_tf2motifs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fetch_online.R 3 | 
\name{get_tf2motifs} 4 | \alias{get_tf2motifs} 5 | \title{Fetch online TF motifs from JASPAR2020 and chromVARmotifs} 6 | \usage{ 7 | get_tf2motifs(species = "human") 8 | } 9 | \arguments{ 10 | \item{species}{(character) - Species name (default: "human")} 11 | } 12 | \value{ 13 | motifs_db (motifs_db object) - TF2motifs + motifs PWMs 14 | } 15 | \description{ 16 | Fetch online TF motifs from JASPAR2020 and chromVARmotifs 17 | } 18 | \examples{ 19 | motifs_db = get_tf2motifs(species = "human") 20 | } 21 | -------------------------------------------------------------------------------- /man/get_tfs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{get_tfs} 4 | \alias{get_tfs} 5 | \title{Extract TF names from scRNA data and tf2motifs} 6 | \usage{ 7 | get_tfs( 8 | hummus, 9 | assay = NULL, 10 | store_tfs = TRUE, 11 | output_file = NULL, 12 | verbose = 0 13 | ) 14 | } 15 | \arguments{ 16 | \item{output_file}{(character) - Path to output file.} 17 | 18 | \item{verbose}{(integer) - Verbosity level. Default: 0.} 19 | 20 | \item{species}{(character) - Species name. Default: "human".} 21 | 22 | \item{genes}{(vector(character)) - List of expressed genes.} 23 | 24 | \item{tf2motifs}{(data.frame) - TF to motifs names mapping. 25 | Columns: motif, tf.} 26 | } 27 | \value{ 28 | TFs (vector(character)) - List of TFs expressed with motifs. 
29 | } 30 | \description{ 31 | Extract TF names from scRNA data and tf2motifs 32 | } 33 | -------------------------------------------------------------------------------- /man/motifs_db-class.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hummus_objet.R 3 | \docType{class} 4 | \name{motifs_db-class} 5 | \alias{motifs_db-class} 6 | \alias{motifs_db} 7 | \title{Motifs database class} 8 | \description{ 9 | MotifsDatabase object stores motifs (PFM matrices) 10 | and tf2motifs (TF to motifs names mapping) data. 11 | } 12 | \section{Slots}{ 13 | 14 | \describe{ 15 | \item{\code{motifs}}{(TFBSTools::PWMatrixList) - PFM matrices.} 16 | 17 | \item{\code{tf2motifs}}{(data.frame) - TF to motif names mapping. Columns: motif, tf.} 18 | }} 19 | 20 | -------------------------------------------------------------------------------- /man/multilayer-class.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hummus_objet.R 3 | \docType{class} 4 | \name{multilayer-class} 5 | \alias{multilayer-class} 6 | \alias{multilayer} 7 | \title{Multilayer class} 8 | \description{ 9 | Multilayer object stores a list of bipartite networks and a 10 | list of multiplex networks. It can also store a config list to create a 11 | yaml file, which is used to parametrize the random walk with restart to 12 | explore the multilayer. 
13 | } 14 | \section{Slots}{ 15 | 16 | \describe{ 17 | \item{\code{bipartites}}{(list) - List of bipartite networks} 18 | 19 | \item{\code{multiplex}}{(list) - List of multiplex networks} 20 | 21 | \item{\code{config}}{(list) - List of parameters to parametrize the random walk with 22 | restart to explore the multilayer} 23 | }} 24 | 25 | -------------------------------------------------------------------------------- /man/multiplex-class.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hummus_objet.R 3 | \docType{class} 4 | \name{multiplex-class} 5 | \alias{multiplex-class} 6 | \alias{multiplex} 7 | \title{Multiplex class} 8 | \description{ 9 | Multiplex object stores a list of networks, a list of features and 10 | a list of logicals indicating if the network is directed or weighted. 11 | } 12 | \section{Slots}{ 13 | 14 | \describe{ 15 | \item{\code{networks}}{(list) - List of networks.} 16 | 17 | \item{\code{features}}{(vector) - Vector of features.} 18 | 19 | \item{\code{directed}}{(list) - List of logical indicating if networks are directed.} 20 | 21 | \item{\code{weighted}}{(list) - List of logical indicating if networks are weighted.} 22 | }} 23 | 24 | -------------------------------------------------------------------------------- /man/peaks_in_regulatory_elements.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bipartites.R 3 | \name{peaks_in_regulatory_elements} 4 | \alias{peaks_in_regulatory_elements} 5 | \title{Filter peaks to those overlapping specific (regulatory) elements} 6 | \usage{ 7 | peaks_in_regulatory_elements( 8 | Peaks, 9 | RegEl, 10 | sep_Peak1 = "-", 11 | sep_Peak2 = "-", 12 | sep_RegEl1 = "-", 13 | sep_RegEl2 = "-" 14 | ) 15 | } 16 | \arguments{ 17 | \item{Peaks}{(character) vector of genomic 
coordinates of peaks} 18 | 19 | \item{RegEl}{(character) vector of genomic coordinates of regulatory elements} 20 | 21 | \item{sep_Peak1}{(character) separator between chromosome and 22 | start position of peak} 23 | 24 | \item{sep_Peak2}{(character) separator between start position 25 | and end position of peak} 26 | 27 | \item{sep_RegEl1}{(character) separator between chromosome and 28 | start position of regulatory element} 29 | 30 | \item{sep_RegEl2}{(character) separator between start position and 31 | end position of regulatory element} 32 | } 33 | \value{ 34 | (character) vector of genomic coordinates of peaks overlapping 35 | } 36 | \description{ 37 | Function to reduce list of "Peaks" to the ones overlapping with 38 | list of "RegEl", e.g. regulatory elements, evolutionary conserved regions 39 | } 40 | \examples{ 41 | peaks_in_regulatory_elements(peaks, RegEl) 42 | } 43 | -------------------------------------------------------------------------------- /man/run_cicero_wrapper.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/method_wrappers.R 3 | \name{run_cicero_wrapper} 4 | \alias{run_cicero_wrapper} 5 | \title{Cicero wrapper function for the compute_atac_peak_network function} 6 | \usage{ 7 | run_cicero_wrapper( 8 | hummus, 9 | atac_assay, 10 | genome, 11 | window, 12 | number_cells_per_clusters, 13 | sample_num, 14 | seed, 15 | verbose, 16 | threshold, 17 | reduction_method = "UMAP" 18 | ) 19 | } 20 | \arguments{ 21 | \item{hummus}{A hummus object} 22 | 23 | \item{atac_assay}{The name of the assay containing the scATAC-seq data} 24 | 25 | \item{genome}{The genome object} 26 | 27 | \item{window}{The window size used by Cicero to compute the coaccess score} 28 | 29 | \item{number_cells_per_clusters}{The number of cells per cluster used by 30 | Cicero to compute the coaccess score} 31 | 32 | \item{sample_num}{The number of samples used 
by Cicero to compute the 33 | coaccess score} 34 | 35 | \item{seed}{The seed used by Cicero to compute the coaccess score} 36 | 37 | \item{verbose}{The verbosity level} 38 | 39 | \item{threshold}{The threshold used to filter edges based on the coaccess 40 | score} 41 | 42 | \item{reduction_method}{The method used by monocle3 to reduce the dimension 43 | of the scATAC-seq data before defining the pseudocells. The default is UMAP.} 44 | } 45 | \value{ 46 | A data frame containing the peak network 47 | } 48 | \description{ 49 | This function is a wrapper for the compute_atac_peak_network 50 | function in layers.R. It computes the peak network from scATAC-seq data 51 | using Cicero. It returns a data frame with the peak network. The data frame 52 | also contains the coaccess score for each edge. The coaccess score is the 53 | probability that two peaks are accessible in the same cell. The coaccess 54 | score is computed by Cicero. Edges are filtered based on the coaccess score. 55 | Only edges with a coaccess score > threshold are kept. 
56 | } 57 | -------------------------------------------------------------------------------- /man/run_tf_null_wrapper.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/method_wrappers.R 3 | \name{run_tf_null_wrapper} 4 | \alias{run_tf_null_wrapper} 5 | \title{tf_null wrapper function for the tf_network function} 6 | \usage{ 7 | run_tf_null_wrapper( 8 | hummus = hummus, 9 | organism = organism, 10 | tfs = tfs, 11 | gene_assay = gene_assay, 12 | verbose = 1 13 | ) 14 | } 15 | \arguments{ 16 | \item{hummus}{A hummus object} 17 | } 18 | \description{ 19 | This function is a wrapper for the tf_network function 20 | } 21 | -------------------------------------------------------------------------------- /man/save_multilayer.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hummus_objet.R 3 | \name{save_multilayer} 4 | \alias{save_multilayer} 5 | \title{Save multilayer object files in a hierarchical structure on disk} 6 | \usage{ 7 | save_multilayer(hummus, folder_name, verbose = TRUE, suffix = ".tsv") 8 | } 9 | \arguments{ 10 | \item{hummus}{A hummus object} 11 | 12 | \item{folder_name}{The name of the folder to save the multilayer} 13 | 14 | \item{verbose}{(integer) - Display function messages. Set to 0 for no 15 | message displayed, >= 1 for more details.} 16 | 17 | \item{suffix}{The suffix of the files to save. 
Default: ".tsv"} 18 | } 19 | \value{ 20 | Nothing, but create a folder containing the multilayer object files 21 | } 22 | \description{ 23 | Save multilayer files from a Hummus_Object 24 | in a hierarchical structure on disk, inside a folder specified through 25 | folder_name 26 | } 27 | \examples{ 28 | folder_name = "multilayer" 29 | save_multilayer(hummus = hummus, folder_name = "multilayer") 30 | 31 | } 32 | -------------------------------------------------------------------------------- /man/store_network.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hummus_objet.R 3 | \name{store_network} 4 | \alias{store_network} 5 | \title{Wrapper function to save a network or not} 6 | \usage{ 7 | store_network(network, store_network, output_file, verbose = 1) 8 | } 9 | \arguments{ 10 | \item{network}{A network (edge list)} 11 | 12 | \item{store_network}{Logical indicating if the network should be saved} 13 | 14 | \item{output_file}{The name of the file to save the network} 15 | 16 | \item{verbose}{(integer) - Display function messages. Set to 0 for no 17 | message displayed, >= 1 for more details.} 18 | } 19 | \value{ 20 | Nothing, but save the network in a file if store_network is TRUE 21 | } 22 | \description{ 23 | Wrapper function to save a network or not in a file according 24 | to the store_network parameter. If store_network is TRUE, the network is 25 | saved in the output_file. 
26 | } 27 | \examples{ 28 | network <- read.table("network.tsv", header = TRUE, sep = "\t") 29 | store_network(network = network, 30 | store_network = TRUE, 31 | output_file = "network.tsv", 32 | verbose = 1) 33 | 34 | } 35 | -------------------------------------------------------------------------------- /man/sub-subset-Hummus_Object.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hummus_objet.R 3 | \name{[[<-,Hummus_Object} 4 | \alias{[[<-,Hummus_Object} 5 | \alias{[[.Hummus_Object} 6 | \alias{[[<-.Hummus_Object} 7 | \alias{\S4method{[[<-}{Hummus_Object,character,missing,Assay}} 8 | \title{Access assays in Hummus_Object (based on Seurat)} 9 | \usage{ 10 | \method{[[}{Hummus_Object}(x, i = missing_arg(), ..., drop = FALSE, na.rm = FALSE) 11 | } 12 | \description{ 13 | Access assays in Hummus_Object (based on Seurat) 14 | } 15 | -------------------------------------------------------------------------------- /pkgdown/_pkgdown.yml: -------------------------------------------------------------------------------- 1 | destination: docs 2 | url: https://cantinilab.github.io/HuMMuS 3 | authors: 4 | Rémi Trimbour: 5 | href: https://research.pasteur.fr/en/member/remi-trimbour/ 6 | email: remi.trimbour@pasteur.fr 7 | navbar: 8 | structure: 9 | left: [home, reference, articles] 10 | right: [github, homepage] 11 | components: 12 | github: 13 | icon: "fab fa-github" 14 | href: https://github.com/cantinilab/HuMMuS 15 | homepage: 16 | icon: "fas fa-university" 17 | href: https://research.pasteur.fr/en/team/machine-learning-for-integrative-genomics/ 18 | type: dark 19 | bg: primary 20 | template: 21 | params: 22 | ganalytics: G-36XYS9J0B3 23 | highlightcss: false 24 | bootstrap: 5 25 | bootswatch: cosmo 26 | bslib: 27 | primary: "#F7C673" 28 | link-color: "#265668" 29 | link-hover-color: "#FAAAE2" 30 | nav-link-color: "#265668" 31 | nav-link-hover-color: 
"#265668" 32 | nav-tabs-link-active-color: "#95D874" 33 | nav-tabs-link-active-bg: "#FFFFFF" 34 | 35 | -------------------------------------------------------------------------------- /pkgdown/extra.css: -------------------------------------------------------------------------------- 1 | .contents p:first-of-type img { 2 | background-color: white; 3 | padding: 10px; 4 | margin-top: 15px; 5 | scale: 1.1; 6 | position: relative; 7 | left: 10%; 8 | } 9 | 10 | body { 11 | font-size: 117%!important; 12 | } 13 | 14 | .bg-primary { 15 | background-color: #F7C673!important; 16 | } 17 | 18 | nav[data-toggle="toc"] .nav > li > a { 19 | border-radius: 0px!important; 20 | padding-left: 1rem!important; 21 | } 22 | 23 | pre { 24 | border-radius: 0px!important; 25 | background-color: white!important; 26 | } 27 | 28 | code span, code a:any-link { 29 | color: #1f1c1b!important; 30 | }/* Normal */ 31 | code span.al, code span.al a:any-link { 32 | color: #bf0303!important; 33 | background-color: #f7e6e6!important; 34 | font-weight: bold; 35 | } /* Alert */ 36 | code span.an, code span.an a:any-link { 37 | color: #ca60ca!important; 38 | } /* Annotation */ 39 | code span.at, code span.at a:any-link { 40 | color: #0057ae!important; 41 | } /* Attribute */ 42 | code span.bn, code span.bn a:any-link { 43 | color: #b08000!important; 44 | } /* BaseN */ 45 | code span.bu, code span.bu a:any-link { 46 | color: #644a9b!important; 47 | font-weight: bold; 48 | } /* BuiltIn */ 49 | code span.cf, code span.cf a:any-link { 50 | color: #1f1c1b!important; 51 | font-weight: bold; 52 | } /* ControlFlow */ 53 | code span.ch, code span.ch a:any-link { 54 | color: #924c9d!important; 55 | } /* Char */ 56 | code span.cn, code span.cn a:any-link { 57 | color: #aa5500!important; 58 | }/* Constant */ 59 | code span.co, code span.co a:any-link { 60 | color: #898887!important; 61 | }/* Comment */ 62 | code span.cv, code span.cv a:any-link { 63 | color: #0095ff!important; 64 | }/* CommentVar */ 65 | code span.do, 
code span.do a:any-link { 66 | color: #607880!important; 67 | }/* Documentation */ 68 | code span.dt, code span.dt a:any-link { 69 | color: #0057ae!important; 70 | }/* DataType */ 71 | code span.dv, code span.dv a:any-link { 72 | color: #b08000!important; 73 | }/* DecVal */ 74 | code span.er, code span.er a:any-link { 75 | color: #bf0303!important; 76 | text-decoration: underline; 77 | } /* Error */ 78 | code span.ex, code span.ex a:any-link { 79 | color: #0095ff!important; 80 | font-weight: bold; 81 | } /* Extension */ 82 | code span.fl, code span.fl a:any-link { 83 | color: #b08000!important; 84 | }/* Float */ 85 | code span.fu, 86 | code span.fu a:any-link { 87 | color: #644a9b!important; 88 | } /* Function */ 89 | code span.im, code span.im a:any-link { 90 | color: #ff5500!important; 91 | }/* Import */ 92 | code span.in, code span.in a:any-link { 93 | color: #b08000!important; 94 | }/* Information */ 95 | code span.kw, code span.kw a:any-link { 96 | /*color: #1f1c1b!important;*/ 97 | color: #007BA5!important; 98 | font-weight: bold; 99 | } /* Keyword */ 100 | code span.op, code span.op a:any-link { 101 | /* color: #1f1c1b!important; */ 102 | color: #5E5E5E!important; 103 | }/* Operator */ 104 | code span.ot, code span.ot a:any-link { 105 | color: #006e28!important; 106 | }/* Other */ 107 | code span.pp, code span.pp a:any-link { 108 | color: #006e28!important; 109 | }/* Preprocessor */ 110 | code span.re, code span.re a:any-link { 111 | color: #0057ae!important; 112 | background-color: #e0e9f8!important; 113 | } /* RegionMarker */ 114 | code span.sc, code span.sc a:any-link { 115 | color: #3daee9!important; 116 | }/* SpecialChar */ 117 | code span.ss, code span.ss a:any-link { 118 | color: #ff5500!important; 119 | }/* SpecialString */ 120 | /* code span.st, code span.st a:any-link { 121 | color: #bf0303!important; 122 | }/* String */ 123 | code span.st, code span.st a:any-link { 124 | color: #20794d!important; 125 | }/* String */ 126 | code span.va, code 
span.va a:any-link { 127 | color: #0057ae!important; 128 | }/* Variable */ 129 | code span.vs, code span.vs a:any-link { 130 | color: #bf0303!important; 131 | }/* VerbatimString */ 132 | code span.wa, code span.wa a:any-link { 133 | color: #bf0303!important; 134 | }/* Warning */ 135 | 136 | .navbar-brand { 137 | color: #000000!important; 138 | } 139 | 140 | .navbar-dark input[type="search"] { 141 | background-color: #e9ecef!important; 142 | color: #212529!important; 143 | } 144 | 145 | .template-home > .row > #main > p > img { 146 | background-color: #ffffff!important; 147 | padding-top: 20px; 148 | } 149 | 150 | .template-home h1, .template-home h2, .template-home h3, .template-home h4, 151 | .template-home h5, .template-home h6, .template-article .page-header h1 { 152 | font-weight: 700!important; 153 | } 154 | 155 | h1#omnipathr { 156 | font-size: 4.125rem!important; 157 | } 158 | 159 | img.logo { 160 | background-color: #ffffff!important; 161 | } 162 | 163 | p.abstract { 164 | font-size: calc(1.375rem + 1.5vw)!important; 165 | } 166 | 167 | h4.author { 168 | font-size: 1.25rem!important; 169 | margin-top: 1rem!important; 170 | } 171 | 172 | @media (min-width: 1200px){ 173 | p.abstract { 174 | font-size: 2.5rem!important; 175 | } 176 | } 177 | 178 | .author_afil { 179 | font-size: small!important; 180 | } 181 | 182 | .nav-item { 183 | margin-left: 10px!important; 184 | } 185 | -------------------------------------------------------------------------------- /pkgdown/favicon/android-chrome-192x192.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cantinilab/HuMMuS/c86df373ebd78cc33bf33a3236e3f4931d7a7c60/pkgdown/favicon/android-chrome-192x192.png -------------------------------------------------------------------------------- /pkgdown/favicon/android-chrome-512x512.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cantinilab/HuMMuS/c86df373ebd78cc33bf33a3236e3f4931d7a7c60/pkgdown/favicon/android-chrome-512x512.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-120x120.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cantinilab/HuMMuS/c86df373ebd78cc33bf33a3236e3f4931d7a7c60/pkgdown/favicon/apple-touch-icon-120x120.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-152x152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cantinilab/HuMMuS/c86df373ebd78cc33bf33a3236e3f4931d7a7c60/pkgdown/favicon/apple-touch-icon-152x152.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-180x180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cantinilab/HuMMuS/c86df373ebd78cc33bf33a3236e3f4931d7a7c60/pkgdown/favicon/apple-touch-icon-180x180.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-60x60.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cantinilab/HuMMuS/c86df373ebd78cc33bf33a3236e3f4931d7a7c60/pkgdown/favicon/apple-touch-icon-60x60.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-76x76.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cantinilab/HuMMuS/c86df373ebd78cc33bf33a3236e3f4931d7a7c60/pkgdown/favicon/apple-touch-icon-76x76.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/cantinilab/HuMMuS/c86df373ebd78cc33bf33a3236e3f4931d7a7c60/pkgdown/favicon/apple-touch-icon.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cantinilab/HuMMuS/c86df373ebd78cc33bf33a3236e3f4931d7a7c60/pkgdown/favicon/favicon-16x16.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cantinilab/HuMMuS/c86df373ebd78cc33bf33a3236e3f4931d7a7c60/pkgdown/favicon/favicon-32x32.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cantinilab/HuMMuS/c86df373ebd78cc33bf33a3236e3f4931d7a7c60/pkgdown/favicon/favicon.ico -------------------------------------------------------------------------------- /pkgdown/favicon/safari-pinned-tab.svg: -------------------------------------------------------------------------------- 1 | 2 | 4 | 7 | 8 | Created by potrace 1.14, written by Peter Selinger 2001-2017 9 | 10 | 12 | 20 | 42 | 52 | 72 | 91 | 101 | 114 | 125 | 150 | 166 | 167 | 168 | -------------------------------------------------------------------------------- /vignettes/add_networks.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Add external networks to HuMMuS object" 3 | author: "Trimbour Rémi" 4 | date: "2023-05-16" 5 | output: 6 | html_document: 7 | css: pkgdown/extra.css 8 | github_document: default 9 | pdf_document: default 10 | --- 11 | 12 | ```{r setup, include=FALSE} 
13 | knitr::opts_chunk$set(eval = TRUE) 14 | #devtools::install_github("cantinilab/HuMMuS") 15 | ``` 16 | 17 | ### General plan 18 | ##### 1. Initialisation of a hummus object 19 | ##### 2. Add a new multiplex 20 | ##### 3. Add a new bipartite 21 | 22 | ```{r import_packages} 23 | # install python dependency 24 | # reticulate::py_install("hummuspy", envname = "r-reticulate", pip=TRUE) 25 | reticulate::use_condaenv("r-reticulate") 26 | hummuspy <- reticulate::import("hummuspy") 27 | library(HuMMuS) 28 | ``` 29 | 30 | 31 | ## 1. Initialisation of HuMMuS object 32 | HuMMuS R objects are instances developed on top of seurat objects. It means it’s created from a seurat object and the contained assays can be accessed the same way. 33 | 34 | Additionally, it contains a motifs_db object, providing TF motif information, and a multilayer object, that will be completed while going through this tutorial. It will mostly include : 35 | - list of multiplex networks (one per modality) 36 | - list of bipartites (one per connection between layers) 37 | ![hummus object schema](figures/hummus_object_description.png) 38 | 39 | ```{r genome_annotations, warning=FALSE} 40 | # Create an hummus object from seurat object 41 | # Load the Chen dataset, which is a Seurat object containing scRNA-seq and scATAC-seq data 42 | data("chen_dataset_subset") 43 | chen_dataset_subset 44 | hummus <- Initiate_Hummus_Object(chen_dataset_subset) 45 | 46 | 47 | # wrapper of Signac::GetGRangesFromEnsDb, adapting output to UCSC format 48 | genome_annotations <- get_genome_annotations( 49 | ensdb_annotations = EnsDb.Hsapiens.v86::EnsDb.Hsapiens.v86) 50 | # Add annotations to peak assay 51 | Signac::Annotation(hummus@assays$peaks) <- genome_annotations 52 | rm(genome_annotations) 53 | 54 | # Load TF motifs from JASPAR2020 and chromVARmotifs in hummus object 55 | hummus@motifs_db <- get_tf2motifs() # by default human motifs 56 | ``` 57 | 58 | ## 2. 
Add networks 59 | 60 | ![Steps to build HuMMuS objects](figures/5_steps.png) 61 | 62 | ### 2.1 Add a new multiplex through a network 63 | ```{r load_rna_network} 64 | # The networks should be imported as 3 columns data.frame: (source, target, weight) 65 | rna_network_path = "chen_multilayer/multiplex/RNA/RNA_GENIE3.tsv" 66 | rna_network <- read.table(rna_network_path, sep = "\t", header = FALSE) 67 | head(rna_network) 68 | ``` 69 | 70 | ```{r add_network_new_multiplex} 71 | hummus <- add_network( 72 | hummus, rna_network, 73 | multiplex_name = "RNA", 74 | network_name = "GENIE3", 75 | weighted = TRUE, 76 | verbose = 1) 77 | ``` 78 | 79 | ### 2.2 Add a network to an existing multiplex 80 | ```{r add_network_existing_multiplex} 81 | hummus <- add_network( 82 | hummus, rna_network, 83 | multiplex_name = "RNA", 84 | network_name = "GENIE3_other", 85 | weighted = TRUE) 86 | ``` 87 | 88 | ### 3 Add external bipartites 89 | Bipartites can be 2 or 3 columns data.frame, depending on if weighted or not. 
90 | Bipartites not computed through HuMMuS have to be added directly to the object as below: 91 | ```{r load_bipartite_tf_peak} 92 | ## Add TF peaks bipartite 93 | atac_rna_path = "chen_multilayer/bipartite/atac_rna.tsv" 94 | atac_rna <- read.table(atac_rna_path, sep = "\t", header = FALSE) 95 | head(atac_rna) 96 | ``` 97 | 98 | ```{r add_tf_peak bipartite} 99 | hummus@multilayer@bipartites[["atac_rna"]] <- new("bipartite", 100 | "network" = atac_rna, 101 | "multiplex_left" = "RNA", 102 | "multiplex_right" = "peaks") 103 | ``` 104 | 105 | Display HuMMuS object 106 | ```{r show_hummus} 107 | hummus 108 | hummus@multilayer@multiplex 109 | ``` 110 | 111 | 112 | -------------------------------------------------------------------------------- /vignettes/chen_vignette.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Predict GRN from scRNA+scATAC data (Chen 2018 dataset)" 3 | author: "Trimbour Rémi" 4 | date: "2023-05-16" 5 | output: 6 | html_document: default 7 | github_document: default 8 | pdf_document: default 9 | resource_files: 10 | - figures/schema_HuMMuS.png 11 | - figures/hummus_object_description.png 12 | - figures/build_multilayer.png 13 | - figures/5_steps.png 14 | - figures/explore_multilayer.png 15 | --- 16 | 17 | 18 | ## Useful links: 19 | *Paper*: https://www.biorxiv.org/content/10.1101/2023.06.09.543828v1 20 | 21 | *Github repo*: https://github.com/cantinilab/HuMMuS 22 | 23 | *Documentation*: https://cantinilab.github.io/HuMMuS/ 24 | 25 | ```{r setup, include=FALSE} 26 | knitr::opts_chunk$set(eval = TRUE) 27 | devtools::install_github("cantinilab/HuMMuS", ref="dev_SeuratV5") 28 | ``` 29 | # General description of the pipeline 30 | ![Overall pipeline](figures/schema_HuMMuS.png) 31 | 32 | ## Useful resources 33 | *Preprint detailing the method:* [https://www.biorxiv.org/content/10.1101/2023.06.09.543828v1](https://www.biorxiv.org/content/10.1101/2023.06.09.543828v1) 34 | 35 | *Github repo detailing 
the installation:* [https://github.com/cantinilab/HuMMuS](https://github.com/cantinilab/HuMMuS) 36 | 37 | *Documentation and vignette:* [https://cantinilab.github.io/HuMMuS](https://cantinilab.github.io/HuMMuS) 38 | 39 | 40 | ### General plan 41 | ##### 0. Preparation of the environment 42 | ##### 1. Initialisation of a hummus object 43 | ##### 2. Construction of the multilayer 44 | ##### 3. Analyse multilayer and define gene regulatory network (GRN) 45 | 46 | 47 | 48 | ## 0. Setting up the environment 49 | ```{r import_packages} 50 | # install python dependency 51 | envname = "r-reticulate" 52 | # reticulate::py_install("hummuspy", envname = envname, pip=TRUE) 53 | reticulate::use_condaenv(envname) 54 | 55 | hummuspy <- reticulate::import("hummuspy") 56 | library(HuMMuS) 57 | ``` 58 | ## Download the single-cell data 59 | The data used in this tutorial can be [downloaded here](https://figshare.com/account/home#/projects/168899) 60 | 61 | 62 | ## 1. Initialisation of HuMMuS object 63 | HuMMuS R objects are instances developed on top of seurat objects. It means it’s created from a seurat object and the contained assays can be accessed the same way. 64 | 65 | Additionally, it contains a motifs_db object, providing TF motif information, and a multilayer object, that will be completed while going through this tutorial. It will mostly include : 66 | 67 | - list of multiplex networks (one per modality) 68 | - list of bipartites (one per connection between layers) 69 | ![hummus object schema](figures/hummus_object_description.png) 70 | 71 | 72 | ### 1.1. Transform data into a hummus object 73 | ```{r initiate_hummus_object} 74 | # Load the Chen dataset, which is a Seurat object containing scRNA-seq and scATAC-seq data 75 | data("chen_dataset_subset") 76 | chen_dataset_subset 77 | 78 | # Create an hummus object from seurat object 79 | hummus <- Initiate_Hummus_Object(chen_dataset_subset) 80 | ``` 81 | 82 | ### 1.2. 
Add genome and motif annotations to hummus object 83 | Fetch genome annotations online (requires an internet connection). 84 | You can also request any "EnsDb" object adapted to your data 85 | (e.g. EnsDb.Hsapiens.v86::EnsDb.Hsapiens.v86 for human genome annotations) 86 | or use your own genome annotations in the same format. 87 | 88 | ```{r genome_annotations, eval=TRUE, warning=FALSE} 89 | # get human genome annotation from EndDb data 90 | # wrapper of Signac::GetGRangesFromEnsDb, adapting output to UCSC format 91 | genome_annotations <- get_genome_annotations( 92 | ensdb_annotations = EnsDb.Hsapiens.v86::EnsDb.Hsapiens.v86) 93 | 94 | # can also be downloaded, saved as an RDS objects for example 95 | ``` 96 | 97 | Add genome annotations to hummus/seurat object 98 | ```{r add_genome_annotations, eval=TRUE} 99 | Signac::Annotation(hummus@assays$peaks) <- genome_annotations 100 | rm(genome_annotations) 101 | ``` 102 | 103 | Get TF motifs from JASPAR2020 and chromVARmotifs databases in a motifs_db 104 | object. By default, human motifs are used. You can specify the species you want 105 | to use with the `species` argument (e.g. species = "mouse" for mouse). 106 | motifs_db objects contain 3 slots : 107 | * `motifs = "PWMatrixList"` 108 | * `tf2motifs = "data.frame"` 109 | * `tfs = "NULL"` 110 | PWMatrixList is a named vector of the motif matrices, while tf2motifs is a 111 | correspondence table between TFs and motifs. tfs is a named vector of the TFs. 112 | You can also use your own motifs_db object, as long as it contains the same 113 | slots. 114 | 115 | ```{r get_tf2motifs, eval=TRUE} 116 | # Load TF motifs from JASPAR2020 and chromVARmotifs in hummus object 117 | hummus@motifs_db <- get_tf2motifs() # by default human motifs 118 | ``` 119 | 120 | ## 2. Construction of the multilayer 121 | 122 | ![hummus object schema](figures/build_multilayer.png) 123 | 124 | You can compute the different layers and bipartites as indicated below. 
125 | 126 | An example multilayer completed can also be imported with : `data(chen_subset_hummus)`. 127 | This object corresponds to a multilayer from chen_dataset_subset completed. You can then go to the part 3, replacing `hummus` by `chen_subset_hummus` in each step. 128 | 129 | Finally, [you can compute the different layers before, and add them afterwards](add_networks.Rmd). 130 | It allows to use faster methods to compute the networks 131 | (e.g. [Arboreto](https://arboreto.readthedocs.io/en/latest/) for the gene network, 132 | [ATACNet](https://github.com/r-trimbour/ATACNet) for the peak network, etc.). 133 | 134 | ### Compute 3 layers and 2 bipartites 135 | ![hummus object schema](figures/5_steps.png) 136 | 137 | **!! Long step !!** You can also go directly to the part 3 for your "discovery tour". :) 138 | 139 | ### 2.1. TF - peaks bipartite reconstruction 140 | 141 | TF - peaks bipartite is computed using the motifs_db object and the peak 142 | assay. You can specify the assay to use to filter TFs (e.g. "RNA" if you want 143 | to use only the TFs expressed in your dataset). If NULL, all TFs with motifs 144 | will be used. 145 | BSGenome object is used to identify location of motifs and intersect them with 146 | peak 147 |
You can also specify the name of the bipartite that will be added to the 148 | hummus object. By default, it will be named "tf_peak". 149 | ```{r bipartite_tf_peak, eval=TRUE} 150 | hummus <- bipartite_tfs2peaks( 151 | hummus_object = hummus, 152 | tf_expr_assay = "RNA", # use to filter TF on only expressed TFs, 153 | # if NULL, all TFs with motifs are used 154 | peak_assay = "peaks", 155 | tf_multiplex_name = "TF", 156 | genome = BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38, 157 | ) 158 | ``` 159 | 160 | ### 2.2. Genes - peaks bipartite reconstruction 161 | Peaks - genes bipartite is computed 162 | ```{r bipartite_peaks2genes, eval=TRUE} 163 | hummus <- bipartite_peaks2genes( 164 | hummus_object = hummus, 165 | gene_assay = "RNA", 166 | peak_assay = "peaks", 167 | store_network = FALSE, 168 | ) 169 | ``` 170 | 171 | #### Compute layer networks and add it to hummus object 172 | Each one of the three layers is computed individually. 173 | 174 | ### 2.3. Compute the TF network from OmniPath database 175 | We currently use OmniPath R package to fetch TF interactions. 176 | You can first specify if you want to use only the TFs expressed in your dataset 177 | (if you have a RNA assay in your hummus object). If `gene_assay` is NULL, all 178 | TFs with motifs will be used. 179 |
You can then specify which interactions you want to keep through 180 | the `source_target` argument ("AND" | "OR"). If "AND", only the interactions 181 | between 2 TFs that are both present in the dataset will be kept. If "OR", all 182 | interactions involving at least one TF present in the dataset will be kept. 183 |
Finally, you can specify the name of the multiplex and the name of the 184 | network that will be added to the hummus object. 185 | The added network will be undirected and unweighted, since PPI and the OmniPath 186 | database are not directional and do not return any weight here. 187 | ```{r tf_network, eval=TRUE} 188 | hummus <- compute_tf_network(hummus, 189 | gene_assay = "RNA", # default = NULL ; 190 | # If an assay is provided, 191 | # only the TFs that are present 192 | # will be considered 193 | verbose = 1, 194 | #source_target = "OR", 195 | multiplex_name = "TF", 196 | tf_network_name = "TF_network") 197 | ``` 198 | 199 | ### 2.4. Compute gene network from scRNA-seq w/ GENIE3 200 | ### *!! This step can be very slow if you have thousands of cells !!* 201 | _Current recommendation if you have a big dataset is to compute the network_ 202 | _before with [GRNBoost2 through arboreto](https://arboreto.readthedocs.io/en/latest/)_ 203 | _and [add it to the hummus object afterwards](https://cantinilab.github.io/HuMMuS/articles/add_networks.html)._ 204 | Different methods can be used to compute the gene network. For now, only GENIE3 205 | is implemented in HuMMuS. You can specify which assay to use to compute the 206 | network (`gene_assay`). 207 |
You can specify the number of cores to use to compute 208 | the network. You can also specify if you want to save the network locally 209 | (`store_network = TRUE`) or not (`store_network = FALSE`). If you choose to save 210 | the network, you will need to specify the output file name (`output_file`). 211 | The returned network will be considered undirected and weighted. While GENIE3 212 | returns a directed network, we symmetrize it for the random walk with restart 213 | exploration of the genes proximity. 214 | 215 | ```{r gene_network, eval=TRUE} 216 | hummus <- compute_gene_network( 217 | hummus, 218 | gene_assay = "RNA", 219 | method = "GENIE3", 220 | verbose = 1, 221 | number_cores = 5, # GENIE3 method can be ran 222 | # parallelised on multiple cores 223 | store_network = FALSE, # by default : FALSE, but 224 | # each network can be saved 225 | # when computed with hummus 226 | output_file = "gene_network.tsv") 227 | ``` 228 | 229 | ### 2.5. Compute the peak network from scATAC-seq w/ Cicero 230 | Different methods can be used to compute the peak network. For now, only Cicero 231 | is implemented in HuMMuS. You can specify which assay to use to compute the network 232 | (`peak_assay`). You can also specify the number of cores to use to compute the 233 | network. You can also specify if you want to save the network locally 234 | (`store_network = TRUE`) or not (`store_network = FALSE`). If you choose to save 235 | the network, you will need to specify the output file name (`output_file`). 236 | The returned network will be considered undirected and weighted, since cis-regulatory 237 | interaction and Cicero outputs are not directional. 238 | 239 | ```{r peak_network, eval=TRUE} 240 | hummus <- compute_atac_peak_network(hummus, 241 | atac_assay = "peaks", 242 | verbose = 1, 243 | genome = BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38, 244 | store_network = FALSE) 245 | ``` 246 | 247 | 248 | ## 3. 
Analysis of the multilayer and definition of GRN 249 | ```{r load_precomputed} 250 | data(chen_subset_hummus) 251 | hummus <- chen_subset_hummus 252 | ``` 253 | 254 | ### 3.1. Save the multilayer in a classic hierarchical structure 255 | The package used for the random walk with restart exploration (multixrank) 256 | currently requires all the network files to be saved on disk. To simplify the 257 | organisation of the files, it is possible to save everything necessary with 258 | the function `save_multilayer()`.
259 | It will create a folder (specified through `folder_name`) containing all the 260 | files necessary to run the multixrank algorithm. The folder will contain 261 | the following subfolders : 262 | * **bipartite** : containing the bipartites files 263 | * **multiplex** : containing the multiplex sub-subfolders 264 | * **multiplex_1** (e.g. TF|peak|RNA) : containing the network file 265 | of each layer of the multiplex 266 | * **seed** : that will contain the seed files (necessary to compute HuMMuS 267 | outputs later) 268 | * **config** : that will contain the config files (necessary to compute 269 | HuMMuS outputs later) 270 | 271 | ![hummus object schema](figures/explore_multilayer.png) 272 | ```{r save_multilayer} 273 | save_multilayer(hummus = hummus, 274 | folder_name = "chen_multilayer") 275 | ``` 276 | 277 | ### 3.2. Retrieve target genes 278 | With HuMMuS, inference of GRN and target gene of TFs are different outputs. 279 | Indeed, while GRN is computed making TFs compete to regulate genes (by random 280 | walk with restart starting from the genes and going to the TFs), target genes 281 | are computed making genes compete to be regulated by TFs (by random walk with 282 | restart starting from the TFs and going to the genes).
For the target genes output, you can specify the list of TFs (`tf_list`) to use as seeds 284 | (NULL by default, in which case all TFs will be used as seeds). Only the links between 285 | the seed TFs and the genes will be computed. You can also specify the list of 286 | genes to use. Only the scores of the genes present in both the network and 287 | `gene_list` will be returned. 288 | 289 | ```{r target_genes_ATF2, eval=TRUE} 290 | ATF2_genes <- define_target_genes( 291 | hummus, 292 | tf_list = list("ATF2"), 293 | multilayer_f = "chen_multilayer", 294 | njobs = 1 295 | ) 296 | ``` 297 | ```{r head_target_genes_ATF2} 298 | head(ATF2_genes) 299 | ``` 300 | 301 | 302 | ```{r target_genes} 303 | target_genes <- define_target_genes( 304 | hummus, 305 | multilayer_f = "chen_multilayer", 306 | njobs = 1 307 | ) 308 | ``` 309 | ```{r head_target_genes} 310 | head(target_genes) 311 | ``` 312 | 313 | ### 3.3. Define GRN 314 | The GRN is defined using the multixrank algorithm. It requires 315 | the hummuspy Python package to be installed (`pip install hummuspy`).
316 |
This can be parallelised using the njobs argument. 317 | You can also specify the list of genes and the list of TFs to use. 318 | 319 | ```{r grn, eval=FALSE} 320 | grn <- define_grn( 321 | hummus, 322 | multilayer_f = "chen_multilayer", 323 | njobs = 5 324 | ) 325 | ``` 326 | ```{r head_grn, eval=FALSE} 327 | grn 328 | ``` 329 | 330 | ### 3.4. Retrieve enhancers 331 |
You can also specify the list of peaks to use. 332 | 333 | ```{r enhancers} 334 | enhancers <- define_enhancers( 335 | hummus, 336 | gene_list = list("ATF2"), 337 | multilayer_f = "chen_multilayer", 338 | njobs = 1 339 | ) 340 | ``` 341 | ```{r head_enhancers} 342 | head(enhancers) 343 | ``` 344 | 345 | ### 3.5. Retrieve binding regions 346 | For binding regions output, you can specify the list of TFs (`tf_list`) to use as seed 347 | (if NULL by default, all TFs will be used as seed). Only the links between 348 | the seed TFs and the peaks will be computed. You can also specify the list of 349 | peaks to use. Only the score of the peaks present in the network and the 350 | `peak_list` will be returned. 351 | ```{r} 352 | binding_regions <- define_binding_regions( 353 | hummus, 354 | multilayer_f = "chen_multilayer", 355 | njobs = 1 356 | ) 357 | ``` 358 | ```{r} 359 | head(binding_regions) 360 | ``` 361 | -------------------------------------------------------------------------------- /vignettes/config.yml: -------------------------------------------------------------------------------- 1 | bipartite: 2 | bipartite/atac_rna.tsv: 3 | graph_type: '00' 4 | source: RNA 5 | target: peaks 6 | bipartite/tf_rna.tsv: 7 | graph_type: '00' 8 | source: peaks 9 | target: TF 10 | eta: 11 | - 0 12 | - 1 13 | - 0 14 | lamb: 15 | - - 0.5 16 | - 0.0 17 | - 0.5 18 | - - 0.0 19 | - 0.5 20 | - 0.0 21 | - - 0.5 22 | - 0.5 23 | - 0.5 24 | multiplex: 25 | RNA: 26 | graph_type: 27 | - '00' 28 | layers: 29 | - multiplex/RNA/GENIE3.tsv 30 | TF: 31 | graph_type: 32 | - '00' 33 | layers: 34 | - multiplex/TF/PPI.tsv 35 | peaks: 36 | graph_type: 37 | - '00' 38 | layers: 39 | - multiplex/peaks/Cicero.tsv 40 | r: 0.7 41 | seed: seeds/seeds.txt 42 | self_loops: 0 43 | -------------------------------------------------------------------------------- /vignettes/figures/5_steps.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cantinilab/HuMMuS/c86df373ebd78cc33bf33a3236e3f4931d7a7c60/vignettes/figures/5_steps.png -------------------------------------------------------------------------------- /vignettes/figures/build_multilayer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cantinilab/HuMMuS/c86df373ebd78cc33bf33a3236e3f4931d7a7c60/vignettes/figures/build_multilayer.png -------------------------------------------------------------------------------- /vignettes/figures/explore_multilayer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cantinilab/HuMMuS/c86df373ebd78cc33bf33a3236e3f4931d7a7c60/vignettes/figures/explore_multilayer.png -------------------------------------------------------------------------------- /vignettes/figures/hummus_object_description.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cantinilab/HuMMuS/c86df373ebd78cc33bf33a3236e3f4931d7a7c60/vignettes/figures/hummus_object_description.png -------------------------------------------------------------------------------- /vignettes/figures/schema_HuMMuS.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cantinilab/HuMMuS/c86df373ebd78cc33bf33a3236e3f4931d7a7c60/vignettes/figures/schema_HuMMuS.png --------------------------------------------------------------------------------