├── .Rbuildignore ├── .gitattributes ├── .github ├── .gitignore └── workflows │ ├── R-CMD-check.yaml │ ├── pr-commands.yaml │ └── test-coverage.yaml ├── .gitignore ├── .trigger_build ├── DESCRIPTION ├── LICENSE ├── LICENSE.md ├── NAMESPACE ├── NEWS ├── R ├── CoNGAfy.R ├── Ibex.matrix.R ├── basiliskEnv.R ├── combineExpandedBCR.R ├── global.R ├── ibex_example.R ├── ibex_vdj.R ├── quietBCRgenes.R ├── runIbex.R └── utils.R ├── README.md ├── _pkgdown.yml ├── data ├── ibex_example.rda └── ibex_vdj.rda ├── inst ├── WORDLIST ├── extdata │ ├── ibex-basilisk.yaml │ └── metadata.csv └── scripts │ ├── make-data.R │ └── make-metadata.R ├── man ├── CoNGAfy.Rd ├── Ibex.matrix.Rd ├── combineExpandedBCR.Rd ├── filter.cells.Rd ├── getHumanIgPseudoGenes.Rd ├── ibex_example.Rd ├── ibex_vdj.Rd ├── quietBCRgenes.Rd └── runIbex.Rd ├── tests ├── spelling.R ├── testthat.R └── testthat │ ├── basiliskEnv.R │ ├── helper-testingFunctions.R │ ├── test-CoNGAfy.R │ ├── test-Ibex.matrix.R │ ├── test-combineExpandedBCR.R │ ├── test-quietBCRgenes.R │ ├── test-runIbex.R │ └── testdata │ ├── CoNGAfy │ ├── CoNGAfy_counts.rds │ ├── CoNGAfy_mean_counts.rds │ ├── CoNGAfy_mean_meta.data.rds │ └── CoNGAfy_meta.data.rds │ ├── quietBCRgenes │ └── quietBCRgenes_feature.vector.rds │ └── runIbex │ ├── ibex.matrix_Heavy_VAE_AF.rds │ ├── ibex.matrix_Light_AE_OHE.rds │ ├── runIbex_Heavy_VAE_AF_reduction.rds │ ├── runIbex_Heavy_VAE_OHE_reduction.rds │ ├── runIbex_Heavy_geometric_reduction.rds │ └── runIbex_Light_AE_KF_reduction.rds ├── vignettes └── Ibex.Rmd └── www ├── graphicalAbstract.png ├── ibex_hex.png ├── training_info.png └── wnn_output.png /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^_pkgdown\.yml$ 2 | ^docs$ 3 | ^pkgdown$ 4 | ^www$ 5 | ^‘__autograph_generated_file3mbjv7rr\.py’$ 6 | ^‘__pycache__’$ 7 | ^__autograph_generated_fileuzetj_u2\.py$ 8 | ^__pycache__$ 9 | ^__autograph_generated_filezt06eymn\.py$ 10 | ^\.github$ 11 | ^LICENSE\.md$ 12 | 
-------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # .github/workflows/R-CMD-check.yaml 2 | # Simplified for basilisk-based Ibex (no global Keras install required) 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: R-CMD-check 10 | 11 | jobs: 12 | R-CMD-check: 13 | runs-on: ${{ matrix.config.os }} 14 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 15 | 16 | strategy: 17 | fail-fast: false 18 | matrix: 19 | config: 20 | - {os: ubuntu-latest, r: 'release'} 21 | 22 | env: 23 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 24 | R_KEEP_PKG_SOURCE: yes 25 | 26 | steps: 27 | - uses: actions/checkout@v3 28 | 29 | - uses: r-lib/actions/setup-pandoc@v2 30 | 31 | - uses: r-lib/actions/setup-r@v2 32 | with: 33 | r-version: ${{ matrix.config.r }} 34 | use-public-rspm: true 35 | 36 | # Optional: cache basilisk's miniconda to speed up repeated runs 37 | - uses: actions/cache@v4 38 | with: 39 | path: ${{ runner.tool_cache }}/basilisk 40 | key: ${{ runner.os }}-basilisk-${{ hashFiles('DESCRIPTION') }} 41 | 42 | - name: Install R dependencies 43 | uses: r-lib/actions/setup-r-dependencies@v2 44 | with: 45 | extra-packages: | 46 | any::rcmdcheck 47 | bioc::basilisk 48 | bioc::basilisk.utils 49 | needs: cran, bioc 50 | 51 | - uses: r-lib/actions/check-r-package@v2 52 | with: 53 | upload-snapshots: true 54 | 
-------------------------------------------------------------------------------- /.github/workflows/pr-commands.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | issue_comment: 5 | types: [created] 6 | 7 | name: Commands 8 | 9 | jobs: 10 | document: 11 | if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/document') }} 12 | name: document 13 | runs-on: ubuntu-latest 14 | env: 15 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 16 | steps: 17 | - uses: actions/checkout@v3 18 | 19 | - uses: r-lib/actions/pr-fetch@v2 20 | with: 21 | repo-token: ${{ secrets.GITHUB_TOKEN }} 22 | 23 | - uses: r-lib/actions/setup-r@v2 24 | with: 25 | use-public-rspm: true 26 | 27 | - uses: r-lib/actions/setup-r-dependencies@v2 28 | with: 29 | extra-packages: any::roxygen2 30 | needs: pr-document 31 | 32 | - name: Document 33 | run: roxygen2::roxygenise() 34 | shell: Rscript {0} 35 | 36 | - name: commit 37 | run: | 38 | git config --local user.name "$GITHUB_ACTOR" 39 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" 40 | git add man/\* NAMESPACE 41 | git commit -m 'Document' 42 | 43 | - uses: r-lib/actions/pr-push@v2 44 | with: 45 | repo-token: ${{ secrets.GITHUB_TOKEN }} 46 | 47 | style: 48 | if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/style') }} 49 | name: style 50 | runs-on: ubuntu-latest 51 | env: 52 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 53 | steps: 54 | - uses: actions/checkout@v3 55 | 56 | - uses: r-lib/actions/pr-fetch@v2 57 | with: 58 | repo-token: 
${{ secrets.GITHUB_TOKEN }} 59 | 60 | - uses: r-lib/actions/setup-r@v2 61 | 62 | - name: Install dependencies 63 | run: install.packages("styler") 64 | shell: Rscript {0} 65 | 66 | - name: Style 67 | run: styler::style_pkg() 68 | shell: Rscript {0} 69 | 70 | - name: commit 71 | run: | 72 | git config --local user.name "$GITHUB_ACTOR" 73 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" 74 | git add \*.R 75 | git commit -m 'Style' 76 | 77 | - uses: r-lib/actions/pr-push@v2 78 | with: 79 | repo-token: ${{ secrets.GITHUB_TOKEN }} 80 | -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: test-coverage 10 | 11 | permissions: read-all 12 | 13 | jobs: 14 | test-coverage: 15 | runs-on: ubuntu-latest 16 | env: 17 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 18 | 19 | steps: 20 | - uses: actions/checkout@v4 21 | 22 | - uses: r-lib/actions/setup-r@v2 23 | with: 24 | use-public-rspm: true 25 | 26 | - uses: r-lib/actions/setup-r-dependencies@v2 27 | with: 28 | extra-packages: any::covr, any::xml2, rcmdcheck, bioc::basilisk, bioc::basilisk.utils 29 | needs: cran, bioc, coverage 30 | 31 | - name: Test coverage 32 | run: | 33 | cov <- covr::package_coverage( 34 | quiet = FALSE, 35 | clean = FALSE, 36 | install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") 37 | ) 38 | covr::to_cobertura(cov) 39 | shell: Rscript {0} 40 | 41 | - uses: codecov/codecov-action@v4 42 | with: 43 | fail_ci_if_error: ${{ github.event_name != 'pull_request' && true || false }} 44 | file: ./cobertura.xml 45 | plugin: noop 
46 | disable_search: true 47 | token: ${{ secrets.CODECOV_TOKEN }} 48 | 49 | - name: Show testthat output 50 | if: always() 51 | run: | 52 | ## -------------------------------------------------------------------- 53 | find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true 54 | shell: bash 55 | 56 | - name: Upload test results 57 | if: failure() 58 | uses: actions/upload-artifact@v4 59 | with: 60 | name: coverage-test-failures 61 | path: ${{ runner.temp }}/package 62 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | docs 2 | .DS_Store 3 | -------------------------------------------------------------------------------- /.trigger_build: -------------------------------------------------------------------------------- 1 | Trigger build 2 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: Ibex 2 | Title: Methods for BCR single-cell embedding 3 | Version: 0.99.18 4 | Authors@R: c( 5 | person(given = "Nick", family = "Borcherding", role = c("aut", "cre"), email = "ncborch@gmail.com")) 6 | Description: Implementation of the Ibex algorithm for single-cell embedding based on BCR sequences. The package includes a standalone function to encode BCR sequence information by amino acid properties or sequence order using tensorflow-based autoencoder. In addition, the package interacts with SingleCellExperiment or Seurat data objects. 
7 | License: MIT + file LICENSE 8 | Encoding: UTF-8 9 | LazyData: true 10 | LazyDataCompression: xz 11 | RoxygenNote: 7.3.2 12 | biocViews: Software, ImmunoOncology, SingleCell, Classification, Annotation, Sequencing, ExperimentHubSoftware 13 | Depends: 14 | R (>= 4.0) 15 | Imports: 16 | basilisk, 17 | immApex, 18 | methods, 19 | Matrix, 20 | reticulate, 21 | rlang, 22 | SeuratObject, 23 | scRepertoire, 24 | SingleCellExperiment, 25 | stats, 26 | SummarizedExperiment, 27 | tensorflow, 28 | tools 29 | Suggests: 30 | BiocStyle, 31 | bluster, 32 | dplyr, 33 | ggplot2, 34 | kableExtra, 35 | knitr, 36 | markdown, 37 | mumosa, 38 | patchwork, 39 | rmarkdown, 40 | scater, 41 | spelling, 42 | testthat (>= 3.0.0), 43 | utils, 44 | viridis 45 | SystemRequirements: Python (via basilisk) 46 | VignetteBuilder: knitr 47 | Language: en-US 48 | URL: https://github.com/BorchLab/Ibex/ 49 | BugReports: https://github.com/BorchLab/Ibex/issues 50 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2025 2 | COPYRIGHT HOLDER: Ibex authors 3 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2025 Ibex authors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the 
Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(quietBCRgenes,Seurat) 4 | S3method(quietBCRgenes,default) 5 | export(CoNGAfy) 6 | export(Ibex.matrix) 7 | export(combineExpandedBCR) 8 | export(getHumanIgPseudoGenes) 9 | export(quietBCRgenes) 10 | export(runIbex) 11 | import(basilisk) 12 | importFrom(Matrix,colSums) 13 | importFrom(Matrix,sparse.model.matrix) 14 | importFrom(SeuratObject,CreateAssayObject) 15 | importFrom(SeuratObject,CreateDimReducObject) 16 | importFrom(SeuratObject,CreateSeuratObject) 17 | importFrom(SeuratObject,DefaultAssay) 18 | importFrom(SeuratObject,GetAssayData) 19 | importFrom(SeuratObject,VariableFeatures) 20 | importFrom(SingleCellExperiment,"altExp<-") 21 | importFrom(SingleCellExperiment,"reducedDim<-") 22 | importFrom(SingleCellExperiment,SingleCellExperiment) 23 | importFrom(SingleCellExperiment,altExp) 24 | importFrom(SingleCellExperiment,colData) 25 | importFrom(SingleCellExperiment,reducedDim) 26 | importFrom(SummarizedExperiment,"assay<-") 27 | importFrom(SummarizedExperiment,"colData<-") 28 | importFrom(SummarizedExperiment,SummarizedExperiment) 29 | importFrom(SummarizedExperiment,assay) 30 | importFrom(SummarizedExperiment,assayNames) 31 | importFrom(SummarizedExperiment,colData) 32 | importFrom(basilisk,basiliskRun) 33 | 
importFrom(immApex,geometricEncoder) 34 | importFrom(immApex,getIR) 35 | importFrom(immApex,onehotEncoder) 36 | importFrom(immApex,propertyEncoder) 37 | importFrom(methods,is) 38 | importFrom(rlang,"%||%") 39 | importFrom(scRepertoire,combineBCR) 40 | importFrom(stats,as.formula) 41 | importFrom(stats,complete.cases) 42 | importFrom(stats,dist) 43 | importFrom(tensorflow,tf) 44 | importFrom(tools,R_user_dir) 45 | importFrom(utils,download.file) 46 | importFrom(utils,read.csv) 47 | -------------------------------------------------------------------------------- /NEWS: -------------------------------------------------------------------------------- 1 | CHANGES IN VERSION 0.99.18 2 | ------------------------ 3 | * assigning current environment to basilisk environment 4 | 5 | CHANGES IN VERSION 0.99.17 6 | ------------------------ 7 | * basilisk is not intuitive 8 | 9 | CHANGES IN VERSION 0.99.16 10 | ------------------------ 11 | * flanking library(ibex) with basilisk creation 12 | 13 | CHANGES IN VERSION 0.99.15 14 | ------------------------ 15 | * defining system env for basilisk in vignette 16 | 17 | CHANGES IN VERSION 0.99.14 18 | ------------------------ 19 | * Adding proper wrapper to ```ibex.matrix()``` 20 | 21 | CHANGES IN VERSION 0.99.12 22 | ------------------------ 23 | * Basilisk conversion 24 | 25 | CHANGES IN VERSION 0.99.11 26 | ------------------------ 27 | * Switched keras python check to chunk-specific for vignette 28 | 29 | CHANGES IN VERSION 0.99.10 30 | ------------------------ 31 | * Added information to example data 32 | 33 | CHANGES IN VERSION 0.99.9 34 | ------------------------ 35 | * Examples now check if python is installed and running 36 | 37 | CHANGES IN VERSION 0.99.8 38 | ------------------------ 39 | * Updated example data to 2k HEL BEAM-Ab from 10x 40 | * Converted ibex_example into SCE object for compliance 41 | * Large revision of vignette to fit new data/format 42 | * Added species argument to runIbex 43 | * Updated CoNGA handling 
of assay for Seurat and Single-Cell Objects. 44 | 45 | CHANGES IN VERSION 0.99.7 46 | ------------------------ 47 | * Integration of Ibex with immApex 48 | * Updated Seurat object to v5 49 | * Updated support for SCE format for ```runIbex()``` 50 | * Update ```CoNGAfy()``` to function with all versions of Seurat 51 | * Updated ```quietBCRgenes()``` to use VariableFeatures() call for SeuratV5 and backward compatibility. 52 | * Add ```getHumanIgPseudoGenes()``` to return a list of human Immunoglobulin Pseudo genes that are kept by ```quietBCRgenes()``` 53 | 54 | ## New Models 55 | * Added New Light and Heavy Chain Models 56 | * Encoding methods now accepted: "OHE", "atchleyFactors", "crucianiProperties", "kideraFactors", "MSWHIM","tScales", "zScales" 57 | * Sequence input: 58 | - Human Heavy: 10000000 59 | - Human Light: 5000000 60 | - Human Heavy-Expanded: 5000000 61 | - Human Light-Expanded: 2500000 62 | - Mouse Heavy: 5000000 63 | - Mouse Heavy-Expanded: 5000000 64 | * Trained convolutional and variational autoencoders for Heavy/Light chains 65 | - Architecture: 512-256-128-256-512 66 | - Parameters: 67 | Batch Size = 128 68 | Latent Dimensions = 128 69 | Epochs = 100 70 | Loss = Mean Squared Error (CNN) & KL Divergence (VAE) 71 | Activation = relu 72 | Learning rate = 1e-6 73 | - Optimizers: Adam 74 | - Early stopping was set to a patience of 10 for minimal validation loss and restoration of best weights 75 | - CNN autoencoders have batch normalization layers between the dense layers. 
76 | 77 | CHANGES IN VERSION 0.99.6 78 | ------------------------ 79 | * Implementing GitHub action workflows 80 | * Adding testthat framework 81 | * Deprecating clonalCommunity 82 | 83 | CHANGES IN VERSION 0.99.5 84 | ------------------------ 85 | * Added geometric encoding using the BLOSUM62 matrix 86 | * Trained classical and variational autoencoders for light/heavy chains with 1.5 million cdr sequences 87 | - Architecture: 256-128-30-128-256 88 | - Parameters: 89 | Batch Size = 64 90 | Latent Dimensions = 30 91 | Epochs = 100 92 | Loss = Mean Squared Error 93 | - Optimizers: Adam 94 | - Early stopping was set to a patience of 10 for minimal validation loss and restoration of best weights 95 | - learn rate varied by models 96 | - classical auto encoders have batch normalization layers between the dense layers. 97 | 98 | CHANGES IN VERSION 0.99.4 99 | ------------------------ 100 | * Added chain.checker() function to allow for uncapitalized chain calls 101 | 102 | CHANGES IN VERSION 0.99.3 103 | ------------------------ 104 | * Updated models for manuscript revision 105 | - Architecture: 256-128-30-128-256 106 | - Parameters: 107 | Batch Size = 64 108 | Learning Rate = 0.001 109 | Latent Dimensions = 30 110 | Epochs = 50 111 | Loss = Mean Squared Error 112 | - Optimizers: RAdam (for amino acid properties) and RMSprop (for OHE) 113 | - Early stopping was set to a patience of 10 for minimal validation loss and restoration of best weights 114 | 115 | 116 | CHANGES IN VERSION 0.99.2 117 | ------------------------ 118 | * Updated models to include radam optimization, early stop for min 10 epochs, and all trained on 800,000 unique cdr3s 119 | * quietBCRgenes() now does not remove human Ig pseudogenes 120 | 121 | 122 | CHANGES IN VERSION 0.99.1 123 | ------------------------ 124 | * Added detection of chain length to function call 125 | * Added support for direct output of combineBCR() 126 | * Modified quietBCR() to include constant regions and J-chains 127 | 128 | 129 | 
CHANGES IN VERSION 0.99.0 130 | ------------------------ 131 | * Initial commit -------------------------------------------------------------------------------- /R/CoNGAfy.R: --------------------------------------------------------------------------------
#' Reduce a Single-Cell Object to Representative Cells
#'
#' This function generates a single-cell object with a reduced representation
#' of RNA expression by clone. The approach is inspired by the method introduced
#' in \href{https://pubmed.ncbi.nlm.nih.gov/34426704/}{CoNGA}. Users can
#' generate either a mean representation of features by clone or identify a
#' representative cell using count-based minimal Euclidean distance.
#' Please read and cite the original work by the authors of CoNGA.
#'
#' Cells whose \code{CTaa} metadata value is \code{NA} are removed before the
#' reduction is computed.
#'
#' @examples
#' ibex.clones <- CoNGAfy(ibex_example,
#'                         method = "dist")
#'
#' ibex.clones <- CoNGAfy(ibex_example,
#'                         method = "mean")
#'
#' @param input.data A single-cell dataset in Seurat or SingleCellExperiment format.
#' @param method Character. Specifies the method to reduce the dataset:
#' \itemize{
#'   \item "mean" - Computes the mean expression of selected features across cells in each clonotype.
#'   \item "dist" - For each clonotype, selects the cell with the minimal summed
#'   Euclidean distance to the other cells of that clonotype, computed on the
#'   assay counts.
#' }
#' @param features Character vector. Selected genes for the reduction. If \code{NULL} (default), all genes are used.
#' @param assay Character. The name of the assay or assays to include in the
#' output. Defaults to \code{"RNA"}. The first entry becomes the primary assay
#' of the output; further entries are added as additional assays (Seurat) or
#' as an alternative experiment named \code{"BEAM"} (SingleCellExperiment).
#' @param meta.carry Character vector. Metadata variables to carry over from
#' the input single-cell object to the output. \code{"CTaa"} is always carried
#' because it identifies the clones.
#'
#' @return A reduced single-cell object where each clonotype is represented by a single cell.
#'
#' @export
#' @importFrom SeuratObject CreateSeuratObject CreateAssayObject
#' @importFrom SingleCellExperiment SingleCellExperiment altExp<-
#' @importFrom SummarizedExperiment assay assay<- SummarizedExperiment colData<-
#' colData

CoNGAfy <- function(input.data,
                    method = "dist",
                    features = NULL,
                    assay = "RNA",
                    meta.carry = c("CTaa", "CTgene")) {
  # Validate up front: an unrecognised method previously skipped both
  # reduction branches and failed later with an unrelated attribute error.
  method <- match.arg(method, c("dist", "mean"))
  if (!is.character(assay) || length(assay) == 0) {
    stop("'assay' must be a character vector or a single character string.")
  }
  # "CTaa" keys the clone table built below, so it must be present and first.
  meta.carry <- union("CTaa", meta.carry)

  is.seurat <- inherits(input.data, "Seurat")
  is.sce <- inherits(input.data, "SingleCellExperiment")
  if (!is.seurat && !is.sce) {
    stop("The input.data is not a Seurat or SingleCellExperiment object.")
  }

  # Keep only the cells that carry a clonotype call
  if (is.seurat) {
    cells.chains <- rownames(input.data[[]][!is.na(input.data[["CTaa"]]), ])
    input.data <- subset(input.data, cells = cells.chains)
  } else {
    cells.chains <- rownames(as.data.frame(colData(input.data)[!is.na(input.data$CTaa), ]))
    input.data <- input.data[, which(colnames(input.data) %in% cells.chains)]
  }

  # One reduced matrix per requested assay, named by assay
  reducer <- switch(method, mean = .CoNGA.mean, dist = .CoNGA.dist)
  conga <- lapply(assay, function(assay.name) {
    reducer(input.data, features, assay.name)
  })
  names(conga) <- assay

  if (is.seurat) {
    sc.output <- CreateSeuratObject(conga[[1]], assay = names(conga)[1], project = "Ibex")
    for (y in seq_along(conga)[-1]) {
      sc.output[[names(conga)[y]]] <- CreateAssayObject(conga[[y]])
    }
    CTge <- unique(input.data[[]][, meta.carry, drop = FALSE])
  } else {
    sc.output <- SingleCellExperiment(assay = conga[[1]])
    # NOTE(review): every additional assay is written to the same "BEAM"
    # alternative experiment, so with more than two assays only the last one
    # survives - confirm whether that is intended.
    for (y in seq_along(conga)[-1]) {
      altExp(sc.output, "BEAM") <- SummarizedExperiment(
        assays = list(
          counts = as.matrix(conga[[y]])
        ),
        colData = colData(sc.output)
      )
    }
    sc.output$CTaa <- rownames(sc.output@colData)
    CTge <- data.frame(unique(input.data@colData[, meta.carry, drop = FALSE]))
  }

  # One metadata row per clone, keyed by CTaa. Column names are left as
  # selected by meta.carry instead of being overwritten with a fixed pair.
  CTge <- CTge[!duplicated(CTge$CTaa), , drop = FALSE]
  rownames(CTge) <- CTge$CTaa
  sc.output <- add.meta.data(sc.output, CTge, colnames(CTge))
  sc.output
}

# Pulls Assay Data
#
# Returns the matrix for a single assay name from a Seurat or
# SingleCellExperiment object. For SingleCellExperiment input, "RNA" is
# mapped to "counts"; names that are not among the main assay names are
# looked up in the alternative experiment instead.
#
# NOTE(review): the `name =` argument in the assay() calls is passed through
# unchanged from the original code; confirm against the SummarizedExperiment
# accessor signature that it selects the assay by name as intended.
#' @importFrom SummarizedExperiment assayNames assay
#' @importFrom SingleCellExperiment altExp
#' @keywords internal
#' @noRd
grabAssay <- function(input.data, assay) {
  if (inherits(x = input.data, what = "Seurat")) {
    return(input.data[[assay]]$counts)
  }
  if (!inherits(x = input.data, what = "SingleCellExperiment")) {
    # Previously fell through and failed with "object 'data.use' not found"
    stop("The input.data is not a Seurat or SingleCellExperiment object.")
  }
  # Scalar condition, so use the short-circuiting operator
  if (assay == "RNA" || assay %in% assayNames(input.data)) {
    if (assay == "RNA") assay <- "counts"
    data.use <- assay(input.data, name = assay)
  } else {
    data.use <- assay(altExp(input.data), name = assay)
  }
  data.use
}

# Calculate best representation individual clones
#
# For every clone (unique CTaa value) one barcode is selected: clones with a
# single cell keep that cell; for clones with several cells the cell with the
# smallest summed Euclidean distance to the other cells of the clone is used.
# Returns a feature-by-clone matrix (columns renamed to the clone's CTaa), or
# a named list of such matrices when several assays are requested.
#' @importFrom rlang %||%
#' @importFrom SummarizedExperiment assay
#' @importFrom SeuratObject GetAssayData
#' @importFrom methods is
#' @importFrom stats dist
#' @keywords internal
#' @noRd
.CoNGA.dist <- function(input.data,
                        features = NULL,
                        assay = "RNA") {
  if (!is.character(assay)) {
    stop("'assay' must be a character vector or a single character string.")
  }

  # Clone assignment per cell barcode
  meta <- grabMeta(input.data)
  if (!"CTaa" %in% colnames(meta)) {
    stop("The metadata must contain a column named 'CTaa'.")
  }
  meta_ct <- data.frame(CTaa = meta[, "CTaa"], row.names = rownames(meta))

  # Split clones by size: singletons represent themselves
  clone_tab <- table(meta_ct$CTaa)
  multi_clone_names <- names(clone_tab[clone_tab > 1])
  single_clone_names <- names(clone_tab[clone_tab == 1])

  process_single_assay <- function(assay_name) {
    data_mat <- grabAssay(input.data, assay_name)

    # Restrict to requested features that exist in this assay
    features_to_use <- intersect(features %||% rownames(data_mat),
                                 rownames(data_mat))
    if (length(features_to_use) == 0) {
      warning("No overlapping features found in assay '", assay_name, "'. Returning empty matrix.")
      return(matrix(nrow = 0, ncol = 0))
    }
    data_mat_use <- data_mat[features_to_use, , drop = FALSE]

    single_barcodes <- rownames(meta_ct)[meta_ct$CTaa %in% single_clone_names]

    # One representative barcode per multi-cell clone
    multi_barcodes <- unlist(lapply(multi_clone_names, function(clone_name) {
      clone_cells <- rownames(meta_ct)[meta_ct$CTaa == clone_name]
      # dist() works on rows, so transpose to get cell-by-feature
      dist_mat <- as.matrix(dist(t(as.matrix(data_mat_use[, clone_cells, drop = FALSE]))))
      clone_cells[which.min(rowSums(dist_mat))]
    }), use.names = FALSE)

    # Order is singletons first, then multi-cell clones
    best_barcodes <- c(single_barcodes, multi_barcodes)

    data_return <- data_mat_use[, best_barcodes, drop = FALSE]
    colnames(data_return) <- meta_ct$CTaa[match(best_barcodes, rownames(meta_ct))]
    data_return
  }

  if (length(assay) > 1) {
    results_list <- lapply(assay, process_single_assay)
    names(results_list) <- assay
    return(results_list)
  }
  process_single_assay(assay)
}

# Calculate mean across individual clones
#
# Builds a cell-by-clone indicator matrix from CTaa, scales each column to sum
# to one, and multiplies the assay matrix by it, giving the per-clone mean of
# every feature. Returns a feature-by-clone matrix, or a named list of such
# matrices when several assays are requested.
#' @importFrom rlang %||%
#' @importFrom Matrix sparse.model.matrix colSums
#' @importFrom SummarizedExperiment assay
#' @importFrom SeuratObject GetAssayData
#' @importFrom stats as.formula
#' @keywords internal
#' @noRd
.CoNGA.mean <- function(input.data,
                        features = NULL,
                        assay = "RNA") {
  if (!is.character(assay)) {
    stop("'assay' must be a character vector or a single character string.")
  }

  # Clone assignment per cell barcode
  meta <- grabMeta(input.data)
  if (!"CTaa" %in% colnames(meta)) {
    stop("The metadata must contain a column named 'CTaa'.")
  }
  meta_ct <- data.frame(CTaa = meta[, "CTaa"], row.names = rownames(meta))

  # Drop cells without a clone call, then treat clones as factor levels
  meta_ct <- meta_ct[which(rowSums(is.na(meta_ct)) == 0), , drop = FALSE]
  meta_ct$CTaa <- as.factor(meta_ct$CTaa)

  # ~0 + CTaa: no intercept, one indicator column per clone
  category_matrix <- sparse.model.matrix(
    as.formula('~0+CTaa'),
    data = meta_ct
  )

  # Remove empty clones and scale each column to sum to 1, so the matrix
  # product below yields means rather than sums
  col_sums <- Matrix::colSums(category_matrix)
  keep_cols <- which(col_sums > 0)
  category_matrix <- category_matrix[, keep_cols, drop = FALSE]
  col_sums <- col_sums[keep_cols]
  # NOTE(review): sweep() on a sparse Matrix may return a dense object -
  # confirm if memory use matters for large inputs.
  category_matrix <- sweep(category_matrix, MARGIN = 2, STATS = col_sums, FUN = "/")

  process_single_assay <- function(assay_name) {
    data_mat <- grabAssay(input.data, assay_name)

    # Restrict to requested features that exist in this assay
    features_to_use <- intersect(features %||% rownames(data_mat),
                                 rownames(data_mat))
    if (length(features_to_use) == 0) {
      warning("No overlapping features found in assay '", assay_name, "'. Returning empty matrix.")
      return(matrix(nrow = 0, ncol = 0))
    }
    data_mat_use <- data_mat[features_to_use, , drop = FALSE]

    # (features x cells) %*% (cells x clones) = per-clone mean expression
    data_return <- data_mat_use %*% category_matrix

    # model.matrix prefixes level names with the variable name; strip it
    colnames(data_return) <- gsub("^CTaa", "", colnames(category_matrix))
    data_return
  }

  if (length(assay) > 1) {
    results_list <- lapply(assay, process_single_assay)
    names(results_list) <- assay
    return(results_list)
  }
  process_single_assay(assay)
}
-------------------------------------------------------------------------------- /R/Ibex.matrix.R: -------------------------------------------------------------------------------- 1 | #' Ibex Matrix Interface 2 | #' 3 | #' This function runs the Ibex algorithm to generate latent vectors from 4 | #' input data. 
The output can be returned as a matrix, with options to choose 5 | #' between deep learning autoencoders or geometric transformations based on 6 | #' the BLOSUM62 matrix. 7 | #' 8 | #' @examples 9 | #' # Using the encoder method with a variational autoencoder 10 | #' ibex_values <- Ibex.matrix(ibex_example, 11 | #' chain = "Heavy", 12 | #' method = "encoder", 13 | #' encoder.model = "VAE", 14 | #' encoder.input = "atchleyFactors") 15 | #' 16 | #' # Using the geometric method with a specified angle 17 | #' ibex_values <- Ibex.matrix(ibex_example, 18 | #' chain = "Heavy", 19 | #' method = "geometric", 20 | #' geometric.theta = pi) 21 | #' 22 | #' @param input.data Input data, which can be: 23 | #' \itemize{ 24 | #' \item A Single Cell Object in Seurat or SingleCellExperiment format 25 | #' \item The output of \code{combineBCR()} from the \code{scRepertoire} package 26 | #' } 27 | #' @param chain Character. Specifies which chain to analyze: 28 | #' \itemize{ 29 | #' \item "Heavy" for the heavy chain 30 | #' \item "Light" for the light chain 31 | #' } 32 | #' @param method Character. The algorithm to use for generating latent vectors: 33 | #' \itemize{ 34 | #' \item "encoder" - Uses deep learning autoencoders 35 | #' \item "geometric" - Uses geometric transformations based on the 36 | #' BLOSUM62 matrix 37 | #' } 38 | #' @param encoder.model Character. The type of autoencoder model to use: 39 | #' \itemize{ 40 | #' \item "CNN" - CDR3 Convolutional Neural Network-based autoencoder 41 | #' \item "VAE" - CDR3 Variational Autoencoder 42 | #' \item "CNN.EXP" - CDR1/2/3 CNN 43 | #' \item "VAE.EXP" - CDR1/2/3 VAE 44 | #' } 45 | #' @param encoder.input Character. Specifies the input features for the 46 | #' encoder model. Options include: 47 | #' \itemize{ 48 | #' \item Amino Acid Properties: "atchleyFactors", "crucianiProperties", 49 | #' "kideraFactors", "MSWHIM","tScales", "zScales" 50 | #' \item "OHE" for One Hot Encoding 51 | #' } 52 | #' @param geometric.theta Numeric. 
Angle (in radians) for the geometric 53 | #' transformation. Only used when \code{method = "geometric"}. 54 | #' @param species Character. Default is "Human" or "Mouse". 55 | #' @param verbose Logical. Whether to print progress messages. Default is TRUE. 56 | #' @return A matrix of latent vectors generated by the specified method. 57 | #' 58 | #' @export 59 | #' @importFrom basilisk basiliskRun 60 | #' @importFrom SeuratObject CreateDimReducObject 61 | #' @importFrom immApex propertyEncoder onehotEncoder geometricEncoder getIR 62 | #' @importFrom stats complete.cases 63 | #' @importFrom tensorflow tf 64 | #' 65 | #' @seealso 66 | #' \code{\link[immApex]{propertyEncoder}}, 67 | #' \code{\link[immApex]{geometricEncoder}} 68 | Ibex.matrix <- function(input.data, 69 | chain = c("Heavy", "Light"), 70 | method = c("encoder", "geometric"), 71 | encoder.model = c("CNN", "VAE", "CNN.EXP", "VAE.EXP"), 72 | encoder.input = c("atchleyFactors", "crucianiProperties", 73 | "kideraFactors", "MSWHIM", "tScales", "OHE"), 74 | geometric.theta = pi/3, 75 | species = "Human", 76 | verbose = TRUE) { 77 | 78 | # Match arguments for better validation 79 | chain <- match.arg(chain) 80 | method <- match.arg(method) 81 | 82 | if (method == "encoder") { 83 | encoder.model <- match.arg(encoder.model) 84 | encoder.input <- match.arg(encoder.input) 85 | expanded.sequences <- grepl(".EXP", encoder.model) 86 | } else { 87 | expanded.sequences <- FALSE 88 | } 89 | 90 | # Define loci based on chain selection 91 | loci <- if (chain == "Heavy") "IGH" else c("IGK", "IGL") 92 | 93 | #Getting Sequences 94 | BCR <- getIR(input.data, chain, sequence.type = "aa")[[1]] 95 | BCR <- BCR[complete.cases(BCR[,2]), ] 96 | 97 | # Determine dictionary for sequence encoding 98 | if (expanded.sequences) { 99 | if (all(grepl("-", BCR[,2]))) { 100 | stop("Expanded sequences are not properly formated, please use combineExpandedBCR().") 101 | } 102 | BCR[,2] <- gsub("-", "_", BCR[,2]) 103 | dictionary <- c(amino.acids, 
"_") 104 | } else { 105 | dictionary <- amino.acids 106 | } 107 | 108 | # Filter by gene locus 109 | BCR <- BCR[grepl(paste0(loci, collapse = "|"), BCR[, "v"]), ] 110 | 111 | # Ensure sequences meet length criteria 112 | checkLength(x = BCR[,2], expanded = expanded.sequences) 113 | length.to.use <- if (expanded.sequences) 90 else 45 114 | 115 | if (method == "encoder") { 116 | # Getting Model Path 117 | model.path <- aa.model.loader(species = species, 118 | chain = chain, 119 | encoder.input = encoder.input, 120 | encoder.model = encoder.model) 121 | 122 | if (verbose) print("Encoding sequences and calculating latent dimensions...") 123 | 124 | # Run ENCODING and PREDICTION inside the basilisk environment 125 | reduction <- basiliskRun( 126 | env = IbexEnv, 127 | fun = function(sequences, enc.input, max.len, seq.dict, mpath, verbose.fun) { 128 | 129 | # 1. Encode sequences inside the correct environment 130 | if(enc.input == "OHE") { 131 | encoded.values <- immApex::onehotEncoder(sequences, 132 | max.length = max.len, 133 | convert.to.matrix = TRUE, 134 | sequence.dictionary = seq.dict, 135 | padding.symbol = ".") 136 | } else { 137 | encoded.values <- immApex::propertyEncoder(sequences, 138 | max.length = max.len, 139 | method.to.use = enc.input, 140 | convert.to.matrix = TRUE) 141 | } 142 | 143 | # 2. 
Load model and predict 144 | keras <- reticulate::import("keras", delay_load = FALSE) 145 | model <- keras$models$load_model(mpath) 146 | pred <- model$predict(encoded.values) 147 | 148 | return(as.array(pred)) 149 | }, 150 | # Pass arguments to the function inside basiliskRun 151 | sequences = BCR[,2], 152 | enc.input = encoder.input, 153 | max.len = length.to.use, 154 | seq.dict = dictionary, 155 | mpath = model.path 156 | ) 157 | 158 | } else if (method == "geometric") { 159 | if (verbose) print("Performing geometric transformation...") 160 | BCR[,2] <- gsub("-", "", BCR[,2]) 161 | reduction <- suppressMessages(geometricEncoder(BCR[,2], theta = geometric.theta)) 162 | } 163 | 164 | reduction <- as.data.frame(reduction) 165 | barcodes <- BCR[,1] 166 | rownames(reduction) <- barcodes 167 | colnames(reduction) <- paste0("Ibex_", seq_len(ncol(reduction))) 168 | return(reduction) 169 | } -------------------------------------------------------------------------------- /R/basiliskEnv.R: -------------------------------------------------------------------------------- 1 | #' @import basilisk 2 | IbexEnv <- BasiliskEnvironment( 3 | envname = "IbexEnv", 4 | pkgname = "Ibex", 5 | packages = c( 6 | "python=3.9", 7 | "keras=3.6.*", 8 | "tensorflow=2.18.*", 9 | "h5py=3.13", 10 | "numpy=1.26" 11 | ) 12 | ) -------------------------------------------------------------------------------- /R/combineExpandedBCR.R: -------------------------------------------------------------------------------- 1 | #' combineBCR for CDR1/2/3 sequences 2 | #' 3 | #' This function enhances BCR processing by incorporating additional 4 | #' sequence information from CDR1 and CDR2 regions before applying the BCR 5 | #' combination logic. The function depends on 6 | #' \code{\link[scRepertoire]{combineBCR}} from the scRepertoire package. 
#' combineBCR for CDR1/2/3 sequences
#'
#' This function enhances BCR processing by incorporating additional
#' sequence information from CDR1 and CDR2 regions before applying the BCR
#' combination logic. The function depends on
#' \code{\link[scRepertoire]{combineBCR}} from the scRepertoire package.
#'
#' For every input data frame the \code{cdr3} column is replaced by
#' \code{cdr1-cdr2-cdr3} and the \code{cdr3_nt} column by
#' \code{cdr1_nt-cdr2_nt-cdr3_nt} before \code{combineBCR()} is called.
#'
#' @examples
#' combined.BCR <- combineExpandedBCR(list(ibex_vdj),
#'                                    samples = "Sample1",
#'                                    filterNonproductive = TRUE)
#'
#' @param input.data List of filtered contig annotations. Each element must
#' be a data frame containing the columns \code{cdr1}, \code{cdr2},
#' \code{cdr3}, \code{cdr1_nt}, \code{cdr2_nt} and \code{cdr3_nt}.
#' @param samples Character vector. Labels of samples (required).
#' @param ID Character vector. Additional sample labeling (optional).
#' @param call.related.clones Logical. Whether to call related clones based on
#' nucleotide sequence and V gene. Default is `TRUE`.
#' @param threshold Numeric. Normalized edit distance for clone clustering.
#' Default is `0.85`.
#' @param removeNA Logical. Whether to remove any chain without values. Default
#' is `FALSE`.
#' @param removeMulti Logical. Whether to remove barcodes with more than two
#' chains. Default is `FALSE`.
#' @param filterMulti Logical. Whether to select the highest-expressing light
#' and heavy chains. Default is `TRUE`.
#' @param filterNonproductive Logical. Whether to remove nonproductive chains.
#' Default is `TRUE`.
#'
#' @return A list of consolidated BCR clones with expanded CDR sequences.
#' @seealso
#' \code{\link[scRepertoire]{combineBCR}}
#'
#' @importFrom scRepertoire combineBCR
#' @export
combineExpandedBCR <- function(input.data,
                               samples = NULL,
                               ID = NULL,
                               call.related.clones = TRUE,
                               threshold = 0.85,
                               removeNA = FALSE,
                               removeMulti = FALSE,
                               filterMulti = TRUE,
                               filterNonproductive = TRUE) {

  # Ensure input is a list of data frames (vapply keeps the result logical
  # even for an empty list)
  if (!is.list(input.data) ||
      !all(vapply(input.data, is.data.frame, logical(1)))) {
    stop("Input data must be a list of data frames.")
  }

  aa.cols <- c("cdr1", "cdr2", "cdr3")
  nt.cols <- c("cdr1_nt", "cdr2_nt", "cdr3_nt")

  # Modify each data frame in the list
  modified_data <- lapply(input.data, function(df) {
    if (!all(aa.cols %in% colnames(df))) {
      stop("Each data frame must contain 'cdr1', 'cdr2', and 'cdr3' columns.")
    }
    # The nucleotide columns are concatenated below as well; fail with a
    # clear message instead of an obscure assignment error when absent.
    missing.nt <- setdiff(nt.cols, colnames(df))
    if (length(missing.nt) > 0) {
      stop("Each data frame must also contain the nucleotide columns: ",
           paste0("'", missing.nt, "'", collapse = ", "), ".")
    }

    # Create concatenated CDR sequences; [[ ]] avoids partial name matching
    df[["cdr3"]] <- paste(df[["cdr1"]], df[["cdr2"]], df[["cdr3"]], sep = "-")
    df[["cdr3_nt"]] <- paste(df[["cdr1_nt"]], df[["cdr2_nt"]], df[["cdr3_nt"]],
                             sep = "-")

    df
  })

  # Call combineBCR() on the modified data
  combined_result <- combineBCR(input.data = modified_data,
                                samples = samples,
                                ID = ID,
                                call.related.clones = call.related.clones,
                                threshold = threshold,
                                removeNA = removeNA,
                                removeMulti = removeMulti,
                                filterMulti = filterMulti,
                                filterNonproductive = filterNonproductive)

  return(combined_result)
}
# Load hook: registers, in a single call, the symbols that the package code
# refers to without a visible binding, using utils::globalVariables().
# Returns NULL invisibly.
.onLoad <- function(libname, pkgname) {
  unbound.names <- c(
    "AF.col", "KF.col", "array_reshape", "is",
    "reducedDim<-", "na.omit", "median", "slot",
    "get.adjacency", "nn", "data", "ibex.data",
    "colData<-", "TR", "graph.edgelist", "f"
  )
  utils::globalVariables(unbound.names)

  invisible()
}
20 | #' } 21 | #' \item{reducedDims}{Contains dimensionality reductions: \code{PCA}, \code{pca}, and \code{apca}.} 22 | #' \item{altExp}{One alternative experiment named \code{BEAM} containing additional expression data.} 23 | #' } 24 | #' @name ibex_example 25 | #' @docType data 26 | NULL 27 | -------------------------------------------------------------------------------- /R/ibex_vdj.R: -------------------------------------------------------------------------------- 1 | #' Full filtered_annotated_contig.csv from the 10x 2 | #' 2k_BEAM-Ab_Mouse_HEL_5pv2 3 | #' 4 | #' This dataset contains single-cell V(D)J sequencing annotations 5 | #' from the 10x Genomics BEAM-Ab Mouse dataset. It includes V(D)J 6 | #' gene calls, CDR regions, productivity information, and clonotype 7 | #' assignments for each contig. 8 | #' 9 | #' @format A data frame with 6 rows and 35 columns: 10 | #' \describe{ 11 | #' \item{barcode}{Character. Unique cell barcode.} 12 | #' \item{is_cell}{Logical. Whether the barcode is identified as a cell.} 13 | #' \item{contig_id}{Character. Unique identifier for each contig.} 14 | #' \item{high_confidence}{Logical. Whether the contig is high confidence.} 15 | #' \item{length}{Integer. Length of the contig.} 16 | #' \item{chain}{Character. Chain type (e.g., IGH, IGK).} 17 | #' \item{v_gene}{Character. V gene annotation.} 18 | #' \item{d_gene}{Character. D gene annotation.} 19 | #' \item{j_gene}{Character. J gene annotation.} 20 | #' \item{c_gene}{Character. C gene annotation.} 21 | #' \item{full_length}{Logical. Whether the contig is full-length.} 22 | #' \item{productive}{Logical. Whether the contig is productive.} 23 | #' \item{fwr1}{Character. Amino acid sequence for Framework Region 1.} 24 | #' \item{fwr1_nt}{Character. Nucleotide sequence for FWR1.} 25 | #' \item{cdr1}{Character. Amino acid sequence for CDR1.} 26 | #' \item{cdr1_nt}{Character. Nucleotide sequence for CDR1.} 27 | #' \item{fwr2}{Character. 
#' Remove BCR Genes from Variable Gene Results
#'
#' Strips B-cell receptor (BCR) genes out of a set of variable features.
#' Typical single-cell workflows feed highly variable genes into principal
#' component analysis (PCA) and other dimensional reductions; taking the BCR
#' genes out keeps that gene set from being driven by receptor genes.
#'
#' @examples
#' # Remove BCR genes from the variable features of a vector
#' variable.genes <- c("IGHV1-69", "IGHV3-23", "IGHV4-34", "IGHV5-51", "IGHV6-1",
#'                     "IGKV1-5", "IGKV3-20", "IGLV2-14", "IGLV3-21", "IGLV6-57",
#'                     "TP53", "MYC", "BCL2", "CD19", "CD79A", "CD79B", "PAX5")
#' variable.genes <- quietBCRgenes(variable.genes)
#'
#' @param sc A single-cell dataset, which can be:
#'   \itemize{
#'     \item A Seurat object
#'     \item A vector of variable genes generated by workflows such as Bioconductor's \code{scran}
#'   }
#' @param assay Character. The Seurat assay whose variable features are
#' edited. When \code{NULL}, the object's default assay is used.
#'
#' @return The input Seurat object or vector with BCR genes removed from the variable features.
#'
#' @importFrom SeuratObject DefaultAssay
#' @export
quietBCRgenes <- function(sc, assay = NULL) {
  UseMethod("quietBCRgenes")
}

#' @export
#' @importFrom SeuratObject DefaultAssay VariableFeatures
quietBCRgenes.Seurat <- function(sc, assay = NULL) {
  # Fall back to the object's default assay when none was requested
  target.assay <- if (is.null(assay)) DefaultAssay(sc) else assay
  # Filter the assay's variable features with the vector method, then store
  # the filtered set back on the object
  kept.features <- quietBCRgenes.default(
    SeuratObject::VariableFeatures(sc, assay = target.assay)
  )
  SeuratObject::VariableFeatures(sc, assay = target.assay) <- kept.features
  sc
}

#' @export
quietBCRgenes.default <- function(sc, assay = NULL) {
  # Candidate removals: names matching the IG heavy/kappa/lambda pattern,
  # plus JCHAIN
  ig.hits <- grep("^IG[HLK][VDJCAGM]", sc, value = TRUE)
  drop.set <- c(ig.hits, "JCHAIN")
  # Names listed by getHumanIgPseudoGenes() are taken out of the removal set,
  # so they stay in the returned vector
  drop.set <- drop.set[!(drop.set %in% getHumanIgPseudoGenes())]
  sc[!(sc %in% drop.set)]
}
#' Get Human Immunoglobulin pseudogenes
#'
#' Returns the package's built-in list of human immunoglobulin pseudogene
#' symbols. \code{quietBCRgenes} consults this list when it decides which
#' immunoglobulin genes to strip from a variable-feature set.
#'
#' @return Character vector of human immunoglobulin pseudogenes, with
#' duplicates removed.
#' @export
#'
getHumanIgPseudoGenes <- function() {
  # The chunks below are concatenated in this exact order; the labels only
  # describe the symbol prefixes found in each chunk.

  # IGHJ*P, IGLC4/5 and IGHEP* entries
  j.and.c <- c(
    "IGHJ1P", "IGHJ2P", "IGHJ3P", "IGLC4", "IGLC5", "IGHEP1", "IGHEP2"
  )

  # IGHV entries with a single numeric suffix
  ighv <- c(
    "IGHV1-12", "IGHV1-14", "IGHV1-17", "IGHV1-67", "IGHV1-68",
    "IGHV2-10", "IGHV3-6", "IGHV3-19", "IGHV3-22", "IGHV3-25",
    "IGHV3-29", "IGHV3-32", "IGHV3-36", "IGHV3-37", "IGHV3-41",
    "IGHV3-42", "IGHV3-47", "IGHV3-50", "IGHV3-52", "IGHV3-54",
    "IGHV3-57", "IGHV3-60", "IGHV3-62", "IGHV3-63", "IGHV3-65",
    "IGHV3-71", "IGHV3-75", "IGHV3-76", "IGHV3-79", "IGHV4-55",
    "IGHV4-80", "IGHV5-78", "IGHV7-27", "IGHV7-40", "IGHV7-56",
    "IGHVIII-44", "IGHVIII-82"
  )

  # IGKV entries
  igkv <- c(
    "IGKV1-22", "IGKV1-32", "IGKV1-35",
    "IGKV1D-22", "IGKV1D-27", "IGKV1D-32", "IGKV1D-35", "IGKVOR-2",
    "IGKVOR-3", "IGKVOR-4", "IGKV2-4", "IGKV2-10", "IGKV2-14", "IGKV2-18",
    "IGKV2-19", "IGKV2-23", "IGKV2-26", "IGKV2-36", "IGKV2-38",
    "IGKV2D-10", "IGKV2D-14", "IGKV2D-18", "IGKV2D-19", "IGKV2D-23",
    "IGKV2D-36", "IGKV2D-38", "IGKV3-25", "IGKV3-31", "IGKV3-34",
    "IGKV7-3"
  )

  # IGLC / IGLJ / IGLV entries
  igl <- c(
    "IGLCOR22-1", "IGLCOR22-2", "IGLJCOR18", "IGLV1-41",
    "IGLV1-62", "IGLV2-5", "IGLV2-28", "IGLV2-34", "IGLV3-2",
    "IGLV3-4", "IGLV3-6", "IGLV3-7", "IGLV3-13", "IGLV3-15",
    "IGLV3-17", "IGLV3-24", "IGLV3-26", "IGLV3-29", "IGLV3-30",
    "IGLV3-31", "IGLV7-35", "IGLV10-67", "IGLVI-20", "IGLVI-38",
    "IGLVI-42", "IGLVI-56", "IGLVI-63", "IGLVI-68", "IGLVI-70",
    "IGLVIV-53", "IGLVIV-59", "IGLVIV-64", "IGLVIV-65", "IGLVV-58",
    "IGLVV-66"
  )

  # IGHV "OR" entries and IGHV entries with a two-part numeric suffix
  ighv.extended <- c(
    "IGHV1OR15-2", "IGHV1OR15-3", "IGHV1OR15-4", "IGHV1OR15-6",
    "IGHV1OR16-1", "IGHV1OR16-2", "IGHV1OR16-3", "IGHV1OR16-4", "IGHV3-30-2",
    "IGHV3-33-2", "IGHV3-69-1", "IGHV3OR15-7", "IGHV3OR16-6", "IGHV3OR16-7",
    "IGHV3OR16-11", "IGHV3OR16-14", "IGHV3OR16-15", "IGHV3OR16-16", "IGHV7-34-1",
    "IGHVII-1-1", "IGHVII-15-1", "IGHVII-20-1", "IGHVII-22-1", "IGHVII-26-2",
    "IGHVII-28-1", "IGHVII-30-1", "IGHVII-30-21", "IGHVII-31-1", "IGHVII-33-1",
    "IGHVII-40-1", "IGHVII-43-1", "IGHVII-44-2", "IGHVII-46-1", "IGHVII-49-1",
    "IGHVII-51-2", "IGHVII-53-1", "IGHVII-60-1", "IGHVII-62-1", "IGHVII-65-1",
    "IGHVII-67-1", "IGHVII-74-1", "IGHVII-78-1", "IGHVIII-2-1", "IGHVIII-5-1",
    "IGHVIII-5-2", "IGHVIII-11-1", "IGHVIII-13-1", "IGHVIII-16-1", "IGHVIII-22-2",
    "IGHVIII-25-1", "IGHVIII-26-1", "IGHVIII-38-1", "IGHVIII-47-1", "IGHVIII-67-2",
    "IGHVIII-67-3", "IGHVIII-67-4", "IGHVIII-76-1", "IGHVIV-44-1"
  )

  # IGKV "OR" entries
  igkv.or <- c(
    "IGKV1OR1-1",
    "IGKV1OR2-1", "IGKV1OR2-2", "IGKV1OR2-3", "IGKV1OR2-6", "IGKV1OR2-9",
    "IGKV1OR2-11", "IGKV1OR2-118", "IGKV1OR9-1", "IGKV1OR9-2", "IGKV1OR10-1",
    "IGKV1OR15-118", "IGKV1OR22-1", "IGKV1OR22-5", "IGKV1ORY-1", "IGKV2OR2-1",
    "IGKV2OR2-2", "IGKV2OR2-4", "IGKV2OR2-7", "IGKV2OR2-7D", "IGKV2OR2-8",
    "IGKV2OR2-10", "IGKV2OR22-3", "IGKV2OR22-4", "IGKV3OR2-5", "IGKV3OR22-2",
    "IGKV8OR8-1"
  )

  # Remaining IGLV entries
  iglv.extended <- c(
    "IGLVIV-66-1", "IGLVIVOR22-1", "IGLVIVOR22-2", "IGLVVI-22-1",
    "IGLVVI-25-1", "IGLVVII-41-1"
  )

  unique(c(j.and.c, ighv, igkv, igl, ighv.extended, igkv.or, iglv.extended))
}
#' Ibex Single-Cell Calculation
#'
#' This function applies the Ibex algorithm to single-cell data, integrating
#' seamlessly with Seurat or SingleCellExperiment pipelines. The algorithm
#' generates latent dimensions using deep learning or geometric transformations,
#' storing the results in the dimensional reduction slot. \code{runIbex} will
#' automatically subset the single-cell object based on amino acid sequences
#' present for the given chain selection.
#'
#' @examples
#' # Using the encoder method with a variational autoencoder
#' ibex_example <- runIbex(ibex_example,
#'                         chain = "Heavy",
#'                         method = "encoder",
#'                         encoder.model = "VAE",
#'                         encoder.input = "atchleyFactors")
#'
#' # Using the geometric method with a specified angle
#' ibex_example <- runIbex(ibex_example,
#'                         chain = "Heavy",
#'                         method = "geometric",
#'                         geometric.theta = pi)
#'
#' @param sc.data A single-cell dataset, which can be:
#'   \itemize{
#'     \item A Seurat object
#'     \item A SingleCellExperiment object
#'   }
#' @param chain Character. Specifies the chain to analyze:
#'   \itemize{
#'     \item "Heavy" for the heavy chain
#'     \item "Light" for the light chain
#'   }
#' @param method Character. Algorithm to use for generating latent dimensions:
#'   \itemize{
#'     \item "encoder" - Uses deep learning autoencoders
#'     \item "geometric" - Uses geometric transformations based on the BLOSUM62 matrix
#'   }
#' @param encoder.model Character. The type of autoencoder model to use:
#'   \itemize{
#'     \item "CNN" - CDR3 Convolutional Neural Network-based autoencoder
#'     \item "VAE" - CDR3 Variational Autoencoder
#'     \item "CNN.EXP" - CDR1/2/3 CNN
#'     \item "VAE.EXP" - CDR1/2/3 VAE
#'   }
#' @param encoder.input Character. Input features for the encoder model:
#'   \itemize{
#'     \item Amino Acid Properties: "atchleyFactors", "crucianiProperties",
#'     "kideraFactors", "MSWHIM", "tScales"
#'     \item "OHE" - One Hot Encoding
#'   }
#' @param geometric.theta Numeric. Angle (in radians) for geometric transformation.
#' Used only when \code{method = "geometric"}.
#' @param reduction.name Character. The name to assign to the dimensional reduction.
#' This is useful for running Ibex with multiple parameter settings and saving results
#' under different names.
#' @param species Character. Species whose trained models are used, either
#' "Human" (the default) or "Mouse".
#' @param verbose Logical. Whether to print progress messages. Default is TRUE.
#'
#' @return An updated Seurat or SingleCellExperiment object with Ibex dimensions added
#' to the dimensional reduction slot.
#' @export
runIbex <- function(sc.data,
                    chain = "Heavy",
                    method = "encoder",
                    encoder.model = "VAE",
                    encoder.input = "atchleyFactors",
                    geometric.theta = pi,
                    reduction.name = "Ibex",
                    species = "Human",
                    verbose = TRUE) {
  # Reject unsupported containers, then keep only cells that carry a
  # sequence for the requested chain
  checkSingleObject(sc.data)
  sc.data <- filter.cells(sc.data, chain)

  # Latent vectors for the retained cells
  reduction <- Ibex.matrix(input.data = sc.data,
                           chain = chain,
                           method = method,
                           encoder.model = encoder.model,
                           encoder.input = encoder.input,
                           geometric.theta = geometric.theta,
                           species = species,
                           verbose = verbose)

  # Store the vectors as a dimensional reduction under `reduction.name`
  sc.data <- adding.DR(sc.data, reduction, reduction.name)
  return(sc.data)
}

#' Filter Single-Cell Data Based on CDR3 Sequences
#'
#' This function subsets a Seurat or SingleCellExperiment object,
#' removing cells where the `CTaa` column is missing or contains unwanted patterns.
#'
#' @param sc.obj A Seurat or SingleCellExperiment object.
#' @param chain Character. Specifies the chain type ("Heavy" or "Light").
#'
#' @return A filtered Seurat or SingleCellExperiment object.
filter.cells <- function(sc.obj,
                         chain) {
  # Keeps only the cells whose `CTaa` entry is present and carries a value on
  # the side of the "_" that belongs to `chain`; returns the subsetted
  # object. Objects that are neither Seurat nor SingleCellExperiment are
  # returned unchanged.
  meta <- grabMeta(sc.obj)
  if (!"CTaa" %in% colnames(meta)) {
    stop("Amino acid sequences are not added to the single-cell object correctly.")
  }

  # Only the first element of `chain` decides the side; any value other than
  # "Heavy" is handled as the light chain.
  heavy <- identical(as.character(chain)[1L], "Heavy")

  # The heavy-chain slot is taken to be the text before "_" and the
  # light-chain slot the text after it. The placeholders NA / None are
  # anchored to the matching end of the string so residues such as "...NA"
  # inside a real sequence are not mistaken for a missing chain.
  missing.pattern <- if (heavy) "^(NA|None)_" else "_(NA|None)$"

  cells.index <- which(!is.na(meta[, "CTaa"]) &
                         !grepl(missing.pattern, meta[, "CTaa"]))

  if (inherits(x = sc.obj, what = "Seurat")) {
    cell.chains <- rownames(meta)[cells.index]
    sc.obj <- subset(sc.obj, cells = cell.chains)
  } else if (inherits(x = sc.obj, what = "SingleCellExperiment")) {
    sc.obj <- sc.obj[, cells.index]
  }
  return(sc.obj)
}
# Stops with an error unless `sc` inherits from 'Seurat' or
# 'SummarizedExperiment'; returns NULL invisibly otherwise.
checkSingleObject <- function(sc) {
  # Scalar condition, so use the short-circuit operator
  if (!inherits(x = sc, what = "Seurat") &&
      !inherits(x = sc, what = "SummarizedExperiment")) {
    stop("Object indicated is not of class 'Seurat' or ",
         "'SummarizedExperiment', make sure you are using ",
         "the correct data.")
  }
  invisible(NULL)
}

# Stops with an error when any sequence in `x` is longer than the cutoff:
# 90 characters when `expanded` is TRUE, 45 otherwise (including NULL).
# Sequences exactly at the cutoff are accepted and NA entries are ignored.
# Returns NULL invisibly when every sequence passes.
checkLength <- function(x, expanded = NULL) {
  cutoff <- if (isTRUE(expanded)) 90 else 45
  # na.rm drops the NA lengths produced by missing sequences
  if (any(nchar(x) > cutoff, na.rm = TRUE)) {
    stop("Models have been trained on sequences less than ", cutoff,
         " amino acid residues. Please filter the larger sequences",
         " before running")
  }
  invisible(NULL)
}
# Returns the local path of the encoder model for the requested
# species / chain / model / input combination. The model must be listed in
# the package's metadata.csv; it is downloaded into the user cache directory
# the first time it is requested and reused from there afterwards.
#' @importFrom utils download.file read.csv
#' @importFrom tools R_user_dir
aa.model.loader <- function(species,
                            chain,
                            encoder.input,
                            encoder.model) {

  ## 1. Expected filename
  file_name <- paste0(
    species, "_", chain, "_",
    encoder.model, "_", encoder.input,
    "_encoder.keras")

  ## 2. Sanity-check against metadata.csv
  meta_path <- system.file("extdata", "metadata.csv", package = "Ibex")
  # system.file() returns "" when the file cannot be found
  if (!nzchar(meta_path))
    stop("Could not locate 'metadata.csv' in the installed Ibex package.")

  meta <- read.csv(meta_path, stringsAsFactors = FALSE)

  if (!file_name %in% meta[[1]])
    stop("Model '", file_name, "' is not listed in metadata.csv.")

  ## 3. Cache directory
  cache_dir <- tools::R_user_dir("Ibex", which = "cache")
  if (!dir.exists(cache_dir))
    dir.create(cache_dir, recursive = TRUE, showWarnings = FALSE)

  local_path <- file.path(cache_dir, file_name)

  ## 4. Download if we have never seen this model before
  if (!file.exists(local_path)) {
    message("Downloading model '", file_name, "' ...")
    base_url <- "https://zenodo.org/record/14919286/files"

    # Download to a temporary file next to the target and only move it into
    # place on success, so a failed or interrupted transfer never leaves a
    # truncated model in the cache.
    tmp_path <- tempfile(pattern = "ibex-model-", tmpdir = cache_dir,
                         fileext = ".part")
    on.exit(unlink(tmp_path), add = TRUE)

    status <- tryCatch(
      utils::download.file(
        url      = file.path(base_url, file_name),
        destfile = tmp_path,
        mode     = "wb",
        quiet    = TRUE
      ),
      error = function(e) conditionMessage(e)
    )
    if (!is.numeric(status) || status != 0)
      stop("Download of model '", file_name,
           "' failed (status ", status, ").")

    if (!file.rename(tmp_path, local_path))
      stop("Could not move downloaded model '", file_name,
           "' into the cache directory.")
  }

  ## 5. Done: return the path for use in basiliskRun()
  normalizePath(local_path, winslash = "/")
}
[![Codecov test coverage](https://codecov.io/gh/BorchLab/Ibex/graph/badge.svg)](https://app.codecov.io/gh/BorchLab/Ibex?branch=master) 6 | [![Documentation](https://img.shields.io/badge/docs-stable-blue.svg)](https://www.borch.dev/uploads/screpertoire/articles/ibex) 7 | 8 | 9 | 10 | ## Introduction 11 | Single-cell sequencing is an integral tool in immunology and oncology, enabling researchers to measure gene expression and immune cell receptor profiling at the level of individual cells. We developed the [scRepertoire](https://github.com/BorchLab/scRepertoire) R package to facilitate the integration of immune receptor and gene expression data. However, leveraging clonal indices for more complex analyses—such as using clonality in cell embedding—remains challenging. 12 | 13 | **Ibex** addresses this need by using deep learning to vectorize BCR sequences based on amino acid properties or their underlying order. Ibex is the sister package to [Trex](https://github.com/BorchLab/Trex), which focuses on TCR sequence data. 14 | 15 | --- 16 | 17 | # System Requirements 18 | Ibex has been tested on R versions >= 4.0. For details on required R packages, refer to the package’s DESCRIPTION file. It is designed to work with single-cell objects containing BCR data generated using [scRepertoire](https://github.com/BorchLab/scRepertoire). Ibex has been tested on macOS and Linux. 
19 | 20 | 21 | # Installation 22 | 23 | Ibex relies on the [immApex](https://github.com/BorchLab/immApex) API, which can be installed directly from GitHub: 24 | 25 | ```r 26 | devtools::install_github("BorchLab/immApex") 27 | ``` 28 | 29 | You may also install immApex from Bioconductor: 30 | 31 | ```r 32 | if (!require("BiocManager", quietly = TRUE)) 33 | install.packages("BiocManager") 34 | 35 | BiocManager::install("immApex") 36 | ``` 37 | 38 | After immApex installation, you can install Ibex with: 39 | 40 | ```r 41 | devtools::install_github("BorchLab/Ibex") 42 | ``` 43 | 44 | The main version of Ibex is submitted to Bioconductor (installation instructions will be updated after review). By default, Ibex will automatically pull deep learning models from a [Zenodo repository](https://zenodo.org/records/14919286) and cache them locally. 45 | 46 | Alternatively, to install **Ibex** and all the required models at once: 47 | ```r 48 | devtools::install_github("BorchLab/Ibex@fullstack") 49 | ``` 50 | 51 | # Usage/Demos 52 | 53 | Ibex integrates smoothly into most popular R-based single-cell workflows, including **Seurat** and **Bioconductor/SingleCellExperiment.** 54 | 55 | ## Quick Start 56 | 57 | See the [vignette](https://www.borch.dev/uploads/screpertoire/articles/ibex) for a step-by-step tutorial. 58 | 59 | 60 | 61 | ## Autoencoded Matrix 62 | 63 | The Ibex algorithm allows users to select BCR-based metrics to return autoencoded values to be used in dimensional reduction. If single-cell objects are not filtered for B cells with BCR, `Ibex.matrix()` will still return values, however IBEX_1 will be based on the disparity of BCR-containing and BCR-non-containing cells based on the Ibex algorithm. 64 | 65 | ```r 66 | library(Ibex) 67 | my_ibex <- Ibex.matrix(singleObject) 68 | ``` 69 | 70 | ## Seurat or Single-Cell Experiment 71 | 72 | You can run Ibex within your Seurat or Single-Cell Experiment workflow.
**Importantly** `runIbex()` will automatically filter out single cells that do not contain BCR information in the metadata of the single-cell object. 73 | 74 | ```r 75 | seuratObj_Bonly <- runIbex(seuratObj, #The single cell object 76 | chain = c("Heavy", "Light"), # "Heavy" or "Light" 77 | method = c("encoder", "geometric"), # Use deep learning "encoder" or "geometric" transformation 78 | encoder.model = c("CNN", "VAE", "CNN.EXP", "VAE.EXP"), # Types of Deep Learning Models 79 | encoder.input = c("atchleyFactors", "crucianiProperties", 80 | "kideraFactors", "MSWHIM", "tScales", "OHE"), # Method of Encoding 81 | geometric.theta = pi/3, # theta for Geometric Encoding 82 | species = "Human") # "Mouse" or "Human" 83 | 84 | seuratObj_Bonly <- runIbex(seuratObj, reduction.name = "Ibex") 85 | ``` 86 | 87 | ## After Running Ibex 88 | 89 | Once the Ibex embeddings are part of your Seurat object, you can use these embeddings to generate a t-SNE or UMAP: 90 | 91 | ```r 92 | seuratObj <- RunTSNE(seuratObj, reduction = "Ibex", reduction.key = "Ibex_") 93 | seuratObj <- RunUMAP(seuratObj, reduction = "Ibex", reduction.key = "Ibex_") 94 | ``` 95 | 96 | If using the Seurat package, the Ibex embedding information and gene expression PCA can be used to find the [Weighted Nearest Neighbors](https://pubmed.ncbi.nlm.nih.gov/34062119/). Before applying the WNN approach, best practice would be to remove the BCR-related genes from the list of variable genes and rerun the PCA. 97 | 98 | ### Recalculate PCA without BCR genes with quietBCRgenes() function in Ibex. 
99 | ```r 100 | seuratObj <- quietBCRgenes(seuratObj) 101 | seuratObj <- RunPCA(seuratObj) 102 | ``` 103 | 104 | ### Running WNN approach 105 | ```r 106 | seuratObj <- FindMultiModalNeighbors(seuratObj, 107 | reduction.list = list("pca", "Ibex"), 108 | dims.list = list(1:30, 1:20), 109 | modality.weight.name = "RNA.weight") 110 | 111 | seuratObj <- RunUMAP(seuratObj, 112 | nn.name = "weighted.nn", 113 | reduction.name = "wnn.umap", 114 | reduction.key = "wnnUMAP_") 115 | ``` 116 | ## Bug Reports/New Features 117 | 118 | #### If you run into any issues or bugs please submit a [GitHub issue](https://github.com/BorchLab/Ibex/issues) with details of the issue. 119 | 120 | - If possible please include a [reproducible example](https://reprex.tidyverse.org/). 121 | Alternatively, an example with the internal **ibex_example** would 122 | be extremely helpful. 123 | 124 | #### Any requests for new features or enhancements can also be submitted as [GitHub issues](https://github.com/BorchLab/Ibex/issues). 125 | 126 | #### [Pull Requests](https://github.com/BorchLab/Ibex/pulls) are welcome for bug fixes, new features, or enhancements. 127 | 128 | ## Citation 129 | More information on Ibex is available at our [Biorxiv preprint](https://www.biorxiv.org/content/10.1101/2022.11.09.515787v2). 
130 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: ~ 2 | template: 3 | bootstrap: 5 4 | 5 | -------------------------------------------------------------------------------- /data/ibex_example.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BorchLab/Ibex/543ce724c3f01bebedb0db705873788509b5e8a7/data/ibex_example.rda -------------------------------------------------------------------------------- /data/ibex_vdj.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BorchLab/Ibex/543ce724c3f01bebedb0db705873788509b5e8a7/data/ibex_vdj.rda -------------------------------------------------------------------------------- /inst/WORDLIST: -------------------------------------------------------------------------------- 1 | ADT 2 | Atchley 3 | Autoencoded 4 | Autoencoder 5 | Autoencoders 6 | BCR 7 | BLOSUM 8 | Bioconductor's 9 | Biorxiv 10 | CDR 11 | CMD 12 | CNNs 13 | CTaa 14 | Clonotype 15 | CoNGA 16 | Codecov 17 | Convolutional 18 | Cruciani 19 | Experiemt 20 | FWR 21 | Genomics’ 22 | HEL 23 | IGH 24 | IGK 25 | Ig 26 | Kidera 27 | Lysozyme 28 | MSWHIM 29 | MultiUMAP 30 | OHE 31 | SNE 32 | Schattgen 33 | SingleCellExperiment 34 | TCR 35 | TRI 36 | Trex 37 | UMAP 38 | UMIs 39 | VAE 40 | VAEs 41 | Variational 42 | WNN 43 | Zenodo 44 | atchleyFactors 45 | autoencoded 46 | autoencoder 47 | autoencoders 48 | barcode 49 | barcodes 50 | clonality 51 | cloneSize 52 | clonotype 53 | clonotypes 54 | combineBCR 55 | contig 56 | crucianiProperties 57 | csv 58 | customizable 59 | embeddings 60 | gp 61 | hydrophobicity 62 | ident 63 | identOriginal 64 | immApex 65 | interpretability 66 | keras 67 | kideraFactors 68 | nCount 69 | nFeature 70 | physicochemical 71 | preprint 72 | pseudogenes 73 | pv 74 | quietBCRgenes 
75 | runIbex 76 | scRepertoire 77 | splenocytes 78 | subclonotype 79 | tScales 80 | tensorflow 81 | vectorize 82 | zScales 83 | π 84 | -------------------------------------------------------------------------------- /inst/extdata/ibex-basilisk.yaml: -------------------------------------------------------------------------------- 1 | name: ibex_env 2 | channels: 3 | - defaults 4 | - conda-forge 5 | dependencies: 6 | - python=3.9 7 | - tensorflow=2.11 8 | - keras=2.11 9 | - numpy 10 | - h5py 11 | 12 | -------------------------------------------------------------------------------- /inst/extdata/metadata.csv: -------------------------------------------------------------------------------- 1 | "Title","Description","BiocVersion","Genome","SourceType","SourceUrl","SourceVersion","Species","TaxonomyId","Coordinate_1_based","DataProvider","Maintainer","RDataClass","DispatchClass","Location_Prefix","RDataPath","Tags" 2 | "Human_Heavy_CNN_atchleyFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN, Encoding Method: atchleyFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_CNN_atchleyFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model" 3 | "Human_Heavy_CNN_crucianiProperties_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN, Encoding Method: crucianiProperties","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_CNN_crucianiProperties_encoder.keras","BCR:scRNA-seq:Encoder:Model" 4 | "Human_Heavy_CNN_kideraFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. 
Chain: Heavy, Architecture: CNN, Encoding Method: kideraFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_CNN_kideraFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model" 5 | "Human_Heavy_CNN_MSWHIM_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN, Encoding Method: MSWHIM","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_CNN_MSWHIM_encoder.keras","BCR:scRNA-seq:Encoder:Model" 6 | "Human_Heavy_CNN_OHE_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN, Encoding Method: OHE","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_CNN_OHE_encoder.keras","BCR:scRNA-seq:Encoder:Model" 7 | "Human_Heavy_CNN_tScales_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN, Encoding Method: tScales","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_CNN_tScales_encoder.keras","BCR:scRNA-seq:Encoder:Model" 8 | "Human_Heavy_CNN.EXP_atchleyFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. 
Chain: Heavy, Architecture: CNN.EXP, Encoding Method: atchleyFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_CNN.EXP_atchleyFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model" 9 | "Human_Heavy_CNN.EXP_crucianiProperties_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN.EXP, Encoding Method: crucianiProperties","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_CNN.EXP_crucianiProperties_encoder.keras","BCR:scRNA-seq:Encoder:Model" 10 | "Human_Heavy_CNN.EXP_kideraFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN.EXP, Encoding Method: kideraFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_CNN.EXP_kideraFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model" 11 | "Human_Heavy_CNN.EXP_MSWHIM_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN.EXP, Encoding Method: MSWHIM","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_CNN.EXP_MSWHIM_encoder.keras","BCR:scRNA-seq:Encoder:Model" 12 | "Human_Heavy_CNN.EXP_OHE_encoder.keras","Keras-based deep learning encoder for BCR sequences. 
Chain: Heavy, Architecture: CNN.EXP, Encoding Method: OHE","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_CNN.EXP_OHE_encoder.keras","BCR:scRNA-seq:Encoder:Model" 13 | "Human_Heavy_CNN.EXP_tScales_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN.EXP, Encoding Method: tScales","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_CNN.EXP_tScales_encoder.keras","BCR:scRNA-seq:Encoder:Model" 14 | "Human_Heavy_VAE_atchleyFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE, Encoding Method: atchleyFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_VAE_atchleyFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model" 15 | "Human_Heavy_VAE_crucianiProperties_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE, Encoding Method: crucianiProperties","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_VAE_crucianiProperties_encoder.keras","BCR:scRNA-seq:Encoder:Model" 16 | "Human_Heavy_VAE_kideraFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. 
Chain: Heavy, Architecture: VAE, Encoding Method: kideraFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_VAE_kideraFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model" 17 | "Human_Heavy_VAE_MSWHIM_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE, Encoding Method: MSWHIM","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_VAE_MSWHIM_encoder.keras","BCR:scRNA-seq:Encoder:Model" 18 | "Human_Heavy_VAE_OHE_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE, Encoding Method: OHE","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_VAE_OHE_encoder.keras","BCR:scRNA-seq:Encoder:Model" 19 | "Human_Heavy_VAE_tScales_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE, Encoding Method: tScales","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_VAE_tScales_encoder.keras","BCR:scRNA-seq:Encoder:Model" 20 | "Human_Heavy_VAE.EXP_atchleyFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. 
Chain: Heavy, Architecture: VAE.EXP, Encoding Method: atchleyFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_VAE.EXP_atchleyFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model" 21 | "Human_Heavy_VAE.EXP_crucianiProperties_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE.EXP, Encoding Method: crucianiProperties","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_VAE.EXP_crucianiProperties_encoder.keras","BCR:scRNA-seq:Encoder:Model" 22 | "Human_Heavy_VAE.EXP_kideraFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE.EXP, Encoding Method: kideraFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_VAE.EXP_kideraFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model" 23 | "Human_Heavy_VAE.EXP_MSWHIM_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE.EXP, Encoding Method: MSWHIM","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_VAE.EXP_MSWHIM_encoder.keras","BCR:scRNA-seq:Encoder:Model" 24 | "Human_Heavy_VAE.EXP_OHE_encoder.keras","Keras-based deep learning encoder for BCR sequences. 
Chain: Heavy, Architecture: VAE.EXP, Encoding Method: OHE","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_VAE.EXP_OHE_encoder.keras","BCR:scRNA-seq:Encoder:Model" 25 | "Human_Heavy_VAE.EXP_tScales_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE.EXP, Encoding Method: tScales","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_VAE.EXP_tScales_encoder.keras","BCR:scRNA-seq:Encoder:Model" 26 | "Human_Light_CNN_atchleyFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: CNN, Encoding Method: atchleyFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_CNN_atchleyFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model" 27 | "Human_Light_CNN_crucianiProperties_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: CNN, Encoding Method: crucianiProperties","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_CNN_crucianiProperties_encoder.keras","BCR:scRNA-seq:Encoder:Model" 28 | "Human_Light_CNN_kideraFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. 
Chain: Light, Architecture: CNN, Encoding Method: kideraFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_CNN_kideraFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model" 29 | "Human_Light_CNN_MSWHIM_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: CNN, Encoding Method: MSWHIM","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_CNN_MSWHIM_encoder.keras","BCR:scRNA-seq:Encoder:Model" 30 | "Human_Light_CNN_OHE_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: CNN, Encoding Method: OHE","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_CNN_OHE_encoder.keras","BCR:scRNA-seq:Encoder:Model" 31 | "Human_Light_CNN_tScales_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: CNN, Encoding Method: tScales","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_CNN_tScales_encoder.keras","BCR:scRNA-seq:Encoder:Model" 32 | "Human_Light_CNN.EXP_atchleyFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. 
Chain: Light, Architecture: CNN.EXP, Encoding Method: atchleyFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_CNN.EXP_atchleyFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model" 33 | "Human_Light_CNN.EXP_crucianiProperties_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: CNN.EXP, Encoding Method: crucianiProperties","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_CNN.EXP_crucianiProperties_encoder.keras","BCR:scRNA-seq:Encoder:Model" 34 | "Human_Light_CNN.EXP_kideraFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: CNN.EXP, Encoding Method: kideraFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_CNN.EXP_kideraFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model" 35 | "Human_Light_CNN.EXP_MSWHIM_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: CNN.EXP, Encoding Method: MSWHIM","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_CNN.EXP_MSWHIM_encoder.keras","BCR:scRNA-seq:Encoder:Model" 36 | "Human_Light_CNN.EXP_OHE_encoder.keras","Keras-based deep learning encoder for BCR sequences. 
Chain: Light, Architecture: CNN.EXP, Encoding Method: OHE","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_CNN.EXP_OHE_encoder.keras","BCR:scRNA-seq:Encoder:Model" 37 | "Human_Light_CNN.EXP_tScales_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: CNN.EXP, Encoding Method: tScales","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_CNN.EXP_tScales_encoder.keras","BCR:scRNA-seq:Encoder:Model" 38 | "Human_Light_VAE_atchleyFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: VAE, Encoding Method: atchleyFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_VAE_atchleyFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model" 39 | "Human_Light_VAE_crucianiProperties_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: VAE, Encoding Method: crucianiProperties","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_VAE_crucianiProperties_encoder.keras","BCR:scRNA-seq:Encoder:Model" 40 | "Human_Light_VAE_kideraFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. 
Chain: Light, Architecture: VAE, Encoding Method: kideraFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_VAE_kideraFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model" 41 | "Human_Light_VAE_MSWHIM_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: VAE, Encoding Method: MSWHIM","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_VAE_MSWHIM_encoder.keras","BCR:scRNA-seq:Encoder:Model" 42 | "Human_Light_VAE_OHE_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: VAE, Encoding Method: OHE","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_VAE_OHE_encoder.keras","BCR:scRNA-seq:Encoder:Model" 43 | "Human_Light_VAE_tScales_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: VAE, Encoding Method: tScales","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_VAE_tScales_encoder.keras","BCR:scRNA-seq:Encoder:Model" 44 | "Human_Light_VAE.EXP_atchleyFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. 
Chain: Light, Architecture: VAE.EXP, Encoding Method: atchleyFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_VAE.EXP_atchleyFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model" 45 | "Human_Light_VAE.EXP_crucianiProperties_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: VAE.EXP, Encoding Method: crucianiProperties","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_VAE.EXP_crucianiProperties_encoder.keras","BCR:scRNA-seq:Encoder:Model" 46 | "Human_Light_VAE.EXP_kideraFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: VAE.EXP, Encoding Method: kideraFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_VAE.EXP_kideraFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model" 47 | "Human_Light_VAE.EXP_MSWHIM_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: VAE.EXP, Encoding Method: MSWHIM","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_VAE.EXP_MSWHIM_encoder.keras","BCR:scRNA-seq:Encoder:Model" 48 | "Human_Light_VAE.EXP_OHE_encoder.keras","Keras-based deep learning encoder for BCR sequences. 
Chain: Light, Architecture: VAE.EXP, Encoding Method: OHE","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_VAE.EXP_OHE_encoder.keras","BCR:scRNA-seq:Encoder:Model" 49 | "Human_Light_VAE.EXP_tScales_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: VAE.EXP, Encoding Method: tScales","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_VAE.EXP_tScales_encoder.keras","BCR:scRNA-seq:Encoder:Model" 50 | "Mouse_Heavy_CNN_atchleyFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN, Encoding Method: atchleyFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_CNN_atchleyFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model" 51 | "Mouse_Heavy_CNN_crucianiProperties_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN, Encoding Method: crucianiProperties","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_CNN_crucianiProperties_encoder.keras","BCR:scRNA-seq:Encoder:Model" 52 | "Mouse_Heavy_CNN_kideraFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. 
Chain: Heavy, Architecture: CNN, Encoding Method: kideraFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_CNN_kideraFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model" 53 | "Mouse_Heavy_CNN_MSWHIM_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN, Encoding Method: MSWHIM","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_CNN_MSWHIM_encoder.keras","BCR:scRNA-seq:Encoder:Model" 54 | "Mouse_Heavy_CNN_OHE_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN, Encoding Method: OHE","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_CNN_OHE_encoder.keras","BCR:scRNA-seq:Encoder:Model" 55 | "Mouse_Heavy_CNN_tScales_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN, Encoding Method: tScales","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_CNN_tScales_encoder.keras","BCR:scRNA-seq:Encoder:Model" 56 | "Mouse_Heavy_CNN.EXP_atchleyFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. 
Chain: Heavy, Architecture: CNN.EXP, Encoding Method: atchleyFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_CNN.EXP_atchleyFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model" 57 | "Mouse_Heavy_CNN.EXP_crucianiProperties_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN.EXP, Encoding Method: crucianiProperties","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_CNN.EXP_crucianiProperties_encoder.keras","BCR:scRNA-seq:Encoder:Model" 58 | "Mouse_Heavy_CNN.EXP_kideraFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN.EXP, Encoding Method: kideraFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_CNN.EXP_kideraFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model" 59 | "Mouse_Heavy_CNN.EXP_MSWHIM_autoencoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN.EXP, Encoding Method: MSWHIM","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_CNN.EXP_MSWHIM_autoencoder.keras","BCR:scRNA-seq:Encoder:Model" 60 | "Mouse_Heavy_CNN.EXP_OHE_autoencoder.keras","Keras-based deep learning encoder for BCR sequences. 
Chain: Heavy, Architecture: CNN.EXP, Encoding Method: OHE","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_CNN.EXP_OHE_autoencoder.keras","BCR:scRNA-seq:Encoder:Model" 61 | "Mouse_Heavy_CNN.EXP_tScales_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN.EXP, Encoding Method: tScales","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_CNN.EXP_tScales_encoder.keras","BCR:scRNA-seq:Encoder:Model" 62 | "Mouse_Heavy_VAE_atchleyFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE, Encoding Method: atchleyFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_VAE_atchleyFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model" 63 | "Mouse_Heavy_VAE_crucianiProperties_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE, Encoding Method: crucianiProperties","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_VAE_crucianiProperties_encoder.keras","BCR:scRNA-seq:Encoder:Model" 64 | "Mouse_Heavy_VAE_kideraFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. 
Chain: Heavy, Architecture: VAE, Encoding Method: kideraFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_VAE_kideraFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model" 65 | "Mouse_Heavy_VAE_MSWHIM_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE, Encoding Method: MSWHIM","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_VAE_MSWHIM_encoder.keras","BCR:scRNA-seq:Encoder:Model" 66 | "Mouse_Heavy_VAE_OHE_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE, Encoding Method: OHE","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_VAE_OHE_encoder.keras","BCR:scRNA-seq:Encoder:Model" 67 | "Mouse_Heavy_VAE_tScales_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE, Encoding Method: tScales","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_VAE_tScales_encoder.keras","BCR:scRNA-seq:Encoder:Model" 68 | "Mouse_Heavy_VAE.EXP_atchleyFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. 
Chain: Heavy, Architecture: VAE.EXP, Encoding Method: atchleyFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_VAE.EXP_atchleyFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model" 69 | "Mouse_Heavy_VAE.EXP_crucianiProperties_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE.EXP, Encoding Method: crucianiProperties","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_VAE.EXP_crucianiProperties_encoder.keras","BCR:scRNA-seq:Encoder:Model" 70 | "Mouse_Heavy_VAE.EXP_kideraFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE.EXP, Encoding Method: kideraFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_VAE.EXP_kideraFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model" 71 | "Mouse_Heavy_VAE.EXP_MSWHIM_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE.EXP, Encoding Method: MSWHIM","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_VAE.EXP_MSWHIM_encoder.keras","BCR:scRNA-seq:Encoder:Model" 72 | "Mouse_Heavy_VAE.EXP_OHE_encoder.keras","Keras-based deep learning encoder for BCR sequences. 
Chain: Heavy, Architecture: VAE.EXP, Encoding Method: OHE","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_VAE.EXP_OHE_encoder.keras","BCR:scRNA-seq:Encoder:Model" 73 | "Mouse_Heavy_VAE.EXP_tScales_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE.EXP, Encoding Method: tScales","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_VAE.EXP_tScales_encoder.keras","BCR:scRNA-seq:Encoder:Model" 74 | -------------------------------------------------------------------------------- /inst/scripts/make-data.R: -------------------------------------------------------------------------------- 1 | ######################### 2 | #Defining Hyperparameters 3 | ######################### 4 | # NOTE: data.path is not defined in this script; set it beforehand with a trailing "/" (it is pasted directly onto file names below). 5 | factors <- c("OHE", "atchleyFactors", "crucianiProperties", "kideraFactors", "MSWHIM", "tScales") 6 | hidden_dim1 <- 512 7 | hidden_dim2 <- 256 8 | latent_dim <- 128 9 | batch_size <- 128 10 | learning_rate <- 1e-6 11 | epochs <- 128 12 | optimizer <- "adam" 13 | layer_act <- "relu" 14 | epsilon.std <- 1 15 | amino.acids <- c("A", "R", "N", "D", "C", "Q", "E", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V") 16 | 17 | #################### 18 | #Training CNN Models 19 | #################### 20 | 21 | set.seed(42) 22 | # Train one autoencoder per encoding in 'factors'. NOTE: every layer below is layer_dense, despite the "CNN" label. 23 | for(i in seq_along(factors)) { 24 | # Stop training once val_loss has not improved for 8 consecutive epochs 25 | es <- callback_early_stopping( 26 | monitor = "val_loss", 27 | min_delta = 0, 28 | patience = 8, 29 | verbose = 1, 30 | mode = "min") 31 | 32 | sequence.matrix <- readRDS(paste0(data.path, factors[i], "_Heavy_CDR3.rds")) 33 | 34 | stratified.sequences <- prepare_data(sequence.matrix, 35 | train_split = 0.75, 36 | val_split = 0.2) 
37 | 38 | # Create the training, validation, and test sets 39 | x_train <- stratified.sequences[[1]] 40 | x_val <- stratified.sequences[[2]] 41 | x_test <- stratified.sequences[[3]] 42 | rm(stratified.sequences) 43 | rm(sequence.matrix) 44 | gc() 45 | # Network: input -> hidden_dim1 -> hidden_dim2 -> latent_dim -> hidden_dim2 -> hidden_dim1 -> input 46 | d.1 <- dim(x_train)[2] 47 | input_layer <- layer_input(shape = c(d.1)) 48 | # Encoder part 49 | encoder <- input_layer %>% 50 | layer_dense(units = hidden_dim1, name = "e.1") %>% 51 | layer_batch_normalization(name = "bn.1") %>% 52 | layer_activation(activation = layer_act, name = "act.1") %>% 53 | layer_dense(units = hidden_dim2, name = "e.2") %>% 54 | layer_batch_normalization(name = "bn.2") %>% 55 | layer_activation(activation = layer_act, name = "act.2") %>% 56 | layer_dense(units = latent_dim, activation = layer_act, name = "latent_space") 57 | 58 | # Decoder part 59 | decoder <- encoder %>% 60 | layer_dense(units = hidden_dim2, name = "d.1") %>% 61 | layer_batch_normalization(name = "bn.3") %>% 62 | layer_activation(activation = layer_act, name = "act.3") %>% 63 | layer_dense(units = hidden_dim1, name = "d.2") %>% 64 | layer_batch_normalization(name = "bn.4") %>% 65 | layer_activation(activation = layer_act, name = "act.4") %>% 66 | layer_dense(units = d.1, activation = 'sigmoid', name = "output") 67 | 68 | # Complete autoencoder model 69 | autoencoder <- keras_model(input_layer, decoder) 70 | 71 | # Extract the latent space output 72 | encoder_model <- keras_model(inputs = autoencoder$input, outputs = get_layer(autoencoder, "latent_space")$output) 73 | 74 | # Create the decoder model 75 | latent_input <- layer_input(shape = latent_dim, name = "latent_input") 76 | decoder_output <- latent_input %>% 77 | get_layer(autoencoder, "d.1")(.) %>% 78 | get_layer(autoencoder, "bn.3")(.) %>% 79 | get_layer(autoencoder, "act.3")(.) %>% 80 | get_layer(autoencoder, "d.2")(.) %>% 81 | get_layer(autoencoder, "bn.4")(.) %>% 82 | get_layer(autoencoder, "act.4")(.) %>% 83 | get_layer(autoencoder, "output")(.) 
84 | 85 | decoder_model <- keras_model(latent_input, decoder_output) 86 | 87 | autoencoder %>% compile( 88 | optimizer = optimizer_adam(learning_rate = learning_rate), 89 | loss = "mean_squared_error", 90 | metrics = 'mean_absolute_error') 91 | 92 | # Train the model 93 | history <- autoencoder %>% fit( 94 | x = x_train, 95 | y = x_train, 96 | validation_data = list(x_val, x_val), 97 | epochs = epochs, 98 | batch_size = batch_size, 99 | shuffle = TRUE, 100 | callbacks = es) 101 | # Save the encoder, decoder, and full autoencoder under <data.path>models/ 102 | save_model(encoder_model, paste0(data.path, "models/Human_Heavy_CNN_", factors[i], "_encoder.keras"), overwrite = TRUE) 103 | save_model(decoder_model, paste0(data.path, "models/Human_Heavy_CNN_", factors[i], "_decoder.keras"), overwrite = TRUE) 104 | save_model(autoencoder, paste0(data.path, "models/Human_Heavy_CNN_", factors[i], "_autoencoder.keras"), overwrite = TRUE) 105 | } 106 | 107 | #################### 108 | #Training VAE Models 109 | #################### 110 | 111 | for(i in seq_along(factors)) { 112 | 113 | es <- callback_early_stopping( 114 | monitor = "val_loss", 115 | min_delta = 0, 116 | patience = 8, 117 | verbose = 1, 118 | mode = "min") 119 | 120 | sequence.matrix <- readRDS(paste0(data.path, factors[i], "_Heavy_CDR3.rds")) 121 | 122 | stratified.sequences <- prepare_data(sequence.matrix, 123 | train_split = 0.75, 124 | val_split = 0.2) 125 | 126 | # Create the training, validation, and test sets 127 | x_train <- stratified.sequences[[1]] 128 | x_val <- stratified.sequences[[2]] 129 | x_test <- stratified.sequences[[3]] 130 | rm(stratified.sequences) 131 | rm(sequence.matrix) 132 | gc() 133 | # Lambda layer whose output is the VAE loss: reconstruction error plus KL divergence 134 | vae_loss_layer <- function(original_dim) { 135 | layer_lambda( 136 | f = function(x) { 137 | x_decoded_mean <- x[[1]] 138 | x_input <- x[[2]] 139 | z_mean <- x[[3]] 140 | z_log_var <- x[[4]] 141 | 142 | # Reconstruction loss 143 | xent_loss <- loss_mean_squared_error(x_input, x_decoded_mean) * original_dim 144 | 145 | # KL Divergence loss 146 | kl_loss <- -0.5 * 
tf$reduce_mean(1 + z_log_var - tf$square(z_mean) - tf$exp(z_log_var), axis = -1L) 147 | 148 | # Total loss 149 | tf$reduce_mean(xent_loss + kl_loss) 150 | }, 151 | output_shape = list(NULL, 1) # Explicit output shape 152 | ) 153 | } 154 | original_dim <- ncol(x_test) 155 | 156 | 157 | # Encoder 158 | encoder_input <- layer_input(shape = original_dim) 159 | h <- encoder_input 160 | h <- layer_dense(h, 161 | units = hidden_dim1, 162 | activation = layer_act, 163 | name = "e.1") 164 | h <- layer_dense(h, 165 | units = hidden_dim2, 166 | activation = layer_act, 167 | name = "e.2") 168 | z_mean <- layer_dense(h, units = latent_dim, name = "z_mean") 169 | z_log_var <- layer_dense(h, units = latent_dim, name = "z_log_var") 170 | # Reparameterisation: z = z_mean + exp(z_log_var / 2) * epsilon, with epsilon drawn from a normal with sd epsilon.std 171 | # Sampling Layer 172 | z <- layer_lambda(f = function(args) { 173 | z_mean <- args[[1]] 174 | z_log_var <- args[[2]] 175 | batch <- tf$shape(z_mean)[1] 176 | dim <- tf$shape(z_mean)[2] 177 | epsilon <- tf$random$normal(shape = c(batch, dim), mean = 0., stddev = epsilon.std) 178 | z_mean + tf$exp(z_log_var / 2) * epsilon 179 | }, output_shape = c(latent_dim))(list(z_mean, z_log_var)) 180 | 181 | # Decoder 182 | decoder_input <- layer_input(shape = latent_dim) 183 | d <- decoder_input 184 | d <- layer_dense(d, 185 | units = hidden_dim2, 186 | activation = layer_act, 187 | name = "d.1") 188 | d <- layer_dense(d, 189 | units = hidden_dim1, 190 | activation = layer_act, 191 | name = "d.2") 192 | decoder_output <- layer_dense(d, units = original_dim, activation = "sigmoid") 193 | # The exported encoder outputs z_mean, i.e. the latent mean without the sampling step 194 | # Encoder and Decoder Models 195 | encoder <- keras_model(encoder_input, z_mean) 196 | decoder <- keras_model(decoder_input, decoder_output) 197 | 198 | # VAE Model 199 | decoder_output <- decoder(z) 200 | vae <- keras_model(encoder_input, decoder_output) 201 | 202 | # Add custom loss layer 203 | loss_layer <- vae_loss_layer(original_dim)(list(decoder_output, encoder_input, z_mean, z_log_var)) 204 | vae_with_loss <- keras_model(encoder_input, loss_layer) 205 | # The model output is already the loss, so dummy_loss just averages y_pred and ignores y_true 206 | 
# Dummy loss function 207 | dummy_loss <- function(y_true, y_pred) { 208 | tf$reduce_mean(y_pred) 209 | } 210 | 211 | # Compile the model 212 | vae_with_loss %>% compile(optimizer = optimizer_adam(learning_rate = learning_rate), 213 | loss = dummy_loss, 214 | metrics = c("mean_squared_error", "mean_absolute_error")) 215 | 216 | history <- vae_with_loss %>% fit( 217 | x_train, x_train, 218 | shuffle = TRUE, 219 | epochs = epochs, 220 | batch_size = batch_size, 221 | validation_data = list(x_val, x_val), # early stopping monitors the validation split, keeping x_test held out 222 | verbose = 0, 223 | callbacks = es 224 | ) 225 | 226 | save_model(encoder, paste0(data.path, "models/Human_Heavy_VAE_", factors[i], "_encoder.keras"), overwrite = TRUE) 227 | save_model(decoder, paste0(data.path, "models/Human_Heavy_VAE_", factors[i], "_decoder.keras"), overwrite = TRUE) 228 | save_model(vae, paste0(data.path, "models/Human_Heavy_VAE_", factors[i], "_autoencoder.keras"), overwrite = TRUE) 229 | } 230 | -------------------------------------------------------------------------------- /inst/scripts/make-metadata.R: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | ## make-metadata.R 3 | ## This script scans inst/extdata, builds a metadata.csv file 4 | ## for ExperimentHub or AnnotationHub submission. 5 | 6 | 7 | PKG_NAME <- "Ibex" 8 | BIOC_VERSION <- "3.21" 9 | MAINTAINER <- "Nick Borcherding " 10 | DATA_PROVIDER <- "Consolidated Sources: IReceptor, OAS, and GEO" 11 | SOURCE_URL <- "https://github.com/BorchLab" 12 | SOURCE_VERSION <- NA 13 | SOURCE_TYPE <- "CSV" 14 | GENOME <- NA 15 | COORDINATE_1_BASED <- NA 16 | DESCRIPTION <- "Keras-based deep learning encoder for BCR sequences." 
17 | 18 | 19 | # 2) Locate the data files in inst/extdata 20 | path_to_extdata <- file.path("inst", "extdata") 21 | files <- list.files(path_to_extdata, full.names = TRUE, pattern = "\\.keras$") 22 | if (length(files) == 0L) stop("No .keras files found in ", path_to_extdata, call. = FALSE) # do not overwrite metadata.csv with an empty table 23 | # 3) Helper function: guess DispatchClass and RDataClass from file extension 24 | inferDispatchClass <- function(file_ext) { 25 | switch( 26 | tolower(file_ext), 27 | "rds" = "Rds", 28 | "rda" = "Rda", 29 | "csv" = "FilePath", 30 | "tsv" = "FilePath", 31 | "txt" = "FilePath", 32 | "FilePath" # default for any other extension, including "keras" 33 | ) 34 | } 35 | # Map a DispatchClass to the value written to the RDataClass column 36 | inferRDataClass <- function(dispatchClass) { 37 | # Adjust to reflect how your data is actually loaded in R. 38 | if (dispatchClass %in% c("Rds", "Rda")) { 39 | return("SummarizedExperiment") # or whatever class your objects are 40 | } else { 41 | return("character") # or NA, if you just return a path 42 | } 43 | } 44 | 45 | # 4) Build metadata data.frame row-by-row 46 | metadata_list <- lapply(files, function(f) { 47 | # Example: f == "inst/extdata/somefile.rds" 48 | file_name <- basename(f) 49 | file_ext <- tolower(tools::file_ext(f)) # "rds", "rda", "csv", etc. 
50 | 51 | dispatchClass <- inferDispatchClass(file_ext) 52 | rDataClass <- inferRDataClass(dispatchClass) 53 | 54 | # The Title could simply be the file name or something more descriptive 55 | title <- file_name 56 | components <- strsplit(title, "_", fixed = TRUE)[[1]] 57 | # File names look like <Species>_<Chain>_<Architecture>_<Encoding>_<role>.keras 58 | # Adaptive Variables 59 | description <- paste0(DESCRIPTION, 60 | " Chain: ", components[2], 61 | ", Architecture: ", components[3], 62 | ", Encoding Method: ", components[4]) 63 | SPECIES <- if (grepl("Human", title, fixed = TRUE)) "Homo sapiens" else "Mus musculus" 64 | TAXONOMY_ID <- if (grepl("Human", title, fixed = TRUE)) "9606" else "10090" 65 | rDataPath <- paste0("records/14919286/files/", file_name) 66 | # We assemble a named vector or list for each file: 67 | c( 68 | Title = title, 69 | Description = description, 70 | BiocVersion = BIOC_VERSION, 71 | Genome = as.character(GENOME), 72 | SourceType = SOURCE_TYPE, 73 | SourceUrl = SOURCE_URL, 74 | SourceVersion = SOURCE_VERSION, 75 | Species = SPECIES, 76 | TaxonomyId = TAXONOMY_ID, 77 | Coordinate_1_based = ifelse(is.na(COORDINATE_1_BASED), NA, 78 | as.character(COORDINATE_1_BASED)), 79 | DataProvider = DATA_PROVIDER, 80 | Maintainer = MAINTAINER, 81 | RDataClass = rDataClass, 82 | DispatchClass = dispatchClass, 83 | Location_Prefix = "https://zenodo.org/", 84 | RDataPath = rDataPath, 85 | Tags = paste("BCR", "scRNA-seq", "Encoder", "Model", sep = ":") 86 | ) 87 | }) 88 | 89 | # 5) Convert this list of named vectors to a data.frame 90 | metadata_df <- do.call(rbind, lapply(metadata_list, as.data.frame.list)) 91 | 92 | # 6) Write out the metadata.csv to inst/extdata 93 | output_csv <- file.path("inst", "extdata", "metadata.csv") 94 | write.csv(metadata_df, file = output_csv, row.names = FALSE, quote = TRUE) 95 | 96 | -------------------------------------------------------------------------------- /man/CoNGAfy.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit 
documentation in R/CoNGAfy.R 3 | \name{CoNGAfy} 4 | \alias{CoNGAfy} 5 | \title{Reduce a Single-Cell Object to Representative Cells} 6 | \usage{ 7 | CoNGAfy( 8 | input.data, 9 | method = "dist", 10 | features = NULL, 11 | assay = "RNA", 12 | meta.carry = c("CTaa", "CTgene") 13 | ) 14 | } 15 | \arguments{ 16 | \item{input.data}{A single-cell dataset in Seurat or SingleCellExperiment format.} 17 | 18 | \item{method}{Character. Specifies the method to reduce the dataset: 19 | \itemize{ 20 | \item "mean" - Computes the mean expression of selected features across cells in each clonotype. 21 | \item "dist" - Uses PCA reduction to identify the cell with the minimal Euclidean distance within each clonotype group. 22 | }} 23 | 24 | \item{features}{Character vector. Selected genes for the reduction. If \code{NULL} (default), all genes are used.} 25 | 26 | \item{assay}{Character. The name of the assay or assays to include in the output. Defaults to \code{"RNA"}.} 27 | 28 | \item{meta.carry}{Character vector. Metadata variables to carry over from the input single-cell object to the output.} 29 | } 30 | \value{ 31 | A reduced single-cell object where each clonotype is represented by a single cell. 32 | } 33 | \description{ 34 | This function generates a single-cell object with a reduced representation 35 | of RNA expression by clone. The approach is inspired by the method introduced 36 | in \href{https://pubmed.ncbi.nlm.nih.gov/34426704/}{CoNGA}. Users can 37 | generate either a mean representation of features by clone or identify a 38 | representative cell using count-based minimal Euclidean distance. 39 | Please read and cite the original work by the authors of CoNGA. 
40 | } 41 | \examples{ 42 | ibex.clones <- CoNGAfy(ibex_example, 43 | method = "dist") 44 | 45 | ibex.clones <- CoNGAfy(ibex_example, 46 | method = "mean") 47 | 48 | } 49 | -------------------------------------------------------------------------------- /man/Ibex.matrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Ibex.matrix.R 3 | \name{Ibex.matrix} 4 | \alias{Ibex.matrix} 5 | \title{Ibex Matrix Interface} 6 | \usage{ 7 | Ibex.matrix( 8 | input.data, 9 | chain = c("Heavy", "Light"), 10 | method = c("encoder", "geometric"), 11 | encoder.model = c("CNN", "VAE", "CNN.EXP", "VAE.EXP"), 12 | encoder.input = c("atchleyFactors", "crucianiProperties", "kideraFactors", "MSWHIM", 13 | "tScales", "OHE"), 14 | geometric.theta = pi/3, 15 | species = "Human", 16 | verbose = TRUE 17 | ) 18 | } 19 | \arguments{ 20 | \item{input.data}{Input data, which can be: 21 | \itemize{ 22 | \item A Single Cell Object in Seurat or SingleCellExperiment format 23 | \item The output of \code{combineBCR()} from the \code{scRepertoire} package 24 | }} 25 | 26 | \item{chain}{Character. Specifies which chain to analyze: 27 | \itemize{ 28 | \item "Heavy" for the heavy chain 29 | \item "Light" for the light chain 30 | }} 31 | 32 | \item{method}{Character. The algorithm to use for generating latent vectors: 33 | \itemize{ 34 | \item "encoder" - Uses deep learning autoencoders 35 | \item "geometric" - Uses geometric transformations based on the 36 | BLOSUM62 matrix 37 | }} 38 | 39 | \item{encoder.model}{Character. The type of autoencoder model to use: 40 | \itemize{ 41 | \item "CNN" - CDR3 Convolutional Neural Network-based autoencoder 42 | \item "VAE" - CDR3 Variational Autoencoder 43 | \item "CNN.EXP" - CDR1/2/3 CNN 44 | \item "VAE.EXP" - CDR1/2/3 VAE 45 | }} 46 | 47 | \item{encoder.input}{Character. Specifies the input features for the 48 | encoder model. 
Options include: 49 | \itemize{ 50 | \item Amino Acid Properties: "atchleyFactors", "crucianiProperties", 51 | "kideraFactors", "MSWHIM", "tScales" 52 | \item "OHE" for One Hot Encoding 53 | }} 54 | 55 | \item{geometric.theta}{Numeric. Angle (in radians) for the geometric 56 | transformation. Only used when \code{method = "geometric"}.} 57 | 58 | \item{species}{Character. Either "Human" (default) or "Mouse".} 59 | 60 | \item{verbose}{Logical. Whether to print progress messages. Default is TRUE.} 61 | } 62 | \value{ 63 | A matrix of latent vectors generated by the specified method. 64 | } 65 | \description{ 66 | This function runs the Ibex algorithm to generate latent vectors from 67 | input data. The output can be returned as a matrix, with options to choose 68 | between deep learning autoencoders or geometric transformations based on 69 | the BLOSUM62 matrix. 70 | } 71 | \examples{ 72 | # Using the encoder method with a variational autoencoder 73 | ibex_values <- Ibex.matrix(ibex_example, 74 | chain = "Heavy", 75 | method = "encoder", 76 | encoder.model = "VAE", 77 | encoder.input = "atchleyFactors") 78 | 79 | # Using the geometric method with a specified angle 80 | ibex_values <- Ibex.matrix(ibex_example, 81 | chain = "Heavy", 82 | method = "geometric", 83 | geometric.theta = pi) 84 | 85 | } 86 | \seealso{ 87 | \code{\link[immApex]{propertyEncoder}}, 88 | \code{\link[immApex]{geometricEncoder}} 89 | } 90 | -------------------------------------------------------------------------------- /man/combineExpandedBCR.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/combineExpandedBCR.R 3 | \name{combineExpandedBCR} 4 | \alias{combineExpandedBCR} 5 | \title{combineBCR for CDR1/2/3 sequences} 6 | \usage{ 7 | combineExpandedBCR( 8 | input.data, 9 | samples = NULL, 10 | ID = NULL, 11 | call.related.clones = TRUE, 12 | threshold = 0.85, 13 | removeNA = 
FALSE, 14 | removeMulti = FALSE, 15 | filterMulti = TRUE, 16 | filterNonproductive = TRUE 17 | ) 18 | } 19 | \arguments{ 20 | \item{input.data}{List of filtered contig annotations.} 21 | 22 | \item{samples}{Character vector. Labels of samples (required).} 23 | 24 | \item{ID}{Character vector. Additional sample labeling (optional).} 25 | 26 | \item{call.related.clones}{Logical. Whether to call related clones based on 27 | nucleotide sequence and V gene. Default is `TRUE`.} 28 | 29 | \item{threshold}{Numeric. Normalized edit distance for clone clustering. 30 | Default is `0.85`.} 31 | 32 | \item{removeNA}{Logical. Whether to remove any chain without values. Default 33 | is `FALSE`.} 34 | 35 | \item{removeMulti}{Logical. Whether to remove barcodes with more than two 36 | chains. Default is `FALSE`.} 37 | 38 | \item{filterMulti}{Logical. Whether to select the highest-expressing light 39 | and heavy chains. Default is `TRUE`.} 40 | 41 | \item{filterNonproductive}{Logical. Whether to remove nonproductive chains. 42 | Default is `TRUE`.} 43 | } 44 | \value{ 45 | A list of consolidated BCR clones with expanded CDR sequences. 46 | } 47 | \description{ 48 | This function enhances BCR processing by incorporating additional 49 | sequence information from CDR1 and CDR2 regions before applying the BCR 50 | combination logic. The function depends on 51 | \code{\link[scRepertoire]{combineBCR}} from the scRepertoire package. 
52 | } 53 | \examples{ 54 | combined.BCR <- combineExpandedBCR(list(ibex_vdj), 55 | samples = "Sample1", 56 | filterNonproductive = TRUE) 57 | 58 | } 59 | \seealso{ 60 | \code{\link[scRepertoire]{combineBCR}} 61 | } 62 | -------------------------------------------------------------------------------- /man/filter.cells.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/runIbex.R 3 | \name{filter.cells} 4 | \alias{filter.cells} 5 | \title{Filter Single-Cell Data Based on CDR3 Sequences} 6 | \usage{ 7 | filter.cells(sc.obj, chain) 8 | } 9 | \arguments{ 10 | \item{sc.obj}{A Seurat or SingleCellExperiment object.} 11 | 12 | \item{chain}{Character. Specifies the chain type ("Heavy" or "Light").} 13 | } 14 | \value{ 15 | A filtered Seurat or SingleCellExperiment object. 16 | } 17 | \description{ 18 | This function subsets a Seurat or SingleCellExperiment object, 19 | removing cells where the `CTaa` column is missing or contains unwanted patterns. 20 | } 21 | -------------------------------------------------------------------------------- /man/getHumanIgPseudoGenes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/quietBCRgenes.R 3 | \name{getHumanIgPseudoGenes} 4 | \alias{getHumanIgPseudoGenes} 5 | \title{Get Human Immunoglobulin pseudogenes} 6 | \usage{ 7 | getHumanIgPseudoGenes() 8 | } 9 | \value{ 10 | Character vector of human immunoglobulin pseudogenes. 11 | } 12 | \description{ 13 | This function returns a character vector of human immunoglobulin 14 | pseudogenes. These are also the genes that are removed from the 15 | variable gene list in the \code{quietBCRgenes} function. 
16 | } 17 | -------------------------------------------------------------------------------- /man/ibex_example.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ibex_example.R 3 | \docType{data} 4 | \name{ibex_example} 5 | \alias{ibex_example} 6 | \title{A SingleCellExperiment object with 200 randomly-sampled 7 | B cells with BCR sequences from the 10x Genomics 8 | 2k_BEAM-Ab_Mouse_HEL_5pv2 dataset.} 9 | \format{ 10 | A \code{SingleCellExperiment} object with 32,285 genes (rows) and 200 cells (columns). 11 | \describe{ 12 | \item{assays}{List of matrices containing expression values: \code{counts} (raw counts) and \code{logcounts} (log-transformed).} 13 | \item{rowData}{Empty in this example (no gene-level annotations).} 14 | \item{colData}{A \code{DataFrame} with 14 columns of cell metadata, including:} 15 | \itemize{ 16 | \item orig.ident Original sample identity. 17 | \item nCount_RNA Total number of counts per cell. 18 | \item nFeature_RNA Number of detected genes per cell. 19 | \item cloneSize Size of each clone. 20 | \item ident Cluster assignment. 21 | } 22 | \item{reducedDims}{Contains dimensionality reductions: \code{PCA}, \code{pca}, and \code{apca}.} 23 | \item{altExp}{One alternative experiment named \code{BEAM} containing additional expression data.} 24 | } 25 | } 26 | \description{ 27 | This object includes normalized gene expression values, metadata annotations, 28 | and B cell clonotype information derived from 10x V(D)J sequencing. It is intended 29 | as a small example dataset for testing and demonstration purposes. 
30 | } 31 | -------------------------------------------------------------------------------- /man/ibex_vdj.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ibex_vdj.R 3 | \docType{data} 4 | \name{ibex_vdj} 5 | \alias{ibex_vdj} 6 | \title{Full filtered_annotated_contig.csv from the 10x 7 | 2k_BEAM-Ab_Mouse_HEL_5pv2} 8 | \format{ 9 | A data frame with 6 rows and 35 columns: 10 | \describe{ 11 | \item{barcode}{Character. Unique cell barcode.} 12 | \item{is_cell}{Logical. Whether the barcode is identified as a cell.} 13 | \item{contig_id}{Character. Unique identifier for each contig.} 14 | \item{high_confidence}{Logical. Whether the contig is high confidence.} 15 | \item{length}{Integer. Length of the contig.} 16 | \item{chain}{Character. Chain type (e.g., IGH, IGK).} 17 | \item{v_gene}{Character. V gene annotation.} 18 | \item{d_gene}{Character. D gene annotation.} 19 | \item{j_gene}{Character. J gene annotation.} 20 | \item{c_gene}{Character. C gene annotation.} 21 | \item{full_length}{Logical. Whether the contig is full-length.} 22 | \item{productive}{Logical. Whether the contig is productive.} 23 | \item{fwr1}{Character. Amino acid sequence for Framework Region 1.} 24 | \item{fwr1_nt}{Character. Nucleotide sequence for FWR1.} 25 | \item{cdr1}{Character. Amino acid sequence for CDR1.} 26 | \item{cdr1_nt}{Character. Nucleotide sequence for CDR1.} 27 | \item{fwr2}{Character. Amino acid sequence for FWR2.} 28 | \item{fwr2_nt}{Character. Nucleotide sequence for FWR2.} 29 | \item{cdr2}{Character. Amino acid sequence for CDR2.} 30 | \item{cdr2_nt}{Character. Nucleotide sequence for CDR2.} 31 | \item{fwr3}{Character. Amino acid sequence for FWR3.} 32 | \item{fwr3_nt}{Character. Nucleotide sequence for FWR3.} 33 | \item{cdr3}{Character. Amino acid sequence for CDR3.} 34 | \item{cdr3_nt}{Character. 
Nucleotide sequence for CDR3.} 35 | \item{fwr4}{Character. Amino acid sequence for FWR4.} 36 | \item{fwr4_nt}{Character. Nucleotide sequence for FWR4.} 37 | \item{reads}{Integer. Number of reads supporting the contig.} 38 | \item{umis}{Integer. Number of UMIs supporting the contig.} 39 | \item{raw_clonotype_id}{Character. Clonotype ID from 10x output.} 40 | \item{raw_consensus_id}{Character. Consensus ID from 10x output.} 41 | \item{exact_subclonotype_id}{Integer. Exact subclonotype grouping.} 42 | } 43 | } 44 | \description{ 45 | This dataset contains single-cell V(D)J sequencing annotations 46 | from the 10x Genomics BEAM-Ab Mouse dataset. It includes V(D)J 47 | gene calls, CDR regions, productivity information, and clonotype 48 | assignments for each contig. 49 | } 50 | -------------------------------------------------------------------------------- /man/quietBCRgenes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/quietBCRgenes.R 3 | \name{quietBCRgenes} 4 | \alias{quietBCRgenes} 5 | \title{Remove BCR Genes from Variable Gene Results} 6 | \usage{ 7 | quietBCRgenes(sc, assay = NULL) 8 | } 9 | \arguments{ 10 | \item{sc}{A single-cell dataset, which can be: 11 | \itemize{ 12 | \item A Seurat object 13 | \item A vector of variable genes generated by workflows such as Bioconductor's \code{scran} 14 | }} 15 | 16 | \item{assay}{Character. Specifies the Seurat assay slot to use for removing BCR genes. 17 | If \code{NULL}, the function defaults to the active assay in the Seurat object.} 18 | } 19 | \value{ 20 | The input Seurat object or vector with BCR genes removed from the variable features. 21 | } 22 | \description{ 23 | This function removes B-cell receptor (BCR) genes from the variable features of a 24 | single-cell dataset. 
Most single-cell workflows prioritize highly expressed and 25 | highly variable genes for principal component analysis (PCA) and dimensional 26 | reduction. By excluding BCR genes, this function ensures that the variable gene 27 | set focuses on biologically relevant features rather than highly variable BCR genes. 28 | } 29 | \examples{ 30 | # Remove BCR genes from the variable features of a vector 31 | variable.genes <- c("IGHV1-69", "IGHV3-23", "IGHV4-34", "IGHV5-51", "IGHV6-1", 32 | "IGKV1-5", "IGKV3-20", "IGLV2-14", "IGLV3-21", "IGLV6-57", 33 | "TP53", "MYC", "BCL2", "CD19", "CD79A", "CD79B", "PAX5") 34 | variable.genes <- quietBCRgenes(variable.genes) 35 | 36 | } 37 | -------------------------------------------------------------------------------- /man/runIbex.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/runIbex.R 3 | \name{runIbex} 4 | \alias{runIbex} 5 | \title{Ibex Single-Cell Calculation} 6 | \usage{ 7 | runIbex( 8 | sc.data, 9 | chain = "Heavy", 10 | method = "encoder", 11 | encoder.model = "VAE", 12 | encoder.input = "atchleyFactors", 13 | geometric.theta = pi, 14 | reduction.name = "Ibex", 15 | species = "Human", 16 | verbose = TRUE 17 | ) 18 | } 19 | \arguments{ 20 | \item{sc.data}{A single-cell dataset, which can be: 21 | \itemize{ 22 | \item A Seurat object 23 | \item A SingleCellExperiment object 24 | }} 25 | 26 | \item{chain}{Character. Specifies the chain to analyze: 27 | \itemize{ 28 | \item "Heavy" for the heavy chain 29 | \item "Light" for the light chain 30 | }} 31 | 32 | \item{method}{Character. Algorithm to use for generating latent dimensions: 33 | \itemize{ 34 | \item "encoder" - Uses deep learning autoencoders 35 | \item "geometric" - Uses geometric transformations based on the BLOSUM62 matrix 36 | }} 37 | 38 | \item{encoder.model}{Character. 
The type of autoencoder model to use: 39 | \itemize{ 40 | \item "CNN" - CDR3 Convolutional Neural Network-based autoencoder 41 | \item "VAE" - CDR3 Variational Autoencoder 42 | \item "CNN.EXP" - CDR1/2/3 CNN 43 | \item "VAE.EXP" - CDR1/2/3 VAE 44 | }} 45 | 46 | \item{encoder.input}{Character. Input features for the encoder model: 47 | \itemize{ 48 | \item Amino Acid Properties: "atchleyFactors", "crucianiProperties", 49 | "kideraFactors", "MSWHIM", "tScales" 50 | \item "OHE" - One Hot Encoding 51 | }} 52 | 53 | \item{geometric.theta}{Numeric. Angle (in radians) for geometric transformation. 54 | Used only when \code{method = "geometric"}.} 55 | 56 | \item{reduction.name}{Character. The name to assign to the dimensional reduction. 57 | This is useful for running Ibex with multiple parameter settings and saving results 58 | under different names.} 59 | 60 | \item{species}{Character. Default is "Human" or "Mouse".} 61 | 62 | \item{verbose}{Logical. Whether to print progress messages. Default is TRUE.} 63 | } 64 | \value{ 65 | An updated Seurat or SingleCellExperiment object with Ibex dimensions added 66 | to the dimensional reduction slot. 67 | } 68 | \description{ 69 | This function applies the Ibex algorithm to single-cell data, integrating 70 | seamlessly with Seurat or SingleCellExperiment pipelines. The algorithm 71 | generates latent dimensions using deep learning or geometric transformations, 72 | storing the results in the dimensional reduction slot. \code{runIbex} will 73 | automatically subset the single-cell object based on amino acid sequences 74 | present for the given chain selection. 
75 | } 76 | \examples{ 77 | # Using the encoder method with a variational autoencoder 78 | ibex_example <- runIbex(ibex_example, 79 | chain = "Heavy", 80 | method = "encoder", 81 | encoder.model = "VAE", 82 | encoder.input = "atchleyFactors") 83 | 84 | # Using the geometric method with a specified angle 85 | ibex_example <- runIbex(ibex_example, 86 | chain = "Heavy", 87 | method = "geometric", 88 | geometric.theta = pi) 89 | 90 | } 91 | -------------------------------------------------------------------------------- /tests/spelling.R: -------------------------------------------------------------------------------- 1 | if(requireNamespace('spelling', quietly = TRUE)) 2 | spelling::spell_check_test(vignettes = TRUE, error = FALSE, 3 | skip_on_cran = TRUE) 4 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | # This file is part of the standard setup for testthat. 2 | # It is recommended that you do not modify it. 3 | # 4 | # Where should you do additional test configuration? 
5 | # Learn more about the roles of various files in: 6 | # * https://r-pkgs.org/tests.html 7 | # * https://testthat.r-lib.org/reference/test_package.html#special-files 8 | 9 | library(testthat) 10 | library(Ibex) 11 | 12 | test_check("Ibex") 13 | -------------------------------------------------------------------------------- /tests/testthat/basiliskEnv.R: -------------------------------------------------------------------------------- 1 | #' @import basilisk 2 | IbexEnv <- BasiliskEnvironment( 3 | envname = "IbexEnv", 4 | pkgname = "Ibex", 5 | path = system.file("extdata", "ibex-basilisk.yaml", package = "Ibex") 6 | ) 7 | -------------------------------------------------------------------------------- /tests/testthat/helper-testingFunctions.R: -------------------------------------------------------------------------------- 1 | getdata <- function(dir, name) { 2 | readRDS(paste("testdata/", dir, "/", name, ".rds", sep = "")) # could move testdata one directory level up instead 3 | } -------------------------------------------------------------------------------- /tests/testthat/test-CoNGAfy.R: -------------------------------------------------------------------------------- 1 | # test script for CoNGAfy.R - testcases are NOT comprehensive! 
2 | 3 | test_that("CoNGAfy works with Seurat object", { 4 | result <- CoNGAfy(ibex_example, method = "mean") 5 | 6 | expect_true(inherits(result, "SingleCellExperiment")) 7 | expect_gt(ncol(result), 0) 8 | expect_gt(nrow(result), 0) 9 | }) 10 | 11 | 12 | test_that("CoNGAfy works with dist method", { 13 | result <- CoNGAfy(ibex_example, method = "dist") 14 | 15 | expect_true(inherits(result, "SingleCellExperiment")) 16 | expect_gt(ncol(result), 0) 17 | expect_gt(nrow(result), 0) 18 | }) 19 | 20 | test_that("CoNGAfy filters cells correctly", { 21 | result <- CoNGAfy(ibex_example, method = "mean") 22 | expect_equal(ncol(result), 52) 23 | }) 24 | 25 | test_that("CoNGAfy stops if amino acid sequences are missing", { 26 | sc_example <- suppressWarnings(CreateSeuratObject(counts = matrix(rnorm(1000), 27 | nrow = 10, 28 | ncol = 100))) 29 | 30 | expect_error(CoNGAfy(sc_example, method = "mean"), 31 | "'CTaa' not found in this Seurat object\n ") 32 | }) 33 | 34 | test_that("CoNGA.dist selects representative cells correctly", { 35 | result <- .CoNGA.dist(ibex_example, features = NULL, assay = "RNA") 36 | 37 | expect_true(inherits(result, "dgCMatrix")) 38 | expect_gt(ncol(result), 0) 39 | expect_gt(nrow(result), 0) 40 | }) 41 | 42 | test_that("CoNGA.mean computes mean expression per clonotype", { 43 | result <- .CoNGA.mean(ibex_example, features = NULL, assay = "RNA") 44 | 45 | expect_true(inherits(result, "dgCMatrix")) 46 | expect_gt(ncol(result), 0) 47 | expect_gt(nrow(result), 0) 48 | }) 49 | -------------------------------------------------------------------------------- /tests/testthat/test-Ibex.matrix.R: -------------------------------------------------------------------------------- 1 | # test script for Ibex.matrix.R - testcases are NOT comprehensive! 
2 | 3 | test_that("Ibex.matrix handles incorrect inputs gracefully", { 4 | expect_error(Ibex.matrix(input.data = ibex_example, chain = "Middle", method = "encoder"), 5 | "'arg' should be one of \"Heavy\", \"Light\"") 6 | expect_error(Ibex.matrix(input.data = ibex_example, chain = "Heavy", method = "xyz"), 7 | "'arg' should be one of \"encoder\", \"geometric\"") 8 | expect_error(Ibex.matrix(input.data = ibex_example, chain = "Heavy", method = "encoder", encoder.model = "ABC"), 9 | "'arg' should be one of \"CNN\", \"VAE\", \"CNN.EXP\", \"VAE.EXP\"") 10 | expect_error(Ibex.matrix(input.data = ibex_example, chain = "Heavy", method = "encoder", encoder.input = "XYZ"), 11 | "arg' should be one of \"atchleyFactors\", \"crucianiProperties\", \"kideraFactors\", \"MSWHIM\", \"tScales\", \"OHE\"") 12 | expect_error(Ibex.matrix(input.data = ibex_example, chain = "Heavy", method = "geometric", geometric.theta = "not_numeric"), 13 | "non-numeric argument to mathematical function") 14 | }) 15 | 16 | test_that("Ibex.matrix returns expected output format", { 17 | result <- Ibex.matrix(input.data = ibex_example, 18 | chain = "Heavy", 19 | method = "encoder", 20 | encoder.model = "VAE", 21 | encoder.input = "atchleyFactors", 22 | verbose = FALSE) 23 | expect_true(is.data.frame(result)) 24 | expect_true(all(grepl("^Ibex_", colnames(result)))) 25 | expect_gt(nrow(result), 0) 26 | expect_gt(ncol(result), 0) 27 | }) 28 | 29 | test_that("Ibex.matrix works with encoder method", { 30 | result <- Ibex.matrix(input.data = ibex_example, 31 | chain = "Light", 32 | method = "encoder", 33 | encoder.model = "CNN", 34 | encoder.input = "OHE", 35 | verbose = FALSE) 36 | expect_true(is.data.frame(result)) 37 | expect_true(all(grepl("^Ibex_", colnames(result)))) 38 | }) 39 | 40 | test_that("Ibex.matrix works with geometric method", { 41 | result <- Ibex.matrix(input.data = ibex_example, 42 | chain = "Heavy", 43 | method = "geometric", 44 | geometric.theta = pi / 4, 45 | verbose = FALSE) 46 | 
expect_true(is.data.frame(result)) 47 | expect_true(all(grepl("^Ibex_", colnames(result)))) 48 | }) 49 | 50 | test_that("Ibex.matrix handles different species options", { 51 | result1 <- Ibex.matrix(input.data = ibex_example, 52 | chain = "Heavy", 53 | method = "encoder", 54 | encoder.model = "VAE", 55 | encoder.input = "atchleyFactors", 56 | species = "Human", 57 | verbose = FALSE) 58 | result2 <- Ibex.matrix(input.data = ibex_example, 59 | chain = "Heavy", 60 | method = "encoder", 61 | encoder.model = "VAE", 62 | encoder.input = "atchleyFactors", 63 | species = "Mouse", 64 | verbose = FALSE) 65 | expect_true(is.data.frame(result1)) 66 | expect_true(is.data.frame(result2)) 67 | expect_true(all(grepl("^Ibex_", colnames(result1)))) 68 | expect_true(all(grepl("^Ibex_", colnames(result2)))) 69 | }) 70 | 71 | -------------------------------------------------------------------------------- /tests/testthat/test-combineExpandedBCR.R: -------------------------------------------------------------------------------- 1 | # test script for combineExpandedBCR.R - testcases are NOT comprehensive! 
2 | 3 | test_that("combineExpandedBCR handles incorrect input gracefully", { 4 | expect_error(combineExpandedBCR(NULL, samples = "Sample1"), 5 | "Input data must be a list of data frames.") 6 | 7 | invalid_data <- list(data.frame(cdr1 = c("AA", "BB"), cdr3 = c("CC", "DD"))) 8 | expect_error(combineExpandedBCR(invalid_data, samples = "Sample1"), 9 | "Each data frame must contain 'cdr1', 'cdr2', and 'cdr3' columns.") 10 | }) 11 | 12 | test_that("combineExpandedBCR correctly concatenates CDR sequences", { 13 | 14 | modified_data <- combineExpandedBCR(list(ibex_vdj), samples = "Sample1") 15 | 16 | expect_true(any(grepl("-", modified_data[[1]]$CTaa))) 17 | }) 18 | 19 | test_that("combineExpandedBCR integrates correctly with combineBCR", { 20 | 21 | result <- combineExpandedBCR(list(ibex_vdj), samples = "Sample1") 22 | expect_true(is.list(result)) 23 | expect_true(all(c("barcode", "CTaa") %in% colnames(result[[1]]))) 24 | expect_gt(nrow(result[[1]]), 0) 25 | }) 26 | 27 | test_that("combineExpandedBCR correctly assigns sample labels", { 28 | 29 | result <- combineExpandedBCR(list(ibex_vdj), samples = "Sample1") 30 | 31 | expect_true("sample" %in% colnames(result[[1]])) 32 | expect_equal(result[[1]]$sample[1], "Sample1") 33 | }) 34 | 35 | test_that("combineExpandedBCR handles multiple sample inputs correctly", { 36 | 37 | result <- combineExpandedBCR(list(ibex_vdj, ibex_vdj), samples = c("Sample1", "Sample2")) 38 | 39 | expect_true(length(result) == 2) 40 | expect_equal(result[[1]]$sample[1], "Sample1") 41 | expect_equal(result[[2]]$sample[1], "Sample2") 42 | }) 43 | 44 | 45 | -------------------------------------------------------------------------------- /tests/testthat/test-quietBCRgenes.R: -------------------------------------------------------------------------------- 1 | # test script for quietBCRgenes.R - testcases are NOT comprehensive! 
2 | 3 | test_that("quietBCRgenes works", { 4 | 5 | data("ibex_example") 6 | 7 | features <- rownames(ibex_example@assays@data$counts) 8 | 9 | expect_equal( 10 | quietBCRgenes(features), 11 | getdata("quietBCRgenes", "quietBCRgenes_feature.vector") 12 | ) 13 | }) -------------------------------------------------------------------------------- /tests/testthat/test-runIbex.R: -------------------------------------------------------------------------------- 1 | # test script for runIbex.R - testcases are NOT comprehensive! 2 | test_that("runIbex handles incorrect inputs gracefully", { 3 | expect_error(runIbex(sc.data = ibex_example, chain = "Middle", method = "encoder"), 4 | "'arg' should be one of \"Heavy\", \"Light\"") 5 | expect_error(runIbex(sc.data = ibex_example, chain = "Heavy", method = "xyz"), 6 | "'arg' should be one of \"encoder\", \"geometric\"") 7 | expect_error(runIbex(sc.data = ibex_example, chain = "Heavy", method = "encoder", encoder.model = "ABC"), 8 | "'arg' should be one of \"CNN\", \"VAE\", \"CNN.EXP\", \"VAE.EXP\"") 9 | expect_error(runIbex(sc.data = ibex_example, chain = "Heavy", method = "encoder", encoder.input = "XYZ"), 10 | "arg' should be one of \"atchleyFactors\", \"crucianiProperties\", \"kideraFactors\", \"MSWHIM\", \"tScales\", \"OHE\"") 11 | expect_error(runIbex(sc.data = ibex_example, chain = "Heavy", method = "geometric", geometric.theta = "not_numeric"), 12 | "non-numeric argument to mathematical function") 13 | }) 14 | 15 | test_that("runIbex works with Seurat object", { 16 | suppressWarnings(sc_example <- CreateSeuratObject(counts = matrix(rnorm(1000), nrow = 10, ncol = 100))) 17 | sc_example[["CTaa"]] <- sample(c("CASSL", "CASST", NA, "NA_IGHV1", "None_IGHV2"), 100, replace = TRUE) 18 | sc_example[["CTgene"]] <- sample(c("NA_IGHV1.IGD1.IGJ1.IGM", "NA_IGHV1.IGD1.IGJ1.IGM", NA, "NA_IGHV1.IGD1.IGJ1.IGM", "None_IGHV1.IGD1.IGJ1.IGM"), 100, replace = TRUE) 19 | 20 | result <- runIbex(sc_example, 21 | chain = "Heavy", 22 | method = 
"encoder", 23 | encoder.model = "VAE", 24 | encoder.input = "atchleyFactors", 25 | reduction.name = "IbexTest", 26 | verbose = FALSE) 27 | 28 | expect_true("IbexTest" %in% names(result@reductions)) 29 | expect_true(inherits(result, "Seurat")) 30 | }) 31 | 32 | test_that("runIbex works with geometric method", { 33 | sc_example <- suppressWarnings(SeuratObject::CreateSeuratObject(counts = matrix(rnorm(1000), nrow = 10, ncol = 100))) 34 | sc_example[["CTaa"]] <- sample(c("CASSL", "CASST", NA, "NA_IGHV1", "None_IGHV2"), 100, replace = TRUE) 35 | sc_example[["CTgene"]] <- sample(c("NA_IGHV1.IGD1.IGJ1.IGM", "NA_IGHV1.IGD1.IGJ1.IGM", NA, "NA_IGHV1.IGD1.IGJ1.IGM", "None_IGHV1.IGD1.IGJ1.IGM"), 100, replace = TRUE) 36 | 37 | result <- runIbex(sc_example, 38 | chain = "Heavy", 39 | method = "geometric", 40 | geometric.theta = pi / 4, 41 | reduction.name = "IbexGeo", 42 | verbose = FALSE) 43 | 44 | expect_true("IbexGeo" %in% names(result@reductions)) 45 | expect_true(inherits(result, "Seurat")) 46 | }) 47 | 48 | test_that("runIbex filters cells correctly", { 49 | sc_example <- suppressWarnings(CreateSeuratObject(counts = matrix(rnorm(1000), nrow = 10, ncol = 100))) 50 | sc_example[["CTaa"]] <- c(rep("CASSL", 50), rep(NA, 50)) 51 | sc_example[["CTgene"]] <- sample(c("NA_IGHV1.IGD1.IGJ1.IGM", "NA_IGHV1.IGD1.IGJ1.IGM", NA, "NA_IGHV1.IGD1.IGJ1.IGM", "None_IGHV1.IGD1.IGJ1.IGM"), 100, replace = TRUE) 52 | result <- runIbex(sc_example, 53 | chain = "Heavy", 54 | method = "encoder", 55 | encoder.model = "VAE", 56 | encoder.input = "atchleyFactors", 57 | reduction.name = "IbexFiltered", 58 | verbose = FALSE) 59 | 60 | expect_true("IbexFiltered" %in% names(result@reductions)) 61 | expect_lt(ncol(result), 100) # Ensures some cells were filtered out 62 | }) 63 | 64 | test_that("runIbex stops if amino acid sequences are missing", { 65 | sc_example <- suppressWarnings(SeuratObject::CreateSeuratObject(counts = matrix(rnorm(1000), nrow = 10, ncol = 100))) 66 | 67 | 
expect_error(runIbex(sc_example, 68 | chain = "Heavy", 69 | method = "encoder", 70 | encoder.model = "VAE", 71 | encoder.input = "atchleyFactors", 72 | verbose = FALSE), 73 | "Amino acid sequences are not added to the single-cell object correctly.") 74 | }) 75 | 76 | test_that("runIbex works with different reduction names", { 77 | sc_example <- suppressWarnings(SeuratObject::CreateSeuratObject(counts = matrix(rnorm(1000), nrow = 10, ncol = 100))) 78 | sc_example[["CTaa"]] <- sample(c("CASSL", "CASST", NA, "NA_IGHV1", "None_IGHV2"), 100, replace = TRUE) 79 | sc_example[["CTgene"]] <- sample(c("NA_IGHV1.IGD1.IGJ1.IGM", "NA_IGHV1.IGD1.IGJ1.IGM", NA, "NA_IGHV1.IGD1.IGJ1.IGM", "None_IGHV1.IGD1.IGJ1.IGM"), 100, replace = TRUE) 80 | result1 <- runIbex(sc_example, 81 | chain = "Heavy", 82 | method = "encoder", 83 | encoder.model = "VAE", 84 | encoder.input = "atchleyFactors", 85 | reduction.name = "Ibex1", 86 | verbose = FALSE) 87 | 88 | result2 <- runIbex(sc_example, chain = "Heavy", 89 | method = "encoder", 90 | encoder.model = "VAE", 91 | encoder.input = "atchleyFactors", 92 | reduction.name = "Ibex2", 93 | verbose = FALSE) 94 | 95 | expect_true("Ibex1" %in% names(result1@reductions)) 96 | expect_true("Ibex2" %in% names(result2@reductions)) 97 | }) 98 | 99 | -------------------------------------------------------------------------------- /tests/testthat/testdata/CoNGAfy/CoNGAfy_counts.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BorchLab/Ibex/543ce724c3f01bebedb0db705873788509b5e8a7/tests/testthat/testdata/CoNGAfy/CoNGAfy_counts.rds -------------------------------------------------------------------------------- /tests/testthat/testdata/CoNGAfy/CoNGAfy_mean_counts.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BorchLab/Ibex/543ce724c3f01bebedb0db705873788509b5e8a7/tests/testthat/testdata/CoNGAfy/CoNGAfy_mean_counts.rds 
-------------------------------------------------------------------------------- /tests/testthat/testdata/CoNGAfy/CoNGAfy_mean_meta.data.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BorchLab/Ibex/543ce724c3f01bebedb0db705873788509b5e8a7/tests/testthat/testdata/CoNGAfy/CoNGAfy_mean_meta.data.rds -------------------------------------------------------------------------------- /tests/testthat/testdata/CoNGAfy/CoNGAfy_meta.data.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BorchLab/Ibex/543ce724c3f01bebedb0db705873788509b5e8a7/tests/testthat/testdata/CoNGAfy/CoNGAfy_meta.data.rds -------------------------------------------------------------------------------- /tests/testthat/testdata/quietBCRgenes/quietBCRgenes_feature.vector.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BorchLab/Ibex/543ce724c3f01bebedb0db705873788509b5e8a7/tests/testthat/testdata/quietBCRgenes/quietBCRgenes_feature.vector.rds -------------------------------------------------------------------------------- /tests/testthat/testdata/runIbex/ibex.matrix_Heavy_VAE_AF.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BorchLab/Ibex/543ce724c3f01bebedb0db705873788509b5e8a7/tests/testthat/testdata/runIbex/ibex.matrix_Heavy_VAE_AF.rds -------------------------------------------------------------------------------- /tests/testthat/testdata/runIbex/ibex.matrix_Light_AE_OHE.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BorchLab/Ibex/543ce724c3f01bebedb0db705873788509b5e8a7/tests/testthat/testdata/runIbex/ibex.matrix_Light_AE_OHE.rds -------------------------------------------------------------------------------- 
/tests/testthat/testdata/runIbex/runIbex_Heavy_VAE_AF_reduction.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BorchLab/Ibex/543ce724c3f01bebedb0db705873788509b5e8a7/tests/testthat/testdata/runIbex/runIbex_Heavy_VAE_AF_reduction.rds -------------------------------------------------------------------------------- /tests/testthat/testdata/runIbex/runIbex_Heavy_VAE_OHE_reduction.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BorchLab/Ibex/543ce724c3f01bebedb0db705873788509b5e8a7/tests/testthat/testdata/runIbex/runIbex_Heavy_VAE_OHE_reduction.rds -------------------------------------------------------------------------------- /tests/testthat/testdata/runIbex/runIbex_Heavy_geometric_reduction.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BorchLab/Ibex/543ce724c3f01bebedb0db705873788509b5e8a7/tests/testthat/testdata/runIbex/runIbex_Heavy_geometric_reduction.rds -------------------------------------------------------------------------------- /tests/testthat/testdata/runIbex/runIbex_Light_AE_KF_reduction.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BorchLab/Ibex/543ce724c3f01bebedb0db705873788509b5e8a7/tests/testthat/testdata/runIbex/runIbex_Light_AE_KF_reduction.rds -------------------------------------------------------------------------------- /vignettes/Ibex.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: A tour of Ibex. 3 | author: 4 | - name: Nick Borcherding 5 | email: ncborch@gmail.com 6 | affiliation: Washington University in St. Louis, School of Medicine, St. 
Louis, MO, USA 7 | date: 'Compiled: `r format(Sys.Date(), "%B %d, %Y")`' 8 | output: 9 | BiocStyle::html_document: 10 | toc_float: true 11 | package: Ibex 12 | vignette: > 13 | %\VignetteEngine{knitr::knitr} 14 | %\VignetteIndexEntry{Charging through Ibex} 15 | %\usepackage[UTF-8]{inputenc} 16 | --- 17 | 18 | ```{r include=FALSE} 19 | # Create and set a temporary basilisk cache directory 20 | temp_cache <- file.path(tempdir(), "basilisk_cache") 21 | dir.create(temp_cache, recursive = TRUE, showWarnings = FALSE) 22 | Sys.setenv("BASILISK_CACHE_DIR" = temp_cache) 23 | do.call(Sys.setenv, list(BASILISK_CACHE_DIR = temp_cache)) 24 | 25 | knitr::opts_chunk$set(error=FALSE, message=FALSE, warning=FALSE, tidy = FALSE) 26 | library(BiocStyle) 27 | ``` 28 | 29 | # Introduction 30 | 31 | ## Load Libraries 32 | 33 | ```{r tidy = FALSE} 34 | suppressMessages(library(bluster)) 35 | suppressMessages(library(dplyr)) 36 | suppressMessages(library(ggplot2)) 37 | suppressMessages(library(Ibex)) 38 | suppressMessages(library(kableExtra)) 39 | suppressMessages(library(mumosa)) 40 | suppressMessages(library(patchwork)) 41 | suppressMessages(library(scater)) 42 | suppressMessages(library(viridis)) 43 | suppressMessages(library(utils)) 44 | ``` 45 | 46 | ```{r include = FALSE} 47 | # Defensive Basilisk environment setup for vignette building 48 | setup_basilisk_env <- function() { 49 | # Double-check cache directory setup 50 | cache_dir <- Sys.getenv("BASILISK_CACHE_DIR") 51 | if (nzchar(cache_dir) && !dir.exists(cache_dir)) { 52 | dir.create(cache_dir, recursive = TRUE, showWarnings = FALSE) 53 | } 54 | 55 | # Only attempt initialization if not in CHECK mode 56 | if (!identical(Sys.getenv("_R_CHECK_PACKAGE_NAME_"), "Ibex") && 57 | !identical(Sys.getenv("R_CMD"), "check")) { 58 | 59 | tryCatch({ 60 | basilisk::basiliskRun(env = Ibex:::IbexEnv, fun = function() { 61 | keras <- reticulate::import("keras") 62 | invisible(NULL) 63 | }) 64 | }, error = function(e) { 65 | message("Basilisk environment initialization 
skipped during build: ", e$message) 66 | return(NULL) 67 | }) 68 | } else { 69 | message("Skipping Basilisk initialization during package check/build") 70 | } 71 | } 72 | 73 | # Run the setup 74 | setup_basilisk_env() 75 | ``` 76 | 77 | ## The Data Set 78 | 79 | The data used here are derived from 10x Genomics’ 2k BEAM-Ab Mouse HEL data set, consisting of splenocytes from transgenic mice engineered to recognize Hen Egg Lysozyme (HEL). These splenocytes were labeled with a small antigen panel: SARS-TRI-S, gp120, H5N1, and a negative control. 80 | 81 | To illustrate the Ibex framework, we subset to a smaller set of 200 cells (including some dominant clones) and convert the Seurat object into a SingleCellExperiment. The resulting “ibex_example” object stores all the necessary data—RNA expression, antigen capture (BEAM) features, BCR contig annotations, and computed dimensional reductions—ready for downstream Ibex analyses. The object is saved (`ibex_example.rda`), along with the contig information (`ibex_vdj.rda`), ensuring that the integrated data set can be readily reloaded and explored in subsequent steps. 
82 | 83 | ```{r, eval=FALSE} 84 | library(scRepertoire) 85 | library(Seurat) 86 | library(dplyr) 87 | library(SummarizedExperiment) 88 | library(SingleCellExperiment) 89 | ################################## 90 | #scRNA/ADT loading and processing 91 | ################################# 92 | 93 | tmp <- Read10X("~/data/filtered_feature_bc_matrix") 94 | 95 | SeuratObj <- CreateSeuratObject(counts = tmp$`Gene Expression`) 96 | beam_assay <- CreateAssayObject(counts = tmp$`Antigen Capture`) 97 | 98 | SeuratObj[["BEAM"]] <- beam_assay 99 | SeuratObj <- subset(SeuratObj, subset = nFeature_RNA > 100) 100 | SeuratObj <- RenameCells(object = SeuratObj , new.names = paste0("BEAM.sample_", rownames(SeuratObj[[]]))) 101 | SeuratObj[["mito.genes"]] <- PercentageFeatureSet(SeuratObj, pattern = "^mt-") 102 | 103 | #Filtering step 104 | standev <- sd(log(SeuratObj$nFeature_RNA))*2.5 #cutting off above standard deviation of 2.5 105 | mean <- mean(log(SeuratObj$nFeature_RNA)) 106 | cut <- round(exp(standev+mean)) 107 | SeuratObj <- subset(SeuratObj, subset = mito.genes < 10 & nFeature_RNA < cut) 108 | 109 | #Processing and Adding Contig Info 110 | contigs <- read.csv("~/data/2k_BEAM-Ab_Mouse_HEL_5pv2_2k_BEAM-Ab_Mouse_HEL_5pv2_vdj_b_filtered_contig_annotations.csv") 111 | clones <- combineBCR(contigs, samples = "BEAM.sample", removeNA = TRUE) 112 | SeuratObj <- combineExpression(clones, SeuratObj, cloneCall="aa") 113 | 114 | #Subset only cells with BCR and Heavy Chain 115 | cell.idx <- intersect(which(!is.na(SeuratObj$CTaa)), which(!is.na(stringr::str_split(SeuratObj$CTaa, "_", simplify = TRUE)[,1]))) 116 | SeuratObj <- subset(SeuratObj, cells = colnames(SeuratObj)[cell.idx]) 117 | 118 | #Processing RNA 119 | DefaultAssay(SeuratObj) <- 'RNA' 120 | SeuratObj <- NormalizeData(SeuratObj, verbose = FALSE) %>% 121 | FindVariableFeatures(verbose = FALSE) %>% 122 | quietBCRgenes() %>% 123 | ScaleData(verbose = FALSE) %>% 124 | RunPCA(verbose = FALSE) 125 | 126 | #Removing negative control + B 
Cells 127 | DefaultAssay(SeuratObj) <- 'BEAM' 128 | SeuratObj <- subset(SeuratObj, subset = `negative-control` < 100, slot = "counts") 129 | 130 | #Processing BEAM 131 | VariableFeatures(SeuratObj) <- rownames(SeuratObj[["BEAM"]]) 132 | SeuratObj <- NormalizeData(SeuratObj, 133 | normalization.method = 'CLR', 134 | margin = 2, ) %>% 135 | ScaleData(verbose = FALSE) %>% 136 | RunPCA(verbose = FALSE, reduction.name = 'apca') 137 | 138 | DefaultAssay(SeuratObj) <- 'RNA' 139 | ################################### 140 | #Making Example Data Set for Ibex 141 | ################################# 142 | 143 | # Subset nondominate clones + random sampling of dominant 144 | set.seed(42) 145 | cell.idx <- unique(c(which(!grepl("CANWDGDYW", SeuratObj$CTaa)), sample(seq_len(nrow(SeuratObj[[]])), 154))) 146 | 147 | ibex_example <- SeuratObj 148 | saveRDS(ibex_example, file = "Ibex_FullExample.rds") 149 | 150 | # Forming Example Data set in SCE format 151 | ibex_example <- subset(ibex_example, cells = colnames(ibex_example)[cell.idx]) 152 | PCA <- Embeddings(ibex_example[["pca"]]) 153 | APCA <- Embeddings(ibex_example[["apca"]]) 154 | BEAM_counts <- GetAssayData(ibex_example, slot = "counts", assay = "BEAM")[1:4,] 155 | BEAM_data <- GetAssayData(ibex_example, slot = "data", assay = "BEAM")[1:4,] 156 | ibex_example <- as.SingleCellExperiment(ibex_example) 157 | altExp(ibex_example, "BEAM") <- SummarizedExperiment( 158 | assays = list( 159 | counts = as.matrix(BEAM_counts), 160 | logcounts = as.matrix(BEAM_data) 161 | ), 162 | colData = colData(ibex_example) 163 | ) 164 | reducedDim(ibex_example, "pca") <- PCA 165 | reducedDim(ibex_example, "apca") <- APCA 166 | 167 | #Saving the built-in data set 168 | save(ibex_example, file = "ibex_example.rda", compress = "xz") 169 | ibex_vdj <- contigs 170 | save(ibex_vdj, file = "ibex_vdj.rda", compress = "xz") 171 | ``` 172 | 173 | ### Loading the processed data 174 | 175 | ```{r, echo=FALSE} 176 | data("ibex_example") 177 | data("ibex_vdj") 
178 | ``` 179 | 180 | ## Getting Expanded Sequences 181 | 182 | The function ```combineExpandedBCR()``` extends the functionality of ```combineBCR()``` from the scRepertoire package by first concatenating the CDR1, CDR2, and CDR3 sequences into a single expanded variable. This approach retains additional information from the BCR variable regions before calling ```combineBCR()``` to consolidate BCR sequences into clones. This will allow for use of expanded sequence models which we will detail below. 183 | 184 | ### **Function Parameters** 185 | The `combineExpandedBCR()` function supports the following parameters: 186 | 187 | | Parameter | Description | Default | 188 | |------------------------|---------------------------------------------------------------------------|---------| 189 | | `input.data` | List of data frames containing BCR sequencing results. | **Required** | 190 | | `samples` | Character vector labeling each sample. | **Required** | 191 | | `ID` | Additional sample labeling (optional). | `NULL` | 192 | | `call.related.clones` | Whether to group related clones using nucleotide sequences and V genes. | `TRUE` | 193 | | `threshold` | Normalized edit distance for clone clustering. | `0.85` | 194 | | `removeNA` | Remove chains without values. | `FALSE` | 195 | | `removeMulti` | Remove barcodes with more than two chains. | `FALSE` | 196 | | `filterMulti` | Select highest-expressing light and heavy chains. | `TRUE` | 197 | | `filterNonproductive` | Remove nonproductive chains if the column exists. | `TRUE` | 198 | 199 | ```{r tidy = FALSE} 200 | combined.BCR <- combineExpandedBCR(input.data = list(ibex_vdj), 201 | samples = "Sample1", 202 | filterNonproductive = TRUE) 203 | head(combined.BCR[[1]])[,c(1,11)] 204 | ``` 205 | 206 | We can attach the expanded sequences to the Seurat or Single-Cell Experiment objects using the scRepertoire [`combineExpression()`](https://www.borch.dev/uploads/screpertoire/reference/combineexpression) function. 
207 | 208 | ## Available Models 209 | 210 | **Ibex** offers a diverse set of models built on various architectures and encoding methods. Currently, models are available for both heavy and light chain sequences in humans, as well as heavy chain models for mice. Models for CDR3-based sequences have been trained on sequences of 45 residues or fewer, while models for CDR1/2/3-based sequences are specific to sequences of 90 amino acids or fewer. 211 | 212 | A full list of available models is provided below: 213 | 214 | ```{r tidy = FALSE} 215 | model.meta.data <- read.csv(system.file("extdata", "metadata.csv", 216 | package = "Ibex"))[,c(1:2,8)] 217 | model.meta.data %>% 218 | kable("html", escape = FALSE) %>% 219 | kable_styling(full_width = FALSE) %>% 220 | scroll_box(width = "100%", height = "400px") 221 | 222 | ``` 223 | 224 | All the models are available via a [Zenodo repository](https://zenodo.org/records/14919286), which Ibex will pull automatically and cache for future use locally. There is no need to download the models independently of the ```runIbex()``` or ```Ibex.matrix()``` calls. 225 | 226 | ### Choosing Between CNN and VAE 227 | 228 | **Convolutional Neural Networks (CNNs)** 229 | 230 | * **Pros**: Detect local sequence motifs effectively; relatively straightforward and quick to train. 231 | * **Cons**: Can struggle to capture global context. 232 | 233 | **Variational Autoencoders (VAEs)** 234 | 235 | * **Pros**: Model sequences within a probabilistic, continuous latent space; suitable for generating novel variants. 236 | * **Cons**: Training can be more complex (balancing reconstruction and regularization losses); interpretability may be less direct. 237 | 238 | **Which to choose?** 239 | 240 | * **Use CNNs** if local motif detection and simpler training are priorities. 241 | * **Use VAEs** if you want a generative model capturing broader sequence structures. 
242 | 243 | ### Choosing Encoding Methods 244 | 245 | **One-Hot Encoding:** Represents each amino acid as a binary vector (e.g., a 20-length vector for the 20 standard residues). 246 | 247 | * **Pros**: Simple and assumption-free. 248 | * **Cons**: High-dimensional and doesn’t capture biochemical similarities. 249 | 250 | **Atchley Factors:** Uses five numerical descriptors summarizing key physicochemical properties. 251 | 252 | * **Pros**: Compact and embeds biochemical information. 253 | * **Cons**: May overlook some residue-specific nuances. 254 | 255 | **Cruciani Properties:** Encodes amino acids via descriptors that reflect molecular shape, hydrophobicity, and electronic features. 256 | 257 | * **Pros**: Captures rich chemical details. 258 | * **Cons**: More complex to compute and less standardized. 259 | 260 | **Kidera Factors:** Provides ten orthogonal values derived from a broad set of physical and chemical properties. 261 | 262 | * **Pros**: Offers a balanced, low-dimensional representation. 263 | * **Cons**: Derived statistically, potentially averaging out finer details. 264 | 265 | **MSWHIM:** Derives descriptors from 3D structural data, summarizing overall shape and surface properties. 266 | 267 | * **Pros**: Provides robust, rotation-invariant structural insight. 268 | * **Cons**: Requires 3D information and can be computationally intensive. 269 | 270 | **tScales:** Encodes amino acids based on topological and structural features reflective of protein folding and interactions. 271 | 272 | * **Pros**: Captures contextual information from the overall sequence structure. 273 | * **Cons**: Less commonly used, making standardization and tool support a challenge. 274 | 275 | # Running Ibex 276 | 277 | The idea behind **Ibex** is to combine BCR CDR3 amino acid information with phenotypic RNA/protein data to direct the use of single-cell sequencing towards antigen-specific discoveries. 
This is a growing field - specifically [TESSA](https://github.com/jcao89757/TESSA) uses amino acid characteristics and an autoencoder to obtain a dimensional reduction. Another option is [CoNGA](https://github.com/phbradley/conga), which produces an embedding using BCR and RNA. **Ibex** was designed to provide a customizable implementation of this combined approach in R. 278 | 279 | ## Ibex.matrix Function 280 | 281 | **Ibex** includes two primary functions: `Ibex.matrix()` and `runIbex()`. The `Ibex.matrix()` function serves as the backbone of the algorithm, returning encoded values based on user-selected parameters. In contrast to `runIbex()`, which filters input to include only B cells with attached BCR data, `Ibex.matrix()` operates on all provided data. Additionally, it is compatible with the list output from the `combineBCR()` function (from the [scRepertoire](https://github.com/BorchLab/scRepertoire) package), whereas `runIbex()` is designed for use with a single-cell object. 282 | 283 | ### Parameters 284 | 285 | - **chain**: 286 | Specifies the chain type. Options: 287 | - `"Heavy"` for Ig Heavy Chain 288 | - `"Light"` for Ig Light Chain 289 | 290 | - **method**: 291 | Chooses the transformation method. Options: 292 | - `"encoder"`: Applies a CNN/VAE-based transformation. 293 | - `"geometric"`: Uses a geometric transformation. 294 | 295 | - **encoder.model**: 296 | When using the `"encoder"` method, selects the specific model variant. Options: 297 | - `"CNN"`: CDR3 Convolutional Neural Network-based autoencoder 298 | - `"VAE"`: CDR3 Variational Autoencoder 299 | - `"CNN.EXP"`: CDR1/2/3 CNN 300 | - `"VAE.EXP"`: CDR1/2/3 VAE 301 | 302 | - **encoder.input**: 303 | Specifies the encoding input method. Options: 304 | - `"atchleyFactors"` 305 | - `"crucianiProperties"` 306 | - `"kideraFactors"` 307 | - `"MSWHIM"` 308 | - `"tScales"` 309 | - `"OHE"` 310 | 311 | - **geometric.theta**: 312 | For the geometric transformation, defines the value of theta (default is π/3). 
313 | 314 | ```{r tidy = FALSE} 315 | Ibex_vectors <- Ibex.matrix(ibex_example, 316 | chain = "Heavy", 317 | method = "encoder", 318 | encoder.model = "VAE", 319 | encoder.input = "OHE", 320 | species = "Mouse", 321 | verbose = FALSE) 322 | 323 | ggplot(data = as.data.frame(Ibex_vectors), aes(Ibex_1, Ibex_2)) + 324 | geom_point(color = "grey", alpha = 0.7, size = 2) + 325 | theme_classic() 326 | 327 | Ibex_vectors2 <- Ibex.matrix(ibex_example, 328 | chain = "Heavy", 329 | method = "geometric", 330 | geometric.theta = pi, 331 | verbose = FALSE) 332 | 333 | ggplot(as.data.frame(Ibex_vectors2), aes(x = Ibex_1, y = Ibex_2)) + 334 | geom_point(color = "grey", alpha = 0.7, size = 2) + 335 | theme_classic() 336 | ``` 337 | 338 | ## runIbex 339 | 340 | Additionally, ```runIbex()``` can be used to append the Seurat or Single-cell Experiment object with the Ibex vectors and allow for further analysis. Importantly, ```runIbex()``` will remove single cells that do not have recovered BCR data in the metadata of the object. 341 | 342 | ```{r tidy = FALSE} 343 | ibex_example <- runIbex(ibex_example, 344 | chain = "Heavy", 345 | encoder.input = "kideraFactors", 346 | reduction.name = "Ibex.KF", 347 | species = "Mouse", 348 | verbose = FALSE) 349 | ``` 350 | 351 | ## Using Ibex Vectors 352 | 353 | After ```runIbex()``` we have the encoded values stored under **"Ibex..."**. Using the Ibex dimensions, we can calculate a UMAP based solely on the embedded heavy chain values. Here we will visualize both the Heavy/Light Chain amino acid sequence (via **CTaa**) and normalized counts associated with the **Anti-Hen-Egg-Lysozyme** antigen. 
354 | 355 | ```{r tidy = FALSE} 356 | set.seed(123) 357 | #Generating UMAP from Ibex Neighbors 358 | ibex_example <- runUMAP(ibex_example, 359 | dimred = "Ibex.KF", 360 | name = "ibexUMAP") 361 | #Ibex UMAP 362 | plot1 <- plotUMAP(ibex_example, color_by ="Anti-Hen-Egg-Lysozyme", dimred = "ibexUMAP") + 363 | theme(legend.position = "bottom") 364 | plot2 <- plotUMAP(ibex_example, color_by = "CTaa", dimred = "ibexUMAP") + 365 | scale_color_viridis(discrete = TRUE, option = "B") + 366 | guides(color = "none") 367 | 368 | plot1 + plot2 369 | ``` 370 | 371 | In this workflow, we can combine three dimension reductions (`pca`, `apca`, and `Ibex.KF`, representing RNA, protein, and Ibex, respectively) into a single, integrated UMAP embedding using the ```runMultiUMAP()``` function with a cosine metric. To further refine this integration, we apply ```rescaleByNeighbors()``` to align the nearest neighbors across modalities, followed by clustering with ```clusterRows()```, resulting in a “combined.clustering” that reflects all data types. Finally, we visualize this joint embedding as “MultiUMAP,” coloring points by expression of a specific protein marker (e.g., Anti-Hen-Egg-Lysozyme), the integrated cluster assignments, or other relevant annotations. The result is a holistic representation of cellular diversity that leverages shared and unique signals from RNA, protein, and Ibex IGH latent features. 
372 | 373 | ```{r tidy = FALSE} 374 | #Multimodal UMAP 375 | ibex_example <- mumosa::runMultiUMAP(ibex_example, 376 | dimreds=c("pca", "apca", "Ibex.KF")) 377 | #Multimodal Clustering 378 | output <- rescaleByNeighbors(ibex_example, 379 | dimreds=c("pca", "apca", "Ibex.KF")) 380 | ibex_example$combined.clustering <- clusterRows(output, NNGraphParam()) 381 | 382 | plot3 <- plotUMAP(ibex_example, 383 | dimred = "MultiUMAP", 384 | color_by = "Anti-Hen-Egg-Lysozyme") + 385 | theme(legend.position = "bottom") 386 | plot4 <- plotUMAP(ibex_example, 387 | dimred = "MultiUMAP", 388 | color_by = "combined.clustering") + 389 | theme(legend.position = "bottom") 390 | plot5 <- plotUMAP(ibex_example, 391 | dimred = "MultiUMAP", 392 | color_by = "CTaa") + 393 | scale_color_manual(values = viridis_pal(option = "B")(length(unique(ibex_example$CTaa)))) 394 | 395 | plot3 + plot4 + plot5 396 | ``` 397 | 398 | ## Comparing the outcome to just one modality 399 | 400 | We can also look at the differences in the UMAP generated from RNA, ADT, or Ibex as individual components. Remember, the clusters displayed in each UMAP are the `combined.clustering` assignments computed above from the rescaled multimodal neighbors (```rescaleByNeighbors()``` followed by ```clusterRows()```), not clusters derived from each modality on its own. 
401 | 402 | ```{r tidy = FALSE} 403 | ibex_example <- runUMAP(ibex_example, 404 | dimred = 'pca', 405 | name = "pcaUMAP") 406 | 407 | ibex_example <- runUMAP(ibex_example, 408 | dimred = 'apca', 409 | name = "beamUMAP") 410 | 411 | plot6 <- plotUMAP(ibex_example, 412 | dimred = "pcaUMAP", 413 | color_by = "combined.clustering") 414 | plot7 <- plotUMAP(ibex_example, 415 | dimred = "beamUMAP", 416 | color_by = "combined.clustering") 417 | plot8 <- plotUMAP(ibex_example, 418 | dimred = "ibexUMAP", 419 | color_by = "combined.clustering") 420 | 421 | plot6 + plot7 + plot8 + plot_layout(guides = "collect") & 422 | theme(legend.position = "bottom") 423 | ``` 424 | 425 | # CoNGA Reduction 426 | 427 | Single-cell B-cell receptor (BCR) sequencing enables the identification of clonotypes, which are groups of B cells sharing the same BCR sequence. Often, you want to link clonotypes to their gene expression profiles. 428 | 429 | A challenge arises, however, when a clonotype contains multiple cells (e.g., 10 cells sharing the same BCR). Including all cells for every clonotype can lead to over-representation of highly expanded clones or complicate analyses that require a one-to-one mapping between clonotypes and “cells.” Recent work [Schattgen,2021](https://pubmed.ncbi.nlm.nih.gov/34426704/) has proposed different strategies to summarize or represent a clonotype by a single expression profile. Two key strategies are common: 430 | 431 | **Distance Approach** 432 | 433 | * First, look at the PCA or count matrices 434 | * Identify the cell that has the minimum summed Euclidean distance to all other cells in the clonotype. 435 | * This approach can help ensure that your single representation is an actual cell, rather than a potentially non-biological average. 436 | 437 | **Mean Approach** 438 | 439 | * Simply take the average (mean) expression across all cells in the same clonotype. 
440 | * Conceptually, you collapse a multi-cell clone into one “virtual cell” representing its average expression. 441 | 442 | ```{r tidy = FALSE} 443 | CoNGA.sce <- CoNGAfy(ibex_example, 444 | method = "mean", 445 | assay = c("RNA", "BEAM")) 446 | 447 | CoNGA.sce <- runIbex(CoNGA.sce, 448 | encoder.input = "kideraFactors", 449 | encoder.model = "VAE", 450 | reduction.name = "Ibex.KF", 451 | species = "Mouse", 452 | verbose = FALSE) 453 | 454 | CoNGA.sce <- CoNGA.sce %>% 455 | runUMAP(dimred = "Ibex.KF", name = "ibexUMAP", ) 456 | 457 | plot9 <- plotUMAP(CoNGA.sce, 458 | dimred = "ibexUMAP", 459 | color_by = "Anti-Hen-Egg-Lysozyme", 460 | by.assay.type = "counts") 461 | 462 | plot10 <- plotUMAP(CoNGA.sce, 463 | dimred = "ibexUMAP", 464 | color_by = "H5N1", 465 | by.assay.type = "counts") 466 | 467 | plot9 + plot10 & 468 | theme(legend.position = "bottom") 469 | ``` 470 | 471 | # Conclusion 472 | 473 | This has been a general overview of the capabilities of **Ibex** for incorporating BCR information into the embedding space of single-cell data. If you have any questions, comments, or suggestions, feel free to visit the [GitHub repository](https://github.com/BorchLab/Ibex). 
474 | 475 | ## Session Info 476 | 477 | ```{r tidy = FALSE} 478 | sessionInfo() 479 | ``` 480 | -------------------------------------------------------------------------------- /www/graphicalAbstract.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BorchLab/Ibex/543ce724c3f01bebedb0db705873788509b5e8a7/www/graphicalAbstract.png -------------------------------------------------------------------------------- /www/ibex_hex.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BorchLab/Ibex/543ce724c3f01bebedb0db705873788509b5e8a7/www/ibex_hex.png -------------------------------------------------------------------------------- /www/training_info.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BorchLab/Ibex/543ce724c3f01bebedb0db705873788509b5e8a7/www/training_info.png -------------------------------------------------------------------------------- /www/wnn_output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BorchLab/Ibex/543ce724c3f01bebedb0db705873788509b5e8a7/www/wnn_output.png --------------------------------------------------------------------------------