├── .Rbuildignore
├── .gitattributes
├── .github
├── .gitignore
└── workflows
│ ├── R-CMD-check.yaml
│ ├── pr-commands.yaml
│ └── test-coverage.yaml
├── .gitignore
├── .trigger_build
├── DESCRIPTION
├── LICENSE
├── LICENSE.md
├── NAMESPACE
├── NEWS
├── R
├── CoNGAfy.R
├── Ibex.matrix.R
├── basiliskEnv.R
├── combineExpandedBCR.R
├── global.R
├── ibex_example.R
├── ibex_vdj.R
├── quietBCRgenes.R
├── runIbex.R
└── utils.R
├── README.md
├── _pkgdown.yml
├── data
├── ibex_example.rda
└── ibex_vdj.rda
├── inst
├── WORDLIST
├── extdata
│ ├── ibex-basilisk.yaml
│ └── metadata.csv
└── scripts
│ ├── make-data.R
│ └── make-metadata.R
├── man
├── CoNGAfy.Rd
├── Ibex.matrix.Rd
├── combineExpandedBCR.Rd
├── filter.cells.Rd
├── getHumanIgPseudoGenes.Rd
├── ibex_example.Rd
├── ibex_vdj.Rd
├── quietBCRgenes.Rd
└── runIbex.Rd
├── tests
├── spelling.R
├── testthat.R
└── testthat
│ ├── basiliskEnv.R
│ ├── helper-testingFunctions.R
│ ├── test-CoNGAfy.R
│ ├── test-Ibex.matrix.R
│ ├── test-combineExpandedBCR.R
│ ├── test-quietBCRgenes.R
│ ├── test-runIbex.R
│ └── testdata
│ ├── CoNGAfy
│ ├── CoNGAfy_counts.rds
│ ├── CoNGAfy_mean_counts.rds
│ ├── CoNGAfy_mean_meta.data.rds
│ └── CoNGAfy_meta.data.rds
│ ├── quietBCRgenes
│ └── quietBCRgenes_feature.vector.rds
│ └── runIbex
│ ├── ibex.matrix_Heavy_VAE_AF.rds
│ ├── ibex.matrix_Light_AE_OHE.rds
│ ├── runIbex_Heavy_VAE_AF_reduction.rds
│ ├── runIbex_Heavy_VAE_OHE_reduction.rds
│ ├── runIbex_Heavy_geometric_reduction.rds
│ └── runIbex_Light_AE_KF_reduction.rds
├── vignettes
└── Ibex.Rmd
└── www
├── graphicalAbstract.png
├── ibex_hex.png
├── training_info.png
└── wnn_output.png
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^_pkgdown\.yml$
2 | ^docs$
3 | ^pkgdown$
4 | ^www$
5 | ^‘__autograph_generated_file3mbjv7rr\.py’$
6 | ^‘__pycache__’$
7 | ^__autograph_generated_fileuzetj_u2\.py$
8 | ^__pycache__$
9 | ^__autograph_generated_filezt06eymn\.py$
10 | ^\.github$
11 | ^LICENSE\.md$
12 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
--------------------------------------------------------------------------------
/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 |
--------------------------------------------------------------------------------
/.github/workflows/R-CMD-check.yaml:
--------------------------------------------------------------------------------
1 | # .github/workflows/R-CMD-check.yaml
2 | # Simplified for basilisk-based Ibex (no global Keras install required)
3 | on:
4 | push:
5 | branches: [main, master]
6 | pull_request:
7 | branches: [main, master]
8 |
9 | name: R-CMD-check
10 |
11 | jobs:
12 | R-CMD-check:
13 | runs-on: ${{ matrix.config.os }}
14 | name: ${{ matrix.config.os }} (${{ matrix.config.r }})
15 |
16 | strategy:
17 | fail-fast: false
18 | matrix:
19 | config:
20 | - {os: ubuntu-latest, r: 'release'}
21 |
22 | env:
23 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
24 | R_KEEP_PKG_SOURCE: yes
25 |
26 | steps:
27 | - uses: actions/checkout@v3
28 |
29 | - uses: r-lib/actions/setup-pandoc@v2
30 |
31 | - uses: r-lib/actions/setup-r@v2
32 | with:
33 | r-version: ${{ matrix.config.r }}
34 | use-public-rspm: true
35 |
36 | # Optional: cache basilisk's miniconda to speed up repeated runs
37 | - uses: actions/cache@v4
38 | with:
39 | path: ${{ runner.tool_cache }}/basilisk
40 | key: ${{ runner.os }}-basilisk-${{ hashFiles('DESCRIPTION') }}
41 |
42 | - name: Install R dependencies
43 | uses: r-lib/actions/setup-r-dependencies@v2
44 | with:
45 | extra-packages: |
46 | any::rcmdcheck
47 | bioc::basilisk
48 | bioc::basilisk.utils
49 | needs: cran, bioc
50 |
51 | - uses: r-lib/actions/check-r-package@v2
52 | with:
53 | upload-snapshots: true
54 |
--------------------------------------------------------------------------------
/.github/workflows/pr-commands.yaml:
--------------------------------------------------------------------------------
1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
3 | on:
4 | issue_comment:
5 | types: [created]
6 |
7 | name: Commands
8 |
9 | jobs:
10 | document:
11 | if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/document') }}
12 | name: document
13 | runs-on: ubuntu-latest
14 | env:
15 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
16 | steps:
17 | - uses: actions/checkout@v3
18 |
19 | - uses: r-lib/actions/pr-fetch@v2
20 | with:
21 | repo-token: ${{ secrets.GITHUB_TOKEN }}
22 |
23 | - uses: r-lib/actions/setup-r@v2
24 | with:
25 | use-public-rspm: true
26 |
27 | - uses: r-lib/actions/setup-r-dependencies@v2
28 | with:
29 | extra-packages: any::roxygen2
30 | needs: pr-document
31 |
32 | - name: Document
33 | run: roxygen2::roxygenise()
34 | shell: Rscript {0}
35 |
36 | - name: commit
37 | run: |
38 | git config --local user.name "$GITHUB_ACTOR"
39 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com"
40 | git add man/\* NAMESPACE
41 | git commit -m 'Document'
42 |
43 | - uses: r-lib/actions/pr-push@v2
44 | with:
45 | repo-token: ${{ secrets.GITHUB_TOKEN }}
46 |
47 | style:
48 | if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/style') }}
49 | name: style
50 | runs-on: ubuntu-latest
51 | env:
52 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
53 | steps:
54 | - uses: actions/checkout@v3
55 |
56 | - uses: r-lib/actions/pr-fetch@v2
57 | with:
58 | repo-token: ${{ secrets.GITHUB_TOKEN }}
59 |
60 | - uses: r-lib/actions/setup-r@v2
61 |
62 | - name: Install dependencies
63 | run: install.packages("styler")
64 | shell: Rscript {0}
65 |
66 | - name: Style
67 | run: styler::style_pkg()
68 | shell: Rscript {0}
69 |
70 | - name: commit
71 | run: |
72 | git config --local user.name "$GITHUB_ACTOR"
73 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com"
74 | git add \*.R
75 | git commit -m 'Style'
76 |
77 | - uses: r-lib/actions/pr-push@v2
78 | with:
79 | repo-token: ${{ secrets.GITHUB_TOKEN }}
80 |
--------------------------------------------------------------------------------
/.github/workflows/test-coverage.yaml:
--------------------------------------------------------------------------------
1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
3 | on:
4 | push:
5 | branches: [main, master]
6 | pull_request:
7 | branches: [main, master]
8 |
9 | name: test-coverage
10 |
11 | permissions: read-all
12 |
13 | jobs:
14 | test-coverage:
15 | runs-on: ubuntu-latest
16 | env:
17 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
18 |
19 | steps:
20 | - uses: actions/checkout@v4
21 |
22 | - uses: r-lib/actions/setup-r@v2
23 | with:
24 | use-public-rspm: true
25 |
26 | - uses: r-lib/actions/setup-r-dependencies@v2
27 | with:
28 | extra-packages: any::covr, any::xml2, rcmdcheck, bioc::basilisk, bioc::basilisk.utils
29 | needs: cran, bioc, coverage
30 |
31 | - name: Test coverage
32 | run: |
33 | cov <- covr::package_coverage(
34 | quiet = FALSE,
35 | clean = FALSE,
36 | install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package")
37 | )
38 | covr::to_cobertura(cov)
39 | shell: Rscript {0}
40 |
41 | - uses: codecov/codecov-action@v4
42 | with:
43 | fail_ci_if_error: ${{ github.event_name != 'pull_request' && true || false }}
44 | file: ./cobertura.xml
45 | plugin: noop
46 | disable_search: true
47 | token: ${{ secrets.CODECOV_TOKEN }}
48 |
49 | - name: Show testthat output
50 | if: always()
51 | run: |
52 | ## --------------------------------------------------------------------
53 | find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true
54 | shell: bash
55 |
56 | - name: Upload test results
57 | if: failure()
58 | uses: actions/upload-artifact@v4
59 | with:
60 | name: coverage-test-failures
61 | path: ${{ runner.temp }}/package
62 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | docs
2 | .DS_Store
3 |
--------------------------------------------------------------------------------
/.trigger_build:
--------------------------------------------------------------------------------
1 | Trigger build
2 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: Ibex
2 | Title: Methods for BCR single-cell embedding
3 | Version: 0.99.18
4 | Authors@R: c(
5 | person(given = "Nick", family = "Borcherding", role = c("aut", "cre"), email = "ncborch@gmail.com"))
6 | Description: Implementation of the Ibex algorithm for single-cell embedding based on BCR sequences. The package includes a standalone function to encode BCR sequence information by amino acid properties or sequence order using tensorflow-based autoencoder. In addition, the package interacts with SingleCellExperiment or Seurat data objects.
7 | License: MIT + file LICENSE
8 | Encoding: UTF-8
9 | LazyData: true
10 | LazyDataCompression: xz
11 | RoxygenNote: 7.3.2
12 | biocViews: Software, ImmunoOncology, SingleCell, Classification, Annotation, Sequencing, ExperimentHubSoftware
13 | Depends:
14 | R (>= 4.0)
15 | Imports:
16 | basilisk,
17 | immApex,
18 | methods,
19 | Matrix,
20 | reticulate,
21 | rlang,
22 | SeuratObject,
23 | scRepertoire,
24 | SingleCellExperiment,
25 | stats,
26 | SummarizedExperiment,
27 | tensorflow,
28 | tools
29 | Suggests:
30 | BiocStyle,
31 | bluster,
32 | dplyr,
33 | ggplot2,
34 | kableExtra,
35 | knitr,
36 | markdown,
37 | mumosa,
38 | patchwork,
39 | rmarkdown,
40 | scater,
41 | spelling,
42 | testthat (>= 3.0.0),
43 | utils,
44 | viridis
45 | SystemRequirements: Python (via basilisk)
46 | VignetteBuilder: knitr
47 | Language: en-US
48 | URL: https://github.com/BorchLab/Ibex/
49 | BugReports: https://github.com/BorchLab/Ibex/issues
50 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2025
2 | COPYRIGHT HOLDER: Ibex authors
3 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | # MIT License
2 |
3 | Copyright (c) 2025 Ibex authors
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | S3method(quietBCRgenes,Seurat)
4 | S3method(quietBCRgenes,default)
5 | export(CoNGAfy)
6 | export(Ibex.matrix)
7 | export(combineExpandedBCR)
8 | export(getHumanIgPseudoGenes)
9 | export(quietBCRgenes)
10 | export(runIbex)
11 | import(basilisk)
12 | importFrom(Matrix,colSums)
13 | importFrom(Matrix,sparse.model.matrix)
14 | importFrom(SeuratObject,CreateAssayObject)
15 | importFrom(SeuratObject,CreateDimReducObject)
16 | importFrom(SeuratObject,CreateSeuratObject)
17 | importFrom(SeuratObject,DefaultAssay)
18 | importFrom(SeuratObject,GetAssayData)
19 | importFrom(SeuratObject,VariableFeatures)
20 | importFrom(SingleCellExperiment,"altExp<-")
21 | importFrom(SingleCellExperiment,"reducedDim<-")
22 | importFrom(SingleCellExperiment,SingleCellExperiment)
23 | importFrom(SingleCellExperiment,altExp)
24 | importFrom(SingleCellExperiment,colData)
25 | importFrom(SingleCellExperiment,reducedDim)
26 | importFrom(SummarizedExperiment,"assay<-")
27 | importFrom(SummarizedExperiment,"colData<-")
28 | importFrom(SummarizedExperiment,SummarizedExperiment)
29 | importFrom(SummarizedExperiment,assay)
30 | importFrom(SummarizedExperiment,assayNames)
31 | importFrom(SummarizedExperiment,colData)
32 | importFrom(basilisk,basiliskRun)
33 | importFrom(immApex,geometricEncoder)
34 | importFrom(immApex,getIR)
35 | importFrom(immApex,onehotEncoder)
36 | importFrom(immApex,propertyEncoder)
37 | importFrom(methods,is)
38 | importFrom(rlang,"%||%")
39 | importFrom(scRepertoire,combineBCR)
40 | importFrom(stats,as.formula)
41 | importFrom(stats,complete.cases)
42 | importFrom(stats,dist)
43 | importFrom(tensorflow,tf)
44 | importFrom(tools,R_user_dir)
45 | importFrom(utils,download.file)
46 | importFrom(utils,read.csv)
47 |
--------------------------------------------------------------------------------
/NEWS:
--------------------------------------------------------------------------------
1 | CHANGES IN VERSION 0.99.18
2 | ------------------------
3 | * assigning current environment to basilisk environment
4 |
5 | CHANGES IN VERSION 0.99.17
6 | ------------------------
7 | * basilisk is not intuitive
8 |
9 | CHANGES IN VERSION 0.99.16
10 | ------------------------
11 | * flanking library(ibex) with basilisk creation
12 |
13 | CHANGES IN VERSION 0.99.15
14 | ------------------------
15 | * defining system env for basilisk in vignette
16 |
17 | CHANGES IN VERSION 0.99.14
18 | ------------------------
19 | * Adding proper wrapper to ```ibex.matrix()```
20 |
21 | CHANGES IN VERSION 0.99.12
22 | ------------------------
23 | * Basilisk conversion
24 |
25 | CHANGES IN VERSION 0.99.11
26 | ------------------------
27 | * Switched keras python check to chunk-specific for vignette
28 |
29 | CHANGES IN VERSION 0.99.10
30 | ------------------------
31 | * Added information to example data
32 |
33 | CHANGES IN VERSION 0.99.9
34 | ------------------------
35 | * Examples now check if python is installed and running
36 |
37 | CHANGES IN VERSION 0.99.8
38 | ------------------------
39 | * Updated example data to 2k HEL BEAM-Ab from 10x
40 | * Converted ibex_example into SCE object for compliance
41 | * Large revision of vignette to fit new data/format
42 | * Added species argument to runIbex
43 | * Updated CoNGA handling of assay for Seurat and Single-Cell Objects.
44 |
45 | CHANGES IN VERSION 0.99.7
46 | ------------------------
47 | * Integration of Ibex with immApex
48 | * Updated Seurat object to v5
49 | * Updated support for SCE format for ```runIbex()```
50 | * Update ```CoNGAfy()``` to function with all versions of Seurat
51 | * Updated ```quietBCRgenes()``` to use VariableFeatures() call for SeuratV5 and backward compatibility.
52 | * Add ```getHumanIgPseudoGenes()``` to return a list of human Immunoglobulin Pseudo genes that are kept by ```quietBCRgenes()```
53 |
54 | ## New Models
55 | * Added New Light and Heavy Chain Models
56 | * Encoding methods now accepted: "OHE", "atchleyFactors", "crucianiProperties", "kideraFactors", "MSWHIM","tScales", "zScales"
57 | * Sequence input:
58 | - Human Heavy: 10000000
59 | - Human Light: 5000000
60 | - Human Heavy-Expanded: 5000000
61 | - Human Light-Expanded: 2500000
62 | - Mouse Heavy: 5000000
63 | - Mouse Heavy-Expanded: 5000000
64 | * Trained convolutional and variational autoencoders for Heavy/Light chains
65 | - Architecture: 512-256-128-256-512
66 | - Parameters:
67 | Batch Size = 128
68 | Latent Dimensions = 128
69 | Epochs = 100
70 | Loss = Mean Squared Error (CNN) & KL Divergence (VAE)
71 | Activation = relu
72 | Learning rate = 1e-6
73 | - Optimizers: Adam
74 | - Early stopping was set to a patience of 10 for minimal validation loss and restoration of best weights
75 | - CNN autoencoders have batch normalization layers between the dense layers.
76 |
77 | CHANGES IN VERSION 0.99.6
78 | ------------------------
79 | * Implementing GitHub action workflows
80 | * Adding testthat framework
81 | * Deprecating clonalCommunity
82 |
83 | CHANGES IN VERSION 0.99.5
84 | ------------------------
85 | * Added geometric encoding using the BLOSUM62 matrix
86 | * Trained classical and variational autoencoders for light/heavy chains with 1.5 million cdr sequences
87 | - Architecture: 256-128-30-128-256
88 | - Parameters:
89 | Batch Size = 64
90 | Latent Dimensions = 30
91 | Epochs = 100
92 | Loss = Mean Squared Error
93 | - Optimizers: Adam
94 | - Early stopping was set to a patience of 10 for minimal validation loss and restoration of best weights
95 | - learn rate varied by models
96 | - classical auto encoders have batch normalization layers between the dense layers.
97 |
98 | CHANGES IN VERSION 0.99.4
99 | ------------------------
100 | * Added chain.checker() function to allow for uncapitalized chain calls
101 |
102 | CHANGES IN VERSION 0.99.3
103 | ------------------------
104 | * Updated models for manuscript revision
105 | - Architecture: 256-128-30-128-256
106 | - Parameters:
107 | Batch Size = 64
108 | Learning Rate = 0.001
109 | Latent Dimensions = 30
110 | Epochs = 50
111 | Loss = Mean Squared Error
112 | - Optimizers: RAdam (for amino acid properties) and RMSprop (for OHE)
113 | - Early stopping was set to a patience of 10 for minimal validation loss and restoration of best weights
114 |
115 |
116 | CHANGES IN VERSION 0.99.2
117 | ------------------------
118 | * Updated models to include radam optimization, early stop for min 10 epochs, and all trained on 800,000 unique cdr3s
119 | * quietBCRgenes() now does not remove human Ig pseudogenes
120 |
121 |
122 | CHANGES IN VERSION 0.99.1
123 | ------------------------
124 | * Added detection of chain length to function call
125 | * Added support for direct output of combineBCR()
126 | * Modified quietBCR() to include constant regions and J-chains
127 |
128 |
129 | CHANGES IN VERSION 0.99.0
130 | ------------------------
131 | * Initial commit
--------------------------------------------------------------------------------
/R/CoNGAfy.R:
--------------------------------------------------------------------------------
#' Reduce a Single-Cell Object to Representative Cells
#'
#' This function generates a single-cell object with a reduced representation
#' of RNA expression by clone. The approach is inspired by the method introduced
#' in \href{https://pubmed.ncbi.nlm.nih.gov/34426704/}{CoNGA}. Users can
#' generate either a mean representation of features by clone or identify a
#' representative cell using count-based minimal Euclidean distance.
#' Please read and cite the original work by the authors of CoNGA.
#'
#' Cells without a clone assignment (\code{NA} in \code{CTaa}) are removed
#' before the reduction.
#'
#' @examples
#' ibex.clones <- CoNGAfy(ibex_example,
#'                        method = "dist")
#'
#' ibex.clones <- CoNGAfy(ibex_example,
#'                        method = "mean")
#'
#' @param input.data A single-cell dataset in Seurat or SingleCellExperiment format.
#' @param method Character. Specifies the method to reduce the dataset:
#' \itemize{
#'   \item "mean" - Computes the mean expression of selected features across cells in each clonotype.
#'   \item "dist" - Selects, within each clonotype, the cell with the smallest
#'   summed Euclidean distance (computed on the assay counts of the selected
#'   features) to the other cells of that clonotype.
#' }
#' @param features Character vector. Selected genes for the reduction. If \code{NULL} (default), all genes are used.
#' @param assay Character. The name of the assay or assays to include in the output. Defaults to \code{"RNA"}.
#' @param meta.carry Character vector. Metadata variables to carry over from the
#' input single-cell object to the output. \code{"CTaa"} is always carried
#' because it identifies the clone.
#'
#' @return A reduced single-cell object where each clonotype is represented by a single cell.
#'
#' @export
#' @importFrom SeuratObject CreateSeuratObject CreateAssayObject
#' @importFrom SingleCellExperiment SingleCellExperiment altExp<-
#' @importFrom SummarizedExperiment assay assay<- SummarizedExperiment colData<-
#' colData
CoNGAfy <- function(input.data,
                    method = "dist",
                    features = NULL,
                    assay = "RNA",
                    meta.carry = c("CTaa", "CTgene")) {
  # Reject unknown methods up front; previously they left the result list as
  # NULL and failed later with "attempt to set an attribute on NULL".
  method <- match.arg(method, choices = c("dist", "mean"))

  is.seurat <- inherits(input.data, "Seurat")
  if (!is.seurat && !inherits(input.data, "SingleCellExperiment")) {
    stop("The input.data is not a Seurat or SingleCellExperiment object.")
  }
  if (!is.character(assay) || length(assay) == 0) {
    stop("'assay' must be a character vector or a single character string.")
  }

  # CTaa keys the clone-level metadata below, so it must always be carried.
  meta.carry <- union("CTaa", meta.carry)

  # Keep only the cells that have a clone assignment.
  if (is.seurat) {
    cells.chains <- rownames(input.data[[]][!is.na(input.data[["CTaa"]]), ])
    input.data <- subset(input.data, cells = cells.chains)
  } else {
    cells.chains <- rownames(as.data.frame(colData(input.data)[!is.na(input.data$CTaa), ]))
    input.data <- input.data[, which(colnames(input.data) %in% cells.chains)]
  }

  # One reduced matrix per requested assay, named after the assay.
  reducer <- switch(method, mean = .CoNGA.mean, dist = .CoNGA.dist)
  conga <- lapply(assay, function(assay.name) {
    reducer(input.data, features, assay.name)
  })
  names(conga) <- assay

  if (is.seurat) {
    sc.output <- CreateSeuratObject(conga[[1]],
                                    assay = names(conga)[1],
                                    project = "Ibex")
    for (y in seq_along(conga)[-1]) {
      sc.output[[names(conga)[y]]] <- CreateAssayObject(conga[[y]])
    }
    CTge <- unique(input.data[[]][, meta.carry, drop = FALSE])
  } else {
    sc.output <- SingleCellExperiment(assay = conga[[1]])
    for (y in seq_along(conga)[-1]) {
      # NOTE(review): every additional assay is written to the same "BEAM"
      # alternative experiment, so with more than two assays only the last
      # one survives -- confirm whether that is intended.
      altExp(sc.output, "BEAM") <- SummarizedExperiment(
        assays = list(
          counts = as.matrix(conga[[y]])
        ),
        colData = colData(sc.output)
      )
    }
    sc.output$CTaa <- rownames(colData(sc.output))
    CTge <- data.frame(unique(colData(input.data)[, meta.carry, drop = FALSE]))
  }

  # One metadata row per clone, keyed by the clone sequence. Column names are
  # kept as selected by meta.carry rather than being overwritten, so carried
  # variables are never mislabelled.
  CTge <- CTge[!duplicated(CTge$CTaa), , drop = FALSE]
  rownames(CTge) <- as.character(CTge$CTaa)
  sc.output <- add.meta.data(sc.output, CTge, colnames(CTge))
  return(sc.output)
}
93 |
# Pulls the data matrix for one assay out of a single-cell object.
#
# input.data: a Seurat or SingleCellExperiment object.
# assay:      a single assay name.
#
# Seurat objects return the `counts` element of `input.data[[assay]]`.
# SingleCellExperiment objects are read from the main experiment when `assay`
# is one of its assay names or is "RNA" (which is mapped to "counts");
# otherwise the assay is read from the object's alternative experiment.
# Any other input class raises an error.
#' @importFrom SummarizedExperiment assayNames assay
#' @importFrom SingleCellExperiment altExp
grabAssay <- function(input.data, assay) {
  if (inherits(x = input.data, what = "Seurat")) {
    data.use <- input.data[[assay]]$counts
  } else if (inherits(x = input.data, what = "SingleCellExperiment")) {
    # NOTE(review): the selection is passed as `name = assay`; confirm that the
    # `assay()` accessor honours a `name` argument -- if it does not, the first
    # assay is returned regardless of the requested name.
    if (assay %in% assayNames(input.data) || assay == "RNA") {
      if (assay == "RNA") assay <- "counts"
      data.use <- assay(input.data, name = assay)
    } else {
      data.use <- assay(altExp(input.data), name = assay)
    }
  } else {
    # Previously fell through to `return(data.use)` and failed with
    # "object 'data.use' not found".
    stop("The input.data is not a Seurat or SingleCellExperiment object.")
  }
  return(data.use)
}
110 |
# Select one representative cell per clone.
#
# Clones made of a single cell are represented by that cell. For clones with
# several cells, the Euclidean distances between the cells' feature vectors
# are computed and the cell with the smallest summed distance to the others
# is kept. Output columns are renamed to the clone (CTaa) they represent,
# single-cell clones first, then multi-cell clones.
#
# Returns one matrix for a single assay name, or a named list of matrices when
# several assay names are supplied.
#' @importFrom SummarizedExperiment assay
#' @importFrom SeuratObject GetAssayData
#' @importFrom methods is
#' @importFrom stats dist
#' @keywords internal
#' @noRd
.CoNGA.dist <- function(input.data,
                        features = NULL,
                        assay = "RNA") {
  if (!is.character(assay)) {
    stop("'assay' must be a character vector or a single character string.")
  }

  # Per-cell clone assignment, indexed by barcode
  cell_meta <- grabMeta(input.data)
  if (!("CTaa" %in% colnames(cell_meta))) {
    stop("The metadata must contain a column named 'CTaa'.")
  }
  clone_of <- data.frame(CTaa = cell_meta[, "CTaa"],
                         row.names = rownames(cell_meta))
  barcodes <- rownames(clone_of)

  # Split clones by size: one cell versus more than one cell
  cells_per_clone <- table(clone_of$CTaa)
  singletons <- names(cells_per_clone[cells_per_clone == 1])
  expanded <- names(cells_per_clone[cells_per_clone > 1])

  representative_matrix <- function(assay_name) {
    counts <- grabAssay(input.data, assay_name)

    # Requested features that are actually present (all features when NULL)
    wanted <- intersect(features %||% rownames(counts), rownames(counts))
    if (length(wanted) == 0) {
      warning("No overlapping features found in assay '", assay_name, "'. Returning empty matrix.")
      return(matrix(nrow = 0, ncol = 0))
    }
    counts <- counts[wanted, , drop = FALSE]

    # Cell of a multi-cell clone with the smallest total distance to the rest
    pick_representative <- function(clone_name) {
      members <- barcodes[clone_of$CTaa == clone_name]
      pairwise <- as.matrix(
        dist(t(as.matrix(counts[, members, drop = FALSE])))
      )
      members[which.min(rowSums(pairwise))]
    }

    chosen <- c(
      barcodes[clone_of$CTaa %in% singletons],
      unlist(lapply(expanded, pick_representative))
    )

    reduced <- counts[, chosen, drop = FALSE]
    colnames(reduced) <- clone_of$CTaa[match(chosen, barcodes)]
    reduced
  }

  # Several assays: named list; otherwise a single matrix
  if (length(assay) > 1) {
    per_assay <- lapply(assay, representative_matrix)
    names(per_assay) <- assay
    return(per_assay)
  }
  representative_matrix(assay)
}
192 |
# Average feature values across the cells of each clone.
#
# A cell-by-clone indicator matrix is built from the CTaa assignments, each
# column is divided by its cell count, and the assay matrix is multiplied by
# it so that every output column holds the per-clone mean. Cells with NA in
# CTaa are dropped from the indicator matrix.
#
# Returns one matrix for a single assay name, or a named list of matrices when
# several assay names are supplied.
#' @importFrom rlang %||%
#' @importFrom Matrix sparse.model.matrix colSums
#' @importFrom SummarizedExperiment assay
#' @importFrom SeuratObject GetAssayData
#' @importFrom stats as.formula
#' @keywords internal
#' @noRd
.CoNGA.mean <- function(input.data,
                        features = NULL,
                        assay = "RNA") {
  if (!is.character(assay)) {
    stop("'assay' must be a character vector or a single character string.")
  }

  # Per-cell clone assignment, without unassigned cells
  cell_meta <- grabMeta(input.data)
  if (!("CTaa" %in% colnames(cell_meta))) {
    stop("The metadata must contain a column named 'CTaa'.")
  }
  clone_of <- data.frame(CTaa = cell_meta[, "CTaa"],
                         row.names = rownames(cell_meta))
  clone_of <- clone_of[!is.na(clone_of$CTaa), , drop = FALSE]
  clone_of$CTaa <- as.factor(clone_of$CTaa)

  # No-intercept model matrix: one indicator column per clone level
  indicator <- sparse.model.matrix(as.formula('~0+CTaa'), data = clone_of)

  # Drop empty clones, then scale every column so it sums to 1
  cells_in_clone <- Matrix::colSums(indicator)
  occupied <- which(cells_in_clone > 0)
  indicator <- indicator[, occupied, drop = FALSE]
  cells_in_clone <- cells_in_clone[occupied]
  averaging <- sweep(indicator, MARGIN = 2, STATS = cells_in_clone, FUN = "/")

  # Column labels without the "CTaa" prefix added by the model matrix
  clone_names <- gsub("^CTaa", "", colnames(averaging))

  clone_means <- function(assay_name) {
    counts <- grabAssay(input.data, assay_name)

    # Requested features that are actually present (all features when NULL)
    wanted <- intersect(features %||% rownames(counts), rownames(counts))
    if (length(wanted) == 0) {
      warning("No overlapping features found in assay '", assay_name, "'. Returning empty matrix.")
      return(matrix(nrow = 0, ncol = 0))
    }

    # features x cells  %*%  cells x clones  ->  features x clones
    averaged <- counts[wanted, , drop = FALSE] %*% averaging
    colnames(averaged) <- clone_names
    averaged
  }

  # Several assays: named list; otherwise a single matrix
  if (length(assay) > 1) {
    per_assay <- lapply(assay, clone_means)
    names(per_assay) <- assay
    return(per_assay)
  }
  clone_means(assay)
}
275 |
--------------------------------------------------------------------------------
/R/Ibex.matrix.R:
--------------------------------------------------------------------------------
#' Ibex Matrix Interface
#'
#' This function runs the Ibex algorithm to generate latent vectors from
#' input data. The output is returned as a data frame, with options to choose
#' between deep learning autoencoders or geometric transformations based on
#' the BLOSUM62 matrix.
#'
#' @examples
#' # Using the encoder method with a variational autoencoder
#' ibex_values <- Ibex.matrix(ibex_example,
#'                            chain = "Heavy",
#'                            method = "encoder",
#'                            encoder.model = "VAE",
#'                            encoder.input = "atchleyFactors")
#'
#' # Using the geometric method with a specified angle
#' ibex_values <- Ibex.matrix(ibex_example,
#'                            chain = "Heavy",
#'                            method = "geometric",
#'                            geometric.theta = pi)
#'
#' @param input.data Input data, which can be:
#'   \itemize{
#'     \item A Single Cell Object in Seurat or SingleCellExperiment format
#'     \item The output of \code{combineBCR()} from the \code{scRepertoire}
#'     package
#'   }
#' @param chain Character. Specifies which chain to analyze:
#'   \itemize{
#'     \item "Heavy" for the heavy chain
#'     \item "Light" for the light chain
#'   }
#' @param method Character. The algorithm to use for generating latent vectors:
#'   \itemize{
#'     \item "encoder" - Uses deep learning autoencoders
#'     \item "geometric" - Uses geometric transformations based on the
#'     BLOSUM62 matrix
#'   }
#' @param encoder.model Character. The type of autoencoder model to use:
#'   \itemize{
#'     \item "CNN" - CDR3 Convolutional Neural Network-based autoencoder
#'     \item "VAE" - CDR3 Variational Autoencoder
#'     \item "CNN.EXP" - CDR1/2/3 CNN
#'     \item "VAE.EXP" - CDR1/2/3 VAE
#'   }
#' @param encoder.input Character. Specifies the input features for the
#'   encoder model. Options include:
#'   \itemize{
#'     \item Amino Acid Properties: "atchleyFactors", "crucianiProperties",
#'     "kideraFactors", "MSWHIM", "tScales"
#'     \item "OHE" for One Hot Encoding
#'   }
#' @param geometric.theta Numeric. Angle (in radians) for the geometric
#'   transformation. Only used when \code{method = "geometric"}. Default is
#'   \code{pi/3}.
#' @param species Character. Species of the pretrained model to load, e.g.
#'   "Human" or "Mouse". Default is "Human".
#' @param verbose Logical. Whether to print progress messages. Default is TRUE.
#' @return A data frame of latent vectors generated by the specified method,
#'   with one row per barcode and columns named \code{Ibex_1}, \code{Ibex_2},
#'   and so on.
#'
#' @export
#' @importFrom basilisk basiliskRun
#' @importFrom SeuratObject CreateDimReducObject
#' @importFrom immApex propertyEncoder onehotEncoder geometricEncoder getIR
#' @importFrom stats complete.cases
#' @importFrom tensorflow tf
#'
#' @seealso
#' \code{\link[immApex]{propertyEncoder}},
#' \code{\link[immApex]{geometricEncoder}}
Ibex.matrix <- function(input.data,
                        chain = c("Heavy", "Light"),
                        method = c("encoder", "geometric"),
                        encoder.model = c("CNN", "VAE", "CNN.EXP", "VAE.EXP"),
                        encoder.input = c("atchleyFactors", "crucianiProperties",
                                          "kideraFactors", "MSWHIM", "tScales",
                                          "OHE"),
                        geometric.theta = pi/3,
                        species = "Human",
                        verbose = TRUE) {

  # Match arguments for better validation
  chain <- match.arg(chain)
  method <- match.arg(method)

  if (method == "encoder") {
    encoder.model <- match.arg(encoder.model)
    encoder.input <- match.arg(encoder.input)
    # ".EXP" models take concatenated CDR1/2/3 input; match the suffix
    # literally rather than as a regular expression.
    expanded.sequences <- grepl(".EXP", encoder.model, fixed = TRUE)
  } else {
    expanded.sequences <- FALSE
  }

  # Define loci based on chain selection
  loci <- if (chain == "Heavy") "IGH" else c("IGK", "IGL")

  # Getting sequences; column 1 holds the barcodes, column 2 the sequences
  BCR <- getIR(input.data, chain, sequence.type = "aa")[[1]]
  BCR <- BCR[complete.cases(BCR[, 2]), ]

  # Determine dictionary for sequence encoding
  if (expanded.sequences) {
    # combineExpandedBCR() joins CDR1, CDR2 and CDR3 with "-". A sequence
    # without a hyphen was therefore not produced by that function.
    if (!all(grepl("-", BCR[, 2], fixed = TRUE))) {
      stop("Expanded sequences are not properly formated, please use combineExpandedBCR().")
    }
    BCR[, 2] <- gsub("-", "_", BCR[, 2])
    dictionary <- c(amino.acids, "_")
  } else {
    dictionary <- amino.acids
  }

  # Filter by gene locus
  BCR <- BCR[grepl(paste0(loci, collapse = "|"), BCR[, "v"]), ]

  # Ensure sequences meet length criteria
  checkLength(x = BCR[, 2], expanded = expanded.sequences)
  length.to.use <- if (expanded.sequences) 90 else 45

  if (method == "encoder") {
    # Getting Model Path
    model.path <- aa.model.loader(species = species,
                                  chain = chain,
                                  encoder.input = encoder.input,
                                  encoder.model = encoder.model)

    if (verbose) print("Encoding sequences and calculating latent dimensions...")

    # Run ENCODING and PREDICTION inside the basilisk environment
    reduction <- basiliskRun(
      env = IbexEnv,
      fun = function(sequences, enc.input, max.len, seq.dict, mpath) {

        # 1. Encode sequences inside the correct environment
        if (enc.input == "OHE") {
          encoded.values <- immApex::onehotEncoder(sequences,
                                                   max.length = max.len,
                                                   convert.to.matrix = TRUE,
                                                   sequence.dictionary = seq.dict,
                                                   padding.symbol = ".")
        } else {
          encoded.values <- immApex::propertyEncoder(sequences,
                                                     max.length = max.len,
                                                     method.to.use = enc.input,
                                                     convert.to.matrix = TRUE)
        }

        # 2. Load model and predict
        keras <- reticulate::import("keras", delay_load = FALSE)
        model <- keras$models$load_model(mpath)
        pred <- model$predict(encoded.values)

        return(as.array(pred))
      },
      # Pass arguments to the function inside basiliskRun
      sequences = BCR[, 2],
      enc.input = encoder.input,
      max.len = length.to.use,
      seq.dict = dictionary,
      mpath = model.path
    )

  } else if (method == "geometric") {
    if (verbose) print("Performing geometric transformation...")
    # Hyphens from expanded sequences are dropped before geometric encoding
    BCR[, 2] <- gsub("-", "", BCR[, 2])
    reduction <- suppressMessages(geometricEncoder(BCR[, 2], theta = geometric.theta))
  }

  # Label rows with barcodes and columns with the Ibex_ prefix
  reduction <- as.data.frame(reduction)
  barcodes <- BCR[, 1]
  rownames(reduction) <- barcodes
  colnames(reduction) <- paste0("Ibex_", seq_len(ncol(reduction)))
  return(reduction)
}
--------------------------------------------------------------------------------
/R/basiliskEnv.R:
--------------------------------------------------------------------------------
#' @import basilisk
# Python environment definition handed to basiliskRun() when the encoder
# models are loaded; every package is pinned to a fixed version.
IbexEnv <- BasiliskEnvironment(
  envname  = "IbexEnv",
  pkgname  = "Ibex",
  packages = c("python=3.9",
               "keras=3.6.*",
               "tensorflow=2.18.*",
               "h5py=3.13",
               "numpy=1.26")
)
--------------------------------------------------------------------------------
/R/combineExpandedBCR.R:
--------------------------------------------------------------------------------
#' combineBCR for CDR1/2/3 sequences
#'
#' This function enhances BCR processing by incorporating additional
#' sequence information from CDR1 and CDR2 regions before applying the BCR
#' combination logic. For every contig table the \code{cdr3} column is
#' replaced by \code{cdr1-cdr2-cdr3} (and \code{cdr3_nt} by the matching
#' nucleotide concatenation) before the data are handed to
#' \code{\link[scRepertoire]{combineBCR}} from the scRepertoire package.
#'
#' @examples
#' combined.BCR <- combineExpandedBCR(list(ibex_vdj),
#'                                    samples = "Sample1",
#'                                    filterNonproductive = TRUE)
#'
#' @param input.data List of filtered contig annotations. Each element must be
#' a data frame containing the columns \code{cdr1}, \code{cdr2} and
#' \code{cdr3}.
#' @param samples Character vector. Labels of samples (required).
#' @param ID Character vector. Additional sample labeling (optional).
#' @param call.related.clones Logical. Whether to call related clones based on
#' nucleotide sequence and V gene. Default is `TRUE`.
#' @param threshold Numeric. Normalized edit distance for clone clustering.
#' Default is `0.85`.
#' @param removeNA Logical. Whether to remove any chain without values. Default
#' is `FALSE`.
#' @param removeMulti Logical. Whether to remove barcodes with more than two
#' chains. Default is `FALSE`.
#' @param filterMulti Logical. Whether to select the highest-expressing light
#' and heavy chains. Default is `TRUE`.
#' @param filterNonproductive Logical. Whether to remove nonproductive chains.
#' Default is `TRUE`.
#'
#' @return A list of consolidated BCR clones with expanded CDR sequences.
#' @seealso
#' \code{\link[scRepertoire]{combineBCR}}
#'
#' @importFrom scRepertoire combineBCR
#' @export
combineExpandedBCR <- function(input.data,
                               samples = NULL,
                               ID = NULL,
                               call.related.clones = TRUE,
                               threshold = 0.85,
                               removeNA = FALSE,
                               removeMulti = FALSE,
                               filterMulti = TRUE,
                               filterNonproductive = TRUE) {

  # Ensure input is a list of data frames. vapply() always yields a logical
  # vector, so the check stays well-defined for empty or unusual lists.
  if (!is.list(input.data) ||
      !all(vapply(input.data, is.data.frame, logical(1)))) {
    stop("Input data must be a list of data frames.")
  }

  required.cols <- c("cdr1", "cdr2", "cdr3")

  # Modify each data frame in the list
  modified_data <- lapply(input.data, function(df) {
    if (!all(required.cols %in% colnames(df))) {
      stop("Each data frame must contain 'cdr1', 'cdr2', and 'cdr3' columns.")
    }

    # Create concatenated CDR sequences; "-" is the separator that
    # Ibex.matrix() later looks for in expanded sequences.
    df$cdr3 <- paste(df$cdr1, df$cdr2, df$cdr3, sep = "-")
    df$cdr3_nt <- paste(df$cdr1_nt, df$cdr2_nt, df$cdr3_nt, sep = "-")

    df
  })

  # Call combineBCR() on the modified data
  combineBCR(input.data = modified_data,
             samples = samples,
             ID = ID,
             call.related.clones = call.related.clones,
             threshold = threshold,
             removeNA = removeNA,
             removeMulti = removeMulti,
             filterMulti = filterMulti,
             filterNonproductive = filterNonproductive)
}
76 |
--------------------------------------------------------------------------------
/R/global.R:
--------------------------------------------------------------------------------
# Load hook: registers the symbols below as known global variables via
# utils::globalVariables().
.onLoad <- function(libname, pkgname) {
  known.globals <- c(
    "AF.col", "KF.col", "array_reshape", "is", "reducedDim<-", "na.omit",
    "median", "slot", "get.adjacency", "nn", "data", "ibex.data",
    "colData<-", "TR", "graph.edgelist", "f"
  )
  utils::globalVariables(known.globals)

  invisible()
}
23 |
--------------------------------------------------------------------------------
/R/ibex_example.R:
--------------------------------------------------------------------------------
1 | #' A SingleCellExperiment object with 200 randomly-sampled
2 | #' B cells with BCR sequences from the 10x Genomics
3 | #' 2k_BEAM-Ab_Mouse_HEL_5pv2 dataset.
4 | #'
5 | #' This object includes normalized gene expression values, metadata annotations,
6 | #' and B cell clonotype information derived from 10x V(D)J sequencing. It is intended
7 | #' as a small example dataset for testing and demonstration purposes.
8 | #'
9 | #' @format A \code{SingleCellExperiment} object with 32,285 genes (rows) and 200 cells (columns).
10 | #' \describe{
11 | #' \item{assays}{List of matrices containing expression values: \code{counts} (raw counts) and \code{logcounts} (log-transformed).}
12 | #' \item{rowData}{Empty in this example (no gene-level annotations).}
13 | #' \item{colData}{A \code{DataFrame} with 14 columns of cell metadata, including:}
14 | #' \itemize{
15 | #' \item orig.ident Original sample identity.
16 | #' \item nCount_RNA Total number of counts per cell.
17 | #' \item nFeature_RNA Number of detected genes per cell.
18 | #' \item cloneSize Size of each clone.
19 | #' \item ident Cluster assignment.
20 | #' }
21 | #' \item{reducedDims}{Contains dimensionality reductions: \code{PCA}, \code{pca}, and \code{apca}.}
22 | #' \item{altExp}{One alternative experiment named \code{BEAM} containing additional expression data.}
23 | #' }
24 | #' @name ibex_example
25 | #' @docType data
26 | NULL
27 |
--------------------------------------------------------------------------------
/R/ibex_vdj.R:
--------------------------------------------------------------------------------
1 | #' Full filtered_annotated_contig.csv from the 10x
2 | #' 2k_BEAM-Ab_Mouse_HEL_5pv2
3 | #'
4 | #' This dataset contains single-cell V(D)J sequencing annotations
5 | #' from the 10x Genomics BEAM-Ab Mouse dataset. It includes V(D)J
6 | #' gene calls, CDR regions, productivity information, and clonotype
7 | #' assignments for each contig.
8 | #'
9 | #' @format A data frame with 6 rows and 35 columns:
10 | #' \describe{
11 | #' \item{barcode}{Character. Unique cell barcode.}
12 | #' \item{is_cell}{Logical. Whether the barcode is identified as a cell.}
13 | #' \item{contig_id}{Character. Unique identifier for each contig.}
14 | #' \item{high_confidence}{Logical. Whether the contig is high confidence.}
15 | #' \item{length}{Integer. Length of the contig.}
16 | #' \item{chain}{Character. Chain type (e.g., IGH, IGK).}
17 | #' \item{v_gene}{Character. V gene annotation.}
18 | #' \item{d_gene}{Character. D gene annotation.}
19 | #' \item{j_gene}{Character. J gene annotation.}
20 | #' \item{c_gene}{Character. C gene annotation.}
21 | #' \item{full_length}{Logical. Whether the contig is full-length.}
22 | #' \item{productive}{Logical. Whether the contig is productive.}
23 | #' \item{fwr1}{Character. Amino acid sequence for Framework Region 1.}
24 | #' \item{fwr1_nt}{Character. Nucleotide sequence for FWR1.}
25 | #' \item{cdr1}{Character. Amino acid sequence for CDR1.}
26 | #' \item{cdr1_nt}{Character. Nucleotide sequence for CDR1.}
27 | #' \item{fwr2}{Character. Amino acid sequence for FWR2.}
28 | #' \item{fwr2_nt}{Character. Nucleotide sequence for FWR2.}
29 | #' \item{cdr2}{Character. Amino acid sequence for CDR2.}
30 | #' \item{cdr2_nt}{Character. Nucleotide sequence for CDR2.}
31 | #' \item{fwr3}{Character. Amino acid sequence for FWR3.}
32 | #' \item{fwr3_nt}{Character. Nucleotide sequence for FWR3.}
33 | #' \item{cdr3}{Character. Amino acid sequence for CDR3.}
34 | #' \item{cdr3_nt}{Character. Nucleotide sequence for CDR3.}
35 | #' \item{fwr4}{Character. Amino acid sequence for FWR4.}
36 | #' \item{fwr4_nt}{Character. Nucleotide sequence for FWR4.}
37 | #' \item{reads}{Integer. Number of reads supporting the contig.}
38 | #' \item{umis}{Integer. Number of UMIs supporting the contig.}
39 | #' \item{raw_clonotype_id}{Character. Clonotype ID from 10x output.}
40 | #' \item{raw_consensus_id}{Character. Consensus ID from 10x output.}
41 | #' \item{exact_subclonotype_id}{Integer. Exact subclonotype grouping.}
42 | #' }
43 | #' @name ibex_vdj
44 | #' @docType data
45 | NULL
46 |
--------------------------------------------------------------------------------
/R/quietBCRgenes.R:
--------------------------------------------------------------------------------
#' Remove BCR Genes from Variable Gene Results
#'
#' Drops B-cell receptor (BCR) genes from the variable features of a
#' single-cell dataset. Most single-cell workflows prioritize highly expressed
#' and highly variable genes for principal component analysis (PCA) and
#' dimensional reduction; excluding BCR genes keeps the variable gene set from
#' being driven by highly variable BCR genes.
#'
#' @examples
#' # Remove BCR genes from the variable features of a vector
#' variable.genes <- c("IGHV1-69", "IGHV3-23", "IGHV4-34", "IGHV5-51", "IGHV6-1",
#'                     "IGKV1-5", "IGKV3-20", "IGLV2-14", "IGLV3-21", "IGLV6-57",
#'                     "TP53", "MYC", "BCL2", "CD19", "CD79A", "CD79B", "PAX5")
#' variable.genes <- quietBCRgenes(variable.genes)
#'
#' @param sc A single-cell dataset, which can be:
#'   \itemize{
#'     \item A Seurat object
#'     \item A vector of variable genes generated by workflows such as
#'     Bioconductor's \code{scran}
#'   }
#' @param assay Character. Specifies the Seurat assay slot to use for removing
#'   BCR genes. If \code{NULL}, the function defaults to the active assay in
#'   the Seurat object.
#'
#' @return The input Seurat object or vector with BCR genes removed from the
#'   variable features.
#'
#' @importFrom SeuratObject DefaultAssay
#' @export
quietBCRgenes <- function(sc, assay = NULL) {
  # S3 generic: dispatches on the class of `sc`
  UseMethod("quietBCRgenes")
}
31 |
#' @export
#' @importFrom SeuratObject DefaultAssay VariableFeatures
quietBCRgenes.Seurat <- function(sc, assay = NULL) {
  # Fall back to the object's default assay when none is given
  if (is.null(assay)) {
    assay <- DefaultAssay(sc)
  }
  # Filter the current variable features and write them back to the same assay
  current.features <- SeuratObject::VariableFeatures(sc, assay = assay)
  SeuratObject::VariableFeatures(sc, assay = assay) <-
    quietBCRgenes.default(current.features)
  sc
}
42 |
#' @export
quietBCRgenes.default <- function(sc, assay = NULL) {
  # Candidate genes: anything matching the immunoglobulin pattern, plus JCHAIN
  ig.hits <- grep(pattern = "^IG[HLK][VDJCAGM]", x = sc, value = TRUE)
  candidates <- c(ig.hits, "JCHAIN")
  # Genes listed by getHumanIgPseudoGenes() are exempt from removal
  to.drop <- candidates[!(candidates %in% getHumanIgPseudoGenes())]
  sc[!(sc %in% to.drop)]
}
51 |
#' Get Human Immunoglobulin pseudogenes
#'
#' This function returns a character vector of human immunoglobulin
#' pseudogenes. \code{quietBCRgenes} uses this vector as an exemption list:
#' genes named here are not removed from the variable gene list.
#'
#' @return Character vector of human immunoglobulin pseudogenes, without
#' duplicates.
#' @export
#'
getHumanIgPseudoGenes <- function() {
  pseudogenes <- c(
    "IGHJ1P", "IGHJ2P", "IGHJ3P", "IGLC4", "IGLC5", "IGHEP1", "IGHEP2",
    "IGHV1-12", "IGHV1-14", "IGHV1-17", "IGHV1-67", "IGHV1-68",
    "IGHV2-10", "IGHV3-6", "IGHV3-19", "IGHV3-22", "IGHV3-25",
    "IGHV3-29", "IGHV3-32", "IGHV3-36", "IGHV3-37", "IGHV3-41",
    "IGHV3-42", "IGHV3-47", "IGHV3-50", "IGHV3-52", "IGHV3-54",
    "IGHV3-57", "IGHV3-60", "IGHV3-62", "IGHV3-63", "IGHV3-65",
    "IGHV3-71", "IGHV3-75", "IGHV3-76", "IGHV3-79", "IGHV4-55",
    "IGHV4-80", "IGHV5-78", "IGHV7-27", "IGHV7-40", "IGHV7-56",
    "IGHVIII-44", "IGHVIII-82", "IGKV1-22", "IGKV1-32", "IGKV1-35",
    "IGKV1D-22", "IGKV1D-27", "IGKV1D-32", "IGKV1D-35", "IGKVOR-2",
    "IGKVOR-3", "IGKVOR-4", "IGKV2-4", "IGKV2-10", "IGKV2-14", "IGKV2-18",
    "IGKV2-19", "IGKV2-23", "IGKV2-26", "IGKV2-36", "IGKV2-38",
    "IGKV2D-10", "IGKV2D-14", "IGKV2D-18", "IGKV2D-19", "IGKV2D-23",
    "IGKV2D-36", "IGKV2D-38", "IGKV3-25", "IGKV3-31", "IGKV3-34",
    "IGKV7-3", "IGLCOR22-1", "IGLCOR22-2", "IGLJCOR18", "IGLV1-41",
    "IGLV1-62", "IGLV2-5", "IGLV2-28", "IGLV2-34", "IGLV3-2",
    "IGLV3-4", "IGLV3-6", "IGLV3-7", "IGLV3-13", "IGLV3-15",
    "IGLV3-17", "IGLV3-24", "IGLV3-26", "IGLV3-29", "IGLV3-30",
    "IGLV3-31", "IGLV7-35", "IGLV10-67", "IGLVI-20", "IGLVI-38",
    "IGLVI-42", "IGLVI-56", "IGLVI-63", "IGLVI-68", "IGLVI-70",
    "IGLVIV-53", "IGLVIV-59", "IGLVIV-64", "IGLVIV-65", "IGLVV-58",
    "IGLVV-66", "IGHV1OR15-2", "IGHV1OR15-3", "IGHV1OR15-4", "IGHV1OR15-6",
    "IGHV1OR16-1", "IGHV1OR16-2", "IGHV1OR16-3", "IGHV1OR16-4", "IGHV3-30-2",
    "IGHV3-33-2", "IGHV3-69-1", "IGHV3OR15-7", "IGHV3OR16-6", "IGHV3OR16-7",
    "IGHV3OR16-11", "IGHV3OR16-14", "IGHV3OR16-15", "IGHV3OR16-16", "IGHV7-34-1",
    "IGHVII-1-1", "IGHVII-15-1", "IGHVII-20-1", "IGHVII-22-1", "IGHVII-26-2",
    "IGHVII-28-1", "IGHVII-30-1", "IGHVII-30-21", "IGHVII-31-1", "IGHVII-33-1",
    "IGHVII-40-1", "IGHVII-43-1", "IGHVII-44-2", "IGHVII-46-1", "IGHVII-49-1",
    "IGHVII-51-2", "IGHVII-53-1", "IGHVII-60-1", "IGHVII-62-1", "IGHVII-65-1",
    "IGHVII-67-1", "IGHVII-74-1", "IGHVII-78-1", "IGHVIII-2-1", "IGHVIII-5-1",
    "IGHVIII-5-2", "IGHVIII-11-1", "IGHVIII-13-1", "IGHVIII-16-1", "IGHVIII-22-2",
    "IGHVIII-25-1", "IGHVIII-26-1", "IGHVIII-38-1", "IGHVIII-47-1", "IGHVIII-67-2",
    "IGHVIII-67-3", "IGHVIII-67-4", "IGHVIII-76-1", "IGHVIV-44-1", "IGKV1OR1-1",
    "IGKV1OR2-1", "IGKV1OR2-2", "IGKV1OR2-3", "IGKV1OR2-6", "IGKV1OR2-9",
    "IGKV1OR2-11", "IGKV1OR2-118", "IGKV1OR9-1", "IGKV1OR9-2", "IGKV1OR10-1",
    "IGKV1OR15-118", "IGKV1OR22-1", "IGKV1OR22-5", "IGKV1ORY-1", "IGKV2OR2-1",
    "IGKV2OR2-2", "IGKV2OR2-4", "IGKV2OR2-7", "IGKV2OR2-7D", "IGKV2OR2-8",
    "IGKV2OR2-10", "IGKV2OR22-3", "IGKV2OR22-4", "IGKV3OR2-5", "IGKV3OR22-2",
    "IGKV8OR8-1", "IGLVIV-66-1", "IGLVIVOR22-1", "IGLVIVOR22-2", "IGLVVI-22-1",
    "IGLVVI-25-1", "IGLVVII-41-1"
  )
  # unique() guards against accidental repeats in the literal list above
  unique(pseudogenes)
}
105 |
--------------------------------------------------------------------------------
/R/runIbex.R:
--------------------------------------------------------------------------------
#' Ibex Single-Cell Calculation
#'
#' This function applies the Ibex algorithm to single-cell data, integrating
#' seamlessly with Seurat or SingleCellExperiment pipelines. The algorithm
#' generates latent dimensions using deep learning or geometric transformations,
#' storing the results in the dimensional reduction slot. \code{runIbex} will
#' automatically subset the single-cell object based on amino acid sequences
#' present for the given chain selection.
#'
#' @examples
#' # Using the encoder method with a variational autoencoder
#' ibex_example <- runIbex(ibex_example,
#'                         chain = "Heavy",
#'                         method = "encoder",
#'                         encoder.model = "VAE",
#'                         encoder.input = "atchleyFactors")
#'
#' # Using the geometric method with a specified angle
#' ibex_example <- runIbex(ibex_example,
#'                         chain = "Heavy",
#'                         method = "geometric",
#'                         geometric.theta = pi)
#'
#' @param sc.data A single-cell dataset, which can be:
#'   \itemize{
#'     \item A Seurat object
#'     \item A SingleCellExperiment object
#'   }
#' @param chain Character. Specifies the chain to analyze:
#'   \itemize{
#'     \item "Heavy" for the heavy chain
#'     \item "Light" for the light chain
#'   }
#'   If the full vector \code{c("Heavy", "Light")} is supplied, the first
#'   entry ("Heavy") is used.
#' @param method Character. Algorithm to use for generating latent dimensions:
#'   \itemize{
#'     \item "encoder" - Uses deep learning autoencoders
#'     \item "geometric" - Uses geometric transformations based on the BLOSUM62 matrix
#'   }
#' @param encoder.model Character. The type of autoencoder model to use:
#'   \itemize{
#'     \item "CNN" - CDR3 Convolutional Neural Network-based autoencoder
#'     \item "VAE" - CDR3 Variational Autoencoder
#'     \item "CNN.EXP" - CDR1/2/3 CNN
#'     \item "VAE.EXP" - CDR1/2/3 VAE
#'   }
#' @param encoder.input Character. Input features for the encoder model:
#'   \itemize{
#'     \item Amino Acid Properties: "atchleyFactors", "crucianiProperties",
#'     "kideraFactors", "MSWHIM", "tScales"
#'     \item "OHE" - One Hot Encoding
#'   }
#' @param geometric.theta Numeric. Angle (in radians) for geometric transformation.
#'   Used only when \code{method = "geometric"}. Default is \code{pi}.
#' @param reduction.name Character. The name to assign to the dimensional reduction.
#'   This is useful for running Ibex with multiple parameter settings and saving results
#'   under different names.
#' @param species Character. Species of the pretrained model to load, e.g.
#'   "Human" or "Mouse". Default is "Human".
#' @param verbose Logical. Whether to print progress messages. Default is TRUE.
#'
#' @return An updated Seurat or SingleCellExperiment object with Ibex dimensions added
#' to the dimensional reduction slot.
#' @export
runIbex <- function(sc.data,
                    chain = "Heavy",
                    method = "encoder",
                    encoder.model = "VAE",
                    encoder.input = "atchleyFactors",
                    geometric.theta = pi,
                    reduction.name = "Ibex",
                    species = "Human",
                    verbose = TRUE) {
  checkSingleObject(sc.data)

  # Resolve `chain` to a single value before it is used to build the
  # cell-filter pattern, so filter.cells() and Ibex.matrix() always act on
  # the same chain (Ibex.matrix() applies the same match.arg() resolution).
  chain <- match.arg(chain, c("Heavy", "Light"))

  # Keep only cells that carry a sequence for the selected chain
  sc.data <- filter.cells(sc.data, chain)

  reduction <- Ibex.matrix(input.data = sc.data,
                           chain = chain,
                           method = method,
                           encoder.model = encoder.model,
                           encoder.input = encoder.input,
                           geometric.theta = geometric.theta,
                           species = species,
                           verbose = verbose)

  # Store the latent dimensions under `reduction.name`
  sc.data <- adding.DR(sc.data, reduction, reduction.name)
  return(sc.data)
}
86 |
#' Filter Single-Cell Data Based on CDR3 Sequences
#'
#' Subsets a Seurat or SingleCellExperiment object, dropping cells whose
#' `CTaa` entry is missing or matches the NA/None pattern built for the
#' selected chain.
#'
#' @param sc.obj A Seurat or SingleCellExperiment object.
#' @param chain Character. Specifies the chain type ("Heavy" or "Light").
#'
#' @return A filtered Seurat or SingleCellExperiment object.
filter.cells <- function(sc.obj,
                         chain) {
  meta <- grabMeta(sc.obj)
  if (!("CTaa" %in% colnames(meta))) {
    stop("Amino acid sequences are not added to the single-cell object correctly.")
  }

  # Heavy chain is matched with a trailing "_", light chain with a leading "_"
  is.heavy <- chain == "Heavy"
  absent.pattern <- paste0(ifelse(is.heavy, "NA_", "_NA"),
                           "|",
                           ifelse(is.heavy, "None_", "_None"))

  clone.aa <- meta[, "CTaa"]
  has.sequence <- !is.na(clone.aa) & !grepl(absent.pattern, clone.aa)
  cells.index <- which(has.sequence)

  if (inherits(x = sc.obj, what = "Seurat")) {
    # Seurat objects are subset by cell name
    sc.obj <- subset(sc.obj, cells = rownames(meta)[cells.index])
  } else if (inherits(x = sc.obj, what = "SingleCellExperiment")) {
    # SingleCellExperiment objects are subset by column index
    sc.obj <- sc.obj[, cells.index]
  }
  return(sc.obj)
}
116 |
--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
# Negated %in%: TRUE for elements of `x` that are absent from `table`
`%!in%` <- function(x, table) !(x %in% table)

# One-letter codes of the 20 amino acids, in the order used for encoding
amino.acids <- strsplit("ARNDCQEGHILKMFPSTWYV", split = "")[[1]]
4 |
# Attach the columns of `meta` to the cell-level metadata of a Seurat object
# or, for any other object, to its colData. `header` is accepted but unused.
#' @importFrom rlang %||%
#' @importFrom SingleCellExperiment colData
add.meta.data <- function(sc, meta, header) {
  if (inherits(x = sc, what = "Seurat")) {
    col.name <- names(meta) %||% colnames(meta)
    sc[[col.name]] <- meta
    return(sc)
  }

  # Non-Seurat path: align `meta` to the existing cell order, bind it on, and
  # keep each column name once.
  cell.ids <- rownames(colData(sc))
  wanted.cols <- union(colnames(colData(sc)), colnames(meta))
  combined <- cbind(colData(sc), meta[cell.ids, ])
  colData(sc) <- combined[, wanted.cols]
  rownames(colData(sc)) <- cell.ids
  return(sc)
}
20 |
# Grab the cell-level metadata from a Seurat or SingleCellExperiment object
# as a data frame. The active identity (Seurat) or the "ident" column
# (SingleCellExperiment) is renamed to "cluster", or to a "cluster.active..."
# name when a "cluster" column already exists. Any other class is rejected
# with an explicit error.
#' @importFrom SingleCellExperiment colData
grabMeta <- function(sc) {
  if (inherits(x = sc, what = "Seurat")) {
    # Append the active identities as the last column, then name it
    meta <- data.frame(sc[[]], slot(sc, "active.ident"))
    if ("cluster" %in% colnames(meta)) {
      colnames(meta)[length(meta)] <- "cluster.active.ident"
    } else {
      colnames(meta)[length(meta)] <- "cluster"
    }
  } else if (inherits(x = sc, what = "SingleCellExperiment")) {
    meta <- data.frame(colData(sc))
    rownames(meta) <- sc@colData@rownames
    # `clu` is empty when there is no "ident" column, making the rename a no-op
    clu <- which(colnames(meta) == "ident")
    if ("cluster" %in% colnames(meta)) {
      colnames(meta)[clu] <- "cluster.active.idents"
    } else {
      colnames(meta)[clu] <- "cluster"
    }
  } else {
    stop("Cannot extract meta data: object must be of class 'Seurat' or ",
         "'SingleCellExperiment'.")
  }
  return(meta)
}
44 |
# Check the single-cell expression object: stop unless `sc` inherits from
# 'Seurat' or 'SummarizedExperiment'. Returns NULL invisibly on success.
checkSingleObject <- function(sc) {
  # Scalar, short-circuiting `&&` is the right operator inside `if`
  if (!inherits(x = sc, what = "Seurat") &&
      !inherits(x = sc, what = "SummarizedExperiment")) {
    stop("Object indicated is not of class 'Seurat' or ",
         "'SummarizedExperiment', make sure you are using ",
         "the correct data.")
  }
  invisible(NULL)
}
53 |
# Check that no sequence is longer than the model limit: 45 residues for CDR3
# sequences, 90 for expanded CDR1/2/3 sequences. Missing sequences are
# ignored. Returns NULL invisibly when every sequence is within the limit.
checkLength <- function(x, expanded = NULL) {
  # Test for NULL first so the comparison never runs on a zero-length value
  cutoff <- if (is.null(expanded) || isFALSE(expanded)) 45 else 90
  if (any(nchar(x) > cutoff, na.rm = TRUE)) {
    stop("Models have been trained on sequences less than ", cutoff,
         " amino acid residues. Please filter the larger sequences ",
         "before running")
  }
  invisible(NULL)
}
# Returns the local path of the appropriate encoder model, downloading it
# into the user cache directory on first use.
#
# species, chain, encoder.model and encoder.input are pasted together into
# the expected file name "<species>_<chain>_<model>_<input>_encoder.keras".
# The name must be listed in the first column of inst/extdata/metadata.csv.
#' @importFrom utils download.file read.csv
#' @importFrom tools R_user_dir
aa.model.loader <- function(species,
                            chain,
                            encoder.input,
                            encoder.model) {

  ## 1. Expected filename
  file_name <- paste0(
    species, "_", chain, "_",
    encoder.model, "_", encoder.input,
    "_encoder.keras")

  ## 2. Sanity-check against metadata.csv
  meta <- read.csv(
    system.file("extdata", "metadata.csv", package = "Ibex"),
    stringsAsFactors = FALSE
  )

  if (!file_name %in% meta[[1]])
    stop("Model '", file_name, "' is not listed in metadata.csv.")

  ## 3. Cache directory
  cache_dir <- tools::R_user_dir("Ibex", which = "cache")
  if (!dir.exists(cache_dir))
    dir.create(cache_dir, recursive = TRUE, showWarnings = FALSE)

  local_path <- file.path(cache_dir, file_name)

  ## 4. Download if we have never seen this model before
  if (!file.exists(local_path)) {
    message("Downloading model '", file_name, "' ...")
    base_url <- "https://zenodo.org/record/14919286/files"

    # Download to a temporary file inside the cache directory and move it
    # into place only after success. A failed or interrupted transfer then
    # never leaves a partial file that later calls would treat as a valid
    # cached model.
    tmp_path <- tempfile(pattern = "ibex-model-", tmpdir = cache_dir,
                         fileext = ".part")
    on.exit(unlink(tmp_path), add = TRUE)

    status <- tryCatch(
      utils::download.file(
        url      = paste(base_url, file_name, sep = "/"),
        destfile = tmp_path,
        mode     = "wb",
        quiet    = TRUE
      ),
      error = function(e) {
        stop("Download of model '", file_name,
             "' failed: ", conditionMessage(e))
      }
    )
    if (status != 0)
      stop("Download of model '", file_name,
           "' failed (status ", status, ").")

    if (!file.rename(tmp_path, local_path))
      stop("Could not move downloaded model '", file_name,
           "' into the cache directory.")
  }

  ## 5. Done: return the path for use in basiliskRun()
  normalizePath(local_path, winslash = "/")
}
113 |
114 |
115 |
# Store `reduction` on the single-cell object under `reduction.name`.
# Objects that are neither Seurat nor SingleCellExperiment come back unchanged.
#' @importFrom SeuratObject CreateDimReducObject
#' @importFrom SingleCellExperiment reducedDim reducedDim<-
adding.DR <- function(sc, reduction, reduction.name) {
  if (inherits(sc, "Seurat")) {
    # The same matrix is supplied as embeddings, loadings and projected
    # loadings; warnings raised while building the object are suppressed.
    latent <- as.matrix(reduction)
    DR <- suppressWarnings(CreateDimReducObject(
      embeddings = latent,
      loadings = latent,
      projected = latent,
      stdev = rep(0, ncol(reduction)),
      key = reduction.name,
      jackstraw = NULL,
      misc = list()))
    sc[[reduction.name]] <- DR
  } else if (inherits(sc, "SingleCellExperiment")) {
    reducedDim(sc, reduction.name) <- reduction
  }
  return(sc)
}
135 |
136 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Ibex
2 | Using BCR sequences for graph embedding
3 |
4 | [R-CMD-check](https://github.com/BorchLab/Ibex/actions/workflows/R-CMD-check.yaml)
5 | [Code coverage](https://app.codecov.io/gh/BorchLab/Ibex?branch=master)
6 | [Documentation](https://www.borch.dev/uploads/screpertoire/articles/ibex)
7 |
8 |
9 |
10 | ## Introduction
11 | Single-cell sequencing is an integral tool in immunology and oncology, enabling researchers to measure gene expression and immune cell receptor profiling at the level of individual cells. We developed the [scRepertoire](https://github.com/BorchLab/scRepertoire) R package to facilitate the integration of immune receptor and gene expression data. However, leveraging clonal indices for more complex analyses—such as using clonality in cell embedding—remains challenging.
12 |
13 | **Ibex** addresses this need by using deep learning to vectorize BCR sequences based on amino acid properties or their underlying order. Ibex is the sister package to [Trex](https://github.com/BorchLab/Trex), which focuses on TCR sequence data.
14 |
15 | ---
16 |
17 | # System Requirements
18 | Ibex has been tested on R versions >= 4.0. For details on required R packages, refer to the package’s DESCRIPTION file. It is designed to work with single-cell objects containing BCR data generated using [scRepertoire](https://github.com/BorchLab/scRepertoire). Ibex has been tested on macOS and Linux.
19 |
20 |
21 | # Installation
22 |
23 | Ibex relies on the [immApex](https://github.com/BorchLab/immApex) API, which can be installed directly from GitHub:
24 |
25 | ```r
26 | devtools::install_github("BorchLab/immApex")
27 | ```
28 |
29 | You may also install immApex from Bioconductor:
30 |
31 | ```r
32 | if (!require("BiocManager", quietly = TRUE))
33 | install.packages("BiocManager")
34 |
35 | BiocManager::install("immApex")
36 | ```
37 |
38 | After immApex installation, you can install Ibex with:
39 |
40 | ```r
41 | devtools::install_github("BorchLab/Ibex")
42 | ```
43 |
44 | The main version of Ibex is submitted to Bioconductor (installation instructions will be updated after review). By default, Ibex will automatically pull deep learning models from a [Zenodo repository](https://zenodo.org/records/14919286) and cache them locally.
45 |
46 | Alternatively, to install **Ibex** and all the required models at once:
47 | ```r
48 | devtools::install_github("BorchLab/Ibex@fullstack")
49 | ```
50 |
51 | # Usage/Demos
52 |
53 | Ibex integrates smoothly into most popular R-based single-cell workflows, including **Seurat** and **Bioconductor/SingleCellExperiment.**
54 |
55 | ## Quick Start
56 |
57 | See the [vignette](https://www.borch.dev/uploads/screpertoire/articles/ibex) for a step-by-step tutorial.
58 |
59 |
60 |
61 | ## Autoencoded Matrix
62 |
63 | The Ibex algorithm allows users to select BCR-based metrics to return autoencoded values to be used in dimensional reduction. If single-cell objects are not filtered for B cells with BCR, `Ibex.matrix()` will still return values; however, IBEX_1 will be based on the disparity between BCR-containing and BCR-non-containing cells as determined by the Ibex algorithm.
64 |
65 | ```r
66 | library(Ibex)
67 | my_ibex <- Ibex.matrix(singleObject)
68 | ```
69 |
70 | ## Seurat or Single-Cell Experiment
71 |
72 | You can run Ibex within your Seurat or SingleCellExperiment workflow. **Importantly**, `runIbex()` will automatically filter out single cells that do not contain BCR information in the metadata of the single-cell object.
73 |
74 | ```r
75 | seuratObj_Bonly <- runIbex(seuratObj, #The single cell object
76 | chain = c("Heavy", "Light"), # "Heavy" or "Light"
77 | method = c("encoder", "geometric"), # Use deep learning "encoder" or "geometric" transformation
78 | encoder.model = c("CNN", "VAE", "CNN.EXP", "VAE.EXP"), # Types of Deep Learning Models
79 | encoder.input = c("atchleyFactors", "crucianiProperties",
80 | "kideraFactors", "MSWHIM", "tScales", "OHE"), # Method of Encoding
81 | geometric.theta = pi/3, # theta for Geometric Encoding
82 | species = "Human") # "Mouse" or "Human"
83 |
84 | seuratObj_Bonly <- runIbex(seuratObj, reduction.name = "Ibex")
85 | ```
86 |
87 | ## After Running Ibex
88 |
89 | Once the Ibex embeddings are part of your Seurat object, you can use these embeddings to generate a t-SNE or UMAP:
90 |
91 | ```r
92 | seuratObj <- RunTSNE(seuratObj, reduction = "Ibex", reduction.key = "Ibex_")
93 | seuratObj <- RunUMAP(seuratObj, reduction = "Ibex", reduction.key = "Ibex_")
94 | ```
95 |
96 | If using the Seurat package, the Ibex embedding information and gene expression PCA can be used to find the [Weighted Nearest Neighbors](https://pubmed.ncbi.nlm.nih.gov/34062119/). Before applying the WNN approach, best practice is to remove the BCR-related genes from the list of variable genes and rerun the PCA analysis.
97 |
98 | ### Recalculate PCA without BCR genes with quietBCRgenes() function in Ibex.
99 | ```r
100 | seuratObj <- quietBCRgenes(seuratObj)
101 | seuratObj <- RunPCA(seuratObj)
102 | ```
103 |
104 | ### Running WNN approach
105 | ```r
106 | seuratObj <- FindMultiModalNeighbors(seuratObj,
107 | reduction.list = list("pca", "Ibex"),
108 | dims.list = list(1:30, 1:20),
109 | modality.weight.name = "RNA.weight")
110 |
111 | seuratObj <- RunUMAP(seuratObj,
112 | nn.name = "weighted.nn",
113 | reduction.name = "wnn.umap",
114 | reduction.key = "wnnUMAP_")
115 | ```
116 | ## Bug Reports/New Features
117 |
118 | #### If you run into any issues or bugs please submit a [GitHub issue](https://github.com/BorchLab/Ibex/issues) with details of the issue.
119 |
120 | - If possible please include a [reproducible example](https://reprex.tidyverse.org/).
121 | Alternatively, an example with the internal **ibex_example** would
122 | be extremely helpful.
123 |
124 | #### Any requests for new features or enhancements can also be submitted as [GitHub issues](https://github.com/BorchLab/Ibex/issues).
125 |
126 | #### [Pull Requests](https://github.com/BorchLab/Ibex/pulls) are welcome for bug fixes, new features, or enhancements.
127 |
128 | ## Citation
129 | More information on Ibex is available at our [Biorxiv preprint](https://www.biorxiv.org/content/10.1101/2022.11.09.515787v2).
130 |
--------------------------------------------------------------------------------
/_pkgdown.yml:
--------------------------------------------------------------------------------
1 | url: ~
2 | template:
3 | bootstrap: 5
4 |
5 |
--------------------------------------------------------------------------------
/data/ibex_example.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BorchLab/Ibex/543ce724c3f01bebedb0db705873788509b5e8a7/data/ibex_example.rda
--------------------------------------------------------------------------------
/data/ibex_vdj.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BorchLab/Ibex/543ce724c3f01bebedb0db705873788509b5e8a7/data/ibex_vdj.rda
--------------------------------------------------------------------------------
/inst/WORDLIST:
--------------------------------------------------------------------------------
1 | ADT
2 | Atchley
3 | Autoencoded
4 | Autoencoder
5 | Autoencoders
6 | BCR
7 | BLOSUM
8 | Bioconductor's
9 | Biorxiv
10 | CDR
11 | CMD
12 | CNNs
13 | CTaa
14 | Clonotype
15 | CoNGA
16 | Codecov
17 | Convolutional
18 | Cruciani
19 | Experiemt
20 | FWR
21 | Genomics’
22 | HEL
23 | IGH
24 | IGK
25 | Ig
26 | Kidera
27 | Lysozyme
28 | MSWHIM
29 | MultiUMAP
30 | OHE
31 | SNE
32 | Schattgen
33 | SingleCellExperiment
34 | TCR
35 | TRI
36 | Trex
37 | UMAP
38 | UMIs
39 | VAE
40 | VAEs
41 | Variational
42 | WNN
43 | Zenodo
44 | atchleyFactors
45 | autoencoded
46 | autoencoder
47 | autoencoders
48 | barcode
49 | barcodes
50 | clonality
51 | cloneSize
52 | clonotype
53 | clonotypes
54 | combineBCR
55 | contig
56 | crucianiProperties
57 | csv
58 | customizable
59 | embeddings
60 | gp
61 | hydrophobicity
62 | ident
63 | identOriginal
64 | immApex
65 | interpretability
66 | keras
67 | kideraFactors
68 | nCount
69 | nFeature
70 | physicochemical
71 | preprint
72 | pseudogenes
73 | pv
74 | quietBCRgenes
75 | runIbex
76 | scRepertoire
77 | splenocytes
78 | subclonotype
79 | tScales
80 | tensorflow
81 | vectorize
82 | zScales
83 | π
84 |
--------------------------------------------------------------------------------
/inst/extdata/ibex-basilisk.yaml:
--------------------------------------------------------------------------------
1 | name: ibex_env
2 | channels:
3 | - defaults
4 | - conda-forge
5 | dependencies:
6 | - python=3.9
7 | - tensorflow=2.11
8 | - keras=2.11
9 | - numpy
10 | - h5py
11 |
12 |
--------------------------------------------------------------------------------
/inst/extdata/metadata.csv:
--------------------------------------------------------------------------------
1 | "Title","Description","BiocVersion","Genome","SourceType","SourceUrl","SourceVersion","Species","TaxonomyId","Coordinate_1_based","DataProvider","Maintainer","RDataClass","DispatchClass","Location_Prefix","RDataPath","Tags"
2 | "Human_Heavy_CNN_atchleyFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN, Encoding Method: atchleyFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_CNN_atchleyFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model"
3 | "Human_Heavy_CNN_crucianiProperties_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN, Encoding Method: crucianiProperties","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_CNN_crucianiProperties_encoder.keras","BCR:scRNA-seq:Encoder:Model"
4 | "Human_Heavy_CNN_kideraFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN, Encoding Method: kideraFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_CNN_kideraFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model"
5 | "Human_Heavy_CNN_MSWHIM_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN, Encoding Method: MSWHIM","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_CNN_MSWHIM_encoder.keras","BCR:scRNA-seq:Encoder:Model"
6 | "Human_Heavy_CNN_OHE_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN, Encoding Method: OHE","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_CNN_OHE_encoder.keras","BCR:scRNA-seq:Encoder:Model"
7 | "Human_Heavy_CNN_tScales_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN, Encoding Method: tScales","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_CNN_tScales_encoder.keras","BCR:scRNA-seq:Encoder:Model"
8 | "Human_Heavy_CNN.EXP_atchleyFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN.EXP, Encoding Method: atchleyFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_CNN.EXP_atchleyFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model"
9 | "Human_Heavy_CNN.EXP_crucianiProperties_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN.EXP, Encoding Method: crucianiProperties","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_CNN.EXP_crucianiProperties_encoder.keras","BCR:scRNA-seq:Encoder:Model"
10 | "Human_Heavy_CNN.EXP_kideraFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN.EXP, Encoding Method: kideraFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_CNN.EXP_kideraFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model"
11 | "Human_Heavy_CNN.EXP_MSWHIM_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN.EXP, Encoding Method: MSWHIM","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_CNN.EXP_MSWHIM_encoder.keras","BCR:scRNA-seq:Encoder:Model"
12 | "Human_Heavy_CNN.EXP_OHE_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN.EXP, Encoding Method: OHE","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_CNN.EXP_OHE_encoder.keras","BCR:scRNA-seq:Encoder:Model"
13 | "Human_Heavy_CNN.EXP_tScales_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN.EXP, Encoding Method: tScales","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_CNN.EXP_tScales_encoder.keras","BCR:scRNA-seq:Encoder:Model"
14 | "Human_Heavy_VAE_atchleyFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE, Encoding Method: atchleyFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_VAE_atchleyFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model"
15 | "Human_Heavy_VAE_crucianiProperties_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE, Encoding Method: crucianiProperties","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_VAE_crucianiProperties_encoder.keras","BCR:scRNA-seq:Encoder:Model"
16 | "Human_Heavy_VAE_kideraFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE, Encoding Method: kideraFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_VAE_kideraFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model"
17 | "Human_Heavy_VAE_MSWHIM_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE, Encoding Method: MSWHIM","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_VAE_MSWHIM_encoder.keras","BCR:scRNA-seq:Encoder:Model"
18 | "Human_Heavy_VAE_OHE_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE, Encoding Method: OHE","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_VAE_OHE_encoder.keras","BCR:scRNA-seq:Encoder:Model"
19 | "Human_Heavy_VAE_tScales_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE, Encoding Method: tScales","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_VAE_tScales_encoder.keras","BCR:scRNA-seq:Encoder:Model"
20 | "Human_Heavy_VAE.EXP_atchleyFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE.EXP, Encoding Method: atchleyFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_VAE.EXP_atchleyFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model"
21 | "Human_Heavy_VAE.EXP_crucianiProperties_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE.EXP, Encoding Method: crucianiProperties","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_VAE.EXP_crucianiProperties_encoder.keras","BCR:scRNA-seq:Encoder:Model"
22 | "Human_Heavy_VAE.EXP_kideraFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE.EXP, Encoding Method: kideraFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_VAE.EXP_kideraFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model"
23 | "Human_Heavy_VAE.EXP_MSWHIM_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE.EXP, Encoding Method: MSWHIM","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_VAE.EXP_MSWHIM_encoder.keras","BCR:scRNA-seq:Encoder:Model"
24 | "Human_Heavy_VAE.EXP_OHE_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE.EXP, Encoding Method: OHE","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_VAE.EXP_OHE_encoder.keras","BCR:scRNA-seq:Encoder:Model"
25 | "Human_Heavy_VAE.EXP_tScales_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE.EXP, Encoding Method: tScales","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Heavy_VAE.EXP_tScales_encoder.keras","BCR:scRNA-seq:Encoder:Model"
26 | "Human_Light_CNN_atchleyFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: CNN, Encoding Method: atchleyFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_CNN_atchleyFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model"
27 | "Human_Light_CNN_crucianiProperties_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: CNN, Encoding Method: crucianiProperties","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_CNN_crucianiProperties_encoder.keras","BCR:scRNA-seq:Encoder:Model"
28 | "Human_Light_CNN_kideraFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: CNN, Encoding Method: kideraFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_CNN_kideraFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model"
29 | "Human_Light_CNN_MSWHIM_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: CNN, Encoding Method: MSWHIM","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_CNN_MSWHIM_encoder.keras","BCR:scRNA-seq:Encoder:Model"
30 | "Human_Light_CNN_OHE_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: CNN, Encoding Method: OHE","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_CNN_OHE_encoder.keras","BCR:scRNA-seq:Encoder:Model"
31 | "Human_Light_CNN_tScales_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: CNN, Encoding Method: tScales","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_CNN_tScales_encoder.keras","BCR:scRNA-seq:Encoder:Model"
32 | "Human_Light_CNN.EXP_atchleyFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: CNN.EXP, Encoding Method: atchleyFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_CNN.EXP_atchleyFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model"
33 | "Human_Light_CNN.EXP_crucianiProperties_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: CNN.EXP, Encoding Method: crucianiProperties","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_CNN.EXP_crucianiProperties_encoder.keras","BCR:scRNA-seq:Encoder:Model"
34 | "Human_Light_CNN.EXP_kideraFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: CNN.EXP, Encoding Method: kideraFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_CNN.EXP_kideraFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model"
35 | "Human_Light_CNN.EXP_MSWHIM_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: CNN.EXP, Encoding Method: MSWHIM","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_CNN.EXP_MSWHIM_encoder.keras","BCR:scRNA-seq:Encoder:Model"
36 | "Human_Light_CNN.EXP_OHE_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: CNN.EXP, Encoding Method: OHE","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_CNN.EXP_OHE_encoder.keras","BCR:scRNA-seq:Encoder:Model"
37 | "Human_Light_CNN.EXP_tScales_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: CNN.EXP, Encoding Method: tScales","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_CNN.EXP_tScales_encoder.keras","BCR:scRNA-seq:Encoder:Model"
38 | "Human_Light_VAE_atchleyFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: VAE, Encoding Method: atchleyFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_VAE_atchleyFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model"
39 | "Human_Light_VAE_crucianiProperties_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: VAE, Encoding Method: crucianiProperties","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_VAE_crucianiProperties_encoder.keras","BCR:scRNA-seq:Encoder:Model"
40 | "Human_Light_VAE_kideraFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: VAE, Encoding Method: kideraFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_VAE_kideraFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model"
41 | "Human_Light_VAE_MSWHIM_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: VAE, Encoding Method: MSWHIM","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_VAE_MSWHIM_encoder.keras","BCR:scRNA-seq:Encoder:Model"
42 | "Human_Light_VAE_OHE_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: VAE, Encoding Method: OHE","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_VAE_OHE_encoder.keras","BCR:scRNA-seq:Encoder:Model"
43 | "Human_Light_VAE_tScales_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: VAE, Encoding Method: tScales","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_VAE_tScales_encoder.keras","BCR:scRNA-seq:Encoder:Model"
44 | "Human_Light_VAE.EXP_atchleyFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: VAE.EXP, Encoding Method: atchleyFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_VAE.EXP_atchleyFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model"
45 | "Human_Light_VAE.EXP_crucianiProperties_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: VAE.EXP, Encoding Method: crucianiProperties","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_VAE.EXP_crucianiProperties_encoder.keras","BCR:scRNA-seq:Encoder:Model"
46 | "Human_Light_VAE.EXP_kideraFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: VAE.EXP, Encoding Method: kideraFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_VAE.EXP_kideraFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model"
47 | "Human_Light_VAE.EXP_MSWHIM_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: VAE.EXP, Encoding Method: MSWHIM","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_VAE.EXP_MSWHIM_encoder.keras","BCR:scRNA-seq:Encoder:Model"
48 | "Human_Light_VAE.EXP_OHE_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: VAE.EXP, Encoding Method: OHE","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_VAE.EXP_OHE_encoder.keras","BCR:scRNA-seq:Encoder:Model"
49 | "Human_Light_VAE.EXP_tScales_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Light, Architecture: VAE.EXP, Encoding Method: tScales","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Homo sapiens","9606",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Human_Light_VAE.EXP_tScales_encoder.keras","BCR:scRNA-seq:Encoder:Model"
50 | "Mouse_Heavy_CNN_atchleyFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN, Encoding Method: atchleyFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_CNN_atchleyFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model"
51 | "Mouse_Heavy_CNN_crucianiProperties_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN, Encoding Method: crucianiProperties","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_CNN_crucianiProperties_encoder.keras","BCR:scRNA-seq:Encoder:Model"
52 | "Mouse_Heavy_CNN_kideraFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN, Encoding Method: kideraFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_CNN_kideraFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model"
53 | "Mouse_Heavy_CNN_MSWHIM_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN, Encoding Method: MSWHIM","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_CNN_MSWHIM_encoder.keras","BCR:scRNA-seq:Encoder:Model"
54 | "Mouse_Heavy_CNN_OHE_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN, Encoding Method: OHE","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_CNN_OHE_encoder.keras","BCR:scRNA-seq:Encoder:Model"
55 | "Mouse_Heavy_CNN_tScales_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN, Encoding Method: tScales","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_CNN_tScales_encoder.keras","BCR:scRNA-seq:Encoder:Model"
56 | "Mouse_Heavy_CNN.EXP_atchleyFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN.EXP, Encoding Method: atchleyFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_CNN.EXP_atchleyFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model"
57 | "Mouse_Heavy_CNN.EXP_crucianiProperties_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN.EXP, Encoding Method: crucianiProperties","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_CNN.EXP_crucianiProperties_encoder.keras","BCR:scRNA-seq:Encoder:Model"
58 | "Mouse_Heavy_CNN.EXP_kideraFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN.EXP, Encoding Method: kideraFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_CNN.EXP_kideraFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model"
59 | "Mouse_Heavy_CNN.EXP_MSWHIM_autoencoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN.EXP, Encoding Method: MSWHIM","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_CNN.EXP_MSWHIM_autoencoder.keras","BCR:scRNA-seq:Encoder:Model"
60 | "Mouse_Heavy_CNN.EXP_OHE_autoencoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN.EXP, Encoding Method: OHE","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_CNN.EXP_OHE_autoencoder.keras","BCR:scRNA-seq:Encoder:Model"
61 | "Mouse_Heavy_CNN.EXP_tScales_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: CNN.EXP, Encoding Method: tScales","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_CNN.EXP_tScales_encoder.keras","BCR:scRNA-seq:Encoder:Model"
62 | "Mouse_Heavy_VAE_atchleyFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE, Encoding Method: atchleyFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_VAE_atchleyFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model"
63 | "Mouse_Heavy_VAE_crucianiProperties_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE, Encoding Method: crucianiProperties","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_VAE_crucianiProperties_encoder.keras","BCR:scRNA-seq:Encoder:Model"
64 | "Mouse_Heavy_VAE_kideraFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE, Encoding Method: kideraFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_VAE_kideraFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model"
65 | "Mouse_Heavy_VAE_MSWHIM_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE, Encoding Method: MSWHIM","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_VAE_MSWHIM_encoder.keras","BCR:scRNA-seq:Encoder:Model"
66 | "Mouse_Heavy_VAE_OHE_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE, Encoding Method: OHE","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_VAE_OHE_encoder.keras","BCR:scRNA-seq:Encoder:Model"
67 | "Mouse_Heavy_VAE_tScales_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE, Encoding Method: tScales","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_VAE_tScales_encoder.keras","BCR:scRNA-seq:Encoder:Model"
68 | "Mouse_Heavy_VAE.EXP_atchleyFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE.EXP, Encoding Method: atchleyFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_VAE.EXP_atchleyFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model"
69 | "Mouse_Heavy_VAE.EXP_crucianiProperties_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE.EXP, Encoding Method: crucianiProperties","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_VAE.EXP_crucianiProperties_encoder.keras","BCR:scRNA-seq:Encoder:Model"
70 | "Mouse_Heavy_VAE.EXP_kideraFactors_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE.EXP, Encoding Method: kideraFactors","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_VAE.EXP_kideraFactors_encoder.keras","BCR:scRNA-seq:Encoder:Model"
71 | "Mouse_Heavy_VAE.EXP_MSWHIM_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE.EXP, Encoding Method: MSWHIM","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_VAE.EXP_MSWHIM_encoder.keras","BCR:scRNA-seq:Encoder:Model"
72 | "Mouse_Heavy_VAE.EXP_OHE_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE.EXP, Encoding Method: OHE","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_VAE.EXP_OHE_encoder.keras","BCR:scRNA-seq:Encoder:Model"
73 | "Mouse_Heavy_VAE.EXP_tScales_encoder.keras","Keras-based deep learning encoder for BCR sequences. Chain: Heavy, Architecture: VAE.EXP, Encoding Method: tScales","3.21",NA,"CSV","https://github.com/BorchLab",NA,"Mus musculus","10090",NA,"Consolidated Sources: IReceptor, OAS, and GEO","Nick Borcherding ","character","FilePath","https://zenodo.org/","records/14919286/files/Mouse_Heavy_VAE.EXP_tScales_encoder.keras","BCR:scRNA-seq:Encoder:Model"
74 |
--------------------------------------------------------------------------------
/inst/scripts/make-data.R:
--------------------------------------------------------------------------------
1 | #########################
2 | #Defining Hyperparameters
3 | #########################
4 | # Settings shared by the two training loops further down in this script.
5 | factors <- c("OHE", "atchleyFactors", "crucianiProperties", "kideraFactors", "MSWHIM", "tScales") # encodings; each names an input "<factor>_Heavy_CDR3.rds" and the saved model files
6 | hidden_dim1 <- 512 # units of the outer dense layers ("e.1" / "d.2")
7 | hidden_dim2 <- 256 # units of the inner dense layers ("e.2" / "d.1")
8 | latent_dim <- 128 # width of the latent layer ("latent_space" / "z_mean" / "z_log_var")
9 | batch_size <- 128 # passed to fit()
10 | learning_rate <- 1e-6 # passed to optimizer_adam()
11 | epochs <- 128 # upper bound passed to fit(); the early-stopping callback may end training sooner
12 | optimizer <- "adam" # NOTE(review): not referenced in this script; both loops call optimizer_adam() directly
13 | layer_act <- "relu" # activation used for the hidden layers
14 | epsilon.std <- 1 # stddev of the noise drawn in the VAE sampling layer
15 | amino.acids <- c("A", "R", "N", "D", "C", "Q", "E", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V") # NOTE(review): not referenced in this script; confirm whether still needed
16 |
17 | ####################
18 | #Training CNN Models
19 | ####################
20 | # NOTE(review): every layer built below is layer_dense(); "CNN" is the label used in the saved file names.
21 | set.seed(42) # seeds R's RNG; NOTE(review): presumably does not seed TensorFlow - confirm
22 |
23 | for(i in seq_along(factors)) {
24 | # Stop training once val_loss has not improved for 8 epochs.
25 | es = callback_early_stopping(
26 | monitor = "val_loss",
27 | min_delta = 0,
28 | patience = 8,
29 | verbose = 1,
30 | mode = "min")
31 | # data.path is not defined in this script and must already exist; it is pasted directly in front of the file name.
32 | sequence.matrix <- readRDS(paste0(data.path, factors[i], "_Heavy_CDR3.rds"))
33 | # prepare_data() is not defined in this script; presumably returns list(train, validation, test) - TODO confirm
34 | stratified.sequences <- prepare_data(sequence.matrix,
35 | train_split = 0.75,
36 | val_split = 0.2)
37 |
38 | # Create the training, validation, and test sets
39 | x_train <- stratified.sequences[[1]]
40 | x_val <- stratified.sequences[[2]]
41 | x_test <- stratified.sequences[[3]] # not used anywhere in this loop
42 | rm(stratified.sequences)
43 | rm(sequence.matrix)
44 | gc()
45 |
46 | d.1 <- dim(x_train)[2] # number of input columns; also the width of the reconstruction layer
47 | input_layer <- layer_input(shape = c(d.1))
48 | # Encoder part
49 | encoder <- input_layer %>%
50 | layer_dense(units = hidden_dim1, name = "e.1") %>%
51 | layer_batch_normalization(name = "bn.1") %>%
52 | layer_activation(activation = layer_act, name = "act.1") %>%
53 | layer_dense(units = hidden_dim2, name = "e.2") %>%
54 | layer_batch_normalization(name = "bn.2") %>%
55 | layer_activation(activation = layer_act, name = "act.2") %>%
56 | layer_dense(units = latent_dim, activation = layer_act, name = "latent_space")
57 |
58 | # Decoder part
59 | decoder <- encoder %>%
60 | layer_dense(units = hidden_dim2, name = "d.1") %>%
61 | layer_batch_normalization(name = "bn.3") %>%
62 | layer_activation(activation = layer_act, name = "act.3") %>%
63 | layer_dense(units = hidden_dim1, name = "d.2") %>%
64 | layer_batch_normalization(name = "bn.4") %>%
65 | layer_activation(activation = layer_act, name = "act.4") %>%
66 | layer_dense(units = d.1, activation = 'sigmoid', name = "output") # NOTE(review): sigmoid output assumes inputs scaled to [0, 1] - confirm
67 |
68 | # Complete autoencoder model
69 | autoencoder <- keras_model(input_layer, decoder)
70 |
71 | # Extract the latent space output
72 | encoder_model <- keras_model(inputs = autoencoder$input, outputs = get_layer(autoencoder, "latent_space")$output)
73 |
74 | # Create the decoder model
75 | latent_input <- layer_input(shape = latent_dim, name = "latent_input")
76 | decoder_output <- latent_input %>% # re-applies the autoencoder's own decoder layers to a new latent-sized input
77 | get_layer(autoencoder, "d.1")(.) %>%
78 | get_layer(autoencoder, "bn.3")(.) %>%
79 | get_layer(autoencoder, "act.3")(.) %>%
80 | get_layer(autoencoder, "d.2")(.) %>%
81 | get_layer(autoencoder, "bn.4")(.) %>%
82 | get_layer(autoencoder, "act.4")(.) %>%
83 | get_layer(autoencoder, "output")(.)
84 |
85 | decoder_model <- keras_model(latent_input, decoder_output)
86 |
87 | autoencoder %>% compile(
88 | optimizer = optimizer_adam(learning_rate = learning_rate),
89 | loss = "mean_squared_error",
90 | metrics = 'mean_absolute_error')
91 |
92 | # Train the model
93 | history <- autoencoder %>% fit(
94 | x = x_train,
95 | y = x_train, # the target is the input itself (reconstruction)
96 | validation_data = list(x_val, x_val),
97 | epochs = epochs,
98 | batch_size = batch_size,
99 | shuffle = TRUE,
100 | callbacks = es)
101 | # Save the encoder, the stand-alone decoder and the full autoencoder under <data.path>/models/.
102 | save_model(encoder_model, paste0(data.path, "/models/Human_Heavy_CNN_", factors[i], "_encoder.keras"), overwrite = TRUE)
103 | save_model(decoder_model, paste0(data.path, "/models/Human_Heavy_CNN_", factors[i], "_decoder.keras"), overwrite = TRUE)
104 | save_model(autoencoder, paste0(data.path, "/models/Human_Heavy_CNN_", factors[i], "_autoencoder.keras"), overwrite = TRUE)
105 | }
106 |
107 | ####################
108 | #Training VAE Models
109 | ####################
110 | # Trains and saves one variational autoencoder per encoding listed in `factors`.
111 | for (i in seq_along(factors)) {
112 |   # Stop training once val_loss has not improved for 8 epochs.
113 |   es <- callback_early_stopping(
114 |     monitor = "val_loss",
115 |     min_delta = 0,
116 |     patience = 8,
117 |     verbose = 1,
118 |     mode = "min")
119 |   # data.path and prepare_data() are not defined in this script and must already exist.
120 |   sequence.matrix <- readRDS(paste0(data.path, factors[i], "_Heavy_CDR3.rds"))
121 |
122 |   stratified.sequences <- prepare_data(sequence.matrix,
123 |                                        train_split = 0.75,
124 |                                        val_split = 0.2)
125 |
126 |   # Create the training, validation, and test sets
127 |   x_train <- stratified.sequences[[1]]
128 |   x_val <- stratified.sequences[[2]]
129 |   x_test <- stratified.sequences[[3]]
130 |   rm(stratified.sequences)
131 |   rm(sequence.matrix)
132 |   gc()
133 |
134 |   vae_loss_layer <- function(original_dim) {
135 |     layer_lambda(
136 |       f = function(x) {
137 |         x_decoded_mean <- x[[1]]
138 |         x_input <- x[[2]]
139 |         z_mean <- x[[3]]
140 |         z_log_var <- x[[4]]
141 |
142 |         # Reconstruction loss
143 |         xent_loss <- loss_mean_squared_error(x_input, x_decoded_mean) * original_dim
144 |
145 |         # KL Divergence loss
146 |         kl_loss <- -0.5 * tf$reduce_mean(1 + z_log_var - tf$square(z_mean) - tf$exp(z_log_var), axis = -1L)
147 |
148 |         # Total loss
149 |         tf$reduce_mean(xent_loss + kl_loss)
150 |       },
151 |       output_shape = list(NULL, 1) # Explicit output shape
152 |     )
153 |   }
154 |   original_dim <- ncol(x_test)
155 |
156 |
157 |   # Encoder
158 |   encoder_input <- layer_input(shape = original_dim)
159 |   h <- encoder_input
160 |   h <- layer_dense(h,
161 |                    units = hidden_dim1,
162 |                    activation = layer_act,
163 |                    name = "e.1")
164 |   h <- layer_dense(h,
165 |                    units = hidden_dim2,
166 |                    activation = layer_act,
167 |                    name = "e.2")
168 |   z_mean <- layer_dense(h, units = latent_dim, name = "z_mean")
169 |   z_log_var <- layer_dense(h, units = latent_dim, name = "z_log_var")
170 |
171 |   # Sampling Layer
172 |   z <- layer_lambda(f = function(args) {
173 |     z_mean <- args[[1]]
174 |     z_log_var <- args[[2]]
175 |     batch <- tf$shape(z_mean)[1]
176 |     dim <- tf$shape(z_mean)[2]
177 |     epsilon <- tf$random$normal(shape = c(batch, dim), mean = 0., stddev = epsilon.std)
178 |     z_mean + tf$exp(z_log_var / 2) * epsilon
179 |   }, output_shape = c(latent_dim))(list(z_mean, z_log_var))
180 |
181 |   # Decoder
182 |   decoder_input <- layer_input(shape = latent_dim)
183 |   d <- decoder_input
184 |   d <- layer_dense(d,
185 |                    units = hidden_dim2,
186 |                    activation = layer_act,
187 |                    name = "d.1")
188 |   d <- layer_dense(d,
189 |                    units = hidden_dim1,
190 |                    activation = layer_act,
191 |                    name = "d.2")
192 |   decoder_output <- layer_dense(d, units = original_dim, activation = "sigmoid")
193 |
194 |   # Encoder and Decoder Models (the saved encoder outputs z_mean, not a sampled z)
195 |   encoder <- keras_model(encoder_input, z_mean)
196 |   decoder <- keras_model(decoder_input, decoder_output)
197 |
198 |   # VAE Model
199 |   decoder_output <- decoder(z)
200 |   vae <- keras_model(encoder_input, decoder_output)
201 |
202 |   # Add custom loss layer
203 |   loss_layer <- vae_loss_layer(original_dim)(list(decoder_output, encoder_input, z_mean, z_log_var))
204 |   vae_with_loss <- keras_model(encoder_input, loss_layer)
205 |
206 |   # Dummy loss function: the model output already is the loss, so y_true is ignored
207 |   dummy_loss <- function(y_true, y_pred) {
208 |     tf$reduce_mean(y_pred)
209 |   }
210 |
211 |   # Compile the model. NOTE(review): the metrics compare x_train with the loss output - confirm they are meaningful.
212 |   vae_with_loss %>% compile(optimizer = optimizer_adam(learning_rate = learning_rate),
213 |                             loss = dummy_loss,
214 |                             metrics = c("mean_squared_error", "mean_absolute_error"))
215 |   # Early stopping is driven by the validation split, as in the CNN loop, so x_test stays held out.
216 |   history <- vae_with_loss %>% fit(
217 |     x_train, x_train,
218 |     shuffle = TRUE,
219 |     epochs = epochs,
220 |     batch_size = batch_size,
221 |     validation_data = list(x_val, x_val),
222 |     verbose = 0,
223 |     callbacks = es
224 |   )
225 |   # All three files use the same "/models/" sub-path, so they land in one directory.
226 |   save_model(encoder, paste0(data.path, "/models/Human_Heavy_VAE_", factors[i], "_encoder.keras"), overwrite = TRUE)
227 |   save_model(decoder, paste0(data.path, "/models/Human_Heavy_VAE_", factors[i], "_decoder.keras"), overwrite = TRUE)
228 |   save_model(vae, paste0(data.path, "/models/Human_Heavy_VAE_", factors[i], "_autoencoder.keras"), overwrite = TRUE)
229 | }
230 |
--------------------------------------------------------------------------------
/inst/scripts/make-metadata.R:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env Rscript
2 | ## make-metadata.R
3 | ## This script scans inst/extdata, builds a metadata.csv file
4 | ## for ExperimentHub or AnnotationHub submission.
5 |
6 | # 1) Constants written unchanged into every metadata row
7 | PKG_NAME <- "Ibex"
8 | BIOC_VERSION <- "3.21"
9 | MAINTAINER <- "Nick Borcherding " # NOTE(review): hub metadata usually expects "Name <email>"; the address looks missing - confirm
10 | DATA_PROVIDER <- "Consolidated Sources: IReceptor, OAS, and GEO"
11 | SOURCE_URL <- "https://github.com/BorchLab"
12 | SOURCE_VERSION <- NA
13 | SOURCE_TYPE <- "CSV"
14 | GENOME <- NA
15 | COORDINATE_1_BASED <- NA
16 | DESCRIPTION <- "Keras-based deep learning encoder for BCR sequences."
17 |
18 | # `pattern` is a regular expression: escape the dot and anchor the end so only *.keras files match.
19 | # 2) Locate the data files in inst/extdata
20 | path_to_extdata <- file.path("inst", "extdata")
21 | files <- list.files(path_to_extdata, full.names = TRUE, pattern = "\\.keras$")
22 |
23 | # 3) Helper: map a file extension (any letter case) to a hub DispatchClass.
24 | inferDispatchClass <- function(file_ext) {
25 |   # "rds"/"rda" get their own classes; every other extension yields "FilePath".
26 |   ext <- tolower(file_ext)
27 |   switch(
28 |     ext,
29 |     rds = "Rds",
30 |     rda = "Rda",
31 |     csv = , tsv = , txt = "FilePath", # empty arms fall through to the next value
32 |     "FilePath" # default for any unlisted extension
33 |   )
34 | }
35 |
36 | inferRDataClass <- function(dispatchClass) {
37 |   # Pick the RDataClass label for a DispatchClass; adjust to how the data is actually loaded in R.
38 |   r_serialised <- dispatchClass %in% c("Rds", "Rda")
39 |   if (!r_serialised) {
40 |     return("character") # or NA, if you just return a path
41 |   }
42 |   "SummarizedExperiment" # or whatever class your objects are
43 | }
44 |
45 | # 4) Assemble one named character vector of hub metadata per model file
46 | metadata_list <- lapply(files, function(path) {
47 |   # Example: path == "inst/extdata/somefile.rds"
48 |   title <- basename(path) # the file name doubles as the record Title
49 |   dispatchClass <- inferDispatchClass(tolower(tools::file_ext(path)))
50 |   rDataClass <- inferRDataClass(dispatchClass)
51 |
52 |   # The file name is split on "_"; pieces 2-4 are reported in the description
53 |   # as chain, architecture and encoding method.
54 |   components <- stringr::str_split(title, "_")[[1]]
55 |   description <- paste0(
56 |     DESCRIPTION,
57 |     " Chain: ", components[2],
58 |     ", Architecture: ", components[3],
59 |     ", Encoding Method: ", components[4]
60 |   )
61 |   # Any title that does not contain "Human" is labelled as mouse.
62 |   if (grepl("Human", title)) {
63 |     SPECIES <- "Homo sapiens"
64 |     TAXONOMY_ID <- "9606"
65 |   } else {
66 |     SPECIES <- "Mus musculus"
67 |     TAXONOMY_ID <- "10090"
68 |   }
69 |   coordinate_1_based <- if (is.na(COORDINATE_1_BASED)) NA else as.character(COORDINATE_1_BASED)
70 |   c(
71 |     Title = title,
72 |     Description = description,
73 |     BiocVersion = BIOC_VERSION,
74 |     Genome = as.character(GENOME),
75 |     SourceType = SOURCE_TYPE,
76 |     SourceUrl = SOURCE_URL,
77 |     SourceVersion = SOURCE_VERSION,
78 |     Species = SPECIES,
79 |     TaxonomyId = TAXONOMY_ID,
80 |     Coordinate_1_based = coordinate_1_based,
81 |     DataProvider = DATA_PROVIDER,
82 |     Maintainer = MAINTAINER,
83 |     RDataClass = rDataClass, DispatchClass = dispatchClass,
84 |     Location_Prefix = "https://zenodo.org/", RDataPath = paste0("records/14919286/files/", title),
85 |     Tags = paste("BCR", "scRNA-seq", "Encoder", "Model", sep = ":")
86 |   )
87 | })
88 |
89 | # 5) Convert the list of named vectors to a data.frame; stop if it is empty, since do.call(rbind, list()) is NULL and would blank out metadata.csv
90 | if (length(metadata_list) == 0L) stop("No .keras files found in inst/extdata; metadata.csv was not written.", call. = FALSE)
91 | metadata_df <- do.call(rbind, lapply(metadata_list, as.data.frame.list))
92 |
93 | # 6) Write out the metadata.csv to inst/extdata
94 | output_csv <- file.path("inst", "extdata", "metadata.csv")
95 | write.csv(metadata_df, file = output_csv, row.names = FALSE, quote = TRUE)
96 |
--------------------------------------------------------------------------------
/man/CoNGAfy.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CoNGAfy.R
3 | \name{CoNGAfy}
4 | \alias{CoNGAfy}
5 | \title{Reduce a Single-Cell Object to Representative Cells}
6 | \usage{
7 | CoNGAfy(
8 | input.data,
9 | method = "dist",
10 | features = NULL,
11 | assay = "RNA",
12 | meta.carry = c("CTaa", "CTgene")
13 | )
14 | }
15 | \arguments{
16 | \item{input.data}{A single-cell dataset in Seurat or SingleCellExperiment format.}
17 |
18 | \item{method}{Character. Specifies the method to reduce the dataset:
19 | \itemize{
20 | \item "mean" - Computes the mean expression of selected features across cells in each clonotype.
21 | \item "dist" - Uses PCA reduction to identify the cell with the minimal Euclidean distance within each clonotype group.
22 | }}
23 |
24 | \item{features}{Character vector. Selected genes for the reduction. If \code{NULL} (default), all genes are used.}
25 |
26 | \item{assay}{Character. The name of the assay or assays to include in the output. Defaults to \code{"RNA"}.}
27 |
28 | \item{meta.carry}{Character vector. Metadata variables to carry over from the input single-cell object to the output.}
29 | }
30 | \value{
31 | A reduced single-cell object where each clonotype is represented by a single cell.
32 | }
33 | \description{
34 | This function generates a single-cell object with a reduced representation
35 | of RNA expression by clone. The approach is inspired by the method introduced
36 | in \href{https://pubmed.ncbi.nlm.nih.gov/34426704/}{CoNGA}. Users can
37 | generate either a mean representation of features by clone or identify a
38 | representative cell using count-based minimal Euclidean distance.
39 | Please read and cite the original work by the authors of CoNGA.
40 | }
41 | \examples{
42 | ibex.clones <- CoNGAfy(ibex_example,
43 | method = "dist")
44 |
45 | ibex.clones <- CoNGAfy(ibex_example,
46 | method = "mean")
47 |
48 | }
49 |
--------------------------------------------------------------------------------
/man/Ibex.matrix.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/Ibex.matrix.R
3 | \name{Ibex.matrix}
4 | \alias{Ibex.matrix}
5 | \title{Ibex Matrix Interface}
6 | \usage{
7 | Ibex.matrix(
8 | input.data,
9 | chain = c("Heavy", "Light"),
10 | method = c("encoder", "geometric"),
11 | encoder.model = c("CNN", "VAE", "CNN.EXP", "VAE.EXP"),
12 | encoder.input = c("atchleyFactors", "crucianiProperties", "kideraFactors", "MSWHIM",
13 | "tScales", "OHE"),
14 | geometric.theta = pi/3,
15 | species = "Human",
16 | verbose = TRUE
17 | )
18 | }
19 | \arguments{
20 | \item{input.data}{Input data, which can be:
21 | \itemize{
22 | \item A Single Cell Object in Seurat or SingleCellExperiment format
23 | \item The output of \code{combineBCR()} from the \code{scRepertoire} package
24 | }}
25 |
26 | \item{chain}{Character. Specifies which chain to analyze:
27 | \itemize{
28 | \item "Heavy" for the heavy chain
29 | \item "Light" for the light chain
30 | }}
31 |
32 | \item{method}{Character. The algorithm to use for generating latent vectors:
33 | \itemize{
34 | \item "encoder" - Uses deep learning autoencoders
35 | \item "geometric" - Uses geometric transformations based on the
36 | BLOSUM62 matrix
37 | }}
38 |
39 | \item{encoder.model}{Character. The type of autoencoder model to use:
40 | \itemize{
41 | \item "CNN" - CDR3 Convolutional Neural Network-based autoencoder
42 | \item "VAE" - CDR3 Variational Autoencoder
43 | \item "CNN.EXP" - CDR1/2/3 CNN
44 | \item "VAE.EXP" - CDR1/2/3 VAE
45 | }}
46 |
47 | \item{encoder.input}{Character. Specifies the input features for the
48 | encoder model. Options include:
49 | \itemize{
50 | \item Amino Acid Properties: "atchleyFactors", "crucianiProperties",
51 | "kideraFactors", "MSWHIM", "tScales"
52 | \item "OHE" for One Hot Encoding
53 | }}
54 |
55 | \item{geometric.theta}{Numeric. Angle (in radians) for the geometric
56 | transformation. Only used when \code{method = "geometric"}.}
57 |
58 | \item{species}{Character. Either "Human" (default) or "Mouse".}
59 |
60 | \item{verbose}{Logical. Whether to print progress messages. Default is TRUE.}
61 | }
62 | \value{
63 | A matrix of latent vectors generated by the specified method.
64 | }
65 | \description{
66 | This function runs the Ibex algorithm to generate latent vectors from
67 | input data. The output can be returned as a matrix, with options to choose
68 | between deep learning autoencoders or geometric transformations based on
69 | the BLOSUM62 matrix.
70 | }
71 | \examples{
72 | # Using the encoder method with a variational autoencoder
73 | ibex_values <- Ibex.matrix(ibex_example,
74 | chain = "Heavy",
75 | method = "encoder",
76 | encoder.model = "VAE",
77 | encoder.input = "atchleyFactors")
78 |
79 | # Using the geometric method with a specified angle
80 | ibex_values <- Ibex.matrix(ibex_example,
81 | chain = "Heavy",
82 | method = "geometric",
83 | geometric.theta = pi)
84 |
85 | }
86 | \seealso{
87 | \code{\link[immApex]{propertyEncoder}},
88 | \code{\link[immApex]{geometricEncoder}}
89 | }
90 |
--------------------------------------------------------------------------------
/man/combineExpandedBCR.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/combineExpandedBCR.R
3 | \name{combineExpandedBCR}
4 | \alias{combineExpandedBCR}
5 | \title{combineBCR for CDR1/2/3 sequences}
6 | \usage{
7 | combineExpandedBCR(
8 | input.data,
9 | samples = NULL,
10 | ID = NULL,
11 | call.related.clones = TRUE,
12 | threshold = 0.85,
13 | removeNA = FALSE,
14 | removeMulti = FALSE,
15 | filterMulti = TRUE,
16 | filterNonproductive = TRUE
17 | )
18 | }
19 | \arguments{
20 | \item{input.data}{List of filtered contig annotations.}
21 |
22 | \item{samples}{Character vector. Labels of samples (required).}
23 |
24 | \item{ID}{Character vector. Additional sample labeling (optional).}
25 |
26 | \item{call.related.clones}{Logical. Whether to call related clones based on
27 | nucleotide sequence and V gene. Default is `TRUE`.}
28 |
29 | \item{threshold}{Numeric. Normalized edit distance for clone clustering.
30 | Default is `0.85`.}
31 |
32 | \item{removeNA}{Logical. Whether to remove any chain without values. Default
33 | is `FALSE`.}
34 |
35 | \item{removeMulti}{Logical. Whether to remove barcodes with more than two
36 | chains. Default is `FALSE`.}
37 |
38 | \item{filterMulti}{Logical. Whether to select the highest-expressing light
39 | and heavy chains. Default is `TRUE`.}
40 |
41 | \item{filterNonproductive}{Logical. Whether to remove nonproductive chains.
42 | Default is `TRUE`.}
43 | }
44 | \value{
45 | A list of consolidated BCR clones with expanded CDR sequences.
46 | }
47 | \description{
48 | This function enhances BCR processing by incorporating additional
49 | sequence information from CDR1 and CDR2 regions before applying the BCR
50 | combination logic. The function depends on
51 | \code{\link[scRepertoire]{combineBCR}} from the scRepertoire package.
52 | }
53 | \examples{
54 | combined.BCR <- combineExpandedBCR(list(ibex_vdj),
55 | samples = "Sample1",
56 | filterNonproductive = TRUE)
57 |
58 | }
59 | \seealso{
60 | \code{\link[scRepertoire]{combineBCR}}
61 | }
62 |
--------------------------------------------------------------------------------
/man/filter.cells.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/runIbex.R
3 | \name{filter.cells}
4 | \alias{filter.cells}
5 | \title{Filter Single-Cell Data Based on CDR3 Sequences}
6 | \usage{
7 | filter.cells(sc.obj, chain)
8 | }
9 | \arguments{
10 | \item{sc.obj}{A Seurat or SingleCellExperiment object.}
11 |
12 | \item{chain}{Character. Specifies the chain type ("Heavy" or "Light").}
13 | }
14 | \value{
15 | A filtered Seurat or SingleCellExperiment object.
16 | }
17 | \description{
18 | This function subsets a Seurat or SingleCellExperiment object,
19 | removing cells where the `CTaa` column is missing or contains unwanted patterns.
20 | }
21 |
--------------------------------------------------------------------------------
/man/getHumanIgPseudoGenes.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/quietBCRgenes.R
3 | \name{getHumanIgPseudoGenes}
4 | \alias{getHumanIgPseudoGenes}
5 | \title{Get Human Immunoglobulin pseudogenes}
6 | \usage{
7 | getHumanIgPseudoGenes()
8 | }
9 | \value{
10 | Character vector of human immunoglobulin pseudogenes.
11 | }
12 | \description{
13 | This function returns a character vector of human immunoglobulin
14 | pseudogenes. These are also the genes that are removed from the
15 | variable gene list in the \code{quietBCRgenes} function.
16 | }
17 |
--------------------------------------------------------------------------------
/man/ibex_example.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/ibex_example.R
3 | \docType{data}
4 | \name{ibex_example}
5 | \alias{ibex_example}
6 | \title{A SingleCellExperiment object with 200 randomly-sampled
7 | B cells with BCR sequences from the 10x Genomics
8 | 2k_BEAM-Ab_Mouse_HEL_5pv2 dataset.}
9 | \format{
10 | A \code{SingleCellExperiment} object with 32,285 genes (rows) and 200 cells (columns).
11 | \describe{
12 | \item{assays}{List of matrices containing expression values: \code{counts} (raw counts) and \code{logcounts} (log-transformed).}
13 | \item{rowData}{Empty in this example (no gene-level annotations).}
14 | \item{colData}{A \code{DataFrame} with 14 columns of cell metadata, including:}
15 | \itemize{
16 | \item orig.ident Original sample identity.
17 | \item nCount_RNA Total number of counts per cell.
18 | \item nFeature_RNA Number of detected genes per cell.
19 | \item cloneSize Size of each clone.
20 | \item ident Cluster assignment.
21 | }
22 | \item{reducedDims}{Contains dimensionality reductions: \code{PCA}, \code{pca}, and \code{apca}.}
23 | \item{altExp}{One alternative experiment named \code{BEAM} containing additional expression data.}
24 | }
25 | }
26 | \description{
27 | This object includes normalized gene expression values, metadata annotations,
28 | and B cell clonotype information derived from 10x V(D)J sequencing. It is intended
29 | as a small example dataset for testing and demonstration purposes.
30 | }
31 |
--------------------------------------------------------------------------------
/man/ibex_vdj.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/ibex_vdj.R
3 | \docType{data}
4 | \name{ibex_vdj}
5 | \alias{ibex_vdj}
6 | \title{Full filtered_annotated_contig.csv from the 10x
7 | 2k_BEAM-Ab_Mouse_HEL_5pv2}
8 | \format{
9 | A data frame with 6 rows and 35 columns:
10 | \describe{
11 | \item{barcode}{Character. Unique cell barcode.}
12 | \item{is_cell}{Logical. Whether the barcode is identified as a cell.}
13 | \item{contig_id}{Character. Unique identifier for each contig.}
14 | \item{high_confidence}{Logical. Whether the contig is high confidence.}
15 | \item{length}{Integer. Length of the contig.}
16 | \item{chain}{Character. Chain type (e.g., IGH, IGK).}
17 | \item{v_gene}{Character. V gene annotation.}
18 | \item{d_gene}{Character. D gene annotation.}
19 | \item{j_gene}{Character. J gene annotation.}
20 | \item{c_gene}{Character. C gene annotation.}
21 | \item{full_length}{Logical. Whether the contig is full-length.}
22 | \item{productive}{Logical. Whether the contig is productive.}
23 | \item{fwr1}{Character. Amino acid sequence for Framework Region 1.}
24 | \item{fwr1_nt}{Character. Nucleotide sequence for FWR1.}
25 | \item{cdr1}{Character. Amino acid sequence for CDR1.}
26 | \item{cdr1_nt}{Character. Nucleotide sequence for CDR1.}
27 | \item{fwr2}{Character. Amino acid sequence for FWR2.}
28 | \item{fwr2_nt}{Character. Nucleotide sequence for FWR2.}
29 | \item{cdr2}{Character. Amino acid sequence for CDR2.}
30 | \item{cdr2_nt}{Character. Nucleotide sequence for CDR2.}
31 | \item{fwr3}{Character. Amino acid sequence for FWR3.}
32 | \item{fwr3_nt}{Character. Nucleotide sequence for FWR3.}
33 | \item{cdr3}{Character. Amino acid sequence for CDR3.}
34 | \item{cdr3_nt}{Character. Nucleotide sequence for CDR3.}
35 | \item{fwr4}{Character. Amino acid sequence for FWR4.}
36 | \item{fwr4_nt}{Character. Nucleotide sequence for FWR4.}
37 | \item{reads}{Integer. Number of reads supporting the contig.}
38 | \item{umis}{Integer. Number of UMIs supporting the contig.}
39 | \item{raw_clonotype_id}{Character. Clonotype ID from 10x output.}
40 | \item{raw_consensus_id}{Character. Consensus ID from 10x output.}
41 | \item{exact_subclonotype_id}{Integer. Exact subclonotype grouping.}
42 | }
43 | }
44 | \description{
45 | This dataset contains single-cell V(D)J sequencing annotations
46 | from the 10x Genomics BEAM-Ab Mouse dataset. It includes V(D)J
47 | gene calls, CDR regions, productivity information, and clonotype
48 | assignments for each contig.
49 | }
50 |
--------------------------------------------------------------------------------
/man/quietBCRgenes.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/quietBCRgenes.R
3 | \name{quietBCRgenes}
4 | \alias{quietBCRgenes}
5 | \title{Remove BCR Genes from Variable Gene Results}
6 | \usage{
7 | quietBCRgenes(sc, assay = NULL)
8 | }
9 | \arguments{
10 | \item{sc}{A single-cell dataset, which can be:
11 | \itemize{
12 | \item A Seurat object
13 | \item A vector of variable genes generated by workflows such as Bioconductor's \code{scran}
14 | }}
15 |
16 | \item{assay}{Character. Specifies the Seurat assay slot to use for removing BCR genes.
17 | If \code{NULL}, the function defaults to the active assay in the Seurat object.}
18 | }
19 | \value{
20 | The input Seurat object or vector with BCR genes removed from the variable features.
21 | }
22 | \description{
23 | This function removes B-cell receptor (BCR) genes from the variable features of a
24 | single-cell dataset. Most single-cell workflows prioritize highly expressed and
25 | highly variable genes for principal component analysis (PCA) and dimensional
26 | reduction. By excluding BCR genes, this function ensures that the variable gene
27 | set focuses on biologically relevant features rather than highly variable BCR genes.
28 | }
29 | \examples{
30 | # Remove BCR genes from the variable features of a vector
31 | variable.genes <- c("IGHV1-69", "IGHV3-23", "IGHV4-34", "IGHV5-51", "IGHV6-1",
32 | "IGKV1-5", "IGKV3-20", "IGLV2-14", "IGLV3-21", "IGLV6-57",
33 | "TP53", "MYC", "BCL2", "CD19", "CD79A", "CD79B", "PAX5")
34 | variable.genes <- quietBCRgenes(variable.genes)
35 |
36 | }
37 |
--------------------------------------------------------------------------------
/man/runIbex.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/runIbex.R
3 | \name{runIbex}
4 | \alias{runIbex}
5 | \title{Ibex Single-Cell Calculation}
6 | \usage{
7 | runIbex(
8 | sc.data,
9 | chain = "Heavy",
10 | method = "encoder",
11 | encoder.model = "VAE",
12 | encoder.input = "atchleyFactors",
13 | geometric.theta = pi,
14 | reduction.name = "Ibex",
15 | species = "Human",
16 | verbose = TRUE
17 | )
18 | }
19 | \arguments{
20 | \item{sc.data}{A single-cell dataset, which can be:
21 | \itemize{
22 | \item A Seurat object
23 | \item A SingleCellExperiment object
24 | }}
25 |
26 | \item{chain}{Character. Specifies the chain to analyze:
27 | \itemize{
28 | \item "Heavy" for the heavy chain
29 | \item "Light" for the light chain
30 | }}
31 |
32 | \item{method}{Character. Algorithm to use for generating latent dimensions:
33 | \itemize{
34 | \item "encoder" - Uses deep learning autoencoders
35 | \item "geometric" - Uses geometric transformations based on the BLOSUM62 matrix
36 | }}
37 |
38 | \item{encoder.model}{Character. The type of autoencoder model to use:
39 | \itemize{
40 | \item "CNN" - CDR3 Convolutional Neural Network-based autoencoder
41 | \item "VAE" - CDR3 Variational Autoencoder
42 | \item "CNN.EXP" - CDR1/2/3 CNN
43 | \item "VAE.EXP" - CDR1/2/3 VAE
44 | }}
45 |
46 | \item{encoder.input}{Character. Input features for the encoder model:
47 | \itemize{
48 | \item Amino Acid Properties: "atchleyFactors", "crucianiProperties",
49 | "kideraFactors", "MSWHIM", "tScales"
50 | \item "OHE" - One Hot Encoding
51 | }}
52 |
53 | \item{geometric.theta}{Numeric. Angle (in radians) for geometric transformation.
54 | Used only when \code{method = "geometric"}.}
55 |
56 | \item{reduction.name}{Character. The name to assign to the dimensional reduction.
57 | This is useful for running Ibex with multiple parameter settings and saving results
58 | under different names.}
59 |
60 | \item{species}{Character. Species of the model to use: "Human" (the default) or "Mouse".}
61 |
62 | \item{verbose}{Logical. Whether to print progress messages. Default is TRUE.}
63 | }
64 | \value{
65 | An updated Seurat or SingleCellExperiment object with Ibex dimensions added
66 | to the dimensional reduction slot.
67 | }
68 | \description{
69 | This function applies the Ibex algorithm to single-cell data, integrating
70 | seamlessly with Seurat or SingleCellExperiment pipelines. The algorithm
71 | generates latent dimensions using deep learning or geometric transformations,
72 | storing the results in the dimensional reduction slot. \code{runIbex} will
73 | automatically subset the single-cell object based on amino acid sequences
74 | present for the given chain selection.
75 | }
76 | \examples{
77 | # Using the encoder method with a variational autoencoder
78 | ibex_example <- runIbex(ibex_example,
79 | chain = "Heavy",
80 | method = "encoder",
81 | encoder.model = "VAE",
82 | encoder.input = "atchleyFactors")
83 |
84 | # Using the geometric method with a specified angle
85 | ibex_example <- runIbex(ibex_example,
86 | chain = "Heavy",
87 | method = "geometric",
88 | geometric.theta = pi)
89 |
90 | }
91 |
--------------------------------------------------------------------------------
/tests/spelling.R:
--------------------------------------------------------------------------------
1 | # Run the package spell check (vignettes included), but only when the
2 | # optional 'spelling' package is installed.
3 | if (requireNamespace("spelling", quietly = TRUE)) {
4 |   spelling::spell_check_test(
5 |     vignettes = TRUE,
6 |     error = FALSE,
7 |     skip_on_cran = TRUE
8 |   )
9 | }
10 |
--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | # This file is part of the standard setup for testthat.
2 | # It is recommended that you do not modify it.
3 | #
4 | # Where should you do additional test configuration?
5 | # Learn more about the roles of various files in:
6 | # * https://r-pkgs.org/tests.html
7 | # * https://testthat.r-lib.org/reference/test_package.html#special-files
8 |
9 | library(testthat)
10 | library(Ibex)
11 |
12 | test_check("Ibex")
13 |
--------------------------------------------------------------------------------
/tests/testthat/basiliskEnv.R:
--------------------------------------------------------------------------------
1 | # Basilisk environment object for Ibex, configured from the
2 | # 'ibex-basilisk.yaml' file located under 'extdata' in the installed package.
3 | # NOTE(review): confirm that `path` is the argument name expected by the
4 | # BasiliskEnvironment() constructor of the basilisk version in use.
5 | #' @import basilisk
6 | IbexEnv <- BasiliskEnvironment(
7 | envname = "IbexEnv",
8 | pkgname = "Ibex",
9 | path = system.file("extdata", "ibex-basilisk.yaml", package = "Ibex")
10 | )
11 |
--------------------------------------------------------------------------------
/tests/testthat/helper-testingFunctions.R:
--------------------------------------------------------------------------------
1 | # Load a stored test fixture.
2 | #
3 | # @param dir  Sub-directory of `testdata/` that holds the fixture.
4 | # @param name Fixture file name without the `.rds` extension.
5 | # @return The R object read from `testdata/<dir>/<name>.rds`.
6 | #
7 | # The path is relative to the current working directory
8 | # (testdata could be moved one directory level up instead).
9 | getdata <- function(dir, name) {
10 |   readRDS(file.path("testdata", dir, paste0(name, ".rds")))
11 | }
--------------------------------------------------------------------------------
/tests/testthat/test-CoNGAfy.R:
--------------------------------------------------------------------------------
1 | # test script for CoNGAfy.R - testcases are NOT comprehensive!
2 |
3 | test_that("CoNGAfy works with Seurat object", {
4 |   # NOTE(review): title says Seurat, but the vignette builds ibex_example as an SCE -- confirm.
5 |   conga_mean <- CoNGAfy(ibex_example, method = "mean")
6 |   expect_s4_class(conga_mean, "SingleCellExperiment")
7 |   expect_gt(ncol(conga_mean), 0)
8 |   expect_gt(nrow(conga_mean), 0)
9 | })
10 |
11 |
12 | test_that("CoNGAfy works with dist method", {
13 |   # The "dist" method should yield a non-empty SingleCellExperiment.
14 |   conga_dist <- CoNGAfy(ibex_example, method = "dist")
15 |   expect_s4_class(conga_dist, "SingleCellExperiment")
16 |   expect_gt(ncol(conga_dist), 0)
17 |   expect_gt(nrow(conga_dist), 0)
18 | })
19 |
20 | test_that("CoNGAfy filters cells correctly", {
21 |   # The bundled example is expected to collapse to exactly 52 columns.
22 |   expect_equal(ncol(CoNGAfy(ibex_example, method = "mean")), 52)
23 | })
24 |
25 | test_that("CoNGAfy stops if amino acid sequences are missing", {
26 |   # Seurat object without any 'CTaa' metadata. The constructor is namespace-
27 |   # qualified so the test does not rely on SeuratObject being attached.
28 |   counts <- matrix(rnorm(1000), nrow = 10, ncol = 100)
29 |   sc_example <- suppressWarnings(SeuratObject::CreateSeuratObject(counts = counts))
30 |   expect_error(CoNGAfy(sc_example, method = "mean"),
31 |                "'CTaa' not found in this Seurat object\n ")
32 | })
33 |
34 | test_that("CoNGA.dist selects representative cells correctly", {
35 |   # Internal helper called directly on the RNA assay with features = NULL.
36 |   dist_mat <- .CoNGA.dist(ibex_example, features = NULL, assay = "RNA")
37 |   expect_s4_class(dist_mat, "dgCMatrix")
38 |   expect_gt(ncol(dist_mat), 0)
39 |   expect_gt(nrow(dist_mat), 0)
40 | })
41 |
42 | test_that("CoNGA.mean computes mean expression per clonotype", {
43 |   # Internal helper called directly on the RNA assay with features = NULL.
44 |   mean_mat <- .CoNGA.mean(ibex_example, features = NULL, assay = "RNA")
45 |   expect_s4_class(mean_mat, "dgCMatrix")
46 |   expect_gt(ncol(mean_mat), 0)
47 |   expect_gt(nrow(mean_mat), 0)
48 | })
49 |
--------------------------------------------------------------------------------
/tests/testthat/test-Ibex.matrix.R:
--------------------------------------------------------------------------------
1 | # test script for Ibex.matrix.R - testcases are NOT comprehensive!
2 |
3 | test_that("Ibex.matrix handles incorrect inputs gracefully", {
4 |   # Every invalid argument must be rejected with the corresponding error message.
5 |   bad_call <- function(...) Ibex.matrix(input.data = ibex_example, ...)
6 |   expect_error(bad_call(chain = "Middle", method = "encoder"), "'arg' should be one of \"Heavy\", \"Light\"")
7 |   expect_error(bad_call(chain = "Heavy", method = "xyz"), "'arg' should be one of \"encoder\", \"geometric\"")
8 |   expect_error(bad_call(chain = "Heavy", method = "encoder", encoder.model = "ABC"),
9 |                "'arg' should be one of \"CNN\", \"VAE\", \"CNN.EXP\", \"VAE.EXP\"")
10 |   expect_error(bad_call(chain = "Heavy", method = "encoder", encoder.input = "XYZ"),
11 |                "'arg' should be one of \"atchleyFactors\", \"crucianiProperties\", \"kideraFactors\", \"MSWHIM\", \"tScales\", \"OHE\"")
12 |   expect_error(bad_call(chain = "Heavy", method = "geometric", geometric.theta = "not_numeric"),
13 |                "non-numeric argument to mathematical function")
14 | })
15 |
16 | test_that("Ibex.matrix returns expected output format", {
17 |   # Heavy-chain VAE encoding on Atchley factors: expect a non-empty data
18 |   # frame whose column names all start with "Ibex_".
19 |   latent <- Ibex.matrix(
20 |     input.data = ibex_example, chain = "Heavy", method = "encoder",
21 |     encoder.model = "VAE", encoder.input = "atchleyFactors", verbose = FALSE
22 |   )
23 |   expect_true(is.data.frame(latent))
24 |   expect_true(all(grepl("^Ibex_", colnames(latent))))
25 |   expect_gt(nrow(latent), 0)
26 |   expect_gt(ncol(latent), 0)
27 | })
28 |
29 | test_that("Ibex.matrix works with encoder method", {
30 |   # Light-chain CNN encoder fed with one-hot encoded input.
31 |   latent <- Ibex.matrix(
32 |     input.data = ibex_example, chain = "Light", method = "encoder",
33 |     encoder.model = "CNN", encoder.input = "OHE", verbose = FALSE
34 |   )
35 |   column_ok <- grepl("^Ibex_", colnames(latent))
36 |   expect_true(is.data.frame(latent))
37 |   expect_true(all(column_ok))
38 | })
39 |
40 | test_that("Ibex.matrix works with geometric method", {
41 |   # Geometric transformation of the heavy chain with theta = pi / 4.
42 |   geo <- Ibex.matrix(
43 |     input.data = ibex_example, chain = "Heavy", method = "geometric",
44 |     geometric.theta = pi / 4, verbose = FALSE
45 |   )
46 |   expect_true(is.data.frame(geo))
47 |   expect_true(all(grepl("^Ibex_", colnames(geo))))
48 | })
49 |
50 | test_that("Ibex.matrix handles different species options", {
51 |   # Same heavy-chain VAE encoding, run once per species value.
52 |   run_for <- function(sp) {
53 |     Ibex.matrix(
54 |       input.data = ibex_example,
55 |       chain = "Heavy",
56 |       method = "encoder",
57 |       encoder.model = "VAE",
58 |       encoder.input = "atchleyFactors",
59 |       species = sp,
60 |       verbose = FALSE
61 |     )
62 |   }
63 |   human_res <- run_for("Human")
64 |   mouse_res <- run_for("Mouse")
65 |   expect_true(is.data.frame(human_res))
66 |   expect_true(is.data.frame(mouse_res))
67 |   expect_true(all(grepl("^Ibex_", colnames(human_res))))
68 |   expect_true(all(grepl("^Ibex_", colnames(mouse_res))))
69 | })
70 |
71 |
--------------------------------------------------------------------------------
/tests/testthat/test-combineExpandedBCR.R:
--------------------------------------------------------------------------------
1 | # test script for combineExpandedBCR.R - testcases are NOT comprehensive!
2 |
3 | test_that("combineExpandedBCR handles incorrect input gracefully", {
4 |   # NULL input, then a data frame that lacks the 'cdr2' column.
5 |   expect_error(combineExpandedBCR(NULL, samples = "Sample1"),
6 |                regexp = "Input data must be a list of data frames.")
7 |   missing_cdr2 <- list(data.frame(cdr1 = c("AA", "BB"), cdr3 = c("CC", "DD")))
8 |   expect_error(combineExpandedBCR(missing_cdr2, samples = "Sample1"),
9 |                regexp = "Each data frame must contain 'cdr1', 'cdr2', and 'cdr3' columns.")
10 | })
11 |
12 | test_that("combineExpandedBCR correctly concatenates CDR sequences", {
13 |   # At least one CTaa entry is expected to contain a "-" character.
14 |   expanded <- combineExpandedBCR(list(ibex_vdj), samples = "Sample1")
15 |   ctaa <- expanded[[1]]$CTaa
16 |   expect_true(any(grepl("-", ctaa)))
17 | })
18 |
19 | test_that("combineExpandedBCR integrates correctly with combineBCR", {
20 |   # Expect a list whose first element is non-empty and has barcode and CTaa columns.
21 |   combined <- combineExpandedBCR(list(ibex_vdj), samples = "Sample1")
22 |   expect_true(is.list(combined))
23 |   expect_true(all(c("barcode", "CTaa") %in% colnames(combined[[1]])))
24 |   expect_gt(nrow(combined[[1]]), 0)
25 | })
26 |
27 | test_that("combineExpandedBCR correctly assigns sample labels", {
28 |   # The label passed via `samples` should show up in the "sample" column.
29 |   labelled <- combineExpandedBCR(list(ibex_vdj), samples = "Sample1")
30 |   first_sample <- labelled[[1]]
31 |   expect_true("sample" %in% colnames(first_sample))
32 |   expect_equal(first_sample$sample[1], "Sample1")
33 | })
34 |
35 | test_that("combineExpandedBCR handles multiple sample inputs correctly", {
36 |   # Two inputs give two outputs, each labelled with its own sample name.
37 |   sample_ids <- c("Sample1", "Sample2")
38 |   per_sample <- combineExpandedBCR(list(ibex_vdj, ibex_vdj), samples = sample_ids)
39 |   expect_length(per_sample, 2)
40 |   expect_equal(per_sample[[1]]$sample[1], sample_ids[1])
41 |   expect_equal(per_sample[[2]]$sample[1], sample_ids[2])
42 | })
43 |
44 |
45 |
--------------------------------------------------------------------------------
/tests/testthat/test-quietBCRgenes.R:
--------------------------------------------------------------------------------
1 | # test script for quietBCRgenes.R - testcases are NOT comprehensive!
2 |
3 | test_that("quietBCRgenes works", {
4 |   # Filter the row names of the example's counts matrix and compare the
5 |   # result against the stored reference vector.
6 |   data("ibex_example")
7 |   feature_names <- rownames(ibex_example@assays@data$counts)
8 |   filtered <- quietBCRgenes(feature_names)
9 |   reference <- getdata("quietBCRgenes", "quietBCRgenes_feature.vector")
10 |   expect_equal(filtered, reference)
11 | })
--------------------------------------------------------------------------------
/tests/testthat/test-runIbex.R:
--------------------------------------------------------------------------------
1 | # test script for runIbex.R - testcases are NOT comprehensive!
2 | test_that("runIbex handles incorrect inputs gracefully", {
3 |   # Every invalid argument must be rejected with the corresponding error message.
4 |   bad_call <- function(...) runIbex(sc.data = ibex_example, ...)
5 |   expect_error(bad_call(chain = "Middle", method = "encoder"), "'arg' should be one of \"Heavy\", \"Light\"")
6 |   expect_error(bad_call(chain = "Heavy", method = "xyz"), "'arg' should be one of \"encoder\", \"geometric\"")
7 |   expect_error(bad_call(chain = "Heavy", method = "encoder", encoder.model = "ABC"),
8 |                "'arg' should be one of \"CNN\", \"VAE\", \"CNN.EXP\", \"VAE.EXP\"")
9 |   expect_error(bad_call(chain = "Heavy", method = "encoder", encoder.input = "XYZ"),
10 |                "'arg' should be one of \"atchleyFactors\", \"crucianiProperties\", \"kideraFactors\", \"MSWHIM\", \"tScales\", \"OHE\"")
11 |   expect_error(bad_call(chain = "Heavy", method = "geometric", geometric.theta = "not_numeric"),
12 |                "non-numeric argument to mathematical function")
13 | })
14 |
15 | test_that("runIbex works with Seurat object", {
16 |   # Synthetic 10 x 100 Seurat object. The constructor is namespace-qualified
17 |   # (as in the sibling tests) so it works without SeuratObject being attached.
18 |   counts <- matrix(rnorm(1000), nrow = 10, ncol = 100)
19 |   sc_example <- suppressWarnings(SeuratObject::CreateSeuratObject(counts = counts))
20 |   sc_example[["CTaa"]] <- sample(c("CASSL", "CASST", NA, "NA_IGHV1", "None_IGHV2"), 100, replace = TRUE)
21 |   sc_example[["CTgene"]] <- sample(c("NA_IGHV1.IGD1.IGJ1.IGM", "NA_IGHV1.IGD1.IGJ1.IGM", NA, "NA_IGHV1.IGD1.IGJ1.IGM", "None_IGHV1.IGD1.IGJ1.IGM"), 100, replace = TRUE)
22 |
23 |   result <- runIbex(sc_example, chain = "Heavy", method = "encoder",
24 |                     encoder.model = "VAE", encoder.input = "atchleyFactors",
25 |                     reduction.name = "IbexTest", verbose = FALSE)
26 |
27 |   # The reduction must be stored under the requested name on a Seurat object.
28 |   expect_true("IbexTest" %in% names(result@reductions))
29 |   expect_true(inherits(result, "Seurat"))
30 | })
31 |
32 | test_that("runIbex works with geometric method", {
33 |   # Random 10 x 100 matrix wrapped in a Seurat object, then annotated with
34 |   # randomly drawn CTaa / CTgene metadata (the pools include NA).
35 |   expr <- matrix(rnorm(1000), nrow = 10, ncol = 100)
36 |   sc_example <- suppressWarnings(SeuratObject::CreateSeuratObject(counts = expr))
37 |   sc_example[["CTaa"]] <- sample(c("CASSL", "CASST", NA, "NA_IGHV1", "None_IGHV2"), 100, replace = TRUE)
38 |   sc_example[["CTgene"]] <- sample(c("NA_IGHV1.IGD1.IGJ1.IGM", "NA_IGHV1.IGD1.IGJ1.IGM", NA, "NA_IGHV1.IGD1.IGJ1.IGM", "None_IGHV1.IGD1.IGJ1.IGM"), 100, replace = TRUE)
39 |
40 |   geo <- runIbex(
41 |     sc_example, chain = "Heavy", method = "geometric",
42 |     geometric.theta = pi / 4, reduction.name = "IbexGeo", verbose = FALSE
43 |   )
44 |   expect_true("IbexGeo" %in% names(geo@reductions))
45 |   expect_s4_class(geo, "Seurat")
46 | })
47 |
48 | test_that("runIbex filters cells correctly", {
49 |   # Half of the 100 cells get a CTaa value and half get NA. The constructor is
50 |   # namespace-qualified so the test does not need SeuratObject attached.
51 |   counts <- matrix(rnorm(1000), nrow = 10, ncol = 100)
52 |   sc_example <- suppressWarnings(SeuratObject::CreateSeuratObject(counts = counts))
53 |   sc_example[["CTaa"]] <- c(rep("CASSL", 50), rep(NA, 50))
54 |   sc_example[["CTgene"]] <- sample(c("NA_IGHV1.IGD1.IGJ1.IGM", "NA_IGHV1.IGD1.IGJ1.IGM", NA, "NA_IGHV1.IGD1.IGJ1.IGM", "None_IGHV1.IGD1.IGJ1.IGM"), 100, replace = TRUE)
55 |
56 |   result <- runIbex(sc_example, chain = "Heavy", method = "encoder",
57 |                     encoder.model = "VAE", encoder.input = "atchleyFactors",
58 |                     reduction.name = "IbexFiltered", verbose = FALSE)
59 |
60 |   expect_true("IbexFiltered" %in% names(result@reductions))
61 |   expect_lt(ncol(result), 100) # Ensures some cells were filtered out
62 | })
63 |
64 | test_that("runIbex stops if amino acid sequences are missing", {
65 |   # A Seurat object without CTaa metadata must trigger the informative error.
66 |   expr <- matrix(rnorm(1000), nrow = 10, ncol = 100)
67 |   no_ctaa <- suppressWarnings(SeuratObject::CreateSeuratObject(counts = expr))
68 |
69 |   expect_error(
70 |     runIbex(no_ctaa, chain = "Heavy", method = "encoder",
71 |             encoder.model = "VAE", encoder.input = "atchleyFactors", verbose = FALSE),
72 |     "Amino acid sequences are not added to the single-cell object correctly."
73 |   )
74 | })
75 |
76 | test_that("runIbex works with different reduction names", {
77 |   # Random 10 x 100 matrix wrapped in a Seurat object, then annotated with
78 |   # randomly drawn CTaa / CTgene metadata (the pools include NA).
79 |   expr <- matrix(rnorm(1000), nrow = 10, ncol = 100)
80 |   sc_example <- suppressWarnings(SeuratObject::CreateSeuratObject(counts = expr))
81 |   sc_example[["CTaa"]] <- sample(c("CASSL", "CASST", NA, "NA_IGHV1", "None_IGHV2"), 100, replace = TRUE)
82 |   sc_example[["CTgene"]] <- sample(c("NA_IGHV1.IGD1.IGJ1.IGM", "NA_IGHV1.IGD1.IGJ1.IGM", NA, "NA_IGHV1.IGD1.IGJ1.IGM", "None_IGHV1.IGD1.IGJ1.IGM"), 100, replace = TRUE)
83 |
84 |   # Identical encoder settings for both runs; only the reduction name varies.
85 |   run_named <- function(reduction) {
86 |     runIbex(
87 |       sc_example, chain = "Heavy", method = "encoder",
88 |       encoder.model = "VAE", encoder.input = "atchleyFactors",
89 |       reduction.name = reduction, verbose = FALSE
90 |     )
91 |   }
92 |   result1 <- run_named("Ibex1")
93 |   result2 <- run_named("Ibex2")
94 |
95 |   expect_true("Ibex1" %in% names(result1@reductions))
96 |   expect_true("Ibex2" %in% names(result2@reductions))
97 | })
98 |
99 |
--------------------------------------------------------------------------------
/tests/testthat/testdata/CoNGAfy/CoNGAfy_counts.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BorchLab/Ibex/543ce724c3f01bebedb0db705873788509b5e8a7/tests/testthat/testdata/CoNGAfy/CoNGAfy_counts.rds
--------------------------------------------------------------------------------
/tests/testthat/testdata/CoNGAfy/CoNGAfy_mean_counts.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BorchLab/Ibex/543ce724c3f01bebedb0db705873788509b5e8a7/tests/testthat/testdata/CoNGAfy/CoNGAfy_mean_counts.rds
--------------------------------------------------------------------------------
/tests/testthat/testdata/CoNGAfy/CoNGAfy_mean_meta.data.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BorchLab/Ibex/543ce724c3f01bebedb0db705873788509b5e8a7/tests/testthat/testdata/CoNGAfy/CoNGAfy_mean_meta.data.rds
--------------------------------------------------------------------------------
/tests/testthat/testdata/CoNGAfy/CoNGAfy_meta.data.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BorchLab/Ibex/543ce724c3f01bebedb0db705873788509b5e8a7/tests/testthat/testdata/CoNGAfy/CoNGAfy_meta.data.rds
--------------------------------------------------------------------------------
/tests/testthat/testdata/quietBCRgenes/quietBCRgenes_feature.vector.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BorchLab/Ibex/543ce724c3f01bebedb0db705873788509b5e8a7/tests/testthat/testdata/quietBCRgenes/quietBCRgenes_feature.vector.rds
--------------------------------------------------------------------------------
/tests/testthat/testdata/runIbex/ibex.matrix_Heavy_VAE_AF.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BorchLab/Ibex/543ce724c3f01bebedb0db705873788509b5e8a7/tests/testthat/testdata/runIbex/ibex.matrix_Heavy_VAE_AF.rds
--------------------------------------------------------------------------------
/tests/testthat/testdata/runIbex/ibex.matrix_Light_AE_OHE.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BorchLab/Ibex/543ce724c3f01bebedb0db705873788509b5e8a7/tests/testthat/testdata/runIbex/ibex.matrix_Light_AE_OHE.rds
--------------------------------------------------------------------------------
/tests/testthat/testdata/runIbex/runIbex_Heavy_VAE_AF_reduction.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BorchLab/Ibex/543ce724c3f01bebedb0db705873788509b5e8a7/tests/testthat/testdata/runIbex/runIbex_Heavy_VAE_AF_reduction.rds
--------------------------------------------------------------------------------
/tests/testthat/testdata/runIbex/runIbex_Heavy_VAE_OHE_reduction.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BorchLab/Ibex/543ce724c3f01bebedb0db705873788509b5e8a7/tests/testthat/testdata/runIbex/runIbex_Heavy_VAE_OHE_reduction.rds
--------------------------------------------------------------------------------
/tests/testthat/testdata/runIbex/runIbex_Heavy_geometric_reduction.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BorchLab/Ibex/543ce724c3f01bebedb0db705873788509b5e8a7/tests/testthat/testdata/runIbex/runIbex_Heavy_geometric_reduction.rds
--------------------------------------------------------------------------------
/tests/testthat/testdata/runIbex/runIbex_Light_AE_KF_reduction.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BorchLab/Ibex/543ce724c3f01bebedb0db705873788509b5e8a7/tests/testthat/testdata/runIbex/runIbex_Light_AE_KF_reduction.rds
--------------------------------------------------------------------------------
/vignettes/Ibex.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: A tour of Ibex.
3 | author:
4 | - name: Nick Borcherding
5 | email: ncborch@gmail.com
6 | affiliation: Washington University in St. Louis, School of Medicine, St. Louis, MO, USA
7 | date: 'Compiled: `r format(Sys.Date(), "%B %d, %Y")`'
8 | output:
9 | BiocStyle::html_document:
10 | toc_float: true
11 | package: Ibex
12 | vignette: >
13 | %\VignetteEngine{knitr::knitr}
14 | %\VignetteIndexEntry{Charging through Ibex}
15 | %\usepackage[UTF-8]{inputenc}
16 | ---
17 |
18 | ```{r include=FALSE}
19 | # Create a temporary directory and register it as the basilisk cache
20 | # (BASILISK_CACHE_DIR) for this R session.
21 | temp_cache <- file.path(tempdir(), "basilisk_cache")
22 | dir.create(temp_cache, recursive = TRUE, showWarnings = FALSE)
23 | Sys.setenv(BASILISK_CACHE_DIR = temp_cache)
24 |
25 | knitr::opts_chunk$set(error=FALSE, message=FALSE, warning=FALSE, tidy = FALSE)
26 | library(BiocStyle)
27 | ```
28 |
29 | # Introduction
30 |
31 | ## Load Libraries
32 |
33 | ```{r tidy = FALSE}
34 | suppressMessages(library(bluster))
35 | suppressMessages(library(dplyr))
36 | suppressMessages(library(ggplot2))
37 | suppressMessages(library(Ibex))
38 | suppressMessages(library(kableExtra))
39 | suppressMessages(library(mumosa))
40 | suppressMessages(library(patchwork))
41 | suppressMessages(library(scater))
42 | suppressMessages(library(viridis))
43 | suppressMessages(library(utils))
44 | ```
45 |
46 | ```{r include = FALSE}
47 | # Best-effort warm-up of the Ibex basilisk environment while the vignette builds
48 | setup_basilisk_env <- function() {
49 |   # Recreate the configured cache directory if it does not exist
50 |   cache_dir <- Sys.getenv("BASILISK_CACHE_DIR")
51 |   if (nzchar(cache_dir) && !dir.exists(cache_dir)) {
52 |     dir.create(cache_dir, recursive = TRUE, showWarnings = FALSE)
53 |   }
54 |
55 |   # Bail out early when the environment variables indicate a package check
56 |   under_check <- identical(Sys.getenv("_R_CHECK_PACKAGE_NAME_"), "Ibex") ||
57 |     identical(Sys.getenv("R_CMD"), "check")
58 |   if (under_check) {
59 |     return(message("Skipping Basilisk initialization during package check/build"))
60 |   }
61 |   # Import keras inside the Ibex environment; on failure, report and carry on
62 |   tryCatch(
63 |     basilisk::basiliskRun(env = Ibex:::IbexEnv, fun = function() {
64 |       reticulate::import("keras")
65 |       invisible(NULL)
66 |     }),
67 |     error = function(e) {
68 |       message("Basilisk environment initialization skipped during build: ", e$message)
69 |       NULL
70 |     }
71 |   )
72 | }
73 | # Run the setup
74 | setup_basilisk_env()
75 | ```
76 |
77 | ## The Data Set
78 |
79 | The data used here are derived from 10x Genomics’ 2k BEAM-Ab Mouse HEL data set, consisting of splenocytes from transgenic mice engineered to recognize Hen Egg Lysozyme (HEL). These splenocytes were labeled with a small antigen panel: SARS-TRI-S, gp120, H5N1, and a negative control.
80 |
81 | To illustrate the Ibex framework, we subset to a smaller set of 200 cells (including some dominant clones) and convert the Seurat object into a SingleCellExperiment. The resulting “ibex_example” object stores all the necessary data—RNA expression, antigen capture (BEAM) features, BCR contig annotations, and computed dimensional reductions—ready for downstream Ibex analyses. The object is saved (`ibex_example.rda`), along with the contig information (`ibex_vdj.rda`), ensuring that the integrated data set can be readily reloaded and explored in subsequent steps.
82 |
83 | ```{r, eval=FALSE}
84 | library(scRepertoire)
85 | library(Seurat)
86 | library(dplyr)
87 | library(SummarizedExperiment)
88 | library(SingleCellExperiment)
89 | ##################################
90 | # scRNA/ADT loading and processing
91 | #################################
92 | # Read the filtered feature-barcode matrix from the local data directory
93 | tmp <- Read10X("~/data/filtered_feature_bc_matrix")
94 | # Gene Expression counts form the Seurat object; Antigen Capture counts become a separate assay
95 | SeuratObj <- CreateSeuratObject(counts = tmp$`Gene Expression`)
96 | beam_assay <- CreateAssayObject(counts = tmp$`Antigen Capture`)
97 | # Attach the BEAM assay, keep cells with > 100 RNA features, prefix cell names, compute % of "^mt-" genes
98 | SeuratObj[["BEAM"]] <- beam_assay
99 | SeuratObj <- subset(SeuratObj, subset = nFeature_RNA > 100)
100 | SeuratObj <- RenameCells(object = SeuratObj , new.names = paste0("BEAM.sample_", rownames(SeuratObj[[]])))
101 | SeuratObj[["mito.genes"]] <- PercentageFeatureSet(SeuratObj, pattern = "^mt-")
102 |
103 | # Filtering step: upper cutoff = exp(mean(log(nFeature_RNA)) + 2.5 * sd(log(nFeature_RNA)))
104 | standev <- sd(log(SeuratObj$nFeature_RNA))*2.5 # 2.5 standard deviations on the log scale
105 | mean <- mean(log(SeuratObj$nFeature_RNA))
106 | cut <- round(exp(standev+mean))
107 | SeuratObj <- subset(SeuratObj, subset = mito.genes < 10 & nFeature_RNA < cut)
108 | # NOTE(review): the variable `mean` above masks base::mean for the rest of the script
109 | # Processing and adding contig info: clones are called per barcode and attached by amino acid sequence
110 | contigs <- read.csv("~/data/2k_BEAM-Ab_Mouse_HEL_5pv2_2k_BEAM-Ab_Mouse_HEL_5pv2_vdj_b_filtered_contig_annotations.csv")
111 | clones <- combineBCR(contigs, samples = "BEAM.sample", removeNA = TRUE)
112 | SeuratObj <- combineExpression(clones, SeuratObj, cloneCall="aa")
113 | # NOTE(review): the piece before "_" is a string, so the second is.na() test may never be TRUE -- confirm
114 | # Subset only cells with BCR and Heavy Chain
115 | cell.idx <- intersect(which(!is.na(SeuratObj$CTaa)), which(!is.na(stringr::str_split(SeuratObj$CTaa, "_", simplify = TRUE)[,1])))
116 | SeuratObj <- subset(SeuratObj, cells = colnames(SeuratObj)[cell.idx])
117 |
118 | # Processing RNA: normalize, find variable features, drop BCR genes from them, scale, run PCA
119 | DefaultAssay(SeuratObj) <- 'RNA'
120 | SeuratObj <- NormalizeData(SeuratObj, verbose = FALSE) %>%
121 | FindVariableFeatures(verbose = FALSE) %>%
122 | quietBCRgenes() %>%
123 | ScaleData(verbose = FALSE) %>%
124 | RunPCA(verbose = FALSE)
125 |
126 | # Removing negative control + B Cells: keep cells whose `negative-control` BEAM count is below 100
127 | DefaultAssay(SeuratObj) <- 'BEAM'
128 | SeuratObj <- subset(SeuratObj, subset = `negative-control` < 100, slot = "counts")
129 |
130 | # Processing BEAM: treat every BEAM feature as variable, CLR-normalize, scale, run PCA ('apca')
131 | VariableFeatures(SeuratObj) <- rownames(SeuratObj[["BEAM"]])
132 | SeuratObj <- NormalizeData(SeuratObj,
133 |                            normalization.method = 'CLR',
134 |                            margin = 2) %>%
135 |   ScaleData(verbose = FALSE) %>%
136 |   RunPCA(verbose = FALSE, reduction.name = 'apca')
137 |
138 | DefaultAssay(SeuratObj) <- 'RNA'
139 | ###################################
140 | # Making Example Data Set for Ibex
141 | #################################
142 | # NOTE(review): sample() below draws 154 indices from all cells, not only the dominant clone -- confirm
143 | # Subset non-dominant clones (CTaa without "CANWDGDYW") + random sampling of 154 cells
144 | set.seed(42)
145 | cell.idx <- unique(c(which(!grepl("CANWDGDYW", SeuratObj$CTaa)), sample(seq_len(nrow(SeuratObj[[]])), 154)))
146 | # Save the full object before it is subset to the example cells
147 | ibex_example <- SeuratObj
148 | saveRDS(ibex_example, file = "Ibex_FullExample.rds")
149 |
150 | # Forming Example Data set in SCE format: keep embeddings and the first four BEAM features
151 | ibex_example <- subset(ibex_example, cells = colnames(ibex_example)[cell.idx])
152 | PCA <- Embeddings(ibex_example[["pca"]])
153 | APCA <- Embeddings(ibex_example[["apca"]])
154 | BEAM_counts <- GetAssayData(ibex_example, slot = "counts", assay = "BEAM")[1:4,]
155 | BEAM_data <- GetAssayData(ibex_example, slot = "data", assay = "BEAM")[1:4,]
156 | ibex_example <- as.SingleCellExperiment(ibex_example)
157 | altExp(ibex_example, "BEAM") <- SummarizedExperiment(
158 | assays = list(
159 | counts = as.matrix(BEAM_counts),
160 | logcounts = as.matrix(BEAM_data)
161 | ),
162 | colData = colData(ibex_example)
163 | )
164 | reducedDim(ibex_example, "pca") <- PCA
165 | reducedDim(ibex_example, "apca") <- APCA
166 |
167 | # Saving the built-in data sets (example object and the raw contig table)
168 | save(ibex_example, file = "ibex_example.rda", compress = "xz")
169 | ibex_vdj <- contigs
170 | save(ibex_vdj, file = "ibex_vdj.rda", compress = "xz")
171 | ```
172 |
173 | ### Loading the processed data
174 |
175 | ```{r, echo=FALSE}
176 | data("ibex_example")
177 | data("ibex_vdj")
178 | ```
179 |
180 | ## Getting Expanded Sequences
181 |
182 | The function ```combineExpandedBCR()``` extends the functionality of ```combineBCR()``` from the scRepertoire package by first concatenating the CDR1, CDR2, and CDR3 sequences into a single expanded variable. This approach retains additional information from the BCR variable regions before calling ```combineBCR()``` to consolidate BCR sequences into clones. The expanded sequences are required by the expanded-sequence (CDR1/2/3) models detailed below.
183 |
184 | ### **Function Parameters**
185 | The `combineExpandedBCR()` function supports the following parameters:
186 |
187 | | Parameter | Description | Default |
188 | |------------------------|---------------------------------------------------------------------------|---------|
189 | | `input.data` | List of data frames containing BCR sequencing results. | **Required** |
190 | | `samples` | Character vector labeling each sample. | **Required** |
191 | | `ID` | Additional sample labeling (optional). | `NULL` |
192 | | `call.related.clones` | Whether to group related clones using nucleotide sequences and V genes. | `TRUE` |
193 | | `threshold` | Normalized edit distance for clone clustering. | `0.85` |
194 | | `removeNA` | Remove chains without values. | `FALSE` |
195 | | `removeMulti` | Remove barcodes with more than two chains. | `FALSE` |
196 | | `filterMulti` | Select highest-expressing light and heavy chains. | `TRUE` |
197 | | `filterNonproductive` | Remove nonproductive chains if the column exists. | `TRUE` |
198 |
199 | ```{r tidy = FALSE}
200 | # Expand the example contigs and consolidate them into clones
201 | combined.BCR <- combineExpandedBCR(
202 |   input.data = list(ibex_vdj), samples = "Sample1", filterNonproductive = TRUE)
203 | head(combined.BCR[[1]][, c(1, 11)])
204 | ```
205 |
206 | We can attach the expanded sequences to the Seurat or Single-Cell Experiment objects using the scRepertoire [`combineExpression()`](https://www.borch.dev/uploads/screpertoire/reference/combineexpression) function.
207 |
208 | ## Available Models
209 |
210 | **Ibex** offers a diverse set of models built on various architectures and encoding methods. Currently, models are available for both heavy and light chain sequences in humans, as well as heavy chain models for mice. Models for CDR3-based sequences have been trained on sequences of 45 residues or fewer, while models for CDR1/2/3-based sequences are specific to sequences of 90 amino acids or fewer.
211 |
212 | A full list of available models is provided below:
213 |
214 | ```{r tidy = FALSE}
215 | # Read the bundled model metadata, keeping only columns 1, 2, and 8
216 | model.meta.data <- read.csv(
217 |   system.file("extdata", "metadata.csv", package = "Ibex")
218 | )[, c(1, 2, 8)]
219 | scroll_box(kable_styling(kable(model.meta.data, "html", escape = FALSE),
220 |                          full_width = FALSE), width = "100%", height = "400px")
221 |
222 | ```
223 |
224 | All the models are available via a [Zenodo repository](https://zenodo.org/records/14919286), which Ibex will pull automatically and cache locally for future use. There is no need to download the models independently of the ```runIbex()``` or ```Ibex.matrix()``` calls.
225 |
226 | ### Choosing Between CNN and VAE
227 |
228 | **Convolutional Neural Networks (CNNs)**
229 |
230 | * **Pros**: Detect local sequence motifs effectively; relatively straightforward and quick to train.
231 | * **Cons**: Can struggle to capture global context.
232 |
233 | **Variational Autoencoders (VAEs)**
234 |
235 | * **Pros**: Model sequences within a probabilistic, continuous latent space; suitable for generating novel variants.
236 | * **Cons**: Training can be more complex (balancing reconstruction and regularization losses); interpretability may be less direct.
237 |
238 | **Which to choose?**
239 |
240 | * **Use CNNs** if local motif detection and simpler training are priorities.
241 | * **Use VAEs** if you want a generative model capturing broader sequence structures.
242 |
243 | ### Choosing Encoding Methods
244 |
245 | **One-Hot Encoding:** Represents each amino acid as a binary vector (e.g., a 20-length vector for the 20 standard residues).
246 |
247 | * **Pros**: Simple and assumption-free.
248 | * **Cons**: High-dimensional and doesn’t capture biochemical similarities.
249 |
250 | **Atchley Factors:** Uses five numerical descriptors summarizing key physicochemical properties.
251 |
252 | * **Pros**: Compact and embeds biochemical information.
253 | * **Cons**: May overlook some residue-specific nuances.
254 |
255 | **Cruciani Properties:** Encodes amino acids via descriptors that reflect molecular shape, hydrophobicity, and electronic features.
256 |
257 | * **Pros**: Captures rich chemical details.
258 | * **Cons**: More complex to compute and less standardized.
259 |
260 | **Kidera Factors:** Provides ten orthogonal values derived from a broad set of physical and chemical properties.
261 |
262 | * **Pros**: Offers a balanced, low-dimensional representation.
263 | * **Cons**: Derived statistically, potentially averaging out finer details.
264 |
265 | **MSWHIM:** Derives descriptors from 3D structural data, summarizing overall shape and surface properties.
266 |
267 | * **Pros**: Provides robust, rotation-invariant structural insight.
268 | * **Cons**: Requires 3D information and can be computationally intensive.
269 |
270 | **tScales:** Encodes amino acids based on topological and structural features reflective of protein folding and interactions.
271 |
272 | * **Pros**: Captures contextual information from the overall sequence structure.
273 | * **Cons**: Less commonly used, making standardization and tool support a challenge.
274 |
275 | # Running Ibex
276 |
277 | The idea behind **Ibex** is to combine BCR CDR3 amino acid information with phenotypic RNA/protein data to direct the use of single-cell sequencing towards antigen-specific discoveries. This is a growing field - specifically, [TESSA](https://github.com/jcao89757/TESSA) uses amino acid characteristics and an autoencoder to obtain a dimensional reduction. Another option is [CoNGA](https://github.com/phbradley/conga), which produces an embedding using BCR and RNA. **Ibex** was designed to provide a customizable implementation of this combined approach in R.
278 |
279 | ## Ibex.matrix Function
280 |
281 | **Ibex** includes two primary functions: `Ibex.matrix()` and `runIbex()`. The `Ibex.matrix()` function serves as the backbone of the algorithm, returning encoded values based on user-selected parameters. In contrast to `runIbex()`, which filters input to include only B cells with attached BCR data, `Ibex.matrix()` operates on all provided data. Additionally, it is compatible with the list output from the `combineBCR()` function (from the [scRepertoire](https://github.com/BorchLab/scRepertoire) package), whereas `runIbex()` is designed for use with a single-cell object.
282 |
283 | ### Parameters
284 |
285 | - **chain**:
286 | Specifies the chain type. Options:
287 | - `"Heavy"` for Ig Heavy Chain
288 | - `"Light"` for Ig Light Chain
289 |
290 | - **method**:
291 | Chooses the transformation method. Options:
292 | - `"encoder"`: Applies a CNN/VAE-based transformation.
293 | - `"geometric"`: Uses a geometric transformation.
294 |
295 | - **encoder.model**:
296 | When using the `"encoder"` method, selects the specific model variant. Options:
297 | - `"CNN"`: CDR3 Convolutional Neural Network-based autoencoder
298 | - `"VAE"`: CDR3 Variational Autoencoder
299 | - `"CNN.EXP"`: CDR1/2/3 CNN
300 | - `"VAE.EXP"`: CDR1/2/3 VAE
301 |
302 | - **encoder.input**:
303 | Specifies the encoding input method. Options:
304 | - `"atchleyFactors"`
305 | - `"crucianiProperties"`
306 | - `"kideraFactors"`
307 | - `"MSWHIM"`
308 | - `"tScales"`
309 | - `"OHE"`
310 |
311 | - **geometric.theta**:
312 | For the geometric transformation, defines the value of theta (default is π/3).
313 |
314 | ```{r tidy = FALSE}
315 | # Encoder method: heavy chain, VAE model, one-hot ("OHE") input
316 | Ibex_vectors <- Ibex.matrix(
317 |   ibex_example,
318 |   chain = "Heavy", method = "encoder",
319 |   encoder.model = "VAE", encoder.input = "OHE",
320 |   species = "Mouse", verbose = FALSE
321 | )
322 |
323 | ggplot(as.data.frame(Ibex_vectors), aes(x = Ibex_1, y = Ibex_2)) +
324 |   geom_point(size = 2, alpha = 0.7, color = "grey") +
325 |   theme_classic()
326 |
327 | # Geometric method: heavy chain, theta set to pi
328 | Ibex_vectors2 <- Ibex.matrix(
329 |   ibex_example,
330 |   chain = "Heavy", method = "geometric",
331 |   geometric.theta = pi, verbose = FALSE
332 | )
333 | ggplot(as.data.frame(Ibex_vectors2), aes(x = Ibex_1, y = Ibex_2)) +
334 |   geom_point(size = 2, alpha = 0.7, color = "grey") +
335 |   theme_classic()
336 | ```
337 |
338 | ## runIbex
339 |
340 | Additionally, ```runIbex()``` can be used to add the Ibex vectors to a Seurat or SingleCellExperiment object as a dimensional reduction for further analysis. Importantly, ```runIbex()``` will remove single cells that do not have recovered BCR data in the metadata of the object.
341 |
342 | ```{r tidy = FALSE}
343 | # Add Kidera-factor heavy-chain encodings to the object as "Ibex.KF"
344 | ibex_example <- runIbex(
345 |   ibex_example,
346 |   chain = "Heavy", species = "Mouse", verbose = FALSE,
347 |   encoder.input = "kideraFactors", reduction.name = "Ibex.KF"
348 | )
349 | ```
350 |
351 | ## Using Ibex Vectors
352 |
353 | After ```runIbex()```, the encoded values are stored under the name given to `reduction.name` (here **"Ibex.KF"**). Using the Ibex dimensions, we can calculate a UMAP based solely on the embedded heavy chain values. Here we will visualize both the Heavy/Light Chain amino acid sequence (via **CTaa**) and normalized counts associated with the **Anti-Hen-Egg-Lysozyme** antigen.
354 |
355 | ```{r tidy = FALSE}
356 | # Fix the RNG state before computing the UMAP layout
357 | set.seed(123)
358 | # UMAP computed from the "Ibex.KF" reduction only
359 | ibex_example <- runUMAP(ibex_example, dimred = "Ibex.KF", name = "ibexUMAP")
360 | # First panel: antigen signal; second panel: clone sequence (legend hidden)
361 | plot1 <- plotUMAP(ibex_example, dimred = "ibexUMAP",
362 |                   color_by = "Anti-Hen-Egg-Lysozyme") +
363 |   theme(legend.position = "bottom")
364 | plot2 <- plotUMAP(ibex_example, dimred = "ibexUMAP", color_by = "CTaa") +
365 |   scale_color_viridis(discrete = TRUE, option = "B") +
366 |   guides(color = "none")
367 |
368 | plot1 + plot2
369 | ```
370 |
371 | In this workflow, we can combine the three dimension reductions (`pca`, `apca`, and `Ibex.KF`) into a single, integrated UMAP embedding using the ```runMultiUMAP()``` function with a cosine metric. To further refine this integration, we apply ```rescaleByNeighbors()``` to align the nearest neighbors across modalities, followed by clustering with ```clusterRows()```, resulting in a “combined.clustering” that reflects all data types. Finally, we visualize this joint embedding as “MultiUMAP,” coloring points by expression of a specific protein marker (e.g., Anti-Hen-Egg-Lysozyme), the integrated cluster assignments, or other relevant annotations. The result is a holistic representation of cellular diversity that leverages shared and unique signals from RNA, protein, and Ibex IGH latent features.
372 |
373 | ```{r tidy = FALSE}
374 | # Joint UMAP over the three reductions
375 | ibex_example <- mumosa::runMultiUMAP(
376 |   ibex_example, dimreds = c("pca", "apca", "Ibex.KF"))
377 | # Rescale the same reductions with rescaleByNeighbors(), then cluster
378 | output <- rescaleByNeighbors(
379 |   ibex_example, dimreds = c("pca", "apca", "Ibex.KF"))
380 | ibex_example$combined.clustering <- clusterRows(output, NNGraphParam())
381 |
382 | # Panels colored by antigen signal, combined cluster, and clone sequence
383 | plot3 <- plotUMAP(ibex_example, color_by = "Anti-Hen-Egg-Lysozyme",
384 |                   dimred = "MultiUMAP") +
385 |   theme(legend.position = "bottom")
386 | plot4 <- plotUMAP(ibex_example, color_by = "combined.clustering",
387 |                   dimred = "MultiUMAP") +
388 |   theme(legend.position = "bottom")
389 | plot5 <- plotUMAP(ibex_example, color_by = "CTaa", dimred = "MultiUMAP") +
390 |   scale_color_manual(
391 |     values = viridis_pal(option = "B")(length(unique(ibex_example$CTaa)))
392 |   )
393 |
394 | # Show the three panels together
395 | plot3 + plot4 + plot5
396 | ```
397 |
398 | ## Comparing the outcome to just one modality
399 |
400 | We can also look at the differences in the UMAP generated from RNA, ADT, or Ibex as individual components. Remember, the clusters that we are displaying in each UMAP come from the combined clustering (`combined.clustering`) calculated above across all three reductions.
401 |
402 | ```{r tidy = FALSE}
403 | # UMAP from the "pca" reduction
404 | ibex_example <- runUMAP(ibex_example, dimred = "pca", name = "pcaUMAP")
405 | # UMAP from the "apca" reduction
406 | ibex_example <- runUMAP(ibex_example, dimred = "apca", name = "beamUMAP")
407 |
408 | # Color every layout by the same combined clustering
409 | plot6 <- plotUMAP(
410 |   ibex_example, color_by = "combined.clustering", dimred = "pcaUMAP"
411 | )
412 | plot7 <- plotUMAP(
413 |   ibex_example, color_by = "combined.clustering", dimred = "beamUMAP"
414 | )
415 | plot8 <- plotUMAP(
416 |   ibex_example, color_by = "combined.clustering", dimred = "ibexUMAP"
417 | )
418 |
419 | # Collect the panel legends and place them at the bottom
420 | plot6 + plot7 + plot8 +
421 |   plot_layout(guides = "collect") &
422 |   theme(legend.position = "bottom")
423 | ```
424 |
425 | # CoNGA Reduction
426 |
427 | Single-cell B-cell receptor (BCR) sequencing enables the identification of clonotypes, which are groups of B cells sharing the same BCR sequence. Often, you want to link clonotypes to their gene expression profiles.
428 |
429 | A challenge arises, however, when a clonotype contains multiple cells (e.g., 10 cells sharing the same BCR). Including all cells for every clonotype can lead to over-representation of highly expanded clones or complicate analyses that require a one-to-one mapping between clonotypes and “cells.” Recent work ([Schattgen et al., 2021](https://pubmed.ncbi.nlm.nih.gov/34426704/)) has proposed different strategies to summarize or represent a clonotype by a single expression profile. Two key strategies are common:
430 |
431 | **Distance Approach**
432 |
433 | * First, look at the PCA or count matrices
434 | * Identify the cell that has the minimum summed Euclidean distance to all other cells in the clonotype.
435 | * This approach can help ensure that your single representation is an actual cell, rather than a potentially non-biological average.
436 |
437 | **Mean Approach**
438 |
439 | * Simply take the average (mean) expression across all cells in the same clonotype.
440 | * Conceptually, you collapse a multi-cell clone into one “virtual cell” representing its average expression.
441 |
442 | ```{r tidy = FALSE}
443 | # Collapse each clonotype to one mean profile across the RNA and BEAM assays
444 | CoNGA.sce <- CoNGAfy(ibex_example,
445 |                      method = "mean",
446 |                      assay = c("RNA", "BEAM"))
447 | # Encode the collapsed object with the VAE model and Kidera-factor input
448 | CoNGA.sce <- runIbex(CoNGA.sce,
449 |                      encoder.input = "kideraFactors",
450 |                      encoder.model = "VAE",
451 |                      reduction.name = "Ibex.KF",
452 |                      species = "Mouse",
453 |                      verbose = FALSE)
454 | # UMAP on the Ibex reduction; no trailing comma, so no empty argument is passed
455 | CoNGA.sce <- runUMAP(CoNGA.sce, dimred = "Ibex.KF", name = "ibexUMAP")
456 |
457 | plot9 <- plotUMAP(CoNGA.sce,
458 |                   dimred = "ibexUMAP",
459 |                   color_by = "Anti-Hen-Egg-Lysozyme",
460 |                   by.assay.type = "counts")
461 |
462 | plot10 <- plotUMAP(CoNGA.sce,
463 |                    dimred = "ibexUMAP",
464 |                    color_by = "H5N1",
465 |                    by.assay.type = "counts")
466 |
467 | plot9 + plot10 &
468 |   theme(legend.position = "bottom")
469 | ```
470 |
471 | # Conclusion
472 |
473 | This has been a general overview of the capabilities of **Ibex** for incorporating BCR information into the embedding space of single-cell data. If you have any questions, comments, or suggestions, feel free to visit the [GitHub repository](https://github.com/BorchLab/Ibex).
474 |
475 | ## Session Info
476 |
477 | ```{r tidy = FALSE}
478 | sessionInfo()  # print the R session details used when this vignette was built
479 | ```
480 |
--------------------------------------------------------------------------------
/www/graphicalAbstract.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BorchLab/Ibex/543ce724c3f01bebedb0db705873788509b5e8a7/www/graphicalAbstract.png
--------------------------------------------------------------------------------
/www/ibex_hex.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BorchLab/Ibex/543ce724c3f01bebedb0db705873788509b5e8a7/www/ibex_hex.png
--------------------------------------------------------------------------------
/www/training_info.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BorchLab/Ibex/543ce724c3f01bebedb0db705873788509b5e8a7/www/training_info.png
--------------------------------------------------------------------------------
/www/wnn_output.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BorchLab/Ibex/543ce724c3f01bebedb0db705873788509b5e8a7/www/wnn_output.png
--------------------------------------------------------------------------------