├── .BBSoptions ├── .Rbuildignore ├── .github ├── .gitignore ├── dependabot.yaml └── workflows │ ├── check-bioc.yml │ └── pr-commands.yaml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── DESCRIPTION ├── LICENSE ├── LICENSE.md ├── NAMESPACE ├── NEWS.md ├── R ├── AnnData2SCE.R ├── SCE2AnnData.R ├── basilisk.R ├── read.R ├── reticulate.R ├── ui.R ├── utils.R ├── validation.R ├── write.R └── zellkonverter-package.R ├── README.md ├── codecov.yml ├── configure ├── configure.win ├── inst ├── NEWS.Rd ├── WORDLIST ├── extdata │ ├── example_anndata.h5ad │ ├── krumsiek11.h5ad │ └── krumsiek11_augmented_v0-8.h5ad └── scripts │ ├── example_anndata.R │ ├── krumsiek11.md │ └── krumsiek11_augmented.py ├── longtests ├── testthat.R └── testthat │ ├── test-cellrank_pancreas.R │ ├── test-example_anndata.R │ ├── test-gtex_8tissues.R │ ├── test-pegasus_marrow.R │ ├── test-scIB_pancreas.R │ ├── test-scanpy_pbmc3k.R │ ├── test-scanpy_trajectory.R │ ├── test-scvelo_pancreas.R │ ├── test-scvi_citeseq.R │ └── test-squidpy_visium.R ├── man ├── AnnData-Conversion.Rd ├── AnnData-Environment.Rd ├── expectSCE.Rd ├── figures │ ├── AnnData2SCE.png │ └── zellkonverter.png ├── r-py-conversion.Rd ├── readH5AD.Rd ├── setZellkonverterVerbose.Rd ├── validateH5ADSCE.Rd ├── writeH5AD.Rd └── zellkonverter-package.Rd ├── tests ├── spelling.R ├── testthat.R └── testthat │ ├── default.profraw │ ├── test-SCE2AnnData.R │ ├── test-read.R │ ├── test-validation.R │ ├── test-write.R │ └── test-zzz-anndata.R └── vignettes └── zellkonverter.Rmd /.BBSoptions: -------------------------------------------------------------------------------- 1 | RunLongTests: TRUE 2 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^zellkonverter\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^LICENSE\.md$ 4 | ^CODE_OF_CONDUCT\.md$ 5 | ^\.BBSoptions$ 6 | ^\.github$ 7 | ^codecov\.yml$ 8 | ^doc$ 9 | ^Meta$ 10 | 
-------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/dependabot.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | -------------------------------------------------------------------------------- /.github/workflows/check-bioc.yml: -------------------------------------------------------------------------------- 1 | ## This is a simplified action for building and testing a Bioconductor package 2 | ## based on: 3 | ## * https://github.com/lcolladotor/biocthis/blob/master/actions/check-bioc.yml 4 | ## * https://github.com/r-lib/actions/blob/master/examples/check-standard.yaml 5 | ## * https://github.com/seandavi/BuildABiocWorkshop2020/blob/master/.github/workflows/basic_checks.yaml 6 | name: R-CMD-check-bioc 7 | 8 | ## Specify which branches to run on 9 | ## The "devel" branch corresponds to Bioc-devel and "RELEASE_X" branches are 10 | ## Bioconductor releases. See http://bioconductor.org/developers/how-to/git/. 11 | on: 12 | push: 13 | branches: 14 | - devel 15 | - 'RELEASE_*' 16 | pull_request: 17 | branches: 18 | - devel 19 | - 'RELEASE_*' 20 | 21 | jobs: 22 | get-bioc-release: 23 | # Identify the Bioconductor release from the git branch. Also specifies a 24 | # Bioconductor Docker image to use. 
25 | runs-on: ubuntu-latest 26 | outputs: 27 | biocimage: ${{ steps.get-release.outputs.biocimage }} 28 | biocrelease: ${{ steps.get-release.outputs.biocrelease }} 29 | 30 | steps: 31 | - id: get-release 32 | name: Get Bioconductor release 33 | run: | 34 | if echo "$GITHUB_REF" | grep -q "RELEASE_"; then 35 | biocrelease="$(basename -- $GITHUB_REF | tr '[:upper:]' '[:lower:]')" 36 | else 37 | biocrelease="devel" 38 | fi 39 | biocimage="bioconductor/bioconductor_docker:${biocrelease}" 40 | echo "Bioc release: ${biocrelease}" 41 | echo "Bioc docker image: ${biocimage}" 42 | ## Store the information 43 | echo "biocimage=${biocimage}" >> $GITHUB_OUTPUT 44 | echo "biocrelease=${biocrelease}" >> $GITHUB_OUTPUT 45 | 46 | get-bioc-version: 47 | # Identify the Bioconductor version number and R version to use. This is 48 | # done by checking the versions in the Bioconductor Docker container 49 | # selected by get-bioc-release. 50 | runs-on: ubuntu-latest 51 | needs: get-bioc-release 52 | container: 53 | image: ${{ needs.get-bioc-release.outputs.biocimage }} 54 | outputs: 55 | Rversion: ${{ steps.set-versions.outputs.rversion }} 56 | biocversion: ${{ steps.set-versions.outputs.biocversion }} 57 | 58 | steps: 59 | - id: get-versions 60 | name: Get Bioconductor/R versions 61 | run: | 62 | biocconfig <- "https://bioconductor.org/config.yaml" 63 | biocrelease <- "${{ needs.get-bioc-release.outputs.biocrelease }}" 64 | cat("Bioc release RAW:", biocrelease, "\n") 65 | biocrelease <- ifelse( 66 | grepl("release", biocrelease, fixed = TRUE), # grepl(pattern, x): look for "release" in the branch-derived name 67 | "release", "devel" 68 | ) 69 | biocmap <- BiocManager:::.version_map_get_online(biocconfig) 70 | biocversion <- subset(biocmap, BiocStatus == biocrelease)[, 'Bioc'] 71 | biocversion_str <- as.character(biocversion) 72 | rversion <- subset(biocmap, BiocStatus == biocrelease)[, 'R'] 73 | rversion_str <- as.character(rversion) 74 | # Use R devel for BioC devel between November and May 75 | if (biocrelease == "devel") { 76 | current_month <- 
as.numeric(format(Sys.Date(), "%m")) 77 | if (current_month >= 11 || current_month <= 5) { 78 | cat("Setting R version to devel") 79 | rversion <- "devel" 80 | rversion_str <- "devel" 81 | } 82 | } 83 | writeLines(c(biocversion_str, rversion_str), "versions.txt") 84 | cat("GET VERSIONS", "\n") 85 | cat("Bioc release: ", biocrelease, "\n") 86 | cat("Bioc version: ", biocversion_str, "\n") 87 | cat("R version: ", rversion_str, "\n") 88 | shell: Rscript {0} 89 | - id: set-versions 90 | name: Set Bioconductor/R versions 91 | run: | 92 | biocversion=$(head -n 1 versions.txt) 93 | rversion=$(tail -n 1 versions.txt) 94 | echo "SET VERSIONS" 95 | echo "Bioc version: ${biocversion}" 96 | echo "R version: ${rversion}" 97 | ## Store the information 98 | echo "biocversion=${biocversion}" >> $GITHUB_OUTPUT 99 | echo "rversion=${rversion}" >> $GITHUB_OUTPUT 100 | 101 | R-CMD-check-docker: 102 | ## Run checks in the Bioconductor Docker container 103 | name: ubuntu-latest (r-biocdocker bioc-${{ needs.get-bioc-version.outputs.biocversion }}) 104 | needs: [get-bioc-release, get-bioc-version] 105 | runs-on: ubuntu-latest 106 | container: 107 | image: ${{ needs.get-bioc-release.outputs.biocimage }} 108 | volumes: 109 | - /home/runner/work/_temp/Library:/usr/local/lib/R/host-site-library 110 | env: 111 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true 112 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 113 | R_BIOC_VERSION: ${{ needs.get-bioc-version.outputs.biocversion }} 114 | 115 | steps: 116 | - name: Checkout 117 | uses: actions/checkout@v4 118 | 119 | - name: Install extra linux dependencies 120 | run: sudo apt-get update -y && sudo apt-get install -y libcurl4-openssl-dev devscripts qpdf 121 | 122 | - name: Setup R dependencies 123 | uses: r-lib/actions/setup-r-dependencies@v2 124 | with: 125 | cache-version: 3 126 | extra-packages: any::rcmdcheck 127 | needs: check 128 | 129 | - name: Show session info 130 | run: | 131 | options(width = 100) 132 | pkgs <- installed.packages()[, "Package"] 133 
| sessioninfo::session_info(pkgs, include_base = TRUE) 134 | shell: Rscript {0} 135 | 136 | - name: Check R package 137 | uses: r-lib/actions/check-r-package@v2 138 | with: 139 | upload-snapshots: true 140 | upload-results: true 141 | 142 | - name: BiocCheck 143 | run: | 144 | BiocManager::install("BiocCheck") 145 | BiocCheck::BiocCheck( 146 | dir('check', 'tar.gz$', full.names = TRUE), 147 | `no-check-R-ver` = TRUE, 148 | `no-check-bioc-help` = TRUE 149 | ) 150 | shell: Rscript {0} 151 | 152 | R-CMD-check: 153 | ## Run checks on other platforms. 154 | name: ${{ matrix.config.os }} (r-${{ needs.get-bioc-version.outputs.rversion }} bioc-${{ needs.get-bioc-version.outputs.biocversion }}) 155 | needs: [get-bioc-release, get-bioc-version] 156 | runs-on: ${{ matrix.config.os }} 157 | strategy: 158 | fail-fast: false 159 | matrix: 160 | experimental: [true] 161 | config: 162 | - {os: windows-latest} 163 | - {os: macOS-latest} 164 | - {os: ubuntu-24.04, rspm: "https://packagemanager.posit.co/cran/__linux__/noble/latest"} 165 | env: 166 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true 167 | RSPM: ${{ matrix.config.rspm }} 168 | R_BIOC_VERSION: ${{ needs.get-bioc-version.outputs.biocversion }} 169 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 170 | 171 | steps: 172 | - name: Checkout 173 | uses: actions/checkout@v4 174 | 175 | - name: Install Linux system dependencies 176 | if: runner.os == 'Linux' 177 | run: | 178 | sudo apt update 179 | sudo apt-get update 180 | sudo apt-get upgrade libstdc++6 181 | 182 | - name: Setup Pandoc 183 | uses: r-lib/actions/setup-pandoc@v2 184 | 185 | - name: Setup R 186 | uses: r-lib/actions/setup-r@v2 187 | with: 188 | r-version: ${{ needs.get-bioc-version.outputs.rversion }} 189 | use-public-rspm: true 190 | 191 | - name: Setup R dependencies 192 | uses: r-lib/actions/setup-r-dependencies@v2 193 | with: 194 | cache-version: 3 195 | extra-packages: any::rcmdcheck 196 | needs: check 197 | 198 | - name: Session info 199 | run: | 200 | options(width = 100) 
201 | pkgs <- installed.packages()[, "Package"] 202 | sessioninfo::session_info(pkgs, include_base = TRUE) 203 | shell: Rscript {0} 204 | 205 | - name: Check R package 206 | uses: r-lib/actions/check-r-package@v2 207 | with: 208 | upload-snapshots: true 209 | upload-results: true 210 | 211 | test-coverage: 212 | ## Calculate package test coverage. Only runs on the devel branch after 213 | ## R-CMD-check-docker has completed successfully. Uses the Bioconductor Docker image. 214 | if: ${{ github.ref == 'refs/heads/devel' }} 215 | needs: [get-bioc-release, get-bioc-version, R-CMD-check-docker] 216 | runs-on: ubuntu-latest 217 | container: 218 | image: ${{ needs.get-bioc-release.outputs.biocimage }} 219 | volumes: 220 | - /home/runner/work/_temp/Library:/usr/local/lib/R/host-site-library 221 | env: 222 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 223 | R_BIOC_VERSION: ${{ needs.get-bioc-version.outputs.biocversion }} 224 | 225 | steps: 226 | - name: Checkout 227 | uses: actions/checkout@v4 228 | 229 | - name: Install extra linux dependencies 230 | run: sudo apt-get update -y && sudo apt-get install -y libcurl4-openssl-dev devscripts qpdf 231 | 232 | - name: Setup R dependencies 233 | uses: r-lib/actions/setup-r-dependencies@v2 234 | with: 235 | cache-version: 3 236 | extra-packages: any::covr 237 | needs: coverage 238 | 239 | - name: Test coverage 240 | run: covr::codecov(quiet = FALSE) 241 | shell: Rscript {0} 242 | 243 | pkgdown: 244 | ## Build pkgdown site and push to gh-pages branch. The site is built on 245 | ## every run, but only deployed from the devel branch on non-PR events. Uses 246 | ## the Bioconductor Docker image. 
247 | needs: [get-bioc-release, get-bioc-version] 248 | runs-on: ubuntu-latest 249 | container: 250 | image: ${{ needs.get-bioc-release.outputs.biocimage }} 251 | volumes: 252 | - /home/runner/work/_temp/Library:/usr/local/lib/R/host-site-library 253 | env: 254 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 255 | R_BIOC_VERSION: ${{ needs.get-bioc-version.outputs.biocversion }} 256 | 257 | steps: 258 | - name: Checkout 259 | uses: actions/checkout@v4 260 | 261 | - name: Install extra linux dependencies 262 | run: sudo apt-get update -y && sudo apt-get install -y libcurl4-openssl-dev devscripts qpdf rsync 263 | 264 | - name: Setup R dependencies 265 | uses: r-lib/actions/setup-r-dependencies@v2 266 | with: 267 | cache-version: 3 268 | extra-packages: any::pkgdown, local::. 269 | needs: website 270 | 271 | - name: Build pkgdown site 272 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) 273 | shell: Rscript {0} 274 | 275 | - name: Deploy to GitHub pages 🚀 276 | if: ${{ (github.ref == 'refs/heads/devel') && (github.event_name != 'pull_request') }} 277 | uses: JamesIves/github-pages-deploy-action@v4.7.3 278 | with: 279 | clean: false 280 | branch: gh-pages 281 | folder: docs 282 | git-config-name: "github-actions[bot]" 283 | git-config-email: "41898282+github-actions[bot]@users.noreply.github.com" 284 | -------------------------------------------------------------------------------- /.github/workflows/pr-commands.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | issue_comment: 5 | types: [created] 6 | 7 | name: pr-commands.yaml 8 | 9 | permissions: read-all 10 | 11 | jobs: 12 | document: 13 | if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/document') }} 14 | name: document 15 | runs-on: ubuntu-latest 16 | env: 17 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 18 | permissions: 19 | contents: write 20 | steps: 21 | - uses: actions/checkout@v4 22 | 23 | - uses: r-lib/actions/pr-fetch@v2 24 | with: 25 | repo-token: ${{ secrets.GITHUB_TOKEN }} 26 | 27 | - uses: r-lib/actions/setup-r@v2 28 | with: 29 | use-public-rspm: true 30 | 31 | - uses: r-lib/actions/setup-r-dependencies@v2 32 | with: 33 | extra-packages: any::roxygen2 34 | needs: pr-document 35 | 36 | - name: Document 37 | run: roxygen2::roxygenise() 38 | shell: Rscript {0} 39 | 40 | - name: commit 41 | run: | 42 | git config --local user.name "$GITHUB_ACTOR" 43 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" 44 | git add man/\* NAMESPACE 45 | git commit -m 'Document' 46 | 47 | - uses: r-lib/actions/pr-push@v2 48 | with: 49 | repo-token: ${{ secrets.GITHUB_TOKEN }} 50 | 51 | style: 52 | if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/style') }} 53 | name: style 54 | runs-on: ubuntu-latest 55 | env: 56 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 57 | permissions: 58 | contents: write 59 | steps: 60 | - uses: actions/checkout@v4 61 | 62 | - uses: r-lib/actions/pr-fetch@v2 63 | with: 64 | repo-token: ${{ secrets.GITHUB_TOKEN }} 65 | 66 | - uses: r-lib/actions/setup-r@v2 67 | 68 | - name: Install dependencies 69 | run: install.packages("styler") 70 | shell: Rscript {0} 71 | 72 | - name: Style 73 | 
run: styler::style_pkg() 74 | shell: Rscript {0} 75 | 76 | - name: commit 77 | run: | 78 | git config --local user.name "$GITHUB_ACTOR" 79 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" 80 | git add \*.R 81 | git commit -m 'Style' 82 | 83 | - uses: r-lib/actions/pr-push@v2 84 | with: 85 | repo-token: ${{ secrets.GITHUB_TOKEN }} 86 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | *.html 5 | doc 6 | Meta 7 | *.Rproj 8 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity and 10 | orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 
14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the overall 26 | community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards 42 | of acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies 54 | when an individual is officially representing the community in public spaces. 
55 | Examples of representing our community include using an official e-mail 56 | address, posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at [INSERT CONTACT 63 | METHOD]. All complaints will be reviewed and investigated promptly and fairly. 64 | 65 | All community leaders are obligated to respect the privacy and security of the 66 | reporter of any incident. 67 | 68 | ## Enforcement Guidelines 69 | 70 | Community leaders will follow these Community Impact Guidelines in determining 71 | the consequences for any action they deem in violation of this Code of Conduct: 72 | 73 | ### 1. Correction 74 | 75 | **Community Impact**: Use of inappropriate language or other behavior deemed 76 | unprofessional or unwelcome in the community. 77 | 78 | **Consequence**: A private, written warning from community leaders, providing 79 | clarity around the nature of the violation and an explanation of why the 80 | behavior was inappropriate. A public apology may be requested. 81 | 82 | ### 2. Warning 83 | 84 | **Community Impact**: A violation through a single incident or series of 85 | actions. 86 | 87 | **Consequence**: A warning with consequences for continued behavior. No 88 | interaction with the people involved, including unsolicited interaction with 89 | those enforcing the Code of Conduct, for a specified period of time. This 90 | includes avoiding interactions in community spaces as well as external channels 91 | like social media. Violating these terms may lead to a temporary or permanent 92 | ban. 93 | 94 | ### 3. Temporary Ban 95 | 96 | **Community Impact**: A serious violation of community standards, including 97 | sustained inappropriate behavior. 
98 | 99 | **Consequence**: A temporary ban from any sort of interaction or public 100 | communication with the community for a specified period of time. No public or 101 | private interaction with the people involved, including unsolicited interaction 102 | with those enforcing the Code of Conduct, is allowed during this period. 103 | Violating these terms may lead to a permanent ban. 104 | 105 | ### 4. Permanent Ban 106 | 107 | **Community Impact**: Demonstrating a pattern of violation of community 108 | standards, including sustained inappropriate behavior, harassment of an 109 | individual, or aggression toward or disparagement of classes of individuals. 110 | 111 | **Consequence**: A permanent ban from any sort of public interaction within the 112 | community. 113 | 114 | ## Attribution 115 | 116 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 117 | version 2.0, 118 | available at https://www.contributor-covenant.org/version/2/0/ 119 | code_of_conduct.html. 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 123 | 124 | [homepage]: https://www.contributor-covenant.org 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | https://www.contributor-covenant.org/faq. Translations are available at https:// 128 | www.contributor-covenant.org/translations. 
129 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: zellkonverter 2 | Title: Conversion Between scRNA-seq Objects 3 | Version: 1.19.0 4 | Date: 2025-04-16 5 | Authors@R: c( 6 | person("Luke", "Zappia", , "luke@lazappi.id.au", role = c("aut", "cre"), 7 | comment = c(ORCID = "0000-0001-7744-8565")), 8 | person("Aaron", "Lun", , "infinite.monkeys.with.keyboards@gmail.com", role = "aut", 9 | comment = c(ORCID = "0000-0002-3564-4813")), 10 | person("Jack", "Kamm", , "jackkamm@gmail.com", role = "ctb", 11 | comment = c(ORCID = "0000-0003-2412-756X")), 12 | person("Robrecht", "Cannoodt", , "rcannood@gmail.com", role = "ctb", 13 | comment = c(ORCID = "0000-0003-3641-729X", github = "rcannood")), 14 | person("Gabriel", "Hoffman", , "gabriel.hoffman@mssm.edu", role = "ctb", 15 | comment = c(ORCID = "0000-0002-0957-0224", github = "GabrielHoffman")), 16 | person("Marek", "Cmero", , "cmero.ma@wehi.edu.au", role = "ctb", 17 | comment = c(ORCID = "0000-0001-7783-5530", github = "mcmero")) 18 | ) 19 | Description: Provides methods to convert between Python AnnData objects 20 | and SingleCellExperiment objects. These are primarily intended for use 21 | by downstream Bioconductor packages that wrap Python methods for 22 | single-cell data analysis. It also includes functions to read and 23 | write H5AD files used for saving AnnData objects to disk. 
24 | License: MIT + file LICENSE 25 | URL: https://github.com/theislab/zellkonverter 26 | BugReports: https://github.com/theislab/zellkonverter/issues 27 | Imports: 28 | basilisk, 29 | cli, 30 | DelayedArray, 31 | Matrix, 32 | methods, 33 | reticulate, 34 | S4Vectors, 35 | SingleCellExperiment (>= 1.11.6), 36 | SummarizedExperiment, 37 | utils 38 | Suggests: 39 | anndata, 40 | BiocFileCache, 41 | BiocStyle, 42 | covr, 43 | HDF5Array, 44 | knitr, 45 | pkgload, 46 | rhdf5 (>= 2.45.1), 47 | rmarkdown, 48 | scRNAseq, 49 | SpatialExperiment, 50 | spelling, 51 | testthat, 52 | withr 53 | VignetteBuilder: 54 | knitr 55 | biocViews: SingleCell, DataImport, DataRepresentation 56 | Encoding: UTF-8 57 | Language: en-GB 58 | LazyData: true 59 | Roxygen: list(markdown = TRUE) 60 | RoxygenNote: 7.3.2 61 | StagedInstall: no 62 | Collate: 63 | 'AnnData2SCE.R' 64 | 'SCE2AnnData.R' 65 | 'ui.R' 66 | 'basilisk.R' 67 | 'read.R' 68 | 'reticulate.R' 69 | 'utils.R' 70 | 'validation.R' 71 | 'write.R' 72 | 'zellkonverter-package.R' 73 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2020 2 | COPYRIGHT HOLDER: Luke Zappia 3 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2020 Theis Lab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this 
permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(py_to_r,numpy.ndarray) 4 | S3method(py_to_r,pandas.core.arrays.categorical.Categorical) 5 | S3method(py_to_r,pandas.core.arrays.masked.BaseMaskedArray) 6 | export(.AnnDataVersions) 7 | export(AnnData2SCE) 8 | export(AnnDataDependencies) 9 | export(SCE2AnnData) 10 | export(readH5AD) 11 | export(setZellkonverterVerbose) 12 | export(writeH5AD) 13 | export(zellkonverterAnnDataEnv) 14 | import(SingleCellExperiment) 15 | import(SummarizedExperiment) 16 | importClassesFrom(Matrix,CsparseMatrix) 17 | importFrom(DelayedArray,blockApply) 18 | importFrom(DelayedArray,is_sparse) 19 | importFrom(DelayedArray,nzdata) 20 | importFrom(DelayedArray,nzindex) 21 | importFrom(DelayedArray,rowAutoGrid) 22 | importFrom(DelayedArray,type) 23 | importFrom(Matrix,sparseMatrix) 24 | importFrom(Matrix,t) 25 | importFrom(S4Vectors,DataFrame) 26 | importFrom(S4Vectors,I) 27 | importFrom(S4Vectors,make_zero_col_DFrame) 28 | importFrom(S4Vectors,metadata) 29 | importFrom(S4Vectors,wmsg) 30 | importFrom(SingleCellExperiment,"colPairs<-") 31 | importFrom(SingleCellExperiment,"reducedDims<-") 32 | importFrom(SingleCellExperiment,"rowPairs<-") 33 | 
importFrom(SingleCellExperiment,SingleCellExperiment) 34 | importFrom(SummarizedExperiment,"assays<-") 35 | importFrom(SummarizedExperiment,"colData<-") 36 | importFrom(SummarizedExperiment,"rowData<-") 37 | importFrom(SummarizedExperiment,assays) 38 | importFrom(SummarizedExperiment,colData) 39 | importFrom(SummarizedExperiment,rowData) 40 | importFrom(basilisk,basiliskRun) 41 | importFrom(methods,as) 42 | importFrom(methods,is) 43 | importFrom(methods,selectMethod) 44 | importFrom(methods,slot) 45 | importFrom(reticulate,import) 46 | importFrom(reticulate,import_builtins) 47 | importFrom(reticulate,py_to_r) 48 | importFrom(reticulate,r_to_py) 49 | importFrom(utils,capture.output) 50 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # zellkonverter 1.20.0 2 | 3 | * Bioconductor 3.22, October 2025 4 | 5 | ## zellkonverter 1.19.0 (2025-04-16) 6 | 7 | * Bioconductor 3.22 devel 8 | 9 | # zellkonverter 1.18.0 (2025-04-16) 10 | 11 | * Bioconductor 3.21, April 2025 12 | 13 | ## zellkonverter 1.17.4 (2025-04-10) 14 | 15 | * Add tests for **anndata** v0.10.9 16 | * Modify `SCE2AnnData()` to convert sparse matrices to `dgRMatrix` when they are 17 | transposed (mostly assays) (Fixes #132) 18 | 19 | ## zellkonverter 1.17.3 (2025-04-08) 20 | 21 | * Add an environment for **anndata** v0.11.4. This is now the default. 
22 | * Disable **anndata** v0.7.6 tests on aarch64 Linux as Python 3.7 is not available 23 | 24 | ## zellkonverter 1.17.2 (2025-04-01) 25 | 26 | * Add support for `SpatialExperiment` objects to `SCE2AnnData()` (PR #138 @mcmero, Fixes #61) 27 | * Improve handling of missing `rownames`/`colnames` (PR #138, Fixes #140) 28 | 29 | ## zellkonverter 1.17.1 (2025-03-09) 30 | 31 | * Add `testload` argument to `basiliskRun()` calls (Partial fix for #139) 32 | * Handle missing `rowData`/`colData` with no names in `SCE2AnnData()` (Fixes #105) 33 | * Update links in function documentation 34 | 35 | ## zellkonverter 1.17.0 (2024-10-30) 36 | 37 | * Bioconductor 3.21 devel 38 | 39 | # zellkonverter 1.16.0 (2024-10-30) 40 | 41 | * Bioconductor 3.20, October 2024 42 | 43 | ## zellkonverter 1.15.4 (2024-10-18) 44 | 45 | * Fix assignment of levels to factors in the R reader with **anndata** v0.7 files (Fixes #122) 46 | * Add environment for **anndata** v0.10.9 47 | * Avoid deprecation warning due to setting `dtype` when creating Python `AnnData` objects 48 | * Standardise code styling using **{styler}** 49 | 50 | ## zellkonverter 1.15.3 (2024-10-04) 51 | 52 | * Correctly assign levels to factors in R reader (Fixes #122) 53 | 54 | ## zellkonverter 1.15.2 (2024-10-02) 55 | 56 | * Correctly set `filepath` in the R reader when reading `adata.raw` with `use_hdf5 = TRUE` (PR #124 @GabrielHoffman, Fixes #123) 57 | 58 | ## zellkonverter 1.15.1 (2024-06-21) 59 | 60 | * Skip **anndata** v0.7.6 tests on Apple Silicon as Python 3.7 is not available 61 | * Fix URL for GTEx 8 tissues dataset in long tests 62 | * Add test using the more complete example H5AD file 63 | 64 | ## zellkonverter 1.15.0 (2024-05-01) 65 | 66 | * Bioconductor 3.20 devel 67 | 68 | # zellkonverter 1.14.0 (2024-05-01) 69 | 70 | * Bioconductor 3.19, May 2024 71 | 72 | ## zellkonverter 1.14.1 (2024-06-21) 73 | 74 | * Skip **anndata** v0.7.6 tests on Apple Silicon as Python 3.7 is not available 75 | * Fix URL for GTEx 8 
tissues dataset in long tests 76 | * Add test using the more complete example H5AD file 77 | 78 | ## zellkonverter 1.13.4 (2024-04-26) 79 | 80 | * Prepare `NEWS` for release 81 | 82 | ## zellkonverter 1.13.3 (2024-03-25) 83 | 84 | * Correctly handle `use_backed = TRUE` with newer **anndata** versions (Fixes #114) 85 | * Improve warnings when converting matrices fails 86 | * Add environment for **anndata** v0.10.6 87 | 88 | ## zellkonverter 1.13.2 (2024-01-17) 89 | 90 | * Minor change to writing `DelayedArray`s for compatibility with **{HDF5Array}** 91 | v1.31.1 92 | 93 | ## zellkonverter 1.13.1 (2023-11-13) 94 | 95 | * Fix the **anndata** v0.10.2 environment instantiation (Fixes #103) 96 | * Fix a typo in the AnnData Conversion docs (Fixes #100) 97 | 98 | ## zellkonverter 1.13.0 (2023-10-25) 99 | 100 | * Bioconductor 3.19 devel 101 | 102 | # zellkonverter 1.12.0 (2023-10-25) 103 | 104 | * Bioconductor 3.18, October 2023 105 | 106 | ## zellkonverter 1.12.1 (2023-11-13) 107 | 108 | * Fix the **anndata** v0.10.2 environment instantiation (Fixes #103) 109 | * Fix a typo in the AnnData Conversion docs (Fixes #100) 110 | 111 | ## zellkonverter 1.11.4 (2023-10-16) 112 | 113 | * Add environment for **anndata** v0.10.2 114 | 115 | ## zellkonverter 1.11.3 (2023-10-2) 116 | 117 | * Add environment for **anndata** v0.9.2 118 | 119 | ## zellkonverter 1.11.2 (2023-08-28) 120 | 121 | * Changes for compatibility with **{rhdf5}** v2.45.1 122 | * Support for enum types that simplifies reading of nullable types in the 123 | native R reader 124 | 125 | ## zellkonverter 1.11.1 (2023-05-23) 126 | 127 | * Pass correct dimensions when converting `raw` (Fixes #96) 128 | * Convert **anndata** backed sparse matrices in `AnnData2SCE()` (Fixes #96) 129 | 130 | ## zellkonverter 1.11.0 (2023-04-26) 131 | 132 | * Bioconductor 3.18 devel 133 | 134 | # zellkonverter 1.10.0 (2023-04-26) 135 | 136 | * Bioconductor 3.17, April 2023 137 | 138 | ## zellkonverter 1.10.1 (2023-05-23) 139 | 140 | * Pass 
correct dimensions when converting `raw` (Fixes #96) 141 | * Convert **anndata** backed sparse matrices in `AnnData2SCE()` (Fixes #96) 142 | 143 | ## zellkonverter 1.9.3 (2023-04-06) 144 | 145 | * Add functions for converting **pandas** arrays used by **anndata** when 146 | arrays have missing values (Fixes #87) 147 | * Read the correct index names in the R reader (PR #93 @mtmorgan) 148 | * Adjust tests to match reader changes 149 | 150 | ## zellkonverter 1.9.2 (2023-03-28) 151 | 152 | * Add @rcannood as a contributor (PR #90 @rcannood, fixes #88) 153 | 154 | ## zellkonverter 1.9.1 (2023-03-14) 155 | 156 | * Add compatibility with the **anndata** v0.8 H5AD format to the native R 157 | writer (PR #86 @jackkamm, fixes #78) 158 | 159 | ## zellkonverter 1.9.0 (2022-11-02) 160 | 161 | * Bioconductor 3.17 devel 162 | 163 | # zellkonverter 1.8.0 (2022-11-02) 164 | 165 | * Bioconductor 3.16, November 2022 166 | 167 | ## zellkonverter 1.7.8 (2022-10-04) 168 | 169 | * Improve compatibility with the R **{anndata}** package (PR #76 @rcannood, 170 | fixes #75) 171 | * Python objects are now explicitly converted rather than relying on automatic 172 | conversion 173 | * Other minor modifications for compatibility 174 | * Added support for **numpy** recarrays (dtype number 20) (PR #81, fixes #45, 175 | #28) 176 | * Added a new `py_to_r.numpy.ndarray()` function which extends the default 177 | **{reticulate}** function 178 | * Improvements to warnings 179 | * Improvements and updates to tests 180 | 181 | ## zellkonverter 1.7.7 (2022-10-04) 182 | 183 | * Pin **python** version to 3.7.10 in **anndata** v0.7.6 environment (3.7.12 184 | was not compatible with other dependencies) 185 | 186 | ## zellkonverter 1.7.6 (2022-09-29) 187 | 188 | * Pin **python** version to 3.7.12 in **anndata** v0.7.6 environment to match 189 | **{basilisk}** changes 190 | 191 | ## zellkonverter 1.7.5 (2022-09-13) 192 | 193 | * Minor changes for compatibility with **{cli}** v3.4.0 194 | * Added tests for 
`verbose=TRUE` 195 | 196 | ## zellkonverter 1.7.4 (2022-08-17) 197 | 198 | * Minor changes for compatibility with the upcoming **{Matrix}** 1.4-2 release 199 | 200 | ## zellkonverter 1.7.3 (2022-06-23) 201 | 202 | * Move verbose from `zellkonverterAnnDataEnv()` (Fixes #66) 203 | 204 | ## zellkonverter 1.7.2 (2022-06-09) 205 | 206 | * Instantiate environments for `basilisk::configureBasiliskEnv()` (Fixes #66) 207 | * Allow missing obs/var names when `use_hdf5 = TRUE` (Fixes #65) 208 | 209 | ## zellkonverter 1.7.1 (2022-05-17) 210 | 211 | * Fix bug in long tests 212 | 213 | ## zellkonverter 1.7.0 (2022-04-27) 214 | 215 | * Bioconductor 3.16 devel 216 | 217 | # zellkonverter 1.6.0 (2022-04-27) 218 | 219 | * Bioconductor 3.15, April 2022 220 | 221 | ## zellkonverter 1.6.5 (2022-09-13) 222 | 223 | * Minor changes for compatibility with **{cli}** v3.4.0 224 | * Added tests for `verbose=TRUE` 225 | 226 | ## zellkonverter 1.6.4 (2022-08-17) 227 | 228 | * Minor changes for compatibility with the upcoming **{Matrix}** 1.4-2 release 229 | 230 | ## zellkonverter 1.6.3 (2022-06-23) 231 | 232 | * Move verbose from `zellkonverterAnnDataEnv()` (Fixes #66) 233 | 234 | ## zellkonverter 1.6.2 (2022-06-09) 235 | 236 | * Instantiate environments for `basilisk::configureBasiliskEnv()` (Fixes #66) 237 | * Allow missing obs/var names when `use_hdf5 = TRUE` (Fixes #65) 238 | 239 | ## zellkonverter 1.6.1 (2022-05-17) 240 | 241 | * Fix bug in long tests 242 | 243 | ## zellkonverter 1.5.4 (2022-04-25) 244 | 245 | * Fix progress messages in `.convert_anndata_df()` 246 | * Allow `data.frames` in `varm` in `SCE2AnnData()` 247 | * Standardise `uns` names to match R conventions in `AnnData2SCE()` 248 | * Adjust long tests 249 | 250 | ## zellkonverter 1.5.3 (2022-04-19) 251 | 252 | * Reduce **scipy** version to 1.7.3 253 | * **scipy** >= 1.8.0 is incompatible with **{reticulate}** <= 1.24 (see 254 | https://github.com/rstudio/reticulate/pull/1173) 255 | * Add GTEX 8 tissues dataset to long tests 
(see #58) 256 | 257 | ## zellkonverter 1.5.2 (2022-04-17) 258 | 259 | * Update the default Python environment to use **anndata** v0.8.0 260 | * **anndata** 0.8.0 261 | * **h5py** 3.6.0 262 | * **hdf5** 1.12.1 263 | * **natsort** 8.1.0 264 | * **numpy** 1.22.3 265 | * **packaging** 21.3 266 | * **pandas** 1.4.2 267 | * **python** 3.8.13 268 | * **scipy** 1.8.0 269 | * **sqlite** 3.38.2 270 | * Add options to choose Python environments with different versions of 271 | **anndata** 272 | * To facilitate this `zellkonverterAnnDataEnv()` and `AnnDataDependencies()` 273 | are new functions rather than variables 274 | * Added a new `.AnnDataVersions` variable which stores the available 275 | **anndata** versions 276 | * Updates to the vignette and function documentation explaining this option 277 | 278 | ## zellkonverter 1.5.1 (2022-03-21) 279 | 280 | * Modify how Pandas DataFrames are converted to R 281 | * Columns should now use R approved names with a warning when changes are 282 | made 283 | 284 | ## zellkonverter 1.5.0 (2021-10-27) 285 | 286 | * Bioconductor 3.15 devel 287 | 288 | # zellkonverter 1.4.0 (2021-10-27) 289 | 290 | * Bioconductor 3.14, October 2021 291 | 292 | ## zellkonverter 1.3.3 (2021-10-20) 293 | 294 | * Add progress messages to various functions 295 | * Can be controlled by function arguments or a global variable 296 | * Split `konverter.R` into two files (`AnnData2SCE.R` and `SCE2AnnData.R`) 297 | * Add arguments to control how slots are converted in `AnnData2SCE()` and 298 | `SCE2AnnData()` (Fixes #47) 299 | * Each slot can now be fully converted, skipped entirely or only selected 300 | items converted. 
301 | * Add support for converting the `raw` slot to an `altExp` in `AnnData2SCE()` 302 | (Fixes #53, fixes #57) 303 | 304 | ## zellkonverter 1.3.2 (2021-09-09) 305 | 306 | * Add recursive conversion of lists in `AnnData2SCE()` 307 | * Correctly handle `DataFrame` objects stored in `adata.obsm` 308 | * Remove **pandas** indexes from converted `DataFrame` objects 309 | * Add functions for validating `SingleCellExperiment` objects (for testing) 310 | * Add long tests for various public datasets 311 | 312 | ## zellkonverter 1.3.1 (2021-06-22) 313 | 314 | * Fix bug in converting `dgRMatrix` sparse matrices (Fixes #55) 315 | 316 | ## zellkonverter 1.3.0 (2021-05-20) 317 | 318 | * Bioconductor 3.14 devel 319 | 320 | # zellkonverter 1.2.0 (2021-05-20) 321 | 322 | * Bioconductor 3.13, May 2021 323 | 324 | ## zellkonverter 1.2.1 (2021-06-22) 325 | 326 | * Fix bug in converting `dgRMatrix` sparse matrices (Fixes #55) 327 | 328 | ## zellkonverter 1.1.11 (2021-05-19) 329 | 330 | * Add experimental native R reader to `readH5AD()` 331 | 332 | ## zellkonverter 1.1.10 (2021-05-18) 333 | 334 | * Update NEWS for release 335 | 336 | ## zellkonverter 1.1.9 (2021-05-12) 337 | 338 | * `AnnData2SCE()` no longer returns `dgRMatrix` sparse matrices (Fixes #34) 339 | 340 | ## zellkonverter 1.1.8 (2021-05-03) 341 | 342 | * Add conversion checks to all slots in `AnnData2SCE()` (See #45) 343 | * Enable return conversion of `varm` in `SCE2AnnData()` (Fixes #43) 344 | * Store `X_name` in `AnnData2SCE()` for use by `SCE2AnnData()` and add an 345 | `X_name` argument to `AnnData2SCE()` and `readH5AD()` (Fixes #7) 346 | 347 | ## zellkonverter 1.1.7 (2021-04-30) 348 | 349 | * Add `compression` argument to `writeH5AD()` (Fixes #49) 350 | * Update **anndata** Python dependencies, now using **anndata** v0.7.6 351 | 352 | ## zellkonverter 1.1.6 (2021-04-27) 353 | 354 | * Adapt to changes in `HDF5Array::HDF5Array()` 355 | 356 | ## zellkonverter 1.1.5 (2021-03-05) 357 | 358 | * Better support for 
**anndata** `SparseDataset` arrays (PR #41, Fixes #37, 359 | Fixes #42) 360 | * More consistent conversion of `metadata` to `uns` in `SCE2AnnData()` 361 | (Fixes #40) 362 | * Add handling of list columns in `colData` and `rowData` in `SCE2AnnData()` 363 | (Fixes #26) 364 | * Export `zellkonverterAnnDataEnv` (Fixes #38) 365 | 366 | ## zellkonverter 1.1.4 (2021-02-18) 367 | 368 | * Handle writing **DelayedArray** assays on the R side in `writeH5AD()` 369 | (PR #35, Fixes #32) 370 | 371 | ## zellkonverter 1.1.3 (2021-01-22) 372 | 373 | * Adjust `SCE2AnnData()` example (Fixes #31) 374 | 375 | ## zellkonverter 1.1.2 (2020-12-19) 376 | 377 | * Improved support for HDF5 backed conversion (PR #29, fixes #13) 378 | 379 | ## zellkonverter 1.1.1 (2020-12-03) 380 | 381 | * Add `example_anndata.h5ad` file to `inst/extdata/` and creation script to `inst/scripts/` 382 | * Improve conversion checks when converting `.uns` to `metadata` 383 | * Avoid converting `obsp` and `varp` to dense matrices 384 | 385 | ## zellkonverter 1.1.0 (2020-10-28) 386 | 387 | * Bioconductor 3.13 devel 388 | 389 | # zellkonverter 1.0.0 (2020-10-28) 390 | 391 | * Bioconductor 3.12, October 2020 392 | 393 | ## zellkonverter 1.0.3 (2021-03-08) 394 | 395 | * Avoid converting `obsp` and `varp` to dense matrices 396 | 397 | ## zellkonverter 1.0.2 (2021-01-28) 398 | 399 | * Merge remaining commits for HDF5 conversion (fixes #33) 400 | 401 | ## zellkonverter 1.0.1 (2021-01-26) 402 | 403 | * Improved support for HDF5 backed conversion (PR #29, fixes #13, fixes #33) 404 | 405 | ## zellkonverter 0.99.7 (2020-10-16) 406 | 407 | * Update Python dependencies 408 | * **numpy** 1.18.5 -> 1.19.1 409 | * **pandas** 1.0.4 -> 1.1.2 410 | * **scipy** 1.4.1 -> 1.5.2 411 | * **sqlite** 3.30.1 -> 3.33.0 412 | 413 | ## zellkonverter 0.99.6 (2020-10-12) 414 | 415 | * Document character to factor coercion in `writeH5AD()` (Fixes #6) 416 | * Add `X_name` argument to `writeH5AD()` (Fixes #23) 417 | 418 | ## zellkonverter 0.99.5 
(2020-09-28) 419 | 420 | * Tidy NEWS files for Bioconductor release 421 | 422 | ## zellkonverter 0.99.4 (2020-08-28) 423 | 424 | * Bump anndata version to 0.7.4 425 | 426 | ## zellkonverter 0.99.3 (2020-08-21) 427 | 428 | * Document the `krumsiek11.h5ad` file 429 | * Remove the `internal` keyword from the `zellkonverter-package` documentation 430 | 431 | ## zellkonverter 0.99.2 (2020-08-21) 432 | 433 | * Update `.gitignore` 434 | 435 | ## zellkonverter 0.99.1 (2020-07-15) 436 | 437 | * Fix SCE to AnnData map figure in PDF manual 438 | * Use `expect_equal()` instead of `expect_identical()` in `writeH5AD()` sparse 439 | matrices test 440 | * Edit package title and description 441 | 442 | ## zellkonverter 0.99.0 (2020-07-10) 443 | 444 | * Initial Bioconductor submission 445 | 446 | # zellkonverter 0.0.0 (early development version) 447 | 448 | ## zellkonverter 0.0.0.9017 (2020-07-10) 449 | 450 | * Add biocViews to DESCRIPTION 451 | * Edit package description 452 | * Tidy code 453 | * Replace 1:... 
with `seq_len()` 454 | 455 | ## zellkonverter 0.0.0.9016 (2020-07-10) 456 | 457 | * Add check for **scRNAseq** in examples (Fixes #18) 458 | 459 | ## zellkonverter 0.0.0.9015 (2020-07-02) 460 | 461 | * Skip `AnnData` matrices without a transposable R counterpart 462 | * Only replace skipped matrices when `use_hdf5 = TRUE` in `readH5AD()` 463 | (Fixes #12) 464 | * Additional tests for sparse matrices 465 | 466 | ## zellkonverter 0.0.0.9014 (2020-06-30) 467 | 468 | * Allow assay skipping when converting from `SingleCellExperiment` to `AnnData` 469 | * Allow skipping of assays that aren't **numpy** friendly in `writeH5AD()` 470 | * Wait for **basilisk** process shutdown to release `.h5ad` file 471 | * Updates to documentation and tests 472 | 473 | ## zellkonverter 0.0.0.9013 (2020-06-25) 474 | 475 | * Improve conversion between `SingleCellExperiment` and `AnnData` (See #8) 476 | * Convert between `metadata` and `uns` (where objects are compatible) 477 | * Convert between `rowPairs` and `varp` 478 | * Convert between `colPairs` and `obsp` 479 | * Convert from `varm` to `rowData` (but not in reverse) 480 | * Add mapping table to docs 481 | 482 | ## zellkonverter 0.0.0.9012 (2020-06-19) 483 | 484 | * Tidy documentation and code 485 | * Tidy vignette 486 | 487 | ## zellkonverter 0.0.0.9011 (2020-06-18) 488 | 489 | * Support for HDF5Array outputs in `readH5AD()` (Fixes #4) 490 | 491 | ## zellkonverter 0.0.0.9010 (2020-06-17) 492 | 493 | * Avoid checking column names for `colData` and `rowData` in `SCE2AnnData()` 494 | * Make sure that all matrices passed to **{reticulate}** are **numpy** friendly 495 | * Add more tests 496 | * Update vignette front matter 497 | 498 | ## zellkonverter 0.0.0.9009 (2020-06-15) 499 | 500 | * Add vignette 501 | 502 | ## zellkonverter 0.0.0.9008 (2020-06-12) 503 | 504 | * Add examples and improve documentation 505 | * Export `.AnnDataDependencies` for external use 506 | 507 | ## zellkonverter 0.0.0.9007 (2020-06-11) 508 | 509 | * Add 
`SCE2AnnData()` function 510 | * Add `writeH5AD()` function 511 | 512 | ## zellkonverter 0.0.0.9006 (2020-06-11) 513 | 514 | * Use internal function in `readH5AD()` 515 | 516 | ## zellkonverter 0.0.0.9005 (2020-06-09) 517 | 518 | * Rename `adata2SCE()` to `AnnData2SCE()` 519 | * Remove **{basilisk}** context from `AnnData2SCE()` (See #1) 520 | * Now uses the calling context 521 | 522 | ## zellkonverter 0.0.0.9004 (2020-06-09) 523 | 524 | * Pin more **AnnData** dependencies (See #1) 525 | 526 | ## zellkonverter 0.0.0.9003 (2020-06-08) 527 | 528 | * Add test `.h5ad` file 529 | * Add test for `readH5AD()` 530 | * Add package man page 531 | 532 | ## zellkonverter 0.0.0.9002 (2020-06-08) 533 | 534 | * Add `adata2SCE()` function 535 | * Add `readH5AD()` function 536 | 537 | ## zellkonverter 0.0.0.9001 (2020-06-08) 538 | 539 | * Add **{basilisk}** infrastructure 540 | 541 | ## zellkonverter 0.0.0.9000 (2020-06-08) 542 | 543 | * Set up package 544 | -------------------------------------------------------------------------------- /R/SCE2AnnData.R: -------------------------------------------------------------------------------- 1 | #' @rdname AnnData-Conversion 2 | #' 3 | #' @param sce A \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment} 4 | #' object. 5 | #' @param X_name For `SCE2AnnData()` name of the assay to use as the primary 6 | #' matrix (`X`) of the AnnData object. If `NULL`, the first assay of `sce` will 7 | #' be used by default. For `AnnData2SCE()` name used when saving `X` as an 8 | #' assay. If `NULL` looks for an `X_name` value in `uns`, otherwise uses `"X"`. 9 | #' @param assays,colData,rowData,reducedDims,metadata,colPairs,rowPairs 10 | #' Arguments specifying how these slots are converted. If `TRUE` everything in 11 | #' that slot is converted, if `FALSE` nothing is converted and if a character 12 | #' vector only those items or columns are converted. 
#' @param verbose Logical scalar indicating whether to print progress messages.
#' If `NULL` uses `getOption("zellkonverter.verbose")`.
#'
#' @export
#' @importFrom utils capture.output
#' @importFrom S4Vectors metadata make_zero_col_DFrame
#' @importFrom reticulate import r_to_py py_to_r
SCE2AnnData <- function(sce, X_name = NULL, assays = TRUE, colData = TRUE,
                        rowData = TRUE, varm = TRUE, reducedDims = TRUE,
                        metadata = TRUE, colPairs = TRUE, rowPairs = TRUE,
                        skip_assays = FALSE, verbose = NULL) {
    anndata <- import("anndata")

    # Create a list to store parts of the AnnData. Each element is named after
    # the AnnData constructor argument it is passed to at the end.
    adata_list <- list()

    .ui_process(
        "Converting {.field SingleCellExperiment} to {.field AnnData}"
    )

    if (is.null(X_name)) {
        .ui_step(
            "Selecting {.field X matrix}",
            msg_done = "Selected {.field X matrix}"
        )
        if (length(assays(sce)) == 0) {
            stop("'sce' does not contain any assays")
        }
        # Default to the first assay when no X_name is given
        X_name <- assayNames(sce)[1]
        cli::cli_alert_info(
            "Using the {.field '{X_name}'} assay as the {.field X matrix}"
        )
        cli::cli_progress_done()
    }

    .ui_step(
        "Converting {.field assays${X_name}} to {.field X matrix}",
        msg_done = "{.field assays${X_name}} converted to {.field X matrix}"
    )
    if (!skip_assays) {
        X <- assay(sce, X_name)
        X <- .makeNumpyFriendly(X)
    } else {
        cli::cli_alert_warning(paste(
            "{.field skip_assays} is {.field TRUE}",
            "so {.field X/layers} will be empty"
        ))
        # The placeholder uses reversed dimensions (cells x features) and is
        # reused below for every layer
        X <- fake_mat <- .make_fake_mat(rev(dim(sce)))
    }
    # NOTE: Previously dtype was set here but was removed due to deprecation
    adata_list$X <- reticulate::r_to_py(X)
    cli::cli_progress_done()

    # Every assay other than X becomes a layer
    assay_names <- assayNames(sce)
    assay_names <- assay_names[!assay_names == X_name]
    if (isFALSE(assays)) {
        .ui_info("Skipping conversion of {.field assays}")
    } else if (length(assay_names) == 0) {
        .ui_info("No {.field additional assays} present, assays were skipped")
    } else {
        .ui_step(
            "Converting {.field additional assays} to {.field layers}",
            msg_done = "{.field additional assays} converted to {.field layers}"
        )
        if (is.character(assays)) {
            if (!all(assays %in% assay_names)) {
                missing <- assays[!c(assays %in% assay_names)]
                .ui_warn(
                    "These selected assays are not in the object: {.field {missing}}"
                )
            }
            assay_names <- assay_names[assay_names %in% assays]
        }
        if (!skip_assays) {
            assays_list <- assays(sce, withDimnames = FALSE)
            assays_list <- lapply(assays_list[assay_names], .makeNumpyFriendly)
        } else {
            assays_list <- rep(list(fake_mat), length(assay_names))
            names(assays_list) <- assay_names
        }
        adata_list$layers <- assays_list
        cli::cli_progress_done()
    }

    if (isFALSE(colData)) {
        .ui_info("Skipping conversion of {.field colData}")
    } else {
        sce <- .store_non_atomic(sce, "colData")
        adata_list$obs <- .convert_sce_df(
            colData(sce), "colData", "obs",
            select = colData
        )
    }

    if (is.null(adata_list$obs)) {
        # Add a dummy data.frame if obs is currently empty
        adata_list$obs <- as.data.frame(make_zero_col_DFrame(ncol(sce)))
    }

    # Convert to python now because python DFs can have duplicates in
    # their index
    adata_list$obs <- r_to_py(adata_list$obs)
    if (!is.null(colnames(sce))) {
        adata_list$obs$index <- colnames(sce)
    } else if (ncol(adata_list$obs) == 0) {
        # If there are no colnames and obs has no columns delete it
        adata_list$obs <- NULL
    } else {
        # Otherwise convert the index to string
        adata_list$obs$index <- adata_list$obs$index$astype("str")
    }

    if (!is.null(int_metadata(sce)$has_varm)) {
        # The varm column is always removed from rowData here, even when its
        # conversion is skipped, so it is not converted as part of var below
        varm_list <- as.list(rowData(sce)[["varm"]])
        rowData(sce)[["varm"]] <- NULL

        if (isFALSE(varm)) {
            .ui_info("Skipping conversion of {.field rowData$varm}")
        } else {
            .ui_step(
                "Converting {.field rowData$varm} to {.field varm}",
                msg_done = "{.field rowData$varm} converted to {.field varm}"
            )

            if (is.character(varm)) {
                varm <- .check_select(varm, "rowData$varm", names(varm_list))
                varm_list <- varm_list[varm]
            }

            adata_list$varm <- varm_list
            cli::cli_progress_done()
        }
    } else {
        .ui_info("{.field rowData$varm} is empty and was skipped")
    }

    if (isFALSE(rowData)) {
        .ui_info("Skipping conversion of {.field rowData}")
    } else {
        sce <- .store_non_atomic(sce, "rowData")
        adata_list$var <- .convert_sce_df(rowData(sce), "rowData", "var",
            select = rowData
        )
    }

    if (is.null(adata_list$var)) {
        # Add a dummy data.frame if var is currently empty
        adata_list$var <- as.data.frame(make_zero_col_DFrame(nrow(sce)))
    }

    # Convert to python now because python DFs can have duplicates in
    # their index
    adata_list$var <- r_to_py(adata_list$var)
    if (!is.null(rownames(sce))) {
        adata_list$var$index <- rownames(sce)
    } else if (ncol(adata_list$var) == 0) {
        # If there are no rownames and var has no columns delete it
        adata_list$var <- NULL
    } else {
        # Otherwise convert the index to string
        adata_list$var$index <- adata_list$var$index$astype("str")
    }

    if (inherits(sce, "SpatialExperiment")) {
        coords <- SpatialExperiment::spatialCoords(sce)
        if (ncol(coords) > 1) {
            # Only report the addition when coordinates are actually stored
            .ui_info("Adding {.field spatialCoords} to {.field reducedDim}")
            SingleCellExperiment::reducedDim(sce, "spatial") <- coords
        }
    }

    if (isFALSE(reducedDims)) {
        .ui_info("Skipping conversion of {.field reducedDims}")
    } else if (length(reducedDims(sce)) == 0) {
        .ui_info("{.field reducedDims} is empty and was skipped")
    } else {
        .ui_step(
            "Converting {.field reducedDims} to {.field obsm}",
            msg_done = "{.field reducedDims} converted to {.field obsm}"
        )
        red_dims <- as.list(reducedDims(sce))
        if (is.character(reducedDims)) {
            reducedDims <- .check_select(
                reducedDims, "reducedDims", names(red_dims)
            )
            red_dims <- red_dims[reducedDims]
        }
        red_dims <- lapply(red_dims, .makeNumpyFriendly, transpose = FALSE)
        red_dims <- lapply(red_dims, function(rd) {
            # Items with column names are passed as data frames (keeping the
            # names) and share the obs index when obs is present
            if (!is.null(colnames(rd))) {
                rd <- r_to_py(as.data.frame(rd))
                if (!is.null(adata_list$obs)) {
                    rd <- rd$set_axis(adata_list$obs$index)
                }
            }

            rd
        })
        adata_list$obsm <- red_dims
        cli::cli_progress_done()
    }

    # X_name is always stored in uns, even when metadata conversion is skipped
    uns_list <- list()
    uns_list[["X_name"]] <- X_name
    if (isFALSE(metadata)) {
        .ui_info("Skipping conversion of {.field metadata}")
    } else if (length(metadata(sce)) == 0) {
        .ui_info("{.field metadata} is empty and was skipped")
    } else {
        .ui_step(
            "Converting {.field metadata} to {.field uns}",
            msg_done = "{.field metadata} converted to {.field uns}"
        )
        meta_list <- .addListNames(metadata(sce))
        if (is.character(metadata)) {
            metadata <- .check_select(metadata, "metadata", names(meta_list))
            meta_list <- meta_list[metadata]
        }
        for (item_name in names(meta_list)) {
            item <- meta_list[[item_name]]
            tryCatch(
                {
                    # Try to convert the item using reticulate, skip if it fails
                    # Capture the object output printed by reticulate
                    capture.output(r_to_py(item))
                    uns_list[[item_name]] <- item
                },
                error = function(err) {
                    .ui_warn(paste(
                        "The {.field {item_name}} item in {.field metadata}",
                        "cannot be converted to a Python type and has been",
                        "skipped"
                    ))
                }
            )
        }
        cli::cli_progress_done()
    }

    adata_list$uns <- r_to_py(uns_list)

    # Both pair slots go through the same helper so that the `colPairs` and
    # `rowPairs` arguments (TRUE, FALSE or a selection) are respected and each
    # slot is reported exactly once
    adata_list$obsp <- .convert_sce_pairs(sce, "colPairs", "obsp", colPairs)
    adata_list$varp <- .convert_sce_pairs(sce, "rowPairs", "varp", rowPairs)

    do.call(anndata$AnnData, adata_list)
}

#' @importFrom methods as is
#' @importClassesFrom Matrix CsparseMatrix
#' @importFrom DelayedArray is_sparse
#' @importFrom Matrix t
# Original code from Charlotte Soneson in kevinrue/velociraptor
#
# Prepare a matrix-like object for conversion by reticulate. The input is
# optionally transposed; sparse inputs are coerced to a CsparseMatrix (and then
# to an RsparseMatrix when transposed), everything else to a base matrix.
.makeNumpyFriendly <- function(x, transpose = TRUE) {
    if (transpose) {
        x <- t(x)
    }

    if (is_sparse(x)) {
        x <- as(x, "CsparseMatrix")
        if (transpose) {
            x <- as(x, "RsparseMatrix")
        }
        x
    } else {
        as.matrix(x)
    }
}

# Make sure every element of a list has a syntactically valid, unique name.
# Unnamed elements are called "item<position>"; existing names are passed
# through make.names(unique = TRUE). Empty lists are returned unchanged.
.addListNames <- function(x) {
    if (length(x) == 0) {
        return(x)
    }

    if (is.null(names(x))) {
        names(x) <- paste0("item", seq_along(x))
        return(x)
    }

    list_names <- names(x)
    is_empty <- list_names == ""
    list_names[is_empty] <- paste0("item", seq_along(x)[is_empty])
    list_names <- make.names(list_names, unique = TRUE)

    names(x) <- list_names

    return(x)
}

# Move non-atomic columns of rowData/colData into metadata(sce)$.rowData or
# metadata(sce)$.colData and drop them from the data frame. Returns the
# modified SCE, or the input unchanged when every column is atomic.
.store_non_atomic <- function(sce, slot = c("rowData", "colData")) {
    slot <- match.arg(slot)

    df <- switch(slot,
        rowData = rowData(sce),
        colData = colData(sce)
    )

    is_atomic <- vapply(df, is.atomic, logical(1))

    if (all(is_atomic)) {
        return(sce)
    }

    # Name of the metadata item the columns are stored in, also used in the
    # warning so that it reports the correct location for either slot
    meta_slot <- paste0(".", slot)

    non_atomic_cols <- colnames(df)[!is_atomic]
    .ui_warn(paste(
        "The following {.field {slot}} columns are not atomic and will be",
        "stored in {.field metadata(sce)${meta_slot}} before conversion:",
        "{.val {non_atomic_cols}}"
    ))

    if (meta_slot %in% names(metadata(sce))) {
        meta_list <- metadata(sce)[[meta_slot]]
    } else {
        meta_list <- list()
    }

    for (col in non_atomic_cols) {
        # NOTE(review): taking the first element of make.names() means an
        # existing entry with the same name is overwritten -- confirm this is
        # the intended behaviour
        store_name <- make.names(c(col, names(meta_list)), unique = TRUE)[1]
        meta_list[[store_name]] <- df[[col]]
    }

    df[non_atomic_cols] <- NULL
    metadata(sce)[[meta_slot]] <- meta_list

    if (slot == "rowData") {
        rowData(sce) <- df
    } else {
        colData(sce) <- df
    }

    return(sce)
}

# Restrict a character selection to the items that exist in `options`, warning
# about any that do not. Returns the valid part of `select`.
.check_select <- function(select, slot_name, options) {
    # Presumably picked up by the .ui_* helpers to decide whether to print --
    # TODO confirm against ui.R
    verbose <- parent.frame()$verbose

    if (!all(select %in% options)) {
        missing <- select[!c(select %in% options)]
        .ui_warn(paste(
            "These selected {.field {slot_name}} items are not in the",
            "object: {.field {missing}}"
        ))
    }

    select <- select[select %in% options]

    return(select)
}

# Convert a rowData/colData DataFrame to a base data.frame, optionally keeping
# only the selected columns. Returns NULL when there are no columns or none of
# the selected columns exist.
.convert_sce_df <- function(sce_df, slot_name, to_name, select = TRUE) {
    if (ncol(sce_df) == 0) {
        .ui_info("{.field {slot_name}} is empty and was skipped")
        return(NULL)
    }

    .ui_step(
        "Converting {.field {slot_name}} to {.field {to_name}}",
        msg_done = "{.field {slot_name}} converted to {.field {to_name}}"
    )
    if (is.character(select)) {
        select <- .check_select(select, slot_name, colnames(sce_df))

        if (length(select) == 0) {
            return(NULL)
        }

        df <- sce_df[, select, drop = FALSE]
    } else {
        df <- sce_df
    }

    # check.names = FALSE keeps the original column names unchanged
    df <- do.call(
        data.frame,
        c(
            as.list(df),
            check.names = FALSE,
            stringsAsFactors = FALSE
        )
    )
    cli::cli_progress_done()

    return(df)
}

# Extract rowPairs/colPairs as a list of sparse matrices, optionally keeping
# only the selected items. Returns NULL when conversion is switched off or the
# slot is empty.
.convert_sce_pairs <- function(sce, slot_name = c("rowPairs", "colPairs"),
                               to_name, select) {
    slot_name <- match.arg(slot_name)


    if (isFALSE(select)) {
        .ui_info("Skipping conversion of {.field {slot_name}}")
        return(NULL)
    }

    pairs <- switch(slot_name,
        rowPairs = as.list(rowPairs(sce, asSparse = TRUE)),
        colPairs = as.list(colPairs(sce, asSparse = TRUE))
    )

    if (length(pairs) == 0) {
        .ui_info("{.field {slot_name}} is empty and was skipped")
        return(NULL)
    }

    .ui_step(
        "Converting {.field {slot_name}} to {.field {to_name}}",
        msg_done = "{.field {slot_name}} converted to {.field {to_name}}"
    )

    if (is.character(select)) {
        select <- .check_select(select, slot_name, names(pairs))
        pairs <- pairs[select]
    }
    cli::cli_progress_done()

    return(pairs)
}

--------------------------------------------------------------------------------
/R/basilisk.R:
--------------------------------------------------------------------------------
#' AnnData environment
#'
#' The Python environment used by **zellkonverter** for interfacing with the
#' **anndata** Python library (and H5AD files) is described by the dependencies
#' returned by `AnnDataDependencies()`. The `zellkonverterAnnDataEnv()`
#' function returns the [basilisk::BasiliskEnvironment()] containing these
#' dependencies used by **zellkonverter**. Allowed versions of **anndata** are
#' available in `.AnnDataVersions`.
#'
#' @details
#'
#' ## Using Python environments
#'
#' When a **zellkonverter** function is first run a conda environment
#' containing all of the necessary dependencies for that version will be
#' instantiated. This will
#' not be performed on any subsequent run or if any other **zellkonverter**
#' function has been run prior with the same environment version.
#'
#' By default the **zellkonverter** conda environment will become the shared R
#' Python environment if one does not already exist. When one does exist (for
#' example when a **zellkonverter** function has already been run using
#' a different environment version) then a separate environment will be used.
#' See [basilisk::setBasiliskShared()] for more information on this behaviour.
#' Note that when the environment is not shared progress messages are lost.
#'
#' ## Development
#'
#' The `AnnDataDependencies()` function is exposed for use by other package
#' developers who want an easy way to define the dependencies required for
#' creating a Python environment to work with AnnData objects, most typically
#' within a **basilisk** context. For example, we can simply combine this
#' vector with additional dependencies to create a **basilisk** environment with
#' Python package versions that are consistent with those in **zellkonverter**.
#'
#' If you want to run code in the exact environment used by **zellkonverter**
#' this can be done using `zellkonverterAnnDataEnv()` in combination with
#' [basilisk::basiliskStart()] and/or [basilisk::basiliskRun()]. Please refer to
#' the **basilisk** documentation for more information on using these
#' environments.
#'
#' @author Luke Zappia
#' @author Aaron Lun
#'
#' @examples
#' .AnnDataVersions
#'
#' AnnDataDependencies()
#' AnnDataDependencies(version = "0.7.6")
#'
#' cl <- basilisk::basiliskStart(zellkonverterAnnDataEnv())
#' anndata <- reticulate::import("anndata")
#' basilisk::basiliskStop(cl)
#' @name AnnData-Environment
#' @rdname AnnData-Environment
NULL

#' @rdname AnnData-Environment
#'
#' @format
#' For `.AnnDataVersions` a character vector containing allowed **anndata**
#' version strings.
#'
#' @export
# Listed newest first, one version per line.
.AnnDataVersions <- c(
    "0.11.4",
    "0.10.9",
    "0.10.6",
    "0.10.2",
    "0.9.2",
    "0.8.0",
    "0.7.6"
)

#' @rdname AnnData-Environment
#'
#' @param version A string giving the version of the **anndata** Python library
#' to use. Allowed values are available in `.AnnDataVersions`. By default the
#' latest version is used.
#'
#' @returns
#' For `AnnDataDependencies` a character vector containing the pinned versions
#' of all Python packages to be used by `zellkonverterAnnDataEnv()`.
#'
#' @export
AnnDataDependencies <- function(version = .AnnDataVersions) {
    version <- match.arg(version)

    # Pinned package sets, keyed by anndata version
    pinned <- list(
        "0.7.6" = c(
            "anndata==0.7.6",
            "h5py==3.2.1",
            "hdf5==1.10.6",
            "natsort==7.1.1",
            "numpy==1.20.2",
            "packaging==20.9",
            "pandas==1.2.4",
            "python==3.7.10",
            "scipy==1.6.3",
            "sqlite==3.35.5"
        ),
        "0.8.0" = c(
            "anndata==0.8.0",
            "h5py==3.6.0",
            "hdf5==1.12.1",
            "natsort==8.1.0",
            "numpy==1.22.3",
            "packaging==21.3",
            "pandas==1.4.2",
            "python==3.8.13",
            "scipy==1.7.3",
            "sqlite==3.38.2"
        ),
        "0.9.2" = c(
            "anndata==0.9.2",
            "h5py==3.9.0",
            "hdf5==1.14.2",
            "natsort==8.4.0",
            "numpy==1.26.0",
            "packaging==23.2",
            "pandas==2.1.1",
            "python==3.11.5",
            "scipy==1.11.3"
        ),
        "0.10.2" = c(
            "anndata==0.10.2",
            "h5py==3.10.0",
            "hdf5==1.14.2",
            "natsort==8.4.0",
            "numpy==1.26.0",
            "packaging==23.2",
            "pandas==2.1.1",
            "python==3.11.5",
            "scipy==1.11.3"
        ),
        "0.10.6" = c(
            "anndata==0.10.6",
            "h5py==3.10.0",
            "hdf5==1.14.3",
            "natsort==8.4.0",
            "numpy==1.26.4",
            "packaging==24.0",
            "pandas==2.2.1",
            "python==3.12.2",
            "scipy==1.12.0"
        ),
        "0.10.9" = c(
            "anndata==0.10.9",
            "h5py==3.12.1",
            "hdf5==1.14.3",
            "natsort==8.4.0",
            "numpy==2.1.2",
            "packaging==24.1",
            "pandas==2.2.3",
            "python==3.12.7",
            "scipy==1.14.1"
        ),
        "0.11.4" = c(
            "anndata==0.11.4",
            "h5py==3.13.0",
            "hdf5==1.14.3",
            "natsort==8.4.0",
            "numpy==2.2.4",
            "packaging==24.2",
            "pandas==2.2.3",
            "python==3.13.2",
            "scipy==1.15.2"
        )
    )

    pinned[[version]]
}

#' @rdname AnnData-Environment
#'
#' @return
#' For `zellkonverterAnnDataEnv` a [basilisk::BasiliskEnvironment()] containing
#' **zellkonverter**'s AnnData Python environment.
#'
#' @include ui.R
#' @export
zellkonverterAnnDataEnv <- function(version = .AnnDataVersions) {
    version <- match.arg(version)

    # One environment per anndata version, distinguished by the name suffix
    env_name <- paste0("zellkonverterAnnDataEnv-", version)
    env_packages <- AnnDataDependencies(version)

    basilisk::BasiliskEnvironment(
        envname = env_name,
        pkgname = "zellkonverter",
        packages = env_packages
    )
}

# Instantiate environments so they can be found by
# `basilisk::configureBasiliskEnv()` when `BASILISK_USE_SYSTEM_DIR=1`.
# See https://github.com/theislab/zellkonverter/issues/66.
anndata_env_0.7.6 <- zellkonverterAnnDataEnv(version = "0.7.6")
anndata_env_0.8.0 <- zellkonverterAnnDataEnv(version = "0.8.0")
anndata_env_0.9.2 <- zellkonverterAnnDataEnv(version = "0.9.2")
anndata_env_0.10.2 <- zellkonverterAnnDataEnv(version = "0.10.2")
anndata_env_0.10.6 <- zellkonverterAnnDataEnv(version = "0.10.6")
anndata_env_0.10.9 <- zellkonverterAnnDataEnv(version = "0.10.9")
anndata_env_0.11.4 <- zellkonverterAnnDataEnv(version = "0.11.4")

--------------------------------------------------------------------------------
/R/read.R:
--------------------------------------------------------------------------------
#' Read H5AD
#'
#' Reads a H5AD file and returns a
#' \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
#' object.
#'
#' @param file String containing a path to a `.h5ad` file.
#' @param X_name Name used when saving `X` as an assay. If `NULL` looks for an
#' `X_name` value in `uns`, otherwise uses `"X"`.
#' @param use_hdf5 Logical scalar indicating whether assays should be
#' loaded as HDF5-based matrices from the **HDF5Array** package.
#' @param reader Which HDF5 reader to use. Either `"python"` for reading with
#' the **anndata** Python package via **reticulate** or `"R"` for
#' **zellkonverter**'s native R reader.
#' @param version A string giving the version of the **anndata** Python library
#' to use. Allowed values are available in `.AnnDataVersions`. By default the
#' latest version is used.
#' @param verbose Logical scalar indicating whether to print progress messages.
#' If `NULL` uses `getOption("zellkonverter.verbose")`.
#' @inheritDotParams AnnData2SCE -adata -hdf5_backed
#'
#' @details
#' Setting `use_hdf5 = TRUE` allows for very large datasets to be efficiently
#' represented on machines with little memory. However, this comes at the cost
#' of access speed as data needs to be fetched from the HDF5 file upon request.
#'
#' Setting `reader = "R"` will use an experimental native R reader instead of
#' reading the file into Python and converting the result. This avoids the need
#' for a Python environment and some of the issues with conversion but is still
#' under development and is likely to return slightly different output.
#'
#' See [AnnData-Environment] for more details on **zellkonverter** Python
#' environments.
#'
#' @return A \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
#' object is returned.
37 | #' 38 | #' @examples 39 | #' library(SummarizedExperiment) 40 | #' 41 | #' file <- system.file("extdata", "krumsiek11.h5ad", package = "zellkonverter") 42 | #' sce <- readH5AD(file) 43 | #' class(assay(sce)) 44 | #' 45 | #' sce2 <- readH5AD(file, use_hdf5 = TRUE) 46 | #' class(assay(sce2)) 47 | #' 48 | #' sce3 <- readH5AD(file, reader = "R") 49 | #' @author Luke Zappia 50 | #' @author Aaron Lun 51 | #' 52 | #' @seealso 53 | #' [`writeH5AD()`], to write a 54 | #' \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment} 55 | #' object to a H5AD file. 56 | #' 57 | #' [`AnnData2SCE()`], for developers to convert existing AnnData instances to a 58 | #' \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}. 59 | #' 60 | #' @export 61 | #' @importFrom basilisk basiliskRun 62 | #' @importFrom methods slot 63 | readH5AD <- function(file, X_name = NULL, use_hdf5 = FALSE, 64 | reader = c("python", "R"), version = NULL, 65 | verbose = NULL, ...) { 66 | file <- path.expand(file) 67 | reader <- match.arg(reader) 68 | 69 | if (reader == "python") { 70 | .ui_info("Using the {.field Python} reader") 71 | env <- zellkonverterAnnDataEnv(version) 72 | version <- gsub("zellkonverterAnnDataEnv-", "", slot(env, "envname")) 73 | .ui_info("Using {.field anndata} version {.field {version}}") 74 | 75 | sce <- basiliskRun( 76 | env = env, 77 | fun = .H5ADreader, 78 | testload = "anndata", 79 | file = file, 80 | X_name = X_name, 81 | backed = use_hdf5, 82 | verbose = verbose, 83 | ... 84 | ) 85 | } else if (reader == "R") { 86 | sce <- .native_reader(file, backed = use_hdf5, verbose = verbose) 87 | } 88 | 89 | return(sce) 90 | } 91 | 92 | #' @importFrom reticulate import 93 | .H5ADreader <- function(file, X_name = NULL, backed = FALSE, verbose = NULL, ...) 
{ 94 | anndata <- import("anndata") 95 | .ui_step( 96 | "Reading {.file { .trim_path(file)} }", 97 | msg_done = "Read {.file { .trim_path(file) }}", 98 | spinner = TRUE 99 | ) 100 | adata <- anndata$read_h5ad(file, backed = if (backed) "r" else FALSE) 101 | cli::cli_progress_done() 102 | 103 | AnnData2SCE( 104 | adata, 105 | X_name = X_name, hdf5_backed = backed, verbose = verbose, ... 106 | ) 107 | } 108 | 109 | #' @importFrom S4Vectors I DataFrame wmsg 110 | #' @importFrom SummarizedExperiment assays assays<- rowData colData rowData<- colData<- 111 | #' @importFrom SingleCellExperiment SingleCellExperiment reducedDims<- colPairs<- rowPairs<- 112 | .native_reader <- function(file, backed = FALSE, verbose = FALSE) { 113 | .ui_info("Using the {.field R} reader") 114 | .ui_step("Reading {.file {file}}", spinner = TRUE) 115 | 116 | contents <- .list_contents(file) 117 | 118 | all.assays <- list() 119 | 120 | # Let's read in the X matrix first... if it's there. 121 | if ("X" %in% names(contents)) { 122 | all.assays[["X"]] <- .read_matrix(file, "X", contents[["X"]], backed = backed) 123 | } 124 | 125 | for (layer in names(contents[["layers"]])) { 126 | tryCatch( 127 | { 128 | all.assays[[layer]] <- .read_matrix( 129 | file, 130 | file.path("layers", layer), 131 | contents[["layers"]][[layer]], 132 | backed = backed 133 | ) 134 | }, 135 | error = function(e) { 136 | warning(wmsg( 137 | "setting additional assays from 'layers' failed for '", 138 | file, "':\n ", conditionMessage(e) 139 | )) 140 | } 141 | ) 142 | } 143 | 144 | sce <- SingleCellExperiment(all.assays) 145 | 146 | # Adding the various pieces of data. 
147 | tryCatch( 148 | { 149 | col_data <- .read_dim_data(file, "obs", contents[["obs"]]) 150 | if (!is.null(col_data)) { 151 | colData(sce) <- col_data 152 | } 153 | }, 154 | error = function(e) { 155 | warning(wmsg( 156 | "setting 'colData' failed for '", file, "':\n ", 157 | conditionMessage(e) 158 | )) 159 | } 160 | ) 161 | 162 | tryCatch( 163 | { 164 | row_data <- .read_dim_data(file, "var", contents[["var"]]) 165 | if (!is.null(row_data)) { 166 | rowData(sce) <- row_data 167 | # Manually set SCE rownames, because setting rowData 168 | # doesn't seem to set them. (Even tho setting colData 169 | # does set the colnames) 170 | rownames(sce) <- rownames(row_data) 171 | } 172 | }, 173 | error = function(e) { 174 | warning(wmsg( 175 | "setting 'rowData' failed for '", file, "':\n ", 176 | conditionMessage(e) 177 | )) 178 | } 179 | ) 180 | 181 | # Adding the reduced dimensions and other bits and pieces. 182 | tryCatch( 183 | { 184 | reducedDims(sce) <- .read_dim_mats(file, "obsm", contents[["obsm"]]) 185 | }, 186 | error = function(e) { 187 | warning(wmsg( 188 | "setting 'reducedDims' failed for '", file, "':\n ", 189 | conditionMessage(e) 190 | )) 191 | } 192 | ) 193 | 194 | tryCatch( 195 | { 196 | row_mat <- .read_dim_mats(file, "varm", contents[["varm"]]) 197 | if (length(row_mat)) { 198 | row_mat_df <- do.call(DataFrame, lapply(row_mat, I)) 199 | rowData(sce) <- cbind(rowData(sce), row_mat_df) 200 | } 201 | }, 202 | error = function(e) { 203 | warning(wmsg( 204 | "extracting 'varm' failed for '", file, "':\n ", 205 | conditionMessage(e) 206 | )) 207 | } 208 | ) 209 | 210 | # Adding pairings, if any exist. 
211 | tryCatch( 212 | { 213 | rowPairs(sce) <- .read_dim_pairs(file, "varp", contents[["varp"]]) 214 | }, 215 | error = function(e) { 216 | warning(wmsg( 217 | "setting 'rowPairs' failed for '", file, "':\n ", 218 | conditionMessage(e) 219 | )) 220 | } 221 | ) 222 | 223 | tryCatch( 224 | { 225 | colPairs(sce) <- .read_dim_pairs(file, "obsp", contents[["obsp"]]) 226 | }, 227 | error = function(e) { 228 | warning(wmsg( 229 | "setting 'colPairs' failed for '", file, "':\n ", 230 | conditionMessage(e) 231 | )) 232 | } 233 | ) 234 | 235 | if ("uns" %in% names(contents)) { 236 | tryCatch( 237 | { 238 | uns <- rhdf5::h5read(file, "uns") 239 | uns <- .convert_element( 240 | uns, "uns", file, 241 | recursive = TRUE 242 | ) 243 | metadata(sce) <- uns 244 | }, 245 | error = function(e) { 246 | warning(wmsg( 247 | "setting 'metadata' failed for '", file, "':\n ", 248 | conditionMessage(e) 249 | )) 250 | } 251 | ) 252 | } 253 | 254 | if (("X_name" %in% names(metadata(sce))) && ("X" %in% names(contents))) { 255 | stopifnot(names(assays(sce))[1] == "X") # should be true b/c X is read 1st 256 | names(assays(sce))[1] <- metadata(sce)[["X_name"]] 257 | metadata(sce)[["X_name"]] <- NULL 258 | } 259 | 260 | sce 261 | } 262 | 263 | .list_contents <- function(file) { 264 | manifest <- rhdf5::h5ls(file) 265 | 266 | set_myself <- function(x, series, value) { 267 | if (length(series) != 1) { 268 | value <- set_myself(x[[series[1]]], series[-1], value) 269 | } 270 | if (is.null(x)) { 271 | x <- list() 272 | } 273 | x[[series[1]]] <- value 274 | 275 | x 276 | } 277 | 278 | contents <- list() 279 | for (i in seq_len(nrow(manifest))) { 280 | components <- c( 281 | strsplit(manifest[i, "group"], "/")[[1]], manifest[i, "name"] 282 | ) 283 | if (components[1] == "") { 284 | components <- components[-1] 285 | } 286 | 287 | info <- manifest[i, c("otype", "dclass", "dim")] 288 | if (info$otype == "H5I_GROUP") { 289 | info <- list() 290 | } 291 | contents <- set_myself(contents, components, info) 292 
| } 293 | 294 | contents 295 | } 296 | 297 | .read_matrix <- function(file, path, fields, backed) { 298 | if (is.data.frame(fields)) { 299 | mat <- HDF5Array::HDF5Array(file, path) 300 | } else { 301 | mat <- HDF5Array::H5SparseMatrix(file, path) 302 | } 303 | 304 | if (!backed) { 305 | if (DelayedArray::is_sparse(mat)) { 306 | mat <- as(mat, "sparseMatrix") 307 | } else { 308 | mat <- as.matrix(mat) 309 | } 310 | } 311 | 312 | mat 313 | } 314 | 315 | .convert_element <- function(obj, path, file, recursive = FALSE) { 316 | element_attrs <- rhdf5::h5readAttributes(file, path) 317 | 318 | # Convert categorical element for AnnData v0.8+ 319 | if (identical(element_attrs[["encoding-type"]], "categorical") && 320 | all(c("codes", "categories") %in% names(obj))) { 321 | codes <- obj[["codes"]] + 1 322 | codes[codes == 0] <- NA 323 | levels <- obj[["categories"]] 324 | 325 | ord <- as.logical(element_attrs[["ordered"]]) 326 | 327 | obj <- factor(levels[codes], levels = levels, ordered = ord) 328 | return(obj) 329 | } 330 | 331 | # Handle booleans. 
Non-nullable booleans have encoding-type 332 | # "array", so we have to infer the type from the enum levels 333 | if (is.factor(obj) && identical(levels(obj), c("FALSE", "TRUE"))) { 334 | obj <- as.logical(obj) 335 | return(obj) 336 | } 337 | 338 | # Recursively convert element members 339 | if (recursive && is.list(obj) && !is.null(names(obj))) { 340 | for (k in names(obj)) { 341 | obj[[k]] <- rhdf5::h5read(file, file.path(path, k)) 342 | obj[[k]] <- .convert_element( 343 | obj[[k]], file.path(path, k), 344 | file, 345 | recursive = TRUE 346 | ) 347 | } 348 | } 349 | 350 | if (is.list(obj) && !is.null(names(obj))) { 351 | names(obj) <- make.names(names(obj)) 352 | } 353 | 354 | obj 355 | } 356 | 357 | #' @importFrom S4Vectors DataFrame 358 | .read_dim_data <- function(file, path, fields) { 359 | col_names <- setdiff(names(fields), "__categories") 360 | out_cols <- list() 361 | for (col_name in col_names) { 362 | vec <- rhdf5::h5read(file, file.path(path, col_name)) 363 | 364 | vec <- .convert_element( 365 | vec, file.path(path, col_name), 366 | file, 367 | recursive = FALSE 368 | ) 369 | 370 | if (!is.factor(vec)) { 371 | vec <- as.vector(vec) 372 | } 373 | 374 | out_cols[[col_name]] <- vec 375 | } 376 | 377 | # for AnnData versions <= 0.7: codes are shifted by one for R's 1-based indexing and any resulting non-positive index (a missing value, as handled for v0.8+ in .convert_element) becomes NA, because a zero index would silently drop the element and shorten the column 378 | cat_names <- names(fields[["__categories"]]) 379 | for (cat_name in cat_names) { 380 | levels <- as.vector( 381 | rhdf5::h5read(file, file.path(path, "__categories", cat_name)) 382 | ) 383 | codes <- out_cols[[cat_name]] + 1L 384 | out_cols[[cat_name]] <- factor(levels[replace(codes, codes < 1L, NA)], levels = levels) 385 | } 386 | 387 | ## rhdf5::h5readAttributes(file, "var") |> str() 388 | ## List of 4 389 | ## $ _index : chr "feature_id" 390 | ## $ column-order : chr [1:4(1d)] "feature_is_filtered" "feature_name" "feature_reference" "feature_biotype" 391 | ## $ encoding-type : chr "dataframe" 392 | ## $ encoding-version: chr "0.2.0" 393 | attributes <- rhdf5::h5readAttributes(file, path) 394 | index <- attributes[["_index"]] 395 | if 
(!is.null(index)) { 396 | indices <- out_cols[[index]] 397 | } else { 398 | indices <- NULL 399 | } 400 | 401 | column_order <- attributes[["column-order"]] 402 | if (!is.null(column_order)) { 403 | out_cols <- out_cols[column_order] 404 | } 405 | 406 | if (length(out_cols)) { 407 | df <- do.call(DataFrame, out_cols) 408 | rownames(df) <- indices 409 | } else if (!is.null(indices)) { 410 | df <- DataFrame(row.names = indices) 411 | } else { 412 | df <- NULL 413 | } 414 | 415 | df 416 | } 417 | 418 | .read_dim_mats <- function(file, path, fields) { 419 | all.contents <- list() 420 | for (field in names(fields)) { 421 | # Because everything's transposed. 422 | all.contents[[field]] <- t(rhdf5::h5read(file, file.path(path, field))) 423 | } 424 | all.contents 425 | } 426 | 427 | .read_dim_pairs <- function(file, path, fields) { 428 | all.pairs <- list() 429 | for (field in names(fields)) { 430 | mat <- HDF5Array::H5SparseMatrix(file, file.path(path, field)) 431 | all.pairs[[field]] <- as(mat, "sparseMatrix") 432 | } 433 | all.pairs 434 | } 435 | -------------------------------------------------------------------------------- /R/reticulate.R: -------------------------------------------------------------------------------- 1 | #' Convert between Python and R objects 2 | #' 3 | #' @param x A Python object. 4 | #' 5 | #' @return An \R object, as converted from the Python object. 
6 | #' 7 | #' @details 8 | #' These functions are extensions of the default conversion functions in the 9 | #' `reticulate` package for the following reasons: 10 | #' 11 | #' - `numpy.ndarray` - Handle conversion of **numpy** recarrays 12 | #' - `pandas.core.arrays.masked.BaseMaskedArray` - Handle conversion of 13 | #' **pandas** arrays (used by `AnnData` objects when there are missing 14 | #' values) 15 | #' - `pandas.core.arrays.categorical.Categorical` - Handle conversion of 16 | #' **pandas** categorical arrays 17 | #' 18 | #' @author Luke Zappia 19 | #' 20 | #' @seealso 21 | #' [reticulate::py_to_r()] for the base `reticulate` functions 22 | #' 23 | #' @name r-py-conversion 24 | #' @export 25 | py_to_r.numpy.ndarray <- function(x) { 26 | disable_conversion_scope(x) 27 | 28 | # Suggested method to detect recarrays from 29 | # https://stackoverflow.com/a/62491135/4384120 30 | if (!is.null(py_to_r(x$dtype$names))) { 31 | # Convert via pandas DataFrame as suggested here 32 | # https://stackoverflow.com/a/60614003/4384120 33 | # Not as efficient but less messing around with types 34 | pandas <- import("pandas", convert = FALSE) 35 | out <- tryCatch( 36 | { 37 | x <- pandas$DataFrame(x)$to_numpy() 38 | py_to_r(x) 39 | }, 40 | error = function(err) { 41 | stop("Failed to convert recarray with error: ", err$message, 42 | call. 
= FALSE 43 | ) 44 | } 45 | ) 46 | return(out) 47 | } 48 | 49 | # No special handler found, delegate to next method 50 | NextMethod() 51 | } 52 | 53 | #' @export 54 | py_to_r.pandas.core.arrays.masked.BaseMaskedArray <- function(x) { 55 | disable_conversion_scope(x) 56 | 57 | if (is(x, "pandas.core.arrays.boolean.BooleanArray")) { 58 | dtype <- "bool" 59 | fill <- FALSE 60 | } else if (is(x, "pandas.core.arrays.integer.IntegerArray")) { 61 | dtype <- "int" 62 | fill <- 0L 63 | } else if (is(x, "pandas.core.arrays.floating.FloatingArray")) { 64 | dtype <- "float" 65 | fill <- 0.0 66 | } else if (is(x, "pandas.core.arrays.string_.StringArray")) { 67 | dtype <- "str" 68 | fill <- "" 69 | } else { 70 | stop( 71 | "No conversion exists for this Pandas array type: ", 72 | paste(class(x), collapse = ", ") 73 | ) 74 | } 75 | 76 | # Record which values should be NA 77 | is_na <- reticulate::py_to_r(x$isna()) 78 | 79 | # Fill NA values with a dummy 80 | x <- x$fillna(value = fill) 81 | 82 | # Convert to numpy array and then to R using default conversion 83 | x <- x$to_numpy()$astype(dtype) 84 | x <- reticulate::py_to_r(x) 85 | 86 | # Restore the NA values 87 | x[is_na] <- NA 88 | 89 | return(x) 90 | } 91 | 92 | #' @export 93 | py_to_r.pandas.core.arrays.categorical.Categorical <- function(x) { 94 | disable_conversion_scope(x) 95 | 96 | # Get the category levels 97 | cats <- reticulate::py_to_r(x$categories$to_list()) 98 | 99 | # Record which values should be NA 100 | is_na <- reticulate::py_to_r(x$isna()) 101 | 102 | # Fill NA values with a dummy 103 | x <- x$fillna(value = cats[1]) 104 | 105 | # Convert to list and then to R using default conversion 106 | x <- x$tolist() 107 | x <- reticulate::py_to_r(x) 108 | 109 | # Restore the NA values 110 | x[is_na] <- NA 111 | 112 | # Convert to factor 113 | x <- factor(x, levels = cats) 114 | 115 | return(x) 116 | } 117 | -------------------------------------------------------------------------------- /R/ui.R: 
-------------------------------------------------------------------------------- 1 | #' Set zellkonverter verbose 2 | #' 3 | #' Set the zellkonverter verbosity option 4 | #' 5 | #' @param verbose Logical value for the verbosity option. 6 | #' 7 | #' @details 8 | #' Running `setZellkonverterVerbose(TRUE)` will turn on **zellkonverter** 9 | #' progress messages by default without having to set `verbose = TRUE` in each 10 | #' function call. This is done by setting the `"zellkonverter.verbose"` option. 11 | #' Running `setZellkonverterVerbose(FALSE)` will turn default verbosity off. 12 | #' 13 | #' @return The value of getOption("zellkonverter.verbose") invisibly 14 | #' @export 15 | #' 16 | #' @examples 17 | #' current <- getOption("zellkonverter.verbose") 18 | #' setZellkonverterVerbose(TRUE) 19 | #' getOption("zellkonverter.verbose") 20 | #' setZellkonverterVerbose(FALSE) 21 | #' getOption("zellkonverter.verbose") 22 | #' setZellkonverterVerbose(current) 23 | #' getOption("zellkonverter.verbose") 24 | setZellkonverterVerbose <- function(verbose = TRUE) { 25 | options(zellkonverter.verbose = isTRUE(verbose)) 26 | invisible(getOption("zellkonverter.verbose")) 27 | } 28 | 29 | .get_verbose <- function(envir) { 30 | verbose <- envir$verbose 31 | 32 | if (is.null(verbose)) { 33 | verbose <- getOption("zellkonverter.verbose") 34 | } 35 | 36 | isTRUE(verbose) 37 | } 38 | 39 | .ui_rule <- function(msg, ...) { 40 | envir <- parent.frame() 41 | 42 | if (.get_verbose(envir)) { 43 | cli::cli_rule(msg, ..., .envir = envir) 44 | } 45 | } 46 | 47 | .ui_info <- function(msg, ...) { 48 | envir <- parent.frame() 49 | 50 | if (.get_verbose(envir)) { 51 | cli::cli_alert_info(msg, ..., .envir = envir) 52 | } 53 | } 54 | 55 | .ui_warn <- function(msg, warn = TRUE, ...) 
{ 56 | envir <- parent.frame() 57 | 58 | msg <- cli::format_message(msg, .envir = envir) 59 | 60 | if (.get_verbose(envir)) { 61 | cli::cli_alert_warning(msg, ..., .envir = envir) 62 | } 63 | 64 | if (warn) { 65 | warning(msg, call. = FALSE) 66 | } 67 | } 68 | 69 | .ui_step <- function(msg, ...) { 70 | envir <- parent.frame() 71 | 72 | if (.get_verbose(envir)) { 73 | cli::cli_progress_step(msg, ..., .envir = envir) 74 | } 75 | } 76 | 77 | .ui_process <- function(msg, ...) { 78 | envir <- parent.frame() 79 | 80 | if (.get_verbose(envir)) { 81 | cli::cli_process_start(msg, ..., .envir = envir) 82 | } 83 | } 84 | 85 | .ui_process_done <- function(...) { 86 | envir <- parent.frame() 87 | 88 | if (.get_verbose(envir)) { 89 | cli::cli_process_done(..., .envir = envir) 90 | } 91 | } 92 | 93 | .trim_path <- function(path, n = 40) { 94 | path_split <- .split_path(path) 95 | 96 | for (level in seq_along(path_split)) { 97 | trimmed_path <- do.call(file.path, as.list(path_split)) 98 | trimmed_path <- gsub("^//", "/", trimmed_path) 99 | if (nchar(trimmed_path) <= n) { 100 | break 101 | } else if (nchar(path_split[level]) >= 3) { 102 | path_split[level] <- "..." 
103 | } 104 | } 105 | 106 | return(trimmed_path) 107 | } 108 | 109 | .split_path <- function(path) { 110 | if (dirname(path) != path) { 111 | path <- c(.split_path(dirname(path)), basename(path)) 112 | } 113 | 114 | return(path) 115 | } 116 | -------------------------------------------------------------------------------- /R/utils.R: -------------------------------------------------------------------------------- 1 | # yoinked from reticulate -> 2 | # https://github.com/rstudio/reticulate/blob/fe0eda154a80b22c0d45e043b74390b73ab8b64e/R/utils.R#L49 3 | yoink <- function(package, symbol) { 4 | do.call(":::", list(package, symbol)) 5 | } 6 | disable_conversion_scope <- yoink("reticulate", "disable_conversion_scope") 7 | -------------------------------------------------------------------------------- /R/validation.R: -------------------------------------------------------------------------------- 1 | #' Validate H5AD SCE 2 | #' 3 | #' Validate a SingleCellExperiment created by `readH5AD()`. Designed to be used 4 | #' inside `testthat::test_that()` during package testing. 5 | #' 6 | #' @param sce A \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment} 7 | #' object. 8 | #' @param names Named list of expected names. Names are slots and values are 9 | #' vectors of names that are expected to exist in that slot. 10 | #' @param missing Named list of known missing names. Names are slots and values 11 | #' are vectors of names that are expected to not exist in that slot. 12 | #' 13 | #' @details 14 | #' This function checks that a SingleCellExperiment contains the expected items 15 | #' in each slot. The main reason for this function is to avoid repeating code when 16 | #' testing multiple `.h5ad` files. 
The following items in `names` and `missing` 17 | #' are recognised: 18 | #' 19 | #' * `assays` - Assay names 20 | #' * `colData` - colData column names 21 | #' * `rowData` - rowData column names 22 | #' * `metadata` - metadata names 23 | #' * `redDim` - Reduced dimension names 24 | #' * `varm` - Column names of the `varm` rowData column (from the AnnData varm 25 | #' slot) 26 | #' * `colPairs` - Column pair names 27 | #' * `rowPairs` - rowData pair names 28 | #' * `raw_rowData` - rowData columns names in the `raw` altExp 29 | #' * `raw_varm` - Column names of the raw `varm` rowData column (from the 30 | #' AnnData varm slot) 31 | #' 32 | #' If an item in `names` or `missing` is `NULL` then it won't be checked. The 33 | #' items in `missing` are checked that they explicitly do not exist. This is 34 | #' mostly for record keeping when something is known to not be converted but can 35 | #' also be useful when the corresponding `names` item is `NULL`. 36 | #' 37 | #' @return If checks are successful `TRUE` invisibly, if not other output 38 | #' depending on the context 39 | #' 40 | #' @author Luke Zappia 41 | validateH5ADSCE <- function(sce, names, missing) { 42 | if ("varm" %in% colnames(SummarizedExperiment::rowData(sce))) { 43 | varm <- SummarizedExperiment::rowData(sce)$varm 44 | SummarizedExperiment::rowData(sce)$varm <- NULL 45 | } else { 46 | varm <- NULL 47 | } 48 | 49 | .names_validator( 50 | "Assay names", 51 | SummarizedExperiment::assayNames(sce), 52 | names$assays, 53 | missing$assays 54 | ) 55 | 56 | .names_validator( 57 | "colData names", 58 | colnames(SummarizedExperiment::colData(sce)), 59 | names$colData, 60 | missing$colData 61 | ) 62 | 63 | .names_validator( 64 | "rowData names", 65 | colnames(SummarizedExperiment::rowData(sce)), 66 | names$rowData, 67 | missing$rowData 68 | ) 69 | 70 | .names_validator( 71 | "metadata names", 72 | names(S4Vectors::metadata(sce)), 73 | names$metadata, 74 | missing$metadata 75 | ) 76 | 77 | .names_validator( 78 | 
"redDim names", 79 | SingleCellExperiment::reducedDimNames(sce), 80 | names$redDim, 81 | missing$redDim 82 | ) 83 | 84 | .names_validator( 85 | "varm names", 86 | colnames(varm), 87 | names$varm, 88 | missing$varm 89 | ) 90 | 91 | .names_validator( 92 | "colPairs names", 93 | names(SingleCellExperiment::colPairs(sce)), 94 | names$colPairs, 95 | missing$colPairs 96 | ) 97 | 98 | .names_validator( 99 | "rowPairs names", 100 | names(SingleCellExperiment::rowPairs(sce)), 101 | names$rowPairs, 102 | missing$rowPairs 103 | ) 104 | 105 | if ("raw" %in% altExpNames(sce)) { 106 | raw_rowData <- SummarizedExperiment::rowData(altExp(sce, "raw")) 107 | 108 | if ("varm" %in% colnames(raw_rowData)) { 109 | raw_varm <- raw_rowData$varm 110 | raw_rowData$varm <- NULL 111 | } else { 112 | varm <- NULL 113 | } 114 | 115 | .names_validator( 116 | "raw rowData names", 117 | colnames(raw_rowData), 118 | names$raw_rowData, 119 | missing$raw_rowData 120 | ) 121 | 122 | .names_validator( 123 | "varm names", 124 | colnames(raw_varm), 125 | names$raw_varm, 126 | missing$raw_varm 127 | ) 128 | } 129 | 130 | invisible(TRUE) 131 | } 132 | 133 | .names_validator <- function(label, actual_names, correct_names, missing_names) { 134 | if (!is.null(correct_names)) { 135 | testthat::expect_identical( 136 | actual_names, 137 | correct_names, 138 | label = label 139 | ) 140 | } 141 | 142 | if (!is.null(missing_names)) { 143 | testthat::expect_true( 144 | !any(missing_names %in% actual_names), 145 | label = paste(label, "missing") 146 | ) 147 | } 148 | 149 | invisible(TRUE) 150 | } 151 | 152 | #' Expect SCE 153 | #' 154 | #' Test that a SingleCellExperiment matches an expected object. Designed to be 155 | #' used inside `testhat::test_that()` during package testing. 156 | #' 157 | #' @param sce A \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment} 158 | #' object. 
159 | #' @param expected A template \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment} 160 | #' object to compare to. 161 | #' 162 | #' @return `TRUE` invisibly if checks pass 163 | #' 164 | #' @author Luke Zappia 165 | expectSCE <- function(sce, expected) { 166 | testthat::expect_identical(dimnames(sce), dimnames(expected)) 167 | if (length(metadata(expected)) > 0) { 168 | testthat::expect_identical( 169 | S4Vectors::metadata(sce), 170 | S4Vectors::metadata(expected) 171 | ) 172 | } 173 | testthat::expect_identical( 174 | SummarizedExperiment::assayNames(sce), 175 | SummarizedExperiment::assayNames(expected) 176 | ) 177 | for (assay in SummarizedExperiment::assayNames(expected)) { 178 | testthat::expect_equal( 179 | SummarizedExperiment::assay(sce, assay), 180 | SummarizedExperiment::assay(expected, assay) 181 | ) 182 | } 183 | testthat::expect_identical( 184 | SingleCellExperiment::reducedDims(sce), 185 | SingleCellExperiment::reducedDims(expected) 186 | ) 187 | testthat::expect_identical( 188 | SummarizedExperiment::rowData(sce), 189 | SummarizedExperiment::rowData(expected) 190 | ) 191 | testthat::expect_identical( 192 | SummarizedExperiment::colData(sce), 193 | SummarizedExperiment::colData(expected) 194 | ) 195 | testthat::expect_identical( 196 | SingleCellExperiment::rowPairs(sce), 197 | SingleCellExperiment::rowPairs(expected) 198 | ) 199 | testthat::expect_identical( 200 | SingleCellExperiment::colPairs(sce), 201 | SingleCellExperiment::colPairs(expected) 202 | ) 203 | 204 | invisible(TRUE) 205 | } 206 | -------------------------------------------------------------------------------- /R/write.R: -------------------------------------------------------------------------------- 1 | #' Write H5AD 2 | #' 3 | #' Write a H5AD file from a 4 | #' \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment} 5 | #' object. 
6 | #' 7 | #' @param sce A \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment} 8 | #' object. 9 | #' @param file String containing a path to write the new `.h5ad` file. 10 | #' @param X_name Name of the assay to use as the primary matrix (`X`) of the 11 | #' AnnData object. If `NULL`, the first assay of `sce` will be used by default. 12 | #' @param skip_assays Logical scalar indicating whether assay matrices should 13 | #' be ignored when writing to `file`. 14 | #' @param compression Type of compression when writing the new `.h5ad` file. 15 | #' @param version A string giving the version of the **anndata** Python library 16 | #' to use. Allowed values are available in `.AnnDataVersions`. By default the 17 | #' latest version is used. 18 | #' @param verbose Logical scalar indicating whether to print progress messages. 19 | #' If `NULL` uses `getOption("zellkonverter.verbose")`. 20 | #' @inheritDotParams SCE2AnnData 21 | #' 22 | #' @details 23 | #' 24 | #' ## Skipping assays 25 | #' 26 | #' Setting `skip_assays = TRUE` can occasionally be useful if the matrices in 27 | #' `sce` are stored in a format that is not amenable for efficient conversion 28 | #' to a **numpy**-compatible format. In such cases, it can be better to create 29 | #' an empty placeholder dataset in `file` and fill it in R afterwards. 30 | #' 31 | #' ## **DelayedArray** assays 32 | #' 33 | #' If `sce` contains any **DelayedArray** matrices as assays `writeH5AD()` will 34 | #' write them to disk using the **rhdf5** package directly rather than via 35 | #' Python to avoid instantiating them in memory. However there is currently 36 | #' an issue which prevents this being done for sparse **DelayedArray** matrices. 37 | #' 38 | #' ## Known conversion issues 39 | #' 40 | #' ### Coercion to factors 41 | #' 42 | #' The **anndata** package automatically converts some character vectors to 43 | #' factors when saving `.h5ad` files. 
This can affect columns of `rowData(sce)` 44 | #' and `colData(sce)` which may change type when the `.h5ad` file is read back 45 | #' into R. 46 | #' 47 | #' ## Environment 48 | #' 49 | #' See [AnnData-Environment] for more details on **zellkonverter** Python 50 | #' environments. 51 | #' 52 | #' @return A `NULL` is invisibly returned. 53 | #' 54 | #' @author Luke Zappia 55 | #' @author Aaron Lun 56 | #' 57 | #' @seealso 58 | #' [`readH5AD()`], to read a 59 | #' \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment} 60 | #' object from a H5AD file. 61 | #' 62 | #' [`SCE2AnnData()`], for developers to create an AnnData object from a 63 | #' \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}. 64 | #' 65 | #' @examples 66 | #' # Using the Zeisel brain dataset 67 | #' if (requireNamespace("scRNAseq", quietly = TRUE)) { 68 | #' library(scRNAseq) 69 | #' sce <- ZeiselBrainData() 70 | #' 71 | #' # Writing to a H5AD file 72 | #' temp <- tempfile(fileext = ".h5ad") 73 | #' writeH5AD(sce, temp) 74 | #' } 75 | #' @export 76 | #' @importFrom basilisk basiliskRun 77 | #' @importFrom Matrix sparseMatrix 78 | #' @importFrom DelayedArray is_sparse 79 | writeH5AD <- function( 80 | sce, file, X_name = NULL, skip_assays = FALSE, 81 | compression = c("none", "gzip", "lzf"), version = NULL, 82 | verbose = NULL, ...) { 83 | compression <- match.arg(compression) 84 | 85 | if (compression == "none") { 86 | compression <- NULL 87 | } 88 | 89 | # Loop over and replace DelayedArrays. 
90 | ass_list <- assays(sce) 91 | is_da <- logical(length(ass_list)) 92 | for (a in seq_along(ass_list)) { 93 | # Skip sparse DelayedArrays due to rhdf5 issue 94 | # https://github.com/grimbough/rhdf5/issues/79 95 | if (is(ass_list[[a]], "DelayedMatrix") && !is_sparse(ass_list[[a]])) { 96 | is_da[a] <- TRUE 97 | assay(sce, a, withDimnames = FALSE) <- .make_fake_mat(dim(sce)) 98 | } 99 | } 100 | 101 | env <- zellkonverterAnnDataEnv(version) 102 | version <- gsub("zellkonverterAnnDataEnv-", "", slot(env, "envname")) 103 | .ui_info("Using {.field anndata} version {.field {version}}") 104 | 105 | file <- path.expand(file) 106 | basiliskRun( 107 | env = env, 108 | fun = .H5ADwriter, 109 | testload = "anndata", 110 | sce = sce, 111 | file = file, 112 | X_name = X_name, 113 | skip_assays = skip_assays, 114 | compression = compression, 115 | verbose = verbose, 116 | ... 117 | ) 118 | 119 | # Going back out and replacing each of them. 120 | if (any(is_da)) { 121 | for (p in which(is_da)) { 122 | if (p == 1L) { 123 | curp <- "X" 124 | } else { 125 | curp <- file.path("layers", assayNames(sce)[p]) 126 | } 127 | rhdf5::h5delete(file, curp) 128 | mat <- ass_list[[p]] 129 | 130 | if (!is_sparse(mat)) { 131 | HDF5Array::writeHDF5Array( 132 | mat, 133 | filepath = file, name = curp, with.dimnames = FALSE 134 | ) 135 | } else { 136 | .write_CSR_matrix(file, name = curp, mat = mat) 137 | } 138 | } 139 | } 140 | 141 | invisible(NULL) 142 | } 143 | 144 | #' @importFrom reticulate import 145 | .H5ADwriter <- function(sce, file, X_name, skip_assays, compression, 146 | verbose = NULL, ...) { 147 | adata <- SCE2AnnData( 148 | sce, 149 | X_name = X_name, skip_assays = skip_assays, verbose = verbose, ... 
150 | ) 151 | .ui_step( 152 | "Writing {.file { .trim_path(file)} }", 153 | msg_done = "Wrote {.file { .trim_path(file)} }", 154 | spinner = TRUE 155 | ) 156 | if (!is.null(compression)) { 157 | .ui_info("Using {.field compression} compression") 158 | } 159 | adata$write_h5ad(file, compression = compression) 160 | } 161 | 162 | # nocov start 163 | 164 | # Skipping code coverage on these functions because they aren't used until the 165 | # sparse DelayedArray rhdf5 issue mentioned above is addressed 166 | 167 | #' @importFrom DelayedArray blockApply rowAutoGrid type 168 | .write_CSR_matrix <- function(file, name, mat, chunk_dim = 10000) { 169 | handle <- rhdf5::H5Fopen(file) 170 | on.exit(rhdf5::H5Fclose(handle)) 171 | 172 | rhdf5::h5createGroup(handle, name) 173 | ghandle <- rhdf5::H5Gopen(handle, name) 174 | on.exit(rhdf5::H5Gclose(ghandle), add = TRUE, after = FALSE) 175 | 176 | rhdf5::h5writeAttribute("csc_matrix", ghandle, "encoding-type") 177 | rhdf5::h5writeAttribute("0.1.0", ghandle, "encoding-version") 178 | rhdf5::h5writeAttribute(rev(dim(mat)), ghandle, "shape") 179 | 180 | rhdf5::h5createDataset( 181 | handle, 182 | file.path(name, "data"), 183 | dims = 0, 184 | maxdims = rhdf5::H5Sunlimited(), 185 | H5type = if (type(mat) == "integer") { 186 | "H5T_NATIVE_INT32" 187 | } else { 188 | "H5T_NATIVE_DOUBLE" 189 | }, 190 | chunk = chunk_dim 191 | ) 192 | 193 | rhdf5::h5createDataset( 194 | handle, 195 | file.path(name, "indices"), 196 | dims = 0, 197 | maxdims = rhdf5::H5Sunlimited(), 198 | H5type = "H5T_NATIVE_UINT32", 199 | chunk = chunk_dim 200 | ) 201 | 202 | env <- new.env() # persist the 'last' counter.
203 | env$last <- 0L 204 | out <- blockApply( 205 | mat, 206 | grid = rowAutoGrid(mat), 207 | FUN = .blockwise_sparse_writer, 208 | env = env, 209 | file = handle, 210 | name = name, 211 | as.sparse = TRUE 212 | ) 213 | 214 | out <- as.double(unlist(out)) 215 | iname <- file.path(name, "indptr") 216 | 217 | rhdf5::h5createDataset( 218 | handle, 219 | iname, 220 | dims = length(out) + 1L, 221 | H5type = "H5T_NATIVE_UINT64" 222 | ) 223 | 224 | rhdf5::h5writeDataset(c(0, cumsum(out)), handle, iname) 225 | } 226 | 227 | #' @importFrom DelayedArray nzdata nzindex 228 | .blockwise_sparse_writer <- function(block, env, file, name) { 229 | nzdex <- nzindex(block) 230 | i <- nzdex[, 1] 231 | j <- nzdex[, 2] 232 | v <- nzdata(block) 233 | 234 | o <- order(i) 235 | i <- i[o] 236 | j <- j[o] 237 | v <- v[o] 238 | 239 | last <- env$last 240 | index <- list(last + seq_along(j)) 241 | 242 | iname <- file.path(name, "indices") 243 | rhdf5::h5set_extent(file, iname, last + length(j)) 244 | rhdf5::h5writeDataset(j - 1L, file, iname, index = index) 245 | 246 | vname <- file.path(name, "data") 247 | rhdf5::h5set_extent(file, vname, last + length(j)) 248 | rhdf5::h5writeDataset(v, file, vname, index = index) 249 | 250 | env$last <- last + length(j) 251 | tabulate(i, nrow(block)) 252 | } 253 | 254 | # nocov end 255 | -------------------------------------------------------------------------------- /R/zellkonverter-package.R: -------------------------------------------------------------------------------- 1 | #' @import SummarizedExperiment 2 | #' @import SingleCellExperiment 3 | "_PACKAGE" 4 | 5 | # The following block is used by usethis to automatically manage 6 | # roxygen namespace tags. Modify with care! 
7 | ## usethis namespace: start 8 | ## usethis namespace: end 9 | NULL 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | zellkonverter logo 2 | 3 | # zellkonverter 4 | 5 | 6 | [![Project Status](http://www.repostatus.org/badges/latest/active.svg)](http://www.repostatus.org/#active) 7 | [![Lifecycle](https://lifecycle.r-lib.org/articles/figures/lifecycle-stable.svg)](https://lifecycle.r-lib.org/articles/stages.html#stable) 8 | [![Codecov test coverage](https://codecov.io/gh/theislab/zellkonverter/graph/badge.svg)](https://app.codecov.io/gh/theislab/zellkonverter) 9 | [![R-CMD-check-bioc](https://github.com/theislab/zellkonverter/actions/workflows/check-bioc.yml/badge.svg)](https://github.com/theislab/zellkonverter/actions/workflows/check-bioc.yml) 10 | [![Bioc release status](http://www.bioconductor.org/shields/build/release/bioc/zellkonverter.svg)](https://bioconductor.org/checkResults/release/bioc-LATEST/zellkonverter) 11 | [![Bioc devel status](http://www.bioconductor.org/shields/build/devel/bioc/zellkonverter.svg)](https://bioconductor.org/checkResults/devel/bioc-LATEST/zellkonverter) 12 | [![Bioc downloads rank](https://bioconductor.org/shields/downloads/release/zellkonverter.svg)](http://bioconductor.org/packages/stats/bioc/zellkonverter/) 13 | [![Bioc support](https://bioconductor.org/shields/posts/zellkonverter.svg)](https://support.bioconductor.org/tag/zellkonverter) 14 | [![Bioc history](https://bioconductor.org/shields/years-in-bioc/zellkonverter.svg)](https://bioconductor.org/packages/release/bioc/html/zellkonverter.html#since) 15 | [![Bioc last commit](https://bioconductor.org/shields/lastcommit/devel/bioc/zellkonverter.svg)](http://bioconductor.org/checkResults/devel/bioc-LATEST/zellkonverter/) 16 | [![Bioc
dependencies](https://bioconductor.org/shields/dependencies/release/zellkonverter.svg)](https://bioconductor.org/packages/release/bioc/html/zellkonverter.html#since) 17 | 18 | 19 | **zellkonverter** is a small package for converting between SingleCellExperiment 20 | objects and alternative objects for storing single-cell RNA-sequencing data 21 | (such as AnnData). It is built on top of the [**basilisk**][basilisk] package. 22 | 23 | For documentation please refer to [Bioconductor][bioc]. Development 24 | documentation is also available on [Bioconductor devel][bioc-devel] or the 25 | [pkgdown site][pkgdown]. 26 | 27 | ## Installation 28 | 29 | **zellkonverter** can be installed from Bioconductor using the **BiocManager** 30 | package: 31 | 32 | ```r 33 | if (!requireNamespace("BiocManager", quietly=TRUE)) { 34 | install.packages("BiocManager") 35 | } 36 | BiocManager::install("zellkonverter") 37 | ``` 38 | 39 | ## Build status 40 | 41 | | Source | Checks | Updated | 42 | |:----------------:|:----------------:|:------------:| 43 | | [Bioc release](http://bioconductor.org/packages/release/bioc/html/zellkonverter.html) | [![Bioc release status](http://www.bioconductor.org/shields/build/release/bioc/zellkonverter.svg)](https://bioconductor.org/checkResults/release/bioc-LATEST/zellkonverter) | ![](http://bioconductor.org/shields/lastcommit/release/bioc/zellkonverter.svg) | 44 | | [Bioc devel](http://bioconductor.org/packages/devel/bioc/html/zellkonverter.html) | [![Bioc devel status](http://www.bioconductor.org/shields/build/devel/bioc/zellkonverter.svg)](https://bioconductor.org/checkResults/devel/bioc-LATEST/zellkonverter) | ![](http://bioconductor.org/shields/lastcommit/devel/bioc/zellkonverter.svg) | 45 | | [GitHub actions](https://github.com/theislab/zellkonverter/actions) | [![R-CMD-check-bioc](https://github.com/theislab/zellkonverter/actions/workflows/check-bioc.yml/badge.svg)](https://github.com/theislab/zellkonverter/actions) | ![GitHub last
commit](https://img.shields.io/github/last-commit/theislab/zellkonverter) | 46 | 47 | ## Code of Conduct 48 | 49 | Please note that the **zellkonverter** project is released with a 50 | [Contributor Code of Conduct](https://contributor-covenant.org/version/2/0/CODE_OF_CONDUCT.html). 51 | By contributing to this project, you agree to abide by its terms. 52 | 53 | ## Contributors 54 | 55 | 56 | 57 | 58 | 59 | Made with [contributors-img](https://contrib.rocks). 60 | 61 | [basilisk]: https://www.bioconductor.org/packages/basilisk/ "basilisk on Bioconductor" 62 | [bioc]: https://bioconductor.org/packages/zellkonverter/ "zellkonverter on Bioconductor" 63 | [bioc-devel]: https://bioconductor.org/packages/devel/bioc/html/zellkonverter.html "zellkonverter on Bioconductor devel" 64 | [pkgdown]: https://theislab.github.io/zellkonverter/ "zellkonverter pkgdown site" 65 | 66 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | target: auto 8 | threshold: 1% 9 | informational: true 10 | patch: 11 | default: 12 | target: auto 13 | threshold: 1% 14 | informational: true 15 | -------------------------------------------------------------------------------- /configure: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | ${R_HOME}/bin/Rscript -e "basilisk::configureBasiliskEnv()" 4 | -------------------------------------------------------------------------------- /configure.win: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | ${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe -e "basilisk::configureBasiliskEnv()" 4 | -------------------------------------------------------------------------------- /inst/NEWS.Rd: -------------------------------------------------------------------------------- 
1 | \name{NEWS} 2 | \title{News for Package \pkg{zellkonverter}} 3 | 4 | \section{Version 1.18.0, Bioconductor 3.21 Release (April 2025)}{ 5 | \subsection{New features}{ 6 | \itemize{ 7 | \item{ 8 | Add minimal support for \code{SpatialExperiment} objects to 9 | \code{writeH5AD()} and \code{SCE2AnnData()}. This stores the spatial 10 | coordinates in an \code{obsm} item named "spatial" as expected by the 11 | \bold{squidpy} Python package. (PR from @mcmero) 12 | } 13 | }} 14 | \subsection{Major changes}{ 15 | \itemize{ 16 | \item{ 17 | Add environment for \bold{anndata} v0.11.4. This is now the default 18 | environment for the Python reader/writer. 19 | } 20 | \item{ 21 | Modify \code{SCE2AnnData()} to convert sparse matrices to \code{dgRMatrix} 22 | when they are. This mostly applies to assays and should be more compatible 23 | with what is expected by Python packages. 24 | } 25 | }} 26 | \subsection{Minor changes}{ 27 | \itemize{ 28 | \item{ 29 | Add a \code{testload} argument to \code{basiliskRun()} calls which may 30 | help with problems creating Python environments 31 | } 32 | \item{ 33 | Updates to documentation and tests 34 | } 35 | }} 36 | \subsection{Bug fixes}{ 37 | \itemize{ 38 | \item{ 39 | Improve handling of missing row or column names in \code{SCE2AnnData()} 40 | } 41 | }} 42 | } 43 | 44 | \section{Version 1.16.0, Bioconductor 3.20 Release (October 2024)}{ 45 | \subsection{Major changes}{ 46 | \itemize{ 47 | \item{ 48 | Add environment for \bold{anndata} v0.10.9. This is now the default 49 | environment for the Python reader/writer.
50 | } 51 | }} 52 | \subsection{Minor changes}{ 53 | \itemize{ 54 | \item{ 55 | Avoid deprecation warning due to setting \code{dtype} when creating Python 56 | \code{AnnData} objects 57 | } 58 | \item{ 59 | Standardise code styling using \bold{{styler}} 60 | } 61 | \item{ 62 | Add test using the more complete example H5AD file 63 | } 64 | }} 65 | \subsection{Bug fixes}{ 66 | \itemize{ 67 | \item{ 68 | Correctly assign levels to factors in R reader with \bold{anndata} v0.7 69 | files 70 | } 71 | \item{ 72 | Correctly set \code{filepath} in the R reader when reading 73 | \code{adata.raw} with \code{use_hdf5 = TRUE} (PR from @GabrielHoffman) 74 | } 75 | }} 76 | } 77 | 78 | \section{Version 1.14.0, Bioconductor 3.19 Release (May 2024)}{ 79 | \subsection{Major changes}{ 80 | \itemize{ 81 | \item{ 82 | Add environment for \bold{anndata} v0.10.6. This is now the default 83 | environment for the Python reader/writer. 84 | } 85 | }} 86 | \subsection{Minor changes}{ 87 | \itemize{ 88 | \item{ 89 | Improve warnings when converting matrices fails 90 | } 91 | \item{ 92 | Minor change to writing \code{DelayedArray} matrices for compatibility 93 | with \bold{{HDF5Array}} >= v1.31.1 94 | } 95 | }} 96 | \subsection{Bug fixes}{ 97 | \itemize{ 98 | \item{ 99 | Correctly handle \code{use_backed = TRUE} with newer \bold{anndata} 100 | versions 101 | } 102 | \item{ 103 | Correctly instantiate the \bold{anndata} v0.10.2 environment 104 | } 105 | \item{ 106 | Minor fixes for typos etc. 107 | } 108 | }} 109 | } 110 | 111 | \section{Version 1.12.0, Bioconductor 3.18 Release (October 2023)}{ 112 | \subsection{Major changes}{ 113 | \itemize{ 114 | \item{ 115 | Add environments for \bold{anndata} v0.9.2 and v0.10.2. Version 0.10.2 is 116 | now the default environment for the Python reader/writer.
117 | } 118 | }} 119 | \subsection{Minor changes}{ 120 | \itemize{ 121 | \item{ 122 | Changes for compatibility with \bold{rhdf5} v2.45.1 including enum types 123 | that simplify reading of nullable types in the native R reader 124 | } 125 | \item{ 126 | Dimensions are now passed correctly when converting the \code{raw} slot 127 | } 128 | \item{ 129 | Backed sparse matrices are now converted in \code{AnnData2SCE()} 130 | } 131 | }} 132 | } 133 | 134 | \section{Version 1.10.0, Bioconductor 3.17 Release (April 2023)}{ 135 | \subsection{Major changes}{ 136 | \itemize{ 137 | \item{ 138 | Add compatibility with the \bold{anndata} v0.8 H5AD format to the 139 | native R writer (By @jackkamm and @mtmorgan) 140 | } 141 | \item{ 142 | Add functions for converting \bold{pandas} arrays used by \bold{anndata} 143 | when arrays have missing values 144 | } 145 | }} 146 | \subsection{Minor changes}{ 147 | \itemize{ 148 | \item{ 149 | Add Robrecht Cannoodt and Jack Kamm as contributors! 150 | } 151 | \item{ 152 | Minor adjustments to tests to match reader changes 153 | } 154 | }} 155 | } 156 | 157 | \section{Version 1.8.0, Bioconductor 3.16 Release (October 2022)}{ 158 | \subsection{Major changes}{ 159 | \itemize{ 160 | \item{ 161 | Improve compatibility with the R \bold{anndata} package. This required 162 | modifying conversion functions so that Python objects are explicitly 163 | converted rather than relying on automatic conversion. 164 | } 165 | \item{ 166 | Added support for \bold{numpy} recarrays. This solves a long-standing 167 | issue and allows results from \bold{scanpy}'s \code{rank_genes_groups()} 168 | function to be read.
169 | } 170 | }} 171 | \subsection{Minor changes}{ 172 | \itemize{ 173 | \item{ 174 | The Python version is now pinned in the \bold{anndata} v0.7.6 environment 175 | for compatibility with changes in \bold{basilisk} 176 | } 177 | \item{ 178 | Instantiate Python environments so they can be properly picked up by 179 | \code{basilisk::configureBasiliskEnv()} 180 | } 181 | \item{ 182 | Allow missing obs/var names when \code{use_hdf5 = TRUE} 183 | } 184 | \item{ 185 | Minor changes to the UI functions for compatibility with \bold{cli} v3.4.0 186 | } 187 | \item{ 188 | Minor changes for compatibility with \bold{Matrix} v1.4-2 189 | } 190 | \item{ 191 | Improvements to the UI for warnings 192 | } 193 | \item{ 194 | Updates and improvements to tests 195 | } 196 | }} 197 | } 198 | 199 | \section{Version 1.6.0, Bioconductor 3.15 Release (April 2022)}{ 200 | \subsection{Major changes}{ 201 | \itemize{ 202 | \item{ 203 | Added support for multiple \bold{basilisk} environments with different 204 | \bold{anndata} versions. Users can now specify the environment to use with 205 | options in \code{readH5AD()} and \code{writeH5AD()}. To facilitate this 206 | some exported objects were converted to functions but this should only 207 | affect developers. 208 | } 209 | \item{ 210 | Updated the default environment to use \bold{anndata} v0.8.0. This is a 211 | major update and files written with v0.8.0 cannot be read by previous 212 | \bold{anndata} versions. This was the motivation for supporting multiple 213 | environments and users can select the previous environment with 214 | \bold{anndata} v0.7.6 if compatibility is required. 215 | } 216 | \item{ 217 | Standardise naming in \code{AnnData2SCE()}. Column names of data frames 218 | and names of list items will now be modified to match R conventions 219 | (according to \code{make.names()}). When this happens a warning will be 220 | issued listing the modifications.
This makes sure that everything in the 221 | created \code{SingleCellExperiment} is accessible. 222 | } 223 | }} 224 | \subsection{Minor changes}{ 225 | \itemize{ 226 | \item{ 227 | Allow \code{data.frame}'s stored in \code{varm} to be converted in 228 | \code{SCE2AnnData()} 229 | } 230 | \item{ 231 | Minor updates to the vignette and other documentation. 232 | } 233 | \item{ 234 | Updates to tests to match the changes above. 235 | } 236 | }} 237 | } 238 | 239 | \section{Version 1.4.0, Bioconductor 3.14 Release (October 2021)}{ 240 | \itemize{ 241 | \item{ 242 | Add arguments to control how slots are converted in \code{AnnData2SCE()} 243 | and \code{SCE2AnnData()}. Each slot can now be fully converted, skipped 244 | entirely or only selected items converted. 245 | } 246 | \item{ 247 | Add support for converting the \code{raw} slot to an \code{altExp} in 248 | \code{AnnData2SCE()} 249 | } 250 | \item{ 251 | Add recursive conversion of lists in \code{AnnData2SCE()} 252 | } 253 | \item{ 254 | Add progress messages to various functions. These can be controlled by 255 | function arguments or a global variable. 256 | } 257 | \item{ 258 | Add long tests for various public datasets.
This should help to make the 259 | package more robust 260 | } 261 | \item{ 262 | Fix bug in converting \code{dgRMatrix} sparse matrices 263 | } 264 | \item{ 265 | Correctly handle \code{DataFrame} objects stored in \code{adata.obsm} 266 | } 267 | } 268 | } 269 | 270 | \section{Version 1.2.0, Bioconductor 3.13 Release (May 2021)}{ 271 | \itemize{ 272 | \item{ 273 | Update \strong{anndata} and other Python dependencies, now using 274 | \strong{anndata} v0.7.6 275 | } 276 | \item{ 277 | Improved conversion checks for all slots in \code{AnnData2SCE()} 278 | } 279 | \item{ 280 | Enable return conversion of the \code{varm} slot in \code{AnnData2SCE()} 281 | } 282 | \item{ 283 | Avoid converting \code{obsp} and \code{varp} to dense matrices in 284 | \code{AnnData2SCE()} 285 | } 286 | \item{ 287 | \code{AnnData2SCE()} should now always return \code{dgCMatrix} matrices 288 | when assays are sparse 289 | } 290 | \item{ 291 | More consistent conversion of \code{metadata} to \code{uns} in 292 | \code{SCE2AnnData()} 293 | } 294 | \item{ 295 | Handle conversion of list columns in \code{colData} and \code{rowData} in 296 | \code{SCE2AnnData()} 297 | } 298 | \item{ 299 | Better support for converting \strong{anndata} \code{SparseDataset} arrays 300 | } 301 | \item{ 302 | Improved support for conversion of HDF5 backed \code{AnnData} objects 303 | } 304 | \item{ 305 | Better support for writing \code{DelayedArray} assays in 306 | \code{writeH5AD()} 307 | } 308 | \item{ 309 | Store \code{X_name} in \code{AnnData2SCE()} for use by 310 | \code{SCE2AnnData()} and add an \code{X_name} argument to 311 | \code{AnnData2SCE()} and \code{readH5AD()} 312 | } 313 | \item{ 314 | Add a \code{compression} argument to \code{writeH5AD()} 315 | } 316 | \item{ 317 | Add an experimental native R reader to \code{readH5AD()} 318 | } 319 | \item{ 320 | Export \code{zellkonverterAnnDataEnv} for use by other packages 321 | } 322 | } 323 | } 324 | 325 | \section{Version 1.0.0, Bioconductor 3.12 Release 
(October 2020)}{ 326 | \itemize{ 327 | \item{Accepted into Bioconductor for Release 3.12} 328 | \item{ 329 | zellkonverter provides methods to convert between Python AnnData objects 330 | and SingleCellExperiment objects. These are primarily intended for use by 331 | downstream Bioconductor packages that wrap Python methods for single-cell 332 | data analysis. It also includes functions to read and write H5AD files 333 | used for saving AnnData objects to disk. 334 | } 335 | } 336 | } 337 | -------------------------------------------------------------------------------- /inst/WORDLIST: -------------------------------------------------------------------------------- 1 | AnnData 2 | Bioc 3 | BiocManager 4 | Codecov 5 | DataFrames 6 | DelayedArray 7 | GTEX 8 | HDF 9 | Lifecycle 10 | ORCID 11 | SCE 12 | SingleCellExperiment 13 | Zeisel 14 | altExp 15 | anndata 16 | biocViews 17 | cli 18 | colData 19 | conda 20 | dtype 21 | getOption 22 | hdf 23 | img 24 | mtmorgan 25 | natsort 26 | numpy 27 | obs 28 | pkgdown 29 | py 30 | recarrays 31 | rhdf 32 | rowData 33 | scRNA 34 | scRNAseq 35 | scipy 36 | seq 37 | sqlite 38 | var 39 | varm 40 | zellkonverter's 41 | -------------------------------------------------------------------------------- /inst/extdata/example_anndata.h5ad: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/theislab/zellkonverter/c17a17220088ff880d512c392d5de4aacb9e9bb1/inst/extdata/example_anndata.h5ad -------------------------------------------------------------------------------- /inst/extdata/krumsiek11.h5ad: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/theislab/zellkonverter/c17a17220088ff880d512c392d5de4aacb9e9bb1/inst/extdata/krumsiek11.h5ad -------------------------------------------------------------------------------- /inst/extdata/krumsiek11_augmented_v0-8.h5ad: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/theislab/zellkonverter/c17a17220088ff880d512c392d5de4aacb9e9bb1/inst/extdata/krumsiek11_augmented_v0-8.h5ad -------------------------------------------------------------------------------- /inst/scripts/example_anndata.R: -------------------------------------------------------------------------------- 1 | # This script was used to create the `example_anndata.h5ad` file. 2 | # This file contains an example AnnData object for use in examples and tests. 3 | # A dataset with 200 cells and 500 genes was generated using the Splat simulation in the Splatter package. 4 | # A Python AnnData object was created using this data (via reticulate) and run through a standard Scanpy analysis workflow to populate the various slots. 5 | # The file object was then saved to disk as a .h5ad file. 6 | # 7 | # Key package versions: 8 | # 9 | # splatter v1.14.0 10 | # reticulate v1.18 11 | # scanpy v1.5.1 12 | # anndata v0.7.4 13 | 14 | library(splatter) 15 | library(reticulate) 16 | 17 | mini_sim <- splatSimulateGroups(batchCells = 200, nGenes = 500, lib.loc = 8, 18 | group.prob = c(0.5, 0.5), seed = 1) 19 | 20 | anndata <- import("anndata") 21 | scanpy <- import("scanpy") 22 | 23 | adata <- anndata$AnnData(t(counts(mini_sim))) 24 | adata$obs_names <- colnames(mini_sim) 25 | adata$var_names <- rownames(mini_sim) 26 | adata$layers <- list(counts = t(counts(mini_sim))) 27 | 28 | scanpy$pp$filter_genes(adata, min_counts = 10) 29 | scanpy$pp$normalize_total(adata, target_sum = 1e4) 30 | scanpy$pp$log1p(adata) 31 | scanpy$pp$highly_variable_genes(adata) 32 | scanpy$tl$pca(adata, svd_solver = "arpack") 33 | scanpy$pp$neighbors(adata, n_pcs = 10L) 34 | scanpy$tl$umap(adata) 35 | scanpy$tl$louvain(adata) 36 | scanpy$tl$rank_genes_groups(adata, "louvain") 37 | 38 | adata$write_h5ad("example_anndata.h5ad") 39 | 
-------------------------------------------------------------------------------- /inst/scripts/krumsiek11.md: -------------------------------------------------------------------------------- 1 | The `krumsiek11.h5ad` file contains an AnnData object with a simulated myeloid 2 | progenitor scRNA-seq dataset. It was created using the\ 3 | `scanpy.datasets.krumsiek11()` function in the **scanpy** Python package and 4 | saved as a `.h5ad` file using the `adata.write()` method. This file is included 5 | in **zellkonverter** for tests and examples that require reading a `.h5ad` file 6 | from disk. More details on this dataset can be found in the **scanpy** 7 | documentation at https://scanpy.readthedocs.io/en/stable/api/scanpy.datasets.krumsiek11.html#scanpy.datasets.krumsiek11. 8 | -------------------------------------------------------------------------------- /inst/scripts/krumsiek11_augmented.py: -------------------------------------------------------------------------------- 1 | # This script was used to create the `krumsiek11_augmented_v0-8.h5ad` 2 | # file. It adds some extra data to the previous `krumsiek11.h5ad` 3 | # dataset to cover some additional cases for testing (NAs, booleans, 4 | # etc). The data was saved in AnnData=0.8.0 format. 5 | # 6 | # Key package versions: 7 | # - anndata=0.8.0 8 | # - h5py=3.8.0 9 | # - hdf5=1.14.0 10 | # - numpy=1.23.5 11 | # - pandas=1.5.3 12 | # - python=3.9.16 13 | # - scanpy=1.9.2 14 | 15 | import numpy as np 16 | import pandas as pd 17 | import anndata as ad 18 | 19 | adata = ad.read_h5ad("krumsiek11.h5ad") 20 | 21 | # add string column to rowData/var. 
Make the entries unique so it's 22 | # saved as str instead of factor 23 | adata.var["dummy_str"] = [f"row{i}" for i in range(adata.shape[1])] 24 | 25 | # add float column to colData/obs 26 | adata.obs["dummy_num"] = 42.42 27 | 28 | # float column with NA 29 | adata.obs["dummy_num2"] = adata.obs["dummy_num"] 30 | adata.obs["dummy_num2"][0] = float("nan") 31 | 32 | # int column 33 | adata.obs["dummy_int"] = np.arange(adata.shape[0]) 34 | 35 | # int column with NA 36 | adata.obs["dummy_int2"] = pd.array([None] + [42] * (adata.shape[0] - 1)) 37 | 38 | # bool column 39 | adata.obs["dummy_bool"] = True 40 | adata.obs["dummy_bool"][0] = False 41 | 42 | # bool column with NA 43 | adata.obs["dummy_bool2"] = pd.array([False, None] + [True] * (adata.shape[0] - 2)) 44 | 45 | # also add some entries to the metadata/uns 46 | adata.uns["dummy_category"] = pd.array(["a", "b", None], dtype="category") 47 | 48 | adata.uns["dummy_bool"] = [True, True, False] 49 | adata.uns["dummy_bool2"] = pd.array([True, False, None]) 50 | 51 | adata.uns["dummy_int"] = [1,2,3] 52 | adata.uns["dummy_int2"] = pd.array([1,2,None]) 53 | 54 | adata.write("krumsiek11_augmented_v0-8.h5ad") 55 | -------------------------------------------------------------------------------- /longtests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(zellkonverter) 3 | 4 | test_check("zellkonverter") 5 | -------------------------------------------------------------------------------- /longtests/testthat/test-cellrank_pancreas.R: -------------------------------------------------------------------------------- 1 | library(SingleCellExperiment) 2 | library(BiocFileCache) 3 | 4 | cache <- BiocFileCache(ask = FALSE) 5 | file <- bfcrpath(cache, "https://figshare.com/ndownloader/files/30683438") 6 | outfile <- tempfile(fileext = ".h5ad") 7 | 8 | names <- list( 9 | assays = c("X", "Ms", "Mu", "fit_t", "fit_tau", "fit_tau_", "spliced", 10 | "unspliced", 
"velocity", "velocity_u"), 11 | colData = c("day", "proliferation", "G2M_score", "S_score", "phase", 12 | "clusters_coarse", "clusters", "clusters_fine", "louvain_Alpha", 13 | "louvain_Beta", "palantir_pseudotime", "initial_size_spliced", 14 | "initial_size_unspliced", "initial_size", "n_counts", 15 | "velocity_self_transition", "terminal_states", 16 | "terminal_states_probs", "initial_states", 17 | "initial_states_probs", "velocity_pseudotime", "latent_time", 18 | "dpt_pseudotime"), 19 | rowData = c("highly_variable_genes", "gene_count_corr", "means", 20 | "dispersions", "dispersions_norm", "highly_variable", "fit_r2", 21 | "fit_alpha", "fit_beta", "fit_gamma", "fit_t_", "fit_scaling", 22 | "fit_std_u", "fit_std_s", "fit_likelihood", "fit_u0", "fit_s0", 23 | "fit_pval_steady", "fit_steady_u", "fit_steady_s", 24 | "fit_variance", "fit_alignment_scaling", "velocity_genes", 25 | "to.Epsilon.corr", "to.Alpha.corr", "to.Beta.corr", 26 | "to.Epsilon.qval", "to.Alpha.qval", "to.Beta.qval"), 27 | metadata = c("T_bwd_params", "clusters_colors", "clusters_fine_colors", 28 | "clusters_sizes", "diffmap_evals", "eig_bwd", "eig_fwd", 29 | "initial_states_colors", "initial_states_names", "iroot", 30 | "louvain_Alpha_colors", "louvain_Beta_colors", "neighbors", 31 | "paga", "pca", "recover_dynamics", "terminal_states_colors", 32 | "terminal_states_names", "to_terminal_states_colors", 33 | "to_terminal_states_names", "velocity_graph", 34 | "velocity_graph_neg", "velocity_params"), 35 | redDim = c("X_diffmap", "X_pca", "X_umap", "macrostates_bwd", 36 | "macrostates_fwd", "to_terminal_states", "velocity_umap"), 37 | varm = c("PCs", "loss"), 38 | colPairs = c("T_bwd", "T_fwd", "connectivities", "distances") 39 | ) 40 | 41 | missing <- list() 42 | 43 | test_that("Reading H5AD works", { 44 | expect_warning( 45 | {sce <- readH5AD(file)}, 46 | "The names of these selected var columns have been modified" 47 | ) 48 | expect_s4_class(sce, "SingleCellExperiment") 49 | }) 50 | 51 | sce <- 
suppressWarnings(readH5AD(file)) 52 | 53 | test_that("SCE is valid", { 54 | validateH5ADSCE(sce, names, missing) 55 | }) 56 | 57 | test_that("Writing H5AD works", { 58 | writeH5AD(sce, outfile) 59 | expect_true(file.exists(outfile)) 60 | }) 61 | 62 | test_that("Round trip is as expected", { 63 | out <- readH5AD(outfile) 64 | 65 | expectSCE(out, sce) 66 | }) 67 | -------------------------------------------------------------------------------- /longtests/testthat/test-example_anndata.R: -------------------------------------------------------------------------------- 1 | library(SingleCellExperiment) 2 | 3 | file <- system.file("extdata", "example_anndata.h5ad", 4 | package = "zellkonverter") 5 | outfile <- tempfile(fileext = ".h5ad") 6 | 7 | names <- list( 8 | assays = c("X", "counts"), 9 | colData = "louvain", 10 | rowData = c("n_counts", "highly_variable", "means", "dispersions", 11 | "dispersions_norm"), 12 | metadata = c("louvain", "neighbors", "pca", "rank_genes_groups", "umap"), 13 | redDim = c("X_pca", "X_umap"), 14 | varm = "PCs", 15 | colPairs = c("connectivities", "distances") 16 | ) 17 | 18 | missing <- list() 19 | 20 | test_that("Reading H5AD works", { 21 | sce <- readH5AD(file) 22 | expect_s4_class(sce, "SingleCellExperiment") 23 | }) 24 | 25 | sce <- suppressWarnings(readH5AD(file)) 26 | 27 | test_that("SCE is valid", { 28 | validateH5ADSCE(sce, names, missing) 29 | }) 30 | 31 | test_that("Writing H5AD works", { 32 | writeH5AD(sce, outfile) 33 | expect_true(file.exists(outfile)) 34 | }) 35 | 36 | test_that("Round trip is as expected", { 37 | out <- readH5AD(outfile) 38 | expectSCE(out, sce) 39 | }) 40 | -------------------------------------------------------------------------------- /longtests/testthat/test-gtex_8tissues.R: -------------------------------------------------------------------------------- 1 | library(SingleCellExperiment) 2 | library(BiocFileCache) 3 | 4 | cache <- BiocFileCache(ask = FALSE) 5 | # Available from 
https://www.gtexportal.org/home/downloads/adult-gtex/single_cell 6 | file <- bfcrpath(cache, "https://storage.googleapis.com/adult-gtex/single-cell/v9/snrna-seq-data/GTEx_8_tissues_snRNAseq_atlas_071421.public_obs.h5ad") 7 | outfile <- tempfile(fileext = ".h5ad") 8 | 9 | names <- list( 10 | assays = c("X", "counts"), 11 | colData = c( 12 | "n_genes", "fpr", "tissue", "prep", "individual", "nGenes", "nUMIs", 13 | "PercentMito", "PercentRibo", "Age_bin", "Sex", "Sample.ID", 14 | "Participant.ID", "Sample.ID.short", 15 | "RIN.score.from.PAXgene.tissue.Aliquot", 16 | "RIN.score.from.Frozen.tissue.Aliquot", "Autolysis.Score", 17 | "Sample.Ischemic.Time..mins.", "Tissue.Site.Detail", "scrublet", 18 | "scrublet_score", "barcode", "batch", "n_counts", 19 | "tissue.individual.prep", "Broad.cell.type", "Granular.cell.type", 20 | "introns", "junctions", "exons", "sense", "antisense", "intergenic", 21 | "batch.barcode", "exon_ratio", "intron_ratio", "junction_ratio", 22 | "log10_nUMIs", "leiden", "leiden_tissue", "Tissue.composition", 23 | "Cell.types.level.2", "Cell.types.level.3", "Broad.cell.type.numbers", 24 | "Broad.cell.type..numbers.", "Tissue", "channel" 25 | ), 26 | rowData = c( 27 | "gene_ids", "Chromosome", "Source", "Start", "End", "Strand", 28 | "gene_name", "gene_source", "gene_biotype", "gene_length", 29 | "gene_coding_length", "Approved.symbol", "Approved.name", "Status", 30 | "Previous.symbols", "Alias.symbols", "gene_include", "n_cells" 31 | ), 32 | metadata = c( 33 | "Broad.cell.type..numbers._colors", "Broad.cell.type.numbers_colors", 34 | "Broad.cell.type_colors", "Broad.cell.type_logregcv_vae_colors", 35 | "Broad.cell.type_sizes", "Granular.cell.type_colors", 36 | "Participant.ID_colors", "Sex_colors", "Tissue.composition_colors", 37 | "Tissue_colors", "dendrogram_..Broad.cell.type..", "leiden", 38 | "leiden_colors", "leiden_sub_colors", "neighbors", "paga", 39 | "prep_colors", "tissue_colors", "umap" 40 | ), 41 | redDim = c( 42 | "X_pca", "X_umap", 
"X_umap_tissue", "X_vae_mean", "X_vae_mean_tissue", 43 | "X_vae_samples", "X_vae_var" 44 | ), 45 | varm = c("spring_leiden_sub"), 46 | colPairs = c("connectivities", "distances") 47 | ) 48 | 49 | missing <- list() 50 | 51 | test_that("Reading H5AD works", { 52 | expect_warning( 53 | {sce <- readH5AD(file)}, 54 | "The names of these selected uns items have been modified" 55 | ) 56 | expect_s4_class(sce, "SingleCellExperiment") 57 | }) 58 | 59 | sce <- suppressWarnings(readH5AD(file)) 60 | 61 | test_that("SCE is valid", { 62 | validateH5ADSCE(sce, names, missing) 63 | }) 64 | 65 | test_that("Writing H5AD works", { 66 | writeH5AD(sce, outfile) 67 | expect_true(file.exists(outfile)) 68 | }) 69 | 70 | test_that("Round trip is as expected", { 71 | out <- readH5AD(outfile) 72 | expectSCE(out, sce) 73 | }) 74 | -------------------------------------------------------------------------------- /longtests/testthat/test-pegasus_marrow.R: -------------------------------------------------------------------------------- 1 | library(SingleCellExperiment) 2 | library(BiocFileCache) 3 | 4 | cache <- BiocFileCache(ask = FALSE) 5 | file <- bfcrpath(cache, "https://figshare.com/ndownloader/files/30682400") 6 | outfile <- tempfile(fileext = ".h5ad") 7 | 8 | names <- list( 9 | assays = c("X"), 10 | colData = c("n_genes", "Channel", "n_counts", "percent_mito", "scale", 11 | "Group", "louvain_labels", "anno"), 12 | rowData = c("featureid", "n_cells", "percent_cells", "robust", 13 | "highly_variable_features", "mean", "var", "hvf_loess", 14 | "hvf_rank"), 15 | metadata = c("Channels", "Groups", "PCs", "W_diffmap", "W_pca_harmony", 16 | "c2gid", "diffmap_evals", "diffmap_knn_distances", 17 | "diffmap_knn_indices", "genome", "gncells", 18 | "louvain_resolution", "modality", "ncells", "norm_count", 19 | "pca", "pca_features", "pca_harmony_knn_distances", 20 | "pca_harmony_knn_indices", "stdzn_max_value", "stdzn_mean", 21 | "stdzn_std"), 22 | redDim = c("X_diffmap", "X_fle", "X_pca", 
"X_pca_harmony", "X_phi", 23 | "X_tsne", "X_umap"), 24 | varm = c("de_res", "gmeans", "gstds", "means", "partial_sum") 25 | ) 26 | 27 | missing <- list() 28 | 29 | test_that("Reading H5AD works", { 30 | sce <- readH5AD(file) 31 | expect_s4_class(sce, "SingleCellExperiment") 32 | }) 33 | 34 | sce <- suppressWarnings(readH5AD(file)) 35 | 36 | test_that("SCE is valid", { 37 | validateH5ADSCE(sce, names, missing) 38 | }) 39 | 40 | test_that("Writing H5AD works", { 41 | writeH5AD(sce, outfile) 42 | expect_true(file.exists(outfile)) 43 | }) 44 | 45 | test_that("Round trip is as expected", { 46 | out <- readH5AD(outfile) 47 | 48 | expectSCE(out, sce) 49 | }) 50 | -------------------------------------------------------------------------------- /longtests/testthat/test-scIB_pancreas.R: -------------------------------------------------------------------------------- 1 | library(SingleCellExperiment) 2 | library(BiocFileCache) 3 | 4 | cache <- BiocFileCache(ask = FALSE) 5 | file <- bfcrpath(cache, "https://ndownloader.figshare.com/files/24539828") 6 | outfile <- tempfile(fileext = ".h5ad") 7 | 8 | names <- list( 9 | assays = c("X", "counts"), 10 | colData = c("tech", "celltype", "size_factors") 11 | ) 12 | missing <- list() 13 | 14 | test_that("Reading H5AD works", { 15 | sce <- readH5AD(file) 16 | expect_s4_class(sce, "SingleCellExperiment") 17 | }) 18 | 19 | sce <- suppressWarnings(readH5AD(file)) 20 | 21 | test_that("SCE is valid", { 22 | validateH5ADSCE(sce, names, missing) 23 | }) 24 | 25 | test_that("Writing H5AD works", { 26 | writeH5AD(sce, outfile) 27 | expect_true(file.exists(outfile)) 28 | }) 29 | 30 | test_that("Round trip is as expected", { 31 | out <- readH5AD(outfile) 32 | expectSCE(out, sce) 33 | }) 34 | -------------------------------------------------------------------------------- /longtests/testthat/test-scanpy_pbmc3k.R: -------------------------------------------------------------------------------- 1 | library(SingleCellExperiment) 2 | 
library(BiocFileCache) 3 | 4 | cache <- BiocFileCache(ask = FALSE) 5 | file <- bfcrpath(cache, "https://ndownloader.figshare.com/files/30462915") 6 | outfile <- tempfile(fileext = ".h5ad") 7 | 8 | names <- list( 9 | assays = c("X"), 10 | colData = c("n_genes", "n_genes_by_counts", "total_counts", 11 | "total_counts_mt", "pct_counts_mt", "leiden"), 12 | rowData = c("gene_ids", "n_cells", "mt", "n_cells_by_counts", "mean_counts", 13 | "pct_dropout_by_counts", "total_counts", "highly_variable", 14 | "means", "dispersions", "dispersions_norm", "mean", "std"), 15 | metadata = c("hvg", "leiden", "neighbors", "pca", "rank_genes_groups", 16 | "umap"), 17 | redDim = c("X_pca", "X_umap"), 18 | varm = c("PCs"), 19 | colPairs = c("connectivities", "distances") 20 | ) 21 | 22 | missing <- list() 23 | 24 | test_that("Reading H5AD works", { 25 | sce <- readH5AD(file) 26 | expect_s4_class(sce, "SingleCellExperiment") 27 | }) 28 | 29 | sce <- suppressWarnings(readH5AD(file)) 30 | 31 | test_that("SCE is valid", { 32 | validateH5ADSCE(sce, names, missing) 33 | }) 34 | 35 | test_that("Writing H5AD works", { 36 | writeH5AD(sce, outfile) 37 | expect_true(file.exists(outfile)) 38 | }) 39 | 40 | test_that("Round trip is as expected", { 41 | out <- readH5AD(outfile) 42 | expectSCE(out, sce) 43 | }) 44 | -------------------------------------------------------------------------------- /longtests/testthat/test-scanpy_trajectory.R: -------------------------------------------------------------------------------- 1 | library(SingleCellExperiment) 2 | library(BiocFileCache) 3 | 4 | cache <- BiocFileCache(ask = FALSE) 5 | file <- bfcrpath(cache, "https://figshare.com/ndownloader/files/30594477") 6 | outfile <- tempfile(fileext = ".h5ad") 7 | 8 | names <- list( 9 | assays = c("X"), 10 | colData = c("paul15_clusters", "n_counts_all", "louvain", "dpt_pseudotime"), 11 | rowData = c("n_counts", "mean", "std"), 12 | metadata = c("diffmap_evals", "draw_graph", "iroot", "louvain", 13 | "louvain_sizes", 
"neighbors", "paga", "pca"), 14 | redDim = c("X_diffmap", "X_draw_graph_fa", "X_pca"), 15 | varm = c("PCs"), 16 | colPairs = c("connectivities", "distances") 17 | ) 18 | 19 | missing <- list() 20 | 21 | test_that("Reading H5AD works", { 22 | sce <- readH5AD(file) 23 | expect_s4_class(sce, "SingleCellExperiment") 24 | }) 25 | 26 | sce <- suppressWarnings(readH5AD(file)) 27 | 28 | test_that("SCE is valid", { 29 | validateH5ADSCE(sce, names, missing) 30 | }) 31 | 32 | test_that("Writing H5AD works", { 33 | writeH5AD(sce, outfile) 34 | expect_true(file.exists(outfile)) 35 | }) 36 | 37 | test_that("Round trip is as expected", { 38 | out <- readH5AD(outfile) 39 | expectSCE(out, sce) 40 | }) 41 | -------------------------------------------------------------------------------- /longtests/testthat/test-scvelo_pancreas.R: -------------------------------------------------------------------------------- 1 | library(SingleCellExperiment) 2 | library(BiocFileCache) 3 | 4 | cache <- BiocFileCache(ask = FALSE) 5 | file <- bfcrpath(cache, "https://figshare.com/ndownloader/files/30595479") 6 | outfile <- tempfile(fileext = ".h5ad") 7 | 8 | names <- list( 9 | assays = c("X", "Ms", "Mu", "fit_t", "fit_tau", "fit_tau_", "spliced", 10 | "unspliced", "variance_velocity", "velocity", "velocity_u"), 11 | colData = c("clusters_coarse", "clusters", "S_score", "G2M_score", 12 | "initial_size_spliced", "initial_size_unspliced", 13 | "initial_size", "n_counts", "velocity_self_transition", "phase", 14 | "velocity_length", "velocity_confidence", 15 | "velocity_confidence_transition", "root_cells", "end_points", 16 | "velocity_pseudotime", "latent_time"), 17 | rowData = c("highly_variable_genes", "gene_count_corr", "means", 18 | "dispersions", "dispersions_norm", "highly_variable", 19 | "velocity_gamma", "velocity_qreg_ratio", "velocity_r2", 20 | "velocity_genes", "spearmans_score", "velocity_score", 21 | "fit_alpha", "fit_beta", "fit_gamma", "fit_t_", "fit_scaling", 22 | "fit_std_u", "fit_std_s", 
"fit_likelihood", "fit_u0", "fit_s0", 23 | "fit_pval_steady", "fit_steady_u", "fit_steady_s", 24 | "fit_variance", "fit_alignment_scaling", "fit_r2"), 25 | metadata = c("clusters_coarse_colors", "clusters_colors", "clusters_sizes", 26 | "day_colors", "neighbors", "paga", "pca", 27 | "rank_dynamical_genes", "rank_velocity_genes", 28 | "recover_dynamics", "velocity_graph", "velocity_graph_neg", 29 | "velocity_params"), 30 | redDim = c("X_pca", "X_umap", "velocity_umap"), 31 | varm = c("loss"), 32 | colPairs = c("connectivities", "distances") 33 | ) 34 | 35 | missing <- list() 36 | 37 | test_that("Reading H5AD works", { 38 | sce <- readH5AD(file) 39 | expect_s4_class(sce, "SingleCellExperiment") 40 | }) 41 | 42 | sce <- suppressWarnings(readH5AD(file)) 43 | 44 | test_that("SCE is valid", { 45 | validateH5ADSCE(sce, names, missing) 46 | }) 47 | 48 | test_that("Writing H5AD works", { 49 | writeH5AD(sce, outfile) 50 | expect_true(file.exists(outfile)) 51 | }) 52 | 53 | test_that("Round trip is as expected", { 54 | out <- readH5AD(outfile) 55 | expectSCE(out, sce) 56 | }) 57 | -------------------------------------------------------------------------------- /longtests/testthat/test-scvi_citeseq.R: -------------------------------------------------------------------------------- 1 | library(SingleCellExperiment) 2 | library(BiocFileCache) 3 | 4 | cache <- BiocFileCache(ask = FALSE) 5 | file <- bfcrpath(cache, "https://figshare.com/ndownloader/files/30612834") 6 | outfile <- tempfile(fileext = ".h5ad") 7 | 8 | names <- list( 9 | assays = c("X", "counts", "denoised_rna"), 10 | colData = c("n_genes", "percent_mito", "n_counts", "batch", "X_scvi_batch", 11 | "X_scvi_labels", "X_scvi_local_l_mean", "X_scvi_local_l_var", 12 | "leiden_totalVI"), 13 | rowData = c("highly_variable", "highly_variable_rank", "means", "variances", 14 | "variances_norm", "highly_variable_nbatches"), 15 | metadata = c("X_scvi", "hvg", "leiden", "neighbors", "umap"), 16 | redDim = c("X_totalVI", "X_umap", 
"denoised_protein", 17 | "protein_expression", "protein_foreground_prob"), 18 | colPairs = c("connectivities", "distances") 19 | ) 20 | 21 | missing <- list() 22 | 23 | test_that("Reading H5AD works", { 24 | sce <- expect_warning(readH5AD(file)) 25 | expect_s4_class(sce, "SingleCellExperiment") 26 | }) 27 | 28 | sce <- suppressWarnings(readH5AD(file)) 29 | 30 | test_that("SCE is valid", { 31 | validateH5ADSCE(sce, names, missing) 32 | }) 33 | 34 | test_that("Writing H5AD works", { 35 | writeH5AD(sce, outfile) 36 | expect_true(file.exists(outfile)) 37 | }) 38 | 39 | test_that("Round trip is as expected", { 40 | out <- suppressWarnings(readH5AD(outfile)) 41 | 42 | # For some reason "_scvi" gets changed to "X_scvi", not sure why... 43 | names(S4Vectors::metadata(sce))[1] <- "X_scvi" 44 | 45 | expectSCE(out, sce) 46 | }) 47 | -------------------------------------------------------------------------------- /longtests/testthat/test-squidpy_visium.R: -------------------------------------------------------------------------------- 1 | library(SingleCellExperiment) 2 | library(BiocFileCache) 3 | 4 | cache <- BiocFileCache(ask = FALSE) 5 | file <- bfcrpath(cache, "https://figshare.com/ndownloader/files/30639279") 6 | outfile <- tempfile(fileext = ".h5ad") 7 | 8 | names <- list( 9 | assays = c("X"), 10 | colData = c("in_tissue", "array_row", "array_col", "n_genes_by_counts", 11 | "log1p_n_genes_by_counts", "total_counts", "log1p_total_counts", 12 | "pct_counts_in_top_50_genes", "pct_counts_in_top_100_genes", 13 | "pct_counts_in_top_200_genes", "pct_counts_in_top_500_genes", 14 | "total_counts_MT", "log1p_total_counts_MT", "pct_counts_MT", 15 | "n_counts", "leiden", "cluster", "features_summary_cluster", 16 | "features_histogram_cluster", "features_texture_cluster"), 17 | rowData = c("gene_ids", "feature_types", "genome", "MT", 18 | "n_cells_by_counts", "mean_counts", "log1p_mean_counts", 19 | "pct_dropout_by_counts", "total_counts", "log1p_total_counts", 20 | "n_cells", 
"highly_variable", "highly_variable_rank", "means", 21 | "variances", "variances_norm"), 22 | metadata = c("cluster_co_occurrence", "cluster_colors", "cluster_ligrec", 23 | "cluster_nhood_enrichment", "hvg", "leiden", "leiden_colors", 24 | "moranI", "neighbors", "pca", "spatial", "spatial_neighbors", 25 | "umap"), 26 | redDim = c("X_pca", "X_umap", "features", "features_context", 27 | "features_lowres", "features_orig", "features_segmentation", 28 | "spatial"), 29 | varm = c("PCs"), 30 | colPairs = c("connectivities", "distances", "spatial_connectivities", 31 | "spatial_distances") 32 | ) 33 | 34 | missing <- list() 35 | 36 | test_that("Reading H5AD works", { 37 | sce <- readH5AD(file) 38 | expect_s4_class(sce, "SingleCellExperiment") 39 | }) 40 | 41 | sce <- suppressWarnings(readH5AD(file)) 42 | 43 | test_that("SCE is valid", { 44 | validateH5ADSCE(sce, names, missing) 45 | }) 46 | 47 | test_that("Writing H5AD works", { 48 | writeH5AD(sce, outfile) 49 | expect_true(file.exists(outfile)) 50 | }) 51 | 52 | test_that("Round trip is as expected", { 53 | out <- readH5AD(outfile) 54 | 55 | expectSCE(out, sce) 56 | }) 57 | -------------------------------------------------------------------------------- /man/AnnData-Conversion.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/AnnData2SCE.R, R/SCE2AnnData.R 3 | \name{AnnData-Conversion} 4 | \alias{AnnData-Conversion} 5 | \alias{AnnData2SCE} 6 | \alias{SCE2AnnData} 7 | \title{Convert between AnnData and SingleCellExperiment} 8 | \usage{ 9 | AnnData2SCE( 10 | adata, 11 | X_name = NULL, 12 | layers = TRUE, 13 | uns = TRUE, 14 | var = TRUE, 15 | obs = TRUE, 16 | varm = TRUE, 17 | obsm = TRUE, 18 | varp = TRUE, 19 | obsp = TRUE, 20 | raw = FALSE, 21 | skip_assays = FALSE, 22 | hdf5_backed = TRUE, 23 | verbose = NULL 24 | ) 25 | 26 | SCE2AnnData( 27 | sce, 28 | X_name = NULL, 29 | assays = TRUE, 30 | colData = 
TRUE, 31 | rowData = TRUE, 32 | varm = TRUE, 33 | reducedDims = TRUE, 34 | metadata = TRUE, 35 | colPairs = TRUE, 36 | rowPairs = TRUE, 37 | skip_assays = FALSE, 38 | verbose = NULL 39 | ) 40 | } 41 | \arguments{ 42 | \item{adata}{A \strong{reticulate} reference to a Python AnnData object.} 43 | 44 | \item{X_name}{For \code{SCE2AnnData()} name of the assay to use as the primary 45 | matrix (\code{X}) of the AnnData object. If \code{NULL}, the first assay of \code{sce} will 46 | be used by default. For \code{AnnData2SCE()} name used when saving \code{X} as an 47 | assay. If \code{NULL} looks for an \code{X_name} value in \code{uns}, otherwise uses \code{"X"}.} 48 | 49 | \item{layers, uns, var, obs, varm, obsm, varp, obsp, raw}{Arguments specifying how 50 | these slots are converted. If \code{TRUE} everything in that slot is converted, if 51 | \code{FALSE} nothing is converted and if a character vector only those items or 52 | columns are converted.} 53 | 54 | \item{skip_assays}{Logical scalar indicating whether to skip conversion of 55 | any assays in \code{sce} or \code{adata}, replacing them with empty sparse matrices 56 | instead.} 57 | 58 | \item{hdf5_backed}{Logical scalar indicating whether HDF5-backed matrices 59 | in \code{adata} should be represented as HDF5Array objects. This assumes that 60 | \code{adata} is created with \code{backed="r"}.} 61 | 62 | \item{verbose}{Logical scalar indicating whether to print progress messages. 63 | If \code{NULL} uses \code{getOption("zellkonverter.verbose")}.} 64 | 65 | \item{sce}{A \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment} 66 | object.} 67 | 68 | \item{assays, colData, rowData, reducedDims, metadata, colPairs, rowPairs}{Arguments specifying how these slots are converted. 
If \code{TRUE} everything in 69 | that slot is converted, if \code{FALSE} nothing is converted and if a character 70 | vector only those items or columns are converted.} 71 | } 72 | \value{ 73 | \code{AnnData2SCE()} will return a 74 | \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment} 75 | containing the equivalent data from \code{adata}. 76 | 77 | \code{SCE2AnnData()} will return a \strong{reticulate} reference to an AnnData object 78 | containing the content of \code{sce}. 79 | } 80 | \description{ 81 | Conversion between Python AnnData objects and 82 | \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment} 83 | objects. 84 | } 85 | \details{ 86 | \subsection{Python environment}{ 87 | 88 | These functions assume that an appropriate Python environment has already 89 | been loaded. As such, they are largely intended for developer use, most 90 | typically inside a \strong{basilisk} context. 91 | } 92 | 93 | \subsection{Conversion mapping}{ 94 | 95 | The conversion is not entirely lossless. The current mapping is shown below 96 | (also at \url{https://tinyurl.com/AnnData2SCE}): 97 | 98 | \if{html}{ 99 | \figure{AnnData2SCE.png}{options: width=800, alt="SCE-AnnData map"} 100 | } 101 | \if{latex}{\figure{AnnData2SCE.png}{options: width=5in}} 102 | } 103 | 104 | \subsection{Matrix conversion}{ 105 | 106 | In \code{SCE2AnnData()}, matrices are converted to a \strong{numpy}-friendly format. 107 | Sparse matrices are converted to 108 | \link[Matrix:dgCMatrix-class]{Matrix::dgCMatrix} objects while all 109 | other matrices are converted into ordinary matrices. If \code{skip_assays = TRUE}, 110 | empty sparse matrices are created instead and the user is expected to fill in 111 | the assays on the Python side. 
112 | 113 | For \code{AnnData2SCE()}, a warning is raised if there is no corresponding R 114 | format for a matrix in the \code{AnnData} object, and an empty sparse matrix is 115 | created instead as a placeholder. If \code{skip_assays = NA}, no warning is 116 | emitted but variables are created in the 117 | \code{\link[SingleCellExperiment:internals]{int_metadata()}} of the output to 118 | specify which assays were skipped. 119 | 120 | If \code{skip_assays = TRUE}, empty sparse matrices are created for all assays, 121 | regardless of whether they might be convertible to an R format or not. 122 | In both cases, the user is expected to fill in the assays on the R side. 123 | } 124 | 125 | \subsection{\code{metadata}/\code{uns} conversion}{ 126 | 127 | We attempt to convert between items in the 128 | \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment} 129 | \code{\link[S4Vectors:Annotated-class]{metadata()}} slot and the \code{AnnData} \code{uns} slot. If 130 | an item cannot be converted a warning will be raised. 131 | } 132 | 133 | \subsection{\code{varm} conversion}{ 134 | 135 | Values stored in the \code{varm} slot of an \code{AnnData} object are stored in a 136 | column of \code{\link[SummarizedExperiment:SummarizedExperiment-class]{rowData()}} in a 137 | \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment} 138 | as a \link[S4Vectors:DataFrame-class]{S4Vectors::DataFrame-class} of matrices. 139 | If this column is present an attempt is made to transfer this information 140 | when converting from 141 | \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment} 142 | to \code{AnnData}. 
143 | } 144 | 145 | \subsection{\code{SpatialExperiment} conversion}{ 146 | 147 | In \code{SCE2AnnData()}, if \code{sce} is a \link[SpatialExperiment:SpatialExperiment-class]{SpatialExperiment::SpatialExperiment} 148 | object, the spatial coordinates are added to the \code{reducedDims} slot before 149 | conversion to an \code{AnnData} object. 150 | } 151 | } 152 | \examples{ 153 | if (requireNamespace("scRNAseq", quietly = TRUE)) { 154 | library(basilisk) 155 | library(scRNAseq) 156 | seger <- SegerstolpePancreasData() 157 | 158 | # These functions are designed to be run inside 159 | # a specified Python environment 160 | roundtrip <- basiliskRun(fun = function(sce) { 161 | # Convert SCE to AnnData: 162 | adata <- zellkonverter::SCE2AnnData(sce) 163 | 164 | # Maybe do some work in Python on 'adata': 165 | # BLAH BLAH BLAH 166 | 167 | # Convert back to an SCE: 168 | zellkonverter::AnnData2SCE(adata) 169 | }, env = zellkonverterAnnDataEnv(), sce = seger) 170 | } 171 | } 172 | \seealso{ 173 | \code{\link[=writeH5AD]{writeH5AD()}} and \code{\link[=readH5AD]{readH5AD()}} for dealing directly with H5AD files. 174 | } 175 | \author{ 176 | Luke Zappia 177 | 178 | Aaron Lun 179 | } 180 | -------------------------------------------------------------------------------- /man/AnnData-Environment.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/basilisk.R 3 | \docType{data} 4 | \name{AnnData-Environment} 5 | \alias{AnnData-Environment} 6 | \alias{.AnnDataVersions} 7 | \alias{AnnDataDependencies} 8 | \alias{zellkonverterAnnDataEnv} 9 | \title{AnnData environment} 10 | \format{ 11 | For \code{.AnnDataVersions} a character vector containing allowed \strong{anndata} 12 | version strings. 
13 | } 14 | \usage{ 15 | .AnnDataVersions 16 | 17 | AnnDataDependencies(version = .AnnDataVersions) 18 | 19 | zellkonverterAnnDataEnv(version = .AnnDataVersions) 20 | } 21 | \arguments{ 22 | \item{version}{A string giving the version of the \strong{anndata} Python library 23 | to use. Allowed values are available in \code{.AnnDataVersions}. By default the 24 | latest version is used.} 25 | } 26 | \value{ 27 | For \code{AnnDataDependencies} a character vector containing the pinned versions 28 | of all Python packages to be used by \code{zellkonverterAnnDataEnv()}. 29 | 30 | For \code{zellkonverterAnnDataEnv} a \code{\link[basilisk:BasiliskEnvironment-class]{basilisk::BasiliskEnvironment()}} containing 31 | \strong{zellkonverter}'s AnnData Python environment. 32 | } 33 | \description{ 34 | The Python environment used by \strong{zellkonverter} for interfacing with the 35 | \strong{anndata} Python library (and H5AD files) is described by the dependencies 36 | returned by \code{AnnDataDependencies()}. The \code{zellkonverterAnnDataEnv()} 37 | function returns the \code{\link[basilisk:BasiliskEnvironment-class]{basilisk::BasiliskEnvironment()}} containing these 38 | dependencies used by \strong{zellkonverter}. Allowed versions of \strong{anndata} are 39 | available in \code{.AnnDataVersions}. 40 | } 41 | \details{ 42 | \subsection{Using Python environments}{ 43 | 44 | When a \strong{zellkonverter} function is first run a conda environment containing all of 45 | the necessary dependencies for that version will be instantiated. This will 46 | not be performed on any subsequent run or if any other \strong{zellkonverter} 47 | function has been run prior with the same environment version. 48 | 49 | By default the \strong{zellkonverter} conda environment will become the shared R 50 | Python environment if one does not already exist. 
When one does exist (for 51 | example when a \strong{zellkonverter} function has already been run using 52 | a different environment version) then a separate environment will be used. 53 | See \code{\link[basilisk:basiliskOptions]{basilisk::setBasiliskShared()}} for more information on this behaviour. 54 | Note that when the environment is not shared progress messages are lost. 55 | } 56 | 57 | \subsection{Development}{ 58 | 59 | The \code{AnnDataDependencies()} function is exposed for use by other package 60 | developers who want an easy way to define the dependencies required for 61 | creating a Python environment to work with AnnData objects, most typically 62 | within a \strong{basilisk} context. For example, we can simply combine this 63 | vector with additional dependencies to create a \strong{basilisk} environment with 64 | Python package versions that are consistent with those in \strong{zellkonverter}. 65 | 66 | If you want to run code in the exact environment used by \strong{zellkonverter} 67 | this can be done using \code{zellkonverterAnnDataEnv()} in combination with 68 | \code{\link[basilisk:basiliskStart]{basilisk::basiliskStart()}} and/or \code{\link[basilisk:basiliskStart]{basilisk::basiliskRun()}}. Please refer to 69 | the \strong{basilisk} documentation for more information on using these 70 | environments. 
71 | } 72 | } 73 | \examples{ 74 | .AnnDataVersions 75 | 76 | AnnDataDependencies() 77 | AnnDataDependencies(version = "0.7.6") 78 | 79 | cl <- basilisk::basiliskStart(zellkonverterAnnDataEnv()) 80 | anndata <- reticulate::import("anndata") 81 | basilisk::basiliskStop(cl) 82 | } 83 | \author{ 84 | Luke Zappia 85 | 86 | Aaron Lun 87 | } 88 | \keyword{datasets} 89 | -------------------------------------------------------------------------------- /man/expectSCE.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/validation.R 3 | \name{expectSCE} 4 | \alias{expectSCE} 5 | \title{Expect SCE} 6 | \usage{ 7 | expectSCE(sce, expected) 8 | } 9 | \arguments{ 10 | \item{sce}{A \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment} 11 | object.} 12 | 13 | \item{expected}{A template \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment} 14 | object to compare to.} 15 | } 16 | \value{ 17 | \code{TRUE} invisibly if checks pass 18 | } 19 | \description{ 20 | Test that a SingleCellExperiment matches an expected object. Designed to be 21 | used inside \code{testthat::test_that()} during package testing. 
22 | } 23 | \author{ 24 | Luke Zappia 25 | } 26 | -------------------------------------------------------------------------------- /man/figures/AnnData2SCE.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/theislab/zellkonverter/c17a17220088ff880d512c392d5de4aacb9e9bb1/man/figures/AnnData2SCE.png -------------------------------------------------------------------------------- /man/figures/zellkonverter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/theislab/zellkonverter/c17a17220088ff880d512c392d5de4aacb9e9bb1/man/figures/zellkonverter.png -------------------------------------------------------------------------------- /man/r-py-conversion.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/reticulate.R 3 | \name{r-py-conversion} 4 | \alias{r-py-conversion} 5 | \alias{py_to_r.numpy.ndarray} 6 | \title{Convert between Python and R objects} 7 | \usage{ 8 | \method{py_to_r}{numpy.ndarray}(x) 9 | } 10 | \arguments{ 11 | \item{x}{A Python object.} 12 | } 13 | \value{ 14 | An \R object, as converted from the Python object. 
15 | } 16 | \description{ 17 | Convert between Python and R objects 18 | } 19 | \details{ 20 | These functions are extensions of the default conversion functions in the 21 | \code{reticulate} package for the following reasons: 22 | \itemize{ 23 | \item \code{numpy.ndarray} - Handle conversion of \strong{numpy} recarrays 24 | \item \code{pandas.core.arrays.masked.BaseMaskedArray} - Handle conversion of 25 | \strong{pandas} arrays (used by \code{AnnData} objects when there are missing 26 | values) 27 | \item \code{pandas.core.arrays.categorical.Categorical} - Handle conversion of 28 | \strong{pandas} categorical arrays 29 | } 30 | } 31 | \seealso{ 32 | \code{\link[reticulate:r-py-conversion]{reticulate::py_to_r()}} for the base \code{reticulate} functions 33 | } 34 | \author{ 35 | Luke Zappia 36 | } 37 | -------------------------------------------------------------------------------- /man/readH5AD.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/read.R 3 | \name{readH5AD} 4 | \alias{readH5AD} 5 | \title{Read H5AD} 6 | \usage{ 7 | readH5AD( 8 | file, 9 | X_name = NULL, 10 | use_hdf5 = FALSE, 11 | reader = c("python", "R"), 12 | version = NULL, 13 | verbose = NULL, 14 | ... 15 | ) 16 | } 17 | \arguments{ 18 | \item{file}{String containing a path to a \code{.h5ad} file.} 19 | 20 | \item{X_name}{Name used when saving \code{X} as an assay. If \code{NULL} looks for an 21 | \code{X_name} value in \code{uns}, otherwise uses \code{"X"}.} 22 | 23 | \item{use_hdf5}{Logical scalar indicating whether assays should be 24 | loaded as HDF5-based matrices from the \strong{HDF5Array} package.} 25 | 26 | \item{reader}{Which HDF5 reader to use. 
Either \code{"python"} for reading with 27 | the \strong{anndata} Python package via \strong{reticulate} or \code{"R"} for 28 | \strong{zellkonverter}'s native R reader.} 29 | 30 | \item{version}{A string giving the version of the \strong{anndata} Python library 31 | to use. Allowed values are available in \code{.AnnDataVersions}. By default the 32 | latest version is used.} 33 | 34 | \item{verbose}{Logical scalar indicating whether to print progress messages. 35 | If \code{NULL} uses \code{getOption("zellkonverter.verbose")}.} 36 | 37 | \item{...}{ 38 | Arguments passed on to \code{\link[=AnnData2SCE]{AnnData2SCE}} 39 | \describe{ 40 | \item{\code{layers,uns,var,obs,varm,obsm,varp,obsp,raw}}{Arguments specifying how 41 | these slots are converted. If \code{TRUE} everything in that slot is converted, if 42 | \code{FALSE} nothing is converted and if a character vector only those items or 43 | columns are converted.} 44 | \item{\code{skip_assays}}{Logical scalar indicating whether to skip conversion of 45 | any assays in \code{sce} or \code{adata}, replacing them with empty sparse matrices 46 | instead.} 47 | }} 48 | } 49 | \value{ 50 | A \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment} 51 | object is returned. 52 | } 53 | \description{ 54 | Reads a H5AD file and returns a 55 | \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment} 56 | object. 57 | } 58 | \details{ 59 | Setting \code{use_hdf5 = TRUE} allows for very large datasets to be efficiently 60 | represented on machines with little memory. However, this comes at the cost 61 | of access speed as data needs to be fetched from the HDF5 file upon request. 62 | 63 | Setting \code{reader = "R"} will use an experimental native R reader instead of 64 | reading the file into Python and converting the result. 
This avoids the need 65 | for a Python environment and some of the issues with conversion but is still 66 | under development and is likely to return slightly different output. 67 | 68 | See \link{AnnData-Environment} for more details on \strong{zellkonverter} Python 69 | environments. 70 | } 71 | \examples{ 72 | library(SummarizedExperiment) 73 | 74 | file <- system.file("extdata", "krumsiek11.h5ad", package = "zellkonverter") 75 | sce <- readH5AD(file) 76 | class(assay(sce)) 77 | 78 | sce2 <- readH5AD(file, use_hdf5 = TRUE) 79 | class(assay(sce2)) 80 | 81 | sce3 <- readH5AD(file, reader = "R") 82 | } 83 | \seealso{ 84 | \code{\link[=writeH5AD]{writeH5AD()}}, to write a 85 | \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment} 86 | object to a H5AD file. 87 | 88 | \code{\link[=AnnData2SCE]{AnnData2SCE()}}, for developers to convert existing AnnData instances to a 89 | \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}. 90 | } 91 | \author{ 92 | Luke Zappia 93 | 94 | Aaron Lun 95 | } 96 | -------------------------------------------------------------------------------- /man/setZellkonverterVerbose.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ui.R 3 | \name{setZellkonverterVerbose} 4 | \alias{setZellkonverterVerbose} 5 | \title{Set zellkonverter verbose} 6 | \usage{ 7 | setZellkonverterVerbose(verbose = TRUE) 8 | } 9 | \arguments{ 10 | \item{verbose}{Logical value for the verbosity option.} 11 | } 12 | \value{ 13 | The value of getOption("zellkonverter.verbose") invisibly 14 | } 15 | \description{ 16 | Set the zellkonverter verbosity option 17 | } 18 | \details{ 19 | Running \code{setZellkonverterVerbose(TRUE)} will turn on \strong{zellkonverter} 20 | progress messages by default without having to set \code{verbose = TRUE} in each 21 | function call. 
This is done by setting the \code{"zellkonverter.verbose"} option. 22 | Running \code{setZellkonverterVerbose(FALSE)} will turn default verbosity off. 23 | } 24 | \examples{ 25 | current <- getOption("zellkonverter.verbose") 26 | setZellkonverterVerbose(TRUE) 27 | getOption("zellkonverter.verbose") 28 | setZellkonverterVerbose(FALSE) 29 | getOption("zellkonverter.verbose") 30 | setZellkonverterVerbose(current) 31 | getOption("zellkonverter.verbose") 32 | } 33 | -------------------------------------------------------------------------------- /man/validateH5ADSCE.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/validation.R 3 | \name{validateH5ADSCE} 4 | \alias{validateH5ADSCE} 5 | \title{Validate H5AD SCE} 6 | \usage{ 7 | validateH5ADSCE(sce, names, missing) 8 | } 9 | \arguments{ 10 | \item{sce}{A \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment} 11 | object.} 12 | 13 | \item{names}{Named list of expected names. Names are slots and values are 14 | vectors of names that are expected to exist in that slot.} 15 | 16 | \item{missing}{Named list of known missing names. Names are slots and values 17 | are vectors of names that are expected to not exist in that slot.} 18 | } 19 | \value{ 20 | If checks are successful \code{TRUE} invisibly, if not other output 21 | depending on the context 22 | } 23 | \description{ 24 | Validate a SingleCellExperiment created by \code{readH5AD()}. Designed to be used 25 | inside \code{testthat::test_that()} during package testing. 26 | } 27 | \details{ 28 | This function checks that a SingleCellExperiment contains the expected items 29 | in each slot. The main reason for this function is to avoid repeating code when 30 | testing multiple \code{.h5ad} files. 
The following items in \code{names} and \code{missing} 31 | are recognised: 32 | \itemize{ 33 | \item \code{assays} - Assay names 34 | \item \code{colData} - colData column names 35 | \item \code{rowData} - rowData column names 36 | \item \code{metadata} - metadata names 37 | \item \code{redDim} - Reduced dimension names 38 | \item \code{varm} - Column names of the \code{varm} rowData column (from the AnnData varm 39 | slot) 40 | \item \code{colPairs} - Column pair names 41 | \item \code{rowPairs} - Row pair names 42 | \item \code{raw_rowData} - rowData column names in the \code{raw} altExp 43 | \item \code{raw_varm} - Column names of the raw \code{varm} rowData column (from the 44 | AnnData varm slot) 45 | } 46 | 47 | If an item in \code{names} or \code{missing} is \code{NULL} then it won't be checked. The 48 | items in \code{missing} are checked that they explicitly do not exist. This is 49 | mostly for record keeping when something is known to not be converted but can 50 | also be useful when the corresponding \code{names} item is \code{NULL}. 51 | } 52 | \author{ 53 | Luke Zappia 54 | } 55 | -------------------------------------------------------------------------------- /man/writeH5AD.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/write.R 3 | \name{writeH5AD} 4 | \alias{writeH5AD} 5 | \title{Write H5AD} 6 | \usage{ 7 | writeH5AD( 8 | sce, 9 | file, 10 | X_name = NULL, 11 | skip_assays = FALSE, 12 | compression = c("none", "gzip", "lzf"), 13 | version = NULL, 14 | verbose = NULL, 15 | ... 
16 | ) 17 | } 18 | \arguments{ 19 | \item{sce}{A \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment} 20 | object.} 21 | 22 | \item{file}{String containing a path to write the new \code{.h5ad} file.} 23 | 24 | \item{X_name}{Name of the assay to use as the primary matrix (\code{X}) of the 25 | AnnData object. If \code{NULL}, the first assay of \code{sce} will be used by default.} 26 | 27 | \item{skip_assays}{Logical scalar indicating whether assay matrices should 28 | be ignored when writing to \code{file}.} 29 | 30 | \item{compression}{Type of compression when writing the new \code{.h5ad} file.} 31 | 32 | \item{version}{A string giving the version of the \strong{anndata} Python library 33 | to use. Allowed values are available in \code{.AnnDataVersions}. By default the 34 | latest version is used.} 35 | 36 | \item{verbose}{Logical scalar indicating whether to print progress messages. 37 | If \code{NULL} uses \code{getOption("zellkonverter.verbose")}.} 38 | 39 | \item{...}{ 40 | Arguments passed on to \code{\link[=SCE2AnnData]{SCE2AnnData}} 41 | \describe{ 42 | \item{\code{assays,colData,rowData,reducedDims,metadata,colPairs,rowPairs}}{Arguments specifying how these slots are converted. If \code{TRUE} everything in 43 | that slot is converted, if \code{FALSE} nothing is converted and if a character 44 | vector only those items or columns are converted.} 45 | }} 46 | } 47 | \value{ 48 | A \code{NULL} is invisibly returned. 49 | } 50 | \description{ 51 | Write a H5AD file from a 52 | \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment} 53 | object. 54 | } 55 | \details{ 56 | \subsection{Skipping assays}{ 57 | 58 | Setting \code{skip_assays = TRUE} can occasionally be useful if the matrices in 59 | \code{sce} are stored in a format that is not amenable for efficient conversion 60 | to a \strong{numpy}-compatible format. 
In such cases, it can be better to create 61 | an empty placeholder dataset in \code{file} and fill it in R afterwards. 62 | } 63 | 64 | \subsection{\strong{DelayedArray} assays}{ 65 | 66 | If \code{sce} contains any \strong{DelayedArray} matrices as assays \code{writeH5AD()} will 67 | write them to disk using the \strong{rhdf5} package directly rather than via 68 | Python to avoid instantiating them in memory. However there is currently 69 | an issue which prevents this being done for sparse \strong{DelayedArray} matrices. 70 | } 71 | 72 | \subsection{Known conversion issues}{ 73 | \subsection{Coercion to factors}{ 74 | 75 | The \strong{anndata} package automatically converts some character vectors to 76 | factors when saving \code{.h5ad} files. This can affect columns of \code{rowData(sce)} 77 | and \code{colData(sce)} which may change type when the \code{.h5ad} file is read back 78 | into R. 79 | } 80 | 81 | } 82 | 83 | \subsection{Environment}{ 84 | 85 | See \link{AnnData-Environment} for more details on \strong{zellkonverter} Python 86 | environments. 87 | } 88 | } 89 | \examples{ 90 | # Using the Zeisel brain dataset 91 | if (requireNamespace("scRNAseq", quietly = TRUE)) { 92 | library(scRNAseq) 93 | sce <- ZeiselBrainData() 94 | 95 | # Writing to a H5AD file 96 | temp <- tempfile(fileext = ".h5ad") 97 | writeH5AD(sce, temp) 98 | } 99 | } 100 | \seealso{ 101 | \code{\link[=readH5AD]{readH5AD()}}, to read a 102 | \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment} 103 | object from a H5AD file. 104 | 105 | \code{\link[=SCE2AnnData]{SCE2AnnData()}}, for developers to create an AnnData object from a 106 | \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}. 
107 | } 108 | \author{ 109 | Luke Zappia 110 | 111 | Aaron Lun 112 | } 113 | -------------------------------------------------------------------------------- /man/zellkonverter-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/zellkonverter-package.R 3 | \docType{package} 4 | \name{zellkonverter-package} 5 | \alias{zellkonverter} 6 | \alias{zellkonverter-package} 7 | \title{zellkonverter: Conversion Between scRNA-seq Objects} 8 | \description{ 9 | Provides methods to convert between Python AnnData objects and SingleCellExperiment objects. These are primarily intended for use by downstream Bioconductor packages that wrap Python methods for single-cell data analysis. It also includes functions to read and write H5AD files used for saving AnnData objects to disk. 10 | } 11 | \seealso{ 12 | Useful links: 13 | \itemize{ 14 | \item \url{https://github.com/theislab/zellkonverter} 15 | \item Report bugs at \url{https://github.com/theislab/zellkonverter/issues} 16 | } 17 | 18 | } 19 | \author{ 20 | \strong{Maintainer}: Luke Zappia \email{luke@lazappi.id.au} (\href{https://orcid.org/0000-0001-7744-8565}{ORCID}) 21 | 22 | Authors: 23 | \itemize{ 24 | \item Aaron Lun \email{infinite.monkeys.with.keyboards@gmail.com} (\href{https://orcid.org/0000-0002-3564-4813}{ORCID}) 25 | } 26 | 27 | Other contributors: 28 | \itemize{ 29 | \item Jack Kamm \email{jackkamm@gmail.com} (\href{https://orcid.org/0000-0003-2412-756X}{ORCID}) [contributor] 30 | \item Robrecht Cannoodt \email{rcannood@gmail.com} (\href{https://orcid.org/0000-0003-3641-729X}{ORCID}) (rcannood) [contributor] 31 | \item Gabriel Hoffman \email{gabriel.hoffman@mssm.edu} (\href{https://orcid.org/0000-0002-0957-0224}{ORCID}) (GabrielHoffman) [contributor] 32 | \item Marek Cmero \email{cmero.ma@wehi.edu.au} (\href{https://orcid.org/0000-0001-7783-5530}{ORCID}) (mcmero) [contributor] 33 | } 34 | 35 
| } 36 | -------------------------------------------------------------------------------- /tests/spelling.R: -------------------------------------------------------------------------------- 1 | if (requireNamespace("spelling", quietly = TRUE)) { 2 | spelling::spell_check_test( 3 | vignettes = TRUE, 4 | error = FALSE, 5 | skip_on_cran = TRUE 6 | ) 7 | } 8 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(zellkonverter) 3 | 4 | test_check("zellkonverter") 5 | -------------------------------------------------------------------------------- /tests/testthat/default.profraw: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/theislab/zellkonverter/c17a17220088ff880d512c392d5de4aacb9e9bb1/tests/testthat/default.profraw -------------------------------------------------------------------------------- /tests/testthat/test-SCE2AnnData.R: -------------------------------------------------------------------------------- 1 | test_that(".makeNumpyFriendly() works correctly", { 2 | mat <- matrix(1:50, nrow = 10, ncol = 5) 3 | 4 | friendly_mat <- .makeNumpyFriendly(mat, transpose = TRUE) 5 | expect_identical(friendly_mat, t(mat)) 6 | expect_identical(dim(friendly_mat), rev(dim(mat))) 7 | 8 | friendly_mat <- .makeNumpyFriendly(mat, transpose = FALSE) 9 | expect_identical(friendly_mat, mat) 10 | expect_identical(dim(friendly_mat), dim(mat)) 11 | 12 | sparse_mat <- Matrix::Matrix(mat, sparse = TRUE) 13 | friendly_sparse_mat <- .makeNumpyFriendly(sparse_mat, transpose = TRUE) 14 | expect_s4_class(friendly_sparse_mat, "dgRMatrix") 15 | expect_identical(dim(friendly_sparse_mat), rev(dim(sparse_mat))) 16 | 17 | friendly_sparse_mat <- .makeNumpyFriendly(sparse_mat, transpose = FALSE) 18 | expect_s4_class(friendly_sparse_mat, "dgCMatrix") 19 | 
expect_identical(dim(friendly_sparse_mat), dim(sparse_mat)) 20 | 21 | delayed_mat <- DelayedArray::DelayedArray(mat) 22 | friendly_delayed_mat <- .makeNumpyFriendly(delayed_mat, transpose = TRUE) 23 | expect_identical(friendly_delayed_mat, t(mat)) 24 | expect_identical(dim(friendly_delayed_mat), rev(dim(mat))) 25 | 26 | friendly_delayed_mat <- .makeNumpyFriendly(delayed_mat, transpose = FALSE) 27 | expect_identical(friendly_delayed_mat, mat) 28 | expect_identical(dim(friendly_delayed_mat), dim(mat)) 29 | 30 | sparse_delayed_mat <- DelayedArray::DelayedArray(sparse_mat) 31 | friendly_sparse_delayed_mat <- .makeNumpyFriendly(sparse_delayed_mat, transpose = TRUE) 32 | expect_s4_class(friendly_sparse_delayed_mat, "dgRMatrix") 33 | expect_identical(dim(friendly_sparse_delayed_mat), rev(dim(sparse_delayed_mat))) 34 | 35 | friendly_sparse_delayed_mat <- .makeNumpyFriendly(sparse_delayed_mat, transpose = FALSE) 36 | expect_s4_class(friendly_sparse_delayed_mat, "dgCMatrix") 37 | expect_identical(dim(friendly_sparse_delayed_mat), dim(sparse_delayed_mat)) 38 | }) 39 | -------------------------------------------------------------------------------- /tests/testthat/test-read.R: -------------------------------------------------------------------------------- 1 | # This tests the readH5AD function (and by implication, SCE2AnnData). 
2 | library(SummarizedExperiment) 3 | file <- system.file("extdata", "krumsiek11.h5ad", package = "zellkonverter") 4 | file_example <- system.file("extdata", "example_anndata.h5ad", package = "zellkonverter") 5 | file_v08 <- system.file("extdata", "krumsiek11_augmented_v0-8.h5ad", package = "zellkonverter") 6 | 7 | test_that("Reading H5AD works", { 8 | sce <- readH5AD(file) 9 | expect_s4_class(sce, "SingleCellExperiment") 10 | 11 | expect_identical(assayNames(sce), "X") 12 | expect_identical(colnames(colData(sce)), "cell_type") 13 | }) 14 | 15 | test_that("Reading example H5AD works", { 16 | names <- list( 17 | assays = c("X", "counts"), 18 | colData = "louvain", 19 | rowData = c( 20 | "n_counts", "highly_variable", "means", "dispersions", 21 | "dispersions_norm" 22 | ), 23 | metadata = c("louvain", "neighbors", "pca", "rank_genes_groups", "umap"), 24 | redDim = c("X_pca", "X_umap"), 25 | varm = "PCs", 26 | colPairs = c("connectivities", "distances") 27 | ) 28 | missing <- list() 29 | 30 | sce <- expect_silent(readH5AD(file_example)) 31 | expect_s4_class(sce, "SingleCellExperiment") 32 | 33 | validateH5ADSCE(sce, names, missing) 34 | }) 35 | 36 | test_that("Reading H5AD works with version 0.10.9", { 37 | sce <- readH5AD(file, version = "0.10.9") 38 | expect_s4_class(sce, "SingleCellExperiment") 39 | 40 | expect_identical(assayNames(sce), "X") 41 | expect_identical(colnames(colData(sce)), "cell_type") 42 | }) 43 | 44 | test_that("Reading H5AD works with version 0.10.6", { 45 | sce <- readH5AD(file, version = "0.10.6") 46 | expect_s4_class(sce, "SingleCellExperiment") 47 | 48 | expect_identical(assayNames(sce), "X") 49 | expect_identical(colnames(colData(sce)), "cell_type") 50 | }) 51 | 52 | test_that("Reading H5AD works with version 0.10.2", { 53 | sce <- readH5AD(file, version = "0.10.2") 54 | expect_s4_class(sce, "SingleCellExperiment") 55 | 56 | expect_identical(assayNames(sce), "X") 57 | expect_identical(colnames(colData(sce)), "cell_type") 58 | }) 59 | 60 | 
test_that("Reading H5AD works with version 0.9.2", { 61 | sce <- readH5AD(file, version = "0.9.2") 62 | expect_s4_class(sce, "SingleCellExperiment") 63 | 64 | expect_identical(assayNames(sce), "X") 65 | expect_identical(colnames(colData(sce)), "cell_type") 66 | }) 67 | 68 | test_that("Reading H5AD works with version 0.8.0", { 69 | sce <- readH5AD(file, version = "0.8.0") 70 | expect_s4_class(sce, "SingleCellExperiment") 71 | 72 | expect_identical(assayNames(sce), "X") 73 | expect_identical(colnames(colData(sce)), "cell_type") 74 | }) 75 | 76 | test_that("Reading H5AD works with version 0.7.6", { 77 | # Python 3.7 is not available for aarch64 78 | skip_on_os("mac", arch = "aarch64") 79 | skip_on_os("linux", arch = "aarch64") 80 | 81 | sce <- readH5AD(file, version = "0.7.6") 82 | expect_s4_class(sce, "SingleCellExperiment") 83 | 84 | expect_identical(assayNames(sce), "X") 85 | expect_identical(colnames(colData(sce)), "cell_type") 86 | }) 87 | 88 | test_that("Reading H5AD works with verbose=TRUE", { 89 | sce <- readH5AD(file, verbose = TRUE) 90 | expect_s4_class(sce, "SingleCellExperiment") 91 | 92 | expect_identical(assayNames(sce), "X") 93 | expect_identical(colnames(colData(sce)), "cell_type") 94 | }) 95 | 96 | test_that("Reading H5AD works with HDF5Arrays", { 97 | sce <- readH5AD(file, use_hdf5 = TRUE) 98 | expect_s4_class(sce, "SingleCellExperiment") 99 | expect_s4_class(DelayedArray::seed(assay(sce)), "HDF5ArraySeed") 100 | 101 | ref <- readH5AD(file) 102 | expect_identical(as.matrix(assay(ref)), as.matrix(assay(sce))) 103 | 104 | # Properly sleeps to wait for the process to shut down. 
105 | expect_s4_class( 106 | sce <- readH5AD(file, use_hdf5 = TRUE), 107 | "SingleCellExperiment" 108 | ) 109 | }) 110 | 111 | test_that("Reading H5AD works with a mixture of sparse and HDF5Arrays", { 112 | sce <- readH5AD(file) 113 | assay(sce, "more") <- as(assay(sce, "X"), "CsparseMatrix") 114 | 115 | temp <- tempfile(fileext = ".h5ad") 116 | writeH5AD(sce, temp) 117 | 118 | backed <- readH5AD(temp, use_hdf5 = TRUE) 119 | expect_s4_class(DelayedArray::seed(assay(backed)), "HDF5ArraySeed") 120 | expect_s4_class(assay(backed, "more"), "CsparseMatrix") 121 | }) 122 | 123 | test_that("readH5AD works in a separate process", { 124 | oldshare <- basilisk::getBasiliskShared() 125 | basilisk::setBasiliskShared(FALSE) 126 | oldfork <- basilisk::getBasiliskFork() 127 | basilisk::setBasiliskFork(FALSE) 128 | 129 | sce <- readH5AD(file) 130 | expect_s4_class(sce, "SingleCellExperiment") 131 | 132 | basilisk::setBasiliskShared(oldshare) 133 | basilisk::setBasiliskFork(oldfork) 134 | }) 135 | 136 | test_that("Reading H5AD works with native reader", { 137 | sce <- readH5AD(file, reader = "R") 138 | expect_s4_class(sce, "SingleCellExperiment") 139 | 140 | expect_identical(assayNames(sce), "X") 141 | expect_identical(colnames(colData(sce)), "cell_type") 142 | }) 143 | 144 | test_that("Reading v0.8 H5AD works with native reader", { 145 | sce_py <- readH5AD(file_v08) 146 | sce_r <- readH5AD(file_v08, reader = "R") 147 | 148 | expect_identical(rownames(sce_py), rownames(sce_r)) 149 | expect_identical(colnames(sce_py), colnames(sce_r)) 150 | 151 | expect_identical(rowData(sce_py), rowData(sce_r)) 152 | 153 | expect_identical(colnames(colData(sce_py)), colnames(colData(sce_r))) 154 | expect_equal(colData(sce_py), colData(sce_r)) 155 | 156 | # check the X assay 157 | expect_identical(assays(sce_py), assays(sce_r)) 158 | 159 | # check the easy metadata columns 160 | for (key in c( 161 | "dummy_category", "dummy_int", "dummy_int2", "highlight", 162 | "iroot" 163 | )) { 164 | 
expect_equal(metadata(sce_py)[[key]], metadata(sce_r)[[key]]) 165 | } 166 | 167 | # For these columns the Python reader reads an array 168 | for (key in c("dummy_bool", "dummy_bool2")) { 169 | expect_equal(as.vector(metadata(sce_py)[[key]]), metadata(sce_r)[[key]]) 170 | } 171 | }) 172 | 173 | test_that("Skipping slot conversion works", { 174 | sce <- readH5AD(file, 175 | layers = FALSE, uns = FALSE, var = FALSE, obs = FALSE, 176 | varm = FALSE, obsm = FALSE, varp = FALSE, obsp = FALSE 177 | ) 178 | 179 | expect_identical(assayNames(sce), "X") 180 | expect_identical(metadata(sce), list()) 181 | expect_equal(ncol(rowData(sce)), 0) 182 | expect_equal(ncol(colData(sce)), 0) 183 | expect_equal(length(reducedDims(sce)), 0) 184 | expect_equal(length(rowPairs(sce)), 0) 185 | expect_equal(length(colPairs(sce)), 0) 186 | }) 187 | 188 | test_that("Selective slot conversion works", { 189 | sce <- readH5AD(file, uns = "iroot") 190 | 191 | expect_identical(names(metadata(sce)), "iroot") 192 | }) 193 | 194 | test_that("Selective DF conversion works", { 195 | sce <- readH5AD(file, obs = "cell_type") 196 | 197 | expect_identical(names(colData(sce)), "cell_type") 198 | }) 199 | 200 | test_that("Conversion of raw works", { 201 | skip_if_offline() 202 | 203 | cache <- BiocFileCache::BiocFileCache(ask = FALSE) 204 | example_file <- BiocFileCache::bfcrpath( 205 | cache, "https://ndownloader.figshare.com/files/30462915" 206 | ) 207 | 208 | sce <- readH5AD(example_file, raw = TRUE) 209 | 210 | names <- list( 211 | assays = c("X"), 212 | colData = c( 213 | "n_genes", "n_genes_by_counts", "total_counts", 214 | "total_counts_mt", "pct_counts_mt", "leiden" 215 | ), 216 | rowData = c( 217 | "gene_ids", "n_cells", "mt", "n_cells_by_counts", 218 | "mean_counts", "pct_dropout_by_counts", "total_counts", 219 | "highly_variable", "means", "dispersions", 220 | "dispersions_norm", "mean", "std" 221 | ), 222 | metadata = c( 223 | "hvg", "leiden", "neighbors", "pca", "rank_genes_groups", 224 | "umap" 
225 | ), 226 | redDim = c("X_pca", "X_umap"), 227 | varm = c("PCs"), 228 | colPairs = c("connectivities", "distances"), 229 | raw_rowData = c( 230 | "gene_ids", "n_cells", "mt", "n_cells_by_counts", 231 | "mean_counts", "pct_dropout_by_counts", "total_counts", 232 | "highly_variable", "means", "dispersions", 233 | "dispersions_norm" 234 | ) 235 | ) 236 | 237 | missing <- list() 238 | 239 | validateH5ADSCE(sce, names, missing) 240 | }) 241 | 242 | test_that("Conversion of raw works with use_hdf5 = TRUE", { 243 | skip_if_offline() 244 | 245 | cache <- BiocFileCache::BiocFileCache(ask = FALSE) 246 | example_file <- BiocFileCache::bfcrpath( 247 | cache, "https://ndownloader.figshare.com/files/30462915" 248 | ) 249 | 250 | sce <- readH5AD(example_file, raw = TRUE, use_hdf5 = TRUE) 251 | 252 | names <- list( 253 | assays = c("X"), 254 | colData = c( 255 | "n_genes", "n_genes_by_counts", "total_counts", 256 | "total_counts_mt", "pct_counts_mt", "leiden" 257 | ), 258 | rowData = c( 259 | "gene_ids", "n_cells", "mt", "n_cells_by_counts", 260 | "mean_counts", "pct_dropout_by_counts", "total_counts", 261 | "highly_variable", "means", "dispersions", 262 | "dispersions_norm", "mean", "std" 263 | ), 264 | metadata = c( 265 | "hvg", "leiden", "neighbors", "pca", "rank_genes_groups", 266 | "umap" 267 | ), 268 | redDim = c("X_pca", "X_umap"), 269 | varm = c("PCs"), 270 | colPairs = c("connectivities", "distances"), 271 | raw_rowData = c( 272 | "gene_ids", "n_cells", "mt", "n_cells_by_counts", 273 | "mean_counts", "pct_dropout_by_counts", "total_counts", 274 | "highly_variable", "means", "dispersions", 275 | "dispersions_norm" 276 | ) 277 | ) 278 | 279 | missing <- list() 280 | 281 | validateH5ADSCE(sce, names, missing) 282 | }) 283 | -------------------------------------------------------------------------------- /tests/testthat/test-validation.R: -------------------------------------------------------------------------------- 1 | file <- system.file("extdata", 
"example_anndata.h5ad", 2 | package = "zellkonverter" 3 | ) 4 | sce <- readH5AD(file) 5 | 6 | names <- list( 7 | assays = c("X", "counts"), 8 | colData = "louvain", 9 | rowData = c( 10 | "n_counts", "highly_variable", "means", "dispersions", 11 | "dispersions_norm" 12 | ), 13 | metadata = c("louvain", "neighbors", "pca", "rank_genes_groups", "umap"), 14 | redDim = c("X_pca", "X_umap"), 15 | varm = "PCs", 16 | colPairs = c("connectivities", "distances") 17 | ) 18 | 19 | missing <- list() 20 | 21 | test_that("validateH5ADSCE works", { 22 | validateH5ADSCE(sce, names, missing) 23 | expect_error( 24 | validateH5ADSCE(sce, names, list(varm = "PCs")), 25 | "varm names missing is not TRUE" 26 | ) 27 | }) 28 | 29 | test_that("expectSCE works", { 30 | expectSCE(sce, sce) 31 | }) 32 | -------------------------------------------------------------------------------- /tests/testthat/test-write.R: -------------------------------------------------------------------------------- 1 | # This tests the writeH5AD function (and by implication, AnnData2SCE). 2 | library(scRNAseq) 3 | 4 | sce <- ZeiselBrainData() 5 | reducedDim(sce, "WHEE") <- matrix(runif(ncol(sce) * 10), ncol = 10) 6 | 7 | test_that("writeH5AD works as expected", { 8 | temp <- tempfile(fileext = ".h5ad") 9 | writeH5AD(sce, temp) 10 | expect_true(file.exists(temp)) 11 | 12 | # Reading it back out again. Hopefully we didn't lose anything important. 13 | out <- readH5AD(temp) 14 | 15 | expect_identical(dimnames(out), dimnames(sce)) 16 | expect_equal(assay(out), assay(sce)) 17 | expect_identical(reducedDims(out), reducedDims(sce)) 18 | 19 | # Need to coerce the factors back to strings. 
20 | row_data <- rowData(out) 21 | for (i in seq_len(ncol(row_data))) { 22 | if (is.factor(row_data[[i]])) { 23 | row_data[[i]] <- as.character(row_data[[i]]) 24 | } 25 | } 26 | expect_identical(row_data, rowData(sce)) 27 | 28 | col_data <- colData(out) 29 | for (i in seq_len(ncol(col_data))) { 30 | if (is.factor(col_data[[i]])) { 31 | col_data[[i]] <- as.character(col_data[[i]]) 32 | } 33 | } 34 | names(col_data) <- names(colData(sce)) 35 | expect_identical(col_data, colData(sce)) 36 | }) 37 | 38 | test_that("writeH5AD works as expected with version 0.10.9", { 39 | temp <- tempfile(fileext = ".h5ad") 40 | writeH5AD(sce, temp, version = "0.10.9") 41 | expect_true(file.exists(temp)) 42 | 43 | # Reading it back out again. Hopefully we didn't lose anything important. 44 | out <- readH5AD(temp, version = "0.10.9") 45 | 46 | expect_identical(dimnames(out), dimnames(sce)) 47 | expect_equal(assay(out), assay(sce)) 48 | expect_identical(reducedDims(out), reducedDims(sce)) 49 | 50 | # Need to coerce the factors back to strings. 51 | row_data <- rowData(out) 52 | for (i in seq_len(ncol(row_data))) { 53 | if (is.factor(row_data[[i]])) { 54 | row_data[[i]] <- as.character(row_data[[i]]) 55 | } 56 | } 57 | expect_identical(row_data, rowData(sce)) 58 | 59 | col_data <- colData(out) 60 | for (i in seq_len(ncol(col_data))) { 61 | if (is.factor(col_data[[i]])) { 62 | col_data[[i]] <- as.character(col_data[[i]]) 63 | } 64 | } 65 | names(col_data) <- names(colData(sce)) 66 | expect_identical(col_data, colData(sce)) 67 | }) 68 | 69 | test_that("writeH5AD works as expected with version 0.10.6", { 70 | temp <- tempfile(fileext = ".h5ad") 71 | writeH5AD(sce, temp, version = "0.10.6") 72 | expect_true(file.exists(temp)) 73 | 74 | # Reading it back out again. Hopefully we didn't lose anything important. 
75 | out <- readH5AD(temp, version = "0.10.6") 76 | 77 | expect_identical(dimnames(out), dimnames(sce)) 78 | expect_equal(assay(out), assay(sce)) 79 | expect_identical(reducedDims(out), reducedDims(sce)) 80 | 81 | # Need to coerce the factors back to strings. 82 | row_data <- rowData(out) 83 | for (i in seq_len(ncol(row_data))) { 84 | if (is.factor(row_data[[i]])) { 85 | row_data[[i]] <- as.character(row_data[[i]]) 86 | } 87 | } 88 | expect_identical(row_data, rowData(sce)) 89 | 90 | col_data <- colData(out) 91 | for (i in seq_len(ncol(col_data))) { 92 | if (is.factor(col_data[[i]])) { 93 | col_data[[i]] <- as.character(col_data[[i]]) 94 | } 95 | } 96 | names(col_data) <- names(colData(sce)) 97 | expect_identical(col_data, colData(sce)) 98 | }) 99 | 100 | test_that("writeH5AD works as expected with version 0.10.2", { 101 | temp <- tempfile(fileext = ".h5ad") 102 | writeH5AD(sce, temp, version = "0.10.2") 103 | expect_true(file.exists(temp)) 104 | 105 | # Reading it back out again. Hopefully we didn't lose anything important. 106 | out <- readH5AD(temp, version = "0.10.2") 107 | 108 | expect_identical(dimnames(out), dimnames(sce)) 109 | expect_equal(assay(out), assay(sce)) 110 | expect_identical(reducedDims(out), reducedDims(sce)) 111 | 112 | # Need to coerce the factors back to strings. 
113 | row_data <- rowData(out) 114 | for (i in seq_len(ncol(row_data))) { 115 | if (is.factor(row_data[[i]])) { 116 | row_data[[i]] <- as.character(row_data[[i]]) 117 | } 118 | } 119 | expect_identical(row_data, rowData(sce)) 120 | 121 | col_data <- colData(out) 122 | for (i in seq_len(ncol(col_data))) { 123 | if (is.factor(col_data[[i]])) { 124 | col_data[[i]] <- as.character(col_data[[i]]) 125 | } 126 | } 127 | names(col_data) <- names(colData(sce)) 128 | expect_identical(col_data, colData(sce)) 129 | }) 130 | 131 | test_that("writeH5AD works as expected with version 0.9.2", { 132 | temp <- tempfile(fileext = ".h5ad") 133 | writeH5AD(sce, temp, version = "0.9.2") 134 | expect_true(file.exists(temp)) 135 | 136 | # Reading it back out again. Hopefully we didn't lose anything important. 137 | out <- readH5AD(temp, version = "0.9.2") 138 | 139 | expect_identical(dimnames(out), dimnames(sce)) 140 | expect_equal(assay(out), assay(sce)) 141 | expect_identical(reducedDims(out), reducedDims(sce)) 142 | 143 | # Need to coerce the factors back to strings. 144 | row_data <- rowData(out) 145 | for (i in seq_len(ncol(row_data))) { 146 | if (is.factor(row_data[[i]])) { 147 | row_data[[i]] <- as.character(row_data[[i]]) 148 | } 149 | } 150 | expect_identical(row_data, rowData(sce)) 151 | 152 | col_data <- colData(out) 153 | for (i in seq_len(ncol(col_data))) { 154 | if (is.factor(col_data[[i]])) { 155 | col_data[[i]] <- as.character(col_data[[i]]) 156 | } 157 | } 158 | names(col_data) <- names(colData(sce)) 159 | expect_identical(col_data, colData(sce)) 160 | }) 161 | 162 | test_that("writeH5AD works as expected with version 0.8.0", { 163 | temp <- tempfile(fileext = ".h5ad") 164 | writeH5AD(sce, temp, version = "0.8.0") 165 | expect_true(file.exists(temp)) 166 | 167 | # Reading it back out again. Hopefully we didn't lose anything important. 
168 | out <- readH5AD(temp, version = "0.8.0") 169 | 170 | expect_identical(dimnames(out), dimnames(sce)) 171 | expect_equal(assay(out), assay(sce)) 172 | expect_identical(reducedDims(out), reducedDims(sce)) 173 | 174 | # Need to coerce the factors back to strings. 175 | row_data <- rowData(out) 176 | for (i in seq_len(ncol(row_data))) { 177 | if (is.factor(row_data[[i]])) { 178 | row_data[[i]] <- as.character(row_data[[i]]) 179 | } 180 | } 181 | expect_identical(row_data, rowData(sce)) 182 | 183 | col_data <- colData(out) 184 | for (i in seq_len(ncol(col_data))) { 185 | if (is.factor(col_data[[i]])) { 186 | col_data[[i]] <- as.character(col_data[[i]]) 187 | } 188 | } 189 | names(col_data) <- names(colData(sce)) 190 | expect_identical(col_data, colData(sce)) 191 | }) 192 | 193 | test_that("writeH5AD works as expected with version 0.7.6", { 194 | # Python 3.7 is not available for aarch64 195 | skip_on_os("mac", arch = "aarch64") 196 | skip_on_os("linux", arch = "aarch64") 197 | 198 | temp <- tempfile(fileext = ".h5ad") 199 | writeH5AD(sce, temp, version = "0.7.6") 200 | expect_true(file.exists(temp)) 201 | 202 | # Reading it back out again. Hopefully we didn't lose anything important. 203 | out <- readH5AD(temp, version = "0.7.6") 204 | 205 | expect_identical(dimnames(out), dimnames(sce)) 206 | expect_equal(assay(out), assay(sce)) 207 | expect_identical(reducedDims(out), reducedDims(sce)) 208 | 209 | # Need to coerce the factors back to strings. 
210 | row_data <- rowData(out) 211 | for (i in seq_len(ncol(row_data))) { 212 | if (is.factor(row_data[[i]])) { 213 | row_data[[i]] <- as.character(row_data[[i]]) 214 | } 215 | } 216 | expect_identical(row_data, rowData(sce)) 217 | 218 | col_data <- colData(out) 219 | for (i in seq_len(ncol(col_data))) { 220 | if (is.factor(col_data[[i]])) { 221 | col_data[[i]] <- as.character(col_data[[i]]) 222 | } 223 | } 224 | names(col_data) <- names(colData(sce)) 225 | expect_identical(col_data, colData(sce)) 226 | }) 227 | 228 | test_that("writeH5AD works as expected with verbose=TRUE", { 229 | temp <- tempfile(fileext = ".h5ad") 230 | writeH5AD(sce, temp, verbose = TRUE) 231 | expect_true(file.exists(temp)) 232 | 233 | # Reading it back out again. Hopefully we didn't lose anything important. 234 | out <- readH5AD(temp) 235 | 236 | expect_identical(dimnames(out), dimnames(sce)) 237 | expect_equal(assay(out), assay(sce)) 238 | expect_identical(reducedDims(out), reducedDims(sce)) 239 | 240 | # Need to coerce the factors back to strings. 241 | row_data <- rowData(out) 242 | for (i in seq_len(ncol(row_data))) { 243 | if (is.factor(row_data[[i]])) { 244 | row_data[[i]] <- as.character(row_data[[i]]) 245 | } 246 | } 247 | expect_identical(row_data, rowData(sce)) 248 | 249 | col_data <- colData(out) 250 | for (i in seq_len(ncol(col_data))) { 251 | if (is.factor(col_data[[i]])) { 252 | col_data[[i]] <- as.character(col_data[[i]]) 253 | } 254 | } 255 | names(col_data) <- names(colData(sce)) 256 | expect_identical(col_data, colData(sce)) 257 | }) 258 | 259 | test_that("writeH5AD works as expected with sparse matrices", { 260 | sparse_sce <- sce 261 | mat <- assay(sparse_sce) 262 | counts(sparse_sce) <- as(mat, "CsparseMatrix") 263 | logcounts(sparse_sce) <- counts(sparse_sce) * 10 264 | assay(sparse_sce, "random") <- mat # throwing in a dense matrix in a mixture. 
265 | 266 | temp <- tempfile(fileext = ".h5ad") 267 | writeH5AD(sparse_sce, temp) 268 | expect_true(file.exists(temp)) 269 | 270 | # Reading it back out again. Hopefully we didn't lose anything important. 271 | out <- readH5AD(temp, X_name = "X") 272 | 273 | expect_identical(counts(sparse_sce), assay(out, "X")) 274 | expect_identical(logcounts(sparse_sce), logcounts(out)) 275 | # expect_identical() was failing on Windows for some reason... 276 | expect_equal(assay(sparse_sce, "random"), assay(out, "random")) 277 | }) 278 | 279 | test_that("writeH5AD works with assay skipping", { 280 | temp <- tempfile(fileext = ".h5ad") 281 | writeH5AD(sce, temp, skip_assays = TRUE) 282 | expect_true(file.exists(temp)) 283 | 284 | out <- HDF5Array::HDF5Array(temp, "X/data") 285 | expect_identical(sum(out), 0) # it's empty! 286 | }) 287 | 288 | test_that("writeH5AD works with X_name", { 289 | temp <- tempfile(fileext = ".h5ad") 290 | writeH5AD(sce, temp, X_name = "counts") 291 | expect_true(file.exists(temp)) 292 | 293 | out <- readH5AD(temp, X_name = "X") 294 | expect_equal(assay(out, "X"), assay(sce, "counts")) 295 | }) 296 | 297 | test_that("writeH5AD works in a separate process", { 298 | oldshare <- basilisk::getBasiliskShared() 299 | oldfork <- basilisk::getBasiliskFork() 300 | # Restore the basilisk settings even if writing fails, so later tests are unaffected. 301 | on.exit(basilisk::setBasiliskShared(oldshare), add = TRUE) 302 | on.exit(basilisk::setBasiliskFork(oldfork), add = TRUE) 303 | basilisk::setBasiliskShared(FALSE) 304 | basilisk::setBasiliskFork(FALSE) 305 | 306 | temp <- tempfile(fileext = ".h5ad") 307 | writeH5AD(sce, temp) 308 | expect_true(file.exists(temp)) 309 | }) 310 | 311 | test_that("writeH5AD DelayedArray X works", { 312 | delayed_sce <- sce 313 | counts(delayed_sce) <- DelayedArray::DelayedArray(counts(delayed_sce)) 314 | 315 | temp <- tempfile(fileext = ".h5ad") 316 | 317 | writeH5AD(delayed_sce, temp, X_name = "counts") 318 | expect_true(file.exists(temp)) 319 | 320 | out <- readH5AD(temp, X_name = "X") 321 | 322 | # Identical fail on Windows for some reason 323 | 
expect_equal(counts(sce), assay(out, "X")) 324 | }) 325 | 326 | test_that("writeH5AD sparse DelayedArray X works", { 327 | delayed_sce <- sce 328 | sparse_counts <- as(counts(delayed_sce), "CsparseMatrix") 329 | counts(delayed_sce) <- DelayedArray::DelayedArray(sparse_counts) 330 | 331 | temp <- tempfile(fileext = ".h5ad") 332 | 333 | writeH5AD(delayed_sce, temp, X_name = "counts") 334 | expect_true(file.exists(temp)) 335 | 336 | out <- readH5AD(temp, X_name = "X") 337 | 338 | # Sparse DelayedArrays are currently coerced into memory 339 | # This expectation will need to be changed once that is fixed 340 | expect_identical(sparse_counts, assay(out, "X")) 341 | }) 342 | 343 | test_that("writeH5AD DelayedArray layer works", { 344 | delayed_sce <- sce 345 | assay(delayed_sce, "layer") <- DelayedArray::DelayedArray( 346 | counts(delayed_sce) 347 | ) 348 | 349 | temp <- tempfile(fileext = ".h5ad") 350 | 351 | writeH5AD(delayed_sce, temp) 352 | expect_true(file.exists(temp)) 353 | 354 | out <- readH5AD(temp, X_name = "X") 355 | 356 | # Identical fails on Windows for some reason 357 | expect_equal(counts(sce), assay(out, "layer")) 358 | }) 359 | 360 | test_that("writeH5AD works with colData list columns", { 361 | list_sce <- sce 362 | colData(list_sce)$ListCol <- lapply(seq_len(ncol(list_sce)), function(x) { 363 | sample(LETTERS, 2) 364 | }) 365 | 366 | temp <- tempfile(fileext = ".h5ad") 367 | 368 | expect_warning(writeH5AD(list_sce, temp), "columns are not atomic") 369 | expect_true(file.exists(temp)) 370 | 371 | # Knowing what comes back is hard so just check there is something 372 | out <- readH5AD(temp, X_name = "X") 373 | expect_true("ListCol" %in% names(metadata(out)$.colData)) 374 | }) 375 | 376 | test_that("writeH5AD works with rowData list columns", { 377 | list_sce <- sce 378 | rowData(list_sce)$ListCol <- lapply(seq_len(nrow(list_sce)), function(x) { 379 | sample(LETTERS, 2) 380 | }) 381 | 382 | temp <- tempfile(fileext = ".h5ad") 383 | 384 | 
expect_warning(writeH5AD(list_sce, temp), "columns are not atomic") 385 | expect_true(file.exists(temp)) 386 | 387 | # Knowing what comes back is hard so just check there is something 388 | out <- readH5AD(temp, X_name = "X") 389 | expect_true("ListCol" %in% names(metadata(out)$.rowData)) 390 | }) 391 | 392 | test_that("writeH5AD works with gzip compression", { 393 | temp <- tempfile(fileext = ".h5ad") 394 | writeH5AD(sce, temp, X_name = "counts", compression = "gzip") 395 | expect_true(file.exists(temp)) 396 | 397 | out <- readH5AD(temp, X_name = "X") 398 | expect_equal(assay(out, "X"), assay(sce, "counts")) 399 | }) 400 | 401 | test_that("writeH5AD works with lzf compression", { 402 | temp <- tempfile(fileext = ".h5ad") 403 | writeH5AD(sce, temp, X_name = "counts", compression = "lzf") 404 | expect_true(file.exists(temp)) 405 | 406 | out <- readH5AD(temp, X_name = "X") 407 | expect_equal(assay(out, "X"), assay(sce, "counts")) 408 | }) 409 | 410 | test_that("Skipping slot conversion works", { 411 | temp <- tempfile(fileext = ".h5ad") 412 | writeH5AD(sce, temp, 413 | assays = FALSE, colData = FALSE, rowData = FALSE, 414 | varm = FALSE, reducedDims = FALSE, metadata = FALSE, 415 | colPairs = FALSE, rowPairs = FALSE 416 | ) 417 | 418 | out <- readH5AD(temp, X_name = "X") 419 | 420 | expect_identical(assayNames(out), "X") 421 | expect_identical(metadata(out), list(X_name = "counts")) 422 | expect_equal(ncol(rowData(out)), 0) 423 | expect_equal(ncol(colData(out)), 0) 424 | expect_equal(length(reducedDims(out)), 0) 425 | expect_equal(length(rowPairs(out)), 0) 426 | expect_equal(length(colPairs(out)), 0) 427 | }) 428 | 429 | test_that("Selective DF conversion works", { 430 | temp <- tempfile(fileext = ".h5ad") 431 | writeH5AD(sce, temp, assays = FALSE, colData = "tissue") 432 | 433 | out <- readH5AD(temp, X_name = "X") 434 | 435 | expect_identical(names(colData(out)), "tissue") 436 | }) 437 | 438 | test_that("Writing works with empty rowData/colData", { 439 | mini_sce <- 
SingleCellExperiment::SingleCellExperiment( 440 | assays = list(counts = matrix(rpois(100 * 50, 4), nrow = 100, ncol = 50)) 441 | ) 442 | 443 | temp <- tempfile(fileext = ".h5ad") 444 | writeH5AD(mini_sce, temp) 445 | 446 | out <- readH5AD(temp, X_name = "X") 447 | expect_true(all(counts(mini_sce) == assay(out, "X"))) 448 | }) 449 | 450 | test_that("writeH5AD works with SpatialExperiment objects", { 451 | skip_if_not_installed("SpatialExperiment") 452 | 453 | spe <- SpatialExperiment::SpatialExperiment( 454 | assays = list(counts = SingleCellExperiment::counts(sce)) 455 | ) 456 | spcoords <- matrix( 457 | runif(ncol(sce) * 2), 458 | ncol = 2 459 | ) 460 | rownames(spcoords) <- colnames(sce) 461 | colnames(spcoords) <- paste0("Spatial", 1:2) 462 | SpatialExperiment::spatialCoords(spe) <- spcoords 463 | 464 | temp <- tempfile(fileext = ".h5ad") 465 | 466 | writeH5AD(spe, temp) 467 | expect_true(file.exists(temp)) 468 | 469 | out <- readH5AD(temp, X_name = "X") 470 | 471 | expect_identical(assay(out, "X"), assay(spe, "counts")) 472 | expect_identical(dimnames(out), dimnames(spe)) 473 | 474 | # Check the spatial coordinates. 475 | expect_identical(reducedDims(out)$spatial, spcoords) 476 | }) 477 | 478 | test_that("writeH5AD works with SpatialExperiment objects without names", { 479 | skip_if_not_installed("SpatialExperiment") 480 | 481 | spe <- SpatialExperiment::SpatialExperiment( 482 | assays = list(counts = SingleCellExperiment::counts(sce)) 483 | ) 484 | spcoords <- matrix( 485 | runif(ncol(sce) * 2), 486 | ncol = 2 487 | ) 488 | SpatialExperiment::spatialCoords(spe) <- spcoords 489 | 490 | temp <- tempfile(fileext = ".h5ad") 491 | 492 | writeH5AD(spe, temp) 493 | expect_true(file.exists(temp)) 494 | 495 | out <- readH5AD(temp, X_name = "X") 496 | 497 | expect_identical(assay(out, "X"), assay(spe, "counts")) 498 | expect_identical(dimnames(out), dimnames(spe)) 499 | 500 | # Check the spatial coordinates. 
501 | expect_identical(reducedDim(out, "spatial", withDimnames = FALSE), spcoords) 502 | }) 503 | 504 | test_that("writeH5AD works without names", { 505 | nameless_sce <- SingleCellExperiment::SingleCellExperiment( 506 | assays = list( 507 | counts = matrix(rpois(100 * 50, 4), nrow = 100, ncol = 50) 508 | ), 509 | reducedDims = list( 510 | redDim = matrix(runif(50 * 10), ncol = 10) 511 | ) 512 | ) 513 | 514 | temp <- tempfile(fileext = ".h5ad") 515 | writeH5AD(nameless_sce, temp) 516 | 517 | out <- readH5AD(temp, X_name = "X") 518 | expect_true(all(counts(nameless_sce) == assay(out, "X"))) 519 | 520 | expect_identical( 521 | reducedDim(out, "redDim", withDimnames = FALSE), 522 | reducedDim(nameless_sce, "redDim") 523 | ) 524 | }) 525 | 526 | test_that("writeH5AD keeps dimnames", { 527 | cells <- letters[1:8] 528 | genes <- LETTERS[1:5] 529 | ncells <- length(cells) 530 | ngenes <- length(genes) 531 | counts <- matrix( 532 | rpois(ngenes * ncells, 5), 533 | ncol = ncells, 534 | dimnames = list(genes, cells) 535 | ) 536 | dimname_sce <- SingleCellExperiment::SingleCellExperiment( 537 | list(counts = counts) 538 | ) 539 | 540 | temp <- tempfile(fileext = ".h5ad") 541 | writeH5AD(dimname_sce, temp) 542 | 543 | out <- readH5AD(temp, X_name = "X") 544 | 545 | expect_identical(dimnames(out), dimnames(dimname_sce)) 546 | }) 547 | -------------------------------------------------------------------------------- /tests/testthat/test-zzz-anndata.R: -------------------------------------------------------------------------------- 1 | # This file tests compatibility with the R {anndata} package 2 | # Despite best efforts the package isn't reliably unloaded so these tests have 3 | # been moved to a separate file that is (hopefully) always run last 4 | 5 | test_that("Reading is compatible with R anndata", { 6 | skip_if_offline() 7 | skip_if_not_installed("withr") 8 | skip_if_not_installed("anndata") 9 | 10 | withr::with_package("anndata", { 11 | file <- system.file("extdata", 
"krumsiek11.h5ad", 12 | package = "zellkonverter" 13 | ) 14 | sce <- readH5AD(file) 15 | expect_s4_class(sce, "SingleCellExperiment") 16 | 17 | expect_identical(assayNames(sce), "X") 18 | expect_identical(colnames(colData(sce)), "cell_type") 19 | 20 | cache <- BiocFileCache::BiocFileCache(ask = FALSE) 21 | example_file <- BiocFileCache::bfcrpath( 22 | cache, "https://ndownloader.figshare.com/files/30462915" 23 | ) 24 | 25 | sce <- readH5AD(example_file, raw = TRUE) 26 | 27 | names <- list( 28 | assays = c("X"), 29 | colData = c( 30 | "n_genes", "n_genes_by_counts", "total_counts", 31 | "total_counts_mt", "pct_counts_mt", "leiden" 32 | ), 33 | rowData = c( 34 | "gene_ids", "n_cells", "mt", "n_cells_by_counts", 35 | "mean_counts", "pct_dropout_by_counts", "total_counts", 36 | "highly_variable", "means", "dispersions", 37 | "dispersions_norm", "mean", "std" 38 | ), 39 | raw_rowData = c( 40 | "gene_ids", "n_cells", "mt", "n_cells_by_counts", 41 | "mean_counts", "pct_dropout_by_counts", 42 | "total_counts", "highly_variable", "means", 43 | "dispersions", "dispersions_norm" 44 | ), 45 | redDim = c("X_pca", "X_umap"), 46 | varm = c("PCs"), 47 | colPairs = c("connectivities", "distances"), 48 | metadata = c( 49 | "hvg", "leiden", "neighbors", "pca", 50 | "rank_genes_groups", "umap" 51 | ) 52 | ) 53 | 54 | missing <- list() 55 | 56 | validateH5ADSCE(sce, names, missing) 57 | }) 58 | 59 | pkgload::unload("anndata") 60 | }) 61 | 62 | test_that("Writing is compatible with R anndata", { 63 | skip_if_offline() 64 | skip_if_not_installed("withr") 65 | skip_if_not_installed("anndata") 66 | 67 | withr::with_package("anndata", { 68 | sce <- scRNAseq::ZeiselBrainData() 69 | temp <- tempfile(fileext = ".h5ad") 70 | writeH5AD(sce, temp) 71 | expect_true(file.exists(temp)) 72 | 73 | # Reading it back out again. 
Hopefully we didn't lose anything important 74 | out <- readH5AD(temp) 75 | 76 | expect_identical(dimnames(out), dimnames(sce)) 77 | expect_equal(assay(out), assay(sce)) 78 | expect_identical(reducedDims(out), reducedDims(sce)) 79 | 80 | # Need to coerce the factors back to strings. 81 | row_data <- rowData(out) 82 | for (i in seq_len(ncol(row_data))) { 83 | if (is.factor(row_data[[i]])) { 84 | row_data[[i]] <- as.character(row_data[[i]]) 85 | } 86 | } 87 | expect_identical(row_data, rowData(sce)) 88 | 89 | col_data <- colData(out) 90 | for (i in seq_len(ncol(col_data))) { 91 | if (is.factor(col_data[[i]])) { 92 | col_data[[i]] <- as.character(col_data[[i]]) 93 | } 94 | } 95 | names(col_data) <- names(colData(sce)) 96 | expect_identical(col_data, colData(sce)) 97 | }) 98 | 99 | pkgload::unload("anndata") 100 | }) 101 | -------------------------------------------------------------------------------- /vignettes/zellkonverter.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: Converting single-cell data structures between Bioconductor and Python 3 | author: 4 | - name: Luke Zappia 5 | email: luke@lazappi.id.au 6 | - name: Aaron Lun 7 | email: infinite.monkeys.with.keyboards@gmail.com 8 | date: "Revised: 17 April 2022" 9 | output: 10 | BiocStyle::html_document: 11 | toc_float: true 12 | package: zellkonverter 13 | vignette: > 14 | %\VignetteIndexEntry{Converting to/from AnnData to SingleCellExperiments} 15 | %\VignetteEngine{knitr::rmarkdown} 16 | %\VignetteEncoding{UTF-8} 17 | --- 18 | 19 | ```{r setup, echo = FALSE, results = "hide", message = FALSE} 20 | library(knitr) 21 | library(BiocStyle) 22 | opts_chunk$set(error = FALSE, message = FALSE, warning = FALSE) 23 | ``` 24 | 25 | Overview 26 | ======== 27 | 28 | This package provides a lightweight interface between the Bioconductor 29 | `SingleCellExperiment` data structure and the Python `AnnData`-based single-cell 30 | analysis environment. 
The idea is to enable users and developers to easily move 31 | data between these frameworks to construct a multi-language analysis pipeline 32 | across R/Bioconductor and Python. 33 | 34 | Reading and writing H5AD files 35 | ============================== 36 | 37 | The `readH5AD()` function can be used to read a `SingleCellExperiment` from a 38 | H5AD file. This can be manipulated in the usual way as described in the 39 | `r Biocpkg("SingleCellExperiment")` documentation. 40 | 41 | ```{r read} 42 | library(zellkonverter) 43 | 44 | # Obtaining an example H5AD file. 45 | example_h5ad <- system.file( 46 | "extdata", "krumsiek11.h5ad", 47 | package = "zellkonverter" 48 | ) 49 | readH5AD(example_h5ad) 50 | ``` 51 | 52 | We can also write a `SingleCellExperiment` to a H5AD file with the 53 | `writeH5AD()` function. This is demonstrated below on the classic Zeisel mouse 54 | brain dataset from the `r Biocpkg("scRNAseq")` package. The resulting file can 55 | then be directly used in compatible Python-based analysis frameworks. 56 | 57 | ```{r write} 58 | library(scRNAseq) 59 | 60 | sce_zeisel <- ZeiselBrainData() 61 | out_path <- tempfile(fileext = ".h5ad") 62 | writeH5AD(sce_zeisel, file = out_path) 63 | ``` 64 | 65 | Converting between `SingleCellExperiment` and `AnnData` objects 66 | =============================================================== 67 | 68 | Developers and power users who control their Python environments can directly 69 | convert between `SingleCellExperiment` and 70 | [`AnnData` objects](https://anndata.readthedocs.io/en/stable/) using the 71 | `SCE2AnnData()` and `AnnData2SCE()` utilities. These functions expect that 72 | `r CRANpkg("reticulate")` has already been loaded along with an appropriate 73 | version of the [_anndata_](https://pypi.org/project/anndata/) package. We 74 | suggest using the `r Biocpkg("basilisk")` package to set up the Python 75 | environment before using these functions. 
76 | 77 | ```{r convert} 78 | library(basilisk) 79 | library(scRNAseq) 80 | 81 | seger <- SegerstolpePancreasData() 82 | roundtrip <- basiliskRun(fun = function(sce) { 83 | # Convert SCE to AnnData: 84 | adata <- SCE2AnnData(sce) 85 | 86 | # Maybe do some work in Python on 'adata': 87 | # BLAH BLAH BLAH 88 | 89 | # Convert back to an SCE: 90 | AnnData2SCE(adata) 91 | }, env = zellkonverterAnnDataEnv(), sce = seger) 92 | ``` 93 | 94 | Package developers can guarantee that they are using the same versions of Python 95 | packages as `r Biocpkg("zellkonverter")` by using the `AnnDataDependencies()` 96 | function to set up their Python environments. 97 | 98 | ```{r anndata-deps} 99 | AnnDataDependencies() 100 | ``` 101 | 102 | This function can also be used to return dependencies for environments using 103 | older versions of _anndata_. 104 | 105 | ```{r anndata-deps-old} 106 | AnnDataDependencies(version = "0.7.6") 107 | ``` 108 | 109 | Progress messages 110 | ================= 111 | 112 | By default the functions in `r Biocpkg("zellkonverter")` don't display any 113 | information about their progress but this can be turned on by setting the 114 | `verbose = TRUE` argument. 115 | 116 | ```{r verbose} 117 | readH5AD(example_h5ad, verbose = TRUE) 118 | ``` 119 | 120 | If you would like to see progress messages for all functions by default you can 121 | turn this on using the `setZellkonverterVerbose()` function. 122 | 123 | ```{r verbose-set, eval = FALSE} 124 | # This is not run here 125 | setZellkonverterVerbose(TRUE) 126 | ``` 127 | 128 | Session information 129 | =================== 130 | 131 | ```{r} 132 | sessionInfo() 133 | ``` 134 | --------------------------------------------------------------------------------