├── .BBSoptions
├── .Rbuildignore
├── .github
    ├── .gitignore
    ├── dependabot.yaml
    └── workflows
    │   ├── check-bioc.yml
    │   └── pr-commands.yaml
├── .gitignore
├── CODE_OF_CONDUCT.md
├── DESCRIPTION
├── LICENSE
├── LICENSE.md
├── NAMESPACE
├── NEWS.md
├── R
    ├── AnnData2SCE.R
    ├── SCE2AnnData.R
    ├── basilisk.R
    ├── read.R
    ├── reticulate.R
    ├── ui.R
    ├── utils.R
    ├── validation.R
    ├── write.R
    └── zellkonverter-package.R
├── README.md
├── codecov.yml
├── configure
├── configure.win
├── inst
    ├── NEWS.Rd
    ├── WORDLIST
    ├── extdata
    │   ├── example_anndata.h5ad
    │   ├── krumsiek11.h5ad
    │   └── krumsiek11_augmented_v0-8.h5ad
    └── scripts
    │   ├── example_anndata.R
    │   ├── krumsiek11.md
    │   └── krumsiek11_augmented.py
├── longtests
    ├── testthat.R
    └── testthat
    │   ├── test-cellrank_pancreas.R
    │   ├── test-example_anndata.R
    │   ├── test-gtex_8tissues.R
    │   ├── test-pegasus_marrow.R
    │   ├── test-scIB_pancreas.R
    │   ├── test-scanpy_pbmc3k.R
    │   ├── test-scanpy_trajectory.R
    │   ├── test-scvelo_pancreas.R
    │   ├── test-scvi_citeseq.R
    │   └── test-squidpy_visium.R
├── man
    ├── AnnData-Conversion.Rd
    ├── AnnData-Environment.Rd
    ├── expectSCE.Rd
    ├── figures
    │   ├── AnnData2SCE.png
    │   └── zellkonverter.png
    ├── r-py-conversion.Rd
    ├── readH5AD.Rd
    ├── setZellkonverterVerbose.Rd
    ├── validateH5ADSCE.Rd
    ├── writeH5AD.Rd
    └── zellkonverter-package.Rd
├── tests
    ├── spelling.R
    ├── testthat.R
    └── testthat
    │   ├── default.profraw
    │   ├── test-SCE2AnnData.R
    │   ├── test-read.R
    │   ├── test-validation.R
    │   ├── test-write.R
    │   └── test-zzz-anndata.R
└── vignettes
    └── zellkonverter.Rmd


/.BBSoptions:
--------------------------------------------------------------------------------
1 | RunLongTests: TRUE
2 | 


--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^zellkonverter\.Rproj$
 2 | ^\.Rproj\.user$
 3 | ^LICENSE\.md$
 4 | ^CODE_OF_CONDUCT\.md$
 5 | ^\.BBSoptions$
 6 | ^\.github$
 7 | ^codecov\.yml$
 8 | ^doc$
 9 | ^Meta$
10 | 


--------------------------------------------------------------------------------
/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | 


--------------------------------------------------------------------------------
/.github/dependabot.yaml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 |   - package-ecosystem: "github-actions"
4 |     directory: "/"
5 |     schedule:
6 |       interval: "weekly"
7 | 


--------------------------------------------------------------------------------
/.github/workflows/check-bioc.yml:
--------------------------------------------------------------------------------
  1 | ## This is a simplified action for building and testing a Bioconductor package
  2 | ## based on:
  3 | ## * https://github.com/lcolladotor/biocthis/blob/master/actions/check-bioc.yml
  4 | ## * https://github.com/r-lib/actions/blob/master/examples/check-standard.yaml
  5 | ## * https://github.com/seandavi/BuildABiocWorkshop2020/blob/master/.github/workflows/basic_checks.yaml
  6 | name: R-CMD-check-bioc
  7 | 
  8 | ## Specify which branches to run on
  9 | ## The "devel" branch corresponds to Bioc-devel and "RELEASE_X" branches are
 10 | ## Bioconductor releases. See http://bioconductor.org/developers/how-to/git/.
 11 | on:
 12 |   push:
 13 |     branches:
 14 |       - devel
 15 |       - 'RELEASE_*'
 16 |   pull_request:
 17 |     branches:
 18 |       - devel
 19 |       - 'RELEASE_*'
 20 | 
 21 | jobs:
 22 |   get-bioc-release:
 23 |     # Identify the Bioconductor release from the git branch. Also specifies a
 24 |     # Bioconductor Docker image to use.
 25 |     runs-on: ubuntu-latest
 26 |     outputs:
 27 |       biocimage: ${{ steps.get-release.outputs.biocimage }}
 28 |       biocrelease: ${{ steps.get-release.outputs.biocrelease }}
 29 | 
 30 |     steps:
 31 |       - id: get-release
 32 |         name: Get Bioconductor release
 33 |         run: |
 34 |           if echo "$GITHUB_REF" | grep -q "RELEASE_"; then
 35 |               biocrelease="$(basename -- "$GITHUB_REF" | tr '[:upper:]' '[:lower:]')"
 36 |           else
 37 |               biocrelease="devel"
 38 |           fi
 39 |           biocimage="bioconductor/bioconductor_docker:${biocrelease}"
 40 |           echo "Bioc release: ${biocrelease}"
 41 |           echo "Bioc docker image: ${biocimage}"
 42 |           ## Store the information as step outputs (exposed via the job outputs)
 43 |           echo "biocimage=${biocimage}" >> "$GITHUB_OUTPUT"
 44 |           echo "biocrelease=${biocrelease}" >> "$GITHUB_OUTPUT"
 45 | 
 46 |   get-bioc-version:
 47 |     # Identify the Bioconductor version number and R version to use. This is
 48 |     # done by checking the versions in the Bioconductor Docker container
 49 |     # selected by get-bioc-release.
 50 |     runs-on: ubuntu-latest
 51 |     needs: get-bioc-release
 52 |     container:
 53 |       image: ${{ needs.get-bioc-release.outputs.biocimage }}
 54 |     outputs:
 55 |       Rversion: ${{ steps.set-versions.outputs.rversion }}
 56 |       biocversion: ${{ steps.set-versions.outputs.biocversion }}
 57 | 
 58 |     steps:
 59 |       - id: get-versions
 60 |         name: Get Bioconductor/R versions
 61 |         run: |
 62 |           biocconfig <- "https://bioconductor.org/config.yaml"
 63 |           biocrelease <- "${{ needs.get-bioc-release.outputs.biocrelease }}"
 64 |           cat("Bioc release RAW:", biocrelease, "\n")
 65 |           # grepl() takes (pattern, x): look for "release" in the branch name
 66 |           biocrelease <- ifelse(
 67 |             grepl("release", biocrelease), "release", "devel"
 68 |           )
 69 |           biocmap <- BiocManager:::.version_map_get_online(biocconfig)
 70 |           biocversion <- subset(biocmap, BiocStatus == biocrelease)[, 'Bioc']
 71 |           biocversion_str <- as.character(biocversion)
 72 |           rversion <- subset(biocmap, BiocStatus == biocrelease)[, 'R']
 73 |           rversion_str <- as.character(rversion)
 74 |           # Use R devel for BioC devel between November and May
 75 |           if (biocrelease == "devel") {
 76 |               current_month <- as.numeric(format(Sys.Date(), "%m"))
 77 |               if (current_month >= 11 || current_month <= 5) {
 78 |                   cat("Setting R version to devel\n")
 79 |                   rversion <- "devel"
 80 |                   rversion_str <- "devel"
 81 |               }
 82 |           }
 83 |           writeLines(c(biocversion_str, rversion_str), "versions.txt")
 84 |           cat("GET VERSIONS", "\n")
 85 |           cat("Bioc release: ", biocrelease, "\n")
 86 |           cat("Bioc version: ", biocversion_str, "\n")
 87 |           cat("R version: ", rversion_str, "\n")
 88 |         shell: Rscript {0}
 89 |       - id: set-versions
 90 |         name: Set Bioconductor/R versions
 91 |         run: |
 92 |           biocversion=$(head -n 1 versions.txt)
 93 |           rversion=$(tail -n 1 versions.txt)
 94 |           echo "SET VERSIONS"
 95 |           echo "Bioc version: ${biocversion}"
 96 |           echo "R version: ${rversion}"
 97 |           ## Store the information
 98 |           echo "biocversion=${biocversion}" >> $GITHUB_OUTPUT
 99 |           echo "rversion=${rversion}" >> $GITHUB_OUTPUT
100 | 
101 |   R-CMD-check-docker:
102 |     ## Run checks in the Bioconductor Docker container
103 |     name: ubuntu-latest (r-biocdocker bioc-${{ needs.get-bioc-version.outputs.biocversion }})
104 |     needs: [get-bioc-release, get-bioc-version]
105 |     runs-on: ubuntu-latest
106 |     container:
107 |       image: ${{ needs.get-bioc-release.outputs.biocimage }}
108 |       volumes:
109 |         - /home/runner/work/_temp/Library:/usr/local/lib/R/host-site-library
110 |     env:
111 |       R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
112 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
113 |       R_BIOC_VERSION: ${{ needs.get-bioc-version.outputs.biocversion }}
114 | 
115 |     steps:
116 |       - name: Checkout
117 |         uses: actions/checkout@v4
118 | 
119 |       - name: Install extra linux dependencies
120 |         run: sudo apt-get update -y && sudo apt-get install -y libcurl4-openssl-dev devscripts qpdf
121 | 
122 |       - name: Setup R dependencies
123 |         uses: r-lib/actions/setup-r-dependencies@v2
124 |         with:
125 |           cache-version: 3
126 |           extra-packages: any::rcmdcheck
127 |           needs: check
128 | 
129 |       - name: Show session info
130 |         run: |
131 |           options(width = 100)
132 |           pkgs <- installed.packages()[, "Package"]
133 |           sessioninfo::session_info(pkgs, include_base = TRUE)
134 |         shell: Rscript {0}
135 | 
136 |       - name: Check R package
137 |         uses: r-lib/actions/check-r-package@v2
138 |         with:
139 |           upload-snapshots: true
140 |           upload-results: true
141 | 
142 |       - name: BiocCheck
143 |         run: |
144 |           BiocManager::install("BiocCheck")
145 |           BiocCheck::BiocCheck(
146 |             dir('check', 'tar.gz$', full.names = TRUE),
147 |             `no-check-R-ver` = TRUE,
148 |             `no-check-bioc-help` = TRUE
149 |           )
150 |         shell: Rscript {0}
151 | 
152 |   R-CMD-check:
153 |     ## Run checks on other platforms.
154 |     name: ${{ matrix.config.os }} (r-${{ needs.get-bioc-version.outputs.rversion }} bioc-${{ needs.get-bioc-version.outputs.biocversion }})
155 |     needs: [get-bioc-release, get-bioc-version]
156 |     runs-on: ${{ matrix.config.os }}
157 |     strategy:
158 |       fail-fast: false
159 |       matrix:
160 |         experimental: [true]
161 |         config:
162 |           - {os: windows-latest}
163 |           - {os: macOS-latest}
164 |           - {os: ubuntu-24.04, rspm: "https://packagemanager.posit.co/cran/__linux__/noble/latest"}
165 |     env:
166 |       R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
167 |       RSPM: ${{ matrix.config.rspm }}
168 |       R_BIOC_VERSION: ${{ needs.get-bioc-version.outputs.biocversion }}
169 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
170 | 
171 |     steps:
172 |       - name: Checkout
173 |         uses: actions/checkout@v4
174 | 
175 |       - name: Install Linux system dependencies
176 |         if: runner.os == 'Linux'
177 |         run: |
178 |           ## Refresh the package index once; -y keeps apt non-interactive
179 |           sudo apt-get update
180 |           sudo apt-get upgrade -y libstdc++6
181 | 
182 |       - name: Setup Pandoc
183 |         uses: r-lib/actions/setup-pandoc@v2
184 | 
185 |       - name: Setup R
186 |         uses: r-lib/actions/setup-r@v2
187 |         with:
188 |           r-version: ${{ needs.get-bioc-version.outputs.rversion }}
189 |           use-public-rspm: true
190 | 
191 |       - name: Setup R dependencies
192 |         uses: r-lib/actions/setup-r-dependencies@v2
193 |         with:
194 |           cache-version: 3
195 |           extra-packages: any::rcmdcheck
196 |           needs: check
197 | 
198 |       - name: Session info
199 |         run: |
200 |           options(width = 100)
201 |           pkgs <- installed.packages()[, "Package"]
202 |           sessioninfo::session_info(pkgs, include_base = TRUE)
203 |         shell: Rscript {0}
204 | 
205 |       - name: Check R package
206 |         uses: r-lib/actions/check-r-package@v2
207 |         with:
208 |           upload-snapshots: true
209 |           upload-results: true
210 | 
211 |   test-coverage:
212 |     ## Calculate package test coverage. Only runs if R-CMD-check-docker has
213 |     ## completed successfully. Uses the Bioconductor Docker image.
214 |     if: ${{ github.ref == 'refs/heads/devel' }}
215 |     needs: [get-bioc-release, get-bioc-version, R-CMD-check-docker]
216 |     runs-on: ubuntu-latest
217 |     container:
218 |       image: ${{ needs.get-bioc-release.outputs.biocimage }}
219 |       volumes:
220 |         - /home/runner/work/_temp/Library:/usr/local/lib/R/host-site-library
221 |     env:
222 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
223 |       R_BIOC_VERSION: ${{ needs.get-bioc-version.outputs.biocversion }}
224 | 
225 |     steps:
226 |       - name: Checkout
227 |         uses: actions/checkout@v4
228 | 
229 |       - name: Install extra linux dependencies
230 |         run: sudo apt-get update -y && sudo apt-get install -y libcurl4-openssl-dev devscripts qpdf
231 | 
232 |       - name: Setup R dependencies
233 |         uses: r-lib/actions/setup-r-dependencies@v2
234 |         with:
235 |           cache-version: 3
236 |           extra-packages: any::covr
237 |           needs: coverage
238 | 
239 |       - name: Test coverage
240 |         run: covr::codecov(quiet = FALSE)
241 |         shell: Rscript {0}
242 | 
243 |   pkgdown:
244 |     ## Build pkgdown site and push to gh-pages branch. The site is built on
245 |     ## every run; the deploy step only runs for pushes to the devel branch.
246 |     ## Uses the Bioconductor Docker image.
247 |     needs: [get-bioc-release, get-bioc-version]
248 |     runs-on: ubuntu-latest
249 |     container:
250 |       image: ${{ needs.get-bioc-release.outputs.biocimage }}
251 |       volumes:
252 |         - /home/runner/work/_temp/Library:/usr/local/lib/R/host-site-library
253 |     env:
254 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
255 |       R_BIOC_VERSION: ${{ needs.get-bioc-version.outputs.biocversion }}
256 | 
257 |     steps:
258 |       - name: Checkout
259 |         uses: actions/checkout@v4
260 | 
261 |       - name: Install extra linux dependencies
262 |         run: sudo apt-get update -y && sudo apt-get install -y libcurl4-openssl-dev devscripts qpdf rsync
263 | 
264 |       - name: Setup R dependencies
265 |         uses: r-lib/actions/setup-r-dependencies@v2
266 |         with:
267 |           cache-version: 3
268 |           extra-packages: any::pkgdown, local::.
269 |           needs: website
270 | 
271 |       - name: Build pkgdown site
272 |         run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
273 |         shell: Rscript {0}
274 | 
275 |       - name: Deploy to GitHub pages 🚀
276 |         if: ${{ (github.ref == 'refs/heads/devel') && (github.event_name != 'pull_request') }}
277 |         uses: JamesIves/github-pages-deploy-action@v4.7.3
278 |         with:
279 |           clean: false
280 |           branch: gh-pages
281 |           folder: docs
282 |           git-config-name: "github-actions[bot]"
283 |           git-config-email: "41898282+github-actions[bot]@users.noreply.github.com"
284 | 


--------------------------------------------------------------------------------
/.github/workflows/pr-commands.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   issue_comment:
 5 |     types: [created]
 6 | 
 7 | name: pr-commands.yaml
 8 | 
 9 | permissions: read-all
10 | 
11 | jobs:
12 |   document:
13 |     if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/document') }}
14 |     name: document
15 |     runs-on: ubuntu-latest
16 |     env:
17 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
18 |     permissions:
19 |       contents: write
20 |     steps:
21 |       - uses: actions/checkout@v4
22 | 
23 |       - uses: r-lib/actions/pr-fetch@v2
24 |         with:
25 |           repo-token: ${{ secrets.GITHUB_TOKEN }}
26 | 
27 |       - uses: r-lib/actions/setup-r@v2
28 |         with:
29 |           use-public-rspm: true
30 | 
31 |       - uses: r-lib/actions/setup-r-dependencies@v2
32 |         with:
33 |           extra-packages: any::roxygen2
34 |           needs: pr-document
35 | 
36 |       - name: Document
37 |         run: roxygen2::roxygenise()
38 |         shell: Rscript {0}
39 | 
40 |       - name: commit
41 |         run: |
42 |           git config --local user.name "$GITHUB_ACTOR"
43 |           git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com"
44 |           git add man/\* NAMESPACE
45 |           git commit -m 'Document'
46 | 
47 |       - uses: r-lib/actions/pr-push@v2
48 |         with:
49 |           repo-token: ${{ secrets.GITHUB_TOKEN }}
50 | 
51 |   style:
52 |     if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/style') }}
53 |     name: style
54 |     runs-on: ubuntu-latest
55 |     env:
56 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
57 |     permissions:
58 |       contents: write
59 |     steps:
60 |       - uses: actions/checkout@v4
61 | 
62 |       - uses: r-lib/actions/pr-fetch@v2
63 |         with:
64 |           repo-token: ${{ secrets.GITHUB_TOKEN }}
65 | 
66 |       - uses: r-lib/actions/setup-r@v2
67 | 
68 |       - name: Install dependencies
69 |         run: install.packages("styler")
70 |         shell: Rscript {0}
71 | 
72 |       - name: Style
73 |         run: styler::style_pkg()
74 |         shell: Rscript {0}
75 | 
76 |       - name: commit
77 |         run: |
78 |           git config --local user.name "$GITHUB_ACTOR"
79 |           git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com"
80 |           git add \*.R
81 |           git commit -m 'Style'
82 | 
83 |       - uses: r-lib/actions/pr-push@v2
84 |         with:
85 |           repo-token: ${{ secrets.GITHUB_TOKEN }}
86 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | *.html
5 | doc
6 | Meta
7 | *.Rproj
8 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
  1 | # Contributor Covenant Code of Conduct
  2 | 
  3 | ## Our Pledge
  4 | 
  5 | We as members, contributors, and leaders pledge to make participation in our
  6 | community a harassment-free experience for everyone, regardless of age, body
  7 | size, visible or invisible disability, ethnicity, sex characteristics, gender
  8 | identity and expression, level of experience, education, socio-economic status,
  9 | nationality, personal appearance, race, religion, or sexual identity and
 10 | orientation.
 11 | 
 12 | We pledge to act and interact in ways that contribute to an open, welcoming,
 13 | diverse, inclusive, and healthy community.
 14 | 
 15 | ## Our Standards
 16 | 
 17 | Examples of behavior that contributes to a positive environment for our
 18 | community include:
 19 | 
 20 | * Demonstrating empathy and kindness toward other people
 21 | * Being respectful of differing opinions, viewpoints, and experiences
 22 | * Giving and gracefully accepting constructive feedback
 23 | * Accepting responsibility and apologizing to those affected by our mistakes,
 24 | and learning from the experience
 25 | * Focusing on what is best not just for us as individuals, but for the overall
 26 | community
 27 | 
 28 | Examples of unacceptable behavior include:
 29 | 
 30 | * The use of sexualized language or imagery, and sexual attention or
 31 | advances of any kind
 32 | * Trolling, insulting or derogatory comments, and personal or political attacks
 33 | * Public or private harassment
 34 | * Publishing others' private information, such as a physical or email
 35 | address, without their explicit permission
 36 | * Other conduct which could reasonably be considered inappropriate in a
 37 | professional setting
 38 | 
 39 | ## Enforcement Responsibilities
 40 | 
 41 | Community leaders are responsible for clarifying and enforcing our standards
 42 | of acceptable behavior and will take appropriate and fair corrective action in
 43 | response to any behavior that they deem inappropriate, threatening, offensive,
 44 | or harmful.
 45 | 
 46 | Community leaders have the right and responsibility to remove, edit, or reject
 47 | comments, commits, code, wiki edits, issues, and other contributions that are
 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation
 49 | decisions when appropriate.
 50 | 
 51 | ## Scope
 52 | 
 53 | This Code of Conduct applies within all community spaces, and also applies
 54 | when an individual is officially representing the community in public spaces.
 55 | Examples of representing our community include using an official e-mail
 56 | address, posting via an official social media account, or acting as an appointed
 57 | representative at an online or offline event.
 58 | 
 59 | ## Enforcement
 60 | 
 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
 62 | reported to the community leaders responsible for enforcement at [INSERT CONTACT
 63 | METHOD]. All complaints will be reviewed and investigated promptly and fairly.
 64 | 
 65 | All community leaders are obligated to respect the privacy and security of the
 66 | reporter of any incident.
 67 | 
 68 | ## Enforcement Guidelines
 69 | 
 70 | Community leaders will follow these Community Impact Guidelines in determining
 71 | the consequences for any action they deem in violation of this Code of Conduct:
 72 | 
 73 | ### 1. Correction
 74 | 
 75 | **Community Impact**: Use of inappropriate language or other behavior deemed
 76 | unprofessional or unwelcome in the community.
 77 | 
 78 | **Consequence**: A private, written warning from community leaders, providing
 79 | clarity around the nature of the violation and an explanation of why the
 80 | behavior was inappropriate. A public apology may be requested.
 81 | 
 82 | ### 2. Warning
 83 | 
 84 | **Community Impact**: A violation through a single incident or series of
 85 | actions.
 86 | 
 87 | **Consequence**: A warning with consequences for continued behavior. No
 88 | interaction with the people involved, including unsolicited interaction with
 89 | those enforcing the Code of Conduct, for a specified period of time. This
 90 | includes avoiding interactions in community spaces as well as external channels
 91 | like social media. Violating these terms may lead to a temporary or permanent
 92 | ban.
 93 | 
 94 | ### 3. Temporary Ban
 95 | 
 96 | **Community Impact**: A serious violation of community standards, including
 97 | sustained inappropriate behavior.
 98 | 
 99 | **Consequence**: A temporary ban from any sort of interaction or public
100 | communication with the community for a specified period of time. No public or
101 | private interaction with the people involved, including unsolicited interaction
102 | with those enforcing the Code of Conduct, is allowed during this period.
103 | Violating these terms may lead to a permanent ban.
104 | 
105 | ### 4. Permanent Ban
106 | 
107 | **Community Impact**: Demonstrating a pattern of violation of community
108 | standards, including sustained inappropriate behavior, harassment of an
109 | individual, or aggression toward or disparagement of classes of individuals.
110 | 
111 | **Consequence**: A permanent ban from any sort of public interaction within the
112 | community.
113 | 
114 | ## Attribution
115 | 
116 | This Code of Conduct is adapted from the [Contributor Covenant][homepage],
117 | version 2.0,
118 | available at
119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
120 | 
121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct
122 | enforcement ladder](https://github.com/mozilla/diversity).
123 | 
124 | [homepage]: https://www.contributor-covenant.org
125 | 
126 | For answers to common questions about this code of conduct, see the FAQ at
127 | https://www.contributor-covenant.org/faq. Translations are available at
128 | https://www.contributor-covenant.org/translations.
129 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: zellkonverter
 2 | Title: Conversion Between scRNA-seq Objects
 3 | Version: 1.19.0
 4 | Date: 2025-04-16
 5 | Authors@R: c(
 6 |     person("Luke", "Zappia", , "luke@lazappi.id.au", role = c("aut", "cre"),
 7 |            comment = c(ORCID = "0000-0001-7744-8565")),
 8 |     person("Aaron", "Lun", , "infinite.monkeys.with.keyboards@gmail.com", role = "aut",
 9 |            comment = c(ORCID = "0000-0002-3564-4813")),
10 |     person("Jack", "Kamm", , "jackkamm@gmail.com", role = "ctb",
11 |            comment = c(ORCID = "0000-0003-2412-756X")),
12 |     person("Robrecht", "Cannoodt", , "rcannood@gmail.com", role = "ctb",
13 |            comment = c(ORCID = "0000-0003-3641-729X", github = "rcannood")),
14 |     person("Gabriel", "Hoffman", , "gabriel.hoffman@mssm.edu", role = "ctb",
15 |            comment = c(ORCID = "0000-0002-0957-0224", github = "GabrielHoffman")),
16 |     person("Marek", "Cmero", , "cmero.ma@wehi.edu.au", role = "ctb",
17 |            comment = c(ORCID = "0000-0001-7783-5530", github = "mcmero"))
18 |   )
19 | Description: Provides methods to convert between Python AnnData objects
20 |     and SingleCellExperiment objects. These are primarily intended for use
21 |     by downstream Bioconductor packages that wrap Python methods for
22 |     single-cell data analysis. It also includes functions to read and
23 |     write H5AD files used for saving AnnData objects to disk.
24 | License: MIT + file LICENSE
25 | URL: https://github.com/theislab/zellkonverter
26 | BugReports: https://github.com/theislab/zellkonverter/issues
27 | Imports: 
28 |     basilisk,
29 |     cli,
30 |     DelayedArray,
31 |     Matrix,
32 |     methods,
33 |     reticulate,
34 |     S4Vectors,
35 |     SingleCellExperiment (>= 1.11.6),
36 |     SummarizedExperiment,
37 |     utils
38 | Suggests:
39 |     anndata,
40 |     BiocFileCache,
41 |     BiocStyle,
42 |     covr,
43 |     HDF5Array,
44 |     knitr,
45 |     pkgload,
46 |     rhdf5 (>= 2.45.1),
47 |     rmarkdown,
48 |     scRNAseq,
49 |     SpatialExperiment,
50 |     spelling,
51 |     testthat,
52 |     withr
53 | VignetteBuilder: 
54 |     knitr
55 | biocViews: SingleCell, DataImport, DataRepresentation
56 | Encoding: UTF-8
57 | Language: en-GB
58 | LazyData: true
59 | Roxygen: list(markdown = TRUE)
60 | RoxygenNote: 7.3.2
61 | StagedInstall: no
62 | Collate: 
63 |     'AnnData2SCE.R'
64 |     'SCE2AnnData.R'
65 |     'ui.R'
66 |     'basilisk.R'
67 |     'read.R'
68 |     'reticulate.R'
69 |     'utils.R'
70 |     'validation.R'
71 |     'write.R'
72 |     'zellkonverter-package.R'
73 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2020
2 | COPYRIGHT HOLDER: Luke Zappia
3 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | # MIT License
 2 | 
 3 | Copyright (c) 2020 Theis Lab
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | S3method(py_to_r,numpy.ndarray)
 4 | S3method(py_to_r,pandas.core.arrays.categorical.Categorical)
 5 | S3method(py_to_r,pandas.core.arrays.masked.BaseMaskedArray)
 6 | export(.AnnDataVersions)
 7 | export(AnnData2SCE)
 8 | export(AnnDataDependencies)
 9 | export(SCE2AnnData)
10 | export(readH5AD)
11 | export(setZellkonverterVerbose)
12 | export(writeH5AD)
13 | export(zellkonverterAnnDataEnv)
14 | import(SingleCellExperiment)
15 | import(SummarizedExperiment)
16 | importClassesFrom(Matrix,CsparseMatrix)
17 | importFrom(DelayedArray,blockApply)
18 | importFrom(DelayedArray,is_sparse)
19 | importFrom(DelayedArray,nzdata)
20 | importFrom(DelayedArray,nzindex)
21 | importFrom(DelayedArray,rowAutoGrid)
22 | importFrom(DelayedArray,type)
23 | importFrom(Matrix,sparseMatrix)
24 | importFrom(Matrix,t)
25 | importFrom(S4Vectors,DataFrame)
26 | importFrom(S4Vectors,I)
27 | importFrom(S4Vectors,make_zero_col_DFrame)
28 | importFrom(S4Vectors,metadata)
29 | importFrom(S4Vectors,wmsg)
30 | importFrom(SingleCellExperiment,"colPairs<-")
31 | importFrom(SingleCellExperiment,"reducedDims<-")
32 | importFrom(SingleCellExperiment,"rowPairs<-")
33 | importFrom(SingleCellExperiment,SingleCellExperiment)
34 | importFrom(SummarizedExperiment,"assays<-")
35 | importFrom(SummarizedExperiment,"colData<-")
36 | importFrom(SummarizedExperiment,"rowData<-")
37 | importFrom(SummarizedExperiment,assays)
38 | importFrom(SummarizedExperiment,colData)
39 | importFrom(SummarizedExperiment,rowData)
40 | importFrom(basilisk,basiliskRun)
41 | importFrom(methods,as)
42 | importFrom(methods,is)
43 | importFrom(methods,selectMethod)
44 | importFrom(methods,slot)
45 | importFrom(reticulate,import)
46 | importFrom(reticulate,import_builtins)
47 | importFrom(reticulate,py_to_r)
48 | importFrom(reticulate,r_to_py)
49 | importFrom(utils,capture.output)
50 | 


--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
  1 | # zellkonverter 1.20.0
  2 | 
  3 | * Bioconductor 3.22, October 2025
  4 | 
  5 | ## zellkonverter 1.19.0 (2025-04-16)
  6 | 
  7 | * Bioconductor 3.22 devel
  8 | 
  9 | # zellkonverter 1.18.0 (2025-04-16)
 10 | 
 11 | * Bioconductor 3.21, April 2025
 12 | 
 13 | ## zellkonverter 1.17.4 (2025-04-10)
 14 | 
 15 | * Add tests for **anndata** v0.10.9
 16 | * Modify `SCE2AnnData()` to convert sparse matrices to `dgRMatrix` when they are
 17 |   transposed (mostly assays) (Fixes #132)
 18 | 
 19 | ## zellkonverter 1.17.3 (2025-04-08)
 20 | 
 21 | * Add an environment for **anndata** v0.11.4. This is now the default.
 22 | * Disable **anndata** v0.7.6 tests on aarch64 Linux as Python 3.7 is not available
 23 | 
 24 | ## zellkonverter 1.17.2 (2025-04-01)
 25 | 
 26 | * Add support for `SpatialExperiment` objects to `SCE2AnnData()` (PR #138 @mcmero, Fixes #61)
 27 | * Improve handling of missing `rownames`/`colnames` (PR #138, Fixes #140) 
 28 | 
 29 | ## zellkonverter 1.17.1 (2025-03-09)
 30 | 
 31 | * Add `testload` argument to `basiliskRun()` calls (Partial fix for #139)
 32 | * Handle missing `rowData`/`colData` with no names in `SCE2AnnData()` (Fixes #105)
 33 | * Update links in function documentation
 34 | 
 35 | ## zellkonverter 1.17.0 (2024-10-30)
 36 | 
 37 | * Bioconductor 3.21 devel
 38 | 
 39 | # zellkonverter 1.16.0 (2024-10-30)
 40 | 
 41 | * Bioconductor 3.20, October 2024
 42 | 
 43 | ## zellkonverter 1.15.4 (2024-10-18)
 44 | 
 45 | * Fix correctly assigning levels to factors in the R reader with **anndata** v0.7 files (Fixes #122)
 46 | * Add environment for **anndata** v0.10.9
 47 | * Avoid deprecation warning due to setting `dtype` when creating Python `AnnData` objects
 48 | * Standardise code styling using **{styler}**
 49 | 
 50 | ## zellkonverter 1.15.3 (2024-10-04)
 51 | 
 52 | * Correctly assign levels to factors in R reader (Fixes #122)
 53 | 
 54 | ## zellkonverter 1.15.2 (2024-10-02)
 55 | 
 56 | * Correctly set `filepath` in the R reader when reading `adata.raw` with `use_hdf5 = TRUE` (PR #124 @GabrielHoffman, Fixes #123)
 57 | 
 58 | ## zellkonverter 1.15.1 (2024-06-21)
 59 | 
 60 | * Skip **anndata** v0.7.6 tests on Apple Silicon as Python 3.7 is not available
 61 | * Fix URL for GTEx 8 tissues dataset in long tests
 62 | * Add test using the more complete example H5AD file
 63 | 
 64 | ## zellkonverter 1.15.0 (2024-05-01)
 65 | 
 66 | * Bioconductor 3.20 devel
 67 | 
 68 | # zellkonverter 1.14.0 (2024-05-01)
 69 | 
 70 | * Bioconductor 3.19, May 2024
 71 | 
 72 | ## zellkonverter 1.14.1 (2024-06-21)
 73 | 
 74 | * Skip **anndata** v0.7.6 tests on Apple Silicon as Python 3.7 is not available
 75 | * Fix URL for GTEx 8 tissues dataset in long tests
 76 | * Add test using the more complete example H5AD file
 77 | 
 78 | ## zellkonverter 1.13.4 (2024-04-26)
 79 | 
 80 | * Prepare `NEWS` for release
 81 | 
 82 | ## zellkonverter 1.13.3 (2024-03-25)
 83 | 
 84 | * Correctly handle `use_backed = TRUE` with newer **anndata** versions (Fixes #114)
 85 | * Improve warnings when converting matrices fails
 86 | * Add environment for **anndata** v0.10.6
 87 | 
 88 | ## zellkonverter 1.13.2 (2024-01-17)
 89 | 
 90 | * Minor change to writing `DelayedArray`s for compatibility with **{HDF5Array}**
 91 |   v1.31.1
 92 | 
 93 | ## zellkonverter 1.13.1 (2023-11-13)
 94 | 
 95 | * Fix the **anndata** v0.10.2 environment instantiation (Fixes #103)
 96 | * Fix a typo in the AnnData Conversion docs (Fixes #100)
 97 | 
 98 | ## zellkonverter 1.13.0 (2023-10-25)
 99 | 
100 | * Bioconductor 3.19 devel
101 | 
102 | # zellkonverter 1.12.0 (2023-10-25)
103 | 
104 | * Bioconductor 3.18, October 2023
105 | 
106 | ## zellkonverter 1.12.1 (2023-11-13)
107 | 
108 | * Fix the **anndata** v0.10.2 environment instantiation (Fixes #103)
109 | * Fix a typo in the AnnData Conversion docs (Fixes #100)
110 | 
111 | ## zellkonverter 1.11.4 (2023-10-16)
112 | 
113 | * Add environment for **anndata** v0.10.2
114 | 
115 | ## zellkonverter 1.11.3 (2023-10-02)
116 | 
117 | * Add environment for **anndata** v0.9.2
118 | 
119 | ## zellkonverter 1.11.2 (2023-08-28)
120 | 
121 | * Changes for compatibility with **{rhdf5}** v2.45.1
122 |   * Support for enum types that simplifies reading of nullable types in the
123 |     native R reader
124 | 
125 | ## zellkonverter 1.11.1 (2023-05-23)
126 | 
127 | * Pass correct dimensions when converting `raw` (Fixes #96)
128 | * Convert **anndata** backed sparse matrices in `AnnData2SCE()` (Fixes #96)
129 | 
130 | ## zellkonverter 1.11.0 (2023-04-26)
131 | 
132 | * Bioconductor 3.18 devel
133 | 
134 | # zellkonverter 1.10.0 (2023-04-26)
135 | 
136 | * Bioconductor 3.17, April 2023
137 | 
138 | ## zellkonverter 1.10.1 (2023-05-23)
139 | 
140 | * Pass correct dimensions when converting `raw` (Fixes #96)
141 | * Convert **anndata** backed sparse matrices in `AnnData2SCE()` (Fixes #96)
142 | 
143 | ## zellkonverter 1.9.3 (2023-04-06)
144 | 
145 | * Add functions for converting **pandas** arrays used by **anndata** when
146 |   arrays have missing values (Fixes #87)
147 | * Read the correct index names in the R reader (PR #93 @mtmorgan)
148 | * Adjust tests to match reader changes
149 | 
150 | ## zellkonverter 1.9.2 (2023-03-28)
151 | 
152 | * Add @rcannood as a contributor (PR #90 @rcannood, fixes #88)
153 | 
154 | ## zellkonverter 1.9.1 (2023-03-14)
155 | 
156 | * Add compatibility with the **anndata** v0.8 H5AD format to the native R
157 |   writer (PR #86 @jackkamm, fixes #78)
158 | 
159 | ## zellkonverter 1.9.0 (2022-11-02)
160 | 
161 | * Bioconductor 3.17 devel
162 | 
163 | # zellkonverter 1.8.0 (2022-11-02)
164 | 
165 | * Bioconductor 3.16, November 2022
166 | 
167 | ## zellkonverter 1.7.8 (2022-10-04)
168 | 
169 | * Improve compatibility with the R **{anndata}** package (PR #76 @rcannood,
170 |   fixes #75)
171 |   * Python objects are now explicitly converted rather than relying on automatic
172 |     conversion
173 |   * Other minor modifications for compatibility
174 | * Added support for **numpy** recarrays (dtype number 20) (PR #81, fixes #45,
175 |   #28)
176 |   * Added a new `py_to_r.numpy.ndarray()` function which extends the default
177 |     **{reticulate}** function
178 | * Improvements to warnings
179 | * Improvements and updates to tests
180 | 
181 | ## zellkonverter 1.7.7 (2022-10-04)
182 | 
183 | * Pin **python** version to 3.7.10 in **anndata** v0.7.6 environment (3.7.12
184 |   was not compatible with other dependencies)
185 | 
186 | ## zellkonverter 1.7.6 (2022-09-29)
187 | 
188 | * Pin **python** version to 3.7.12 in **anndata** v0.7.6 environment to match
189 |   **{basilisk}** changes
190 | 
191 | ## zellkonverter 1.7.5 (2022-09-13)
192 | 
193 | * Minor changes for compatibility with **{cli}** v3.4.0
194 |   * Added tests for `verbose=TRUE` 
195 | 
196 | ## zellkonverter 1.7.4 (2022-08-17)
197 | 
198 | * Minor changes for compatibility with the upcoming **{Matrix}** 1.4-2 release
199 | 
200 | ## zellkonverter 1.7.3 (2022-06-23)
201 | 
202 | * Move verbose from `zellkonverterAnnDataEnv()` (Fixes #66)
203 | 
204 | ## zellkonverter 1.7.2 (2022-06-09)
205 | 
206 | * Instantiate environments for `basilisk::configureBasiliskEnv()` (Fixes #66)
207 | * Allow missing obs/var names when `use_hdf5 = TRUE` (Fixes #65)
208 | 
209 | ## zellkonverter 1.7.1 (2022-05-17)
210 | 
211 | * Fix bug in long tests
212 | 
213 | ## zellkonverter 1.7.0 (2022-04-27)
214 | 
215 | * Bioconductor 3.16 devel
216 | 
217 | # zellkonverter 1.6.0 (2022-04-27)
218 | 
219 | * Bioconductor 3.15, April 2022
220 | 
221 | ## zellkonverter 1.6.5 (2022-09-13)
222 | 
223 | * Minor changes for compatibility with **{cli}** v3.4.0
224 |   * Added tests for `verbose=TRUE` 
225 | 
226 | ## zellkonverter 1.6.4 (2022-08-17)
227 | 
228 | * Minor changes for compatibility with the upcoming **{Matrix}** 1.4-2 release
229 | 
230 | ## zellkonverter 1.6.3 (2022-06-23)
231 | 
232 | * Move verbose from `zellkonverterAnnDataEnv()` (Fixes #66)
233 | 
234 | ## zellkonverter 1.6.2 (2022-06-09)
235 | 
236 | * Instantiate environments for `basilisk::configureBasiliskEnv()` (Fixes #66)
237 | * Allow missing obs/var names when `use_hdf5 = TRUE` (Fixes #65)
238 | 
239 | ## zellkonverter 1.6.1 (2022-05-17)
240 | 
241 | * Fix bug in long tests
242 | 
243 | ## zellkonverter 1.5.4 (2022-04-25)
244 | 
245 | * Fix progress messages in `.convert_anndata_df()`
246 | * Allow `data.frames` in `varm` in `SCE2AnnData()`
247 | * Standardise `uns` names to match R conventions in `AnnData2SCE()`
248 | * Adjust long tests
249 | 
250 | ## zellkonverter 1.5.3 (2022-04-19)
251 | 
252 | * Reduce **scipy** version to 1.7.3
253 |   * **scipy** >= 1.8.0 is incompatible with **{reticulate}** <= 1.24 (see
254 |     https://github.com/rstudio/reticulate/pull/1173)
255 | * Add GTEX 8 tissues dataset to long tests (see #58)
256 | 
257 | ## zellkonverter 1.5.2 (2022-04-17)
258 | 
259 | * Update the default Python environment to use **anndata** v0.8.0
260 |     * **anndata** 0.8.0
261 |     * **h5py** 3.6.0
262 |     * **hdf5** 1.12.1
263 |     * **natsort** 8.1.0
264 |     * **numpy** 1.22.3
265 |     * **packaging** 21.3
266 |     * **pandas** 1.4.2
267 |     * **python** 3.8.13
268 |     * **scipy** 1.8.0
269 |     * **sqlite** 3.38.2
270 | * Add options to choose Python environments with different versions of
271 |   **anndata**
272 |   * To facilitate this `zellkonverterAnnDataEnv()` and `AnnDataDependencies()`
273 |     are new functions rather than variables
274 |   * Added a new `.AnnDataVersions` variable which stores the available
275 |     **anndata** versions
276 |   * Updates to the vignette and function documentation explaining this option
277 | 
278 | ## zellkonverter 1.5.1 (2022-03-21)
279 | 
280 | * Modify how Pandas DataFrames are converted to R
281 |   * Columns should now use R approved names with a warning when changes are
282 |     made
283 | 
284 | ## zellkonverter 1.5.0 (2021-10-27)
285 | 
286 | * Bioconductor 3.15 devel
287 | 
288 | # zellkonverter 1.4.0 (2021-10-27)
289 | 
290 | * Bioconductor 3.14, October 2021
291 | 
292 | ## zellkonverter 1.3.3 (2021-10-20)
293 | 
294 | * Add progress messages to various functions
295 |   * Can be controlled by function arguments or a global variable
296 | * Split `konverter.R` into two files (`AnnData2SCE.R` and `SCE2AnnData.R`)
297 | * Add arguments to control how slots are converted in `AnnData2SCE()` and
298 |   `SCE2AnnData()` (Fixes #47)
299 |   * Each slot can now be fully converted, skipped entirely or only selected
300 |     items converted.
301 | * Add support for converting the `raw` slot to an `altExp` in `AnnData2SCE()` 
302 |   (Fixes #53, fixes #57)
303 | 
304 | ## zellkonverter 1.3.2 (2021-09-09)
305 | 
306 | * Add recursive conversion of lists in `AnnData2SCE()`
307 | * Correctly handle `DataFrame` objects stored in `adata.obsm`
308 | * Remove **pandas** indexes from converted `DataFrame` objects
309 | * Add functions for validating `SingleCellExperiment` objects (for testing)
310 | * Add long tests for various public datasets
311 | 
312 | ## zellkonverter 1.3.1 (2021-06-22)
313 | 
314 | * Fix bug in converting `dgRMatrix` sparse matrices (Fixes #55)
315 | 
316 | ## zellkonverter 1.3.0 (2021-05-20)
317 | 
318 | * Bioconductor 3.14 devel
319 | 
320 | # zellkonverter 1.2.0 (2021-05-20)
321 | 
322 | * Bioconductor 3.13, May 2021
323 | 
324 | ## zellkonverter 1.2.1 (2021-06-22)
325 | 
326 | * Fix bug in converting `dgRMatrix` sparse matrices (Fixes #55)
327 | 
328 | ## zellkonverter 1.1.11 (2021-05-19)
329 | 
330 | * Add experimental native R reader to `readH5AD()`
331 | 
332 | ## zellkonverter 1.1.10 (2021-05-18)
333 | 
334 | * Update NEWS for release
335 | 
336 | ## zellkonverter 1.1.9 (2021-05-12)
337 | 
338 | * `AnnData2SCE()` no longer returns `dgRMatrix` sparse matrices (Fixes #34)
339 | 
340 | ## zellkonverter 1.1.8 (2021-05-03)
341 | 
342 | * Add conversion checks to all slots in `AnnData2SCE()` (See #45)
343 | * Enable return conversion of `varm` in `SCE2AnnData()` (Fixes #43)
344 | * Store `X_name` in `AnnData2SCE()` for use by `SCE2AnnData()` and add an
345 |   `X_name` argument to `AnnData2SCE()` and `readH5AD()` (Fixes #7)
346 | 
347 | ## zellkonverter 1.1.7 (2021-04-30)
348 | 
349 | * Add `compression` argument to `writeH5AD()` (Fixes #49)
350 | * Update **anndata** Python dependencies, now using **anndata** v0.7.6
351 | 
352 | ## zellkonverter 1.1.6 (2021-04-27)
353 | 
354 | * Adapt to changes in `HDF5Array::HDF5Array()`
355 | 
356 | ## zellkonverter 1.1.5 (2021-03-05)
357 | 
358 | * Better support for **anndata** `SparseDataset` arrays (PR #41, Fixes #37,
359 |   Fixes #42)
360 | * More consistent conversion of `metadata` to `uns` in `SCE2AnnData()`
361 |   (Fixes #40)
362 | * Add handling of list columns in `colData` and `rowData` in `SCE2AnnData()`
363 |   (Fixes #26)
364 | * Export `zellkonverterAnnDataEnv` (Fixes #38)
365 | 
366 | ## zellkonverter 1.1.4 (2021-02-18)
367 | 
368 | * Handle writing **DelayedArray** assays on the R side in `writeH5AD()`
369 |   (PR #35, Fixes #32)
370 | 
371 | ## zellkonverter 1.1.3 (2021-01-22)
372 | 
373 | * Adjust `SCE2AnnData()` example (Fixes #31)
374 | 
375 | ## zellkonverter 1.1.2 (2020-12-19)
376 | 
377 | * Improved support for HDF5 backed conversion (PR #29, fixes #13)
378 | 
379 | ## zellkonverter 1.1.1 (2020-12-03)
380 | 
381 | * Add `example_anndata.h5ad` file to `inst/extdata/` and creation script to `inst/scripts/`
382 | * Improve conversion checks when converting `.uns` to `metadata`
383 | * Avoid converting `obsp` and `varp` to dense matrices
384 | 
385 | ## zellkonverter 1.1.0 (2020-10-28)
386 | 
387 | * Bioconductor 3.13 devel
388 | 
389 | # zellkonverter 1.0.0 (2020-10-28)
390 | 
391 | * Bioconductor 3.12, October 2020
392 | 
393 | ## zellkonverter 1.0.3 (2021-03-08)
394 | 
395 | * Avoid converting `obsp` and `varp` to dense matrices
396 | 
397 | ## zellkonverter 1.0.2 (2021-01-28)
398 | 
399 | * Merge remaining commits for HDF5 conversion (fixes #33)
400 | 
401 | ## zellkonverter 1.0.1 (2021-01-26)
402 | 
403 | * Improved support for HDF5 backed conversion (PR #29, fixes #13, fixes #33)
404 | 
405 | ## zellkonverter 0.99.7 (2020-10-16)
406 | 
407 | * Update Python dependencies
408 |   * **numpy** 1.18.5 -> 1.19.1
409 |   * **pandas** 1.0.4 -> 1.1.2
410 |   * **scipy** 1.4.1 -> 1.5.2
411 |   * **sqlite** 3.30.1 -> 3.33.0
412 | 
413 | ## zellkonverter 0.99.6 (2020-10-12)
414 | 
415 | * Document character to factor coercion in `writeH5AD()` (Fixes #6)
416 | * Add `X_name` argument to `writeH5AD()` (Fixes #23)
417 | 
418 | ## zellkonverter 0.99.5 (2020-09-28)
419 | 
420 | * Tidy NEWS files for Bioconductor release
421 | 
422 | ## zellkonverter 0.99.4 (2020-08-28)
423 | 
424 | * Bump anndata version to 0.7.4
425 | 
426 | ## zellkonverter 0.99.3 (2020-08-21)
427 | 
428 | * Document the `krumsiek11.h5ad` file
429 | * Remove the `internal` keyword from the `zellkonverter-package` documentation
430 | 
431 | ## zellkonverter 0.99.2 (2020-08-21)
432 | 
433 | * Update `.gitignore`
434 | 
435 | ## zellkonverter 0.99.1 (2020-07-15)
436 | 
437 | * Fix SCE to AnnData map figure in PDF manual
438 | * Use `expect_equal()` instead of `expect_identical()` in `writeH5AD()` sparse
439 |   matrices test
440 | * Edit package title and description
441 | 
442 | ## zellkonverter 0.99.0 (2020-07-10)
443 | 
444 | * Initial Bioconductor submission
445 | 
446 | # zellkonverter 0.0.0 (early development version)
447 | 
448 | ## zellkonverter 0.0.0.9017 (2020-07-10)
449 | 
450 | * Add biocViews to DESCRIPTION
451 | * Edit package description
452 | * Tidy code
453 | * Replace 1:... with `seq_len()`
454 | 
455 | ## zellkonverter 0.0.0.9016 (2020-07-10)
456 | 
457 | * Add check for **scRNAseq** in examples (Fixes #18)
458 | 
459 | ## zellkonverter 0.0.0.9015 (2020-07-02)
460 | 
461 | * Skip `AnnData` matrices without a transposable R counterpart
462 | * Only replace skipped matrices when `use_hdf5 = TRUE` in `readH5AD()`
463 |   (Fixes #12)
464 | * Additional tests for sparse matrices
465 | 
466 | ## zellkonverter 0.0.0.9014 (2020-06-30)
467 | 
468 | * Allow assay skipping when converting from `SingleCellExperiment` to `AnnData`
469 | * Allow skipping of assays that aren't **numpy** friendly in `writeH5AD()`
470 | * Wait for **basilisk** process shutdown to release `.h5ad` file
471 | * Updates to documentation and tests
472 | 
473 | ## zellkonverter 0.0.0.9013 (2020-06-25)
474 | 
475 | * Improve conversion between `SingleCellExperiment` and `AnnData` (See #8)
476 |   * Convert between `metadata` and `uns` (where objects are compatible)
477 |   * Convert between `rowPairs` and `varp`
478 |   * Convert between `colPairs` and `obsp`
479 |   * Convert from `varm` to `rowData` (but not in reverse)
480 | * Add mapping table to docs
481 | 
482 | ## zellkonverter 0.0.0.9012 (2020-06-19)
483 | 
484 | * Tidy documentation and code
485 | * Tidy vignette
486 | 
487 | ## zellkonverter 0.0.0.9011 (2020-06-18)
488 | 
489 | * Support for HDF5Array outputs in `readH5AD()` (Fixes #4)
490 | 
491 | ## zellkonverter 0.0.0.9010 (2020-06-17)
492 | 
493 | * Avoid checking column names for `colData` and `rowData` in `SCE2AnnData()`
494 | * Make sure that all matrices passed to **{reticulate}** are **numpy** friendly
495 | * Add more tests
496 | * Update vignette front matter
497 | 
498 | ## zellkonverter 0.0.0.9009 (2020-06-15)
499 | 
500 | * Add vignette
501 | 
502 | ## zellkonverter 0.0.0.9008 (2020-06-12)
503 | 
504 | * Add examples and improve documentation
505 | * Export `.AnnDataDependencies` for external use
506 | 
507 | ## zellkonverter 0.0.0.9007 (2020-06-11)
508 | 
509 | * Add `SCE2AnnData()` function
510 | * Add `writeH5AD()` function
511 | 
512 | ## zellkonverter 0.0.0.9006 (2020-06-11)
513 | 
514 | * Use internal function in `readH5AD()`
515 | 
516 | ## zellkonverter 0.0.0.9005 (2020-06-09)
517 | 
518 | * Rename `adata2SCE()` to `AnnData2SCE()`
519 | * Remove **{basilisk}** context from `AnnData2SCE()` (See #1)
520 |   * Now uses the calling context
521 | 
522 | ## zellkonverter 0.0.0.9004 (2020-06-09)
523 | 
524 | * Pin more **AnnData** dependencies (See #1)
525 | 
526 | ## zellkonverter 0.0.0.9003 (2020-06-08)
527 | 
528 | * Add test `.h5ad` file
529 | * Add test for `readH5AD()`
530 | * Add package man page
531 | 
532 | ## zellkonverter 0.0.0.9002 (2020-06-08)
533 | 
534 | * Add `adata2SCE()` function
535 | * Add `readH5AD()` function
536 | 
537 | ## zellkonverter 0.0.0.9001 (2020-06-08)
538 | 
539 | * Add **{basilisk}** infrastructure
540 | 
541 | ## zellkonverter 0.0.0.9000 (2020-06-08)
542 | 
543 | * Set up package
544 | 


--------------------------------------------------------------------------------
/R/SCE2AnnData.R:
--------------------------------------------------------------------------------
  1 | #' @rdname AnnData-Conversion
  2 | #'
  3 | #' @param sce A \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
  4 | #'   object.
  5 | #' @param X_name For `SCE2AnnData()` name of the assay to use as the primary
  6 | #' matrix (`X`) of the AnnData object. If `NULL`, the first assay of `sce` will
  7 | #' be used by default. For `AnnData2SCE()` name used when saving `X` as an
  8 | #' assay. If `NULL` looks for an `X_name` value in `uns`, otherwise uses `"X"`.
  9 | #' @param assays,colData,rowData,reducedDims,metadata,colPairs,rowPairs
 10 | #' Arguments specifying how these slots are converted. If `TRUE` everything in
 11 | #' that slot is converted, if `FALSE` nothing is converted and if a character
 12 | #' vector only those items or columns are converted.
 13 | #' @param verbose Logical scalar indicating whether to print progress messages.
 14 | #' If `NULL` uses `getOption("zellkonverter.verbose")`.
 15 | #'
 16 | #' @export
 17 | #' @importFrom utils capture.output
 18 | #' @importFrom S4Vectors metadata make_zero_col_DFrame
 19 | #' @importFrom reticulate import r_to_py py_to_r
 20 | SCE2AnnData <- function(sce, X_name = NULL, assays = TRUE, colData = TRUE,
 21 |                         rowData = TRUE, varm = TRUE, reducedDims = TRUE,
 22 |                         metadata = TRUE, colPairs = TRUE, rowPairs = TRUE,
 23 |                         skip_assays = FALSE, verbose = NULL) {
 24 |     anndata <- import("anndata")
 25 | 
 26 |     # Create a list to store parts of the AnnData
 27 |     adata_list <- list()
 28 | 
 29 |     .ui_process(
 30 |         "Converting {.field AnnData} to {.field SingleCellExperiment}"
 31 |     )
 32 | 
 33 |     if (is.null(X_name)) {
 34 |         .ui_step(
 35 |             "Selecting {.field X matrix}",
 36 |             msg_done = "Selected {.field X matrix}"
 37 |         )
 38 |         if (length(assays(sce)) == 0) {
 39 |             stop("'sce' does not contain any assays")
 40 |         }
 41 |         X_name <- assayNames(sce)[1]
 42 |         cli::cli_alert_info(
 43 |             "Using the {.field '{X_name}'} assay as the {.field X matrix}"
 44 |         )
 45 |         cli::cli_progress_done()
 46 |     }
 47 | 
 48 |     .ui_step(
 49 |         "Converting {.field assays${X_name}} to {.field X matrix}",
 50 |         msg_done = "{.field assays${X_name}} converted to {.field X matrix}"
 51 |     )
 52 |     if (!skip_assays) {
 53 |         X <- assay(sce, X_name)
 54 |         X <- .makeNumpyFriendly(X)
 55 |     } else {
 56 |         cli::cli_alert_warning(paste(
 57 |             "{.field skip_assays} is {.field TRUE}",
 58 |             "so {.field X/layers} will be empty"
 59 |         ))
 60 |         X <- fake_mat <- .make_fake_mat(rev(dim(sce)))
 61 |     }
 62 |     # NOTE: Previously dtype was set here but was removed due to deprecation
 63 |     adata_list$X <- reticulate::r_to_py(X)
 64 |     cli::cli_progress_done()
 65 | 
 66 |     assay_names <- assayNames(sce)
 67 |     assay_names <- assay_names[!assay_names == X_name]
 68 |     if (isFALSE(assays)) {
 69 |         .ui_info("Skipping conversion of {.field assays}")
 70 |     } else if (length(assay_names) == 0) {
 71 |         .ui_info("No {.field additional assays} present, assays were skipped")
 72 |     } else {
 73 |         .ui_step(
 74 |             "Converting {.field additional assays} to {.field layers}",
 75 |             msg_done = "{.field additional assays} converted to {.field layers}"
 76 |         )
 77 |         if (is.character(assays)) {
 78 |             if (!all(assays %in% assay_names)) {
 79 |                 missing <- assays[!c(assays %in% assay_names)]
 80 |                 .ui_warn(
 81 |                     "These selected assays are not in the object: {.field {missing}}"
 82 |                 )
 83 |             }
 84 |             assay_names <- assay_names[assay_names %in% assays]
 85 |         }
 86 |         if (!skip_assays) {
 87 |             assays_list <- assays(sce, withDimnames = FALSE)
 88 |             assays_list <- lapply(assays_list[assay_names], .makeNumpyFriendly)
 89 |         } else {
 90 |             assays_list <- rep(list(fake_mat), length(assay_names))
 91 |             names(assays_list) <- assay_names
 92 |         }
 93 |         adata_list$layers <- assays_list
 94 |         cli::cli_progress_done()
 95 |     }
 96 | 
 97 |     if (isFALSE(colData)) {
 98 |         .ui_info("Skipping conversion of {.field colData}")
 99 |     } else {
100 |         sce <- .store_non_atomic(sce, "colData")
101 |         adata_list$obs <- .convert_sce_df(colData(sce), "colData", "obs", select = colData)
102 |     }
103 | 
104 |     if (is.null(adata_list$obs)) {
105 |         # Add a dummy data.frame if obs is currently empty
106 |         adata_list$obs <- as.data.frame(make_zero_col_DFrame(ncol(sce)))
107 |     }
108 | 
109 |     # Convert to python now because python DFs can have duplicates in
110 |     # their index
111 |     adata_list$obs <- r_to_py(adata_list$obs)
112 |     if (!is.null(colnames(sce))) {
113 |         adata_list$obs$index <- colnames(sce)
114 |     } else if (ncol(adata_list$obs) == 0) {
115 |         # If there are no colnames and obs has no columns delete it
116 |         adata_list$obs <- NULL
117 |     } else {
118 |         # Otherwise convert the index to string
119 |         adata_list$obs$index <- adata_list$obs$index$astype("str")
120 |     }
121 | 
122 |     if (!is.null(int_metadata(sce)$has_varm)) {
123 |         varm_list <- as.list(rowData(sce)[["varm"]])
124 |         rowData(sce)[["varm"]] <- NULL
125 | 
126 |         if (isFALSE(varm)) {
127 |             .ui_info("Skipping conversion of {.field rowData$varm}")
128 |         } else {
129 |             .ui_step(
130 |                 "Converting {.field rowData$varm} to {.field varm}",
131 |                 msg_done = "{.field rowData$varm} converted to {.field varm}"
132 |             )
133 | 
134 |             if (is.character(varm)) {
135 |                 varm <- .check_select(varm, "rowData$varm", names(varm_list))
136 |                 varm_list <- varm_list[varm]
137 |             }
138 | 
139 |             adata_list$varm <- varm_list
140 |             cli::cli_progress_done()
141 |         }
142 |     } else {
143 |         .ui_info("{.field rowData$varm} is empty and was skipped")
144 |     }
145 | 
146 |     if (isFALSE(rowData)) {
147 |         .ui_info("Skipping conversion of {.field rowData}")
148 |     } else {
149 |         sce <- .store_non_atomic(sce, "rowData")
150 |         adata_list$var <- .convert_sce_df(rowData(sce), "rowData", "var",
151 |             select = rowData
152 |         )
153 |     }
154 | 
155 |     if (is.null(adata_list$var)) {
156 |         # Add a dummy data.frame if var is currently empty
157 |         adata_list$var <- as.data.frame(make_zero_col_DFrame(nrow(sce)))
158 |     }
159 | 
160 |     # Convert to python now because python DFs can have duplicates in
161 |     # their index
162 |     adata_list$var <- r_to_py(adata_list$var)
163 |     if (!is.null(rownames(sce))) {
164 |         adata_list$var$index <- rownames(sce)
165 |     } else if (ncol(adata_list$var) == 0) {
166 |         # If there are no rownames and var has no columns delete it
167 |         adata_list$var <- NULL
168 |     } else {
169 |         # Otherwise convert the index to string
170 |         adata_list$var$index <- adata_list$var$index$astype("str")
171 |     }
172 | 
173 |     if (inherits(sce, "SpatialExperiment")) {
174 |         .ui_info("Adding {.field spatialCoords} to {.field reducedDim}")
175 |         coords <- SpatialExperiment::spatialCoords(sce)
176 |         if (ncol(coords) > 1) {
177 |             SingleCellExperiment::reducedDim(sce, "spatial") <- coords
178 |         }
179 |     }
180 | 
181 |     if (isFALSE(reducedDims)) {
182 |         .ui_info("Skipping conversion of {.field reducedDims}")
183 |     } else if (length(reducedDims(sce)) == 0) {
184 |         .ui_info("{.field reducedDims} is empty and was skipped")
185 |     } else {
186 |         .ui_step(
187 |             "Converting {.field reducedDims} to {.field obsm}",
188 |             msg_done = "{.field reducedDims} converted to {.field obsm}"
189 |         )
190 |         red_dims <- as.list(reducedDims(sce))
191 |         if (is.character(reducedDims)) {
192 |             reducedDims <- .check_select(
193 |                 reducedDims, "reducedDims", names(red_dims)
194 |             )
195 |             red_dims <- red_dims[reducedDims]
196 |         }
197 |         red_dims <- lapply(red_dims, .makeNumpyFriendly, transpose = FALSE)
198 |         red_dims <- lapply(red_dims, function(rd) {
199 |             if (!is.null(colnames(rd))) {
200 |                 rd <- r_to_py(as.data.frame(rd))
201 |                 if (!is.null(adata_list$obs)) {
202 |                     rd <- rd$set_axis(adata_list$obs$index)
203 |                 }
204 |             }
205 | 
206 |             rd
207 |         })
208 |         adata_list$obsm <- red_dims
209 |         cli::cli_progress_done()
210 |     }
211 | 
212 |     uns_list <- list()
213 |     uns_list[["X_name"]] <- X_name
214 |     if (isFALSE(metadata)) {
215 |         .ui_info("Skipping conversion of {.field metadata}")
216 |     } else if (length(metadata(sce)) == 0) {
217 |         .ui_info("{.field metadata} is empty and was skipped")
218 |     } else {
219 |         .ui_step(
220 |             "Converting {.field metadata} to {.field uns}",
221 |             msg_done = "{.field metadata} converted to {.field uns}"
222 |         )
223 |         meta_list <- .addListNames(metadata(sce))
224 |         if (is.character(metadata)) {
225 |             metadata <- .check_select(metadata, "metadata", names(meta_list))
226 |             meta_list <- meta_list[metadata]
227 |         }
228 |         for (item_name in names(meta_list)) {
229 |             item <- meta_list[[item_name]]
230 |             tryCatch(
231 |                 {
232 |                     # Try to convert the item using reticulate, skip if it fails
233 |                     # Capture the object output printed by reticulate
234 |                     capture.output(r_to_py(item))
235 |                     uns_list[[item_name]] <- item
236 |                 },
237 |                 error = function(err) {
238 |                     .ui_warn(paste(
239 |                         "The {.field {item_name}} item in {.field metadata}",
240 |                         "cannot be converted to a Python type and has been",
241 |                         "skipped"
242 |                     ))
243 |                 }
244 |             )
245 |         }
246 |         cli::cli_progress_done()
247 |     }
248 |     adata_list$uns <- r_to_py(uns_list)
249 | 
250 |     if (length(rowPairs(sce)) > 0) {
251 |         .ui_step(
252 |             "Converting {.field rowPairs} to {.field varp}",
253 |             msg_done = "{.field rowPairs} converted to {.field varp}"
254 |         )
255 |         adata_list$varp <- as.list(rowPairs(sce, asSparse = TRUE))
256 |         cli::cli_progress_done()
257 |     } else {
258 |         .ui_info("{.field rowPairs} is empty and was skipped")
259 |     }
260 | 
261 |     adata_list$obsp <- .convert_sce_pairs(sce, "colPairs", "obsp", colPairs)
262 |     adata_list$varp <- .convert_sce_pairs(sce, "rowPairs", "varp", rowPairs)
263 | 
264 |     do.call(anndata$AnnData, adata_list)
265 | }
266 | 
#' @importFrom methods as is
#' @importClassesFrom Matrix CsparseMatrix
#' @importFrom DelayedArray is_sparse
#' @importFrom Matrix t
# Original code from Charlotte Soneson in kevinrue/velociraptor
#
# Prepare a matrix-like object for conversion to Python.
#
# x         Matrix-like object.
# transpose Whether to transpose `x` before converting it.
#
# Returns a base matrix for non-sparse input. Sparse input is returned as a
# "CsparseMatrix", or as an "RsparseMatrix" when `transpose` is TRUE.
.makeNumpyFriendly <- function(x, transpose = TRUE) {
    mat <- if (transpose) t(x) else x

    # Anything that is not sparse is simply made into a base matrix
    if (!is_sparse(mat)) {
        return(as.matrix(mat))
    }

    sparse <- as(mat, "CsparseMatrix")
    if (transpose) as(sparse, "RsparseMatrix") else sparse
}
287 | 
# Make sure every element of a list has a usable name.
#
# x  A list (possibly empty, unnamed or only partially named).
#
# Returns `x` with names set as follows:
#   * an empty list is returned unchanged
#   * a completely unnamed list gets the names "item1", "item2", ...
#   * otherwise elements whose name is "" or NA get "item<position>" and all
#     names are then passed through make.names(unique = TRUE)
.addListNames <- function(x) {
    if (length(x) == 0) {
        return(x)
    }

    list_names <- names(x)

    if (is.null(list_names)) {
        names(x) <- paste0("item", seq_along(x))
        return(x)
    }

    # Treat NA names like empty names. Comparing NA to "" gives NA, which
    # cannot be used to subset-assign more than one value
    is_empty <- is.na(list_names) | list_names == ""
    list_names[is_empty] <- paste0("item", seq_along(x)[is_empty])
    names(x) <- make.names(list_names, unique = TRUE)

    return(x)
}
307 | 
# Move non-atomic columns out of rowData/colData and into metadata.
#
# sce   The object being converted.
# slot  Which slot to process, either "rowData" or "colData".
#
# Returns `sce`. If every column of the slot is atomic it is returned
# unchanged, otherwise the non-atomic columns are removed from the slot and
# stored in `metadata(sce)[[".rowData"]]` or `metadata(sce)[[".colData"]]`.
.store_non_atomic <- function(sce, slot = c("rowData", "colData")) {
    slot <- match.arg(slot)

    df <- switch(slot,
        rowData = rowData(sce),
        colData = colData(sce)
    )

    is_atomic <- vapply(df, is.atomic, NA)

    if (all(is_atomic)) {
        return(sce)
    }

    non_atomic_cols <- colnames(df)[!is_atomic]
    # Name of the metadata item the columns are moved to. It is defined before
    # the warning so the message reports the real destination for both slots
    meta_slot <- paste0(".", slot)
    .ui_warn(paste(
        "The following {.field {slot}} columns are not atomic and will be",
        "stored in {.field metadata(sce)${meta_slot}} before conversion:",
        "{.val {non_atomic_cols}}"
    ))

    # Add to any previously stored columns rather than replacing them
    if (meta_slot %in% names(metadata(sce))) {
        meta_list <- metadata(sce)[[meta_slot]]
    } else {
        meta_list <- list()
    }

    for (col in non_atomic_cols) {
        # NOTE(review): make.names(unique = TRUE) leaves the first element
        # as it is, so an existing item with the same name is overwritten -
        # confirm whether that is intended
        store_name <- make.names(c(col, names(meta_list)), unique = TRUE)[1]
        meta_list[[store_name]] <- df[[col]]
    }

    df[non_atomic_cols] <- NULL
    metadata(sce)[[meta_slot]] <- meta_list

    if (slot == "rowData") {
        rowData(sce) <- df
    } else {
        colData(sce) <- df
    }

    return(sce)
}
352 | 
# Restrict a selection to the items that are available in a slot.
#
# select     Character vector of requested items.
# slot_name  Name of the slot, used in the warning message.
# options    Character vector of the items that are available.
#
# Returns the elements of `select` found in `options`, in their original
# order. A warning is issued listing any requested items that are absent.
.check_select <- function(select, slot_name, options) {
    # NOTE(review): `verbose` is not used directly in this function;
    # presumably the `.ui_*()` helpers look it up in the calling frame -
    # confirm against their definitions
    verbose <- parent.frame()$verbose

    is_available <- select %in% options

    if (any(!is_available)) {
        missing <- select[!is_available]
        .ui_warn(paste(
            "These selected {.field {slot_name}} items are not in the",
            "object: {.field {missing}}"
        ))
    }

    return(select[is_available])
}
368 | 
# Convert a rowData/colData style table into a base data.frame.
#
# sce_df     Table to convert (must support `ncol()`, `colnames()`, `[` and
#            `as.list()`).
# slot_name  Name of the source slot, used in progress messages.
# to_name    Name of the destination, used in progress messages.
# select     Character vector of columns to keep; any non-character value
#            keeps every column.
#
# Returns a data.frame, or NULL when the table has no columns or none of the
# selected columns exist.
.convert_sce_df <- function(sce_df, slot_name, to_name, select = TRUE) {
    if (ncol(sce_df) == 0) {
        .ui_info("{.field {slot_name}} is empty and was skipped")
        return(NULL)
    }

    .ui_step(
        "Converting {.field {slot_name}} to {.field {to_name}}",
        msg_done = "{.field {slot_name}} converted to {.field {to_name}}"
    )

    df <- sce_df
    if (is.character(select)) {
        select <- .check_select(select, slot_name, colnames(sce_df))

        if (length(select) == 0) {
            return(NULL)
        }

        df <- sce_df[, select, drop = FALSE]
    }

    # Rebuild as a data.frame column by column, keeping the column names
    # as they are and leaving strings as character vectors
    constructor_args <- c(
        as.list(df),
        check.names      = FALSE,
        stringsAsFactors = FALSE
    )
    df <- do.call(data.frame, constructor_args)
    cli::cli_progress_done()

    return(df)
}
403 | 
# Extract rowPairs/colPairs from an SCE as a list of sparse matrices.
#
# sce        Object supporting `rowPairs()` / `colPairs()`.
# slot_name  Which pairs to extract, "rowPairs" or "colPairs".
# to_name    Name of the destination, used in progress messages.
# select     FALSE to skip conversion, a character vector of names to keep,
#            or anything else to keep every item.
#
# Returns a list of pairings, or NULL when conversion is skipped or the slot
# is empty.
.convert_sce_pairs <- function(sce, slot_name = c("rowPairs", "colPairs"),
                               to_name, select) {
    slot_name <- match.arg(slot_name)

    if (isFALSE(select)) {
        .ui_info("Skipping conversion of {.field {slot_name}}")
        return(NULL)
    }

    if (slot_name == "rowPairs") {
        pairs <- as.list(rowPairs(sce, asSparse = TRUE))
    } else {
        pairs <- as.list(colPairs(sce, asSparse = TRUE))
    }

    if (length(pairs) == 0) {
        .ui_info("{.field {slot_name}} is empty and was skipped")
        return(NULL)
    }

    .ui_step(
        "Converting {.field {slot_name}} to {.field {to_name}}",
        msg_done = "{.field {slot_name}} converted to {.field {to_name}}"
    )

    if (is.character(select)) {
        # Keep only the requested pairings that actually exist
        select <- .check_select(select, slot_name, names(pairs))
        pairs <- pairs[select]
    }
    cli::cli_progress_done()

    return(pairs)
}
437 | 


--------------------------------------------------------------------------------
/R/basilisk.R:
--------------------------------------------------------------------------------
  1 | #' AnnData environment
  2 | #'
  3 | #' The Python environment used by **zellkonverter** for interfacing with the
  4 | #' **anndata** Python library (and H5AD files) is described by the dependencies
#' returned by `AnnDataDependencies()`. The `zellkonverterAnnDataEnv()`
#' function returns the [basilisk::BasiliskEnvironment()] containing these
  7 | #' dependencies used by **zellkonverter**. Allowed versions of **anndata** are
  8 | #' available in `.AnnDataVersions`.
  9 | #'
 10 | #' @details
 11 | #'
 12 | #' ## Using Python environments
 13 | #'
#' When a **zellkonverter** function is first run, a conda environment
#' containing all of the necessary dependencies for that version will be
#' instantiated. This will not be performed on any subsequent run or if any
#' other **zellkonverter** function has been run prior with the same
#' environment version.
 18 | #'
 19 | #' By default the **zellkonverter** conda environment will become the shared R
 20 | #' Python environment if one does not already exist. When one does exist (for
 21 | #' example when a **zellkonverter** function has already been run using a
#' different environment version) then a separate environment will be used.
 23 | #' See [basilisk::setBasiliskShared()] for more information on this behaviour.
#' Note that when the environment is not shared, progress messages are lost.
 25 | #'
 26 | #' ## Development
 27 | #'
 28 | #' The `AnnDataDependencies()` function is exposed for use by other package
 29 | #' developers who want an easy way to define the dependencies required for
 30 | #' creating a Python environment to work with AnnData objects, most typically
 31 | #' within a **basilisk** context. For example, we can simply combine this
 32 | #' vector with additional dependencies to create a **basilisk** environment with
 33 | #' Python package versions that are consistent with those in **zellkonverter**.
 34 | #'
 35 | #' If you want to run code in the exact environment used by **zellkonverter**
 36 | #' this can be done using `zellkonverterAnnDataEnv()` in combination with
 37 | #' [basilisk::basiliskStart()] and/or [basilisk::basiliskRun()]. Please refer to
 38 | #' the **basilisk** documentation for more information on using these
 39 | #' environments.
 40 | #'
 41 | #' @author Luke Zappia
 42 | #' @author Aaron Lun
 43 | #'
 44 | #' @examples
 45 | #' .AnnDataVersions
 46 | #'
 47 | #' AnnDataDependencies()
 48 | #' AnnDataDependencies(version = "0.7.6")
 49 | #'
 50 | #' cl <- basilisk::basiliskStart(zellkonverterAnnDataEnv())
 51 | #' anndata <- reticulate::import("anndata")
 52 | #' basilisk::basiliskStop(cl)
 53 | #' @name AnnData-Environment
 54 | #' @rdname AnnData-Environment
 55 | NULL
 56 | 
 57 | #' @rdname AnnData-Environment
 58 | #'
 59 | #' @format
 60 | #' For `.AnnDataVersions` a character vector containing allowed **anndata**
 61 | #' version strings.
 62 | #'
 63 | #' @export
 64 | .AnnDataVersions <- c("0.11.4", "0.10.9", "0.10.6", "0.10.2", "0.9.2", "0.8.0", "0.7.6")
 65 | 
 66 | #' @rdname AnnData-Environment
 67 | #'
 68 | #' @param version A string giving the version of the **anndata** Python library
 69 | #' to use. Allowed values are available in `.AnnDataVersions`. By default the
 70 | #' latest version is used.
 71 | #'
 72 | #' @returns
 73 | #' For `AnnDataDependencies` a character vector containing the pinned versions
 74 | #' of all Python packages to be used by `zellkonverterAnnDataEnv()`.
 75 | #'
 76 | #' @export
 77 | AnnDataDependencies <- function(version = .AnnDataVersions) {
 78 |     version <- match.arg(version)
 79 | 
 80 |     switch(version,
 81 |         "0.7.6" = c(
 82 |             "anndata==0.7.6",
 83 |             "h5py==3.2.1",
 84 |             "hdf5==1.10.6",
 85 |             "natsort==7.1.1",
 86 |             "numpy==1.20.2",
 87 |             "packaging==20.9",
 88 |             "pandas==1.2.4",
 89 |             "python==3.7.10",
 90 |             "scipy==1.6.3",
 91 |             "sqlite==3.35.5"
 92 |         ),
 93 |         "0.8.0" = c(
 94 |             "anndata==0.8.0",
 95 |             "h5py==3.6.0",
 96 |             "hdf5==1.12.1",
 97 |             "natsort==8.1.0",
 98 |             "numpy==1.22.3",
 99 |             "packaging==21.3",
100 |             "pandas==1.4.2",
101 |             "python==3.8.13",
102 |             "scipy==1.7.3",
103 |             "sqlite==3.38.2"
104 |         ),
105 |         "0.9.2" = c(
106 |             "anndata==0.9.2",
107 |             "h5py==3.9.0",
108 |             "hdf5==1.14.2",
109 |             "natsort==8.4.0",
110 |             "numpy==1.26.0",
111 |             "packaging==23.2",
112 |             "pandas==2.1.1",
113 |             "python==3.11.5",
114 |             "scipy==1.11.3"
115 |         ),
116 |         "0.10.2" = c(
117 |             "anndata==0.10.2",
118 |             "h5py==3.10.0",
119 |             "hdf5==1.14.2",
120 |             "natsort==8.4.0",
121 |             "numpy==1.26.0",
122 |             "packaging==23.2",
123 |             "pandas==2.1.1",
124 |             "python==3.11.5",
125 |             "scipy==1.11.3"
126 |         ),
127 |         "0.10.6" = c(
128 |             "anndata==0.10.6",
129 |             "h5py==3.10.0",
130 |             "hdf5==1.14.3",
131 |             "natsort==8.4.0",
132 |             "numpy==1.26.4",
133 |             "packaging==24.0",
134 |             "pandas==2.2.1",
135 |             "python==3.12.2",
136 |             "scipy==1.12.0"
137 |         ),
138 |         "0.10.9" = c(
139 |             "anndata==0.10.9",
140 |             "h5py==3.12.1",
141 |             "hdf5==1.14.3",
142 |             "natsort==8.4.0",
143 |             "numpy==2.1.2",
144 |             "packaging==24.1",
145 |             "pandas==2.2.3",
146 |             "python==3.12.7",
147 |             "scipy==1.14.1"
148 |         ),
149 |         "0.11.4" = c(
150 |             "anndata==0.11.4",
151 |             "h5py==3.13.0",
152 |             "hdf5==1.14.3",
153 |             "natsort==8.4.0",
154 |             "numpy==2.2.4",
155 |             "packaging==24.2",
156 |             "pandas==2.2.3",
157 |             "python==3.13.2",
158 |             "scipy==1.15.2"
159 |         )
160 |     )
161 | }
162 | 
163 | #' @rdname AnnData-Environment
164 | #'
165 | #' @return
166 | #' For `zellkonverterAnnDataEnv` a [basilisk::BasiliskEnvironment()] containing
167 | #'  **zellkonverter**'s AnnData Python environment.
168 | #'
169 | #' @include ui.R
170 | #' @export
zellkonverterAnnDataEnv <- function(version = .AnnDataVersions) {
    version <- match.arg(version)

    # The environment name encodes the anndata version so that each version
    # gets its own environment
    env_name <- paste0("zellkonverterAnnDataEnv-", version)
    env_packages <- AnnDataDependencies(version)

    basilisk::BasiliskEnvironment(
        envname = env_name,
        pkgname = "zellkonverter",
        packages = env_packages
    )
}
180 | 
# Instantiate one environment object per anndata version at the top level of
# the package so they can be found by `basilisk::configureBasiliskEnv()` when
# `BASILISK_USE_SYSTEM_DIR=1`.
# See https://github.com/theislab/zellkonverter/issues/66.
# NOTE(review): this list presumably has to be kept in sync with
# `.AnnDataVersions` -- confirm when adding or removing a version.
anndata_env_0.7.6 <- zellkonverterAnnDataEnv(version = "0.7.6")
anndata_env_0.8.0 <- zellkonverterAnnDataEnv(version = "0.8.0")
anndata_env_0.9.2 <- zellkonverterAnnDataEnv(version = "0.9.2")
anndata_env_0.10.2 <- zellkonverterAnnDataEnv(version = "0.10.2")
anndata_env_0.10.6 <- zellkonverterAnnDataEnv(version = "0.10.6")
anndata_env_0.10.9 <- zellkonverterAnnDataEnv(version = "0.10.9")
anndata_env_0.11.4 <- zellkonverterAnnDataEnv(version = "0.11.4")
191 | 


--------------------------------------------------------------------------------
/R/read.R:
--------------------------------------------------------------------------------
  1 | #' Read H5AD
  2 | #'
  3 | #' Reads a H5AD file and returns a
  4 | #' \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
  5 | #' object.
  6 | #'
  7 | #' @param file String containing a path to a `.h5ad` file.
  8 | #' @param X_name Name used when saving `X` as an assay. If `NULL` looks for an
  9 | #' `X_name` value in `uns`, otherwise uses `"X"`.
 10 | #' @param use_hdf5 Logical scalar indicating whether assays should be
 11 | #' loaded as HDF5-based matrices from the **HDF5Array** package.
 12 | #' @param reader Which HDF5 reader to use. Either `"python"` for reading with
 13 | #' the **anndata** Python package via **reticulate** or `"R"` for
 14 | #' **zellkonverter**'s native R reader.
 15 | #' @param version A string giving the version of the **anndata** Python library
 16 | #' to use. Allowed values are available in `.AnnDataVersions`. By default the
 17 | #' latest version is used.
 18 | #' @param verbose Logical scalar indicating whether to print progress messages.
 19 | #' If `NULL` uses `getOption("zellkonverter.verbose")`.
 20 | #' @inheritDotParams AnnData2SCE -adata -hdf5_backed
 21 | #'
 22 | #' @details
 23 | #' Setting `use_hdf5 = TRUE` allows for very large datasets to be efficiently
 24 | #' represented on machines with little memory. However, this comes at the cost
 25 | #' of access speed as data needs to be fetched from the HDF5 file upon request.
 26 | #'
 27 | #' Setting `reader = "R"` will use an experimental native R reader instead of
 28 | #' reading the file into Python and converting the result. This avoids the need
 29 | #' for a Python environment and some of the issues with conversion but is still
 30 | #' under development and is likely to return slightly different output.
 31 | #'
 32 | #' See [AnnData-Environment] for more details on **zellkonverter** Python
 33 | #' environments.
 34 | #'
 35 | #' @return A \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
 36 | #' object is returned.
 37 | #'
 38 | #' @examples
 39 | #' library(SummarizedExperiment)
 40 | #'
 41 | #' file <- system.file("extdata", "krumsiek11.h5ad", package = "zellkonverter")
 42 | #' sce <- readH5AD(file)
 43 | #' class(assay(sce))
 44 | #'
 45 | #' sce2 <- readH5AD(file, use_hdf5 = TRUE)
 46 | #' class(assay(sce2))
 47 | #'
 48 | #' sce3 <- readH5AD(file, reader = "R")
 49 | #' @author Luke Zappia
 50 | #' @author Aaron Lun
 51 | #'
 52 | #' @seealso
 53 | #' [`writeH5AD()`], to write a
 54 | #' \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
 55 | #' object to a H5AD file.
 56 | #'
 57 | #' [`AnnData2SCE()`], for developers to convert existing AnnData instances to a
 58 | #' \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}.
 59 | #'
 60 | #' @export
 61 | #' @importFrom basilisk basiliskRun
 62 | #' @importFrom methods slot
 63 | readH5AD <- function(file, X_name = NULL, use_hdf5 = FALSE,
 64 |                      reader = c("python", "R"), version = NULL,
 65 |                      verbose = NULL, ...) {
 66 |     file <- path.expand(file)
 67 |     reader <- match.arg(reader)
 68 | 
 69 |     if (reader == "python") {
 70 |         .ui_info("Using the {.field Python} reader")
 71 |         env <- zellkonverterAnnDataEnv(version)
 72 |         version <- gsub("zellkonverterAnnDataEnv-", "", slot(env, "envname"))
 73 |         .ui_info("Using {.field anndata} version {.field {version}}")
 74 | 
 75 |         sce <- basiliskRun(
 76 |             env = env,
 77 |             fun = .H5ADreader,
 78 |             testload = "anndata",
 79 |             file = file,
 80 |             X_name = X_name,
 81 |             backed = use_hdf5,
 82 |             verbose = verbose,
 83 |             ...
 84 |         )
 85 |     } else if (reader == "R") {
 86 |         sce <- .native_reader(file, backed = use_hdf5, verbose = verbose)
 87 |     }
 88 | 
 89 |     return(sce)
 90 | }
 91 | 
 92 | #' @importFrom reticulate import
 93 | .H5ADreader <- function(file, X_name = NULL, backed = FALSE, verbose = NULL, ...) {
 94 |     anndata <- import("anndata")
 95 |     .ui_step(
 96 |         "Reading {.file { .trim_path(file)} }",
 97 |         msg_done = "Read {.file { .trim_path(file) }}",
 98 |         spinner = TRUE
 99 |     )
100 |     adata <- anndata$read_h5ad(file, backed = if (backed) "r" else FALSE)
101 |     cli::cli_progress_done()
102 | 
103 |     AnnData2SCE(
104 |         adata,
105 |         X_name = X_name, hdf5_backed = backed, verbose = verbose, ...
106 |     )
107 | }
108 | 
# Read an H5AD file into a SingleCellExperiment without using Python.
#
# The pieces of the file are read one at a time. A failure while reading
# anything other than `X` is downgraded to a warning so that the remaining
# parts of the object are still returned.
#
# file     Path to the H5AD file.
# backed   Passed to `.read_matrix()` to control whether assays stay
#          HDF5-backed.
# verbose  Verbosity setting read by the `.ui_*()` helpers.
#
# Returns the assembled SingleCellExperiment.
#' @importFrom S4Vectors I DataFrame wmsg
#' @importFrom SummarizedExperiment assays assays<- rowData colData rowData<- colData<-
#' @importFrom SingleCellExperiment SingleCellExperiment reducedDims<- colPairs<- rowPairs<-
.native_reader <- function(file, backed = FALSE, verbose = FALSE) {
    .ui_info("Using the {.field R} reader")
    .ui_step("Reading {.file {file}}", spinner = TRUE)

    # Build an error handler that turns a failure into a warning naming the
    # step (`prefix`), the file and the original error message
    warn_on_failure <- function(prefix) {
        function(e) {
            warning(wmsg(prefix, file, "':\n  ", conditionMessage(e)))
        }
    }

    contents <- .list_contents(file)

    # Assays: X first (if it's there), followed by every layer
    all.assays <- list()
    if ("X" %in% names(contents)) {
        all.assays[["X"]] <- .read_matrix(
            file, "X", contents[["X"]],
            backed = backed
        )
    }

    layers <- contents[["layers"]]
    for (layer in names(layers)) {
        tryCatch(
            {
                all.assays[[layer]] <- .read_matrix(
                    file,
                    file.path("layers", layer),
                    layers[[layer]],
                    backed = backed
                )
            },
            error = warn_on_failure(
                "setting additional assays from 'layers' failed for '"
            )
        )
    }

    sce <- SingleCellExperiment(all.assays)

    # Column annotations
    tryCatch(
        {
            col_data <- .read_dim_data(file, "obs", contents[["obs"]])
            if (!is.null(col_data)) {
                colData(sce) <- col_data
            }
        },
        error = warn_on_failure("setting 'colData' failed for '")
    )

    # Row annotations
    tryCatch(
        {
            row_data <- .read_dim_data(file, "var", contents[["var"]])
            if (!is.null(row_data)) {
                rowData(sce) <- row_data
                # Setting rowData doesn't seem to set the SCE rownames (even
                # though setting colData does set the colnames), so set them
                # manually
                rownames(sce) <- rownames(row_data)
            }
        },
        error = warn_on_failure("setting 'rowData' failed for '")
    )

    # Reduced dimensions
    tryCatch(
        {
            reducedDims(sce) <- .read_dim_mats(file, "obsm", contents[["obsm"]])
        },
        error = warn_on_failure("setting 'reducedDims' failed for '")
    )

    # Row matrices are appended to rowData as matrix columns
    tryCatch(
        {
            row_mat <- .read_dim_mats(file, "varm", contents[["varm"]])
            if (length(row_mat)) {
                row_mat_df <- do.call(DataFrame, lapply(row_mat, I))
                rowData(sce) <- cbind(rowData(sce), row_mat_df)
            }
        },
        error = warn_on_failure("extracting 'varm' failed for '")
    )

    # Pairings, if any exist
    tryCatch(
        {
            rowPairs(sce) <- .read_dim_pairs(file, "varp", contents[["varp"]])
        },
        error = warn_on_failure("setting 'rowPairs' failed for '")
    )

    tryCatch(
        {
            colPairs(sce) <- .read_dim_pairs(file, "obsp", contents[["obsp"]])
        },
        error = warn_on_failure("setting 'colPairs' failed for '")
    )

    # Unstructured data becomes the metadata
    if ("uns" %in% names(contents)) {
        tryCatch(
            {
                uns <- rhdf5::h5read(file, "uns")
                uns <- .convert_element(
                    uns, "uns", file,
                    recursive = TRUE
                )
                metadata(sce) <- uns
            },
            error = warn_on_failure("setting 'metadata' failed for '")
        )
    }

    # Rename the X assay when the file records the name it should have
    has_x_name <- "X_name" %in% names(metadata(sce))
    if (has_x_name && ("X" %in% names(contents))) {
        stopifnot(names(assays(sce))[1] == "X") # should be true b/c X is read 1st
        names(assays(sce))[1] <- metadata(sce)[["X_name"]]
        metadata(sce)[["X_name"]] <- NULL
    }

    sce
}
262 | 
# List the contents of an HDF5 file as a nested list mirroring its groups.
#
# file  Path to the HDF5 file.
#
# Returns a nested list. Groups are lists of their members; every other
# object is the one-row data.frame of its `otype`, `dclass` and `dim` fields
# from `rhdf5::h5ls()`.
.list_contents <- function(file) {
    manifest <- rhdf5::h5ls(file)

    # Place `leaf` at the nested position given by `keys`, creating the
    # intermediate lists on the way back up
    insert_at <- function(node, keys, leaf) {
        first_key <- keys[1]
        if (length(keys) != 1) {
            leaf <- insert_at(node[[first_key]], keys[-1], leaf)
        }
        if (is.null(node)) {
            node <- list()
        }
        node[[first_key]] <- leaf

        node
    }

    contents <- list()
    for (row in seq_len(nrow(manifest))) {
        group_parts <- strsplit(manifest[row, "group"], "/")[[1]]
        components <- c(group_parts, manifest[row, "name"])
        # Group paths start with "/", which leaves an empty first component
        if (components[1] == "") {
            components <- components[-1]
        }

        entry <- manifest[row, c("otype", "dclass", "dim")]
        if (entry$otype == "H5I_GROUP") {
            entry <- list()
        }
        contents <- insert_at(contents, components, entry)
    }

    contents
}
296 | 
# Read a matrix stored at `path` in an HDF5 file.
#
# file    Path to the HDF5 file.
# path    Location of the matrix inside the file.
# fields  Entry for `path` from `.list_contents()`. A data.frame entry is
#         read with `HDF5Array::HDF5Array()`, anything else with
#         `HDF5Array::H5SparseMatrix()`.
# backed  If TRUE the HDF5-backed object is returned as is, otherwise it is
#         loaded into memory.
#
# Returns the matrix.
.read_matrix <- function(file, path, fields, backed) {
    mat <- if (is.data.frame(fields)) {
        HDF5Array::HDF5Array(file, path)
    } else {
        HDF5Array::H5SparseMatrix(file, path)
    }

    if (backed) {
        return(mat)
    }

    # Realise in memory, keeping sparse data sparse
    if (DelayedArray::is_sparse(mat)) {
        as(mat, "sparseMatrix")
    } else {
        as.matrix(mat)
    }
}
314 | 
# Convert an object read with `rhdf5::h5read()` into a more natural R value.
#
# obj        The object read from `path`.
# path       Location of the object inside the file, used to read its
#            attributes (and its members when `recursive = TRUE`).
# file       Path to the HDF5 file.
# recursive  If TRUE and `obj` is a named list, each member is re-read from
#            the file and converted as well.
#
# Returns a factor for categorical elements, a logical vector for boolean
# enums, and otherwise `obj` with list names passed through `make.names()`.
.convert_element <- function(obj, path, file, recursive = FALSE) {
    element_attrs <- rhdf5::h5readAttributes(file, path)

    # Convert categorical element for AnnData v0.8+
    if (identical(element_attrs[["encoding-type"]], "categorical") &&
        all(c("codes", "categories") %in% names(obj))) {
        # Shift the codes to 1-based indices; a code that becomes 0 does not
        # refer to any category and is turned into NA
        codes <- obj[["codes"]] + 1
        codes[codes == 0] <- NA
        levels <- obj[["categories"]]

        # A missing (or unreadable) "ordered" attribute would otherwise give
        # a zero-length/NA value and make factor() fail, so treat anything
        # other than a single TRUE as unordered
        ord <- isTRUE(as.logical(element_attrs[["ordered"]]))

        obj <- factor(levels[codes], levels = levels, ordered = ord)
        return(obj)
    }

    # Handle booleans. Non-nullable booleans have encoding-type
    # "array", so we have to infer the type from the enum levels
    if (is.factor(obj) && identical(levels(obj), c("FALSE", "TRUE"))) {
        obj <- as.logical(obj)
        return(obj)
    }

    # Recursively convert element members
    if (recursive && is.list(obj) && !is.null(names(obj))) {
        for (k in names(obj)) {
            member_path <- file.path(path, k)
            obj[[k]] <- rhdf5::h5read(file, member_path)
            obj[[k]] <- .convert_element(
                obj[[k]], member_path,
                file,
                recursive = TRUE
            )
        }
    }

    # Make sure list names are syntactically valid R names
    if (is.list(obj) && !is.null(names(obj))) {
        names(obj) <- make.names(names(obj))
    }

    obj
}
356 | 
# Read a dimension annotation table (for example "obs" or "var") from an H5AD
# file.
#
# file    Path to the HDF5 file.
# path    Location of the table inside the file.
# fields  Entry for `path` from `.list_contents()`; its names are the
#         columns to read.
#
# Returns a DataFrame whose row names come from the column named by the
# table's "_index" attribute, or NULL when there are neither columns nor an
# index.
#' @importFrom S4Vectors DataFrame
.read_dim_data <- function(file, path, fields) {
    col_names <- setdiff(names(fields), "__categories")
    out_cols <- list()
    for (col_name in col_names) {
        col_path <- file.path(path, col_name)
        vec <- rhdf5::h5read(file, col_path)

        vec <- .convert_element(
            vec, col_path,
            file,
            recursive = FALSE
        )

        if (!is.factor(vec)) {
            vec <- as.vector(vec)
        }

        out_cols[[col_name]] <- vec
    }

    # for AnnData versions <= 0.7, where the category labels are stored
    # separately from the integer codes
    cat_names <- names(fields[["__categories"]])
    for (cat_name in cat_names) {
        levels <- as.vector(
            rhdf5::h5read(file, file.path(path, "__categories", cat_name))
        )
        codes <- out_cols[[cat_name]] + 1L
        # A code that becomes 0 after the shift to 1-based indexing does not
        # refer to any category (the same case `.convert_element()` maps to
        # NA). R silently drops zero subscripts, which would shorten the
        # column and misalign it, so mark these entries as missing instead.
        codes[which(codes == 0L)] <- NA
        out_cols[[cat_name]] <- factor(levels[codes], levels = levels)
    }

    # Example of the attributes of a dataframe group:
    ## rhdf5::h5readAttributes(file, "var") |> str()
    ## List of 4
    ##  $ _index          : chr "feature_id"
    ##  $ column-order    : chr [1:4(1d)] "feature_is_filtered" "feature_name" "feature_reference" "feature_biotype"
    ##  $ encoding-type   : chr "dataframe"
    ##  $ encoding-version: chr "0.2.0"
    attributes <- rhdf5::h5readAttributes(file, path)
    index <- attributes[["_index"]]
    if (!is.null(index)) {
        indices <- out_cols[[index]]
    } else {
        indices <- NULL
    }

    # Reorder (and subset) the columns as recorded in the file
    column_order <- attributes[["column-order"]]
    if (!is.null(column_order)) {
        out_cols <- out_cols[column_order]
    }

    if (length(out_cols)) {
        df <- do.call(DataFrame, out_cols)
        rownames(df) <- indices
    } else if (!is.null(indices)) {
        df <- DataFrame(row.names = indices)
    } else {
        df <- NULL
    }

    df
}
417 | 
# Read every matrix in a group (for example "obsm" or "varm").
#
# file    Path to the HDF5 file.
# path    Location of the group inside the file.
# fields  Entry for `path` from `.list_contents()`; its names are the
#         matrices to read.
#
# Returns a list of matrices named after the members of the group.
.read_dim_mats <- function(file, path, fields) {
    mats <- list()
    for (mat_name in names(fields)) {
        stored <- rhdf5::h5read(file, file.path(path, mat_name))
        # Because everything's transposed.
        mats[[mat_name]] <- t(stored)
    }
    mats
}
426 | 
# Read every pairwise matrix in a group (for example "obsp" or "varp").
#
# file    Path to the HDF5 file.
# path    Location of the group inside the file.
# fields  Entry for `path` from `.list_contents()`; its names are the
#         matrices to read.
#
# Returns a list of in-memory sparse matrices named after the members of the
# group.
.read_dim_pairs <- function(file, path, fields) {
    pairings <- list()
    for (pair_name in names(fields)) {
        backed_mat <- HDF5Array::H5SparseMatrix(file, file.path(path, pair_name))
        pairings[[pair_name]] <- as(backed_mat, "sparseMatrix")
    }
    pairings
}
435 | 


--------------------------------------------------------------------------------
/R/reticulate.R:
--------------------------------------------------------------------------------
  1 | #' Convert between Python and R objects
  2 | #'
  3 | #' @param x A Python object.
  4 | #'
  5 | #' @return An \R object, as converted from the Python object.
  6 | #'
  7 | #' @details
  8 | #' These functions are extensions of the default conversion functions in the
  9 | #' `reticulate` package for the following reasons:
 10 | #'
 11 | #' - `numpy.ndarray` - Handle conversion of **numpy** recarrays
 12 | #' - `pandas.core.arrays.masked.BaseMaskedArray` - Handle conversion of
#'   **pandas** arrays (used by `AnnData` objects when there are missing
 14 | #'   values)
 15 | #' - `pandas.core.arrays.categorical.Categorical` - Handle conversion of
 16 | #'   **pandas** categorical arrays
 17 | #'
 18 | #' @author Luke Zappia
 19 | #'
 20 | #' @seealso
 21 | #' [reticulate::py_to_r()] for the base `reticulate` functions
 22 | #'
 23 | #' @name r-py-conversion
 24 | #' @export
 25 | py_to_r.numpy.ndarray <- function(x) {
 26 |     disable_conversion_scope(x)
 27 | 
 28 |     # Suggested method to detect recarrays from
 29 |     # https://stackoverflow.com/a/62491135/4384120
 30 |     if (!is.null(py_to_r(x$dtype$names))) {
 31 |         # Convert via pandas DataFrame as suggested here
 32 |         # https://stackoverflow.com/a/60614003/4384120
 33 |         # Not as efficient but less messing around with types
 34 |         pandas <- import("pandas", convert = FALSE)
 35 |         out <- tryCatch(
 36 |             {
 37 |                 x <- pandas$DataFrame(x)$to_numpy()
 38 |                 py_to_r(x)
 39 |             },
 40 |             error = function(err) {
 41 |                 stop("Failed to convert recarray with error: ", err$message,
 42 |                     call. = FALSE
 43 |                 )
 44 |             }
 45 |         )
 46 |         return(out)
 47 |     }
 48 | 
 49 |     # No special handler found, delegate to next method
 50 |     NextMethod()
 51 | }
 52 | 
 53 | #' @export
 54 | py_to_r.pandas.core.arrays.masked.BaseMaskedArray <- function(x) {
 55 |     disable_conversion_scope(x)
 56 | 
 57 |     if (is(x, "pandas.core.arrays.boolean.BooleanArray")) {
 58 |         dtype <- "bool"
 59 |         fill <- FALSE
 60 |     } else if (is(x, "pandas.core.arrays.integer.IntegerArray")) {
 61 |         dtype <- "int"
 62 |         fill <- 0L
 63 |     } else if (is(x, "pandas.core.arrays.floating.FloatingArray")) {
 64 |         dtype <- "float"
 65 |         fill <- 0.0
 66 |     } else if (is(x, "pandas.core.arrays.string_.StringArray")) {
 67 |         dtype <- "str"
 68 |         fill <- ""
 69 |     } else {
 70 |         stop(
 71 |             "No conversion exists for this Pandas array type: ",
 72 |             paste(class(x), collapse = ", ")
 73 |         )
 74 |     }
 75 | 
 76 |     # Record which values should be NA
 77 |     is_na <- reticulate::py_to_r(x$isna())
 78 | 
 79 |     # Fill NA values with a dummy
 80 |     x <- x$fillna(value = fill)
 81 | 
 82 |     # Convert to numpy array and then to R using default conversion
 83 |     x <- x$to_numpy()$astype(dtype)
 84 |     x <- reticulate::py_to_r(x)
 85 | 
 86 |     # Restore the NA values
 87 |     x[is_na] <- NA
 88 | 
 89 |     return(x)
 90 | }
 91 | 
 92 | #' @export
 93 | py_to_r.pandas.core.arrays.categorical.Categorical <- function(x) {
 94 |     disable_conversion_scope(x)
 95 | 
 96 |     # Get the category levels
 97 |     cats <- reticulate::py_to_r(x$categories$to_list())
 98 | 
 99 |     # Record which values should be NA
100 |     is_na <- reticulate::py_to_r(x$isna())
101 | 
102 |     # Fill NA values with a dummy
103 |     x <- x$fillna(value = cats[1])
104 | 
105 |     # Convert to list and then to R using default conversion
106 |     x <- x$tolist()
107 |     x <- reticulate::py_to_r(x)
108 | 
109 |     # Restore the NA values
110 |     x[is_na] <- NA
111 | 
112 |     # Convert to factor
113 |     x <- factor(x, levels = cats)
114 | 
115 |     return(x)
116 | }
117 | 


--------------------------------------------------------------------------------
/R/ui.R:
--------------------------------------------------------------------------------
  1 | #' Set zellkonverter verbose
  2 | #'
  3 | #' Set the zellkonverter verbosity option
  4 | #'
  5 | #' @param verbose Logical value for the verbosity option.
  6 | #'
  7 | #' @details
  8 | #' Running `setZellkonverterVerbose(TRUE)` will turn on **zellkonverter**
  9 | #' progress messages by default without having to set `verbose = TRUE` in each
 10 | #' function call. This is done by setting the `"zellkonverter.verbose"` option.
 11 | #' Running `setZellkonverterVerbose(FALSE)` will turn default verbosity off.
 12 | #'
 13 | #' @return The value of getOption("zellkonverter.verbose") invisibly
 14 | #' @export
 15 | #'
 16 | #' @examples
 17 | #' current <- getOption("zellkonverter.verbose")
 18 | #' setZellkonverterVerbose(TRUE)
 19 | #' getOption("zellkonverter.verbose")
 20 | #' setZellkonverterVerbose(FALSE)
 21 | #' getOption("zellkonverter.verbose")
 22 | #' setZellkonverterVerbose(current)
 23 | #' getOption("zellkonverter.verbose")
 24 | setZellkonverterVerbose <- function(verbose = TRUE) {
 25 |     options(zellkonverter.verbose = isTRUE(verbose))
 26 |     invisible(getOption("zellkonverter.verbose"))
 27 | }
 28 | 
 29 | .get_verbose <- function(envir) {
 30 |     # A `verbose` value found in `envir` takes priority; when it is NULL the
 31 |     # session-wide "zellkonverter.verbose" option decides instead
 32 |     local_flag <- envir$verbose
 33 |     if (!is.null(local_flag)) {
 34 |         return(isTRUE(local_flag))
 35 |     }
 36 | 
 37 |     isTRUE(getOption("zellkonverter.verbose"))
 38 | }
 38 | 
 39 | .ui_rule <- function(msg, ...) {
 40 |     caller <- parent.frame() # supplies `verbose` and the glue variables
 41 |     if (!.get_verbose(caller)) {
 42 |         return(invisible(NULL))
 43 |     }
 44 |     cli::cli_rule(msg, ..., .envir = caller)
 45 | }
 46 | 
 47 | .ui_info <- function(msg, ...) {
 48 |     caller <- parent.frame() # `msg` is interpolated in the caller's frame
 49 |     if (!.get_verbose(caller)) {
 50 |         return(invisible(NULL))
 51 |     }
 52 |     cli::cli_alert_info(msg, ..., .envir = caller)
 53 | }
 54 | 
 55 | .ui_warn <- function(msg, warn = TRUE, ...) {
 56 |     caller <- parent.frame()
 57 |     # Interpolate once up front so the alert and the R warning share one text
 58 |     formatted <- cli::format_message(msg, .envir = caller)
 59 | 
 60 |     if (.get_verbose(caller)) {
 61 |         cli::cli_alert_warning(formatted, ..., .envir = caller)
 62 |     }
 63 | 
 64 |     if (warn) {
 65 |         warning(formatted, call. = FALSE)
 66 |     }
 67 | }
 68 | 
 69 | .ui_step <- function(msg, ...) {
 70 |     caller <- parent.frame() # the step is registered against this frame
 71 |     if (!.get_verbose(caller)) {
 72 |         return(invisible(NULL))
 73 |     }
 74 |     cli::cli_progress_step(msg, ..., .envir = caller)
 75 | }
 76 | 
 77 | .ui_process <- function(msg, ...) {
 78 |     caller <- parent.frame() # frame passed to cli as `.envir`
 79 |     if (!.get_verbose(caller)) {
 80 |         return(invisible(NULL))
 81 |     }
 82 |     cli::cli_process_start(msg, ..., .envir = caller)
 83 | }
 84 | 
 85 | .ui_process_done <- function(...) {
 86 |     caller <- parent.frame() # same frame that `.ui_process()` was called from
 87 |     if (!.get_verbose(caller)) {
 88 |         return(invisible(NULL))
 89 |     }
 90 |     cli::cli_process_done(..., .envir = caller)
 91 | }
 92 | 
 93 | .trim_path <- function(path, n = 40) {
 94 |     parts <- .split_path(path)
 95 |     # Replace components with "...", root first, until the path fits in `n`
 96 |     for (idx in seq_along(parts)) {
 97 |         joined <- gsub("^//", "/", do.call(file.path, as.list(parts)))
 98 |         if (nchar(joined) <= n) {
 99 |             break
100 |         }
101 |         if (nchar(parts[idx]) >= 3) {
102 |             parts[idx] <- "..."
103 |         }
104 |     }
105 | 
106 |     return(joined)
107 | }
108 | 
109 | .split_path <- function(path) {
110 |     parent <- dirname(path)
111 |     if (parent == path) { # dirname() no longer changes anything: top reached
112 |         return(path)
113 |     }
114 |     c(.split_path(parent), basename(path))
115 | }
116 | 


--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
1 | # yoinked from reticulate ->
2 | # https://github.com/rstudio/reticulate/blob/fe0eda154a80b22c0d45e043b74390b73ab8b64e/R/utils.R#L49
3 | yoink <- function(package, symbol) {
4 |     get(symbol, envir = asNamespace(package), inherits = FALSE)
5 | }
6 | disable_conversion_scope <- yoink("reticulate", "disable_conversion_scope")
7 | 


--------------------------------------------------------------------------------
/R/validation.R:
--------------------------------------------------------------------------------
  1 | #' Validate H5AD SCE
  2 | #'
  3 | #' Validate a SingleCellExperiment created by `readH5AD()`. Designed to be used
  4 | #' inside `testthat::test_that()` during package testing.
  5 | #'
  6 | #' @param sce A \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
  7 | #'   object.
  8 | #' @param names Named list of expected names. Names are slots and values are
  9 | #' vectors of names that are expected to exist in that slot.
 10 | #' @param missing Named list of known missing names. Names are slots and values
 11 | #' are vectors of names that are expected to not exist in that slot.
 12 | #'
 13 | #' @details
 14 | #' This function checks that a SingleCellExperiment contains the expected items
 15 | #' in each slot. The main reason for this function is to avoid repeating code when
 16 | #' testing multiple `.h5ad` files. The following items in `names` and `missing`
 17 | #' are recognised:
 18 | #'
 19 | #' * `assays` - Assay names
 20 | #' * `colData` - colData column names
 21 | #' * `rowData` - rowData column names
 22 | #' * `metadata` - metadata names
 23 | #' * `redDim` - Reduced dimension names
 24 | #' * `varm` - Column names of the `varm` rowData column (from the AnnData varm
 25 | #'    slot)
 26 | #' * `colPairs` - Column pair names
 27 | #' * `rowPairs` - Row pair names
 28 | #' * `raw_rowData` - rowData column names in the `raw` altExp
 29 | #' * `raw_varm` - Column names of the raw `varm` rowData column (from the
 30 | #'   AnnData varm slot)
 31 | #'
 32 | #' If an item in `names` or `missing` is `NULL` then it won't be checked. The
 33 | #' items in `missing` are checked that they explicitly do not exist. This is
 34 | #' mostly for record keeping when something is known to not be converted but can
 35 | #' also be useful when the corresponding `names` item is `NULL`.
 36 | #'
 37 | #' @return If checks are successful `TRUE` invisibly, if not other output
 38 | #' depending on the context
 39 | #'
 40 | #' @author Luke Zappia
 41 | validateH5ADSCE <- function(sce, names, missing) {
 42 |     # Split the nested `varm` column off rowData so each is checked separately
 43 |     if ("varm" %in% colnames(SummarizedExperiment::rowData(sce))) {
 44 |         varm <- SummarizedExperiment::rowData(sce)$varm
 45 |         SummarizedExperiment::rowData(sce)$varm <- NULL
 46 |     } else {
 47 |         varm <- NULL
 48 |     }
 49 | 
 50 |     .names_validator(
 51 |         "Assay names",
 52 |         SummarizedExperiment::assayNames(sce),
 53 |         names$assays,
 54 |         missing$assays
 55 |     )
 56 | 
 57 |     .names_validator(
 58 |         "colData names",
 59 |         colnames(SummarizedExperiment::colData(sce)),
 60 |         names$colData,
 61 |         missing$colData
 62 |     )
 63 | 
 64 |     .names_validator(
 65 |         "rowData names",
 66 |         colnames(SummarizedExperiment::rowData(sce)),
 67 |         names$rowData,
 68 |         missing$rowData
 69 |     )
 70 | 
 71 |     .names_validator(
 72 |         "metadata names",
 73 |         names(S4Vectors::metadata(sce)),
 74 |         names$metadata,
 75 |         missing$metadata
 76 |     )
 77 | 
 78 |     .names_validator(
 79 |         "redDim names",
 80 |         SingleCellExperiment::reducedDimNames(sce),
 81 |         names$redDim,
 82 |         missing$redDim
 83 |     )
 84 | 
 85 |     .names_validator(
 86 |         "varm names",
 87 |         colnames(varm),
 88 |         names$varm,
 89 |         missing$varm
 90 |     )
 91 | 
 92 |     .names_validator(
 93 |         "colPairs names",
 94 |         names(SingleCellExperiment::colPairs(sce)),
 95 |         names$colPairs,
 96 |         missing$colPairs
 97 |     )
 98 | 
 99 |     .names_validator(
100 |         "rowPairs names",
101 |         names(SingleCellExperiment::rowPairs(sce)),
102 |         names$rowPairs,
103 |         missing$rowPairs
104 |     )
105 | 
106 |     if ("raw" %in% SingleCellExperiment::altExpNames(sce)) {
107 |         raw_rowData <- SummarizedExperiment::rowData(
108 |             SingleCellExperiment::altExp(sce, "raw"))
109 |         if ("varm" %in% colnames(raw_rowData)) {
110 |             raw_varm <- raw_rowData$varm
111 |             raw_rowData$varm <- NULL
112 |         } else {
113 |             raw_varm <- NULL # keep defined for the colnames() call below
114 |         }
115 | 
116 |         .names_validator(
117 |             "raw rowData names",
118 |             colnames(raw_rowData),
119 |             names$raw_rowData,
120 |             missing$raw_rowData
121 |         )
122 | 
123 |         .names_validator(
124 |             "raw varm names",
125 |             colnames(raw_varm),
126 |             names$raw_varm,
127 |             missing$raw_varm
128 |         )
129 |     }
130 | 
131 |     invisible(TRUE)
132 | }
132 | 
133 | .names_validator <- function(label, actual_names, correct_names, missing_names) {
134 |     # Expected names, when supplied, must be identical to the actual names
135 |     # (same values in the same order)
136 |     has_expected <- !is.null(correct_names)
137 |     if (has_expected) {
138 |         testthat::expect_identical(actual_names, correct_names, label = label)
139 |     }
140 | 
141 |     # Known-missing names, when supplied, must all be absent from the actual
142 |     # names
143 |     has_missing <- !is.null(missing_names)
144 |     if (has_missing) {
145 |         found <- missing_names %in% actual_names
146 |         testthat::expect_true(!any(found), label = paste(label, "missing"))
147 |     }
148 | 
149 |     invisible(TRUE)
150 | }
151 | 
152 | #' Expect SCE
153 | #'
154 | #' Test that a SingleCellExperiment matches an expected object. Designed to be
155 | #' used inside `testthat::test_that()` during package testing.
156 | #'
157 | #' @param sce A \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
158 | #'   object.
159 | #' @param expected A template \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
160 | #'   object to compare to.
161 | #'
162 | #' @return `TRUE` invisibly if checks pass
163 | #'
164 | #' @author Luke Zappia
165 | expectSCE <- function(sce, expected) {
166 |     testthat::expect_identical(dimnames(sce), dimnames(expected))
167 |     # Metadata is only compared when the template actually carries some
168 |     if (length(S4Vectors::metadata(expected)) > 0) {
169 |         testthat::expect_identical(
170 |             S4Vectors::metadata(sce),
171 |             S4Vectors::metadata(expected)
172 |         )
173 |     }
174 |     testthat::expect_identical(
175 |         SummarizedExperiment::assayNames(sce),
176 |         SummarizedExperiment::assayNames(expected)
177 |     )
178 |     for (assay in SummarizedExperiment::assayNames(expected)) {
179 |         testthat::expect_equal(
180 |             SummarizedExperiment::assay(sce, assay),
181 |             SummarizedExperiment::assay(expected, assay)
182 |         )
183 |     }
184 |     testthat::expect_identical(
185 |         SingleCellExperiment::reducedDims(sce),
186 |         SingleCellExperiment::reducedDims(expected)
187 |     )
188 |     testthat::expect_identical(
189 |         SummarizedExperiment::rowData(sce),
190 |         SummarizedExperiment::rowData(expected)
191 |     )
192 |     testthat::expect_identical(
193 |         SummarizedExperiment::colData(sce),
194 |         SummarizedExperiment::colData(expected)
195 |     )
196 |     testthat::expect_identical(
197 |         SingleCellExperiment::rowPairs(sce),
198 |         SingleCellExperiment::rowPairs(expected)
199 |     )
200 |     testthat::expect_identical(
201 |         SingleCellExperiment::colPairs(sce),
202 |         SingleCellExperiment::colPairs(expected)
203 |     )
204 |     invisible(TRUE)
205 | }
206 | 


--------------------------------------------------------------------------------
/R/write.R:
--------------------------------------------------------------------------------
  1 | #' Write H5AD
  2 | #'
  3 | #' Write a H5AD file from a
  4 | #' \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
  5 | #' object.
  6 | #'
  7 | #' @param sce A \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
  8 | #'   object.
  9 | #' @param file String containing a path to write the new `.h5ad` file.
 10 | #' @param X_name Name of the assay to use as the primary matrix (`X`) of the
 11 | #' AnnData object. If `NULL`, the first assay of `sce` will be used by default.
 12 | #' @param skip_assays Logical scalar indicating whether assay matrices should
 13 | #' be ignored when writing to `file`.
 14 | #' @param compression Type of compression when writing the new `.h5ad` file.
 15 | #' @param version A string giving the version of the **anndata** Python library
 16 | #' to use. Allowed values are available in `.AnnDataVersions`. By default the
 17 | #' latest version is used.
 18 | #' @param verbose Logical scalar indicating whether to print progress messages.
 19 | #' If `NULL` uses `getOption("zellkonverter.verbose")`.
 20 | #' @inheritDotParams SCE2AnnData
 21 | #'
 22 | #' @details
 23 | #'
 24 | #' ## Skipping assays
 25 | #'
 26 | #' Setting `skip_assays = TRUE` can occasionally be useful if the matrices in
 27 | #' `sce` are stored in a format that is not amenable for efficient conversion
 28 | #' to a **numpy**-compatible format. In such cases, it can be better to create
 29 | #' an empty placeholder dataset in `file` and fill it in R afterwards.
 30 | #'
 31 | #' ## **DelayedArray** assays
 32 | #'
 33 | #' If `sce` contains any **DelayedArray** matrices as assays `writeH5AD()` will
 34 | #' write them to disk using the **rhdf5** package directly rather than via
 35 | #' Python to avoid instantiating them in memory. However there is currently
 36 | #' an issue which prevents this being done for sparse **DelayedArray** matrices.
 37 | #'
 38 | #' ## Known conversion issues
 39 | #'
 40 | #' ### Coercion to factors
 41 | #'
 42 | #' The **anndata** package automatically converts some character vectors to
 43 | #' factors when saving `.h5ad` files. This can affect columns of `rowData(sce)`
 44 | #' and `colData(sce)` which may change type when the `.h5ad` file is read back
 45 | #' into R.
 46 | #'
 47 | #' ## Environment
 48 | #'
 49 | #' See [AnnData-Environment] for more details on **zellkonverter** Python
 50 | #' environments.
 51 | #'
 52 | #' @return A `NULL` is invisibly returned.
 53 | #'
 54 | #' @author Luke Zappia
 55 | #' @author Aaron Lun
 56 | #'
 57 | #' @seealso
 58 | #' [`readH5AD()`], to read a
 59 | #' \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
 60 | #' object from a H5AD file.
 61 | #'
 62 | #' [`SCE2AnnData()`], for developers to create an AnnData object from a
 63 | #' \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}.
 64 | #'
 65 | #' @examples
 66 | #' # Using the Zeisel brain dataset
 67 | #' if (requireNamespace("scRNAseq", quietly = TRUE)) {
 68 | #'     library(scRNAseq)
 69 | #'     sce <- ZeiselBrainData()
 70 | #'
 71 | #'     # Writing to a H5AD file
 72 | #'     temp <- tempfile(fileext = ".h5ad")
 73 | #'     writeH5AD(sce, temp)
 74 | #' }
 75 | #' @export
 76 | #' @importFrom basilisk basiliskRun
 77 | #' @importFrom Matrix sparseMatrix
 78 | #' @importFrom DelayedArray is_sparse
 79 | writeH5AD <- function(
 80 |     sce, file, X_name = NULL, skip_assays = FALSE,
 81 |     compression = c("none", "gzip", "lzf"), version = NULL,
 82 |     verbose = NULL, ...) {
 83 |     compression <- match.arg(compression)
 84 |     # "none" is represented as NULL from here on
 85 |     if (compression == "none") {
 86 |         compression <- NULL
 87 |     }
 88 | 
 89 |     # Swap dense DelayedMatrix assays for `.make_fake_mat()` stand-ins.
 90 |     ass_list <- assays(sce)
 91 |     is_da <- logical(length(ass_list))
 92 |     for (a in seq_along(ass_list)) {
 93 |         # Skip sparse DelayedArrays due to rhdf5 issue
 94 |         # https://github.com/grimbough/rhdf5/issues/79
 95 |         if (is(ass_list[[a]], "DelayedMatrix") && !is_sparse(ass_list[[a]])) {
 96 |             is_da[a] <- TRUE
 97 |             assay(sce, a, withDimnames = FALSE) <- .make_fake_mat(dim(sce))
 98 |         }
 99 |     }
100 |     # Resolve the Python environment and recover the version from its name
101 |     env <- zellkonverterAnnDataEnv(version)
102 |     version <- gsub("zellkonverterAnnDataEnv-", "", slot(env, "envname"))
103 |     .ui_info("Using {.field anndata} version {.field {version}}")
104 |     # Expand a leading ~ before the path is handed to Python and rhdf5
105 |     file <- path.expand(file)
106 |     basiliskRun(
107 |         env = env,
108 |         fun = .H5ADwriter,
109 |         testload = "anndata",
110 |         sce = sce,
111 |         file = file,
112 |         X_name = X_name,
113 |         skip_assays = skip_assays,
114 |         compression = compression,
115 |         verbose = verbose,
116 |         ...
117 |     )
118 | 
119 |     # Replace each stand-in dataset in the file with the original assay data.
120 |     if (any(is_da)) {
121 |         for (p in which(is_da)) {
122 |             if (p == 1L) {
123 |                 curp <- "X"
124 |             } else {
125 |                 curp <- file.path("layers", assayNames(sce)[p])
126 |             }
127 |             rhdf5::h5delete(file, curp)
128 |             mat <- ass_list[[p]]
129 |             # NOTE(review): `curp` assumes assay 1 is X; check with `X_name`
130 |             if (!is_sparse(mat)) {
131 |                 HDF5Array::writeHDF5Array(
132 |                     mat,
133 |                     filepath = file, name = curp, with.dimnames = FALSE
134 |                 )
135 |             } else {
136 |                 .write_CSR_matrix(file, name = curp, mat = mat)
137 |             }
138 |         }
139 |     }
140 | 
141 |     invisible(NULL)
142 | }
143 | 
144 | #' @importFrom reticulate import
145 | .H5ADwriter <- function(sce, file, X_name, skip_assays, compression,
146 |                         verbose = NULL, ...) {
147 |     # Convert to AnnData, then write it out; `compression` is NULL for none
148 |     adata <- SCE2AnnData(
149 |         sce, X_name = X_name, skip_assays = skip_assays, verbose = verbose, ...
150 |     )
151 |     .ui_step(
152 |         "Writing {.file { .trim_path(file)} }",
153 |         msg_done = "Wrote {.file { .trim_path(file)} }",
154 |         spinner = TRUE
155 |     )
156 |     if (!is.null(compression)) {
157 |         .ui_info("Using {.field {compression}} compression")
158 |     }
159 |     adata$write_h5ad(file, compression = compression)
160 | }
161 | 
162 | # nocov start
163 | 
164 | # Skipping code coverage on these functions because they aren't used until the
165 | # sparse DelayedArray rhdf5 issue mentioned above is addressed
166 | 
167 | #' @importFrom DelayedArray blockApply rowAutoGrid type
168 | .write_CSR_matrix <- function(file, name, mat, chunk_dim = 10000) {
169 |     handle <- rhdf5::H5Fopen(file)
170 |     on.exit(rhdf5::H5Fclose(handle))
171 |     # Group for this matrix; `after = FALSE` closes it before the file handle
172 |     rhdf5::h5createGroup(handle, name)
173 |     ghandle <- rhdf5::H5Gopen(handle, name)
174 |     on.exit(rhdf5::H5Gclose(ghandle), add = TRUE, after = FALSE)
175 |     # Encoding attributes on the group; the stored shape is `dim(mat)` reversed
176 |     rhdf5::h5writeAttribute("csc_matrix", ghandle, "encoding-type")
177 |     rhdf5::h5writeAttribute("0.1.0", ghandle, "encoding-version")
178 |     rhdf5::h5writeAttribute(rev(dim(mat)), ghandle, "shape")
179 |     # `data` and `indices` start empty and unlimited so they can be extended
180 |     rhdf5::h5createDataset(
181 |         handle,
182 |         file.path(name, "data"),
183 |         dims = 0,
184 |         maxdims = rhdf5::H5Sunlimited(),
185 |         H5type = if (type(mat) == "integer") {
186 |             "H5T_NATIVE_INT32"
187 |         } else {
188 |             "H5T_NATIVE_DOUBLE"
189 |         },
190 |         chunk = chunk_dim
191 |     )
192 | 
193 |     rhdf5::h5createDataset(
194 |         handle,
195 |         file.path(name, "indices"),
196 |         dims    = 0,
197 |         maxdims = rhdf5::H5Sunlimited(),
198 |         H5type  = "H5T_NATIVE_UINT32",
199 |         chunk   = chunk_dim
200 |     )
201 | 
202 |     env <- new.env() # persist the 'last' counter.
203 |     env$last <- 0L
204 |     out <- blockApply(
205 |         mat,
206 |         grid      = rowAutoGrid(mat),
207 |         FUN       = .blockwise_sparse_writer,
208 |         env       = env,
209 |         file      = handle,
210 |         name      = name,
211 |         as.sparse = TRUE
212 |     )
213 |     # Blocks return per-row non-zero counts; flatten them into one vector
214 |     out <- as.double(unlist(out))
215 |     iname <- file.path(name, "indptr")
216 | 
217 |     rhdf5::h5createDataset(
218 |         handle,
219 |         iname,
220 |         dims   = length(out) + 1L,
221 |         H5type = "H5T_NATIVE_UINT64"
222 |     )
223 |     # `indptr` is the running total of those counts, starting from zero
224 |     rhdf5::h5writeDataset(c(0, cumsum(out)), handle, iname)
225 | }
226 | 
227 | #' @importFrom DelayedArray nzdata nzindex
228 | .blockwise_sparse_writer <- function(block, env, file, name) {
229 |     # Non-zero entries of this block, reordered so they are grouped by row
230 |     coords <- nzindex(block)
231 |     by_row <- order(coords[, 1])
232 |     rows <- coords[by_row, 1]
233 |     cols <- coords[by_row, 2]
234 |     vals <- nzdata(block)[by_row]
235 | 
236 |     # Positions in the on-disk datasets that this block's entries will occupy
237 |     offset <- env$last
238 |     slots <- list(offset + seq_along(cols))
239 |     new_end <- offset + length(cols)
240 | 
241 |     # Grow each dataset and append: zero-based column indices, then the values
242 |     indices_path <- file.path(name, "indices")
243 |     rhdf5::h5set_extent(file, indices_path, new_end)
244 |     rhdf5::h5writeDataset(cols - 1L, file, indices_path, index = slots)
245 |     data_path <- file.path(name, "data")
246 |     rhdf5::h5set_extent(file, data_path, new_end)
247 |     rhdf5::h5writeDataset(vals, file, data_path, index = slots)
248 | 
249 |     # Advance the shared counter and report how many non-zeros each row has
250 |     env$last <- new_end
251 |     tabulate(rows, nrow(block))
252 | }
253 | 
254 | # nocov end
255 | 


--------------------------------------------------------------------------------
/R/zellkonverter-package.R:
--------------------------------------------------------------------------------
 1 | #' @import SummarizedExperiment
 2 | #' @import SingleCellExperiment
 3 | "_PACKAGE"
 4 | 
 5 | # The following block is used by usethis to automatically manage
 6 | # roxygen namespace tags. Modify with care!
 7 | ## usethis namespace: start
 8 | ## usethis namespace: end
 9 | NULL
10 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | <img src="man/figures/zellkonverter.png" align="right" alt="zellkonverter logo" width="180" />
 2 | 
 3 | # zellkonverter
 4 | 
 5 | <!-- badges: start -->
 6 | [![Project Status](http://www.repostatus.org/badges/latest/active.svg)](http://www.repostatus.org/#active)
 7 | [![Lifecycle](https://lifecycle.r-lib.org/articles/figures/lifecycle-stable.svg)](https://lifecycle.r-lib.org/articles/stages.html#stable)
 8 | [![Codecov test coverage](https://codecov.io/gh/theislab/zellkonverter/graph/badge.svg)](https://app.codecov.io/gh/theislab/zellkonverter)
 9 | [![R-CMD-check-bioc](https://github.com/theislab/zellkonverter/actions/workflows/check-bioc.yml/badge.svg)](https://github.com/theislab/zellkonverter/actions/workflows/check-bioc.yml)
10 | [![Bioc release status](http://www.bioconductor.org/shields/build/release/bioc/zellkonverter.svg)](https://bioconductor.org/checkResults/release/bioc-LATEST/zellkonverter)
11 | [![Bioc devel status](http://www.bioconductor.org/shields/build/devel/bioc/zellkonverter.svg)](https://bioconductor.org/checkResults/devel/bioc-LATEST/zellkonverter)
12 | [![Bioc downloads rank](https://bioconductor.org/shields/downloads/release/zellkonverter.svg)](http://bioconductor.org/packages/stats/bioc/zellkonverter/)
13 | [![Bioc support](https://bioconductor.org/shields/posts/zellkonverter.svg)](https://support.bioconductor.org/tag/zellkonverter)
14 | [![Bioc history](https://bioconductor.org/shields/years-in-bioc/zellkonverter.svg)](https://bioconductor.org/packages/release/bioc/html/zellkonverter.html#since)
15 | [![Bioc last commit](https://bioconductor.org/shields/lastcommit/devel/bioc/zellkonverter.svg)](http://bioconductor.org/checkResults/devel/bioc-LATEST/zellkonverter/)
16 | [![Bioc dependencies](https://bioconductor.org/shields/dependencies/release/zellkonverter.svg)](https://bioconductor.org/packages/release/bioc/html/zellkonverter.html#since)
17 | <!-- badges: end -->
18 | 
19 | **zellkonverter** is a small package for converting between SingleCellExperiment
20 | objects and alternative objects for storing single-cell RNA-sequencing data
21 | (such as AnnData). It is built on top of the [**basilisk**][basilisk] package.
22 | 
23 | For documentation please refer to [Bioconductor][bioc]. Development
24 | documentation is also available on [Bioconductor devel][bioc-devel] or the
25 | [pkgdown site][pkgdown].
26 | 
27 | ## Installation
28 | 
29 | **zellkonverter** can be installed from Bioconductor using the **BiocManager**
30 | package:
31 | 
32 | ```r
33 | if (!requireNamespace("BiocManager", quietly=TRUE)) {
34 |     install.packages("BiocManager")
35 | }
36 | BiocManager::install("zellkonverter")
37 | ```
38 | 
39 | ## Build status
40 | 
41 | |      Source      |       Checks     |    Updated   |
42 | |:----------------:|:----------------:|:------------:|
43 | | [Bioc release](http://bioconductor.org/packages/release/bioc/html/zellkonverter.html) | [![Bioc release status](http://www.bioconductor.org/shields/build/release/bioc/zellkonverter.svg)](https://bioconductor.org/checkResults/release/bioc-LATEST/zellkonverter) | ![](http://bioconductor.org/shields/lastcommit/release/bioc/zellkonverter.svg) |
44 | | [Bioc devel](http://bioconductor.org/packages/devel/bioc/html/zellkonverter.html) | [![Bioc devel status](http://www.bioconductor.org/shields/build/devel/bioc/zellkonverter.svg)](https://bioconductor.org/checkResults/devel/bioc-LATEST/zellkonverter) | ![](http://bioconductor.org/shields/lastcommit/devel/bioc/zellkonverter.svg) |
45 | | [GitHub actions](https://github.com/theislab/zellkonverter/actions) | [![R-CMD-check-bioc](https://github.com/theislab/zellkonverter/actions/workflows/check-bioc.yml/badge.svg)](https://github.com/theislab/zellkonverter/actions) | ![GitHub last commit](https://img.shields.io/github/last-commit/theislab/zellkonverter) |
46 | 
47 | ## Code of Conduct
48 | 
49 | Please note that the **zellkonverter** project is released with a
50 | [Contributor Code of Conduct](https://contributor-covenant.org/version/2/0/CODE_OF_CONDUCT.html).
51 | By contributing to this project, you agree to abide by its terms.
52 | 
53 | ## Contributors
54 | 
55 | <a href="https://github.com/theislab/zellkonverter/graphs/contributors">
56 |   <img src="https://contrib.rocks/image?repo=theislab/zellkonverter" />
57 | </a>
58 | 
59 | Made with [contributors-img](https://contrib.rocks).
60 | 
61 | [basilisk]: https://www.bioconductor.org/packages/basilisk/ "basilisk on Bioconductor"
62 | [bioc]: https://bioconductor.org/packages/zellkonverter/ "zellkonverter on Bioconductor"
63 | [bioc-devel]: https://bioconductor.org/packages/devel/bioc/html/zellkonverter.html "zellkonverter on Bioconductor devel"
64 | [pkgdown]: https://theislab.github.io/zellkonverter/ "zellkonverter pkgdown site"
65 | 
66 | 


--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
 1 | comment: false
 2 | 
 3 | coverage:
 4 |   status:
 5 |     project:
 6 |       default:
 7 |         target: auto
 8 |         threshold: 1%
 9 |         informational: true
10 |     patch:
11 |       default:
12 |         target: auto
13 |         threshold: 1%
14 |         informational: true
15 | 


--------------------------------------------------------------------------------
/configure:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Quote the interpreter path: R_HOME may contain spaces
3 | "${R_HOME}/bin/Rscript" -e "basilisk::configureBasiliskEnv()"
4 | 


--------------------------------------------------------------------------------
/configure.win:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Quote the interpreter path: R_HOME may contain spaces (e.g. Program Files)
3 | "${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe" -e "basilisk::configureBasiliskEnv()"
4 | 


--------------------------------------------------------------------------------
/inst/NEWS.Rd:
--------------------------------------------------------------------------------
  1 | \name{NEWS}
  2 | \title{News for Package \pkg{zellkonverter}}
  3 | 
  4 | \section{Version 1.18.0, Bioconductor 3.21 Release (April 2025)}{
  5 |   \subsection{New features}{
  6 |   \itemize{
  7 |     \item{
  8 |       Add minimal support for \code{SpatialExperiment} objects to
  9 |       \code{writeH5AD()} and \code{SCE2AnnData()}. This stores the spatial
 10 |       coordinates in a \code{obsm} item named "spatial" as expected by the
 11 |       \bold{squidpy} Python package. (PR from @mcmero)
 12 |     }
 13 |   }}
 14 |   \subsection{Major changes}{
 15 |   \itemize{
 16 |     \item{
 17 |       Add environment for \bold{anndata} v0.11.4. This is now the default
 18 |       environment for the Python reader/writer.
 19 |     }
 20 |     \item{
 21 |       Modify \code{SCE2AnnData()} to convert sparse matrices to \code{dgRMatrix}
 22 |       when they are. This mostly applies to assays and should be more compatible
 23 |       with what is expected by Python packages.
 24 |     }
 25 |   }}
 26 |   \subsection{Minor changes}{
 27 |   \itemize{
 28 |     \item{
 29 |       Add a \code{testload} argument to \code{basiliskRun()} calls which may
 30 |       help with problems creating Python environments
 31 |     }
 32 |     \item{
 33 |       Updates to documentation and tests
 34 |     }
 35 |   }}
 36 |   \subsection{Bug fixes}{
 37 |   \itemize{
 38 |     \item{
 39 |       Improve handling of missing row or column names in \code{SCE2AnnData()}
 40 |     }
 41 |   }}
 42 | }
 43 | 
 44 | \section{Version 1.16.0, Bioconductor 3.20 Release (October 2024)}{
 45 |   \subsection{Major changes}{
 46 |   \itemize{
 47 |     \item{
 48 |       Add environment for \bold{anndata} v0.10.9. This is now the default
 49 |       environment for the Python reader/writer.
 50 |     }
 51 |   }}
 52 |   \subsection{Minor changes}{
 53 |   \itemize{
 54 |     \item{
 55 |       Avoid deprecation warning due to setting \code{dtype} when creating Python
 56 |       \code{AnnData} objects
 57 |     }
 58 |     \item{
 59 |       Standardise code styling using \bold{{styler}}
 60 |     }
 61 |     \item{
 62 |       Add test using the more complete example H5AD file
 63 |     }
 64 |   }}
 65 |   \subsection{Bug fixes}{
 66 |   \itemize{
 67 |     \item{
 68 |       Correctly assign levels to factors in R reader with \bold{anndata} v0.7
 69 |       files
 70 |     }
 71 |     \item{
 72 |       Correctly set \code{filepath} in the R reader with reading
 73 |       \code{adata.raw} with \code{use_hdf5 = TRUE} (PR from @GabrielHoffman)
 74 |     }
 75 |   }}
 76 | }
 77 | 
 78 | \section{Version 1.14.0, Bioconductor 3.19 Release (May 2024)}{
 79 |   \subsection{Major changes}{
 80 |   \itemize{
 81 |     \item{
 82 |       Add environment for \bold{anndata} v0.10.6. This is now the default
 83 |       environment for the Python reader/writer.
 84 |     }
 85 |   }}
 86 |   \subsection{Minor changes}{
 87 |   \itemize{
 88 |     \item{
 89 |       Improve warnings when converting matrices fails
 90 |     }
 91 |     \item{
 92 |       Minor change to writing \code{DelayedArray} matrices for compatibility
 93 |       with \bold{{HDF5Array}} >= v1.31.1
 94 |     }
 95 |   }}
 96 |   \subsection{Bug fixes}{
 97 |   \itemize{
 98 |     \item{
 99 |       Correctly handle \code{use_backed = TRUE} with newer \bold{anndata}
100 |       versions
101 |     }
102 |     \item{
103 |       Correctly instantiate the \bold{anndata} v0.10.2 environment
104 |     }
105 |     \item{
106 |       Minor fixes for typos etc.
107 |     }
108 |   }}
109 | }
110 | 
111 | \section{Version 1.12.0, Bioconductor 3.18 Release (October 2023)}{
112 |   \subsection{Major changes}{
113 |   \itemize{
114 |     \item{
115 |       Add environments for \bold{anndata} v0.9.2 and v0.10.2. Version 0.10.2 is
116 |       now the default environment for the Python reader/writer.
117 |     }
118 |   }}
119 |   \subsection{Minor changes}{
120 |   \itemize{
121 |     \item{
122 |       Changes for compatibility with \bold{rhdf5} v2.45.1 including enum types
123 |       that simplifies reading of nullable types in the native R reader
124 |     }
125 |     \item{
126 |       Dimensions are now passed correctly when converting the \code{raw} slot
127 |     }
128 |     \item{
129 |       Backed sparse matrices are now converted in \code{AnnData2SCE()}
130 |     }
131 |   }}
132 | }
133 | 
134 | \section{Version 1.10.0, Bioconductor 3.17 Release (April 2023)}{
135 |   \subsection{Major changes}{
136 |   \itemize{
137 |     \item{
138 |       Add compatibility with the \bold{anndata} v0.8 H5AD format to the
139 |       native R writer (By @jackkamm and @mtmorgan)
140 |     }
141 |     \item{
142 |       Add functions for converting \bold{pandas} arrays used by \bold{anndata}
143 |       when arrays have missing values
144 |     }
145 |   }}
146 |   \subsection{Minor changes}{
147 |   \itemize{
148 |     \item{
149 |       Add Robrecht Cannoodt and Jack Kamm as contributors!
150 |     }
151 |     \item{
152 |       Minor adjustments to tests to match reader changes
153 |     }
154 |   }}
155 | }
156 | 
157 | \section{Version 1.8.0, Bioconductor 3.16 Release (October 2022)}{
158 |   \subsection{Major changes}{
159 |   \itemize{
160 |     \item{
161 |       Improve compatibility with the R \bold{anndata} package. This required
162 |       modifying conversion functions so that Python objects are explicitly
163 |       converted rather than relying on automatic conversion.
164 |     }
165 |     \item{
166 |       Added support for \bold{numpy} recarrays. This solves a long-standing
167 |       issue and allows results from \bold{scanpy}'s \code{rank_genes_groups()}
168 |       function to be read.
169 |     }
170 |   }}
171 |   \subsection{Minor changes}{
172 |   \itemize{
173 |     \item{
174 |       The Python version is now pinned in the \bold{anndata} v0.7.6 environment
175 |       for compatibility with changes in \bold{basilisk}
176 |     }
177 |     \item{
178 |       Instantiate Python environments so they can be properly picked up by
179 |       \code{basilisk::configureBasiliskEnv()}
180 |     }
181 |     \item{
182 |       Allow missing obs/var names when \code{use_hdf5 = TRUE}
183 |     }
184 |     \item{
185 |       Minor changes to the UI functions for compatibility with \bold{cli} v3.4.0
186 |     }
187 |     \item{
188 |       Minor changes for compatibility with \bold{Matrix} v1.4-2
189 |     }
190 |     \item{
191 |       Improvements to the UI for warnings
192 |     }
193 |     \item{
194 |       Updates and improvements to tests
195 |     }
196 |   }}
197 | }
198 | 
199 | \section{Version 1.6.0, Bioconductor 3.15 Release (April 2022)}{
200 |   \subsection{Major changes}{
201 |   \itemize{
202 |     \item{
203 |       Added support for multiple \bold{basilisk} environments with different
204 |       \bold{anndata} versions. Users can now specify the environment to use with
205 |       options in \code{readH5AD()} and \code{writeH5AD()}. To facilitate this
206 |       some exported objects were converted to functions but this should only
207 |       affect developers.
208 |     }
209 |     \item{
210 |       Updated the default environment to use \bold{anndata} v0.8.0. This is a
211 |       major update and files written with v0.8.0 cannot be read by previous
212 |       \bold{anndata} versions. This was the motivation for supporting multiple
213 |       environments and users can select the previous environment with
214 |       \bold{anndata} v0.7.6 if compatibility is required.
215 |     }
216 |     \item{
217 |       Standardise naming in \code{AnnData2SCE()}. Column names of data frames
218 |       and names of list items will now be modified to match R conventions
219 |       (according to \code{make.names()}). When this happens a warning will be
220 |       issued listing the modifications. This makes sure that everything in the
221 |       created \code{SingleCellExperiment} is accessible.
222 |     }
223 |   }}
224 |   \subsection{Minor changes}{
225 |   \itemize{
226 |     \item{
227 |       Allow \code{data.frame}'s stored in \code{varm} to be converted in
228 |       \code{SCE2AnnData()}
229 |     }
230 |     \item{
231 |       Minor updates to the vignette and other documentation.
232 |     }
233 |     \item{
234 |       Updates to tests to match the changes above.
235 |     }
236 |   }}
237 | }
238 | 
239 | \section{Version 1.4.0, Bioconductor 3.14 Release (October 2021)}{
240 |   \itemize{
241 |     \item{
242 |       Add arguments to control how slots are converted in \code{AnnData2SCE()}
243 |       and \code{SCE2AnnData()}. Each slot can now be fully converted, skipped
244 |       entirely or only selected items converted.
245 |     }
246 |     \item{
247 |       Add support for converting the \code{raw} slot to an \code{altExp} in
248 |       \code{AnnData2SCE()}
249 |     }
250 |     \item{
251 |       Add recursive conversion of lists in \code{AnnData2SCE()}
252 |     }
253 |     \item{
254 |       Add progress messages to various functions. These can be controlled by
255 |       function arguments or a global variable.
256 |     }
257 |     \item{
258 |       Add long tests for various public datasets. This should help to make the
259 |       package more robust
260 |     }
261 |     \item{
262 |       Fix bug in converting \code{dgRMatrix} sparse matrices
263 |     }
264 |     \item{
265 |       Correctly handle \code{DataFrame} objects stored in \code{adata.obsm}
266 |     }
267 |   }
268 | }
269 | 
270 | \section{Version 1.2.0, Bioconductor 3.13 Release (May 2021)}{
271 |   \itemize{
272 |     \item{
273 |       Update \strong{anndata} and other Python dependencies, now using
274 |       \strong{anndata} v0.7.6
275 |     }
276 |     \item{
277 |       Improved conversion checks for all slots in \code{AnnData2SCE()}
278 |     }
279 |     \item{
280 |       Enable return conversion of the \code{varm} slot in \code{AnnData2SCE()}
281 |     }
282 |     \item{
283 |       Avoid converting \code{obsp} and \code{varp} to dense matrices in
284 |       \code{AnnData2SCE()}
285 |     }
286 |     \item{
287 |       \code{AnnData2SCE()} should now always return \code{dgCMatrix} matrices
288 |       when assays are sparse
289 |     }
290 |     \item{
291 |       More consistent conversion of \code{metadata} to \code{uns} in
292 |       \code{SCE2AnnData()}
293 |     }
294 |     \item{
295 |       Handle conversion of list columns in \code{colData} and \code{rowData} in
296 |       \code{SCE2AnnData()}
297 |     }
298 |     \item{
299 |       Better support for converting \strong{anndata} \code{SparseDataset} arrays
300 |     }
301 |     \item{
302 |       Improved support for conversion of HDF5 backed \code{AnnData} objects
303 |     }
304 |     \item{
305 |       Better support for writing \code{DelayedArray} assays in
306 |       \code{writeH5AD()}
307 |     }
308 |     \item{
309 |       Store \code{X_name} in \code{AnnData2SCE()} for use by
310 |       \code{SCE2AnnData()} and add an \code{X_name} argument to
311 |       \code{AnnData2SCE()} and \code{readH5AD()}
312 |     }
313 |     \item{
314 |       Add a \code{compression} argument to \code{writeH5AD()}
315 |     }
316 |     \item{
317 |       Add an experimental native R reader to \code{readH5AD()}
318 |     }
319 |     \item{
320 |       Export \code{zellkonverterAnnDataEnv} for use by other packages
321 |     }
322 |   }
323 | }
324 | 
325 | \section{Version 1.0.0, Bioconductor 3.12 Release (October 2020)}{
326 |   \itemize{
327 |     \item{Accepted into Bioconductor for Release 3.12}
328 |     \item{
329 |       zellkonverter provides methods to convert between Python AnnData objects
330 |       and SingleCellExperiment objects. These are primarily intended for use by
331 |       downstream Bioconductor packages that wrap Python methods for single-cell
332 |       data analysis. It also includes functions to read and write H5AD files
333 |       used for saving AnnData objects to disk.
334 |     }
335 |   }
336 | }
337 | 


--------------------------------------------------------------------------------
/inst/WORDLIST:
--------------------------------------------------------------------------------
 1 | AnnData
 2 | Bioc
 3 | BiocManager
 4 | Codecov
 5 | DataFrames
 6 | DelayedArray
 7 | GTEX
 8 | HDF
 9 | Lifecycle
10 | ORCID
11 | SCE
12 | SingleCellExperiment
13 | Zeisel
14 | altExp
15 | anndata
16 | biocViews
17 | cli
18 | colData
19 | conda
20 | dtype
21 | getOption
22 | hdf
23 | img
24 | mtmorgan
25 | natsort
26 | numpy
27 | obs
28 | pkgdown
29 | py
30 | recarrays
31 | rhdf
32 | rowData
33 | scRNA
34 | scRNAseq
35 | scipy
36 | seq
37 | sqlite
38 | var
39 | varm
40 | zellkonverter's
41 | 


--------------------------------------------------------------------------------
/inst/extdata/example_anndata.h5ad:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theislab/zellkonverter/c17a17220088ff880d512c392d5de4aacb9e9bb1/inst/extdata/example_anndata.h5ad


--------------------------------------------------------------------------------
/inst/extdata/krumsiek11.h5ad:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theislab/zellkonverter/c17a17220088ff880d512c392d5de4aacb9e9bb1/inst/extdata/krumsiek11.h5ad


--------------------------------------------------------------------------------
/inst/extdata/krumsiek11_augmented_v0-8.h5ad:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theislab/zellkonverter/c17a17220088ff880d512c392d5de4aacb9e9bb1/inst/extdata/krumsiek11_augmented_v0-8.h5ad


--------------------------------------------------------------------------------
/inst/scripts/example_anndata.R:
--------------------------------------------------------------------------------
 1 | # This script was used to create the `example_anndata.h5ad` file.
 2 | # This file contains an example AnnData object for use in examples and tests.
 3 | # A dataset with 200 cells and 500 genes was generated using the Splat simulation in the Splatter package.
 4 | # A Python AnnData object was created using this data (via reticulate) and run through a standard Scanpy analysis workflow to populate the various slots.
 5 | # The AnnData object was then saved to disk as a .h5ad file.
 6 | #
 7 | # Key package versions:
 8 | #
 9 | # splatter   v1.14.0
10 | # reticulate v1.18
11 | # scanpy     v1.5.1
12 | # anndata    v0.7.4
13 | 
14 | library(splatter)
15 | library(reticulate)
16 | 
17 | mini_sim <- splatSimulateGroups(batchCells = 200, nGenes = 500, lib.loc = 8,
18 |                                 group.prob = c(0.5, 0.5), seed = 1)  # Splat simulation: 200 cells, 500 genes, two equally likely groups, fixed seed
19 | # Load the Python modules through reticulate
20 | anndata <- import("anndata")
21 | scanpy  <- import("scanpy")
22 | # Build the AnnData object; counts are transposed so the simulated cells (columns) become obs
23 | adata <- anndata$AnnData(t(counts(mini_sim)))
24 | adata$obs_names <- colnames(mini_sim)
25 | adata$var_names <- rownames(mini_sim)
26 | adata$layers <- list(counts = t(counts(mini_sim)))  # store the simulated counts as a separate "counts" layer
27 | # Scanpy workflow; the calls run in this order, each acting on the same adata object
28 | scanpy$pp$filter_genes(adata, min_counts = 10)
29 | scanpy$pp$normalize_total(adata, target_sum = 1e4)
30 | scanpy$pp$log1p(adata)
31 | scanpy$pp$highly_variable_genes(adata)
32 | scanpy$tl$pca(adata, svd_solver = "arpack")
33 | scanpy$pp$neighbors(adata, n_pcs = 10L)  # 10L keeps n_pcs an integer when passed to Python
34 | scanpy$tl$umap(adata)
35 | scanpy$tl$louvain(adata)
36 | scanpy$tl$rank_genes_groups(adata, "louvain")
37 | # Save the object; the path is relative to the current working directory
38 | adata$write_h5ad("example_anndata.h5ad")
39 | 


--------------------------------------------------------------------------------
/inst/scripts/krumsiek11.md:
--------------------------------------------------------------------------------
1 | The `krumsiek11.h5ad` file contains an AnnData object with a simulated myeloid
2 | progenitor scRNA-seq dataset. It was created using the
3 | `scanpy.datasets.krumsiek11()` function in the **scanpy** Python package and
4 | saved as a `.h5ad` file using the `adata.write()` method. This file is included
5 | in **zellkonverter** for tests and examples that require reading a `.h5ad` file
6 | from disk. More details on this dataset can be found in the **scanpy**
7 | documentation at https://scanpy.readthedocs.io/en/stable/api/scanpy.datasets.krumsiek11.html#scanpy.datasets.krumsiek11.
8 | 


--------------------------------------------------------------------------------
/inst/scripts/krumsiek11_augmented.py:
--------------------------------------------------------------------------------
 1 | # This script was used to create the `krumsiek11_augmented_v0-8.h5ad`
 2 | # file. It adds some extra data to the previous `krumsiek11.h5ad`
 3 | # dataset to cover some additional cases for testing (NAs, booleans,
 4 | # etc). The data was saved in AnnData=0.8.0 format.
 5 | #
 6 | # Key package versions:
 7 | #  - anndata=0.8.0
 8 | #  - h5py=3.8.0
 9 | #  - hdf5=1.14.0
10 | #  - numpy=1.23.5
11 | #  - pandas=1.5.3
12 | #  - python=3.9.16
13 | #  - scanpy=1.9.2
14 | 
15 | import numpy as np
16 | import pandas as pd
17 | import anndata as ad
18 | 
19 | adata = ad.read_h5ad("krumsiek11.h5ad")
20 | 
21 | # add string column to rowData/var. Make the entries unique so it's
22 | # saved as str instead of factor
23 | adata.var["dummy_str"] = [f"row{i}" for i in range(adata.shape[1])]
24 | 
25 | # add float column to colData/obs
26 | adata.obs["dummy_num"] = 42.42
27 | 
28 | # float column with NA
29 | adata.obs["dummy_num2"] = adata.obs["dummy_num"]
30 | adata.obs["dummy_num2"][0] = float("nan")
31 | 
32 | # int column
33 | adata.obs["dummy_int"] = np.arange(adata.shape[0])
34 | 
35 | # int column with NA
36 | adata.obs["dummy_int2"] = pd.array([None] + [42] * (adata.shape[0] - 1))
37 | 
38 | # bool column
39 | adata.obs["dummy_bool"] = True
40 | adata.obs["dummy_bool"][0] = False
41 | 
42 | # bool column with NA
43 | adata.obs["dummy_bool2"] = pd.array([False, None] + [True] * (adata.shape[0] - 2))
44 | 
45 | # also add some entries to the metadata/uns
46 | adata.uns["dummy_category"] = pd.array(["a", "b", None], dtype="category")
47 | 
48 | adata.uns["dummy_bool"] = [True, True, False]
49 | adata.uns["dummy_bool2"] = pd.array([True, False, None])
50 | 
51 | adata.uns["dummy_int"] = [1,2,3]
52 | adata.uns["dummy_int2"] = pd.array([1,2,None])
53 | 
54 | adata.write("krumsiek11_augmented_v0-8.h5ad")
55 | 


--------------------------------------------------------------------------------
/longtests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(zellkonverter)
3 | 
4 | test_check("zellkonverter")  # run the testthat suite for the zellkonverter package
5 | 


--------------------------------------------------------------------------------
/longtests/testthat/test-cellrank_pancreas.R:
--------------------------------------------------------------------------------
 1 | library(SingleCellExperiment)
 2 | library(BiocFileCache)
 3 | 
 4 | cache <- BiocFileCache(ask = FALSE)  # ask = FALSE: set up the cache without prompting
 5 | file <- bfcrpath(cache, "https://figshare.com/ndownloader/files/30683438")  # local path of the cached download
 6 | outfile <- tempfile(fileext = ".h5ad")  # scratch path shared by the write and round-trip tests
 7 | # Item names handed to validateH5ADSCE() below, grouped by slot
 8 | names <- list(
 9 |     assays = c("X", "Ms", "Mu", "fit_t", "fit_tau", "fit_tau_", "spliced",
10 |                "unspliced", "velocity", "velocity_u"),
11 |     colData = c("day", "proliferation", "G2M_score", "S_score", "phase",
12 |                 "clusters_coarse", "clusters", "clusters_fine", "louvain_Alpha",
13 |                 "louvain_Beta", "palantir_pseudotime", "initial_size_spliced",
14 |                 "initial_size_unspliced", "initial_size", "n_counts",
15 |                 "velocity_self_transition", "terminal_states",
16 |                 "terminal_states_probs", "initial_states",
17 |                 "initial_states_probs", "velocity_pseudotime", "latent_time",
18 |                 "dpt_pseudotime"),
19 |     rowData = c("highly_variable_genes", "gene_count_corr", "means",
20 |                 "dispersions", "dispersions_norm", "highly_variable", "fit_r2",
21 |                 "fit_alpha", "fit_beta", "fit_gamma", "fit_t_", "fit_scaling",
22 |                 "fit_std_u", "fit_std_s", "fit_likelihood", "fit_u0", "fit_s0",
23 |                 "fit_pval_steady", "fit_steady_u", "fit_steady_s",
24 |                 "fit_variance", "fit_alignment_scaling", "velocity_genes",
25 |                 "to.Epsilon.corr", "to.Alpha.corr", "to.Beta.corr",
26 |                 "to.Epsilon.qval", "to.Alpha.qval", "to.Beta.qval"),
27 |     metadata = c("T_bwd_params", "clusters_colors", "clusters_fine_colors",
28 |                  "clusters_sizes", "diffmap_evals", "eig_bwd", "eig_fwd",
29 |                  "initial_states_colors", "initial_states_names", "iroot",
30 |                  "louvain_Alpha_colors", "louvain_Beta_colors", "neighbors",
31 |                  "paga", "pca", "recover_dynamics", "terminal_states_colors",
32 |                  "terminal_states_names", "to_terminal_states_colors",
33 |                  "to_terminal_states_names", "velocity_graph",
34 |                  "velocity_graph_neg", "velocity_params"),
35 |     redDim = c("X_diffmap", "X_pca", "X_umap", "macrostates_bwd",
36 |                "macrostates_fwd", "to_terminal_states", "velocity_umap"),
37 |     varm = c("PCs", "loss"),
38 |     colPairs = c("T_bwd", "T_fwd", "connectivities", "distances")
39 | )
40 | # Third argument of validateH5ADSCE(); NOTE(review): presumably items allowed to be absent - confirm
41 | missing <- list()
42 | # Reading must raise the warning about modified var column names
43 | test_that("Reading H5AD works", {
44 |     expect_warning(
45 |         {sce <- readH5AD(file)},
46 |         "The names of these selected var columns have been modified"
47 |     )
48 |     expect_s4_class(sce, "SingleCellExperiment")
49 | })
50 | # Object shared by the remaining tests; warnings are silenced here
51 | sce <- suppressWarnings(readH5AD(file))
52 | 
53 | test_that("SCE is valid", {
54 |     validateH5ADSCE(sce, names, missing)
55 | })
56 | # Creates outfile, which the round-trip test below reads back, so order matters
57 | test_that("Writing H5AD works", {
58 |     writeH5AD(sce, outfile)
59 |     expect_true(file.exists(outfile))
60 | })
61 | # Re-read the written file and compare it with the original via expectSCE()
62 | test_that("Round trip is as expected", {
63 |     out <- readH5AD(outfile)
64 | 
65 |     expectSCE(out, sce)
66 | })
67 | 


--------------------------------------------------------------------------------
/longtests/testthat/test-example_anndata.R:
--------------------------------------------------------------------------------
 1 | library(SingleCellExperiment)
 2 | 
 3 | file <- system.file("extdata", "example_anndata.h5ad",
 4 |                     package = "zellkonverter")  # example file from the package's extdata directory
 5 | outfile <- tempfile(fileext = ".h5ad")  # scratch path shared by the write and round-trip tests
 6 | # Item names handed to validateH5ADSCE() below, grouped by slot
 7 | names <- list(
 8 |     assays = c("X", "counts"),
 9 |     colData = "louvain",
10 |     rowData = c("n_counts", "highly_variable", "means", "dispersions",
11 |                 "dispersions_norm"),
12 |     metadata = c("louvain", "neighbors", "pca", "rank_genes_groups", "umap"),
13 |     redDim = c("X_pca", "X_umap"),
14 |     varm = "PCs",
15 |     colPairs = c("connectivities", "distances")
16 | )
17 | # Third argument of validateH5ADSCE(); NOTE(review): presumably items allowed to be absent - confirm
18 | missing <- list()
19 | # Reading should yield a SingleCellExperiment
20 | test_that("Reading H5AD works", {
21 |     sce <- readH5AD(file)
22 |     expect_s4_class(sce, "SingleCellExperiment")
23 | })
24 | # Object shared by the remaining tests; warnings are silenced here
25 | sce <- suppressWarnings(readH5AD(file))
26 | 
27 | test_that("SCE is valid", {
28 |     validateH5ADSCE(sce, names, missing)
29 | })
30 | # Creates outfile, which the round-trip test below reads back, so order matters
31 | test_that("Writing H5AD works", {
32 |     writeH5AD(sce, outfile)
33 |     expect_true(file.exists(outfile))
34 | })
35 | # Re-read the written file and compare it with the original via expectSCE()
36 | test_that("Round trip is as expected", {
37 |     out <- readH5AD(outfile)
38 |     expectSCE(out, sce)
39 | })
40 | 


--------------------------------------------------------------------------------
/longtests/testthat/test-gtex_8tissues.R:
--------------------------------------------------------------------------------
 1 | library(SingleCellExperiment)
 2 | library(BiocFileCache)
 3 | 
 4 | cache <- BiocFileCache(ask = FALSE)  # ask = FALSE: set up the cache without prompting
 5 | # Available from https://www.gtexportal.org/home/downloads/adult-gtex/single_cell
 6 | file <- bfcrpath(cache, "https://storage.googleapis.com/adult-gtex/single-cell/v9/snrna-seq-data/GTEx_8_tissues_snRNAseq_atlas_071421.public_obs.h5ad")
 7 | outfile <- tempfile(fileext = ".h5ad")  # scratch path shared by the write and round-trip tests
 8 | # Item names handed to validateH5ADSCE() below, grouped by slot
 9 | names <- list(
10 |     assays = c("X", "counts"),
11 |     colData = c(
12 |         "n_genes", "fpr", "tissue", "prep", "individual", "nGenes", "nUMIs",
13 |         "PercentMito", "PercentRibo", "Age_bin", "Sex", "Sample.ID",
14 |         "Participant.ID", "Sample.ID.short",
15 |         "RIN.score.from.PAXgene.tissue.Aliquot",
16 |         "RIN.score.from.Frozen.tissue.Aliquot", "Autolysis.Score",
17 |         "Sample.Ischemic.Time..mins.", "Tissue.Site.Detail", "scrublet",
18 |         "scrublet_score", "barcode", "batch", "n_counts",
19 |         "tissue.individual.prep", "Broad.cell.type", "Granular.cell.type",
20 |         "introns", "junctions", "exons", "sense", "antisense", "intergenic",
21 |         "batch.barcode", "exon_ratio", "intron_ratio", "junction_ratio",
22 |         "log10_nUMIs", "leiden", "leiden_tissue", "Tissue.composition",
23 |         "Cell.types.level.2", "Cell.types.level.3", "Broad.cell.type.numbers",
24 |         "Broad.cell.type..numbers.", "Tissue", "channel"
25 |     ),
26 |     rowData = c(
27 |         "gene_ids", "Chromosome", "Source", "Start", "End", "Strand",
28 |         "gene_name", "gene_source", "gene_biotype", "gene_length",
29 |         "gene_coding_length", "Approved.symbol", "Approved.name", "Status",
30 |         "Previous.symbols", "Alias.symbols", "gene_include", "n_cells"
31 |     ),
32 |     metadata = c(
33 |         "Broad.cell.type..numbers._colors", "Broad.cell.type.numbers_colors",
34 |         "Broad.cell.type_colors", "Broad.cell.type_logregcv_vae_colors",
35 |         "Broad.cell.type_sizes", "Granular.cell.type_colors",
36 |         "Participant.ID_colors", "Sex_colors", "Tissue.composition_colors",
37 |         "Tissue_colors", "dendrogram_..Broad.cell.type..", "leiden",
38 |         "leiden_colors", "leiden_sub_colors", "neighbors", "paga",
39 |         "prep_colors", "tissue_colors", "umap"
40 |     ),
41 |     redDim = c(
42 |         "X_pca", "X_umap", "X_umap_tissue", "X_vae_mean", "X_vae_mean_tissue",
43 |         "X_vae_samples", "X_vae_var"
44 |     ),
45 |     varm = c("spring_leiden_sub"),
46 |     colPairs = c("connectivities", "distances")
47 | )
48 | # Third argument of validateH5ADSCE(); NOTE(review): presumably items allowed to be absent - confirm
49 | missing <- list()
50 | # Reading must raise the warning about modified uns item names
51 | test_that("Reading H5AD works", {
52 |     expect_warning(
53 |         {sce <- readH5AD(file)},
54 |         "The names of these selected uns items have been modified"
55 |     )
56 |     expect_s4_class(sce, "SingleCellExperiment")
57 | })
58 | # Object shared by the remaining tests; warnings are silenced here
59 | sce <- suppressWarnings(readH5AD(file))
60 | 
61 | test_that("SCE is valid", {
62 |     validateH5ADSCE(sce, names, missing)
63 | })
64 | # Creates outfile, which the round-trip test below reads back, so order matters
65 | test_that("Writing H5AD works", {
66 |     writeH5AD(sce, outfile)
67 |     expect_true(file.exists(outfile))
68 | })
69 | # Re-read the written file and compare it with the original via expectSCE()
70 | test_that("Round trip is as expected", {
71 |     out <- readH5AD(outfile)
72 |     expectSCE(out, sce)
73 | })
74 | 


--------------------------------------------------------------------------------
/longtests/testthat/test-pegasus_marrow.R:
--------------------------------------------------------------------------------
 1 | library(SingleCellExperiment)
 2 | library(BiocFileCache)
 3 | 
 4 | cache <- BiocFileCache(ask = FALSE)  # ask = FALSE: set up the cache without prompting
 5 | file <- bfcrpath(cache, "https://figshare.com/ndownloader/files/30682400")  # local path of the cached download
 6 | outfile <- tempfile(fileext = ".h5ad")  # scratch path shared by the write and round-trip tests
 7 | # Item names handed to validateH5ADSCE() below, grouped by slot
 8 | names <- list(
 9 |     assays = c("X"),
10 |     colData = c("n_genes", "Channel", "n_counts", "percent_mito", "scale",
11 |                 "Group", "louvain_labels", "anno"),
12 |     rowData = c("featureid", "n_cells", "percent_cells", "robust",
13 |                 "highly_variable_features", "mean", "var", "hvf_loess",
14 |                 "hvf_rank"),
15 |     metadata = c("Channels", "Groups", "PCs", "W_diffmap", "W_pca_harmony",
16 |                  "c2gid", "diffmap_evals", "diffmap_knn_distances",
17 |                  "diffmap_knn_indices", "genome", "gncells",
18 |                  "louvain_resolution", "modality", "ncells", "norm_count",
19 |                  "pca", "pca_features", "pca_harmony_knn_distances",
20 |                  "pca_harmony_knn_indices", "stdzn_max_value", "stdzn_mean",
21 |                  "stdzn_std"),
22 |     redDim = c("X_diffmap", "X_fle", "X_pca", "X_pca_harmony", "X_phi",
23 |                "X_tsne", "X_umap"),
24 |     varm = c("de_res", "gmeans", "gstds", "means", "partial_sum")
25 | )
26 | # Third argument of validateH5ADSCE(); NOTE(review): presumably items allowed to be absent - confirm
27 | missing <- list()
28 | # Reading should yield a SingleCellExperiment
29 | test_that("Reading H5AD works", {
30 |     sce <- readH5AD(file)
31 |     expect_s4_class(sce, "SingleCellExperiment")
32 | })
33 | # Object shared by the remaining tests; warnings are silenced here
34 | sce <- suppressWarnings(readH5AD(file))
35 | 
36 | test_that("SCE is valid", {
37 |     validateH5ADSCE(sce, names, missing)
38 | })
39 | # Creates outfile, which the round-trip test below reads back, so order matters
40 | test_that("Writing H5AD works", {
41 |     writeH5AD(sce, outfile)
42 |     expect_true(file.exists(outfile))
43 | })
44 | # Re-read the written file and compare it with the original via expectSCE()
45 | test_that("Round trip is as expected", {
46 |     out <- readH5AD(outfile)
47 | 
48 |     expectSCE(out, sce)
49 | })
50 | 


--------------------------------------------------------------------------------
/longtests/testthat/test-scIB_pancreas.R:
--------------------------------------------------------------------------------
 1 | library(SingleCellExperiment)
 2 | library(BiocFileCache)
 3 | 
 4 | cache <- BiocFileCache(ask = FALSE)  # ask = FALSE: set up the cache without prompting
 5 | file <- bfcrpath(cache, "https://ndownloader.figshare.com/files/24539828")  # local path of the cached download
 6 | outfile <- tempfile(fileext = ".h5ad")  # scratch path shared by the write and round-trip tests
 7 | # Item names handed to validateH5ADSCE() below; only assays and colData are listed for this file
 8 | names <- list(
 9 |     assays = c("X", "counts"),
10 |     colData = c("tech", "celltype", "size_factors")
11 | )
12 | missing <- list()  # third argument of validateH5ADSCE(); NOTE(review): presumably items allowed to be absent - confirm
13 | # Reading should yield a SingleCellExperiment
14 | test_that("Reading H5AD works", {
15 |     sce <- readH5AD(file)
16 |     expect_s4_class(sce, "SingleCellExperiment")
17 | })
18 | # Object shared by the remaining tests; warnings are silenced here
19 | sce <- suppressWarnings(readH5AD(file))
20 | 
21 | test_that("SCE is valid", {
22 |     validateH5ADSCE(sce, names, missing)
23 | })
24 | # Creates outfile, which the round-trip test below reads back, so order matters
25 | test_that("Writing H5AD works", {
26 |     writeH5AD(sce, outfile)
27 |     expect_true(file.exists(outfile))
28 | })
29 | # Re-read the written file and compare it with the original via expectSCE()
30 | test_that("Round trip is as expected", {
31 |     out <- readH5AD(outfile)
32 |     expectSCE(out, sce)
33 | })
34 | 


--------------------------------------------------------------------------------
/longtests/testthat/test-scanpy_pbmc3k.R:
--------------------------------------------------------------------------------
 1 | library(SingleCellExperiment)
 2 | library(BiocFileCache)
 3 | 
 4 | cache <- BiocFileCache(ask = FALSE)  # ask = FALSE: set up the cache without prompting
 5 | file <- bfcrpath(cache, "https://ndownloader.figshare.com/files/30462915")  # local path of the cached download
 6 | outfile <- tempfile(fileext = ".h5ad")  # scratch path shared by the write and round-trip tests
 7 | # Item names handed to validateH5ADSCE() below, grouped by slot
 8 | names <- list(
 9 |     assays = c("X"),
10 |     colData = c("n_genes", "n_genes_by_counts", "total_counts",
11 |                 "total_counts_mt", "pct_counts_mt", "leiden"),
12 |     rowData = c("gene_ids", "n_cells", "mt", "n_cells_by_counts", "mean_counts",
13 |                 "pct_dropout_by_counts", "total_counts", "highly_variable",
14 |                 "means", "dispersions", "dispersions_norm", "mean", "std"),
15 |     metadata = c("hvg", "leiden", "neighbors", "pca", "rank_genes_groups",
16 |                  "umap"),
17 |     redDim = c("X_pca", "X_umap"),
18 |     varm = c("PCs"),
19 |     colPairs = c("connectivities", "distances")
20 | )
21 | # Third argument of validateH5ADSCE(); NOTE(review): presumably items allowed to be absent - confirm
22 | missing <- list()
23 | # Reading should yield a SingleCellExperiment
24 | test_that("Reading H5AD works", {
25 |     sce <- readH5AD(file)
26 |     expect_s4_class(sce, "SingleCellExperiment")
27 | })
28 | # Object shared by the remaining tests; warnings are silenced here
29 | sce <- suppressWarnings(readH5AD(file))
30 | 
31 | test_that("SCE is valid", {
32 |     validateH5ADSCE(sce, names, missing)
33 | })
34 | # Creates outfile, which the round-trip test below reads back, so order matters
35 | test_that("Writing H5AD works", {
36 |     writeH5AD(sce, outfile)
37 |     expect_true(file.exists(outfile))
38 | })
39 | # Re-read the written file and compare it with the original via expectSCE()
40 | test_that("Round trip is as expected", {
41 |     out <- readH5AD(outfile)
42 |     expectSCE(out, sce)
43 | })
44 | 


--------------------------------------------------------------------------------
/longtests/testthat/test-scanpy_trajectory.R:
--------------------------------------------------------------------------------
 1 | library(SingleCellExperiment)
 2 | library(BiocFileCache)
 3 | 
 4 | cache <- BiocFileCache(ask = FALSE)  # ask = FALSE: set up the cache without prompting
 5 | file <- bfcrpath(cache, "https://figshare.com/ndownloader/files/30594477")  # local path of the cached download
 6 | outfile <- tempfile(fileext = ".h5ad")  # scratch path shared by the write and round-trip tests
 7 | # Item names handed to validateH5ADSCE() below, grouped by slot
 8 | names <- list(
 9 |     assays = c("X"),
10 |     colData = c("paul15_clusters", "n_counts_all", "louvain", "dpt_pseudotime"),
11 |     rowData = c("n_counts", "mean", "std"),
12 |     metadata = c("diffmap_evals", "draw_graph", "iroot", "louvain",
13 |                  "louvain_sizes", "neighbors", "paga", "pca"),
14 |     redDim = c("X_diffmap", "X_draw_graph_fa", "X_pca"),
15 |     varm = c("PCs"),
16 |     colPairs = c("connectivities", "distances")
17 | )
18 | # Third argument of validateH5ADSCE(); NOTE(review): presumably items allowed to be absent - confirm
19 | missing <- list()
20 | # Reading should yield a SingleCellExperiment
21 | test_that("Reading H5AD works", {
22 |     sce <- readH5AD(file)
23 |     expect_s4_class(sce, "SingleCellExperiment")
24 | })
25 | # Object shared by the remaining tests; warnings are silenced here
26 | sce <- suppressWarnings(readH5AD(file))
27 | 
28 | test_that("SCE is valid", {
29 |     validateH5ADSCE(sce, names, missing)
30 | })
31 | # Creates outfile, which the round-trip test below reads back, so order matters
32 | test_that("Writing H5AD works", {
33 |     writeH5AD(sce, outfile)
34 |     expect_true(file.exists(outfile))
35 | })
36 | # Re-read the written file and compare it with the original via expectSCE()
37 | test_that("Round trip is as expected", {
38 |     out <- readH5AD(outfile)
39 |     expectSCE(out, sce)
40 | })
41 | 


--------------------------------------------------------------------------------
/longtests/testthat/test-scvelo_pancreas.R:
--------------------------------------------------------------------------------
 1 | library(SingleCellExperiment)
 2 | library(BiocFileCache)
 3 | 
 4 | cache <- BiocFileCache(ask = FALSE)  # ask = FALSE: set up the cache without prompting
 5 | file <- bfcrpath(cache, "https://figshare.com/ndownloader/files/30595479")  # local path of the cached download
 6 | outfile <- tempfile(fileext = ".h5ad")  # scratch path shared by the write and round-trip tests
 7 | # Item names handed to validateH5ADSCE() below, grouped by slot
 8 | names <- list(
 9 |     assays = c("X", "Ms", "Mu", "fit_t", "fit_tau", "fit_tau_", "spliced",
10 |                "unspliced", "variance_velocity", "velocity", "velocity_u"),
11 |     colData = c("clusters_coarse", "clusters", "S_score", "G2M_score",
12 |                 "initial_size_spliced", "initial_size_unspliced",
13 |                 "initial_size", "n_counts", "velocity_self_transition", "phase",
14 |                 "velocity_length", "velocity_confidence",
15 |                 "velocity_confidence_transition", "root_cells", "end_points",
16 |                 "velocity_pseudotime", "latent_time"),
17 |     rowData = c("highly_variable_genes", "gene_count_corr", "means",
18 |                 "dispersions", "dispersions_norm", "highly_variable",
19 |                 "velocity_gamma", "velocity_qreg_ratio", "velocity_r2",
20 |                 "velocity_genes", "spearmans_score", "velocity_score",
21 |                 "fit_alpha", "fit_beta", "fit_gamma", "fit_t_", "fit_scaling",
22 |                 "fit_std_u", "fit_std_s", "fit_likelihood", "fit_u0", "fit_s0",
23 |                 "fit_pval_steady", "fit_steady_u", "fit_steady_s",
24 |                 "fit_variance", "fit_alignment_scaling", "fit_r2"),
25 |     metadata = c("clusters_coarse_colors", "clusters_colors", "clusters_sizes",
26 |                  "day_colors", "neighbors", "paga", "pca",
27 |                  "rank_dynamical_genes", "rank_velocity_genes",
28 |                  "recover_dynamics", "velocity_graph", "velocity_graph_neg",
29 |                  "velocity_params"),
30 |     redDim = c("X_pca", "X_umap", "velocity_umap"),
31 |     varm = c("loss"),
32 |     colPairs = c("connectivities", "distances")
33 | )
34 | # Third argument of validateH5ADSCE(); NOTE(review): presumably items allowed to be absent - confirm
35 | missing <- list()
36 | # Reading should yield a SingleCellExperiment
37 | test_that("Reading H5AD works", {
38 |     sce <- readH5AD(file)
39 |     expect_s4_class(sce, "SingleCellExperiment")
40 | })
41 | # Object shared by the remaining tests; warnings are silenced here
42 | sce <- suppressWarnings(readH5AD(file))
43 | 
44 | test_that("SCE is valid", {
45 |     validateH5ADSCE(sce, names, missing)
46 | })
47 | # Creates outfile, which the round-trip test below reads back, so order matters
48 | test_that("Writing H5AD works", {
49 |     writeH5AD(sce, outfile)
50 |     expect_true(file.exists(outfile))
51 | })
52 | # Re-read the written file and compare it with the original via expectSCE()
53 | test_that("Round trip is as expected", {
54 |     out <- readH5AD(outfile)
55 |     expectSCE(out, sce)
56 | })
57 | 


--------------------------------------------------------------------------------
/longtests/testthat/test-scvi_citeseq.R:
--------------------------------------------------------------------------------
 1 | library(SingleCellExperiment)
 2 | library(BiocFileCache)
 3 | 
 4 | cache <- BiocFileCache(ask = FALSE)  # ask = FALSE: set up the cache without prompting
 5 | file <- bfcrpath(cache, "https://figshare.com/ndownloader/files/30612834")  # local path of the cached download
 6 | outfile <- tempfile(fileext = ".h5ad")  # scratch path shared by the write and round-trip tests
 7 | # Item names handed to validateH5ADSCE() below, grouped by slot
 8 | names <- list(
 9 |     assays = c("X", "counts", "denoised_rna"),
10 |     colData = c("n_genes", "percent_mito", "n_counts", "batch", "X_scvi_batch",
11 |                 "X_scvi_labels", "X_scvi_local_l_mean", "X_scvi_local_l_var",
12 |                 "leiden_totalVI"),
13 |     rowData = c("highly_variable", "highly_variable_rank", "means", "variances",
14 |                 "variances_norm", "highly_variable_nbatches"),
15 |     metadata = c("X_scvi", "hvg", "leiden", "neighbors", "umap"),
16 |     redDim = c("X_totalVI", "X_umap", "denoised_protein",
17 |                "protein_expression", "protein_foreground_prob"),
18 |     colPairs = c("connectivities", "distances")
19 | )
20 | # Third argument of validateH5ADSCE(); NOTE(review): presumably items allowed to be absent - confirm
21 | missing <- list()
22 | # Reading must warn; sce is assigned inside the braces so the class check never sees expect_warning()'s return value
23 | test_that("Reading H5AD works", {
24 |     expect_warning({sce <- readH5AD(file)})
25 |     expect_s4_class(sce, "SingleCellExperiment")
26 | })
27 | # Object shared by the remaining tests; warnings are silenced here
28 | sce <- suppressWarnings(readH5AD(file))
29 | 
30 | test_that("SCE is valid", {
31 |     validateH5ADSCE(sce, names, missing)
32 | })
33 | # Creates outfile, which the round-trip test below reads back, so order matters
34 | test_that("Writing H5AD works", {
35 |     writeH5AD(sce, outfile)
36 |     expect_true(file.exists(outfile))
37 | })
38 | # Re-read the written file and compare it with the original via expectSCE()
39 | test_that("Round trip is as expected", {
40 |     out <- suppressWarnings(readH5AD(outfile))
41 | 
42 |     # For some reason "_scvi" gets changed to "X_scvi", not sure why...
43 |     names(S4Vectors::metadata(sce))[1] <- "X_scvi"
44 | 
45 |     expectSCE(out, sce)
46 | })
47 | 


--------------------------------------------------------------------------------
/longtests/testthat/test-squidpy_visium.R:
--------------------------------------------------------------------------------
 1 | library(SingleCellExperiment)
 2 | library(BiocFileCache)
 3 | # Resolve the test H5AD file through a BiocFileCache and pick a temporary output path
 4 | cache <- BiocFileCache(ask = FALSE)
 5 | file <- bfcrpath(cache, "https://figshare.com/ndownloader/files/30639279")
 6 | outfile <- tempfile(fileext = ".h5ad")
 7 | # Names expected in each slot of the SCE read from `file` (passed to validateH5ADSCE())
 8 | names <- list(
 9 |     assays = c("X"),
10 |     colData = c("in_tissue", "array_row", "array_col", "n_genes_by_counts",
11 |                 "log1p_n_genes_by_counts", "total_counts", "log1p_total_counts",
12 |                 "pct_counts_in_top_50_genes", "pct_counts_in_top_100_genes",
13 |                 "pct_counts_in_top_200_genes", "pct_counts_in_top_500_genes",
14 |                 "total_counts_MT", "log1p_total_counts_MT", "pct_counts_MT",
15 |                 "n_counts", "leiden", "cluster", "features_summary_cluster",
16 |                 "features_histogram_cluster", "features_texture_cluster"),
17 |     rowData = c("gene_ids", "feature_types", "genome", "MT",
18 |                 "n_cells_by_counts", "mean_counts", "log1p_mean_counts",
19 |                 "pct_dropout_by_counts", "total_counts", "log1p_total_counts",
20 |                 "n_cells", "highly_variable", "highly_variable_rank", "means",
21 |                 "variances", "variances_norm"),
22 |     metadata = c("cluster_co_occurrence", "cluster_colors", "cluster_ligrec",
23 |                  "cluster_nhood_enrichment", "hvg", "leiden", "leiden_colors",
24 |                  "moranI", "neighbors", "pca", "spatial", "spatial_neighbors",
25 |                  "umap"),
26 |     redDim = c("X_pca", "X_umap", "features", "features_context",
27 |                "features_lowres", "features_orig", "features_segmentation",
28 |                "spatial"),
29 |     varm = c("PCs"),
30 |     colPairs = c("connectivities", "distances", "spatial_connectivities",
31 |                  "spatial_distances")
32 | )
33 | # No names are listed as known to be missing for this file
34 | missing <- list()
35 | # Check that the file reads into a SingleCellExperiment
36 | test_that("Reading H5AD works", {
37 |     sce <- readH5AD(file)
38 |     expect_s4_class(sce, "SingleCellExperiment")
39 | })
40 | # Object shared by the tests below; warnings from this read are suppressed
41 | sce <- suppressWarnings(readH5AD(file))
42 | # Check the contents of `sce` against `names` and `missing`
43 | test_that("SCE is valid", {
44 |     validateH5ADSCE(sce, names, missing)
45 | })
46 | # Write `sce` to the temporary path and check that the file was created
47 | test_that("Writing H5AD works", {
48 |     writeH5AD(sce, outfile)
49 |     expect_true(file.exists(outfile))
50 | })
51 | # Read the written file back and compare it with the original `sce`
52 | test_that("Round trip is as expected", {
53 |     out <- readH5AD(outfile)
54 | 
55 |     expectSCE(out, sce)
56 | })
57 | 


--------------------------------------------------------------------------------
/man/AnnData-Conversion.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/AnnData2SCE.R, R/SCE2AnnData.R
  3 | \name{AnnData-Conversion}
  4 | \alias{AnnData-Conversion}
  5 | \alias{AnnData2SCE}
  6 | \alias{SCE2AnnData}
  7 | \title{Convert between AnnData and SingleCellExperiment}
  8 | \usage{
  9 | AnnData2SCE(
 10 |   adata,
 11 |   X_name = NULL,
 12 |   layers = TRUE,
 13 |   uns = TRUE,
 14 |   var = TRUE,
 15 |   obs = TRUE,
 16 |   varm = TRUE,
 17 |   obsm = TRUE,
 18 |   varp = TRUE,
 19 |   obsp = TRUE,
 20 |   raw = FALSE,
 21 |   skip_assays = FALSE,
 22 |   hdf5_backed = TRUE,
 23 |   verbose = NULL
 24 | )
 25 | 
 26 | SCE2AnnData(
 27 |   sce,
 28 |   X_name = NULL,
 29 |   assays = TRUE,
 30 |   colData = TRUE,
 31 |   rowData = TRUE,
 32 |   varm = TRUE,
 33 |   reducedDims = TRUE,
 34 |   metadata = TRUE,
 35 |   colPairs = TRUE,
 36 |   rowPairs = TRUE,
 37 |   skip_assays = FALSE,
 38 |   verbose = NULL
 39 | )
 40 | }
 41 | \arguments{
 42 | \item{adata}{A \strong{reticulate} reference to a Python AnnData object.}
 43 | 
 44 | \item{X_name}{For \code{SCE2AnnData()} name of the assay to use as the primary
 45 | matrix (\code{X}) of the AnnData object. If \code{NULL}, the first assay of \code{sce} will
 46 | be used by default. For \code{AnnData2SCE()} name used when saving \code{X} as an
 47 | assay. If \code{NULL} looks for an \code{X_name} value in \code{uns}, otherwise uses \code{"X"}.}
 48 | 
 49 | \item{layers, uns, var, obs, varm, obsm, varp, obsp, raw}{Arguments specifying how
 50 | these slots are converted. If \code{TRUE} everything in that slot is converted, if
 51 | \code{FALSE} nothing is converted and if a character vector only those items or
 52 | columns are converted.}
 53 | 
 54 | \item{skip_assays}{Logical scalar indicating whether to skip conversion of
 55 | any assays in \code{sce} or \code{adata}, replacing them with empty sparse matrices
 56 | instead.}
 57 | 
 58 | \item{hdf5_backed}{Logical scalar indicating whether HDF5-backed matrices
 59 | in \code{adata} should be represented as HDF5Array objects. This assumes that
 60 | \code{adata} is created with \code{backed="r"}.}
 61 | 
 62 | \item{verbose}{Logical scalar indicating whether to print progress messages.
 63 | If \code{NULL} uses \code{getOption("zellkonverter.verbose")}.}
 64 | 
 65 | \item{sce}{A \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
 66 | object.}
 67 | 
 68 | \item{assays, colData, rowData, reducedDims, metadata, colPairs, rowPairs}{Arguments specifying how these slots are converted. If \code{TRUE} everything in
 69 | that slot is converted, if \code{FALSE} nothing is converted and if a character
 70 | vector only those items or columns are converted.}
 71 | }
 72 | \value{
 73 | \code{AnnData2SCE()} will return a
 74 | \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
 75 | containing the equivalent data from \code{adata}.
 76 | 
 77 | \code{SCE2AnnData()} will return a \strong{reticulate} reference to an AnnData object
 78 | containing the content of \code{sce}.
 79 | }
 80 | \description{
 81 | Conversion between Python AnnData objects and
 82 | \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
 83 | objects.
 84 | }
 85 | \details{
 86 | \subsection{Python environment}{
 87 | 
 88 | These functions assume that an appropriate Python environment has already
 89 | been loaded. As such, they are largely intended for developer use, most
 90 | typically inside a \strong{basilisk} context.
 91 | }
 92 | 
 93 | \subsection{Conversion mapping}{
 94 | 
 95 | The conversion is not entirely lossless. The current mapping is shown below
 96 | (also at \url{https://tinyurl.com/AnnData2SCE}):
 97 | 
 98 | \if{html}{
 99 |     \figure{AnnData2SCE.png}{options: width=800, alt="SCE-AnnData map"}
100 | }
101 | \if{latex}{\figure{AnnData2SCE.png}{options: width=5in}}
102 | }
103 | 
104 | \subsection{Matrix conversion}{
105 | 
106 | In \code{SCE2AnnData()}, matrices are converted to a \strong{numpy}-friendly format.
107 | Sparse matrices are converted to
108 | \link[Matrix:dgCMatrix-class]{Matrix::dgCMatrix} objects while all
109 | other matrices are converted into ordinary matrices. If \code{skip_assays = TRUE},
110 | empty sparse matrices are created instead and the user is expected to fill in
111 | the assays on the Python side.
112 | 
113 | For \code{AnnData2SCE()}, a warning is raised if there is no corresponding R
114 | format for a matrix in the \code{AnnData} object, and an empty sparse matrix is
115 | created instead as a placeholder. If \code{skip_assays = NA}, no warning is
116 | emitted but variables are created in the
117 | \code{\link[SingleCellExperiment:internals]{int_metadata()}} of the output to
118 | specify which assays were skipped.
119 | 
120 | If \code{skip_assays = TRUE}, empty sparse matrices are created for all assays,
121 | regardless of whether they might be convertible to an R format or not.
122 | In both cases, the user is expected to fill in the assays on the R side.
123 | }
124 | 
125 | \subsection{\code{metadata}/\code{uns} conversion}{
126 | 
127 | We attempt to convert between items in the
128 | \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
129 | \code{\link[S4Vectors:Annotated-class]{metadata()}} slot and the \code{AnnData} \code{uns} slot. If
130 | an item cannot be converted a warning will be raised.
131 | }
132 | 
133 | \subsection{\code{varm} conversion}{
134 | 
135 | Values stored in the \code{varm} slot of an \code{AnnData} object are stored in a
136 | column of \code{\link[SummarizedExperiment:SummarizedExperiment-class]{rowData()}} in a
137 | \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
138 | as a \link[S4Vectors:DataFrame-class]{S4Vectors::DataFrame-class} of matrices.
139 | If this column is present an attempt is made to transfer this information
140 | when converting from
141 | \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
142 | to \code{AnnData}.
143 | }
144 | 
145 | \subsection{\code{SpatialExperiment} conversion}{
146 | 
147 | In \code{SCE2AnnData()}, if \code{sce} is a \link[SpatialExperiment:SpatialExperiment-class]{SpatialExperiment::SpatialExperiment}
148 | object, the spatial coordinates are added to the \code{reducedDims} slot before
149 | conversion to an \code{AnnData} object.
150 | }
151 | }
152 | \examples{
153 | if (requireNamespace("scRNAseq", quietly = TRUE)) {
154 |     library(basilisk)
155 |     library(scRNAseq)
156 |     seger <- SegerstolpePancreasData()
157 | 
158 |     # These functions are designed to be run inside
159 |     # a specified Python environment
160 |     roundtrip <- basiliskRun(fun = function(sce) {
161 |         # Convert SCE to AnnData:
162 |         adata <- zellkonverter::SCE2AnnData(sce)
163 | 
164 |         # Maybe do some work in Python on 'adata':
165 |         # BLAH BLAH BLAH
166 | 
167 |         # Convert back to an SCE:
168 |         zellkonverter::AnnData2SCE(adata)
169 |     }, env = zellkonverterAnnDataEnv(), sce = seger)
170 | }
171 | }
172 | \seealso{
173 | \code{\link[=writeH5AD]{writeH5AD()}} and \code{\link[=readH5AD]{readH5AD()}} for dealing directly with H5AD files.
174 | }
175 | \author{
176 | Luke Zappia
177 | 
178 | Aaron Lun
179 | }
180 | 


--------------------------------------------------------------------------------
/man/AnnData-Environment.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/basilisk.R
 3 | \docType{data}
 4 | \name{AnnData-Environment}
 5 | \alias{AnnData-Environment}
 6 | \alias{.AnnDataVersions}
 7 | \alias{AnnDataDependencies}
 8 | \alias{zellkonverterAnnDataEnv}
 9 | \title{AnnData environment}
10 | \format{
11 | For \code{.AnnDataVersions} a character vector containing allowed \strong{anndata}
12 | version strings.
13 | }
14 | \usage{
15 | .AnnDataVersions
16 | 
17 | AnnDataDependencies(version = .AnnDataVersions)
18 | 
19 | zellkonverterAnnDataEnv(version = .AnnDataVersions)
20 | }
21 | \arguments{
22 | \item{version}{A string giving the version of the \strong{anndata} Python library
23 | to use. Allowed values are available in \code{.AnnDataVersions}. By default the
24 | latest version is used.}
25 | }
26 | \value{
27 | For \code{AnnDataDependencies} a character vector containing the pinned versions
28 | of all Python packages to be used by \code{zellkonverterAnnDataEnv()}.
29 | 
30 | For \code{zellkonverterAnnDataEnv} a \code{\link[basilisk:BasiliskEnvironment-class]{basilisk::BasiliskEnvironment()}} containing
31 | \strong{zellkonverter}'s AnnData Python environment.
32 | }
33 | \description{
34 | The Python environment used by \strong{zellkonverter} for interfacing with the
35 | \strong{anndata} Python library (and H5AD files) is described by the dependencies
36 | returned by \code{AnnDataDependencies()}. The \code{zellkonverterAnnDataEnv()}
37 | function returns the \code{\link[basilisk:BasiliskEnvironment-class]{basilisk::BasiliskEnvironment()}} containing these
38 | dependencies used by \strong{zellkonverter}. Allowed versions of \strong{anndata} are
39 | available in \code{.AnnDataVersions}.
40 | }
41 | \details{
42 | \subsection{Using Python environments}{
43 | 
44 | When a \strong{zellkonverter} function is first run a conda environment containing all of
45 | the necessary dependencies for that version will be instantiated. This will
46 | not be performed on any subsequent run or if any other \strong{zellkonverter}
47 | function has been run prior with the same environment version.
48 | 
49 | By default the \strong{zellkonverter} conda environment will become the shared R
50 | Python environment if one does not already exist. When one does exist (for
51 | example when a \strong{zellkonverter} function has already been run using a
52 | different environment version) then a separate environment will be used.
53 | See \code{\link[basilisk:basiliskOptions]{basilisk::setBasiliskShared()}} for more information on this behaviour.
54 | Note that when the environment is not shared progress messages are lost.
55 | }
56 | 
57 | \subsection{Development}{
58 | 
59 | The \code{AnnDataDependencies()} function is exposed for use by other package
60 | developers who want an easy way to define the dependencies required for
61 | creating a Python environment to work with AnnData objects, most typically
62 | within a \strong{basilisk} context. For example, we can simply combine this
63 | vector with additional dependencies to create a \strong{basilisk} environment with
64 | Python package versions that are consistent with those in \strong{zellkonverter}.
65 | 
66 | If you want to run code in the exact environment used by \strong{zellkonverter}
67 | this can be done using \code{zellkonverterAnnDataEnv()} in combination with
68 | \code{\link[basilisk:basiliskStart]{basilisk::basiliskStart()}} and/or \code{\link[basilisk:basiliskStart]{basilisk::basiliskRun()}}. Please refer to
69 | the \strong{basilisk} documentation for more information on using these
70 | environments.
71 | }
72 | }
73 | \examples{
74 | .AnnDataVersions
75 | 
76 | AnnDataDependencies()
77 | AnnDataDependencies(version = "0.7.6")
78 | 
79 | cl <- basilisk::basiliskStart(zellkonverterAnnDataEnv())
80 | anndata <- reticulate::import("anndata")
81 | basilisk::basiliskStop(cl)
82 | }
83 | \author{
84 | Luke Zappia
85 | 
86 | Aaron Lun
87 | }
88 | \keyword{datasets}
89 | 


--------------------------------------------------------------------------------
/man/expectSCE.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/validation.R
 3 | \name{expectSCE}
 4 | \alias{expectSCE}
 5 | \title{Expect SCE}
 6 | \usage{
 7 | expectSCE(sce, expected)
 8 | }
 9 | \arguments{
10 | \item{sce}{A \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
11 | object.}
12 | 
13 | \item{expected}{A template \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
14 | object to compare to.}
15 | }
16 | \value{
17 | \code{TRUE} invisibly if checks pass
18 | }
19 | \description{
20 | Test that a SingleCellExperiment matches an expected object. Designed to be
21 | used inside \code{testthat::test_that()} during package testing.
22 | }
23 | \author{
24 | Luke Zappia
25 | }
26 | 


--------------------------------------------------------------------------------
/man/figures/AnnData2SCE.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theislab/zellkonverter/c17a17220088ff880d512c392d5de4aacb9e9bb1/man/figures/AnnData2SCE.png


--------------------------------------------------------------------------------
/man/figures/zellkonverter.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theislab/zellkonverter/c17a17220088ff880d512c392d5de4aacb9e9bb1/man/figures/zellkonverter.png


--------------------------------------------------------------------------------
/man/r-py-conversion.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/reticulate.R
 3 | \name{r-py-conversion}
 4 | \alias{r-py-conversion}
 5 | \alias{py_to_r.numpy.ndarray}
 6 | \title{Convert between Python and R objects}
 7 | \usage{
 8 | \method{py_to_r}{numpy.ndarray}(x)
 9 | }
10 | \arguments{
11 | \item{x}{A Python object.}
12 | }
13 | \value{
14 | An \R object, as converted from the Python object.
15 | }
16 | \description{
17 | Convert between Python and R objects
18 | }
19 | \details{
20 | These functions are extensions of the default conversion functions in the
21 | \code{reticulate} package for the following reasons:
22 | \itemize{
23 | \item \code{numpy.ndarray} - Handle conversion of \strong{numpy} recarrays
24 | \item \code{pandas.core.arrays.masked.BaseMaskedArray} - Handle conversion of
25 | \strong{pandas} arrays (used by \code{AnnData} objects when there are missing
26 | values)
27 | \item \code{pandas.core.arrays.categorical.Categorical} - Handle conversion of
28 | \strong{pandas} categorical arrays
29 | }
30 | }
31 | \seealso{
32 | \code{\link[reticulate:r-py-conversion]{reticulate::py_to_r()}} for the base \code{reticulate} functions
33 | }
34 | \author{
35 | Luke Zappia
36 | }
37 | 


--------------------------------------------------------------------------------
/man/readH5AD.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/read.R
 3 | \name{readH5AD}
 4 | \alias{readH5AD}
 5 | \title{Read H5AD}
 6 | \usage{
 7 | readH5AD(
 8 |   file,
 9 |   X_name = NULL,
10 |   use_hdf5 = FALSE,
11 |   reader = c("python", "R"),
12 |   version = NULL,
13 |   verbose = NULL,
14 |   ...
15 | )
16 | }
17 | \arguments{
18 | \item{file}{String containing a path to a \code{.h5ad} file.}
19 | 
20 | \item{X_name}{Name used when saving \code{X} as an assay. If \code{NULL} looks for an
21 | \code{X_name} value in \code{uns}, otherwise uses \code{"X"}.}
22 | 
23 | \item{use_hdf5}{Logical scalar indicating whether assays should be
24 | loaded as HDF5-based matrices from the \strong{HDF5Array} package.}
25 | 
26 | \item{reader}{Which HDF5 reader to use. Either \code{"python"} for reading with
27 | the \strong{anndata} Python package via \strong{reticulate} or \code{"R"} for
28 | \strong{zellkonverter}'s native R reader.}
29 | 
30 | \item{version}{A string giving the version of the \strong{anndata} Python library
31 | to use. Allowed values are available in \code{.AnnDataVersions}. By default the
32 | latest version is used.}
33 | 
34 | \item{verbose}{Logical scalar indicating whether to print progress messages.
35 | If \code{NULL} uses \code{getOption("zellkonverter.verbose")}.}
36 | 
37 | \item{...}{
38 |   Arguments passed on to \code{\link[=AnnData2SCE]{AnnData2SCE}}
39 |   \describe{
40 |     \item{\code{layers,uns,var,obs,varm,obsm,varp,obsp,raw}}{Arguments specifying how
41 | these slots are converted. If \code{TRUE} everything in that slot is converted, if
42 | \code{FALSE} nothing is converted and if a character vector only those items or
43 | columns are converted.}
44 |     \item{\code{skip_assays}}{Logical scalar indicating whether to skip conversion of
45 | any assays in \code{sce} or \code{adata}, replacing them with empty sparse matrices
46 | instead.}
47 |   }}
48 | }
49 | \value{
50 | A \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
51 | object is returned.
52 | }
53 | \description{
54 | Reads a H5AD file and returns a
55 | \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
56 | object.
57 | }
58 | \details{
59 | Setting \code{use_hdf5 = TRUE} allows for very large datasets to be efficiently
60 | represented on machines with little memory. However, this comes at the cost
61 | of access speed as data needs to be fetched from the HDF5 file upon request.
62 | 
63 | Setting \code{reader = "R"} will use an experimental native R reader instead of
64 | reading the file into Python and converting the result. This avoids the need
65 | for a Python environment and some of the issues with conversion but is still
66 | under development and is likely to return slightly different output.
67 | 
68 | See \link{AnnData-Environment} for more details on \strong{zellkonverter} Python
69 | environments.
70 | }
71 | \examples{
72 | library(SummarizedExperiment)
73 | 
74 | file <- system.file("extdata", "krumsiek11.h5ad", package = "zellkonverter")
75 | sce <- readH5AD(file)
76 | class(assay(sce))
77 | 
78 | sce2 <- readH5AD(file, use_hdf5 = TRUE)
79 | class(assay(sce2))
80 | 
81 | sce3 <- readH5AD(file, reader = "R")
82 | }
83 | \seealso{
84 | \code{\link[=writeH5AD]{writeH5AD()}}, to write a
85 | \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
86 | object to a H5AD file.
87 | 
88 | \code{\link[=AnnData2SCE]{AnnData2SCE()}}, for developers to convert existing AnnData instances to a
89 | \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}.
90 | }
91 | \author{
92 | Luke Zappia
93 | 
94 | Aaron Lun
95 | }
96 | 


--------------------------------------------------------------------------------
/man/setZellkonverterVerbose.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/ui.R
 3 | \name{setZellkonverterVerbose}
 4 | \alias{setZellkonverterVerbose}
 5 | \title{Set zellkonverter verbose}
 6 | \usage{
 7 | setZellkonverterVerbose(verbose = TRUE)
 8 | }
 9 | \arguments{
10 | \item{verbose}{Logical value for the verbosity option.}
11 | }
12 | \value{
13 | The value of getOption("zellkonverter.verbose") invisibly
14 | }
15 | \description{
16 | Set the zellkonverter verbosity option
17 | }
18 | \details{
19 | Running \code{setZellkonverterVerbose(TRUE)} will turn on \strong{zellkonverter}
20 | progress messages by default without having to set \code{verbose = TRUE} in each
21 | function call. This is done by setting the \code{"zellkonverter.verbose"} option.
22 | Running \code{setZellkonverterVerbose(FALSE)} will turn default verbosity off.
23 | }
24 | \examples{
25 | current <- getOption("zellkonverter.verbose")
26 | setZellkonverterVerbose(TRUE)
27 | getOption("zellkonverter.verbose")
28 | setZellkonverterVerbose(FALSE)
29 | getOption("zellkonverter.verbose")
30 | setZellkonverterVerbose(current)
31 | getOption("zellkonverter.verbose")
32 | }
33 | 


--------------------------------------------------------------------------------
/man/validateH5ADSCE.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/validation.R
 3 | \name{validateH5ADSCE}
 4 | \alias{validateH5ADSCE}
 5 | \title{Validate H5AD SCE}
 6 | \usage{
 7 | validateH5ADSCE(sce, names, missing)
 8 | }
 9 | \arguments{
10 | \item{sce}{A \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
11 | object.}
12 | 
13 | \item{names}{Named list of expected names. Names are slots and values are
14 | vectors of names that are expected to exist in that slot.}
15 | 
16 | \item{missing}{Named list of known missing names. Names are slots and values
17 | are vectors of names that are expected to not exist in that slot.}
18 | }
19 | \value{
20 | If checks are successful \code{TRUE} invisibly, if not other output
21 | depending on the context
22 | }
23 | \description{
24 | Validate a SingleCellExperiment created by \code{readH5AD()}. Designed to be used
25 | inside \code{testthat::test_that()} during package testing.
26 | }
27 | \details{
28 | This function checks that a SingleCellExperiment contains the expected items
29 | in each slot. The main reason for this function is to avoid repeating code when
30 | testing multiple \code{.h5ad} files. The following items in \code{names} and \code{missing}
31 | are recognised:
32 | \itemize{
33 | \item \code{assays} - Assay names
34 | \item \code{colData} - colData column names
35 | \item \code{rowData} - rowData column names
36 | \item \code{metadata} - metadata names
37 | \item \code{redDim} - Reduced dimension names
38 | \item \code{varm} - Column names of the \code{varm} rowData column (from the AnnData varm
39 | slot)
40 | \item \code{colPairs} - Column pair names
41 | \item \code{rowPairs} - Row pair names
42 | \item \code{raw_rowData} - rowData column names in the \code{raw} altExp
43 | \item \code{raw_varm} - Column names of the raw \code{varm} rowData column (from the
44 | AnnData varm slot)
45 | }
46 | 
47 | If an item in \code{names} or \code{missing} is \code{NULL} then it won't be checked. The
48 | items in \code{missing} are checked that they explicitly do not exist. This is
49 | mostly for record keeping when something is known to not be converted but can
50 | also be useful when the corresponding \code{names} item is \code{NULL}.
51 | }
52 | \author{
53 | Luke Zappia
54 | }
55 | 


--------------------------------------------------------------------------------
/man/writeH5AD.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/write.R
  3 | \name{writeH5AD}
  4 | \alias{writeH5AD}
  5 | \title{Write H5AD}
  6 | \usage{
  7 | writeH5AD(
  8 |   sce,
  9 |   file,
 10 |   X_name = NULL,
 11 |   skip_assays = FALSE,
 12 |   compression = c("none", "gzip", "lzf"),
 13 |   version = NULL,
 14 |   verbose = NULL,
 15 |   ...
 16 | )
 17 | }
 18 | \arguments{
 19 | \item{sce}{A \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
 20 | object.}
 21 | 
 22 | \item{file}{String containing a path to write the new \code{.h5ad} file.}
 23 | 
 24 | \item{X_name}{Name of the assay to use as the primary matrix (\code{X}) of the
 25 | AnnData object. If \code{NULL}, the first assay of \code{sce} will be used by default.}
 26 | 
 27 | \item{skip_assays}{Logical scalar indicating whether assay matrices should
 28 | be ignored when writing to \code{file}.}
 29 | 
 30 | \item{compression}{Type of compression when writing the new \code{.h5ad} file.}
 31 | 
 32 | \item{version}{A string giving the version of the \strong{anndata} Python library
 33 | to use. Allowed values are available in \code{.AnnDataVersions}. By default the
 34 | latest version is used.}
 35 | 
 36 | \item{verbose}{Logical scalar indicating whether to print progress messages.
 37 | If \code{NULL} uses \code{getOption("zellkonverter.verbose")}.}
 38 | 
 39 | \item{...}{
 40 |   Arguments passed on to \code{\link[=SCE2AnnData]{SCE2AnnData}}
 41 |   \describe{
 42 |     \item{\code{assays,colData,rowData,reducedDims,metadata,colPairs,rowPairs}}{Arguments specifying how these slots are converted. If \code{TRUE} everything in
 43 | that slot is converted, if \code{FALSE} nothing is converted and if a character
 44 | vector only those items or columns are converted.}
 45 |   }}
 46 | }
 47 | \value{
 48 | A \code{NULL} is invisibly returned.
 49 | }
 50 | \description{
 51 | Write a H5AD file from a
 52 | \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
 53 | object.
 54 | }
 55 | \details{
 56 | \subsection{Skipping assays}{
 57 | 
 58 | Setting \code{skip_assays = TRUE} can occasionally be useful if the matrices in
 59 | \code{sce} are stored in a format that is not amenable for efficient conversion
 60 | to a \strong{numpy}-compatible format. In such cases, it can be better to create
 61 | an empty placeholder dataset in \code{file} and fill it in R afterwards.
 62 | }
 63 | 
 64 | \subsection{\strong{DelayedArray} assays}{
 65 | 
 66 | If \code{sce} contains any \strong{DelayedArray} matrices as assays \code{writeH5AD()} will
 67 | write them to disk using the \strong{rhdf5} package directly rather than via
 68 | Python to avoid instantiating them in memory. However there is currently
 69 | an issue which prevents this being done for sparse \strong{DelayedArray} matrices.
 70 | }
 71 | 
 72 | \subsection{Known conversion issues}{
 73 | \subsection{Coercion to factors}{
 74 | 
 75 | The \strong{anndata} package automatically converts some character vectors to
 76 | factors when saving \code{.h5ad} files. This can affect columns of \code{rowData(sce)}
 77 | and \code{colData(sce)} which may change type when the \code{.h5ad} file is read back
 78 | into R.
 79 | }
 80 | 
 81 | }
 82 | 
 83 | \subsection{Environment}{
 84 | 
 85 | See \link{AnnData-Environment} for more details on \strong{zellkonverter} Python
 86 | environments.
 87 | }
 88 | }
 89 | \examples{
 90 | # Using the Zeisel brain dataset
 91 | if (requireNamespace("scRNAseq", quietly = TRUE)) {
 92 |     library(scRNAseq)
 93 |     sce <- ZeiselBrainData()
 94 | 
 95 |     # Writing to a H5AD file
 96 |     temp <- tempfile(fileext = ".h5ad")
 97 |     writeH5AD(sce, temp)
 98 | }
 99 | }
100 | \seealso{
101 | \code{\link[=readH5AD]{readH5AD()}}, to read a
102 | \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
103 | object from a H5AD file.
104 | 
105 | \code{\link[=SCE2AnnData]{SCE2AnnData()}}, for developers to create an AnnData object from a
106 | \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}.
107 | }
108 | \author{
109 | Luke Zappia
110 | 
111 | Aaron Lun
112 | }
113 | 


--------------------------------------------------------------------------------
/man/zellkonverter-package.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/zellkonverter-package.R
 3 | \docType{package}
 4 | \name{zellkonverter-package}
 5 | \alias{zellkonverter}
 6 | \alias{zellkonverter-package}
 7 | \title{zellkonverter: Conversion Between scRNA-seq Objects}
 8 | \description{
 9 | Provides methods to convert between Python AnnData objects and SingleCellExperiment objects. These are primarily intended for use by downstream Bioconductor packages that wrap Python methods for single-cell data analysis. It also includes functions to read and write H5AD files used for saving AnnData objects to disk.
10 | }
11 | \seealso{
12 | Useful links:
13 | \itemize{
14 |   \item \url{https://github.com/theislab/zellkonverter}
15 |   \item Report bugs at \url{https://github.com/theislab/zellkonverter/issues}
16 | }
17 | 
18 | }
19 | \author{
20 | \strong{Maintainer}: Luke Zappia \email{luke@lazappi.id.au} (\href{https://orcid.org/0000-0001-7744-8565}{ORCID})
21 | 
22 | Authors:
23 | \itemize{
24 |   \item Aaron Lun \email{infinite.monkeys.with.keyboards@gmail.com} (\href{https://orcid.org/0000-0002-3564-4813}{ORCID})
25 | }
26 | 
27 | Other contributors:
28 | \itemize{
29 |   \item Jack Kamm \email{jackkamm@gmail.com} (\href{https://orcid.org/0000-0003-2412-756X}{ORCID}) [contributor]
30 |   \item Robrecht Cannoodt \email{rcannood@gmail.com} (\href{https://orcid.org/0000-0003-3641-729X}{ORCID}) (rcannood) [contributor]
31 |   \item Gabriel Hoffman \email{gabriel.hoffman@mssm.edu} (\href{https://orcid.org/0000-0002-0957-0224}{ORCID}) (GabrielHoffman) [contributor]
32 |   \item Marek Cmero \email{cmero.ma@wehi.edu.au} (\href{https://orcid.org/0000-0001-7783-5530}{ORCID}) (mcmero) [contributor]
33 | }
34 | 
35 | }
36 | 


--------------------------------------------------------------------------------
/tests/spelling.R:
--------------------------------------------------------------------------------
1 | if (requireNamespace("spelling", quietly = TRUE)) {
2 |     spelling::spell_check_test(
3 |         vignettes = TRUE,
4 |         error = FALSE,
5 |         skip_on_cran = TRUE
6 |     )
7 | }
8 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(zellkonverter)
3 | 
4 | test_check("zellkonverter")
5 | 


--------------------------------------------------------------------------------
/tests/testthat/default.profraw:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theislab/zellkonverter/c17a17220088ff880d512c392d5de4aacb9e9bb1/tests/testthat/default.profraw


--------------------------------------------------------------------------------
/tests/testthat/test-SCE2AnnData.R:
--------------------------------------------------------------------------------
 1 | test_that(".makeNumpyFriendly() works correctly", {
 2 |     mat <- matrix(1:50, nrow = 10, ncol = 5)
 3 | 
 4 |     friendly_mat <- .makeNumpyFriendly(mat, transpose = TRUE)
 5 |     expect_identical(friendly_mat, t(mat))
 6 |     expect_identical(dim(friendly_mat), rev(dim(mat)))
 7 | 
 8 |     friendly_mat <- .makeNumpyFriendly(mat, transpose = FALSE)
 9 |     expect_identical(friendly_mat, mat)
10 |     expect_identical(dim(friendly_mat), dim(mat))
11 | 
12 |     sparse_mat <- Matrix::Matrix(mat, sparse = TRUE)
13 |     friendly_sparse_mat <- .makeNumpyFriendly(sparse_mat, transpose = TRUE)
14 |     expect_s4_class(friendly_sparse_mat, "dgRMatrix")
15 |     expect_identical(dim(friendly_sparse_mat), rev(dim(sparse_mat)))
16 | 
17 |     friendly_sparse_mat <- .makeNumpyFriendly(sparse_mat, transpose = FALSE)
18 |     expect_s4_class(friendly_sparse_mat, "dgCMatrix")
19 |     expect_identical(dim(friendly_sparse_mat), dim(sparse_mat))
20 | 
21 |     delayed_mat <- DelayedArray::DelayedArray(mat)
22 |     friendly_delayed_mat <- .makeNumpyFriendly(delayed_mat, transpose = TRUE)
23 |     expect_identical(friendly_delayed_mat, t(mat))
24 |     expect_identical(dim(friendly_delayed_mat), rev(dim(mat)))
25 | 
26 |     friendly_delayed_mat <- .makeNumpyFriendly(delayed_mat, transpose = FALSE)
27 |     expect_identical(friendly_delayed_mat, mat)
28 |     expect_identical(dim(friendly_delayed_mat), dim(mat))
29 | 
30 |     sparse_delayed_mat <- DelayedArray::DelayedArray(sparse_mat)
31 |     friendly_sparse_delayed_mat <- .makeNumpyFriendly(sparse_delayed_mat, transpose = TRUE)
32 |     expect_s4_class(friendly_sparse_delayed_mat, "dgRMatrix")
33 |     expect_identical(dim(friendly_sparse_delayed_mat), rev(dim(sparse_delayed_mat)))
34 | 
35 |     friendly_sparse_delayed_mat <- .makeNumpyFriendly(sparse_delayed_mat, transpose = FALSE)
36 |     expect_s4_class(friendly_sparse_delayed_mat, "dgCMatrix")
37 |     expect_identical(dim(friendly_sparse_delayed_mat), dim(sparse_delayed_mat))
38 | })
39 | 


--------------------------------------------------------------------------------
/tests/testthat/test-read.R:
--------------------------------------------------------------------------------
  1 | # This tests the readH5AD function (and by implication, AnnData2SCE).
  2 | library(SummarizedExperiment)
  3 | file <- system.file("extdata", "krumsiek11.h5ad", package = "zellkonverter")
  4 | file_example <- system.file("extdata", "example_anndata.h5ad", package = "zellkonverter")
  5 | file_v08 <- system.file("extdata", "krumsiek11_augmented_v0-8.h5ad", package = "zellkonverter")
  6 | 
  7 | test_that("Reading H5AD works", {
  8 |     sce <- readH5AD(file)
  9 |     expect_s4_class(sce, "SingleCellExperiment")
 10 | 
 11 |     expect_identical(assayNames(sce), "X")
 12 |     expect_identical(colnames(colData(sce)), "cell_type")
 13 | })
 14 | 
 15 | test_that("Reading example H5AD works", {
 16 |     names <- list(
 17 |         assays = c("X", "counts"),
 18 |         colData = "louvain",
 19 |         rowData = c(
 20 |             "n_counts", "highly_variable", "means", "dispersions",
 21 |             "dispersions_norm"
 22 |         ),
 23 |         metadata = c("louvain", "neighbors", "pca", "rank_genes_groups", "umap"),
 24 |         redDim = c("X_pca", "X_umap"),
 25 |         varm = "PCs",
 26 |         colPairs = c("connectivities", "distances")
 27 |     )
 28 |     missing <- list()
 29 | 
 30 |     sce <- expect_silent(readH5AD(file_example))
 31 |     expect_s4_class(sce, "SingleCellExperiment")
 32 | 
 33 |     validateH5ADSCE(sce, names, missing)
 34 | })
 35 | 
 36 | test_that("Reading H5AD works with version 0.10.9", {
 37 |     sce <- readH5AD(file, version = "0.10.9")
 38 |     expect_s4_class(sce, "SingleCellExperiment")
 39 | 
 40 |     expect_identical(assayNames(sce), "X")
 41 |     expect_identical(colnames(colData(sce)), "cell_type")
 42 | })
 43 | 
 44 | test_that("Reading H5AD works with version 0.10.6", {
 45 |     sce <- readH5AD(file, version = "0.10.6")
 46 |     expect_s4_class(sce, "SingleCellExperiment")
 47 | 
 48 |     expect_identical(assayNames(sce), "X")
 49 |     expect_identical(colnames(colData(sce)), "cell_type")
 50 | })
 51 | 
 52 | test_that("Reading H5AD works with version 0.10.2", {
 53 |     sce <- readH5AD(file, version = "0.10.2")
 54 |     expect_s4_class(sce, "SingleCellExperiment")
 55 | 
 56 |     expect_identical(assayNames(sce), "X")
 57 |     expect_identical(colnames(colData(sce)), "cell_type")
 58 | })
 59 | 
 60 | test_that("Reading H5AD works with version 0.9.2", {
 61 |     sce <- readH5AD(file, version = "0.9.2")
 62 |     expect_s4_class(sce, "SingleCellExperiment")
 63 | 
 64 |     expect_identical(assayNames(sce), "X")
 65 |     expect_identical(colnames(colData(sce)), "cell_type")
 66 | })
 67 | 
 68 | test_that("Reading H5AD works with version 0.8.0", {
 69 |     sce <- readH5AD(file, version = "0.8.0")
 70 |     expect_s4_class(sce, "SingleCellExperiment")
 71 | 
 72 |     expect_identical(assayNames(sce), "X")
 73 |     expect_identical(colnames(colData(sce)), "cell_type")
 74 | })
 75 | 
 76 | test_that("Reading H5AD works with version 0.7.6", {
 77 |     # Python 3.7 is not available for aarch64
 78 |     skip_on_os("mac", arch = "aarch64")
 79 |     skip_on_os("linux", arch = "aarch64")
 80 | 
 81 |     sce <- readH5AD(file, version = "0.7.6")
 82 |     expect_s4_class(sce, "SingleCellExperiment")
 83 | 
 84 |     expect_identical(assayNames(sce), "X")
 85 |     expect_identical(colnames(colData(sce)), "cell_type")
 86 | })
 87 | 
 88 | test_that("Reading H5AD works with verbose=TRUE", {
 89 |     sce <- readH5AD(file, verbose = TRUE)
 90 |     expect_s4_class(sce, "SingleCellExperiment")
 91 | 
 92 |     expect_identical(assayNames(sce), "X")
 93 |     expect_identical(colnames(colData(sce)), "cell_type")
 94 | })
 95 | 
 96 | test_that("Reading H5AD works with HDF5Arrays", {
 97 |     sce <- readH5AD(file, use_hdf5 = TRUE)
 98 |     expect_s4_class(sce, "SingleCellExperiment")
 99 |     expect_s4_class(DelayedArray::seed(assay(sce)), "HDF5ArraySeed")
100 | 
101 |     ref <- readH5AD(file)
102 |     expect_identical(as.matrix(assay(ref)), as.matrix(assay(sce)))
103 | 
104 |     # Properly sleeps to wait for the process to shut down.
105 |     expect_s4_class(
106 |         sce <- readH5AD(file, use_hdf5 = TRUE),
107 |         "SingleCellExperiment"
108 |     )
109 | })
110 | 
111 | test_that("Reading H5AD works with a mixture of sparse and HDF5Arrays", {
112 |     sce <- readH5AD(file)
113 |     assay(sce, "more") <- as(assay(sce, "X"), "CsparseMatrix")
114 | 
115 |     temp <- tempfile(fileext = ".h5ad")
116 |     writeH5AD(sce, temp)
117 | 
118 |     backed <- readH5AD(temp, use_hdf5 = TRUE)
119 |     expect_s4_class(DelayedArray::seed(assay(backed)), "HDF5ArraySeed")
120 |     expect_s4_class(assay(backed, "more"), "CsparseMatrix")
121 | })
122 | 
123 | test_that("readH5AD works in a separate process", {
124 |     # Restore the basilisk settings on exit, even if the test fails.
125 |     oldshare <- basilisk::getBasiliskShared()
126 |     oldfork <- basilisk::getBasiliskFork()
127 |     on.exit(basilisk::setBasiliskShared(oldshare), add = TRUE)
128 |     on.exit(basilisk::setBasiliskFork(oldfork), add = TRUE)
129 |     basilisk::setBasiliskShared(FALSE)
130 |     basilisk::setBasiliskFork(FALSE)
131 | 
132 |     sce <- readH5AD(file)
133 |     expect_s4_class(sce, "SingleCellExperiment")
134 | })
135 | 
136 | test_that("Reading H5AD works with native reader", {
137 |     sce <- readH5AD(file, reader = "R")
138 |     expect_s4_class(sce, "SingleCellExperiment")
139 | 
140 |     expect_identical(assayNames(sce), "X")
141 |     expect_identical(colnames(colData(sce)), "cell_type")
142 | })
143 | 
144 | test_that("Reading v0.8 H5AD works with native reader", {
145 |     sce_py <- readH5AD(file_v08)
146 |     sce_r <- readH5AD(file_v08, reader = "R")
147 | 
148 |     expect_identical(rownames(sce_py), rownames(sce_r))
149 |     expect_identical(colnames(sce_py), colnames(sce_r))
150 | 
151 |     expect_identical(rowData(sce_py), rowData(sce_r))
152 | 
153 |     expect_identical(colnames(colData(sce_py)), colnames(colData(sce_r)))
154 |     expect_equal(colData(sce_py), colData(sce_r))
155 | 
156 |     # check the X assay
157 |     expect_identical(assays(sce_py), assays(sce_r))
158 | 
159 |     # check the easy metadata columns
160 |     for (key in c(
161 |         "dummy_category", "dummy_int", "dummy_int2", "highlight",
162 |         "iroot"
163 |     )) {
164 |         expect_equal(metadata(sce_py)[[key]], metadata(sce_r)[[key]])
165 |     }
166 | 
167 |     # For these columns the Python reader reads an array
168 |     for (key in c("dummy_bool", "dummy_bool2")) {
169 |         expect_equal(as.vector(metadata(sce_py)[[key]]), metadata(sce_r)[[key]])
170 |     }
171 | })
172 | 
173 | test_that("Skipping slot conversion works", {
174 |     sce <- readH5AD(file,
175 |         layers = FALSE, uns = FALSE, var = FALSE, obs = FALSE,
176 |         varm = FALSE, obsm = FALSE, varp = FALSE, obsp = FALSE
177 |     )
178 | 
179 |     expect_identical(assayNames(sce), "X")
180 |     expect_identical(metadata(sce), list())
181 |     expect_equal(ncol(rowData(sce)), 0)
182 |     expect_equal(ncol(colData(sce)), 0)
183 |     expect_equal(length(reducedDims(sce)), 0)
184 |     expect_equal(length(rowPairs(sce)), 0)
185 |     expect_equal(length(colPairs(sce)), 0)
186 | })
187 | 
188 | test_that("Selective slot conversion works", {
189 |     sce <- readH5AD(file, uns = "iroot")
190 | 
191 |     expect_identical(names(metadata(sce)), "iroot")
192 | })
193 | 
194 | test_that("Selective DF conversion works", {
195 |     sce <- readH5AD(file, obs = "cell_type")
196 | 
197 |     expect_identical(names(colData(sce)), "cell_type")
198 | })
199 | 
200 | test_that("Conversion of raw works", {
201 |     skip_if_offline()
202 | 
203 |     cache <- BiocFileCache::BiocFileCache(ask = FALSE)
204 |     example_file <- BiocFileCache::bfcrpath(
205 |         cache, "https://ndownloader.figshare.com/files/30462915"
206 |     )
207 | 
208 |     sce <- readH5AD(example_file, raw = TRUE)
209 | 
210 |     names <- list(
211 |         assays = c("X"),
212 |         colData = c(
213 |             "n_genes", "n_genes_by_counts", "total_counts",
214 |             "total_counts_mt", "pct_counts_mt", "leiden"
215 |         ),
216 |         rowData = c(
217 |             "gene_ids", "n_cells", "mt", "n_cells_by_counts",
218 |             "mean_counts", "pct_dropout_by_counts", "total_counts",
219 |             "highly_variable", "means", "dispersions",
220 |             "dispersions_norm", "mean", "std"
221 |         ),
222 |         metadata = c(
223 |             "hvg", "leiden", "neighbors", "pca", "rank_genes_groups",
224 |             "umap"
225 |         ),
226 |         redDim = c("X_pca", "X_umap"),
227 |         varm = c("PCs"),
228 |         colPairs = c("connectivities", "distances"),
229 |         raw_rowData = c(
230 |             "gene_ids", "n_cells", "mt", "n_cells_by_counts",
231 |             "mean_counts", "pct_dropout_by_counts", "total_counts",
232 |             "highly_variable", "means", "dispersions",
233 |             "dispersions_norm"
234 |         )
235 |     )
236 | 
237 |     missing <- list()
238 | 
239 |     validateH5ADSCE(sce, names, missing)
240 | })
241 | 
242 | test_that("Conversion of raw works with use_hdf5 = TRUE", {
243 |     skip_if_offline()
244 | 
245 |     cache <- BiocFileCache::BiocFileCache(ask = FALSE)
246 |     example_file <- BiocFileCache::bfcrpath(
247 |         cache, "https://ndownloader.figshare.com/files/30462915"
248 |     )
249 | 
250 |     sce <- readH5AD(example_file, raw = TRUE, use_hdf5 = TRUE)
251 | 
252 |     names <- list(
253 |         assays = c("X"),
254 |         colData = c(
255 |             "n_genes", "n_genes_by_counts", "total_counts",
256 |             "total_counts_mt", "pct_counts_mt", "leiden"
257 |         ),
258 |         rowData = c(
259 |             "gene_ids", "n_cells", "mt", "n_cells_by_counts",
260 |             "mean_counts", "pct_dropout_by_counts", "total_counts",
261 |             "highly_variable", "means", "dispersions",
262 |             "dispersions_norm", "mean", "std"
263 |         ),
264 |         metadata = c(
265 |             "hvg", "leiden", "neighbors", "pca", "rank_genes_groups",
266 |             "umap"
267 |         ),
268 |         redDim = c("X_pca", "X_umap"),
269 |         varm = c("PCs"),
270 |         colPairs = c("connectivities", "distances"),
271 |         raw_rowData = c(
272 |             "gene_ids", "n_cells", "mt", "n_cells_by_counts",
273 |             "mean_counts", "pct_dropout_by_counts", "total_counts",
274 |             "highly_variable", "means", "dispersions",
275 |             "dispersions_norm"
276 |         )
277 |     )
278 | 
279 |     missing <- list()
280 | 
281 |     validateH5ADSCE(sce, names, missing)
282 | })
283 | 


--------------------------------------------------------------------------------
/tests/testthat/test-validation.R:
--------------------------------------------------------------------------------
 1 | file <- system.file("extdata", "example_anndata.h5ad",
 2 |     package = "zellkonverter"
 3 | )
 4 | sce <- readH5AD(file)
 5 | 
 6 | names <- list(
 7 |     assays = c("X", "counts"),
 8 |     colData = "louvain",
 9 |     rowData = c(
10 |         "n_counts", "highly_variable", "means", "dispersions",
11 |         "dispersions_norm"
12 |     ),
13 |     metadata = c("louvain", "neighbors", "pca", "rank_genes_groups", "umap"),
14 |     redDim = c("X_pca", "X_umap"),
15 |     varm = "PCs",
16 |     colPairs = c("connectivities", "distances")
17 | )
18 | 
19 | missing <- list()
20 | 
21 | test_that("validateH5ADSCE works", {
22 |     validateH5ADSCE(sce, names, missing)
23 |     expect_error(
24 |         validateH5ADSCE(sce, names, list(varm = "PCs")),
25 |         "varm names missing is not TRUE"
26 |     )
27 | })
28 | 
29 | test_that("expectSCE works", {
30 |     expectSCE(sce, sce)
31 | })
32 | 


--------------------------------------------------------------------------------
/tests/testthat/test-write.R:
--------------------------------------------------------------------------------
  1 | # This tests the writeH5AD function (and by implication, SCE2AnnData).
  2 | library(scRNAseq)
  3 | 
  4 | sce <- ZeiselBrainData()
  5 | reducedDim(sce, "WHEE") <- matrix(runif(ncol(sce) * 10), ncol = 10)
  6 | 
  7 | test_that("writeH5AD works as expected", {
  8 |     temp <- tempfile(fileext = ".h5ad")
  9 |     writeH5AD(sce, temp)
 10 |     expect_true(file.exists(temp))
 11 | 
 12 |     # Reading it back out again. Hopefully we didn't lose anything important.
 13 |     out <- readH5AD(temp)
 14 | 
 15 |     expect_identical(dimnames(out), dimnames(sce))
 16 |     expect_equal(assay(out), assay(sce))
 17 |     expect_identical(reducedDims(out), reducedDims(sce))
 18 | 
 19 |     # Need to coerce the factors back to strings.
 20 |     row_data <- rowData(out)
 21 |     for (i in seq_len(ncol(row_data))) {
 22 |         if (is.factor(row_data[[i]])) {
 23 |             row_data[[i]] <- as.character(row_data[[i]])
 24 |         }
 25 |     }
 26 |     expect_identical(row_data, rowData(sce))
 27 | 
 28 |     col_data <- colData(out)
 29 |     for (i in seq_len(ncol(col_data))) {
 30 |         if (is.factor(col_data[[i]])) {
 31 |             col_data[[i]] <- as.character(col_data[[i]])
 32 |         }
 33 |     }
 34 |     names(col_data) <- names(colData(sce))
 35 |     expect_identical(col_data, colData(sce))
 36 | })
 37 | 
 38 | test_that("writeH5AD works as expected with version 0.10.9", {
 39 |     temp <- tempfile(fileext = ".h5ad")
 40 |     writeH5AD(sce, temp, version = "0.10.9")
 41 |     expect_true(file.exists(temp))
 42 | 
 43 |     # Reading it back out again. Hopefully we didn't lose anything important.
 44 |     out <- readH5AD(temp, version = "0.10.9")
 45 | 
 46 |     expect_identical(dimnames(out), dimnames(sce))
 47 |     expect_equal(assay(out), assay(sce))
 48 |     expect_identical(reducedDims(out), reducedDims(sce))
 49 | 
 50 |     # Need to coerce the factors back to strings.
 51 |     row_data <- rowData(out)
 52 |     for (i in seq_len(ncol(row_data))) {
 53 |         if (is.factor(row_data[[i]])) {
 54 |             row_data[[i]] <- as.character(row_data[[i]])
 55 |         }
 56 |     }
 57 |     expect_identical(row_data, rowData(sce))
 58 | 
 59 |     col_data <- colData(out)
 60 |     for (i in seq_len(ncol(col_data))) {
 61 |         if (is.factor(col_data[[i]])) {
 62 |             col_data[[i]] <- as.character(col_data[[i]])
 63 |         }
 64 |     }
 65 |     names(col_data) <- names(colData(sce))
 66 |     expect_identical(col_data, colData(sce))
 67 | })
 68 | 
 69 | test_that("writeH5AD works as expected with version 0.10.6", {
 70 |     temp <- tempfile(fileext = ".h5ad")
 71 |     writeH5AD(sce, temp, version = "0.10.6")
 72 |     expect_true(file.exists(temp))
 73 | 
 74 |     # Reading it back out again. Hopefully we didn't lose anything important.
 75 |     out <- readH5AD(temp, version = "0.10.6")
 76 | 
 77 |     expect_identical(dimnames(out), dimnames(sce))
 78 |     expect_equal(assay(out), assay(sce))
 79 |     expect_identical(reducedDims(out), reducedDims(sce))
 80 | 
 81 |     # Need to coerce the factors back to strings.
 82 |     row_data <- rowData(out)
 83 |     for (i in seq_len(ncol(row_data))) {
 84 |         if (is.factor(row_data[[i]])) {
 85 |             row_data[[i]] <- as.character(row_data[[i]])
 86 |         }
 87 |     }
 88 |     expect_identical(row_data, rowData(sce))
 89 | 
 90 |     col_data <- colData(out)
 91 |     for (i in seq_len(ncol(col_data))) {
 92 |         if (is.factor(col_data[[i]])) {
 93 |             col_data[[i]] <- as.character(col_data[[i]])
 94 |         }
 95 |     }
 96 |     names(col_data) <- names(colData(sce))
 97 |     expect_identical(col_data, colData(sce))
 98 | })
 99 | 
100 | test_that("writeH5AD works as expected with version 0.10.2", {
101 |     temp <- tempfile(fileext = ".h5ad")
102 |     writeH5AD(sce, temp, version = "0.10.2")
103 |     expect_true(file.exists(temp))
104 | 
105 |     # Reading it back out again. Hopefully we didn't lose anything important.
106 |     out <- readH5AD(temp, version = "0.10.2")
107 | 
108 |     expect_identical(dimnames(out), dimnames(sce))
109 |     expect_equal(assay(out), assay(sce))
110 |     expect_identical(reducedDims(out), reducedDims(sce))
111 | 
112 |     # Need to coerce the factors back to strings.
113 |     row_data <- rowData(out)
114 |     for (i in seq_len(ncol(row_data))) {
115 |         if (is.factor(row_data[[i]])) {
116 |             row_data[[i]] <- as.character(row_data[[i]])
117 |         }
118 |     }
119 |     expect_identical(row_data, rowData(sce))
120 | 
121 |     col_data <- colData(out)
122 |     for (i in seq_len(ncol(col_data))) {
123 |         if (is.factor(col_data[[i]])) {
124 |             col_data[[i]] <- as.character(col_data[[i]])
125 |         }
126 |     }
127 |     names(col_data) <- names(colData(sce))
128 |     expect_identical(col_data, colData(sce))
129 | })
130 | 
131 | test_that("writeH5AD works as expected with version 0.9.2", {
132 |     temp <- tempfile(fileext = ".h5ad")
133 |     writeH5AD(sce, temp, version = "0.9.2")
134 |     expect_true(file.exists(temp))
135 | 
136 |     # Reading it back out again. Hopefully we didn't lose anything important.
137 |     out <- readH5AD(temp, version = "0.9.2")
138 | 
139 |     expect_identical(dimnames(out), dimnames(sce))
140 |     expect_equal(assay(out), assay(sce))
141 |     expect_identical(reducedDims(out), reducedDims(sce))
142 | 
143 |     # Need to coerce the factors back to strings.
144 |     row_data <- rowData(out)
145 |     for (i in seq_len(ncol(row_data))) {
146 |         if (is.factor(row_data[[i]])) {
147 |             row_data[[i]] <- as.character(row_data[[i]])
148 |         }
149 |     }
150 |     expect_identical(row_data, rowData(sce))
151 | 
152 |     col_data <- colData(out)
153 |     for (i in seq_len(ncol(col_data))) {
154 |         if (is.factor(col_data[[i]])) {
155 |             col_data[[i]] <- as.character(col_data[[i]])
156 |         }
157 |     }
158 |     names(col_data) <- names(colData(sce))
159 |     expect_identical(col_data, colData(sce))
160 | })
161 | 
162 | test_that("writeH5AD works as expected with version 0.8.0", {
163 |     temp <- tempfile(fileext = ".h5ad")
164 |     writeH5AD(sce, temp, version = "0.8.0")
165 |     expect_true(file.exists(temp))
166 | 
167 |     # Reading it back out again. Hopefully we didn't lose anything important.
168 |     out <- readH5AD(temp, version = "0.8.0")
169 | 
170 |     expect_identical(dimnames(out), dimnames(sce))
171 |     expect_equal(assay(out), assay(sce))
172 |     expect_identical(reducedDims(out), reducedDims(sce))
173 | 
174 |     # Need to coerce the factors back to strings.
175 |     row_data <- rowData(out)
176 |     for (i in seq_len(ncol(row_data))) {
177 |         if (is.factor(row_data[[i]])) {
178 |             row_data[[i]] <- as.character(row_data[[i]])
179 |         }
180 |     }
181 |     expect_identical(row_data, rowData(sce))
182 | 
183 |     col_data <- colData(out)
184 |     for (i in seq_len(ncol(col_data))) {
185 |         if (is.factor(col_data[[i]])) {
186 |             col_data[[i]] <- as.character(col_data[[i]])
187 |         }
188 |     }
189 |     names(col_data) <- names(colData(sce))
190 |     expect_identical(col_data, colData(sce))
191 | })
192 | 
193 | test_that("writeH5AD works as expected with version 0.7.6", {
194 |     # Python 3.7 is not available for aarch64
195 |     skip_on_os("mac", arch = "aarch64")
196 |     skip_on_os("linux", arch = "aarch64")
197 | 
198 |     temp <- tempfile(fileext = ".h5ad")
199 |     writeH5AD(sce, temp, version = "0.7.6")
200 |     expect_true(file.exists(temp))
201 | 
202 |     # Reading it back out again. Hopefully we didn't lose anything important.
203 |     out <- readH5AD(temp, version = "0.7.6")
204 | 
205 |     expect_identical(dimnames(out), dimnames(sce))
206 |     expect_equal(assay(out), assay(sce))
207 |     expect_identical(reducedDims(out), reducedDims(sce))
208 | 
209 |     # Need to coerce the factors back to strings.
210 |     row_data <- rowData(out)
211 |     for (i in seq_len(ncol(row_data))) {
212 |         if (is.factor(row_data[[i]])) {
213 |             row_data[[i]] <- as.character(row_data[[i]])
214 |         }
215 |     }
216 |     expect_identical(row_data, rowData(sce))
217 | 
218 |     col_data <- colData(out)
219 |     for (i in seq_len(ncol(col_data))) {
220 |         if (is.factor(col_data[[i]])) {
221 |             col_data[[i]] <- as.character(col_data[[i]])
222 |         }
223 |     }
224 |     names(col_data) <- names(colData(sce))
225 |     expect_identical(col_data, colData(sce))
226 | })
227 | 
228 | test_that("writeH5AD works as expected with verbose=TRUE", {
229 |     temp <- tempfile(fileext = ".h5ad")
230 |     writeH5AD(sce, temp, verbose = TRUE)
231 |     expect_true(file.exists(temp))
232 | 
233 |     # Reading it back out again. Hopefully we didn't lose anything important.
234 |     out <- readH5AD(temp)
235 | 
236 |     expect_identical(dimnames(out), dimnames(sce))
237 |     expect_equal(assay(out), assay(sce))
238 |     expect_identical(reducedDims(out), reducedDims(sce))
239 | 
240 |     # Need to coerce the factors back to strings.
241 |     row_data <- rowData(out)
242 |     for (i in seq_len(ncol(row_data))) {
243 |         if (is.factor(row_data[[i]])) {
244 |             row_data[[i]] <- as.character(row_data[[i]])
245 |         }
246 |     }
247 |     expect_identical(row_data, rowData(sce))
248 | 
249 |     col_data <- colData(out)
250 |     for (i in seq_len(ncol(col_data))) {
251 |         if (is.factor(col_data[[i]])) {
252 |             col_data[[i]] <- as.character(col_data[[i]])
253 |         }
254 |     }
255 |     names(col_data) <- names(colData(sce))
256 |     expect_identical(col_data, colData(sce))
257 | })
258 | 
259 | test_that("writeH5AD works as expected with sparse matrices", {
260 |     sparse_sce <- sce
261 |     mat <- assay(sparse_sce)
262 |     counts(sparse_sce) <- as(mat, "CsparseMatrix")
263 |     logcounts(sparse_sce) <- counts(sparse_sce) * 10
264 |     assay(sparse_sce, "random") <- mat # throwing in a dense matrix in a mixture.
265 | 
266 |     temp <- tempfile(fileext = ".h5ad")
267 |     writeH5AD(sparse_sce, temp)
268 |     expect_true(file.exists(temp))
269 | 
270 |     # Reading it back out again. Hopefully we didn't lose anything important.
271 |     out <- readH5AD(temp, X_name = "X")
272 | 
273 |     expect_identical(counts(sparse_sce), assay(out, "X"))
274 |     expect_identical(logcounts(sparse_sce), logcounts(out))
275 |     # expect_identical() was failing on Windows for some reason...
276 |     expect_equal(assay(sparse_sce, "random"), assay(out, "random"))
277 | })
278 | 
279 | test_that("writeH5AD works with assay skipping", {
280 |     temp <- tempfile(fileext = ".h5ad")
281 |     writeH5AD(sce, temp, skip_assays = TRUE)
282 |     expect_true(file.exists(temp))
283 | 
284 |     out <- HDF5Array::HDF5Array(temp, "X/data")
285 |     expect_identical(sum(out), 0) # it's empty!
286 | })
287 | 
288 | test_that("writeH5AD works with X_name", {
289 |     temp <- tempfile(fileext = ".h5ad")
290 |     writeH5AD(sce, temp, X_name = "counts")
291 |     expect_true(file.exists(temp))
292 | 
293 |     out <- readH5AD(temp, X_name = "X")
294 |     expect_equal(assay(out, "X"), assay(sce, "counts"))
295 | })
296 | 
297 | test_that("writeH5AD works in a separate process", {
298 |     # Restore the basilisk settings on exit, even if the test fails.
299 |     oldshare <- basilisk::getBasiliskShared()
300 |     oldfork <- basilisk::getBasiliskFork()
301 |     on.exit(basilisk::setBasiliskShared(oldshare), add = TRUE)
302 |     on.exit(basilisk::setBasiliskFork(oldfork), add = TRUE)
303 |     basilisk::setBasiliskShared(FALSE)
304 |     basilisk::setBasiliskFork(FALSE)
305 | 
306 |     temp <- tempfile(fileext = ".h5ad")
307 |     writeH5AD(sce, temp)
308 |     expect_true(file.exists(temp))
309 | })
310 | 
311 | test_that("writeH5AD DelayedArray X works", {
312 |     delayed_sce <- sce
313 |     counts(delayed_sce) <- DelayedArray::DelayedArray(counts(delayed_sce))
314 | 
315 |     temp <- tempfile(fileext = ".h5ad")
316 | 
317 |     writeH5AD(delayed_sce, temp, X_name = "counts")
318 |     expect_true(file.exists(temp))
319 | 
320 |     out <- readH5AD(temp, X_name = "X")
321 | 
322 |     # Identical fails on Windows for some reason
323 |     expect_equal(counts(sce), assay(out, "X"))
324 | })
325 | 
326 | test_that("writeH5AD sparse DelayedArray X works", {
327 |     delayed_sce <- sce
328 |     sparse_counts <- as(counts(delayed_sce), "CsparseMatrix")
329 |     counts(delayed_sce) <- DelayedArray::DelayedArray(sparse_counts)
330 | 
331 |     temp <- tempfile(fileext = ".h5ad")
332 | 
333 |     writeH5AD(delayed_sce, temp, X_name = "counts")
334 |     expect_true(file.exists(temp))
335 | 
336 |     out <- readH5AD(temp, X_name = "X")
337 | 
338 |     # Sparse DelayedArrays are currently coerced into memory
339 |     # This expectation will need to be changed once that is fixed
340 |     expect_identical(sparse_counts, assay(out, "X"))
341 | })
342 | 
343 | test_that("writeH5AD DelayedArray layer works", {
344 |     delayed_sce <- sce
345 |     assay(delayed_sce, "layer") <- DelayedArray::DelayedArray(
346 |         counts(delayed_sce)
347 |     )
348 | 
349 |     temp <- tempfile(fileext = ".h5ad")
350 | 
351 |     writeH5AD(delayed_sce, temp)
352 |     expect_true(file.exists(temp))
353 | 
354 |     out <- readH5AD(temp, X_name = "X")
355 | 
356 |     # Identical fails on Windows for some reason
357 |     expect_equal(counts(sce), assay(out, "layer"))
358 | })
359 | 
360 | test_that("writeH5AD works with colData list columns", {
361 |     list_sce <- sce
362 |     colData(list_sce)$ListCol <- lapply(seq_len(ncol(list_sce)), function(x) {
363 |         sample(LETTERS, 2)
364 |     })
365 | 
366 |     temp <- tempfile(fileext = ".h5ad")
367 | 
368 |     expect_warning(writeH5AD(list_sce, temp), "columns are not atomic")
369 |     expect_true(file.exists(temp))
370 | 
371 |     # Knowing what comes back is hard so just check there is something
372 |     out <- readH5AD(temp, X_name = "X")
373 |     expect_true("ListCol" %in% names(metadata(out)$.colData))
374 | })
375 | 
376 | test_that("writeH5AD works with rowData list columns", {
377 |     list_sce <- sce
378 |     rowData(list_sce)$ListCol <- lapply(seq_len(nrow(list_sce)), function(x) {
379 |         sample(LETTERS, 2)
380 |     })
381 | 
382 |     temp <- tempfile(fileext = ".h5ad")
383 | 
384 |     expect_warning(writeH5AD(list_sce, temp), "columns are not atomic")
385 |     expect_true(file.exists(temp))
386 | 
387 |     # Knowing what comes back is hard so just check there is something
388 |     out <- readH5AD(temp, X_name = "X")
389 |     expect_true("ListCol" %in% names(metadata(out)$.rowData))
390 | })
391 | 
392 | test_that("writeH5AD works with gzip compression", {
393 |     temp <- tempfile(fileext = ".h5ad")
394 |     writeH5AD(sce, temp, X_name = "counts", compression = "gzip")
395 |     expect_true(file.exists(temp))
396 | 
397 |     out <- readH5AD(temp, X_name = "X")
398 |     expect_equal(assay(out, "X"), assay(sce, "counts"))
399 | })
400 | 
401 | test_that("writeH5AD works with lzf compression", {
402 |     temp <- tempfile(fileext = ".h5ad")
403 |     writeH5AD(sce, temp, X_name = "counts", compression = "lzf")
404 |     expect_true(file.exists(temp))
405 | 
406 |     out <- readH5AD(temp, X_name = "X")
407 |     expect_equal(assay(out, "X"), assay(sce, "counts"))
408 | })
409 | 
410 | test_that("Skipping slot conversion works", {
411 |     h5ad_path <- tempfile(fileext = ".h5ad")
412 |     # Disable conversion of every slot listed below when writing
413 |     writeH5AD(
414 |         sce, h5ad_path,
415 |         assays = FALSE, colData = FALSE, rowData = FALSE, varm = FALSE,
416 |         reducedDims = FALSE, metadata = FALSE, colPairs = FALSE,
417 |         rowPairs = FALSE
418 |     )
419 |     roundtrip <- readH5AD(h5ad_path, X_name = "X")
420 |     expect_identical(assayNames(roundtrip), "X")
421 |     expect_identical(metadata(roundtrip), list(X_name = "counts"))
422 |     expect_equal(ncol(rowData(roundtrip)), 0)
423 |     expect_equal(ncol(colData(roundtrip)), 0)
424 |     expect_equal(length(reducedDims(roundtrip)), 0)
425 |     expect_equal(length(rowPairs(roundtrip)), 0)
426 |     expect_equal(length(colPairs(roundtrip)), 0)
427 | })
428 | 
429 | test_that("Selective DF conversion works", {
430 |     # Request only the "tissue" column of colData when writing
431 |     h5ad_path <- tempfile(fileext = ".h5ad")
432 |     writeH5AD(sce, h5ad_path, assays = FALSE, colData = "tissue")
433 |     roundtrip <- readH5AD(h5ad_path, X_name = "X")
434 | 
435 |     expect_identical(names(colData(roundtrip)), "tissue")
436 | })
437 | 
438 | test_that("Writing works with empty rowData/colData", {
439 |     # An SCE built from nothing but a 100 x 50 Poisson count matrix
440 |     poisson_counts <- matrix(rpois(100 * 50, 4), nrow = 100, ncol = 50)
441 |     mini_sce <- SingleCellExperiment::SingleCellExperiment(
442 |         assays = list(counts = poisson_counts)
443 |     )
444 |     h5ad_path <- tempfile(fileext = ".h5ad")
445 |     writeH5AD(mini_sce, h5ad_path)
446 |     roundtrip <- readH5AD(h5ad_path, X_name = "X")
447 |     expect_true(all(counts(mini_sce) == assay(roundtrip, "X")))
448 | })
449 | 
450 | test_that("writeH5AD works with SpatialExperiment objects", {
451 |     skip_if_not_installed("SpatialExperiment")
452 | 
453 |     spatial_exp <- SpatialExperiment::SpatialExperiment(
454 |         assays = list(counts = SingleCellExperiment::counts(sce))
455 |     )
456 |     # Random 2D coordinates, one named row per column of sce
457 |     xy <- matrix(
458 |         runif(ncol(sce) * 2),
459 |         ncol = 2,
460 |         dimnames = list(colnames(sce), paste0("Spatial", 1:2))
461 |     )
462 |     SpatialExperiment::spatialCoords(spatial_exp) <- xy
463 | 
464 |     # Write to H5AD and read it back in
465 |     h5ad_path <- tempfile(fileext = ".h5ad")
466 |     writeH5AD(spatial_exp, h5ad_path)
467 |     expect_true(file.exists(h5ad_path))
468 | 
469 |     roundtrip <- readH5AD(h5ad_path, X_name = "X")
470 | 
471 |     expect_identical(assay(roundtrip, "X"), assay(spatial_exp, "counts"))
472 |     expect_identical(dimnames(roundtrip), dimnames(spatial_exp))
473 | 
474 |     # Check the spatial coordinates.
475 |     expect_identical(reducedDims(roundtrip)$spatial, xy)
476 | })
477 | 
478 | test_that("writeH5AD works with SpatialExperiment objects without names", {
479 |     skip_if_not_installed("SpatialExperiment")
480 | 
481 |     spatial_exp <- SpatialExperiment::SpatialExperiment(
482 |         assays = list(counts = SingleCellExperiment::counts(sce))
483 |     )
484 |     # Random 2D coordinates, created without any dimnames
485 |     xy <- matrix(runif(ncol(sce) * 2), ncol = 2)
486 |     SpatialExperiment::spatialCoords(spatial_exp) <- xy
487 | 
488 |     # Write to H5AD and read it back in
489 |     h5ad_path <- tempfile(fileext = ".h5ad")
490 |     writeH5AD(spatial_exp, h5ad_path)
491 |     expect_true(file.exists(h5ad_path))
492 | 
493 |     roundtrip <- readH5AD(h5ad_path, X_name = "X")
494 | 
495 |     expect_identical(assay(roundtrip, "X"), assay(spatial_exp, "counts"))
496 |     expect_identical(dimnames(roundtrip), dimnames(spatial_exp))
497 | 
498 |     # Check the spatial coordinates.
499 |     expect_identical(
500 |         reducedDim(roundtrip, "spatial", withDimnames = FALSE), xy
501 |     )
502 | })
503 | 
504 | test_that("writeH5AD works without names", {
505 |     # Neither the count matrix nor the reduced dimension is given dimnames
506 |     count_mat <- matrix(rpois(100 * 50, 4), nrow = 100, ncol = 50)
507 |     embedding <- matrix(runif(50 * 10), ncol = 10)
508 |     nameless_sce <- SingleCellExperiment::SingleCellExperiment(
509 |         assays = list(counts = count_mat),
510 |         reducedDims = list(redDim = embedding)
511 |     )
512 | 
513 |     h5ad_path <- tempfile(fileext = ".h5ad")
514 |     writeH5AD(nameless_sce, h5ad_path)
515 |     roundtrip <- readH5AD(h5ad_path, X_name = "X")
516 | 
517 |     # Assay values must match element-wise
518 |     expect_true(all(counts(nameless_sce) == assay(roundtrip, "X")))
519 | 
520 |     # The reduced dimension must be identical when fetched without dimnames
521 |     original_dim <- reducedDim(nameless_sce, "redDim")
522 |     returned_dim <- reducedDim(roundtrip, "redDim", withDimnames = FALSE)
523 |     expect_identical(returned_dim, original_dim)
524 | })
525 | 
526 | test_that("writeH5AD keeps dimnames", {
527 |     # Small matrix with 5 named genes (rows) and 8 named cells (columns)
528 |     gene_ids <- LETTERS[1:5]
529 |     cell_ids <- letters[1:8]
530 |     count_mat <- matrix(
531 |         rpois(length(gene_ids) * length(cell_ids), 5),
532 |         ncol = length(cell_ids),
533 |         dimnames = list(gene_ids, cell_ids)
534 |     )
535 |     dimname_sce <- SingleCellExperiment::SingleCellExperiment(
536 |         list(counts = count_mat)
537 |     )
538 | 
539 |     # Write to H5AD and read it back in
540 |     h5ad_path <- tempfile(fileext = ".h5ad")
541 |     writeH5AD(dimname_sce, h5ad_path)
542 | 
543 |     roundtrip <- readH5AD(h5ad_path, X_name = "X")
544 | 
545 |     expect_identical(dimnames(roundtrip), dimnames(dimname_sce))
546 | })
547 | 


--------------------------------------------------------------------------------
/tests/testthat/test-zzz-anndata.R:
--------------------------------------------------------------------------------
  1 | # This file tests compatibility with the R {anndata} package
  2 | # Despite best efforts the package isn't reliably unloaded so these tests have
  3 | # been moved to a separate file that is (hopefully) always run last
  4 | 
  5 | test_that("Reading is compatible with R anndata", {
  6 |     skip_if_offline()
  7 |     skip_if_not_installed("withr")
  8 |     skip_if_not_installed("anndata")
  9 | 
 10 |     withr::with_package("anndata", {
 11 |         # First read the example file bundled with the package
 12 |         bundled_path <- system.file(
 13 |             "extdata", "krumsiek11.h5ad", package = "zellkonverter"
 14 |         )
 15 |         bundled_sce <- readH5AD(bundled_path)
 16 |         expect_s4_class(bundled_sce, "SingleCellExperiment")
 17 |         expect_identical(assayNames(bundled_sce), "X")
 18 |         expect_identical(colnames(colData(bundled_sce)), "cell_type")
 19 | 
 20 |         # Then read a file obtained from figshare through BiocFileCache
 21 |         bfc <- BiocFileCache::BiocFileCache(ask = FALSE)
 22 |         remote_path <- BiocFileCache::bfcrpath(
 23 |             bfc, "https://ndownloader.figshare.com/files/30462915"
 24 |         )
 25 |         remote_sce <- readH5AD(remote_path, raw = TRUE)
 26 | 
 27 |         expected_names <- list(
 28 |             assays = c("X"),
 29 |             colData = c(
 30 |                 "n_genes", "n_genes_by_counts", "total_counts",
 31 |                 "total_counts_mt", "pct_counts_mt", "leiden"
 32 |             ),
 33 |             rowData = c(
 34 |                 "gene_ids", "n_cells", "mt", "n_cells_by_counts",
 35 |                 "mean_counts", "pct_dropout_by_counts", "total_counts",
 36 |                 "highly_variable", "means", "dispersions",
 37 |                 "dispersions_norm", "mean", "std"
 38 |             ),
 39 |             raw_rowData = c(
 40 |                 "gene_ids", "n_cells", "mt", "n_cells_by_counts",
 41 |                 "mean_counts", "pct_dropout_by_counts",
 42 |                 "total_counts", "highly_variable", "means",
 43 |                 "dispersions", "dispersions_norm"
 44 |             ),
 45 |             redDim = c("X_pca", "X_umap"),
 46 |             varm = c("PCs"),
 47 |             colPairs = c("connectivities", "distances"),
 48 |             metadata = c(
 49 |                 "hvg", "leiden", "neighbors", "pca",
 50 |                 "rank_genes_groups", "umap"
 51 |             )
 52 |         )
 53 | 
 54 |         expected_missing <- list()
 55 | 
 56 |         validateH5ADSCE(remote_sce, expected_names, expected_missing)
 57 |     })
 58 | 
 59 |     pkgload::unload("anndata")
 60 | })
 61 | 
 62 | test_that("Writing is compatible with R anndata", {
 63 |     skip_if_offline()
 64 |     skip_if_not_installed("withr")
 65 |     skip_if_not_installed("anndata")
 66 | 
 67 |     withr::with_package("anndata", {
 68 |         # Need to coerce the factors back to strings before comparing.
 69 |         unfactor <- function(df) {
 70 |             for (idx in seq_len(ncol(df))) {
 71 |                 if (is.factor(df[[idx]])) {
 72 |                     df[[idx]] <- as.character(df[[idx]])
 73 |                 }
 74 |             }
 75 |             df
 76 |         }
 77 | 
 78 |         zeisel <- scRNAseq::ZeiselBrainData()
 79 |         h5ad_path <- tempfile(fileext = ".h5ad")
 80 |         writeH5AD(zeisel, h5ad_path)
 81 |         expect_true(file.exists(h5ad_path))
 82 | 
 83 |         # Reading it back out again. Hopefully we didn't lose anything important
 84 |         roundtrip <- readH5AD(h5ad_path)
 85 | 
 86 |         expect_identical(dimnames(roundtrip), dimnames(zeisel))
 87 |         expect_equal(assay(roundtrip), assay(zeisel))
 88 |         expect_identical(reducedDims(roundtrip), reducedDims(zeisel))
 89 | 
 90 |         # Row annotations are compared once factors are strings again
 91 |         expect_identical(unfactor(rowData(roundtrip)), rowData(zeisel))
 92 | 
 93 |         # Column names are overwritten with the originals before comparing
 94 |         col_strings <- unfactor(colData(roundtrip))
 95 |         names(col_strings) <- names(colData(zeisel))
 96 |         expect_identical(col_strings, colData(zeisel))
 97 |     })
 98 | 
 99 |     pkgload::unload("anndata")
100 | })
101 | 


--------------------------------------------------------------------------------
/vignettes/zellkonverter.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: Converting single-cell data structures between Bioconductor and Python
  3 | author: 
  4 | - name: Luke Zappia
  5 |   email: luke@lazappi.id.au
  6 | - name: Aaron Lun
  7 |   email: infinite.monkeys.with.keyboards@gmail.com
  8 | date: "Revised: 17 April 2022"
  9 | output:
 10 |   BiocStyle::html_document:
 11 |     toc_float: true
 12 | package: zellkonverter 
 13 | vignette: >
 14 |   %\VignetteIndexEntry{Converting to/from AnnData to SingleCellExperiments}
 15 |   %\VignetteEngine{knitr::rmarkdown}
 16 |   %\VignetteEncoding{UTF-8}    
 17 | ---
 18 | 
 19 | ```{r setup, echo = FALSE, results = "hide", message = FALSE}
 20 | library(knitr) # library() stops right away if knitr is not installed
 21 | library(BiocStyle)
 22 | opts_chunk$set(error = FALSE, message = FALSE, warning = FALSE) # chunk defaults
 23 | ```
 24 | 
 25 | Overview
 26 | ========
 27 | 
 28 | This package provides a lightweight interface between the Bioconductor
 29 | `SingleCellExperiment` data structure and the Python `AnnData`-based single-cell
 30 | analysis environment. The idea is to enable users and developers to easily move
 31 | data between these frameworks to construct a multi-language analysis pipeline
 32 | across R/Bioconductor and Python.
 33 | 
 34 | Reading and writing H5AD files
 35 | ==============================
 36 | 
 37 | The `readH5AD()` function can be used to read a `SingleCellExperiment` from an
 38 | H5AD file. The resulting object can be manipulated in the usual way, as
 39 | described in the `r Biocpkg("SingleCellExperiment")` documentation.
 40 | 
 41 | ```{r read}
 42 | library(zellkonverter)
 43 | 
 44 | # Locate the example H5AD file installed with the package.
 45 | example_h5ad <- system.file(
 46 |     "extdata", "krumsiek11.h5ad", package = "zellkonverter"
 47 | )
 48 | # Read the file; the result is printed because it is not assigned.
 49 | readH5AD(example_h5ad)
 50 | ```
 51 | 
 52 | We can also write a `SingleCellExperiment` to an H5AD file with the
 53 | `writeH5AD()` function. This is demonstrated below on the classic Zeisel mouse
 54 | brain dataset from the `r Biocpkg("scRNAseq")` package. The resulting file can
 55 | then be directly used in compatible Python-based analysis frameworks.
 56 | 
 57 | ```{r write}
 58 | library(scRNAseq)
 59 | # Write the Zeisel dataset to a temporary file with a ".h5ad" extension.
 60 | sce_zeisel <- ZeiselBrainData()
 61 | out_path <- tempfile(fileext = ".h5ad") # `fileext` sets the file extension
 62 | writeH5AD(sce_zeisel, file = out_path)
 63 | ```
 64 | 
 65 | Converting between `SingleCellExperiment` and `AnnData` objects
 66 | ===============================================================
 67 | 
 68 | Developers and power users who control their Python environments can directly
 69 | convert between `SingleCellExperiment` and
 70 | [`AnnData` objects](https://anndata.readthedocs.io/en/stable/) using the
 71 | `SCE2AnnData()` and `AnnData2SCE()` utilities. These functions expect that
 72 | `r CRANpkg("reticulate")` has already been loaded along with an appropriate
 73 | version of the [_anndata_](https://pypi.org/project/anndata/) package. We
 74 | suggest using the `r Biocpkg("basilisk")` package to set up the Python
 75 | environment before using these functions.
 76 | 
 77 | ```{r convert}
 78 | library(basilisk)
 79 | library(scRNAseq)
 80 | 
 81 | seger <- SegerstolpePancreasData()
 82 | roundtrip <- basiliskRun(
 83 |     env = zellkonverterAnnDataEnv(),
 84 |     sce = seger,
 85 |     fun = function(sce) {
 86 |         # Convert SCE to AnnData:
 87 |         adata <- SCE2AnnData(sce)
 88 |         # Maybe do some work in Python on 'adata', then convert back to an SCE:
 89 |         AnnData2SCE(adata)
 90 |     }
 91 | )
 92 | ```
 93 | 
 94 | Package developers can guarantee that they are using the same versions of Python
 95 | packages as `r Biocpkg("zellkonverter")` by using the `AnnDataDependencies()`
 96 | function to set up their Python environments.
 97 | 
 98 | ```{r anndata-deps}
 99 | AnnDataDependencies() # called without a `version` argument
100 | ```
101 | 
102 | This function can also be used to return dependencies for environments using
103 | older versions of _anndata_.
104 | 
105 | ```{r anndata-deps-old}
106 | AnnDataDependencies(version = "0.7.6") # request an older anndata version
107 | ```
108 | 
109 | Progress messages
110 | =================
111 | 
112 | By default, the functions in `r Biocpkg("zellkonverter")` don't display any
113 | information about their progress, but this can be turned on by setting the
114 | `verbose = TRUE` argument.
115 | 
116 | ```{r verbose}
117 | readH5AD(example_h5ad, verbose = TRUE) # same example file, progress shown
118 | ```
119 | 
120 | If you would like to see progress messages for all functions by default, you
121 | can turn this on using the `setZellkonverterVerbose()` function.
122 | 
123 | ```{r verbose-set, eval = FALSE}
124 | # Not evaluated when the vignette is built (the chunk sets eval = FALSE)
125 | setZellkonverterVerbose(TRUE)
126 | ```
127 | 
128 | Session information
129 | ===================
130 | 
131 | ```{r}
132 | sessionInfo() # print details of the R session used to build this vignette
133 | ```
134 | 


--------------------------------------------------------------------------------