├── .BBSoptions
├── .Rbuildignore
├── .github
├── .gitignore
├── dependabot.yaml
└── workflows
│ ├── check-bioc.yml
│ └── pr-commands.yaml
├── .gitignore
├── CODE_OF_CONDUCT.md
├── DESCRIPTION
├── LICENSE
├── LICENSE.md
├── NAMESPACE
├── NEWS.md
├── R
├── AnnData2SCE.R
├── SCE2AnnData.R
├── basilisk.R
├── read.R
├── reticulate.R
├── ui.R
├── utils.R
├── validation.R
├── write.R
└── zellkonverter-package.R
├── README.md
├── codecov.yml
├── configure
├── configure.win
├── inst
├── NEWS.Rd
├── WORDLIST
├── extdata
│ ├── example_anndata.h5ad
│ ├── krumsiek11.h5ad
│ └── krumsiek11_augmented_v0-8.h5ad
└── scripts
│ ├── example_anndata.R
│ ├── krumsiek11.md
│ └── krumsiek11_augmented.py
├── longtests
├── testthat.R
└── testthat
│ ├── test-cellrank_pancreas.R
│ ├── test-example_anndata.R
│ ├── test-gtex_8tissues.R
│ ├── test-pegasus_marrow.R
│ ├── test-scIB_pancreas.R
│ ├── test-scanpy_pbmc3k.R
│ ├── test-scanpy_trajectory.R
│ ├── test-scvelo_pancreas.R
│ ├── test-scvi_citeseq.R
│ └── test-squidpy_visium.R
├── man
├── AnnData-Conversion.Rd
├── AnnData-Environment.Rd
├── expectSCE.Rd
├── figures
│ ├── AnnData2SCE.png
│ └── zellkonverter.png
├── r-py-conversion.Rd
├── readH5AD.Rd
├── setZellkonverterVerbose.Rd
├── validateH5ADSCE.Rd
├── writeH5AD.Rd
└── zellkonverter-package.Rd
├── tests
├── spelling.R
├── testthat.R
└── testthat
│ ├── default.profraw
│ ├── test-SCE2AnnData.R
│ ├── test-read.R
│ ├── test-validation.R
│ ├── test-write.R
│ └── test-zzz-anndata.R
└── vignettes
└── zellkonverter.Rmd
/.BBSoptions:
--------------------------------------------------------------------------------
1 | RunLongTests: TRUE
2 |
--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^zellkonverter\.Rproj$
2 | ^\.Rproj\.user$
3 | ^LICENSE\.md$
4 | ^CODE_OF_CONDUCT\.md$
5 | ^\.BBSoptions$
6 | ^\.github$
7 | ^codecov\.yml$
8 | ^doc$
9 | ^Meta$
10 |
--------------------------------------------------------------------------------
/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 |
--------------------------------------------------------------------------------
/.github/dependabot.yaml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 | - package-ecosystem: "github-actions"
4 | directory: "/"
5 | schedule:
6 | interval: "weekly"
7 |
--------------------------------------------------------------------------------
/.github/workflows/check-bioc.yml:
--------------------------------------------------------------------------------
1 | ## This is a simplified action for building and testing a Bioconductor package
2 | ## based on:
3 | ## * https://github.com/lcolladotor/biocthis/blob/master/actions/check-bioc.yml
4 | ## * https://github.com/r-lib/actions/blob/master/examples/check-standard.yaml
5 | ## * https://github.com/seandavi/BuildABiocWorkshop2020/blob/master/.github/workflows/basic_checks.yaml
6 | name: R-CMD-check-bioc
7 |
8 | ## Specify which branches to run on
9 | ## The "devel" branch corresponds to Bioc-devel and "RELEASE_X" branches are
10 | ## Bioconductor releases. See http://bioconductor.org/developers/how-to/git/.
11 | on:
12 | push:
13 | branches:
14 | - devel
15 | - 'RELEASE_*'
16 | pull_request:
17 | branches:
18 | - devel
19 | - 'RELEASE_*'
20 |
21 | jobs:
22 | get-bioc-release:
23 | # Identify the Bioconductor release from the git branch. Also specifies a
24 | # Bioconductor Docker image to use.
25 | runs-on: ubuntu-latest
26 | outputs:
27 | biocimage: ${{ steps.get-release.outputs.biocimage }}
28 | biocrelease: ${{ steps.get-release.outputs.biocrelease }}
29 |
30 | steps:
31 | - id: get-release
32 | name: Get Bioconductor release
33 | run: |
34 | if echo "$GITHUB_REF" | grep -q "RELEASE_"; then # release branches are named RELEASE_X_Y
35 | biocrelease="$(basename -- "$GITHUB_REF" | tr '[:upper:]' '[:lower:]')" # lower-cased branch name is used as the image tag
36 | else
37 | biocrelease="devel" # any other ref is checked against Bioc-devel
38 | fi
39 | biocimage="bioconductor/bioconductor_docker:${biocrelease}"
40 | echo "Bioc release: ${biocrelease}"
41 | echo "Bioc docker image: ${biocimage}"
42 | ## Store the information as step outputs (exposed through the job's outputs)
43 | echo "biocimage=${biocimage}" >> "$GITHUB_OUTPUT"
44 | echo "biocrelease=${biocrelease}" >> "$GITHUB_OUTPUT"
45 |
46 | get-bioc-version:
47 | # Identify the Bioconductor version number and R version to use. This is
48 | # done by checking the versions in the Bioconductor Docker container
49 | # selected by get-bioc-release.
50 | runs-on: ubuntu-latest
51 | needs: get-bioc-release
52 | container:
53 | image: ${{ needs.get-bioc-release.outputs.biocimage }}
54 | outputs:
55 | Rversion: ${{ steps.set-versions.outputs.rversion }}
56 | biocversion: ${{ steps.set-versions.outputs.biocversion }}
57 |
58 | steps:
59 | - id: get-versions
60 | name: Get Bioconductor/R versions
61 | run: |
62 | biocconfig <- "https://bioconductor.org/config.yaml"
63 | biocrelease <- "${{ needs.get-bioc-release.outputs.biocrelease }}"
64 | cat("Bioc release RAW:", biocrelease, "\n")
65 | biocrelease <- ifelse( # collapse e.g. "release_3_21" to the "release" status matched against BiocStatus below
66 | grepl("release", biocrelease, fixed = TRUE), # grepl(pattern, x): the pattern comes first
67 | "release", "devel"
68 | )
69 | biocmap <- BiocManager:::.version_map_get_online(biocconfig)
70 | biocversion <- subset(biocmap, BiocStatus == biocrelease)[, 'Bioc']
71 | biocversion_str <- as.character(biocversion)
72 | rversion <- subset(biocmap, BiocStatus == biocrelease)[, 'R']
73 | rversion_str <- as.character(rversion)
74 | # Use R devel for BioC devel between November and May
75 | if (biocrelease == "devel") {
76 | current_month <- as.numeric(format(Sys.Date(), "%m"))
77 | if (current_month >= 11 || current_month <= 5) {
78 | cat("Setting R version to devel", "\n")
79 | rversion <- "devel"
80 | rversion_str <- "devel"
81 | }
82 | }
83 | writeLines(c(biocversion_str, rversion_str), "versions.txt") # line 1 = Bioc version, line 2 = R version; read back by the set-versions step
84 | cat("GET VERSIONS", "\n")
85 | cat("Bioc release: ", biocrelease, "\n")
86 | cat("Bioc version: ", biocversion_str, "\n")
87 | cat("R version: ", rversion_str, "\n")
88 | shell: Rscript {0}
89 | - id: set-versions
90 | name: Set Bioconductor/R versions
91 | run: |
92 | biocversion=$(head -n 1 versions.txt)
93 | rversion=$(tail -n 1 versions.txt)
94 | echo "SET VERSIONS"
95 | echo "Bioc version: ${biocversion}"
96 | echo "R version: ${rversion}"
97 | ## Store the information
98 | echo "biocversion=${biocversion}" >> $GITHUB_OUTPUT
99 | echo "rversion=${rversion}" >> $GITHUB_OUTPUT
100 |
101 | R-CMD-check-docker:
102 | ## Run checks in the Bioconductor Docker container
103 | name: ubuntu-latest (r-biocdocker bioc-${{ needs.get-bioc-version.outputs.biocversion }})
104 | needs: [get-bioc-release, get-bioc-version]
105 | runs-on: ubuntu-latest
106 | container:
107 | image: ${{ needs.get-bioc-release.outputs.biocimage }}
108 | volumes:
109 | - /home/runner/work/_temp/Library:/usr/local/lib/R/host-site-library
110 | env:
111 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
112 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
113 | R_BIOC_VERSION: ${{ needs.get-bioc-version.outputs.biocversion }}
114 |
115 | steps:
116 | - name: Checkout
117 | uses: actions/checkout@v4
118 |
119 | - name: Install extra linux dependencies
120 | run: sudo apt-get update -y && sudo apt-get install -y libcurl4-openssl-dev devscripts qpdf
121 |
122 | - name: Setup R dependencies
123 | uses: r-lib/actions/setup-r-dependencies@v2
124 | with:
125 | cache-version: 3
126 | extra-packages: any::rcmdcheck
127 | needs: check
128 |
129 | - name: Show session info
130 | run: |
131 | options(width = 100)
132 | pkgs <- installed.packages()[, "Package"]
133 | sessioninfo::session_info(pkgs, include_base = TRUE)
134 | shell: Rscript {0}
135 |
136 | - name: Check R package
137 | uses: r-lib/actions/check-r-package@v2
138 | with:
139 | upload-snapshots: true
140 | upload-results: true
141 |
142 | - name: BiocCheck
143 | run: |
144 | BiocManager::install("BiocCheck")
145 | BiocCheck::BiocCheck(
146 | dir('check', 'tar.gz$', full.names = TRUE),
147 | `no-check-R-ver` = TRUE,
148 | `no-check-bioc-help` = TRUE
149 | )
150 | shell: Rscript {0}
151 |
152 | R-CMD-check:
153 | ## Run checks on other platforms.
154 | name: ${{ matrix.config.os }} (r-${{ needs.get-bioc-version.outputs.rversion }} bioc-${{ needs.get-bioc-version.outputs.biocversion }})
155 | needs: [get-bioc-release, get-bioc-version]
156 | runs-on: ${{ matrix.config.os }}
157 | strategy:
158 | fail-fast: false
159 | matrix:
160 | experimental: [true]
161 | config:
162 | - {os: windows-latest}
163 | - {os: macOS-latest}
164 | - {os: ubuntu-24.04, rspm: "https://packagemanager.posit.co/cran/__linux__/noble/latest"}
165 | env:
166 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
167 | RSPM: ${{ matrix.config.rspm }}
168 | R_BIOC_VERSION: ${{ needs.get-bioc-version.outputs.biocversion }}
169 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
170 |
171 | steps:
172 | - name: Checkout
173 | uses: actions/checkout@v4
174 |
175 | - name: Install Linux system dependencies
176 | if: runner.os == 'Linux'
177 | run: |
178 | sudo apt update
179 | sudo apt-get update
180 | sudo apt-get upgrade libstdc++6
181 |
182 | - name: Setup Pandoc
183 | uses: r-lib/actions/setup-pandoc@v2
184 |
185 | - name: Setup R
186 | uses: r-lib/actions/setup-r@v2
187 | with:
188 | r-version: ${{ needs.get-bioc-version.outputs.rversion }}
189 | use-public-rspm: true
190 |
191 | - name: Setup R dependencies
192 | uses: r-lib/actions/setup-r-dependencies@v2
193 | with:
194 | cache-version: 3
195 | extra-packages: any::rcmdcheck
196 | needs: check
197 |
198 | - name: Session info
199 | run: |
200 | options(width = 100)
201 | pkgs <- installed.packages()[, "Package"]
202 | sessioninfo::session_info(pkgs, include_base = TRUE)
203 | shell: Rscript {0}
204 |
205 | - name: Check R package
206 | uses: r-lib/actions/check-r-package@v2
207 | with:
208 | upload-snapshots: true
209 | upload-results: true
210 |
211 | test-coverage:
212 | ## Calculate package test coverage. Only runs on the devel branch once
213 | ## R-CMD-check-docker has succeeded. Uses the Bioconductor Docker image.
214 | if: ${{ github.ref == 'refs/heads/devel' }}
215 | needs: [get-bioc-release, get-bioc-version, R-CMD-check-docker]
216 | runs-on: ubuntu-latest
217 | container:
218 | image: ${{ needs.get-bioc-release.outputs.biocimage }}
219 | volumes:
220 | - /home/runner/work/_temp/Library:/usr/local/lib/R/host-site-library
221 | env:
222 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
223 | R_BIOC_VERSION: ${{ needs.get-bioc-version.outputs.biocversion }}
224 |
225 | steps:
226 | - name: Checkout
227 | uses: actions/checkout@v4
228 |
229 | - name: Install extra linux dependencies
230 | run: sudo apt-get update -y && sudo apt-get install -y libcurl4-openssl-dev devscripts qpdf
231 |
232 | - name: Setup R dependencies
233 | uses: r-lib/actions/setup-r-dependencies@v2
234 | with:
235 | cache-version: 3
236 | extra-packages: any::covr
237 | needs: coverage
238 |
239 | - name: Test coverage
240 | run: covr::codecov(quiet = FALSE)
241 | shell: Rscript {0}
242 |
243 | pkgdown:
244 | ## Build the pkgdown site. The site is only deployed to the gh-pages branch
245 | ## for non-pull-request runs on the devel branch (see the deploy step). Uses
246 | ## the Bioconductor Docker image.
247 | needs: [get-bioc-release, get-bioc-version]
248 | runs-on: ubuntu-latest
249 | container:
250 | image: ${{ needs.get-bioc-release.outputs.biocimage }}
251 | volumes:
252 | - /home/runner/work/_temp/Library:/usr/local/lib/R/host-site-library
253 | env:
254 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
255 | R_BIOC_VERSION: ${{ needs.get-bioc-version.outputs.biocversion }}
256 |
257 | steps:
258 | - name: Checkout
259 | uses: actions/checkout@v4
260 |
261 | - name: Install extra linux dependencies
262 | run: sudo apt-get update -y && sudo apt-get install -y libcurl4-openssl-dev devscripts qpdf rsync
263 |
264 | - name: Setup R dependencies
265 | uses: r-lib/actions/setup-r-dependencies@v2
266 | with:
267 | cache-version: 3
268 | extra-packages: any::pkgdown, local::.
269 | needs: website
270 |
271 | - name: Build pkgdown site
272 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
273 | shell: Rscript {0}
274 |
275 | - name: Deploy to GitHub pages 🚀
276 | if: ${{ (github.ref == 'refs/heads/devel') && (github.event_name != 'pull_request') }}
277 | uses: JamesIves/github-pages-deploy-action@v4.7.3
278 | with:
279 | clean: false
280 | branch: gh-pages
281 | folder: docs
282 | git-config-name: "github-actions[bot]"
283 | git-config-email: "41898282+github-actions[bot]@users.noreply.github.com"
284 |
--------------------------------------------------------------------------------
/.github/workflows/pr-commands.yaml:
--------------------------------------------------------------------------------
1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
3 | on:
4 | issue_comment:
5 | types: [created]
6 |
7 | name: pr-commands.yaml
8 |
9 | permissions: read-all
10 |
11 | jobs:
12 | document:
13 | if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/document') }}
14 | name: document
15 | runs-on: ubuntu-latest
16 | env:
17 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
18 | permissions:
19 | contents: write
20 | steps:
21 | - uses: actions/checkout@v4
22 |
23 | - uses: r-lib/actions/pr-fetch@v2
24 | with:
25 | repo-token: ${{ secrets.GITHUB_TOKEN }}
26 |
27 | - uses: r-lib/actions/setup-r@v2
28 | with:
29 | use-public-rspm: true
30 |
31 | - uses: r-lib/actions/setup-r-dependencies@v2
32 | with:
33 | extra-packages: any::roxygen2
34 | needs: pr-document
35 |
36 | - name: Document
37 | run: roxygen2::roxygenise()
38 | shell: Rscript {0}
39 |
40 | - name: commit
41 | run: |
42 | git config --local user.name "$GITHUB_ACTOR"
43 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com"
44 | git add man/\* NAMESPACE
45 | git commit -m 'Document'
46 |
47 | - uses: r-lib/actions/pr-push@v2
48 | with:
49 | repo-token: ${{ secrets.GITHUB_TOKEN }}
50 |
51 | style:
52 | if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/style') }}
53 | name: style
54 | runs-on: ubuntu-latest
55 | env:
56 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
57 | permissions:
58 | contents: write
59 | steps:
60 | - uses: actions/checkout@v4
61 |
62 | - uses: r-lib/actions/pr-fetch@v2
63 | with:
64 | repo-token: ${{ secrets.GITHUB_TOKEN }}
65 |
66 | - uses: r-lib/actions/setup-r@v2
67 |
68 | - name: Install dependencies
69 | run: install.packages("styler")
70 | shell: Rscript {0}
71 |
72 | - name: Style
73 | run: styler::style_pkg()
74 | shell: Rscript {0}
75 |
76 | - name: commit
77 | run: |
78 | git config --local user.name "$GITHUB_ACTOR"
79 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com"
80 | git add \*.R
81 | git commit -m 'Style'
82 |
83 | - uses: r-lib/actions/pr-push@v2
84 | with:
85 | repo-token: ${{ secrets.GITHUB_TOKEN }}
86 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | *.html
5 | doc
6 | Meta
7 | *.Rproj
8 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | We as members, contributors, and leaders pledge to make participation in our
6 | community a harassment-free experience for everyone, regardless of age, body
7 | size, visible or invisible disability, ethnicity, sex characteristics, gender
8 | identity and expression, level of experience, education, socio-economic status,
9 | nationality, personal appearance, race, religion, or sexual identity and
10 | orientation.
11 |
12 | We pledge to act and interact in ways that contribute to an open, welcoming,
13 | diverse, inclusive, and healthy community.
14 |
15 | ## Our Standards
16 |
17 | Examples of behavior that contributes to a positive environment for our
18 | community include:
19 |
20 | * Demonstrating empathy and kindness toward other people
21 | * Being respectful of differing opinions, viewpoints, and experiences
22 | * Giving and gracefully accepting constructive feedback
23 | * Accepting responsibility and apologizing to those affected by our mistakes,
24 | and learning from the experience
25 | * Focusing on what is best not just for us as individuals, but for the overall
26 | community
27 |
28 | Examples of unacceptable behavior include:
29 |
30 | * The use of sexualized language or imagery, and sexual attention or
31 | advances of any kind
32 | * Trolling, insulting or derogatory comments, and personal or political attacks
33 | * Public or private harassment
34 | * Publishing others' private information, such as a physical or email
35 | address, without their explicit permission
36 | * Other conduct which could reasonably be considered inappropriate in a
37 | professional setting
38 |
39 | ## Enforcement Responsibilities
40 |
41 | Community leaders are responsible for clarifying and enforcing our standards
42 | of acceptable behavior and will take appropriate and fair corrective action in
43 | response to any behavior that they deem inappropriate, threatening, offensive,
44 | or harmful.
45 |
46 | Community leaders have the right and responsibility to remove, edit, or reject
47 | comments, commits, code, wiki edits, issues, and other contributions that are
48 | not aligned to this Code of Conduct, and will communicate reasons for moderation
49 | decisions when appropriate.
50 |
51 | ## Scope
52 |
53 | This Code of Conduct applies within all community spaces, and also applies
54 | when an individual is officially representing the community in public spaces.
55 | Examples of representing our community include using an official e-mail
56 | address, posting via an official social media account, or acting as an appointed
57 | representative at an online or offline event.
58 |
59 | ## Enforcement
60 |
61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
62 | reported to the community leaders responsible for enforcement at [INSERT CONTACT
63 | METHOD]. All complaints will be reviewed and investigated promptly and fairly.
64 |
65 | All community leaders are obligated to respect the privacy and security of the
66 | reporter of any incident.
67 |
68 | ## Enforcement Guidelines
69 |
70 | Community leaders will follow these Community Impact Guidelines in determining
71 | the consequences for any action they deem in violation of this Code of Conduct:
72 |
73 | ### 1. Correction
74 |
75 | **Community Impact**: Use of inappropriate language or other behavior deemed
76 | unprofessional or unwelcome in the community.
77 |
78 | **Consequence**: A private, written warning from community leaders, providing
79 | clarity around the nature of the violation and an explanation of why the
80 | behavior was inappropriate. A public apology may be requested.
81 |
82 | ### 2. Warning
83 |
84 | **Community Impact**: A violation through a single incident or series of
85 | actions.
86 |
87 | **Consequence**: A warning with consequences for continued behavior. No
88 | interaction with the people involved, including unsolicited interaction with
89 | those enforcing the Code of Conduct, for a specified period of time. This
90 | includes avoiding interactions in community spaces as well as external channels
91 | like social media. Violating these terms may lead to a temporary or permanent
92 | ban.
93 |
94 | ### 3. Temporary Ban
95 |
96 | **Community Impact**: A serious violation of community standards, including
97 | sustained inappropriate behavior.
98 |
99 | **Consequence**: A temporary ban from any sort of interaction or public
100 | communication with the community for a specified period of time. No public or
101 | private interaction with the people involved, including unsolicited interaction
102 | with those enforcing the Code of Conduct, is allowed during this period.
103 | Violating these terms may lead to a permanent ban.
104 |
105 | ### 4. Permanent Ban
106 |
107 | **Community Impact**: Demonstrating a pattern of violation of community
108 | standards, including sustained inappropriate behavior, harassment of an
109 | individual, or aggression toward or disparagement of classes of individuals.
110 |
111 | **Consequence**: A permanent ban from any sort of public interaction within the
112 | community.
113 |
114 | ## Attribution
115 |
116 | This Code of Conduct is adapted from the [Contributor Covenant][homepage],
117 | version 2.0,
118 | available at
119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
120 |
121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct
122 | enforcement ladder](https://github.com/mozilla/diversity).
123 |
124 | [homepage]: https://www.contributor-covenant.org
125 |
126 | For answers to common questions about this code of conduct, see the FAQ at
127 | https://www.contributor-covenant.org/faq. Translations are available at
128 | https://www.contributor-covenant.org/translations.
129 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: zellkonverter
2 | Title: Conversion Between scRNA-seq Objects
3 | Version: 1.19.0
4 | Date: 2025-04-16
5 | Authors@R: c(
6 | person("Luke", "Zappia", , "luke@lazappi.id.au", role = c("aut", "cre"),
7 | comment = c(ORCID = "0000-0001-7744-8565")),
8 | person("Aaron", "Lun", , "infinite.monkeys.with.keyboards@gmail.com", role = "aut",
9 | comment = c(ORCID = "0000-0002-3564-4813")),
10 | person("Jack", "Kamm", , "jackkamm@gmail.com", role = "ctb",
11 | comment = c(ORCID = "0000-0003-2412-756X")),
12 | person("Robrecht", "Cannoodt", , "rcannood@gmail.com", role = "ctb",
13 | comment = c(ORCID = "0000-0003-3641-729X", github = "rcannood")),
14 | person("Gabriel", "Hoffman", , "gabriel.hoffman@mssm.edu", role = "ctb",
15 | comment = c(ORCID = "0000-0002-0957-0224", github = "GabrielHoffman")),
16 | person("Marek", "Cmero", , "cmero.ma@wehi.edu.au", role = "ctb",
17 | comment = c(ORCID = "0000-0001-7783-5530", github = "mcmero"))
18 | )
19 | Description: Provides methods to convert between Python AnnData objects
20 | and SingleCellExperiment objects. These are primarily intended for use
21 | by downstream Bioconductor packages that wrap Python methods for
22 | single-cell data analysis. It also includes functions to read and
23 | write H5AD files used for saving AnnData objects to disk.
24 | License: MIT + file LICENSE
25 | URL: https://github.com/theislab/zellkonverter
26 | BugReports: https://github.com/theislab/zellkonverter/issues
27 | Imports:
28 | basilisk,
29 | cli,
30 | DelayedArray,
31 | Matrix,
32 | methods,
33 | reticulate,
34 | S4Vectors,
35 | SingleCellExperiment (>= 1.11.6),
36 | SummarizedExperiment,
37 | utils
38 | Suggests:
39 | anndata,
40 | BiocFileCache,
41 | BiocStyle,
42 | covr,
43 | HDF5Array,
44 | knitr,
45 | pkgload,
46 | rhdf5 (>= 2.45.1),
47 | rmarkdown,
48 | scRNAseq,
49 | SpatialExperiment,
50 | spelling,
51 | testthat,
52 | withr
53 | VignetteBuilder:
54 | knitr
55 | biocViews: SingleCell, DataImport, DataRepresentation
56 | Encoding: UTF-8
57 | Language: en-GB
58 | LazyData: true
59 | Roxygen: list(markdown = TRUE)
60 | RoxygenNote: 7.3.2
61 | StagedInstall: no
62 | Collate:
63 | 'AnnData2SCE.R'
64 | 'SCE2AnnData.R'
65 | 'ui.R'
66 | 'basilisk.R'
67 | 'read.R'
68 | 'reticulate.R'
69 | 'utils.R'
70 | 'validation.R'
71 | 'write.R'
72 | 'zellkonverter-package.R'
73 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2020
2 | COPYRIGHT HOLDER: Luke Zappia
3 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | # MIT License
2 |
3 | Copyright (c) 2020 Theis Lab
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | S3method(py_to_r,numpy.ndarray)
4 | S3method(py_to_r,pandas.core.arrays.categorical.Categorical)
5 | S3method(py_to_r,pandas.core.arrays.masked.BaseMaskedArray)
6 | export(.AnnDataVersions)
7 | export(AnnData2SCE)
8 | export(AnnDataDependencies)
9 | export(SCE2AnnData)
10 | export(readH5AD)
11 | export(setZellkonverterVerbose)
12 | export(writeH5AD)
13 | export(zellkonverterAnnDataEnv)
14 | import(SingleCellExperiment)
15 | import(SummarizedExperiment)
16 | importClassesFrom(Matrix,CsparseMatrix)
17 | importFrom(DelayedArray,blockApply)
18 | importFrom(DelayedArray,is_sparse)
19 | importFrom(DelayedArray,nzdata)
20 | importFrom(DelayedArray,nzindex)
21 | importFrom(DelayedArray,rowAutoGrid)
22 | importFrom(DelayedArray,type)
23 | importFrom(Matrix,sparseMatrix)
24 | importFrom(Matrix,t)
25 | importFrom(S4Vectors,DataFrame)
26 | importFrom(S4Vectors,I)
27 | importFrom(S4Vectors,make_zero_col_DFrame)
28 | importFrom(S4Vectors,metadata)
29 | importFrom(S4Vectors,wmsg)
30 | importFrom(SingleCellExperiment,"colPairs<-")
31 | importFrom(SingleCellExperiment,"reducedDims<-")
32 | importFrom(SingleCellExperiment,"rowPairs<-")
33 | importFrom(SingleCellExperiment,SingleCellExperiment)
34 | importFrom(SummarizedExperiment,"assays<-")
35 | importFrom(SummarizedExperiment,"colData<-")
36 | importFrom(SummarizedExperiment,"rowData<-")
37 | importFrom(SummarizedExperiment,assays)
38 | importFrom(SummarizedExperiment,colData)
39 | importFrom(SummarizedExperiment,rowData)
40 | importFrom(basilisk,basiliskRun)
41 | importFrom(methods,as)
42 | importFrom(methods,is)
43 | importFrom(methods,selectMethod)
44 | importFrom(methods,slot)
45 | importFrom(reticulate,import)
46 | importFrom(reticulate,import_builtins)
47 | importFrom(reticulate,py_to_r)
48 | importFrom(reticulate,r_to_py)
49 | importFrom(utils,capture.output)
50 |
--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
1 | # zellkonverter 1.20.0
2 |
3 | * Bioconductor 3.22, October 2025
4 |
5 | ## zellkonverter 1.19.0 (2025-04-16)
6 |
7 | * Bioconductor 3.22 devel
8 |
9 | # zellkonverter 1.18.0 (2025-04-16)
10 |
11 | * Bioconductor 3.21, April 2025
12 |
13 | ## zellkonverter 1.17.4 (2025-04-10)
14 |
15 | * Add tests for **anndata** v0.10.9
16 | * Modify `SCE2AnnData()` to convert sparse matrices to `dgRMatrix` when they are
17 | transposed (mostly assays) (Fixes #132)
18 |
19 | ## zellkonverter 1.17.3 (2025-04-08)
20 |
21 | * Add an environment for **anndata** v0.11.4. This is now the default.
22 | * Disable **anndata** v0.7.6 tests on aarch64 Linux as Python 3.7 is not available
23 |
24 | ## zellkonverter 1.17.2 (2025-04-01)
25 |
26 | * Add support for `SpatialExperiment` objects to `SCE2AnnData()` (PR #138 @mcmero, Fixes #61)
27 | * Improve handling of missing `rownames`/`colnames` (PR #138, Fixes #140)
28 |
29 | ## zellkonverter 1.17.1 (2025-03-09)
30 |
31 | * Add `testload` argument to `basiliskRun()` calls (Partial fix for #139)
32 | * Handle missing `rowData`/`colData` with no names in `SCE2AnnData()` (Fixes #105)
33 | * Update links in function documentation
34 |
35 | ## zellkonverter 1.17.0 (2024-10-30)
36 |
37 | * Bioconductor 3.21 devel
38 |
39 | # zellkonverter 1.16.0 (2024-10-30)
40 |
41 | * Bioconductor 3.20, October 2024
42 |
43 | ## zellkonverter 1.15.4 (2024-10-18)
44 |
45 | * Fix correctly assigning levels to factors in the R reader with **anndata** v0.7 files (Fixes #122)
46 | * Add environment for **anndata** v0.10.9
47 | * Avoid deprecation warning due to setting `dtype` when creating Python `AnnData` objects
48 | * Standardise code styling using **{styler}**
49 |
50 | ## zellkonverter 1.15.3 (2024-10-04)
51 |
52 | * Correctly assign levels to factors in R reader (Fixes #122)
53 |
54 | ## zellkonverter 1.15.2 (2024-10-02)
55 |
56 | * Correctly set `filepath` in the R reader when reading `adata.raw` with `use_hdf5 = TRUE` (PR #124 @GabrielHoffman, Fixes #123)
57 |
58 | ## zellkonverter 1.15.1 (2024-06-21)
59 |
60 | * Skip **anndata** v0.7.6 tests on Apple Silicon as Python 3.7 is not available
61 | * Fix URL for GTEx 8 tissues dataset in long tests
62 | * Add test using the more complete example H5AD file
63 |
64 | ## zellkonverter 1.15.0 (2024-05-01)
65 |
66 | * Bioconductor 3.20 devel
67 |
68 | # zellkonverter 1.14.0 (2024-05-01)
69 |
70 | * Bioconductor 3.19, May 2024
71 |
72 | ## zellkonverter 1.14.1 (2024-06-21)
73 |
74 | * Skip **anndata** v0.7.6 tests on Apple Silicon as Python 3.7 is not available
75 | * Fix URL for GTEx 8 tissues dataset in long tests
76 | * Add test using the more complete example H5AD file
77 |
78 | ## zellkonverter 1.13.4 (2024-04-26)
79 |
80 | * Prepare `NEWS` for release
81 |
82 | ## zellkonverter 1.13.3 (2024-03-25)
83 |
84 | * Correctly handle `use_backed = TRUE` with newer **anndata** versions (Fixes #114)
85 | * Improve warnings when converting matrices fails
86 | * Add environment for **anndata** v0.10.6
87 |
88 | ## zellkonverter 1.13.2 (2024-01-17)
89 |
90 | * Minor change to writing `DelayedArray`s for compatibility with **{HDF5Array}**
91 | v1.31.1
92 |
93 | ## zellkonverter 1.13.1 (2023-11-13)
94 |
95 | * Fix the **anndata** v0.10.2 environment instantiation (Fixes #103)
96 | * Fix a typo in the AnnData Conversion docs (Fixes #100)
97 |
98 | ## zellkonverter 1.13.0 (2023-10-25)
99 |
100 | * Bioconductor 3.19 devel
101 |
102 | # zellkonverter 1.12.0 (2023-10-25)
103 |
104 | * Bioconductor 3.18, October 2023
105 |
106 | ## zellkonverter 1.12.1 (2023-11-13)
107 |
108 | * Fix the **anndata** v0.10.2 environment instantiation (Fixes #103)
109 | * Fix a typo in the AnnData Conversion docs (Fixes #100)
110 |
111 | ## zellkonverter 1.11.4 (2023-10-16)
112 |
113 | * Add environment for **anndata** v0.10.2
114 |
115 | ## zellkonverter 1.11.3 (2023-10-02)
116 |
117 | * Add environment for **anndata** v0.9.2
118 |
119 | ## zellkonverter 1.11.2 (2023-08-28)
120 |
121 | * Changes for compatibility with **{rhdf5}** v2.45.1
122 | * Support for enum types that simplifies reading of nullable types in the
123 | native R reader
124 |
125 | ## zellkonverter 1.11.1 (2023-05-23)
126 |
127 | * Pass correct dimensions when converting `raw` (Fixes #96)
128 | * Convert **anndata** backed sparse matrices in `AnnData2SCE()` (Fixes #96)
129 |
130 | ## zellkonverter 1.11.0 (2023-04-26)
131 |
132 | * Bioconductor 3.18 devel
133 |
134 | # zellkonverter 1.10.0 (2023-04-26)
135 |
136 | * Bioconductor 3.17, April 2023
137 |
138 | ## zellkonverter 1.10.1 (2023-05-23)
139 |
140 | * Pass correct dimensions when converting `raw` (Fixes #96)
141 | * Convert **anndata** backed sparse matrices in `AnnData2SCE()` (Fixes #96)
142 |
143 | ## zellkonverter 1.9.3 (2023-04-06)
144 |
145 | * Add functions for converting **pandas** arrays used by **anndata** when
146 | arrays have missing values (Fixes #87)
147 | * Read the correct index names in the R reader (PR #93 mtmorgan)
148 | * Adjust tests to match reader changes
149 |
150 | ## zellkonverter 1.9.2 (2023-03-28)
151 |
152 | * Add @rcannood as a contributor (PR #90 @rcannood, fixes #88)
153 |
154 | ## zellkonverter 1.9.1 (2023-03-14)
155 |
156 | * Add compatibility with the **anndata** v0.8 H5AD format to the native R
157 | writer (PR #86 @jackkamm, fixes #78)
158 |
159 | ## zellkonverter 1.9.0 (2022-11-02)
160 |
161 | * Bioconductor 3.17 devel
162 |
163 | # zellkonverter 1.8.0 (2022-11-02)
164 |
165 | * Bioconductor 3.16, November 2022
166 |
167 | ## zellkonverter 1.7.8 (2022-10-04)
168 |
169 | * Improve compatibility with the R **{anndata}** package (PR #76 @rcannood,
170 | fixes #75)
171 | * Python objects are now explicitly converted rather than relying on automatic
172 | conversion
173 | * Other minor modifications for compatibility
174 | * Added support for **numpy** recarrays (dtype number 20) (PR #81, fixes #45,
175 | #28)
176 | * Added a new `py_to_r.numpy.ndarray()` function which extends the default
177 | **{reticulate}** function
178 | * Improvements to warnings
179 | * Improvements and updates to tests
180 |
181 | ## zellkonverter 1.7.7 (2022-10-04)
182 |
183 | * Pin **python** version to 3.7.10 in **anndata** v0.7.6 environment (3.7.12
184 | was not compatible with other dependencies)
185 |
186 | ## zellkonverter 1.7.6 (2022-09-29)
187 |
188 | * Pin **python** version to 3.7.12 in **anndata** v0.7.6 environment to match
189 | **{basilisk}** changes
190 |
191 | ## zellkonverter 1.7.5 (2022-09-13)
192 |
193 | * Minor changes for compatibility with **{cli}** v3.4.0
194 | * Added tests for `verbose=TRUE`
195 |
196 | ## zellkonverter 1.7.4 (2022-08-17)
197 |
198 | * Minor changes for compatibility with the upcoming **{Matrix}** 1.4-2 release
199 |
200 | ## zellkonverter 1.7.3 (2022-06-23)
201 |
202 | * Move verbose from `zellkonverterAnnDataEnv()` (Fixes #66)
203 |
204 | ## zellkonverter 1.7.2 (2022-06-09)
205 |
206 | * Instantiate environments for `basilisk::configureBasiliskEnv()` (Fixes #66)
207 | * Allow missing obs/var names when `use_hdf5 = TRUE` (Fixes #65)
208 |
209 | ## zellkonverter 1.7.1 (2022-05-17)
210 |
211 | * Fix bug in long tests
212 |
213 | ## zellkonverter 1.7.0 (2022-04-27)
214 |
215 | * Bioconductor 3.16 devel
216 |
217 | # zellkonverter 1.6.0 (2022-04-27)
218 |
219 | * Bioconductor 3.15, April 2022
220 |
221 | ## zellkonverter 1.6.5 (2022-09-13)
222 |
223 | * Minor changes for compatibility with **{cli}** v3.4.0
224 | * Added tests for `verbose=TRUE`
225 |
226 | ## zellkonverter 1.6.4 (2022-08-17)
227 |
228 | * Minor changes for compatibility with the upcoming **{Matrix}** 1.4-2 release
229 |
230 | ## zellkonverter 1.6.3 (2022-06-23)
231 |
232 | * Move verbose from `zellkonverterAnnDataEnv()` (Fixes #66)
233 |
234 | ## zellkonverter 1.6.2 (2022-06-09)
235 |
236 | * Instantiate environments for `basilisk::configureBasiliskEnv()` (Fixes #66)
237 | * Allow missing obs/var names when `use_hdf5 = TRUE` (Fixes #65)
238 |
239 | ## zellkonverter 1.6.1 (2022-05-17)
240 |
241 | * Fix bug in long tests
242 |
243 | ## zellkonverter 1.5.4 (2022-04-25)
244 |
245 | * Fix progress messages in `.convert_anndata_df()`
246 | * Allow `data.frames` in `varm` in `SCE2AnnData()`
247 | * Standardise `uns` names to match R conventions in `AnnData2SCE()`
248 | * Adjust long tests
249 |
250 | ## zellkonverter 1.5.3 (2022-04-19)
251 |
252 | * Reduce **scipy** version to 1.7.3
253 | * **scipy** >= 1.8.0 is incompatible with **{reticulate}** <= 1.24 (see
254 | https://github.com/rstudio/reticulate/pull/1173)
255 | * Add GTEX 8 tissues dataset to long tests (see #58)
256 |
257 | ## zellkonverter 1.5.2 (2022-04-17)
258 |
259 | * Update the default Python environment to use **anndata** v0.8.0
260 | * **anndata** 0.8.0
261 | * **h5py** 3.6.0
262 | * **hdf5** 1.12.1
263 | * **natsort** 8.1.0
264 | * **numpy** 1.22.3
265 | * **packaging** 21.3
266 | * **pandas** 1.4.2
267 | * **python** 3.8.13
268 | * **scipy** 1.8.0
269 | * **sqlite** 3.38.2
270 | * Add options to choose Python environments with different versions of
271 | **anndata**
272 | * To facilitate this `zellkonverterAnnDataEnv()` and `AnnDataDependencies()`
273 | are new functions rather than variables
274 | * Added a new `.AnnDataVersions` variable which stores the available
275 | **anndata** versions
276 | * Updates to the vignette and function documentation explaining this option
277 |
278 | ## zellkonverter 1.5.1 (2022-03-21)
279 |
280 | * Modify how Pandas DataFrames are converted to R
281 | * Columns should now use R approved names with a warning when changes are
282 | made
283 |
284 | ## zellkonverter 1.5.0 (2021-10-27)
285 |
286 | * Bioconductor 3.15 devel
287 |
288 | # zellkonverter 1.4.0 (2021-10-27)
289 |
290 | * Bioconductor 3.14, October 2021
291 |
292 | ## zellkonverter 1.3.3 (2021-10-20)
293 |
294 | * Add progress messages to various functions
295 | * Can be controlled by function arguments or a global variable
296 | * Split `konverter.R` into two files (`AnnData2SCE.R` and `SCE2AnnData.R`)
297 | * Add arguments to control how slots are converted in `AnnData2SCE()` and
298 | `SCE2AnnData()` (Fixes #47)
299 | * Each slot can now be fully converted, skipped entirely or only selected
300 | items converted.
301 | * Add support for converting the `raw` slot to an `altExp` in `AnnData2SCE()`
302 | (Fixes #53, fixes #57)
303 |
304 | ## zellkonverter 1.3.2 (2021-09-09)
305 |
306 | * Add recursive conversion of lists in `AnnData2SCE()`
307 | * Correctly handle `DataFrame` objects stored in `adata.obsm`
308 | * Remove **pandas** indexes from converted `DataFrame` objects
309 | * Add functions for validating `SingleCellExperiment` objects (for testing)
310 | * Add long tests for various public datasets
311 |
312 | ## zellkonverter 1.3.1 (2021-06-22)
313 |
314 | * Fix bug in converting `dgRMatrix` sparse matrices (Fixes #55)
315 |
316 | ## zellkonverter 1.3.0 (2021-05-20)
317 |
318 | * Bioconductor 3.14 devel
319 |
320 | # zellkonverter 1.2.0 (2021-05-20)
321 |
322 | * Bioconductor 3.13, May 2021
323 |
324 | ## zellkonverter 1.2.1 (2021-06-22)
325 |
326 | * Fix bug in converting `dgRMatrix` sparse matrices (Fixes #55)
327 |
328 | ## zellkonverter 1.1.11 (2021-05-19)
329 |
330 | * Add experimental native R reader to `readH5AD()`
331 |
332 | ## zellkonverter 1.1.10 (2021-05-18)
333 |
334 | * Update NEWS for release
335 |
336 | ## zellkonverter 1.1.9 (2021-05-12)
337 |
338 | * `AnnData2SCE()` no longer returns `dgRMatrix` sparse matrices (Fixes #34)
339 |
340 | ## zellkonverter 1.1.8 (2021-05-03)
341 |
342 | * Add conversion checks to all slots in `AnnData2SCE()` (See #45)
343 | * Enable return conversion of `varm` in `SCE2AnnData()` (Fixes #43)
344 | * Store `X_name` in `AnnData2SCE()` for use by `SCE2AnnData()` and add an
345 | `X_name` argument to `AnnData2SCE()` and `readH5AD()` (Fixes #7)
346 |
347 | ## zellkonverter 1.1.7 (2021-04-30)
348 |
349 | * Add `compression` argument to `writeH5AD()` (Fixes #49)
350 | * Update **anndata** Python dependencies, now using **anndata** v0.7.6
351 |
352 | ## zellkonverter 1.1.6 (2021-04-27)
353 |
354 | * Adapt to changes in `HDF5Array::HDF5Array()`
355 |
356 | ## zellkonverter 1.1.5 (2021-03-05)
357 |
358 | * Better support for **anndata** `SparseDataset` arrays (PR #41, Fixes #37,
359 | Fixes #42)
360 | * More consistent conversion of `metadata` to `uns` in `SCE2AnnData()`
361 | (Fixes #40)
362 | * Add handling of list columns in `colData` and `rowData` in `SCE2AnnData()`
363 | (Fixes #26)
364 | * Export `zellkonverterAnnDataEnv` (Fixes #38)
365 |
366 | ## zellkonverter 1.1.4 (2021-02-18)
367 |
368 | * Handle writing **DelayedArray** assays on the R side in `writeH5AD()`
369 | (PR #35, Fixes #32)
370 |
371 | ## zellkonverter 1.1.3 (2021-01-22)
372 |
373 | * Adjust `SCE2AnnData()` example (Fixes #31)
374 |
375 | ## zellkonverter 1.1.2 (2020-12-19)
376 |
377 | * Improved support for HDF5 backed conversion (PR #29, fixes #13)
378 |
379 | ## zellkonverter 1.1.1 (2020-12-03)
380 |
381 | * Add `example_anndata.h5ad` file to `inst/extdata/` and creation script to `inst/scripts/`
382 | * Improve conversion checks when converting `.uns` to `metadata`
383 | * Avoid converting `obsp` and `varp` to dense matrices
384 |
385 | ## zellkonverter 1.1.0 (2020-10-28)
386 |
387 | * Bioconductor 3.13 devel
388 |
389 | # zellkonverter 1.0.0 (2020-10-28)
390 |
391 | * Bioconductor 3.12, October 2020
392 |
393 | ## zellkonverter 1.0.3 (2021-03-08)
394 |
395 | * Avoid converting `obsp` and `varp` to dense matrices
396 |
397 | ## zellkonverter 1.0.2 (2021-01-28)
398 |
399 | * Merge remaining commits for HDF5 conversion (fixes #33)
400 |
401 | ## zellkonverter 1.0.1 (2021-01-26)
402 |
403 | * Improved support for HDF5 backed conversion (PR #29, fixes #13, fixes #33)
404 |
405 | ## zellkonverter 0.99.7 (2020-10-16)
406 |
407 | * Update Python dependencies
408 | * **numpy** 1.18.5 -> 1.19.1
409 | * **pandas** 1.0.4 -> 1.1.2
410 | * **scipy** 1.4.1 -> 1.5.2
411 | * **sqlite** 3.30.1 -> 3.33.0
412 |
413 | ## zellkonverter 0.99.6 (2020-10-12)
414 |
415 | * Document character to factor coercion in `writeH5AD()` (Fixes #6)
416 | * Add `X_name` argument to `writeH5AD()` (Fixes #23)
417 |
418 | ## zellkonverter 0.99.5 (2020-09-28)
419 |
420 | * Tidy NEWS files for Bioconductor release
421 |
422 | ## zellkonverter 0.99.4 (2020-08-28)
423 |
424 | * Bump anndata version to 0.7.4
425 |
426 | ## zellkonverter 0.99.3 (2020-08-21)
427 |
428 | * Document the `krumsiek11.h5ad` file
429 | * Remove the `internal` keyword from the `zellkonverter-package` documentation
430 |
431 | ## zellkonverter 0.99.2 (2020-08-21)
432 |
433 | * Update `.gitignore`
434 |
435 | ## zellkonverter 0.99.1 (2020-07-15)
436 |
437 | * Fix SCE to AnnData map figure in PDF manual
438 | * Use `expect_equal()` instead of `expect_identical()` in `writeH5AD()` sparse
439 | matrices test
440 | * Edit package title and description
441 |
442 | ## zellkonverter 0.99.0 (2020-07-10)
443 |
444 | * Initial Bioconductor submission
445 |
446 | # zellkonverter 0.0.0 (early development version)
447 |
448 | ## zellkonverter 0.0.0.9017 (2020-07-10)
449 |
450 | * Add biocViews to DESCRIPTION
451 | * Edit package description
452 | * Tidy code
453 | * Replace 1:... with `seq_len()`
454 |
455 | ## zellkonverter 0.0.0.9016 (2020-07-10)
456 |
457 | * Add check for **scRNAseq** in examples (Fixes #18)
458 |
459 | ## zellkonverter 0.0.0.9015 (2020-07-02)
460 |
461 | * Skip `AnnData` matrices without a transposable R counterpart
462 | * Only replace skipped matrices when `use_hdf5 = TRUE` in `readH5AD()`
463 | (Fixes #12)
464 | * Additional tests for sparse matrices
465 |
466 | ## zellkonverter 0.0.0.9014 (2020-06-30)
467 |
468 | * Allow assay skipping when converting from `SingleCellExperiment` to `AnnData`
469 | * Allow skipping of assays that aren't **numpy** friendly in `writeH5AD()`
470 | * Wait for **basilisk** process shutdown to release `.h5ad` file
471 | * Updates to documentation and tests
472 |
473 | ## zellkonverter 0.0.0.9013 (2020-06-25)
474 |
475 | * Improve conversion between `SingleCellExperiment` and `AnnData` (See #8)
476 | * Convert between `metadata` and `uns` (where objects are compatible)
477 | * Convert between `rowPairs` and `varp`
478 | * Convert between `colPairs` and `obsp`
479 | * Convert from `varm` to `rowData` (but not in reverse)
480 | * Add mapping table to docs
481 |
482 | ## zellkonverter 0.0.0.9012 (2020-06-19)
483 |
484 | * Tidy documentation and code
485 | * Tidy vignette
486 |
487 | ## zellkonverter 0.0.0.9011 (2020-06-18)
488 |
489 | * Support for HDF5Array outputs in `readH5AD()` (Fixes #4)
490 |
491 | ## zellkonverter 0.0.0.9010 (2020-06-17)
492 |
493 | * Avoid checking column names for `colData` and `rowData` in `SCE2AnnData()`
494 | * Make sure that all matrices passed to **{reticulate}** are **numpy** friendly
495 | * Add more tests
496 | * Update vignette front matter
497 |
498 | ## zellkonverter 0.0.0.9009 (2020-06-15)
499 |
500 | * Add vignette
501 |
502 | ## zellkonverter 0.0.0.9008 (2020-06-12)
503 |
504 | * Add examples and improve documentation
505 | * Export `.AnnDataDependencies` for external use
506 |
507 | ## zellkonverter 0.0.0.9007 (2020-06-11)
508 |
509 | * Add `SCE2AnnData()` function
510 | * Add `writeH5AD()` function
511 |
512 | ## zellkonverter 0.0.0.9006 (2020-06-11)
513 |
514 | * Use internal function in `readH5AD()`
515 |
516 | ## zellkonverter 0.0.0.9005 (2020-06-09)
517 |
518 | * Rename `adata2SCE()` to `AnnData2SCE()`
519 | * Remove **{basilisk}** context from `AnnData2SCE()` (See #1)
520 | * Now uses the calling context
521 |
522 | ## zellkonverter 0.0.0.9004 (2020-06-09)
523 |
524 | * Pin more **AnnData** dependencies (See #1)
525 |
526 | ## zellkonverter 0.0.0.9003 (2020-06-08)
527 |
528 | * Add test `.h5ad` file
529 | * Add test for `readH5AD()`
530 | * Add package man page
531 |
532 | ## zellkonverter 0.0.0.9002 (2020-06-08)
533 |
534 | * Add `adata2SCE()` function
535 | * Add `readH5AD()` function
536 |
537 | ## zellkonverter 0.0.0.9001 (2020-06-08)
538 |
539 | * Add **{basilisk}** infrastructure
540 |
541 | ## zellkonverter 0.0.0.9000 (2020-06-08)
542 |
543 | * Set up package
544 |
--------------------------------------------------------------------------------
/R/SCE2AnnData.R:
--------------------------------------------------------------------------------
#' @rdname AnnData-Conversion
#'
#' @param sce A \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
#' object.
#' @param X_name For `SCE2AnnData()` name of the assay to use as the primary
#' matrix (`X`) of the AnnData object. If `NULL`, the first assay of `sce` will
#' be used by default. For `AnnData2SCE()` name used when saving `X` as an
#' assay. If `NULL` looks for an `X_name` value in `uns`, otherwise uses `"X"`.
#' @param assays,colData,rowData,reducedDims,metadata,colPairs,rowPairs
#' Arguments specifying how these slots are converted. If `TRUE` everything in
#' that slot is converted, if `FALSE` nothing is converted and if a character
#' vector only those items or columns are converted.
#' @param verbose Logical scalar indicating whether to print progress messages.
#' If `NULL` uses `getOption("zellkonverter.verbose")`.
#'
#' @export
#' @importFrom utils capture.output
#' @importFrom S4Vectors metadata make_zero_col_DFrame
#' @importFrom reticulate import r_to_py py_to_r
SCE2AnnData <- function(sce, X_name = NULL, assays = TRUE, colData = TRUE,
                        rowData = TRUE, varm = TRUE, reducedDims = TRUE,
                        metadata = TRUE, colPairs = TRUE, rowPairs = TRUE,
                        skip_assays = FALSE, verbose = NULL) {
    anndata <- import("anndata")

    # Create a list to store parts of the AnnData. Each element is named after
    # the AnnData constructor argument it is passed to at the end.
    adata_list <- list()

    .ui_process(
        "Converting {.field SingleCellExperiment} to {.field AnnData}"
    )

    # ---- X matrix ----
    if (is.null(X_name)) {
        .ui_step(
            "Selecting {.field X matrix}",
            msg_done = "Selected {.field X matrix}"
        )
        if (length(assays(sce)) == 0) {
            stop("'sce' does not contain any assays")
        }
        # Default to the first assay when no name was requested
        X_name <- assayNames(sce)[1]
        cli::cli_alert_info(
            "Using the {.field '{X_name}'} assay as the {.field X matrix}"
        )
        cli::cli_progress_done()
    }

    .ui_step(
        "Converting {.field assays${X_name}} to {.field X matrix}",
        msg_done = "{.field assays${X_name}} converted to {.field X matrix}"
    )
    if (!skip_assays) {
        X <- assay(sce, X_name)
        X <- .makeNumpyFriendly(X)
    } else {
        cli::cli_alert_warning(paste(
            "{.field skip_assays} is {.field TRUE}",
            "so {.field X/layers} will be empty"
        ))
        # The placeholder is built from the reversed dimensions of `sce` and is
        # reused below for every layer
        X <- fake_mat <- .make_fake_mat(rev(dim(sce)))
    }
    # NOTE: Previously dtype was set here but was removed due to deprecation
    adata_list$X <- reticulate::r_to_py(X)
    cli::cli_progress_done()

    # ---- Additional assays -> layers ----
    assay_names <- assayNames(sce)
    assay_names <- assay_names[!assay_names == X_name]
    if (isFALSE(assays)) {
        .ui_info("Skipping conversion of {.field assays}")
    } else if (length(assay_names) == 0) {
        .ui_info("No {.field additional assays} present, assays were skipped")
    } else {
        .ui_step(
            "Converting {.field additional assays} to {.field layers}",
            msg_done = "{.field additional assays} converted to {.field layers}"
        )
        if (is.character(assays)) {
            if (!all(assays %in% assay_names)) {
                missing <- assays[!c(assays %in% assay_names)]
                .ui_warn(
                    "These selected assays are not in the object: {.field {missing}}"
                )
            }
            assay_names <- assay_names[assay_names %in% assays]
        }
        if (!skip_assays) {
            assays_list <- assays(sce, withDimnames = FALSE)
            assays_list <- lapply(assays_list[assay_names], .makeNumpyFriendly)
        } else {
            assays_list <- rep(list(fake_mat), length(assay_names))
            names(assays_list) <- assay_names
        }
        adata_list$layers <- assays_list
        cli::cli_progress_done()
    }

    # ---- colData -> obs ----
    if (isFALSE(colData)) {
        .ui_info("Skipping conversion of {.field colData}")
    } else {
        sce <- .store_non_atomic(sce, "colData")
        adata_list$obs <- .convert_sce_df(
            colData(sce), "colData", "obs",
            select = colData
        )
    }

    if (is.null(adata_list$obs)) {
        # Add a dummy data.frame if obs is currently empty
        adata_list$obs <- as.data.frame(make_zero_col_DFrame(ncol(sce)))
    }

    # Convert to python now because python DFs can have duplicates in
    # their index
    adata_list$obs <- r_to_py(adata_list$obs)
    if (!is.null(colnames(sce))) {
        adata_list$obs$index <- colnames(sce)
    } else if (ncol(adata_list$obs) == 0) {
        # If there are no colnames and obs has no columns delete it
        adata_list$obs <- NULL
    } else {
        # Otherwise convert the index to string
        adata_list$obs$index <- adata_list$obs$index$astype("str")
    }

    # ---- rowData$varm -> varm ----
    if (!is.null(int_metadata(sce)$has_varm)) {
        # Pull `varm` out of rowData before rowData itself is converted to var
        varm_list <- as.list(rowData(sce)[["varm"]])
        rowData(sce)[["varm"]] <- NULL

        if (isFALSE(varm)) {
            .ui_info("Skipping conversion of {.field rowData$varm}")
        } else {
            .ui_step(
                "Converting {.field rowData$varm} to {.field varm}",
                msg_done = "{.field rowData$varm} converted to {.field varm}"
            )

            if (is.character(varm)) {
                varm <- .check_select(varm, "rowData$varm", names(varm_list))
                varm_list <- varm_list[varm]
            }

            adata_list$varm <- varm_list
            cli::cli_progress_done()
        }
    } else {
        .ui_info("{.field rowData$varm} is empty and was skipped")
    }

    # ---- rowData -> var ----
    if (isFALSE(rowData)) {
        .ui_info("Skipping conversion of {.field rowData}")
    } else {
        sce <- .store_non_atomic(sce, "rowData")
        adata_list$var <- .convert_sce_df(
            rowData(sce), "rowData", "var",
            select = rowData
        )
    }

    if (is.null(adata_list$var)) {
        # Add a dummy data.frame if var is currently empty
        adata_list$var <- as.data.frame(make_zero_col_DFrame(nrow(sce)))
    }

    # Convert to python now because python DFs can have duplicates in
    # their index
    adata_list$var <- r_to_py(adata_list$var)
    if (!is.null(rownames(sce))) {
        adata_list$var$index <- rownames(sce)
    } else if (ncol(adata_list$var) == 0) {
        # If there are no rownames and var has no columns delete it
        adata_list$var <- NULL
    } else {
        # Otherwise convert the index to string
        adata_list$var$index <- adata_list$var$index$astype("str")
    }

    # ---- reducedDims (+ spatialCoords) -> obsm ----
    if (inherits(sce, "SpatialExperiment")) {
        .ui_info("Adding {.field spatialCoords} to {.field reducedDim}")
        coords <- SpatialExperiment::spatialCoords(sce)
        # Coordinates are only stored when there is more than one column
        if (ncol(coords) > 1) {
            SingleCellExperiment::reducedDim(sce, "spatial") <- coords
        }
    }

    if (isFALSE(reducedDims)) {
        .ui_info("Skipping conversion of {.field reducedDims}")
    } else if (length(reducedDims(sce)) == 0) {
        .ui_info("{.field reducedDims} is empty and was skipped")
    } else {
        .ui_step(
            "Converting {.field reducedDims} to {.field obsm}",
            msg_done = "{.field reducedDims} converted to {.field obsm}"
        )
        red_dims <- as.list(reducedDims(sce))
        if (is.character(reducedDims)) {
            reducedDims <- .check_select(
                reducedDims, "reducedDims", names(red_dims)
            )
            red_dims <- red_dims[reducedDims]
        }
        red_dims <- lapply(red_dims, .makeNumpyFriendly, transpose = FALSE)
        red_dims <- lapply(red_dims, function(rd) {
            # Items with column names are passed as data frames (keeping the
            # names) and given the same index as obs when obs exists
            if (!is.null(colnames(rd))) {
                rd <- r_to_py(as.data.frame(rd))
                if (!is.null(adata_list$obs)) {
                    rd <- rd$set_axis(adata_list$obs$index)
                }
            }

            rd
        })
        adata_list$obsm <- red_dims
        cli::cli_progress_done()
    }

    # ---- metadata -> uns ----
    uns_list <- list()
    # Always record which assay was used as X
    uns_list[["X_name"]] <- X_name
    if (isFALSE(metadata)) {
        .ui_info("Skipping conversion of {.field metadata}")
    } else if (length(metadata(sce)) == 0) {
        .ui_info("{.field metadata} is empty and was skipped")
    } else {
        .ui_step(
            "Converting {.field metadata} to {.field uns}",
            msg_done = "{.field metadata} converted to {.field uns}"
        )
        meta_list <- .addListNames(metadata(sce))
        if (is.character(metadata)) {
            metadata <- .check_select(metadata, "metadata", names(meta_list))
            meta_list <- meta_list[metadata]
        }
        for (item_name in names(meta_list)) {
            item <- meta_list[[item_name]]
            tryCatch(
                {
                    # Try to convert the item using reticulate, skip if it fails
                    # Capture the object output printed by reticulate
                    capture.output(r_to_py(item))
                    uns_list[[item_name]] <- item
                },
                error = function(err) {
                    .ui_warn(paste(
                        "The {.field {item_name}} item in {.field metadata}",
                        "cannot be converted to a Python type and has been",
                        "skipped"
                    ))
                }
            )
        }
        cli::cli_progress_done()
    }
    adata_list$uns <- r_to_py(uns_list)

    # ---- colPairs/rowPairs -> obsp/varp ----
    # Both pair slots go through the same helper so that the `colPairs` and
    # `rowPairs` selection arguments are respected and each slot is converted
    # (and reported) exactly once.
    adata_list$obsp <- .convert_sce_pairs(sce, "colPairs", "obsp", colPairs)
    adata_list$varp <- .convert_sce_pairs(sce, "rowPairs", "varp", rowPairs)

    do.call(anndata$AnnData, adata_list)
}
266 |
#' @importFrom methods as is
#' @importClassesFrom Matrix CsparseMatrix
#' @importFrom DelayedArray is_sparse
#' @importFrom Matrix t
# Original code from Charlotte Soneson in kevinrue/velociraptor
#
# Prepare a matrix-like object for conversion by reticulate: optionally
# transpose it, then return either a sparse Matrix object or a dense base
# matrix.
.makeNumpyFriendly <- function(x, transpose = TRUE) {
    if (transpose) {
        x <- t(x)
    }

    # Anything that is not sparse becomes an ordinary dense matrix
    if (!is_sparse(x)) {
        return(as.matrix(x))
    }

    # Sparse input: coerce to column-compressed form first and, when the input
    # was transposed, additionally to row-compressed form
    sparse_x <- as(x, "CsparseMatrix")
    if (transpose) {
        sparse_x <- as(sparse_x, "RsparseMatrix")
    }

    sparse_x
}
287 |
# Make sure every element of a list has a usable name.
#
# - An empty list is returned unchanged.
# - A list without names gets "item1", "item2", ... for every element.
# - Otherwise elements whose name is empty ("") or missing (NA) get
#   "item<position>" and all names are then passed through make.names() so
#   they are syntactically valid and unique.
#
# Returns `x` with its names replaced.
.addListNames <- function(x) {
    if (length(x) == 0) {
        return(x)
    }

    list_names <- names(x)

    if (is.null(list_names)) {
        names(x) <- paste0("item", seq_along(x))
        return(x)
    }

    # NA names must be tested explicitly: `NA == ""` is NA, and NA values are
    # not allowed as a logical subscript when assigning several replacements
    is_empty <- is.na(list_names) | !nzchar(list_names)
    list_names[is_empty] <- paste0("item", which(is_empty))

    names(x) <- make.names(list_names, unique = TRUE)

    return(x)
}
307 |
# Move non-atomic columns of `rowData(sce)` or `colData(sce)` into the object's
# metadata.
#
# Columns for which is.atomic() is FALSE are removed from the chosen slot and
# stored in `metadata(sce)[[".rowData"]]` or `metadata(sce)[[".colData"]]`
# (matching `slot`), with a warning listing the affected columns. If every
# column is atomic `sce` is returned unchanged.
#
# @param sce Object providing rowData()/colData()/metadata() accessors.
# @param slot Which slot to process, either "rowData" or "colData".
#
# @return The (possibly modified) `sce`.
.store_non_atomic <- function(sce, slot = c("rowData", "colData")) {
    slot <- match.arg(slot)

    df <- switch(slot,
        rowData = rowData(sce),
        colData = colData(sce)
    )

    is_atomic <- vapply(df, is.atomic, NA)

    if (all(is_atomic)) {
        return(sce)
    }

    # Name of the metadata entry that receives the columns; computed before the
    # warning so the message reports the real destination for both slots
    meta_slot <- paste0(".", slot)

    non_atomic_cols <- colnames(df)[!is_atomic]
    .ui_warn(paste(
        "The following {.field {slot}} columns are not atomic and will be",
        "stored in {.field metadata(sce)${meta_slot}} before conversion:",
        "{.val {non_atomic_cols}}"
    ))

    # Extend an existing store if there is one, otherwise start a new list
    if (meta_slot %in% names(metadata(sce))) {
        meta_list <- metadata(sce)[[meta_slot]]
    } else {
        meta_list <- list()
    }

    for (col in non_atomic_cols) {
        # NOTE(review): make.names(unique = TRUE) never alters the first
        # element for uniqueness, so an existing entry with the same name is
        # overwritten here - confirm this is intended
        store_name <- make.names(c(col, names(meta_list)), unique = TRUE)[1]
        meta_list[[store_name]] <- df[[col]]
    }

    df[non_atomic_cols] <- NULL
    metadata(sce)[[meta_slot]] <- meta_list

    if (slot == "rowData") {
        rowData(sce) <- df
    } else {
        colData(sce) <- df
    }

    return(sce)
}
352 |
# Restrict a selection to the items that are actually available.
#
# Warns about requested items that are not in `options` and returns the
# requested items that are, in their original order.
#
# @param select Character vector of requested items.
# @param slot_name Name of the slot being selected from, used in the warning.
# @param options Character vector of available items.
.check_select <- function(select, slot_name, options) {
    # Copy the caller's `verbose` value into this frame (presumably picked up
    # by .ui_warn() - confirm in ui.R)
    verbose <- parent.frame()$verbose

    is_available <- select %in% options

    if (!all(is_available)) {
        missing <- select[!is_available]
        .ui_warn(paste(
            "These selected {.field {slot_name}} items are not in the",
            "object: {.field {missing}}"
        ))
    }

    select[is_available]
}
368 |
# Convert a rowData/colData table into a base data.frame.
#
# @param sce_df Table to convert.
# @param slot_name Name of the source slot, used in progress messages.
# @param to_name Name of the destination slot, used in progress messages.
# @param select `TRUE` to convert every column or a character vector naming
#   the columns to keep.
#
# @return A data.frame, or `NULL` when `sce_df` has no columns or none of the
#   selected columns exist.
.convert_sce_df <- function(sce_df, slot_name, to_name, select = TRUE) {
    if (ncol(sce_df) == 0) {
        .ui_info("{.field {slot_name}} is empty and was skipped")
        return(NULL)
    }

    .ui_step(
        "Converting {.field {slot_name}} to {.field {to_name}}",
        msg_done = "{.field {slot_name}} converted to {.field {to_name}}"
    )

    # Start from the full table and narrow it down only for a character
    # selection
    selected <- sce_df
    if (is.character(select)) {
        select <- .check_select(select, slot_name, colnames(sce_df))

        if (length(select) == 0) {
            return(NULL)
        }

        selected <- sce_df[, select, drop = FALSE]
    }

    # Rebuild column by column so names are kept as they are
    # (check.names = FALSE) and strings are not turned into factors
    df_args <- c(
        as.list(selected),
        check.names = FALSE,
        stringsAsFactors = FALSE
    )
    converted <- do.call(data.frame, df_args)
    cli::cli_progress_done()

    return(converted)
}
403 |
# Extract the rowPairs or colPairs of `sce` as a list of sparse matrices.
#
# @param sce Object providing rowPairs()/colPairs() accessors.
# @param slot_name Which pairs slot to convert, "rowPairs" or "colPairs".
# @param to_name Name of the destination slot, used in progress messages.
# @param select `FALSE` to skip the slot, a character vector naming the items
#   to keep, or anything else to keep every item.
#
# @return A list of pairs, or `NULL` when the slot is skipped or empty.
.convert_sce_pairs <- function(sce, slot_name = c("rowPairs", "colPairs"),
                               to_name, select) {
    slot_name <- match.arg(slot_name)

    if (isFALSE(select)) {
        .ui_info("Skipping conversion of {.field {slot_name}}")
        return(NULL)
    }

    # match.arg() guarantees `slot_name` is one of the two accessor names
    if (slot_name == "rowPairs") {
        pairs <- as.list(rowPairs(sce, asSparse = TRUE))
    } else {
        pairs <- as.list(colPairs(sce, asSparse = TRUE))
    }

    if (length(pairs) == 0) {
        .ui_info("{.field {slot_name}} is empty and was skipped")
        return(NULL)
    }

    .ui_step(
        "Converting {.field {slot_name}} to {.field {to_name}}",
        msg_done = "{.field {slot_name}} converted to {.field {to_name}}"
    )

    if (is.character(select)) {
        keep <- .check_select(select, slot_name, names(pairs))
        pairs <- pairs[keep]
    }
    cli::cli_progress_done()

    return(pairs)
}
437 |
--------------------------------------------------------------------------------
/R/basilisk.R:
--------------------------------------------------------------------------------
1 | #' AnnData environment
2 | #'
3 | #' The Python environment used by **zellkonverter** for interfacing with the
4 | #' **anndata** Python library (and H5AD files) is described by the dependencies
5 | #' returned by `AnnDataDependencies()`. The `zellkonverterAnnDataEnv()`
6 | #' function returns the [basilisk::BasiliskEnvironment()] containing these
7 | #' dependencies used by **zellkonverter**. Allowed versions of **anndata** are
8 | #' available in `.AnnDataVersions`.
9 | #'
10 | #' @details
11 | #'
12 | #' ## Using Python environments
13 | #'
14 | #' When a **zellkonverter** function is first run a conda environment containing
15 | #' all of the necessary dependencies for that version will be instantiated. This
16 | #' will not be performed on any subsequent run or if any other **zellkonverter**
17 | #' function has been run prior with the same environment version.
18 | #'
19 | #' By default the **zellkonverter** conda environment will become the shared R
20 | #' Python environment if one does not already exist. When one does exist (for
21 | #' example when a **zellkonverter** function has already been run using a
22 | #' different environment version) then a separate environment will be used.
23 | #' See [basilisk::setBasiliskShared()] for more information on this behaviour.
24 | #' Note that when the environment is not shared progress messages are lost.
25 | #'
26 | #' ## Development
27 | #'
28 | #' The `AnnDataDependencies()` function is exposed for use by other package
29 | #' developers who want an easy way to define the dependencies required for
30 | #' creating a Python environment to work with AnnData objects, most typically
31 | #' within a **basilisk** context. For example, we can simply combine this
32 | #' vector with additional dependencies to create a **basilisk** environment with
33 | #' Python package versions that are consistent with those in **zellkonverter**.
34 | #'
35 | #' If you want to run code in the exact environment used by **zellkonverter**
36 | #' this can be done using `zellkonverterAnnDataEnv()` in combination with
37 | #' [basilisk::basiliskStart()] and/or [basilisk::basiliskRun()]. Please refer to
38 | #' the **basilisk** documentation for more information on using these
39 | #' environments.
40 | #'
41 | #' @author Luke Zappia
42 | #' @author Aaron Lun
43 | #'
44 | #' @examples
45 | #' .AnnDataVersions
46 | #'
47 | #' AnnDataDependencies()
48 | #' AnnDataDependencies(version = "0.7.6")
49 | #'
50 | #' cl <- basilisk::basiliskStart(zellkonverterAnnDataEnv())
51 | #' anndata <- reticulate::import("anndata")
52 | #' basilisk::basiliskStop(cl)
53 | #' @name AnnData-Environment
54 | #' @rdname AnnData-Environment
55 | NULL
56 |
#' @rdname AnnData-Environment
#'
#' @format
#' For `.AnnDataVersions` a character vector containing allowed **anndata**
#' version strings.
#'
#' @export
.AnnDataVersions <- c(
    "0.11.4",
    "0.10.9",
    "0.10.6",
    "0.10.2",
    "0.9.2",
    "0.8.0",
    "0.7.6"
)
65 |
#' @rdname AnnData-Environment
#'
#' @param version A string giving the version of the **anndata** Python library
#' to use. Allowed values are available in `.AnnDataVersions`. By default the
#' latest version is used.
#'
#' @returns
#' For `AnnDataDependencies` a character vector containing the pinned versions
#' of all Python packages to be used by `zellkonverterAnnDataEnv()`.
#'
#' @export
AnnDataDependencies <- function(version = .AnnDataVersions) {
    version <- match.arg(version)

    # Pinned package sets, keyed by anndata version
    pinned <- list(
        "0.7.6" = c(
            "anndata==0.7.6",
            "h5py==3.2.1",
            "hdf5==1.10.6",
            "natsort==7.1.1",
            "numpy==1.20.2",
            "packaging==20.9",
            "pandas==1.2.4",
            "python==3.7.10",
            "scipy==1.6.3",
            "sqlite==3.35.5"
        ),
        "0.8.0" = c(
            "anndata==0.8.0",
            "h5py==3.6.0",
            "hdf5==1.12.1",
            "natsort==8.1.0",
            "numpy==1.22.3",
            "packaging==21.3",
            "pandas==1.4.2",
            "python==3.8.13",
            "scipy==1.7.3",
            "sqlite==3.38.2"
        ),
        "0.9.2" = c(
            "anndata==0.9.2",
            "h5py==3.9.0",
            "hdf5==1.14.2",
            "natsort==8.4.0",
            "numpy==1.26.0",
            "packaging==23.2",
            "pandas==2.1.1",
            "python==3.11.5",
            "scipy==1.11.3"
        ),
        "0.10.2" = c(
            "anndata==0.10.2",
            "h5py==3.10.0",
            "hdf5==1.14.2",
            "natsort==8.4.0",
            "numpy==1.26.0",
            "packaging==23.2",
            "pandas==2.1.1",
            "python==3.11.5",
            "scipy==1.11.3"
        ),
        "0.10.6" = c(
            "anndata==0.10.6",
            "h5py==3.10.0",
            "hdf5==1.14.3",
            "natsort==8.4.0",
            "numpy==1.26.4",
            "packaging==24.0",
            "pandas==2.2.1",
            "python==3.12.2",
            "scipy==1.12.0"
        ),
        "0.10.9" = c(
            "anndata==0.10.9",
            "h5py==3.12.1",
            "hdf5==1.14.3",
            "natsort==8.4.0",
            "numpy==2.1.2",
            "packaging==24.1",
            "pandas==2.2.3",
            "python==3.12.7",
            "scipy==1.14.1"
        ),
        "0.11.4" = c(
            "anndata==0.11.4",
            "h5py==3.13.0",
            "hdf5==1.14.3",
            "natsort==8.4.0",
            "numpy==2.2.4",
            "packaging==24.2",
            "pandas==2.2.3",
            "python==3.13.2",
            "scipy==1.15.2"
        )
    )

    pinned[[version]]
}
162 |
163 | #' @rdname AnnData-Environment
164 | #'
165 | #' @return
166 | #' For `zellkonverterAnnDataEnv` a [basilisk::BasiliskEnvironment()] containing
167 | #' **zellkonverter**'s AnnData Python environment.
168 | #'
169 | #' @include ui.R
170 | #' @export
zellkonverterAnnDataEnv <- function(version = .AnnDataVersions) {
    # Resolve to a single supported anndata release (first allowed value when
    # `version` is left at its default).
    version <- match.arg(version)

    # The environment name embeds the release, so each version gets its own
    # environment; the package list is the pinned set for that release.
    env_name <- paste0("zellkonverterAnnDataEnv-", version)
    pinned <- AnnDataDependencies(version)

    basilisk::BasiliskEnvironment(
        envname = env_name,
        pkgname = "zellkonverter",
        packages = pinned
    )
}
180 |
# Instantiate environments so they can be found by
# `basilisk::configureBasiliskEnv()` when `BASILISK_USE_SYSTEM_DIR=1`.
# See https://github.com/theislab/zellkonverter/issues/66.
#
# One object is created per entry in `.AnnDataVersions`; when a version is
# added to or removed from that vector this list needs the matching change.
anndata_env_0.7.6 <- zellkonverterAnnDataEnv(version = "0.7.6")
anndata_env_0.8.0 <- zellkonverterAnnDataEnv(version = "0.8.0")
anndata_env_0.9.2 <- zellkonverterAnnDataEnv(version = "0.9.2")
anndata_env_0.10.2 <- zellkonverterAnnDataEnv(version = "0.10.2")
anndata_env_0.10.6 <- zellkonverterAnnDataEnv(version = "0.10.6")
anndata_env_0.10.9 <- zellkonverterAnnDataEnv(version = "0.10.9")
anndata_env_0.11.4 <- zellkonverterAnnDataEnv(version = "0.11.4")
191 |
--------------------------------------------------------------------------------
/R/read.R:
--------------------------------------------------------------------------------
1 | #' Read H5AD
2 | #'
3 | #' Reads a H5AD file and returns a
4 | #' \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
5 | #' object.
6 | #'
7 | #' @param file String containing a path to a `.h5ad` file.
8 | #' @param X_name Name used when saving `X` as an assay. If `NULL` looks for an
9 | #' `X_name` value in `uns`, otherwise uses `"X"`.
10 | #' @param use_hdf5 Logical scalar indicating whether assays should be
11 | #' loaded as HDF5-based matrices from the **HDF5Array** package.
12 | #' @param reader Which HDF5 reader to use. Either `"python"` for reading with
13 | #' the **anndata** Python package via **reticulate** or `"R"` for
14 | #' **zellkonverter**'s native R reader.
15 | #' @param version A string giving the version of the **anndata** Python library
16 | #' to use. Allowed values are available in `.AnnDataVersions`. By default the
17 | #' latest version is used.
18 | #' @param verbose Logical scalar indicating whether to print progress messages.
19 | #' If `NULL` uses `getOption("zellkonverter.verbose")`.
20 | #' @inheritDotParams AnnData2SCE -adata -hdf5_backed
21 | #'
22 | #' @details
23 | #' Setting `use_hdf5 = TRUE` allows for very large datasets to be efficiently
24 | #' represented on machines with little memory. However, this comes at the cost
25 | #' of access speed as data needs to be fetched from the HDF5 file upon request.
26 | #'
27 | #' Setting `reader = "R"` will use an experimental native R reader instead of
28 | #' reading the file into Python and converting the result. This avoids the need
29 | #' for a Python environment and some of the issues with conversion but is still
30 | #' under development and is likely to return slightly different output.
31 | #'
32 | #' See [AnnData-Environment] for more details on **zellkonverter** Python
33 | #' environments.
34 | #'
35 | #' @return A \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
36 | #' object is returned.
37 | #'
38 | #' @examples
39 | #' library(SummarizedExperiment)
40 | #'
41 | #' file <- system.file("extdata", "krumsiek11.h5ad", package = "zellkonverter")
42 | #' sce <- readH5AD(file)
43 | #' class(assay(sce))
44 | #'
45 | #' sce2 <- readH5AD(file, use_hdf5 = TRUE)
46 | #' class(assay(sce2))
47 | #'
48 | #' sce3 <- readH5AD(file, reader = "R")
49 | #' @author Luke Zappia
50 | #' @author Aaron Lun
51 | #'
52 | #' @seealso
53 | #' [`writeH5AD()`], to write a
54 | #' \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
55 | #' object to a H5AD file.
56 | #'
57 | #' [`AnnData2SCE()`], for developers to convert existing AnnData instances to a
58 | #' \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}.
59 | #'
60 | #' @export
61 | #' @importFrom basilisk basiliskRun
62 | #' @importFrom methods slot
readH5AD <- function(file, X_name = NULL, use_hdf5 = FALSE,
    reader = c("python", "R"), version = NULL,
    verbose = NULL, ...) {
    # Expand a leading `~` so the same path is valid in the child process.
    file <- path.expand(file)
    reader <- match.arg(reader)

    # Native R reader: no Python environment involved. Note that `X_name` and
    # `...` are not forwarded on this path.
    if (reader == "R") {
        return(.native_reader(file, backed = use_hdf5, verbose = verbose))
    }

    # Python reader: read with anndata inside the matching basilisk
    # environment and convert the result there.
    .ui_info("Using the {.field Python} reader")
    env <- zellkonverterAnnDataEnv(version)
    # Recover the resolved version string from the environment name (needed
    # for the message below when `version` was passed as `NULL`).
    version <- gsub("zellkonverterAnnDataEnv-", "", slot(env, "envname"))
    .ui_info("Using {.field anndata} version {.field {version}}")

    basiliskRun(
        env = env,
        fun = .H5ADreader,
        testload = "anndata",
        file = file,
        X_name = X_name,
        backed = use_hdf5,
        verbose = verbose,
        ...
    )
}
91 |
92 | #' @importFrom reticulate import
.H5ADreader <- function(file, X_name = NULL, backed = FALSE, verbose = NULL, ...) {
    # Worker run via `basiliskRun()`: read `file` with the anndata Python
    # module and convert the resulting AnnData object with `AnnData2SCE()`.
    anndata <- import("anndata")

    .ui_step(
        "Reading {.file { .trim_path(file)} }",
        msg_done = "Read {.file { .trim_path(file) }}",
        spinner = TRUE
    )
    # `read_h5ad()` receives "r" when a backed read is requested and `FALSE`
    # otherwise.
    backed_mode <- if (backed) "r" else FALSE
    adata <- anndata$read_h5ad(file, backed = backed_mode)
    cli::cli_progress_done()

    AnnData2SCE(
        adata,
        X_name = X_name,
        hdf5_backed = backed,
        verbose = verbose,
        ...
    )
}
108 |
109 | #' @importFrom S4Vectors I DataFrame wmsg
110 | #' @importFrom SummarizedExperiment assays assays<- rowData colData rowData<- colData<-
111 | #' @importFrom SingleCellExperiment SingleCellExperiment reducedDims<- colPairs<- rowPairs<-
.native_reader <- function(file, backed = FALSE, verbose = FALSE) {
    # Read an H5AD file directly from R and assemble a SingleCellExperiment.
    # Apart from `X`, each component is read inside its own error handler so
    # that a failure is reported as a warning and the rest is still returned.
    .ui_info("Using the {.field R} reader")
    .ui_step("Reading {.file {file}}", spinner = TRUE)

    # Evaluate `expr`; if it fails, warn with
    # "<what> for '<file>':\n <error message>" instead of stopping.
    # `expr` is evaluated lazily in this function's frame, so assignments
    # inside it (e.g. to `sce`) take effect here.
    or_warn <- function(what, expr) {
        tryCatch(
            expr,
            error = function(e) {
                warning(wmsg(
                    what, " for '", file, "':\n ",
                    conditionMessage(e)
                ))
            }
        )
    }

    contents <- .list_contents(file)

    # Assays: the X matrix first (if it's there), then every layer.
    assay_list <- list()
    if ("X" %in% names(contents)) {
        assay_list[["X"]] <- .read_matrix(
            file, "X", contents[["X"]],
            backed = backed
        )
    }

    for (layer in names(contents[["layers"]])) {
        or_warn("setting additional assays from 'layers' failed", {
            assay_list[[layer]] <- .read_matrix(
                file,
                file.path("layers", layer),
                contents[["layers"]][[layer]],
                backed = backed
            )
        })
    }

    sce <- SingleCellExperiment(assay_list)

    # Per-cell annotations.
    or_warn("setting 'colData' failed", {
        obs_df <- .read_dim_data(file, "obs", contents[["obs"]])
        if (!is.null(obs_df)) {
            colData(sce) <- obs_df
        }
    })

    # Per-feature annotations.
    or_warn("setting 'rowData' failed", {
        var_df <- .read_dim_data(file, "var", contents[["var"]])
        if (!is.null(var_df)) {
            rowData(sce) <- var_df
            # Manually set SCE rownames, because setting rowData
            # doesn't seem to set them. (Even tho setting colData
            # does set the colnames)
            rownames(sce) <- rownames(var_df)
        }
    })

    # Reduced dimensions come from `obsm`.
    or_warn("setting 'reducedDims' failed", {
        reducedDims(sce) <- .read_dim_mats(file, "obsm", contents[["obsm"]])
    })

    # `varm` matrices are appended to rowData as matrix-valued columns.
    or_warn("extracting 'varm' failed", {
        varm_mats <- .read_dim_mats(file, "varm", contents[["varm"]])
        if (length(varm_mats)) {
            varm_df <- do.call(DataFrame, lapply(varm_mats, I))
            rowData(sce) <- cbind(rowData(sce), varm_df)
        }
    })

    # Pairings, if any exist.
    or_warn("setting 'rowPairs' failed", {
        rowPairs(sce) <- .read_dim_pairs(file, "varp", contents[["varp"]])
    })

    or_warn("setting 'colPairs' failed", {
        colPairs(sce) <- .read_dim_pairs(file, "obsp", contents[["obsp"]])
    })

    # Unstructured annotations become the metadata list.
    if ("uns" %in% names(contents)) {
        or_warn("setting 'metadata' failed", {
            metadata(sce) <- .convert_element(
                rhdf5::h5read(file, "uns"), "uns", file,
                recursive = TRUE
            )
        })
    }

    # If `uns` recorded an `X_name`, use it as the name of the assay read
    # from `X` and drop the entry from the metadata.
    if (("X_name" %in% names(metadata(sce))) && ("X" %in% names(contents))) {
        stopifnot(names(assays(sce))[1] == "X") # should be true b/c X is read 1st
        names(assays(sce))[1] <- metadata(sce)[["X_name"]]
        metadata(sce)[["X_name"]] <- NULL
    }

    sce
}
262 |
# Build a nested list mirroring the object hierarchy of an HDF5 file as
# reported by `rhdf5::h5ls()`. Groups become (possibly empty) lists; every
# other object is stored as the one-row data frame holding its `otype`,
# `dclass` and `dim` columns from the listing.
.list_contents <- function(file) {
    listing <- rhdf5::h5ls(file)

    # Return `node` with `value` stored at the nested position given by
    # `keys`, creating intermediate lists where none exist yet.
    insert_at <- function(node, keys, value) {
        first_key <- keys[1]
        if (length(keys) != 1) {
            value <- insert_at(node[[first_key]], keys[-1], value)
        }
        if (is.null(node)) {
            node <- list()
        }
        node[[first_key]] <- value

        node
    }

    tree <- list()
    for (row in seq_len(nrow(listing))) {
        # Full path of this entry: its group components followed by its name.
        keys <- c(
            strsplit(listing[row, "group"], "/")[[1]],
            listing[row, "name"]
        )
        # Group paths start with "/", which yields a leading empty component.
        if (keys[1] == "") {
            keys <- keys[-1]
        }

        entry <- listing[row, c("otype", "dclass", "dim")]
        if (entry$otype == "H5I_GROUP") {
            entry <- list()
        }
        tree <- insert_at(tree, keys, entry)
    }

    tree
}
296 |
# Read the matrix stored at `path` in `file`. `fields` is the matching entry
# from `.list_contents()`: a data frame is read with `HDF5Array()`, anything
# else with `H5SparseMatrix()`. When `backed` is `FALSE` the result is
# realised in memory (sparse stays sparse, otherwise an ordinary matrix).
.read_matrix <- function(file, path, fields, backed) {
    mat <- if (is.data.frame(fields)) {
        HDF5Array::HDF5Array(file, path)
    } else {
        HDF5Array::H5SparseMatrix(file, path)
    }

    # Backed reads hand back the file-based object untouched.
    if (backed) {
        return(mat)
    }

    if (DelayedArray::is_sparse(mat)) {
        as(mat, "sparseMatrix")
    } else {
        as.matrix(mat)
    }
}
314 |
# Convert an object read with rhdf5 into its R representation, using the HDF5
# attributes stored at `path` in `file`.
#
# * Elements whose `encoding-type` attribute is "categorical" (with `codes`
#   and `categories` members) become factors.
# * Factors whose levels are exactly FALSE/TRUE become logical vectors.
# * With `recursive = TRUE`, named list members are re-read from the file and
#   converted in turn.
# * Names of named lists are passed through `make.names()`.
#
# Returns the converted object.
.convert_element <- function(obj, path, file, recursive = FALSE) {
    element_attrs <- rhdf5::h5readAttributes(file, path)

    # Convert categorical element for AnnData v0.8+
    if (identical(element_attrs[["encoding-type"]], "categorical") &&
        all(c("codes", "categories") %in% names(obj))) {
        # Shift the stored codes by one so they index into `levels`; a stored
        # code of -1 ends up as 0 and is turned into NA.
        codes <- obj[["codes"]] + 1
        codes[codes == 0] <- NA
        levels <- obj[["categories"]]

        # A missing or unusable `ordered` attribute would give a zero-length
        # or NA value here, which makes `factor()` fail; treat anything that
        # is not a single TRUE as "unordered".
        ord <- isTRUE(as.logical(element_attrs[["ordered"]]))

        obj <- factor(levels[codes], levels = levels, ordered = ord)
        return(obj)
    }

    # Handle booleans. Non-nullable booleans have encoding-type
    # "array", so we have to infer the type from the enum levels
    if (is.factor(obj) && identical(levels(obj), c("FALSE", "TRUE"))) {
        obj <- as.logical(obj)
        return(obj)
    }

    # Recursively convert element members
    if (recursive && is.list(obj) && !is.null(names(obj))) {
        for (k in names(obj)) {
            # Each member is read again from its own path before conversion.
            obj[[k]] <- rhdf5::h5read(file, file.path(path, k))
            obj[[k]] <- .convert_element(
                obj[[k]], file.path(path, k),
                file,
                recursive = TRUE
            )
        }
    }

    if (is.list(obj) && !is.null(names(obj))) {
        names(obj) <- make.names(names(obj))
    }

    obj
}
356 |
357 | #' @importFrom S4Vectors DataFrame
.read_dim_data <- function(file, path, fields) {
    # Read the data frame stored under `path` (e.g. "obs" or "var") into a
    # DataFrame. `fields` is the matching entry from `.list_contents()`.
    # Returns `NULL` when there are neither columns nor an index.
    col_names <- setdiff(names(fields), "__categories")
    out_cols <- list()
    for (col_name in col_names) {
        vec <- rhdf5::h5read(file, file.path(path, col_name))

        vec <- .convert_element(
            vec, file.path(path, col_name),
            file,
            recursive = FALSE
        )

        # Drop array attributes from everything except factors.
        if (!is.factor(vec)) {
            vec <- as.vector(vec)
        }

        out_cols[[col_name]] <- vec
    }

    # for AnnData versions <= 0.7
    cat_names <- names(fields[["__categories"]])
    for (cat_name in cat_names) {
        levels <- as.vector(
            rhdf5::h5read(file, file.path(path, "__categories", cat_name))
        )
        codes <- out_cols[[cat_name]] + 1L
        # A stored code of -1 becomes 0 here. Indexing with 0 silently drops
        # the element (shortening the column), so map it to NA explicitly, as
        # is done for the v0.8+ categorical encoding.
        codes[codes == 0L] <- NA
        out_cols[[cat_name]] <- factor(levels[codes], levels = levels)
    }

    ## rhdf5::h5readAttributes(file, "var") |> str()
    ## List of 4
    ##  $ _index          : chr "feature_id"
    ##  $ column-order    : chr [1:4(1d)] "feature_is_filtered" "feature_name" "feature_reference" "feature_biotype"
    ##  $ encoding-type   : chr "dataframe"
    ##  $ encoding-version: chr "0.2.0"
    attributes <- rhdf5::h5readAttributes(file, path)

    # The `_index` attribute names the column that holds the row names.
    index <- attributes[["_index"]]
    if (!is.null(index)) {
        indices <- out_cols[[index]]
    } else {
        indices <- NULL
    }

    # Restrict and reorder the columns to the recorded `column-order`.
    column_order <- attributes[["column-order"]]
    if (!is.null(column_order)) {
        out_cols <- out_cols[column_order]
    }

    if (length(out_cols)) {
        df <- do.call(DataFrame, out_cols)
        rownames(df) <- indices
    } else if (!is.null(indices)) {
        # No columns, but the index still defines the rows.
        df <- DataFrame(row.names = indices)
    } else {
        df <- NULL
    }

    df
}
417 |
# Read every dataset named in `fields` from the group `path` of `file` and
# return them as a list of transposed arrays keyed by field name. Returns an
# empty list when `fields` has no names.
.read_dim_mats <- function(file, path, fields) {
    mats <- list()
    for (nm in names(fields)) {
        stored <- rhdf5::h5read(file, file.path(path, nm))
        # Because everything's transposed.
        mats[[nm]] <- t(stored)
    }
    mats
}
426 |
# Read every entry named in `fields` from the group `path` of `file` as an
# `H5SparseMatrix` and return them, coerced to in-memory sparse matrices, as a
# list keyed by field name. Returns an empty list when `fields` has no names.
.read_dim_pairs <- function(file, path, fields) {
    pairs <- list()
    for (nm in names(fields)) {
        on_disk <- HDF5Array::H5SparseMatrix(file, file.path(path, nm))
        pairs[[nm]] <- as(on_disk, "sparseMatrix")
    }
    pairs
}
435 |
--------------------------------------------------------------------------------
/R/reticulate.R:
--------------------------------------------------------------------------------
1 | #' Convert between Python and R objects
2 | #'
3 | #' @param x A Python object.
4 | #'
5 | #' @return An \R object, as converted from the Python object.
6 | #'
7 | #' @details
8 | #' These functions are extensions of the default conversion functions in the
9 | #' `reticulate` package for the following reasons:
10 | #'
11 | #' - `numpy.ndarray` - Handle conversion of **numpy** recarrays
12 | #' - `pandas.core.arrays.masked.BaseMaskedArray` - Handle conversion of
13 | #' **pandas** arrays (used by `AnnData` objects when there are missing
14 | #' values)
15 | #' - `pandas.core.arrays.categorical.Categorical` - Handle conversion of
16 | #' **pandas** categorical arrays
17 | #'
18 | #' @author Luke Zappia
19 | #'
20 | #' @seealso
21 | #' [reticulate::py_to_r()] for the base `reticulate` functions
22 | #'
23 | #' @name r-py-conversion
24 | #' @export
py_to_r.numpy.ndarray <- function(x) {
    disable_conversion_scope(x)

    # Suggested method to detect recarrays from
    # https://stackoverflow.com/a/62491135/4384120
    field_names <- py_to_r(x$dtype$names)

    if (!is.null(field_names)) {
        # Convert via pandas DataFrame as suggested here
        # https://stackoverflow.com/a/60614003/4384120
        # Not as efficient but less messing around with types
        pandas <- import("pandas", convert = FALSE)
        return(tryCatch(
            py_to_r(pandas$DataFrame(x)$to_numpy()),
            error = function(err) {
                stop(
                    "Failed to convert recarray with error: ", err$message,
                    call. = FALSE
                )
            }
        ))
    }

    # No special handler found, delegate to next method
    NextMethod()
}
52 |
53 | #' @export
py_to_r.pandas.core.arrays.masked.BaseMaskedArray <- function(x) {
    disable_conversion_scope(x)

    # Supported masked array classes, checked in this order, with the numpy
    # dtype to cast to and the placeholder used to fill missing values.
    handlers <- list(
        list(
            class = "pandas.core.arrays.boolean.BooleanArray",
            dtype = "bool", fill = FALSE
        ),
        list(
            class = "pandas.core.arrays.integer.IntegerArray",
            dtype = "int", fill = 0L
        ),
        list(
            class = "pandas.core.arrays.floating.FloatingArray",
            dtype = "float", fill = 0.0
        ),
        list(
            class = "pandas.core.arrays.string_.StringArray",
            dtype = "str", fill = ""
        )
    )

    handler <- NULL
    for (candidate in handlers) {
        if (is(x, candidate[["class"]])) {
            handler <- candidate
            break
        }
    }

    if (is.null(handler)) {
        stop(
            "No conversion exists for this Pandas array type: ",
            paste(class(x), collapse = ", ")
        )
    }

    # Record which values should be NA
    na_mask <- reticulate::py_to_r(x$isna())

    # Fill NA values with a dummy, convert to a numpy array of the target
    # dtype and then to R using the default conversion
    filled <- x$fillna(value = handler[["fill"]])
    values <- reticulate::py_to_r(filled$to_numpy()$astype(handler[["dtype"]]))

    # Restore the NA values
    values[na_mask] <- NA

    values
}
91 |
92 | #' @export
py_to_r.pandas.core.arrays.categorical.Categorical <- function(x) {
    disable_conversion_scope(x)

    # Category levels, in the order pandas reports them
    categories <- reticulate::py_to_r(x$categories$to_list())

    # Record which values should be NA
    na_mask <- reticulate::py_to_r(x$isna())

    # Fill NA values with a dummy (the first category), convert to a list and
    # then to R using the default conversion
    values <- reticulate::py_to_r(x$fillna(value = categories[1])$tolist())

    # Restore the NA values before building the factor
    values[na_mask] <- NA

    factor(values, levels = categories)
}
117 |
--------------------------------------------------------------------------------
/R/ui.R:
--------------------------------------------------------------------------------
1 | #' Set zellkonverter verbose
2 | #'
3 | #' Set the zellkonverter verbosity option
4 | #'
5 | #' @param verbose Logical value for the verbosity option.
6 | #'
7 | #' @details
8 | #' Running `setZellkonverterVerbose(TRUE)` will turn on **zellkonverter**
9 | #' progress messages by default without having to set `verbose = TRUE` in each
10 | #' function call. This is done by setting the `"zellkonverter.verbose"` option.
11 | #' Running `setZellkonverterVerbose(FALSE)` will turn default verbosity off.
12 | #'
13 | #' @return The value of getOption("zellkonverter.verbose") invisibly
14 | #' @export
15 | #'
16 | #' @examples
17 | #' current <- getOption("zellkonverter.verbose")
18 | #' setZellkonverterVerbose(TRUE)
19 | #' getOption("zellkonverter.verbose")
20 | #' setZellkonverterVerbose(FALSE)
21 | #' getOption("zellkonverter.verbose")
22 | #' setZellkonverterVerbose(current)
23 | #' getOption("zellkonverter.verbose")
setZellkonverterVerbose <- function(verbose = TRUE) {
    # Anything other than a single `TRUE` is stored as `FALSE`.
    flag <- isTRUE(verbose)
    options(zellkonverter.verbose = flag)

    # Hand back the value that was just stored, without printing it.
    invisible(flag)
}
28 |
# Decide whether messages should be shown for a caller whose frame is `envir`.
# A `verbose` value found in `envir` takes precedence; when it is `NULL` the
# "zellkonverter.verbose" option is used instead. Returns a single logical
# that is `TRUE` only when the chosen value is exactly `TRUE`.
.get_verbose <- function(envir) {
    setting <- envir$verbose

    if (is.null(setting)) {
        return(isTRUE(getOption("zellkonverter.verbose")))
    }

    isTRUE(setting)
}
38 |
# Print a cli rule, but only when the calling function is verbose. The
# caller's frame is used both to look up `verbose` and to evaluate `msg`.
.ui_rule <- function(msg, ...) {
    # Must be captured here, directly in this function's body.
    envir <- parent.frame()

    if (!.get_verbose(envir)) {
        return(invisible(NULL))
    }

    cli::cli_rule(msg, ..., .envir = envir)
}
46 |
# Print a cli info alert, but only when the calling function is verbose. The
# caller's frame is used both to look up `verbose` and to evaluate `msg`.
.ui_info <- function(msg, ...) {
    # Must be captured here, directly in this function's body.
    envir <- parent.frame()

    if (!.get_verbose(envir)) {
        return(invisible(NULL))
    }

    cli::cli_alert_info(msg, ..., .envir = envir)
}
54 |
# Report a problem to the user. The message is formatted with cli in the
# caller's frame, shown as a cli warning alert when the caller is verbose
# and, unless `warn = FALSE`, also raised as a regular R warning.
.ui_warn <- function(msg, warn = TRUE, ...) {
    # Must be captured here, directly in this function's body.
    envir <- parent.frame()

    # Formatting happens up front because the text is needed for the R
    # warning even when nothing is printed.
    formatted <- cli::format_message(msg, .envir = envir)

    if (.get_verbose(envir)) {
        cli::cli_alert_warning(formatted, ..., .envir = envir)
    }

    if (!warn) {
        return(invisible(NULL))
    }

    warning(formatted, call. = FALSE)
}
68 |
# Start a cli progress step, but only when the calling function is verbose.
# The step is attached to the caller's frame (`.envir`), which is also where
# `verbose` is looked up.
.ui_step <- function(msg, ...) {
    # Must be captured here, directly in this function's body.
    envir <- parent.frame()

    if (!.get_verbose(envir)) {
        return(invisible(NULL))
    }

    cli::cli_progress_step(msg, ..., .envir = envir)
}
76 |
# Start a cli process message, but only when the calling function is verbose.
# The caller's frame is used both to look up `verbose` and as `.envir`.
.ui_process <- function(msg, ...) {
    # Must be captured here, directly in this function's body.
    envir <- parent.frame()

    if (!.get_verbose(envir)) {
        return(invisible(NULL))
    }

    cli::cli_process_start(msg, ..., .envir = envir)
}
84 |
# Mark a cli process as done, but only when the calling function is verbose.
# The caller's frame is used both to look up `verbose` and as `.envir`.
.ui_process_done <- function(...) {
    # Must be captured here, directly in this function's body.
    envir <- parent.frame()

    if (!.get_verbose(envir)) {
        return(invisible(NULL))
    }

    cli::cli_process_done(..., .envir = envir)
}
92 |
# Shorten `path` for display. Working from the root towards the file name,
# components of three or more characters are replaced by "..." one at a time
# until the joined path is at most `n` characters long. The result may still
# be longer than `n` once every candidate has been considered.
.trim_path <- function(path, n = 40) {
    parts <- .split_path(path)

    for (idx in seq_along(parts)) {
        # Re-join the current components; a root of "/" would otherwise give
        # a doubled leading slash.
        trimmed_path <- sub("^//", "/", do.call(file.path, as.list(parts)))

        if (nchar(trimmed_path) <= n) {
            break
        }

        # Still too long: collapse this component for the next iteration.
        if (nchar(parts[idx]) >= 3) {
            parts[idx] <- "..."
        }
    }

    trimmed_path
}
108 |
# Split a single path into its components, root first. Components are peeled
# off with `basename()` until `dirname()` no longer changes the remainder
# (e.g. "/" or "."), which becomes the first element of the result.
.split_path <- function(path) {
    trailing <- character(0)

    repeat {
        parent <- dirname(path)
        if (parent == path) {
            break
        }
        trailing <- c(basename(path), trailing)
        path <- parent
    }

    c(path, trailing)
}
116 |
--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
1 | # yoinked from reticulate ->
2 | # https://github.com/rstudio/reticulate/blob/fe0eda154a80b22c0d45e043b74390b73ab8b64e/R/utils.R#L49
yoink <- function(package, symbol) {
    # Fetch `symbol` from the namespace of `package`, whether or not it is
    # exported (the same lookup that `package:::symbol` performs).
    get(symbol, envir = asNamespace(package), inherits = FALSE)
}
# Bind reticulate's unexported `disable_conversion_scope` under the same name
# so that this package's `py_to_r()` methods can call it.
disable_conversion_scope <- yoink("reticulate", "disable_conversion_scope")
7 |
--------------------------------------------------------------------------------
/R/validation.R:
--------------------------------------------------------------------------------
1 | #' Validate H5AD SCE
2 | #'
3 | #' Validate a SingleCellExperiment created by `readH5AD()`. Designed to be used
4 | #' inside `testthat::test_that()` during package testing.
5 | #'
6 | #' @param sce A \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
7 | #' object.
8 | #' @param names Named list of expected names. Names are slots and values are
9 | #' vectors of names that are expected to exist in that slot.
10 | #' @param missing Named list of known missing names. Names are slots and values
11 | #' are vectors of names that are expected to not exist in that slot.
12 | #'
13 | #' @details
14 | #' This function checks that a SingleCellExperiment contains the expected items
15 | #' in each slot. The main reason for this function is to avoid repeating code when
16 | #' testing multiple `.h5ad` files. The following items in `names` and `missing`
17 | #' are recognised:
18 | #'
19 | #' * `assays` - Assay names
20 | #' * `colData` - colData column names
21 | #' * `rowData` - rowData column names
22 | #' * `metadata` - metadata names
23 | #' * `redDim` - Reduced dimension names
24 | #' * `varm` - Column names of the `varm` rowData column (from the AnnData varm
25 | #' slot)
26 | #' * `colPairs` - Column pair names
27 | #' * `rowPairs` - Row pair names
28 | #' * `raw_rowData` - rowData column names in the `raw` altExp
29 | #' * `raw_varm` - Column names of the raw `varm` rowData column (from the
30 | #' AnnData varm slot)
31 | #'
32 | #' If an item in `names` or `missing` is `NULL` then it won't be checked. The
33 | #' items in `missing` are checked that they explicitly do not exist. This is
34 | #' mostly for record keeping when something is known to not be converted but can
35 | #' also be useful when the corresponding `names` item is `NULL`.
36 | #'
37 | #' @return If checks are successful `TRUE` invisibly, if not other output
38 | #' depending on the context
39 | #'
40 | #' @author Luke Zappia
validateH5ADSCE <- function(sce, names, missing) {
    # The `varm` rowData column (if any) is checked separately from the other
    # rowData columns, so pull it out first.
    if ("varm" %in% colnames(SummarizedExperiment::rowData(sce))) {
        varm <- SummarizedExperiment::rowData(sce)$varm
        SummarizedExperiment::rowData(sce)$varm <- NULL
    } else {
        varm <- NULL
    }

    .names_validator(
        "Assay names",
        SummarizedExperiment::assayNames(sce),
        names$assays,
        missing$assays
    )

    .names_validator(
        "colData names",
        colnames(SummarizedExperiment::colData(sce)),
        names$colData,
        missing$colData
    )

    .names_validator(
        "rowData names",
        colnames(SummarizedExperiment::rowData(sce)),
        names$rowData,
        missing$rowData
    )

    .names_validator(
        "metadata names",
        names(S4Vectors::metadata(sce)),
        names$metadata,
        missing$metadata
    )

    .names_validator(
        "redDim names",
        SingleCellExperiment::reducedDimNames(sce),
        names$redDim,
        missing$redDim
    )

    .names_validator(
        "varm names",
        colnames(varm),
        names$varm,
        missing$varm
    )

    .names_validator(
        "colPairs names",
        names(SingleCellExperiment::colPairs(sce)),
        names$colPairs,
        missing$colPairs
    )

    .names_validator(
        "rowPairs names",
        names(SingleCellExperiment::rowPairs(sce)),
        names$rowPairs,
        missing$rowPairs
    )

    # The `raw` alternative experiment, when present, gets the same
    # rowData/varm treatment as the main object.
    if ("raw" %in% SingleCellExperiment::altExpNames(sce)) {
        raw_rowData <- SummarizedExperiment::rowData(
            SingleCellExperiment::altExp(sce, "raw")
        )

        if ("varm" %in% colnames(raw_rowData)) {
            raw_varm <- raw_rowData$varm
            raw_rowData$varm <- NULL
        } else {
            # Must be defined for the check below even when there is no raw
            # `varm` column.
            raw_varm <- NULL
        }

        .names_validator(
            "raw rowData names",
            colnames(raw_rowData),
            names$raw_rowData,
            missing$raw_rowData
        )

        # Labelled separately from the main "varm names" check so failures
        # can be told apart.
        .names_validator(
            "raw varm names",
            colnames(raw_varm),
            names$raw_varm,
            missing$raw_varm
        )
    }

    invisible(TRUE)
}
132 |
# Run the name checks for one slot. When `correct_names` is not `NULL`,
# `actual_names` must be identical to it; when `missing_names` is not `NULL`,
# none of them may occur in `actual_names`. `label` is used in the expectation
# messages. `actual_names` is only evaluated if at least one check runs.
# Returns `TRUE` invisibly.
.names_validator <- function(label, actual_names, correct_names, missing_names) {
    if (!is.null(correct_names)) {
        testthat::expect_identical(actual_names, correct_names, label = label)
    }

    if (!is.null(missing_names)) {
        none_present <- all(!(missing_names %in% actual_names))
        testthat::expect_true(none_present, label = paste(label, "missing"))
    }

    invisible(TRUE)
}
151 |
152 | #' Expect SCE
153 | #'
154 | #' Test that a SingleCellExperiment matches an expected object. Designed to be
155 | #' used inside `testthat::test_that()` during package testing.
156 | #'
157 | #' @param sce A \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
158 | #' object.
159 | #' @param expected A template \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
160 | #' object to compare to.
161 | #'
162 | #' @return `TRUE` invisibly if checks pass
163 | #'
164 | #' @author Luke Zappia
expectSCE <- function(sce, expected) {
    # Compare `sce` against the template `expected` component by component so
    # that a failure points at the part that differs.
    testthat::expect_identical(dimnames(sce), dimnames(expected))

    # Metadata is only compared when the template carries any.
    if (length(S4Vectors::metadata(expected)) > 0) {
        testthat::expect_identical(
            S4Vectors::metadata(sce),
            S4Vectors::metadata(expected)
        )
    }

    testthat::expect_identical(
        SummarizedExperiment::assayNames(sce),
        SummarizedExperiment::assayNames(expected)
    )
    # Assay values are compared with `expect_equal()` rather than
    # `expect_identical()`, unlike the other components.
    for (assay in SummarizedExperiment::assayNames(expected)) {
        testthat::expect_equal(
            SummarizedExperiment::assay(sce, assay),
            SummarizedExperiment::assay(expected, assay)
        )
    }

    testthat::expect_identical(
        SingleCellExperiment::reducedDims(sce),
        SingleCellExperiment::reducedDims(expected)
    )
    testthat::expect_identical(
        SummarizedExperiment::rowData(sce),
        SummarizedExperiment::rowData(expected)
    )
    testthat::expect_identical(
        SummarizedExperiment::colData(sce),
        SummarizedExperiment::colData(expected)
    )
    testthat::expect_identical(
        SingleCellExperiment::rowPairs(sce),
        SingleCellExperiment::rowPairs(expected)
    )
    testthat::expect_identical(
        SingleCellExperiment::colPairs(sce),
        SingleCellExperiment::colPairs(expected)
    )

    invisible(TRUE)
}
206 |
--------------------------------------------------------------------------------
/R/write.R:
--------------------------------------------------------------------------------
1 | #' Write H5AD
2 | #'
3 | #' Write a H5AD file from a
4 | #' \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
5 | #' object.
6 | #'
7 | #' @param sce A \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
8 | #' object.
9 | #' @param file String containing a path to write the new `.h5ad` file.
10 | #' @param X_name Name of the assay to use as the primary matrix (`X`) of the
11 | #' AnnData object. If `NULL`, the first assay of `sce` will be used by default.
12 | #' @param skip_assays Logical scalar indicating whether assay matrices should
13 | #' be ignored when writing to `file`.
14 | #' @param compression Type of compression when writing the new `.h5ad` file.
15 | #' @param version A string giving the version of the **anndata** Python library
16 | #' to use. Allowed values are available in `.AnnDataVersions`. By default the
17 | #' latest version is used.
18 | #' @param verbose Logical scalar indicating whether to print progress messages.
19 | #' If `NULL` uses `getOption("zellkonverter.verbose")`.
20 | #' @inheritDotParams SCE2AnnData
21 | #'
22 | #' @details
23 | #'
24 | #' ## Skipping assays
25 | #'
26 | #' Setting `skip_assays = TRUE` can occasionally be useful if the matrices in
27 | #' `sce` are stored in a format that is not amenable for efficient conversion
28 | #' to a **numpy**-compatible format. In such cases, it can be better to create
29 | #' an empty placeholder dataset in `file` and fill it in R afterwards.
30 | #'
31 | #' ## **DelayedArray** assays
32 | #'
33 | #' If `sce` contains any **DelayedArray** matrices as assays `writeH5AD()` will
34 | #' write them to disk using the **rhdf5** package directly rather than via
35 | #' Python to avoid instantiating them in memory. However there is currently
36 | #' an issue which prevents this being done for sparse **DelayedArray** matrices.
37 | #'
38 | #' ## Known conversion issues
39 | #'
40 | #' ### Coercion to factors
41 | #'
42 | #' The **anndata** package automatically converts some character vectors to
43 | #' factors when saving `.h5ad` files. This can affect columns of `rowData(sce)`
44 | #' and `colData(sce)` which may change type when the `.h5ad` file is read back
45 | #' into R.
46 | #'
47 | #' ## Environment
48 | #'
49 | #' See [AnnData-Environment] for more details on **zellkonverter** Python
50 | #' environments.
51 | #'
52 | #' @return A `NULL` is invisibly returned.
53 | #'
54 | #' @author Luke Zappia
55 | #' @author Aaron Lun
56 | #'
57 | #' @seealso
58 | #' [`readH5AD()`], to read a
59 | #' \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
60 | #' object from a H5AD file.
61 | #'
62 | #' [`SCE2AnnData()`], for developers to create an AnnData object from a
63 | #' \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}.
64 | #'
65 | #' @examples
66 | #' # Using the Zeisel brain dataset
67 | #' if (requireNamespace("scRNAseq", quietly = TRUE)) {
68 | #' library(scRNAseq)
69 | #' sce <- ZeiselBrainData()
70 | #'
71 | #' # Writing to a H5AD file
72 | #' temp <- tempfile(fileext = ".h5ad")
73 | #' writeH5AD(sce, temp)
74 | #' }
75 | #' @export
76 | #' @importFrom basilisk basiliskRun
77 | #' @importFrom Matrix sparseMatrix
78 | #' @importFrom DelayedArray is_sparse
79 | writeH5AD <- function(
80 |     sce, file, X_name = NULL, skip_assays = FALSE,
81 |     compression = c("none", "gzip", "lzf"), version = NULL,
82 |     verbose = NULL, ...) {
83 |     compression <- match.arg(compression)
84 |     if (compression == "none") {
85 |         # "none" is represented as NULL when passed on to the writer.
86 |         compression <- NULL
87 |     }
88 |
89 |     # Swap dense DelayedMatrix assays for placeholders so that they are not
90 |     # sent through Python; their contents are written directly further down.
91 |     ass_list <- assays(sce)
92 |     is_da <- logical(length(ass_list))
93 |     for (a in seq_along(ass_list)) {
94 |         # Skip sparse DelayedArrays due to rhdf5 issue
95 |         # https://github.com/grimbough/rhdf5/issues/79
96 |         if (is(ass_list[[a]], "DelayedMatrix") && !is_sparse(ass_list[[a]])) {
97 |             is_da[a] <- TRUE
98 |             assay(sce, a, withDimnames = FALSE) <- .make_fake_mat(dim(sce))
99 |         }
100 |     }
101 |
102 |     # Resolve the environment first so the version actually used is reported.
103 |     env <- zellkonverterAnnDataEnv(version)
104 |     version <- gsub("zellkonverterAnnDataEnv-", "", slot(env, "envname"))
105 |     .ui_info("Using {.field anndata} version {.field {version}}")
106 |
107 |     # Expand a leading `~` before the path is handed to Python and rhdf5.
108 |     file <- path.expand(file)
109 |     basiliskRun(
110 |         env = env, fun = .H5ADwriter, testload = "anndata",
111 |         sce = sce, file = file, X_name = X_name, skip_assays = skip_assays,
112 |         compression = compression, verbose = verbose, ...
113 |     )
114 |
115 |     # The assay named by `X_name` is the primary matrix and lives at "X"; it
116 |     # is only guaranteed to be the first assay when `X_name` is NULL. Always
117 |     # treating assay 1 as "X" would overwrite "X" with the wrong assay and
118 |     # leave the placeholder behind in "layers" when another assay is chosen.
119 |     x_index <- if (is.null(X_name)) 1L else match(X_name, assayNames(sce))
120 |
121 |     # Going back out and replacing each placeholder with the real matrix.
122 |     for (p in which(is_da)) {
123 |         curp <- "X"
124 |         if (!p %in% x_index) {
125 |             curp <- file.path("layers", assayNames(sce)[p])
126 |         }
127 |         rhdf5::h5delete(file, curp)
128 |         mat <- ass_list[[p]]
129 |
130 |         if (!is_sparse(mat)) {
131 |             HDF5Array::writeHDF5Array(
132 |                 mat,
133 |                 filepath = file, name = curp, with.dimnames = FALSE
134 |             )
135 |         } else {
136 |             # Unreachable while sparse DelayedArrays are skipped above.
137 |             .write_CSR_matrix(file, name = curp, mat = mat)
138 |         }
139 |     }
140 |
141 |     invisible(NULL)
142 | }
143 |
144 | # Run via basiliskRun() from writeH5AD(): convert `sce`, then write `file`.
145 | #' @importFrom reticulate import
146 | .H5ADwriter <- function(sce, file, X_name, skip_assays, compression,
147 |                         verbose = NULL, ...) {
148 |     adata <- SCE2AnnData(
149 |         sce, X_name = X_name, skip_assays = skip_assays, verbose = verbose, ...
150 |     )
151 |     .ui_step(
152 |         "Writing {.file { .trim_path(file)} }",
153 |         msg_done = "Wrote {.file { .trim_path(file)} }", spinner = TRUE
154 |     )
155 |     if (!is.null(compression)) {
156 |         # Report the selected method, not the literal word "compression".
157 |         .ui_info("Using {.field {compression}} compression")
158 |     }
159 |     adata$write_h5ad(file, compression = compression)
160 | }
161 |
162 | # nocov start
163 |
164 | # Skipping code coverage on these functions because they aren't used until the
165 | # sparse DelayedArray rhdf5 issue mentioned above is addressed
166 |
167 | #' @importFrom DelayedArray blockApply rowAutoGrid type
168 | .write_CSR_matrix <- function(file, name, mat, chunk_dim = 10000) {
169 |     # Handlers run newest-first, so the group is closed before the file.
170 |     fid <- rhdf5::H5Fopen(file)
171 |     on.exit(rhdf5::H5Fclose(fid))
172 |     rhdf5::h5createGroup(fid, name)
173 |     gid <- rhdf5::H5Gopen(fid, name)
174 |     on.exit(rhdf5::H5Gclose(gid), add = TRUE, after = FALSE)
175 |
176 |     # Group attributes: the encoding tag, its version and the reversed
177 |     # dimensions of `mat`.
178 |     rhdf5::h5writeAttribute("csc_matrix", gid, "encoding-type")
179 |     rhdf5::h5writeAttribute("0.1.0", gid, "encoding-version")
180 |     rhdf5::h5writeAttribute(rev(dim(mat)), gid, "shape")
181 |
182 |     # "data" and "indices" start empty with unlimited extent; they are grown
183 |     # block by block in .blockwise_sparse_writer().
184 |     data_path <- file.path(name, "data")
185 |     index_path <- file.path(name, "indices")
186 |     value_type <- "H5T_NATIVE_DOUBLE"
187 |     if (type(mat) == "integer") {
188 |         value_type <- "H5T_NATIVE_INT32"
189 |     }
190 |     rhdf5::h5createDataset(
191 |         fid, data_path,
192 |         dims = 0, maxdims = rhdf5::H5Sunlimited(),
193 |         H5type = value_type, chunk = chunk_dim
194 |     )
195 |     rhdf5::h5createDataset(
196 |         fid, index_path,
197 |         dims = 0, maxdims = rhdf5::H5Sunlimited(),
198 |         H5type = "H5T_NATIVE_UINT32", chunk = chunk_dim
199 |     )
200 |
201 |     # The environment carries the running element count between blocks.
202 |     counter <- new.env()
203 |     counter$last <- 0L
204 |     per_row <- blockApply(
205 |         mat,
206 |         grid = rowAutoGrid(mat),
207 |         FUN = .blockwise_sparse_writer,
208 |         env = counter,
209 |         file = fid,
210 |         name = name,
211 |         as.sparse = TRUE
212 |     )
213 |
214 |     # Each block returns per-row counts; their running total is "indptr".
215 |     per_row <- as.double(unlist(per_row))
216 |     ptr_path <- file.path(name, "indptr")
217 |     rhdf5::h5createDataset(
218 |         fid,
219 |         ptr_path,
220 |         dims = length(per_row) + 1L,
221 |         H5type = "H5T_NATIVE_UINT64"
222 |     )
223 |
224 |     rhdf5::h5writeDataset(c(0, cumsum(per_row)), fid, ptr_path)
225 | }
226 |
227 | #' @importFrom DelayedArray nzdata nzindex
228 | .blockwise_sparse_writer <- function(block, env, file, name) {
229 |     # Non-zero entries of the block, reordered so that rows are contiguous.
230 |     coords <- nzindex(block)
231 |     by_row <- order(coords[, 1])
232 |     rows <- coords[by_row, 1]
233 |     cols <- coords[by_row, 2]
234 |     vals <- nzdata(block)[by_row]
235 |
236 |     # Positions just past what earlier blocks have already written.
237 |     offset <- env$last
238 |     total <- offset + length(cols)
239 |     slots <- list(offset + seq_along(cols))
240 |
241 |     # Grow each dataset to its new length, then fill the fresh tail.
242 |     idx_path <- file.path(name, "indices")
243 |     rhdf5::h5set_extent(file, idx_path, total)
244 |     rhdf5::h5writeDataset(cols - 1L, file, idx_path, index = slots)
245 |     val_path <- file.path(name, "data")
246 |     rhdf5::h5set_extent(file, val_path, total)
247 |     rhdf5::h5writeDataset(vals, file, val_path, index = slots)
248 |
249 |     # Record progress and return the number of entries in each block row.
250 |     env$last <- total
251 |     tabulate(rows, nrow(block))
252 | }
253 |
254 | # nocov end
255 |
--------------------------------------------------------------------------------
/R/zellkonverter-package.R:
--------------------------------------------------------------------------------
1 | #' @import SummarizedExperiment
2 | #' @import SingleCellExperiment
3 | "_PACKAGE"
4 |
5 | # The following block is used by usethis to automatically manage
6 | # roxygen namespace tags. Modify with care!
7 | ## usethis namespace: start
8 | ## usethis namespace: end
9 | NULL
10 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # zellkonverter
4 |
5 |
6 | [](http://www.repostatus.org/#active)
7 | [](https://lifecycle.r-lib.org/articles/stages.html#stable)
8 | [](https://app.codecov.io/gh/theislab/zellkonverter)
9 | [](https://github.com/theislab/zellkonverter/actions/workflows/check-bioc.yml)
10 | [](https://bioconductor.org/checkResults/release/bioc-LATEST/zellkonverter)
11 | [](https://bioconductor.org/checkResults/devel/bioc-LATEST/zellkonverter)
12 | [](http://bioconductor.org/packages/stats/bioc/zellkonverter/)
13 | [](https://support.bioconductor.org/tag/zellkonverter)
14 | [](https://bioconductor.org/packages/release/bioc/html/zellkonverter.html#since)
15 | [](http://bioconductor.org/checkResults/devel/bioc-LATEST/zellkonverter/)
16 | [](https://bioconductor.org/packages/release/bioc/html/zellkonverter.html#since)
17 |
18 |
19 | **zellkonverter** is a small package for converting between SingleCellExperiment
20 | objects and alternative objects for storing single-cell RNA-sequencing data
21 | (such as AnnData). It is built on top of the [**basilisk**][basilisk] package.
22 |
23 | For documentation please refer to [Bioconductor][bioc]. Development
24 | documentation is also available on [Bioconductor devel][bioc-devel] or the
25 | [pkgdown site][pkgdown].
26 |
27 | ## Installation
28 |
29 | **zellkonverter** can be installed from Bioconductor using the **BiocManager**
30 | package:
31 |
32 | ```r
33 | if (!requireNamespace("BiocManager", quietly=TRUE)) {
34 | install.packages("BiocManager")
35 | }
36 | BiocManager::install("zellkonverter")
37 | ```
38 |
39 | ## Build status
40 |
41 | | Source | Checks | Updated |
42 | |:----------------:|:----------------:|:------------:|
43 | | [Bioc release](http://bioconductor.org/packages/release/bioc/html/zellkonverter.html) | [](https://bioconductor.org/checkResults/release/bioc-LATEST/zellkonverter) |  |
44 | | [Bioc devel](http://bioconductor.org/packages/devel/bioc/html/zellkonverter.html) | [](https://bioconductor.org/checkResults/devel/bioc-LATEST/zellkonverter) |  |
45 | | [GitHub actions](https://github.com/theislab/zellkonverter/actions) | [](https://github.com/theislab/zellkonverter/actions) |  |
46 |
47 | ## Code of Conduct
48 |
49 | Please note that the **zellkonverter** project is released with a
50 | [Contributor Code of Conduct](https://contributor-covenant.org/version/2/0/CODE_OF_CONDUCT.html).
51 | By contributing to this project, you agree to abide by its terms.
52 |
53 | ## Contributors
54 |
55 |
56 |
57 |
58 |
59 | Made with [contributors-img](https://contrib.rocks).
60 |
61 | [basilisk]: https://www.bioconductor.org/packages/basilisk/ "basilisk on Bioconductor"
62 | [bioc]: https://bioconductor.org/packages/zellkonverter/ "zellkonverter on Bioconductor"
63 | [bioc-devel]: https://bioconductor.org/packages/devel/bioc/html/zellkonverter.html "zellkonverter on Bioconductor devel"
64 | [pkgdown]: https://theislab.github.io/zellkonverter/ "zellkonverter pkgdown site"
65 |
66 |
--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
1 | comment: false
2 |
3 | coverage:
4 | status:
5 | project:
6 | default:
7 | target: auto
8 | threshold: 1%
9 | informational: true
10 | patch:
11 | default:
12 | target: auto
13 | threshold: 1%
14 | informational: true
15 |
--------------------------------------------------------------------------------
/configure:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | ${R_HOME}/bin/Rscript -e "basilisk::configureBasiliskEnv()"
4 |
--------------------------------------------------------------------------------
/configure.win:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | ${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe -e "basilisk::configureBasiliskEnv()"
4 |
--------------------------------------------------------------------------------
/inst/NEWS.Rd:
--------------------------------------------------------------------------------
1 | \name{NEWS}
2 | \title{News for Package \pkg{zellkonverter}}
3 |
4 | \section{Version 1.18.0, Bioconductor 3.21 Release (April 2025)}{
5 | \subsection{New features}{
6 | \itemize{
7 | \item{
8 | Add minimal support for \code{SpatialExperiment} objects to
9 | \code{writeH5AD()} and \code{SCE2AnnData()}. This stores the spatial
10 | coordinates in a \code{obsm} item named "spatial" as expected by the
11 | \bold{squidpy} Python package. (PR from @mcmero)
12 | }
13 | }}
14 | \subsection{Major changes}{
15 | \itemize{
16 | \item{
17 | Add environment for \bold{anndata} v0.11.4. This is now the default
18 | environment for the Python reader/writer.
19 | }
20 | \item{
21 | Modify \code{SCE2AnnData()} to convert sparse matrices to \code{dgRMatrix}
22 | when they are. This mostly applies to assays and should be more compatible
23 | with what is expected by Python packages.
24 | }
25 | }}
26 | \subsection{Minor changes}{
27 | \itemize{
28 | \item{
29 | Add a \code{testload} argument to \code{basiliskRun()} calls which may
30 | help with problems creating Python environments
31 | }
32 | \item{
33 | Updates to documentation and tests
34 | }
35 | }}
36 | \subsection{Bug fixes}{
37 | \itemize{
38 | \item{
39 | Improve handling of missing row or column names in \code{SCE2AnnData()}
40 | }
41 | }}
42 | }
43 |
44 | \section{Version 1.16.0, Bioconductor 3.20 Release (October 2024)}{
45 | \subsection{Major changes}{
46 | \itemize{
47 | \item{
48 | Add environment for \bold{anndata} v0.10.9. This is now the default
49 | environment for the Python reader/writer.
50 | }
51 | }}
52 | \subsection{Minor changes}{
53 | \itemize{
54 | \item{
55 | Avoid deprecation warning due to setting \code{dtype} when creating Python
56 | \code{AnnData} objects
57 | }
58 | \item{
59 | Standardise code styling using \bold{{styler}}
60 | }
61 | \item{
62 | Add test using the more complete example H5AD file
63 | }
64 | }}
65 | \subsection{Bug fixes}{
66 | \itemize{
67 | \item{
68 | Correctly assign levels to factors in R reader with \bold{anndata} v0.7
69 | files
70 | }
71 | \item{
72 | Correctly set \code{filepath} in the R reader with reading
73 | \code{adata.raw} with \code{use_hdf5 = TRUE} (PR from @GabrielHoffman)
74 | }
75 | }}
76 | }
77 |
78 | \section{Version 1.14.0, Bioconductor 3.19 Release (May 2024)}{
79 | \subsection{Major changes}{
80 | \itemize{
81 | \item{
82 | Add environment for \bold{anndata} v0.10.6. This is now the default
83 | environment for the Python reader/writer.
84 | }
85 | }}
86 | \subsection{Minor changes}{
87 | \itemize{
88 | \item{
89 | Improve warnings when converting matrices fails
90 | }
91 | \item{
92 | Minor change to writing \code{DelayedArray} matrices for compatibility
93 | with \bold{{HDF5Array}} >= v1.31.1
94 | }
95 | }}
96 | \subsection{Bug fixes}{
97 | \itemize{
98 | \item{
99 | Correctly handle \code{use_backed = TRUE} with newer \bold{anndata}
100 | versions
101 | }
102 | \item{
103 | Correctly instantiate the \bold{anndata} v0.10.2 environment
104 | }
105 | \item{
106 | Minor fixes for typos etc.
107 | }
108 | }}
109 | }
110 |
111 | \section{Version 1.12.0, Bioconductor 3.18 Release (October 2023)}{
112 | \subsection{Major changes}{
113 | \itemize{
114 | \item{
115 | Add environments for \bold{anndata} v0.9.2 and v0.10.2. Version 0.10.2 is
116 | now the default environment for the Python reader/writer.
117 | }
118 | }}
119 | \subsection{Minor changes}{
120 | \itemize{
121 | \item{
122 | Changes for compatibility with \bold{rhdf5} v2.45.1 including enum types
123 | that simplifies reading of nullable types in the native R reader
124 | }
125 | \item{
126 | Dimensions are now passed correctly when converting the \code{raw} slot
127 | }
128 | \item{
129 | Backed sparse matrices are now converted in \code{AnnData2SCE()}
130 | }
131 | }}
132 | }
133 |
134 | \section{Version 1.10.0, Bioconductor 3.17 Release (April 2023)}{
135 | \subsection{Major changes}{
136 | \itemize{
137 | \item{
138 | Add compatibility with the \bold{anndata} v0.8 H5AD format to the
139 | native R writer (By @jackkamm and @mtmorgan)
140 | }
141 | \item{
142 | Add functions for converting \bold{pandas} arrays used by \bold{anndata}
143 | when arrays have missing values
144 | }
145 | }}
146 | \subsection{Minor changes}{
147 | \itemize{
148 | \item{
149 | Add Robrecht Cannoodt and Jack Kamm as contributors!
150 | }
151 | \item{
152 | Minor adjustments to tests to match reader changes
153 | }
154 | }}
155 | }
156 |
157 | \section{Version 1.8.0, Bioconductor 3.16 Release (October 2022)}{
158 | \subsection{Major changes}{
159 | \itemize{
160 | \item{
161 | Improve compatibility with the R \bold{anndata} package. This required
162 | modifying conversion functions so that Python objects are explicitly
163 | converted rather than relying on automatic conversion.
164 | }
165 | \item{
166 | Added support for \bold{numpy} recarrays. This solves a long-standing
167 | issue and allows results from \bold{scanpy}'s \code{rank_genes_groups()}
168 | function to be read.
169 | }
170 | }}
171 | \subsection{Minor changes}{
172 | \itemize{
173 | \item{
174 | The Python version is now pinned in the \bold{anndata} v0.7.6 environment
175 | for compatibility with changes in \bold{basilisk}
176 | }
177 | \item{
178 | Instantiate Python environments so they can be properly picked up by
179 | \code{basilisk::configureBasiliskEnv()}
180 | }
181 | \item{
182 | Allow missing obs/var names when \code{use_hdf5 = TRUE}
183 | }
184 | \item{
185 | Minor changes to the UI functions for compatibility with \bold{cli} v3.4.0
186 | }
187 | \item{
188 | Minor changes for compatibility with \bold{Matrix} v1.4-2
189 | }
190 | \item{
191 | Improvements to the UI for warnings
192 | }
193 | \item{
194 | Updates and improvements to tests
195 | }
196 | }}
197 | }
198 |
199 | \section{Version 1.6.0, Bioconductor 3.15 Release (April 2022)}{
200 | \subsection{Major changes}{
201 | \itemize{
202 | \item{
203 | Added support for multiple \bold{basilisk} environments with different
204 | \bold{anndata} versions. Users can now specify the environment to use with
205 | options in \code{readH5AD()} and \code{writeH5AD()}. To facilitate this
206 | some exported objects were converted to functions but this should only
207 | affect developers.
208 | }
209 | \item{
210 | Updated the default environment to use \bold{anndata} v0.8.0. This is a
211 | major update and files written with v0.8.0 cannot be read by previous
212 | \bold{anndata} versions. This was the motivation for supporting multiple
213 | environments and users can select the previous environment with
214 | \bold{anndata} v0.7.6 if compatibility is required.
215 | }
216 | \item{
217 | Standardise naming in \code{AnnData2SCE()}. Column names of data frames
218 | and names of list items will now be modified to match R conventions
219 | (according to \code{make.names()}). When this happens a warning will be
220 | issued listing the modifications. This makes sure that everything in the
221 | created \code{SingleCellExperiment} is accessible.
222 | }
223 | }}
224 | \subsection{Minor changes}{
225 | \itemize{
226 | \item{
227 | Allow \code{data.frame}'s stored in \code{varm} to be converted in
228 | \code{SCE2AnnData()}
229 | }
230 | \item{
231 | Minor updates to the vignette and other documentation.
232 | }
233 | \item{
234 | Updates to tests to match the changes above.
235 | }
236 | }}
237 | }
238 |
239 | \section{Version 1.4.0, Bioconductor 3.14 Release (October 2021)}{
240 | \itemize{
241 | \item{
242 | Add arguments to control how slots are converted in \code{AnnData2SCE()}
243 | and \code{SCE2AnnData()}. Each slot can now be fully converted, skipped
244 | entirely or only selected items converted.
245 | }
246 | \item{
247 | Add support for converting the \code{raw} slot to an \code{altExp} in
248 | \code{AnnData2SCE()}
249 | }
250 | \item{
251 | Add recursive conversion of lists in \code{AnnData2SCE()}
252 | }
253 | \item{
254 | Add progress messages to various functions. These can be controlled by
255 | function arguments or a global variable.
256 | }
257 | \item{
258 | Add long tests for various public datasets. This should help to make the
259 | package more robust
260 | }
261 | \item{
262 | Fix bug in converting \code{dgRMatrix} sparse matrices
263 | }
264 | \item{
265 | Correctly handle \code{DataFrame} objects stored in \code{adata.obsm}
266 | }
267 | }
268 | }
269 |
270 | \section{Version 1.2.0, Bioconductor 3.13 Release (May 2021)}{
271 | \itemize{
272 | \item{
273 | Update \strong{anndata} and other Python dependencies, now using
274 | \strong{anndata} v0.7.6
275 | }
276 | \item{
277 | Improved conversion checks for all slots in \code{AnnData2SCE()}
278 | }
279 | \item{
280 | Enable return conversion of the \code{varm} slot in \code{AnnData2SCE()}
281 | }
282 | \item{
283 | Avoid converting \code{obsp} and \code{varp} to dense matrices in
284 | \code{AnnData2SCE()}
285 | }
286 | \item{
287 | \code{AnnData2SCE()} should now always return \code{dgCMatrix} matrices
288 | when assays are sparse
289 | }
290 | \item{
291 | More consistent conversion of \code{metadata} to \code{uns} in
292 | \code{SCE2AnnData()}
293 | }
294 | \item{
295 | Handle conversion of list columns in \code{colData} and \code{rowData} in
296 | \code{SCE2AnnData()}
297 | }
298 | \item{
299 | Better support for converting \strong{anndata} \code{SparseDataset} arrays
300 | }
301 | \item{
302 | Improved support for conversion of HDF5 backed \code{AnnData} objects
303 | }
304 | \item{
305 | Better support for writing \code{DelayedArray} assays in
306 | \code{writeH5AD()}
307 | }
308 | \item{
309 | Store \code{X_name} in \code{AnnData2SCE()} for use by
310 | \code{SCE2AnnData()} and add an \code{X_name} argument to
311 | \code{AnnData2SCE()} and \code{readH5AD()}
312 | }
313 | \item{
314 | Add a \code{compression} argument to \code{writeH5AD()}
315 | }
316 | \item{
317 | Add an experimental native R reader to \code{readH5AD()}
318 | }
319 | \item{
320 | Export \code{zellkonverterAnnDataEnv} for use by other packages
321 | }
322 | }
323 | }
324 |
325 | \section{Version 1.0.0, Bioconductor 3.12 Release (October 2020)}{
326 | \itemize{
327 | \item{Accepted into Bioconductor for Release 3.12}
328 | \item{
329 | zellkonverter provides methods to convert between Python AnnData objects
330 | and SingleCellExperiment objects. These are primarily intended for use by
331 | downstream Bioconductor packages that wrap Python methods for single-cell
332 | data analysis. It also includes functions to read and write H5AD files
333 | used for saving AnnData objects to disk.
334 | }
335 | }
336 | }
337 |
--------------------------------------------------------------------------------
/inst/WORDLIST:
--------------------------------------------------------------------------------
1 | AnnData
2 | Bioc
3 | BiocManager
4 | Codecov
5 | DataFrames
6 | DelayedArray
7 | GTEX
8 | HDF
9 | Lifecycle
10 | ORCID
11 | SCE
12 | SingleCellExperiment
13 | Zeisel
14 | altExp
15 | anndata
16 | biocViews
17 | cli
18 | colData
19 | conda
20 | dtype
21 | getOption
22 | hdf
23 | img
24 | mtmorgan
25 | natsort
26 | numpy
27 | obs
28 | pkgdown
29 | py
30 | recarrays
31 | rhdf
32 | rowData
33 | scRNA
34 | scRNAseq
35 | scipy
36 | seq
37 | sqlite
38 | var
39 | varm
40 | zellkonverter's
41 |
--------------------------------------------------------------------------------
/inst/extdata/example_anndata.h5ad:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theislab/zellkonverter/c17a17220088ff880d512c392d5de4aacb9e9bb1/inst/extdata/example_anndata.h5ad
--------------------------------------------------------------------------------
/inst/extdata/krumsiek11.h5ad:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theislab/zellkonverter/c17a17220088ff880d512c392d5de4aacb9e9bb1/inst/extdata/krumsiek11.h5ad
--------------------------------------------------------------------------------
/inst/extdata/krumsiek11_augmented_v0-8.h5ad:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theislab/zellkonverter/c17a17220088ff880d512c392d5de4aacb9e9bb1/inst/extdata/krumsiek11_augmented_v0-8.h5ad
--------------------------------------------------------------------------------
/inst/scripts/example_anndata.R:
--------------------------------------------------------------------------------
1 | # This script was used to create the `example_anndata.h5ad` file.
2 | # This file contains an example AnnData object for use in examples and tests.
3 | # A dataset with 200 cells and 500 genes was generated using the Splat simulation in the Splatter package.
4 | # A Python AnnData object was created using this data (via reticulate) and run through a standard Scanpy analysis workflow to populate the various slots.
5 | # The file object was then saved to disk as a .h5ad file.
6 | #
7 | # Key package versions:
8 | #
9 | # splatter v1.14.0
10 | # reticulate v1.18
11 | # scanpy v1.5.1
12 | # anndata v0.7.4
13 |
14 | library(splatter)
15 | library(reticulate)
16 |
17 | mini_sim <- splatSimulateGroups(batchCells = 200, nGenes = 500, lib.loc = 8,
18 | group.prob = c(0.5, 0.5), seed = 1)
19 |
20 | anndata <- import("anndata")
21 | scanpy <- import("scanpy")
22 |
23 | adata <- anndata$AnnData(t(counts(mini_sim)))
24 | adata$obs_names <- colnames(mini_sim)
25 | adata$var_names <- rownames(mini_sim)
26 | adata$layers <- list(counts = t(counts(mini_sim)))
27 |
28 | scanpy$pp$filter_genes(adata, min_counts = 10)
29 | scanpy$pp$normalize_total(adata, target_sum = 1e4)
30 | scanpy$pp$log1p(adata)
31 | scanpy$pp$highly_variable_genes(adata)
32 | scanpy$tl$pca(adata, svd_solver = "arpack")
33 | scanpy$pp$neighbors(adata, n_pcs = 10L)
34 | scanpy$tl$umap(adata)
35 | scanpy$tl$louvain(adata)
36 | scanpy$tl$rank_genes_groups(adata, "louvain")
37 |
38 | adata$write_h5ad("example_anndata.h5ad")
39 |
--------------------------------------------------------------------------------
/inst/scripts/krumsiek11.md:
--------------------------------------------------------------------------------
1 | The `krumsiek11.h5ad` file contains an AnnData object with a simulated myeloid
2 | progenitor scRNA-seq dataset. It was created using the\
3 | `scanpy.datasets.krumsiek11()` function in the **scanpy** Python package and
4 | saved as a `.h5ad` file using the `adata.write()` method. This file is included
5 | in **zellkonverter** for tests and examples that require reading a `.h5ad` file
6 | from disk. More details on this dataset can be found in the **scanpy**
7 | documentation at https://scanpy.readthedocs.io/en/stable/api/scanpy.datasets.krumsiek11.html#scanpy.datasets.krumsiek11.
8 |
--------------------------------------------------------------------------------
/inst/scripts/krumsiek11_augmented.py:
--------------------------------------------------------------------------------
1 | # This script was used to create the `krumsiek11_augmented_v0-8.h5ad`
2 | # file. It adds some extra data to the previous `krumsiek11.h5ad`
3 | # dataset to cover some additional cases for testing (NAs, booleans,
4 | # etc). The data was saved in AnnData=0.8.0 format.
5 | #
6 | # Key package versions:
7 | # - anndata=0.8.0
8 | # - h5py=3.8.0
9 | # - hdf5=1.14.0
10 | # - numpy=1.23.5
11 | # - pandas=1.5.3
12 | # - python=3.9.16
13 | # - scanpy=1.9.2
14 |
15 | import numpy as np
16 | import pandas as pd
17 | import anndata as ad
18 |
19 | adata = ad.read_h5ad("krumsiek11.h5ad")
20 |
21 | # add string column to rowData/var. Make the entries unique so it's
22 | # saved as str instead of factor
23 | adata.var["dummy_str"] = [f"row{i}" for i in range(adata.shape[1])]
24 |
25 | # add float column to colData/obs
26 | adata.obs["dummy_num"] = 42.42
27 |
28 | # float column with NA
29 | adata.obs["dummy_num2"] = adata.obs["dummy_num"]
30 | adata.obs["dummy_num2"][0] = float("nan")
31 |
32 | # int column
33 | adata.obs["dummy_int"] = np.arange(adata.shape[0])
34 |
35 | # int column with NA
36 | adata.obs["dummy_int2"] = pd.array([None] + [42] * (adata.shape[0] - 1))
37 |
38 | # bool column
39 | adata.obs["dummy_bool"] = True
40 | adata.obs["dummy_bool"][0] = False
41 |
42 | # bool column with NA
43 | adata.obs["dummy_bool2"] = pd.array([False, None] + [True] * (adata.shape[0] - 2))
44 |
45 | # also add some entries to the metadata/uns
46 | adata.uns["dummy_category"] = pd.array(["a", "b", None], dtype="category")
47 |
48 | adata.uns["dummy_bool"] = [True, True, False]
49 | adata.uns["dummy_bool2"] = pd.array([True, False, None])
50 |
51 | adata.uns["dummy_int"] = [1,2,3]
52 | adata.uns["dummy_int2"] = pd.array([1,2,None])
53 |
54 | adata.write("krumsiek11_augmented_v0-8.h5ad")
55 |
--------------------------------------------------------------------------------
/longtests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(zellkonverter)
3 |
4 | test_check("zellkonverter")
5 |
--------------------------------------------------------------------------------
/longtests/testthat/test-cellrank_pancreas.R:
--------------------------------------------------------------------------------
1 | library(SingleCellExperiment)
2 | library(BiocFileCache)
3 |
4 | cache <- BiocFileCache(ask = FALSE)
5 | file <- bfcrpath(cache, "https://figshare.com/ndownloader/files/30683438")
6 | outfile <- tempfile(fileext = ".h5ad")
7 |
8 | names <- list(
9 | assays = c("X", "Ms", "Mu", "fit_t", "fit_tau", "fit_tau_", "spliced",
10 | "unspliced", "velocity", "velocity_u"),
11 | colData = c("day", "proliferation", "G2M_score", "S_score", "phase",
12 | "clusters_coarse", "clusters", "clusters_fine", "louvain_Alpha",
13 | "louvain_Beta", "palantir_pseudotime", "initial_size_spliced",
14 | "initial_size_unspliced", "initial_size", "n_counts",
15 | "velocity_self_transition", "terminal_states",
16 | "terminal_states_probs", "initial_states",
17 | "initial_states_probs", "velocity_pseudotime", "latent_time",
18 | "dpt_pseudotime"),
19 | rowData = c("highly_variable_genes", "gene_count_corr", "means",
20 | "dispersions", "dispersions_norm", "highly_variable", "fit_r2",
21 | "fit_alpha", "fit_beta", "fit_gamma", "fit_t_", "fit_scaling",
22 | "fit_std_u", "fit_std_s", "fit_likelihood", "fit_u0", "fit_s0",
23 | "fit_pval_steady", "fit_steady_u", "fit_steady_s",
24 | "fit_variance", "fit_alignment_scaling", "velocity_genes",
25 | "to.Epsilon.corr", "to.Alpha.corr", "to.Beta.corr",
26 | "to.Epsilon.qval", "to.Alpha.qval", "to.Beta.qval"),
27 | metadata = c("T_bwd_params", "clusters_colors", "clusters_fine_colors",
28 | "clusters_sizes", "diffmap_evals", "eig_bwd", "eig_fwd",
29 | "initial_states_colors", "initial_states_names", "iroot",
30 | "louvain_Alpha_colors", "louvain_Beta_colors", "neighbors",
31 | "paga", "pca", "recover_dynamics", "terminal_states_colors",
32 | "terminal_states_names", "to_terminal_states_colors",
33 | "to_terminal_states_names", "velocity_graph",
34 | "velocity_graph_neg", "velocity_params"),
35 | redDim = c("X_diffmap", "X_pca", "X_umap", "macrostates_bwd",
36 | "macrostates_fwd", "to_terminal_states", "velocity_umap"),
37 | varm = c("PCs", "loss"),
38 | colPairs = c("T_bwd", "T_fwd", "connectivities", "distances")
39 | )
40 |
41 | missing <- list()
42 |
43 | test_that("Reading H5AD works", {
44 | expect_warning(
45 | {sce <- readH5AD(file)},
46 | "The names of these selected var columns have been modified"
47 | )
48 | expect_s4_class(sce, "SingleCellExperiment")
49 | })
50 |
51 | sce <- suppressWarnings(readH5AD(file))
52 |
53 | test_that("SCE is valid", {
54 | validateH5ADSCE(sce, names, missing)
55 | })
56 |
57 | test_that("Writing H5AD works", {
58 | writeH5AD(sce, outfile)
59 | expect_true(file.exists(outfile))
60 | })
61 |
62 | test_that("Round trip is as expected", {
63 | out <- readH5AD(outfile)
64 |
65 | expectSCE(out, sce)
66 | })
67 |
--------------------------------------------------------------------------------
/longtests/testthat/test-example_anndata.R:
--------------------------------------------------------------------------------
1 | library(SingleCellExperiment)
2 | # Read the bundled example_anndata.h5ad, validate it, then write and re-read it.
3 | file <- system.file("extdata", "example_anndata.h5ad",
4 | package = "zellkonverter")
5 | outfile <- tempfile(fileext = ".h5ad")
6 | # Per-slot item names passed to validateH5ADSCE() (presumably the expected ones).
7 | names <- list(
8 | assays = c("X", "counts"),
9 | colData = "louvain",
10 | rowData = c("n_counts", "highly_variable", "means", "dispersions",
11 | "dispersions_norm"),
12 | metadata = c("louvain", "neighbors", "pca", "rank_genes_groups", "umap"),
13 | redDim = c("X_pca", "X_umap"),
14 | varm = "PCs",
15 | colPairs = c("connectivities", "distances")
16 | )
17 | # `missing` is the third argument given to validateH5ADSCE() below; empty here.
18 | missing <- list()
19 | # Reading must return a SingleCellExperiment.
20 | test_that("Reading H5AD works", {
21 | sce <- readH5AD(file)
22 | expect_s4_class(sce, "SingleCellExperiment")
23 | })
24 | # Read again at top level (warnings silenced) for use by the tests below.
25 | sce <- suppressWarnings(readH5AD(file))
26 | # Validate `sce` against `names`.
27 | test_that("SCE is valid", {
28 | validateH5ADSCE(sce, names, missing)
29 | })
30 | # Writing must create `outfile` on disk.
31 | test_that("Writing H5AD works", {
32 | writeH5AD(sce, outfile)
33 | expect_true(file.exists(outfile))
34 | })
35 | # Reads the file written by the previous test, so the test order matters.
36 | test_that("Round trip is as expected", {
37 | out <- readH5AD(outfile)
38 | expectSCE(out, sce)
39 | })
40 |
--------------------------------------------------------------------------------
/longtests/testthat/test-gtex_8tissues.R:
--------------------------------------------------------------------------------
1 | library(SingleCellExperiment)
2 | library(BiocFileCache)
3 | # Fetch the GTEx snRNA-seq H5AD through a BiocFileCache, validate and round-trip it.
4 | cache <- BiocFileCache(ask = FALSE)
5 | # Available from https://www.gtexportal.org/home/downloads/adult-gtex/single_cell
6 | file <- bfcrpath(cache, "https://storage.googleapis.com/adult-gtex/single-cell/v9/snrna-seq-data/GTEx_8_tissues_snRNAseq_atlas_071421.public_obs.h5ad")
7 | outfile <- tempfile(fileext = ".h5ad")
8 | # Per-slot item names passed to validateH5ADSCE() (presumably the expected ones).
9 | names <- list(
10 | assays = c("X", "counts"),
11 | colData = c(
12 | "n_genes", "fpr", "tissue", "prep", "individual", "nGenes", "nUMIs",
13 | "PercentMito", "PercentRibo", "Age_bin", "Sex", "Sample.ID",
14 | "Participant.ID", "Sample.ID.short",
15 | "RIN.score.from.PAXgene.tissue.Aliquot",
16 | "RIN.score.from.Frozen.tissue.Aliquot", "Autolysis.Score",
17 | "Sample.Ischemic.Time..mins.", "Tissue.Site.Detail", "scrublet",
18 | "scrublet_score", "barcode", "batch", "n_counts",
19 | "tissue.individual.prep", "Broad.cell.type", "Granular.cell.type",
20 | "introns", "junctions", "exons", "sense", "antisense", "intergenic",
21 | "batch.barcode", "exon_ratio", "intron_ratio", "junction_ratio",
22 | "log10_nUMIs", "leiden", "leiden_tissue", "Tissue.composition",
23 | "Cell.types.level.2", "Cell.types.level.3", "Broad.cell.type.numbers",
24 | "Broad.cell.type..numbers.", "Tissue", "channel"
25 | ),
26 | rowData = c(
27 | "gene_ids", "Chromosome", "Source", "Start", "End", "Strand",
28 | "gene_name", "gene_source", "gene_biotype", "gene_length",
29 | "gene_coding_length", "Approved.symbol", "Approved.name", "Status",
30 | "Previous.symbols", "Alias.symbols", "gene_include", "n_cells"
31 | ),
32 | metadata = c(
33 | "Broad.cell.type..numbers._colors", "Broad.cell.type.numbers_colors",
34 | "Broad.cell.type_colors", "Broad.cell.type_logregcv_vae_colors",
35 | "Broad.cell.type_sizes", "Granular.cell.type_colors",
36 | "Participant.ID_colors", "Sex_colors", "Tissue.composition_colors",
37 | "Tissue_colors", "dendrogram_..Broad.cell.type..", "leiden",
38 | "leiden_colors", "leiden_sub_colors", "neighbors", "paga",
39 | "prep_colors", "tissue_colors", "umap"
40 | ),
41 | redDim = c(
42 | "X_pca", "X_umap", "X_umap_tissue", "X_vae_mean", "X_vae_mean_tissue",
43 | "X_vae_samples", "X_vae_var"
44 | ),
45 | varm = c("spring_leiden_sub"),
46 | colPairs = c("connectivities", "distances")
47 | )
48 | # `missing` is the third argument given to validateH5ADSCE() below; empty here.
49 | missing <- list()
50 | # Reading is expected to warn that selected uns item names were modified.
51 | test_that("Reading H5AD works", {
52 | expect_warning(
53 | {sce <- readH5AD(file)},
54 | "The names of these selected uns items have been modified"
55 | )
56 | expect_s4_class(sce, "SingleCellExperiment")
57 | })
58 | # Read again at top level (warnings silenced) for use by the tests below.
59 | sce <- suppressWarnings(readH5AD(file))
60 | # Validate `sce` against `names`.
61 | test_that("SCE is valid", {
62 | validateH5ADSCE(sce, names, missing)
63 | })
64 | # Writing must create `outfile` on disk.
65 | test_that("Writing H5AD works", {
66 | writeH5AD(sce, outfile)
67 | expect_true(file.exists(outfile))
68 | })
69 | # Reads the file written by the previous test, so the test order matters.
70 | test_that("Round trip is as expected", {
71 | out <- readH5AD(outfile)
72 | expectSCE(out, sce)
73 | })
74 |
--------------------------------------------------------------------------------
/longtests/testthat/test-pegasus_marrow.R:
--------------------------------------------------------------------------------
1 | library(SingleCellExperiment)
2 | library(BiocFileCache)
3 | # Fetch the test H5AD through a BiocFileCache, then validate and round-trip it.
4 | cache <- BiocFileCache(ask = FALSE)
5 | file <- bfcrpath(cache, "https://figshare.com/ndownloader/files/30682400")
6 | outfile <- tempfile(fileext = ".h5ad")
7 | # Per-slot item names passed to validateH5ADSCE() (presumably the expected ones).
8 | names <- list(
9 | assays = c("X"),
10 | colData = c("n_genes", "Channel", "n_counts", "percent_mito", "scale",
11 | "Group", "louvain_labels", "anno"),
12 | rowData = c("featureid", "n_cells", "percent_cells", "robust",
13 | "highly_variable_features", "mean", "var", "hvf_loess",
14 | "hvf_rank"),
15 | metadata = c("Channels", "Groups", "PCs", "W_diffmap", "W_pca_harmony",
16 | "c2gid", "diffmap_evals", "diffmap_knn_distances",
17 | "diffmap_knn_indices", "genome", "gncells",
18 | "louvain_resolution", "modality", "ncells", "norm_count",
19 | "pca", "pca_features", "pca_harmony_knn_distances",
20 | "pca_harmony_knn_indices", "stdzn_max_value", "stdzn_mean",
21 | "stdzn_std"),
22 | redDim = c("X_diffmap", "X_fle", "X_pca", "X_pca_harmony", "X_phi",
23 | "X_tsne", "X_umap"),
24 | varm = c("de_res", "gmeans", "gstds", "means", "partial_sum")
25 | )
26 | # `missing` is the third argument given to validateH5ADSCE() below; empty here.
27 | missing <- list()
28 | # Reading must return a SingleCellExperiment.
29 | test_that("Reading H5AD works", {
30 | sce <- readH5AD(file)
31 | expect_s4_class(sce, "SingleCellExperiment")
32 | })
33 | # Read again at top level (warnings silenced) for use by the tests below.
34 | sce <- suppressWarnings(readH5AD(file))
35 | # Validate `sce` against `names`.
36 | test_that("SCE is valid", {
37 | validateH5ADSCE(sce, names, missing)
38 | })
39 | # Writing must create `outfile` on disk.
40 | test_that("Writing H5AD works", {
41 | writeH5AD(sce, outfile)
42 | expect_true(file.exists(outfile))
43 | })
44 | # Reads the file written by the previous test, so the test order matters.
45 | test_that("Round trip is as expected", {
46 | out <- readH5AD(outfile)
47 |
48 | expectSCE(out, sce)
49 | })
50 |
--------------------------------------------------------------------------------
/longtests/testthat/test-scIB_pancreas.R:
--------------------------------------------------------------------------------
1 | library(SingleCellExperiment)
2 | library(BiocFileCache)
3 | # Fetch the test H5AD through a BiocFileCache, then validate and round-trip it.
4 | cache <- BiocFileCache(ask = FALSE)
5 | file <- bfcrpath(cache, "https://ndownloader.figshare.com/files/24539828")
6 | outfile <- tempfile(fileext = ".h5ad")
7 | # Per-slot item names passed to validateH5ADSCE(); only assays and colData here.
8 | names <- list(
9 | assays = c("X", "counts"),
10 | colData = c("tech", "celltype", "size_factors")
11 | )
12 | missing <- list()
13 | # Reading must return a SingleCellExperiment.
14 | test_that("Reading H5AD works", {
15 | sce <- readH5AD(file)
16 | expect_s4_class(sce, "SingleCellExperiment")
17 | })
18 | # Read again at top level (warnings silenced) for use by the tests below.
19 | sce <- suppressWarnings(readH5AD(file))
20 | # Validate `sce` against `names`.
21 | test_that("SCE is valid", {
22 | validateH5ADSCE(sce, names, missing)
23 | })
24 | # Writing must create `outfile` on disk.
25 | test_that("Writing H5AD works", {
26 | writeH5AD(sce, outfile)
27 | expect_true(file.exists(outfile))
28 | })
29 | # Reads the file written by the previous test, so the test order matters.
30 | test_that("Round trip is as expected", {
31 | out <- readH5AD(outfile)
32 | expectSCE(out, sce)
33 | })
34 |
--------------------------------------------------------------------------------
/longtests/testthat/test-scanpy_pbmc3k.R:
--------------------------------------------------------------------------------
1 | library(SingleCellExperiment)
2 | library(BiocFileCache)
3 | # Fetch the test H5AD through a BiocFileCache, then validate and round-trip it.
4 | cache <- BiocFileCache(ask = FALSE)
5 | file <- bfcrpath(cache, "https://ndownloader.figshare.com/files/30462915")
6 | outfile <- tempfile(fileext = ".h5ad")
7 | # Per-slot item names passed to validateH5ADSCE() (presumably the expected ones).
8 | names <- list(
9 | assays = c("X"),
10 | colData = c("n_genes", "n_genes_by_counts", "total_counts",
11 | "total_counts_mt", "pct_counts_mt", "leiden"),
12 | rowData = c("gene_ids", "n_cells", "mt", "n_cells_by_counts", "mean_counts",
13 | "pct_dropout_by_counts", "total_counts", "highly_variable",
14 | "means", "dispersions", "dispersions_norm", "mean", "std"),
15 | metadata = c("hvg", "leiden", "neighbors", "pca", "rank_genes_groups",
16 | "umap"),
17 | redDim = c("X_pca", "X_umap"),
18 | varm = c("PCs"),
19 | colPairs = c("connectivities", "distances")
20 | )
21 | # `missing` is the third argument given to validateH5ADSCE() below; empty here.
22 | missing <- list()
23 | # Reading must return a SingleCellExperiment.
24 | test_that("Reading H5AD works", {
25 | sce <- readH5AD(file)
26 | expect_s4_class(sce, "SingleCellExperiment")
27 | })
28 | # Read again at top level (warnings silenced) for use by the tests below.
29 | sce <- suppressWarnings(readH5AD(file))
30 | # Validate `sce` against `names`.
31 | test_that("SCE is valid", {
32 | validateH5ADSCE(sce, names, missing)
33 | })
34 | # Writing must create `outfile` on disk.
35 | test_that("Writing H5AD works", {
36 | writeH5AD(sce, outfile)
37 | expect_true(file.exists(outfile))
38 | })
39 | # Reads the file written by the previous test, so the test order matters.
40 | test_that("Round trip is as expected", {
41 | out <- readH5AD(outfile)
42 | expectSCE(out, sce)
43 | })
44 |
--------------------------------------------------------------------------------
/longtests/testthat/test-scanpy_trajectory.R:
--------------------------------------------------------------------------------
1 | library(SingleCellExperiment)
2 | library(BiocFileCache)
3 | # Fetch the test H5AD through a BiocFileCache, then validate and round-trip it.
4 | cache <- BiocFileCache(ask = FALSE)
5 | file <- bfcrpath(cache, "https://figshare.com/ndownloader/files/30594477")
6 | outfile <- tempfile(fileext = ".h5ad")
7 | # Per-slot item names passed to validateH5ADSCE() (presumably the expected ones).
8 | names <- list(
9 | assays = c("X"),
10 | colData = c("paul15_clusters", "n_counts_all", "louvain", "dpt_pseudotime"),
11 | rowData = c("n_counts", "mean", "std"),
12 | metadata = c("diffmap_evals", "draw_graph", "iroot", "louvain",
13 | "louvain_sizes", "neighbors", "paga", "pca"),
14 | redDim = c("X_diffmap", "X_draw_graph_fa", "X_pca"),
15 | varm = c("PCs"),
16 | colPairs = c("connectivities", "distances")
17 | )
18 | # `missing` is the third argument given to validateH5ADSCE() below; empty here.
19 | missing <- list()
20 | # Reading must return a SingleCellExperiment.
21 | test_that("Reading H5AD works", {
22 | sce <- readH5AD(file)
23 | expect_s4_class(sce, "SingleCellExperiment")
24 | })
25 | # Read again at top level (warnings silenced) for use by the tests below.
26 | sce <- suppressWarnings(readH5AD(file))
27 | # Validate `sce` against `names`.
28 | test_that("SCE is valid", {
29 | validateH5ADSCE(sce, names, missing)
30 | })
31 | # Writing must create `outfile` on disk.
32 | test_that("Writing H5AD works", {
33 | writeH5AD(sce, outfile)
34 | expect_true(file.exists(outfile))
35 | })
36 | # Reads the file written by the previous test, so the test order matters.
37 | test_that("Round trip is as expected", {
38 | out <- readH5AD(outfile)
39 | expectSCE(out, sce)
40 | })
41 |
--------------------------------------------------------------------------------
/longtests/testthat/test-scvelo_pancreas.R:
--------------------------------------------------------------------------------
1 | library(SingleCellExperiment)
2 | library(BiocFileCache)
3 | # Fetch the test H5AD through a BiocFileCache, then validate and round-trip it.
4 | cache <- BiocFileCache(ask = FALSE)
5 | file <- bfcrpath(cache, "https://figshare.com/ndownloader/files/30595479")
6 | outfile <- tempfile(fileext = ".h5ad")
7 | # Per-slot item names passed to validateH5ADSCE() (presumably the expected ones).
8 | names <- list(
9 | assays = c("X", "Ms", "Mu", "fit_t", "fit_tau", "fit_tau_", "spliced",
10 | "unspliced", "variance_velocity", "velocity", "velocity_u"),
11 | colData = c("clusters_coarse", "clusters", "S_score", "G2M_score",
12 | "initial_size_spliced", "initial_size_unspliced",
13 | "initial_size", "n_counts", "velocity_self_transition", "phase",
14 | "velocity_length", "velocity_confidence",
15 | "velocity_confidence_transition", "root_cells", "end_points",
16 | "velocity_pseudotime", "latent_time"),
17 | rowData = c("highly_variable_genes", "gene_count_corr", "means",
18 | "dispersions", "dispersions_norm", "highly_variable",
19 | "velocity_gamma", "velocity_qreg_ratio", "velocity_r2",
20 | "velocity_genes", "spearmans_score", "velocity_score",
21 | "fit_alpha", "fit_beta", "fit_gamma", "fit_t_", "fit_scaling",
22 | "fit_std_u", "fit_std_s", "fit_likelihood", "fit_u0", "fit_s0",
23 | "fit_pval_steady", "fit_steady_u", "fit_steady_s",
24 | "fit_variance", "fit_alignment_scaling", "fit_r2"),
25 | metadata = c("clusters_coarse_colors", "clusters_colors", "clusters_sizes",
26 | "day_colors", "neighbors", "paga", "pca",
27 | "rank_dynamical_genes", "rank_velocity_genes",
28 | "recover_dynamics", "velocity_graph", "velocity_graph_neg",
29 | "velocity_params"),
30 | redDim = c("X_pca", "X_umap", "velocity_umap"),
31 | varm = c("loss"),
32 | colPairs = c("connectivities", "distances")
33 | )
34 | # `missing` is the third argument given to validateH5ADSCE() below; empty here.
35 | missing <- list()
36 | # Reading must return a SingleCellExperiment.
37 | test_that("Reading H5AD works", {
38 | sce <- readH5AD(file)
39 | expect_s4_class(sce, "SingleCellExperiment")
40 | })
41 | # Read again at top level (warnings silenced) for use by the tests below.
42 | sce <- suppressWarnings(readH5AD(file))
43 | # Validate `sce` against `names`.
44 | test_that("SCE is valid", {
45 | validateH5ADSCE(sce, names, missing)
46 | })
47 | # Writing must create `outfile` on disk.
48 | test_that("Writing H5AD works", {
49 | writeH5AD(sce, outfile)
50 | expect_true(file.exists(outfile))
51 | })
52 | # Reads the file written by the previous test, so the test order matters.
53 | test_that("Round trip is as expected", {
54 | out <- readH5AD(outfile)
55 | expectSCE(out, sce)
56 | })
57 |
--------------------------------------------------------------------------------
/longtests/testthat/test-scvi_citeseq.R:
--------------------------------------------------------------------------------
1 | library(SingleCellExperiment)
2 | library(BiocFileCache)
3 | # Fetch the test H5AD through a BiocFileCache, then validate and round-trip it.
4 | cache <- BiocFileCache(ask = FALSE)
5 | file <- bfcrpath(cache, "https://figshare.com/ndownloader/files/30612834")
6 | outfile <- tempfile(fileext = ".h5ad")
7 | # Per-slot item names passed to validateH5ADSCE() (presumably the expected ones).
8 | names <- list(
9 | assays = c("X", "counts", "denoised_rna"),
10 | colData = c("n_genes", "percent_mito", "n_counts", "batch", "X_scvi_batch",
11 | "X_scvi_labels", "X_scvi_local_l_mean", "X_scvi_local_l_var",
12 | "leiden_totalVI"),
13 | rowData = c("highly_variable", "highly_variable_rank", "means", "variances",
14 | "variances_norm", "highly_variable_nbatches"),
15 | metadata = c("X_scvi", "hvg", "leiden", "neighbors", "umap"),
16 | redDim = c("X_totalVI", "X_umap", "denoised_protein",
17 | "protein_expression", "protein_foreground_prob"),
18 | colPairs = c("connectivities", "distances")
19 | )
20 | # `missing` is the third argument given to validateH5ADSCE() below; empty here.
21 | missing <- list()
22 | # Reading is expected to emit a warning; no particular message is required.
23 | test_that("Reading H5AD works", {
24 | sce <- expect_warning(readH5AD(file))
25 | expect_s4_class(sce, "SingleCellExperiment")
26 | })
27 | # Read again at top level (warnings silenced) for use by the tests below.
28 | sce <- suppressWarnings(readH5AD(file))
29 | # Validate `sce` against `names`.
30 | test_that("SCE is valid", {
31 | validateH5ADSCE(sce, names, missing)
32 | })
33 | # Writing must create `outfile` on disk.
34 | test_that("Writing H5AD works", {
35 | writeH5AD(sce, outfile)
36 | expect_true(file.exists(outfile))
37 | })
38 | # Reads the file written by the previous test, so the test order matters.
39 | test_that("Round trip is as expected", {
40 | out <- suppressWarnings(readH5AD(outfile))
41 | # NOTE(review): the rename below targets the first metadata item by position.
42 | # For some reason "_scvi" gets changed to "X_scvi", not sure why...
43 | names(S4Vectors::metadata(sce))[1] <- "X_scvi"
44 |
45 | expectSCE(out, sce)
46 | })
47 |
--------------------------------------------------------------------------------
/longtests/testthat/test-squidpy_visium.R:
--------------------------------------------------------------------------------
1 | library(SingleCellExperiment)
2 | library(BiocFileCache)
3 | # Fetch the test H5AD through a BiocFileCache, then validate and round-trip it.
4 | cache <- BiocFileCache(ask = FALSE)
5 | file <- bfcrpath(cache, "https://figshare.com/ndownloader/files/30639279")
6 | outfile <- tempfile(fileext = ".h5ad")
7 | # Per-slot item names passed to validateH5ADSCE() (presumably the expected ones).
8 | names <- list(
9 | assays = c("X"),
10 | colData = c("in_tissue", "array_row", "array_col", "n_genes_by_counts",
11 | "log1p_n_genes_by_counts", "total_counts", "log1p_total_counts",
12 | "pct_counts_in_top_50_genes", "pct_counts_in_top_100_genes",
13 | "pct_counts_in_top_200_genes", "pct_counts_in_top_500_genes",
14 | "total_counts_MT", "log1p_total_counts_MT", "pct_counts_MT",
15 | "n_counts", "leiden", "cluster", "features_summary_cluster",
16 | "features_histogram_cluster", "features_texture_cluster"),
17 | rowData = c("gene_ids", "feature_types", "genome", "MT",
18 | "n_cells_by_counts", "mean_counts", "log1p_mean_counts",
19 | "pct_dropout_by_counts", "total_counts", "log1p_total_counts",
20 | "n_cells", "highly_variable", "highly_variable_rank", "means",
21 | "variances", "variances_norm"),
22 | metadata = c("cluster_co_occurrence", "cluster_colors", "cluster_ligrec",
23 | "cluster_nhood_enrichment", "hvg", "leiden", "leiden_colors",
24 | "moranI", "neighbors", "pca", "spatial", "spatial_neighbors",
25 | "umap"),
26 | redDim = c("X_pca", "X_umap", "features", "features_context",
27 | "features_lowres", "features_orig", "features_segmentation",
28 | "spatial"),
29 | varm = c("PCs"),
30 | colPairs = c("connectivities", "distances", "spatial_connectivities",
31 | "spatial_distances")
32 | )
33 | # `missing` is the third argument given to validateH5ADSCE() below; empty here.
34 | missing <- list()
35 | # Reading must return a SingleCellExperiment.
36 | test_that("Reading H5AD works", {
37 | sce <- readH5AD(file)
38 | expect_s4_class(sce, "SingleCellExperiment")
39 | })
40 | # Read again at top level (warnings silenced) for use by the tests below.
41 | sce <- suppressWarnings(readH5AD(file))
42 | # Validate `sce` against `names`.
43 | test_that("SCE is valid", {
44 | validateH5ADSCE(sce, names, missing)
45 | })
46 | # Writing must create `outfile` on disk.
47 | test_that("Writing H5AD works", {
48 | writeH5AD(sce, outfile)
49 | expect_true(file.exists(outfile))
50 | })
51 | # Reads the file written by the previous test, so the test order matters.
52 | test_that("Round trip is as expected", {
53 | out <- readH5AD(outfile)
54 |
55 | expectSCE(out, sce)
56 | })
57 |
--------------------------------------------------------------------------------
/man/AnnData-Conversion.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/AnnData2SCE.R, R/SCE2AnnData.R
3 | \name{AnnData-Conversion}
4 | \alias{AnnData-Conversion}
5 | \alias{AnnData2SCE}
6 | \alias{SCE2AnnData}
7 | \title{Convert between AnnData and SingleCellExperiment}
8 | \usage{
9 | AnnData2SCE(
10 | adata,
11 | X_name = NULL,
12 | layers = TRUE,
13 | uns = TRUE,
14 | var = TRUE,
15 | obs = TRUE,
16 | varm = TRUE,
17 | obsm = TRUE,
18 | varp = TRUE,
19 | obsp = TRUE,
20 | raw = FALSE,
21 | skip_assays = FALSE,
22 | hdf5_backed = TRUE,
23 | verbose = NULL
24 | )
25 |
26 | SCE2AnnData(
27 | sce,
28 | X_name = NULL,
29 | assays = TRUE,
30 | colData = TRUE,
31 | rowData = TRUE,
32 | varm = TRUE,
33 | reducedDims = TRUE,
34 | metadata = TRUE,
35 | colPairs = TRUE,
36 | rowPairs = TRUE,
37 | skip_assays = FALSE,
38 | verbose = NULL
39 | )
40 | }
41 | \arguments{
42 | \item{adata}{A \strong{reticulate} reference to a Python AnnData object.}
43 |
44 | \item{X_name}{For \code{SCE2AnnData()} name of the assay to use as the primary
45 | matrix (\code{X}) of the AnnData object. If \code{NULL}, the first assay of \code{sce} will
46 | be used by default. For \code{AnnData2SCE()} name used when saving \code{X} as an
47 | assay. If \code{NULL} looks for an \code{X_name} value in \code{uns}, otherwise uses \code{"X"}.}
48 |
49 | \item{layers, uns, var, obs, varm, obsm, varp, obsp, raw}{Arguments specifying how
50 | these slots are converted. If \code{TRUE} everything in that slot is converted, if
51 | \code{FALSE} nothing is converted and if a character vector only those items or
52 | columns are converted.}
53 |
54 | \item{skip_assays}{Logical scalar indicating whether to skip conversion of
55 | any assays in \code{sce} or \code{adata}, replacing them with empty sparse matrices
56 | instead.}
57 |
58 | \item{hdf5_backed}{Logical scalar indicating whether HDF5-backed matrices
59 | in \code{adata} should be represented as HDF5Array objects. This assumes that
60 | \code{adata} is created with \code{backed="r"}.}
61 |
62 | \item{verbose}{Logical scalar indicating whether to print progress messages.
63 | If \code{NULL} uses \code{getOption("zellkonverter.verbose")}.}
64 |
65 | \item{sce}{A \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
66 | object.}
67 |
68 | \item{assays, colData, rowData, reducedDims, metadata, colPairs, rowPairs}{Arguments specifying how these slots are converted. If \code{TRUE} everything in
69 | that slot is converted, if \code{FALSE} nothing is converted and if a character
70 | vector only those items or columns are converted.}
71 | }
72 | \value{
73 | \code{AnnData2SCE()} will return a
74 | \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
75 | containing the equivalent data from \code{adata}.
76 |
77 | \code{SCE2AnnData()} will return a \strong{reticulate} reference to an AnnData object
78 | containing the content of \code{sce}.
79 | }
80 | \description{
81 | Conversion between Python AnnData objects and
82 | \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
83 | objects.
84 | }
85 | \details{
86 | \subsection{Python environment}{
87 |
88 | These functions assume that an appropriate Python environment has already
89 | been loaded. As such, they are largely intended for developer use, most
90 | typically inside a \strong{basilisk} context.
91 | }
92 |
93 | \subsection{Conversion mapping}{
94 |
95 | The conversion is not entirely lossless. The current mapping is shown below
96 | (also at \url{https://tinyurl.com/AnnData2SCE}):
97 |
98 | \if{html}{
99 | \figure{AnnData2SCE.png}{options: width=800, alt="SCE-AnnData map"}
100 | }
101 | \if{latex}{\figure{AnnData2SCE.png}{options: width=5in}}
102 | }
103 |
104 | \subsection{Matrix conversion}{
105 |
106 | In \code{SCE2AnnData()}, matrices are converted to a \strong{numpy}-friendly format.
107 | Sparse matrices are converted to
108 | \link[Matrix:dgCMatrix-class]{Matrix::dgCMatrix} objects while all
109 | other matrices are converted into ordinary matrices. If \code{skip_assays = TRUE},
110 | empty sparse matrices are created instead and the user is expected to fill in
111 | the assays on the Python side.
112 |
113 | For \code{AnnData2SCE()}, a warning is raised if there is no corresponding R
114 | format for a matrix in the \code{AnnData} object, and an empty sparse matrix is
115 | created instead as a placeholder. If \code{skip_assays = NA}, no warning is
116 | emitted but variables are created in the
117 | \code{\link[SingleCellExperiment:internals]{int_metadata()}} of the output to
118 | specify which assays were skipped.
119 |
120 | If \code{skip_assays = TRUE}, empty sparse matrices are created for all assays,
121 | regardless of whether they might be convertible to an R format or not.
122 | In both cases, the user is expected to fill in the assays on the R side.
123 | }
124 |
125 | \subsection{\code{metadata}/\code{uns} conversion}{
126 |
127 | We attempt to convert between items in the
128 | \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
129 | \code{\link[S4Vectors:Annotated-class]{metadata()}} slot and the \code{AnnData} \code{uns} slot. If
130 | an item cannot be converted a warning will be raised.
131 | }
132 |
133 | \subsection{\code{varm} conversion}{
134 |
135 | Values stored in the \code{varm} slot of an \code{AnnData} object are stored in a
136 | column of \code{\link[SummarizedExperiment:SummarizedExperiment-class]{rowData()}} in a
137 | \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
138 | as a \link[S4Vectors:DataFrame-class]{S4Vectors::DataFrame-class} of matrices.
139 | If this column is present an attempt is made to transfer this information
140 | when converting from
141 | \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
142 | to \code{AnnData}.
143 | }
144 |
145 | \subsection{\code{SpatialExperiment} conversion}{
146 |
147 | In \code{SCE2AnnData()}, if \code{sce} is a \link[SpatialExperiment:SpatialExperiment-class]{SpatialExperiment::SpatialExperiment}
148 | object, the spatial coordinates are added to the \code{reducedDims} slot before
149 | conversion to an \code{AnnData} object.
150 | }
151 | }
152 | \examples{
153 | if (requireNamespace("scRNAseq", quietly = TRUE)) {
154 | library(basilisk)
155 | library(scRNAseq)
156 | seger <- SegerstolpePancreasData()
157 |
158 | # These functions are designed to be run inside
159 | # a specified Python environment
160 | roundtrip <- basiliskRun(fun = function(sce) {
161 | # Convert SCE to AnnData:
162 | adata <- zellkonverter::SCE2AnnData(sce)
163 |
164 | # Maybe do some work in Python on 'adata':
165 | # BLAH BLAH BLAH
166 |
167 | # Convert back to an SCE:
168 | zellkonverter::AnnData2SCE(adata)
169 | }, env = zellkonverterAnnDataEnv(), sce = seger)
170 | }
171 | }
172 | \seealso{
173 | \code{\link[=writeH5AD]{writeH5AD()}} and \code{\link[=readH5AD]{readH5AD()}} for dealing directly with H5AD files.
174 | }
175 | \author{
176 | Luke Zappia
177 |
178 | Aaron Lun
179 | }
180 |
--------------------------------------------------------------------------------
/man/AnnData-Environment.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/basilisk.R
3 | \docType{data}
4 | \name{AnnData-Environment}
5 | \alias{AnnData-Environment}
6 | \alias{.AnnDataVersions}
7 | \alias{AnnDataDependencies}
8 | \alias{zellkonverterAnnDataEnv}
9 | \title{AnnData environment}
10 | \format{
11 | For \code{.AnnDataVersions} a character vector containing allowed \strong{anndata}
12 | version strings.
13 | }
14 | \usage{
15 | .AnnDataVersions
16 |
17 | AnnDataDependencies(version = .AnnDataVersions)
18 |
19 | zellkonverterAnnDataEnv(version = .AnnDataVersions)
20 | }
21 | \arguments{
22 | \item{version}{A string giving the version of the \strong{anndata} Python library
23 | to use. Allowed values are available in \code{.AnnDataVersions}. By default the
24 | latest version is used.}
25 | }
26 | \value{
27 | For \code{AnnDataDependencies} a character vector containing the pinned versions
28 | of all Python packages to be used by \code{zellkonverterAnnDataEnv()}.
29 |
30 | For \code{zellkonverterAnnDataEnv} a \code{\link[basilisk:BasiliskEnvironment-class]{basilisk::BasiliskEnvironment()}} containing
31 | \strong{zellkonverter}'s AnnData Python environment.
32 | }
33 | \description{
34 | The Python environment used by \strong{zellkonverter} for interfacing with the
35 | \strong{anndata} Python library (and H5AD files) is described by the dependencies
36 | returned by \code{AnnDataDependencies()}. The \code{zellkonverterAnnDataEnv()}
37 | function returns the \code{\link[basilisk:BasiliskEnvironment-class]{basilisk::BasiliskEnvironment()}} containing these
38 | dependencies used by \strong{zellkonverter}. Allowed versions of \strong{anndata} are
39 | available in \code{.AnnDataVersions}.
40 | }
41 | \details{
42 | \subsection{Using Python environments}{
43 |
44 | When a \strong{zellkonverter} function is first run, a conda environment containing all of
45 | the necessary dependencies for that version will be instantiated. This will
46 | not be performed on any subsequent run or if any other \strong{zellkonverter}
47 | function has been run prior with the same environment version.
48 |
49 | By default the \strong{zellkonverter} conda environment will become the shared R
50 | Python environment if one does not already exist. When one does exist (for
51 | example when a \strong{zellkonverter} function has already been run using a
52 | different environment version) then a separate environment will be used.
53 | See \code{\link[basilisk:basiliskOptions]{basilisk::setBasiliskShared()}} for more information on this behaviour.
54 | Note that when the environment is not shared, progress messages are lost.
55 | }
56 |
57 | \subsection{Development}{
58 |
59 | The \code{AnnDataDependencies()} function is exposed for use by other package
60 | developers who want an easy way to define the dependencies required for
61 | creating a Python environment to work with AnnData objects, most typically
62 | within a \strong{basilisk} context. For example, we can simply combine this
63 | vector with additional dependencies to create a \strong{basilisk} environment with
64 | Python package versions that are consistent with those in \strong{zellkonverter}.
65 |
66 | If you want to run code in the exact environment used by \strong{zellkonverter}
67 | this can be done using \code{zellkonverterAnnDataEnv()} in combination with
68 | \code{\link[basilisk:basiliskStart]{basilisk::basiliskStart()}} and/or \code{\link[basilisk:basiliskStart]{basilisk::basiliskRun()}}. Please refer to
69 | the \strong{basilisk} documentation for more information on using these
70 | environments.
71 | }
72 | }
73 | \examples{
74 | .AnnDataVersions
75 |
76 | AnnDataDependencies()
77 | AnnDataDependencies(version = "0.7.6")
78 |
79 | cl <- basilisk::basiliskStart(zellkonverterAnnDataEnv())
80 | anndata <- reticulate::import("anndata")
81 | basilisk::basiliskStop(cl)
82 | }
83 | \author{
84 | Luke Zappia
85 |
86 | Aaron Lun
87 | }
88 | \keyword{datasets}
89 |
--------------------------------------------------------------------------------
/man/expectSCE.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/validation.R
3 | \name{expectSCE}
4 | \alias{expectSCE}
5 | \title{Expect SCE}
6 | \usage{
7 | expectSCE(sce, expected)
8 | }
9 | \arguments{
10 | \item{sce}{A \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
11 | object.}
12 |
13 | \item{expected}{A template \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
14 | object to compare to.}
15 | }
16 | \value{
17 | \code{TRUE} invisibly if checks pass
18 | }
19 | \description{
20 | Test that a SingleCellExperiment matches an expected object. Designed to be
21 | used inside \code{testthat::test_that()} during package testing.
22 | }
23 | \author{
24 | Luke Zappia
25 | }
26 |
--------------------------------------------------------------------------------
/man/figures/AnnData2SCE.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theislab/zellkonverter/c17a17220088ff880d512c392d5de4aacb9e9bb1/man/figures/AnnData2SCE.png
--------------------------------------------------------------------------------
/man/figures/zellkonverter.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theislab/zellkonverter/c17a17220088ff880d512c392d5de4aacb9e9bb1/man/figures/zellkonverter.png
--------------------------------------------------------------------------------
/man/r-py-conversion.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/reticulate.R
3 | \name{r-py-conversion}
4 | \alias{r-py-conversion}
5 | \alias{py_to_r.numpy.ndarray}
6 | \title{Convert between Python and R objects}
7 | \usage{
8 | \method{py_to_r}{numpy.ndarray}(x)
9 | }
10 | \arguments{
11 | \item{x}{A Python object.}
12 | }
13 | \value{
14 | An \R object, as converted from the Python object.
15 | }
16 | \description{
17 | Convert between Python and R objects
18 | }
19 | \details{
20 | These functions are extensions of the default conversion functions in the
21 | \code{reticulate} package for the following reasons:
22 | \itemize{
23 | \item \code{numpy.ndarray} - Handle conversion of \strong{numpy} recarrays
24 | \item \code{pandas.core.arrays.masked.BaseMaskedArray} - Handle conversion of
25 | \strong{pandas} arrays (used by \code{AnnData} objects when there are missing
26 | values)
27 | \item \code{pandas.core.arrays.categorical.Categorical} - Handle conversion of
28 | \strong{pandas} categorical arrays
29 | }
30 | }
31 | \seealso{
32 | \code{\link[reticulate:r-py-conversion]{reticulate::py_to_r()}} for the base \code{reticulate} functions
33 | }
34 | \author{
35 | Luke Zappia
36 | }
37 |
--------------------------------------------------------------------------------
/man/readH5AD.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/read.R
3 | \name{readH5AD}
4 | \alias{readH5AD}
5 | \title{Read H5AD}
6 | \usage{
7 | readH5AD(
8 | file,
9 | X_name = NULL,
10 | use_hdf5 = FALSE,
11 | reader = c("python", "R"),
12 | version = NULL,
13 | verbose = NULL,
14 | ...
15 | )
16 | }
17 | \arguments{
18 | \item{file}{String containing a path to a \code{.h5ad} file.}
19 |
20 | \item{X_name}{Name used when saving \code{X} as an assay. If \code{NULL} looks for an
21 | \code{X_name} value in \code{uns}, otherwise uses \code{"X"}.}
22 |
23 | \item{use_hdf5}{Logical scalar indicating whether assays should be
24 | loaded as HDF5-based matrices from the \strong{HDF5Array} package.}
25 |
26 | \item{reader}{Which HDF5 reader to use. Either \code{"python"} for reading with
27 | the \strong{anndata} Python package via \strong{reticulate} or \code{"R"} for
28 | \strong{zellkonverter}'s native R reader.}
29 |
30 | \item{version}{A string giving the version of the \strong{anndata} Python library
31 | to use. Allowed values are available in \code{.AnnDataVersions}. By default the
32 | latest version is used.}
33 |
34 | \item{verbose}{Logical scalar indicating whether to print progress messages.
35 | If \code{NULL} uses \code{getOption("zellkonverter.verbose")}.}
36 |
37 | \item{...}{
38 | Arguments passed on to \code{\link[=AnnData2SCE]{AnnData2SCE}}
39 | \describe{
40 | \item{\code{layers,uns,var,obs,varm,obsm,varp,obsp,raw}}{Arguments specifying how
41 | these slots are converted. If \code{TRUE} everything in that slot is converted, if
42 | \code{FALSE} nothing is converted and if a character vector only those items or
43 | columns are converted.}
44 | \item{\code{skip_assays}}{Logical scalar indicating whether to skip conversion of
45 | any assays in \code{sce} or \code{adata}, replacing them with empty sparse matrices
46 | instead.}
47 | }}
48 | }
49 | \value{
50 | A \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
51 | object is returned.
52 | }
53 | \description{
54 | Reads a H5AD file and returns a
55 | \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
56 | object.
57 | }
58 | \details{
59 | Setting \code{use_hdf5 = TRUE} allows for very large datasets to be efficiently
60 | represented on machines with little memory. However, this comes at the cost
61 | of access speed as data needs to be fetched from the HDF5 file upon request.
62 |
63 | Setting \code{reader = "R"} will use an experimental native R reader instead of
64 | reading the file into Python and converting the result. This avoids the need
65 | for a Python environment and some of the issues with conversion but is still
66 | under development and is likely to return slightly different output.
67 |
68 | See \link{AnnData-Environment} for more details on \strong{zellkonverter} Python
69 | environments.
70 | }
71 | \examples{
72 | library(SummarizedExperiment)
73 |
74 | file <- system.file("extdata", "krumsiek11.h5ad", package = "zellkonverter")
75 | sce <- readH5AD(file)
76 | class(assay(sce))
77 |
78 | sce2 <- readH5AD(file, use_hdf5 = TRUE)
79 | class(assay(sce2))
80 |
81 | sce3 <- readH5AD(file, reader = "R")
82 | }
83 | \seealso{
84 | \code{\link[=writeH5AD]{writeH5AD()}}, to write a
85 | \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
86 | object to a H5AD file.
87 |
88 | \code{\link[=AnnData2SCE]{AnnData2SCE()}}, for developers to convert existing AnnData instances to a
89 | \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}.
90 | }
91 | \author{
92 | Luke Zappia
93 |
94 | Aaron Lun
95 | }
96 |
--------------------------------------------------------------------------------
/man/setZellkonverterVerbose.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/ui.R
3 | \name{setZellkonverterVerbose}
4 | \alias{setZellkonverterVerbose}
5 | \title{Set zellkonverter verbose}
6 | \usage{
7 | setZellkonverterVerbose(verbose = TRUE)
8 | }
9 | \arguments{
10 | \item{verbose}{Logical value for the verbosity option.}
11 | }
12 | \value{
13 | The value of \code{getOption("zellkonverter.verbose")}, invisibly.
14 | }
15 | \description{
16 | Set the zellkonverter verbosity option
17 | }
18 | \details{
19 | Running \code{setZellkonverterVerbose(TRUE)} will turn on \strong{zellkonverter}
20 | progress messages by default without having to set \code{verbose = TRUE} in each
21 | function call. This is done by setting the \code{"zellkonverter.verbose"} option.
22 | Running \code{setZellkonverterVerbose(FALSE)} will turn default verbosity off.
23 | }
24 | \examples{
25 | current <- getOption("zellkonverter.verbose")
26 | setZellkonverterVerbose(TRUE)
27 | getOption("zellkonverter.verbose")
28 | setZellkonverterVerbose(FALSE)
29 | getOption("zellkonverter.verbose")
30 | setZellkonverterVerbose(current)
31 | getOption("zellkonverter.verbose")
32 | }
33 |
--------------------------------------------------------------------------------
/man/validateH5ADSCE.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/validation.R
3 | \name{validateH5ADSCE}
4 | \alias{validateH5ADSCE}
5 | \title{Validate H5AD SCE}
6 | \usage{
7 | validateH5ADSCE(sce, names, missing)
8 | }
9 | \arguments{
10 | \item{sce}{A \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
11 | object.}
12 |
13 | \item{names}{Named list of expected names. Names are slots and values are
14 | vectors of names that are expected to exist in that slot.}
15 |
16 | \item{missing}{Named list of known missing names. Names are slots and values
17 | are vectors of names that are expected to not exist in that slot.}
18 | }
19 | \value{
20 | If the checks are successful, \code{TRUE} invisibly; otherwise other output
21 | depending on the context.
22 | }
23 | \description{
24 | Validate a SingleCellExperiment created by \code{readH5AD()}. Designed to be used
25 | inside \code{testthat::test_that()} during package testing.
26 | }
27 | \details{
28 | This function checks that a SingleCellExperiment contains the expected items
29 | in each slot. The main reason for this function is to avoid repeating code when
30 | testing multiple \code{.h5ad} files. The following items in \code{names} and \code{missing}
31 | are recognised:
32 | \itemize{
33 | \item \code{assays} - Assay names
34 | \item \code{colData} - colData column names
35 | \item \code{rowData} - rowData column names
36 | \item \code{metadata} - metadata names
37 | \item \code{redDim} - Reduced dimension names
38 | \item \code{varm} - Column names of the \code{varm} rowData column (from the AnnData varm
39 | slot)
40 | \item \code{colPairs} - Column pair names
41 | \item \code{rowPairs} - Row pair names
42 | \item \code{raw_rowData} - rowData column names in the \code{raw} altExp
43 | \item \code{raw_varm} - Column names of the raw \code{varm} rowData column (from the
44 | AnnData varm slot)
45 | }
46 |
47 | If an item in \code{names} or \code{missing} is \code{NULL} then it won't be checked. The
48 | items in \code{missing} are explicitly checked to confirm they do not exist. This is
49 | mostly for record keeping when something is known to not be converted but can
50 | also be useful when the corresponding \code{names} item is \code{NULL}.
51 | }
52 | \author{
53 | Luke Zappia
54 | }
55 |
--------------------------------------------------------------------------------
/man/writeH5AD.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/write.R
3 | \name{writeH5AD}
4 | \alias{writeH5AD}
5 | \title{Write H5AD}
6 | \usage{
7 | writeH5AD(
8 | sce,
9 | file,
10 | X_name = NULL,
11 | skip_assays = FALSE,
12 | compression = c("none", "gzip", "lzf"),
13 | version = NULL,
14 | verbose = NULL,
15 | ...
16 | )
17 | }
18 | \arguments{
19 | \item{sce}{A \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
20 | object.}
21 |
22 | \item{file}{String containing a path to write the new \code{.h5ad} file.}
23 |
24 | \item{X_name}{Name of the assay to use as the primary matrix (\code{X}) of the
25 | AnnData object. If \code{NULL}, the first assay of \code{sce} will be used by default.}
26 |
27 | \item{skip_assays}{Logical scalar indicating whether assay matrices should
28 | be ignored when writing to \code{file}.}
29 |
30 | \item{compression}{Type of compression when writing the new \code{.h5ad} file.}
31 |
32 | \item{version}{A string giving the version of the \strong{anndata} Python library
33 | to use. Allowed values are available in \code{.AnnDataVersions}. By default the
34 | latest version is used.}
35 |
36 | \item{verbose}{Logical scalar indicating whether to print progress messages.
37 | If \code{NULL} uses \code{getOption("zellkonverter.verbose")}.}
38 |
39 | \item{...}{
40 | Arguments passed on to \code{\link[=SCE2AnnData]{SCE2AnnData}}
41 | \describe{
42 | \item{\code{assays,colData,rowData,reducedDims,metadata,colPairs,rowPairs}}{Arguments specifying how these slots are converted. If \code{TRUE} everything in
43 | that slot is converted, if \code{FALSE} nothing is converted and if a character
44 | vector only those items or columns are converted.}
45 | }}
46 | }
47 | \value{
48 | A \code{NULL} is invisibly returned.
49 | }
50 | \description{
51 | Write a H5AD file from a
52 | \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
53 | object.
54 | }
55 | \details{
56 | \subsection{Skipping assays}{
57 |
58 | Setting \code{skip_assays = TRUE} can occasionally be useful if the matrices in
59 | \code{sce} are stored in a format that is not amenable for efficient conversion
60 | to a \strong{numpy}-compatible format. In such cases, it can be better to create
61 | an empty placeholder dataset in \code{file} and fill it in R afterwards.
62 | }
63 |
64 | \subsection{\strong{DelayedArray} assays}{
65 |
66 | If \code{sce} contains any \strong{DelayedArray} matrices as assays \code{writeH5AD()} will
67 | write them to disk using the \strong{rhdf5} package directly rather than via
68 | Python to avoid instantiating them in memory. However there is currently
69 | an issue which prevents this being done for sparse \strong{DelayedArray} matrices.
70 | }
71 |
72 | \subsection{Known conversion issues}{
73 | \subsection{Coercion to factors}{
74 |
75 | The \strong{anndata} package automatically converts some character vectors to
76 | factors when saving \code{.h5ad} files. This can affect columns of \code{rowData(sce)}
77 | and \code{colData(sce)} which may change type when the \code{.h5ad} file is read back
78 | into R.
79 | }
80 |
81 | }
82 |
83 | \subsection{Environment}{
84 |
85 | See \link{AnnData-Environment} for more details on \strong{zellkonverter} Python
86 | environments.
87 | }
88 | }
89 | \examples{
90 | # Using the Zeisel brain dataset
91 | if (requireNamespace("scRNAseq", quietly = TRUE)) {
92 | library(scRNAseq)
93 | sce <- ZeiselBrainData()
94 |
95 | # Writing to a H5AD file
96 | temp <- tempfile(fileext = ".h5ad")
97 | writeH5AD(sce, temp)
98 | }
99 | }
100 | \seealso{
101 | \code{\link[=readH5AD]{readH5AD()}}, to read a
102 | \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}
103 | object from a H5AD file.
104 |
105 | \code{\link[=SCE2AnnData]{SCE2AnnData()}}, for developers to create an AnnData object from a
106 | \link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment::SingleCellExperiment}.
107 | }
108 | \author{
109 | Luke Zappia
110 |
111 | Aaron Lun
112 | }
113 |
--------------------------------------------------------------------------------
/man/zellkonverter-package.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/zellkonverter-package.R
3 | \docType{package}
4 | \name{zellkonverter-package}
5 | \alias{zellkonverter}
6 | \alias{zellkonverter-package}
7 | \title{zellkonverter: Conversion Between scRNA-seq Objects}
8 | \description{
9 | Provides methods to convert between Python AnnData objects and SingleCellExperiment objects. These are primarily intended for use by downstream Bioconductor packages that wrap Python methods for single-cell data analysis. It also includes functions to read and write H5AD files used for saving AnnData objects to disk.
10 | }
11 | \seealso{
12 | Useful links:
13 | \itemize{
14 | \item \url{https://github.com/theislab/zellkonverter}
15 | \item Report bugs at \url{https://github.com/theislab/zellkonverter/issues}
16 | }
17 |
18 | }
19 | \author{
20 | \strong{Maintainer}: Luke Zappia \email{luke@lazappi.id.au} (\href{https://orcid.org/0000-0001-7744-8565}{ORCID})
21 |
22 | Authors:
23 | \itemize{
24 | \item Aaron Lun \email{infinite.monkeys.with.keyboards@gmail.com} (\href{https://orcid.org/0000-0002-3564-4813}{ORCID})
25 | }
26 |
27 | Other contributors:
28 | \itemize{
29 | \item Jack Kamm \email{jackkamm@gmail.com} (\href{https://orcid.org/0000-0003-2412-756X}{ORCID}) [contributor]
30 | \item Robrecht Cannoodt \email{rcannood@gmail.com} (\href{https://orcid.org/0000-0003-3641-729X}{ORCID}) (rcannood) [contributor]
31 | \item Gabriel Hoffman \email{gabriel.hoffman@mssm.edu} (\href{https://orcid.org/0000-0002-0957-0224}{ORCID}) (GabrielHoffman) [contributor]
32 | \item Marek Cmero \email{cmero.ma@wehi.edu.au} (\href{https://orcid.org/0000-0001-7783-5530}{ORCID}) (mcmero) [contributor]
33 | }
34 |
35 | }
36 |
--------------------------------------------------------------------------------
/tests/spelling.R:
--------------------------------------------------------------------------------
1 | if (requireNamespace("spelling", quietly = TRUE)) {
2 | spelling::spell_check_test(
3 | vignettes = TRUE,
4 | error = FALSE,
5 | skip_on_cran = TRUE
6 | )
7 | }
8 |
--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(zellkonverter)
3 |
4 | test_check("zellkonverter")
5 |
--------------------------------------------------------------------------------
/tests/testthat/default.profraw:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theislab/zellkonverter/c17a17220088ff880d512c392d5de4aacb9e9bb1/tests/testthat/default.profraw
--------------------------------------------------------------------------------
/tests/testthat/test-SCE2AnnData.R:
--------------------------------------------------------------------------------
1 | test_that(".makeNumpyFriendly() works correctly", {
2 | mat <- matrix(1:50, nrow = 10, ncol = 5)
3 |
4 | friendly_mat <- .makeNumpyFriendly(mat, transpose = TRUE)
5 | expect_identical(friendly_mat, t(mat))
6 | expect_identical(dim(friendly_mat), rev(dim(mat)))
7 |
8 | friendly_mat <- .makeNumpyFriendly(mat, transpose = FALSE)
9 | expect_identical(friendly_mat, mat)
10 | expect_identical(dim(friendly_mat), dim(mat))
11 |
12 | sparse_mat <- Matrix::Matrix(mat, sparse = TRUE)
13 | friendly_sparse_mat <- .makeNumpyFriendly(sparse_mat, transpose = TRUE)
14 | expect_s4_class(friendly_sparse_mat, "dgRMatrix")
15 | expect_identical(dim(friendly_sparse_mat), rev(dim(sparse_mat)))
16 |
17 | friendly_sparse_mat <- .makeNumpyFriendly(sparse_mat, transpose = FALSE)
18 | expect_s4_class(friendly_sparse_mat, "dgCMatrix")
19 | expect_identical(dim(friendly_sparse_mat), dim(sparse_mat))
20 |
21 | delayed_mat <- DelayedArray::DelayedArray(mat)
22 | friendly_delayed_mat <- .makeNumpyFriendly(delayed_mat, transpose = TRUE)
23 | expect_identical(friendly_delayed_mat, t(mat))
24 | expect_identical(dim(friendly_delayed_mat), rev(dim(mat)))
25 |
26 | friendly_delayed_mat <- .makeNumpyFriendly(delayed_mat, transpose = FALSE)
27 | expect_identical(friendly_delayed_mat, mat)
28 | expect_identical(dim(friendly_delayed_mat), dim(mat))
29 |
30 | sparse_delayed_mat <- DelayedArray::DelayedArray(sparse_mat)
31 | friendly_sparse_delayed_mat <- .makeNumpyFriendly(sparse_delayed_mat, transpose = TRUE)
32 | expect_s4_class(friendly_sparse_delayed_mat, "dgRMatrix")
33 | expect_identical(dim(friendly_sparse_delayed_mat), rev(dim(sparse_delayed_mat)))
34 |
35 | friendly_sparse_delayed_mat <- .makeNumpyFriendly(sparse_delayed_mat, transpose = FALSE)
36 | expect_s4_class(friendly_sparse_delayed_mat, "dgCMatrix")
37 | expect_identical(dim(friendly_sparse_delayed_mat), dim(sparse_delayed_mat))
38 | })
39 |
--------------------------------------------------------------------------------
/tests/testthat/test-read.R:
--------------------------------------------------------------------------------
1 | # This tests the readH5AD function (and by implication, AnnData2SCE).
2 | library(SummarizedExperiment)
3 | file <- system.file("extdata", "krumsiek11.h5ad", package = "zellkonverter")
4 | file_example <- system.file("extdata", "example_anndata.h5ad", package = "zellkonverter")
5 | file_v08 <- system.file("extdata", "krumsiek11_augmented_v0-8.h5ad", package = "zellkonverter")
6 |
7 | test_that("Reading H5AD works", {
8 | sce <- readH5AD(file)
9 | expect_s4_class(sce, "SingleCellExperiment")
10 |
11 | expect_identical(assayNames(sce), "X")
12 | expect_identical(colnames(colData(sce)), "cell_type")
13 | })
14 |
15 | test_that("Reading example H5AD works", {
16 | names <- list(
17 | assays = c("X", "counts"),
18 | colData = "louvain",
19 | rowData = c(
20 | "n_counts", "highly_variable", "means", "dispersions",
21 | "dispersions_norm"
22 | ),
23 | metadata = c("louvain", "neighbors", "pca", "rank_genes_groups", "umap"),
24 | redDim = c("X_pca", "X_umap"),
25 | varm = "PCs",
26 | colPairs = c("connectivities", "distances")
27 | )
28 | missing <- list()
29 |
30 | sce <- expect_silent(readH5AD(file_example))
31 | expect_s4_class(sce, "SingleCellExperiment")
32 |
33 | validateH5ADSCE(sce, names, missing)
34 | })
35 |
36 | test_that("Reading H5AD works with version 0.10.9", {
37 | sce <- readH5AD(file, version = "0.10.9")
38 | expect_s4_class(sce, "SingleCellExperiment")
39 |
40 | expect_identical(assayNames(sce), "X")
41 | expect_identical(colnames(colData(sce)), "cell_type")
42 | })
43 |
44 | test_that("Reading H5AD works with version 0.10.6", {
45 | sce <- readH5AD(file, version = "0.10.6")
46 | expect_s4_class(sce, "SingleCellExperiment")
47 |
48 | expect_identical(assayNames(sce), "X")
49 | expect_identical(colnames(colData(sce)), "cell_type")
50 | })
51 |
52 | test_that("Reading H5AD works with version 0.10.2", {
53 | sce <- readH5AD(file, version = "0.10.2")
54 | expect_s4_class(sce, "SingleCellExperiment")
55 |
56 | expect_identical(assayNames(sce), "X")
57 | expect_identical(colnames(colData(sce)), "cell_type")
58 | })
59 |
60 | test_that("Reading H5AD works with version 0.9.2", {
61 | sce <- readH5AD(file, version = "0.9.2")
62 | expect_s4_class(sce, "SingleCellExperiment")
63 |
64 | expect_identical(assayNames(sce), "X")
65 | expect_identical(colnames(colData(sce)), "cell_type")
66 | })
67 |
68 | test_that("Reading H5AD works with version 0.8.0", {
69 | sce <- readH5AD(file, version = "0.8.0")
70 | expect_s4_class(sce, "SingleCellExperiment")
71 |
72 | expect_identical(assayNames(sce), "X")
73 | expect_identical(colnames(colData(sce)), "cell_type")
74 | })
75 |
76 | test_that("Reading H5AD works with version 0.7.6", {
77 | # Python 3.7 is not available for aarch64
78 | skip_on_os("mac", arch = "aarch64")
79 | skip_on_os("linux", arch = "aarch64")
80 |
81 | sce <- readH5AD(file, version = "0.7.6")
82 | expect_s4_class(sce, "SingleCellExperiment")
83 |
84 | expect_identical(assayNames(sce), "X")
85 | expect_identical(colnames(colData(sce)), "cell_type")
86 | })
87 |
88 | test_that("Reading H5AD works with verbose=TRUE", {
89 | sce <- readH5AD(file, verbose = TRUE)
90 | expect_s4_class(sce, "SingleCellExperiment")
91 |
92 | expect_identical(assayNames(sce), "X")
93 | expect_identical(colnames(colData(sce)), "cell_type")
94 | })
95 |
96 | test_that("Reading H5AD works with HDF5Arrays", {
97 | sce <- readH5AD(file, use_hdf5 = TRUE)
98 | expect_s4_class(sce, "SingleCellExperiment")
99 | expect_s4_class(DelayedArray::seed(assay(sce)), "HDF5ArraySeed")
100 |
101 | ref <- readH5AD(file)
102 | expect_identical(as.matrix(assay(ref)), as.matrix(assay(sce)))
103 |
104 | # Properly sleeps to wait for the process to shut down.
105 | expect_s4_class(
106 | sce <- readH5AD(file, use_hdf5 = TRUE),
107 | "SingleCellExperiment"
108 | )
109 | })
110 |
111 | test_that("Reading H5AD works with a mixture of sparse and HDF5Arrays", {
112 | sce <- readH5AD(file)
113 | assay(sce, "more") <- as(assay(sce, "X"), "CsparseMatrix")
114 |
115 | temp <- tempfile(fileext = ".h5ad")
116 | writeH5AD(sce, temp)
117 |
118 | backed <- readH5AD(temp, use_hdf5 = TRUE)
119 | expect_s4_class(DelayedArray::seed(assay(backed)), "HDF5ArraySeed")
120 | expect_s4_class(assay(backed, "more"), "CsparseMatrix")
121 | })
122 |
123 | test_that("readH5AD works in a separate process", {
124 | # Register the restore handlers first so the settings are reset even if readH5AD() errors
125 | oldshare <- basilisk::getBasiliskShared()
126 | on.exit(basilisk::setBasiliskShared(oldshare), add = TRUE)
127 | oldfork <- basilisk::getBasiliskFork()
128 | on.exit(basilisk::setBasiliskFork(oldfork), add = TRUE)
129 | basilisk::setBasiliskShared(FALSE)
130 | basilisk::setBasiliskFork(FALSE)
131 |
132 | sce <- readH5AD(file)
133 | expect_s4_class(sce, "SingleCellExperiment")
134 | })
135 |
136 | test_that("Reading H5AD works with native reader", {
137 | sce <- readH5AD(file, reader = "R")
138 | expect_s4_class(sce, "SingleCellExperiment")
139 |
140 | expect_identical(assayNames(sce), "X")
141 | expect_identical(colnames(colData(sce)), "cell_type")
142 | })
143 |
144 | test_that("Reading v0.8 H5AD works with native reader", {
145 | sce_py <- readH5AD(file_v08)
146 | sce_r <- readH5AD(file_v08, reader = "R")
147 |
148 | expect_identical(rownames(sce_py), rownames(sce_r))
149 | expect_identical(colnames(sce_py), colnames(sce_r))
150 |
151 | expect_identical(rowData(sce_py), rowData(sce_r))
152 |
153 | expect_identical(colnames(colData(sce_py)), colnames(colData(sce_r)))
154 | expect_equal(colData(sce_py), colData(sce_r))
155 |
156 | # check the X assay
157 | expect_identical(assays(sce_py), assays(sce_r))
158 |
159 | # check the easy metadata columns
160 | for (key in c(
161 | "dummy_category", "dummy_int", "dummy_int2", "highlight",
162 | "iroot"
163 | )) {
164 | expect_equal(metadata(sce_py)[[key]], metadata(sce_r)[[key]])
165 | }
166 |
167 | # For these columns the Python reader reads an array
168 | for (key in c("dummy_bool", "dummy_bool2")) {
169 | expect_equal(as.vector(metadata(sce_py)[[key]]), metadata(sce_r)[[key]])
170 | }
171 | })
172 |
173 | test_that("Skipping slot conversion works", {
174 | sce <- readH5AD(file,
175 | layers = FALSE, uns = FALSE, var = FALSE, obs = FALSE,
176 | varm = FALSE, obsm = FALSE, varp = FALSE, obsp = FALSE
177 | )
178 |
179 | expect_identical(assayNames(sce), "X")
180 | expect_identical(metadata(sce), list())
181 | expect_equal(ncol(rowData(sce)), 0)
182 | expect_equal(ncol(colData(sce)), 0)
183 | expect_equal(length(reducedDims(sce)), 0)
184 | expect_equal(length(rowPairs(sce)), 0)
185 | expect_equal(length(colPairs(sce)), 0)
186 | })
187 |
188 | test_that("Selective slot conversion works", {
189 | sce <- readH5AD(file, uns = "iroot")
190 |
191 | expect_identical(names(metadata(sce)), "iroot")
192 | })
193 |
194 | test_that("Selective DF conversion works", {
195 | sce <- readH5AD(file, obs = "cell_type")
196 |
197 | expect_identical(names(colData(sce)), "cell_type")
198 | })
199 |
200 | test_that("Conversion of raw works", {
201 | skip_if_offline()
202 |
203 | cache <- BiocFileCache::BiocFileCache(ask = FALSE)
204 | example_file <- BiocFileCache::bfcrpath(
205 | cache, "https://ndownloader.figshare.com/files/30462915"
206 | )
207 |
208 | sce <- readH5AD(example_file, raw = TRUE)
209 |
210 | names <- list(
211 | assays = c("X"),
212 | colData = c(
213 | "n_genes", "n_genes_by_counts", "total_counts",
214 | "total_counts_mt", "pct_counts_mt", "leiden"
215 | ),
216 | rowData = c(
217 | "gene_ids", "n_cells", "mt", "n_cells_by_counts",
218 | "mean_counts", "pct_dropout_by_counts", "total_counts",
219 | "highly_variable", "means", "dispersions",
220 | "dispersions_norm", "mean", "std"
221 | ),
222 | metadata = c(
223 | "hvg", "leiden", "neighbors", "pca", "rank_genes_groups",
224 | "umap"
225 | ),
226 | redDim = c("X_pca", "X_umap"),
227 | varm = c("PCs"),
228 | colPairs = c("connectivities", "distances"),
229 | raw_rowData = c(
230 | "gene_ids", "n_cells", "mt", "n_cells_by_counts",
231 | "mean_counts", "pct_dropout_by_counts", "total_counts",
232 | "highly_variable", "means", "dispersions",
233 | "dispersions_norm"
234 | )
235 | )
236 |
237 | missing <- list()
238 |
239 | validateH5ADSCE(sce, names, missing)
240 | })
241 |
242 | test_that("Conversion of raw works with use_hdf5 = TRUE", {
243 | skip_if_offline()
244 |
245 | cache <- BiocFileCache::BiocFileCache(ask = FALSE)
246 | example_file <- BiocFileCache::bfcrpath(
247 | cache, "https://ndownloader.figshare.com/files/30462915"
248 | )
249 |
250 | sce <- readH5AD(example_file, raw = TRUE, use_hdf5 = TRUE)
251 |
252 | names <- list(
253 | assays = c("X"),
254 | colData = c(
255 | "n_genes", "n_genes_by_counts", "total_counts",
256 | "total_counts_mt", "pct_counts_mt", "leiden"
257 | ),
258 | rowData = c(
259 | "gene_ids", "n_cells", "mt", "n_cells_by_counts",
260 | "mean_counts", "pct_dropout_by_counts", "total_counts",
261 | "highly_variable", "means", "dispersions",
262 | "dispersions_norm", "mean", "std"
263 | ),
264 | metadata = c(
265 | "hvg", "leiden", "neighbors", "pca", "rank_genes_groups",
266 | "umap"
267 | ),
268 | redDim = c("X_pca", "X_umap"),
269 | varm = c("PCs"),
270 | colPairs = c("connectivities", "distances"),
271 | raw_rowData = c(
272 | "gene_ids", "n_cells", "mt", "n_cells_by_counts",
273 | "mean_counts", "pct_dropout_by_counts", "total_counts",
274 | "highly_variable", "means", "dispersions",
275 | "dispersions_norm"
276 | )
277 | )
278 |
279 | missing <- list()
280 |
281 | validateH5ADSCE(sce, names, missing)
282 | })
283 |
--------------------------------------------------------------------------------
/tests/testthat/test-validation.R:
--------------------------------------------------------------------------------
1 | file <- system.file("extdata", "example_anndata.h5ad",
2 | package = "zellkonverter"
3 | )
4 | sce <- readH5AD(file)
5 |
6 | names <- list(
7 | assays = c("X", "counts"),
8 | colData = "louvain",
9 | rowData = c(
10 | "n_counts", "highly_variable", "means", "dispersions",
11 | "dispersions_norm"
12 | ),
13 | metadata = c("louvain", "neighbors", "pca", "rank_genes_groups", "umap"),
14 | redDim = c("X_pca", "X_umap"),
15 | varm = "PCs",
16 | colPairs = c("connectivities", "distances")
17 | )
18 |
19 | missing <- list()
20 |
21 | test_that("validateH5ADSCE works", {
22 | validateH5ADSCE(sce, names, missing)
23 | expect_error(
24 | validateH5ADSCE(sce, names, list(varm = "PCs")),
25 | "varm names missing is not TRUE"
26 | )
27 | })
28 |
29 | test_that("expectSCE works", {
30 | expectSCE(sce, sce)
31 | })
32 |
--------------------------------------------------------------------------------
/tests/testthat/test-write.R:
--------------------------------------------------------------------------------
1 | # This tests the writeH5AD function (and by implication, SCE2AnnData).
2 | library(scRNAseq)
3 |
4 | sce <- ZeiselBrainData()
5 | reducedDim(sce, "WHEE") <- matrix(runif(ncol(sce) * 10), ncol = 10)
6 |
7 | test_that("writeH5AD works as expected", {
8 | temp <- tempfile(fileext = ".h5ad")
9 | writeH5AD(sce, temp)
10 | expect_true(file.exists(temp))
11 |
12 | # Reading it back out again. Hopefully we didn't lose anything important.
13 | out <- readH5AD(temp)
14 |
15 | expect_identical(dimnames(out), dimnames(sce))
16 | expect_equal(assay(out), assay(sce))
17 | expect_identical(reducedDims(out), reducedDims(sce))
18 |
19 | # Need to coerce the factors back to strings.
20 | row_data <- rowData(out)
21 | for (i in seq_len(ncol(row_data))) {
22 | if (is.factor(row_data[[i]])) {
23 | row_data[[i]] <- as.character(row_data[[i]])
24 | }
25 | }
26 | expect_identical(row_data, rowData(sce))
27 |
28 | col_data <- colData(out)
29 | for (i in seq_len(ncol(col_data))) {
30 | if (is.factor(col_data[[i]])) {
31 | col_data[[i]] <- as.character(col_data[[i]])
32 | }
33 | }
34 | names(col_data) <- names(colData(sce))
35 | expect_identical(col_data, colData(sce))
36 | })
37 |
38 | test_that("writeH5AD works as expected with version 0.10.9", {
39 | temp <- tempfile(fileext = ".h5ad")
40 | writeH5AD(sce, temp, version = "0.10.9")
41 | expect_true(file.exists(temp))
42 |
43 | # Reading it back out again. Hopefully we didn't lose anything important.
44 | out <- readH5AD(temp, version = "0.10.9")
45 |
46 | expect_identical(dimnames(out), dimnames(sce))
47 | expect_equal(assay(out), assay(sce))
48 | expect_identical(reducedDims(out), reducedDims(sce))
49 |
50 | # Need to coerce the factors back to strings.
51 | row_data <- rowData(out)
52 | for (i in seq_len(ncol(row_data))) {
53 | if (is.factor(row_data[[i]])) {
54 | row_data[[i]] <- as.character(row_data[[i]])
55 | }
56 | }
57 | expect_identical(row_data, rowData(sce))
58 |
59 | col_data <- colData(out)
60 | for (i in seq_len(ncol(col_data))) {
61 | if (is.factor(col_data[[i]])) {
62 | col_data[[i]] <- as.character(col_data[[i]])
63 | }
64 | }
65 | names(col_data) <- names(colData(sce))
66 | expect_identical(col_data, colData(sce))
67 | })
68 |
69 | test_that("writeH5AD works as expected with version 0.10.6", {
70 | temp <- tempfile(fileext = ".h5ad")
71 | writeH5AD(sce, temp, version = "0.10.6")
72 | expect_true(file.exists(temp))
73 |
74 | # Reading it back out again. Hopefully we didn't lose anything important.
75 | out <- readH5AD(temp, version = "0.10.6")
76 |
77 | expect_identical(dimnames(out), dimnames(sce))
78 | expect_equal(assay(out), assay(sce))
79 | expect_identical(reducedDims(out), reducedDims(sce))
80 |
81 | # Need to coerce the factors back to strings.
82 | row_data <- rowData(out)
83 | for (i in seq_len(ncol(row_data))) {
84 | if (is.factor(row_data[[i]])) {
85 | row_data[[i]] <- as.character(row_data[[i]])
86 | }
87 | }
88 | expect_identical(row_data, rowData(sce))
89 |
90 | col_data <- colData(out)
91 | for (i in seq_len(ncol(col_data))) {
92 | if (is.factor(col_data[[i]])) {
93 | col_data[[i]] <- as.character(col_data[[i]])
94 | }
95 | }
96 | names(col_data) <- names(colData(sce))
97 | expect_identical(col_data, colData(sce))
98 | })
99 |
100 | test_that("writeH5AD works as expected with version 0.10.2", {
101 | temp <- tempfile(fileext = ".h5ad")
102 | writeH5AD(sce, temp, version = "0.10.2")
103 | expect_true(file.exists(temp))
104 |
105 | # Reading it back out again. Hopefully we didn't lose anything important.
106 | out <- readH5AD(temp, version = "0.10.2")
107 |
108 | expect_identical(dimnames(out), dimnames(sce))
109 | expect_equal(assay(out), assay(sce))
110 | expect_identical(reducedDims(out), reducedDims(sce))
111 |
112 | # Need to coerce the factors back to strings.
113 | row_data <- rowData(out)
114 | for (i in seq_len(ncol(row_data))) {
115 | if (is.factor(row_data[[i]])) {
116 | row_data[[i]] <- as.character(row_data[[i]])
117 | }
118 | }
119 | expect_identical(row_data, rowData(sce))
120 |
121 | col_data <- colData(out)
122 | for (i in seq_len(ncol(col_data))) {
123 | if (is.factor(col_data[[i]])) {
124 | col_data[[i]] <- as.character(col_data[[i]])
125 | }
126 | }
127 | names(col_data) <- names(colData(sce))
128 | expect_identical(col_data, colData(sce))
129 | })
130 |
131 | test_that("writeH5AD works as expected with version 0.9.2", {
132 | temp <- tempfile(fileext = ".h5ad")
133 | writeH5AD(sce, temp, version = "0.9.2")
134 | expect_true(file.exists(temp))
135 |
136 | # Reading it back out again. Hopefully we didn't lose anything important.
137 | out <- readH5AD(temp, version = "0.9.2")
138 |
139 | expect_identical(dimnames(out), dimnames(sce))
140 | expect_equal(assay(out), assay(sce))
141 | expect_identical(reducedDims(out), reducedDims(sce))
142 |
143 | # Need to coerce the factors back to strings.
144 | row_data <- rowData(out)
145 | for (i in seq_len(ncol(row_data))) {
146 | if (is.factor(row_data[[i]])) {
147 | row_data[[i]] <- as.character(row_data[[i]])
148 | }
149 | }
150 | expect_identical(row_data, rowData(sce))
151 |
152 | col_data <- colData(out)
153 | for (i in seq_len(ncol(col_data))) {
154 | if (is.factor(col_data[[i]])) {
155 | col_data[[i]] <- as.character(col_data[[i]])
156 | }
157 | }
158 | names(col_data) <- names(colData(sce))
159 | expect_identical(col_data, colData(sce))
160 | })
161 |
# Round trip through the anndata 0.8.0 environment: write the SCE, read it back
# with the same version and compare each part against the original object.
test_that("writeH5AD works as expected with version 0.8.0", {
    # Coerce every factor column of a DataFrame back to character.
    factors_to_character <- function(df) {
        for (idx in seq_len(ncol(df))) {
            if (is.factor(df[[idx]])) {
                df[[idx]] <- as.character(df[[idx]])
            }
        }
        df
    }

    h5ad_file <- tempfile(fileext = ".h5ad")
    writeH5AD(sce, h5ad_file, version = "0.8.0")
    expect_true(file.exists(h5ad_file))

    # Read the file back in; nothing important should have been lost.
    roundtrip <- readH5AD(h5ad_file, version = "0.8.0")

    expect_identical(dimnames(roundtrip), dimnames(sce))
    expect_equal(assay(roundtrip), assay(sce))
    expect_identical(reducedDims(roundtrip), reducedDims(sce))

    # Factor columns have to become strings again before comparing.
    gene_info <- factors_to_character(rowData(roundtrip))
    expect_identical(gene_info, rowData(sce))

    cell_info <- factors_to_character(colData(roundtrip))
    # Compare the column contents using the original column names.
    names(cell_info) <- names(colData(sce))
    expect_identical(cell_info, colData(sce))
})
192 |
# Round trip through the anndata 0.7.6 environment: write the SCE, read it back
# with the same version and compare each part against the original object.
test_that("writeH5AD works as expected with version 0.7.6", {
    # Python 3.7 is not available for aarch64, so skip on those platforms.
    skip_on_os("mac", arch = "aarch64")
    skip_on_os("linux", arch = "aarch64")

    # Coerce every factor column of a DataFrame back to character.
    factors_to_character <- function(df) {
        for (idx in seq_len(ncol(df))) {
            if (is.factor(df[[idx]])) {
                df[[idx]] <- as.character(df[[idx]])
            }
        }
        df
    }

    h5ad_file <- tempfile(fileext = ".h5ad")
    writeH5AD(sce, h5ad_file, version = "0.7.6")
    expect_true(file.exists(h5ad_file))

    # Read the file back in; nothing important should have been lost.
    roundtrip <- readH5AD(h5ad_file, version = "0.7.6")

    expect_identical(dimnames(roundtrip), dimnames(sce))
    expect_equal(assay(roundtrip), assay(sce))
    expect_identical(reducedDims(roundtrip), reducedDims(sce))

    # Factor columns have to become strings again before comparing.
    gene_info <- factors_to_character(rowData(roundtrip))
    expect_identical(gene_info, rowData(sce))

    cell_info <- factors_to_character(colData(roundtrip))
    # Compare the column contents using the original column names.
    names(cell_info) <- names(colData(sce))
    expect_identical(cell_info, colData(sce))
})
227 |
# Same round-trip check as the version tests, but writing with verbose = TRUE
# and reading back with the default settings.
test_that("writeH5AD works as expected with verbose=TRUE", {
    # Coerce every factor column of a DataFrame back to character.
    factors_to_character <- function(df) {
        for (idx in seq_len(ncol(df))) {
            if (is.factor(df[[idx]])) {
                df[[idx]] <- as.character(df[[idx]])
            }
        }
        df
    }

    h5ad_file <- tempfile(fileext = ".h5ad")
    writeH5AD(sce, h5ad_file, verbose = TRUE)
    expect_true(file.exists(h5ad_file))

    # Read the file back in; nothing important should have been lost.
    roundtrip <- readH5AD(h5ad_file)

    expect_identical(dimnames(roundtrip), dimnames(sce))
    expect_equal(assay(roundtrip), assay(sce))
    expect_identical(reducedDims(roundtrip), reducedDims(sce))

    # Factor columns have to become strings again before comparing.
    gene_info <- factors_to_character(rowData(roundtrip))
    expect_identical(gene_info, rowData(sce))

    cell_info <- factors_to_character(colData(roundtrip))
    # Compare the column contents using the original column names.
    names(cell_info) <- names(colData(sce))
    expect_identical(cell_info, colData(sce))
})
258 |
# Write an SCE holding sparse counts/logcounts plus one dense assay and check
# that every assay comes back from the file.
test_that("writeH5AD works as expected with sparse matrices", {
    dense_mat <- assay(sce)

    sparse_sce <- sce
    counts(sparse_sce) <- as(dense_mat, "CsparseMatrix")
    logcounts(sparse_sce) <- counts(sparse_sce) * 10
    # A dense assay is added as well so the object holds a mixture of types.
    assay(sparse_sce, "random") <- dense_mat

    h5ad_file <- tempfile(fileext = ".h5ad")
    writeH5AD(sparse_sce, h5ad_file)
    expect_true(file.exists(h5ad_file))

    # Read the file back in; nothing important should have been lost.
    roundtrip <- readH5AD(h5ad_file, X_name = "X")

    expect_identical(counts(sparse_sce), assay(roundtrip, "X"))
    expect_identical(logcounts(sparse_sce), logcounts(roundtrip))
    # expect_identical() was failing on Windows for the dense assay, so this
    # comparison uses expect_equal() instead.
    expect_equal(assay(sparse_sce, "random"), assay(roundtrip, "random"))
})
278 |
# With skip_assays = TRUE the file is still created but X holds no values.
test_that("writeH5AD works with assay skipping", {
    h5ad_file <- tempfile(fileext = ".h5ad")
    writeH5AD(sce, h5ad_file, skip_assays = TRUE)
    expect_true(file.exists(h5ad_file))

    # Inspect the stored X data directly; it should be empty and sum to zero.
    x_data <- HDF5Array::HDF5Array(h5ad_file, "X/data")
    expect_identical(sum(x_data), 0)
})
287 |
# The assay chosen with X_name on writing should come back as X on reading.
test_that("writeH5AD works with X_name", {
    h5ad_file <- tempfile(fileext = ".h5ad")
    writeH5AD(sce, h5ad_file, X_name = "counts")
    expect_true(file.exists(h5ad_file))

    roundtrip <- readH5AD(h5ad_file, X_name = "X")
    expect_equal(assay(roundtrip, "X"), assay(sce, "counts"))
})
296 |
# Write a file with basilisk's shared and fork settings both switched off,
# then put the previous settings back.
test_that("writeH5AD works in a separate process", {
    oldshare <- basilisk::getBasiliskShared()
    oldfork <- basilisk::getBasiliskFork()

    # Register the restore before changing anything so the previous settings
    # come back even when writeH5AD() errors; otherwise a failure here would
    # leak the changed settings into every test that runs afterwards.
    on.exit(
        {
            basilisk::setBasiliskShared(oldshare)
            basilisk::setBasiliskFork(oldfork)
        },
        add = TRUE
    )

    basilisk::setBasiliskShared(FALSE)
    basilisk::setBasiliskFork(FALSE)

    temp <- tempfile(fileext = ".h5ad")
    writeH5AD(sce, temp)
    expect_true(file.exists(temp))
})
310 |
# A DelayedArray used as X should round trip to the original counts.
test_that("writeH5AD DelayedArray X works", {
    delayed_sce <- sce
    delayed_counts <- DelayedArray::DelayedArray(counts(delayed_sce))
    counts(delayed_sce) <- delayed_counts

    h5ad_file <- tempfile(fileext = ".h5ad")
    writeH5AD(delayed_sce, h5ad_file, X_name = "counts")
    expect_true(file.exists(h5ad_file))

    roundtrip <- readH5AD(h5ad_file, X_name = "X")

    # expect_identical() fails on Windows for some reason, so use expect_equal()
    expect_equal(counts(sce), assay(roundtrip, "X"))
})
325 |
# A DelayedArray wrapping a sparse matrix used as X should round trip to the
# same sparse matrix.
test_that("writeH5AD sparse DelayedArray X works", {
    delayed_sce <- sce
    sparse_counts <- as(counts(delayed_sce), "CsparseMatrix")
    counts(delayed_sce) <- DelayedArray::DelayedArray(sparse_counts)

    h5ad_file <- tempfile(fileext = ".h5ad")
    writeH5AD(delayed_sce, h5ad_file, X_name = "counts")
    expect_true(file.exists(h5ad_file))

    roundtrip <- readH5AD(h5ad_file, X_name = "X")

    # Sparse DelayedArrays are currently coerced into memory.
    # This expectation will need to change once that is fixed.
    expect_identical(sparse_counts, assay(roundtrip, "X"))
})
342 |
# A DelayedArray stored as an additional assay should round trip to the
# original counts.
test_that("writeH5AD DelayedArray layer works", {
    delayed_sce <- sce
    delayed_counts <- DelayedArray::DelayedArray(counts(delayed_sce))
    assay(delayed_sce, "layer") <- delayed_counts

    h5ad_file <- tempfile(fileext = ".h5ad")
    writeH5AD(delayed_sce, h5ad_file)
    expect_true(file.exists(h5ad_file))

    roundtrip <- readH5AD(h5ad_file, X_name = "X")

    # expect_identical() fails on Windows for some reason, so use expect_equal()
    expect_equal(counts(sce), assay(roundtrip, "layer"))
})
359 |
# A list column in colData should trigger a warning on writing and still be
# present in the metadata after reading.
test_that("writeH5AD works with colData list columns", {
    list_sce <- sce
    # One pair of random letters per cell.
    letter_pairs <- lapply(
        seq_len(ncol(list_sce)),
        function(cell) sample(LETTERS, 2)
    )
    colData(list_sce)$ListCol <- letter_pairs

    h5ad_file <- tempfile(fileext = ".h5ad")
    expect_warning(writeH5AD(list_sce, h5ad_file), "columns are not atomic")
    expect_true(file.exists(h5ad_file))

    # What exactly comes back is hard to predict, so only check it is there.
    roundtrip <- readH5AD(h5ad_file, X_name = "X")
    stored_columns <- names(metadata(roundtrip)$.colData)
    expect_true("ListCol" %in% stored_columns)
})
375 |
# A list column in rowData should trigger a warning on writing and still be
# present in the metadata after reading.
test_that("writeH5AD works with rowData list columns", {
    list_sce <- sce
    # One pair of random letters per feature.
    letter_pairs <- lapply(
        seq_len(nrow(list_sce)),
        function(feature) sample(LETTERS, 2)
    )
    rowData(list_sce)$ListCol <- letter_pairs

    h5ad_file <- tempfile(fileext = ".h5ad")
    expect_warning(writeH5AD(list_sce, h5ad_file), "columns are not atomic")
    expect_true(file.exists(h5ad_file))

    # What exactly comes back is hard to predict, so only check it is there.
    roundtrip <- readH5AD(h5ad_file, X_name = "X")
    stored_columns <- names(metadata(roundtrip)$.rowData)
    expect_true("ListCol" %in% stored_columns)
})
391 |
# Writing with gzip compression should not change the values read back as X.
test_that("writeH5AD works with gzip compression", {
    h5ad_file <- tempfile(fileext = ".h5ad")
    writeH5AD(sce, h5ad_file, X_name = "counts", compression = "gzip")
    expect_true(file.exists(h5ad_file))

    roundtrip <- readH5AD(h5ad_file, X_name = "X")
    expect_equal(assay(roundtrip, "X"), assay(sce, "counts"))
})
400 |
# Writing with lzf compression should not change the values read back as X.
test_that("writeH5AD works with lzf compression", {
    h5ad_file <- tempfile(fileext = ".h5ad")
    writeH5AD(sce, h5ad_file, X_name = "counts", compression = "lzf")
    expect_true(file.exists(h5ad_file))

    roundtrip <- readH5AD(h5ad_file, X_name = "X")
    expect_equal(assay(roundtrip, "X"), assay(sce, "counts"))
})
409 |
# Switching off the conversion of every slot should leave only X and the
# X_name metadata entry in the object that is read back.
test_that("Skipping slot conversion works", {
    h5ad_file <- tempfile(fileext = ".h5ad")
    writeH5AD(
        sce, h5ad_file,
        assays = FALSE,
        colData = FALSE,
        rowData = FALSE,
        varm = FALSE,
        reducedDims = FALSE,
        metadata = FALSE,
        colPairs = FALSE,
        rowPairs = FALSE
    )

    roundtrip <- readH5AD(h5ad_file, X_name = "X")

    expect_identical(assayNames(roundtrip), "X")
    expect_identical(metadata(roundtrip), list(X_name = "counts"))

    # Every other slot should be empty.
    expect_equal(ncol(rowData(roundtrip)), 0)
    expect_equal(ncol(colData(roundtrip)), 0)
    expect_equal(length(reducedDims(roundtrip)), 0)
    expect_equal(length(rowPairs(roundtrip)), 0)
    expect_equal(length(colPairs(roundtrip)), 0)
})
428 |
# Naming a single colData column should keep only that column.
test_that("Selective DF conversion works", {
    h5ad_file <- tempfile(fileext = ".h5ad")
    writeH5AD(sce, h5ad_file, assays = FALSE, colData = "tissue")

    roundtrip <- readH5AD(h5ad_file, X_name = "X")
    kept_columns <- names(colData(roundtrip))

    expect_identical(kept_columns, "tissue")
})
437 |
# An SCE built from a counts matrix only (no rowData or colData columns)
# should still write and read back with the same values.
test_that("Writing works with empty rowData/colData", {
    random_counts <- matrix(rpois(100 * 50, 4), nrow = 100, ncol = 50)
    mini_sce <- SingleCellExperiment::SingleCellExperiment(
        assays = list(counts = random_counts)
    )

    h5ad_file <- tempfile(fileext = ".h5ad")
    writeH5AD(mini_sce, h5ad_file)

    roundtrip <- readH5AD(h5ad_file, X_name = "X")
    values_match <- counts(mini_sce) == assay(roundtrip, "X")
    expect_true(all(values_match))
})
449 |
# A SpatialExperiment with named spatial coordinates should write, and the
# coordinates should come back as the "spatial" reduced dimension.
test_that("writeH5AD works with SpatialExperiment objects", {
    skip_if_not_installed("SpatialExperiment")

    spe <- SpatialExperiment::SpatialExperiment(
        assays = list(counts = SingleCellExperiment::counts(sce))
    )

    # Random two-column coordinates, named by cell and by dimension.
    spcoords <- matrix(runif(ncol(sce) * 2), ncol = 2)
    rownames(spcoords) <- colnames(sce)
    colnames(spcoords) <- paste0("Spatial", 1:2)
    SpatialExperiment::spatialCoords(spe) <- spcoords

    h5ad_file <- tempfile(fileext = ".h5ad")
    writeH5AD(spe, h5ad_file)
    expect_true(file.exists(h5ad_file))

    roundtrip <- readH5AD(h5ad_file, X_name = "X")

    expect_identical(assay(roundtrip, "X"), assay(spe, "counts"))
    expect_identical(dimnames(roundtrip), dimnames(spe))

    # Check the spatial coordinates.
    expect_identical(reducedDims(roundtrip)$spatial, spcoords)
})
477 |
# Same as the SpatialExperiment test, but the coordinate matrix has no
# dimnames, so the comparison is made without dimnames as well.
test_that("writeH5AD works with SpatialExperiment objects without names", {
    skip_if_not_installed("SpatialExperiment")

    spe <- SpatialExperiment::SpatialExperiment(
        assays = list(counts = SingleCellExperiment::counts(sce))
    )

    # Random two-column coordinates with no row or column names.
    spcoords <- matrix(runif(ncol(sce) * 2), ncol = 2)
    SpatialExperiment::spatialCoords(spe) <- spcoords

    h5ad_file <- tempfile(fileext = ".h5ad")
    writeH5AD(spe, h5ad_file)
    expect_true(file.exists(h5ad_file))

    roundtrip <- readH5AD(h5ad_file, X_name = "X")

    expect_identical(assay(roundtrip, "X"), assay(spe, "counts"))
    expect_identical(dimnames(roundtrip), dimnames(spe))

    # Check the spatial coordinates.
    spatial_out <- reducedDim(roundtrip, "spatial", withDimnames = FALSE)
    expect_identical(spatial_out, spcoords)
})
503 |
# An SCE whose counts and reduced dimension carry no dimnames should still
# round trip with the same values.
test_that("writeH5AD works without names", {
    nameless_sce <- SingleCellExperiment::SingleCellExperiment(
        assays = list(
            counts = matrix(rpois(100 * 50, 4), nrow = 100, ncol = 50)
        ),
        reducedDims = list(
            redDim = matrix(runif(50 * 10), ncol = 10)
        )
    )

    h5ad_file <- tempfile(fileext = ".h5ad")
    writeH5AD(nameless_sce, h5ad_file)

    roundtrip <- readH5AD(h5ad_file, X_name = "X")

    values_match <- counts(nameless_sce) == assay(roundtrip, "X")
    expect_true(all(values_match))

    # Compare the reduced dimension without dimnames on the read side.
    red_dim_out <- reducedDim(roundtrip, "redDim", withDimnames = FALSE)
    red_dim_in <- reducedDim(nameless_sce, "redDim")
    expect_identical(red_dim_out, red_dim_in)
})
525 |
# Row and column names given on the counts matrix should survive a round trip.
test_that("writeH5AD keeps dimnames", {
    genes <- LETTERS[1:5]
    cells <- letters[1:8]
    n_genes <- length(genes)
    n_cells <- length(cells)

    count_mat <- matrix(
        rpois(n_genes * n_cells, 5),
        ncol = n_cells,
        dimnames = list(genes, cells)
    )
    dimname_sce <- SingleCellExperiment::SingleCellExperiment(
        list(counts = count_mat)
    )

    h5ad_file <- tempfile(fileext = ".h5ad")
    writeH5AD(dimname_sce, h5ad_file)

    roundtrip <- readH5AD(h5ad_file, X_name = "X")

    expect_identical(dimnames(roundtrip), dimnames(dimname_sce))
})
547 |
--------------------------------------------------------------------------------
/tests/testthat/test-zzz-anndata.R:
--------------------------------------------------------------------------------
1 | # This file tests compatibility with the R {anndata} package
2 | # Despite best efforts the package isn't reliably unloaded so these tests have
3 | # been moved to a separate file that is (hopefully) always run last
4 |
# Read two H5AD files while the R {anndata} package is attached and check the
# resulting objects, then unload {anndata} again.
test_that("Reading is compatible with R anndata", {
    skip_if_offline()
    skip_if_not_installed("withr")
    skip_if_not_installed("anndata")

    withr::with_package("anndata", {
        # First the example file that ships with zellkonverter.
        krumsiek_path <- system.file(
            "extdata", "krumsiek11.h5ad",
            package = "zellkonverter"
        )
        krumsiek_sce <- readH5AD(krumsiek_path)
        expect_s4_class(krumsiek_sce, "SingleCellExperiment")

        expect_identical(assayNames(krumsiek_sce), "X")
        expect_identical(colnames(colData(krumsiek_sce)), "cell_type")

        # Then a downloaded example file, fetched through BiocFileCache.
        bfc <- BiocFileCache::BiocFileCache(ask = FALSE)
        example_path <- BiocFileCache::bfcrpath(
            bfc, "https://ndownloader.figshare.com/files/30462915"
        )

        example_sce <- readH5AD(example_path, raw = TRUE)

        # Names expected in each part of the object.
        expected_names <- list(
            assays = c("X"),
            colData = c(
                "n_genes", "n_genes_by_counts", "total_counts",
                "total_counts_mt", "pct_counts_mt", "leiden"
            ),
            rowData = c(
                "gene_ids", "n_cells", "mt", "n_cells_by_counts",
                "mean_counts", "pct_dropout_by_counts", "total_counts",
                "highly_variable", "means", "dispersions",
                "dispersions_norm", "mean", "std"
            ),
            raw_rowData = c(
                "gene_ids", "n_cells", "mt", "n_cells_by_counts",
                "mean_counts", "pct_dropout_by_counts",
                "total_counts", "highly_variable", "means",
                "dispersions", "dispersions_norm"
            ),
            redDim = c("X_pca", "X_umap"),
            varm = c("PCs"),
            colPairs = c("connectivities", "distances"),
            metadata = c(
                "hvg", "leiden", "neighbors", "pca",
                "rank_genes_groups", "umap"
            )
        )

        # Nothing is expected to be missing.
        expected_missing <- list()

        validateH5ADSCE(example_sce, expected_names, expected_missing)
    })

    pkgload::unload("anndata")
})
61 |
# Round trip the Zeisel brain dataset through an H5AD file while the R
# {anndata} package is attached, then unload {anndata} again.
test_that("Writing is compatible with R anndata", {
    skip_if_offline()
    skip_if_not_installed("withr")
    skip_if_not_installed("anndata")

    withr::with_package("anndata", {
        # Coerce every factor column of a DataFrame back to character.
        factors_to_character <- function(df) {
            for (idx in seq_len(ncol(df))) {
                if (is.factor(df[[idx]])) {
                    df[[idx]] <- as.character(df[[idx]])
                }
            }
            df
        }

        zeisel <- scRNAseq::ZeiselBrainData()
        h5ad_file <- tempfile(fileext = ".h5ad")
        writeH5AD(zeisel, h5ad_file)
        expect_true(file.exists(h5ad_file))

        # Read the file back in; nothing important should have been lost.
        roundtrip <- readH5AD(h5ad_file)

        expect_identical(dimnames(roundtrip), dimnames(zeisel))
        expect_equal(assay(roundtrip), assay(zeisel))
        expect_identical(reducedDims(roundtrip), reducedDims(zeisel))

        # Factor columns have to become strings again before comparing.
        gene_info <- factors_to_character(rowData(roundtrip))
        expect_identical(gene_info, rowData(zeisel))

        cell_info <- factors_to_character(colData(roundtrip))
        # Compare the column contents using the original column names.
        names(cell_info) <- names(colData(zeisel))
        expect_identical(cell_info, colData(zeisel))
    })

    pkgload::unload("anndata")
})
101 |
--------------------------------------------------------------------------------
/vignettes/zellkonverter.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: Converting single-cell data structures between Bioconductor and Python
3 | author:
4 | - name: Luke Zappia
5 | email: luke@lazappi.id.au
6 | - name: Aaron Lun
7 | email: infinite.monkeys.with.keyboards@gmail.com
8 | date: "Revised: 17 April 2022"
9 | output:
10 | BiocStyle::html_document:
11 | toc_float: true
12 | package: zellkonverter
13 | vignette: >
14 | %\VignetteIndexEntry{Converting to/from AnnData to SingleCellExperiments}
15 | %\VignetteEngine{knitr::rmarkdown}
16 | %\VignetteEncoding{UTF-8}
17 | ---
18 |
19 | ```{r setup, echo = FALSE, results = "hide", message = FALSE}
# Load the packages used to build the vignette. library() is used rather than
# require() so that a missing package stops the build with a clear error
# instead of returning FALSE and failing later.
library(knitr)
library(BiocStyle)

# Stop on errors and keep messages and warnings out of the rendered output.
opts_chunk$set(error = FALSE, message = FALSE, warning = FALSE)
23 | ```
24 |
25 | Overview
26 | ========
27 |
28 | This package provides a lightweight interface between the Bioconductor
29 | `SingleCellExperiment` data structure and the Python `AnnData`-based single-cell
30 | analysis environment. The idea is to enable users and developers to easily move
31 | data between these frameworks to construct a multi-language analysis pipeline
32 | across R/Bioconductor and Python.
33 |
34 | Reading and writing H5AD files
35 | ==============================
36 |
The `readH5AD()` function can be used to read a `SingleCellExperiment` from an
H5AD file. This can be manipulated in the usual way as described in the
`r Biocpkg("SingleCellExperiment")` documentation.
40 |
41 | ```{r read}
library(zellkonverter)

# Locate the example H5AD file that ships with the package.
example_h5ad <- system.file(
    file.path("extdata", "krumsiek11.h5ad"),
    package = "zellkonverter"
)

# Read it in; the resulting object is printed.
readH5AD(example_h5ad)
50 | ```
51 |
We can also write a `SingleCellExperiment` to an H5AD file with the
`writeH5AD()` function. This is demonstrated below on the classic Zeisel mouse
brain dataset from the `r Biocpkg("scRNAseq")` package. The resulting file can
then be directly used in compatible Python-based analysis frameworks.
56 |
57 | ```{r write}
library(scRNAseq)

# Fetch the Zeisel brain dataset as a SingleCellExperiment.
sce_zeisel <- ZeiselBrainData()

# Use `fileext` so the temporary file really ends in ".h5ad"; `pattern` would
# only set the start of the file name and leave it without an extension.
out_path <- tempfile(fileext = ".h5ad")
writeH5AD(sce_zeisel, file = out_path)
63 | ```
64 |
65 | Converting between `SingleCellExperiment` and `AnnData` objects
66 | ===============================================================
67 |
68 | Developers and power users who control their Python environments can directly
69 | convert between `SingleCellExperiment` and
70 | [`AnnData` objects](https://anndata.readthedocs.io/en/stable/) using the
71 | `SCE2AnnData()` and `AnnData2SCE()` utilities. These functions expect that
72 | `r CRANpkg("reticulate")` has already been loaded along with an appropriate
73 | version of the [_anndata_](https://pypi.org/project/anndata/) package. We
74 | suggest using the `r Biocpkg("basilisk")` package to set up the Python
75 | environment before using these functions.
76 |
77 | ```{r convert}
library(basilisk)
library(scRNAseq)

seger <- SegerstolpePancreasData()

# Run the conversion in the environment given by zellkonverterAnnDataEnv().
roundtrip <- basiliskRun(
    env = zellkonverterAnnDataEnv(),
    fun = function(sce) {
        # SingleCellExperiment -> AnnData
        adata <- SCE2AnnData(sce)

        # Any work in Python on 'adata' would go here:
        # BLAH BLAH BLAH

        # AnnData -> SingleCellExperiment
        AnnData2SCE(adata)
    },
    sce = seger
)
92 | ```
93 |
94 | Package developers can guarantee that they are using the same versions of Python
95 | packages as `r Biocpkg("zellkonverter")` by using the `AnnDataDependencies()`
96 | function to set up their Python environments.
97 |
98 | ```{r anndata-deps}
# Show the dependencies returned with the default settings
AnnDataDependencies()
100 | ```
101 |
102 | This function can also be used to return dependencies for environments using
103 | older versions of _anndata_.
104 |
105 | ```{r anndata-deps-old}
# Show the dependencies for an environment using the older anndata 0.7.6
AnnDataDependencies(version = "0.7.6")
107 | ```
108 |
109 | Progress messages
110 | =================
111 |
By default, the functions in `r Biocpkg("zellkonverter")` don't display any
information about their progress, but this can be turned on by setting the
`verbose = TRUE` argument.
115 |
116 | ```{r verbose}
# Read the example file again, this time with progress messages turned on
readH5AD(example_h5ad, verbose = TRUE)
118 | ```
119 |
120 | If you would like to see progress messages for all functions by default you can
121 | turn this on using the `setZellkonverterVerbose()` function.
122 |
123 | ```{r verbose-set, eval = FALSE}
# This is not run here (the chunk is marked eval = FALSE)
# Turn on progress messages for all functions by default
setZellkonverterVerbose(TRUE)
126 | ```
127 |
128 | Session information
129 | ===================
130 |
131 | ```{r}
# Print the R version and package versions used to build this vignette
sessionInfo()
133 | ```
134 |
--------------------------------------------------------------------------------