├── .codecov.yaml ├── .cruft.json ├── .editorconfig ├── .github └── workflows │ ├── execute-nbs.yaml │ └── make-tutorials-json.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── CHANGELOG.md ├── LICENSE ├── README.md ├── docs ├── Makefile ├── _static │ ├── .gitkeep │ └── css │ │ └── custom.css ├── _templates │ ├── .gitkeep │ └── autosummary │ │ └── class.rst ├── conf.py ├── extensions │ └── typed_returns.py ├── how-to-dask.md ├── index.md ├── notebooks │ ├── .gitignore │ ├── anndata_getting_started.ipynb │ ├── basic-scrna-tutorial.ipynb │ ├── data │ │ └── pbmc3k_processed.h5ad │ ├── img │ │ ├── X.png │ │ ├── anndata_schema_full.png │ │ ├── layers.png │ │ ├── names.png │ │ ├── obsmvarm.png │ │ ├── obspvarp.png │ │ └── obsvar.png │ ├── scverse_data_backed.ipynb │ ├── scverse_data_interoperability.ipynb │ ├── tutorial_axes_anndata_mudata.ipynb │ └── tutorial_concatenation_anndata_mudata.ipynb ├── patched-deps.txt ├── references.bib └── references.md ├── environment.yml ├── pyproject.toml └── tutorial-registry ├── categories.yml ├── schema.json ├── tutorials ├── ATAC-preprocessing │ ├── icon.png │ └── meta.yaml ├── CITEseq-integration │ ├── icon.png │ └── meta.yaml ├── Joint-analysis-of-multiomic-data-with-MultiVI │ ├── icon.png │ └── meta.yaml ├── RNA-velocity │ ├── icon.png │ └── meta.yaml ├── advanced-plotting │ ├── icon.png │ └── meta.yaml ├── anndata-getting-started │ ├── icon.svg │ └── meta.yaml ├── anndata-mudata-axes │ ├── icon.svg │ └── meta.yaml ├── bentotools-subcellular-resolution │ ├── icon.png │ └── meta.yaml ├── cellcharter-spatial-transcriptomics │ ├── icon.png │ └── meta.yaml ├── compositional-analysis │ ├── icon.png │ └── meta.yaml ├── concatenation-of-multimodal-data │ ├── icon.png │ └── meta.yaml ├── concatenation-of-unimodal-data │ ├── icon.png │ └── meta.yaml ├── decoupler-pseudobulk-de │ ├── icon.png │ └── meta.yaml ├── interoperability │ ├── icon.png │ ├── icon.webp │ └── meta.yaml ├── perturbation-modeling │ ├── icon.png │ └── meta.yaml ├── plotting-in-scanpy │ ├── icon.png │ └── meta.yaml ├── preprocessing-and-clustering │ ├── icon.webp │ └── meta.yaml ├── pseudotemporal-ordering │ ├── icon.png │ └── meta.yaml ├── scirpy-tcr │ ├── icon.svg │ └── meta.yaml ├── scverse-objects-in-backed-mode │ ├── icon.png │ └── meta.yaml ├── scvi-batch-effect-removal │ ├── icon.svg │ └── meta.yaml └── squidpy-spatial │ ├── icon.png │ └── meta.yaml └── validate.py /.codecov.yaml: -------------------------------------------------------------------------------- 1 | # Based on pydata/xarray 2 | codecov: 3 | require_ci_to_pass: no 4 | 5 | coverage: 6 | status: 7 | project: 8 | default: 9 | # Require 1% coverage, i.e., always succeed 10 | target: 1 11 | patch: false 12 | changes: false 13 | 14 | comment: 15 | layout: diff, flags, files 16 | behavior: once 17 | require_base: no 18 | -------------------------------------------------------------------------------- /.cruft.json: -------------------------------------------------------------------------------- 1 | { 2 | "template": "https://github.com/scverse/cookiecutter-scverse", 3 | "commit": "87a407a65408d75a949c0b54b19fd287475a56f8", 4 | "checkout": "v0.4.0", 5 | "context": { 6 | "cookiecutter": { 7 | "project_name": "scverse-tutorials", 8 | "package_name": "scverse_tutorials", 9 | "project_description": "A very interesting piece of code", 10 | "author_full_name": "scverse team", 11 | "author_email": "core-team@scverse.org", 12 | "github_user": "scverse", 13 | "project_repo": 
"https://github.com/scverse/scverse-tutorials", 14 | "license": "BSD 3-Clause License", 15 | "_copy_without_render": [ 16 | ".github/workflows/build.yaml", 17 | ".github/workflows/test.yaml", 18 | "docs/_templates/autosummary/**.rst" 19 | ], 20 | "_render_devdocs": false, 21 | "_jinja2_env_vars": { 22 | "lstrip_blocks": true, 23 | "trim_blocks": true 24 | }, 25 | "_template": "https://github.com/scverse/cookiecutter-scverse" 26 | } 27 | }, 28 | "directory": null 29 | } 30 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | indent_style = space 5 | indent_size = 4 6 | end_of_line = lf 7 | charset = utf-8 8 | trim_trailing_whitespace = true 9 | insert_final_newline = true 10 | 11 | [*.{yml,yaml}] 12 | indent_size = 2 13 | 14 | [.cruft.json] 15 | indent_size = 2 16 | 17 | [Makefile] 18 | indent_style = tab 19 | -------------------------------------------------------------------------------- /.github/workflows/execute-nbs.yaml: -------------------------------------------------------------------------------- 1 | name: Execute notebooks 2 | on: 3 | pull_request: 4 | branches: [main] 5 | paths: 6 | - ".github/workflows/execute-nbs.yaml" 7 | - "docs/**" 8 | - "environment.yml" 9 | 10 | defaults: 11 | run: 12 | shell: bash -el {0} 13 | 14 | concurrency: 15 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 16 | cancel-in-progress: true 17 | 18 | jobs: 19 | runnbs: 20 | runs-on: ubuntu-latest 21 | permissions: 22 | contents: write # Allow GH actions to push changes to repo 23 | steps: 24 | - uses: actions/checkout@v4 25 | with: 26 | repository: ${{ github.event.pull_request.head.repo.full_name }} 27 | ref: ${{ github.head_ref }} 28 | filter: blob:none 29 | fetch-depth: 0 30 | - name: Cache conda 31 | uses: actions/cache@v4 32 | env: 33 | # Increase this value to reset cache if env file has not changed 34 | CACHE_NUMBER: 0 35 | with: 36 | path: ~/conda_pkgs_dir 37 | key: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{ hashFiles('environment.yml') }} 38 | - name: Cache notebooks 39 | uses: actions/cache@v4 40 | env: 41 | # Increase this value to reset cache if env file has not changed 42 | CACHE_NUMBER: 0 43 | with: 44 | path: .jupyter_cache 45 | key: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{ hashFiles('environment.yml') }} 46 | - uses: conda-incubator/setup-miniconda@v3 47 | with: 48 | activate-environment: tutorials 49 | channel-priority: flexible 50 | environment-file: environment.yml 51 | miniforge-variant: Miniforge3 52 | miniforge-version: latest 53 | use-mamba: true 54 | # some important packages are not available as .tar.bz2 anymore 55 | # use-only-tar-bz2: true # This needs to be set for caching to work properly! 56 | 57 | - name: execute notebooks 58 | run: | 59 | mkdir -p .jupyter_cache 60 | # set kernel to "python3" (just in case someone used a custom conda env kernel locally) 61 | for f in docs/notebooks/*.ipynb ; do 62 | jupytext --to notebook --set-kernel python3 $f 63 | done 64 | 65 | # First remove all notebooks from the project, then add all again. 66 | # This does not remove them from the cache. This step is required since a notebook could be in the cache, but not in the PR. 
67 | jcache notebook clear --force 68 | jcache notebook add docs/notebooks/*.ipynb 69 | jcache project execute --timeout 1800 70 | 71 | - name: output logs 72 | run: | 73 | jcache notebook list 74 | for f in docs/notebooks/*.ipynb ; do 75 | jcache notebook info --tb $f 76 | done 77 | 78 | # write changed outputs back to the ipynb files 79 | # will fail if there was an execution error 80 | - name: merge notebooks 81 | run: | 82 | for f in docs/notebooks/*.ipynb ; do 83 | jcache notebook merge $f $f 84 | done 85 | 86 | - uses: stefanzweifel/git-auto-commit-action@v4 87 | with: 88 | commit_message: Render notebooks 89 | -------------------------------------------------------------------------------- /.github/workflows/make-tutorials-json.yml: -------------------------------------------------------------------------------- 1 | name: Make JSON of tutorial registry 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | schedule: 7 | - cron: "0 2 * * *" 8 | pull_request: 9 | branches: ["*"] 10 | paths: 11 | - tutorial-registry/** 12 | 13 | concurrency: 14 | group: "pages" 15 | cancel-in-progress: true 16 | 17 | jobs: 18 | mkjson: 19 | runs-on: ubuntu-latest 20 | 21 | steps: 22 | - name: Checkout repository 23 | uses: actions/checkout@v4 24 | with: 25 | filter: blob:none 26 | fetch-depth: 0 27 | 28 | - uses: actions/setup-python@v5 29 | with: 30 | python-version: "3.12" 31 | cache: "pip" # caching pip dependencies 32 | - name: Install dependencies for validation script 33 | run: pip install .[registry] 34 | - name: Execute validation script and create output directory 35 | run: | 36 | ./tutorial-registry/validate.py --outdir=build 37 | 38 | - name: Upload GitHub Pages artifact 39 | uses: actions/upload-pages-artifact@v3 40 | with: 41 | path: "build" 42 | 43 | deploy: 44 | runs-on: ubuntu-latest 45 | needs: mkjson 46 | 47 | # Grant GITHUB_TOKEN the permissions required to make a Pages deployment 48 | permissions: 49 | pages: write # to deploy to Pages 50 | id-token: write 51 | 52 | environment: 53 | name: github-pages 54 | url: ${{ steps.deployment.outputs.page_url }}/tutorials.json 55 | 56 | if: github.ref == 'refs/heads/main' && github.event_name != 'schedule' 57 | steps: 58 | - name: Deploy to GitHub Pages 59 | id: deployment 60 | uses: actions/deploy-pages@v1 61 | - name: Trigger website build 62 | run: | 63 | curl -XPOST \ 64 | -u "scverse-bot:${{ secrets.BOT_GH_TOKEN }}" \ 65 | -H "Accept: application/vnd.github.everest-preview+json" \ 66 | -H "Content-Type: application/json" \ 67 | https://api.github.com/repos/scverse/scverse.github.io/actions/workflows/gh-pages.yml/dispatches \ 68 | --data '{"ref": "main"}' 69 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Caches 2 | /.*cache/ 3 | /node_modules/ 4 | /data/ 5 | .ipynb_checkpoints/ 6 | __pycache__/ 7 | 8 | # Temp files 9 | .DS_Store 10 | *~ 11 | 12 | # Virtual environments 13 | .venv/ 14 | 15 | # Build artifacts 16 | /dist/ 17 | /docs/generated/ 18 | /docs/_build/ 19 | 20 | # User configuration 21 | /hatch.toml 22 | 23 | # IDEs 24 | /.idea/ 25 | /.vscode/ 26 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | fail_fast: false 2 | default_language_version: 3 | python: python3 4 | default_stages: 5 | - pre-commit 6 | - pre-push 7 | minimum_pre_commit_version: 2.16.0 8 | repos: 9 
| - repo: https://github.com/rbubley/mirrors-prettier 10 | rev: v3.5.3 11 | hooks: 12 | - id: prettier 13 | - repo: https://github.com/astral-sh/ruff-pre-commit 14 | rev: v0.11.12 15 | hooks: 16 | - id: ruff 17 | types_or: [python, pyi, jupyter] 18 | args: [--fix, --exit-non-zero-on-fix] 19 | - id: ruff-format 20 | types_or: [python, pyi, jupyter] 21 | - repo: https://github.com/pre-commit/pre-commit-hooks 22 | rev: v5.0.0 23 | hooks: 24 | - id: detect-private-key 25 | - id: check-ast 26 | - id: end-of-file-fixer 27 | - id: mixed-line-ending 28 | args: [--fix=lf] 29 | - id: trailing-whitespace 30 | - id: check-case-conflict 31 | # Check that there are no merge conflicts (could be generated by template sync) 32 | - id: check-merge-conflict 33 | args: [--assume-in-merge] 34 | - repo: local 35 | hooks: 36 | - id: forbid-to-commit 37 | name: Don't commit rej files 38 | entry: | 39 | Cannot commit .rej files. These indicate merge conflicts that arise during automated template updates. 40 | Fix the merge conflicts manually and remove the .rej files. 41 | language: fail 42 | files: '.*\.rej$' 43 | - repo: https://github.com/python-jsonschema/check-jsonschema 44 | rev: 0.33.0 45 | hooks: 46 | - id: check-jsonschema 47 | files: "tutorial-registry/schema.json" 48 | args: ["--check-metaschema"] 49 | - repo: https://github.com/python-jsonschema/check-jsonschema 50 | rev: 0.33.0 51 | hooks: 52 | - id: check-jsonschema 53 | files: "tutorial-registry/tutorials/.*/meta.yaml" 54 | args: ["--schemafile", "tutorial-registry/schema.json"] 55 | - repo: local 56 | hooks: 57 | - id: forbid-to-commit 58 | name: Check files in `tutorials` directory 59 | entry: | 60 | Only files named `meta.yaml` or `icon.xxx` are permitted in the packages directory 61 | language: fail 62 | files: "^tutorial-registry/tutorials/.*$" 63 | exclude: "^tutorial-registry/tutorials/.*/(meta\\.yaml|icon\\.(svg|png|webp))$" 64 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # https://docs.readthedocs.io/en/stable/config-file/v2.html 2 | version: 2 3 | build: 4 | os: ubuntu-22.04 5 | tools: 6 | python: "3.12" 7 | sphinx: 8 | configuration: docs/conf.py 9 | fail_on_warning: true 10 | python: 11 | install: 12 | - requirements: docs/patched-deps.txt 13 | - method: pip 14 | path: . 15 | extra_requirements: 16 | - docs 17 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scverse/scverse-tutorials/169d1d33f00f7e738da725f350d119cd8c7d4aba/CHANGELOG.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2022, scverse® 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. 
Neither the name of the copyright holder nor the names of its
17 | contributors may be used to endorse or promote products derived from
18 | this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # scverse tutorials
2 |
3 | [![Documentation][badge-docs]][link-docs]
4 |
5 | On [scverse.org/learn](https://scverse.org/learn), we aim to provide a comprehensive overview of analyses that can be
6 | performed with scverse core and ecosystem packages.
7 |
8 | To this end, this repository contains
9 |
10 | - a registry for tutorials listed on [scverse.org/learn](https://scverse.org/learn) (see `tutorial-registry`)
11 | - shared tutorials that complement more specific tutorials provided by individual [core](https://scverse.org/packages/)
12 | and [ecosystem](https://scverse.org/packages/#ecosystem) packages (see `docs`)
13 |
14 | ## Adding tutorials
15 |
16 | If you believe a tutorial should be added to `scverse.org/learn`, please open an issue. We will discuss the request
17 | in the next [open community meeting](https://hackmd.io/VfVLKb3ETGKN2j_7tn8ZJQ?view) and potentially suggest
18 | improvements.
19 |
20 | To be added to our website, tutorials must fulfill at least the following requirements:
21 |
22 | - all featured packages must be scverse [core](https://scverse.org/packages/#core-packages) or
23 | [approved ecosystem packages](https://scverse.org/packages/#ecosystem). This does not apply to packages that are not
24 | specific to omics data analysis (e.g. pandas, seaborn).
25 | - the notebook author agrees to maintain the tutorial in the future and is reachable via [zulip](https://scverse.zulipchat.com).
26 | - the notebook contains a backlink to [scverse.org/learn](https://scverse.org/learn)
27 | - the notebook is self-contained: All required example data is downloaded as part of the tutorial
28 |
29 | You can easily check your changes to tutorials or the registry locally:
30 |
31 | ```shell
32 | hatch run docs:build # for tutorial notebooks
33 | hatch run registry:validate # for the tutorials registry
34 | ```
35 |
36 | ## Structure of external tutorials
37 |
38 | While we do not mandate a specific structure for tutorials,
39 | a good tutorial typically comprises the following sections:
40 |
41 | 1. **General header**: The tutorial should have a general header that corresponds to the analysis.
42 | 2. **Brief introduction**: The tutorial should introduce the package, the analysis motivation, and potentially the biological background.
43 | 3. 
**Requirements to run the notebook**: Special computational requirements like memory or GPUs should be specified. Any required input from other notebooks should also be listed here. 44 | 4. **Package imports**: All required packages should now be imported. 45 | 5. **General setup**: General settings such as plotting settings or ignored warnings should be set up here. 46 | 6. **Data loading**: Any required datasets should be loaded here. Ideally with stable links. 47 | 7. **Data preprocessing**: Any data preprocessing should be done here. Depending on the method this step can be skipped. 48 | 8. **Package specific tutorial**: The tutorial for the package should contain a healthy mix of text and code to guide the user through the analysis. 49 | 9. **Link to other important tutorials/packages/sources of information**: Link to any other tutorials that might be of interest or the corresponding https://sc-best-practices.org chapter. 50 | 10. **References**: Any referenced papers should show up in references section. 51 | 11. **Acknowledgements**: All contributing authors and experts should be named. 52 | 53 | [link-docs]: https://scverse-tutorials.readthedocs.io/en/latest/ 54 | [badge-docs]: https://img.shields.io/readthedocs/scverse-tutorials 55 | [//]: # "numfocus-fiscal-sponsor-attribution" 56 | 57 | scverse-tutorials is part of the scverse® project ([website](https://scverse.org), [governance](https://scverse.org/about/roles)) and is fiscally sponsored by [NumFOCUS](https://numfocus.org/). 58 | If you like scverse® and want to support our mission, please consider making a tax-deductible [donation](https://numfocus.org/donate-to-scverse) to help the project pay for developer time, professional services, travel, workshops, and a variety of other needs. 59 | 60 |
68 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/_static/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scverse/scverse-tutorials/169d1d33f00f7e738da725f350d119cd8c7d4aba/docs/_static/.gitkeep -------------------------------------------------------------------------------- /docs/_static/css/custom.css: -------------------------------------------------------------------------------- 1 | /* Reduce the font size in data frames - See https://github.com/scverse/cookiecutter-scverse/issues/193 */ 2 | div.cell_output table.dataframe { 3 | font-size: 0.8em; 4 | } 5 | -------------------------------------------------------------------------------- /docs/_templates/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scverse/scverse-tutorials/169d1d33f00f7e738da725f350d119cd8c7d4aba/docs/_templates/.gitkeep -------------------------------------------------------------------------------- /docs/_templates/autosummary/class.rst: -------------------------------------------------------------------------------- 1 | {{ fullname | escape | underline}} 2 | 3 | .. currentmodule:: {{ module }} 4 | 5 | .. add toctree option to make autodoc generate the pages 6 | 7 | .. autoclass:: {{ objname }} 8 | 9 | {% block attributes %} 10 | {% if attributes %} 11 | Attributes table 12 | ~~~~~~~~~~~~~~~~~~ 13 | 14 | .. autosummary:: 15 | {% for item in attributes %} 16 | ~{{ fullname }}.{{ item }} 17 | {%- endfor %} 18 | {% endif %} 19 | {% endblock %} 20 | 21 | {% block methods %} 22 | {% if methods %} 23 | Methods table 24 | ~~~~~~~~~~~~~ 25 | 26 | .. autosummary:: 27 | {% for item in methods %} 28 | {%- if item != '__init__' %} 29 | ~{{ fullname }}.{{ item }} 30 | {%- endif -%} 31 | {%- endfor %} 32 | {% endif %} 33 | {% endblock %} 34 | 35 | {% block attributes_documentation %} 36 | {% if attributes %} 37 | Attributes 38 | ~~~~~~~~~~~ 39 | 40 | {% for item in attributes %} 41 | 42 | .. autoattribute:: {{ [objname, item] | join(".") }} 43 | {%- endfor %} 44 | 45 | {% endif %} 46 | {% endblock %} 47 | 48 | {% block methods_documentation %} 49 | {% if methods %} 50 | Methods 51 | ~~~~~~~ 52 | 53 | {% for item in methods %} 54 | {%- if item != '__init__' %} 55 | 56 | .. 
automethod:: {{ [objname, item] | join(".") }} 57 | {%- endif -%} 58 | {%- endfor %} 59 | 60 | {% endif %} 61 | {% endblock %} 62 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | import sys 9 | from datetime import datetime 10 | from importlib.metadata import metadata 11 | from pathlib import Path 12 | 13 | HERE = Path(__file__).parent 14 | sys.path.insert(0, str(HERE / "extensions")) 15 | 16 | 17 | # -- Project information ----------------------------------------------------- 18 | 19 | # NOTE: If you installed your project in editable mode, this might be stale. 20 | # If this is the case, reinstall it to refresh the metadata 21 | info = metadata("scverse-tutorials") 22 | project_name = info["Name"] 23 | author = info["Author"] 24 | copyright = f"{datetime.now():%Y}, {author}." 25 | version = info["Version"] 26 | urls = dict(pu.split(", ") for pu in info.get_all("Project-URL")) 27 | repository_url = urls["Source"] 28 | 29 | # The full version, including alpha/beta/rc tags 30 | release = info["Version"] 31 | 32 | bibtex_bibfiles = ["references.bib"] 33 | templates_path = ["_templates"] 34 | nitpicky = True # Warn about broken links 35 | needs_sphinx = "4.0" 36 | 37 | html_context = { 38 | "display_github": True, 39 | "github_user": "scverse", 40 | "github_repo": project_name, 41 | "github_version": "main", 42 | "conf_py_path": "/docs/", 43 | } 44 | 45 | # -- General configuration --------------------------------------------------- 46 | 47 | # Add any Sphinx extension module names here, as strings. 48 | # They can be extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 
49 | extensions = [ 50 | "myst_nb", 51 | "sphinx_copybutton", 52 | "sphinx.ext.autodoc", 53 | "sphinx.ext.intersphinx", 54 | "sphinx.ext.autosummary", 55 | "sphinx.ext.napoleon", 56 | "sphinx_issues", 57 | "sphinxcontrib.bibtex", 58 | "sphinx_autodoc_typehints", 59 | "sphinx.ext.mathjax", 60 | "IPython.sphinxext.ipython_console_highlighting", 61 | "sphinxext.opengraph", 62 | *[p.stem for p in (HERE / "extensions").glob("*.py")], 63 | ] 64 | 65 | autosummary_generate = True 66 | autodoc_member_order = "groupwise" 67 | default_role = "literal" 68 | napoleon_google_docstring = False 69 | napoleon_numpy_docstring = True 70 | napoleon_include_init_with_doc = False 71 | napoleon_use_rtype = True # having a separate entry generally helps readability 72 | napoleon_use_param = True 73 | myst_heading_anchors = 6 # create anchors for h1-h6 74 | myst_enable_extensions = [ 75 | "amsmath", 76 | "colon_fence", 77 | "deflist", 78 | "dollarmath", 79 | "html_image", 80 | "html_admonition", 81 | ] 82 | myst_url_schemes = ("http", "https", "mailto") 83 | nb_output_stderr = "remove" 84 | nb_execution_mode = "off" 85 | nb_merge_streams = True 86 | typehints_defaults = "braces" 87 | 88 | source_suffix = {".rst": "restructuredtext", ".ipynb": "myst-nb", ".myst": "myst-nb"} 89 | 90 | intersphinx_mapping = { 91 | "python": ("https://docs.python.org/3", None), 92 | "anndata": ("https://anndata.readthedocs.io/en/latest/", None), # TODO: change back to stable after 0.12 release 93 | "numpy": ("https://numpy.org/doc/stable/", None), 94 | "scanpy": ("https://scanpy.readthedocs.io/en/stable/", None), 95 | "fast-array-utils": ("https://icb-fast-array-utils.readthedocs-hosted.com/en/stable", None), 96 | "dask": ("https://docs.dask.org/en/stable", None), 97 | "scipy": ("https://docs.scipy.org/doc/scipy", None), 98 | "rapids-singlecell": ("https://rapids-singlecell.readthedocs.io/en/stable/", None), 99 | } 100 | 101 | # List of patterns, relative to source directory, that match files and 102 | # directories to ignore when looking for source files. 103 | # This pattern also affects html_static_path and html_extra_path. 104 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "**.ipynb_checkpoints", ".jupyter_cache"] 105 | 106 | 107 | # -- Options for HTML output ------------------------------------------------- 108 | 109 | # The theme to use for HTML and HTML Help pages. See the documentation for 110 | # a list of builtin themes. 111 | # 112 | html_theme = "sphinx_book_theme" 113 | html_static_path = ["_static"] 114 | html_css_files = ["css/custom.css"] 115 | 116 | html_title = project_name 117 | 118 | html_theme_options = { 119 | "repository_url": repository_url, 120 | "repository_branch": "main", 121 | "path_to_docs": "docs/", 122 | "navigation_with_keys": False, 123 | "use_repository_button": True, 124 | "launch_buttons": { 125 | "binderhub_url": "https://mybinder.org", 126 | # "colab_url": "https://colab.research.google.com", 127 | }, 128 | } 129 | 130 | pygments_style = "default" 131 | 132 | nitpick_ignore = [ 133 | # If building the documentation fails because of a missing link that is outside your control, 134 | # you can add an exception to this list. 
135 | # ("py:class", "igraph.Graph"),
136 | ]
137 |
-------------------------------------------------------------------------------- /docs/extensions/typed_returns.py: --------------------------------------------------------------------------------
1 | # code from https://github.com/theislab/scanpy/blob/master/docs/extensions/typed_returns.py
2 | # with some minor adjustment
3 | from __future__ import annotations
4 |
5 | import re
6 | from collections.abc import Generator, Iterable
7 |
8 | from sphinx.application import Sphinx
9 | from sphinx.ext.napoleon import NumpyDocstring
10 |
11 |
12 | def _process_return(lines: Iterable[str]) -> Generator[str, None, None]:
13 |     for line in lines:
14 |         if m := re.fullmatch(r"(?P<param>\w+)\s+:\s+(?P<type>[\w.]+)", line):
15 |             yield f"-{m['param']} (:class:`~{m['type']}`)"
16 |         else:
17 |             yield line
18 |
19 |
20 | def _parse_returns_section(self: NumpyDocstring, section: str) -> list[str]:
21 |     lines_raw = self._dedent(self._consume_to_next_section())
22 |     if lines_raw[0] == ":":
23 |         del lines_raw[0]
24 |     lines = self._format_block(":returns: ", list(_process_return(lines_raw)))
25 |     if lines and lines[-1]:
26 |         lines.append("")
27 |     return lines
28 |
29 |
30 | def setup(app: Sphinx):
31 |     """Set app."""
32 |     NumpyDocstring._parse_returns_section = _parse_returns_section
33 |
-------------------------------------------------------------------------------- /docs/how-to-dask.md: --------------------------------------------------------------------------------
1 | # Dask Q&A
2 |
3 | Here we will go through some common questions and answers about `dask`, with a special focus on its integration with `scanpy` and `anndata`. For more comprehensive tutorials or other topics like {doc}`launching a cluster `, head over to their documentation.
4 |
5 | ## Quickstart
6 |
7 | ### How do I monitor the {doc}`dask dashboard `?
8 |
9 | If you are in a jupyter notebook, when you render the `repr` of your `client`, you will see a link, usually something like `http://localhost:8787/status`.
10 | If you are working locally, this link alone should suffice.
11 |
12 | If you are working on some sort of remote notebook from a web browser, you will need to replace `http://localhost` with the root URL of the notebook.
13 |
14 | If you are in vscode, there is a [`dask` extension] which allows you to monitor it there.
15 |
16 | ### How do I know how to allocate resources?
17 |
18 | In `dask`, every worker will receive an equal share of the memory available.
19 | So if you request, e.g., a slurm job with 256 GB of RAM and then start 8 workers, each will have 32 GB of memory.
20 |
21 | `dask` generally distributes jobs to each worker based on the chunking of the array.
22 | So if you have dense chunks of `(30_000, 30_000)` with 32-bit integers, each worker will need at least 3.6 GB just to load the data.
23 | If you then do something like matrix multiplication, you may need double that or even more.
24 |
25 | ### How do I read my data into a `dask` array?
26 |
27 | {func}`anndata.experimental.read_elem_lazy` or {func}`anndata.experimental.read_lazy` can help you if you already have data on-disk that was written to the `anndata` file format.
28 | If you use {func}`dask.array.to_zarr`, the data _cannot_ be read in using `anndata`'s functionality as `anndata` will look for its {doc}`specified file format metadata `.
29 |
30 | If you need to implement custom IO, we have generally found that {func}`dask.array.map_blocks` provides a nice way to do so.
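As an illustration, here is a minimal, hedged sketch of that pattern for a dense matrix stored in an HDF5 file. This is not the actual `anndata` implementation — the file path, dataset key, and chunk size below are made-up placeholders:

```python
import dask.array as da
import h5py

# Illustrative assumptions: adjust the path, dataset key and chunking to your data.
path, key, chunk_rows = "data.h5", "X", 10_000

with h5py.File(path, "r") as f:
    n_obs, n_var = f[key].shape
    dtype = f[key].dtype

# One chunk per slab of `chunk_rows` rows (the last chunk may be smaller).
row_chunks = tuple(min(chunk_rows, n_obs - start) for start in range(0, n_obs, chunk_rows))


def load_block(block_info=None):
    # `block_info[None]` describes the output block being produced,
    # including which slice of the final array it covers.
    (row_start, row_stop), _ = block_info[None]["array-location"]
    with h5py.File(path, "r") as f:  # re-open per block so the task graph stays serializable
        return f[key][row_start:row_stop, :]


x = da.map_blocks(load_block, chunks=(row_chunks, (n_var,)), dtype=dtype)
```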
31 | See [our custom h5 io code] for a complete, real-world example.
32 |
33 | ## Advanced use and how-to-contribute
34 |
35 | ### How do `scanpy` and `anndata` handle sparse matrices?
36 |
37 | While there is some {class}`scipy.sparse.csr_matrix` and {class}`scipy.sparse.csc_matrix` support for `dask`, it is not comprehensive and is missing key functions like summation, mean, etc.
38 | We have implemented custom functionality, much of which lives in {mod}`fast_array_utils`, although we have also had to implement custom algorithms like `pca` for sparse-in-dask.
39 | In the future, an [`array-api`]-compatible sparse matrix like [`finch`] would help us considerably as `dask` supports the [`array-api`].
40 |
41 | Therefore, if you run into a puzzling error after trying to run a function like {func}`numpy.sum` (or similar) on a sparse-in-dask array, consider checking {mod}`fast_array_utils`.
42 | If you need to implement the function yourself, see the next point.
43 |
44 | ### Custom block-wise array operations
45 |
46 | Sometimes you may want to perform an operation on an array that is not implemented anywhere.
47 | Generally, we have found {func}`dask.array.map_blocks` to be versatile enough that most operations can be expressed with it. Click on the link to see `dask`'s own tutorial about the function.
48 |
49 | Take this (simplified) example of calculating a gram matrix from {func}`scanpy.pp.pca` for sparse-in-dask:
50 |
51 | ```python
52 | def gram_block(x_part):
53 |     gram_matrix = x_part.T @ x_part
54 |     return gram_matrix[None, ...]
55 |
56 | gram_matrix_dask = da.map_blocks(
57 |     gram_block,
58 |     x,
59 |     new_axis=(1,),
60 |     chunks=((1,) * x.blocks.size, (x.shape[1],), (x.shape[1],)),
61 |     meta=np.array([], dtype=x.dtype),
62 |     dtype=x.dtype,
63 | ).sum(axis=0)
64 | ```
65 |
66 | This algorithm goes through the rows in blocks of `chunk_size` and calculates the gram matrix for each block, producing a collection of `(n_vars, n_vars)`-sized matrices.
67 | These are then summed together to produce a single `(n_vars, n_vars)` matrix, which is the gram matrix.
68 |
69 | Because `dask` does not implement matrix multiplication for sparse-in-dask, we do it ourselves.
70 | We use `map_blocks` over a CSR sparse-in-dask array where the chunking looks something like `(chunk_size, n_vars)`.
71 | When we compute the individual block's gram matrix, we add an axis via `[None, ...]` so that we can sum over that axis, i.e., the `da.map_blocks` call produces a `(n_obs // chunk_size, n_vars, n_vars)`-sized matrix which is summed over the first dimension.
72 | However, to make this work, we need to be very specific about what the result of `da.map_blocks` should look like, which is done via `new_axis` and `chunks`.
73 | `new_axis` indicates that we are adding a single new axis at the front.
74 | The `chunks` argument specifies that the output of `da.map_blocks` should be made up of `x.blocks.size` matrices of shape `(1, n_vars, n_vars)`.
75 | The `chunks` argument thus allows the shape of the output to be inferred.
76 |
77 | While this example is a bit complicated, it shows how you can go from a matrix of one shape and chunking to another by operating cleanly over blocks.
78 |
79 | ## FAQ
80 |
81 | ### What is `persist` used for in RSC notebooks?
82 |
83 | In the {doc}`multi-gpu showcase notebook for rapids-singlecell `, {meth}`dask.array.Array.persist` appears throughout the notebook.
84 | This loads the entire dataset into memory while keeping the representation as a dask array (a minimal sketch of the pattern is shown below).
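For illustration, here is a hedged sketch of the idea — the array below is a random stand-in for a dataset you would normally read lazily from disk, and the computed quantities are arbitrary examples:

```python
import dask.array as da

# Stand-in for an on-disk-backed array (e.g. one obtained via anndata.experimental.read_lazy);
# the shape and chunking here are arbitrary.
x = da.random.random((100_000, 2_000), chunks=(10_000, 2_000))

x = x.persist()  # materialize every chunk in (distributed) memory; `x` remains a dask array

# Later steps reuse the in-memory chunks instead of re-reading / recomputing them.
mean_per_gene = x.mean(axis=0).compute()
total_counts = x.sum(axis=1).compute()
```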
85 | Thus, lazy computation still works but only necessitates a single read into memory. 86 | The catch is that you need to have enough memory to use `persist`, but if you do it greatly speeds up the computation. 87 | 88 | ### I'm out of memory, what now? 89 | 90 | You can always reduce the number of workers you use, which will cause more memory to be allocated per worker. 91 | Some algorithms may have limitations with loading all data onto a single node; see {issue}`dask/dask-ml#985` for an example. 92 | 93 | ### How do I choose chunk sizes? 94 | 95 | Have a look at the {doc}`dask docs for chunking `, however the general rule of thumb there is to use larger chunks in memory than on disk. 96 | In this sense, it is probably a good idea to use the largest chunk size in memory allowable by your memory limits (and the algorithms you use) in order to maximize any thread-level parallelization in algorithms to its fullest. 97 | For sparse data, where the chunks in-memory do not map to those on disk, maxing out the memory available by choosing a large chunk size becomes more imperative. 98 | 99 | [`dask` extension]: https://marketplace.visualstudio.com/items?itemName=joyceerhl.vscode-das 100 | [our custom h5 io code]: https://github.com/scverse/anndata/blob/089ed929393a02200b389395f278b7c920e5bc4a/src/anndata/_io/specs/lazy_methods.py#L179-L205 101 | [`array-api`]: https://data-apis.org/array-api/latest/index.html 102 | [`finch`]: https://github.com/finch-tensor/finch-tensor-python 103 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | ```{include} ../README.md 2 | 3 | ``` 4 | 5 | ```{toctree} 6 | :hidden: true 7 | :maxdepth: 1 8 | 9 | notebooks/basic-scrna-tutorial 10 | notebooks/anndata_getting_started 11 | notebooks/tutorial_axes_anndata_mudata 12 | notebooks/scverse_data_backed 13 | notebooks/scverse_data_interoperability 14 | notebooks/tutorial_concatenation_anndata_mudata 15 | how-to-dask.md 16 | references.md 17 | ``` 18 | -------------------------------------------------------------------------------- /docs/notebooks/.gitignore: -------------------------------------------------------------------------------- 1 | data/ 2 | -------------------------------------------------------------------------------- /docs/notebooks/data/pbmc3k_processed.h5ad: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scverse/scverse-tutorials/169d1d33f00f7e738da725f350d119cd8c7d4aba/docs/notebooks/data/pbmc3k_processed.h5ad -------------------------------------------------------------------------------- /docs/notebooks/img/X.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scverse/scverse-tutorials/169d1d33f00f7e738da725f350d119cd8c7d4aba/docs/notebooks/img/X.png -------------------------------------------------------------------------------- /docs/notebooks/img/anndata_schema_full.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scverse/scverse-tutorials/169d1d33f00f7e738da725f350d119cd8c7d4aba/docs/notebooks/img/anndata_schema_full.png -------------------------------------------------------------------------------- /docs/notebooks/img/layers.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/scverse/scverse-tutorials/169d1d33f00f7e738da725f350d119cd8c7d4aba/docs/notebooks/img/layers.png -------------------------------------------------------------------------------- /docs/notebooks/img/names.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scverse/scverse-tutorials/169d1d33f00f7e738da725f350d119cd8c7d4aba/docs/notebooks/img/names.png -------------------------------------------------------------------------------- /docs/notebooks/img/obsmvarm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scverse/scverse-tutorials/169d1d33f00f7e738da725f350d119cd8c7d4aba/docs/notebooks/img/obsmvarm.png -------------------------------------------------------------------------------- /docs/notebooks/img/obspvarp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scverse/scverse-tutorials/169d1d33f00f7e738da725f350d119cd8c7d4aba/docs/notebooks/img/obspvarp.png -------------------------------------------------------------------------------- /docs/notebooks/img/obsvar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scverse/scverse-tutorials/169d1d33f00f7e738da725f350d119cd8c7d4aba/docs/notebooks/img/obsvar.png -------------------------------------------------------------------------------- /docs/notebooks/scverse_data_interoperability.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "fifth-grammar", 6 | "metadata": {}, 7 | "source": [ 8 | "# Interoperability between scverse data structures and other languages \n", 9 | "\n", 10 | "Here we provide a list of resources that can be used to work with scverse data structures from your language of choice.\n", 11 | "\n", 12 | "A more detailed tutorial on interoperability with other languages can be found in the [Single-cell analysis best-practices book](https://www.sc-best-practices.org/introduction/interoperability.html)." 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "id": "pending-grenada", 18 | "metadata": {}, 19 | "source": [ 20 | "## Conversion between python and R structures for single-cell analysis\n", 21 | "\n", 22 | "Several toolkits for single-cell analysis in R build upon [SingleCellExperiment](http://bioconductor.org/books/3.16/OSCA.intro/the-singlecellexperiment-class.html) objects or [Seurat](https://satijalab.org/seurat/) objects. 
The following table provides an indication of which objects slots store the same data in AnnData and R objects.\n", 23 | "\n", 24 | "| | `AnnData` | `SingleCellExperiment` | `Seurat` |\n", 25 | "|--------------------------------------|--------------------------|------------------------|------------------------------------|\n", 26 | "| **Active expression matrix** | `adata.X` | `assay(sce)` | `GetAssayData(seu)` |\n", 27 | "| **Alternative expression matrices** | `adata.layers['counts']` | `counts(sce)` | `GetAssay(seu)@counts` |\n", 28 | "| **Cell-level metadata** | `adata.obs` | `colData(sce)` | `seu@meta.data` |\n", 29 | "| **Gene-level metadata** | `adata.var` | `rowData(sce)` | `GetAssay(seu)@meta.features` |\n", 30 | "| **Dimensionality reductions** | `adata.obsm` | `reducedDim(sce)` | `seu@reductions` |\n", 31 | "| **cell IDs** | `adata.obs_names` | `colnames(sce)` | `colnames(seu)` |\n", 32 | "| **gene IDs** | `adata.var_names` | `rownames(sce)` | `rownames(seu)` |\n", 33 | "| **Cell-cell similarity graphs** | `adata.obsp` | --- | `seu@graphs` |" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "id": "executed-authority", 39 | "metadata": {}, 40 | "source": [ 41 | "### AnnData ⇄ Seurat objects\n", 42 | "\n", 43 | "See [Seurat documentation](https://satijalab.org/seurat/) for more details about Seurat objects.\n", 44 | "\n", 45 | "- [MuDataSeurat](https://pmbio.github.io/MuDataSeurat/) - R package to read and write `h5ad` files to and from Seurat objects\n", 46 | "- [sceasy](https://github.com/cellgeni/sceasy#usage) - R package to convert between objects within a session or saving `h5ad` or `rds` files \n", 47 | "- Using [reticulate](https://theislab.github.io/scanpy-in-R/#converting-from-python-to-r-1) - tutorial for conversion within R/Rmd sessions \n", 48 | "\n", 49 | "\n", 50 | "\n", 51 | "### AnnData ⇄ SingleCellExperiment objects\n", 52 | "\n", 53 | "See [OSCA book](http://bioconductor.org/books/3.16/OSCA.intro/the-singlecellexperiment-class.html) for more details about SingleCellExperiment objects.\n", 54 | "\n", 55 | "- [zellconverter](https://theislab.github.io/zellkonverter/articles/zellkonverter.html) - R/Bioconductor package to read and write `h5ad` files and to convert objects within R sessions using [basilisk](https://bioconductor.org/packages/release/bioc/html/basilisk.html) \n", 56 | "- [anndata2ri](https://github.com/theislab/anndata2ri#anndata--singlecellexperiment) - python package to convert between objects within python sessions using [rpy2](https://github.com/rpy2/rpy2#readme) \n", 57 | "- [sceasy](https://github.com/cellgeni/sceasy#usage) - R package to convert between objects within a session or saving `h5ad` or `rds` files \n", 58 | "- Using [reticulate](https://theislab.github.io/scanpy-in-R/#converting-from-python-to-r-1) - tutorial for conversion within R/Rmd sessions \n", 59 | "\n", 60 | "### AnnData ⇄ Loom objects\n", 61 | "\n", 62 | "See [Loompy documentation](http://linnarssonlab.org/loompy/index.html) for more details about Loom objects.\n", 63 | "\n", 64 | "- Using [anndata](https://anndata.readthedocs.io/en/latest/generated/anndata.read_loom.html#anndata.read_loom) - function to read `loom` files as AnnData objects\n", 65 | "- [sceasy](https://github.com/cellgeni/sceasy#usage) - R package to convert between objects within a session or saving `h5ad` or `loom` files \n", 66 | "\n", 67 | "### MuData ⇄ Seurat objects\n", 68 | "\n", 69 | "See [Seurat documentation](https://satijalab.org/seurat/) for more details about Seurat objects.\n", 70 | 
"\n", 71 | "- [MuDataSeurat](https://pmbio.github.io/MuDataSeurat/) - R package to read and write `h5mu` files to and from Seurat objects\n", 72 | "\n", 73 | "### MuData ⇄ MultiAssayExperiment objects\n", 74 | "\n", 75 | "See [documentation](http://waldronlab.io/MultiAssayExperiment/) for more details about MultiAssayExperiment objects.\n", 76 | "\n", 77 | "- [MuData for MultiAssayExperiment](https://ilia-kats.github.io/MuData/articles/Getting-Started.html) - R package to read and write `h5mu` files to and from `MultiAssayExperiment` objects \n", 78 | "\n", 79 | "### MuData ⇄ ArchR objects\n", 80 | "\n", 81 | "See [ArchR documentation](https://www.archrproject.com/bookdown/what-is-an-arrow-file-archrproject.html) for more details about ArchR objects.\n", 82 | "\n", 83 | "- Using [chame](https://gtca.github.io/chame/examples/archr_io.html) - python package providing functionality to read Arrow files " 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "id": "virtual-street", 89 | "metadata": {}, 90 | "source": [ 91 | "## Read h5ad/h5mu in other languages\n", 92 | "\n", 93 | "### Julia\n", 94 | "\n", 95 | "- [Muon.jl](https://docs.juliahub.com/Muon/QfqCh/0.1.1/objects/) provides Julia implementations of ``AnnData`` and ``MuData`` objects, as well as IO for the HDF5 format\n", 96 | "- [scVI.jl](https://maren-ha.github.io/scVI.jl/index.html) provides a Julia implementation of ``AnnData`` as well as IO for the HDF5 format.\n", 97 | "\n", 98 | "### Javascript\n", 99 | "\n", 100 | "- [Vitessce](https://github.com/vitessce/vitessce) -contains loaders from ``AnnData``s stored as Zarr\n", 101 | "\n", 102 | "### Rust\n", 103 | "\n", 104 | "- [anndata-rs](https://github.com/kaizhang/anndata-rs) provides a Rust implementation of ``AnnData`` as well as advanced IO support for the HDF5 storage format." 105 | ] 106 | } 107 | ], 108 | "metadata": { 109 | "kernelspec": { 110 | "display_name": "Python 3 (ipykernel)", 111 | "language": "python", 112 | "name": "python3" 113 | }, 114 | "language_info": { 115 | "codemirror_mode": { 116 | "name": "ipython", 117 | "version": 3 118 | }, 119 | "file_extension": ".py", 120 | "mimetype": "text/x-python", 121 | "name": "python", 122 | "nbconvert_exporter": "python", 123 | "pygments_lexer": "ipython3", 124 | "version": "3.12.5" 125 | } 126 | }, 127 | "nbformat": 4, 128 | "nbformat_minor": 5 129 | } 130 | -------------------------------------------------------------------------------- /docs/notebooks/tutorial_concatenation_anndata_mudata.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Concatenating multimodal experiments" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import warnings\n", 17 | "\n", 18 | "import anndata as ad\n", 19 | "import numpy as np\n", 20 | "import pandas as pd\n", 21 | "from mudata import MuData\n", 22 | "\n", 23 | "warnings.simplefilter(action=\"ignore\", category=FutureWarning)\n", 24 | "\n", 25 | "np.random.seed(1979)" 26 | ] 27 | }, 28 | { 29 | "attachments": {}, 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "Sometimes, you may want to concatenate 2 `MuData` objects because they represent complementary slices of the same dataset on which you have applied different processing. Think of analysing B and T cells separately for your PBMC typical dataset. 
\n", 34 | "Other times instead you need to concatenate 2 modalities into one `AnnData` because the tool you're working with doesn't currently support `MuData` (yeah we know, how dare they?).\n", 35 | "We will showcase here these 2 scenarios of concatenation.\n", 36 | "\n", 37 | "\n", 38 | ":::{note}\n", 39 | "Native concatenation of two `MuData` objects is currently discussed in \n", 40 | "[scverse/mudata#20](https://github.com/scverse/mudata/issues/20) and may\n", 41 | "eventually make parts of this tutorial obsolete. \n", 42 | "\n", 43 | "Note that for some modalities, concatenation requires extra care. For instance, \n", 44 | "in the case of ATAC-seq, concatenation does not make sense unless fragments are aggregated first. \n", 45 | ":::" 46 | ] 47 | }, 48 | { 49 | "attachments": {}, 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "First, we need to import the raw data for a dataset of our choice. We use mudatasets package that conveniently collects some useful 10X single cell datasets that are publicly available. For this example we need a multimodal dataset, so select the *citeseq 5k* dataset, a collection of healthy PBMCs for which 2 modalities were profiled, RNA and PROTEINS." 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 2, 59 | "metadata": {}, 60 | "outputs": [ 61 | { 62 | "data": { 63 | "text/plain": [ 64 | "['brain3k_multiome',\n", 65 | " 'pbmc3k_multiome',\n", 66 | " 'pbmc5k_citeseq',\n", 67 | " 'brain9k_multiome',\n", 68 | " 'pbmc10k_multiome']" 69 | ] 70 | }, 71 | "execution_count": 2, 72 | "metadata": {}, 73 | "output_type": "execute_result" 74 | } 75 | ], 76 | "source": [ 77 | "import mudatasets as mds\n", 78 | "\n", 79 | "mds.list_datasets()" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 3, 85 | "metadata": {}, 86 | "outputs": [ 87 | { 88 | "name": "stdout", 89 | "output_type": "stream", 90 | "text": [ 91 | "■ File filtered_feature_bc_matrix.h5 from pbmc5k_citeseq has been found at /home/runner/mudatasets/pbmc5k_citeseq/filtered_feature_bc_matrix.h5\n", 92 | "■ Checksum is validated (md5) for filtered_feature_bc_matrix.h5\n" 93 | ] 94 | }, 95 | { 96 | "name": "stderr", 97 | "output_type": "stream", 98 | "text": [ 99 | "/home/runner/miniconda3/envs/tutorials/lib/python3.12/site-packages/mudatasets/core.py:203: UserWarning: Dataset is in the 10X .h5 format and can't be loaded as backed.\n", 100 | " warn(\"Dataset is in the 10X .h5 format and can't be loaded as backed.\")\n" 101 | ] 102 | }, 103 | { 104 | "name": "stderr", 105 | "output_type": "stream", 106 | "text": [ 107 | "/home/runner/miniconda3/envs/tutorials/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", 108 | " from .autonotebook import tqdm as notebook_tqdm\n" 109 | ] 110 | }, 111 | { 112 | "name": "stdout", 113 | "output_type": "stream", 114 | "text": [ 115 | "■ Loading filtered_feature_bc_matrix.h5...\n" 116 | ] 117 | }, 118 | { 119 | "name": "stderr", 120 | "output_type": "stream", 121 | "text": [ 122 | "/home/runner/miniconda3/envs/tutorials/lib/python3.12/site-packages/anndata/_core/anndata.py:1756: UserWarning: Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", 123 | " utils.warn_names_duplicates(\"var\")\n", 124 | "/home/runner/miniconda3/envs/tutorials/lib/python3.12/site-packages/anndata/_core/anndata.py:1756: UserWarning: Variable names are not unique. 
To make them unique, call `.var_names_make_unique`.\n", 125 | " utils.warn_names_duplicates(\"var\")\n", 126 | "/home/runner/miniconda3/envs/tutorials/lib/python3.12/site-packages/mudata/_core/mudata.py:915: UserWarning: var_names are not unique. To make them unique, call `.var_names_make_unique`.\n", 127 | " warnings.warn(\n" 128 | ] 129 | } 130 | ], 131 | "source": [ 132 | "mds.info(\"pbmc5k_citeseq\")\n", 133 | "pbmc5k = mds.load(\"pbmc5k_citeseq\", files=[\"filtered_feature_bc_matrix.h5\"])" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 4, 139 | "metadata": {}, 140 | "outputs": [ 141 | { 142 | "data": { 143 | "text/html": [ 144 | "
MuData object with n_obs × n_vars = 5247 × 33570\n",
145 |        "  var:\t'gene_ids', 'feature_types', 'genome', 'pattern', 'read', 'sequence'\n",
146 |        "  2 modalities\n",
147 |        "    rna:\t5247 x 33538\n",
148 |        "      var:\t'gene_ids', 'feature_types', 'genome', 'pattern', 'read', 'sequence'\n",
149 |        "    prot:\t5247 x 32\n",
150 |        "      var:\t'gene_ids', 'feature_types', 'genome', 'pattern', 'read', 'sequence'
" 151 | ], 152 | "text/plain": [ 153 | "MuData object with n_obs × n_vars = 5247 × 33570\n", 154 | " var:\t'gene_ids', 'feature_types', 'genome', 'pattern', 'read', 'sequence'\n", 155 | " 2 modalities\n", 156 | " rna:\t5247 x 33538\n", 157 | " var:\t'gene_ids', 'feature_types', 'genome', 'pattern', 'read', 'sequence'\n", 158 | " prot:\t5247 x 32\n", 159 | " var:\t'gene_ids', 'feature_types', 'genome', 'pattern', 'read', 'sequence'" 160 | ] 161 | }, 162 | "execution_count": 4, 163 | "metadata": {}, 164 | "output_type": "execute_result" 165 | } 166 | ], 167 | "source": [ 168 | "pbmc5k" 169 | ] 170 | }, 171 | { 172 | "attachments": {}, 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "We create 2 different subsamples of the same underlying data for both RNA and PROT modalities." 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 5, 182 | "metadata": {}, 183 | "outputs": [], 184 | "source": [ 185 | "rna = pbmc5k.mod[\"rna\"]\n", 186 | "prot = pbmc5k.mod[\"prot\"]" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 6, 192 | "metadata": {}, 193 | "outputs": [], 194 | "source": [ 195 | "rna_a = rna[np.arange(300), np.sort(np.random.choice(np.arange(1000), 1000, replace=False))].copy()\n", 196 | "prot_a = prot[rna_a.obs_names,].copy()\n", 197 | "\n", 198 | "\n", 199 | "rna_b = rna[np.arange(500, 900), np.sort(np.random.choice(np.arange(3000), 1000, replace=False))].copy()\n", 200 | "prot_b = prot[rna_b.obs_names, np.arange(15)].copy()" 201 | ] 202 | }, 203 | { 204 | "attachments": {}, 205 | "cell_type": "markdown", 206 | "metadata": {}, 207 | "source": [ 208 | "And we create the respective `MuData` objects." 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": 7, 214 | "metadata": {}, 215 | "outputs": [], 216 | "source": [ 217 | "mdata_a = MuData({\"prot\": prot_a, \"rna\": rna_a})\n", 218 | "mdata_b = MuData({\"prot\": prot_b, \"rna\": rna_b})" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": 8, 224 | "metadata": {}, 225 | "outputs": [ 226 | { 227 | "data": { 228 | "text/html": [ 229 | "
MuData object with n_obs × n_vars = 300 × 1032\n",
230 |        "  var:\t'gene_ids', 'feature_types', 'genome', 'pattern', 'read', 'sequence'\n",
231 |        "  2 modalities\n",
232 |        "    prot:\t300 x 32\n",
233 |        "      var:\t'gene_ids', 'feature_types', 'genome', 'pattern', 'read', 'sequence'\n",
234 |        "    rna:\t300 x 1000\n",
235 |        "      var:\t'gene_ids', 'feature_types', 'genome', 'pattern', 'read', 'sequence'
" 236 | ], 237 | "text/plain": [ 238 | "MuData object with n_obs × n_vars = 300 × 1032\n", 239 | " var:\t'gene_ids', 'feature_types', 'genome', 'pattern', 'read', 'sequence'\n", 240 | " 2 modalities\n", 241 | " prot:\t300 x 32\n", 242 | " var:\t'gene_ids', 'feature_types', 'genome', 'pattern', 'read', 'sequence'\n", 243 | " rna:\t300 x 1000\n", 244 | " var:\t'gene_ids', 'feature_types', 'genome', 'pattern', 'read', 'sequence'" 245 | ] 246 | }, 247 | "execution_count": 8, 248 | "metadata": {}, 249 | "output_type": "execute_result" 250 | } 251 | ], 252 | "source": [ 253 | "mdata_a" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": 9, 259 | "metadata": {}, 260 | "outputs": [ 261 | { 262 | "data": { 263 | "text/html": [ 264 | "
MuData object with n_obs × n_vars = 400 × 1015\n",
265 |        "  var:\t'gene_ids', 'feature_types', 'genome', 'pattern', 'read', 'sequence'\n",
266 |        "  2 modalities\n",
267 |        "    prot:\t400 x 15\n",
268 |        "      var:\t'gene_ids', 'feature_types', 'genome', 'pattern', 'read', 'sequence'\n",
269 |        "    rna:\t400 x 1000\n",
270 |        "      var:\t'gene_ids', 'feature_types', 'genome', 'pattern', 'read', 'sequence'
" 271 | ], 272 | "text/plain": [ 273 | "MuData object with n_obs × n_vars = 400 × 1015\n", 274 | " var:\t'gene_ids', 'feature_types', 'genome', 'pattern', 'read', 'sequence'\n", 275 | " 2 modalities\n", 276 | " prot:\t400 x 15\n", 277 | " var:\t'gene_ids', 'feature_types', 'genome', 'pattern', 'read', 'sequence'\n", 278 | " rna:\t400 x 1000\n", 279 | " var:\t'gene_ids', 'feature_types', 'genome', 'pattern', 'read', 'sequence'" 280 | ] 281 | }, 282 | "execution_count": 9, 283 | "metadata": {}, 284 | "output_type": "execute_result" 285 | } 286 | ], 287 | "source": [ 288 | "mdata_b" 289 | ] 290 | }, 291 | { 292 | "attachments": {}, 293 | "cell_type": "markdown", 294 | "metadata": {}, 295 | "source": [ 296 | "As you can see, the 2 RNA subsamples don't share any cells, but they do share some features. The same holds for the PROT assay." 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": 10, 302 | "metadata": {}, 303 | "outputs": [ 304 | { 305 | "data": { 306 | "text/plain": [ 307 | "0" 308 | ] 309 | }, 310 | "execution_count": 10, 311 | "metadata": {}, 312 | "output_type": "execute_result" 313 | } 314 | ], 315 | "source": [ 316 | "len(list(set(rna_a.obs_names.tolist()) & set(rna_b.obs_names.tolist())))" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": 11, 322 | "metadata": {}, 323 | "outputs": [ 324 | { 325 | "data": { 326 | "text/plain": [ 327 | "345" 328 | ] 329 | }, 330 | "execution_count": 11, 331 | "metadata": {}, 332 | "output_type": "execute_result" 333 | } 334 | ], 335 | "source": [ 336 | "len(list(set(rna_a.var_names.tolist()) & set(rna_b.var_names.tolist())))" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": 12, 342 | "metadata": {}, 343 | "outputs": [ 344 | { 345 | "data": { 346 | "text/plain": [ 347 | "15" 348 | ] 349 | }, 350 | "execution_count": 12, 351 | "metadata": {}, 352 | "output_type": "execute_result" 353 | } 354 | ], 355 | "source": [ 356 | "len(list(set(prot_a.var_names.tolist()) & set(prot_b.var_names.tolist())))" 357 | ] 358 | }, 359 | { 360 | "cell_type": "markdown", 361 | "metadata": {}, 362 | "source": [ 363 | "## 1. Concatenate datasets, by modality" 364 | ] 365 | }, 366 | { 367 | "cell_type": "markdown", 368 | "metadata": {}, 369 | "source": [ 370 | "In the `AnnData` convention, we store observations (samples or cells) in rows (`axis=0`) and variables (genes, proteins, ATAC regions, etc.) in columns (`axis=1`).\n", 371 | "Both the rows and columns of this matrix are indexed, which allows us to link the structured layers of the AnnData object to each other.\n", 372 | "\n", 373 | "When we interact with either axis of these matrices, we modify the same axis on all the linked layers.\n", 374 | "\n", 375 | "In scRNA-seq data, each row corresponds to a cell with a barcode and each column corresponds to a gene with a gene id. In the protein assay of a CITE-seq experiment, the cells along `axis=0` are the same, but the features are different.
\n", 376 | "\n", 377 | "To collect all the cells and features from 2 datasets, we first have to concatenate each AnnData and then build a new MuData from these.\n", 378 | "\n", 379 | "By default, AnnData concatenates on `axis=0`." 380 | ] 381 | }, 382 | { 383 | "cell_type": "code", 384 | "execution_count": 13, 385 | "metadata": {}, 386 | "outputs": [ 387 | { 388 | "data": { 389 | "text/plain": [ 390 | "AnnData object with n_obs × n_vars = 700 × 345" 391 | ] 392 | }, 393 | "execution_count": 13, 394 | "metadata": {}, 395 | "output_type": "execute_result" 396 | } 397 | ], 398 | "source": [ 399 | "ad.concat([rna_a, rna_b])" 400 | ] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "execution_count": 14, 405 | "metadata": {}, 406 | "outputs": [ 407 | { 408 | "data": { 409 | "text/plain": [ 410 | "AnnData object with n_obs × n_vars = 700 × 345" 411 | ] 412 | }, 413 | "execution_count": 14, 414 | "metadata": {}, 415 | "output_type": "execute_result" 416 | } 417 | ], 418 | "source": [ 419 | "ad.concat([rna_a, rna_b], axis=0)" 420 | ] 421 | }, 422 | { 423 | "cell_type": "markdown", 424 | "metadata": {}, 425 | "source": [ 426 | "You may have noticed that AnnData also defaults to creating a concatenated version of the 2 RNA subsets with only the features that the 2 matrices have in common. This is the default scenario, obtained by setting the parameter `join=\"inner\"`.\n", 427 | "\n", 428 | "There may be instances in which you don't want to lose the features that are missing from one of the 2 RNA subsets, so let's try setting `join=\"outer\"`." 429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "execution_count": 15, 434 | "metadata": {}, 435 | "outputs": [ 436 | { 437 | "data": { 438 | "text/plain": [ 439 | "AnnData object with n_obs × n_vars = 700 × 1655" 440 | ] 441 | }, 442 | "execution_count": 15, 443 | "metadata": {}, 444 | "output_type": "execute_result" 445 | } 446 | ], 447 | "source": [ 448 | "ad.concat([rna_a, rna_b], axis=0, join=\"outer\")" 449 | ] 450 | }, 451 | { 452 | "cell_type": "markdown", 453 | "metadata": {}, 454 | "source": [ 455 | "AnnData also fills the variables that don't match with `0` instead of `NA` values.\n", 456 | "\n", 457 | "*NB: since `axis=0` is the default behaviour, we will omit it in future calls to `concat` for simplicity.*" 458 | ] 459 | }, 460 | { 461 | "cell_type": "code", 462 | "execution_count": 16, 463 | "metadata": {}, 464 | "outputs": [ 465 | { 466 | "data": { 467 | "text/plain": [ 468 | "array([[0., 0., 0., ..., 0., 0., 0.],\n", 469 | " [0., 0., 0., ..., 0., 0., 0.],\n", 470 | " [0., 0., 0., ..., 0., 0., 0.],\n", 471 | " ...,\n", 472 | " [0., 0., 0., ..., 0., 0., 0.],\n", 473 | " [0., 0., 0., ..., 0., 0., 0.],\n", 474 | " [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)" 475 | ] 476 | }, 477 | "execution_count": 16, 478 | "metadata": {}, 479 | "output_type": "execute_result" 480 | } 481 | ], 482 | "source": [ 483 | "ad.concat([rna_a, rna_b], join=\"outer\").X.toarray()" 484 | ] 485 | }, 486 | { 487 | "cell_type": "code", 488 | "execution_count": 17, 489 | "metadata": {}, 490 | "outputs": [ 491 | { 492 | "data": { 493 | "text/plain": [ 494 | "False" 495 | ] 496 | }, 497 | "execution_count": 17, 498 | "metadata": {}, 499 | "output_type": "execute_result" 500 | } 501 | ], 502 | "source": [ 503 | "We can use the same convention to concatenate the two protein assays."
511 | ] 512 | }, 513 | { 514 | "cell_type": "code", 515 | "execution_count": 18, 516 | "metadata": {}, 517 | "outputs": [], 518 | "source": [ 519 | "rna_c = ad.concat([rna_a, rna_b], join=\"outer\")\n", 520 | "prot_c = ad.concat([prot_a, prot_b], join=\"outer\")" 521 | ] 522 | }, 523 | { 524 | "cell_type": "markdown", 525 | "metadata": {}, 526 | "source": [ 527 | "And now we create the new `MuData` object with the newly concatenated assays" 528 | ] 529 | }, 530 | { 531 | "cell_type": "code", 532 | "execution_count": 19, 533 | "metadata": {}, 534 | "outputs": [ 535 | { 536 | "data": { 537 | "text/html": [ 538 | "
MuData object with n_obs × n_vars = 700 × 1687\n",
539 |        "  2 modalities\n",
540 |        "    rna:\t700 x 1655\n",
541 |        "    prot:\t700 x 32
" 542 | ], 543 | "text/plain": [ 544 | "MuData object with n_obs × n_vars = 700 × 1687\n", 545 | " 2 modalities\n", 546 | " rna:\t700 x 1655\n", 547 | " prot:\t700 x 32" 548 | ] 549 | }, 550 | "execution_count": 19, 551 | "metadata": {}, 552 | "output_type": "execute_result" 553 | } 554 | ], 555 | "source": [ 556 | "full = MuData({\"rna\": rna_c, \"prot\": prot_c})\n", 557 | "full" 558 | ] 559 | }, 560 | { 561 | "cell_type": "markdown", 562 | "metadata": {}, 563 | "source": [ 564 | "## 2. Concatenating different modalities\n", 565 | "\n", 566 | "You may want to concatenate the RNA and the PROT modalities of the same cells into a single `AnnData`. We don't recommend this type of concatenation, because every basic operation you would want to perform on a multimodal object is covered by creating a `MuData` object instead. However, some of the tools that deal with multimodal data integration have not implemented MuData support yet.\n" 567 | ] 568 | }, 569 | { 570 | "cell_type": "code", 571 | "execution_count": 20, 572 | "metadata": {}, 573 | "outputs": [ 574 | { 575 | "data": { 576 | "text/plain": [ 577 | "AnnData object with n_obs × n_vars = 300 × 1000\n", 578 | " var: 'gene_ids', 'feature_types', 'genome', 'pattern', 'read', 'sequence'" 579 | ] 580 | }, 581 | "execution_count": 20, 582 | "metadata": {}, 583 | "output_type": "execute_result" 584 | } 585 | ], 586 | "source": [ 587 | "rna_a" 588 | ] 589 | }, 590 | { 591 | "cell_type": "code", 592 | "execution_count": 21, 593 | "metadata": {}, 594 | "outputs": [ 595 | { 596 | "data": { 597 | "text/plain": [ 598 | "AnnData object with n_obs × n_vars = 300 × 32\n", 599 | " var: 'gene_ids', 'feature_types', 'genome', 'pattern', 'read', 'sequence'" 600 | ] 601 | }, 602 | "execution_count": 21, 603 | "metadata": {}, 604 | "output_type": "execute_result" 605 | } 606 | ], 607 | "source": [ 608 | "prot_a" 609 | ] 610 | }, 611 | { 612 | "cell_type": "code", 613 | "execution_count": 22, 614 | "metadata": {}, 615 | "outputs": [ 616 | { 617 | "data": { 618 | "text/plain": [ 619 | "AnnData object with n_obs × n_vars = 300 × 1032\n", 620 | " var: 'gene_ids', 'feature_types', 'genome', 'pattern', 'read', 'sequence'" 621 | ] 622 | }, 623 | "execution_count": 22, 624 | "metadata": {}, 625 | "output_type": "execute_result" 626 | } 627 | ], 628 | "source": [ 629 | "adata_paired = ad.concat([rna_a, prot_a], axis=1)\n", 630 | "adata_paired" 631 | ] 632 | }, 633 | { 634 | "cell_type": "markdown", 635 | "metadata": {}, 636 | "source": [ 637 | "We now have a concatenated AnnData with 1032 `.var` and 300 `.obs`. Let's take a look at the individual layers." 638 | ] 639 | }, 640 | { 641 | "cell_type": "code", 642 | "execution_count": 23, 643 | "metadata": {}, 644 | "outputs": [ 645 | { 646 | "data": { 647 | "text/html": [ 648 |
\n", 649 | "\n", 662 | "\n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | "
AAACCCAAGAGACAAG-1
AAACCCAAGGCCTAGA-1
AAACCCAGTCGTGCCA-1
AAACCCATCGTGCATA-1
AAACGAAAGACAAGCC-1
...
ACACTGAAGTTCCGGC-1
ACACTGAGTGCCCGTA-1
ACACTGAGTTCGTTCC-1
ACAGAAAAGGTACTGG-1
ACAGAAACAAATGGAT-1
\n", 704 | "

300 rows × 0 columns

\n", 705 | "
" 706 | ], 707 | "text/plain": [ 708 | "Empty DataFrame\n", 709 | "Columns: []\n", 710 | "Index: [AAACCCAAGAGACAAG-1, AAACCCAAGGCCTAGA-1, AAACCCAGTCGTGCCA-1, AAACCCATCGTGCATA-1, AAACGAAAGACAAGCC-1, AAACGAAAGAGTGACC-1, AAACGAACATGCCATA-1, AAACGAAGTAATGATG-1, AAACGAAGTTAAAGTG-1, AAACGAATCATACAGC-1, AAACGAATCGTGGAAG-1, AAACGCTAGGTTGGAC-1, AAACGCTCATGCACTA-1, AAACGCTGTACAAACA-1, AAAGAACAGGCCGCTT-1, AAAGAACGTGGATCAG-1, AAAGAACTCCTTATAC-1, AAAGGATAGAGGCCAT-1, AAAGGATCACACTTAG-1, AAAGGATGTGATTAGA-1, AAAGGATGTGGTTTGT-1, AAAGGATTCGAGAATA-1, AAAGGATTCTAAGAAG-1, AAAGGATTCTCTATAC-1, AAAGGGCAGCCGATCC-1, AAAGGGCCAGCCGGTT-1, AAAGGGCGTGACAGCA-1, AAAGGGCGTTCAAACC-1, AAAGGGCTCAGTCACA-1, AAAGGGCTCATTTGGG-1, AAAGGGCTCGAACCAT-1, AAAGGGCTCGGTGTAT-1, AAAGGGCTCTGCTTAT-1, AAAGGTAAGTGCTCGC-1, AAAGGTACATGAGAAT-1, AAAGGTAGTCGCGTCA-1, AAAGGTAGTGTAACGG-1, AAAGGTAGTTACGATC-1, AAAGGTATCCTTATGT-1, AAAGTCCAGCCAGAGT-1, AAAGTCCCAAATTAGG-1, AAAGTCCCAACCACAT-1, AAAGTCCCACAAGGTG-1, AAAGTCCGTAGCTTTG-1, AAAGTCCGTGTGTCGC-1, AAAGTGAAGCAGCGAT-1, AAAGTGAAGTGCAGCA-1, AAAGTGAAGTTCCGTA-1, AAAGTGACAGAGCTAG-1, AAAGTGAGTCGGCTAC-1, AAAGTGAGTTTGTTGG-1, AAATGGAAGATCGCTT-1, AAATGGAAGGCGCTCT-1, AAATGGAGTGGCAGAT-1, AAATGGAGTTAAGTCC-1, AAATGGATCCGGGACT-1, AAATGGATCGCATTGA-1, AACAAAGCAACAAGTA-1, AACAAAGGTCATGCAT-1, AACAAAGTCTTCGCTG-1, AACAACCAGGATGTTA-1, AACAACCCATGACTCA-1, AACAACCGTGCACATT-1, AACAACCTCGAGTGGA-1, AACAACCTCGGTCACG-1, AACAACCTCTAAGAAG-1, AACAAGAAGAAGCCAC-1, AACAAGAAGCCGTCGT-1, AACAAGAAGCGTACAG-1, AACAAGACAACTGCTA-1, AACAAGAGTCATAACC-1, AACAAGATCCCATAGA-1, AACAAGATCGCTCTAC-1, AACACACAGATGGCGT-1, AACACACAGGGCTGAT-1, AACACACCACGCTGCA-1, AACACACGTAACAGGC-1, AACACACGTACGTGTT-1, AACACACGTTGGGATG-1, AACACACTCTATCACT-1, AACAGGGAGATACATG-1, AACAGGGGTAACATAG-1, AACAGGGGTCAACATC-1, AACAGGGGTCAGTCCG-1, AACAGGGTCAATCGGT-1, AACCAACCACAGTATC-1, AACCAACGTAACATCC-1, AACCAACGTCACAGAG-1, AACCAACGTCAGGTAG-1, AACCATGAGATCGGTG-1, AACCATGAGGTCGTCC-1, AACCATGTCTAAGGAA-1, AACCCAAAGGGTGGGA-1, AACCCAACAGCGGTTC-1, AACCCAAGTATCGGTT-1, AACCTGAAGCTGGCTC-1, AACCTGAAGTGCAGGT-1, AACCTGAGTCGAACGA-1, AACCTGAGTCGAGCTC-1, AACCTGAGTCTCTCAC-1, ...]\n", 711 | "\n", 712 | "[300 rows x 0 columns]" 713 | ] 714 | }, 715 | "execution_count": 23, 716 | "metadata": {}, 717 | "output_type": "execute_result" 718 | } 719 | ], 720 | "source": [ 721 | "adata_paired.obs" 722 | ] 723 | }, 724 | { 725 | "cell_type": "code", 726 | "execution_count": 24, 727 | "metadata": {}, 728 | "outputs": [ 729 | { 730 | "data": { 731 | "text/html": [ 732 | "
\n", 733 | "\n", 746 | "\n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | "
gene_idsfeature_typesgenomepatternreadsequence
MIR1302-2HGENSG00000243485Gene ExpressionGRCh38
FAM138AENSG00000237613Gene ExpressionGRCh38
OR4F5ENSG00000186092Gene ExpressionGRCh38
AL627309.1ENSG00000238009Gene ExpressionGRCh38
AL627309.3ENSG00000239945Gene ExpressionGRCh38
.....................
HLA-DR_TotalSeqBHLA-DRAntibody Capture^NNNNNNNNNN(BC)NNNNNNNNNR2AATAGCGAGCAAGTA
TIGIT_TotalSeqBTIGITAntibody Capture^NNNNNNNNNN(BC)NNNNNNNNNR2TTGCTTACCGCCAGA
IgG1_control_TotalSeqBIgG1Antibody Capture^NNNNNNNNNN(BC)NNNNNNNNNR2GCCGGACGACATTAA
IgG2a_control_TotalSeqBIgG2aAntibody Capture^NNNNNNNNNN(BC)NNNNNNNNNR2CTCCTACCTAAACTG
IgG2b_control_TotalSeqBIgG2bAntibody Capture^NNNNNNNNNN(BC)NNNNNNNNNR2ATATGTATCACGCGA
\n", 860 | "

1032 rows × 6 columns

\n", 861 | "
" 862 | ], 863 | "text/plain": [ 864 | " gene_ids feature_types genome \\\n", 865 | "MIR1302-2HG ENSG00000243485 Gene Expression GRCh38 \n", 866 | "FAM138A ENSG00000237613 Gene Expression GRCh38 \n", 867 | "OR4F5 ENSG00000186092 Gene Expression GRCh38 \n", 868 | "AL627309.1 ENSG00000238009 Gene Expression GRCh38 \n", 869 | "AL627309.3 ENSG00000239945 Gene Expression GRCh38 \n", 870 | "... ... ... ... \n", 871 | "HLA-DR_TotalSeqB HLA-DR Antibody Capture \n", 872 | "TIGIT_TotalSeqB TIGIT Antibody Capture \n", 873 | "IgG1_control_TotalSeqB IgG1 Antibody Capture \n", 874 | "IgG2a_control_TotalSeqB IgG2a Antibody Capture \n", 875 | "IgG2b_control_TotalSeqB IgG2b Antibody Capture \n", 876 | "\n", 877 | " pattern read sequence \n", 878 | "MIR1302-2HG \n", 879 | "FAM138A \n", 880 | "OR4F5 \n", 881 | "AL627309.1 \n", 882 | "AL627309.3 \n", 883 | "... ... ... ... \n", 884 | "HLA-DR_TotalSeqB ^NNNNNNNNNN(BC)NNNNNNNNN R2 AATAGCGAGCAAGTA \n", 885 | "TIGIT_TotalSeqB ^NNNNNNNNNN(BC)NNNNNNNNN R2 TTGCTTACCGCCAGA \n", 886 | "IgG1_control_TotalSeqB ^NNNNNNNNNN(BC)NNNNNNNNN R2 GCCGGACGACATTAA \n", 887 | "IgG2a_control_TotalSeqB ^NNNNNNNNNN(BC)NNNNNNNNN R2 CTCCTACCTAAACTG \n", 888 | "IgG2b_control_TotalSeqB ^NNNNNNNNNN(BC)NNNNNNNNN R2 ATATGTATCACGCGA \n", 889 | "\n", 890 | "[1032 rows x 6 columns]" 891 | ] 892 | }, 893 | "execution_count": 24, 894 | "metadata": {}, 895 | "output_type": "execute_result" 896 | } 897 | ], 898 | "source": [ 899 | "adata_paired.var" 900 | ] 901 | }, 902 | { 903 | "cell_type": "markdown", 904 | "metadata": {}, 905 | "source": [ 906 | "The `.obs` layer is now empty, and we need to repopulate it." 907 | ] 908 | }, 909 | { 910 | "cell_type": "code", 911 | "execution_count": 25, 912 | "metadata": {}, 913 | "outputs": [], 914 | "source": [ 915 | "rna_cols = rna_a.obs.columns\n", 916 | "prot_cols = prot_a.obs.columns\n", 917 | "\n", 918 | "rnaobs = rna_a.obs.copy()\n", 919 | "rnaobs.columns = [\"rna:\" + x for x in rna_cols]\n", 920 | "protobs = prot_a.obs.copy()\n", 921 | "protobs.columns = [\"prot:\" + x for x in prot_cols]\n", 922 | "adata_paired.obs = pd.merge(rnaobs, protobs, left_index=True, right_index=True)" 923 | ] 924 | }, 925 | { 926 | "cell_type": "markdown", 927 | "metadata": {}, 928 | "source": [ 929 | "For more information on how AnnData performs concatenation, please check this [tutorial](https://anndata.readthedocs.io/en/latest/concatenation.html).\n", 930 | "\n", 931 | "\n" 932 | ] 933 | } 934 | ], 935 | "metadata": { 936 | "kernelspec": { 937 | "display_name": "Python 3 (ipykernel)", 938 | "language": "python", 939 | "name": "python3" 940 | }, 941 | "language_info": { 942 | "codemirror_mode": { 943 | "name": "ipython", 944 | "version": 3 945 | }, 946 | "file_extension": ".py", 947 | "mimetype": "text/x-python", 948 | "name": "python", 949 | "nbconvert_exporter": "python", 950 | "pygments_lexer": "ipython3", 951 | "version": "3.12.5" 952 | }, 953 | "vscode": { 954 | "interpreter": { 955 | "hash": "9f2c2ed92b9244d41be0bdd21ae19cc7b57648bb6fdcd4cb66fc3fbd398bef96" 956 | } 957 | } 958 | }, 959 | "nbformat": 4, 960 | "nbformat_minor": 2 961 | } 962 | -------------------------------------------------------------------------------- /docs/patched-deps.txt: -------------------------------------------------------------------------------- 1 | # fix from here: https://github.com/executablebooks/MyST-NB/pull/597 2 | myst-nb @ git+https://github.com/flying-sheep/MyST-NB@eval-metadata 3 | --------------------------------------------------------------------------------
/docs/references.bib: -------------------------------------------------------------------------------- 1 | @article{Wolf2018, 2 | author = {Wolf, F. Alexander 3 | and Angerer, Philipp 4 | and Theis, Fabian J.}, 5 | title = {SCANPY: large-scale single-cell gene expression data analysis}, 6 | journal = {Genome Biology}, 7 | year = {2018}, 8 | month = {Feb}, 9 | day = {06}, 10 | volume = {19}, 11 | number = {1}, 12 | pages = {15}, 13 | abstract = {Scanpy is a scalable toolkit for analyzing single-cell gene expression data. It includes methods for preprocessing, visualization, clustering, pseudotime and trajectory inference, differential expression testing, and simulation of gene regulatory networks. Its Python-based implementation efficiently deals with data sets of more than one million cells (https://github.com/theislab/Scanpy). Along with Scanpy, we present AnnData, a generic class for handling annotated data matrices (https://github.com/theislab/anndata).}, 14 | issn = {1474-760X}, 15 | doi = {10.1186/s13059-017-1382-0}, 16 | url = {https://doi.org/10.1186/s13059-017-1382-0} 17 | } 18 | @inproceedings{luecken2021, 19 | author = {Luecken, Malte and Burkhardt, Daniel and Cannoodt, Robrecht and Lance, Christopher and Agrawal, Aditi and Aliee, Hananeh and Chen, Ann and Deconinck, Louise and Detweiler, Angela and Granados, Alejandro and Huynh, Shelly and Isacco, Laura and Kim, Yang and Klein, Dominik and DE KUMAR, BONY and Kuppasani, Sunil and Lickert, Heiko and McGeever, Aaron and Melgarejo, Joaquin and Mekonen, Honey and Morri, Maurizio and M\"{u}ller, Michaela and Neff, Norma and Paul, Sheryl and Rieck, Bastian and Schneider, Kaylie and Steelman, Scott and Sterr, Michael and Treacy, Daniel and Tong, Alexander and Villani, Alexandra-Chloe and Wang, Guilin and Yan, Jia and Zhang, Ce and Pisco, Angela and Krishnaswamy, Smita and Theis, Fabian and Bloom, Jonathan M}, 20 | booktitle = {Proceedings of the Neural Information Processing Systems Track on Datasets and Benchmarks}, 21 | editor = {J. Vanschoren and S. Yeung}, 22 | pages = {}, 23 | publisher = {Curran}, 24 | title = {A sandbox for prediction and integration of DNA, RNA, and proteins in single cells}, 25 | url = {https://datasets-benchmarks-proceedings.neurips.cc/paper_files/paper/2021/file/158f3069a435b314a80bdcb024f8e422-Paper-round2.pdf}, 26 | volume = {1}, 27 | year = {2021} 28 | } 29 | @article{McCarthy2017, 30 | doi = {10.1093/bioinformatics/btw777}, 31 | url = {https://doi.org/10.1093/bioinformatics/btw777}, 32 | year = {2017}, 33 | month = jan, 34 | publisher = {Oxford University Press ({OUP})}, 35 | volume = {33}, 36 | number = {8}, 37 | pages = {1179--1186}, 38 | author = {Davis J McCarthy and Kieran R Campbell and Aaron T L Lun and Quin F Wills}, 39 | editor = {Ivo Hofacker}, 40 | title = {Scater: pre-processing, quality control, normalization and visualization of single-cell {RNA}-seq data in R}, 41 | journal = {Bioinformatics} 42 | } 43 | @article{Wolock2019, 44 | doi = {10.1016/j.cels.2018.11.005}, 45 | url = {https://doi.org/10.1016/j.cels.2018.11.005}, 46 | year = {2019}, 47 | month = apr, 48 | publisher = {Elsevier {BV}}, 49 | volume = {8}, 50 | number = {4}, 51 | pages = {281--291.e9}, 52 | author = {Samuel L. Wolock and Romain Lopez and Allon M. 
Klein}, 53 | title = {Scrublet: Computational Identification of Cell Doublets in Single-Cell Transcriptomic Data}, 54 | journal = {Cell Systems} 55 | } 56 | @article{Satija2015, 57 | doi = {10.1038/nbt.3192}, 58 | url = {https://doi.org/10.1038/nbt.3192}, 59 | year = {2015}, 60 | month = apr, 61 | publisher = {Springer Science and Business Media {LLC}}, 62 | volume = {33}, 63 | number = {5}, 64 | pages = {495--502}, 65 | author = {Rahul Satija and Jeffrey A Farrell and David Gennert and Alexander F Schier and Aviv Regev}, 66 | title = {Spatial reconstruction of single-cell gene expression data}, 67 | journal = {Nature Biotechnology} 68 | } 69 | @article{Zheng2017, 70 | doi = {10.1038/ncomms14049}, 71 | url = {https://doi.org/10.1038/ncomms14049}, 72 | year = {2017}, 73 | month = jan, 74 | publisher = {Springer Science and Business Media {LLC}}, 75 | volume = {8}, 76 | number = {1}, 77 | author = {Grace X. Y. Zheng and Jessica M. Terry and Phillip Belgrader and Paul Ryvkin and Zachary W. Bent and Ryan Wilson and Solongo B. Ziraldo and Tobias D. Wheeler and Geoff P. McDermott and Junjie Zhu and Mark T. Gregory and Joe Shuga and Luz Montesclaros and Jason G. Underwood and Donald A. Masquelier and Stefanie Y. Nishimura and Michael Schnall-Levin and Paul W. Wyatt and Christopher M. Hindson and Rajiv Bharadwaj and Alexander Wong and Kevin D. Ness and Lan W. Beppu and H. Joachim Deeg and Christopher McFarland and Keith R. Loeb and William J. Valente and Nolan G. Ericson and Emily A. Stevens and Jerald P. Radich and Tarjei S. Mikkelsen and Benjamin J. Hindson and Jason H. Bielas}, 78 | title = {Massively parallel digital transcriptional profiling of single cells}, 79 | journal = {Nature Communications} 80 | } 81 | @article{stuart2019comprehensive, 82 | title = {Comprehensive integration of single-cell data}, 83 | author = {Stuart, Tim and Butler, Andrew and Hoffman, Paul and Hafemeister, Christoph and Papalexi, Efthymia and Mauck, William M and Hao, Yuhan and Stoeckius, Marlon and Smibert, Peter and Satija, Rahul}, 84 | journal = {Cell}, 85 | volume = {177}, 86 | number = {7}, 87 | pages = {1888--1902}, 88 | year = {2019}, 89 | publisher = {Elsevier} 90 | } 91 | @article{traag2019louvain, 92 | title = {From Louvain to Leiden: guaranteeing well-connected communities}, 93 | author = {Traag, Vincent A and Waltman, Ludo and Van Eck, Nees Jan}, 94 | journal = {Scientific reports}, 95 | volume = {9}, 96 | number = {1}, 97 | pages = {5233}, 98 | year = {2019}, 99 | publisher = {Nature Publishing Group UK London} 100 | } 101 | -------------------------------------------------------------------------------- /docs/references.md: -------------------------------------------------------------------------------- 1 | # References 2 | 3 | ```{bibliography} 4 | :cited: 5 | ``` 6 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | # declare dependencies for binder and for executing notebooks on CI 2 | name: tutorials 3 | channels: 4 | - conda-forge 5 | dependencies: 6 | - python=3.12 7 | - jupyter-cache 8 | - ipykernel 9 | - ipython 10 | - jupyterlab-myst 11 | - jupytext 12 | - pip>=24.2 13 | - pip: 14 | - scanpy>=1.10 15 | - scrublet 16 | - leidenalg 17 | # We need this commit: https://github.com/saezlab/decoupler-py/commit/0b3d9a975e06230ebbc32a6f97810be1730b8562 18 | - decoupler @ git+https://github.com/saezlab/decoupler-py.git 19 | - celltypist 20 | - seaborn 21 | - mudata 22 | - 
muon 23 | - mudatasets 24 | - omnipath 25 | - pooch 26 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | build-backend = "hatchling.build" 3 | requires = ["hatchling"] 4 | 5 | [project] 6 | name = "scverse-tutorials" 7 | version = "0.0.1" 8 | description = "Tutorials for single-cell analysis with scverse packages" 9 | readme = "README.md" 10 | requires-python = ">=3.10" 11 | license = {file = "LICENSE"} 12 | authors = [ 13 | {name = "scverse team"}, 14 | ] 15 | maintainers = [ 16 | {name = "scverse team", email = "core-team@scverse.org"}, 17 | ] 18 | urls.Documentation = "https://scverse.org/scverse-tutorials" 19 | urls.Source = "https://github.com/scverse/scverse-tutorials" 20 | urls.Home-page = "https://github.com/scverse/scverse-tutorials" 21 | classifiers = [ 22 | "Private :: Do Not Upload", # Prevent uploading to PyPI 23 | ] 24 | 25 | [project.optional-dependencies] 26 | dev = ["pre-commit"] 27 | registry = [ 28 | "jsonschema", 29 | "pillow", 30 | "httpx", 31 | "pyyaml", 32 | ] 33 | docs = [ 34 | "sphinx>=7", 35 | "sphinx-book-theme>=1.1.0", 36 | "sphinx-issues>=5.0.1", 37 | "myst-nb>=1.1.0", 38 | "sphinxcontrib-bibtex>=1.0.0", 39 | "sphinx-autodoc-typehints", 40 | "sphinxext-opengraph", 41 | # For notebooks 42 | "ipykernel", 43 | "ipython", 44 | "sphinx-copybutton", 45 | ] 46 | 47 | [tool.hatch.envs.default] 48 | installer = "uv" 49 | features = ["dev"] 50 | 51 | [tool.hatch.envs.registry] 52 | features = ["registry"] 53 | [tool.hatch.envs.registry.scripts] 54 | validate = "python tutorial-registry/validate.py {args}" 55 | 56 | [tool.hatch.envs.docs] 57 | features = ["docs"] 58 | extra-dependencies = [ 59 | "setuptools", # undeclared dependency in pybtex 60 | # fix from here: https://github.com/executablebooks/MyST-NB/pull/597 61 | "myst-nb @ git+https://github.com/flying-sheep/MyST-NB.git@eval-metadata", 62 | ] 63 | [tool.hatch.envs.docs.scripts] 64 | build = "sphinx-build -M html docs docs/_build {args}" 65 | open = "python3 -m webbrowser -t docs/_build/html/index.html" 66 | clean = "git clean -fdX -- {args:docs}" 67 | 68 | [tool.hatch.build.targets.wheel] 69 | bypass-selection = true # This is not a package 70 | 71 | [tool.ruff] 72 | line-length = 120 73 | src = ["src"] 74 | extend-include = ["*.ipynb"] 75 | 76 | [tool.ruff.lint] 77 | select = [ 78 | "F", # Errors detected by Pyflakes 79 | "E", # Error detected by Pycodestyle 80 | "W", # Warning detected by Pycodestyle 81 | "I", # isort 82 | "D", # pydocstyle 83 | "B", # flake8-bugbear 84 | "TID", # flake8-tidy-imports 85 | "C4", # flake8-comprehensions 86 | "BLE", # flake8-blind-except 87 | "UP", # pyupgrade 88 | "RUF100", # Report unused noqa directives 89 | ] 90 | ignore = [ 91 | # line too long -> we accept long comment lines; formatter gets rid of long code lines 92 | "E501", 93 | # Do not assign a lambda expression, use a def -> lambda expression assignments are convenient 94 | "E731", 95 | # allow I, O, l as variable names -> I is the identity matrix 96 | "E741", 97 | # Missing docstring in public package 98 | "D104", 99 | # Missing docstring in public module 100 | "D100", 101 | # Missing docstring in __init__ 102 | "D107", 103 | # Errors from function calls in argument defaults. These are fine when the result is immutable. 
104 | "B008", 105 | # __magic__ methods are often self-explanatory, allow missing docstrings 106 | "D105", 107 | # first line should end with a period [Bug: doesn't work with single-line docstrings] 108 | "D400", 109 | # First line should be in imperative mood; try rephrasing 110 | "D401", 111 | ## Disable one in each pair of mutually incompatible rules 112 | # We don’t want a blank line before a class docstring 113 | "D203", 114 | # We want docstrings to start immediately after the opening triple quote 115 | "D213", 116 | ] 117 | 118 | [tool.ruff.lint.pydocstyle] 119 | convention = "numpy" 120 | 121 | [tool.ruff.lint.per-file-ignores] 122 | "docs/*" = [ 123 | "B018", # Trailing expressions in notebooks are not “useless” 124 | "D103", # No need for docstrings in functions, we use literate programming 125 | "E402", # Imports in non-top cells are fine 126 | ] 127 | "tests/*" = ["D"] 128 | "*/__init__.py" = ["F401"] 129 | 130 | [tool.cruft] 131 | skip = [ 132 | "tests", 133 | "src/**/__init__.py", 134 | "src/**/basic.py", 135 | "docs/api.md", 136 | "docs/changelog.md", 137 | "docs/references.bib", 138 | "docs/references.md", 139 | "docs/notebooks/example.ipynb", 140 | ] 141 | -------------------------------------------------------------------------------- /tutorial-registry/categories.yml: -------------------------------------------------------------------------------- 1 | - Data structures: 2 | description: | 3 | These tutorials teach you how to work with scverse data structures. 4 | If you are new to Python and/or scverse, we recommend you read the 5 | "getting started" and "axes" tutorials first. 6 | 7 | - scRNA-seq: 8 | description: | 9 | The following tutorials show show to analyze single-cell gene expression data. 10 | 11 | - Spatial: 12 | description: | 13 | Analyze spatial data generated with different technologies 14 | 15 | - Adaptive immune cell receptor: 16 | description: | 17 | Tutorials for analyzing single-cell B-cell and T-cell receptor sequencing data 18 | 19 | - Surface proteins: 20 | description: | 21 | CITE-seq analyses 22 | 23 | - ATAC-seq: 24 | description: | 25 | Analyse chromatin accessibility data 26 | 27 | - Multimodal: 28 | description: | 29 | Tutorials that combine data from multiple modalities 30 | 31 | - Tips & Tricks: 32 | description: | 33 | Useful tips for data analysis with scverse tools that are independent of specific packages. 34 | -------------------------------------------------------------------------------- /tutorial-registry/schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json-schema.org/draft/2020-12/schema", 3 | "$id": "https://raw.githubusercontent.com/scverse/scverse-tutorials/main/schema.json", 4 | "title": "Scverse Tutorials", 5 | "description": "A tutorial for an scverse core or ecosystem package that is listed on scverse.org/learn", 6 | "type": "object", 7 | "properties": { 8 | "name": { 9 | "description": "The name of the tutorial. Must be unique.", 10 | "type": "string" 11 | }, 12 | "description": { 13 | "description": "Summary of the tutorial (1-2 sentences).", 14 | "type": "string" 15 | }, 16 | "link": { 17 | "description": "Link to the tutorial", 18 | "type": "string", 19 | "format": "uri" 20 | }, 21 | "image": { 22 | "description": "Preview image file that is stored in the same folder as the meta.yaml file. (1) supported formats: svg, png, webp. (2) filename: icon.{svg,png,webp}. 
(3) should either be SVG or have exactly 512px in at least one dimension).", 23 | "type": "string", 24 | "pattern": "icon\\.(svg|png|webp)" 25 | }, 26 | "primary_category": { 27 | "description": "Category the tutorial will be listed under", 28 | "type": "string", 29 | "enum": [ 30 | "Data structures", 31 | "scRNA-seq", 32 | "Spatial", 33 | "Adaptive immune cell receptor", 34 | "Surface proteins", 35 | "ATAC-seq", 36 | "Multimodal", 37 | "Tips & Tricks" 38 | ] 39 | }, 40 | "order": { 41 | "description": "Order of appearence on the website. Lower numbers mean earlier (higher priority).", 42 | "type": "number" 43 | }, 44 | "modality": { 45 | "description": "Which modalities are used in the tutorial?", 46 | "type": "array", 47 | "items": { 48 | "type": "string", 49 | "enum": ["DNA", "RNA", "protein", "ATAC", "AIRR", "spatial"] 50 | }, 51 | "uniqueItems": true 52 | }, 53 | "tags": { 54 | "description": "Additional tags that describe the tutorial", 55 | "type": "array", 56 | "items": { 57 | "type": "string", 58 | "enum": [ 59 | "data structures", 60 | "differential expression", 61 | "functional analysis", 62 | "concatenation", 63 | "preprocessing", 64 | "cell-type annotation", 65 | "quality control", 66 | "visualization", 67 | "multimodal", 68 | "data integration", 69 | "perturbation", 70 | "compositional analysis", 71 | "RNA velocity", 72 | "pseudotime", 73 | "interoperability" 74 | ] 75 | }, 76 | "uniqueItems": true 77 | }, 78 | "packages": { 79 | "type": "array", 80 | "items": { 81 | "type": "string" 82 | }, 83 | "description": "List the scverse core and ecosystem packages this tutorial is about. This should be the pypi identifier where applicable. ", 84 | "uniqueItems": true 85 | }, 86 | "authors": { 87 | "description": "List of authors/maintainers of the tutorial. Entries must be Github user IDs.", 88 | "type": "array", 89 | "items": { 90 | "type": "string" 91 | }, 92 | "minItems": 1, 93 | "uniqueItems": true 94 | } 95 | }, 96 | "required": [ 97 | "name", 98 | "description", 99 | "link", 100 | "image", 101 | "primary_category", 102 | "authors" 103 | ] 104 | } 105 | -------------------------------------------------------------------------------- /tutorial-registry/tutorials/ATAC-preprocessing/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scverse/scverse-tutorials/169d1d33f00f7e738da725f350d119cd8c7d4aba/tutorial-registry/tutorials/ATAC-preprocessing/icon.png -------------------------------------------------------------------------------- /tutorial-registry/tutorials/ATAC-preprocessing/meta.yaml: -------------------------------------------------------------------------------- 1 | name: Processing chromatin accessibility 2 | description: | 3 | This chapter shows multimodal single-cell gene expression and 4 | chromatin accessibility analysis. In this notebook, scATAC-seq 5 | data processing is described. 
6 | link: https://muon-tutorials.readthedocs.io/en/latest/single-cell-rna-atac/pbmc10k/2-Chromatin-Accessibility-Processing.html 7 | image: icon.png 8 | primary_category: ATAC-seq 9 | order: 10 10 | tags: 11 | - preprocessing 12 | - multimodal 13 | packages: 14 | - anndata 15 | - scanpy 16 | - muon 17 | authors: 18 | - gtca 19 | -------------------------------------------------------------------------------- /tutorial-registry/tutorials/CITEseq-integration/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scverse/scverse-tutorials/169d1d33f00f7e738da725f350d119cd8c7d4aba/tutorial-registry/tutorials/CITEseq-integration/icon.png -------------------------------------------------------------------------------- /tutorial-registry/tutorials/CITEseq-integration/meta.yaml: -------------------------------------------------------------------------------- 1 | name: CITE-seq integration 2 | description: | 3 | These notebooks showcase CITE-seq analysis of PBMCs with dsb 4 | normalization, MOFA+ data integration, and weighted nearest neighbors 5 | handling multimodal embeddings. 6 | link: https://muon-tutorials.readthedocs.io/en/latest/cite-seq/1-CITE-seq-PBMC-5k.html 7 | image: icon.png 8 | primary_category: Surface proteins 9 | order: 10 10 | tags: 11 | - multimodal 12 | - data integration 13 | packages: 14 | - scanpy 15 | - muon 16 | - mofax 17 | authors: 18 | - gtca 19 | -------------------------------------------------------------------------------- /tutorial-registry/tutorials/Joint-analysis-of-multiomic-data-with-MultiVI/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scverse/scverse-tutorials/169d1d33f00f7e738da725f350d119cd8c7d4aba/tutorial-registry/tutorials/Joint-analysis-of-multiomic-data-with-MultiVI/icon.png -------------------------------------------------------------------------------- /tutorial-registry/tutorials/Joint-analysis-of-multiomic-data-with-MultiVI/meta.yaml: -------------------------------------------------------------------------------- 1 | name: Joint analysis of paired and unpaired multiomic data with MultiVI 2 | description: | 3 | This tutorial shows how to read multiomic data, create a joint object with 4 | paired/unpaired data, train MultiVI model, visualize latent space, 5 | and run differential analyses. 
6 | link: https://docs.scvi-tools.org/en/stable/tutorials/notebooks/multimodal/MultiVI_tutorial.html 7 | image: icon.png 8 | primary_category: ATAC-seq 9 | order: 20 10 | tags: 11 | - preprocessing 12 | - visualization 13 | - differential expression 14 | packages: 15 | - scanpy 16 | - scvi 17 | authors: 18 | - adamgayoso 19 | - galenxing 20 | - romain-lopez 21 | - Edouard360 22 | - martinkim0 23 | -------------------------------------------------------------------------------- /tutorial-registry/tutorials/RNA-velocity/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scverse/scverse-tutorials/169d1d33f00f7e738da725f350d119cd8c7d4aba/tutorial-registry/tutorials/RNA-velocity/icon.png -------------------------------------------------------------------------------- /tutorial-registry/tutorials/RNA-velocity/meta.yaml: -------------------------------------------------------------------------------- 1 | name: RNA velocity 2 | description: | 3 | This tutorial guides you through how RNA velocity can be inferred from single cell RNA-seq data 4 | using scVelo. 5 | link: https://scvelo.readthedocs.io/en/stable/getting_started.html 6 | image: icon.png 7 | primary_category: scRNA-seq 8 | order: 50 9 | tags: 10 | - preprocessing 11 | - RNA velocity 12 | - visualization 13 | packages: 14 | - anndata 15 | - scanpy 16 | authors: 17 | - VolkerBergen 18 | - WeilerP 19 | - stefanpeidli 20 | - Marius1311 21 | -------------------------------------------------------------------------------- /tutorial-registry/tutorials/advanced-plotting/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scverse/scverse-tutorials/169d1d33f00f7e738da725f350d119cd8c7d4aba/tutorial-registry/tutorials/advanced-plotting/icon.png -------------------------------------------------------------------------------- /tutorial-registry/tutorials/advanced-plotting/meta.yaml: -------------------------------------------------------------------------------- 1 | name: Advanced plotting 2 | description: | 3 | This tutorial explains how to customize matplotlib plots generated 4 | by scanpy or other scverse libraries. 5 | link: https://scanpy-tutorials.readthedocs.io/en/latest/plotting/advanced.html 6 | image: icon.png 7 | primary_category: Tips & Tricks 8 | order: 30 9 | tags: 10 | - visualization 11 | packages: 12 | - scanpy 13 | authors: 14 | - Hrovatin 15 | -------------------------------------------------------------------------------- /tutorial-registry/tutorials/anndata-getting-started/meta.yaml: -------------------------------------------------------------------------------- 1 | name: Getting started with AnnData 2 | description: | 3 | This tutorial helps you to explore the structure and content of single-cell 4 | data analysis results in a *.h5ad file using AnnData, Scanpy, and Python. 
5 | link: https://scverse-tutorials.readthedocs.io/en/latest/notebooks/anndata_getting_started.html 6 | image: icon.svg 7 | primary_category: Data structures 8 | order: 10 9 | tags: 10 | - data structures 11 | packages: 12 | - anndata 13 | authors: 14 | - jlause 15 | -------------------------------------------------------------------------------- /tutorial-registry/tutorials/anndata-mudata-axes/icon.svg: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tutorial-registry/tutorials/anndata-mudata-axes/meta.yaml: -------------------------------------------------------------------------------- 1 | name: Axes in AnnData and MuData 2 | description: | 3 | In this tutorial we showcase operations on independent AnnData objects 4 | (scRNAseq matrix + metadata), demonstrating how various processing 5 | workflows can be stored in one MuData object. 6 | link: https://scverse-tutorials.readthedocs.io/en/latest/notebooks/tutorial_axes_anndata_mudata.html 7 | primary_category: Data structures 8 | order: 20 9 | image: icon.svg 10 | tags: 11 | - data structures 12 | packages: 13 | - anndata 14 | authors: 15 | - bio-la 16 | -------------------------------------------------------------------------------- /tutorial-registry/tutorials/bentotools-subcellular-resolution/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scverse/scverse-tutorials/169d1d33f00f7e738da725f350d119cd8c7d4aba/tutorial-registry/tutorials/bentotools-subcellular-resolution/icon.png -------------------------------------------------------------------------------- /tutorial-registry/tutorials/bentotools-subcellular-resolution/meta.yaml: -------------------------------------------------------------------------------- 1 | name: Spatial analysis with squidpy 2 | description: | 3 | This tutorial demonstrates how to use squidpy to analyse transcriptomics 4 | data with spatial resolution. 5 | link: https://squidpy.readthedocs.io/en/latest/notebooks/tutorials/tutorial_vizgen_mouse_liver.html 6 | image: icon.png 7 | primary_category: Spatial 8 | order: 10 9 | tags: 10 | - preprocessing 11 | - visualization 12 | packages: 13 | - squidpy 14 | authors: 15 | - giovp 16 | -------------------------------------------------------------------------------- /tutorial-registry/tutorials/cellcharter-spatial-transcriptomics/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scverse/scverse-tutorials/169d1d33f00f7e738da725f350d119cd8c7d4aba/tutorial-registry/tutorials/cellcharter-spatial-transcriptomics/icon.png -------------------------------------------------------------------------------- /tutorial-registry/tutorials/cellcharter-spatial-transcriptomics/meta.yaml: -------------------------------------------------------------------------------- 1 | name: Spatial clustering of spatial transcriptomics data with CellCharter 2 | description: | 3 | This tutorial demonstrates how to use CellCharter to cluster spatial transcriptomics data obtained with the CosMx technology from Nanostring. 4 | This method can also be used for spatial proteomics data. 
5 | link: https://cellcharter.readthedocs.io/en/latest/notebooks/cosmx_human_nsclc.html 6 | image: icon.png 7 | primary_category: Spatial 8 | order: 20 9 | tags: 10 | - compositional analysis 11 | packages: 12 | - CellCharter 13 | - squidpy 14 | - scvi 15 | - scanpy 16 | - anndata 17 | authors: 18 | - marcovarrone 19 | - grst 20 | -------------------------------------------------------------------------------- /tutorial-registry/tutorials/compositional-analysis/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scverse/scverse-tutorials/169d1d33f00f7e738da725f350d119cd8c7d4aba/tutorial-registry/tutorials/compositional-analysis/icon.png -------------------------------------------------------------------------------- /tutorial-registry/tutorials/compositional-analysis/meta.yaml: -------------------------------------------------------------------------------- 1 | name: Compositional analysis 2 | description: | 3 | This tutorial introduces compositional analysis at cell identity 4 | cluster level, based on known cell types or states affected by 5 | perturbations. 6 | link: https://www.sc-best-practices.org/conditions/compositional.html 7 | image: icon.png 8 | primary_category: scRNA-seq 9 | order: 70 10 | tags: 11 | - compositional analysis 12 | packages: 13 | - scanpy 14 | - tensorflow 15 | authors: 16 | - johannesostner 17 | - emdann 18 | - Zethson 19 | - alitinet 20 | -------------------------------------------------------------------------------- /tutorial-registry/tutorials/concatenation-of-multimodal-data/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scverse/scverse-tutorials/169d1d33f00f7e738da725f350d119cd8c7d4aba/tutorial-registry/tutorials/concatenation-of-multimodal-data/icon.png -------------------------------------------------------------------------------- /tutorial-registry/tutorials/concatenation-of-multimodal-data/meta.yaml: -------------------------------------------------------------------------------- 1 | name: Concatenation of multimodal data 2 | description: | 3 | This tutorial shows how you can concatenate 2 MuData objects that may represent complementary 4 | slices of the same dataset or 2 modalities into one AnnData. 5 | link: https://scverse-tutorials.readthedocs.io/en/latest/notebooks/tutorial_concatenation_anndata_mudata.html 6 | image: icon.png 7 | primary_category: Data structures 8 | order: 40 9 | tags: 10 | - concatenation 11 | packages: 12 | - anndata 13 | - scanpy 14 | - mudata 15 | authors: 16 | - ivirshup 17 | - dbdimitrov 18 | - AnnaChristina 19 | - Hrovatin 20 | -------------------------------------------------------------------------------- /tutorial-registry/tutorials/concatenation-of-unimodal-data/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scverse/scverse-tutorials/169d1d33f00f7e738da725f350d119cd8c7d4aba/tutorial-registry/tutorials/concatenation-of-unimodal-data/icon.png -------------------------------------------------------------------------------- /tutorial-registry/tutorials/concatenation-of-unimodal-data/meta.yaml: -------------------------------------------------------------------------------- 1 | name: Concatenation 2 | description: | 3 | In this notebook we showcase how to perform concatenation, meaning to 4 | keep all sub elements of each object, and stack these elements in an 5 | ordered way. 
6 | link: https://anndata.readthedocs.io/en/latest/concatenation.html 7 | image: icon.png 8 | primary_category: Data structures 9 | order: 30 10 | tags: 11 | - concatenation 12 | packages: 13 | - anndata 14 | - scanpy 15 | - scipy 16 | authors: 17 | - ivirshup 18 | -------------------------------------------------------------------------------- /tutorial-registry/tutorials/decoupler-pseudobulk-de/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scverse/scverse-tutorials/169d1d33f00f7e738da725f350d119cd8c7d4aba/tutorial-registry/tutorials/decoupler-pseudobulk-de/icon.png -------------------------------------------------------------------------------- /tutorial-registry/tutorials/decoupler-pseudobulk-de/meta.yaml: -------------------------------------------------------------------------------- 1 | name: Pseudo-bulk differential expression and functional analysis 2 | description: | 3 | This notebook showcases decoupler for pathway and TF enrichment on ~5k 4 | Blood myeloid cells from healthy and COVID-19 infected patients. 5 | link: https://decoupler-py.readthedocs.io/en/latest/notebooks/pseudobulk.html 6 | image: icon.png 7 | primary_category: scRNA-seq 8 | order: 30 9 | tags: 10 | - differential expression 11 | - functional analysis 12 | packages: 13 | - decoupler 14 | - scanpy 15 | authors: 16 | - PauBadiaM 17 | -------------------------------------------------------------------------------- /tutorial-registry/tutorials/interoperability/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scverse/scverse-tutorials/169d1d33f00f7e738da725f350d119cd8c7d4aba/tutorial-registry/tutorials/interoperability/icon.png -------------------------------------------------------------------------------- /tutorial-registry/tutorials/interoperability/icon.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scverse/scverse-tutorials/169d1d33f00f7e738da725f350d119cd8c7d4aba/tutorial-registry/tutorials/interoperability/icon.webp -------------------------------------------------------------------------------- /tutorial-registry/tutorials/interoperability/meta.yaml: -------------------------------------------------------------------------------- 1 | name: Interoperability 2 | description: | 3 | This document lists resources for conversion to other data formats and programming 4 | languages, e.g. R, Julia, ... 
5 | link: https://scverse-tutorials.readthedocs.io/en/latest/notebooks/scverse_data_interoperability.html 6 | image: icon.png 7 | primary_category: Tips & Tricks 8 | order: 20 9 | tags: 10 | - interoperability 11 | packages: 12 | - anndata 13 | - mudata 14 | authors: 15 | - emdann 16 | -------------------------------------------------------------------------------- /tutorial-registry/tutorials/perturbation-modeling/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scverse/scverse-tutorials/169d1d33f00f7e738da725f350d119cd8c7d4aba/tutorial-registry/tutorials/perturbation-modeling/icon.png -------------------------------------------------------------------------------- /tutorial-registry/tutorials/perturbation-modeling/meta.yaml: -------------------------------------------------------------------------------- 1 | name: Perturbation modeling 2 | description: | 3 | This tutorial covers 3 approaches using single-cell perturbation data: 4 | Augur (identify affected cell types), scGen (predict transcriptional response), 5 | Mixscape (quantify CRISPR sensitivity). 6 | link: https://www.sc-best-practices.org/conditions/perturbation_modeling.html 7 | image: icon.png 8 | primary_category: scRNA-seq 9 | order: 60 10 | tags: 11 | - perturbation 12 | packages: 13 | - pertpy 14 | - scanpy 15 | authors: 16 | - Zethson 17 | - M0hammadL 18 | -------------------------------------------------------------------------------- /tutorial-registry/tutorials/plotting-in-scanpy/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scverse/scverse-tutorials/169d1d33f00f7e738da725f350d119cd8c7d4aba/tutorial-registry/tutorials/plotting-in-scanpy/icon.png -------------------------------------------------------------------------------- /tutorial-registry/tutorials/plotting-in-scanpy/meta.yaml: -------------------------------------------------------------------------------- 1 | name: Plotting in scanpy 2 | description: | 3 | This tutorial explores the visualization possibilities of scanpy, including 4 | embeddings and the visualization of marker genes and differentially expressed genes. 5 | link: https://scanpy-tutorials.readthedocs.io/en/latest/plotting/core.html 6 | image: icon.png 7 | primary_category: Tips & Tricks 8 | order: 10 9 | tags: 10 | - visualization 11 | packages: 12 | - scanpy 13 | authors: 14 | - fidelram 15 | -------------------------------------------------------------------------------- /tutorial-registry/tutorials/preprocessing-and-clustering/icon.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scverse/scverse-tutorials/169d1d33f00f7e738da725f350d119cd8c7d4aba/tutorial-registry/tutorials/preprocessing-and-clustering/icon.webp -------------------------------------------------------------------------------- /tutorial-registry/tutorials/preprocessing-and-clustering/meta.yaml: -------------------------------------------------------------------------------- 1 | name: Preprocessing, clustering and cell-type annotation 2 | description: | 3 | This fundamental tutorial covers common analysis steps: quality control, 4 | normalization, feature selection, dimensionality reduction, clustering, 5 | and cell-type annotation. 
6 | link: https://scverse-tutorials.readthedocs.io/en/latest/notebooks/basic-scrna-tutorial.html 7 | image: icon.webp 8 | primary_category: scRNA-seq 9 | order: 10 10 | tags: 11 | - preprocessing 12 | - cell-type annotation 13 | - quality control 14 | - visualization 15 | packages: 16 | - anndata 17 | - scanpy 18 | - celltypist 19 | - decoupler 20 | authors: 21 | - ivirshup 22 | - dbdimitrov 23 | - AnnaChristina 24 | - Hrovatin 25 | -------------------------------------------------------------------------------- /tutorial-registry/tutorials/pseudotemporal-ordering/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scverse/scverse-tutorials/169d1d33f00f7e738da725f350d119cd8c7d4aba/tutorial-registry/tutorials/pseudotemporal-ordering/icon.png -------------------------------------------------------------------------------- /tutorial-registry/tutorials/pseudotemporal-ordering/meta.yaml: -------------------------------------------------------------------------------- 1 | name: Pseudotemporal ordering 2 | description: | 3 | This tutorial shows how a pseudotime can be constructed and compares different pseudotimes. 4 | link: https://www.sc-best-practices.org/trajectories/pseudotemporal.html# 5 | image: icon.png 6 | primary_category: scRNA-seq 7 | order: 40 8 | tags: 9 | - pseudotime 10 | - visualization 11 | packages: 12 | - scanpy 13 | authors: 14 | - WeilerP 15 | - Zethson 16 | -------------------------------------------------------------------------------- /tutorial-registry/tutorials/scirpy-tcr/icon.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 22 | 24 | 49 | 54 | 55 | 57 | 58 | 60 | image/svg+xml 61 | 63 | 64 | 65 | 66 | 67 | 72 | 88 | 93 | 98 | 103 | 108 | 113 | 116 | 122 | 128 | 133 | 138 | 139 | 144 | 145 | 146 | 147 | -------------------------------------------------------------------------------- /tutorial-registry/tutorials/scirpy-tcr/meta.yaml: -------------------------------------------------------------------------------- 1 | name: Single-cell T-cell receptor analysis with scirpy 2 | description: | 3 | In this tutorial, we show how to perform QC on scTCR-seq data, 4 | define clonotypes, cluster receptors by their sequence similarity 5 | and compute repertoire overlaps between patients. 6 | link: https://scirpy.scverse.org/en/latest/tutorials/tutorial_3k_tcr.html 7 | primary_category: Adaptive immune cell receptor 8 | order: 10 9 | image: icon.svg 10 | tags: 11 | - quality control 12 | - preprocessing 13 | - visualization 14 | - multimodal 15 | packages: 16 | - scirpy 17 | - muon 18 | - mudata 19 | - anndata 20 | authors: 21 | - grst 22 | -------------------------------------------------------------------------------- /tutorial-registry/tutorials/scverse-objects-in-backed-mode/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scverse/scverse-tutorials/169d1d33f00f7e738da725f350d119cd8c7d4aba/tutorial-registry/tutorials/scverse-objects-in-backed-mode/icon.png -------------------------------------------------------------------------------- /tutorial-registry/tutorials/scverse-objects-in-backed-mode/meta.yaml: -------------------------------------------------------------------------------- 1 | name: Working with scverse objects in backed mode 2 | description: | 3 | In this tutorial, we demonstrate working with scverse data objects 4 | without loading full datasets. 
/tutorial-registry/tutorials/scvi-batch-effect-removal/icon.svg:
--------------------------------------------------------------------------------
[SVG markup omitted]
--------------------------------------------------------------------------------
/tutorial-registry/tutorials/scvi-batch-effect-removal/meta.yaml:
--------------------------------------------------------------------------------
1 | name: Batch-effect removal with scvi-tools
2 | description: |
3 |   In this tutorial, we demonstrate how to use scvi-tools to fit a model to single-cell count data,
4 |   correct batch effects, and perform differential gene expression analysis.
5 | link: https://docs.scvi-tools.org/en/stable/tutorials/notebooks/quick_start/api_overview.html
6 | primary_category: scRNA-seq
7 | order: 20
8 | image: icon.svg
9 | tags:
10 |   - preprocessing
11 |   - differential expression
12 | packages:
13 |   - scvi-tools
14 | authors:
15 |   - martinkim0
16 |   - adamgayoso
17 |
--------------------------------------------------------------------------------
/tutorial-registry/tutorials/squidpy-spatial/icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scverse/scverse-tutorials/169d1d33f00f7e738da725f350d119cd8c7d4aba/tutorial-registry/tutorials/squidpy-spatial/icon.png
--------------------------------------------------------------------------------
/tutorial-registry/tutorials/squidpy-spatial/meta.yaml:
--------------------------------------------------------------------------------
1 | name: Spatial analysis at subcellular resolution
2 | description: |
3 |   This tutorial shows how to use bento-tools to study
4 |   gene expression at subcellular resolution.
5 | link: https://bento-tools.readthedocs.io/en/latest/index.html
6 | image: icon.png
7 | primary_category: Spatial
8 | order: 30
9 | tags:
10 |   - preprocessing
11 |   - visualization
12 | packages:
13 |   - bento-tools
14 | authors:
15 |   - ckmah
16 |
--------------------------------------------------------------------------------
/tutorial-registry/validate.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """Validate tutorials' meta.yaml and generate an output directory with json/images to be uploaded on github pages."""
3 |
4 | from __future__ import annotations
5 |
6 | import argparse
7 | import json
8 | import shutil
9 | import sys
10 | from pathlib import Path
11 | from textwrap import dedent
12 | from typing import TYPE_CHECKING, Any, Literal
13 |
14 | import httpx
15 | import jsonschema
16 | import yaml
17 | from PIL import Image
18 |
19 | if TYPE_CHECKING:
20 |     from collections.abc import Generator, Iterable, Mapping
21 |
22 | HERE = Path(__file__).absolute().parent
23 |
24 |
25 | def _check_url_exists(url: str) -> None:
26 |     response = httpx.get(url)
27 |     if response.status_code != 200:
28 |         raise ValueError(f"URL {url} is not reachable (error {response.status_code}).")
29 |
30 |
31 | def _check_image(img_path: Path) -> None:
32 |     """Check that the image exists and that it is either SVG or fits into the 512x512 bounding box."""
33 |     if not img_path.exists():
34 |         raise ValueError(f"Image does not exist: {img_path}")
35 |     if img_path.suffix == ".svg":
36 |         return
37 |     with Image.open(img_path) as img:
38 |         width, height = img.size
39 |         if not ((width == 512 and height <= 512) or (width <= 512 and height == 512)):
40 |             raise ValueError(
41 |                 dedent(
42 |                     f"""\
43 |                     When validating {img_path}: Image must fit in a 512x512px bounding box and one dimension must be
44 |                     exactly 512 px. Actual dimensions (width, height): ({width}, {height}).
45 |                     """
46 |                 )
47 |             )
48 |
49 |
50 | def validate_tutorials(schema_file: Path, tutorials_dir: Path) -> Generator[dict, None, None]:
51 |     """Find all tutorial `meta.yaml` files in the tutorials dir and yield tutorial records."""
52 |     schema = json.loads(schema_file.read_bytes())
53 |     known_links = set()
54 |     known_primary_to_orders: dict[str, set[int]] = {}
55 |
56 |     for tmp_meta_file in tutorials_dir.rglob("meta.yaml"):
57 |         tutorial_id = tmp_meta_file.parent.name
58 |         with tmp_meta_file.open() as f:
59 |             tmp_tutorial = yaml.load(f, yaml.SafeLoader)
60 |
61 |         jsonschema.validate(tmp_tutorial, schema)
62 |
63 |         link = tmp_tutorial["link"]
64 |         if link in known_links:
65 |             raise ValueError(f"When validating {tmp_meta_file}: Duplicate link: {link}")
66 |         known_links.add(link)
67 |
68 |         # Check for duplicate orders within the same primary category
69 |         primary_category = tmp_tutorial.get("primary_category")
70 |         order = tmp_tutorial.get("order")
71 |
72 |         if primary_category and order is not None:
73 |             if primary_category not in known_primary_to_orders:
74 |                 known_primary_to_orders[primary_category] = set()
75 |
76 |             if order in known_primary_to_orders[primary_category]:
77 |                 raise ValueError(
78 |                     f"When validating {tmp_meta_file}: Duplicate order {order} "
79 |                     f"for primary category '{primary_category}'"
80 |                 )
81 |
82 |             known_primary_to_orders[primary_category].add(order)
83 |
84 |         _check_url_exists(link)
85 |
86 |         # replace image path by absolute local path to image
87 |         img_path = tutorials_dir / tutorial_id / tmp_tutorial["image"]
88 |         _check_image(img_path)
89 |         tmp_tutorial["image"] = str(img_path)
90 |
91 |         yield tmp_tutorial
92 |
93 |
94 | def load_categories(categories_file: Path) -> dict[str, Any]:
95 |     """Load the categories YAML file."""
96 |     with open(categories_file) as f:
97 |         return yaml.load(f, yaml.SafeLoader)
98 |
99 |
100 | def make_output(
101 |     categories: Iterable[Mapping[str, Mapping[Literal["description"], str]]],
102 |     tutorials: Iterable[Mapping[str, str | Iterable[str]]],
103 |     *,
104 |     outdir: Path | None = None,
105 | ) -> None:
106 |     """Create the output directory.
107 |
108 |     Structure:
109 |     outdir
110 |     - tutorials.json  # contains categories and tutorials
111 |     - tutorialxxx/icon.svg  # original icon filenames under a folder for each tutorial. The path of the icon is listed in the json.
112 |     - tutorialyyy/icon.png
113 |     """
114 |     if outdir:
115 |         outdir.mkdir(parents=True)
116 |
117 |     tutorials_rel = []
118 |     for tutorial in tutorials:
119 |         img_srcpath = Path(tutorial["image"])
120 |         img_localpath = Path(img_srcpath.parent.name) / img_srcpath.name
121 |         tut_rel = dict(tutorial)
122 |         tut_rel["image"] = str(img_localpath)
123 |         tutorials_rel.append(tut_rel)
124 |         if outdir:
125 |             img_outpath = outdir / img_localpath
126 |             img_outpath.parent.mkdir()
127 |             shutil.copy(img_srcpath, img_outpath)
128 |
129 |     result = {"categories": categories, "tutorials": tutorials_rel}
130 |
131 |     if outdir:
132 |         with (outdir / "tutorials.json").open("w") as f:
133 |             json.dump(result, f)
134 |     else:
135 |         json.dump(result, sys.stdout, indent=2)
136 |
137 |
138 | def main(schema_file: Path, meta_dir: Path, categories_file: Path, *, outdir: Path | None = None):
139 |     """Validate and create output directory."""
140 |     tutorials = list(validate_tutorials(schema_file, meta_dir))
141 |     categories = load_categories(categories_file)
142 |     make_output(categories, tutorials, outdir=outdir)
143 |
144 |
145 | if __name__ == "__main__":
146 |     parser = argparse.ArgumentParser(
147 |         prog="validate.py",
148 |         description="Validate tutorials' meta.yaml and generate an output directory with json/images to be uploaded on github pages.",
149 |     )
150 |     parser.add_argument("--outdir", type=Path, help="outdir that will contain the data to be uploaded on github pages")
151 |     args = parser.parse_args()
152 |
153 |     SCHEMA = HERE / "schema.json"
154 |     META_DIR = HERE / "tutorials"
155 |     CATEGORIES = HERE / "categories.yml"
156 |
157 |     main(SCHEMA, META_DIR, CATEGORIES, outdir=args.outdir)
158 |
--------------------------------------------------------------------------------
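Finally, a small sketch of driving validate.py programmatically, equivalent to running `python tutorial-registry/validate.py --outdir _build` from the repository root. The `_build` name is an arbitrary choice; because make_output() calls mkdir(parents=True) without exist_ok, the output directory must not already exist, and omitting outdir prints the combined categories/tutorials JSON to stdout instead.

    from pathlib import Path

    # Assumes the current working directory is tutorial-registry/ so validate.py is importable.
    from validate import HERE, main

    main(
        HERE / "schema.json",   # same defaults the CLI entry point uses
        HERE / "tutorials",
        HERE / "categories.yml",
        outdir=Path("_build"),  # must not exist yet; make_output() creates it
    )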