68 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/docs/_static/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scverse/scverse-tutorials/169d1d33f00f7e738da725f350d119cd8c7d4aba/docs/_static/.gitkeep
--------------------------------------------------------------------------------
/docs/_static/css/custom.css:
--------------------------------------------------------------------------------
1 | /* Reduce the font size in data frames - See https://github.com/scverse/cookiecutter-scverse/issues/193 */
2 | div.cell_output table.dataframe {
3 | font-size: 0.8em;
4 | }
5 |
--------------------------------------------------------------------------------
/docs/_templates/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scverse/scverse-tutorials/169d1d33f00f7e738da725f350d119cd8c7d4aba/docs/_templates/.gitkeep
--------------------------------------------------------------------------------
/docs/_templates/autosummary/class.rst:
--------------------------------------------------------------------------------
1 | {{ fullname | escape | underline}}
2 |
3 | .. currentmodule:: {{ module }}
4 |
5 | .. add toctree option to make autodoc generate the pages
6 |
7 | .. autoclass:: {{ objname }}
8 |
9 | {% block attributes %}
10 | {% if attributes %}
11 | Attributes table
12 | ~~~~~~~~~~~~~~~~~~
13 |
14 | .. autosummary::
15 | {% for item in attributes %}
16 | ~{{ fullname }}.{{ item }}
17 | {%- endfor %}
18 | {% endif %}
19 | {% endblock %}
20 |
21 | {% block methods %}
22 | {% if methods %}
23 | Methods table
24 | ~~~~~~~~~~~~~
25 |
26 | .. autosummary::
27 | {% for item in methods %}
28 | {%- if item != '__init__' %}
29 | ~{{ fullname }}.{{ item }}
30 | {%- endif -%}
31 | {%- endfor %}
32 | {% endif %}
33 | {% endblock %}
34 |
35 | {% block attributes_documentation %}
36 | {% if attributes %}
37 | Attributes
38 | ~~~~~~~~~~~
39 |
40 | {% for item in attributes %}
41 |
42 | .. autoattribute:: {{ [objname, item] | join(".") }}
43 | {%- endfor %}
44 |
45 | {% endif %}
46 | {% endblock %}
47 |
48 | {% block methods_documentation %}
49 | {% if methods %}
50 | Methods
51 | ~~~~~~~
52 |
53 | {% for item in methods %}
54 | {%- if item != '__init__' %}
55 |
56 | .. automethod:: {{ [objname, item] | join(".") }}
57 | {%- endif -%}
58 | {%- endfor %}
59 |
60 | {% endif %}
61 | {% endblock %}
62 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 |
3 | # This file only contains a selection of the most common options. For a full
4 | # list see the documentation:
5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
6 |
7 | # -- Path setup --------------------------------------------------------------
8 | import sys
9 | from datetime import datetime
10 | from importlib.metadata import metadata
11 | from pathlib import Path
12 |
13 | HERE = Path(__file__).parent
14 | sys.path.insert(0, str(HERE / "extensions"))
15 |
16 |
17 | # -- Project information -----------------------------------------------------
18 |
19 | # NOTE: If you installed your project in editable mode, this might be stale.
20 | # If this is the case, reinstall it to refresh the metadata
21 | info = metadata("scverse-tutorials")
22 | project_name = info["Name"]
23 | author = info["Author"]
24 | copyright = f"{datetime.now():%Y}, {author}."
25 | version = info["Version"]
26 | urls = dict(pu.split(", ") for pu in info.get_all("Project-URL"))
27 | repository_url = urls["Source"]
28 |
29 | # The full version, including alpha/beta/rc tags
30 | release = info["Version"]
31 |
32 | bibtex_bibfiles = ["references.bib"]
33 | templates_path = ["_templates"]
34 | nitpicky = True # Warn about broken links
35 | needs_sphinx = "4.0"
36 |
37 | html_context = {
38 | "display_github": True,
39 | "github_user": "scverse",
40 | "github_repo": project_name,
41 | "github_version": "main",
42 | "conf_py_path": "/docs/",
43 | }
44 |
45 | # -- General configuration ---------------------------------------------------
46 |
47 | # Add any Sphinx extension module names here, as strings.
48 | # They can be extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
49 | extensions = [
50 | "myst_nb",
51 | "sphinx_copybutton",
52 | "sphinx.ext.autodoc",
53 | "sphinx.ext.intersphinx",
54 | "sphinx.ext.autosummary",
55 | "sphinx.ext.napoleon",
56 | "sphinx_issues",
57 | "sphinxcontrib.bibtex",
58 | "sphinx_autodoc_typehints",
59 | "sphinx.ext.mathjax",
60 | "IPython.sphinxext.ipython_console_highlighting",
61 | "sphinxext.opengraph",
62 | *[p.stem for p in (HERE / "extensions").glob("*.py")],
63 | ]
64 |
65 | autosummary_generate = True
66 | autodoc_member_order = "groupwise"
67 | default_role = "literal"
68 | napoleon_google_docstring = False
69 | napoleon_numpy_docstring = True
70 | napoleon_include_init_with_doc = False
71 | napoleon_use_rtype = True # having a separate entry generally helps readability
72 | napoleon_use_param = True
73 | myst_heading_anchors = 6 # create anchors for h1-h6
74 | myst_enable_extensions = [
75 | "amsmath",
76 | "colon_fence",
77 | "deflist",
78 | "dollarmath",
79 | "html_image",
80 | "html_admonition",
81 | ]
82 | myst_url_schemes = ("http", "https", "mailto")
83 | nb_output_stderr = "remove"
84 | nb_execution_mode = "off"
85 | nb_merge_streams = True
86 | typehints_defaults = "braces"
87 |
88 | source_suffix = {".rst": "restructuredtext", ".ipynb": "myst-nb", ".myst": "myst-nb"}
89 |
90 | intersphinx_mapping = {
91 | "python": ("https://docs.python.org/3", None),
92 | "anndata": ("https://anndata.readthedocs.io/en/latest/", None), # TODO: change back to stable after 0.12 release
93 | "numpy": ("https://numpy.org/doc/stable/", None),
94 | "scanpy": ("https://scanpy.readthedocs.io/en/stable/", None),
95 | "fast-array-utils": ("https://icb-fast-array-utils.readthedocs-hosted.com/en/stable", None),
96 | "dask": ("https://docs.dask.org/en/stable", None),
97 | "scipy": ("https://docs.scipy.org/doc/scipy", None),
98 | "rapids-singlecell": ("https://rapids-singlecell.readthedocs.io/en/stable/", None),
99 | }
100 |
101 | # List of patterns, relative to source directory, that match files and
102 | # directories to ignore when looking for source files.
103 | # This pattern also affects html_static_path and html_extra_path.
104 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "**.ipynb_checkpoints", ".jupyter_cache"]
105 |
106 |
107 | # -- Options for HTML output -------------------------------------------------
108 |
109 | # The theme to use for HTML and HTML Help pages. See the documentation for
110 | # a list of builtin themes.
111 | #
112 | html_theme = "sphinx_book_theme"
113 | html_static_path = ["_static"]
114 | html_css_files = ["css/custom.css"]
115 |
116 | html_title = project_name
117 |
118 | html_theme_options = {
119 | "repository_url": repository_url,
120 | "repository_branch": "main",
121 | "path_to_docs": "docs/",
122 | "navigation_with_keys": False,
123 | "use_repository_button": True,
124 | "launch_buttons": {
125 | "binderhub_url": "https://mybinder.org",
126 | # "colab_url": "https://colab.research.google.com",
127 | },
128 | }
129 |
130 | pygments_style = "default"
131 |
132 | nitpick_ignore = [
133 | # If building the documentation fails because of a missing link that is outside your control,
134 | # you can add an exception to this list.
135 | # ("py:class", "igraph.Graph"),
136 | ]
137 |
--------------------------------------------------------------------------------
/docs/extensions/typed_returns.py:
--------------------------------------------------------------------------------
1 | # code from https://github.com/theislab/scanpy/blob/master/docs/extensions/typed_returns.py
2 | # with some minor adjustment
3 | from __future__ import annotations
4 |
5 | import re
6 | from collections.abc import Generator, Iterable
7 |
8 | from sphinx.application import Sphinx
9 | from sphinx.ext.napoleon import NumpyDocstring
10 |
11 |
12 | def _process_return(lines: Iterable[str]) -> Generator[str, None, None]:
13 | for line in lines:
14 |         if m := re.fullmatch(r"(?P<param>\w+)\s+:\s+(?P<type>[\w.]+)", line):
15 | yield f"-{m['param']} (:class:`~{m['type']}`)"
16 | else:
17 | yield line
18 |
19 |
20 | def _parse_returns_section(self: NumpyDocstring, section: str) -> list[str]:
21 | lines_raw = self._dedent(self._consume_to_next_section())
22 | if lines_raw[0] == ":":
23 | del lines_raw[0]
24 | lines = self._format_block(":returns: ", list(_process_return(lines_raw)))
25 | if lines and lines[-1]:
26 | lines.append("")
27 | return lines
28 |
29 |
30 | def setup(app: Sphinx):
31 | """Set app."""
32 | NumpyDocstring._parse_returns_section = _parse_returns_section
33 |
--------------------------------------------------------------------------------
/docs/how-to-dask.md:
--------------------------------------------------------------------------------
1 | # Dask Q&A
2 |
3 | Here we will go through some common questions and answers about `dask`, with a special focus on its integration with `scanpy` and `anndata`. For more comprehensive tutorials or other topics like {doc}`launching a cluster <dask:deploying>`, head over to their documentation.
4 |
5 | ## Quickstart
6 |
7 | ### How do I monitor the {doc}`dask dashboard <dask:dashboard>`?
8 |
9 | If you are in a Jupyter notebook, rendering the `repr` of your `client` will show a link, usually something like `http://localhost:8787/status`.
10 | If you are working locally, this link alone should suffice.
11 |
12 | If you are working on some sort of remote notebook from a web browser, you will need to replace `http://localhost` with the root URL of the notebook server.
13 |
14 | If you are in VS Code, there is a [`dask` extension] that lets you monitor the dashboard from within the editor.
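   |
   | As a minimal sketch (assuming a local `dask.distributed` setup), you can also get the dashboard address programmatically from the client:
   |
   | ```python
   | from dask.distributed import Client
   |
   | client = Client()  # start a local cluster with default settings
   | print(client.dashboard_link)  # e.g. http://localhost:8787/status
   | ```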
15 |
16 | ### How do I know how to allocate resources?
17 |
18 | In `dask`, every worker will receive an equal share of the memory available.
19 | So if you request, e.g., a Slurm job with 256 GB of RAM and then start 8 workers, each will have 32 GB of memory.
20 |
21 | `dask` generally distributes work to workers based on the chunking of the array.
22 | So if you have dense chunks of `(30_000, 30_000)` with 32-bit integers, each worker will need at least 3.6 GB just to load a single chunk.
23 | If you then do something like matrix multiplication, you will need double that, or even more.
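   |
   | As a quick sanity check, here is the back-of-envelope arithmetic for the dense-chunk example above:
   |
   | ```python
   | # one dense (30_000, 30_000) chunk of 32-bit values
   | n_rows, n_cols, itemsize = 30_000, 30_000, 4  # 4 bytes per 32-bit element
   | chunk_gb = n_rows * n_cols * itemsize / 1e9
   | print(f"{chunk_gb:.1f} GB per chunk")  # 3.6 GB
   | ```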
24 |
25 | ### How do I read my data into a `dask` array?
26 |
27 | {func}`anndata.experimental.read_elem_lazy` or {func}`anndata.experimental.read_lazy` can help you if you already have data on disk that was written in the `anndata` file format.
28 | If you use {func}`dask.array.to_zarr`, the data _cannot_ be read in using `anndata`'s functionality, as `anndata` will look for its {doc}`specified file format metadata <anndata:fileformat-prose>`.
29 |
30 | If you need to implement custom IO, we have generally found {func}`dask.array.map_blocks` to provide a good way of doing so.
31 | See [our custom h5 io code] for an example.
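   |
   | A minimal sketch of the lazy-reading route (the file path here is hypothetical; any store written in the `anndata` format works):
   |
   | ```python
   | import anndata as ad
   | import h5py
   |
   | # the file must remain open while the lazy array is in use
   | f = h5py.File("data/example.h5ad", "r")
   | x_lazy = ad.experimental.read_elem_lazy(f["X"])  # chunked, dask-backed access to X
   | ```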
32 |
33 | ## Advanced use and how-to-contribute
34 |
35 | ### How do `scanpy` and `anndata` handle sparse matrices?
36 |
37 | While there is some {class}`scipy.sparse.csr_matrix` and {class}`scipy.sparse.csc_matrix` support for `dask`, it is not comprehensive and is missing key functions like summation and mean.
38 | We have implemented custom functionality, much of which lives in {mod}`fast_array_utils`, although we have also had to implement custom algorithms like `pca` for sparse-in-dask.
39 | In the future, an [`array-api`]-compatible sparse matrix like [`finch`] would help us considerably, as `dask` supports the [`array-api`].
40 |
41 | Therefore, if you run into a puzzling error after trying to run a function like {func}`numpy.sum` (or similar) on a sparse-in-dask array, consider checking {mod}`fast_array_utils`.
42 | If you need to implement the function yourself, see the next point.
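   |
   | For instance, a hedged sketch of swapping in {mod}`fast_array_utils` (assuming its `stats.sum` helper, and a sparse-in-dask array `x`, e.g. from `read_elem_lazy`):
   |
   | ```python
   | from fast_array_utils import stats
   |
   | # instead of numpy.sum(x, axis=0), which may fail for sparse-in-dask:
   | col_sums = stats.sum(x, axis=0)  # sparse- and dask-aware summation
   | ```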
43 |
44 | ### Custom block-wise array operations
45 |
46 | Sometimes you may want to perform an operation on an array that is not implemented anywhere.
47 | Generally, we have found {func}`dask.array.map_blocks` to be versatile enough that most operations can be expressed with it. Click the link to see `dask`'s own documentation for the function.
48 |
49 | Take this (simplified) example of calculating a gram matrix from {func}`scanpy.pp.pca` for sparse-in-dask:
50 |
51 | ```python
   | import dask.array as da
   | import numpy as np
   |
   | # `x` is assumed to be a CSR sparse-in-dask array chunked as (chunk_size, n_vars)
52 | def gram_block(x_part):
53 |     gram_matrix = x_part.T @ x_part
   |     # densify: the (n_vars, n_vars) gram matrix is small, and scipy sparse
   |     # matrices do not support adding an axis via `[None, ...]`
54 |     return gram_matrix.toarray()[None, ...]
55 |
56 | gram_matrix_dask = da.map_blocks(
57 | gram_block,
58 | x,
59 | new_axis=(1,),
60 | chunks=((1,) * x.blocks.size, (x.shape[1],), (x.shape[1],)),
61 | meta=np.array([], dtype=x.dtype),
62 | dtype=x.dtype,
63 | ).sum(axis=0)
64 | ```
65 |
66 | This algorithm goes through the rows `chunk_size` at a time and calculates the gram matrix for each block of rows, producing a collection of `(n_vars, n_vars)`-sized matrices.
67 | These are then summed together to produce a single `(n_vars, n_vars)` matrix: the gram matrix.
68 |
69 | Because `dask` does not implement matrix multiplication for sparse-in-dask, we do it ourselves.
70 | We use `map_blocks` over a CSR sparse-in-dask array where the chunking looks something like `(chunk_size, n_vars)`.
71 | When we compute an individual block's gram matrix, we add an axis via `[None, ...]` so that we can sum over that axis, i.e., the `da.map_blocks` call produces a `(n_obs // chunk_size, n_vars, n_vars)`-sized array which is summed over the first dimension.
72 | However, to make this work, we need to be very specific about what `da.map_blocks` should expect its result to look like, which is done via `new_axis` and `chunks`.
73 | `new_axis` indicates that the function adds a new axis to its output.
74 | The `chunks` argument specifies that the output of `da.map_blocks` should consist of `x.blocks.size` chunks of shape `(1, n_vars, n_vars)`.
75 | The `chunks` argument thus allows `dask` to infer the shape of the output.
76 |
77 | While this example is a bit complicated, it shows how you can go from a matrix of one shape and chunking to another by operating cleanly over blocks.
78 |
79 | ## FAQ
80 |
81 | ### What is `persist` used for in RSC notebooks?
82 |
83 | In the multi-GPU showcase notebook for `rapids-singlecell`, {meth}`dask.array.Array.persist` appears throughout the notebook.
84 | This loads the entire dataset into memory while keeping the representation as a dask array.
85 | Lazy computation still works, but the data only needs to be read into memory once.
86 | The catch is that you need enough memory to use `persist`, but if you do, it greatly speeds up the computation.
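   |
   | A minimal sketch of the pattern (the shapes are made up):
   |
   | ```python
   | import dask.array as da
   |
   | x = da.random.random((100_000, 2_000), chunks=(10_000, 2_000))
   | x = x.persist()  # compute once and keep the chunks in (worker) memory
   | # subsequent operations reuse the in-memory chunks instead of re-reading the source
   | means = x.mean(axis=0).compute()
   | ```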
87 |
88 | ### I'm out of memory, what now?
89 |
90 | You can always reduce the number of workers you use, which will cause more memory to be allocated per worker.
91 | Some algorithms may have limitations with loading all data onto a single node; see {issue}`dask/dask-ml#985` for an example.
92 |
93 | ### How do I choose chunk sizes?
94 |
95 | Have a look at the {doc}`dask docs for chunking <dask:array-chunks>`; the general rule of thumb there is to use larger chunks in memory than on disk.
96 | In this sense, it is probably a good idea to use the largest in-memory chunk size your memory limits (and the algorithms you use) allow, in order to exploit thread-level parallelism within algorithms to the fullest.
97 | For sparse data, where the chunks in memory do not map to those on disk, maxing out the available memory by choosing a large chunk size becomes even more important.
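   |
   | For illustration, a sketch of rechunking to larger in-memory chunks (shapes are hypothetical):
   |
   | ```python
   | import dask.array as da
   |
   | x = da.random.random((1_000_000, 2_000), chunks=(10_000, 2_000))  # many small chunks
   | x = x.rechunk((100_000, 2_000))  # fewer, larger chunks in memory
   | ```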
98 |
99 | [`dask` extension]: https://marketplace.visualstudio.com/items?itemName=joyceerhl.vscode-dask
100 | [our custom h5 io code]: https://github.com/scverse/anndata/blob/089ed929393a02200b389395f278b7c920e5bc4a/src/anndata/_io/specs/lazy_methods.py#L179-L205
101 | [`array-api`]: https://data-apis.org/array-api/latest/index.html
102 | [`finch`]: https://github.com/finch-tensor/finch-tensor-python
103 |
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | ```{include} ../README.md
2 |
3 | ```
4 |
5 | ```{toctree}
6 | :hidden: true
7 | :maxdepth: 1
8 |
9 | notebooks/basic-scrna-tutorial
10 | notebooks/anndata_getting_started
11 | notebooks/tutorial_axes_anndata_mudata
12 | notebooks/scverse_data_backed
13 | notebooks/scverse_data_interoperability
14 | notebooks/tutorial_concatenation_anndata_mudata
15 | how-to-dask.md
16 | references.md
17 | ```
18 |
--------------------------------------------------------------------------------
/docs/notebooks/.gitignore:
--------------------------------------------------------------------------------
1 | data/
2 |
--------------------------------------------------------------------------------
/docs/notebooks/data/pbmc3k_processed.h5ad:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scverse/scverse-tutorials/169d1d33f00f7e738da725f350d119cd8c7d4aba/docs/notebooks/data/pbmc3k_processed.h5ad
--------------------------------------------------------------------------------
/docs/notebooks/img/X.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scverse/scverse-tutorials/169d1d33f00f7e738da725f350d119cd8c7d4aba/docs/notebooks/img/X.png
--------------------------------------------------------------------------------
/docs/notebooks/img/anndata_schema_full.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scverse/scverse-tutorials/169d1d33f00f7e738da725f350d119cd8c7d4aba/docs/notebooks/img/anndata_schema_full.png
--------------------------------------------------------------------------------
/docs/notebooks/img/layers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scverse/scverse-tutorials/169d1d33f00f7e738da725f350d119cd8c7d4aba/docs/notebooks/img/layers.png
--------------------------------------------------------------------------------
/docs/notebooks/img/names.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scverse/scverse-tutorials/169d1d33f00f7e738da725f350d119cd8c7d4aba/docs/notebooks/img/names.png
--------------------------------------------------------------------------------
/docs/notebooks/img/obsmvarm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scverse/scverse-tutorials/169d1d33f00f7e738da725f350d119cd8c7d4aba/docs/notebooks/img/obsmvarm.png
--------------------------------------------------------------------------------
/docs/notebooks/img/obspvarp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scverse/scverse-tutorials/169d1d33f00f7e738da725f350d119cd8c7d4aba/docs/notebooks/img/obspvarp.png
--------------------------------------------------------------------------------
/docs/notebooks/img/obsvar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scverse/scverse-tutorials/169d1d33f00f7e738da725f350d119cd8c7d4aba/docs/notebooks/img/obsvar.png
--------------------------------------------------------------------------------
/docs/notebooks/scverse_data_interoperability.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "fifth-grammar",
6 | "metadata": {},
7 | "source": [
8 | "# Interoperability between scverse data structures and other languages \n",
9 | "\n",
10 | "Here we provide a list of resources that can be used to work with scverse data structures from your language of choice.\n",
11 | "\n",
12 | "A more detailed tutorial on interoperability with other languages can be found in the [Single-cell analysis best-practices book](https://www.sc-best-practices.org/introduction/interoperability.html)."
13 | ]
14 | },
15 | {
16 | "cell_type": "markdown",
17 | "id": "pending-grenada",
18 | "metadata": {},
19 | "source": [
20 | "## Conversion between python and R structures for single-cell analysis\n",
21 | "\n",
22 |     "Several toolkits for single-cell analysis in R build upon [SingleCellExperiment](http://bioconductor.org/books/3.16/OSCA.intro/the-singlecellexperiment-class.html) objects or [Seurat](https://satijalab.org/seurat/) objects. The following table provides an indication of which object slots store the same data in AnnData and R objects.\n",
23 | "\n",
24 | "| | `AnnData` | `SingleCellExperiment` | `Seurat` |\n",
25 | "|--------------------------------------|--------------------------|------------------------|------------------------------------|\n",
26 | "| **Active expression matrix** | `adata.X` | `assay(sce)` | `GetAssayData(seu)` |\n",
27 | "| **Alternative expression matrices** | `adata.layers['counts']` | `counts(sce)` | `GetAssay(seu)@counts` |\n",
28 | "| **Cell-level metadata** | `adata.obs` | `colData(sce)` | `seu@meta.data` |\n",
29 | "| **Gene-level metadata** | `adata.var` | `rowData(sce)` | `GetAssay(seu)@meta.features` |\n",
30 | "| **Dimensionality reductions** | `adata.obsm` | `reducedDim(sce)` | `seu@reductions` |\n",
31 |     "| **Cell IDs**                         | `adata.obs_names`        | `colnames(sce)`        | `colnames(seu)`                    |\n",
32 |     "| **Gene IDs**                         | `adata.var_names`        | `rownames(sce)`        | `rownames(seu)`                    |\n",
33 | "| **Cell-cell similarity graphs** | `adata.obsp` | --- | `seu@graphs` |"
34 | ]
35 | },
36 | {
37 | "cell_type": "markdown",
38 | "id": "executed-authority",
39 | "metadata": {},
40 | "source": [
41 | "### AnnData ⇄ Seurat objects\n",
42 | "\n",
43 | "See [Seurat documentation](https://satijalab.org/seurat/) for more details about Seurat objects.\n",
44 | "\n",
45 | "- [MuDataSeurat](https://pmbio.github.io/MuDataSeurat/) - R package to read and write `h5ad` files to and from Seurat objects\n",
46 | "- [sceasy](https://github.com/cellgeni/sceasy#usage) - R package to convert between objects within a session or saving `h5ad` or `rds` files \n",
47 | "- Using [reticulate](https://theislab.github.io/scanpy-in-R/#converting-from-python-to-r-1) - tutorial for conversion within R/Rmd sessions \n",
48 | "\n",
49 | "\n",
50 | "\n",
51 | "### AnnData ⇄ SingleCellExperiment objects\n",
52 | "\n",
53 | "See [OSCA book](http://bioconductor.org/books/3.16/OSCA.intro/the-singlecellexperiment-class.html) for more details about SingleCellExperiment objects.\n",
54 | "\n",
55 |     "- [zellkonverter](https://theislab.github.io/zellkonverter/articles/zellkonverter.html) - R/Bioconductor package to read and write `h5ad` files and to convert objects within R sessions using [basilisk](https://bioconductor.org/packages/release/bioc/html/basilisk.html) \n",
56 | "- [anndata2ri](https://github.com/theislab/anndata2ri#anndata--singlecellexperiment) - python package to convert between objects within python sessions using [rpy2](https://github.com/rpy2/rpy2#readme) \n",
57 | "- [sceasy](https://github.com/cellgeni/sceasy#usage) - R package to convert between objects within a session or saving `h5ad` or `rds` files \n",
58 | "- Using [reticulate](https://theislab.github.io/scanpy-in-R/#converting-from-python-to-r-1) - tutorial for conversion within R/Rmd sessions \n",
59 | "\n",
60 | "### AnnData ⇄ Loom objects\n",
61 | "\n",
62 | "See [Loompy documentation](http://linnarssonlab.org/loompy/index.html) for more details about Loom objects.\n",
63 | "\n",
64 | "- Using [anndata](https://anndata.readthedocs.io/en/latest/generated/anndata.read_loom.html#anndata.read_loom) - function to read `loom` files as AnnData objects\n",
65 | "- [sceasy](https://github.com/cellgeni/sceasy#usage) - R package to convert between objects within a session or saving `h5ad` or `loom` files \n",
66 | "\n",
67 | "### MuData ⇄ Seurat objects\n",
68 | "\n",
69 | "See [Seurat documentation](https://satijalab.org/seurat/) for more details about Seurat objects.\n",
70 | "\n",
71 | "- [MuDataSeurat](https://pmbio.github.io/MuDataSeurat/) - R package to read and write `h5mu` files to and from Seurat objects\n",
72 | "\n",
73 | "### MuData ⇄ MultiAssayExperiment objects\n",
74 | "\n",
75 | "See [documentation](http://waldronlab.io/MultiAssayExperiment/) for more details about MultiAssayExperiment objects.\n",
76 | "\n",
77 | "- [MuData for MultiAssayExperiment](https://ilia-kats.github.io/MuData/articles/Getting-Started.html) - R package to read and write `h5mu` files to and from `MultiAssayExperiment` objects \n",
78 | "\n",
79 | "### MuData ⇄ ArchR objects\n",
80 | "\n",
81 | "See [ArchR documentation](https://www.archrproject.com/bookdown/what-is-an-arrow-file-archrproject.html) for more details about ArchR objects.\n",
82 | "\n",
83 | "- Using [chame](https://gtca.github.io/chame/examples/archr_io.html) - python package providing functionality to read Arrow files "
84 | ]
85 | },
86 | {
87 | "cell_type": "markdown",
88 | "id": "virtual-street",
89 | "metadata": {},
90 | "source": [
91 | "## Read h5ad/h5mu in other languages\n",
92 | "\n",
93 | "### Julia\n",
94 | "\n",
95 | "- [Muon.jl](https://docs.juliahub.com/Muon/QfqCh/0.1.1/objects/) provides Julia implementations of ``AnnData`` and ``MuData`` objects, as well as IO for the HDF5 format\n",
96 | "- [scVI.jl](https://maren-ha.github.io/scVI.jl/index.html) provides a Julia implementation of ``AnnData`` as well as IO for the HDF5 format.\n",
97 | "\n",
98 | "### Javascript\n",
99 | "\n",
100 |     "- [Vitessce](https://github.com/vitessce/vitessce) - contains loaders for ``AnnData`` objects stored as Zarr\n",
101 | "\n",
102 | "### Rust\n",
103 | "\n",
104 | "- [anndata-rs](https://github.com/kaizhang/anndata-rs) provides a Rust implementation of ``AnnData`` as well as advanced IO support for the HDF5 storage format."
105 | ]
106 | }
107 | ],
108 | "metadata": {
109 | "kernelspec": {
110 | "display_name": "Python 3 (ipykernel)",
111 | "language": "python",
112 | "name": "python3"
113 | },
114 | "language_info": {
115 | "codemirror_mode": {
116 | "name": "ipython",
117 | "version": 3
118 | },
119 | "file_extension": ".py",
120 | "mimetype": "text/x-python",
121 | "name": "python",
122 | "nbconvert_exporter": "python",
123 | "pygments_lexer": "ipython3",
124 | "version": "3.12.5"
125 | }
126 | },
127 | "nbformat": 4,
128 | "nbformat_minor": 5
129 | }
130 |
--------------------------------------------------------------------------------
/docs/notebooks/tutorial_concatenation_anndata_mudata.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Concatenating multimodal experiments"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import warnings\n",
17 | "\n",
18 | "import anndata as ad\n",
19 | "import numpy as np\n",
20 | "import pandas as pd\n",
21 | "from mudata import MuData\n",
22 | "\n",
23 | "warnings.simplefilter(action=\"ignore\", category=FutureWarning)\n",
24 | "\n",
25 | "np.random.seed(1979)"
26 | ]
27 | },
28 | {
29 | "attachments": {},
30 | "cell_type": "markdown",
31 | "metadata": {},
32 | "source": [
33 |     "Sometimes, you may want to concatenate 2 `MuData` objects because they represent complementary slices of the same dataset on which you have applied different processing. Think of analysing B and T cells separately for your typical PBMC dataset. \n",
34 |     "Other times, you may instead need to concatenate 2 modalities into one `AnnData` because the tool you're working with doesn't currently support `MuData` (yeah we know, how dare they?).\n",
35 |     "Here we will showcase these 2 concatenation scenarios.\n",
36 | "\n",
37 | "\n",
38 | ":::{note}\n",
39 | "Native concatenation of two `MuData` objects is currently discussed in \n",
40 | "[scverse/mudata#20](https://github.com/scverse/mudata/issues/20) and may\n",
41 | "eventually make parts of this tutorial obsolete. \n",
42 | "\n",
43 | "Note that for some modalities, concatenation requires extra care. For instance, \n",
44 | "in the case of ATAC-seq, concatenation does not make sense unless fragments are aggregated first. \n",
45 | ":::"
46 | ]
47 | },
48 | {
49 | "attachments": {},
50 | "cell_type": "markdown",
51 | "metadata": {},
52 | "source": [
53 |     "First, we need to import the raw data for a dataset of our choice. We use the mudatasets package, which conveniently collects some useful, publicly available 10X single-cell datasets. For this example we need a multimodal dataset, so we select the *citeseq 5k* dataset, a collection of healthy PBMCs for which 2 modalities were profiled: RNA and surface proteins."
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": 2,
59 | "metadata": {},
60 | "outputs": [
61 | {
62 | "data": {
63 | "text/plain": [
64 | "['brain3k_multiome',\n",
65 | " 'pbmc3k_multiome',\n",
66 | " 'pbmc5k_citeseq',\n",
67 | " 'brain9k_multiome',\n",
68 | " 'pbmc10k_multiome']"
69 | ]
70 | },
71 | "execution_count": 2,
72 | "metadata": {},
73 | "output_type": "execute_result"
74 | }
75 | ],
76 | "source": [
77 | "import mudatasets as mds\n",
78 | "\n",
79 | "mds.list_datasets()"
80 | ]
81 | },
82 | {
83 | "cell_type": "code",
84 | "execution_count": 3,
85 | "metadata": {},
86 | "outputs": [
87 | {
88 | "name": "stdout",
89 | "output_type": "stream",
90 | "text": [
91 | "■ File filtered_feature_bc_matrix.h5 from pbmc5k_citeseq has been found at /home/runner/mudatasets/pbmc5k_citeseq/filtered_feature_bc_matrix.h5\n",
92 | "■ Checksum is validated (md5) for filtered_feature_bc_matrix.h5\n"
93 | ]
94 | },
95 | {
96 | "name": "stderr",
97 | "output_type": "stream",
98 | "text": [
99 | "/home/runner/miniconda3/envs/tutorials/lib/python3.12/site-packages/mudatasets/core.py:203: UserWarning: Dataset is in the 10X .h5 format and can't be loaded as backed.\n",
100 | " warn(\"Dataset is in the 10X .h5 format and can't be loaded as backed.\")\n"
101 | ]
102 | },
103 | {
104 | "name": "stderr",
105 | "output_type": "stream",
106 | "text": [
107 | "/home/runner/miniconda3/envs/tutorials/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
108 | " from .autonotebook import tqdm as notebook_tqdm\n"
109 | ]
110 | },
111 | {
112 | "name": "stdout",
113 | "output_type": "stream",
114 | "text": [
115 | "■ Loading filtered_feature_bc_matrix.h5...\n"
116 | ]
117 | },
118 | {
119 | "name": "stderr",
120 | "output_type": "stream",
121 | "text": [
122 | "/home/runner/miniconda3/envs/tutorials/lib/python3.12/site-packages/anndata/_core/anndata.py:1756: UserWarning: Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n",
123 | " utils.warn_names_duplicates(\"var\")\n",
124 | "/home/runner/miniconda3/envs/tutorials/lib/python3.12/site-packages/anndata/_core/anndata.py:1756: UserWarning: Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n",
125 | " utils.warn_names_duplicates(\"var\")\n",
126 | "/home/runner/miniconda3/envs/tutorials/lib/python3.12/site-packages/mudata/_core/mudata.py:915: UserWarning: var_names are not unique. To make them unique, call `.var_names_make_unique`.\n",
127 | " warnings.warn(\n"
128 | ]
129 | }
130 | ],
131 | "source": [
132 | "mds.info(\"pbmc5k_citeseq\")\n",
133 | "pbmc5k = mds.load(\"pbmc5k_citeseq\", files=[\"filtered_feature_bc_matrix.h5\"])"
134 | ]
135 | },
136 | {
137 | "cell_type": "code",
138 | "execution_count": 4,
139 | "metadata": {},
140 | "outputs": [
141 | {
142 | "data": {
143 | "text/html": [
144 | "