├── docs ├── img │ └── shadows_header.jpg ├── references.md ├── api.md ├── references.bib ├── examples │ ├── index.md │ ├── shadows-zarr.ipynb │ └── shadows-features.ipynb ├── Makefile ├── make.bat ├── index.md └── conf.py ├── tests ├── conftest.py ├── test_shadows_zarr.py └── test_shadows_hdf5.py ├── src └── shadows │ ├── __init__.py │ ├── compat.py │ ├── anndatashadow.py │ ├── mudatashadow.py │ ├── elemshadow.py │ └── datashadow.py ├── .github └── workflows │ ├── pythonpackage.yml │ └── docs.yml ├── LICENSE ├── README.md ├── .gitignore └── pyproject.toml /docs/img/shadows_header.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scverse/shadows/HEAD/docs/img/shadows_header.jpg -------------------------------------------------------------------------------- /docs/references.md: -------------------------------------------------------------------------------- 1 | # References 2 | 3 | ```{bibliography} 4 | :cited: 5 | ``` 6 | 7 | 8 | ```{autosummary} 9 | :toctree: generated 10 | :recursive: 11 | 12 | * 13 | ``` 14 | -------------------------------------------------------------------------------- /docs/api.md: -------------------------------------------------------------------------------- 1 | # API 2 | 3 | 4 | ```{eval-rst} 5 | .. 
autoclass:: shadows 6 | :show-inheritance: 7 | :members: 8 | ``` 9 | 10 | ```{autosummary} 11 | :toctree: generated 12 | :recursive: 13 | 14 | * 15 | ``` 16 | -------------------------------------------------------------------------------- /docs/references.bib: -------------------------------------------------------------------------------- 1 | @article{bredikhin2022muon, 2 | title={Muon: multimodal omics analysis framework}, 3 | author={Bredikhin, Danila and Kats, Ilia and Stegle, Oliver}, 4 | journal={Genome Biology}, 5 | volume={23}, 6 | number={1}, 7 | pages={1--12}, 8 | year={2022}, 9 | publisher={Springer} 10 | } 11 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 4 | @pytest.fixture(scope="module") 5 | def filepath_h5ad(tmpdir_factory): 6 | yield str(tmpdir_factory.mktemp("tmp_test_dir_shadows").join("test.h5ad")) 7 | 8 | 9 | @pytest.fixture(scope="module") 10 | def filepath_h5mu(tmpdir_factory): 11 | yield str(tmpdir_factory.mktemp("tmp_test_dir_shadows").join("test.h5mu")) 12 | 13 | 14 | @pytest.fixture(scope="module") 15 | def filepath_mudata_zarr(tmpdir_factory): 16 | yield str(tmpdir_factory.mktemp("tmp_test_dir_shadows").join("test_mudata.zarr")) 17 | -------------------------------------------------------------------------------- /src/shadows/__init__.py: -------------------------------------------------------------------------------- 1 | from .anndatashadow import AnnDataShadow 2 | from .datashadow import DataShadow 3 | from .mudatashadow import MuDataShadow 4 | 5 | try: # See https://github.com/maresb/hatch-vcs-footgun-example 6 | from setuptools_scm import get_version 7 | 8 | __version__ = get_version(root="../..", relative_to=__file__) 9 | except (ImportError, LookupError): 10 | try: 11 | from ._version import __version__ 12 | except ModuleNotFoundError: 13 | raise RuntimeError("pqdata is 
not correctly installed. Please install it, e.g. with pip.") 14 | 15 | __all__ = ["DataShadow", "AnnDataShadow", "MuDataShadow", "__version__"] 16 | -------------------------------------------------------------------------------- /docs/examples/index.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | ```{toctree} 4 | :maxdepth: 2 5 | 6 | shadow-objects.ipynb 7 | shadows-features.ipynb 8 | shadows-zarr.ipynb 9 | ``` 10 | 11 | ```{contents} 12 | :local: 13 | :depth: 3 14 | ``` 15 | 16 | Shadows offer an interface for AnnData and MuData files on disk that enables loading the necessary parts of the datasets into memory (and caching them) only when needed. 17 | 18 | More features and details of (low!) memory consumption are outlined on the following pages: 19 | 20 | - [key features and memory consumption tracking](shadow-objects.ipynb), 21 | 22 | - [more features](shadows-features.ipynb), 23 | 24 | - [zarr interface](shadows-zarr.ipynb). 25 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /src/shadows/compat.py: -------------------------------------------------------------------------------- 1 | from anndata._io.specs import read_elem as ad_read_elem 2 | 3 | try: 4 | from pqdata.core import Array as PqArray 5 | from pqdata.core import Group as PqGroup 6 | except ImportError: 7 | 8 | class PqArray: 9 | @staticmethod 10 | def __repr__(): 11 | return "mock pqdata.core.Array" 12 | 13 | class PqGroup: 14 | @staticmethod 15 | def __repr__(): 16 | return "mock pqdata.core.Group" 17 | 18 | 19 | def read_elem(*args, **kwargs): 20 | if "_format" in kwargs: 21 | format = kwargs.pop("_format") 22 | if format == "parquet": 23 | from pqdata.core import read_elem as pq_read_elem 24 | 25 | return pq_read_elem(*args, **kwargs) 26 | else: 27 | return ad_read_elem(*args, **kwargs) 28 | else: 29 | return ad_read_elem(*args, **kwargs) 30 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /.github/workflows/pythonpackage.yml: -------------------------------------------------------------------------------- 1 | name: Python package 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | 8 | runs-on: ubuntu-latest 9 | strategy: 10 | matrix: 11 | python-version: ["3.10", "3.11", "3.12"] 12 | 13 | steps: 14 | - uses: actions/checkout@v3 15 | - name: Set up Python ${{ matrix.python-version }} 16 | uses: actions/setup-python@v4 17 | with: 18 | python-version: ${{ matrix.python-version }} 19 | - name: Install dependencies 20 | run: | 21 | python -m pip install --upgrade pip 22 | python -m pip install uv 23 | uv venv 24 | source .venv/bin/activate 25 | uv pip install ruff pytest 26 | uv pip install '.[dev,test,all]' 27 | - name: Ruff check 28 | run: | 29 | source .venv/bin/activate 30 | ruff check src/shadows 31 | - name: Test with pytest 32 | run: | 33 | source .venv/bin/activate 34 | pytest 35 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: docs 2 | on: [push] 3 | 4 | jobs: 5 | docs: 6 | 7 | runs-on: ubuntu-latest 8 | 9 | permissions: 10 | contents: write 11 | steps: 12 | - uses: actions/checkout@v4 13 | - name: Set up Python 3.11 14 | uses: actions/setup-python@v4 15 | with: 16 | python-version: "3.11" 17 | - name: Install 18 | run: | 19 | python -m pip install --upgrade pip 20 | python -m pip install '.[doc]' 21 | - name: Install pandoc 22 | run: sudo apt-get install -y pandoc 23 | - name: Build HTML 24 | working-directory: 
docs 25 | run: | 26 | make html -e 27 | - name: Upload artifacts 28 | uses: actions/upload-artifact@v4 29 | with: 30 | name: html-docs 31 | path: docs/_build/html/ 32 | - name: Deploy 33 | uses: peaceiris/actions-gh-pages@v3 34 | if: github.ref == 'refs/heads/main' 35 | with: 36 | github_token: ${{ secrets.GITHUB_TOKEN }} 37 | publish_dir: docs/_build/html 38 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # Shadows 2 | 3 | ```{toctree} 4 | :hidden: true 5 | :maxdepth: 2 6 | 7 | examples/index.md 8 | api.md 9 | references.md 10 | ``` 11 | 12 | `shadows` is a Python library with low-memory interfaces for [scverse](https://scverse.org) data structures such as [AnnData](https://github.com/scverse/anndata) and [MuData](https://github.com/scverse/mudata). 13 | 14 | [//]: # (numfocus-fiscal-sponsor-attribution) 15 | 16 | shadows is part of the scverse® project ([website](https://scverse.org), [governance](https://scverse.org/about/roles)) and is fiscally sponsored by [NumFOCUS](https://numfocus.org/). 17 | If you like scverse® and want to support our mission, please consider making a tax-deductible [donation](https://numfocus.org/donate-to-scverse) to help the project pay for developer time, professional services, travel, workshops, and a variety of other needs. 18 | 19 |
25 |
26 |
2 |
3 | # Shadows
4 |
5 | Shadows are on-disk interfaces for scverse data standards such as [AnnData](https://github.com/scverse/anndata) and [MuData](https://github.com/scverse/mudata).
6 |
7 | It is an experimental project.
8 |
9 | [shadows on PyPI](https://pypi.org/project/shadows)
10 |
11 | ## Installation
12 |
13 | ```
14 | pip install shadows
15 | # or
16 | pip install git+https://github.com/scverse/shadows
17 | ```
18 |
19 | ## Features
20 |
21 | The shadows library aims to implement the following features:
22 |
23 | - [x] **Shadow objects**: Read-only AnnDataShadow and MuDataShadow for HDF5 files.
24 |
25 | - [x] AnnDataShadow and MuDataShadow for Zarr files.
26 |
27 | - [x] AnnDataShadow and MuDataShadow for Parquet-based serialization ([pqdata](https://github.com/gtca/pqdata)).
28 |
29 | - [x] Data shadows for `.pqdata` and `.zarr` files on S3 storage.
30 |
31 |
32 | ### Shadow objects
33 |
34 | Briefly, shadow objects simply work like this:
35 |
36 | ```py
37 | from shadows import AnnDataShadow, MuDataShadow
38 | ash = AnnDataShadow("pbmc3k.h5ad")
39 | msh = MuDataShadow("pbmc5k_citeseq.h5mu")
40 | ```
41 |
42 | All the various features are showcased in the following tutorials:
43 |
44 | - [Getting started with shadow objects](/docs/examples/shadow-objects.ipynb)
45 |
46 | - [Advanced features of shadow objects](/docs/examples/shadows-features.ipynb)
47 |
48 | [//]: # (numfocus-fiscal-sponsor-attribution)
49 |
50 | shadows is part of the scverse® project ([website](https://scverse.org), [governance](https://scverse.org/about/roles)) and is fiscally sponsored by [NumFOCUS](https://numfocus.org/).
51 | If you like scverse® and want to support our mission, please consider making a tax-deductible [donation](https://numfocus.org/donate-to-scverse) to help the project pay for developer time, professional services, travel, workshops, and a variety of other needs.
52 |
53 |
59 |
60 | | \n", 659 | "_index\n", 660 | " | \n", 661 | "
|---|
| \n", 664 | "object\n", 665 | " | \n", 666 | "
| \n", 671 | "CAGCCAGGTCTCGACG-1\n", 672 | " | \n", 673 | "
| \n", 676 | "TTCTTCCTCTCGGTAA-1\n", 677 | " | \n", 678 | "
| \n", 681 | "CGGGTCAAGAGAGGTA-1\n", 682 | " | \n", 683 | "
| \n", 686 | "TACCCGTCATAATCCG-1\n", 687 | " | \n", 688 | "
| \n", 691 | "TGGGTTAGTGAATTAG-1\n", 692 | " | \n", 693 | "
| \n", 1139 | " |
|---|
| CAGCCAGGTCTCGACG-1 | \n", 1144 | "
| TTCTTCCTCTCGGTAA-1 | \n", 1147 | "
| \n", 742 | "_index\n", 743 | " | \n", 744 | "\n", 745 | "celltype\n", 746 | " | \n", 747 | "\n", 748 | "leiden\n", 749 | " | \n", 750 | "\n", 751 | "n_genes_by_counts\n", 752 | " | \n", 753 | "\n", 754 | "pct_counts_mt\n", 755 | " | \n", 756 | "\n", 757 | "total_counts\n", 758 | " | \n", 759 | "\n", 760 | "total_counts_mt\n", 761 | " | \n", 762 | "
|---|---|---|---|---|---|---|
| \n", 765 | "object\n", 766 | " | \n", 767 | "\n", 768 | "cat\n", 769 | " | \n", 770 | "\n", 771 | "cat\n", 772 | " | \n", 773 | "\n", 774 | "i32\n", 775 | " | \n", 776 | "\n", 777 | "f32\n", 778 | " | \n", 779 | "\n", 780 | "f32\n", 781 | " | \n", 782 | "\n", 783 | "f32\n", 784 | " | \n", 785 | "
| \n", 790 | "AAACCCAAGAGACAAG-1\n", 791 | " | \n", 792 | "\n", 793 | ""intermediate m...\n", 794 | " | \n", 795 | "\n", 796 | ""3"\n", 797 | " | \n", 798 | "\n", 799 | "2363\n", 800 | " | \n", 801 | "\n", 802 | "6.332204\n", 803 | " | \n", 804 | "\n", 805 | "7375.0\n", 806 | " | \n", 807 | "\n", 808 | "467.0\n", 809 | " | \n", 810 | "
| \n", 813 | "AAACCCAAGGCCTAGA-1\n", 814 | " | \n", 815 | "\n", 816 | ""CD4+ naïve T"\n", 817 | " | \n", 818 | "\n", 819 | ""0"\n", 820 | " | \n", 821 | "\n", 822 | "1259\n", 823 | " | \n", 824 | "\n", 825 | "9.093319\n", 826 | " | \n", 827 | "\n", 828 | "3772.0\n", 829 | " | \n", 830 | "\n", 831 | "343.0\n", 832 | " | \n", 833 | "
| \n", 836 | "AAACCCAGTCGTGCCA-1\n", 837 | " | \n", 838 | "\n", 839 | ""CD4+ memory T"\n", 840 | " | \n", 841 | "\n", 842 | ""2"\n", 843 | " | \n", 844 | "\n", 845 | "1578\n", 846 | " | \n", 847 | "\n", 848 | "13.178295\n", 849 | " | \n", 850 | "\n", 851 | "4902.0\n", 852 | " | \n", 853 | "\n", 854 | "646.0\n", 855 | " | \n", 856 | "
| \n", 859 | "AAACCCATCGTGCATA-1\n", 860 | " | \n", 861 | "\n", 862 | ""CD4+ memory T"\n", 863 | " | \n", 864 | "\n", 865 | ""2"\n", 866 | " | \n", 867 | "\n", 868 | "1908\n", 869 | " | \n", 870 | "\n", 871 | "6.354415\n", 872 | " | \n", 873 | "\n", 874 | "6704.0\n", 875 | " | \n", 876 | "\n", 877 | "426.0\n", 878 | " | \n", 879 | "
| \n", 882 | "AAACGAAAGACAAGCC-1\n", 883 | " | \n", 884 | "\n", 885 | ""CD14 mono"\n", 886 | " | \n", 887 | "\n", 888 | ""1"\n", 889 | " | \n", 890 | "\n", 891 | "1589\n", 892 | " | \n", 893 | "\n", 894 | "9.307693\n", 895 | " | \n", 896 | "\n", 897 | "3900.0\n", 898 | " | \n", 899 | "\n", 900 | "363.0\n", 901 | " | \n", 902 | "
| \n", 1471 | " | n_genes_by_counts | \n", 1472 | "total_counts | \n", 1473 | "total_counts_mt | \n", 1474 | "pct_counts_mt | \n", 1475 | "leiden | \n", 1476 | "celltype | \n", 1477 | "
|---|---|---|---|---|---|---|
| AAACCCATCGTGCATA-1 | \n", 1482 | "1908 | \n", 1483 | "6704.0 | \n", 1484 | "426.0 | \n", 1485 | "6.354415 | \n", 1486 | "2 | \n", 1487 | "CD4+ memory T | \n", 1488 | "
| AAACGAAAGACAAGCC-1 | \n", 1491 | "1589 | \n", 1492 | "3900.0 | \n", 1493 | "363.0 | \n", 1494 | "9.307693 | \n", 1495 | "1 | \n", 1496 | "CD14 mono | \n", 1497 | "