├── .gitignore ├── .readthedocs.yml ├── LICENSE ├── README.rst ├── _static ├── css │ └── custom.css ├── semisup_cvae.jpg └── sup_cvae.jpg ├── anncollection-annloader.ipynb ├── anncollection.ipynb ├── anndata_dask_array.ipynb ├── annloader.ipynb ├── awkward-arrays.ipynb ├── conf.py ├── getting-started.ipynb ├── index.rst ├── requirements.txt └── {read,write}_dispatched.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | write/* 2 | */write/* 3 | .DS_Store 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | env/ 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | .hypothesis/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | 61 | # Flask stuff: 62 | instance/ 63 | .webassets-cache 64 | 65 | # Scrapy stuff: 66 | .scrapy 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # Jupyter Notebook 75 | .ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # celery beat schedule file 81 | celerybeat-schedule 82 | 83 | # SageMath parsed files 84 | *.sage.py 85 | 86 | # dotenv 87 | .env 88 | 89 | # virtualenv 90 | .venv 91 | venv/ 92 | ENV/ 93 | 94 | # Spyder project settings 95 | .spyderproject 96 | .spyproject 97 | 98 | # Rope project settings 99 | .ropeproject 100 | 101 | # mkdocs documentation 102 | /site 103 | 104 | # mypy 105 | .mypy_cache/ 106 | 107 | # Dask Array tutorial 108 | *.h5ad 109 | mydask.png 110 | *.zarr 111 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | build: 3 | os: ubuntu-22.04 4 | tools: 5 | python: "3.11" 6 | sphinx: 7 | configuration: conf.py 8 | python: 9 | install: 10 | - requirements: requirements.txt 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2017, Alex Wolf 4 | Copyright (c) 2025, scverse® 5 | All rights reserved. 
6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are met: 9 | 10 | * Redistributions of source code must retain the above copyright notice, this 11 | list of conditions and the following disclaimer. 12 | 13 | * Redistributions in binary form must reproduce the above copyright notice, 14 | this list of conditions and the following disclaimer in the documentation 15 | and/or other materials provided with the distribution. 16 | 17 | * Neither the name of the copyright holder nor the names of its 18 | contributors may be used to endorse or promote products derived from 19 | this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 25 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | anndata tutorials 2 | ================= 3 | 4 | See this `page `__ for more context. 5 | 6 | .. _website: https://scverse.org/ 7 | .. _governance: https://scverse.org/about/roles/ 8 | .. _NumFOCUS: https://numfocus.org/ 9 | .. 
_donation: https://numfocus.org/donate-to-scverse/ 10 | 11 | anndata is part of the scverse® project (`website`_, `governance`_) and is fiscally sponsored by `NumFOCUS`_. 12 | Please consider making a tax-deductible `donation`_ to help the project pay for developer time, professional services, travel, workshops, and a variety of other needs. 13 | 14 | .. raw:: html 15 | 16 |

17 | 18 | 19 | 20 |

21 | -------------------------------------------------------------------------------- /_static/css/custom.css: -------------------------------------------------------------------------------- 1 | /* ReadTheDocs theme colors */ 2 | 3 | .wy-nav-top { background-color: #f07e44 } 4 | .wy-nav-content { 5 | max-width: 1200px; 6 | } 7 | .wy-side-nav-search { background-color: transparent } 8 | .wy-side-nav-search input[type="text"] { border-width: 0 } 9 | 10 | 11 | /* Custom classes */ 12 | 13 | .small { font-size:40% } 14 | .smaller, .pr { font-size:70% } 15 | 16 | 17 | /* Custom classes with bootstrap buttons */ 18 | 19 | .tutorial, 20 | .tutorial:visited, 21 | .tutorial:hover 22 | { 23 | /* text-decoration: underline; */ 24 | font-weight: bold; 25 | padding: 2px 5px; 26 | white-space: nowrap; 27 | max-width: 100%; 28 | background: #f07e44; 29 | border: solid 1px #f07e44; 30 | border-radius: .25rem; 31 | font-size: 75%; 32 | /* font-family: SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",Courier,monospace; */ 33 | color: #404040; 34 | overflow-x: auto; 35 | box-sizing: border-box; 36 | } 37 | 38 | 39 | /* Formatting of RTD markup: rubrics and sidebars and admonitions */ 40 | 41 | /* rubric */ 42 | .rst-content p.rubric { 43 | margin-bottom: 6px; 44 | font-weight: normal; 45 | } 46 | .rst-content p.rubric::after { content: ":" } 47 | 48 | /* sidebar */ 49 | .rst-content .sidebar { 50 | /* margin: 0px 0px 0px 12px; */ 51 | padding-bottom: 0px; 52 | } 53 | .rst-content .sidebar p { 54 | margin-bottom: 12px; 55 | } 56 | .rst-content .sidebar p, 57 | .rst-content .sidebar ul, 58 | .rst-content .sidebar dl { 59 | font-size: 13px; 60 | } 61 | 62 | /* less space after bullet lists in admonitions like warnings and notes */ 63 | .rst-content .section .admonition ul { 64 | margin-bottom: 6px; 65 | } 66 | 67 | 68 | /* Code: literals and links */ 69 | 70 | .rst-content tt.literal, 71 | .rst-content code.literal { 72 | color: #404040; 73 | } 74 | /* slim font 
weight for non-link code */ 75 | .rst-content tt:not(.xref), 76 | .rst-content code:not(.xref), 77 | .rst-content *:not(a) > tt.xref, 78 | .rst-content *:not(a) > code.xref, 79 | .rst-content dl:not(.docutils) code 80 | { 81 | font-weight: normal; 82 | } 83 | .rst-content a > tt.xref, 84 | .rst-content a > code.xref, 85 | .rst-content dl:not(.docutils) a > tt.xref, 86 | .rst-content dl:not(.docutils) a > code.xref 87 | { 88 | font-weight: bold; /* underline looks clumsy, in particular with buttons and 89 | other hyperlinks, which don't come with underlines */ 90 | } 91 | 92 | 93 | /* Just one box for annotation code for a less noisy look */ 94 | 95 | .rst-content .annotation { 96 | padding: 2px 5px; 97 | background-color: white; 98 | border: 1px solid #e1e4e5; 99 | } 100 | .rst-content .annotation tt, 101 | .rst-content .annotation code { 102 | padding: 0 0; 103 | background-color: transparent; 104 | border: 0 solid transparent; 105 | } 106 | 107 | 108 | /* Parameter lists */ 109 | 110 | /* Mimick rubric style used for other headings */ 111 | /* TODO: once scanpydoc adds classes, also change return types like this */ 112 | .rst-content dl:not(.docutils) dl > dt { 113 | font-weight: bold; 114 | background: none transparent; 115 | border-left: none; 116 | margin: 0 0 12px; 117 | padding: 3px 0 0; 118 | font-size: 111.11%; 119 | } 120 | /* Parameters contain parts and don’t need bold font */ 121 | .rst-content dl.field-list dl > dt { font-weight: unset } 122 | /* Add colon between return tuple element name and type */ 123 | .rst-content dl:not(.docutils) dl > dt .classifier::before { content: ' : ' } 124 | 125 | /* Function headers */ 126 | 127 | .rst-content dl:not(.docutils) dt { 128 | background: #edf0f2; 129 | color: #404040; 130 | border-top: solid 3px #343131; 131 | } 132 | -------------------------------------------------------------------------------- /_static/semisup_cvae.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/scverse/anndata-tutorials/4b9d01d857d949f40ff841f90fb03d1aaa9c9225/_static/semisup_cvae.jpg -------------------------------------------------------------------------------- /_static/sup_cvae.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scverse/anndata-tutorials/4b9d01d857d949f40ff841f90fb03d1aaa9c9225/_static/sup_cvae.jpg -------------------------------------------------------------------------------- /awkward-arrays.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "faf20ef8-a00d-47dd-a089-74202423b9a7", 6 | "metadata": {}, 7 | "source": [ 8 | "# Awkward Arrays in AnnData\n", 9 | "\n", 10 | "**Author**: [Gregor Sturm](https://github.com/grst)" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "0238f94a-7b50-47cf-af0f-6cb462d5c47f", 16 | "metadata": { 17 | "raw_mimetype": "text/restructuredtext", 18 | "tags": [] 19 | }, 20 | "source": [ 21 | ":::{warning}\n", 22 | "Support for awkward arrays in AnnData is **experimental**.\n", 23 | "\n", 24 | "Behavior, in particular of {func}`~anndata.concat`, may change in the future. \n", 25 | "Please report any issues using the [issue tracker](https://github.com/scverse/anndata)\n", 26 | ":::" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 1, 32 | "id": "3547f8e3-d19e-4cb7-bf86-b664ca8cb024", 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "import awkward as ak\n", 37 | "import scanpy as sc\n", 38 | "from biothings_client import get_client as get_biothings_client" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "id": "a5ae115b-ad94-48a5-a84d-09140b605428", 44 | "metadata": {}, 45 | "source": [ 46 | "[Awkward Array](https://awkward-array.org/doc/main/) is a library for working with **nested, variable-sized data** using **NumPy-like idioms**. 
\n", 47 | "It is considerably faster than working with lists-of-lists or lists-of-dicts in Python. \n", 48 | "\n", 49 | "Here are two simple examples what an awkward array could look like: \n", 50 | "\n", 51 | "**ragged array:**" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 2, 57 | "id": "6a25de2c-a70a-4a79-bf8f-3a18d1ede78e", 58 | "metadata": {}, 59 | "outputs": [ 60 | { 61 | "data": { 62 | "text/html": [ 63 | "
[None,\n",
 64 |        " 2,\n",
 65 |        " 4]\n",
 66 |        "----------------\n",
 67 |        "type: 3 * ?int64
" 68 | ], 69 | "text/plain": [ 70 | "" 71 | ] 72 | }, 73 | "execution_count": 2, 74 | "metadata": {}, 75 | "output_type": "execute_result" 76 | } 77 | ], 78 | "source": [ 79 | "ragged = ak.Array(\n", 80 | " [\n", 81 | " None,\n", 82 | " [1, 2, 3],\n", 83 | " [3, 4],\n", 84 | " ]\n", 85 | ")\n", 86 | "ragged[:, 1]" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "id": "47a2427c-0fc1-4811-acc5-75ff0d0f81d8", 92 | "metadata": {}, 93 | "source": [ 94 | "**list of records:**" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 3, 100 | "id": "d59f1c88-7a61-4cd5-990f-26b1642556a4", 101 | "metadata": {}, 102 | "outputs": [ 103 | { 104 | "data": { 105 | "text/html": [ 106 | "
[1,\n",
107 |        " 3,\n",
108 |        " None]\n",
109 |        "----------------\n",
110 |        "type: 3 * ?int64
" 111 | ], 112 | "text/plain": [ 113 | "" 114 | ] 115 | }, 116 | "execution_count": 3, 117 | "metadata": {}, 118 | "output_type": "execute_result" 119 | } 120 | ], 121 | "source": [ 122 | "records = ak.Array(\n", 123 | " [\n", 124 | " {\"a\": 1, \"b\": 2},\n", 125 | " {\"a\": 3, \"c\": 4},\n", 126 | " {\"d\": 5},\n", 127 | " ]\n", 128 | ")\n", 129 | "records[\"a\"]" 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "id": "4bad02dd-0403-4eae-95ba-a13e62e1206a", 135 | "metadata": {}, 136 | "source": [ 137 | "Please refer to the [akward array documentation](https://awkward-array.org) for more information. \n", 138 | "\n", 139 | "Since v0.9, awkward arrays are supported in AnnData in the `.layers`, `.obsm`, `.varm` and `.uns` slots. " 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "id": "75b641f6-0c8d-4ebf-a57f-def03c97fa69", 145 | "metadata": {}, 146 | "source": [ 147 | "In the following, we will explore how awkward arrays can be useful when working with single-cell data." 148 | ] 149 | }, 150 | { 151 | "cell_type": "markdown", 152 | "id": "e5599439-dc85-482f-9704-0acf797ccf91", 153 | "metadata": {}, 154 | "source": [ 155 | "## Storing transcripts in `.varm`\n", 156 | "\n", 157 | "Every gene can have one or many transcripts. Using awkward arrays, we can store a ragged list of transcripts for each gene in `adata.varm`. " 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 4, 163 | "id": "d71cd256-8189-42b8-8fc2-63c9dd51a312", 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "adata = sc.datasets.pbmc3k()" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 5, 173 | "id": "a5f2a529-d4c0-4f96-aee8-37321a7b348c", 174 | "metadata": {}, 175 | "outputs": [ 176 | { 177 | "data": { 178 | "text/html": [ 179 | "
\n", 180 | "\n", 193 | "\n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | "
gene_ids
index
MIR1302-10ENSG00000243485
FAM138AENSG00000237613
OR4F5ENSG00000186092
RP11-34P13.7ENSG00000238009
RP11-34P13.8ENSG00000239945
\n", 227 | "
" 228 | ], 229 | "text/plain": [ 230 | " gene_ids\n", 231 | "index \n", 232 | "MIR1302-10 ENSG00000243485\n", 233 | "FAM138A ENSG00000237613\n", 234 | "OR4F5 ENSG00000186092\n", 235 | "RP11-34P13.7 ENSG00000238009\n", 236 | "RP11-34P13.8 ENSG00000239945" 237 | ] 238 | }, 239 | "execution_count": 5, 240 | "metadata": {}, 241 | "output_type": "execute_result" 242 | } 243 | ], 244 | "source": [ 245 | "adata.var.head()" 246 | ] 247 | }, 248 | { 249 | "cell_type": "markdown", 250 | "id": "0f0b58f8-c2a9-4164-83a4-1517b27a4380", 251 | "metadata": {}, 252 | "source": [ 253 | "Let's retrieve a list of transcripts for each gene using the [MyGene.info API](https://docs.mygene.info/en/latest/). " 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": 6, 259 | "id": "17614e98-5dee-486e-a870-958fbb060644", 260 | "metadata": {}, 261 | "outputs": [], 262 | "source": [ 263 | "mygene = get_biothings_client(\"gene\")" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": 7, 269 | "id": "bb5c7d5d-6b18-450b-a70f-dc3e34d10893", 270 | "metadata": {}, 271 | "outputs": [], 272 | "source": [ 273 | "%%capture\n", 274 | "mygene_res = mygene.querymany(\n", 275 | " adata.var[\"gene_ids\"],\n", 276 | " scopes=[\"ensembl.gene\"],\n", 277 | " fields=[\"ensembl.transcript\"],\n", 278 | " species=\"human\",\n", 279 | " as_dataframe=True,\n", 280 | ")\n", 281 | "# remove duplicated results\n", 282 | "mygene_res = mygene_res.loc[~mygene_res.index.duplicated()]\n", 283 | "assert (\n", 284 | " adata.var[\"gene_ids\"].tolist() == mygene_res.index.tolist()\n", 285 | "), \"Order of genes does not match\"" 286 | ] 287 | }, 288 | { 289 | "cell_type": "markdown", 290 | "id": "81992a77-9285-4521-a32c-7b7caa65152d", 291 | "metadata": {}, 292 | "source": [ 293 | "The API call returns a data frame with transcripts in the `ensembl.transcript` key:" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": 8, 299 | "id": 
"ed0677b8-0c79-4198-ab90-15ee9d919ec7", 300 | "metadata": {}, 301 | "outputs": [ 302 | { 303 | "data": { 304 | "text/html": [ 305 | "
\n", 306 | "\n", 319 | "\n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | "
_id_scoreensembl.transcriptnotfoundensembl
query
ENSG00000243485ENSG0000024348525.719067[ENST00000469289, ENST00000473358]NaNNaN
ENSG0000023761364552025.719007[ENST00000417324, ENST00000461467]NaNNaN
ENSG000001860927950124.912605ENST00000641515NaNNaN
ENSG00000238009ENSG0000023800925.719582[ENST00000453576, ENST00000466430, ENST0000047...NaNNaN
ENSG00000239945ENSG0000023994525.719145ENST00000495576NaNNaN
\n", 381 | "
" 382 | ], 383 | "text/plain": [ 384 | " _id _score \\\n", 385 | "query \n", 386 | "ENSG00000243485 ENSG00000243485 25.719067 \n", 387 | "ENSG00000237613 645520 25.719007 \n", 388 | "ENSG00000186092 79501 24.912605 \n", 389 | "ENSG00000238009 ENSG00000238009 25.719582 \n", 390 | "ENSG00000239945 ENSG00000239945 25.719145 \n", 391 | "\n", 392 | " ensembl.transcript notfound \\\n", 393 | "query \n", 394 | "ENSG00000243485 [ENST00000469289, ENST00000473358] NaN \n", 395 | "ENSG00000237613 [ENST00000417324, ENST00000461467] NaN \n", 396 | "ENSG00000186092 ENST00000641515 NaN \n", 397 | "ENSG00000238009 [ENST00000453576, ENST00000466430, ENST0000047... NaN \n", 398 | "ENSG00000239945 ENST00000495576 NaN \n", 399 | "\n", 400 | " ensembl \n", 401 | "query \n", 402 | "ENSG00000243485 NaN \n", 403 | "ENSG00000237613 NaN \n", 404 | "ENSG00000186092 NaN \n", 405 | "ENSG00000238009 NaN \n", 406 | "ENSG00000239945 NaN " 407 | ] 408 | }, 409 | "execution_count": 8, 410 | "metadata": {}, 411 | "output_type": "execute_result" 412 | } 413 | ], 414 | "source": [ 415 | "mygene_res.head()" 416 | ] 417 | }, 418 | { 419 | "cell_type": "markdown", 420 | "id": "0d27efeb-7a70-4fa0-b8d9-f465de933314", 421 | "metadata": {}, 422 | "source": [ 423 | "Let's construct an awkward Array from the list of dictionaries and \n", 424 | "assign the ragged list of transcripts to `adata.varm`" 425 | ] 426 | }, 427 | { 428 | "cell_type": "code", 429 | "execution_count": 9, 430 | "id": "b6cc25e3-44d6-4515-abe5-3b8ba4c8ab45", 431 | "metadata": {}, 432 | "outputs": [ 433 | { 434 | "name": "stderr", 435 | "output_type": "stream", 436 | "text": [ 437 | "/home/sturm/projects/2022/anndata/anndata/_core/aligned_mapping.py:54: ExperimentalFeatureWarning: Support for Awkward Arrays is currently experimental. Behavior may change in the future. 
Please report any issues you may encounter!\n", 438 | " warnings.warn(\n" 439 | ] 440 | } 441 | ], 442 | "source": [ 443 | "adata.varm[\"transcripts\"] = ak.Array(mygene_res[\"ensembl.transcript\"])" 444 | ] 445 | }, 446 | { 447 | "cell_type": "markdown", 448 | "id": "69f742f9-cf4d-4127-81ae-c0751acf70bd", 449 | "metadata": {}, 450 | "source": [ 451 | "We can now access transcripts of individual genes by slicing the AnnData object:" 452 | ] 453 | }, 454 | { 455 | "cell_type": "code", 456 | "execution_count": 10, 457 | "id": "1f6ff2d1-9260-44c0-87ee-febf17b1e851", 458 | "metadata": {}, 459 | "outputs": [ 460 | { 461 | "data": { 462 | "text/html": [ 463 | "
[['ENST00000283635', 'ENST00000352580', ..., 'ENST00000699439'],\n",
464 |        " ['ENST00000286758', 'ENST00000506590', 'ENST00000682537']]\n",
465 |        "----------------------------------------------------------------------------------------------------------------\n",
466 |        "type: 2 * union[\n",
467 |        "    var * string,\n",
468 |        "    string,\n",
469 |        "    float64, \n",
470 |        "parameters={"_view_args": ["target-140242149270096", "varm", ["transcripts"]], "__array__": "AwkwardArrayView"}]
" 471 | ], 472 | "text/plain": [ 473 | "" 474 | ] 475 | }, 476 | "execution_count": 10, 477 | "metadata": {}, 478 | "output_type": "execute_result" 479 | } 480 | ], 481 | "source": [ 482 | "adata[:, [\"CD8A\", \"CXCL13\"]].varm[\"transcripts\"]" 483 | ] 484 | } 485 | ], 486 | "metadata": { 487 | "kernelspec": { 488 | "display_name": "Python [conda env:scirpy_dev]", 489 | "language": "python", 490 | "name": "conda-env-scirpy_dev-py" 491 | }, 492 | "language_info": { 493 | "codemirror_mode": { 494 | "name": "ipython", 495 | "version": 3 496 | }, 497 | "file_extension": ".py", 498 | "mimetype": "text/x-python", 499 | "name": "python", 500 | "nbconvert_exporter": "python", 501 | "pygments_lexer": "ipython3", 502 | "version": "3.9.9" 503 | } 504 | }, 505 | "nbformat": 4, 506 | "nbformat_minor": 5 507 | } 508 | -------------------------------------------------------------------------------- /conf.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | project = 'anndata' 3 | author = 'anndata developers' 4 | copyright = f'{datetime.now():%Y}, {author}' 5 | 6 | version = '' 7 | release = version 8 | 9 | extensions = [ 10 | 'nbsphinx', 11 | ] 12 | 13 | templates_path = ['_templates'] 14 | html_static_path = ["_static"] 15 | source_suffix = '.rst' 16 | master_doc = 'index' 17 | language = None 18 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', '**.ipynb_checkpoints'] 19 | pygments_style = 'sphinx' 20 | 21 | # -- Options for HTML output ---------------------------------------------- 22 | 23 | html_theme = 'scanpydoc' 24 | html_theme_options = dict(navigation_depth=4) 25 | html_context = dict( 26 | display_github=True, # Integrate GitHub 27 | github_user='theislab', # Username 28 | github_repo='anndata-tutorials', # Repo name 29 | github_version='master', # Version 30 | conf_py_path='/', # Path in the checkout to the docs root 31 | ) 32 | html_show_sphinx = False 33 | 34 | 35 | # -- Strip output 
---------------------------------------------- 36 | 37 | # import nbclean, glob 38 | # for filename in glob.glob('**/*.ipynb', recursive=True): 39 | # ntbk = nbclean.NotebookCleaner(filename) 40 | # ntbk.clear('stderr') 41 | # ntbk.save(filename) 42 | -------------------------------------------------------------------------------- /getting-started.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Getting started with anndata" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "**Authors:** [Adam Gayoso](https://twitter.com/adamgayoso), [Alex Wolf](https://twitter.com/falexwolf)" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | ":::{note}\n", 22 | "This tutorial is based on a blog posts by [Adam in 2021](https://adamgayoso.com/posts/ten_min_to_adata/) and [Alex in 2017](https://falexwolf.me/2017/introducing-anndata/). \n", 23 | ":::" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "In this tutorial, we introduce basic properties of the central object, [AnnData](http://anndata.readthedocs.io/en/latest/anndata.AnnData.html) (\"Annotated Data\").\n", 31 | "\n", 32 | "`AnnData` is specifically designed for matrix-like data. By this we mean that we have $n$ observations, each of which can be represented as $d$-dimensional vectors, where each dimension corresponds to a variable or feature. Both the rows and columns of this $n \\times d$ matrix are special in the sense that they are indexed.\n", 33 | "\n", 34 | "For instance, in scRNA-seq data, each row corresponds to a cell with a barcode, and each column corresponds to a gene with a gene id. Furthermore, for each cell and each gene we might have additional metadata, like (1) donor information for each cell, or (2) alternative gene symbols for each gene. 
Finally, we might have other unstructured metadata like color palettes to use for plotting. Without going into every fancy Python-based data structure, we think that still today no other alternative really exists that:\n", 35 | "\n", 36 | "* Handles sparsity\n", 37 | "* Handles unstructured data\n", 38 | "* Handles observation- and feature-level metadata\n", 39 | "* Is user-friendly" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 1, 45 | "metadata": { 46 | "jupyter": { 47 | "outputs_hidden": false 48 | } 49 | }, 50 | "outputs": [ 51 | { 52 | "name": "stdout", 53 | "output_type": "stream", 54 | "text": [ 55 | "0.8.0\n" 56 | ] 57 | } 58 | ], 59 | "source": [ 60 | "import numpy as np\n", 61 | "import pandas as pd\n", 62 | "import anndata as ad\n", 63 | "from scipy.sparse import csr_matrix\n", 64 | "print(ad.__version__)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "## Initializing AnnData" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "Let's start by building a basic AnnData object with some sparse count information, perhaps representing gene expression counts." 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 2, 84 | "metadata": {}, 85 | "outputs": [ 86 | { 87 | "data": { 88 | "text/plain": [ 89 | "AnnData object with n_obs × n_vars = 100 × 2000" 90 | ] 91 | }, 92 | "execution_count": 2, 93 | "metadata": {}, 94 | "output_type": "execute_result" 95 | } 96 | ], 97 | "source": [ 98 | "counts = csr_matrix(np.random.poisson(1, size=(100, 2000)), dtype=np.float32)\n", 99 | "adata = ad.AnnData(counts)\n", 100 | "adata" 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "We see that AnnData provides a representation with summary statistics of the data. The initial data we passed are accessible as a sparse matrix using `adata.X`." 
108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 3, 113 | "metadata": {}, 114 | "outputs": [ 115 | { 116 | "data": { 117 | "text/plain": [ 118 | "<100x2000 sparse matrix of type ''\n", 119 | "\twith 126526 stored elements in Compressed Sparse Row format>" 120 | ] 121 | }, 122 | "execution_count": 3, 123 | "metadata": {}, 124 | "output_type": "execute_result" 125 | } 126 | ], 127 | "source": [ 128 | "adata.X" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "Now, we provide the index to both the `obs` and `var` axes using `.obs_names` (resp. `.var_names`)." 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 4, 141 | "metadata": {}, 142 | "outputs": [ 143 | { 144 | "name": "stdout", 145 | "output_type": "stream", 146 | "text": [ 147 | "Index(['Cell_0', 'Cell_1', 'Cell_2', 'Cell_3', 'Cell_4', 'Cell_5', 'Cell_6',\n", 148 | " 'Cell_7', 'Cell_8', 'Cell_9'],\n", 149 | " dtype='object')\n" 150 | ] 151 | } 152 | ], 153 | "source": [ 154 | "adata.obs_names = [f\"Cell_{i:d}\" for i in range(adata.n_obs)]\n", 155 | "adata.var_names = [f\"Gene_{i:d}\" for i in range(adata.n_vars)]\n", 156 | "print(adata.obs_names[:10])" 157 | ] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "metadata": {}, 162 | "source": [ 163 | "### Subsetting AnnData" 164 | ] 165 | }, 166 | { 167 | "cell_type": "markdown", 168 | "metadata": {}, 169 | "source": [ 170 | "These index values can be used to subset the AnnData, which provides a view of the AnnData object. We can imagine this to be useful to subset the AnnData to particular cell types or gene modules of interest. The rules for subsetting AnnData are quite similar to that of a Pandas DataFrame. You can use values in the `obs/var_names`, boolean masks, or cell index integers." 
171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 5, 176 | "metadata": {}, 177 | "outputs": [ 178 | { 179 | "data": { 180 | "text/plain": [ 181 | "View of AnnData object with n_obs × n_vars = 2 × 2" 182 | ] 183 | }, 184 | "execution_count": 5, 185 | "metadata": {}, 186 | "output_type": "execute_result" 187 | } 188 | ], 189 | "source": [ 190 | "adata[[\"Cell_1\", \"Cell_10\"], [\"Gene_5\", \"Gene_1900\"]]" 191 | ] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "metadata": {}, 196 | "source": [ 197 | "## Adding aligned metadata\n", 198 | "\n", 199 | "### Observation/Variable level" 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": {}, 205 | "source": [ 206 | "So we have the core of our object and now we'd like to add metadata at both the observation and variable levels. This is pretty simple with AnnData, both `adata.obs` and `adata.var` are Pandas DataFrames." 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 6, 212 | "metadata": {}, 213 | "outputs": [ 214 | { 215 | "data": { 216 | "text/html": [ 217 | "
\n", 218 | "\n", 231 | "\n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | "
cell_type
Cell_0B
Cell_1B
Cell_2B
Cell_3Monocyte
Cell_4Monocyte
......
Cell_95Monocyte
Cell_96B
Cell_97Monocyte
Cell_98B
Cell_99T
\n", 285 | "

100 rows × 1 columns

\n", 286 | "
" 287 | ], 288 | "text/plain": [ 289 | " cell_type\n", 290 | "Cell_0 B\n", 291 | "Cell_1 B\n", 292 | "Cell_2 B\n", 293 | "Cell_3 Monocyte\n", 294 | "Cell_4 Monocyte\n", 295 | "... ...\n", 296 | "Cell_95 Monocyte\n", 297 | "Cell_96 B\n", 298 | "Cell_97 Monocyte\n", 299 | "Cell_98 B\n", 300 | "Cell_99 T\n", 301 | "\n", 302 | "[100 rows x 1 columns]" 303 | ] 304 | }, 305 | "execution_count": 6, 306 | "metadata": {}, 307 | "output_type": "execute_result" 308 | } 309 | ], 310 | "source": [ 311 | "ct = np.random.choice([\"B\", \"T\", \"Monocyte\"], size=(adata.n_obs,))\n", 312 | "adata.obs[\"cell_type\"] = pd.Categorical(ct) # Categoricals are preferred for efficiency\n", 313 | "adata.obs" 314 | ] 315 | }, 316 | { 317 | "cell_type": "markdown", 318 | "metadata": {}, 319 | "source": [ 320 | "We can also see now that the AnnData representation has been updated:" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": 7, 326 | "metadata": {}, 327 | "outputs": [ 328 | { 329 | "data": { 330 | "text/plain": [ 331 | "AnnData object with n_obs × n_vars = 100 × 2000\n", 332 | " obs: 'cell_type'" 333 | ] 334 | }, 335 | "execution_count": 7, 336 | "metadata": {}, 337 | "output_type": "execute_result" 338 | } 339 | ], 340 | "source": [ 341 | "adata" 342 | ] 343 | }, 344 | { 345 | "cell_type": "markdown", 346 | "metadata": {}, 347 | "source": [ 348 | "### Subsetting using metadata" 349 | ] 350 | }, 351 | { 352 | "cell_type": "markdown", 353 | "metadata": {}, 354 | "source": [ 355 | "We can also subset the AnnData using these randomly generated cell types:" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": 8, 361 | "metadata": {}, 362 | "outputs": [ 363 | { 364 | "data": { 365 | "text/plain": [ 366 | "View of AnnData object with n_obs × n_vars = 26 × 2000\n", 367 | " obs: 'cell_type'" 368 | ] 369 | }, 370 | "execution_count": 8, 371 | "metadata": {}, 372 | "output_type": "execute_result" 373 | } 374 | ], 375 | "source": [ 376 | "bdata = 
adata[adata.obs.cell_type == \"B\"]\n", 377 | "bdata" 378 | ] 379 | }, 380 | { 381 | "cell_type": "markdown", 382 | "metadata": {}, 383 | "source": [ 384 | "## Observation/variable-level matrices" 385 | ] 386 | }, 387 | { 388 | "cell_type": "markdown", 389 | "metadata": {}, 390 | "source": [ 391 | "We might also have metadata at either level that has many dimensions to it, such as a UMAP embedding of the data. For this type of metadata, AnnData has the `.obsm/.varm` attributes. We use keys to identify the different matrices we insert. The restrictions of `.obsm/.varm` are that the length of `.obsm` matrices must equal the number of observations (`.n_obs`) and the length of `.varm` matrices must equal the number of variables (`.n_vars`). They can each independently have a different number of dimensions.\n", 392 | "\n", 393 | "Let's start with a randomly generated matrix that we can interpret as a UMAP embedding of the data we'd like to store, as well as some random gene-level metadata:" 394 | ] 395 | }, 396 | { 397 | "cell_type": "code", 398 | "execution_count": 9, 399 | "metadata": {}, 400 | "outputs": [ 401 | { 402 | "data": { 403 | "text/plain": [ 404 | "AxisArrays with keys: X_umap" 405 | ] 406 | }, 407 | "execution_count": 9, 408 | "metadata": {}, 409 | "output_type": "execute_result" 410 | } 411 | ], 412 | "source": [ 413 | "adata.obsm[\"X_umap\"] = np.random.normal(0, 1, size=(adata.n_obs, 2))\n", 414 | "adata.varm[\"gene_stuff\"] = np.random.normal(0, 1, size=(adata.n_vars, 5))\n", 415 | "adata.obsm" 416 | ] 417 | }, 418 | { 419 | "cell_type": "markdown", 420 | "metadata": {}, 421 | "source": [ 422 | "Again, the AnnData representation is updated." 
423 | ] 424 | }, 425 | { 426 | "cell_type": "code", 427 | "execution_count": 10, 428 | "metadata": {}, 429 | "outputs": [ 430 | { 431 | "data": { 432 | "text/plain": [ 433 | "AnnData object with n_obs × n_vars = 100 × 2000\n", 434 | " obs: 'cell_type'\n", 435 | " obsm: 'X_umap'\n", 436 | " varm: 'gene_stuff'" 437 | ] 438 | }, 439 | "execution_count": 10, 440 | "metadata": {}, 441 | "output_type": "execute_result" 442 | } 443 | ], 444 | "source": [ 445 | "adata" 446 | ] 447 | }, 448 | { 449 | "cell_type": "markdown", 450 | "metadata": {}, 451 | "source": [ 452 | "A few more notes about `.obsm/.varm`\n", 453 | "\n", 454 | "1. The \"array-like\" metadata can originate from a Pandas DataFrame, scipy sparse matrix, or numpy dense array.\n", 455 | "2. When using scanpy, their values (columns) are not easily plotted, where instead items from `.obs` are easily plotted on, e.g., UMAP plots." 456 | ] 457 | }, 458 | { 459 | "cell_type": "markdown", 460 | "metadata": {}, 461 | "source": [ 462 | "## Unstructured metadata" 463 | ] 464 | }, 465 | { 466 | "cell_type": "markdown", 467 | "metadata": {}, 468 | "source": [ 469 | "AnnData has `.uns`, which allows for any unstructured metadata. This can be anything, like a list or a dictionary with some general information that was useful in the analysis of our data." 470 | ] 471 | }, 472 | { 473 | "cell_type": "code", 474 | "execution_count": 11, 475 | "metadata": {}, 476 | "outputs": [ 477 | { 478 | "data": { 479 | "text/plain": [ 480 | "OverloadedDict, wrapping:\n", 481 | "\tOrderedDict([('random', [1, 2, 3])])\n", 482 | "With overloaded keys:\n", 483 | "\t['neighbors']." 
484 | ] 485 | }, 486 | "execution_count": 11, 487 | "metadata": {}, 488 | "output_type": "execute_result" 489 | } 490 | ], 491 | "source": [ 492 | "adata.uns[\"random\"] = [1, 2, 3]\n", 493 | "adata.uns" 494 | ] 495 | }, 496 | { 497 | "cell_type": "markdown", 498 | "metadata": {}, 499 | "source": [ 500 | "## Layers" 501 | ] 502 | }, 503 | { 504 | "cell_type": "markdown", 505 | "metadata": {}, 506 | "source": [ 507 | "Finally, we may have different forms of our original core data, perhaps one that is normalized and one that is not. These can be stored in different layers in AnnData. For example, let's log transform the original data and store it in a layer:" 508 | ] 509 | }, 510 | { 511 | "cell_type": "code", 512 | "execution_count": 12, 513 | "metadata": {}, 514 | "outputs": [ 515 | { 516 | "data": { 517 | "text/plain": [ 518 | "AnnData object with n_obs × n_vars = 100 × 2000\n", 519 | " obs: 'cell_type'\n", 520 | " uns: 'random'\n", 521 | " obsm: 'X_umap'\n", 522 | " varm: 'gene_stuff'\n", 523 | " layers: 'log_transformed'" 524 | ] 525 | }, 526 | "execution_count": 12, 527 | "metadata": {}, 528 | "output_type": "execute_result" 529 | } 530 | ], 531 | "source": [ 532 | "adata.layers[\"log_transformed\"] = np.log1p(adata.X)\n", 533 | "adata" 534 | ] 535 | }, 536 | { 537 | "cell_type": "markdown", 538 | "metadata": {}, 539 | "source": [ 540 | "## Conversion to DataFrames" 541 | ] 542 | }, 543 | { 544 | "cell_type": "markdown", 545 | "metadata": {}, 546 | "source": [ 547 | "We can also ask AnnData to return us a DataFrame from one of the layers:" 548 | ] 549 | }, 550 | { 551 | "cell_type": "code", 552 | "execution_count": 13, 553 | "metadata": {}, 554 | "outputs": [ 555 | { 556 | "data": { 557 | "text/html": [ 558 | "
\n", 559 | "\n", 572 | "\n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " 
\n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | "
Gene_0Gene_1Gene_2Gene_3Gene_4Gene_5Gene_6Gene_7Gene_8Gene_9...Gene_1990Gene_1991Gene_1992Gene_1993Gene_1994Gene_1995Gene_1996Gene_1997Gene_1998Gene_1999
Cell_01.0986120.6931470.0000000.6931470.6931470.0000001.3862940.6931470.6931470.693147...1.0986120.0000000.6931470.0000000.0000000.6931470.6931470.0000001.0986120.693147
Cell_10.0000001.0986120.6931470.0000000.6931470.6931470.6931470.6931470.6931470.000000...0.6931470.0000000.0000000.0000000.6931471.0986121.0986120.0000000.0000001.386294
Cell_20.6931470.6931470.0000000.6931471.0986120.6931470.6931470.0000000.6931471.098612...0.0000000.0000000.6931470.6931471.3862940.6931471.0986120.0000000.0000000.000000
Cell_30.0000001.0986120.0000000.6931471.7917590.6931470.0000000.0000001.0986120.000000...1.6094381.0986120.6931470.0000001.0986120.0000000.6931470.6931470.6931470.693147
Cell_40.6931470.0000000.6931470.0000000.6931470.6931470.0000000.6931470.6931471.098612...0.6931471.0986120.0000000.0000000.0000001.0986120.0000001.0986121.6094380.693147
..................................................................
Cell_950.6931470.6931470.0000001.0986120.6931471.0986120.0000000.0000000.0000000.000000...0.0000000.0000000.0000001.0986120.0000000.0000000.6931470.6931470.6931470.000000
Cell_960.6931471.0986121.3862940.6931471.0986120.0000001.6094380.6931470.6931470.693147...1.0986120.0000000.0000000.0000000.0000000.6931471.3862940.0000001.3862940.000000
Cell_970.0000000.6931471.0986120.6931470.0000000.0000000.0000000.0000001.3862940.000000...0.6931470.0000000.6931470.0000001.3862941.3862940.0000000.0000000.6931470.000000
Cell_980.6931471.0986120.0000000.6931470.6931470.0000000.6931470.0000000.6931471.098612...0.6931470.0000000.0000001.0986120.6931470.0000000.6931470.6931470.6931471.098612
Cell_990.6931470.6931470.0000001.7917590.0000001.0986120.0000001.0986121.6094380.693147...1.0986121.0986120.6931470.6931471.0986120.6931470.0000000.0000000.6931470.693147
\n", 866 | "

100 rows × 2000 columns

\n", 867 | "
" 868 | ], 869 | "text/plain": [ 870 | " Gene_0 Gene_1 Gene_2 Gene_3 Gene_4 Gene_5 Gene_6 \\\n", 871 | "Cell_0 1.098612 0.693147 0.000000 0.693147 0.693147 0.000000 1.386294 \n", 872 | "Cell_1 0.000000 1.098612 0.693147 0.000000 0.693147 0.693147 0.693147 \n", 873 | "Cell_2 0.693147 0.693147 0.000000 0.693147 1.098612 0.693147 0.693147 \n", 874 | "Cell_3 0.000000 1.098612 0.000000 0.693147 1.791759 0.693147 0.000000 \n", 875 | "Cell_4 0.693147 0.000000 0.693147 0.000000 0.693147 0.693147 0.000000 \n", 876 | "... ... ... ... ... ... ... ... \n", 877 | "Cell_95 0.693147 0.693147 0.000000 1.098612 0.693147 1.098612 0.000000 \n", 878 | "Cell_96 0.693147 1.098612 1.386294 0.693147 1.098612 0.000000 1.609438 \n", 879 | "Cell_97 0.000000 0.693147 1.098612 0.693147 0.000000 0.000000 0.000000 \n", 880 | "Cell_98 0.693147 1.098612 0.000000 0.693147 0.693147 0.000000 0.693147 \n", 881 | "Cell_99 0.693147 0.693147 0.000000 1.791759 0.000000 1.098612 0.000000 \n", 882 | "\n", 883 | " Gene_7 Gene_8 Gene_9 ... Gene_1990 Gene_1991 Gene_1992 \\\n", 884 | "Cell_0 0.693147 0.693147 0.693147 ... 1.098612 0.000000 0.693147 \n", 885 | "Cell_1 0.693147 0.693147 0.000000 ... 0.693147 0.000000 0.000000 \n", 886 | "Cell_2 0.000000 0.693147 1.098612 ... 0.000000 0.000000 0.693147 \n", 887 | "Cell_3 0.000000 1.098612 0.000000 ... 1.609438 1.098612 0.693147 \n", 888 | "Cell_4 0.693147 0.693147 1.098612 ... 0.693147 1.098612 0.000000 \n", 889 | "... ... ... ... ... ... ... ... \n", 890 | "Cell_95 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000 \n", 891 | "Cell_96 0.693147 0.693147 0.693147 ... 1.098612 0.000000 0.000000 \n", 892 | "Cell_97 0.000000 1.386294 0.000000 ... 0.693147 0.000000 0.693147 \n", 893 | "Cell_98 0.000000 0.693147 1.098612 ... 0.693147 0.000000 0.000000 \n", 894 | "Cell_99 1.098612 1.609438 0.693147 ... 
1.098612 1.098612 0.693147 \n", 895 | "\n", 896 | " Gene_1993 Gene_1994 Gene_1995 Gene_1996 Gene_1997 Gene_1998 \\\n", 897 | "Cell_0 0.000000 0.000000 0.693147 0.693147 0.000000 1.098612 \n", 898 | "Cell_1 0.000000 0.693147 1.098612 1.098612 0.000000 0.000000 \n", 899 | "Cell_2 0.693147 1.386294 0.693147 1.098612 0.000000 0.000000 \n", 900 | "Cell_3 0.000000 1.098612 0.000000 0.693147 0.693147 0.693147 \n", 901 | "Cell_4 0.000000 0.000000 1.098612 0.000000 1.098612 1.609438 \n", 902 | "... ... ... ... ... ... ... \n", 903 | "Cell_95 1.098612 0.000000 0.000000 0.693147 0.693147 0.693147 \n", 904 | "Cell_96 0.000000 0.000000 0.693147 1.386294 0.000000 1.386294 \n", 905 | "Cell_97 0.000000 1.386294 1.386294 0.000000 0.000000 0.693147 \n", 906 | "Cell_98 1.098612 0.693147 0.000000 0.693147 0.693147 0.693147 \n", 907 | "Cell_99 0.693147 1.098612 0.693147 0.000000 0.000000 0.693147 \n", 908 | "\n", 909 | " Gene_1999 \n", 910 | "Cell_0 0.693147 \n", 911 | "Cell_1 1.386294 \n", 912 | "Cell_2 0.000000 \n", 913 | "Cell_3 0.693147 \n", 914 | "Cell_4 0.693147 \n", 915 | "... ... \n", 916 | "Cell_95 0.000000 \n", 917 | "Cell_96 0.000000 \n", 918 | "Cell_97 0.000000 \n", 919 | "Cell_98 1.098612 \n", 920 | "Cell_99 0.693147 \n", 921 | "\n", 922 | "[100 rows x 2000 columns]" 923 | ] 924 | }, 925 | "execution_count": 13, 926 | "metadata": {}, 927 | "output_type": "execute_result" 928 | } 929 | ], 930 | "source": [ 931 | "adata.to_df(layer=\"log_transformed\")" 932 | ] 933 | }, 934 | { 935 | "cell_type": "markdown", 936 | "metadata": {}, 937 | "source": [ 938 | "We see that the `.obs_names/.var_names` are used in the creation of this Pandas object." 939 | ] 940 | }, 941 | { 942 | "cell_type": "markdown", 943 | "metadata": {}, 944 | "source": [ 945 | "## Writing the results to disk" 946 | ] 947 | }, 948 | { 949 | "cell_type": "markdown", 950 | "metadata": {}, 951 | "source": [ 952 | "`AnnData` comes with its own persistent HDF5-based file format: `h5ad`. 
If string columns with a small number of categories aren't yet categoricals, `AnnData` will auto-transform to categoricals." 953 | ] 954 | }, 955 | { 956 | "cell_type": "code", 957 | "execution_count": 14, 958 | "metadata": {}, 959 | "outputs": [], 960 | "source": [ 961 | "adata.write('my_results.h5ad', compression=\"gzip\")" 962 | ] 963 | }, 964 | { 965 | "cell_type": "code", 966 | "execution_count": 15, 967 | "metadata": { 968 | "jupyter": { 969 | "outputs_hidden": false 970 | } 971 | }, 972 | "outputs": [ 973 | { 974 | "name": "stdout", 975 | "output_type": "stream", 976 | "text": [ 977 | "X Group\r\n", 978 | "layers Group\r\n", 979 | "obs Group\r\n", 980 | "obsm Group\r\n", 981 | "obsp Group\r\n", 982 | "uns Group\r\n", 983 | "var Group\r\n", 984 | "varm Group\r\n", 985 | "varp Group\r\n" 986 | ] 987 | } 988 | ], 989 | "source": [ 990 | "!h5ls 'my_results.h5ad'" 991 | ] 992 | }, 993 | { 994 | "cell_type": "markdown", 995 | "metadata": {}, 996 | "source": [ 997 | "## Wrapping up the introduction" 998 | ] 999 | }, 1000 | { 1001 | "cell_type": "markdown", 1002 | "metadata": {}, 1003 | "source": [ 1004 | "AnnData has become the standard for single-cell analysis in Python and for good reason -- it's straightforward to use and facilitates more reproducible analyses with its key-based storage. It's even becoming easier to convert to the popular R-based formats for single-cell analysis.\n", 1005 | "\n", 1006 | "Keep reading on to better understand \"views\", on-disk backing, and other details." 1007 | ] 1008 | }, 1009 | { 1010 | "cell_type": "markdown", 1011 | "metadata": {}, 1012 | "source": [ 1013 | "## Views and copies" 1014 | ] 1015 | }, 1016 | { 1017 | "cell_type": "markdown", 1018 | "metadata": {}, 1019 | "source": [ 1020 | "For the fun of it, let's look at another metadata use case. Imagine that the observations come from instruments characterizing 10 readouts in a multi-year study with samples taken from different subjects at different sites. 
We'd typically get that information in some format and then store it in a DataFrame:" 1021 | ] 1022 | }, 1023 | { 1024 | "cell_type": "code", 1025 | "execution_count": 16, 1026 | "metadata": { 1027 | "jupyter": { 1028 | "outputs_hidden": false 1029 | } 1030 | }, 1031 | "outputs": [], 1032 | "source": [ 1033 | "obs_meta = pd.DataFrame({\n", 1034 | " 'time_yr': np.random.choice([0, 2, 4, 8], adata.n_obs),\n", 1035 | " 'subject_id': np.random.choice(['subject 1', 'subject 2', 'subject 4', 'subject 8'], adata.n_obs),\n", 1036 | " 'instrument_type': np.random.choice(['type a', 'type b'], adata.n_obs),\n", 1037 | " 'site': np.random.choice(['site x', 'site y'], adata.n_obs),\n", 1038 | " },\n", 1039 | " index=adata.obs.index, # these are the same IDs of observations as above!\n", 1040 | ")" 1041 | ] 1042 | }, 1043 | { 1044 | "cell_type": "markdown", 1045 | "metadata": {}, 1046 | "source": [ 1047 | "This is how we join the readout data with the metadata. Of course, the first argument of the following call for `X` could also just be a DataFrame." 1048 | ] 1049 | }, 1050 | { 1051 | "cell_type": "code", 1052 | "execution_count": 17, 1053 | "metadata": { 1054 | "jupyter": { 1055 | "outputs_hidden": false 1056 | } 1057 | }, 1058 | "outputs": [], 1059 | "source": [ 1060 | "adata = ad.AnnData(adata.X, obs=obs_meta, var=adata.var)" 1061 | ] 1062 | }, 1063 | { 1064 | "cell_type": "markdown", 1065 | "metadata": {}, 1066 | "source": [ 1067 | "Now we again have a single data container that keeps track of everything." 
1068 | ] 1069 | }, 1070 | { 1071 | "cell_type": "code", 1072 | "execution_count": 18, 1073 | "metadata": { 1074 | "jupyter": { 1075 | "outputs_hidden": false 1076 | } 1077 | }, 1078 | "outputs": [ 1079 | { 1080 | "name": "stdout", 1081 | "output_type": "stream", 1082 | "text": [ 1083 | "AnnData object with n_obs × n_vars = 100 × 2000\n", 1084 | " obs: 'time_yr', 'subject_id', 'instrument_type', 'site'\n" 1085 | ] 1086 | } 1087 | ], 1088 | "source": [ 1089 | "print(adata)" 1090 | ] 1091 | }, 1092 | { 1093 | "cell_type": "markdown", 1094 | "metadata": {}, 1095 | "source": [ 1096 | "Subsetting the joint data matrix can be important to focus on subsets of variables or observations, or to define train-test splits for a machine learning model." 1097 | ] 1098 | }, 1099 | { 1100 | "cell_type": "markdown", 1101 | "metadata": {}, 1102 | "source": [ 1103 | ":::{note}\n", 1104 | "Similar to numpy arrays, AnnData objects can either hold actual data or reference another `AnnData` object. In the latter case, they are referred to as \"views\".\n", 1105 | "\n", 1106 | "Subsetting AnnData objects always returns views, which has two advantages:\n", 1107 | "\n", 1108 | "- no new memory is allocated\n", 1109 | "- it is possible to modify the underlying AnnData object\n", 1110 | "\n", 1111 | "You can get an actual AnnData object from a view by calling `.copy()` on the view. Usually, this is not necessary, as any modification of elements of a view (calling `.[]` on an attribute of the view) internally calls `.copy()` and makes the view an AnnData object that holds actual data. 
See the example below.\n", 1112 | ":::" 1113 | ] 1114 | }, 1115 | { 1116 | "cell_type": "code", 1117 | "execution_count": 19, 1118 | "metadata": { 1119 | "jupyter": { 1120 | "outputs_hidden": false 1121 | } 1122 | }, 1123 | "outputs": [ 1124 | { 1125 | "data": { 1126 | "text/plain": [ 1127 | "AnnData object with n_obs × n_vars = 100 × 2000\n", 1128 | " obs: 'time_yr', 'subject_id', 'instrument_type', 'site'" 1129 | ] 1130 | }, 1131 | "execution_count": 19, 1132 | "metadata": {}, 1133 | "output_type": "execute_result" 1134 | } 1135 | ], 1136 | "source": [ 1137 | "adata" 1138 | ] 1139 | }, 1140 | { 1141 | "cell_type": "markdown", 1142 | "metadata": {}, 1143 | "source": [ 1144 | "Get access to the first 5 rows for two variables." 1145 | ] 1146 | }, 1147 | { 1148 | "cell_type": "markdown", 1149 | "metadata": {}, 1150 | "source": [ 1151 | ":::{note}\n", 1152 | "Indexing into AnnData will assume that integer arguments to `[]` behave like `.iloc` in pandas, whereas string arguments behave like `.loc`. `AnnData` always assumes string indices.\n", 1153 | ":::" 1154 | ] 1155 | }, 1156 | { 1157 | "cell_type": "code", 1158 | "execution_count": 20, 1159 | "metadata": { 1160 | "jupyter": { 1161 | "outputs_hidden": false 1162 | } 1163 | }, 1164 | "outputs": [ 1165 | { 1166 | "data": { 1167 | "text/plain": [ 1168 | "View of AnnData object with n_obs × n_vars = 5 × 2\n", 1169 | " obs: 'time_yr', 'subject_id', 'instrument_type', 'site'" 1170 | ] 1171 | }, 1172 | "execution_count": 20, 1173 | "metadata": {}, 1174 | "output_type": "execute_result" 1175 | } 1176 | ], 1177 | "source": [ 1178 | "adata[:5, ['Gene_1', 'Gene_3']]" 1179 | ] 1180 | }, 1181 | { 1182 | "cell_type": "markdown", 1183 | "metadata": {}, 1184 | "source": [ 1185 | "This is a view! 
If we want an `AnnData` that holds the data in memory, let's call `.copy()`" 1186 | ] 1187 | }, 1188 | { 1189 | "cell_type": "code", 1190 | "execution_count": 21, 1191 | "metadata": {}, 1192 | "outputs": [], 1193 | "source": [ 1194 | "adata_subset = adata[:5, ['Gene_1', 'Gene_3']].copy()" 1195 | ] 1196 | }, 1197 | { 1198 | "cell_type": "markdown", 1199 | "metadata": {}, 1200 | "source": [ 1201 | "For a view, we can also set the first 3 elements of a column." 1202 | ] 1203 | }, 1204 | { 1205 | "cell_type": "code", 1206 | "execution_count": 22, 1207 | "metadata": { 1208 | "jupyter": { 1209 | "outputs_hidden": false 1210 | } 1211 | }, 1212 | "outputs": [ 1213 | { 1214 | "name": "stdout", 1215 | "output_type": "stream", 1216 | "text": [ 1217 | "[[1.0], [2.0], [1.0]]\n", 1218 | "[[0.0], [0.0], [0.0]]\n" 1219 | ] 1220 | } 1221 | ], 1222 | "source": [ 1223 | "print(adata[:3, 'Gene_1'].X.toarray().tolist())\n", 1224 | "adata[:3, 'Gene_1'].X = [0, 0, 0]\n", 1225 | "print(adata[:3, 'Gene_1'].X.toarray().tolist())" 1226 | ] 1227 | }, 1228 | { 1229 | "cell_type": "markdown", 1230 | "metadata": {}, 1231 | "source": [ 1232 | "If you try to access parts of a view of an AnnData, the content will be auto-copied and a data-storing object will be generated." 
1233 | ] 1234 | }, 1235 | { 1236 | "cell_type": "code", 1237 | "execution_count": 23, 1238 | "metadata": { 1239 | "jupyter": { 1240 | "outputs_hidden": false 1241 | } 1242 | }, 1243 | "outputs": [], 1244 | "source": [ 1245 | "adata_subset = adata[:3, ['Gene_1', 'Gene_2']]" 1246 | ] 1247 | }, 1248 | { 1249 | "cell_type": "code", 1250 | "execution_count": 24, 1251 | "metadata": { 1252 | "jupyter": { 1253 | "outputs_hidden": false 1254 | } 1255 | }, 1256 | "outputs": [ 1257 | { 1258 | "data": { 1259 | "text/plain": [ 1260 | "View of AnnData object with n_obs × n_vars = 3 × 2\n", 1261 | " obs: 'time_yr', 'subject_id', 'instrument_type', 'site'" 1262 | ] 1263 | }, 1264 | "execution_count": 24, 1265 | "metadata": {}, 1266 | "output_type": "execute_result" 1267 | } 1268 | ], 1269 | "source": [ 1270 | "adata_subset" 1271 | ] 1272 | }, 1273 | { 1274 | "cell_type": "code", 1275 | "execution_count": 25, 1276 | "metadata": { 1277 | "jupyter": { 1278 | "outputs_hidden": false 1279 | } 1280 | }, 1281 | "outputs": [ 1282 | { 1283 | "name": "stderr", 1284 | "output_type": "stream", 1285 | "text": [ 1286 | "/var/folders/bd/43q20k0n6z15tdfzxvd22r7c0000gn/T/ipykernel_25768/2955902014.py:1: ImplicitModificationWarning: Trying to modify attribute `.obs` of view, initializing view as actual.\n", 1287 | " adata_subset.obs['foo'] = range(3)\n" 1288 | ] 1289 | } 1290 | ], 1291 | "source": [ 1292 | "adata_subset.obs['foo'] = range(3)" 1293 | ] 1294 | }, 1295 | { 1296 | "cell_type": "markdown", 1297 | "metadata": {}, 1298 | "source": [ 1299 | "Now `adata_subset` stores the actual data and is no longer just a reference to `adata`." 
1300 | ] 1301 | }, 1302 | { 1303 | "cell_type": "code", 1304 | "execution_count": 26, 1305 | "metadata": { 1306 | "jupyter": { 1307 | "outputs_hidden": false 1308 | } 1309 | }, 1310 | "outputs": [ 1311 | { 1312 | "data": { 1313 | "text/plain": [ 1314 | "AnnData object with n_obs × n_vars = 3 × 2\n", 1315 | " obs: 'time_yr', 'subject_id', 'instrument_type', 'site', 'foo'" 1316 | ] 1317 | }, 1318 | "execution_count": 26, 1319 | "metadata": {}, 1320 | "output_type": "execute_result" 1321 | } 1322 | ], 1323 | "source": [ 1324 | "adata_subset" 1325 | ] 1326 | }, 1327 | { 1328 | "cell_type": "markdown", 1329 | "metadata": {}, 1330 | "source": [ 1331 | "Evidently, you can use all of pandas to slice with sequences or boolean indices." 1332 | ] 1333 | }, 1334 | { 1335 | "cell_type": "code", 1336 | "execution_count": 27, 1337 | "metadata": { 1338 | "jupyter": { 1339 | "outputs_hidden": false 1340 | } 1341 | }, 1342 | "outputs": [ 1343 | { 1344 | "data": { 1345 | "text/html": [ 1346 | "
\n", 1347 | "\n", 1360 | "\n", 1361 | " \n", 1362 | " \n", 1363 | " \n", 1364 | " \n", 1365 | " \n", 1366 | " \n", 1367 | " \n", 1368 | " \n", 1369 | " \n", 1370 | " \n", 1371 | " \n", 1372 | " \n", 1373 | " \n", 1374 | " \n", 1375 | " \n", 1376 | " \n", 1377 | " \n", 1378 | " \n", 1379 | " \n", 1380 | " \n", 1381 | " \n", 1382 | " \n", 1383 | " \n", 1384 | " \n", 1385 | " \n", 1386 | " \n", 1387 | " \n", 1388 | " \n", 1389 | " \n", 1390 | " \n", 1391 | " \n", 1392 | " \n", 1393 | " \n", 1394 | " \n", 1395 | " \n", 1396 | " \n", 1397 | " \n", 1398 | " \n", 1399 | " \n", 1400 | " \n", 1401 | " \n", 1402 | " \n", 1403 | " \n", 1404 | " \n", 1405 | " \n", 1406 | " \n", 1407 | "
time_yrsubject_idinstrument_typesite
Cell_14subject 4type bsite y
Cell_24subject 1type asite y
Cell_34subject 1type bsite x
Cell_42subject 1type bsite x
Cell_62subject 1type bsite x
\n", 1408 | "
" 1409 | ], 1410 | "text/plain": [ 1411 | " time_yr subject_id instrument_type site\n", 1412 | "Cell_1 4 subject 4 type b site y\n", 1413 | "Cell_2 4 subject 1 type a site y\n", 1414 | "Cell_3 4 subject 1 type b site x\n", 1415 | "Cell_4 2 subject 1 type b site x\n", 1416 | "Cell_6 2 subject 1 type b site x" 1417 | ] 1418 | }, 1419 | "execution_count": 27, 1420 | "metadata": {}, 1421 | "output_type": "execute_result" 1422 | } 1423 | ], 1424 | "source": [ 1425 | "adata[adata.obs.time_yr.isin([2, 4])].obs.head()" 1426 | ] 1427 | }, 1428 | { 1429 | "cell_type": "markdown", 1430 | "metadata": {}, 1431 | "source": [ 1432 | "(read-partial)=\n", 1433 | "\n", 1434 | "## Partial reading of large data" 1435 | ] 1436 | }, 1437 | { 1438 | "cell_type": "markdown", 1439 | "metadata": {}, 1440 | "source": [ 1441 | "If a single `.h5ad` is very large, you can partially read it into memory by using backed mode:" 1442 | ] 1443 | }, 1444 | { 1445 | "cell_type": "code", 1446 | "execution_count": 28, 1447 | "metadata": {}, 1448 | "outputs": [], 1449 | "source": [ 1450 | "adata = ad.read('my_results.h5ad', backed='r')" 1451 | ] 1452 | }, 1453 | { 1454 | "cell_type": "code", 1455 | "execution_count": 29, 1456 | "metadata": {}, 1457 | "outputs": [ 1458 | { 1459 | "data": { 1460 | "text/plain": [ 1461 | "True" 1462 | ] 1463 | }, 1464 | "execution_count": 29, 1465 | "metadata": {}, 1466 | "output_type": "execute_result" 1467 | } 1468 | ], 1469 | "source": [ 1470 | "adata.isbacked" 1471 | ] 1472 | }, 1473 | { 1474 | "cell_type": "markdown", 1475 | "metadata": {}, 1476 | "source": [ 1477 | "If you do this, you'll need to remember that the `AnnData` object has an open connection to the file used for reading:" 1478 | ] 1479 | }, 1480 | { 1481 | "cell_type": "code", 1482 | "execution_count": 30, 1483 | "metadata": {}, 1484 | "outputs": [ 1485 | { 1486 | "data": { 1487 | "text/plain": [ 1488 | "PosixPath('my_results.h5ad')" 1489 | ] 1490 | }, 1491 | "execution_count": 30, 1492 | "metadata": {}, 
1493 | "output_type": "execute_result" 1494 | } 1495 | ], 1496 | "source": [ 1497 | "adata.filename" 1498 | ] 1499 | }, 1500 | { 1501 | "cell_type": "markdown", 1502 | "metadata": {}, 1503 | "source": [ 1504 | "As we're using it in read-only mode, we can't damage anything. To proceed with this tutorial, we still need to explicitly close it: " 1505 | ] 1506 | }, 1507 | { 1508 | "cell_type": "code", 1509 | "execution_count": 31, 1510 | "metadata": {}, 1511 | "outputs": [], 1512 | "source": [ 1513 | "adata.file.close()" 1514 | ] 1515 | }, 1516 | { 1517 | "cell_type": "markdown", 1518 | "metadata": {}, 1519 | "source": [ 1520 | "As usual, you should rather use `with` statements to avoid dangling open files (up-coming feature)." 1521 | ] 1522 | }, 1523 | { 1524 | "cell_type": "markdown", 1525 | "metadata": {}, 1526 | "source": [ 1527 | "Manipulating the object on disk is possible, but experimental for sparse data. Hence, we leave it out of this tutorial." 1528 | ] 1529 | } 1530 | ], 1531 | "metadata": { 1532 | "anaconda-cloud": {}, 1533 | "kernelspec": { 1534 | "display_name": "Python [conda env:anndata-0.8.0]", 1535 | "language": "python", 1536 | "name": "conda-env-anndata-0.8.0-py" 1537 | }, 1538 | "language_info": { 1539 | "codemirror_mode": { 1540 | "name": "ipython", 1541 | "version": 3 1542 | }, 1543 | "file_extension": ".py", 1544 | "mimetype": "text/x-python", 1545 | "name": "python", 1546 | "nbconvert_exporter": "python", 1547 | "pygments_lexer": "ipython3", 1548 | "version": "3.9.10" 1549 | } 1550 | }, 1551 | "nbformat": 4, 1552 | "nbformat_minor": 4 1553 | } 1554 | -------------------------------------------------------------------------------- /index.rst: -------------------------------------------------------------------------------- 1 | .. include:: README.rst 2 | 3 | 4 | .. 
toctree:: 5 | :maxdepth: 1 6 | 7 | getting-started 8 | annloader 9 | anncollection 10 | anncollection-annloader 11 | anndata_dask_array 12 | awkward-arrays 13 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | scanpydoc 2 | sphinx>=1.4 3 | ipykernel 4 | nbsphinx>=0.4 5 | nbclean 6 | -------------------------------------------------------------------------------- /{read,write}_dispatched.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "f424385e", 6 | "metadata": {}, 7 | "source": [ 8 | "# Dask + Zarr, but Remote!\n", 9 | "\n", 10 | "**Author**: Ilan Gold" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "1201f1a0", 16 | "metadata": {}, 17 | "source": [ 18 | "To begin we need to create a dataset on disk to be used with `dask` in the `zarr` format. We will edit the `chunk_size` argument so that we make fetching expression data for groups of cells more efficient i.e., each access-per-gene over a contiguous group of cells (within the `obs` ordering) will be fast and efficient." 
19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "id": "440aa8fd", 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "import re\n", 29 | "\n", 30 | "import dask.array as da\n", 31 | "import zarr\n", 32 | "\n", 33 | "from anndata.experimental import read_dispatched, write_dispatched, read_elem\n", 34 | "import scanpy as sc" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 2, 40 | "id": "9c2fa9e1", 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "rel_zarr_path = 'data/pbmc3k_processed.zarr'" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 3, 50 | "id": "9e90c05f", 51 | "metadata": {}, 52 | "outputs": [ 53 | { 54 | "data": { 55 | "text/plain": [ 56 | "" 57 | ] 58 | }, 59 | "execution_count": 3, 60 | "metadata": {}, 61 | "output_type": "execute_result" 62 | } 63 | ], 64 | "source": [ 65 | "adata = sc.datasets.pbmc3k_processed()\n", 66 | "adata.write_zarr(f'./{rel_zarr_path}', chunks=[adata.shape[0], 5])\n", 67 | "zarr.consolidate_metadata(f'./{rel_zarr_path}')" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 4, 73 | "id": "4537ae1a", 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "def read_dask(store):\n", 78 | " f = zarr.open(store, mode=\"r\")\n", 79 | "\n", 80 | " def callback(func, elem_name: str, elem, iospec):\n", 81 | " if iospec.encoding_type in (\n", 82 | " \"dataframe\",\n", 83 | " \"csr_matrix\",\n", 84 | " \"csc_matrix\",\n", 85 | " \"awkward-array\",\n", 86 | " ):\n", 87 | " # Preventing recursing inside of these types\n", 88 | " return read_elem(elem)\n", 89 | " elif iospec.encoding_type == \"array\":\n", 90 | " return da.from_zarr(elem)\n", 91 | " else:\n", 92 | " return func(elem)\n", 93 | "\n", 94 | " adata = read_dispatched(f, callback=callback)\n", 95 | "\n", 96 | " return adata" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "id": "3c7e8165", 102 | "metadata": {}, 103 | "source": [ 104 | 
"Before continuing, go to a shell and run `python3 -m http.server 8080` out of the directory containing this notebook. This will allow you to observe how different requests are handled by a file server. After this, run the next cell to load the data via the server, using dask arrays \"over the wire\" - note that this functionality is enabled by `dask`'s deep integration with `zarr`, not `hdf5`!" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 5, 110 | "id": "fd9d864b", 111 | "metadata": {}, 112 | "outputs": [ 113 | { 114 | "data": { 115 | "text/html": [ 116 | "\n", 117 | " \n", 118 | " \n", 151 | " \n", 188 | " \n", 189 | "
\n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | "
Array Chunk
Bytes 18.50 MiB 51.52 kiB
Shape (2638, 1838) (2638, 5)
Dask graph 368 chunks in 2 graph layers
Data type float32 numpy.ndarray
\n", 150 | "
\n", 152 | " \n", 153 | "\n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | "\n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | "\n", 180 | " \n", 181 | " \n", 182 | "\n", 183 | " \n", 184 | " 1838\n", 185 | " 2638\n", 186 | "\n", 187 | "
" 190 | ], 191 | "text/plain": [ 192 | "dask.array" 193 | ] 194 | }, 195 | "execution_count": 5, 196 | "metadata": {}, 197 | "output_type": "execute_result" 198 | } 199 | ], 200 | "source": [ 201 | "adata_dask = read_dask(f'http://127.0.0.1:8080/{rel_zarr_path}')\n", 202 | "adata_dask.X" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": 6, 208 | "id": "5db37304", 209 | "metadata": {}, 210 | "outputs": [ 211 | { 212 | "data": { 213 | "text/html": [ 214 | "\n", 215 | " \n", 216 | " \n", 249 | " \n", 268 | " \n", 269 | "
\n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | "
Array Chunk
Bytes 41.22 kiB 41.22 kiB
Shape (2638, 2) (2638, 2)
Dask graph 1 chunks in 2 graph layers
Data type float64 numpy.ndarray
\n", 248 | "
\n", 250 | " \n", 251 | "\n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | "\n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | "\n", 260 | " \n", 261 | " \n", 262 | "\n", 263 | " \n", 264 | " 2\n", 265 | " 2638\n", 266 | "\n", 267 | "
" 270 | ], 271 | "text/plain": [ 272 | "dask.array" 273 | ] 274 | }, 275 | "execution_count": 6, 276 | "metadata": {}, 277 | "output_type": "execute_result" 278 | } 279 | ], 280 | "source": [ 281 | "adata_dask.obsm['X_draw_graph_fr']" 282 | ] 283 | }, 284 | { 285 | "cell_type": "markdown", 286 | "id": "3c19ba55", 287 | "metadata": {}, 288 | "source": [ 289 | "Now let's make some requests - slicing over the `obs` axis should be efficient." 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": 7, 295 | "id": "d724bd2a", 296 | "metadata": {}, 297 | "outputs": [ 298 | { 299 | "data": { 300 | "text/plain": [ 301 | "array([[-0.4751688 ],\n", 302 | " [-0.68339145],\n", 303 | " [-0.52097213],\n", 304 | " ...,\n", 305 | " [-0.40973732],\n", 306 | " [-0.35466102],\n", 307 | " [-0.42529213]], dtype=float32)" 308 | ] 309 | }, 310 | "execution_count": 7, 311 | "metadata": {}, 312 | "output_type": "execute_result" 313 | } 314 | ], 315 | "source": [ 316 | "adata_dask.X[:, adata.var.index == 'C1orf86'].compute()" 317 | ] 318 | }, 319 | { 320 | "cell_type": "markdown", 321 | "id": "9ab64b9b", 322 | "metadata": {}, 323 | "source": [ 324 | "Indeed, you should only have one additional request now, which looks something like this:\n", 325 | "\n", 326 | "```\n", 327 | "::ffff:127.0.0.1 - - [13/Feb/2023 20:00:36] \"GET /data/pbmc3k_processed.zarr/X/0.0 HTTP/1.1\" 200 -\n", 328 | "```\n", 329 | "\n", 330 | "What about over multiple genes? `adata.var['n_cells'] > 1000 == 59` so this should be less than 59 requests (indeed there are)!" 
331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": 8, 336 | "id": "68348d05", 337 | "metadata": {}, 338 | "outputs": [ 339 | { 340 | "data": { 341 | "text/plain": [ 342 | "array([[ 0.53837276, -0.862139 , -1.1624558 , ..., 0.02576654,\n", 343 | " -0.7214901 , -0.86157244],\n", 344 | " [-0.39546633, -1.4468503 , -0.23953451, ..., -1.8439665 ,\n", 345 | " -0.95835304, -0.04634313],\n", 346 | " [ 1.036884 , -0.82907706, 0.13356175, ..., -0.91740227,\n", 347 | " 1.2407869 , -0.95057184],\n", 348 | " ...,\n", 349 | " [ 0.9374183 , -0.63782793, 1.4828881 , ..., -0.74470884,\n", 350 | " 1.4084249 , 1.8403655 ],\n", 351 | " [ 1.4825792 , -0.48758882, 1.2520502 , ..., -0.54854494,\n", 352 | " -0.61547786, -0.68133515],\n", 353 | " [ 1.2934785 , 1.2127419 , 1.2300901 , ..., -0.5996045 ,\n", 354 | " 1.1535971 , -0.8018701 ]], dtype=float32)" 355 | ] 356 | }, 357 | "execution_count": 8, 358 | "metadata": {}, 359 | "output_type": "execute_result" 360 | } 361 | ], 362 | "source": [ 363 | "adata_dask.X[:, adata.var['n_cells'] > 1000].compute()" 364 | ] 365 | }, 366 | { 367 | "cell_type": "markdown", 368 | "id": "e4bbb07e", 369 | "metadata": {}, 370 | "source": [ 371 | "Now what if we chunk differently, larger? There should be fewer requests made to the server, although now each request will be larger - a tradeoff that needs to be tailored to each use-case!" 
372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": 9, 377 | "id": "8ca0e661", 378 | "metadata": {}, 379 | "outputs": [ 380 | { 381 | "data": { 382 | "text/plain": [ 383 | "array([[ 0.53837276, -0.862139 , -1.1624558 , ..., 0.02576654,\n", 384 | " -0.7214901 , -0.86157244],\n", 385 | " [-0.39546633, -1.4468503 , -0.23953451, ..., -1.8439665 ,\n", 386 | " -0.95835304, -0.04634313],\n", 387 | " [ 1.036884 , -0.82907706, 0.13356175, ..., -0.91740227,\n", 388 | " 1.2407869 , -0.95057184],\n", 389 | " ...,\n", 390 | " [ 0.9374183 , -0.63782793, 1.4828881 , ..., -0.74470884,\n", 391 | " 1.4084249 , 1.8403655 ],\n", 392 | " [ 1.4825792 , -0.48758882, 1.2520502 , ..., -0.54854494,\n", 393 | " -0.61547786, -0.68133515],\n", 394 | " [ 1.2934785 , 1.2127419 , 1.2300901 , ..., -0.5996045 ,\n", 395 | " 1.1535971 , -0.8018701 ]], dtype=float32)" 396 | ] 397 | }, 398 | "execution_count": 9, 399 | "metadata": {}, 400 | "output_type": "execute_result" 401 | } 402 | ], 403 | "source": [ 404 | "adata.write_zarr(f'./{rel_zarr_path}', chunks=[adata.shape[0], 25])\n", 405 | "zarr.consolidate_metadata(f'./{rel_zarr_path}')\n", 406 | "adata_dask = read_dask(f'http://127.0.0.1:8080/{rel_zarr_path}')\n", 407 | "\n", 408 | "adata_dask.X[:, adata.var['n_cells'] > 1000].compute()" 409 | ] 410 | }, 411 | { 412 | "cell_type": "markdown", 413 | "id": "a41a0d18", 414 | "metadata": {}, 415 | "source": [ 416 | "Now what if we had a `layer` that we wanted to chunk in a custom way, e.g. chunked across all cells, by gene? Just use `write_dispatched` as we did with `read_dispatched`!"
417 | ] 418 | }, 419 | { 420 | "cell_type": "code", 421 | "execution_count": 10, 422 | "id": "dc1aa5c5", 423 | "metadata": {}, 424 | "outputs": [], 425 | "source": [ 426 | "adata.layers['scaled'] = adata.X.copy()\n", 427 | "sc.pp.scale(adata, layer='scaled')" 428 | ] 429 | }, 430 | { 431 | "cell_type": "code", 432 | "execution_count": 11, 433 | "id": "333300f3", 434 | "metadata": {}, 435 | "outputs": [ 436 | { 437 | "data": { 438 | "text/plain": [ 439 | "" 440 | ] 441 | }, 442 | "execution_count": 11, 443 | "metadata": {}, 444 | "output_type": "execute_result" 445 | } 446 | ], 447 | "source": [ 448 | "def write_chunked(func, store, k, elem, dataset_kwargs, iospec):\n", 449 | " \"\"\"Write callback that chunks X and layers\"\"\"\n", 450 | "\n", 451 | " def set_chunks(d, chunks=None):\n", 452 | " \"\"\"Helper function for setting dataset_kwargs. Makes a copy of d.\"\"\"\n", 453 | " d = dict(d)\n", 454 | " if chunks is not None:\n", 455 | " d[\"chunks\"] = chunks\n", 456 | " else:\n", 457 | " d.pop(\"chunks\", None) \n", 458 | " return d\n", 459 | "\n", 460 | " if iospec.encoding_type == \"array\":\n", 461 | " if 'layers' in k or k.endswith('X'):\n", 462 | " dataset_kwargs = set_chunks(dataset_kwargs, (adata.shape[0], 25))\n", 463 | " else:\n", 464 | " dataset_kwargs = set_chunks(dataset_kwargs, None)\n", 465 | "\n", 466 | " func(store, k, elem, dataset_kwargs=dataset_kwargs)\n", 467 | "\n", 468 | "output_zarr_path = \"data/pbmc3k_scaled.zarr\"\n", 469 | "z = zarr.open_group(output_zarr_path)\n", 470 | "\n", 471 | "write_dispatched(z, \"/\", adata, callback=write_chunked)\n", 472 | "zarr.consolidate_metadata(f'./{output_zarr_path}')" 473 | ] 474 | }, 475 | { 476 | "cell_type": "code", 477 | "execution_count": 12, 478 | "id": "4e182800", 479 | "metadata": {}, 480 | "outputs": [], 481 | "source": [ 482 | "adata_dask = read_dask(f'http://127.0.0.1:8080/{output_zarr_path}')" 483 | ] 484 | }, 485 | { 486 | "cell_type": "code", 487 | "execution_count": 13, 488 | "id": 
"530eae20", 489 | "metadata": {}, 490 | "outputs": [ 491 | { 492 | "data": { 493 | "text/html": [ 494 | "\n", 495 | " \n", 496 | " \n", 529 | " \n", 566 | " \n", 567 | "
\n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | "
Array Chunk
Bytes 18.50 MiB 257.62 kiB
Shape (2638, 1838) (2638, 25)
Dask graph 74 chunks in 2 graph layers
Data type float32 numpy.ndarray
\n", 528 | "
\n", 530 | " \n", 531 | "\n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | "\n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | "\n", 558 | " \n", 559 | " \n", 560 | "\n", 561 | " \n", 562 | " 1838\n", 563 | " 2638\n", 564 | "\n", 565 | "
" 568 | ], 569 | "text/plain": [ 570 | "dask.array" 571 | ] 572 | }, 573 | "execution_count": 13, 574 | "metadata": {}, 575 | "output_type": "execute_result" 576 | } 577 | ], 578 | "source": [ 579 | "adata_dask.layers['scaled']" 580 | ] 581 | } 582 | ], 583 | "metadata": { 584 | "kernelspec": { 585 | "display_name": "Python3", 586 | "language": "python", 587 | "name": "python3" 588 | }, 589 | "language_info": { 590 | "codemirror_mode": { 591 | "name": "ipython", 592 | "version": 3 593 | }, 594 | "file_extension": ".py", 595 | "mimetype": "text/x-python", 596 | "name": "python", 597 | "nbconvert_exporter": "python", 598 | "pygments_lexer": "ipython3", 599 | "version": "3.9.12" 600 | } 601 | }, 602 | "nbformat": 4, 603 | "nbformat_minor": 5 604 | } 605 | --------------------------------------------------------------------------------