├── .editorconfig
├── .gitattributes
├── .github
└── workflows
│ └── ci.yml
├── .gitignore
├── CHANGELOG.md
├── CITATION.cff
├── LICENSE
├── README.md
├── ismb-biovis-2023-poster.jpg
├── notebooks
├── abundance-analysis.ipynb
├── getting-started.ipynb
└── lui-2021.ipynb
├── pyproject.toml
├── src
└── cev
│ ├── __init__.py
│ ├── _cli.py
│ ├── _compare.py
│ ├── _compare_metric_dropdown.py
│ ├── _compare_selection_type_dropdown.py
│ ├── _compare_zoom_toggle.py
│ ├── _embedding.py
│ ├── _embedding_comparison_widget.py
│ ├── _embedding_widget.py
│ ├── _version.py
│ ├── _widget_utils.py
│ ├── components
│ ├── __init__.py
│ ├── _html_widget.py
│ ├── _marker_composition_logo.py
│ ├── _marker_selection_indicator.py
│ └── _width_optimizer.py
│ ├── metrics.py
│ └── widgets.py
├── tests
├── test_cev.py
└── test_widget_utils.py
└── uv.lock
/.editorconfig:
--------------------------------------------------------------------------------
1 | root = true
2 |
3 | [*]
4 | end_of_line = lf
5 | insert_final_newline = true
6 |
7 | [*.{js,py}]
8 | charset = utf-8
9 |
10 | [Snakefile]
11 | indent_style = space
12 | indent_size = 4
13 |
14 | [*.py]
15 | indent_style = space
16 | indent_size = 4
17 |
18 | [*.js]
19 | indent_style = tab
20 | indent_size = 4
21 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.ipynb linguist-vendored
2 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: CI
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 | tags:
8 | - "v*"
9 | pull_request:
10 | workflow_dispatch:
11 |
12 | jobs:
13 |
14 | Lint:
15 | runs-on: ubuntu-latest
16 | steps:
17 | - uses: actions/checkout@v3
18 | - uses: astral-sh/setup-uv@v3
19 | with:
20 | version: "0.5.x"
21 | - run: |
22 | uv run ruff check
23 |         uv run ruff format --check
24 |
25 | Test:
26 | runs-on: ubuntu-latest
27 | strategy:
28 | matrix:
29 | python-version:
30 | - "3.8"
31 | - "3.9"
32 | - "3.10"
33 | - "3.11"
34 | steps:
35 | - uses: actions/checkout@v3
36 | - run: echo "${{ matrix.python-version }}" > .python-version
37 | - uses: actions/setup-python@v5
38 | with:
39 | python-version-file: ".python-version"
40 | - uses: astral-sh/setup-uv@v3
41 | with:
42 | version: "0.5.x"
43 | - run: uv run pytest --color=yes
44 |
45 | Release:
46 | if: startsWith(github.ref, 'refs/tags/')
47 | needs: [Lint, Test]
48 | runs-on: ubuntu-latest
49 | steps:
50 | - uses: actions/checkout@v3
51 |
52 | - uses: astral-sh/setup-uv@v3
53 | with:
54 | version: "0.5.x"
55 |
56 | - run: |
57 | uv build
58 | uvx twine check dist/*
59 | ls -lh dist
60 |
61 | - name: Publish to PyPI
62 | run: uvx twine upload dist/*
63 | env:
64 | TWINE_USERNAME: __token__
65 | TWINE_PASSWORD: ${{ secrets.TWINE_API_KEY }}
66 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | node_modules/
3 | data/
4 | .snakemake/
5 | .ipynb_checkpoints/
6 | *.egg-info/
7 | .vite
8 | dist/
9 | mair/
10 | .DS_Store
11 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # 0.2.1
2 |
3 | - Fix: don't contribute self in the neighborhood metric ([#42](https://github.com/OzetteTech/comparative-embedding-visualization/pull/42))
4 | - Fix: neighborhood legend labels
5 |
6 | # 0.2.0
7 |
8 | - Feat: update Jupyter Scatter and activate tooltips
9 |
10 | # 0.1.1
11 |
12 | - Fix: allow customizing embeddings via `EmbeddingComparisonWidget()`
13 |
14 | # 0.1.0
15 |
16 | - First release
17 |
--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
1 | cff-version: "1.2.0"
2 | title: "A General Framework for Comparing Embedding Visualizations Across Class-Label Hierarchies"
3 | authors:
4 | - given-names: "Trevor"
5 | family-names: "Manz"
6 | orcid: "https://orcid.org/0000-0001-7694-5164"
7 | affiliation: "Harvard Medical School"
8 | - given-names: "Fritz"
9 | family-names: "Lekschas"
10 | orcid: "https://orcid.org/0000-0001-8432-4835"
11 | affiliation: "Ozette Technologies"
12 | - given-names: "Evan"
13 | family-names: "Greene"
14 | affiliation: "Ozette Technologies"
15 | - given-names: "Greg"
16 | family-names: "Finak"
17 | orcid: "https://orcid.org/0000-0003-4341-9090"
18 | affiliation: "Ozette Technologies"
19 | - given-names: "Nils"
20 | family-names: "Gehlenborg"
21 | affiliation: "Harvard Medical School"
22 | orcid: "https://orcid.org/0000-0003-0327-8297"
23 | url: "https://github.com/OzetteTech/comparative-embedding-visualization"
24 | message: If you use this software, please cite our article in the
25 | IEEE Transactions on Visualization and Computer Graphics.
26 | preferred-citation:
27 | type: article
28 | title: "A General Framework for Comparing Embedding Visualizations Across Class-Label Hierarchies"
29 | authors:
30 | - given-names: "Trevor"
31 | family-names: "Manz"
32 | orcid: "https://orcid.org/0000-0001-7694-5164"
33 | affiliation: "Harvard Medical School"
34 | - given-names: "Fritz"
35 | family-names: "Lekschas"
36 | orcid: "https://orcid.org/0000-0001-8432-4835"
37 | affiliation: "Ozette Technologies"
38 | - given-names: "Evan"
39 | family-names: "Greene"
40 | affiliation: "Ozette Technologies"
41 | - given-names: "Greg"
42 | family-names: "Finak"
43 | orcid: "https://orcid.org/0000-0003-4341-9090"
44 | affiliation: "Ozette Technologies"
45 | - given-names: "Nils"
46 | family-names: "Gehlenborg"
47 | affiliation: "Harvard Medical School"
48 | orcid: "https://orcid.org/0000-0003-0327-8297"
49 | date-published: 2024-09-10
50 | doi: "10.1109/TVCG.2024.3456370"
51 | journal: "Transactions on Visualization and Computer Graphics"
52 | publisher:
53 | name: IEEE
54 | url: "https://ieeexplore.ieee.org/document/10672535"
55 | month: 9
56 | year: 2024
57 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright 2022 Ozette Technologies
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | Comparative Embedding Visualization with cev
3 |
4 |
5 |
6 |
7 |
8 | [](https://ozette.com/)
9 | [](https://pypi.org/project/cev/)
10 | [](https://github.com/OzetteTech/comparative-embedding-visualization/actions?query=workflow%3ARelease)
11 | [](notebooks)
12 | [](ismb-biovis-2023-poster.jpg)
13 |
14 |
15 |
16 |
17 |
18 | cev
is an interactive Jupyter widget for comparing a pair of 2D embeddings with shared labels.
Its novel metric makes it possible to surface differences in label confusion, neighborhood composition, and label size.
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 | 
27 |
28 | The figure shows data from [Mair et al. (2022)](https://doi.org/10.1038/s41586-022-04718-w) that were analyzed with [Greene et al.'s (2021) FAUST method](https://doi.org/10.1016/j.patter.2021.100372).
The embeddings were generated with [Greene et al.'s (2021) annotation transformation](https://github.com/flekschas-ozette/ismb-biovis-2022) and [UMAP](https://github.com/lmcinnes/umap).
29 |
30 |
31 |
32 | `cev` is implemented with [anywidget](https://anywidget.dev) and builds upon [jupyter-scatter](https://github.com/flekschas/jupyter-scatter/).
33 |
34 |
35 |
36 | ## Quick Start
37 |
38 | The **cev** package has a CLI to quickly try out a demo of the comparison widget in JupyterLab. It requires [uv](https://astral.sh/uv) to be installed.
39 |
40 | ```sh
41 | uvx --python 3.11 cev demo # Downloads datasets and launches Jupyter Lab
42 | ```
43 |
44 | ## Installation
45 |
46 | > **Warning**: `cev` is new and under active development. It is not yet ready for production and APIs are subject to change.
47 |
48 | ```sh
49 | pip install cev
50 | ```
51 |
52 | ## Getting Started
53 |
54 | ```py
55 | import pandas as pd
56 | from cev.widgets import Embedding, EmbeddingComparisonWidget
57 |
58 | umap_embedding = Embedding.from_ozette(df=pd.read_parquet("../data/mair-2022-tissue-138-umap.pq"))
59 | ozette_embedding = Embedding.from_ozette(df=pd.read_parquet("../data/mair-2022-tissue-138-ozette.pq"))
60 |
61 | umap_vs_ozette = EmbeddingComparisonWidget(
62 | umap_embedding,
63 | ozette_embedding,
64 | titles=["Standard UMAP", "Annotation-Transformed UMAP"],
65 | metric="confusion",
66 | selection="synced",
67 | auto_zoom=True,
68 | row_height=320,
69 | )
70 | umap_vs_ozette
71 | ```
72 |
73 |
74 |
75 |
76 | See [notebooks/getting-started.ipynb](notebooks/getting-started.ipynb) for the complete example.
77 |
78 | ## Development
79 |
80 | We use [`uv`](https://astral.sh/uv) for development.
81 |
82 | ```sh
83 | uv run jupyter lab
84 | ```
85 |
86 | ### Commands Cheatsheet
87 |
88 | | Command | Action |
89 | | :--------------------- | :------------------------------------------------------------------ |
90 | | `uv run ruff format` | Format the source code. |
91 | | `uv run ruff check`    | Check the source code for linting issues.                           |
92 | | `uv run pytest` | Run unit tests with `pytest` in base environment. |
93 |
94 |
95 | ## Release
96 |
97 | Releases are triggered via tagged commits.
98 |
99 | ```
100 | git tag -a vX.X.X -m "vX.X.X"
101 | git push --follow-tags
102 | ```
103 |
104 | ## License
105 |
106 | `cev` is distributed under the terms of the [Apache License 2.0](LICENSE).
107 |
108 | ## Citation
109 |
110 | If you use `cev` in your research, please cite the following publication:
111 |
112 | ```bibtex
113 | @article{manz2024general,
114 | title = {A General Framework for Comparing Embedding Visualizations Across Class-Label Hierarchies},
115 | author = {Trevor Manz and Fritz Lekschas and Evan Greene and Greg Finak and Nils Gehlenborg},
116 | url = {https://doi.org/10.1109/TVCG.2024.3456370},
117 | doi = {10.1109/TVCG.2024.3456370},
118 | journal = {IEEE Transactions on Visualization and Computer Graphics},
119 | series = {VIS ’24},
120 | publisher = {IEEE},
121 | year = {2024},
122 | month = {9},
123 | pages = {1-11}
124 | }
125 | ```
126 |
--------------------------------------------------------------------------------
/ismb-biovis-2023-poster.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OzetteTech/comparative-embedding-visualization/844676b58725b1fc54407ac615013e1208a4e572/ismb-biovis-2023-poster.jpg
--------------------------------------------------------------------------------
/notebooks/abundance-analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "a6801dc8-ef42-45c9-b443-6498c63d7396",
6 | "metadata": {},
7 | "source": [
8 | "# Ozette Abundance Metric Examples\n",
9 | "\n",
10 | "In this Notebook we're going to use the _Abundance_ metric on three Ozette-embedded studies to find differentially-abundant phenotypes."
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": null,
16 | "id": "52eb3296-0cbc-4ead-b386-cc23ce16345d",
17 | "metadata": {
18 | "tags": []
19 | },
20 | "outputs": [],
21 | "source": [
22 | "%load_ext autoreload\n",
23 | "%autoreload 2"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": null,
29 | "id": "f923c828-da89-44fa-a128-963b7be0efab",
30 | "metadata": {
31 | "tags": []
32 | },
33 | "outputs": [],
34 | "source": [
35 | "import pathlib\n",
36 | "\n",
37 | "import pandas as pd\n",
38 | "\n",
39 | "from cev.widgets import Embedding, EmbeddingComparisonWidget\n",
40 | "\n",
41 | "\n",
42 | "def get_embedding(folder: str, sample: str):\n",
43 | " return Embedding.from_ozette(\n",
44 | " df=pd.read_parquet(\n",
45 | " pathlib.Path.cwd() / \"..\" / \"data\" / f\"{folder}\" / f\"{sample}.parquet\"\n",
46 | " )\n",
47 | " )"
48 | ]
49 | },
50 | {
51 | "cell_type": "markdown",
52 | "id": "00e6670f-5831-46b9-b713-5b0035594d43",
53 | "metadata": {},
54 | "source": [
55 | "# Melanoma Study\n",
56 | "\n",
57 | "### Distinct predictive biomarker candidates for response to anti-CTLA-4 and anti-PD-1 immunotherapy in melanoma patients\n",
58 | "\n",
59 | "Subrahmanyam et al., 2018. https://pubmed.ncbi.nlm.nih.gov/29510697/\n",
60 | "\n",
61 | "In this example we're going to compare phenotypes between a pair of unstimulated Pembrolizumab responder and non-responder samples."
62 | ]
63 | },
64 | {
65 | "cell_type": "code",
66 | "execution_count": null,
67 | "id": "9eb8172a-3e2c-410d-9c6e-14a905e87498",
68 | "metadata": {
69 | "tags": []
70 | },
71 | "outputs": [],
72 | "source": [
73 | "non_responder_embedding = get_embedding(\"subrahmanyam-2018\", \"OZEXPSMPL_782\")\n",
74 | "responder_embedding = get_embedding(\"subrahmanyam-2018\", \"OZEXPSMPL_804\")\n",
75 | "\n",
76 | "melanoma_comparison = EmbeddingComparisonWidget(\n",
77 | " non_responder_embedding,\n",
78 | " responder_embedding,\n",
79 | " titles=[\"Non-Responder\", \"Responder\"],\n",
80 | " metric=\"abundance\",\n",
81 | " selection=\"phenotype\",\n",
82 | " auto_zoom=True,\n",
83 | " row_height=360,\n",
84 | ")\n",
85 | "melanoma_comparison"
86 | ]
87 | },
88 | {
89 | "cell_type": "markdown",
90 | "id": "c05e7375-07eb-4096-8bd5-9907c8d1248a",
91 | "metadata": {},
92 | "source": [
93 | "**Phenotype 1:** should be more abundant in `responder` (right) compared to `non-responder` (left)"
94 | ]
95 | },
96 | {
97 | "cell_type": "code",
98 | "execution_count": null,
99 | "id": "fc8158f5-0cbc-4731-8d6f-e49968f76556",
100 | "metadata": {
101 | "tags": []
102 | },
103 | "outputs": [],
104 | "source": [
105 | "melanoma_comparison.select(\n",
106 | " \"CD8-GranzymeB-CD27+CD3+CD28+CD19-CD57-CD127+CD33-CD45RA-CD4+CD14-HLADR-CD20-CCR7+CD56-IL2-CD16-TNFa-MIP1b-CD154+GMCSF-PDL1-CD107a-IL17-Perforin-CD69+CTLA4-PDL2-PD1-TCRgd-IFNg-CD38-CD25-IL10-IL4-\"\n",
107 | ")"
108 | ]
109 | },
110 | {
111 | "cell_type": "markdown",
112 | "id": "356a7f7c-c2f8-4b74-8299-245f4b7d7b57",
113 | "metadata": {},
114 | "source": [
115 | "**Phenotype 2:** should be more abundant in `responder` (right) compared to `non-responder` (left)"
116 | ]
117 | },
118 | {
119 | "cell_type": "code",
120 | "execution_count": null,
121 | "id": "ae44dea4-3b21-45ec-b184-1b003ce626f0",
122 | "metadata": {
123 | "tags": []
124 | },
125 | "outputs": [],
126 | "source": [
127 | "melanoma_comparison.select(\n",
128 | " \"CD8-GranzymeB+CD27-CD3-CD28-CD19-CD57+CD127-CD33-CD45RA+CD4-CD14-HLADR-CD20-CCR7-CD56+IL2-CD16+TNFa-MIP1b+CD154-GMCSF-PDL1-CD107a-IL17-Perforin+CD69+CTLA4-PDL2+PD1-TCRgd-IFNg-CD38+CD25-IL10-IL4-\"\n",
129 | ")"
130 | ]
131 | },
132 | {
133 | "cell_type": "markdown",
134 | "id": "9f63edea-a9c6-4288-8fe1-6d0bfeed937a",
135 | "metadata": {},
136 | "source": [
137 | "**Phenotype 3:** should be more abundant in `responder` (right) compared to `non-responder` (left)"
138 | ]
139 | },
140 | {
141 | "cell_type": "code",
142 | "execution_count": null,
143 | "id": "c2f944d8-fa87-47ec-97af-af7a775b696c",
144 | "metadata": {
145 | "tags": []
146 | },
147 | "outputs": [],
148 | "source": [
149 | "melanoma_comparison.select(\n",
150 | " \"CD8-GranzymeB+CD27-CD3-CD28-CD19-CD57+CD127-CD33-CD45RA+CD4-CD14-HLADR-CD20-CCR7-CD56+IL2-CD16+TNFa-MIP1b+CD154-GMCSF-PDL1-CD107a-IL17-Perforin+CD69-CTLA4-PDL2+PD1-TCRgd-IFNg-CD38+CD25-IL10-IL4-\"\n",
151 | ")"
152 | ]
153 | },
154 | {
155 | "cell_type": "markdown",
156 | "id": "4e3b6b2b-e1ce-48d8-8449-5c045e1b274b",
157 | "metadata": {
158 | "tags": []
159 | },
160 | "source": [
161 | "**Phenotype 4:** should be more abundant in `responder` (right) compared to `non-responder` (left)"
162 | ]
163 | },
164 | {
165 | "cell_type": "code",
166 | "execution_count": null,
167 | "id": "c8c0e834-205d-47ac-965f-a3953bfb611c",
168 | "metadata": {
169 | "tags": []
170 | },
171 | "outputs": [],
172 | "source": [
173 | "melanoma_comparison.select(\n",
174 | " \"CD8-GranzymeB+CD27-CD3-CD28-CD19-CD57+CD127-CD33-CD45RA+CD4-CD14-HLADR-CD20-CCR7-CD56+IL2-CD16+TNFa-MIP1b-CD154-GMCSF-PDL1-CD107a-IL17-Perforin+CD69-CTLA4-PDL2-PD1-TCRgd-IFNg-CD38+CD25-IL10-IL4-\"\n",
175 | ")"
176 | ]
177 | },
178 | {
179 | "cell_type": "markdown",
180 | "id": "a494fdc5-6c47-4795-b652-84c8a4d4736a",
181 | "metadata": {
182 | "tags": []
183 | },
184 | "source": [
185 | "# Cancer Study\n",
186 | "\n",
187 | "### Extricating human tumour immune alterations from tissue inflammation\n",
188 | "\n",
189 | "Mair et al., 2022. https://www.nature.com/articles/s41586-022-04718-w\n",
190 | "\n",
191 | "In this example we're going to compare phenotypes between a pair of tumor and tissue samples."
192 | ]
193 | },
194 | {
195 | "cell_type": "code",
196 | "execution_count": null,
197 | "id": "08c90f9d-ef19-4f33-a84d-9ff36d3b1bc4",
198 | "metadata": {
199 | "tags": []
200 | },
201 | "outputs": [],
202 | "source": [
203 | "tissue_embedding = get_embedding(\"mair-2022\", \"OZEXPSMPL_26155\")\n",
204 | "tumor_embedding = get_embedding(\"mair-2022\", \"OZEXPSMPL_26146\")\n",
205 | "\n",
206 | "cancer_comparison = EmbeddingComparisonWidget(\n",
207 | " tissue_embedding,\n",
208 | " tumor_embedding,\n",
209 | " titles=[\"Tissue (Mucosa)\", \"Tumor\"],\n",
210 | " metric=\"abundance\",\n",
211 | " selection=\"phenotype\",\n",
212 | " auto_zoom=True,\n",
213 | " row_height=360,\n",
214 | ")\n",
215 | "cancer_comparison"
216 | ]
217 | },
218 | {
219 | "cell_type": "markdown",
220 | "id": "8aa9b0c4-9c58-47bb-a634-46da604f2d40",
221 | "metadata": {},
222 | "source": [
223 | "**CD8 T-Cell Phenotype** should be more abundant in `tissue` (left) compared to `tumor` (right)"
224 | ]
225 | },
226 | {
227 | "cell_type": "code",
228 | "execution_count": null,
229 | "id": "928d70df-0a2e-476d-b35d-984105a399b8",
230 | "metadata": {},
231 | "outputs": [],
232 | "source": [
233 | "cancer_comparison.select(\n",
234 | " \"CD4-CD8+CD3+CD45RA+CD27+CD19-CD103-CD28-CD69+PD1+HLADR-GranzymeB-CD25-ICOS-TCRgd-CD38-CD127-Tim3-\"\n",
235 | ")"
236 | ]
237 | },
238 | {
239 | "cell_type": "markdown",
240 | "id": "163211c0-b82a-4b9b-84b7-fc6a305290d2",
241 | "metadata": {},
242 | "source": [
243 | "**CD4 T-Cell Phenotype** should be more abundant in `tumor` (right) compared to `tissue` (left)"
244 | ]
245 | },
246 | {
247 | "cell_type": "code",
248 | "execution_count": null,
249 | "id": "ec3f14c6-8daa-41c9-87e5-6d5cca6f3d53",
250 | "metadata": {
251 | "tags": []
252 | },
253 | "outputs": [],
254 | "source": [
255 | "cancer_comparison.select(\n",
256 | " \"CD4+CD8-CD3+CD45RA-CD27+CD19-CD103-CD28+CD69+PD1+HLADR-GranzymeB-CD25+ICOS+TCRgd-CD38-CD127-Tim3+\"\n",
257 | ")"
258 | ]
259 | },
260 | {
261 | "cell_type": "markdown",
262 | "id": "40ae8012-f44d-4720-b714-b4766c6da98c",
263 | "metadata": {
264 | "tags": []
265 | },
266 | "source": [
267 | "# ICS Study\n",
268 | "\n",
269 | "### IFN-γ-independent immune markers of Mycobacterium tuberculosis exposure\n",
270 | "\n",
271 | "Lu et al., 2019. https://www.nature.com/articles/s41591-019-0441-3\n",
272 | "\n",
273 | "In this example we're going to compare phenotypes between a pair of disease (LTBI) and resister (RSTR) samples."
274 | ]
275 | },
276 | {
277 | "cell_type": "code",
278 | "execution_count": null,
279 | "id": "6c762a63-242d-44cd-9e4a-329009a4bc3b",
280 | "metadata": {
281 | "tags": []
282 | },
283 | "outputs": [],
284 | "source": [
285 | "diseased_embedding = get_embedding(\"lu-2019\", \"OZEXPSMPL_2105\")\n",
286 | "resister_embedding = get_embedding(\"lu-2019\", \"OZEXPSMPL_2136\")\n",
287 | "\n",
288 | "comparison = EmbeddingComparisonWidget(\n",
289 | " diseased_embedding,\n",
290 | " resister_embedding,\n",
291 | " titles=[\"Diseased (LTBI)\", \"Resister (RSTR)\"],\n",
292 | " metric=\"abundance\",\n",
293 | " selection=\"phenotype\",\n",
294 | " auto_zoom=True,\n",
295 | " row_height=360,\n",
296 | ")\n",
297 | "comparison"
298 | ]
299 | },
300 | {
301 | "cell_type": "markdown",
302 | "id": "9241a79e-e1b6-454d-be1e-d476019ba3b8",
303 | "metadata": {},
304 | "source": [
305 |     "**Phenotype 5 from [Fig 3c](https://www.nature.com/articles/s41591-019-0441-3/figures/3)** should be more abundant in `diseased` (left) compared to `resister` (right)"
306 | ]
307 | },
308 | {
309 | "cell_type": "code",
310 | "execution_count": null,
311 | "id": "c72ed082-22e3-41ce-a1bd-8f16c77eed7d",
312 | "metadata": {
313 | "tags": []
314 | },
315 | "outputs": [],
316 | "source": [
317 | "comparison.select(\"CD4+CD3+CD8-TNF+CD107a-IL4-IFNg+IL2+CD154+IL17a-\")"
318 | ]
319 | },
320 | {
321 | "cell_type": "code",
322 | "execution_count": null,
323 | "id": "d52f8667-fe9d-4dc2-b287-b132ce8b7877",
324 | "metadata": {},
325 | "outputs": [],
326 | "source": []
327 | }
328 | ],
329 | "metadata": {
330 | "kernelspec": {
331 | "display_name": "Python 3 (ipykernel)",
332 | "language": "python",
333 | "name": "python3"
334 | },
335 | "language_info": {
336 | "codemirror_mode": {
337 | "name": "ipython",
338 | "version": 3
339 | },
340 | "file_extension": ".py",
341 | "mimetype": "text/x-python",
342 | "name": "python",
343 | "nbconvert_exporter": "python",
344 | "pygments_lexer": "ipython3",
345 | "version": "3.10.11"
346 | }
347 | },
348 | "nbformat": 4,
349 | "nbformat_minor": 5
350 | }
351 |
--------------------------------------------------------------------------------
/notebooks/getting-started.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "8efc6d60-f207-4e54-92b0-a6070b0158b4",
6 | "metadata": {},
7 | "source": [
8 | "# Getting Started\n",
9 | "\n",
10 | "In this notebook we're going to demonstrate how to use `cev` to compare (a) two _different_ embeddings of the same data and (b) two aligned embeddings of _different_ data.\n",
11 | "\n",
12 | "The embeddings we're exploring in this notebook represent single-cell surface proteomic data. In other words, each data point represents an individual cell whose surface protein expression was measured. The cells were then clustered into cellular phenotypes based on their protein expression."
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": null,
18 | "id": "47c31bea-24b3-4d16-a69a-a3ad3a746234",
19 | "metadata": {
20 | "tags": []
21 | },
22 | "outputs": [],
23 | "source": [
24 | "import pandas as pd\n",
25 | "\n",
26 | "from cev.widgets import Embedding, EmbeddingComparisonWidget"
27 | ]
28 | },
29 | {
30 | "cell_type": "markdown",
31 | "id": "dea71d70-e467-49af-9165-6e278f953977",
32 | "metadata": {},
33 | "source": [
34 | "The notebook requires downloading the three embeddings from data of from [Mair et al., 2022](https://www.nature.com/articles/s41586-022-04718-w):\n",
35 | "- Tissue sample 138 (32 MB) embedded with [UMAP](https://umap-learn.readthedocs.io/en/latest/)\n",
36 | "- Tissue sample 138 (32 MB) embedded with [UMAP](https://umap-learn.readthedocs.io/en/latest/) after being transformed with [Ozette's Annotation Transformation](https://github.com/flekschas-ozette/ismb-biovis-2022)\n",
37 | "- Tumor sample 6 (82 MB) embedded with [UMAP](https://umap-learn.readthedocs.io/en/latest/) after being transformed with [Ozette's Annotation Transformation](https://github.com/flekschas-ozette/ismb-biovis-2022)\n",
38 | "\n",
39 | "All three embeddings are annotated with [Ozette's FAUST method](https://doi.org/10.1016/j.patter.2021.100372)."
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": null,
45 | "id": "dbf802bc-f709-4163-9b49-8fa5f6ce59ab",
46 | "metadata": {
47 | "tags": []
48 | },
49 | "outputs": [],
50 | "source": [
51 | "# download the data\n",
52 | "!curl -sL https://figshare.com/ndownloader/articles/23063615/versions/1 -o data.zip\n",
53 | "!unzip data.zip -d data"
54 | ]
55 | },
56 | {
57 | "cell_type": "markdown",
58 | "id": "e62390d2-1242-49a8-9780-be976d39fa42",
59 | "metadata": {
60 | "tags": []
61 | },
62 | "source": [
63 | "## Comparing Two Embeddings of the same Data\n",
64 | "\n",
65 | "In the first example, we are going to use `cev` to compare two different embeddings methods that were run on the very same data (the tissue sample): standard UMAP and annotation transformation UMAP.\n",
66 | "\n",
67 | "Different embedding methods can produce very different embedding spaces and it's often hard to assess the difference holistically. `cev` enables us to quantify two properties based on shared point labels:\n",
68 | "\n",
69 | "1. Confusion: the degree to which two or more labels are visually intermixed\n",
70 | "2. Neighborhood: the degree to which the local neighborhood of a label has changed between the two embeddings\n",
71 | "\n",
72 | "Visualized as a heatmap, these two properties can quickly guide us to point clusters that are better or less resolved in either one of the two embeddings. It can also help us find compositional changes between the two embeddings."
73 | ]
74 | },
75 | {
76 | "cell_type": "code",
77 | "execution_count": null,
78 | "id": "7874813c-810f-40e5-92ab-91f228046a5e",
79 | "metadata": {
80 | "tags": []
81 | },
82 | "outputs": [],
83 | "source": [
84 | "tissue_umap_embedding = Embedding.from_ozette(\n",
85 | " df=pd.read_parquet(\"./data/mair-2022-tissue-138-umap.pq\")\n",
86 | ")\n",
87 | "tissue_ozette_embedding = Embedding.from_ozette(\n",
88 | " df=pd.read_parquet(\"./data/mair-2022-tissue-138-ozette.pq\")\n",
89 | ")"
90 | ]
91 | },
92 | {
93 | "cell_type": "code",
94 | "execution_count": null,
95 | "id": "c3d7e114-9fd3-4785-bdca-e3f4bbf37df8",
96 | "metadata": {
97 | "tags": []
98 | },
99 | "outputs": [],
100 | "source": [
101 | "umap_vs_ozette = EmbeddingComparisonWidget(\n",
102 | " tissue_umap_embedding,\n",
103 | " tissue_ozette_embedding,\n",
104 | " titles=[\"Standard UMAP (Tissue)\", \"Annotation-Transformed UMAP (Tissue)\"],\n",
105 | " metric=\"confusion\",\n",
106 | " selection=\"synced\",\n",
107 | " auto_zoom=True,\n",
108 | " row_height=320,\n",
109 | ")\n",
110 | "umap_vs_ozette"
111 | ]
112 | },
113 | {
114 | "cell_type": "markdown",
115 | "id": "a516d65a-351b-4365-a267-704cd93a9c0e",
116 | "metadata": {},
117 | "source": [
118 | "In this example, we can see that the point labels are much more intermixed in the standard UMAP embedding compared to the annotation transformation UMAP. This is not surprising as the standard UMAP embedding is not optimized for Flow cytometry data in any way and is thus only resolving broad cell phenotypes based on a few markers. You can see this by holding down `SHIFT` and clicking on `CD8` under _Markers_, which reduces the label resolution and shows that under a reduced label resolution, the confusion is much lower in the standard UMAP embedding.\n",
119 | "\n",
120 | "When selecting _Neighborhood_ from the _Metric_ drop down menu, we switch to the neighborhood composition difference quantification. When only a few markers (e.g., `CD4` and `CD8`) are active, we can see that most of the neighborhoods remain unchanged. When we gradually add more markers, we can see how the local neighborhood composition difference slowly increases, which is due to the fact that the annotation transformation spaces out all point label clusters.\n",
121 | "\n",
122 | "To study certain clusters or labels in detail, you can either interactively select points in the embedding via [jupyter-scatter](https://github.com/flekschas/jupyter-scatter)'s lasso selection or you can programmatically select points by their label via the `select()` method. For instance, the next call will select all CD4+ T cells."
123 | ]
124 | },
125 | {
126 | "cell_type": "code",
127 | "execution_count": null,
128 | "id": "ba7a378f-4212-4953-be5b-7a273f8bc75e",
129 | "metadata": {
130 | "tags": []
131 | },
132 | "outputs": [],
133 | "source": [
134 | "umap_vs_ozette.select([\"CD3+\", \"CD4+\", \"CD8-\"])"
135 | ]
136 | },
137 | {
138 | "cell_type": "markdown",
139 | "id": "3c439e4d-0679-4e64-a1c7-4be93cbbe039",
140 | "metadata": {},
141 | "source": [
142 | "## Size Differences Between _Non-Responder_ and _Responder_\n",
143 | "\n",
144 | "Instead of comparing identical data, let's take a look at two transformed and aligned embeddings: tissue vs tumor. The embeddings are both annotation-transformed and aligned, ensuring low confusion and high neighborhood similarity (check to confirm!). The abundance metric aids in identifying potential shifts in phenotype abundance, providing a comprehensive and visually intuitive method for analyzing complex cytometry data. Remember, our metric should be used as an exploratory tool to guide exploration and quickly surface potentially interesting phenotypes, but robust statistical methods must be applied to confirm whether any abundance differences exist."
145 | ]
146 | },
147 | {
148 | "cell_type": "code",
149 | "execution_count": null,
150 | "id": "180f0945-d97c-4261-aa67-5368e3b560ad",
151 | "metadata": {
152 | "tags": []
153 | },
154 | "outputs": [],
155 | "source": [
156 | "tumor_ozette_embedding = Embedding.from_ozette(\n",
157 | " df=pd.read_parquet(\"./data/mair-2022-tumor-006-ozette.pq\")\n",
158 | ")"
159 | ]
160 | },
161 | {
162 | "cell_type": "code",
163 | "execution_count": null,
164 | "id": "0f99361b-6e96-4a6d-ad65-0533c23bece7",
165 | "metadata": {
166 | "tags": []
167 | },
168 | "outputs": [],
169 | "source": [
170 | "tissue_vs_tumor = EmbeddingComparisonWidget(\n",
171 | " tissue_ozette_embedding,\n",
172 | " tumor_ozette_embedding,\n",
173 | " titles=[\"Tissue\", \"Tumor\"],\n",
174 | " metric=\"abundance\",\n",
175 | " selection=\"phenotype\",\n",
176 | " auto_zoom=True,\n",
177 | " row_height=320,\n",
178 | ")\n",
179 | "\n",
180 | "tissue_vs_tumor"
181 | ]
182 | },
183 | {
184 | "cell_type": "markdown",
185 | "id": "6d632c95-dff8-4b90-b763-f3055c4e8047",
186 | "metadata": {
187 | "tags": []
188 | },
189 | "source": [
190 | "The following **CD8+ T cells** are more abundant in `tissue` (i.e., the relative abundance is higher on the left) compared to `tumor` (i.e., the relative abundance is lower on the right)"
191 | ]
192 | },
193 | {
194 | "cell_type": "code",
195 | "execution_count": null,
196 | "id": "2f7ebd73-32e7-48ed-8575-8d14d2edc73f",
197 | "metadata": {
198 | "tags": []
199 | },
200 | "outputs": [],
201 | "source": [
202 | "tissue_vs_tumor.select(\n",
203 | " \"CD4-CD8+CD3+CD45RA+CD27+CD19-CD103-CD28-CD69+PD1+HLADR-GranzymeB-CD25-ICOS-TCRgd-CD38-CD127-Tim3-\"\n",
204 | ")"
205 | ]
206 | },
207 | {
208 | "cell_type": "code",
209 | "execution_count": null,
210 | "id": "eefac753-7920-4c87-99ef-d155f1ec5114",
211 | "metadata": {},
212 | "outputs": [],
213 | "source": []
214 | }
215 | ],
216 | "metadata": {
217 | "kernelspec": {
218 | "display_name": "Python 3 (ipykernel)",
219 | "language": "python",
220 | "name": "python3"
221 | },
222 | "language_info": {
223 | "codemirror_mode": {
224 | "name": "ipython",
225 | "version": 3
226 | },
227 | "file_extension": ".py",
228 | "mimetype": "text/x-python",
229 | "name": "python",
230 | "nbconvert_exporter": "python",
231 | "pygments_lexer": "ipython3",
232 | "version": "3.10.11"
233 | }
234 | },
235 | "nbformat": 4,
236 | "nbformat_minor": 5
237 | }
238 |
--------------------------------------------------------------------------------
/notebooks/lui-2021.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "8c6bbfa0-e40d-46ca-8010-45da2bdc5ed9",
6 | "metadata": {},
7 | "source": [
8 | "# Lui et al. 2021"
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": null,
14 | "id": "82fd3a9b-c1e8-473a-9679-9f64990c7bb2",
15 | "metadata": {
16 | "tags": []
17 | },
18 | "outputs": [],
19 | "source": [
20 | "%load_ext autoreload\n",
21 | "%autoreload 2"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": null,
27 | "id": "7e05cfbf-325e-4402-8a51-e31e10398acb",
28 | "metadata": {
29 | "tags": []
30 | },
31 | "outputs": [],
32 | "source": [
33 | "import pathlib\n",
34 | "\n",
35 | "import pandas as pd\n",
36 | "\n",
37 | "from cev.widgets import Embedding, EmbeddingComparisonWidget"
38 | ]
39 | },
40 | {
41 | "cell_type": "markdown",
42 | "id": "6a1625a2-7389-4dd9-9b50-1ac99583c574",
43 | "metadata": {},
44 | "source": [
45 | "## Prepare Data and Split into two DataFrames"
46 | ]
47 | },
48 | {
49 | "cell_type": "code",
50 | "execution_count": null,
51 | "id": "ace38a79-2c9c-45b2-9f34-87f24d71ba59",
52 | "metadata": {
53 | "tags": []
54 | },
55 | "outputs": [],
56 | "source": [
57 | "df_full = pd.read_parquet(\n",
58 | " pathlib.Path.cwd() / \"..\" / \"data\" / \"lui-2021\" / \"data_for_confusion_full.parquet\"\n",
59 | ")\n",
60 | "df = pd.read_parquet(\n",
61 | " pathlib.Path.cwd() / \"..\" / \"data\" / \"lui-2021\" / \"data_for_confusion.parquet\"\n",
62 | ")\n",
63 | "\n",
64 | "markers = [\n",
65 | " \"CD3\",\n",
66 | " \"CD45RA\",\n",
67 | " \"CD8\",\n",
68 | " \"CLEC12A\",\n",
69 | " \"CD11b\",\n",
70 | " \"CD62L\",\n",
71 | " \"CD20\",\n",
72 | " \"HLA_DR\",\n",
73 | " \"CD11c\",\n",
74 | " \"CD14\",\n",
75 | " \"IgD\",\n",
76 | " \"CD4\",\n",
77 | " \"CD16\",\n",
78 | " \"CD45RO\",\n",
79 | " \"CD27\",\n",
80 | " \"CD19\",\n",
81 | " \"CD56\",\n",
82 | "]\n",
83 | "\n",
84 | "df_ozette_umap_ozette_labels = pd.DataFrame(\n",
85 | " {\n",
86 | " \"umapX\": df.platform_UMAP_X.values,\n",
87 | " \"umapY\": df.platform_UMAP_Y.values,\n",
88 | " \"faustLabels\": df.faust_clustering.map(\n",
89 | " lambda s: \"0_0_0_0_0\" if s == \"rare\" else s\n",
90 | " )\n",
91 | " .str.replace(\"/\", \"\")\n",
92 | " .values,\n",
93 | " }\n",
94 | ")\n",
95 | "\n",
96 | "df_ozette_umap_symphony_labels = pd.DataFrame(\n",
97 | " {\n",
98 | " \"umapX\": df.platform_UMAP_X.values,\n",
99 | " \"umapY\": df.platform_UMAP_Y.values,\n",
100 | " \"faustLabels\": df.liu_clustering.values,\n",
101 | " }\n",
102 | ")\n",
103 | "\n",
104 | "df_symphony_umap_ozette_labels = pd.DataFrame(\n",
105 | " {\n",
106 | " \"umapX\": df.symphony_UMAP_1.values,\n",
107 | " \"umapY\": df.symphony_UMAP_2.values,\n",
108 | " \"faustLabels\": df.faust_clustering.map(\n",
109 | " lambda s: \"0_0_0_0_0\" if s == \"rare\" else s\n",
110 | " )\n",
111 | " .str.replace(\"/\", \"\")\n",
112 | " .values,\n",
113 | " }\n",
114 | ")\n",
115 | "\n",
116 | "df_symphony_umap_symphony_labels = pd.DataFrame(\n",
117 | " {\n",
118 | " \"umapX\": df.symphony_UMAP_1.values,\n",
119 | " \"umapY\": df.symphony_UMAP_2.values,\n",
120 | " \"faustLabels\": df.liu_clustering.values,\n",
121 | " }\n",
122 | ")\n",
123 | "\n",
124 | "marker_annotations = df_full.faust_clustering.str.lstrip(\"/\").str.split(\n",
125 | " \"/\", expand=True\n",
126 | ")\n",
127 | "for column in marker_annotations:\n",
128 | " marker_annotations[column] = marker_annotations[column].str.slice(-1)\n",
129 | "\n",
130 | "df_ozette_umap_ozette_labels[[f\"{m}_faust_annotation\" for m in markers]] = (\n",
131 | " marker_annotations\n",
132 | ")\n",
133 | "df_ozette_umap_symphony_labels[[f\"{m}_faust_annotation\" for m in markers]] = (\n",
134 | " marker_annotations\n",
135 | ")\n",
136 | "df_symphony_umap_ozette_labels[[f\"{m}_faust_annotation\" for m in markers]] = (\n",
137 | " marker_annotations\n",
138 | ")\n",
139 | "df_symphony_umap_symphony_labels[[f\"{m}_faust_annotation\" for m in markers]] = (\n",
140 | " marker_annotations\n",
141 | ")"
142 | ]
143 | },
144 | {
145 | "cell_type": "markdown",
146 | "id": "e0c9a881-e2d6-488d-87d0-6f9328603960",
147 | "metadata": {
148 | "tags": []
149 | },
150 | "source": [
151 | "# Comparing the Ozette Against the Symphony Embedding using FAUST Labels"
152 | ]
153 | },
154 | {
155 | "cell_type": "code",
156 | "execution_count": null,
157 | "id": "7cefed4b-c050-4fb7-b8a5-c59a4bd93017",
158 | "metadata": {
159 | "tags": []
160 | },
161 | "outputs": [],
162 | "source": [
163 | "ozette_umap_ozette_labels_embedding = Embedding.from_ozette(\n",
164 | " df_ozette_umap_ozette_labels, robust_only=True\n",
165 | ")\n",
166 | "symphony_umap_ozette_labels_embedding = Embedding.from_ozette(\n",
167 | " df_symphony_umap_ozette_labels, robust_only=True\n",
168 | ")\n",
169 | "\n",
170 | "comparison_ozette_vs_symphony_umap_with_ozette_labels = EmbeddingComparisonWidget(\n",
171 | " ozette_umap_ozette_labels_embedding,\n",
172 | " symphony_umap_ozette_labels_embedding,\n",
173 | " titles=[\n",
174 | " \"Ozette Embedding with FAUST Labels\",\n",
175 | " \"Symphony Embedding with FAUST Labels\",\n",
176 | " ],\n",
177 | " metric=\"neighborhood\",\n",
178 | " # active_markers=[\"CD3\"],\n",
179 | " selection=\"synced\",\n",
180 | " auto_zoom=True,\n",
181 | " row_height=400,\n",
182 | ")\n",
183 | "comparison_ozette_vs_symphony_umap_with_ozette_labels"
184 | ]
185 | },
186 | {
187 | "cell_type": "code",
188 | "execution_count": null,
189 | "id": "ee3ad9d6-4b92-4a5f-9199-e6bba4527a59",
190 | "metadata": {
191 | "tags": []
192 | },
193 | "outputs": [],
194 | "source": [
195 | "comparison_ozette_vs_symphony_umap_with_ozette_labels.select(\n",
196 | " [\"CD19-\", \"CD11b-\", \"CD3+\", \"CD4+\", \"CD8-\", \"CD45RA-\", \"CD45RO+\", \"CD62L+\"]\n",
197 | ")"
198 | ]
199 | },
200 | {
201 | "cell_type": "code",
202 | "execution_count": null,
203 | "id": "42591b2c-3ee1-4fa8-ad32-4187a9a211e2",
204 | "metadata": {
205 | "tags": []
206 | },
207 | "outputs": [],
208 | "source": [
209 | "comparison_ozette_vs_symphony_umap_with_ozette_labels.select(\n",
210 | " [\"CD19-\", \"CD11b-\", \"CD3+\", \"CD4+\", \"CD8-\", \"CD45RA-\", \"CD45RO+\", \"CD62L-\"]\n",
211 | ")"
212 | ]
213 | },
214 | {
215 | "cell_type": "code",
216 | "execution_count": null,
217 | "id": "cc81a023-288e-4ef4-8bdd-1940e05c68d4",
218 | "metadata": {
219 | "tags": []
220 | },
221 | "outputs": [],
222 | "source": [
223 | "comparison_ozette_vs_symphony_umap_with_ozette_labels.select(\n",
224 | " [\"CD19-\", \"CD11b-\", \"CD3+\", \"CD4+\", \"CD8-\", \"CD45RA+\", \"CD45RO-\", \"CD62L+\"]\n",
225 | ")"
226 | ]
227 | },
228 | {
229 | "cell_type": "code",
230 | "execution_count": null,
231 | "id": "1536bfbf-7f09-48fd-b35b-567303a03416",
232 | "metadata": {
233 | "tags": []
234 | },
235 | "outputs": [],
236 | "source": [
237 | "comparison_ozette_vs_symphony_umap_with_ozette_labels.select(\n",
238 | " [\"CD19-\", \"CD11b-\", \"CD3+\", \"CD4+\", \"CD8-\", \"CD45RA+\", \"CD45RO-\", \"CD62L-\"]\n",
239 | ")"
240 | ]
241 | },
242 | {
243 | "cell_type": "markdown",
244 | "id": "e1144d71-2055-44cd-a361-14c93383d2aa",
245 | "metadata": {},
246 | "source": [
247 | "# Comparing the Ozette Against the Symphony Embedding using Symphony Labels"
248 | ]
249 | },
250 | {
251 | "cell_type": "code",
252 | "execution_count": null,
253 | "id": "ff15c719-c1d5-4780-86b7-9bb3bf4f1e1c",
254 | "metadata": {
255 | "tags": []
256 | },
257 | "outputs": [],
258 | "source": [
259 | "ozette_umap_symphony_labels_embedding = Embedding(\n",
260 | " df_ozette_umap_symphony_labels[[\"umapX\", \"umapY\"]].values,\n",
261 | " df_ozette_umap_symphony_labels.faustLabels,\n",
262 | ")\n",
263 | "symphony_umap_symphony_labels_embedding = Embedding(\n",
264 | " df_symphony_umap_symphony_labels[[\"umapX\", \"umapY\"]].values,\n",
265 | " df_symphony_umap_symphony_labels.faustLabels,\n",
266 | ")\n",
267 | "\n",
268 | "comparison_ozette_vs_symphony_umap_with_symphony_labels = EmbeddingComparisonWidget(\n",
269 | " ozette_umap_symphony_labels_embedding,\n",
270 | " symphony_umap_symphony_labels_embedding,\n",
271 | " titles=[\n",
272 | " \"Ozette Embedding with Symphony Labels\",\n",
273 | " \"Symphony Embedding with Symphony Labels\",\n",
274 | " ],\n",
275 | " selection=\"synced\",\n",
276 | " auto_zoom=True,\n",
277 | " row_height=400,\n",
278 | ")\n",
279 | "\n",
280 | "comparison_ozette_vs_symphony_umap_with_symphony_labels.left.categorical_scatter.legend(\n",
281 | " True\n",
282 | ")\n",
283 | "comparison_ozette_vs_symphony_umap_with_symphony_labels.right.categorical_scatter.legend(\n",
284 | " True\n",
285 | ")\n",
286 | "\n",
287 | "comparison_ozette_vs_symphony_umap_with_symphony_labels"
288 | ]
289 | },
290 | {
291 | "cell_type": "code",
292 | "execution_count": null,
293 | "id": "32b7afd5-c1eb-4625-b9fe-2f6c60e6f364",
294 | "metadata": {},
295 | "outputs": [],
296 | "source": []
297 | }
298 | ],
299 | "metadata": {
300 | "kernelspec": {
301 | "display_name": "Python 3 (ipykernel)",
302 | "language": "python",
303 | "name": "python3"
304 | },
305 | "language_info": {
306 | "codemirror_mode": {
307 | "name": "ipython",
308 | "version": 3
309 | },
310 | "file_extension": ".py",
311 | "mimetype": "text/x-python",
312 | "name": "python",
313 | "nbconvert_exporter": "python",
314 | "pygments_lexer": "ipython3",
315 | "version": "3.8.5"
316 | }
317 | },
318 | "nbformat": 4,
319 | "nbformat_minor": 5
320 | }
321 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["hatchling", "hatch-vcs"]
3 | build-backend = "hatchling.build"
4 |
5 | [project]
6 | name = "cev"
7 | description = "comparative embedding visualization"
8 | readme = "README.md"
9 | license = { text = "Apache-2.0" }
10 | authors = [
11 | { name = "Trevor Manz" },
12 | { name = "Fritz Lekschas" },
13 | ]
14 | classifiers = [
15 | "Intended Audience :: Developers",
16 | "Programming Language :: Python :: 3",
17 | "Programming Language :: Python :: 3.8",
18 | "Programming Language :: Python :: 3.9",
19 | "Programming Language :: Python :: 3.10",
20 | "Programming Language :: Python :: 3.11",
21 | ]
22 | requires-python = ">=3.8,<3.12"
23 | dependencies = [
24 | "anywidget>=0.2.3",
25 | "cev-metrics>=0.1.2",
26 | "ipywidgets>=8.0.0",
27 | "jinja2>=3.0.0",
28 | "jupyter-scatter>=0.14.0",
29 | "pandas>=1.0,<2.0",
30 | "numpy>=1.0,<2.0",
31 | "pyarrow",
32 | "pooch>=1.3.0",
33 | ]
34 | dynamic = ["version"]
35 |
36 | [project.optional-dependencies]
37 | notebooks = [
38 | "pyarrow",
39 | "fastparquet",
40 | "matplotlib",
41 | ]
42 |
43 | [project.scripts]
44 | cev = "cev._cli:main"
45 |
46 | [project.urls]
47 | homepage = "https://github.com/OzetteTech/comparative-embedding-visualization"
48 |
49 | [tool.hatch.build]
50 | sources = ["src"]
51 |
52 | [tool.hatch.version]
53 | source = "vcs"
54 |
55 | [tool.ruff]
56 | line-length = 88
57 | target-version = "py38"
58 |
59 | [tool.ruff.lint]
60 | extend-select = [
61 | "E", # style errors
62 | "F", # flake
63 | # "D", # pydocstyle
64 | "I001", # isort
65 | "UP", # pyupgrade
66 | "RUF", # ruff-specific rules
67 | ]
68 | ignore = ["E501"] # ignore line-length, enforced by the formatter (ruff format)
69 |
70 | # https://docs.pytest.org/en/latest/customize.html
71 | [tool.pytest.ini_options]
72 | minversion = "6.0"
73 | testpaths = ["tests"]
74 | filterwarnings = [
75 | "ignore:Jupyter is migrating its paths:DeprecationWarning",
76 | "ignore:Deprecated in traitlets 4.1, use the instance .metadata:DeprecationWarning",
77 | ]
78 |
79 | [tool.uv]
80 | dev-dependencies = [
81 | "jupyterlab>=4.2.5",
82 | "pytest>=8.3.3",
83 | "ruff>=0.7.0",
84 | ]
85 |
--------------------------------------------------------------------------------
/src/cev/__init__.py:
--------------------------------------------------------------------------------
1 | from cev._version import __version__ # noqa
2 |
3 | import cev.metrics as metrics # noqa
4 | import cev.widgets as widgets # noqa
5 |
--------------------------------------------------------------------------------
/src/cev/_cli.py:
--------------------------------------------------------------------------------
from __future__ import annotations

import argparse
import json
import os
import shutil
import sys
import textwrap
import zipfile
from pathlib import Path

import pooch

from cev._version import __version__
13 |
14 | _DEV = False
15 |
16 |
def download_data() -> tuple[Path, Path]:
    """Download (and cache) the demo archive, extracting the two demo files.

    Uses pooch to fetch the figshare archive into the OS cache directory,
    then extracts just the two tissue-sample parquet files next to it.

    Returns the paths to the UMAP and Ozette embeddings, in that order.
    """
    zip_path = Path(
        pooch.retrieve(
            url="https://figshare.com/ndownloader/articles/23063615/versions/1",
            path=pooch.os_cache("cev"),
            fname="data.zip",
            known_hash=None,
        )
    )
    names = (
        "mair-2022-tissue-138-umap.pq",
        "mair-2022-tissue-138-ozette.pq",
    )
    with zipfile.ZipFile(zip_path, "r") as zf:
        for name in names:
            zf.extract(name, path=zip_path.parent)
    return zip_path.parent / names[0], zip_path.parent / names[1]
36 |
37 |
def write_notebook(output: Path):
    """Write a single-cell demo notebook to *output*.

    Downloads the demo data first so the generated code cell can reference
    the local parquet paths directly.
    """
    umap_path, ozette_path = download_data()
    source = textwrap.dedent(
        f"""
        import pandas as pd
        from cev.widgets import Embedding, EmbeddingComparisonWidget

        umap_embedding = pd.read_parquet("{umap_path}").pipe(Embedding.from_ozette)
        ozette_embedding = pd.read_parquet("{ozette_path}").pipe(Embedding.from_ozette)

        EmbeddingComparisonWidget(
            umap_embedding,
            ozette_embedding,
            titles=("Standard UMAP", "Annotation-Transformed UMAP"),
            metric="confusion",
            selection="synced",
            auto_zoom=True,
            row_height=320,
        )
        """
    ).strip()

    # Minimal nbformat v4 document with one code cell.
    cell = {
        "cell_type": "code",
        "execution_count": None,
        "metadata": {},
        "outputs": [],
        "source": source,
    }
    notebook = {
        "cells": [cell],
        "metadata": {
            "kernelspec": {
                "display_name": "Python 3",
                "language": "python",
                "name": "python3",
            }
        },
        "nbformat": 4,
        "nbformat_minor": 5,
    }
    with output.open("w") as f:
        json.dump(notebook, f, indent=2)
82 |
83 |
def check_uv_available():
    """Exit with an error message if the ``uv`` executable is not on PATH."""
    if shutil.which("uv") is not None:
        return
    messages = (
        "Error: 'uv' command not found.",
        "Please install 'uv' to run `cev demo` entrypoint.",
        "For more information, visit: https://github.com/astral-sh/uv",
    )
    for message in messages:
        print(message, file=sys.stderr)
    sys.exit(1)
93 |
94 |
def run_notebook(notebook_path: Path):
    """Replace the current process with JupyterLab opened on *notebook_path*.

    Requires ``uv`` (exits via :func:`check_uv_available` otherwise). The
    notebook runs through ``uvx`` with either the local checkout (dev mode)
    or the released ``cev`` version installed alongside JupyterLab.
    """
    check_uv_available()
    cev_requirement = "." if _DEV else f"cev=={__version__}"
    command = [
        "uvx",
        "--python",
        "3.11",
        "--from",
        "jupyter-core",
        "--with",
        "jupyterlab",
        "--with",
        cev_requirement,
        "jupyter",
        "lab",
        str(notebook_path),
    ]
    try:
        # execvp does not return on success; the process image is replaced.
        os.execvp(command[0], command)
    except OSError as e:
        print(f"Error executing {command[0]}: {e}", file=sys.stderr)
        sys.exit(1)
116 |
117 |
def main():
    """CLI entry point dispatching the ``download`` and ``demo`` subcommands."""
    parser = argparse.ArgumentParser(prog="cev")
    subcommands = parser.add_subparsers(dest="command", help="Available commands")
    subcommands.add_parser("download", help="Download the demo notebook (and data)")
    subcommands.add_parser("demo", help="Run the demo notebook in JupyterLab")
    args = parser.parse_args()

    notebook_path = Path("cev-demo.ipynb")
    if args.command == "download":
        write_notebook(notebook_path)
        return
    if args.command == "demo":
        write_notebook(notebook_path)
        run_notebook(notebook_path)
        return
    parser.print_help()
133 |
134 |
135 | if __name__ == "__main__":
136 | main()
137 |
--------------------------------------------------------------------------------
/src/cev/_compare.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import typing
4 |
5 | import ipywidgets
6 | import numpy as np
7 |
8 | from cev._compare_metric_dropdown import (
9 | create_max_depth_dropdown,
10 | create_metric_dropdown,
11 | create_update_distance_callback,
12 | create_value_range_slider,
13 | )
14 | from cev._compare_selection_type_dropdown import create_selection_type_dropdown
15 | from cev._compare_zoom_toggle import create_zoom_toggle
16 | from cev._widget_utils import (
17 | add_ilocs_trait,
18 | create_colormaps,
19 | link_widgets,
20 | parse_label,
21 | trim_label_series,
22 | )
23 | from cev.components import MarkerSelectionIndicator
24 |
25 | if typing.TYPE_CHECKING:
26 | from cev._embedding import Embedding
27 | from cev._embedding_widget import EmbeddingWidgetCollection
28 |
29 |
def compare(
    a: Embedding, b: Embedding, row_height: int = 250, max_depth: int = 1, **kwargs
):
    """Build a side-by-side comparison widget for embeddings *a* and *b*.

    Wires up the marker-selection header, the metric / selection / zoom /
    invert controls, and the two embedding views. Returns an ipywidgets VBox
    with the two ``EmbeddingWidgetCollection``s attached as ``.left`` and
    ``.right``. Extra ``**kwargs`` are forwarded to ``Embedding.widgets``.
    """
    pointwise_correspondence = has_pointwise_correspondence(a, b)
    left, right = a.widgets(**kwargs), b.widgets(**kwargs)

    # representative label: marker names are parsed from the first label,
    # assuming all labels share the same marker ordering
    markers = [m.name for m in parse_label(a.labels.iloc[0])]
    # start with only the first marker active
    marker_selection = MarkerSelectionIndicator(
        markers=markers, active=[True] + [False for x in range(len(markers) - 1)]
    )

    metric_dropdown = create_metric_dropdown(left, right)
    max_depth_dropdown = create_max_depth_dropdown(metric_dropdown, max_depth)
    value_range_slider = create_value_range_slider(metric_dropdown)
    update_distances = create_update_distance_callback(
        metric_dropdown, max_depth_dropdown, value_range_slider, left, right
    )
    zoom = create_zoom_toggle(left, right)
    inverted = create_invert_color_checkbox(left, right)
    # available selection modes depend on whether the two embeddings
    # describe the same points (pointwise correspondence)
    selection_type = create_selection_type_dropdown(
        left, right, pointwise_correspondence
    )
    connect_marker_selection(marker_selection, (a, left), (b, right), update_distances)
    header = ipywidgets.VBox(
        [
            marker_selection,
            ipywidgets.HBox([selection_type, metric_dropdown, inverted, zoom]),
        ]
    )
    main = ipywidgets.HBox(
        [
            cmp.show(row_height=row_height, layout=ipywidgets.Layout(width="50%"))
            for cmp in (left, right)
        ]
    )
    widget = ipywidgets.VBox([header, main])

    add_ilocs_trait(widget, left, right)
    # expose both collections for programmatic access (e.g. selections)
    typing.cast(typing.Any, widget).left = left
    typing.cast(typing.Any, widget).right = right
    return widget
72 |
73 |
def has_pointwise_correspondence(a: Embedding, b: Embedding) -> bool:
    """Return True when *a* and *b* describe the same points.

    Requires the label series to match element-wise and the robust masks to
    be either both absent or both present and element-wise equal.
    """
    if not np.array_equal(a.labels, b.labels):
        return False
    if a.robust is None and b.robust is None:
        return True
    return (
        a.robust is not None
        and b.robust is not None
        and np.array_equal(a.robust, b.robust)
    )
83 |
84 |
def create_invert_color_checkbox(
    left: EmbeddingWidgetCollection,
    right: EmbeddingWidgetCollection,
    default: bool = False,
):
    """Create a checkbox that toggles colormap inversion on both views."""
    checkbox = ipywidgets.Checkbox(default, description="Invert Colormap")
    for collection in (left, right):
        link_widgets((collection, "inverted"), (checkbox, "value"))
    return checkbox
94 |
95 |
def connect_marker_selection(
    marker_selection: MarkerSelectionIndicator,
    left_pair: tuple[Embedding, EmbeddingWidgetCollection],
    right_pair: tuple[Embedding, EmbeddingWidgetCollection],
    update_distances: typing.Callable,
):
    """Keep both embedding views in sync with the active marker selection.

    On every change of the active markers, the label series of both views
    are trimmed to the active marker set, the colormaps are rebuilt from the
    resulting robust-label categories, and the distances are recomputed via
    *update_distances*. The current selection is applied once up front.
    """
    markers = marker_selection.markers
    a, left = left_pair
    b, right = right_pair

    def update_labels(active):
        # markers whose indicator is currently switched on
        active_markers = set([marker for i, marker in enumerate(markers) if active[i]])

        left.labels = trim_label_series(a.labels, active_markers)
        right.labels = trim_label_series(b.labels, active_markers)

        # rebuild both colormaps from the trimmed robust-label categories
        left.colormap, right.colormap = create_colormaps(
            left.robust_labels.cat.categories,
            right.robust_labels.cat.categories,
        )

        update_distances()

    def on_active_marker_selection_change(change):
        update_labels(change.new)

    # apply the initial marker selection before subscribing to changes
    update_labels(marker_selection.active)

    marker_selection.observe(on_active_marker_selection_change, names="active")
125 |
--------------------------------------------------------------------------------
/src/cev/_compare_metric_dropdown.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import functools
4 | import typing
5 |
6 | import ipywidgets
7 | import pandas as pd
8 |
9 | import cev.metrics as metrics
10 | from cev._widget_utils import diverging_cmap
11 |
12 | if typing.TYPE_CHECKING:
13 | from cev._embedding_widget import EmbeddingWidgetCollection
14 |
15 |
16 | CACHE_SIZE = 5
17 |
18 |
def create_metric_dropdown(
    left: EmbeddingWidgetCollection,
    right: EmbeddingWidgetCollection,
    default: typing.Literal["confusion", "neighborhood", "abundance"] = "confusion",
):
    """Create the metric-selection dropdown.

    The dropdown's ``value`` is a callable accepting ``max_depth`` and
    returning a ``(left, right)`` pair of per-point distance Series.

    Parameters
    ----------
    left, right
        The embedding widget collections being compared.
    default
        Initially selected metric. Fix: the annotation previously read
        "neigbhorhood" (typo); only "neighborhood" is a valid key of the
        lookup below, so the annotated value would have raised KeyError.
    """

    # NOTE: the lru_cache keys rely on EmbeddingWidgetCollection.__hash__,
    # which encodes object identity plus the current label categories, so
    # cached results are recomputed when the labels change.
    @functools.lru_cache(maxsize=CACHE_SIZE)
    def cached_confusion(emb: EmbeddingWidgetCollection):
        return metrics.confusion(emb._data)

    @functools.lru_cache(maxsize=CACHE_SIZE)
    def cached_neighborhood(emb: EmbeddingWidgetCollection, max_depth: int = 1):
        return metrics.neighborhood(emb._data, max_depth=max_depth)

    @functools.lru_cache(maxsize=CACHE_SIZE)
    def cached_abundance(
        left: EmbeddingWidgetCollection,
        right: EmbeddingWidgetCollection,
        max_depth: int = 1,
    ):
        frequencies = (
            cached_neighborhood(left, max_depth),
            cached_neighborhood(right, max_depth),
        )
        abundances = [
            metrics.transform_abundance(
                freq,
                abundances=emb.labels.value_counts().to_dict(),
                clr=True,
            )
            for freq, emb in zip(frequencies, (left, right))
        ]

        # Align each abundance matrix onto the other's label set and take
        # the diagonal (per-label self-abundance).
        label_dist_a = metrics.merge_abundances_left(abundances[0], abundances[1])
        label_dist_a = pd.Series(
            label_dist_a.to_numpy().diagonal(), index=label_dist_a.index
        )

        label_dist_b = metrics.merge_abundances_left(abundances[1], abundances[0])
        label_dist_b = pd.Series(
            label_dist_b.to_numpy().diagonal(), index=label_dist_b.index
        )

        # Signed difference: positive where a label is relatively more
        # abundant in this embedding than in the other.
        return (
            left.labels.map(label_dist_a - label_dist_b).astype(float),
            right.labels.map(label_dist_b - label_dist_a).astype(float),
        )

    def confusion(**kwargs):
        # Accepts (and ignores) max_depth so all metrics share a signature.
        left_label_confusion = cached_confusion(left)
        right_label_confusion = cached_confusion(right)
        return (
            left.labels.map(left_label_confusion).astype(float),
            right.labels.map(right_label_confusion).astype(float),
        )

    def neighborhood(max_depth: int = 1):
        a = cached_neighborhood(left, max_depth)
        b = cached_neighborhood(right, max_depth)
        dist = metrics.compare_neighborhoods(a, b)
        return left.labels.map(dist).astype(float), right.labels.map(dist).astype(float)

    abundance = functools.partial(cached_abundance, left, right)

    default_value = {
        "confusion": confusion,
        "neighborhood": neighborhood,
        "abundance": abundance,
    }[default]

    return ipywidgets.Dropdown(
        options=[
            ("Confusion", confusion),
            ("Neighborhood", neighborhood),
            ("Abundance", abundance),
        ],
        value=default_value,
        description="Metric",
    )
97 |
98 |
def has_max_depth(metric_dropdown: ipywidgets.Dropdown):
    """Return True when the selected metric supports a max-depth option."""
    label = metric_dropdown.label
    return label == "Neighborhood" or label.lower().startswith("abundance")
104 |
105 |
def create_max_depth_dropdown(
    metric_dropdown: ipywidgets.Dropdown,
    default: int = 1,
):
    """Create a Fibonacci-stepped dropdown for the neighborhood max depth.

    The dropdown is only enabled while the selected metric supports a
    max-depth parameter (see ``has_max_depth``).
    """
    depth_dropdown = ipywidgets.Dropdown(
        options=[1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144],
        value=default,
        description="Max Depth",
        disabled=True,
    )

    def sync_disabled_state():
        depth_dropdown.disabled = not has_max_depth(metric_dropdown)

    metric_dropdown.observe(lambda _: sync_disabled_state(), names="value")
    sync_disabled_state()

    return depth_dropdown
127 |
128 |
def create_value_range_slider(metric_dropdown: ipywidgets.Dropdown):
    """Create a range slider for clamping the metric color scale.

    Resets to [0.05, 0.95] whenever the abundance metric is selected and
    to the full [0, 1] range for every other metric.
    """
    range_slider = ipywidgets.FloatRangeSlider(
        value=[0, 1],
        min=0,
        max=1,
        step=0.05,
        description="Range:",
        continuous_update=False,
        orientation="horizontal",
        readout=True,
        readout_format=".2f",
    )

    def reset_range():
        is_abundance = metric_dropdown.label.lower().startswith("abundance")
        range_slider.value = [0.05, 0.95] if is_abundance else [0, 1]

    metric_dropdown.observe(lambda _: reset_range(), names="value")
    reset_range()

    return range_slider
152 |
153 |
def create_update_distance_callback(
    metric_dropdown: ipywidgets.Dropdown,
    max_depth_dropdown: ipywidgets.Dropdown,
    value_range_slider: ipywidgets.FloatRangeSlider,
    left: EmbeddingWidgetCollection,
    right: EmbeddingWidgetCollection,
):
    """Build a callback that recomputes distances and recolors both views.

    The returned callable invokes the currently selected metric function
    and pushes the resulting per-point distances, together with matching
    color options, into the left and right embedding widget collections.
    """

    def callback():
        distances = metric_dropdown.value(max_depth=max_depth_dropdown.value)
        label = metric_dropdown.label

        for dist, emb in zip(distances, (left, right)):
            if label == "Abundance":
                # Center the diverging colormap on zero: clamp to the
                # requested quantiles, then take a symmetric range.
                lower, upper = dist.quantile(value_range_slider.value)
                vmax = max(abs(lower), abs(upper))
                emb.metric_color_options = (
                    diverging_cmap,
                    diverging_cmap[::-1],
                    [-vmax, vmax],
                    ("Lower", "Higher", "Rel. Abundance"),
                )
            elif label == "Confusion":
                emb.metric_color_options = (
                    "viridis",
                    "viridis_r",
                    value_range_slider.value,
                    ("Low", "High", "Confusion"),
                )
            elif label == "Neighborhood":
                emb.metric_color_options = (
                    "viridis",
                    "viridis_r",
                    value_range_slider.value,
                    ("Similar", "Dissimilar", "Neighborhood"),
                )
            else:
                raise ValueError(
                    f"color options unspecified for metric '{metric_dropdown.value.__name__}'"
                )

            # Assigning distances triggers the metric-scatter refresh.
            emb.distances = dist

    return callback
196 |
--------------------------------------------------------------------------------
/src/cev/_compare_selection_type_dropdown.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import contextlib
4 | import typing
5 |
6 | import ipywidgets
7 | import numpy as np
8 |
9 | from ._widget_utils import link_widgets
10 |
11 | if typing.TYPE_CHECKING:
12 | from ._embedding_widget import EmbeddingWidgetCollection
13 |
14 |
def create_selection_type_dropdown(
    left: EmbeddingWidgetCollection,
    right: EmbeddingWidgetCollection,
    pointwise_correspondence: bool,
    default: str | None = "independent",
):
    """Create the control that decides how selections propagate.

    Modes:

    - "independent": selections in each embedding are unrelated.
    - "synced": selections mirrored 1:1 between the two scatters
      (requires point-point correspondence).
    - "phenotype": selecting points selects all points with the same
      label(s) in *both* embeddings (requires label-label correspondence).

    Returns an ``ipywidgets.Dropdown`` with all three modes when
    ``pointwise_correspondence`` is True, otherwise a Checkbox toggling
    between "independent" and "phenotype".
    """
    # `unlink` tears down whatever wiring the currently-active mode
    # installed. Each mode handler first calls it, then installs its own
    # links and rebinds `unlink` to the matching teardown.
    def unlink():
        return None

    def independent():
        nonlocal unlink

        # traitlets may raise ValueError when a link is already removed;
        # that is harmless here.
        with contextlib.suppress(ValueError):
            unlink()

    # requires point-point correspondence
    def sync():
        nonlocal unlink

        with contextlib.suppress(ValueError):
            unlink()

        # Mirror the raw selection indices between the two scatters.
        unlink = link_widgets(
            (left.categorical_scatter.widget, "selection"),
            (right.categorical_scatter.widget, "selection"),
        ).unlink

    # requires label-label correspondence
    def phenotype():
        nonlocal unlink

        with contextlib.suppress(ValueError):
            unlink()

        def expand_phenotype(src: EmbeddingWidgetCollection):
            # Expand a point selection in `src` to every point (in both
            # embeddings) sharing any of the selected labels.
            def handler(change):
                phenotypes = set(src.labels.iloc[change.new].unique())

                for emb in (left, right):
                    ilocs = np.where(emb.robust_labels.isin(phenotypes))[0]
                    emb.categorical_scatter.widget.selection = ilocs
                    emb.metric_scatter.widget.selection = ilocs

            return handler

        transform_left = expand_phenotype(left)
        left.categorical_scatter.widget.observe(transform_left, names="selection")
        transform_right = expand_phenotype(right)
        right.categorical_scatter.widget.observe(transform_right, names="selection")

        def unlink_all():
            left.categorical_scatter.widget.unobserve(transform_left, names="selection")
            right.categorical_scatter.widget.unobserve(
                transform_right, names="selection"
            )

        unlink = unlink_all

    if pointwise_correspondence:
        initial_selection = independent

        if default == "synced":
            initial_selection = sync
        elif default == "phenotype":
            initial_selection = phenotype

        selection_type_options = [
            ("Independent", independent),
            ("Synced", sync),
            ("Phenotype", phenotype),
        ]

        selection_type = ipywidgets.Dropdown(
            options=selection_type_options,
            value=initial_selection,
            description="Selection",
        )

        # The dropdown's value *is* the mode handler; switching modes
        # simply invokes the newly selected handler.
        selection_type.observe(lambda change: change.new(), names="value")  # type: ignore
        initial_selection()
        return selection_type

    else:
        # Without point-point correspondence only "phenotype" selection
        # is meaningful, so offer a simple on/off checkbox.
        initial_selection = False
        if default == "phenotype":
            initial_selection = True

        selection_type = ipywidgets.Checkbox(
            initial_selection, description="Phenotype Selection"
        )

        def handle_selection_change(change):
            if change.new is False:
                independent()
            else:
                phenotype()

        selection_type.observe(handle_selection_change, names="value")

        if initial_selection:
            phenotype()

        return selection_type
119 |
--------------------------------------------------------------------------------
/src/cev/_compare_zoom_toggle.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import typing
4 |
5 | import ipywidgets
6 |
7 | if typing.TYPE_CHECKING:
8 | from ._embedding_widget import EmbeddingWidgetCollection
9 |
10 |
def create_zoom_toggle(
    left: EmbeddingWidgetCollection,
    right: EmbeddingWidgetCollection,
    default: bool = False,
):
    """Create an "Auto Zoom" checkbox.

    While checked, each embedding zooms to its current selection as it
    changes; unchecking zooms both embeddings back out to the full view.
    """
    zoom_checkbox = ipywidgets.Checkbox(default, description="Auto Zoom")

    def make_selection_handler(emb: EmbeddingWidgetCollection):
        def on_selection(change):
            if zoom_checkbox.value:
                emb.zoom(to=change.new)

        return on_selection

    for emb in (left, right):
        emb.categorical_scatter.widget.observe(
            make_selection_handler(emb), names="selection"
        )

    def on_toggle(change):
        if change.new:
            left.zoom(to=left.categorical_scatter.selection())
            right.zoom(to=right.categorical_scatter.selection())
        else:
            left.zoom(to=None)
            right.zoom(to=None)

    zoom_checkbox.observe(on_toggle, names="value")
    return zoom_checkbox
43 |
--------------------------------------------------------------------------------
/src/cev/_embedding.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import dataclasses
4 | import typing
5 |
6 | import pandas as pd
7 |
8 | from cev._widget_utils import parse_label
9 |
10 | if typing.TYPE_CHECKING:
11 | import numpy as np
12 | import numpy.typing as npt
13 |
14 | NON_ROBUST_LABEL = "0_0_0_0_0"
15 |
16 |
@dataclasses.dataclass
class Embedding:
    """A 2D embedding: point coordinates plus a label per point.

    Attributes
    ----------
    coords
        (N, 2) array-like of x/y positions.
    labels
        Per-point labels as a ``pd.Series``.
    robust
        Optional boolean mask marking points whose label is "robust";
        ``None`` means all points are treated as robust.
    """

    coords: npt.ArrayLike
    labels: pd.Series
    robust: npt.NDArray[np.bool_] | None = None

    @classmethod
    def from_df(cls, df: pd.DataFrame):
        """Build an Embedding from columns 'x', 'y', 'label' (+ optional 'robust')."""
        return cls(
            coords=df[["x", "y"]].values,
            labels=df["label"],
            robust=df["robust"] if "robust" in df else None,
        )

    @classmethod
    def from_ozette(cls, df: pd.DataFrame, **kwargs):
        """Build an Embedding from an Ozette/FAUST export (see ``_prepare_ozette``)."""
        coords, labels, robust = _prepare_ozette(df, **kwargs)
        return cls(coords=coords, labels=labels, robust=robust)

    def widgets(self, **kwargs):
        """Create the linked widget collection for this embedding."""
        # Imported lazily to avoid a circular import with _embedding_widget.
        from ._embedding_widget import EmbeddingWidgetCollection

        return EmbeddingWidgetCollection.from_embedding(self, **kwargs)
40 |
41 |
def _prepare_ozette(df: pd.DataFrame, robust_only: bool = True):
    """Extract ``(coords, labels, robust)`` from an Ozette-exported frame.

    Two layouts are handled:

    - ISMB data: "cellType" column present; coordinates in "x"/"y" and
      labels in "complete_faust_label".
    - Otherwise: labels in "faustLabels", coordinates in "umapX"/"umapY",
      with per-marker "<marker>_faust_annotation" columns.

    When ``robust_only`` is True, rows whose label equals
    ``NON_ROBUST_LABEL`` are dropped and the returned mask is ``None``.
    """
    # ISMB data
    if "cellType" in df.columns:
        robust = (df["cellType"] != NON_ROBUST_LABEL).to_numpy()
        if robust_only:
            df = df[robust].reset_index(drop=True)
            robust = None

        coords = df[["x", "y"]].to_numpy()
        labels = df["complete_faust_label"].to_numpy()

    else:
        robust = (df["faustLabels"] != NON_ROBUST_LABEL).to_numpy()
        # Presumably every robust label lists all markers in a fixed
        # order, so the first one serves as a template — TODO confirm.
        representative_label = df["faustLabels"][robust].iloc[0]

        if robust_only:
            df = df[robust].reset_index(drop=True)
            labels = df["faustLabels"].to_numpy()
            robust = None
        else:
            # Rebuild complete labels from the per-marker annotation
            # columns so non-robust rows also get a full label.
            labels = pd.Series("", index=df.index)
            for marker in parse_label(representative_label):
                marker_annotation = marker.name + df[f"{marker.name}_faust_annotation"]
                labels += marker_annotation

        coords = df[["umapX", "umapY"]].to_numpy()
    labels = pd.Series(labels, dtype="category")

    return coords, labels, robust
71 |
--------------------------------------------------------------------------------
/src/cev/_embedding_comparison_widget.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import re
4 | import typing
5 |
6 | import ipywidgets
7 |
8 | from cev._compare import (
9 | connect_marker_selection,
10 | create_invert_color_checkbox,
11 | has_pointwise_correspondence,
12 | )
13 | from cev._compare_metric_dropdown import (
14 | create_max_depth_dropdown,
15 | create_metric_dropdown,
16 | create_update_distance_callback,
17 | create_value_range_slider,
18 | )
19 | from cev._compare_selection_type_dropdown import create_selection_type_dropdown
20 | from cev._compare_zoom_toggle import create_zoom_toggle
21 | from cev._embedding import Embedding
22 | from cev._widget_utils import add_ilocs_trait, parse_label
23 | from cev.components import MarkerSelectionIndicator, WidthOptimizer
24 |
25 |
def _create_titles(
    titles: tuple[str, str],
) -> tuple[ipywidgets.Widget, ipywidgets.Widget]:
    """Build the (spacer, titles-row) header widgets for the two columns.

    NOTE(review): the HTML markup inside the ``value=`` strings appears
    to have been stripped in this view of the file (the f-strings span
    lines and contain no tags) — confirm against the original source
    before relying on this block.
    """
    left_title, right_title = titles
    spacer = ipywidgets.HTML(
        value='',
        layout=ipywidgets.Layout(width="100%"),
    )
    title_widget = ipywidgets.HBox(
        [
            ipywidgets.HTML(
                value=f'{left_title}
',
                layout=ipywidgets.Layout(width="50%"),
            ),
            ipywidgets.HTML(
                value=f'{right_title}
',
                layout=ipywidgets.Layout(width="50%"),
            ),
        ]
    )
    return spacer, title_widget
47 |
48 |
class EmbeddingComparisonWidget(ipywidgets.VBox):
    """Side-by-side comparison of two embeddings with shared controls.

    Composes two ``EmbeddingWidgetCollection``s under a header of shared
    controls (metric, colormap inversion, value range, selection mode,
    auto zoom, max depth, and — for FAUST-style labels — a marker
    selection indicator) and keeps their distances in sync.
    """

    def __init__(
        self,
        left_embedding: Embedding,
        right_embedding: Embedding,
        row_height: int = 250,
        metric: typing.Literal["confusion", "neighborhood", "abundance"] = "confusion",
        inverted_colormap: bool = False,
        auto_zoom: bool = False,
        selection: typing.Literal["independent", "synced", "phenotype"] = "independent",
        max_depth: int = 1,
        titles: tuple[str, str] | None = None,
        active_markers: list[str] | typing.Literal["all"] = "all",
        **kwargs,
    ):
        # Point-point selection syncing is only offered when both
        # embeddings describe the same points.
        pointwise_correspondence = has_pointwise_correspondence(
            left_embedding, right_embedding
        )

        self.left_embedding = left_embedding
        self.right_embedding = right_embedding
        self.left = left_embedding.widgets(**kwargs)
        self.right = right_embedding.widgets(**kwargs)

        metric_dropdown = create_metric_dropdown(self.left, self.right, metric)
        max_depth_dropdown = create_max_depth_dropdown(metric_dropdown, max_depth)
        value_range_slider = create_value_range_slider(metric_dropdown)
        update_distances = create_update_distance_callback(
            metric_dropdown,
            max_depth_dropdown,
            value_range_slider,
            self.left,
            self.right,
        )

        # FAUST-style labels (e.g. "CD4+CD8-") carry marker annotations.
        has_markers = "+" in left_embedding.labels.iloc[0]

        if has_markers:
            # Derive the marker list from a representative label.
            markers = [m.name for m in parse_label(left_embedding.labels.iloc[0])]
            if active_markers == "all":
                _active_markers = [True] * len(markers)
            else:
                # Fix: the loop previously also ran for the literal
                # string "all", pointlessly iterating its characters.
                _active_markers = [False] * len(markers)
                for active_marker in active_markers:
                    try:
                        _active_markers[markers.index(active_marker)] = True
                    except ValueError:
                        # Silently ignore marker names not present.
                        pass
            marker_selection = MarkerSelectionIndicator(
                markers=markers, active=_active_markers
            )
            connect_marker_selection(
                marker_selection,
                (self.left_embedding, self.left),
                (self.right_embedding, self.right),
                update_distances,
            )

        zoom = create_zoom_toggle(self.left, self.right, auto_zoom)
        inverted = create_invert_color_checkbox(
            self.left, self.right, inverted_colormap
        )

        selection_type = create_selection_type_dropdown(
            self.left,
            self.right,
            pointwise_correspondence,
            selection,
        )

        # Recompute distances whenever any relevant control changes.
        metric_dropdown.observe(lambda _: update_distances(), names="value")
        max_depth_dropdown.observe(lambda _: update_distances(), names="value")
        value_range_slider.observe(lambda _: update_distances(), names="value")

        update_distances()

        # Header
        settings = ipywidgets.HBox(
            [
                WidthOptimizer(),
                metric_dropdown,
                inverted,
                value_range_slider,
                selection_type,
                zoom,
                max_depth_dropdown,
            ]
        )
        header = [marker_selection, settings] if has_markers else [settings]
        sections: list[ipywidgets.Widget] = [ipywidgets.VBox(header)]

        if titles is not None:
            sections.extend(_create_titles(titles))

        sections.append(
            ipywidgets.HBox(
                [
                    cmp.show(
                        # Fix: was `row_height if row_height is None else
                        # row_height` — a no-op conditional.
                        row_height=row_height,
                        layout=ipywidgets.Layout(width="50%"),
                    )
                    for cmp in (self.left, self.right)
                ]
            )
        )

        super().__init__(sections)
        add_ilocs_trait(self, self.left, self.right)

    @property
    def embeddings(self):
        """Yield (embedding, widget collection) pairs: left, then right."""
        yield [self.left_embedding, self.left]
        yield [self.right_embedding, self.right]

    def select(self, labels: str | list[str]):
        """Select points by label in both embeddings.

        A single string selects all points whose label starts with it.
        A list of marker annotations (e.g. ``["CD4+", "CD8-"]``) selects
        points whose label contains all of them, normalized to each
        embedding's own marker order.
        """
        if isinstance(labels, str):
            for [embedding, embedding_widget] in self.embeddings:
                point_idxs = embedding.labels[
                    embedding.labels.str.startswith(labels)
                ].index
                print(f"Found {len(point_idxs)} points")
                for scatter in embedding_widget.scatters:
                    scatter.selection(point_idxs)
            return

        regexs = []

        for [embedding, embedding_widget] in self.embeddings:
            # Determine this embedding's marker order from its first label.
            markers = list(filter(None, re.split("[+-]", embedding.labels[0])))
            marker_set = set(markers)
            marker_order = {s: i for i, s in enumerate(markers)}

            # Keep only annotations for known markers, sorted into this
            # embedding's marker order so the regex matches label layout.
            valid_labels = list(filter(lambda label: label[:-1] in marker_set, labels))
            ordered_labels = sorted(
                valid_labels, key=lambda label: marker_order.get(label[:-1], 0)
            )

            regex = (
                ".*" + ".*".join([re.escape(label) for label in ordered_labels]) + ".*"
            )
            regexs.append(regex)

        for i, [embedding, embedding_widget] in enumerate(self.embeddings):
            regex = regexs[i]
            point_idxs = embedding.labels[
                embedding.labels.str.match(regex, flags=re.IGNORECASE)
            ].index
            print(f"Found {len(point_idxs)} points")
            for scatter in embedding_widget.scatters:
                scatter.selection(point_idxs)
201 |
--------------------------------------------------------------------------------
/src/cev/_embedding_widget.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import typing
4 | from uuid import uuid4
5 |
6 | import ipywidgets
7 | import jscatter
8 | import numpy as np
9 | import numpy.typing as npt
10 | import pandas as pd
11 | import traitlets
12 |
13 | from cev._embedding import Embedding
14 | from cev._widget_utils import (
15 | NON_ROBUST_LABEL,
16 | create_colormaps,
17 | link_widgets,
18 | robust_labels,
19 | )
20 | from cev.components import MarkerCompositionLogo
21 |
22 | _LABEL_COLUMN = "label"
23 | _ROBUST_LABEL_COLUMN = "robust_label"
24 | _DISTANCE_COLUMN = "distance"
25 |
26 |
class EmbeddingWidgetCollection(traitlets.HasTraits):
    """The linked widgets for one embedding: categorical scatter, metric
    scatter, and marker-composition logo, sharing one DataFrame."""

    inverted = traitlets.Bool(default_value=False)
    labels = traitlets.Any()
    distances = traitlets.Any()
    colormap = traitlets.Dict()

    def __init__(
        self,
        labels: pd.Series,
        categorical_scatter: jscatter.Scatter,
        metric_scatter: jscatter.Scatter,
        logo: MarkerCompositionLogo,
        labeler: typing.Callable[[npt.ArrayLike], pd.Series],
    ):
        super().__init__()
        self.categorical_scatter = categorical_scatter
        self.metric_scatter = metric_scatter
        self.logo = logo
        # Maps raw labels to "robust" labels (non-robust -> NON_ROBUST_LABEL).
        self._labeler = labeler
        # (cmap, inverted cmap, norm, legend labeling) for the metric scatter.
        self.metric_color_options: tuple[str, str, list[int], tuple] = (
            "viridis",
            "viridis_r",
            [0, 1],
            ("min", "max", "value"),
        )

        self.labels = labels
        self.distances = pd.Series(0.0, index=self._data.index, dtype="float64")
        self.colormap = create_colormaps(self.robust_labels.cat.categories)

        # Keep the logo's counts in sync with the current selection.
        ipywidgets.dlink(
            source=(self.categorical_scatter.widget, "selection"),
            target=(self.logo, "counts"),
            transform=self.label_counts,
        )

    def label_counts(self, ilocs: None | np.ndarray = None) -> dict:
        """Count labels, optionally restricted to the given positions."""
        labels = self.labels if ilocs is None else self.labels.iloc[ilocs]
        return {k: int(v) for k, v in labels.value_counts().items()}

    @traitlets.validate("labels")
    def _validate_labels(self, proposal: object):
        assert isinstance(proposal.value, pd.Series)
        # Convert to category if not already. Fix: the condition was
        # inverted — non-categorical series were passed through
        # unconverted, breaking `.cat` accessors downstream (e.g. in
        # `__hash__` and colormap creation).
        return (
            proposal.value
            if pd.api.types.is_categorical_dtype(proposal.value)
            else proposal.value.astype("category")
        )

    @property
    def _data(self) -> pd.DataFrame:
        """The single DataFrame shared by both scatters."""
        assert self.categorical_scatter._data is self.metric_scatter._data
        assert self.categorical_scatter._data is not None
        return self.categorical_scatter._data

    @traitlets.observe("labels")
    def _on_labels_change(self, change):
        labels = change.new
        self._data[_LABEL_COLUMN] = pd.Series(np.asarray(labels), dtype="category")
        self._data[_ROBUST_LABEL_COLUMN] = pd.Series(
            np.asarray(self._labeler(labels)), dtype="category"
        )
        self.logo.counts = self.label_counts(self.categorical_scatter.widget.selection)
        # NOTE(review): `[0]` is label-based indexing; assumes a RangeIndex.
        self.has_markers = "+" in self._data[_LABEL_COLUMN][0]

    @traitlets.validate("distances")
    def _validate_distances(self, proposal: object):
        assert isinstance(proposal.value, pd.Series)
        assert proposal.value.dtype == "float64"
        return proposal.value

    @traitlets.observe("distances")
    def _on_distances_change(self, change):
        self._data[_DISTANCE_COLUMN] = change.new.values
        self._update_metric_scatter()

    @traitlets.observe("inverted")
    def _update_metric_scatter(self, *args, **kwargs):
        """Re-color the metric scatter and filter out non-robust points."""
        cmap, cmapr, norm, labeling = self.metric_color_options
        self.metric_scatter.color(
            by=_DISTANCE_COLUMN,
            map=cmapr if self.inverted else cmap,
            norm=norm,
            labeling=labeling,
        )
        self.metric_scatter.legend(True)

        # Show only robustly-labeled points in the metric view.
        self.metric_scatter.filter(None)
        robust_labels = self._data.query(
            f"{_ROBUST_LABEL_COLUMN} != '{NON_ROBUST_LABEL}'"
        )
        if len(robust_labels):
            self.metric_scatter.filter(robust_labels.index)

    @traitlets.observe("colormap")
    def _update_categorical_scatter(self, *args, **kwargs):
        self.categorical_scatter.legend(False)
        self.categorical_scatter.color(by=_ROBUST_LABEL_COLUMN, map=self.colormap)

    @classmethod
    def from_embedding(
        cls,
        emb: Embedding,
        background_color: str = "black",
        axes: bool = False,
        **kwargs,
    ):
        """Create a collection (two linked scatters + logo) from an Embedding."""
        X = np.array(emb.coords)
        data = pd.DataFrame({"x": X[:, 0], "y": X[:, 1]})

        categorical_scatter, metric_scatter = (
            jscatter.Scatter(
                data=data,
                x="x",
                y="y",
                background_color=background_color,
                axes=axes,
                opacity_by="density",
                lasso_initiator=False,
                tooltip=True,
                tooltip_contents=("color"),
                **kwargs,
            )
            for _ in range(2)
        )

        # link the plots together with js
        link_widgets(
            (categorical_scatter.widget, "selection"),
            (metric_scatter.widget, "selection"),
        )

        return cls(
            labels=emb.labels,
            categorical_scatter=categorical_scatter,
            metric_scatter=metric_scatter,
            logo=MarkerCompositionLogo(),
            labeler=lambda labels: robust_labels(labels, emb.robust),
        )

    @property
    def robust_labels(self) -> pd.Series:
        return self._data[_ROBUST_LABEL_COLUMN]

    @property
    def scatters(self):
        """Yield the categorical scatter, then the metric scatter."""
        yield self.categorical_scatter
        yield self.metric_scatter

    def show(self, row_height: int | None = None, **kwargs):
        """Render both scatters (and the logo, when labels have markers)."""
        widgets = []

        # Shared id so the two scatter views pan/zoom in sync.
        uuid = uuid4().hex

        for scatter in self.scatters:
            if row_height is not None:
                scatter.height(row_height)
            widget = scatter.show()
            widget.layout = {"margin": "0 0 2px 0"}
            widgets.append(widget)
            scatter.widget.view_sync = uuid

        if self.has_markers:
            widgets.append(self.logo)

        return ipywidgets.VBox(widgets, **kwargs)

    def zoom(self, to: None | npt.NDArray = None):
        """Zoom both scatters to the given positions (None = full view)."""
        if to is not None:
            to = to if len(to) > 0 else None
        for s in self.scatters:
            s.zoom(to=to)

    def __hash__(self):
        # Warning: this is a hack! You should probably not rely on this hash
        # unless you know what you're doing.
        #
        # Creates a unique hash for the current "state" of this object
        # to make sure that functools caching works correctly.
        # See the usage in cev._compare_metrics_dropdown
        obj_id = str(id(self))
        categories = ",".join(self.labels.cat.categories.to_list())
        return hash(obj_id + categories)
211 |
--------------------------------------------------------------------------------
/src/cev/_version.py:
--------------------------------------------------------------------------------
1 | from importlib.metadata import PackageNotFoundError, version
2 |
try:
    # Single source of truth: read the version of the installed package.
    __version__ = version("cev")
except PackageNotFoundError:
    # Package not installed (e.g. running from a bare source checkout).
    __version__ = "uninstalled"
7 |
--------------------------------------------------------------------------------
/src/cev/_widget_utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import dataclasses
4 | import itertools
5 | import re
6 | import typing
7 |
8 | import ipywidgets
9 | import numpy as np
10 | import pandas as pd
11 | import traitlets
12 | from jscatter.color_maps import glasbey_dark
13 |
14 | if typing.TYPE_CHECKING:
15 | import numpy.typing as npt
16 |
17 | from .widgets import EmbeddingWidgetCollection
18 |
19 | NON_ROBUST_LABEL = "0_0_0_0_0"
20 | _ERR_MESSAGE = (
21 | "The truth value of an array with more than one element is ambiguous. "
22 | + "Use a.any() or a.all()"
23 | )
24 |
25 |
26 | # patched version which allows for numpy comparison
27 | # https://github.com/jupyter-widgets/traittypes/issues/45
class link_widgets(traitlets.link):
    """``traitlets.link`` that tolerates numpy-array trait values.

    ``traitlets.link`` compares old/new values with ``==``, which raises
    ``ValueError`` for numpy arrays ("truth value of an array ... is
    ambiguous"). This subclass suppresses exactly that error (and
    ``TraitError`` raised by the peer's validation) so scatter selections
    can be linked. See
    https://github.com/jupyter-widgets/traittypes/issues/45.
    """

    def _update_target(self, change):
        try:
            super()._update_target(change)
        except ValueError as e:
            # Only swallow the known ambiguous-truth-value message.
            if e.args[0] != _ERR_MESSAGE:
                raise e
        except traitlets.TraitError:
            pass

    def _update_source(self, change):
        try:
            super()._update_source(change)
        except ValueError as e:
            # Only swallow the known ambiguous-truth-value message.
            if e.args[0] != _ERR_MESSAGE:
                raise e
        except traitlets.TraitError:
            pass
46 |
47 |
@dataclasses.dataclass
class Marker:
    """A single marker annotation, e.g. ``CD4+`` or ``CD8-``."""

    # Marker name, e.g. "CD4".
    name: str
    # Expression annotation: "+" (expressed) or "-" (not expressed).
    annotation: typing.Literal["+", "-"]

    def __str__(self) -> str:
        return self.name + self.annotation
55 |
56 |
def parse_label(label: str) -> list[Marker]:
    """Parse a FAUST-style label like ``"CD4+CD8-"`` into ``Marker``s.

    Fix: the pattern is now a raw string (the non-raw original emitted
    invalid-escape warnings on modern Python) and the character class no
    longer contains a stray ``|``, which previously accepted ``|`` as a
    marker annotation. Behavior for valid "+"/"-" labels is unchanged.
    """
    return [
        Marker(inner_label[:-1], inner_label[-1])
        for inner_label in re.split(r"(\w+[+-])", label)
        if inner_label
    ]
63 |
64 |
def trim_label_series(labels: pd.Series, active_markers: set[str]):
    """
    Trims the labels to only contain the active markers.

    Parameters
    ----------
    labels
        The labels to trim. Must be a categorical series with values like "CD8+CD4-".
    active_markers
        The markers to keep. Must be a set of strings like {"CD8", "CD4"}.

    Returns
    -------
    pd.Series
        The trimmed labels as a categorical series, e.g. "CD8+CD4-"
        becomes "CD8+" when only "CD8" is active.
    """
    # we only need to look at the categories, not the values
    # to compute all the possible new labels
    # Fix: raw-string pattern (the non-raw original triggered
    # invalid-escape warnings on modern Python); matching is unchanged.
    expanded = labels.cat.categories.str.split(r"(\w+[+-])", regex=True)

    # find the column indices of the active markers in the expanded labels
    # NOTE: assumes every category lists the same markers in the same order.
    column_indices = []
    for i, marker in enumerate(expanded[0]):
        if marker[:-1] in active_markers:
            column_indices.append(i)

    # create the new label for each category by concatenating the active markers
    new_categories = pd.Series([""] * len(expanded))
    for column_index in column_indices:
        new_categories += expanded.str[column_index]

    # Index the new categories by the previous codes.
    # This creates a new array with all the updated labels.
    new_labels = new_categories[labels.cat.codes]
    return pd.Series(new_labels, dtype="category")
100 |
101 |
def add_ilocs_trait(
    widget: traitlets.HasTraits,
    right: EmbeddingWidgetCollection,
    left: EmbeddingWidgetCollection,
):
    """Add an ``ilocs`` tuple trait to ``widget``.

    The trait always holds the current (left, right) scatter selections.
    """
    widget.add_traits(
        ilocs=traitlets.Tuple(
            (
                left.categorical_scatter.selection(),
                right.categorical_scatter.selection(),
            )
        )
    )

    # Each link replaces only its own half of the tuple.
    def replace_left(iloc):
        return (iloc, widget.ilocs[1])  # type: ignore

    def replace_right(iloc):
        return (widget.ilocs[0], iloc)  # type: ignore

    ipywidgets.dlink(
        source=(left.categorical_scatter.widget, "selection"),
        target=(widget, "ilocs"),
        transform=replace_left,
    )
    ipywidgets.dlink(
        source=(right.categorical_scatter.widget, "selection"),
        target=(widget, "ilocs"),
        transform=replace_right,
    )
128 |
129 |
130 | # Created with https://gka.github.io/palettes/#/256|d|19ffff,33bbff,444444|444444,ff5023,ffaa00|1|1
131 | diverging_cmap = [
132 | "#19ffff",
133 | "#1cfdff",
134 | "#1efcff",
135 | "#20faff",
136 | "#22f8ff",
137 | "#24f6fe",
138 | "#26f5fe",
139 | "#27f3fe",
140 | "#29f1fd",
141 | "#2af0fd",
142 | "#2beefc",
143 | "#2decfc",
144 | "#2eebfb",
145 | "#2fe9fb",
146 | "#31e7fa",
147 | "#32e6f9",
148 | "#33e4f9",
149 | "#34e2f8",
150 | "#35e1f7",
151 | "#36dff6",
152 | "#37ddf5",
153 | "#38dcf4",
154 | "#39daf3",
155 | "#39d9f2",
156 | "#3ad7f1",
157 | "#3bd5f0",
158 | "#3cd4ef",
159 | "#3dd2ee",
160 | "#3dd1ed",
161 | "#3ecfec",
162 | "#3fcdeb",
163 | "#40ccea",
164 | "#40cae8",
165 | "#41c9e7",
166 | "#42c7e6",
167 | "#42c5e5",
168 | "#43c4e3",
169 | "#43c2e2",
170 | "#44c1e1",
171 | "#45bfdf",
172 | "#45bede",
173 | "#46bcdd",
174 | "#46bbdb",
175 | "#47b9da",
176 | "#47b8d8",
177 | "#48b6d7",
178 | "#48b5d6",
179 | "#49b3d4",
180 | "#49b1d3",
181 | "#49b0d1",
182 | "#4aaed0",
183 | "#4aadce",
184 | "#4babcd",
185 | "#4baacb",
186 | "#4ba8c9",
187 | "#4ca7c8",
188 | "#4ca6c6",
189 | "#4ca4c5",
190 | "#4ca3c3",
191 | "#4da1c1",
192 | "#4da0c0",
193 | "#4d9ebe",
194 | "#4e9dbc",
195 | "#4e9bbb",
196 | "#4e9ab9",
197 | "#4e98b7",
198 | "#4e97b6",
199 | "#4f95b4",
200 | "#4f94b2",
201 | "#4f93b1",
202 | "#4f91af",
203 | "#4f90ad",
204 | "#4f8eab",
205 | "#4f8daa",
206 | "#508ba8",
207 | "#508aa6",
208 | "#5089a4",
209 | "#5087a3",
210 | "#5086a1",
211 | "#50849f",
212 | "#50839d",
213 | "#50819b",
214 | "#50809a",
215 | "#507f98",
216 | "#507d96",
217 | "#507c94",
218 | "#507b92",
219 | "#507991",
220 | "#50788f",
221 | "#50768d",
222 | "#50758b",
223 | "#4f7489",
224 | "#4f7287",
225 | "#4f7186",
226 | "#4f7084",
227 | "#4f6e82",
228 | "#4f6d80",
229 | "#4f6c7e",
230 | "#4e6a7c",
231 | "#4e697a",
232 | "#4e6879",
233 | "#4e6677",
234 | "#4e6575",
235 | "#4d6473",
236 | "#4d6271",
237 | "#4d616f",
238 | "#4d606d",
239 | "#4c5e6b",
240 | "#4c5d6a",
241 | "#4c5c68",
242 | "#4b5b66",
243 | "#4b5964",
244 | "#4b5862",
245 | "#4a5760",
246 | "#4a555e",
247 | "#4a545c",
248 | "#49535a",
249 | "#495259",
250 | "#495057",
251 | "#484f55",
252 | "#484e53",
253 | "#474d51",
254 | "#474b4f",
255 | "#464a4d",
256 | "#46494b",
257 | "#45484a",
258 | "#454648",
259 | "#454546",
260 | "#474444",
261 | "#494543",
262 | "#4c4543",
263 | "#4e4643",
264 | "#514643",
265 | "#534642",
266 | "#554742",
267 | "#584742",
268 | "#5a4842",
269 | "#5c4841",
270 | "#5f4841",
271 | "#614941",
272 | "#634941",
273 | "#654a40",
274 | "#684a40",
275 | "#6a4a40",
276 | "#6c4b40",
277 | "#6e4b3f",
278 | "#704c3f",
279 | "#724c3f",
280 | "#744c3e",
281 | "#764d3e",
282 | "#794d3e",
283 | "#7b4e3e",
284 | "#7d4e3d",
285 | "#7f4e3d",
286 | "#814f3d",
287 | "#834f3c",
288 | "#85503c",
289 | "#87503c",
290 | "#89513c",
291 | "#8b513b",
292 | "#8c513b",
293 | "#8e523b",
294 | "#90523a",
295 | "#92533a",
296 | "#94533a",
297 | "#96543a",
298 | "#985439",
299 | "#9a5539",
300 | "#9c5539",
301 | "#9d5538",
302 | "#9f5638",
303 | "#a15638",
304 | "#a35737",
305 | "#a55737",
306 | "#a75837",
307 | "#a85836",
308 | "#aa5936",
309 | "#ac5936",
310 | "#ae5a36",
311 | "#af5b35",
312 | "#b15b35",
313 | "#b35c35",
314 | "#b55c34",
315 | "#b65d34",
316 | "#b85d34",
317 | "#ba5e33",
318 | "#bb5f33",
319 | "#bd5f32",
320 | "#be6032",
321 | "#c06032",
322 | "#c26131",
323 | "#c36231",
324 | "#c56231",
325 | "#c66330",
326 | "#c86430",
327 | "#ca652f",
328 | "#cb652f",
329 | "#cd662f",
330 | "#ce672e",
331 | "#cf672e",
332 | "#d1682d",
333 | "#d2692d",
334 | "#d46a2d",
335 | "#d56b2c",
336 | "#d76b2c",
337 | "#d86c2b",
338 | "#d96d2b",
339 | "#db6e2a",
340 | "#dc6f2a",
341 | "#dd702a",
342 | "#df7129",
343 | "#e07229",
344 | "#e17328",
345 | "#e37328",
346 | "#e47427",
347 | "#e57527",
348 | "#e67626",
349 | "#e77726",
350 | "#e97825",
351 | "#ea7924",
352 | "#eb7b24",
353 | "#ec7c23",
354 | "#ed7d23",
355 | "#ee7e22",
356 | "#ef7f22",
357 | "#f08021",
358 | "#f18120",
359 | "#f28220",
360 | "#f3841f",
361 | "#f4851e",
362 | "#f4861e",
363 | "#f5871d",
364 | "#f6881c",
365 | "#f78a1b",
366 | "#f88b1a",
367 | "#f88c1a",
368 | "#f98e19",
369 | "#fa8f18",
370 | "#fa9017",
371 | "#fb9216",
372 | "#fb9315",
373 | "#fc9414",
374 | "#fc9613",
375 | "#fd9712",
376 | "#fd9911",
377 | "#fe9a10",
378 | "#fe9c0e",
379 | "#fe9d0d",
380 | "#ff9f0c",
381 | "#ffa00a",
382 | "#ffa208",
383 | "#ffa307",
384 | "#ffa505",
385 | "#ffa703",
386 | "#ffa802",
387 | "#ffaa00",
388 | ]
389 |
390 |
391 | def robust_labels(labels: npt.ArrayLike, robust: npt.NDArray[np.bool_] | None = None):
392 | if robust is not None:
393 | labels = np.where(
394 | robust,
395 | labels,
396 | NON_ROBUST_LABEL,
397 | )
398 | return pd.Series(labels, dtype="category")
399 |
400 |
@typing.overload
def create_colormaps(cats: typing.Iterable[str]) -> dict: ...


@typing.overload
def create_colormaps(
    cats: typing.Iterable[str], *other: typing.Iterable[str]
) -> tuple[dict, ...]: ...


def create_colormaps(
    cats: typing.Iterable[str], *others: typing.Iterable[str]
) -> dict | tuple[dict, ...]:
    """Create color lookup tables for one or more sets of category labels.

    All inputs share a single underlying color assignment, so the same
    label maps to the same color in every returned colormap.

    Parameters
    ----------
    cats : typing.Iterable[str]
        Category labels for the first colormap.
    *others : typing.Iterable[str]
        Additional label sets; one colormap is returned per input.

    Returns
    -------
    dict | tuple[dict, ...]
        A single ``{label: color}`` dict when only ``cats`` is given,
        otherwise a tuple of dicts (one per input, in argument order).
    """
    all_categories = set(cats)
    for other in others:
        all_categories.update(other)

    # Create unified colormap. Iterate a *sorted* view so the label -> color
    # assignment is deterministic across processes; plain set iteration order
    # depends on string hashing (PYTHONHASHSEED) and varies between runs.
    lookup = dict(
        zip(
            sorted(all_categories),
            itertools.cycle(glasbey_dark[1:]),
        )
    )

    # force non-robust to be grey
    lookup[NON_ROBUST_LABEL] = "#333333"

    # create separate colormaps for each component
    cmaps = tuple({c: lookup[c] for c in labels} for labels in (cats, *others))
    if len(cmaps) == 1:
        return cmaps[0]
    return cmaps
434 |
--------------------------------------------------------------------------------
/src/cev/components/__init__.py:
--------------------------------------------------------------------------------
1 | from ._html_widget import HTMLWidget as HTMLWidget
2 | from ._marker_composition_logo import MarkerCompositionLogo as MarkerCompositionLogo
3 | from ._marker_selection_indicator import (
4 | MarkerSelectionIndicator as MarkerSelectionIndicator,
5 | )
6 | from ._width_optimizer import WidthOptimizer as WidthOptimizer
7 |
--------------------------------------------------------------------------------
/src/cev/components/_html_widget.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import uuid
4 |
5 | import IPython.display
6 | import ipywidgets
7 | import jinja2
8 |
9 | __all__ = ["HTMLWidget"]
10 |
11 |
class HTMLWidget(ipywidgets.Output):
    """Output widget whose content is produced by a jinja2 template.

    Subclasses declare traits and override ``_template``; the rendered
    HTML refreshes automatically whenever a subclass-defined trait changes.
    """

    _template = jinja2.Template("")

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Re-render on any change to a trait declared by the subclass.
        self.observe(lambda _: self._render(), names=self.class_own_traits().keys())
        self._render()

    def _render(self):
        """Render ``_template`` with the current trait values into the output."""
        context = {trait: getattr(self, trait) for trait in self.class_own_traits()}
        markup = self._template.render(id=uuid.uuid4().hex, **context)
        self.clear_output()
        with self:
            IPython.display.display(IPython.display.HTML(markup))
26 |
--------------------------------------------------------------------------------
/src/cev/components/_marker_composition_logo.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import jinja2
4 | import traitlets
5 |
6 | from ._html_widget import HTMLWidget
7 |
8 | __all__ = ["MarkerCompositionLogo"]
9 |
10 |
11 | class MarkerCompositionLogo(HTMLWidget):
12 | _template = jinja2.Template(
13 | """
14 |
15 |
95 | """
96 | )
97 |
98 | counts = traitlets.Dict()
99 |
--------------------------------------------------------------------------------
/src/cev/components/_marker_selection_indicator.py:
--------------------------------------------------------------------------------
1 | import anywidget
2 | import traitlets
3 |
4 | __all__ = ["MarkerSelectionIndicator"]
5 |
6 |
class MarkerSelectionIndicator(anywidget.AnyWidget):
    """Row of toggle buttons for choosing which markers are active.

    Front-end behavior (see the ``_esm`` JavaScript below): plain click
    toggles one marker (the last active marker cannot be deselected),
    alt-click selects only the clicked marker, shift-click selects every
    marker up to the clicked one in the current sort order. A header pair
    of buttons switches sorting between "Expression Discriminability"
    (the order of ``markers``) and alphabetical (natural/numeric-aware).
    """

    # NOTE(review): inside the JS button-creation loop, the borderRadius
    # test uses the diff-loop index `i`, which counts newly added buttons
    # rather than overall position — rounding may be off when buttons are
    # appended incrementally. Confirm before relying on the styling.
    _esm = """
    const FONT_COLOR = "var(--jp-ui-font-color0)";
    const FONT_COLOR_SECONDARY = "var(--jp-ui-font-color1)";
    const BUTTON_BG = "var(--jp-layout-color2)";
    const BUTTON_HOVER_BG = "var(--jp-layout-color2)";
    const BUTTON_ACTIVE_BG = "#1976d2";
    const BUTTON_ACTIVE_HOVER_BG = "#0069d3";
    const BUTTON_ACTIVE_SECONDARY_BG = "var(--jp-ui-font-color3)";
    const NATURAL_COMPARATOR = new Intl.Collator(undefined, { numeric: true }).compare;

    export async function render(view) {
      const container = document.createElement("div");
      view.el.appendChild(container);

      Object.assign(container.style, {
        display: "flex",
        flexDirection: "column",
        gap: "4px",
      });

      const header = document.createElement("div");
      container.appendChild(header);

      Object.assign(header.style, {
        display: "flex",
        justifyContent: "space-between",
        alignItems: "center",
        gap: "2px",
      });

      const title = document.createElement("h4");
      header.appendChild(title);

      Object.assign(title.style, {
        padding: "0",
        margin: "0",
      });
      title.textContent = "Markers";

      const settings = document.createElement("div");
      header.appendChild(settings);
      Object.assign(settings.style, { display: "flex", alignItems: "center" });

      const sortLabel = document.createElement("div");
      sortLabel.textContent = "Sort by";
      Object.assign(sortLabel.style, { fontSize: "0.875em", marginRight: "0.25rem" });

      const sortImportance = document.createElement("button");
      sortImportance.textContent = "Expression Discriminability";
      Object.assign(sortImportance.style, {
        background: view.model.get("sort_alphabetically") ? BUTTON_BG : BUTTON_ACTIVE_SECONDARY_BG,
        border: `1px solid ${view.model.get("sort_alphabetically") ? BUTTON_BG : BUTTON_ACTIVE_SECONDARY_BG}`,
        borderRadius: "4px 0 0 4px",
        userSelect: "none",
        cursor: "pointer",
      });
      sortImportance.addEventListener("click", function() {
        view.model.set("sort_alphabetically", false);
        view.model.save_changes();
      });

      const sortAlphabetically = document.createElement("button");
      sortAlphabetically.textContent = "Alphabetically";
      Object.assign(sortAlphabetically.style, {
        background: view.model.get("sort_alphabetically") ? BUTTON_ACTIVE_SECONDARY_BG : BUTTON_BG,
        border: `1px solid ${view.model.get("sort_alphabetically") ? BUTTON_ACTIVE_SECONDARY_BG : BUTTON_BG}`,
        borderRadius: "0 4px 4px 0",
        marginLeft: "-1px",
        userSelect: "none",
        cursor: "pointer",
      });
      sortAlphabetically.addEventListener("click", function() {
        view.model.set("sort_alphabetically", true);
        view.model.save_changes();
      });

      settings.appendChild(sortLabel);
      settings.appendChild(sortImportance);
      settings.appendChild(sortAlphabetically);

      const markersEl = document.createElement("div");
      container.appendChild(markersEl);

      Object.assign(markersEl.style, {
        display: "flex",
        flexWrap: "wrap",
        gap: "2px",
      });

      function getOrder() {
        const markers = view.model.get("markers");
        return view.model.get("sort_alphabetically")
          ? new Map(markers.map((marker, i) => [marker, i]).sort(([a], [b]) => NATURAL_COMPARATOR(a, b)).map(([marker, i], j) => [i, j]))
          : undefined;
      }

      function rerender() {
        const markers = view.model.get("markers");
        const active = view.model.get("active");
        const diff = markers.length - markersEl.childElementCount;

        if (diff > 0) {
          for (let i = 0; i < diff; i++) {
            const button = document.createElement("button");

            Object.assign(button.style, {
              background: "var(--marker-selection-indicator-bg)",
              cursor: "pointer",
              padding: "4px 6px",
              border: "0",
              borderRadius: i === 0
                ? "2px 0 0 2px"
                : i === markers.length - 1
                  ? "0 2px 2px 0"
                  : "0",
              userSelect: "none",
            });

            button.addEventListener("click", function (event) {
              let newActive = [...view.model.get("active")];

              if (event.altKey) {
                newActive = Array.from({ length: markers.length }, (_, j) => j === i);
              } else if (event.shiftKey) {
                const order = getOrder();
                const _i = order ? order.get(i) : i;
                newActive = Array.from({ length: markers.length }, (_, j) => (order ? order.get(j) : j) <= _i);
              } else {
                const numActive = newActive.reduce((num, curr) => num + Number(curr), 0);
                if (!newActive[i] || numActive > 1) newActive[i] = !newActive[i];
              }

              view.model.set("active", newActive);
              view.model.save_changes();
            });

            button.addEventListener("mouseenter", function () {
              const active = view.model.get("active");
              button.style.setProperty("--marker-selection-indicator-bg", active[i] ? BUTTON_ACTIVE_HOVER_BG : BUTTON_HOVER_BG);
            });

            button.addEventListener("mouseleave", function () {
              const active = view.model.get("active");
              button.style.setProperty("--marker-selection-indicator-bg", active[i] ? BUTTON_ACTIVE_BG : BUTTON_BG);
            });

            markersEl.appendChild(button);
          }
        } else if (diff < 0) {
          for (let i = 0; i < -diff; i++) {
            markersEl.removeChild(markersEl.lastChild);
          }
        }

        const order = getOrder();

        for (let i = 0; i < markers.length; i++) {
          const child = markersEl.childNodes[i];

          if (active[i]) {
            child.style.color = "white";
            child.style.setProperty("--marker-selection-indicator-bg", BUTTON_ACTIVE_BG);
          } else {
            child.style.color = FONT_COLOR;
            child.style.setProperty("--marker-selection-indicator-bg", BUTTON_BG);
          }

          if (order?.has(i)) {
            child.style.order = order.get(i);
          } else {
            child.style.order = 0;
          }

          child.textContent = markers[i];
        }

        const isAlphabetically = view.model.get("sort_alphabetically");
        const isImportance = !isAlphabetically;

        const getButtonStyle = (active) => ({
          background: active ? BUTTON_ACTIVE_SECONDARY_BG : BUTTON_BG,
          border: 0,
          color: active ? FONT_COLOR : FONT_COLOR_SECONDARY,
        });

        Object.assign(sortImportance.style, getButtonStyle(isImportance));
        Object.assign(sortAlphabetically.style, getButtonStyle(isAlphabetically));
      }

      view.model.on("change:markers", rerender);
      view.model.on("change:active", rerender);
      view.model.on("change:sort_alphabetically", rerender);

      rerender();
    }
    """

    # Marker names shown as toggle buttons (synced with the front end).
    markers = traitlets.List(trait=traitlets.Unicode()).tag(sync=True)
    # Per-marker active flags; same length/order as `markers`.
    active = traitlets.List(trait=traitlets.Bool()).tag(sync=True)
    # Front-end sort mode: True = alphabetical, False = importance order.
    sort_alphabetically = traitlets.Bool().tag(sync=True)
208 |
--------------------------------------------------------------------------------
/src/cev/components/_width_optimizer.py:
--------------------------------------------------------------------------------
1 | import anywidget
2 |
3 | __all__ = ["WidthOptimizer"]
4 |
5 |
class WidthOptimizer(anywidget.AnyWidget):
    """This widget gets rid of unwanted whitespace in front of ipywidgets.

    On render it sets the JupyterLab CSS variable
    ``--jp-widgets-inline-label-width`` to ``auto`` on the parent element,
    so inline widget labels size to their content instead of a fixed width.
    The ``setTimeout(…, 0)`` defers the change until after the widget has
    been attached to the DOM (``parentNode`` exists).
    """

    _esm = """
    export function render(view) {
      setTimeout(() => {
        view.el.parentNode.style.setProperty('--jp-widgets-inline-label-width', 'auto');
      }, 0);
    }
    """
16 |
--------------------------------------------------------------------------------
/src/cev/metrics.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import typing
4 |
5 | import cev_metrics
6 | import numpy as np
7 | import numpy.linalg as nplg
8 | import pandas as pd
9 |
10 | if typing.TYPE_CHECKING:
11 | import numpy.typing as npt
12 |
13 | __all__ = [
14 | "centered_logratio",
15 | "compare_neighborhoods",
16 | "confusion",
17 | "merge_abundances_left",
18 | "neighborhood",
19 | "relative_abundance",
20 | "rowise_cosine_similarity",
21 | "transform_abundance",
22 | ]
23 |
24 |
def confusion(df: pd.DataFrame) -> pd.Series:
    """Per-label confusion score: 1 minus the row-normalized diagonal.

    Labels with two or fewer points are forced to 0.0 confusion.
    """
    matrix = cev_metrics.confusion(df)
    # Only the diagonal of the row-normalized matrix is needed, so compute
    # it directly: entry i is matrix[i, i] / row_sum[i].
    scores = 1 - matrix.diagonal() / matrix.sum(axis=1)
    data = pd.Series(scores, index=df["label"].cat.categories)
    # TODO: move to cev-metrics
    # Replace any label with 2 or less count with 0.0 confusion.
    counts = df["label"].value_counts()
    data.loc[counts[counts <= 2].index] = 0
    return data
34 |
35 |
def neighborhood(df: pd.DataFrame, max_depth: int = 1) -> pd.DataFrame:
    """Label-level neighborhood matrix with its diagonal zeroed out."""
    labels = df["label"].cat.categories
    scores = cev_metrics.neighborhood(df, max_depth)
    np.fill_diagonal(scores, 0)
    return pd.DataFrame(scores, index=labels, columns=labels)
41 |
42 |
def compare_neighborhoods(a: pd.DataFrame, b: pd.DataFrame) -> dict[str, float]:
    """Compute per-label cosine *dissimilarity* between two neighborhood matrices.

    Parameters
    ----------
    a : pd.DataFrame
        A symmetric DataFrame with shared rows/cols.
    b : pd.DataFrame
        A symmetric DataFrame with shared rows/cols.

    Returns
    -------
    dict[str, float]
        Maps every label from either input to ``1 - cosine similarity`` of
        its rows over the shared labels; labels present in only one input
        keep the default value 0.0.
    """
    assert len(a) == len(a.columns)
    assert len(b) == len(b.columns)
    shared = a.index.intersection(b.index)
    result = {label: 0.0 for label in a.index.union(b.index)}
    dissimilarity = 1 - rowise_cosine_similarity(
        a.loc[shared, shared], b.loc[shared, shared]
    )
    result.update(dissimilarity)
    return result
65 |
66 |
def rowise_cosine_similarity(X0: npt.ArrayLike, X1: npt.ArrayLike):
    """Compute the cosine similarity row by row for two equally shaped 2D matrices."""
    numerator = np.sum(X0 * X1, axis=1)
    denominator = nplg.norm(X0, axis=1) * nplg.norm(X1, axis=1)
    return numerator / denominator
70 |
71 |
def transform_abundance(
    frequencies: pd.DataFrame,
    abundances: dict[str, int],
    force_include_self: bool = True,
    bit_mask: bool = False,
    clr: bool = False,
):
    """Creates an abundance-based representation.

    The label-level neighborhood frequencies are expanded into abundances
    by multiplying each column with that label's abundance value. With
    ``bit_mask`` every non-zero frequency is treated as 1 instead.

    Parameters
    ----------
    frequencies : pd.DataFrame
        A symmetric DataFrame with shared rows/cols.
    abundances : dict[str, int]
        A dictionary mapping labels to abundances.
    force_include_self : bool, optional
        Whether to include the label itself in the neighborhood, by default True.
    bit_mask : bool, optional
        Whether to use a bit mask instead of the frequencies when expanding
        abundances, by default False.
    clr : bool, optional
        Whether to normalize the count values by transforming them to centered
        log ratios, by default False.
    """
    assert (
        frequencies.index.to_list() == frequencies.columns.to_list()
    ), "must be a symmetric DataFrame with shared rows/cols"

    mask = frequencies.to_numpy()
    if bit_mask:
        mask = mask > 0
    if force_include_self:
        # Diagonal entries become "present" regardless of the frequencies.
        np.fill_diagonal(mask, True if bit_mask else 1.0)

    if clr:
        # +1 inflation avoids log(0) for labels with zero abundance.
        inflated = np.fromiter(abundances.values(), dtype=int) + 1
        log_ratios = np.log10(inflated / _gmean(inflated))
        values = dict(zip(abundances.keys(), log_ratios))
    else:
        values = abundances

    weights = np.array([values[label] for label in frequencies.columns])
    return pd.DataFrame(
        mask * weights,
        columns=frequencies.columns,
        index=frequencies.index,
    )
126 |
127 |
def merge_abundances_left(left: pd.DataFrame, right: pd.DataFrame):
    """Create single label-mask using all labels from left and right.

    Labels missing from ``left`` inherit their neighbor rows from
    ``right``; entries among those copied labels are then reset to zero.
    """
    labels = pd.CategoricalIndex(left.index.union(right.index).sort_values())
    merged = pd.DataFrame(
        np.zeros((len(labels), len(labels)), dtype=int),
        index=labels,
        columns=labels,
    )
    # Left values take precedence in the unified matrix.
    merged.loc[left.index, left.columns] = left
    # Populate rows that only exist in `right`.
    only_right = list(set(labels).difference(left.index))
    merged.loc[only_right, right.columns] = right.loc[only_right, right.columns]
    # NOTE(review): this zeroes the whole only_right x only_right submatrix,
    # not just the diagonal — confirm that is the intended mask semantics.
    merged.loc[only_right, only_right] = 0
    return merged
147 |
148 |
def relative_abundance(abundance_representation: pd.DataFrame):
    """Fraction of each label's own (diagonal) abundance over its row total."""
    self_abundance = np.diagonal(abundance_representation)
    row_totals = abundance_representation.sum(axis=1)
    return self_abundance / row_totals
151 |
152 |
def centered_logratio(abundance_representation: pd.DataFrame):
    """Per-label centered log-ratio of the diagonal entry against its row."""
    counts = abundance_representation.to_numpy().copy()
    diag = np.diagonal(counts)
    # Non-positive self-counts are clamped to 1 so the log stays finite.
    np.fill_diagonal(counts, np.where(diag > 0, diag, 1))

    ratios = [
        np.log10((row[i] + 1) / _gmean(row + 1)) for i, row in enumerate(counts)
    ]
    return pd.Series(ratios, index=abundance_representation.index)
167 |
168 |
169 | # from scipy.stats.mstats.gmean
170 | #
171 | # Copyright (c) 2001-2002 Enthought, Inc. 2003-2023, SciPy Developers.
172 | # All rights reserved.
173 | #
174 | # Redistribution and use in source and binary forms, with or without
175 | # modification, are permitted provided that the following conditions
176 | # are met:
177 | #
178 | # 1. Redistributions of source code must retain the above copyright
179 | # notice, this list of conditions and the following disclaimer.
180 | #
181 | # 2. Redistributions in binary form must reproduce the above
182 | # copyright notice, this list of conditions and the following
183 | # disclaimer in the documentation and/or other materials provided
184 | # with the distribution.
185 | #
186 | # 3. Neither the name of the copyright holder nor the names of its
187 | # contributors may be used to endorse or promote products derived
188 | # from this software without specific prior written permission.
189 | #
190 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
191 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
192 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
193 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
194 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
195 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
196 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
197 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
198 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
199 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
200 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
201 | def _gmean(a, axis=0, dtype=None, weights=None):
202 | r"""Compute the weighted geometric mean along the specified axis.
203 |
204 | The weighted geometric mean of the array :math:`a_i` associated to weights
205 | :math:`w_i` is:
206 |
207 | .. math::
208 |
209 | \exp \left( \frac{ \sum_{i=1}^n w_i \ln a_i }{ \sum_{i=1}^n w_i }
210 | \right) \, ,
211 |
212 | and, with equal weights, it gives:
213 |
214 | .. math::
215 |
216 | \sqrt[n]{ \prod_{i=1}^n a_i } \, .
217 |
218 | Parameters
219 | ----------
220 | a : array_like
221 | Input array or object that can be converted to an array.
222 | axis : int or None, optional
223 | Axis along which the geometric mean is computed. Default is 0.
224 | If None, compute over the whole array `a`.
225 | dtype : dtype, optional
226 | Type to which the input arrays are cast before the calculation is
227 | performed.
228 | weights : array_like, optional
229 | The `weights` array must be broadcastable to the same shape as `a`.
230 | Default is None, which gives each value a weight of 1.0.
231 |
232 | Returns
233 | -------
234 | gmean : ndarray
235 | See `dtype` parameter above.
236 |
237 | See Also
238 | --------
239 | numpy.mean : Arithmetic average
240 | numpy.average : Weighted average
241 | hmean : Harmonic mean
242 |
243 | References
244 | ----------
245 | .. [1] "Weighted Geometric Mean", *Wikipedia*,
246 | https://en.wikipedia.org/wiki/Weighted_geometric_mean.
247 |
248 | Examples
249 | --------
250 | >>> from scipy.stats import gmean
251 | >>> gmean([1, 4])
252 | 2.0
253 | >>> gmean([1, 2, 3, 4, 5, 6, 7])
254 | 3.3800151591412964
255 | >>> gmean([1, 4, 7], weights=[3, 1, 3])
256 | 2.80668351922014
257 |
258 | """
259 |
260 | a = np.asarray(a, dtype=dtype)
261 |
262 | if weights is not None:
263 | weights = np.asarray(weights, dtype=dtype)
264 |
265 | with np.errstate(divide="ignore"):
266 | log_a = np.log(a)
267 |
268 | return np.exp(np.average(log_a, axis=axis, weights=weights))
269 |
--------------------------------------------------------------------------------
/src/cev/widgets.py:
--------------------------------------------------------------------------------
1 | from cev._compare import compare as compare
2 | from cev._embedding import Embedding as Embedding
3 | from cev._embedding_comparison_widget import (
4 | EmbeddingComparisonWidget as EmbeddingComparisonWidget,
5 | )
6 | from cev._embedding_widget import EmbeddingWidgetCollection as EmbeddingWidgetCollection
7 |
--------------------------------------------------------------------------------
/tests/test_cev.py:
--------------------------------------------------------------------------------
def test_cev():
    """Smoke test: the `cev` package must import without raising."""
    # TODO: more tests...
    import cev  # noqa
5 |
--------------------------------------------------------------------------------
/tests/test_widget_utils.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 | from cev._widget_utils import trim_label_series
4 |
5 |
def test_trim_label_series():
    """Trimming keeps only the requested markers, preserving order."""
    labels = pd.Series(
        ["CD8+CD4-CD3+", "CD8+CD4+CD3+", "CD8-CD4+CD3-", "CD8-CD4-CD3+"],
        dtype="category",
    )
    expected = pd.Series(
        ["CD8+CD3+", "CD8+CD3+", "CD8-CD3-", "CD8-CD3+"], dtype="category"
    )

    actual = trim_label_series(labels, {"CD8", "CD3"})

    assert actual.tolist() == expected.tolist()
    assert actual.cat.categories.tolist() == expected.cat.categories.tolist()
17 |
--------------------------------------------------------------------------------