├── .editorconfig ├── .gitattributes ├── .github └── workflows │ └── ci.yml ├── .gitignore ├── CHANGELOG.md ├── CITATION.cff ├── LICENSE ├── README.md ├── ismb-biovis-2023-poster.jpg ├── notebooks ├── abundance-analysis.ipynb ├── getting-started.ipynb └── lui-2021.ipynb ├── pyproject.toml ├── src └── cev │ ├── __init__.py │ ├── _cli.py │ ├── _compare.py │ ├── _compare_metric_dropdown.py │ ├── _compare_selection_type_dropdown.py │ ├── _compare_zoom_toggle.py │ ├── _embedding.py │ ├── _embedding_comparison_widget.py │ ├── _embedding_widget.py │ ├── _version.py │ ├── _widget_utils.py │ ├── components │ ├── __init__.py │ ├── _html_widget.py │ ├── _marker_composition_logo.py │ ├── _marker_selection_indicator.py │ └── _width_optimizer.py │ ├── metrics.py │ └── widgets.py ├── tests ├── test_cev.py └── test_widget_utils.py └── uv.lock /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | end_of_line = lf 5 | insert_final_newline = true 6 | 7 | [*.{js,py}] 8 | charset = utf-8 9 | 10 | [Snakefile] 11 | indent_style = space 12 | indent_size = 4 13 | 14 | [*.py] 15 | indent_style = space 16 | indent_size = 4 17 | 18 | [*.js] 19 | indent_style = tabs 20 | indent_size = 4 21 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.ipynb linguist-vendored 2 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | tags: 8 | - "v*" 9 | pull_request: 10 | workflow_dispatch: 11 | 12 | jobs: 13 | 14 | Lint: 15 | runs-on: ubuntu-latest 16 | steps: 17 | - uses: actions/checkout@v3 18 | - uses: astral-sh/setup-uv@v3 19 | with: 20 | version: "0.5.x" 21 | - run: | 22 | uv run ruff check 23 | uv 
run ruff format 24 | 25 | Test: 26 | runs-on: ubuntu-latest 27 | strategy: 28 | matrix: 29 | python-version: 30 | - "3.8" 31 | - "3.9" 32 | - "3.10" 33 | - "3.11" 34 | steps: 35 | - uses: actions/checkout@v3 36 | - run: echo "${{ matrix.python-version }}" > .python-version 37 | - uses: actions/setup-python@v5 38 | with: 39 | python-version-file: ".python-version" 40 | - uses: astral-sh/setup-uv@v3 41 | with: 42 | version: "0.5.x" 43 | - run: uv run pytest --color=yes 44 | 45 | Release: 46 | if: startsWith(github.ref, 'refs/tags/') 47 | needs: [Lint, Test] 48 | runs-on: ubuntu-latest 49 | steps: 50 | - uses: actions/checkout@v3 51 | 52 | - uses: astral-sh/setup-uv@v3 53 | with: 54 | version: "0.5.x" 55 | 56 | - run: | 57 | uv build 58 | uvx twine check dist/* 59 | ls -lh dist 60 | 61 | - name: Publish to PyPI 62 | run: uvx twine upload dist/* 63 | env: 64 | TWINE_USERNAME: __token__ 65 | TWINE_PASSWORD: ${{ secrets.TWINE_API_KEY }} 66 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | node_modules/ 3 | data/ 4 | .snakemake/ 5 | .ipynb_checkpoints/ 6 | *.egg-info/ 7 | .vite 8 | dist/ 9 | mair/ 10 | .DS_Store 11 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # 0.2.1 2 | 3 | - Fix: don't contribute self in the neighborhood metric ([#42](https://github.com/OzetteTech/comparative-embedding-visualization/pull/42)) 4 | - Fix: neighborhood legend labels 5 | 6 | # 0.2.0 7 | 8 | - Feat: update Jupyter Scatter and activate tooltips 9 | 10 | # 0.1.1 11 | 12 | - Fix: allow customizing embeddings via `EmbeddingComparisonWidget()` 13 | 14 | # 0.1.0 15 | 16 | - First release 17 | -------------------------------------------------------------------------------- /CITATION.cff: 
-------------------------------------------------------------------------------- 1 | cff-version: "1.2.0" 2 | title: "A General Framework for Comparing Embedding Visualizations Across Class-Label Hierarchies" 3 | authors: 4 | - given-names: "Trevor" 5 | family-names: "Manz" 6 | orcid: "https://orcid.org/0000-0001-7694-5164" 7 | affiliation: "Harvard Medical School" 8 | - given-names: "Fritz" 9 | family-names: "Lekschas" 10 | orcid: "https://orcid.org/0000-0001-8432-4835" 11 | affiliation: "Ozette Technologies" 12 | - given-names: "Evan" 13 | family-names: "Greene" 14 | affiliation: "Ozette Technologies" 15 | - given-names: "Greg" 16 | family-names: "Finak" 17 | orcid: "https://orcid.org/0000-0003-4341-9090" 18 | affiliation: "Ozette Technologies" 19 | - given-names: "Nils" 20 | family-names: "Gehlenborg" 21 | affiliation: "Harvard Medical School" 22 | orcid: "https://orcid.org/0000-0003-0327-8297" 23 | url: "https://github.com/OzetteTech/comparative-embedding-visualization" 24 | message: If you use this software, please cite our article in the 25 | IEEE Transactions on Visualization and Computer Graphics. 
26 | preferred-citation: 27 | type: article 28 | title: "A General Framework for Comparing Embedding Visualizations Across Class-Label Hierarchies" 29 | authors: 30 | - given-names: "Trevor" 31 | family-names: "Manz" 32 | orcid: "https://orcid.org/0000-0001-7694-5164" 33 | affiliation: "Harvard Medical School" 34 | - given-names: "Fritz" 35 | family-names: "Lekschas" 36 | orcid: "https://orcid.org/0000-0001-8432-4835" 37 | affiliation: "Ozette Technologies" 38 | - given-names: "Evan" 39 | family-names: "Greene" 40 | affiliation: "Ozette Technologies" 41 | - given-names: "Greg" 42 | family-names: "Finak" 43 | orcid: "https://orcid.org/0000-0003-4341-9090" 44 | affiliation: "Ozette Technologies" 45 | - given-names: "Nils" 46 | family-names: "Gehlenborg" 47 | affiliation: "Harvard Medical School" 48 | orcid: "https://orcid.org/0000-0003-0327-8297" 49 | date-published: 2024-09-10 50 | doi: "10.1109/TVCG.2024.3456370" 51 | journal: "Transactions on Visualization and Computer Graphics" 52 | publisher: 53 | name: IEEE 54 | url: "https://ieeexplore.ieee.org/document/10672535" 55 | month: 9 56 | year: 2024 57 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2022 Ozette Technologies 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | Comparative Embedding Visualization with cev 3 |

4 | 5 | 6 |
7 | 8 | [![pypi version](https://img.shields.io/badge/ozette-technologies-ozette.svg?color=0072E1&labelColor=0B1117&style=flat-square)](https://ozette.com/) 9 | [![pypi version](https://img.shields.io/pypi/v/cev.svg?color=0072E1&labelColor=0B1117&style=flat-square)](https://pypi.org/project/cev/) 10 | [![build status](https://img.shields.io/github/actions/workflow/status/OzetteTech/comparative-embedding-visualization/ci.yml?branch=main&color=0072E1&labelColor=0B1117&style=flat-square)](https://github.com/OzetteTech/comparative-embedding-visualization/actions?query=workflow%3ARelease) 11 | [![notebook examples](https://img.shields.io/badge/notebook-examples-0072E1.svg?labelColor=0B1117&style=flat-square)](notebooks) 12 | [![ISMB BioVis 2023 Poster](https://img.shields.io/badge/ISMB_BioVis_'23-poster-0072E1.svg?labelColor=0B1117&style=flat-square)](ismb-biovis-2023-poster.jpg) 13 | 14 |
15 | 16 |
17 | 18 | cev is an interactive Jupyter widget for comparing a pair of 2D embeddings with shared labels.
Its novel metric allows to surface differences in label confusion, neighborhood composition, and label size. 19 | 20 |
21 | 22 |
23 | 24 |
25 | 26 | ![Teaser](https://github.com/OzetteTech/comparative-embedding-visualization/assets/84813279/297cbdb9-b6a2-4102-bde9-b14f0ca24a09) 27 | 28 | The figure shows data from [Mair et al. (2022)](https://doi.org/10.1038/s41586-022-04718-w) that were analyzed with [Greene et al.'s (2021) FAUST method](https://doi.org/10.1016/j.patter.2021.100372).
The embeddings were generated with [Greene et al.'s (2021) annotation transformation](https://github.com/flekschas-ozette/ismb-biovis-2022) and [UMAP](https://github.com/lmcinnes/umap).
29 | 30 |
31 | 32 | `cev` is implemented with [anywidget](https://anywidget.dev) and builds upon [jupyter-scatter](https://github.com/flekschas/jupyter-scatter/). 33 | 34 |
35 | 36 | ## Quick Start 37 | 38 | The **cev** package has a cli to quickly try out a demo of comparison widget in JupyterLab. It requires [uv](https://astral.sh/uv) to be installed. 39 | 40 | ```sh 41 | uvx --python 3.11 cev demo # Downloads datasets and launches Jupyter Lab 42 | ``` 43 | 44 | ## Installation 45 | 46 | > **Warning**: `cev` is new and under active development. It is not yet ready for production and APIs are subject to change. 47 | 48 | ```sh 49 | pip install cev 50 | ``` 51 | 52 | ## Getting Started 53 | 54 | ```py 55 | import pandas as pd 56 | from cev.widgets import Embedding, EmbeddingComparisonWidget 57 | 58 | umap_embedding = Embedding.from_ozette(df=pd.read_parquet("../data/mair-2022-tissue-138-umap.pq")) 59 | ozette_embedding = Embedding.from_ozette(df=pd.read_parquet("../data/mair-2022-tissue-138-ozette.pq")) 60 | 61 | umap_vs_ozette = EmbeddingComparisonWidget( 62 | umap_embedding, 63 | ozette_embedding, 64 | titles=["Standard UMAP", "Annotation-Transformed UMAP"], 65 | metric="confusion", 66 | selection="synced", 67 | auto_zoom=True, 68 | row_height=320, 69 | ) 70 | umap_vs_ozette 71 | ``` 72 | 73 | User interface of cev's comparison widget 74 | 75 | 76 | See [notebooks/getting-started.ipynb](notebooks/getting-started.ipynb) for the complete example. 77 | 78 | ## Development 79 | 80 | We use [`uv`](https://astral.sh/uv) for development. 81 | 82 | ```sh 83 | uv run jupyter lab 84 | ``` 85 | 86 | ### Commands Cheatsheet 87 | 88 | | Command | Action | 89 | | :--------------------- | :------------------------------------------------------------------ | 90 | | `uv run ruff format` | Format the source code. | 91 | | `uv run ruff check` | Check the source code for formatting issues. | 92 | | `uv run pytest` | Run unit tests with `pytest` in base environment. 
| 93 | 94 | 95 | ## Release 96 | 97 | releases are triggered via tagged commits 98 | 99 | ``` 100 | git tag -a vX.X.X -m "vX.X.X" 101 | git push --follow-tags 102 | ``` 103 | 104 | ## License 105 | 106 | `cev` is distributed under the terms of the [Apache License 2.0](LICENSE). 107 | 108 | ## Citation 109 | 110 | If you use `cev` in your research, please cite the following preprint: 111 | 112 | ```bibtex 113 | @article{manz2024general, 114 | title = {A General Framework for Comparing Embedding Visualizations Across Class-Label Hierarchies}, 115 | author = {Trevor Manz and Fritz Lekschas and Evan Greene and Greg Finak and Nils Gehlenborg}, 116 | url = {https://doi.org/10.1109/TVCG.2024.3456370}, 117 | doi = {10.1109/TVCG.2024.3456370}, 118 | journal = {IEEE Transactions on Visualization and Computer Graphics}, 119 | series = {VIS ’24}, 120 | publisher = {IEEE}, 121 | year = {2024}, 122 | month = {9}, 123 | pages = {1-11} 124 | } 125 | ``` 126 | -------------------------------------------------------------------------------- /ismb-biovis-2023-poster.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OzetteTech/comparative-embedding-visualization/844676b58725b1fc54407ac615013e1208a4e572/ismb-biovis-2023-poster.jpg -------------------------------------------------------------------------------- /notebooks/abundance-analysis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "a6801dc8-ef42-45c9-b443-6498c63d7396", 6 | "metadata": {}, 7 | "source": [ 8 | "# Ozette Abundance Metric Examples\n", 9 | "\n", 10 | "In this Notebook we're going to use the _Abundance_ metric on three Ozette-embedded studies to find differentially-abundant phenotypes." 
11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "id": "52eb3296-0cbc-4ead-b386-cc23ce16345d", 17 | "metadata": { 18 | "tags": [] 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "%load_ext autoreload\n", 23 | "%autoreload 2" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "id": "f923c828-da89-44fa-a128-963b7be0efab", 30 | "metadata": { 31 | "tags": [] 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "import pathlib\n", 36 | "\n", 37 | "import pandas as pd\n", 38 | "\n", 39 | "from cev.widgets import Embedding, EmbeddingComparisonWidget\n", 40 | "\n", 41 | "\n", 42 | "def get_embedding(folder: str, sample: str):\n", 43 | " return Embedding.from_ozette(\n", 44 | " df=pd.read_parquet(\n", 45 | " pathlib.Path.cwd() / \"..\" / \"data\" / f\"{folder}\" / f\"{sample}.parquet\"\n", 46 | " )\n", 47 | " )" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "id": "00e6670f-5831-46b9-b713-5b0035594d43", 53 | "metadata": {}, 54 | "source": [ 55 | "# Melanoma Study\n", 56 | "\n", 57 | "### Distinct predictive biomarker candidates for response to anti-CTLA-4 and anti-PD-1 immunotherapy in melanoma patients\n", 58 | "\n", 59 | "Subrahmanyam et al., 2018. https://pubmed.ncbi.nlm.nih.gov/29510697/\n", 60 | "\n", 61 | "In this example we're going to compare phenotypes between a pair of unstimulated Pembrolizumab responder and non-responder samples." 
62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "id": "9eb8172a-3e2c-410d-9c6e-14a905e87498", 68 | "metadata": { 69 | "tags": [] 70 | }, 71 | "outputs": [], 72 | "source": [ 73 | "non_responder_embedding = get_embedding(\"subrahmanyam-2018\", \"OZEXPSMPL_782\")\n", 74 | "responder_embedding = get_embedding(\"subrahmanyam-2018\", \"OZEXPSMPL_804\")\n", 75 | "\n", 76 | "melanoma_comparison = EmbeddingComparisonWidget(\n", 77 | " non_responder_embedding,\n", 78 | " responder_embedding,\n", 79 | " titles=[\"Non-Responder\", \"Responder\"],\n", 80 | " metric=\"abundance\",\n", 81 | " selection=\"phenotype\",\n", 82 | " auto_zoom=True,\n", 83 | " row_height=360,\n", 84 | ")\n", 85 | "melanoma_comparison" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "id": "c05e7375-07eb-4096-8bd5-9907c8d1248a", 91 | "metadata": {}, 92 | "source": [ 93 | "**Phenotype 1:** should be more abundant in `responder` (right) compared to `non-responder` (left)" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "id": "fc8158f5-0cbc-4731-8d6f-e49968f76556", 100 | "metadata": { 101 | "tags": [] 102 | }, 103 | "outputs": [], 104 | "source": [ 105 | "melanoma_comparison.select(\n", 106 | " \"CD8-GranzymeB-CD27+CD3+CD28+CD19-CD57-CD127+CD33-CD45RA-CD4+CD14-HLADR-CD20-CCR7+CD56-IL2-CD16-TNFa-MIP1b-CD154+GMCSF-PDL1-CD107a-IL17-Perforin-CD69+CTLA4-PDL2-PD1-TCRgd-IFNg-CD38-CD25-IL10-IL4-\"\n", 107 | ")" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "id": "356a7f7c-c2f8-4b74-8299-245f4b7d7b57", 113 | "metadata": {}, 114 | "source": [ 115 | "**Phenotype 2:** should be more abundant in `responder` (right) compared to `non-responder` (left)" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "id": "ae44dea4-3b21-45ec-b184-1b003ce626f0", 122 | "metadata": { 123 | "tags": [] 124 | }, 125 | "outputs": [], 126 | "source": [ 127 | "melanoma_comparison.select(\n", 128 | " 
\"CD8-GranzymeB+CD27-CD3-CD28-CD19-CD57+CD127-CD33-CD45RA+CD4-CD14-HLADR-CD20-CCR7-CD56+IL2-CD16+TNFa-MIP1b+CD154-GMCSF-PDL1-CD107a-IL17-Perforin+CD69+CTLA4-PDL2+PD1-TCRgd-IFNg-CD38+CD25-IL10-IL4-\"\n", 129 | ")" 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "id": "9f63edea-a9c6-4288-8fe1-6d0bfeed937a", 135 | "metadata": {}, 136 | "source": [ 137 | "**Phenotype 3:** should be more abundant in `responder` (right) compared to `non-responder` (left)" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "id": "c2f944d8-fa87-47ec-97af-af7a775b696c", 144 | "metadata": { 145 | "tags": [] 146 | }, 147 | "outputs": [], 148 | "source": [ 149 | "melanoma_comparison.select(\n", 150 | " \"CD8-GranzymeB+CD27-CD3-CD28-CD19-CD57+CD127-CD33-CD45RA+CD4-CD14-HLADR-CD20-CCR7-CD56+IL2-CD16+TNFa-MIP1b+CD154-GMCSF-PDL1-CD107a-IL17-Perforin+CD69-CTLA4-PDL2+PD1-TCRgd-IFNg-CD38+CD25-IL10-IL4-\"\n", 151 | ")" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "id": "4e3b6b2b-e1ce-48d8-8449-5c045e1b274b", 157 | "metadata": { 158 | "tags": [] 159 | }, 160 | "source": [ 161 | "**Phenotype 4:** should be more abundant in `responder` (right) compared to `non-responder` (left)" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "id": "c8c0e834-205d-47ac-965f-a3953bfb611c", 168 | "metadata": { 169 | "tags": [] 170 | }, 171 | "outputs": [], 172 | "source": [ 173 | "melanoma_comparison.select(\n", 174 | " \"CD8-GranzymeB+CD27-CD3-CD28-CD19-CD57+CD127-CD33-CD45RA+CD4-CD14-HLADR-CD20-CCR7-CD56+IL2-CD16+TNFa-MIP1b-CD154-GMCSF-PDL1-CD107a-IL17-Perforin+CD69-CTLA4-PDL2-PD1-TCRgd-IFNg-CD38+CD25-IL10-IL4-\"\n", 175 | ")" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "id": "a494fdc5-6c47-4795-b652-84c8a4d4736a", 181 | "metadata": { 182 | "tags": [] 183 | }, 184 | "source": [ 185 | "# Cancer Study\n", 186 | "\n", 187 | "### Extricating human tumour immune alterations from tissue 
inflammation\n", 188 | "\n", 189 | "Mair et al., 2022. https://www.nature.com/articles/s41586-022-04718-w\n", 190 | "\n", 191 | "In this example we're going to compare phenotypes between a pair of tumor and tissue samples." 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "id": "08c90f9d-ef19-4f33-a84d-9ff36d3b1bc4", 198 | "metadata": { 199 | "tags": [] 200 | }, 201 | "outputs": [], 202 | "source": [ 203 | "tissue_embedding = get_embedding(\"mair-2022\", \"OZEXPSMPL_26155\")\n", 204 | "tumor_embedding = get_embedding(\"mair-2022\", \"OZEXPSMPL_26146\")\n", 205 | "\n", 206 | "cancer_comparison = EmbeddingComparisonWidget(\n", 207 | " tissue_embedding,\n", 208 | " tumor_embedding,\n", 209 | " titles=[\"Tissue (Mucosa)\", \"Tumor\"],\n", 210 | " metric=\"abundance\",\n", 211 | " selection=\"phenotype\",\n", 212 | " auto_zoom=True,\n", 213 | " row_height=360,\n", 214 | ")\n", 215 | "cancer_comparison" 216 | ] 217 | }, 218 | { 219 | "cell_type": "markdown", 220 | "id": "8aa9b0c4-9c58-47bb-a634-46da604f2d40", 221 | "metadata": {}, 222 | "source": [ 223 | "**CD8 T-Cell Phenotype** should be more abundant in `tissue` (left) compared to `tumor` (right)" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": null, 229 | "id": "928d70df-0a2e-476d-b35d-984105a399b8", 230 | "metadata": {}, 231 | "outputs": [], 232 | "source": [ 233 | "cancer_comparison.select(\n", 234 | " \"CD4-CD8+CD3+CD45RA+CD27+CD19-CD103-CD28-CD69+PD1+HLADR-GranzymeB-CD25-ICOS-TCRgd-CD38-CD127-Tim3-\"\n", 235 | ")" 236 | ] 237 | }, 238 | { 239 | "cell_type": "markdown", 240 | "id": "163211c0-b82a-4b9b-84b7-fc6a305290d2", 241 | "metadata": {}, 242 | "source": [ 243 | "**CD4 T-Cell Phenotype** should be more abundant in `tumor` (right) compared to `tissue` (left)" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "id": "ec3f14c6-8daa-41c9-87e5-6d5cca6f3d53", 250 | "metadata": { 251 | "tags": [] 252 | }, 253 | 
"outputs": [], 254 | "source": [ 255 | "cancer_comparison.select(\n", 256 | " \"CD4+CD8-CD3+CD45RA-CD27+CD19-CD103-CD28+CD69+PD1+HLADR-GranzymeB-CD25+ICOS+TCRgd-CD38-CD127-Tim3+\"\n", 257 | ")" 258 | ] 259 | }, 260 | { 261 | "cell_type": "markdown", 262 | "id": "40ae8012-f44d-4720-b714-b4766c6da98c", 263 | "metadata": { 264 | "tags": [] 265 | }, 266 | "source": [ 267 | "# ICS Study\n", 268 | "\n", 269 | "### IFN-γ-independent immune markers of Mycobacterium tuberculosis exposure\n", 270 | "\n", 271 | "Lu et al., 2019. https://www.nature.com/articles/s41591-019-0441-3\n", 272 | "\n", 273 | "In this example we're going to compare phenotypes between a pair of disease (LTBI) and resister (RSTR) samples." 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": null, 279 | "id": "6c762a63-242d-44cd-9e4a-329009a4bc3b", 280 | "metadata": { 281 | "tags": [] 282 | }, 283 | "outputs": [], 284 | "source": [ 285 | "diseased_embedding = get_embedding(\"lu-2019\", \"OZEXPSMPL_2105\")\n", 286 | "resister_embedding = get_embedding(\"lu-2019\", \"OZEXPSMPL_2136\")\n", 287 | "\n", 288 | "comparison = EmbeddingComparisonWidget(\n", 289 | " diseased_embedding,\n", 290 | " resister_embedding,\n", 291 | " titles=[\"Diseased (LTBI)\", \"Resister (RSTR)\"],\n", 292 | " metric=\"abundance\",\n", 293 | " selection=\"phenotype\",\n", 294 | " auto_zoom=True,\n", 295 | " row_height=360,\n", 296 | ")\n", 297 | "comparison" 298 | ] 299 | }, 300 | { 301 | "cell_type": "markdown", 302 | "id": "9241a79e-e1b6-454d-be1e-d476019ba3b8", 303 | "metadata": {}, 304 | "source": [ 305 | "**Phenotype 5 from [Fig 3c](https://www.nature.com/articles/s41591-019-0441-3/figures/3)** should be more abundant in `diseased` (Left) compared to `resister` (right)" 306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "execution_count": null, 311 | "id": "c72ed082-22e3-41ce-a1bd-8f16c77eed7d", 312 | "metadata": { 313 | "tags": [] 314 | }, 315 | "outputs": [], 316 | "source": [ 317 | 
"comparison.select(\"CD4+CD3+CD8-TNF+CD107a-IL4-IFNg+IL2+CD154+IL17a-\")" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": null, 323 | "id": "d52f8667-fe9d-4dc2-b287-b132ce8b7877", 324 | "metadata": {}, 325 | "outputs": [], 326 | "source": [] 327 | } 328 | ], 329 | "metadata": { 330 | "kernelspec": { 331 | "display_name": "Python 3 (ipykernel)", 332 | "language": "python", 333 | "name": "python3" 334 | }, 335 | "language_info": { 336 | "codemirror_mode": { 337 | "name": "ipython", 338 | "version": 3 339 | }, 340 | "file_extension": ".py", 341 | "mimetype": "text/x-python", 342 | "name": "python", 343 | "nbconvert_exporter": "python", 344 | "pygments_lexer": "ipython3", 345 | "version": "3.10.11" 346 | } 347 | }, 348 | "nbformat": 4, 349 | "nbformat_minor": 5 350 | } 351 | -------------------------------------------------------------------------------- /notebooks/getting-started.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "8efc6d60-f207-4e54-92b0-a6070b0158b4", 6 | "metadata": {}, 7 | "source": [ 8 | "# Getting Started\n", 9 | "\n", 10 | "In this notebook we're going to demonstrate how to use `cev` to compare (a) two _different_ embeddings of the same data and (b) two aligned embeddings of _different_ data.\n", 11 | "\n", 12 | "The embeddings we're exploring in this notebook represent single-cell surface proteomic data. In other words, each data point represents an individual cell whose surface protein expression was measured. The cells were then clustered into cellular phenotypes based on their protein expression."
13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "id": "47c31bea-24b3-4d16-a69a-a3ad3a746234", 19 | "metadata": { 20 | "tags": [] 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "import pandas as pd\n", 25 | "\n", 26 | "from cev.widgets import Embedding, EmbeddingComparisonWidget" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "id": "dea71d70-e467-49af-9165-6e278f953977", 32 | "metadata": {}, 33 | "source": [ 34 | "The notebook requires downloading the three embeddings from data from [Mair et al., 2022](https://www.nature.com/articles/s41586-022-04718-w):\n", 35 | "- Tissue sample 138 (32 MB) embedded with [UMAP](https://umap-learn.readthedocs.io/en/latest/)\n", 36 | "- Tissue sample 138 (32 MB) embedded with [UMAP](https://umap-learn.readthedocs.io/en/latest/) after being transformed with [Ozette's Annotation Transformation](https://github.com/flekschas-ozette/ismb-biovis-2022)\n", 37 | "- Tumor sample 6 (82 MB) embedded with [UMAP](https://umap-learn.readthedocs.io/en/latest/) after being transformed with [Ozette's Annotation Transformation](https://github.com/flekschas-ozette/ismb-biovis-2022)\n", 38 | "\n", 39 | "All three embeddings are annotated with [Ozette's FAUST method](https://doi.org/10.1016/j.patter.2021.100372)."
40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "id": "dbf802bc-f709-4163-9b49-8fa5f6ce59ab", 46 | "metadata": { 47 | "tags": [] 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "# download the data\n", 52 | "!curl -sL https://figshare.com/ndownloader/articles/23063615/versions/1 -o data.zip\n", 53 | "!unzip data.zip -d data" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "id": "e62390d2-1242-49a8-9780-be976d39fa42", 59 | "metadata": { 60 | "tags": [] 61 | }, 62 | "source": [ 63 | "## Comparing Two Embeddings of the same Data\n", 64 | "\n", 65 | "In the first example, we are going to use `cev` to compare two different embedding methods that were run on the very same data (the tissue sample): standard UMAP and annotation transformation UMAP.\n", 66 | "\n", 67 | "Different embedding methods can produce very different embedding spaces and it's often hard to assess the difference holistically. `cev` enables us to quantify two properties based on shared point labels:\n", 68 | "\n", 69 | "1. Confusion: the degree to which two or more labels are visually intermixed\n", 70 | "2. Neighborhood: the degree to which the local neighborhood of a label has changed between the two embeddings\n", 71 | "\n", 72 | "Visualized as a heatmap, these two properties can quickly guide us to point clusters that are better or less resolved in either one of the two embeddings. It can also help us find compositional changes between the two embeddings."
73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "id": "7874813c-810f-40e5-92ab-91f228046a5e", 79 | "metadata": { 80 | "tags": [] 81 | }, 82 | "outputs": [], 83 | "source": [ 84 | "tissue_umap_embedding = Embedding.from_ozette(\n", 85 | " df=pd.read_parquet(\"./data/mair-2022-tissue-138-umap.pq\")\n", 86 | ")\n", 87 | "tissue_ozette_embedding = Embedding.from_ozette(\n", 88 | " df=pd.read_parquet(\"./data/mair-2022-tissue-138-ozette.pq\")\n", 89 | ")" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "id": "c3d7e114-9fd3-4785-bdca-e3f4bbf37df8", 96 | "metadata": { 97 | "tags": [] 98 | }, 99 | "outputs": [], 100 | "source": [ 101 | "umap_vs_ozette = EmbeddingComparisonWidget(\n", 102 | " tissue_umap_embedding,\n", 103 | " tissue_ozette_embedding,\n", 104 | " titles=[\"Standard UMAP (Tissue)\", \"Annotation-Transformed UMAP (Tissue)\"],\n", 105 | " metric=\"confusion\",\n", 106 | " selection=\"synced\",\n", 107 | " auto_zoom=True,\n", 108 | " row_height=320,\n", 109 | ")\n", 110 | "umap_vs_ozette" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "id": "a516d65a-351b-4365-a267-704cd93a9c0e", 116 | "metadata": {}, 117 | "source": [ 118 | "In this example, we can see that the point labels are much more intermixed in the standard UMAP embedding compared to the annotation transformation UMAP. This is not surprising as the standard UMAP embedding is not optimized for Flow cytometry data in any way and is thus only resolving broad cell phenotypes based on a few markers. You can see this by holding down `SHIFT` and clicking on `CD8` under _Markers_, which reduces the label resolution and shows that under a reduced label resolution, the confusion is much lower in the standard UMAP embedding.\n", 119 | "\n", 120 | "When selecting _Neighborhood_ from the _Metric_ drop down menu, we switch to the neighborhood composition difference quantification. 
When only a few markers (e.g., `CD4` and `CD8`) are active, we can see that most of the neighborhoods remain unchanged. When we gradually add more markers, we can see how the local neighborhood composition difference slowly increases, which is due to the fact that the annotation transformation spaces out all point label clusters.\n", 121 | "\n", 122 | "To study certain clusters or labels in detail, you can either interactively select points in the embedding via [jupyter-scatter](https://github.com/flekschas/jupyter-scatter)'s lasso selection or you can programmatically select points by their label via the `select()` method. For instance, the next call will select all CD4+ T cells." 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "id": "ba7a378f-4212-4953-be5b-7a273f8bc75e", 129 | "metadata": { 130 | "tags": [] 131 | }, 132 | "outputs": [], 133 | "source": [ 134 | "umap_vs_ozette.select([\"CD3+\", \"CD4+\", \"CD8-\"])" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "id": "3c439e4d-0679-4e64-a1c7-4be93cbbe039", 140 | "metadata": {}, 141 | "source": [ 142 | "## Size Differences Between _Non-Responder_ and _Responder_\n", 143 | "\n", 144 | "Instead of comparing identical data, let's take a look at two transformed and aligned embeddings: tissue vs tumor. The embeddings are both annotation-transformed and aligned, ensuring low confusion and high neighborhood similarity (check to confirm!). The abundance metric aids in identifying potential shifts in phenotype abundance, providing a comprehensive and visually intuitive method for analyzing complex cytometry data. Remember, our metric should be used as an exploratory tool to guide exploration and quickly surface potentially interesting phenotypes, but robust statistical methods must be applied to confirm whether any abundance differences exist."
145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "id": "180f0945-d97c-4261-aa67-5368e3b560ad", 151 | "metadata": { 152 | "tags": [] 153 | }, 154 | "outputs": [], 155 | "source": [ 156 | "tumor_ozette_embedding = Embedding.from_ozette(\n", 157 | " df=pd.read_parquet(\"./data/mair-2022-tumor-006-ozette.pq\")\n", 158 | ")" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "id": "0f99361b-6e96-4a6d-ad65-0533c23bece7", 165 | "metadata": { 166 | "tags": [] 167 | }, 168 | "outputs": [], 169 | "source": [ 170 | "tissue_vs_tumor = EmbeddingComparisonWidget(\n", 171 | " tissue_ozette_embedding,\n", 172 | " tumor_ozette_embedding,\n", 173 | " titles=[\"Tissue\", \"Tumor\"],\n", 174 | " metric=\"abundance\",\n", 175 | " selection=\"phenotype\",\n", 176 | " auto_zoom=True,\n", 177 | " row_height=320,\n", 178 | ")\n", 179 | "\n", 180 | "tissue_vs_tumor" 181 | ] 182 | }, 183 | { 184 | "cell_type": "markdown", 185 | "id": "6d632c95-dff8-4b90-b763-f3055c4e8047", 186 | "metadata": { 187 | "tags": [] 188 | }, 189 | "source": [ 190 | "The following **CD8+ T cells** are more abundant in `tissue` (i.e., the relative abundance is higher on the left) compared to `tumor` (i.e., the relative abundance is lower on the right)" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "id": "2f7ebd73-32e7-48ed-8575-8d14d2edc73f", 197 | "metadata": { 198 | "tags": [] 199 | }, 200 | "outputs": [], 201 | "source": [ 202 | "tissue_vs_tumor.select(\n", 203 | " \"CD4-CD8+CD3+CD45RA+CD27+CD19-CD103-CD28-CD69+PD1+HLADR-GranzymeB-CD25-ICOS-TCRgd-CD38-CD127-Tim3-\"\n", 204 | ")" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": null, 210 | "id": "eefac753-7920-4c87-99ef-d155f1ec5114", 211 | "metadata": {}, 212 | "outputs": [], 213 | "source": [] 214 | } 215 | ], 216 | "metadata": { 217 | "kernelspec": { 218 | "display_name": "Python 3 (ipykernel)", 219 | "language": 
"python", 220 | "name": "python3" 221 | }, 222 | "language_info": { 223 | "codemirror_mode": { 224 | "name": "ipython", 225 | "version": 3 226 | }, 227 | "file_extension": ".py", 228 | "mimetype": "text/x-python", 229 | "name": "python", 230 | "nbconvert_exporter": "python", 231 | "pygments_lexer": "ipython3", 232 | "version": "3.10.11" 233 | } 234 | }, 235 | "nbformat": 4, 236 | "nbformat_minor": 5 237 | } 238 | -------------------------------------------------------------------------------- /notebooks/lui-2021.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "8c6bbfa0-e40d-46ca-8010-45da2bdc5ed9", 6 | "metadata": {}, 7 | "source": [ 8 | "# Lui et al. 2021" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "id": "82fd3a9b-c1e8-473a-9679-9f64990c7bb2", 15 | "metadata": { 16 | "tags": [] 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "%load_ext autoreload\n", 21 | "%autoreload 2" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "id": "7e05cfbf-325e-4402-8a51-e31e10398acb", 28 | "metadata": { 29 | "tags": [] 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "import pathlib\n", 34 | "\n", 35 | "import pandas as pd\n", 36 | "\n", 37 | "from cev.widgets import Embedding, EmbeddingComparisonWidget" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "id": "6a1625a2-7389-4dd9-9b50-1ac99583c574", 43 | "metadata": {}, 44 | "source": [ 45 | "## Prepare Data and Split into two DataFrames" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "id": "ace38a79-2c9c-45b2-9f34-87f24d71ba59", 52 | "metadata": { 53 | "tags": [] 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "df_full = pd.read_parquet(\n", 58 | " pathlib.Path.cwd() / \"..\" / \"data\" / \"lui-2021\" / \"data_for_confusion_full.parquet\"\n", 59 | ")\n", 60 | "df = pd.read_parquet(\n", 61 | " pathlib.Path.cwd() 
/ \"..\" / \"data\" / \"lui-2021\" / \"data_for_confusion.parquet\"\n", 62 | ")\n", 63 | "\n", 64 | "markers = [\n", 65 | " \"CD3\",\n", 66 | " \"CD45RA\",\n", 67 | " \"CD8\",\n", 68 | " \"CLEC12A\",\n", 69 | " \"CD11b\",\n", 70 | " \"CD62L\",\n", 71 | " \"CD20\",\n", 72 | " \"HLA_DR\",\n", 73 | " \"CD11c\",\n", 74 | " \"CD14\",\n", 75 | " \"IgD\",\n", 76 | " \"CD4\",\n", 77 | " \"CD16\",\n", 78 | " \"CD45RO\",\n", 79 | " \"CD27\",\n", 80 | " \"CD19\",\n", 81 | " \"CD56\",\n", 82 | "]\n", 83 | "\n", 84 | "df_ozette_umap_ozette_labels = pd.DataFrame(\n", 85 | " {\n", 86 | " \"umapX\": df.platform_UMAP_X.values,\n", 87 | " \"umapY\": df.platform_UMAP_Y.values,\n", 88 | " \"faustLabels\": df.faust_clustering.map(\n", 89 | " lambda s: \"0_0_0_0_0\" if s == \"rare\" else s\n", 90 | " )\n", 91 | " .str.replace(\"/\", \"\")\n", 92 | " .values,\n", 93 | " }\n", 94 | ")\n", 95 | "\n", 96 | "df_ozette_umap_symphony_labels = pd.DataFrame(\n", 97 | " {\n", 98 | " \"umapX\": df.platform_UMAP_X.values,\n", 99 | " \"umapY\": df.platform_UMAP_Y.values,\n", 100 | " \"faustLabels\": df.liu_clustering.values,\n", 101 | " }\n", 102 | ")\n", 103 | "\n", 104 | "df_symphony_umap_ozette_labels = pd.DataFrame(\n", 105 | " {\n", 106 | " \"umapX\": df.symphony_UMAP_1.values,\n", 107 | " \"umapY\": df.symphony_UMAP_2.values,\n", 108 | " \"faustLabels\": df.faust_clustering.map(\n", 109 | " lambda s: \"0_0_0_0_0\" if s == \"rare\" else s\n", 110 | " )\n", 111 | " .str.replace(\"/\", \"\")\n", 112 | " .values,\n", 113 | " }\n", 114 | ")\n", 115 | "\n", 116 | "df_symphony_umap_symphony_labels = pd.DataFrame(\n", 117 | " {\n", 118 | " \"umapX\": df.symphony_UMAP_1.values,\n", 119 | " \"umapY\": df.symphony_UMAP_2.values,\n", 120 | " \"faustLabels\": df.liu_clustering.values,\n", 121 | " }\n", 122 | ")\n", 123 | "\n", 124 | "marker_annotations = df_full.faust_clustering.str.lstrip(\"/\").str.split(\n", 125 | " \"/\", expand=True\n", 126 | ")\n", 127 | "for column in marker_annotations:\n", 128 | " 
marker_annotations[column] = marker_annotations[column].str.slice(-1)\n", 129 | "\n", 130 | "df_ozette_umap_ozette_labels[[f\"{m}_faust_annotation\" for m in markers]] = (\n", 131 | " marker_annotations\n", 132 | ")\n", 133 | "df_ozette_umap_symphony_labels[[f\"{m}_faust_annotation\" for m in markers]] = (\n", 134 | " marker_annotations\n", 135 | ")\n", 136 | "df_symphony_umap_ozette_labels[[f\"{m}_faust_annotation\" for m in markers]] = (\n", 137 | " marker_annotations\n", 138 | ")\n", 139 | "df_symphony_umap_symphony_labels[[f\"{m}_faust_annotation\" for m in markers]] = (\n", 140 | " marker_annotations\n", 141 | ")" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "id": "e0c9a881-e2d6-488d-87d0-6f9328603960", 147 | "metadata": { 148 | "tags": [] 149 | }, 150 | "source": [ 151 | "# Comparing the Ozette Against the Symphony Embedding using FAUST Labels" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "id": "7cefed4b-c050-4fb7-b8a5-c59a4bd93017", 158 | "metadata": { 159 | "tags": [] 160 | }, 161 | "outputs": [], 162 | "source": [ 163 | "ozette_umap_ozette_labels_embedding = Embedding.from_ozette(\n", 164 | " df_ozette_umap_ozette_labels, robust_only=True\n", 165 | ")\n", 166 | "symphony_umap_ozette_labels_embedding = Embedding.from_ozette(\n", 167 | " df_symphony_umap_ozette_labels, robust_only=True\n", 168 | ")\n", 169 | "\n", 170 | "comparison_ozette_vs_symphony_umap_with_ozette_labels = EmbeddingComparisonWidget(\n", 171 | " ozette_umap_ozette_labels_embedding,\n", 172 | " symphony_umap_ozette_labels_embedding,\n", 173 | " titles=[\n", 174 | " \"Ozette Embedding with FAUST Labels\",\n", 175 | " \"Symphony Embedding with FAUST Labels\",\n", 176 | " ],\n", 177 | " metric=\"neighborhood\",\n", 178 | " # active_markers=[\"CD3\"],\n", 179 | " selection=\"synced\",\n", 180 | " auto_zoom=True,\n", 181 | " row_height=400,\n", 182 | ")\n", 183 | "comparison_ozette_vs_symphony_umap_with_ozette_labels" 184 | ] 185 | }, 
186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "id": "ee3ad9d6-4b92-4a5f-9199-e6bba4527a59", 190 | "metadata": { 191 | "tags": [] 192 | }, 193 | "outputs": [], 194 | "source": [ 195 | "comparison_ozette_vs_symphony_umap_with_ozette_labels.select(\n", 196 | " [\"CD19-\", \"CD11b-\", \"CD3+\", \"CD4+\", \"CD8-\", \"CD45RA-\", \"CD45RO+\", \"CD62L+\"]\n", 197 | ")" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": null, 203 | "id": "42591b2c-3ee1-4fa8-ad32-4187a9a211e2", 204 | "metadata": { 205 | "tags": [] 206 | }, 207 | "outputs": [], 208 | "source": [ 209 | "comparison_ozette_vs_symphony_umap_with_ozette_labels.select(\n", 210 | " [\"CD19-\", \"CD11b-\", \"CD3+\", \"CD4+\", \"CD8-\", \"CD45RA-\", \"CD45RO+\", \"CD62L-\"]\n", 211 | ")" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": null, 217 | "id": "cc81a023-288e-4ef4-8bdd-1940e05c68d4", 218 | "metadata": { 219 | "tags": [] 220 | }, 221 | "outputs": [], 222 | "source": [ 223 | "comparison_ozette_vs_symphony_umap_with_ozette_labels.select(\n", 224 | " [\"CD19-\", \"CD11b-\", \"CD3+\", \"CD4+\", \"CD8-\", \"CD45RA+\", \"CD45RO-\", \"CD62L+\"]\n", 225 | ")" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": null, 231 | "id": "1536bfbf-7f09-48fd-b35b-567303a03416", 232 | "metadata": { 233 | "tags": [] 234 | }, 235 | "outputs": [], 236 | "source": [ 237 | "comparison_ozette_vs_symphony_umap_with_ozette_labels.select(\n", 238 | " [\"CD19-\", \"CD11b-\", \"CD3+\", \"CD4+\", \"CD8-\", \"CD45RA+\", \"CD45RO-\", \"CD62L-\"]\n", 239 | ")" 240 | ] 241 | }, 242 | { 243 | "cell_type": "markdown", 244 | "id": "e1144d71-2055-44cd-a361-14c93383d2aa", 245 | "metadata": {}, 246 | "source": [ 247 | "# Comparing the Ozette Against the Symphony Embedding using Symphony Labels" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": null, 253 | "id": "ff15c719-c1d5-4780-86b7-9bb3bf4f1e1c", 254 | "metadata": { 
255 | "tags": [] 256 | }, 257 | "outputs": [], 258 | "source": [ 259 | "ozette_umap_symphony_labels_embedding = Embedding(\n", 260 | " df_ozette_umap_symphony_labels[[\"umapX\", \"umapY\"]].values,\n", 261 | " df_ozette_umap_symphony_labels.faustLabels,\n", 262 | ")\n", 263 | "symphony_umap_symphony_labels_embedding = Embedding(\n", 264 | " df_symphony_umap_symphony_labels[[\"umapX\", \"umapY\"]].values,\n", 265 | " df_symphony_umap_symphony_labels.faustLabels,\n", 266 | ")\n", 267 | "\n", 268 | "comparison_ozette_vs_symphony_umap_with_symphony_labels = EmbeddingComparisonWidget(\n", 269 | " ozette_umap_symphony_labels_embedding,\n", 270 | " symphony_umap_symphony_labels_embedding,\n", 271 | " titles=[\n", 272 | " \"Ozette Embedding with Symphony Labels\",\n", 273 | " \"Symphony Embedding with Symphony Labels\",\n", 274 | " ],\n", 275 | " selection=\"synced\",\n", 276 | " auto_zoom=True,\n", 277 | " row_height=400,\n", 278 | ")\n", 279 | "\n", 280 | "comparison_ozette_vs_symphony_umap_with_symphony_labels.left.categorical_scatter.legend(\n", 281 | " True\n", 282 | ")\n", 283 | "comparison_ozette_vs_symphony_umap_with_symphony_labels.right.categorical_scatter.legend(\n", 284 | " True\n", 285 | ")\n", 286 | "\n", 287 | "comparison_ozette_vs_symphony_umap_with_symphony_labels" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": null, 293 | "id": "32b7afd5-c1eb-4625-b9fe-2f6c60e6f364", 294 | "metadata": {}, 295 | "outputs": [], 296 | "source": [] 297 | } 298 | ], 299 | "metadata": { 300 | "kernelspec": { 301 | "display_name": "Python 3 (ipykernel)", 302 | "language": "python", 303 | "name": "python3" 304 | }, 305 | "language_info": { 306 | "codemirror_mode": { 307 | "name": "ipython", 308 | "version": 3 309 | }, 310 | "file_extension": ".py", 311 | "mimetype": "text/x-python", 312 | "name": "python", 313 | "nbconvert_exporter": "python", 314 | "pygments_lexer": "ipython3", 315 | "version": "3.8.5" 316 | } 317 | }, 318 | "nbformat": 4, 319 | 
"nbformat_minor": 5 320 | } 321 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling", "hatch-vcs"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "cev" 7 | description = "comparative embedding visualization" 8 | readme = "README.md" 9 | license = { text = "Apache-2.0" } 10 | authors = [ 11 | { name = "Trevor Manz" }, 12 | { name = "Fritz Lekschas" }, 13 | ] 14 | classifiers = [ 15 | "Intended Audience :: Developers", 16 | "Programming Language :: Python :: 3", 17 | "Programming Language :: Python :: 3.8", 18 | "Programming Language :: Python :: 3.9", 19 | "Programming Language :: Python :: 3.10", 20 | "Programming Language :: Python :: 3.11", 21 | ] 22 | requires-python = ">=3.8,<3.12" 23 | dependencies = [ 24 | "anywidget>=0.2.3", 25 | "cev-metrics>=0.1.2", 26 | "ipywidgets>=8.0.0", 27 | "jinja2>=3.0.0", 28 | "jupyter-scatter>=0.14.0", 29 | "pandas>=1.0,<2.0", 30 | "numpy>=1.0,<2.0", 31 | "pyarrow", 32 | "pooch>=1.3.0", 33 | ] 34 | dynamic = ["version"] 35 | 36 | [project.optional-dependencies] 37 | notebooks = [ 38 | "pyarrow", 39 | "fastparquet", 40 | "matplotlib", 41 | ] 42 | 43 | [project.scripts] 44 | cev = "cev._cli:main" 45 | 46 | [project.urls] 47 | homepage = "https://github.com/OzetteTech/comparative-embedding-visualization" 48 | 49 | [tool.hatch.build] 50 | sources = ["src"] 51 | 52 | [tool.hatch.version] 53 | source = "vcs" 54 | 55 | [tool.ruff] 56 | line-length = 88 57 | target-version = "py38" 58 | 59 | [tool.ruff.lint] 60 | extend-select = [ 61 | "E", # style errors 62 | "F", # flake 63 | # "D", # pydocstyle 64 | "I001", # isort 65 | "UP", # pyupgrade 66 | "RUF", # ruff-specific rules 67 | ] 68 | ignore = ["E501"] # ignore line-length, enforced by black 69 | 70 | # https://docs.pytest.org/en/latest/customize.html 71 | [tool.pytest.ini_options] 72 | 
minversion = "6.0" 73 | testpaths = ["tests"] 74 | filterwarnings = [ 75 | "ignore:Jupyter is migrating its paths:DeprecationWarning", 76 | "ignore:Deprecated in traitlets 4.1, use the instance .metadata:DeprecationWarning", 77 | ] 78 | 79 | [tool.uv] 80 | dev-dependencies = [ 81 | "jupyterlab>=4.2.5", 82 | "pytest>=8.3.3", 83 | "ruff>=0.7.0", 84 | ] 85 | -------------------------------------------------------------------------------- /src/cev/__init__.py: -------------------------------------------------------------------------------- 1 | from cev._version import __version__ # noqa 2 | 3 | import cev.metrics as metrics # noqa 4 | import cev.widgets as widgets # noqa 5 | -------------------------------------------------------------------------------- /src/cev/_cli.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import os 4 | import shutil 5 | import sys 6 | import textwrap 7 | import zipfile 8 | from pathlib import Path 9 | 10 | import pooch 11 | 12 | from cev._version import __version__ 13 | 14 | _DEV = False 15 | 16 | 17 | def download_data() -> tuple[Path, Path]: 18 | archive = pooch.retrieve( 19 | url="https://figshare.com/ndownloader/articles/23063615/versions/1", 20 | path=pooch.os_cache("cev"), 21 | fname="data.zip", 22 | known_hash=None, 23 | ) 24 | archive = Path(archive) 25 | files = [ 26 | "mair-2022-tissue-138-umap.pq", 27 | "mair-2022-tissue-138-ozette.pq", 28 | ] 29 | with zipfile.ZipFile(archive, "r") as zip_ref: 30 | for file in files: 31 | zip_ref.extract(file, path=archive.parent) 32 | return ( 33 | archive.parent / "mair-2022-tissue-138-umap.pq", 34 | archive.parent / "mair-2022-tissue-138-ozette.pq", 35 | ) 36 | 37 | 38 | def write_notebook(output: Path): 39 | umap_path, ozette_path = download_data() 40 | source = textwrap.dedent( 41 | f""" 42 | import pandas as pd 43 | from cev.widgets import Embedding, EmbeddingComparisonWidget 44 | 45 | umap_embedding = 
pd.read_parquet("{umap_path}").pipe(Embedding.from_ozette) 46 | ozette_embedding = pd.read_parquet("{ozette_path}").pipe(Embedding.from_ozette) 47 | 48 | EmbeddingComparisonWidget( 49 | umap_embedding, 50 | ozette_embedding, 51 | titles=("Standard UMAP", "Annotation-Transformed UMAP"), 52 | metric="confusion", 53 | selection="synced", 54 | auto_zoom=True, 55 | row_height=320, 56 | ) 57 | """ 58 | ).strip() 59 | 60 | nb = { 61 | "cells": [ 62 | { 63 | "cell_type": "code", 64 | "execution_count": None, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": source, 68 | } 69 | ], 70 | "metadata": { 71 | "kernelspec": { 72 | "display_name": "Python 3", 73 | "language": "python", 74 | "name": "python3", 75 | } 76 | }, 77 | "nbformat": 4, 78 | "nbformat_minor": 5, 79 | } 80 | with output.open("w") as f: 81 | json.dump(nb, f, indent=2) 82 | 83 | 84 | def check_uv_available(): 85 | if shutil.which("uv") is None: 86 | print("Error: 'uv' command not found.", file=sys.stderr) 87 | print("Please install 'uv' to run `cev demo` entrypoint.", file=sys.stderr) 88 | print( 89 | "For more information, visit: https://github.com/astral-sh/uv", 90 | file=sys.stderr, 91 | ) 92 | sys.exit(1) 93 | 94 | 95 | def run_notebook(notebook_path: Path): 96 | check_uv_available() 97 | command = [ 98 | "uvx", 99 | "--python", 100 | "3.11", 101 | "--from", 102 | "jupyter-core", 103 | "--with", 104 | "jupyterlab", 105 | "--with", 106 | "." 
if _DEV else f"cev=={__version__}", 107 | "jupyter", 108 | "lab", 109 | str(notebook_path), 110 | ] 111 | try: 112 | os.execvp(command[0], command) 113 | except OSError as e: 114 | print(f"Error executing {command[0]}: {e}", file=sys.stderr) 115 | sys.exit(1) 116 | 117 | 118 | def main(): 119 | parser = argparse.ArgumentParser(prog="cev") 120 | subparsers = parser.add_subparsers(dest="command", help="Available commands") 121 | subparsers.add_parser("download", help="Download the demo notebook (and data)") 122 | subparsers.add_parser("demo", help="Run the demo notebook in JupyterLab") 123 | args = parser.parse_args() 124 | 125 | notebook_path = Path("cev-demo.ipynb") 126 | if args.command == "download": 127 | write_notebook(notebook_path) 128 | elif args.command == "demo": 129 | write_notebook(notebook_path) 130 | run_notebook(notebook_path) 131 | else: 132 | parser.print_help() 133 | 134 | 135 | if __name__ == "__main__": 136 | main() 137 | -------------------------------------------------------------------------------- /src/cev/_compare.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import typing 4 | 5 | import ipywidgets 6 | import numpy as np 7 | 8 | from cev._compare_metric_dropdown import ( 9 | create_max_depth_dropdown, 10 | create_metric_dropdown, 11 | create_update_distance_callback, 12 | create_value_range_slider, 13 | ) 14 | from cev._compare_selection_type_dropdown import create_selection_type_dropdown 15 | from cev._compare_zoom_toggle import create_zoom_toggle 16 | from cev._widget_utils import ( 17 | add_ilocs_trait, 18 | create_colormaps, 19 | link_widgets, 20 | parse_label, 21 | trim_label_series, 22 | ) 23 | from cev.components import MarkerSelectionIndicator 24 | 25 | if typing.TYPE_CHECKING: 26 | from cev._embedding import Embedding 27 | from cev._embedding_widget import EmbeddingWidgetCollection 28 | 29 | 30 | def compare( 31 | a: Embedding, b: Embedding, 
row_height: int = 250, max_depth: int = 1, **kwargs 32 | ): 33 | pointwise_correspondence = has_pointwise_correspondence(a, b) 34 | left, right = a.widgets(**kwargs), b.widgets(**kwargs) 35 | 36 | # representative label 37 | markers = [m.name for m in parse_label(a.labels.iloc[0])] 38 | marker_selection = MarkerSelectionIndicator( 39 | markers=markers, active=[True] + [False for x in range(len(markers) - 1)] 40 | ) 41 | 42 | metric_dropdown = create_metric_dropdown(left, right) 43 | max_depth_dropdown = create_max_depth_dropdown(metric_dropdown, max_depth) 44 | value_range_slider = create_value_range_slider(metric_dropdown) 45 | update_distances = create_update_distance_callback( 46 | metric_dropdown, max_depth_dropdown, value_range_slider, left, right 47 | ) 48 | zoom = create_zoom_toggle(left, right) 49 | inverted = create_invert_color_checkbox(left, right) 50 | selection_type = create_selection_type_dropdown( 51 | left, right, pointwise_correspondence 52 | ) 53 | connect_marker_selection(marker_selection, (a, left), (b, right), update_distances) 54 | header = ipywidgets.VBox( 55 | [ 56 | marker_selection, 57 | ipywidgets.HBox([selection_type, metric_dropdown, inverted, zoom]), 58 | ] 59 | ) 60 | main = ipywidgets.HBox( 61 | [ 62 | cmp.show(row_height=row_height, layout=ipywidgets.Layout(width="50%")) 63 | for cmp in (left, right) 64 | ] 65 | ) 66 | widget = ipywidgets.VBox([header, main]) 67 | 68 | add_ilocs_trait(widget, left, right) 69 | typing.cast(typing.Any, widget).left = left 70 | typing.cast(typing.Any, widget).right = right 71 | return widget 72 | 73 | 74 | def has_pointwise_correspondence(a: Embedding, b: Embedding) -> bool: 75 | return np.array_equal(a.labels, b.labels) and ( 76 | (a.robust is None and b.robust is None) 77 | or ( 78 | a.robust is not None 79 | and b.robust is not None 80 | and np.array_equal(a.robust, b.robust) 81 | ) 82 | ) 83 | 84 | 85 | def create_invert_color_checkbox( 86 | left: EmbeddingWidgetCollection, 87 | right: 
def connect_marker_selection(
    marker_selection: MarkerSelectionIndicator,
    left_pair: tuple[Embedding, EmbeddingWidgetCollection],
    right_pair: tuple[Embedding, EmbeddingWidgetCollection],
    update_distances: typing.Callable,
):
    """Keep both embedding widgets in sync with the marker-selection indicator.

    Whenever the set of active markers changes, the labels of both embeddings
    are trimmed to the active markers, a unified colormap is rebuilt so that
    identical phenotypes get identical colors on both sides, and the currently
    selected distance metric is recomputed.
    """
    markers = marker_selection.markers
    a, left = left_pair
    b, right = right_pair

    def update_labels(active):
        # Only keep the markers whose toggle is currently active.
        active_markers = {marker for i, marker in enumerate(markers) if active[i]}

        left.labels = trim_label_series(a.labels, active_markers)
        right.labels = trim_label_series(b.labels, active_markers)

        # One shared lookup across both sides keeps colors consistent.
        left.colormap, right.colormap = create_colormaps(
            left.robust_labels.cat.categories,
            right.robust_labels.cat.categories,
        )

        update_distances()

    def on_active_marker_selection_change(change):
        update_labels(change.new)

    # Apply the initial state once, then track further changes.
    update_labels(marker_selection.active)
    marker_selection.observe(on_active_marker_selection_change, names="active")


# --- src/cev/_compare_metric_dropdown.py ------------------------------------

# Max number of cached results kept per metric lru_cache.
CACHE_SIZE = 5


def create_metric_dropdown(
    left: EmbeddingWidgetCollection,
    right: EmbeddingWidgetCollection,
    default: typing.Literal["confusion", "neighborhood", "abundance"] = "confusion",
):
    """Create the dropdown selecting the comparison metric.

    Each option maps to a callable returning a pair of per-point distance
    Series ``(left, right)``.  Results are cached per embedding state (the
    widget collections define ``__hash__`` for exactly this purpose).

    BUG FIX: the annotation previously advertised the misspelled value
    ``"neigbhorhood"``, which raised a ``KeyError`` in the lookup below when
    actually passed.  The correct spelling is now documented, and the legacy
    typo is still accepted for backward compatibility.
    """
    if default == "neigbhorhood":  # tolerate the historical typo
        default = "neighborhood"

    @functools.lru_cache(maxsize=CACHE_SIZE)
    def cached_confusion(emb: EmbeddingWidgetCollection):
        return metrics.confusion(emb._data)

    @functools.lru_cache(maxsize=CACHE_SIZE)
    def cached_neighborhood(emb: EmbeddingWidgetCollection, max_depth: int = 1):
        return metrics.neighborhood(emb._data, max_depth=max_depth)

    @functools.lru_cache(maxsize=CACHE_SIZE)
    def cached_abundance(
        left: EmbeddingWidgetCollection,
        right: EmbeddingWidgetCollection,
        max_depth: int = 1,
    ):
        # Neighborhood frequencies, weighted by per-label abundance (clr space).
        frequencies = (
            cached_neighborhood(left, max_depth),
            cached_neighborhood(right, max_depth),
        )
        abundances = [
            metrics.transform_abundance(
                freq,
                abundances=emb.labels.value_counts().to_dict(),
                clr=True,
            )
            for freq, emb in zip(frequencies, (left, right))
        ]

        # Align each side's abundance matrix with the other side's labels and
        # keep the diagonal (per-label self-abundance).
        label_dist_a = metrics.merge_abundances_left(abundances[0], abundances[1])
        label_dist_a = pd.Series(
            label_dist_a.to_numpy().diagonal(), index=label_dist_a.index
        )

        label_dist_b = metrics.merge_abundances_left(abundances[1], abundances[0])
        label_dist_b = pd.Series(
            label_dist_b.to_numpy().diagonal(), index=label_dist_b.index
        )

        # Signed difference: positive where a label is relatively more
        # abundant on that side.
        return (
            left.labels.map(label_dist_a - label_dist_b).astype(float),
            right.labels.map(label_dist_b - label_dist_a).astype(float),
        )

    def confusion(**kwargs):
        left_label_confusion = cached_confusion(left)
        right_label_confusion = cached_confusion(right)
        return (
            left.labels.map(left_label_confusion).astype(float),
            right.labels.map(right_label_confusion).astype(float),
        )

    def neighborhood(max_depth: int = 1):
        a = cached_neighborhood(left, max_depth)
        b = cached_neighborhood(right, max_depth)
        dist = metrics.compare_neighborhoods(a, b)
        return left.labels.map(dist).astype(float), right.labels.map(dist).astype(float)

    abundance = functools.partial(cached_abundance, left, right)

    default_value = {
        "confusion": confusion,
        "neighborhood": neighborhood,
        "abundance": abundance,
    }[default]

    return ipywidgets.Dropdown(
        options=[
            ("Confusion", confusion),
            ("Neighborhood", neighborhood),
            ("Abundance", abundance),
        ],
        value=default_value,
        description="Metric",
    )


def has_max_depth(metric_dropdown: ipywidgets.Dropdown):
    """Return True when the selected metric uses the max-depth parameter."""
    return (
        metric_dropdown.label.lower().startswith("abundance")
        or metric_dropdown.label == "Neighborhood"
    )


def create_max_depth_dropdown(
    metric_dropdown: ipywidgets.Dropdown,
    default: int = 1,
):
    """Create the neighborhood max-depth dropdown.

    The control is disabled whenever the selected metric has no notion of
    neighborhood depth (e.g. confusion).
    """
    dropdown = ipywidgets.Dropdown(
        options=[1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144],
        value=default,
        description="Max Depth",
        disabled=True,
    )

    def callback():
        # Only metrics with a neighborhood component take a depth.
        dropdown.disabled = not has_max_depth(metric_dropdown)

    metric_dropdown.observe(lambda _: callback(), names="value")
    callback()

    return dropdown


def create_value_range_slider(metric_dropdown: ipywidgets.Dropdown):
    """Create the slider restricting the color-mapped value range."""
    slider = ipywidgets.FloatRangeSlider(
        value=[0, 1],
        min=0,
        max=1,
        step=0.05,
        description="Range:",
        continuous_update=False,
        orientation="horizontal",
        readout=True,
        readout_format=".2f",
    )

    def callback():
        # Abundance values are mapped via quantiles; clip the extreme tails
        # by default so outliers do not dominate the colormap.
        if metric_dropdown.label.lower().startswith("abundance"):
            slider.value = [0.05, 0.95]
        else:
            slider.value = [0, 1]

    metric_dropdown.observe(lambda _: callback(), names="value")
    callback()

    return slider
def create_update_distance_callback(
    metric_dropdown: ipywidgets.Dropdown,
    max_depth_dropdown: ipywidgets.Dropdown,
    value_range_slider: ipywidgets.FloatRangeSlider,
    left: EmbeddingWidgetCollection,
    right: EmbeddingWidgetCollection,
):
    """Return a callback that recomputes distances and re-colors both views.

    The callback evaluates the currently selected metric and, per embedding,
    sets ``metric_color_options`` (colormap, inverted colormap, value range,
    legend labeling) before assigning the new per-point distances.

    Raises
    ------
    ValueError
        If the selected metric has no color options defined here.
    """

    def callback():
        distances = metric_dropdown.value(max_depth=max_depth_dropdown.value)

        for dist, emb in zip(distances, (left, right)):
            if metric_dropdown.label == "Abundance":
                # Symmetric range around zero, sized by the requested quantiles.
                lower, upper = dist.quantile(value_range_slider.value)
                vmax = max(abs(lower), abs(upper))
                emb.metric_color_options = (
                    diverging_cmap,
                    diverging_cmap[::-1],
                    [-vmax, vmax],
                    ("Lower", "Higher", "Rel. Abundance"),
                )
            elif metric_dropdown.label == "Confusion":
                emb.metric_color_options = (
                    "viridis",
                    "viridis_r",
                    value_range_slider.value,
                    ("Low", "High", "Confusion"),
                )
            elif metric_dropdown.label == "Neighborhood":
                emb.metric_color_options = (
                    "viridis",
                    "viridis_r",
                    value_range_slider.value,
                    ("Similar", "Dissimilar", "Neighborhood"),
                )
            else:
                # BUG FIX: previously formatted `metric_dropdown.value.__name__`,
                # but the abundance metric is a functools.partial, which has no
                # __name__ -- the intended ValueError was masked by an
                # AttributeError.  The dropdown label is always available.
                raise ValueError(
                    f"color options unspecified for metric '{metric_dropdown.label}'"
                )

            emb.distances = dist

    return callback


# --- src/cev/_compare_selection_type_dropdown.py -----------------------------


def create_selection_type_dropdown(
    left: EmbeddingWidgetCollection,
    right: EmbeddingWidgetCollection,
    pointwise_correspondence: bool,
    default: str | None = "independent",
):
    """Create the control that switches between selection behaviors.

    With point-to-point correspondence a three-way dropdown is returned
    (independent / synced / phenotype); otherwise a checkbox that toggles
    phenotype selection on and off ("synced" is unavailable in that case).
    """

    # Currently registered teardown; each mode replaces it with its own.
    def unlink():
        return None

    def independent():
        nonlocal unlink

        with contextlib.suppress(ValueError):
            unlink()

    # requires point-point correspondence
    def sync():
        nonlocal unlink

        with contextlib.suppress(ValueError):
            unlink()

        unlink = link_widgets(
            (left.categorical_scatter.widget, "selection"),
            (right.categorical_scatter.widget, "selection"),
        ).unlink

    # requires label-label correspondence
    def phenotype():
        nonlocal unlink

        with contextlib.suppress(ValueError):
            unlink()

        def expand_phenotype(src: EmbeddingWidgetCollection):
            def handler(change):
                # Expand the raw point selection to every point sharing a
                # phenotype with it, on both sides.
                phenotypes = set(src.labels.iloc[change.new].unique())

                for emb in (left, right):
                    ilocs = np.where(emb.robust_labels.isin(phenotypes))[0]
                    emb.categorical_scatter.widget.selection = ilocs
                    emb.metric_scatter.widget.selection = ilocs

            return handler

        transform_left = expand_phenotype(left)
        left.categorical_scatter.widget.observe(transform_left, names="selection")
        transform_right = expand_phenotype(right)
        right.categorical_scatter.widget.observe(transform_right, names="selection")

        def unlink_all():
            left.categorical_scatter.widget.unobserve(transform_left, names="selection")
            right.categorical_scatter.widget.unobserve(
                transform_right, names="selection"
            )

        unlink = unlink_all

    if pointwise_correspondence:
        initial_selection = independent

        if default == "synced":
            initial_selection = sync
        elif default == "phenotype":
            initial_selection = phenotype

        selection_type_options = [
            ("Independent", independent),
            ("Synced", sync),
            ("Phenotype", phenotype),
        ]

        selection_type = ipywidgets.Dropdown(
            options=selection_type_options,
            value=initial_selection,
            description="Selection",
        )

        # Switching the dropdown activates the chosen mode immediately.
        selection_type.observe(lambda change: change.new(), names="value")  # type: ignore
        initial_selection()
        return selection_type

    else:
        # No pointwise correspondence: only independent vs. phenotype remain;
        # a default of "synced" silently falls back to independent.
        initial_selection = False
        if default == "phenotype":
            initial_selection = True

        selection_type = ipywidgets.Checkbox(
            initial_selection, description="Phenotype Selection"
        )

        def handle_selection_change(change):
            if change.new is False:
                independent()
            else:
                phenotype()

        selection_type.observe(handle_selection_change, names="value")

        if initial_selection:
            phenotype()

        return selection_type


# --- src/cev/_compare_zoom_toggle.py -----------------------------------------


def create_zoom_toggle(
    left: EmbeddingWidgetCollection,
    right: EmbeddingWidgetCollection,
    default: bool = False,
):
    """Create the "Auto Zoom" checkbox.

    While enabled, each embedding zooms to its current selection; disabling
    it resets both views.
    """
    zoom = ipywidgets.Checkbox(default, description="Auto Zoom")

    def handle_selection_change_zoom(emb: EmbeddingWidgetCollection):
        def on_change(change):
            if zoom.value is False:
                return
            emb.zoom(to=change.new)

        return on_change

    left.categorical_scatter.widget.observe(
        handle_selection_change_zoom(left), names="selection"
    )
    right.categorical_scatter.widget.observe(
        handle_selection_change_zoom(right), names="selection"
    )

    def handle_zoom_change(change):
        if change.new is False:
            # Turning the toggle off resets both views.
            left.zoom(to=None)
            right.zoom(to=None)
        else:
            left.zoom(to=left.categorical_scatter.selection())
            right.zoom(to=right.categorical_scatter.selection())

    zoom.observe(handle_zoom_change, names="value")
    return zoom
NON_ROBUST_LABEL = "0_0_0_0_0"


@dataclasses.dataclass
class Embedding:
    """A 2D embedding: point coordinates, per-point labels, and an optional
    boolean mask marking which points carry a robust (trusted) label."""

    coords: npt.ArrayLike
    labels: pd.Series
    robust: npt.NDArray[np.bool_] | None = None

    @classmethod
    def from_df(cls, df: pd.DataFrame):
        """Build an embedding from a frame with ``x``/``y``/``label`` columns
        (and an optional ``robust`` column)."""
        return cls(
            coords=df[["x", "y"]].values,
            labels=df["label"],
            robust=df.get("robust"),
        )

    @classmethod
    def from_ozette(cls, df: pd.DataFrame, **kwargs):
        """Build an embedding from an Ozette-style export (see _prepare_ozette)."""
        coords, labels, robust = _prepare_ozette(df, **kwargs)
        return cls(coords=coords, labels=labels, robust=robust)

    def widgets(self, **kwargs):
        """Create the linked scatter widgets for this embedding."""
        from ._embedding_widget import EmbeddingWidgetCollection

        return EmbeddingWidgetCollection.from_embedding(self, **kwargs)


def _prepare_ozette(df: pd.DataFrame, robust_only: bool = True):
    """Extract ``(coords, labels, robust)`` from an Ozette-style frame.

    Two layouts are handled: the ISMB export (``cellType`` +
    ``complete_faust_label`` + ``x``/``y``) and the FAUST export
    (``faustLabels`` + ``umapX``/``umapY``).  With ``robust_only`` the frame
    is filtered down to robust rows and the mask is dropped (returned as
    ``None``).
    """
    if "cellType" in df.columns:
        # ISMB data layout
        robust_mask = (df["cellType"] != NON_ROBUST_LABEL).to_numpy()
        if robust_only:
            df = df[robust_mask].reset_index(drop=True)
            robust_mask = None

        coords = df[["x", "y"]].to_numpy()
        raw_labels = df["complete_faust_label"].to_numpy()
    else:
        robust_mask = (df["faustLabels"] != NON_ROBUST_LABEL).to_numpy()
        # Any robust row carries the full annotation; use the first one as a
        # template for the marker names.
        representative_label = df["faustLabels"][robust_mask].iloc[0]

        if robust_only:
            df = df[robust_mask].reset_index(drop=True)
            raw_labels = df["faustLabels"].to_numpy()
            robust_mask = None
        else:
            # Rebuild a full label per row from the per-marker annotation
            # columns, in the template's marker order.
            raw_labels = pd.Series("", index=df.index)
            for marker in parse_label(representative_label):
                raw_labels += marker.name + df[f"{marker.name}_faust_annotation"]

        coords = df[["umapX", "umapY"]].to_numpy()

    return coords, pd.Series(raw_labels, dtype="category"), robust_mask
', 32 | layout=ipywidgets.Layout(width="100%"), 33 | ) 34 | title_widget = ipywidgets.HBox( 35 | [ 36 | ipywidgets.HTML( 37 | value=f'

{left_title}

', 38 | layout=ipywidgets.Layout(width="50%"), 39 | ), 40 | ipywidgets.HTML( 41 | value=f'

{right_title}

class EmbeddingComparisonWidget(ipywidgets.VBox):
    """Top-level widget comparing two embeddings side by side.

    Wires together the metric dropdown, max-depth dropdown, value-range
    slider, selection-type control, zoom toggle, and -- when the labels carry
    marker annotations like ``"CD4+CD8-"`` -- the marker-selection indicator.
    """

    def __init__(
        self,
        left_embedding: Embedding,
        right_embedding: Embedding,
        row_height: int = 250,
        metric: typing.Literal["confusion", "neighborhood", "abundance"] = "confusion",
        inverted_colormap: bool = False,
        auto_zoom: bool = False,
        selection: typing.Literal["independent", "synced", "phenotype"] = "independent",
        max_depth: int = 1,
        titles: tuple[str, str] | None = None,
        active_markers: list[str] | typing.Literal["all"] = "all",
        **kwargs,
    ):
        # Point-for-point correspondence enables the "synced" selection mode.
        pointwise_correspondence = has_pointwise_correspondence(
            left_embedding, right_embedding
        )

        self.left_embedding = left_embedding
        self.right_embedding = right_embedding
        self.left = left_embedding.widgets(**kwargs)
        self.right = right_embedding.widgets(**kwargs)

        metric_dropdown = create_metric_dropdown(self.left, self.right, metric)
        max_depth_dropdown = create_max_depth_dropdown(metric_dropdown, max_depth)
        value_range_slider = create_value_range_slider(metric_dropdown)
        update_distances = create_update_distance_callback(
            metric_dropdown,
            max_depth_dropdown,
            value_range_slider,
            self.left,
            self.right,
        )

        # Labels such as "CD4+CD8-" carry marker annotations.
        has_markers = "+" in left_embedding.labels.iloc[0]

        if has_markers:
            # representative label
            markers = [m.name for m in parse_label(left_embedding.labels.iloc[0])]
            _active_markers = (
                [True] * len(markers)
                if active_markers == "all"
                else [False] * len(markers)
            )
            # NOTE(review): when active_markers == "all" this loop iterates the
            # characters "a", "l", "l"; markers.index then raises ValueError,
            # which is swallowed below -- harmless but worth confirming.
            for active_marker in active_markers:
                try:
                    _active_markers[markers.index(active_marker)] = True
                except ValueError:
                    pass
            marker_selection = MarkerSelectionIndicator(
                markers=markers, active=_active_markers
            )
            connect_marker_selection(
                marker_selection,
                (self.left_embedding, self.left),
                (self.right_embedding, self.right),
                update_distances,
            )

        zoom = create_zoom_toggle(self.left, self.right, auto_zoom)
        inverted = create_invert_color_checkbox(
            self.left, self.right, inverted_colormap
        )

        selection_type = create_selection_type_dropdown(
            self.left,
            self.right,
            pointwise_correspondence,
            selection,
        )

        # Recompute distances whenever any metric-related control changes.
        metric_dropdown.observe(lambda _: update_distances(), names="value")
        max_depth_dropdown.observe(lambda _: update_distances(), names="value")
        value_range_slider.observe(lambda _: update_distances(), names="value")

        update_distances()

        # Header
        settings = ipywidgets.HBox(
            [
                WidthOptimizer(),
                metric_dropdown,
                inverted,
                value_range_slider,
                selection_type,
                zoom,
                max_depth_dropdown,
            ]
        )
        header = [marker_selection, settings] if has_markers else [settings]
        sections: list[ipywidgets.Widget] = [ipywidgets.VBox(header)]

        if titles is not None:
            sections.extend(_create_titles(titles))

        sections.append(
            ipywidgets.HBox(
                [
                    cmp.show(
                        # NOTE(review): both branches of this conditional are
                        # identical; presumably one was meant to supply a
                        # fallback height -- confirm intent.
                        row_height=row_height if row_height is None else row_height,
                        layout=ipywidgets.Layout(width="50%"),
                    )
                    for cmp in (self.left, self.right)
                ]
            )
        )

        super().__init__(sections)
        # Expose the (left, right) selections as a single `.ilocs` trait.
        add_ilocs_trait(self, self.left, self.right)

    @property
    def embeddings(self):
        """Yield ``[embedding, widget-collection]`` pairs, left then right."""
        yield [self.left_embedding, self.left]
        yield [self.right_embedding, self.right]

    def select(self, labels: str | list[str]):
        """Select points by label in both embeddings.

        A single string selects points whose label starts with it.  A list of
        annotated markers (e.g. ``["CD4+", "CD8-"]``) selects points whose
        label contains those markers, matched in each embedding's own marker
        order.
        """
        if isinstance(labels, str):
            for [embedding, embedding_widget] in self.embeddings:
                point_idxs = embedding.labels[
                    embedding.labels.str.startswith(labels)
                ].index
                print(f"Found {len(point_idxs)} points")
                for scatter in embedding_widget.scatters:
                    scatter.selection(point_idxs)
            return

        regexs = []

        for [embedding, embedding_widget] in self.embeddings:
            # Marker order is taken from the first label of each embedding.
            markers = list(filter(None, re.split("[+-]", embedding.labels[0])))
            marker_set = set(markers)
            marker_order = {s: i for i, s in enumerate(markers)}

            # Drop markers unknown to this embedding, then sort the rest into
            # the embedding's marker order so one regex can match them all.
            valid_labels = list(filter(lambda label: label[:-1] in marker_set, labels))
            ordered_labels = sorted(
                valid_labels, key=lambda label: marker_order.get(label[:-1], 0)
            )

            regex = (
                ".*" + ".*".join([re.escape(label) for label in ordered_labels]) + ".*"
            )
            regexs.append(regex)

        for i, [embedding, embedding_widget] in enumerate(self.embeddings):
            regex = regexs[i]
            point_idxs = embedding.labels[
                embedding.labels.str.match(regex, flags=re.IGNORECASE)
            ].index
            print(f"Found {len(point_idxs)} points")
            for scatter in embedding_widget.scatters:
                scatter.selection(point_idxs)
class EmbeddingWidgetCollection(traitlets.HasTraits):
    """A pair of linked jscatter plots (categorical colors + metric colors)
    for one embedding, plus a marker-composition logo for the selection.

    Traits
    ------
    inverted : bool
        Flip the metric colormap.
    labels : pd.Series
        Per-point labels; always coerced to a categorical Series.
    distances : pd.Series
        Per-point metric values (float64).
    colormap : dict
        Mapping of robust label -> color for the categorical plot.
    """

    inverted = traitlets.Bool(default_value=False)
    labels = traitlets.Any()
    distances = traitlets.Any()
    colormap = traitlets.Dict()

    def __init__(
        self,
        labels: pd.Series,
        categorical_scatter: jscatter.Scatter,
        metric_scatter: jscatter.Scatter,
        logo: MarkerCompositionLogo,
        labeler: typing.Callable[[npt.ArrayLike], pd.Series],
    ):
        super().__init__()
        self.categorical_scatter = categorical_scatter
        self.metric_scatter = metric_scatter
        self.logo = logo
        self._labeler = labeler
        # (cmap, inverted cmap, value range, legend labeling) for the metric view
        self.metric_color_options: tuple[str, str, list[int], tuple] = (
            "viridis",
            "viridis_r",
            [0, 1],
            ("min", "max", "value"),
        )

        self.labels = labels
        self.distances = pd.Series(0.0, index=self._data.index, dtype="float64")
        self.colormap = create_colormaps(self.robust_labels.cat.categories)

        # Keep the logo's counts in sync with the categorical selection.
        ipywidgets.dlink(
            source=(self.categorical_scatter.widget, "selection"),
            target=(self.logo, "counts"),
            transform=self.label_counts,
        )

    def label_counts(self, ilocs: None | np.ndarray = None) -> dict:
        """Return ``label -> count``, restricted to *ilocs* when given."""
        labels = self.labels if ilocs is None else self.labels.iloc[ilocs]
        return {k: int(v) for k, v in labels.value_counts().items()}

    @traitlets.validate("labels")
    def _validate_labels(self, proposal: object):
        assert isinstance(proposal.value, pd.Series)
        # Convert to a categorical series if it is not one already.
        #
        # BUG FIX: the original ternary was inverted -- it returned the value
        # unchanged when it was NOT categorical and only called
        # .astype("category") on values that already were categorical, so a
        # plain object-dtype series slipped through and later `.cat` accesses
        # (robust_labels, __hash__) would fail.  Also replaces the deprecated
        # pd.api.types.is_categorical_dtype with an isinstance check.
        if isinstance(proposal.value.dtype, pd.CategoricalDtype):
            return proposal.value
        return proposal.value.astype("category")

    @property
    def _data(self) -> pd.DataFrame:
        # Both scatters must share the exact same backing frame.
        assert self.categorical_scatter._data is self.metric_scatter._data
        assert self.categorical_scatter._data is not None
        return self.categorical_scatter._data

    @traitlets.observe("labels")
    def _on_labels_change(self, change):
        """Push new labels (and their robust variants) into the shared frame."""
        labels = change.new
        self._data[_LABEL_COLUMN] = pd.Series(np.asarray(labels), dtype="category")
        self._data[_ROBUST_LABEL_COLUMN] = pd.Series(
            np.asarray(self._labeler(labels)), dtype="category"
        )
        self.logo.counts = self.label_counts(self.categorical_scatter.widget.selection)
        self.has_markers = "+" in self._data[_LABEL_COLUMN][0]

    @traitlets.validate("distances")
    def _validate_distances(self, proposal: object):
        assert isinstance(proposal.value, pd.Series)
        assert proposal.value.dtype == "float64"
        return proposal.value

    @traitlets.observe("distances")
    def _on_distances_change(self, change):
        self._data[_DISTANCE_COLUMN] = change.new.values
        self._update_metric_scatter()

    @traitlets.observe("inverted")
    def _update_metric_scatter(self, *args, **kwargs):
        """Re-color the metric scatter and hide non-robust points."""
        cmap, cmapr, norm, labeling = self.metric_color_options
        self.metric_scatter.color(
            by=_DISTANCE_COLUMN,
            map=cmapr if self.inverted else cmap,
            norm=norm,
            labeling=labeling,
        )
        self.metric_scatter.legend(True)

        # Show only robust points in the metric view.
        self.metric_scatter.filter(None)
        robust_labels = self._data.query(
            f"{_ROBUST_LABEL_COLUMN} != '{NON_ROBUST_LABEL}'"
        )
        if len(robust_labels):
            self.metric_scatter.filter(robust_labels.index)

    @traitlets.observe("colormap")
    def _update_categorical_scatter(self, *args, **kwargs):
        self.categorical_scatter.legend(False)
        self.categorical_scatter.color(by=_ROBUST_LABEL_COLUMN, map=self.colormap)

    @classmethod
    def from_embedding(
        cls,
        emb: Embedding,
        background_color: str = "black",
        axes: bool = False,
        **kwargs,
    ):
        """Create two linked scatters (plus a logo) for *emb*."""
        X = np.array(emb.coords)
        data = pd.DataFrame({"x": X[:, 0], "y": X[:, 1]})

        categorical_scatter, metric_scatter = (
            jscatter.Scatter(
                data=data,
                x="x",
                y="y",
                background_color=background_color,
                axes=axes,
                opacity_by="density",
                lasso_initiator=False,
                tooltip=True,
                # NOTE(review): ("color") is a plain string, not a 1-tuple;
                # jscatter appears to accept a single field name -- confirm.
                tooltip_contents=("color"),
                **kwargs,
            )
            for _ in range(2)
        )

        # link the plots together with js
        link_widgets(
            (categorical_scatter.widget, "selection"),
            (metric_scatter.widget, "selection"),
        )

        return cls(
            labels=emb.labels,
            categorical_scatter=categorical_scatter,
            metric_scatter=metric_scatter,
            logo=MarkerCompositionLogo(),
            labeler=lambda labels: robust_labels(labels, emb.robust),
        )

    @property
    def robust_labels(self) -> pd.Series:
        return self._data[_ROBUST_LABEL_COLUMN]

    @property
    def scatters(self):
        """Yield the categorical scatter, then the metric scatter."""
        yield self.categorical_scatter
        yield self.metric_scatter

    def show(self, row_height: int | None = None, **kwargs):
        """Return a VBox with both scatters (and the logo when markers exist)."""
        widgets = []

        uuid = uuid4().hex

        for scatter in self.scatters:
            if row_height is not None:
                scatter.height(row_height)
            widget = scatter.show()
            widget.layout = {"margin": "0 0 2px 0"}
            widgets.append(widget)
            # A shared view_sync id keeps pan/zoom of both plots in lockstep.
            scatter.widget.view_sync = uuid

        if self.has_markers:
            widgets.append(self.logo)

        return ipywidgets.VBox(widgets, **kwargs)

    def zoom(self, to: None | npt.NDArray = None):
        """Zoom both scatters to *to* (an empty selection resets the view)."""
        if to is not None:
            to = to if len(to) > 0 else None
        for s in self.scatters:
            s.zoom(to=to)

    def __hash__(self):
        # Warning: this is a hack! You should probably not rely on this hash
        # unless you know what you're doing.
        #
        # Creates a unique hash for the current "state" of this object
        # to make sure that functools caching works correctly.
        # See the usage in cev._compare_metrics_dropdown
        obj_id = str(id(self))
        categories = ",".join(self.labels.cat.categories.to_list())
        return hash(obj_id + categories)
" 22 | + "Use a.any() or a.all()" 23 | ) 24 | 25 | 26 | # patched version which allows for numpy comparison 27 | # https://github.com/jupyter-widgets/traittypes/issues/45 28 | class link_widgets(traitlets.link): 29 | def _update_target(self, change): 30 | try: 31 | super()._update_target(change) 32 | except ValueError as e: 33 | if e.args[0] != _ERR_MESSAGE: 34 | raise e 35 | except traitlets.TraitError: 36 | pass 37 | 38 | def _update_source(self, change): 39 | try: 40 | super()._update_source(change) 41 | except ValueError as e: 42 | if e.args[0] != _ERR_MESSAGE: 43 | raise e 44 | except traitlets.TraitError: 45 | pass 46 | 47 | 48 | @dataclasses.dataclass 49 | class Marker: 50 | name: str 51 | annotation: typing.Literal["+", "-"] 52 | 53 | def __str__(self) -> str: 54 | return self.name + self.annotation 55 | 56 | 57 | def parse_label(label: str) -> list[Marker]: 58 | return [ 59 | Marker(inner_label[:-1], inner_label[-1]) 60 | for inner_label in re.split("(\w+[\-|\+])", label) 61 | if inner_label 62 | ] 63 | 64 | 65 | def trim_label_series(labels: pd.Series, active_markers: set[str]): 66 | """ 67 | Trims the labels to only contain the active markers. 68 | 69 | Parameters 70 | ---------- 71 | labels 72 | The labels to trim. Must be a categorical series with values like "CD8+CD4-". 73 | active_markers 74 | The markers to keep. Must be a set of strings like {"CD8", "CD4"}. 75 | 76 | Returns 77 | ------- 78 | pd.Series 79 | The trimmed labels. 
80 | """ 81 | # we only need to look at the categories, not the values 82 | # to compute all the possible new labels 83 | expanded = labels.cat.categories.str.split("(\w+[\+|\-])", regex=True) 84 | 85 | # find the column indices of the active markers in the expanded labels 86 | column_indices = [] 87 | for i, marker in enumerate(expanded[0]): 88 | if marker[:-1] in active_markers: 89 | column_indices.append(i) 90 | 91 | # create the new label for each category by concatenating the active markers 92 | new_categories = pd.Series([""] * len(expanded)) 93 | for column_index in column_indices: 94 | new_categories += expanded.str[column_index] 95 | 96 | # Index the new categories by the previous codes. 97 | # This creates a new array with all the updated labels. 98 | new_labels = new_categories[labels.cat.codes] 99 | return pd.Series(new_labels, dtype="category") 100 | 101 | 102 | def add_ilocs_trait( 103 | widget: traitlets.HasTraits, 104 | right: EmbeddingWidgetCollection, 105 | left: EmbeddingWidgetCollection, 106 | ): 107 | """Adds a `.ilocs` tuple trait to the final widget. 108 | 109 | Containts the (left, right) selections. 
110 | """ 111 | initial = ( 112 | left.categorical_scatter.selection(), 113 | right.categorical_scatter.selection(), 114 | ) 115 | widget.add_traits(ilocs=traitlets.Tuple(initial)) 116 | 117 | ipywidgets.dlink( 118 | source=(left.categorical_scatter.widget, "selection"), 119 | target=(widget, "ilocs"), 120 | transform=lambda iloc: (iloc, widget.ilocs[1]), # type: ignore 121 | ) 122 | 123 | ipywidgets.dlink( 124 | source=(right.categorical_scatter.widget, "selection"), 125 | target=(widget, "ilocs"), 126 | transform=lambda iloc: (widget.ilocs[0], iloc), # type: ignore 127 | ) 128 | 129 | 130 | # Created with https://gka.github.io/palettes/#/256|d|19ffff,33bbff,444444|444444,ff5023,ffaa00|1|1 131 | diverging_cmap = [ 132 | "#19ffff", 133 | "#1cfdff", 134 | "#1efcff", 135 | "#20faff", 136 | "#22f8ff", 137 | "#24f6fe", 138 | "#26f5fe", 139 | "#27f3fe", 140 | "#29f1fd", 141 | "#2af0fd", 142 | "#2beefc", 143 | "#2decfc", 144 | "#2eebfb", 145 | "#2fe9fb", 146 | "#31e7fa", 147 | "#32e6f9", 148 | "#33e4f9", 149 | "#34e2f8", 150 | "#35e1f7", 151 | "#36dff6", 152 | "#37ddf5", 153 | "#38dcf4", 154 | "#39daf3", 155 | "#39d9f2", 156 | "#3ad7f1", 157 | "#3bd5f0", 158 | "#3cd4ef", 159 | "#3dd2ee", 160 | "#3dd1ed", 161 | "#3ecfec", 162 | "#3fcdeb", 163 | "#40ccea", 164 | "#40cae8", 165 | "#41c9e7", 166 | "#42c7e6", 167 | "#42c5e5", 168 | "#43c4e3", 169 | "#43c2e2", 170 | "#44c1e1", 171 | "#45bfdf", 172 | "#45bede", 173 | "#46bcdd", 174 | "#46bbdb", 175 | "#47b9da", 176 | "#47b8d8", 177 | "#48b6d7", 178 | "#48b5d6", 179 | "#49b3d4", 180 | "#49b1d3", 181 | "#49b0d1", 182 | "#4aaed0", 183 | "#4aadce", 184 | "#4babcd", 185 | "#4baacb", 186 | "#4ba8c9", 187 | "#4ca7c8", 188 | "#4ca6c6", 189 | "#4ca4c5", 190 | "#4ca3c3", 191 | "#4da1c1", 192 | "#4da0c0", 193 | "#4d9ebe", 194 | "#4e9dbc", 195 | "#4e9bbb", 196 | "#4e9ab9", 197 | "#4e98b7", 198 | "#4e97b6", 199 | "#4f95b4", 200 | "#4f94b2", 201 | "#4f93b1", 202 | "#4f91af", 203 | "#4f90ad", 204 | "#4f8eab", 205 | "#4f8daa", 206 | "#508ba8", 
207 | "#508aa6", 208 | "#5089a4", 209 | "#5087a3", 210 | "#5086a1", 211 | "#50849f", 212 | "#50839d", 213 | "#50819b", 214 | "#50809a", 215 | "#507f98", 216 | "#507d96", 217 | "#507c94", 218 | "#507b92", 219 | "#507991", 220 | "#50788f", 221 | "#50768d", 222 | "#50758b", 223 | "#4f7489", 224 | "#4f7287", 225 | "#4f7186", 226 | "#4f7084", 227 | "#4f6e82", 228 | "#4f6d80", 229 | "#4f6c7e", 230 | "#4e6a7c", 231 | "#4e697a", 232 | "#4e6879", 233 | "#4e6677", 234 | "#4e6575", 235 | "#4d6473", 236 | "#4d6271", 237 | "#4d616f", 238 | "#4d606d", 239 | "#4c5e6b", 240 | "#4c5d6a", 241 | "#4c5c68", 242 | "#4b5b66", 243 | "#4b5964", 244 | "#4b5862", 245 | "#4a5760", 246 | "#4a555e", 247 | "#4a545c", 248 | "#49535a", 249 | "#495259", 250 | "#495057", 251 | "#484f55", 252 | "#484e53", 253 | "#474d51", 254 | "#474b4f", 255 | "#464a4d", 256 | "#46494b", 257 | "#45484a", 258 | "#454648", 259 | "#454546", 260 | "#474444", 261 | "#494543", 262 | "#4c4543", 263 | "#4e4643", 264 | "#514643", 265 | "#534642", 266 | "#554742", 267 | "#584742", 268 | "#5a4842", 269 | "#5c4841", 270 | "#5f4841", 271 | "#614941", 272 | "#634941", 273 | "#654a40", 274 | "#684a40", 275 | "#6a4a40", 276 | "#6c4b40", 277 | "#6e4b3f", 278 | "#704c3f", 279 | "#724c3f", 280 | "#744c3e", 281 | "#764d3e", 282 | "#794d3e", 283 | "#7b4e3e", 284 | "#7d4e3d", 285 | "#7f4e3d", 286 | "#814f3d", 287 | "#834f3c", 288 | "#85503c", 289 | "#87503c", 290 | "#89513c", 291 | "#8b513b", 292 | "#8c513b", 293 | "#8e523b", 294 | "#90523a", 295 | "#92533a", 296 | "#94533a", 297 | "#96543a", 298 | "#985439", 299 | "#9a5539", 300 | "#9c5539", 301 | "#9d5538", 302 | "#9f5638", 303 | "#a15638", 304 | "#a35737", 305 | "#a55737", 306 | "#a75837", 307 | "#a85836", 308 | "#aa5936", 309 | "#ac5936", 310 | "#ae5a36", 311 | "#af5b35", 312 | "#b15b35", 313 | "#b35c35", 314 | "#b55c34", 315 | "#b65d34", 316 | "#b85d34", 317 | "#ba5e33", 318 | "#bb5f33", 319 | "#bd5f32", 320 | "#be6032", 321 | "#c06032", 322 | "#c26131", 323 | "#c36231", 324 | 
"#c56231", 325 | "#c66330", 326 | "#c86430", 327 | "#ca652f", 328 | "#cb652f", 329 | "#cd662f", 330 | "#ce672e", 331 | "#cf672e", 332 | "#d1682d", 333 | "#d2692d", 334 | "#d46a2d", 335 | "#d56b2c", 336 | "#d76b2c", 337 | "#d86c2b", 338 | "#d96d2b", 339 | "#db6e2a", 340 | "#dc6f2a", 341 | "#dd702a", 342 | "#df7129", 343 | "#e07229", 344 | "#e17328", 345 | "#e37328", 346 | "#e47427", 347 | "#e57527", 348 | "#e67626", 349 | "#e77726", 350 | "#e97825", 351 | "#ea7924", 352 | "#eb7b24", 353 | "#ec7c23", 354 | "#ed7d23", 355 | "#ee7e22", 356 | "#ef7f22", 357 | "#f08021", 358 | "#f18120", 359 | "#f28220", 360 | "#f3841f", 361 | "#f4851e", 362 | "#f4861e", 363 | "#f5871d", 364 | "#f6881c", 365 | "#f78a1b", 366 | "#f88b1a", 367 | "#f88c1a", 368 | "#f98e19", 369 | "#fa8f18", 370 | "#fa9017", 371 | "#fb9216", 372 | "#fb9315", 373 | "#fc9414", 374 | "#fc9613", 375 | "#fd9712", 376 | "#fd9911", 377 | "#fe9a10", 378 | "#fe9c0e", 379 | "#fe9d0d", 380 | "#ff9f0c", 381 | "#ffa00a", 382 | "#ffa208", 383 | "#ffa307", 384 | "#ffa505", 385 | "#ffa703", 386 | "#ffa802", 387 | "#ffaa00", 388 | ] 389 | 390 | 391 | def robust_labels(labels: npt.ArrayLike, robust: npt.NDArray[np.bool_] | None = None): 392 | if robust is not None: 393 | labels = np.where( 394 | robust, 395 | labels, 396 | NON_ROBUST_LABEL, 397 | ) 398 | return pd.Series(labels, dtype="category") 399 | 400 | 401 | @typing.overload 402 | def create_colormaps(cats: typing.Iterable[str]) -> dict: ... 403 | 404 | 405 | @typing.overload 406 | def create_colormaps( 407 | cats: typing.Iterable[str], *other: typing.Iterable[str] 408 | ) -> tuple[dict, ...]: ... 
def create_colormaps(
    cats: typing.Iterable[str], *others: typing.Iterable[str]
) -> dict | tuple[dict, ...]:
    """Create one colormap per category collection with shared color assignment.

    All collections are merged first so that a category appearing in several
    collections receives the same color everywhere. ``NON_ROBUST_LABEL`` is
    always forced to grey.

    Parameters
    ----------
    cats : typing.Iterable[str]
        First collection of category names.
    *others : typing.Iterable[str]
        Optional additional collections of category names.

    Returns
    -------
    dict | tuple[dict, ...]
        A single colormap when only ``cats`` is given, otherwise one colormap
        per input collection, in argument order.
    """
    # Materialize the inputs up front: each collection is iterated twice
    # (once for the union, once per colormap), which would silently yield
    # empty colormaps for single-pass iterables such as generators.
    collections = [list(collection) for collection in (cats, *others)]

    # Deduplicate while preserving first-seen order so the color assignment
    # is deterministic across runs (plain set() ordering varies with string
    # hash randomization).
    all_categories = dict.fromkeys(itertools.chain.from_iterable(collections))

    # Unified category -> color lookup, cycling over the palette and
    # skipping its first entry (matches the original [1:] slice).
    lookup = dict(zip(all_categories, itertools.cycle(glasbey_dark[1:])))

    # Force non-robust points to a neutral grey in every colormap.
    lookup[NON_ROBUST_LABEL] = "#333333"

    # One colormap per input collection, all drawing from the shared lookup.
    cmaps = tuple(
        {cat: lookup[cat] for cat in collection} for collection in collections
    )
    return cmaps[0] if len(cmaps) == 1 else cmaps
self.clear_output() 24 | with self: 25 | IPython.display.display(IPython.display.HTML(html)) 26 | -------------------------------------------------------------------------------- /src/cev/components/_marker_composition_logo.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import jinja2 4 | import traitlets 5 | 6 | from ._html_widget import HTMLWidget 7 | 8 | __all__ = ["MarkerCompositionLogo"] 9 | 10 | 11 | class MarkerCompositionLogo(HTMLWidget): 12 | _template = jinja2.Template( 13 | """ 14 |
15 | 95 | """ 96 | ) 97 | 98 | counts = traitlets.Dict() 99 | -------------------------------------------------------------------------------- /src/cev/components/_marker_selection_indicator.py: -------------------------------------------------------------------------------- 1 | import anywidget 2 | import traitlets 3 | 4 | __all__ = ["MarkerSelectionIndicator"] 5 | 6 | 7 | class MarkerSelectionIndicator(anywidget.AnyWidget): 8 | _esm = """ 9 | const FONT_COLOR = "var(--jp-ui-font-color0)"; 10 | const FONT_COLOR_SECONDARY = "var(--jp-ui-font-color1)"; 11 | const BUTTON_BG = "var(--jp-layout-color2)"; 12 | const BUTTON_HOVER_BG = "var(--jp-layout-color2)"; 13 | const BUTTON_ACTIVE_BG = "#1976d2"; 14 | const BUTTON_ACTIVE_HOVER_BG = "#0069d3"; 15 | const BUTTON_ACTIVE_SECONDARY_BG = "var(--jp-ui-font-color3)"; 16 | const NATURAL_COMPARATOR = new Intl.Collator(undefined, { numeric: true }).compare; 17 | 18 | export async function render(view) { 19 | const container = document.createElement("div"); 20 | view.el.appendChild(container); 21 | 22 | Object.assign(container.style, { 23 | display: "flex", 24 | flexDirection: "column", 25 | gap: "4px", 26 | }); 27 | 28 | const header = document.createElement("div"); 29 | container.appendChild(header); 30 | 31 | Object.assign(header.style, { 32 | display: "flex", 33 | justifyContent: "space-between", 34 | alignItems: "center", 35 | gap: "2px", 36 | }); 37 | 38 | const title = document.createElement("h4"); 39 | header.appendChild(title); 40 | 41 | Object.assign(title.style, { 42 | padding: "0", 43 | margin: "0", 44 | }); 45 | title.textContent = "Markers"; 46 | 47 | const settings = document.createElement("div"); 48 | header.appendChild(settings); 49 | Object.assign(settings.style, { display: "flex", alignItems: "center" }); 50 | 51 | const sortLabel = document.createElement("div"); 52 | sortLabel.textContent = "Sort by"; 53 | Object.assign(sortLabel.style, { fontSize: "0.875em", marginRight: "0.25rem" }); 54 | 55 | const 
sortImportance = document.createElement("button"); 56 | sortImportance.textContent = "Expression Discriminability"; 57 | Object.assign(sortImportance.style, { 58 | background: view.model.get("sort_alphabetically") ? BUTTON_BG : BUTTON_ACTIVE_SECONDARY_BG, 59 | border: `1px solid ${view.model.get("sort_alphabetically") ? BUTTON_BG : BUTTON_ACTIVE_SECONDARY_BG}`, 60 | borderRadius: "4px 0 0 4px", 61 | userSelect: "none", 62 | cursor: "pointer", 63 | }); 64 | sortImportance.addEventListener("click", function() { 65 | view.model.set("sort_alphabetically", false); 66 | view.model.save_changes(); 67 | }); 68 | 69 | const sortAlphabetically = document.createElement("button"); 70 | sortAlphabetically.textContent = "Alphabetically"; 71 | Object.assign(sortAlphabetically.style, { 72 | background: view.model.get("sort_alphabetically") ? BUTTON_ACTIVE_SECONDARY_BG : BUTTON_BG, 73 | border: `1px solid ${view.model.get("sort_alphabetically") ? BUTTON_ACTIVE_SECONDARY_BG : BUTTON_BG}`, 74 | borderRadius: "0 4px 4px 0", 75 | marginLeft: "-1px", 76 | userSelect: "none", 77 | cursor: "pointer", 78 | }); 79 | sortAlphabetically.addEventListener("click", function() { 80 | view.model.set("sort_alphabetically", true); 81 | view.model.save_changes(); 82 | }); 83 | 84 | settings.appendChild(sortLabel); 85 | settings.appendChild(sortImportance); 86 | settings.appendChild(sortAlphabetically); 87 | 88 | const markersEl = document.createElement("div"); 89 | container.appendChild(markersEl); 90 | 91 | Object.assign(markersEl.style, { 92 | display: "flex", 93 | flexWrap: "wrap", 94 | gap: "2px", 95 | }); 96 | 97 | function getOrder() { 98 | const markers = view.model.get("markers"); 99 | return view.model.get("sort_alphabetically") 100 | ? 
new Map(markers.map((marker, i) => [marker, i]).sort(([a], [b]) => NATURAL_COMPARATOR(a, b)).map(([marker, i], j) => [i, j])) 101 | : undefined; 102 | } 103 | 104 | function rerender() { 105 | const markers = view.model.get("markers"); 106 | const active = view.model.get("active"); 107 | const diff = markers.length - markersEl.childElementCount; 108 | 109 | if (diff > 0) { 110 | for (let i = 0; i < diff; i++) { 111 | const button = document.createElement("button"); 112 | 113 | Object.assign(button.style, { 114 | background: "var(--marker-selection-indicator-bg)", 115 | cursor: "pointer", 116 | padding: "4px 6px", 117 | border: "0", 118 | borderRadius: i === 0 119 | ? "2px 0 0 2px" 120 | : i === markers.length - 1 121 | ? "0 2px 2px 0" 122 | : "0", 123 | userSelect: "none", 124 | }); 125 | 126 | button.addEventListener("click", function (event) { 127 | let newActive = [...view.model.get("active")]; 128 | 129 | if (event.altKey) { 130 | newActive = Array.from({ length: markers.length }, (_, j) => j === i); 131 | } else if (event.shiftKey) { 132 | const order = getOrder(); 133 | const _i = order ? order.get(i) : i; 134 | newActive = Array.from({ length: markers.length }, (_, j) => (order ? order.get(j) : j) <= _i); 135 | } else { 136 | const numActive = newActive.reduce((num, curr) => num + Number(curr), 0); 137 | if (!newActive[i] || numActive > 1) newActive[i] = !newActive[i]; 138 | } 139 | 140 | view.model.set("active", newActive); 141 | view.model.save_changes(); 142 | }); 143 | 144 | button.addEventListener("mouseenter", function () { 145 | const active = view.model.get("active"); 146 | button.style.setProperty("--marker-selection-indicator-bg", active[i] ? BUTTON_ACTIVE_HOVER_BG : BUTTON_HOVER_BG); 147 | }); 148 | 149 | button.addEventListener("mouseleave", function () { 150 | const active = view.model.get("active"); 151 | button.style.setProperty("--marker-selection-indicator-bg", active[i] ? 
BUTTON_ACTIVE_BG : BUTTON_BG); 152 | }); 153 | 154 | markersEl.appendChild(button); 155 | } 156 | } else if (diff < 0) { 157 | for (let i = 0; i < -diff; i++) { 158 | markersEl.removeChild(markersEl.lastChild); 159 | } 160 | } 161 | 162 | const order = getOrder(); 163 | 164 | for (let i = 0; i < markers.length; i++) { 165 | const child = markersEl.childNodes[i]; 166 | 167 | if (active[i]) { 168 | child.style.color = "white"; 169 | child.style.setProperty("--marker-selection-indicator-bg", BUTTON_ACTIVE_BG); 170 | } else { 171 | child.style.color = FONT_COLOR; 172 | child.style.setProperty("--marker-selection-indicator-bg", BUTTON_BG); 173 | } 174 | 175 | if (order?.has(i)) { 176 | child.style.order = order.get(i); 177 | } else { 178 | child.style.order = 0; 179 | } 180 | 181 | child.textContent = markers[i]; 182 | } 183 | 184 | const isAlphabetically = view.model.get("sort_alphabetically"); 185 | const isImportance = !isAlphabetically; 186 | 187 | const getButtonStyle = (active) => ({ 188 | background: active ? BUTTON_ACTIVE_SECONDARY_BG : BUTTON_BG, 189 | border: 0, 190 | color: active ? 
class WidthOptimizer(anywidget.AnyWidget):
    """This widget gets rid of unwanted whitespace in front of ipywidgets"""

    # Front-end entry point executed by anywidget. The setTimeout(0) defers
    # the work one tick so the element is attached to the DOM; the handler
    # then resets the inline-label width CSS variable on the parent node so
    # sibling ipywidgets render without the reserved label gutter.
    _esm = """
    export function render(view) {
      setTimeout(() => {
        view.el.parentNode.style.setProperty('--jp-widgets-inline-label-width', 'auto');
      }, 0);
    }
    """
def neighborhood(df: pd.DataFrame, max_depth: int = 1) -> pd.DataFrame:
    """Label-level neighborhood composition with self-entries zeroed out.

    Returns a square DataFrame indexed by the label categories of ``df``.
    """
    scores = cev_metrics.neighborhood(df, max_depth)
    np.fill_diagonal(scores, 0)
    labels = df["label"].cat.categories
    return pd.DataFrame(scores, index=labels, columns=labels)


def compare_neighborhoods(a: pd.DataFrame, b: pd.DataFrame) -> dict[str, float]:
    """Per-label cosine distance between two neighborhood matrices.

    Labels present in only one of the matrices are assigned 0.0.

    Parameters
    ----------
    a : pd.DataFrame
        A symmetric DataFrame with shared rows/cols.
    b : pd.DataFrame
        A symmetric DataFrame with shared rows/cols.

    Returns
    -------
    dict[str, float]
        Mapping from label to ``1 - cosine_similarity`` of its rows
        in ``a`` and ``b``.
    """
    assert len(a) == len(a.columns)
    assert len(b) == len(b.columns)
    shared = a.index.intersection(b.index)
    # Start every label (union of both) at zero distance, then fill in the
    # measured distances for the labels the two matrices have in common.
    result = {label: 0.0 for label in typing.cast(pd.Series, a.index.union(b.index))}
    distances = 1 - rowise_cosine_similarity(
        a.loc[shared, shared], b.loc[shared, shared]
    )
    result.update(distances)
    return result


def rowise_cosine_similarity(X0: npt.ArrayLike, X1: npt.ArrayLike):
    """Cosine similarity of corresponding rows of two equally shaped 2D matrices."""
    dot = np.sum(X0 * X1, axis=1)
    norm_product = nplg.norm(X0, axis=1) * nplg.norm(X1, axis=1)
    return dot / norm_product


def transform_abundance(
    frequencies: pd.DataFrame,
    abundances: dict[str, int],
    force_include_self: bool = True,
    bit_mask: bool = False,
    clr: bool = False,
):
    """Create an abundance-based representation.

    Transforms a label-level neighborhood representation into an
    abundance-based one by multiplying the frequencies with the abundances.
    Alternatively, a bit mask can treat all non-zero frequencies as 1.

    Parameters
    ----------
    frequencies : pd.DataFrame
        A symmetric DataFrame with shared rows/cols.
    abundances : dict[str, int]
        A dictionary mapping labels to abundances.
    force_include_self : bool, optional
        Whether to include the label itself in the neighborhood,
        by default True.
    bit_mask : bool, optional
        Whether to use a bit mask instead of the frequencies when expanding
        abundances, by default False.
    clr : bool, optional
        Whether to normalize the count values by transforming them to
        centered log ratios, by default False.
    """
    assert (
        frequencies.index.to_list() == frequencies.columns.to_list()
    ), "must be a symmetric DataFrame with shared rows/cols"

    mask = frequencies.to_numpy()
    if bit_mask:
        mask = mask > 0
        if force_include_self:
            np.fill_diagonal(mask, True)
    elif force_include_self:
        np.fill_diagonal(mask, 1.0)

    if clr:
        # +1 avoids log(0); normalize against the geometric mean of counts.
        inflated = np.fromiter(abundances.values(), dtype=int) + 1
        values = dict(zip(abundances.keys(), np.log10(inflated / _gmean(inflated))))
    else:
        values = abundances

    column_weights = np.array([values[col] for col in frequencies.columns])
    return pd.DataFrame(
        mask * column_weights,
        columns=frequencies.columns,
        index=frequencies.index,
    )
def relative_abundance(abundance_representation: pd.DataFrame):
    """Fraction of each row's total mass that sits on the diagonal."""
    row_totals = abundance_representation.sum(axis=1)
    return np.diagonal(abundance_representation) / row_totals


def centered_logratio(abundance_representation: pd.DataFrame):
    """Per-label centered log-ratio of the diagonal against its own row."""
    matrix = abundance_representation.to_numpy().copy()
    diag = np.diagonal(matrix)
    # Replace zero self-counts with 1 so the log ratio below stays finite.
    np.fill_diagonal(matrix, np.where(diag > 0, diag, 1))

    # +1 on the row avoids log(0) inside the geometric mean.
    ratios = [
        np.log10((row[i] + 1) / _gmean(row + 1)) for i, row in enumerate(matrix)
    ]
    return pd.Series(ratios, index=abundance_representation.index)
Redistributions in binary form must reproduce the above 182 | # copyright notice, this list of conditions and the following 183 | # disclaimer in the documentation and/or other materials provided 184 | # with the distribution. 185 | # 186 | # 3. Neither the name of the copyright holder nor the names of its 187 | # contributors may be used to endorse or promote products derived 188 | # from this software without specific prior written permission. 189 | # 190 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 191 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 192 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 193 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 194 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 195 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 196 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 197 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 198 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 199 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 200 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 201 | def _gmean(a, axis=0, dtype=None, weights=None): 202 | r"""Compute the weighted geometric mean along the specified axis. 203 | 204 | The weighted geometric mean of the array :math:`a_i` associated to weights 205 | :math:`w_i` is: 206 | 207 | .. math:: 208 | 209 | \exp \left( \frac{ \sum_{i=1}^n w_i \ln a_i }{ \sum_{i=1}^n w_i } 210 | \right) \, , 211 | 212 | and, with equal weights, it gives: 213 | 214 | .. math:: 215 | 216 | \sqrt[n]{ \prod_{i=1}^n a_i } \, . 217 | 218 | Parameters 219 | ---------- 220 | a : array_like 221 | Input array or object that can be converted to an array. 222 | axis : int or None, optional 223 | Axis along which the geometric mean is computed. Default is 0. 
224 | If None, compute over the whole array `a`. 225 | dtype : dtype, optional 226 | Type to which the input arrays are cast before the calculation is 227 | performed. 228 | weights : array_like, optional 229 | The `weights` array must be broadcastable to the same shape as `a`. 230 | Default is None, which gives each value a weight of 1.0. 231 | 232 | Returns 233 | ------- 234 | gmean : ndarray 235 | See `dtype` parameter above. 236 | 237 | See Also 238 | -------- 239 | numpy.mean : Arithmetic average 240 | numpy.average : Weighted average 241 | hmean : Harmonic mean 242 | 243 | References 244 | ---------- 245 | .. [1] "Weighted Geometric Mean", *Wikipedia*, 246 | https://en.wikipedia.org/wiki/Weighted_geometric_mean. 247 | 248 | Examples 249 | -------- 250 | >>> from scipy.stats import gmean 251 | >>> gmean([1, 4]) 252 | 2.0 253 | >>> gmean([1, 2, 3, 4, 5, 6, 7]) 254 | 3.3800151591412964 255 | >>> gmean([1, 4, 7], weights=[3, 1, 3]) 256 | 2.80668351922014 257 | 258 | """ 259 | 260 | a = np.asarray(a, dtype=dtype) 261 | 262 | if weights is not None: 263 | weights = np.asarray(weights, dtype=dtype) 264 | 265 | with np.errstate(divide="ignore"): 266 | log_a = np.log(a) 267 | 268 | return np.exp(np.average(log_a, axis=axis, weights=weights)) 269 | -------------------------------------------------------------------------------- /src/cev/widgets.py: -------------------------------------------------------------------------------- 1 | from cev._compare import compare as compare 2 | from cev._embedding import Embedding as Embedding 3 | from cev._embedding_comparison_widget import ( 4 | EmbeddingComparisonWidget as EmbeddingComparisonWidget, 5 | ) 6 | from cev._embedding_widget import EmbeddingWidgetCollection as EmbeddingWidgetCollection 7 | -------------------------------------------------------------------------------- /tests/test_cev.py: -------------------------------------------------------------------------------- 1 | def test_cev(): 2 | """Just to make sure it's 
def test_trim_label_series():
    """trim_label_series keeps only the requested markers in each label."""
    raw = pd.Series(
        ["CD8+CD4-CD3+", "CD8+CD4+CD3+", "CD8-CD4+CD3-", "CD8-CD4-CD3+"],
        dtype="category",
    )
    expected = pd.Series(
        ["CD8+CD3+", "CD8+CD3+", "CD8-CD3-", "CD8-CD3+"], dtype="category"
    )

    actual = trim_label_series(raw, {"CD8", "CD3"})

    # Both the category set and the element-wise values must match.
    assert actual.cat.categories.tolist() == expected.cat.categories.tolist()
    assert actual.tolist() == expected.tolist()