├── .gitattributes ├── .github └── workflows │ ├── build.yaml │ └── lint.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── assets ├── Figure.png ├── WSI_intro.png ├── logo.ai ├── logo.svg ├── logo.webp └── logo@3x.png ├── docs ├── clean_up.py └── source │ ├── _static │ ├── cemm-logo.svg │ ├── custom.css │ ├── logo.svg │ └── logo@3x.png │ ├── _templates │ ├── autosummary │ │ └── class.rst │ └── models.rst │ ├── api │ ├── .gitignore │ ├── cv.rst │ ├── index.rst │ ├── io.rst │ ├── models.rst │ ├── plotting.rst │ ├── preprocess.rst │ ├── segmentation.rst │ └── tools.rst │ ├── conf.py │ ├── contributing.rst │ ├── contributors.rst │ ├── index.rst │ ├── installation.rst │ └── tutorials │ ├── .gitignore │ ├── 00_intro_wsi.ipynb │ ├── 01_preprocessing.ipynb │ ├── 02_feature_extraction.ipynb │ ├── 03_multiple_slides.ipynb │ ├── 04_genomics_integration.ipynb │ ├── 05_cell-segmentation.ipynb │ ├── 05_training_models.ipynb │ ├── 06_visualization.ipynb │ ├── 07_zero-shot-learning.ipynb │ ├── index.rst │ └── matplotlibrc ├── pyproject.toml ├── src └── lazyslide │ ├── __init__.py │ ├── __main__.py │ ├── _const.py │ ├── _utils.py │ ├── cv │ ├── __init__.py │ ├── mask.py │ ├── scorer │ │ ├── __init__.py │ │ ├── base.py │ │ ├── focuslitenn │ │ │ ├── __init__.py │ │ │ ├── focuslitenn-2kernel-mse.pt │ │ │ └── model.py │ │ ├── module.py │ │ └── utils.py │ ├── tiles_merger.py │ └── transform │ │ ├── __init__.py │ │ ├── compose.py │ │ └── mods.py │ ├── datasets │ ├── __init__.py │ └── _sample.py │ ├── io │ ├── __init__.py │ └── _annotaiton.py │ ├── metrics.py │ ├── models │ ├── __init__.py │ ├── _model_registry.py │ ├── _utils.py │ ├── base.py │ ├── model_registry.csv │ ├── multimodal │ │ ├── __init__.py │ │ ├── conch.py │ │ ├── plip.py │ │ ├── prism.py │ │ └── titan.py │ ├── segmentation │ │ ├── __init__.py │ │ ├── cellpose.py │ │ ├── grandqc.py │ │ ├── instanseg.py │ │ ├── nulite │ │ │ ├── __init__.py │ │ │ ├── api.py │ │ │ └── model.py │ │ ├── postprocess.py │ │ ├── sam.py │ │ └── smp.py │ └── vision │ │ ├── __init__.py │ │ ├── base.py │ │ ├── conch.py │ │ ├── gigapath.py │ │ ├── h_optimus.py │ │ ├── hibou.py │ │ ├── midnight.py │ │ ├── phikon.py │ │ ├── plip.py │ │ ├── uni.py │ │ └── virchow.py │ ├── plotting │ ├── __init__.py │ ├── _api.py │ └── _wsi_viewer.py │ ├── preprocess │ ├── __init__.py │ ├── _graph.py │ ├── _tiles.py │ ├── _tissue.py │ └── _utils.py │ ├── py.typed │ ├── segmentation │ ├── __init__.py │ ├── _artifact.py │ ├── _cell.py │ ├── _seg_runner.py │ ├── _tissue.py │ └── _zero_shot.py │ └── tools │ ├── __init__.py │ ├── _domain.py │ ├── _features.py │ ├── _signatures.py │ ├── _spatial_features.py │ ├── _text_annotate.py │ ├── _tissue_props.py │ └── _zero_shot.py ├── tests ├── conftest.py ├── data │ └── CMU-1-Small-Region.svs ├── test_cv.py ├── test_datasets.py ├── test_pp.py └── test_tl.py ├── uv.lock └── workflow ├── main.nf └── modules └── qc └── main.nf /.gitattributes: -------------------------------------------------------------------------------- 1 | *.ipynb linguist-detectable=false -------------------------------------------------------------------------------- /.github/workflows/build.yaml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | paths: 6 | - '.github/**' 7 | - 'src/lazyslide/**' 8 | - 'tests/**' 9 | - 'pyproject.toml' 10 | pull_request: 11 | paths: 12 | - '.github/**' 13 | - 'src/lazyslide/**' 14 | - 'tests/**' 15 | - 'pyproject.toml' 
16 | 17 | jobs: 18 | Test: 19 | strategy: 20 | fail-fast: false 21 | matrix: 22 | os: [ubuntu-latest, windows-latest, macos-latest] 23 | python-version: ['3.11', '3.12', '3.13'] 24 | 25 | runs-on: ${{ matrix.os }} 26 | steps: 27 | - uses: actions/checkout@v4 28 | - name: Set up uv 29 | uses: astral-sh/setup-uv@v5 30 | with: 31 | python-version: ${{ matrix.python-version }} 32 | enable-cache: true 33 | cache-dependency-glob: "uv.lock" 34 | - name: Install project 35 | run: uv sync --dev 36 | - name: Tests 37 | run: | 38 | uv run task test-ci 39 | 40 | Upload_to_pypi: 41 | runs-on: ubuntu-latest 42 | permissions: 43 | id-token: write 44 | steps: 45 | - uses: actions/checkout@v4 46 | - name: Setup uv 47 | uses: astral-sh/setup-uv@v5 48 | with: 49 | python-version: '3.12' 50 | enable-cache: true 51 | cache-dependency-glob: "uv.lock" 52 | 53 | - name: Publish to test pypi 54 | run: | 55 | uv build 56 | uv publish --publish-url https://test.pypi.org/legacy/ || exit 0 57 | 58 | - name: Publish to pypi 59 | if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/v') 60 | run: | 61 | uv build 62 | uv publish -------------------------------------------------------------------------------- /.github/workflows/lint.yaml: -------------------------------------------------------------------------------- 1 | name: Lint with Ruff 2 | on: [push, pull_request] 3 | jobs: 4 | ruff: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - uses: actions/checkout@v4 8 | - uses: chartboost/ruff-action@v1 9 | with: 10 | src: "src/lazyslide" -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/build/ 73 | docs/.jupyter_cache/ 74 | docs/jupyter_execute 75 | 76 | 77 | # PyBuilder 78 | .pybuilder/ 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | # For a library or package, you might want to ignore these files since the code is 90 | # intended to run in multiple environments; otherwise, check them in: 91 | # .python-version 92 | 93 | # pipenv 94 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
95 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 96 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 97 | # install all needed dependencies. 98 | #Pipfile.lock 99 | 100 | # poetry 101 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 102 | # This is especially recommended for binary packages to ensure reproducibility, and is more 103 | # commonly ignored for libraries. 104 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 105 | #poetry.lock 106 | 107 | # pdm 108 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 109 | #pdm.lock 110 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 111 | # in version control. 112 | # https://pdm.fming.dev/#use-with-ide 113 | .pdm.toml 114 | 115 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 116 | __pypackages__/ 117 | 118 | # Celery stuff 119 | celerybeat-schedule 120 | celerybeat.pid 121 | 122 | # SageMath parsed files 123 | *.sage.py 124 | 125 | # Environments 126 | .env 127 | .venv 128 | env/ 129 | venv/ 130 | ENV/ 131 | env.bak/ 132 | venv.bak/ 133 | 134 | # Spyder project settings 135 | .spyderproject 136 | .spyproject 137 | 138 | # Rope project settings 139 | .ropeproject 140 | 141 | # mkdocs documentation 142 | /site 143 | 144 | # mypy 145 | .mypy_cache/ 146 | .dmypy.json 147 | dmypy.json 148 | 149 | # Pyre type checker 150 | .pyre/ 151 | 152 | # pytype static type analyzer 153 | .pytype/ 154 | 155 | # Cython debug symbols 156 | cython_debug/ 157 | 158 | # Ruff cache 159 | .ruff_cache/ 160 | 161 | # PyCharm 162 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 163 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 164 | # and can be added to the global gitignore or merged into this file. For a more nuclear 165 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 166 | .idea/ 167 | .DS_Store 168 | 169 | work/ 170 | .nextflow.log* 171 | data/ 172 | checkpoints/ 173 | annotations/ 174 | # JetBrains AI Agent 175 | .junie/ 176 | pretrained_models/ 177 | figures/ 178 | sample_data/ -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/astral-sh/uv-pre-commit 3 | # uv version. 4 | rev: 0.5.26 5 | hooks: 6 | - id: uv-lock 7 | - repo: https://github.com/astral-sh/ruff-pre-commit 8 | # Ruff version. 9 | rev: v0.6.5 10 | hooks: 11 | # Run the linter. 12 | - id: ruff 13 | args: [ --fix ] 14 | # Run the formatter. 
15 | - id: ruff-format 16 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 2 | version: 2 3 | 4 | build: 5 | os: ubuntu-22.04 6 | tools: 7 | python: "3.12" 8 | jobs: 9 | post_install: 10 | - pip install uv 11 | - UV_PROJECT_ENVIRONMENT=$READTHEDOCS_VIRTUALENV_PATH uv sync --all-extras --group docs --link-mode=copy 12 | 13 | sphinx: 14 | configuration: docs/source/conf.py 15 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | We welcome contributions to this project. 4 | 5 | 6 | ## For core contributors 7 | 8 | Please do not commit directly to the `main` branch. 9 | Instead, create a new branch for your changes and submit a pull request. 10 | 11 | ### How to set up your development environment 12 | 13 | 1. Clone the repository 14 | 15 | ```bash 16 | git clone https://github.com/rendeirolab/LazySlide.git 17 | # or 18 | gh repo clone rendeirolab/LazySlide 19 | ``` 20 | 21 | 2. Checkout a new branch 22 | 23 | ```bash 24 | git checkout -b my-new-branch 25 | ``` 26 | 27 | 3. We use [uv](https://docs.astral.sh/uv/) to manage our development environment. 28 | 29 | ```bash 30 | uv lock 31 | uv run pre-commit install 32 | ``` 33 | 34 | We use [pre-commit](https://pre-commit.com/) to run code formatting and linting checks before each commit. 35 | 36 | 4. Start a IPython/Jupyter session 37 | 38 | ```bash 39 | uv run --with ipython ipython 40 | # or 41 | uv run --with jupyter jupyter lab 42 | ``` 43 | 44 | 5. Make your changes 45 | 46 | 6. (If needed) Add a test case and then run the tests 47 | 48 | ```bash 49 | uv run task test 50 | ``` 51 | 52 | 7. (If needed) Update the documentation 53 | 54 | To build the documentation, use: 55 | 56 | ```bash 57 | # Build doc with cache 58 | uv run task doc-build 59 | # Fresh build 60 | uv run task doc-clean-build 61 | ``` 62 | 63 | To serve the documentation, use: 64 | 65 | ```bash 66 | uv run task doc-serve 67 | ``` 68 | 69 | This will start a local server at [http://localhost:8000](http://localhost:8000). 70 | 71 | 8. Commit your changes and push them to your fork 72 | 73 | 9. Submit a pull request 74 | 75 | 76 | ## How to report bugs 77 | 78 | 79 | ## How to suggest enhancements 80 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Rendeiro Lab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LazySlide 2 | 3 |

4 | 5 | 6 | 7 |

8 |

9 | Accessible and interoperable whole slide image analysis 10 |

11 | 12 | [![Documentation Status](https://readthedocs.org/projects/lazyslide/badge/?version=stable&style=flat-square)](https://lazyslide.readthedocs.io/en/stable) 13 | ![pypi version](https://img.shields.io/pypi/v/lazyslide?color=0098FF&logo=python&logoColor=white&style=flat-square) 14 | ![PyPI - License](https://img.shields.io/pypi/l/lazyslide?color=FFD43B&style=flat-square) 15 | ![scverse ecosystem](https://img.shields.io/badge/scverse_ecosystem-gray.svg?style=flat-square&logo=data:image/svg+xml;base64,PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0iVVRGLTgiIHN0YW5kYWxvbmU9Im5vIj8+PCFET0NUWVBFIHN2ZyBQVUJMSUMgIi0vL1czQy8vRFREIFNWRyAxLjEvL0VOIiAiaHR0cDovL3d3dy53My5vcmcvR3JhcGhpY3MvU1ZHLzEuMS9EVEQvc3ZnMTEuZHRkIj4KPHN2ZyB3aWR0aD0iMTAwJSIgaGVpZ2h0PSIxMDAlIiB2aWV3Qm94PSIwIDAgOTEgOTEiIHZlcnNpb249IjEuMSIKICAgIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIKICAgIHhtbG5zOnhsaW5rPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5L3hsaW5rIiB4bWw6c3BhY2U9InByZXNlcnZlIgogICAgeG1sbnM6c2VyaWY9Imh0dHA6Ly93d3cuc2VyaWYuY29tLyIgc3R5bGU9ImZpbGwtcnVsZTpldmVub2RkO2NsaXAtcnVsZTpldmVub2RkO3N0cm9rZS1saW5lam9pbjpyb3VuZDtzdHJva2UtbWl0ZXJsaW1pdDoyOyI+CiAgICA8ZyBpZD0iRWJlbmVfMyI+CiAgICAgICAgPGc+CiAgICAgICAgICAgIDxwYXRoIGQ9Ik0zNSw4OS42Yy0yMi4zLC0zLjQgLTMwLjYsLTE5LjggLTMwLjYsLTE5LjhjMTAuOCwxNi45IDQzLDkuMSA1Mi45LDIuNWMxMi40LC04LjMgOCwtMTUuMyA2LjgsLTE4LjFjNS40LDcuMiA1LjMsMjMuNSAtMS4xLDI5LjRjLTUuNiw1LjEgLTE1LjMsNy45IC0yOCw2WiIgc3R5bGU9ImZpbGw6I2ZmZjtmaWxsLXJ1bGU6bm9uemVybztzdHJva2U6IzAwMDtzdHJva2Utd2lkdGg6MXB4OyIvPgogICAgICAgICAgICA8cGF0aCBkPSJNODMuOSw0My41YzIuOSwtNy4xIDAuOCwtMTIuNSAwLjUsLTEzLjNjLTAuNywtMS4zIC0xLjUsLTIuMyAtMi40LC0zLjFjLTE2LjEsLTEyLjYgLTU1LjksMSAtNzAuOSwxNi44Yy0xMC45LDExLjUgLTEwLjEsMjAgLTYuNywyNS44YzMuMSw0LjggNy45LDcuNiAxMy40LDljLTExLjUsLTEyLjQgOS44LC0zMS4xIDI5LC0zOGMyMSwtNy41IDMyLjUsLTMgMzcuMSwyLjhaIiBzdHlsZT0iZmlsbDojMzQzNDM0O2ZpbGwtcnVsZTpub256ZXJvO3N0cm9rZTojMDAwO3N0cm9rZS13aWR0aDoxcHg7Ii8+CiAgICAgICAgICAgIDxwYXRoIGQ9Ik03OS42LDUwLjRjOSwtMTAuNSA1LC0xOS43IDQuOCwtMjAuNGMtMCwwIDQuNCw3LjEgMi4yLDIyLjZjLTEuMiw4LjUgLTUuNCwxNiAtMTAuMSwxMS44Yy0yLjEsLTEuOCAtMywtNi45IDMuMSwtMTRaIiBzdHlsZT0iZmlsbDojZmZmO2ZpbGwtcnVsZTpub256ZXJvO3N0cm9rZTojMDAwO3N0cm9rZS13aWR0aDoxcHg7Ii8+CiAgICAgICAgICAgIDxwYXRoIGQ9Ik02NCw1NC4yYy0zLjMsLTQuOCAtOC4xLC03LjQgLTEyLjMsLTEwLjhjLTIuMiwtMS43IC0xNi40LC0xMS4yIC0xOS4yLC0xNS4xYy02LjQsLTYuNCAtOS41LC0xNi45IC0zLjQsLTIzLjFjLTQuNCwtMC44IC04LjIsMC4yIC0xMC42LDEuNWMtMS4xLDAuNiAtMi4xLDEuMiAtMi44LDJjLTYuNyw2LjIgLTUuOCwxNyAtMS42LDI0LjNjNC41LDcuOCAxMy4yLDE1LjQgMjQuMywyMi44YzUuMSwzLjQgMTUuNiw4LjQgMTkuMywxNmMxMS43LC04LjEgNy42LC0xNC45IDYuMywtMTcuNloiIHN0eWxlPSJmaWxsOiNiNGI0YjQ7ZmlsbC1ydWxlOm5vbnplcm87c3Ryb2tlOiMwMDA7c3Ryb2tlLXdpZHRoOjFweDsiLz4KICAgICAgICAgICAgPHBhdGggZD0iTTM4LjcsOS44YzcuOSw2LjMgMTIuNCw5LjggMjAsOC41YzUuNywtMSA0LjksLTcuOSAtNCwtMTMuNmMtNC40LC0yLjggLTkuNCwtNC4yIC0xNS43LC00LjJjLTcuNSwtMCAtMTYuMywzLjkgLTIwLjYsNi40YzQsLTIuMyAxMS45LC0zLjggMjAuMywyLjlaIiBzdHlsZT0iZmlsbDojZmZmO2ZpbGwtcnVsZTpub256ZXJvO3N0cm9rZTojMDAwO3N0cm9rZS13aWR0aDoxcHg7Ii8+CiAgICAgICAgPC9nPgogICAgPC9nPgo8L3N2Zz4=) 16 | 17 | [Installation](https://lazyslide.readthedocs.io/en/stable/installation.html) | 18 | [Tutorials](https://lazyslide.readthedocs.io/en/stable/tutorials/index.html) 19 | 20 | LazySlide is a Python framework for whole slide image (WSI) analysis, designed to integrate seamlessly with the scverse 21 | ecosystem. 22 | 23 | By adopting standardized data structures and APIs familiar to the single-cell and genomics community, LazySlide enables 24 | intuitive, interoperable, and reproducible workflows for histological analysis. 
It supports a range of tasks from basic 25 | preprocessing to advanced deep learning applications, facilitating the integration of histopathology into modern 26 | computational biology. 27 | 28 | ## Key features 29 | 30 | - **Interoperability**: Built on top of SpatialData, ensuring compatibility with scverse tools like scanpy, anndata, and 31 | squidpy. 32 | - **Accessibility**: User-friendly APIs that cater to both beginners and experts in digital pathology. 33 | - **Scalability**: Efficient handling of large WSIs, enabling high-throughput analyses. 34 | - **Multimodal integration**: Combine histological data with transcriptomics, genomics, and textual annotations. 35 | - **Foundation model support**: Native integration with state-of-the-art models (e.g., UNI, CONCH, Gigapath, Virchow) 36 | for tasks like zero-shot classification and captioning. 37 | - **Deep learning ready**: Provides PyTorch dataloaders for seamless integration into machine learning pipelines. 38 | 39 | ![figure](assets/Figure.png) 40 | 41 | ## Documentation 42 | 43 | Comprehensive documentation is available at [https://lazyslide.readthedocs.io](https://lazyslide.readthedocs.io). It 44 | includes tutorials, API references, and guides to help you get started. 45 | 46 | ## Installation 47 | 48 | LazySlide is available on the [PyPI index](https://pypi.org/project/lazyslide), so you can install it with your 49 | favourite package manager: 50 | 51 | - `pip install lazyslide` or 52 | - `uv add lazyslide` 53 | 54 | For full instructions, please refer to 55 | the [Installation page in the documentation](https://lazyslide.readthedocs.io/en/stable/installation.html). 56 | 57 | ## Quick start 58 | 59 | With a few lines of code, you can quickly process a whole slide image (tissue segmentation, tessellation, feature 60 | extraction): 61 | 62 | ```python 63 | import lazyslide as zs 64 | 65 | wsi = zs.datasets.sample() 66 | 67 | # Pipeline 68 | zs.pp.find_tissues(wsi) 69 | zs.pp.tile_tissues(wsi, tile_px=256, mpp=0.5) 70 | zs.tl.feature_extraction(wsi, model='resnet50') 71 | 72 | # Access the features 73 | features = wsi['resnet50_tiles'] 74 | ``` 75 | 76 | ## Contributing 77 | 78 | We welcome contributions from the community. Please refer to our [contributing guide](CONTRIBUTING.md) for guidelines on 79 | how to contribute. 80 | 81 | ## License 82 | 83 | LazySlide is released under the [MIT License](LICENSE).
84 | -------------------------------------------------------------------------------- /assets/Figure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rendeirolab/LazySlide/f39634cc994b3098b0933075b9d25ecd99b9014e/assets/Figure.png -------------------------------------------------------------------------------- /assets/WSI_intro.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rendeirolab/LazySlide/f39634cc994b3098b0933075b9d25ecd99b9014e/assets/WSI_intro.png -------------------------------------------------------------------------------- /assets/logo.ai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rendeirolab/LazySlide/f39634cc994b3098b0933075b9d25ecd99b9014e/assets/logo.ai -------------------------------------------------------------------------------- /assets/logo.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rendeirolab/LazySlide/f39634cc994b3098b0933075b9d25ecd99b9014e/assets/logo.webp -------------------------------------------------------------------------------- /assets/logo@3x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rendeirolab/LazySlide/f39634cc994b3098b0933075b9d25ecd99b9014e/assets/logo@3x.png -------------------------------------------------------------------------------- /docs/clean_up.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import shutil 3 | 4 | root = Path(__file__).parent # ./docs 5 | 6 | target_folders = [ 7 | root / "build", 8 | root / "source" / "api" / "_autogen", 9 | root / "jupyter_execute", 10 | ] 11 | 12 | 13 | if __name__ == "__main__": 14 | for folder in target_folders: 15 | if folder.exists(): 16 | shutil.rmtree(folder) 17 | -------------------------------------------------------------------------------- /docs/source/_static/custom.css: -------------------------------------------------------------------------------- 1 | html[data-theme="light"] { 2 | --pst-color-primary: #C68FE6; 3 | --pst-color-secondary: #FFCD05; 4 | --pst-color-link: #C68FE6; 5 | --pst-color-inline-code: rgb(96, 141, 130); 6 | } 7 | 8 | html[data-theme="dark"] { 9 | --pst-color-primary: #C68FE6; 10 | --pst-color-secondary: #FFCD05; 11 | } 12 | 13 | /* Change the highlight color, increase contrast*/ 14 | html[data-theme="light"] .highlight .hll { 15 | background-color: #fcf427; 16 | } 17 | 18 | .cell_output img { 19 | height: auto !important; 20 | } 21 | 22 | .navbar-brand .logo__image { 23 | height: 150px; 24 | } -------------------------------------------------------------------------------- /docs/source/_static/logo@3x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rendeirolab/LazySlide/f39634cc994b3098b0933075b9d25ecd99b9014e/docs/source/_static/logo@3x.png -------------------------------------------------------------------------------- /docs/source/_templates/autosummary/class.rst: -------------------------------------------------------------------------------- 1 | {{ fullname | escape | underline}} 2 | 3 | .. currentmodule:: {{ module }} 4 | 5 | .. 
autoclass:: {{ objname }} 6 | :special-members: __call__ -------------------------------------------------------------------------------- /docs/source/_templates/models.rst: -------------------------------------------------------------------------------- 1 | {{ fullname | escape | underline }} 2 | 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | 7 | {% if objtype in ['class'] %} 8 | .. auto{{ objtype }}:: {{ objname }} 9 | :show-inheritance: 10 | :special-members: __call__ 11 | 12 | {% else %} 13 | .. auto{{ objtype }}:: {{ objname }} 14 | 15 | {% endif %} -------------------------------------------------------------------------------- /docs/source/api/.gitignore: -------------------------------------------------------------------------------- 1 | _autogen/ -------------------------------------------------------------------------------- /docs/source/api/cv.rst: -------------------------------------------------------------------------------- 1 | Computer vision utilities 2 | ------------------------- 3 | 4 | Scorers 5 | ~~~~~~~ 6 | 7 | .. currentmodule:: lazyslide.cv.scorer 8 | 9 | .. autosummary:: 10 | :toctree: _autogen 11 | :nosignatures: 12 | 13 | FocusLite 14 | Contrast 15 | SplitRGB 16 | Redness 17 | Brightness 18 | ScorerBase 19 | 20 | 21 | Mask 22 | ~~~~ 23 | 24 | .. currentmodule:: lazyslide.cv 25 | 26 | .. autosummary:: 27 | :toctree: _autogen 28 | :nosignatures: 29 | 30 | Mask 31 | BinaryMask 32 | MultiLabelMask 33 | MultiClassMask 34 | 35 | 36 | Polygon merging 37 | ~~~~~~~~~~~~~~~ 38 | 39 | .. currentmodule:: lazyslide.cv 40 | 41 | .. autosummary:: 42 | :toctree: _autogen 43 | :nosignatures: 44 | 45 | merge_polygons 46 | -------------------------------------------------------------------------------- /docs/source/api/index.rst: -------------------------------------------------------------------------------- 1 | API Reference 2 | ============= 3 | 4 | .. toctree:: 5 | :maxdepth: 1 6 | :hidden: 7 | 8 | preprocess 9 | tools 10 | plotting 11 | segmentation 12 | io 13 | models 14 | cv 15 | 16 | 17 | .. grid:: 1 2 2 2 18 | :gutter: 2 19 | 20 | .. grid-item-card:: Preprocessing 21 | :link: preprocess 22 | :link-type: doc 23 | 24 | Preprocessing functions for WSI 25 | 26 | .. grid-item-card:: Tools 27 | :link: tools 28 | :link-type: doc 29 | 30 | Tools for WSI analysis 31 | 32 | .. grid-item-card:: Plotting 33 | :link: plotting 34 | :link-type: doc 35 | 36 | Plotting functions for WSI 37 | 38 | .. grid-item-card:: Segmentation 39 | :link: segmentation 40 | :link-type: doc 41 | 42 | Segmentation tasks on WSI 43 | 44 | .. grid-item-card:: Models 45 | :link: models 46 | :link-type: doc 47 | 48 | Models for WSI analysis 49 | 50 | .. grid-item-card:: Computer Vision 51 | :link: cv 52 | :link-type: doc 53 | 54 | Computer Vision utilities for WSI 55 | 56 | .. grid-item-card:: IO 57 | :link: io 58 | :link-type: doc 59 | 60 | IO for annotations 61 | -------------------------------------------------------------------------------- /docs/source/api/io.rst: -------------------------------------------------------------------------------- 1 | IO :code:`io` 2 | ------------- 3 | 4 | .. currentmodule:: lazyslide 5 | 6 | .. autosummary:: 7 | :toctree: _autogen 8 | :nosignatures: 9 | 10 | io.load_annotations 11 | io.export_annotations -------------------------------------------------------------------------------- /docs/source/api/models.rst: -------------------------------------------------------------------------------- 1 | Models 2 | ------ 3 | 4 | .. currentmodule:: lazyslide.models 5 | 6 | .. 
autosummary:: 7 | :toctree: _autogen 8 | :nosignatures: 9 | 10 | list_models 11 | 12 | 13 | Vision Models 14 | ~~~~~~~~~~~~~ 15 | 16 | .. currentmodule:: lazyslide.models.vision 17 | 18 | .. autosummary:: 19 | :toctree: _autogen 20 | :nosignatures: 21 | 22 | UNI 23 | UNI2 24 | GigaPath 25 | PLIPVision 26 | CONCHVision 27 | Virchow 28 | Virchow2 29 | Phikon 30 | PhikonV2 31 | HOptimus0 32 | HOptimus1 33 | H0Mini 34 | 35 | 36 | Image-Text Models 37 | ~~~~~~~~~~~~~~~~~~~ 38 | 39 | .. currentmodule:: lazyslide.models.multimodal 40 | 41 | .. autosummary:: 42 | :toctree: _autogen 43 | :nosignatures: 44 | 45 | PLIP 46 | CONCH 47 | Titan 48 | Prism 49 | 50 | 51 | Segmentation Models 52 | ~~~~~~~~~~~~~~~~~~~ 53 | 54 | .. currentmodule:: lazyslide.models.segmentation 55 | 56 | .. autosummary:: 57 | :toctree: _autogen 58 | :nosignatures: 59 | 60 | Instanseg 61 | NuLite 62 | GrandQCTissue 63 | GrandQCArtifact 64 | SMPBase 65 | 66 | Base Models 67 | ~~~~~~~~~~~ 68 | 69 | .. currentmodule:: lazyslide.models.base 70 | 71 | .. autosummary:: 72 | :toctree: _autogen 73 | :nosignatures: 74 | 75 | ModelBase 76 | ImageModel 77 | ImageTextModel 78 | SegmentationModel 79 | SlideEncoderModel 80 | TimmModel 81 | -------------------------------------------------------------------------------- /docs/source/api/plotting.rst: -------------------------------------------------------------------------------- 1 | Plotting: :code:`pl` 2 | -------------------- 3 | 4 | .. currentmodule:: lazyslide 5 | 6 | .. autosummary:: 7 | :toctree: _autogen 8 | :nosignatures: 9 | 10 | pl.tissue 11 | pl.tiles 12 | pl.annotations 13 | pl.WSIViewer 14 | -------------------------------------------------------------------------------- /docs/source/api/preprocess.rst: -------------------------------------------------------------------------------- 1 | Preprocessing: :code:`pp` 2 | ------------------------- 3 | 4 | .. currentmodule:: lazyslide 5 | 6 | .. autosummary:: 7 | :toctree: _autogen 8 | :nosignatures: 9 | 10 | pp.find_tissues 11 | pp.score_tissues 12 | pp.tile_tissues 13 | pp.score_tiles 14 | pp.tile_graph 15 | -------------------------------------------------------------------------------- /docs/source/api/segmentation.rst: -------------------------------------------------------------------------------- 1 | Segmentation :code:`seg` 2 | ------------------------- 3 | 4 | .. currentmodule:: lazyslide 5 | 6 | .. autosummary:: 7 | :toctree: _autogen 8 | :nosignatures: 9 | 10 | seg.cells 11 | seg.nulite 12 | seg.semantic 13 | seg.tissue 14 | seg.artifact 15 | -------------------------------------------------------------------------------- /docs/source/api/tools.rst: -------------------------------------------------------------------------------- 1 | Tools: :code:`tl` 2 | ----------------- 3 | 4 | 5 | Image Embedding 6 | ~~~~~~~~~~~~~~~ 7 | 8 | .. currentmodule:: lazyslide 9 | 10 | .. autosummary:: 11 | :toctree: _autogen 12 | :nosignatures: 13 | 14 | tl.feature_extraction 15 | tl.feature_aggregation 16 | tl.spatial_features 17 | tl.feature_utag 18 | 19 | Tissue Geometry 20 | ~~~~~~~~~~~~~~~ 21 | 22 | .. currentmodule:: lazyslide 23 | 24 | .. autosummary:: 25 | :toctree: _autogen 26 | :nosignatures: 27 | 28 | tl.tissue_props 29 | 30 | 31 | Tissue Spatial Domain 32 | ~~~~~~~~~~~~~~~~~~~~~ 33 | 34 | .. currentmodule:: lazyslide 35 | 36 | .. autosummary:: 37 | :toctree: _autogen 38 | :nosignatures: 39 | 40 | tl.spatial_domain 41 | tl.tile_shaper 42 | 43 | 44 | Multi-Modal Analysis 45 | ~~~~~~~~~~~~~~~~~~~~ 46 | 47 | .. 
currentmodule:: lazyslide 48 | 49 | .. autosummary:: 50 | :toctree: _autogen 51 | :nosignatures: 52 | 53 | tl.text_embedding 54 | tl.text_image_similarity 55 | 56 | 57 | Zero-shot Learning 58 | ~~~~~~~~~~~~~~~~~~ 59 | 60 | .. currentmodule:: lazyslide 61 | 62 | .. autosummary:: 63 | :toctree: _autogen 64 | :nosignatures: 65 | 66 | tl.zero_shot_score 67 | tl.slide_caption 68 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | import lazyslide 4 | 5 | project = "LazySlide" 6 | copyright = f"{datetime.now().year}, Rendeiro Lab" 7 | author = "LazySlide Contributors" 8 | release = lazyslide.__version__ 9 | 10 | # -- General configuration --------------------------------------------------- 11 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 12 | 13 | extensions = [ 14 | "numpydoc", 15 | "sphinx.ext.autodoc", 16 | "sphinx.ext.autosummary", 17 | "sphinx.ext.autosectionlabel", 18 | "matplotlib.sphinxext.plot_directive", 19 | "sphinx.ext.intersphinx", 20 | "sphinx_design", 21 | "sphinx_copybutton", 22 | "myst_nb", 23 | "sphinx_contributors", 24 | ] 25 | autoclass_content = "class" 26 | autodoc_docstring_signature = True 27 | autodoc_default_options = { 28 | "members": True, 29 | "show-inheritance": True, 30 | "no-undoc-members": True, 31 | "special-members": "__call__", 32 | "exclude-members": "__init__, __weakref__", 33 | "class-doc-from": "class", 34 | } 35 | autodoc_typehints = "none" 36 | # setting autosummary 37 | autosummary_generate = True 38 | numpydoc_show_class_members = False 39 | add_module_names = False 40 | 41 | templates_path = ["_templates"] 42 | exclude_patterns = [] 43 | 44 | 45 | html_theme = "sphinx_book_theme" 46 | html_static_path = ["_static"] 47 | html_logo = "_static/logo@3x.png" 48 | html_css_files = ["custom.css"] 49 | html_theme_options = { 50 | "repository_url": "https://github.com/rendeirolab/LazySlide", 51 | "navigation_with_keys": True, 52 | "show_prev_next": False, 53 | } 54 | # html_sidebars = {"installation": [], "cli": []} 55 | 56 | nb_output_stderr = "remove" 57 | nb_execution_mode = "off" 58 | nb_merge_streams = True 59 | myst_enable_extensions = [ 60 | "colon_fence", 61 | "html_image", 62 | ] 63 | 64 | copybutton_prompt_text = r">>> |\.\.\. |\$ |In \[\d*\]: | {2,5}\.\.\.: | {5," r"8}: " 65 | copybutton_prompt_is_regexp = True 66 | 67 | # Plot directive 68 | plot_include_source = True 69 | plot_html_show_source_link = False 70 | plot_html_show_formats = False 71 | plot_formats = [("png", 200)] 72 | 73 | intersphinx_mapping = { 74 | "wsidata": ("https://wsidata.readthedocs.io/en/latest", None), 75 | "torch": ("https://pytorch.org/docs/stable/", None), 76 | } 77 | -------------------------------------------------------------------------------- /docs/source/contributing.rst: -------------------------------------------------------------------------------- 1 | Contributing 2 | ============ 3 | 4 | We welcome contributions to the LazySlide project. This document provides guidelines for contributing to the project. 5 | 6 | Project overview 7 | ---------------- 8 | 9 | LazySlide is a modularized and scalable whole slide image analysis toolkit. 
The project is structured as follows: 10 | 11 | - ``src/lazyslide``: Main package code 12 | - ``tests``: Test files 13 | - ``docs``: Documentation 14 | 15 | 16 | For core contributors 17 | --------------------- 18 | 19 | Please do not commit directly to the ``main`` branch. 20 | Instead, create a new branch for your changes and submit a pull request. 21 | 22 | Set up development environment 23 | ------------------------------ 24 | 25 | We use `uv `_ to manage our development environment. 26 | Please make sure you have it installed before proceeding. 27 | 28 | 1. Clone the repository:: 29 | 30 | git clone https://github.com/rendeirolab/lazyslide.git 31 | # or 32 | gh repo clone rendeirolab/lazyslide 33 | 34 | 2. Checkout a new branch:: 35 | 36 | git checkout -b my-new-branch 37 | 38 | 3. We use `uv `_ to manage our development environment:: 39 | 40 | uv lock 41 | uv run pre-commit install 42 | 43 | We use `pre-commit `_ to run code formatting and linting checks before each commit. 44 | 45 | 4. Start an IPython/Jupyter session:: 46 | 47 | uv run --with ipython ipython 48 | # or 49 | uv run --with jupyter jupyter lab 50 | 51 | 5. Make your changes. 52 | 53 | Testing 54 | ------- 55 | 56 | LazySlide uses pytest for testing. Tests are located in the ``tests`` directory. 57 | 58 | To run all tests:: 59 | 60 | uv run task test 61 | 62 | To run a specific test file:: 63 | 64 | uv run python -m pytest tests/test_example.py 65 | 66 | When adding new tests: 67 | 68 | 1. Create a new file in the ``tests`` directory with a name starting with ``test_``. 69 | 2. Import pytest and the module you want to test. 70 | 3. Write test functions with names starting with ``test_``. 71 | 4. Use assertions to verify expected behavior. 72 | 73 | Code style and development guidelines 74 | ------------------------------------- 75 | 76 | LazySlide uses `ruff `_ for both linting and formatting. 77 | The configuration is defined in ``pyproject.toml`` and enforced through pre-commit hooks. 78 | 79 | To format code:: 80 | 81 | uv run task fmt 82 | # or 83 | ruff format docs/source src/lazyslide tests 84 | 85 | Documentation 86 | ------------- 87 | 88 | Documentation is built using Sphinx and is located in the ``docs`` directory. 89 | 90 | To build the documentation:: 91 | 92 | # Build doc with cache 93 | uv run task doc-build 94 | # Fresh build 95 | uv run task doc-clean-build 96 | 97 | To serve the documentation locally:: 98 | 99 | uv run task doc-serve 100 | 101 | This will start a local server at http://localhost:8000. 102 | 103 | Documentation is written in reStructuredText (.rst) and Jupyter notebooks (.ipynb) using the myst-nb extension. 104 | 105 | Submitting changes 106 | ------------------ 107 | 108 | 1. Commit your changes and push them to your branch. 109 | 2. Create a pull request on GitHub. 110 | 3. Ensure all CI checks pass. 111 | 4. Wait for a review from a maintainer. 112 | 113 | Reporting issues 114 | ---------------- 115 | 116 | If you encounter a bug or have a feature request, please open an issue on the 117 | `GitHub repository `_. 
118 | 119 | When reporting a bug, please include: 120 | 121 | - A clear description of the issue 122 | - Steps to reproduce the problem 123 | - Expected behavior 124 | - Actual behavior 125 | - Any relevant logs or error messages 126 | - Your environment (OS, Python version, package versions) 127 | -------------------------------------------------------------------------------- /docs/source/contributors.rst: -------------------------------------------------------------------------------- 1 | Contributors 2 | ============ 3 | 4 | 5 | .. card:: Rendeiro Lab 6 | 7 | LazySlide is developed by `Rendeiro Lab `_ 8 | at the `CeMM Research Center for Molecular Medicine `_. 9 | 10 | .. image:: _static/cemm-logo.svg 11 | :width: 200px 12 | :align: center 13 | 14 | 15 | Developers 16 | ---------- 17 | 18 | - `Yimin Zheng `_, lead developer. 19 | - `Ernesto Abila `_, developer. 20 | - `Andre Rendeiro `_, lab leader, guidance and support. 21 | 22 | .. contributors:: rendeirolab/LazySlide 23 | :avatars: 24 | 25 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | LazySlide: Accessible and interoperable whole slide image analysis 2 | ================================================================== 3 | 4 | .. grid:: 1 2 2 2 5 | 6 | .. grid-item:: 7 | :columns: 12 4 4 4 8 | 9 | .. image:: _static/logo@3x.png 10 | :align: center 11 | :width: 150px 12 | 13 | .. grid-item:: 14 | :columns: 12 8 8 8 15 | :child-align: center 16 | 17 | **LazySlide** is a Python framework for whole slide image (WSI) analysis, 18 | designed to integrate seamlessly with the `scverse`_ ecosystem. 19 | 20 | By adopting standardized data structures and APIs familiar to the single-cell and genomics community, 21 | LazySlide enables intuitive, interoperable, and reproducible workflows for histological analysis. 22 | It supports a range of tasks from basic preprocessing to advanced deep learning applications, 23 | facilitating the integration of histopathology into modern computational biology. 24 | 25 | Key features 26 | ------------ 27 | 28 | * **Interoperability**: Built on top of `SpatialData`_, ensuring compatibility with scverse tools like `Scanpy`_, `Anndata`_, and `Squidpy`_. Check out `WSIData`_ for more details. 29 | * **Accessibility**: User-friendly APIs that cater to both beginners and experts in digital pathology. 30 | * **Scalability**: Efficient handling of large WSIs, enabling high-throughput analyses. 31 | * **Multimodal integration**: Combine histological data with transcriptomics, genomics, and textual annotations. 32 | * **Foundation model support**: Native integration with state-of-the-art models (e.g., UNI, CONCH, Gigapath, Virchow) for tasks like zero-shot classification and captioning. 33 | * **Deep learning ready**: Provides PyTorch dataloaders for seamless integration into machine learning pipelines. 34 | 35 | Whether you're a novice in digital pathology or an expert computational biologist, LazySlide provides a scalable and modular foundation to accelerate AI-driven discovery in tissue biology and pathology. 36 | 37 | .. image:: https://github.com/rendeirolab/LazySlide/raw/main/assets/Figure.png 38 | 39 | | 40 | 41 | .. toctree:: 42 | :maxdepth: 1 43 | :hidden: 44 | 45 | installation 46 | tutorials/index 47 | api/index 48 | contributing 49 | contributors 50 | 51 | 52 | .. grid:: 1 2 2 2 53 | :gutter: 2 54 | 55 | ..
grid-item-card:: Installation 56 | :link: installation 57 | :link-type: doc 58 | 59 | How to install LazySlide 60 | 61 | .. grid-item-card:: Tutorials 62 | :link: tutorials/index 63 | :link-type: doc 64 | 65 | Get started with LazySlide 66 | 67 | .. grid-item-card:: Contributing 68 | :link: contributing 69 | :link-type: doc 70 | 71 | Contribute to LazySlide 72 | 73 | .. grid-item-card:: Contributors 74 | :link: contributors 75 | :link-type: doc 76 | 77 | The team behind LazySlide 78 | 79 | .. _scverse: https://scverse.org/ 80 | .. _WSIData: https://wsidata.readthedocs.io/ 81 | .. _SpatialData: https://spatialdata.scverse.org/ 82 | .. _Scanpy: https://scanpy.readthedocs.io/ 83 | .. _Anndata: https://anndata.readthedocs.io/ 84 | .. _Squidpy: https://squidpy.readthedocs.io/ 85 | 86 | -------------------------------------------------------------------------------- /docs/source/installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | You can install :code:`lazyslide` with whichever package manager you prefer. 5 | 6 | .. tab-set:: 7 | 8 | .. tab-item:: PyPI 9 | 10 | The default installation. 11 | 12 | .. code-block:: bash 13 | 14 | pip install lazyslide 15 | 16 | .. tab-item:: uv 17 | 18 | .. code-block:: bash 19 | 20 | uv add lazyslide 21 | 22 | .. tab-item:: Conda 23 | 24 | .. warning:: 25 | 26 | Not available yet. 27 | 28 | .. code-block:: bash 29 | 30 | conda install -c conda-forge lazyslide 31 | 32 | .. tab-item:: Mamba 33 | 34 | .. warning:: 35 | 36 | Not available yet. 37 | 38 | .. code-block:: bash 39 | 40 | mamba install lazyslide 41 | 42 | .. tab-item:: Development 43 | 44 | If you want to install the latest version from the GitHub repository, you can use the following command: 45 | 46 | .. code-block:: bash 47 | 48 | pip install git+https://github.com/rendeirolab/lazyslide.git 49 | 50 | 51 | Installation of slide readers 52 | ----------------------------- 53 | 54 | LazySlide uses :code:`wsidata` to handle IO for slide files. 55 | To support different file formats, you need to install the corresponding slide readers. 56 | The reader will be automatically detected by :code:`wsidata` when you open the slide file. 57 | 58 | 59 | .. tab-set:: 60 | 61 | .. tab-item:: TiffSlide 62 | 63 | `TiffSlide `_ is a cloud native openslide-python replacement 64 | based on tifffile. 65 | 66 | TiffSlide is installed by default. You don't need to install it manually. 67 | 68 | .. code-block:: bash 69 | 70 | pip install tiffslide 71 | 72 | .. tab-item:: OpenSlide 73 | 74 | `OpenSlide `_ is a C library that provides a simple interface to read whole-slide images. 75 | 76 | OpenSlide is installed by default; you don't need to install it manually. 77 | 78 | But you can always install it from PyPI: 79 | 80 | .. code-block:: bash 81 | 82 | pip install openslide-python openslide-bin 83 | 84 | In case your OpenSlide installation is not working, you can install it manually. 85 | 86 | For Linux and OSX users, it's suggested that you install :code:`openslide` with conda or mamba: 87 | 88 | .. code-block:: bash 89 | 90 | conda install -c conda-forge openslide-python 91 | # or 92 | mamba install -c conda-forge openslide-python 93 | 94 | 95 | For Windows users, you need to download compiled :code:`openslide` from 96 | `GitHub Release `_. 97 | If you open the folder, you should find a :code:`bin` folder. 98 | 99 | Make sure you point Python to the :code:`bin` folder so that it can locate the :code:`openslide` binary.
100 | You need to run following code to import the :code:`openslide`, 101 | it's suggested to run this code before everything: 102 | 103 | .. code-block:: python 104 | 105 | import os 106 | with os.add_dll_directory("path/to/openslide/bin")): 107 | import openslide 108 | 109 | .. tab-item:: BioFormats 110 | 111 | `BioFormats `_ is a standalone Java library 112 | for reading and writing life sciences image file formats. 113 | 114 | `scyjava `_ is used to interact with the BioFormats library. 115 | 116 | .. code-block:: bash 117 | 118 | pip install scyjava 119 | 120 | .. tab-item:: CuCIM 121 | 122 | `CuCIM `_ is a GPU-accelerated image I/O library. 123 | 124 | .. warning:: 125 | 126 | CuCIM support is not available yet. 127 | 128 | Please refer to the `CuCIM GitHub `_. -------------------------------------------------------------------------------- /docs/source/tutorials/.gitignore: -------------------------------------------------------------------------------- 1 | /tmp 2 | GTEx* 3 | *.sha256 -------------------------------------------------------------------------------- /docs/source/tutorials/05_training_models.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "1df73331906bcf35", 6 | "metadata": {}, 7 | "source": [ 8 | "# Training deep learning models with LazySlide" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "6a559156-6909-46c5-bd0c-597fc02f2fe5", 14 | "metadata": {}, 15 | "source": [ 16 | "## Classification task" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "id": "3b777679-2963-476c-8136-1b5d17ac33ee", 22 | "metadata": {}, 23 | "source": [ 24 | "## Segmentation task" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "id": "a774bc01-fc85-41ac-9d02-0f0914e6e804", 30 | "metadata": {}, 31 | "source": [ 32 | "## Tissue generative model" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "id": "4ad9b7e0-f76f-4ad3-b922-5e1c4815f316", 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [] 42 | } 43 | ], 44 | "metadata": { 45 | "kernelspec": { 46 | "display_name": "Python 3 (ipykernel)", 47 | "language": "python", 48 | "name": "python3" 49 | }, 50 | "language_info": { 51 | "codemirror_mode": { 52 | "name": "ipython", 53 | "version": 3 54 | }, 55 | "file_extension": ".py", 56 | "mimetype": "text/x-python", 57 | "name": "python", 58 | "nbconvert_exporter": "python", 59 | "pygments_lexer": "ipython3", 60 | "version": "3.12.8" 61 | }, 62 | "widgets": { 63 | "application/vnd.jupyter.widget-state+json": { 64 | "state": {}, 65 | "version_major": 2, 66 | "version_minor": 0 67 | } 68 | } 69 | }, 70 | "nbformat": 4, 71 | "nbformat_minor": 5 72 | } 73 | -------------------------------------------------------------------------------- /docs/source/tutorials/index.rst: -------------------------------------------------------------------------------- 1 | Tutorials 2 | ========= 3 | 4 | Here is a list of tutorials that will help you get started with the LazySlide. 5 | 6 | .. toctree:: 7 | :hidden: 8 | :maxdepth: 1 9 | 10 | 00_intro_wsi 11 | 01_preprocessing 12 | 02_feature_extraction 13 | 03_multiple_slides 14 | 04_genomics_integration 15 | 05_cell-segmentation 16 | 06_visualization 17 | 07_zero-shot-learning 18 | 19 | .. card:: Introduction to WSI 20 | 21 | :doc:`00_intro_wsi` 22 | 23 | .. card:: Preprocessing 24 | 25 | :doc:`01_preprocessing` 26 | 27 | .. 
card:: Feature extraction and spatial analysis 28 | 29 | :doc:`02_feature_extraction` 30 | 31 | .. card:: Working with multiple slides 32 | 33 | :doc:`03_multiple_slides` 34 | 35 | .. card:: Integration with RNA-seq 36 | 37 | :doc:`04_genomics_integration` 38 | 39 | .. card:: Cell segmentation 40 | 41 | :doc:`05_cell-segmentation` 42 | 43 | .. card:: WSI visualization in LazySlide 44 | 45 | :doc:`06_visualization` 46 | 47 | .. card:: Zero-shot learning LazySlide 48 | 49 | :doc:`07_zero-shot-learning` 50 | 51 | -------------------------------------------------------------------------------- /docs/source/tutorials/matplotlibrc: -------------------------------------------------------------------------------- 1 | pdf.fonttype: 42 2 | svg.fonttype: none 3 | font.family: sans-serif 4 | font.sans-serif: Arial 5 | font.size: 10.0 6 | figure.figsize: 4.0, 4.0 7 | savefig.dpi: 300 # figure dots per inch or 'figure' 8 | savefig.facecolor: none # figure face color when saving 9 | savefig.edgecolor: none # figure edge color when saving 10 | savefig.bbox: tight -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "lazyslide" 7 | description = "Modularized and scalable whole slide image analysis" 8 | readme = "README.md" 9 | requires-python = ">=3.10" 10 | license = "MIT" 11 | authors = [ 12 | {name = "Yimin Zheng", email = "yzheng@cemm.at"}, 13 | {name = "Ernesto Abila", email = "eabila@cemm.at"}, 14 | {name = "André F. Rendeiro", email = "arendeiro@cemm.at"}, 15 | ] 16 | keywords = ["histopathology", "whole slide image", "image analysis", "segmentation", "deep learning"] 17 | classifiers = [ 18 | "Development Status :: 3 - Alpha", 19 | "Intended Audience :: Science/Research", 20 | "License :: OSI Approved :: MIT License", 21 | "Natural Language :: English", 22 | "Operating System :: OS Independent", 23 | "Programming Language :: Python :: 3", 24 | "Topic :: File Formats", 25 | "Topic :: Scientific/Engineering :: Bio-Informatics", 26 | ] 27 | Documentation = "https://lazyslide.readthedocs.io" 28 | repository = "https://github.com/rendeirolab/lazyslide" 29 | dynamic = ["version"] 30 | dependencies = [ 31 | "wsidata>=0.6.0", 32 | "scikit-learn>=1.0", 33 | "matplotlib>=3.9.0", 34 | "matplotlib-scalebar>=0.9.0", 35 | "legendkit>=0.3.4", 36 | "rich>=13.0.0", 37 | "cyclopts>=3.0.0", 38 | "timm>=1.0.3", 39 | "torch>=2.0.0", 40 | "seaborn>=0.12.2", 41 | "psutil>=5.9.0", 42 | ] 43 | 44 | [project.optional-dependencies] 45 | all = [ 46 | "scipy>=1.15.1", 47 | "scanpy>=1.10.4", 48 | "torchvision>=0.15", # >0.15 49 | "torchstain>=1.4.1", 50 | "transformers>=4.49.0", 51 | ] 52 | 53 | # Define entry points 54 | [project.scripts] 55 | lazyslide = "lazyslide.__main__:app" 56 | zs = "lazyslide.__main__:app" 57 | 58 | [tool.hatch.version] 59 | path = "src/lazyslide/__init__.py" 60 | 61 | [tool.hatch.build.targets.sdist] 62 | exclude = [ 63 | "docs", 64 | "data", 65 | "assets", 66 | "tests", 67 | "scripts", 68 | ".readthedocs.yaml", 69 | ".github", 70 | ".gitignore", 71 | ] 72 | include = [ 73 | "README.md", 74 | "LICENSE", 75 | "pyproject.toml", 76 | "src/lazyslide", 77 | ] 78 | 79 | [tool.hatch.build.targets.wheel] 80 | packages = ["src/lazyslide", "README.md", "LICENSE", "pyproject.toml"] 81 | 82 | [tool.hatch.metadata] 83 | allow-direct-references = true 84 | 85 | [tool.ruff] 86 | 
lint.ignore = ["F401"] 87 | line-length = 88 88 | 89 | [tool.ruff.lint.per-file-ignores] 90 | "tests/test_example.py" = ["E402"] 91 | "tests/test_loader.py" = ["E402"] 92 | 93 | [tool.mypy] 94 | ignore_missing_imports = true 95 | 96 | [tool.taskipy.tasks] 97 | hello = "echo Hello, World!" 98 | test = "pytest tests --disable-warnings" 99 | test-ci = "python -X faulthandler -m pytest tests -v --tb=short --disable-warnings" 100 | doc-build = "sphinx-build -b html docs/source docs/build" 101 | doc-clean-build = "python docs/clean_up.py && sphinx-build -b html docs/source docs/build" 102 | doc-serve = "python -m http.server -d docs/build" 103 | fmt = "ruff format docs/source src/lazyslide tests" 104 | 105 | [tool.uv] 106 | default-groups = ["dev", "docs", "tutorials", "model"] 107 | 108 | [dependency-groups] 109 | dev = [ 110 | "jupyterlab>=4.3.5", 111 | "pytest>=8.3.4", 112 | "pre-commit>=4.1.0", 113 | "ruff>=0.9.4", 114 | "taskipy>=1.14.1", 115 | "torchvision>=0.21.0", 116 | "torchstain>=1.4.1", 117 | "matplotlib>=3.10.0", 118 | "matplotlib-scalebar>=0.9.0", 119 | "scikit-learn>=1.6.1", 120 | "scanpy>=1.10.4", 121 | "scipy>=1.15.1", 122 | "segmentation-models-pytorch>=0.4.0", 123 | "albumentations>=2.0.3", 124 | "spatialdata-plot>=0.2.9", 125 | "scyjava>=1.12.0", 126 | ] 127 | docs = [ 128 | "sphinx>=8.1.3", 129 | "sphinx-copybutton>=0.5.2", 130 | "sphinx-design>=0.6.1", 131 | "myst-nb>=1.1.2", 132 | "numpydoc>=1.8.0", 133 | "pydata-sphinx-theme>=0.16.1", 134 | "sphinx>=8.1.3", 135 | "sphinx-copybutton>=0.5.2", 136 | "sphinx-design>=0.6.1", 137 | "sphinx-book-theme>=1.1.3", 138 | "sphinx-contributors>=0.2.7", 139 | ] 140 | tutorials = [ 141 | "igraph>=0.11.8", 142 | "ipywidgets>=8.1.5", 143 | "marsilea>=0.5.1", 144 | "parse>=1.20.2", 145 | "gseapy>=1.1.7", 146 | "mpl-fontkit>=0.5.1", 147 | "matplotlib-venn>=1.1.2", 148 | "muon>=0.1.7", 149 | "mofapy2>=0.7.2", 150 | "pypalettes>=0.1.5", 151 | "bokeh>=3.7.2", 152 | "dask-jobqueue>=0.9.0", 153 | ] 154 | napari = [ 155 | "napari[all]>=0.5.6", 156 | "napari-spatialdata>=0.5.5", 157 | "spatialdata-plot>=0.2.9", 158 | ] 159 | model = [ 160 | "einops>=0.8.1", 161 | "einops-exts>=0.0.4", 162 | "environs>=14.1.1", 163 | "sacremoses>=0.1.1", 164 | "conch", 165 | "transformers>=4.49.0", 166 | ] 167 | 168 | 169 | [tool.uv.sources] 170 | # wsidata = { git = "https://github.com/rendeirolab/wsidata", branch = "main" } 171 | # wsidata = { path = "../wsidata", editable = true } 172 | conch = { git = "https://github.com/mahmoodlab/CONCH.git" } 173 | 174 | [tool.uv.workspace] 175 | members = ["scripts/grandqc/artifacts_detection"] 176 | 177 | [tool.pytest.ini_options] 178 | filterwarnings = [ 179 | "ignore::UserWarning" 180 | ] 181 | -------------------------------------------------------------------------------- /src/lazyslide/__init__.py: -------------------------------------------------------------------------------- 1 | """Efficient and Scalable Whole Slide Image (WSI) processing library.""" 2 | 3 | __version__ = "0.6.0" 4 | 5 | 6 | import sys 7 | 8 | # Re-export the public API 9 | from wsidata import open_wsi, agg_wsi 10 | 11 | from . import cv 12 | from . import io 13 | from . import models 14 | from . import plotting as pl 15 | from . import preprocess as pp 16 | from . import segmentation as seg 17 | from . import tools as tl 18 | from . import datasets 19 | from . 
import metrics 20 | 21 | # Inject the aliases into the current module 22 | sys.modules.update({f"{__name__}.{m}": globals()[m] for m in ["tl", "pp", "pl", "seg"]}) 23 | del sys 24 | 25 | 26 | __all__ = [ 27 | "open_wsi", 28 | "agg_wsi", 29 | "pp", 30 | "tl", 31 | "pl", 32 | "seg", 33 | "cv", 34 | "models", 35 | "io", 36 | ] 37 | -------------------------------------------------------------------------------- /src/lazyslide/_const.py: -------------------------------------------------------------------------------- 1 | class Key: 2 | tissue_qc = "qc" 3 | tile_qc = "qc" 4 | tissue: str = "tissues" 5 | tissue_id: str = "tissue_id" 6 | tiles = "tiles" 7 | tile_spec: str = "tile_spec" 8 | annotations: str = "annotations" 9 | 10 | @classmethod 11 | def tile_graph(cls, name): 12 | return f"{name}_graph" 13 | 14 | @classmethod 15 | def feature(cls, name, tile_key=None): 16 | tile_key = tile_key or cls.tiles 17 | return f"{name}_{tile_key}" 18 | 19 | @classmethod 20 | def feature_slide(cls, name, tile_key=None): 21 | tile_key = tile_key or cls.tiles 22 | return f"{name}_{tile_key}_slide" 23 | -------------------------------------------------------------------------------- /src/lazyslide/_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import inspect 4 | import os 5 | from functools import wraps 6 | from types import FrameType 7 | 8 | from rich.console import Console 9 | 10 | console = Console() 11 | 12 | 13 | def get_torch_device(): 14 | """Automatically get the torch device""" 15 | import torch 16 | 17 | if torch.cuda.is_available(): 18 | device = torch.device("cuda") 19 | elif torch.backends.mps.is_available(): 20 | device = torch.device("mps") 21 | else: 22 | device = torch.device("cpu") 23 | return device 24 | 25 | 26 | def default_pbar(disable=False): 27 | """Get the default progress bar""" 28 | from rich.progress import Progress 29 | from rich.progress import ( 30 | TextColumn, 31 | BarColumn, 32 | TaskProgressColumn, 33 | TimeRemainingColumn, 34 | ) 35 | 36 | return Progress( 37 | TextColumn("[progress.description]{task.description}"), 38 | BarColumn(bar_width=30), 39 | TaskProgressColumn(), 40 | TimeRemainingColumn(compact=True, elapsed_when_finished=True), 41 | disable=disable, 42 | console=console, 43 | transient=True, 44 | ) 45 | 46 | 47 | def chunker(seq, num_workers): 48 | avg = len(seq) / num_workers 49 | out = [] 50 | last = 0.0 51 | 52 | while last < len(seq): 53 | out.append(seq[int(last) : int(last + avg)]) 54 | last += avg 55 | 56 | return out 57 | 58 | 59 | def find_stack_level() -> int: 60 | """ 61 | Find the first place in the stack that is not inside pandas 62 | (tests notwithstanding). 
63 | """ 64 | 65 | import pandas as pd 66 | 67 | pkg_dir = os.path.dirname(pd.__file__) 68 | test_dir = os.path.join(pkg_dir, "tests") 69 | 70 | # https://stackoverflow.com/questions/17407119/python-inspect-stack-is-slow 71 | frame: FrameType | None = inspect.currentframe() 72 | try: 73 | n = 0 74 | while frame: 75 | filename = inspect.getfile(frame) 76 | if filename.startswith(pkg_dir) and not filename.startswith(test_dir): 77 | frame = frame.f_back 78 | n += 1 79 | else: 80 | break 81 | finally: 82 | # See note in 83 | # https://docs.python.org/3/library/inspect.html#inspect.Traceback 84 | del frame 85 | return n 86 | 87 | 88 | def _param_doc(param_type, param_text): 89 | return f"""{param_type}\n\t{param_text}""" 90 | 91 | 92 | PARAMS_DOCSTRING = { 93 | "wsi": _param_doc( 94 | param_type=":class:`WSIData `", 95 | param_text="The WSIData object to work on.", 96 | ), 97 | "key_added": _param_doc( 98 | param_type="str, default: '{key_added}'", 99 | param_text="The key to save the result in the WSIData object.", 100 | ), 101 | } 102 | 103 | 104 | def _doc(obj=None, *, key_added: str = None): 105 | """ 106 | A decorator to inject docstring to an object by replacing the placeholder in docstring by looking up a dict. 107 | """ 108 | 109 | def decorator(obj): 110 | if obj.__doc__ is not None: 111 | if key_added is not None: 112 | PARAMS_DOCSTRING["key_added"] = PARAMS_DOCSTRING["key_added"].format( 113 | key_added=key_added 114 | ) 115 | obj.__doc__ = obj.__doc__.format(**PARAMS_DOCSTRING) 116 | 117 | @wraps(obj) 118 | def wrapper(*args, **kwargs): 119 | return obj(*args, **kwargs) 120 | 121 | return wrapper 122 | 123 | if obj is None: 124 | return decorator 125 | else: 126 | return decorator(obj) 127 | -------------------------------------------------------------------------------- /src/lazyslide/cv/__init__.py: -------------------------------------------------------------------------------- 1 | from .mask import Mask, BinaryMask, MultiLabelMask, MultiClassMask 2 | from .tiles_merger import merge_polygons 3 | -------------------------------------------------------------------------------- /src/lazyslide/cv/scorer/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import ScorerBase, ComposeScorer 2 | from .focuslitenn import FocusLite 3 | from .module import Contrast, SplitRGB, Redness, Brightness 4 | -------------------------------------------------------------------------------- /src/lazyslide/cv/scorer/base.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | 3 | ScoreResult = namedtuple("ScoreResult", ["scores", "qc"]) 4 | 5 | 6 | class ScorerBase: 7 | """ 8 | Base class for all scorers. 9 | 10 | All scores are operated on a patch. 11 | 12 | Image -> float 13 | """ 14 | 15 | def __call__(self, patch, mask=None): 16 | return self.apply(patch, mask=None) 17 | 18 | def __repr__(self): 19 | return f"{self.__class__.__name__}()" 20 | 21 | def apply(self, patch, mask=None) -> ScoreResult: 22 | """The scorer will return the scores and the bool value indicating of QC""" 23 | raise NotImplementedError 24 | 25 | 26 | class ComposeScorer(ScorerBase): 27 | """ 28 | Compose multiple scorers into one. 29 | 30 | Parameters 31 | ---------- 32 | scorers : List[ScorerBase] 33 | List of scorers to be composed. 
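
    Examples
    --------
    A minimal sketch combining two of the built-in scorers; ``patch`` is assumed
    to be a ``numpy`` RGB array of shape (H, W, 3):

    >>> from lazyslide.cv.scorer import ComposeScorer, Brightness, Contrast
    >>> scorer = ComposeScorer([Brightness(), Contrast()])
    >>> result = scorer.apply(patch)  # ScoreResult with merged scores and combined qc
    >>> sorted(result.scores)
    ['brightness', 'contrast']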
34 | """ 35 | 36 | def __init__(self, scorers): 37 | self.scorers = scorers 38 | 39 | def apply(self, patch, mask=None) -> ScoreResult: 40 | scores = {} 41 | qc = True 42 | for scorer in self.scorers: 43 | score, _qc = scorer.apply(patch, mask) 44 | scores.update(score) 45 | qc &= _qc 46 | return ScoreResult(scores=scores, qc=qc) 47 | -------------------------------------------------------------------------------- /src/lazyslide/cv/scorer/focuslitenn/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import FocusLite 2 | -------------------------------------------------------------------------------- /src/lazyslide/cv/scorer/focuslitenn/focuslitenn-2kernel-mse.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rendeirolab/LazySlide/f39634cc994b3098b0933075b9d25ecd99b9014e/src/lazyslide/cv/scorer/focuslitenn/focuslitenn-2kernel-mse.pt -------------------------------------------------------------------------------- /src/lazyslide/cv/scorer/focuslitenn/model.py: -------------------------------------------------------------------------------- 1 | import math 2 | from pathlib import Path 3 | 4 | import numpy as np 5 | 6 | from lazyslide.cv.scorer.base import ScorerBase, ScoreResult 7 | 8 | try: 9 | import torch 10 | 11 | class FocusLiteNN(torch.nn.Module): 12 | """ 13 | A FocusLiteNN model for filtering out-of-focus regions in whole slide images. 14 | """ 15 | 16 | def __init__(self, num_channel=2): 17 | super().__init__() 18 | self.num_channel = num_channel 19 | self.conv = torch.nn.Conv2d( 20 | 3, self.num_channel, 7, stride=5, padding=1 21 | ) # 47x47 22 | self.maxpool = torch.nn.MaxPool2d(kernel_size=47) 23 | if self.num_channel > 1: 24 | self.fc = torch.nn.Conv2d(self.num_channel, 1, 1, stride=1, padding=0) 25 | 26 | for m in self.modules(): 27 | if isinstance(m, torch.nn.Conv2d): 28 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 29 | m.weight.data.normal_(0, math.sqrt(2.0 / n)) 30 | 31 | def forward(self, x): 32 | batch_size = x.size()[0] 33 | 34 | x = self.conv(x) 35 | x = -self.maxpool(-x) # minpooling 36 | if self.num_channel > 1: 37 | x = self.fc(x) 38 | x = x.view(batch_size, -1) 39 | 40 | return x 41 | except ImportError: 42 | 43 | class FocusLiteNN: 44 | def __init__(self, *args, **kwargs): 45 | raise ImportError( 46 | "FocusLiteNN requires torch. You can install it using `pip install torch`." 47 | "Please restart the kernel after installation." 
48 | ) 49 | 50 | 51 | def load_focuslite_model(device="cpu"): 52 | model = FocusLiteNN() 53 | if not hasattr(model, "forward"): 54 | raise ModuleNotFoundError("To use Focuslite, you need to install pytorch") 55 | ckpt = torch.load( 56 | Path(__file__).parent / "focuslitenn-2kernel-mse.pt", 57 | map_location=device, 58 | weights_only=True, 59 | ) 60 | model.load_state_dict(ckpt["state_dict"]) 61 | model.eval() 62 | # model = torch.compile(model) 63 | return model 64 | 65 | 66 | class FocusLite(ScorerBase): 67 | # The device must be CPU, otherwise this module cannot be serialized 68 | def __init__(self, threshold=3, device="cpu"): 69 | from torchvision.transforms import ToTensor, Resize 70 | 71 | # threshold should be between 1 and 12 72 | if not (1 <= threshold <= 12): 73 | raise ValueError("threshold should be between 1 and 12") 74 | self.threshold = threshold 75 | self.model = load_focuslite_model(device) 76 | self.to_tensor = ToTensor() 77 | self.resize = Resize((256, 256), antialias=False) 78 | 79 | def apply(self, patch, mask=None): 80 | """Higher score means the patch is more clean, range from 0 to 1""" 81 | arr = self.to_tensor(patch) 82 | # If the image is not big enough, resize it 83 | if arr.shape[1] < 256 or arr.shape[2] < 256: 84 | arr = self.resize(arr) 85 | arr = torch.stack([arr], dim=0) 86 | score = self.model(arr) 87 | score = max(0, np.mean(torch.squeeze(score.cpu().data, dim=1).numpy())) 88 | return ScoreResult(scores={"focus": score}, qc=score < self.threshold) 89 | -------------------------------------------------------------------------------- /src/lazyslide/cv/scorer/module.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | from .base import ScorerBase, ScoreResult 5 | from .utils import dtype_limits 6 | 7 | 8 | class SplitRGB(ScorerBase): 9 | """ 10 | Calculate the RGB value of a patch. 11 | 12 | Brightness is calculated as the mean of the pixel values. 13 | 14 | The patch need to be in shape (H, W, 3). 15 | 16 | Parameters 17 | ---------- 18 | red_threshold : float 19 | Threshold to determine if a patch is red enough. 
20 | 21 | """ 22 | 23 | def __init__( 24 | self, 25 | threshold: (int, int, int) = ( 26 | 0, 27 | 0, 28 | 0, 29 | ), 30 | method="mean", 31 | dim="xyc", 32 | ): 33 | self.threshold = np.array(threshold) 34 | self.method = method 35 | self.dim = dim 36 | if dim == "xyc": 37 | self.func = self._score_xyc 38 | elif dim == "cyx": 39 | self.func = self._score_cyx 40 | else: 41 | raise ValueError(f"Unknown dim {dim}, should be 'xyc' or 'cyx'") 42 | 43 | def _score_xyc(self, patch, mask=None): 44 | if mask is not None: 45 | img = patch[mask] 46 | else: 47 | img = patch 48 | c_int = getattr(img, self.method)(axis=(0, 1)) 49 | return {"red": c_int[0], "green": c_int[1], "blue": c_int[2]} 50 | 51 | def _score_cyx(self, patch, mask=None): 52 | if mask is not None: 53 | c_int = [patch[c][mask].mean() for c in range(3)] 54 | else: 55 | c_int = [patch[c].mean() for c in range(3)] 56 | return {"red": c_int[0], "green": c_int[1], "blue": c_int[2]} 57 | 58 | def apply(self, patch, mask=None): 59 | scores = self.func(patch, mask) 60 | return ScoreResult(scores=scores, qc=scores > self.threshold) 61 | 62 | 63 | class Redness(SplitRGB): 64 | def __init__(self, red_threshold=0.5, **kwargs): 65 | self.red_threshold = red_threshold 66 | super().__init__(**kwargs) 67 | 68 | def apply(self, patch, mask=None): 69 | scores = self.func(patch, mask) 70 | return ScoreResult( 71 | scores={"redness": scores["red"]}, qc=scores["red"] > self.red_threshold 72 | ) 73 | 74 | 75 | class Brightness(ScorerBase): 76 | def __init__(self, threshold=235): 77 | self.threshold = threshold 78 | 79 | def apply(self, patch, mask=None) -> ScoreResult: 80 | if mask is not None: 81 | bright = patch[mask].mean() 82 | else: 83 | bright = patch.mean() 84 | return ScoreResult(scores={"brightness": bright}, qc=bright < self.threshold) 85 | 86 | 87 | class Contrast(ScorerBase): 88 | """ 89 | Calculate the contrast of a patch. 90 | 91 | Contrast is calculated as the standard deviation of the pixel values. 92 | 93 | Parameters 94 | ---------- 95 | threshold : float 96 | Threshold to determine if a patch is contrasted or not. 97 | """ 98 | 99 | def __init__( 100 | self, 101 | fraction_threshold=0.05, 102 | lower_percentile=1, 103 | upper_percentile=99, 104 | ): 105 | self.fraction_threshold = fraction_threshold 106 | self.lower_percentile = lower_percentile 107 | self.upper_percentile = upper_percentile 108 | 109 | def apply(self, patch, mask=None): 110 | patch = np.asarray(patch) 111 | if patch.dtype == bool: 112 | ratio = int((patch.max() == 1) and (patch.min() == 0)) 113 | elif patch.ndim == 3: 114 | if patch.shape[2] == 4: 115 | patch = cv2.cvtColor(patch, cv2.COLOR_RGBA2RGB) 116 | if patch.shape[2] == 3: 117 | patch = cv2.cvtColor(patch, cv2.COLOR_RGB2GRAY) 118 | 119 | dlimits = dtype_limits(patch, clip_negative=False) 120 | limits = np.percentile( 121 | patch, [self.lower_percentile, self.upper_percentile] 122 | ) 123 | ratio = (limits[1] - limits[0]) / (dlimits[1] - dlimits[0]) 124 | else: 125 | raise NotImplementedError("Only support 3D image or 2D image") 126 | 127 | return ScoreResult( 128 | scores={"contrast": ratio}, qc=ratio > self.fraction_threshold 129 | ) 130 | 131 | 132 | class Sharpness(ScorerBase): 133 | """ 134 | Calculate the sharpness of a patch. 135 | 136 | Sharpness is calculated as the variance of the Laplacian of the pixel values. 137 | 138 | Parameters 139 | ---------- 140 | threshold : float 141 | Threshold to determine if a patch is sharp or not. 
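
    Examples
    --------
    A minimal sketch; ``patch`` is assumed to be a ``numpy`` uint8 patch and the
    threshold value here is illustrative only:

    >>> scorer = Sharpness(threshold=100)
    >>> result = scorer.apply(patch)
    >>> result.qc  # True when the Laplacian variance exceeds the threshold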
142 | """ 143 | 144 | def __init__(self, threshold: float = 0.5): 145 | self.threshold = threshold 146 | 147 | def apply(self, patch, mask=None): 148 | score = cv2.Laplacian(patch, cv2.CV_64F).var() 149 | return ScoreResult(scores={"sharpness": score}, qc=score > self.threshold) 150 | 151 | 152 | class Sobel(ScorerBase): 153 | """ 154 | Calculate the sobel of a patch. 155 | 156 | Sobel is calculated as the variance of the Sobel of the pixel values. 157 | 158 | Parameters 159 | ---------- 160 | threshold : float 161 | Threshold to determine if a patch is sharp or not. 162 | """ 163 | 164 | name = "sobel" 165 | 166 | def __init__(self, threshold: float = 0.5): 167 | self.threshold = threshold 168 | 169 | def apply(self, patch, mask=None): 170 | score = cv2.Sobel(patch, 3, 3, 3).var() 171 | return ScoreResult(scores={"sobel": score}, qc=score > self.threshold) 172 | 173 | 174 | class Canny(ScorerBase): 175 | """ 176 | Calculate the canny of a patch. 177 | 178 | Canny is calculated as the variance of the Canny of the pixel values. 179 | 180 | Parameters 181 | ---------- 182 | threshold : float 183 | Threshold to determine if a patch is sharp or not. 184 | """ 185 | 186 | name = "canny" 187 | 188 | def __init__(self, threshold: float = 0.5): 189 | self.threshold = threshold 190 | 191 | def apply(self, patch, mask=None): 192 | score = cv2.Canny(patch, cv2.CV_64F).var() 193 | return ScoreResult(scores={"canny": score}, qc=score > self.threshold) 194 | -------------------------------------------------------------------------------- /src/lazyslide/cv/scorer/utils.py: -------------------------------------------------------------------------------- 1 | # This is copied from https://github.com/scikit-image/scikit-image/blob/v0.24.0/skimage/util/dtype.py 2 | import warnings 3 | 4 | import numpy as np 5 | 6 | _integer_types = ( 7 | np.int8, 8 | np.byte, 9 | np.int16, 10 | np.short, 11 | np.int32, 12 | np.int64, 13 | np.longlong, 14 | np.int_, 15 | np.intp, 16 | np.intc, 17 | int, 18 | np.uint8, 19 | np.ubyte, 20 | np.uint16, 21 | np.ushort, 22 | np.uint32, 23 | np.uint64, 24 | np.ulonglong, 25 | np.uint, 26 | np.uintp, 27 | np.uintc, 28 | ) 29 | _integer_ranges = {t: (np.iinfo(t).min, np.iinfo(t).max) for t in _integer_types} 30 | dtype_range = { 31 | bool: (False, True), 32 | np.bool_: (False, True), 33 | float: (-1, 1), 34 | np.float16: (-1, 1), 35 | np.float32: (-1, 1), 36 | np.float64: (-1, 1), 37 | } 38 | 39 | with warnings.catch_warnings(): 40 | warnings.filterwarnings("ignore", category=DeprecationWarning) 41 | 42 | # np.bool8 is a deprecated alias of np.bool_ 43 | if hasattr(np, "bool8"): 44 | dtype_range[np.bool8] = (False, True) 45 | 46 | dtype_range.update(_integer_ranges) 47 | 48 | _supported_types = list(dtype_range.keys()) 49 | 50 | 51 | def dtype_limits(image, clip_negative=False): 52 | """Return intensity limits, i.e. (min, max) tuple, of the image's dtype. 53 | 54 | Parameters 55 | ---------- 56 | image : ndarray 57 | Input image. 58 | clip_negative : bool, optional 59 | If True, clip the negative range (i.e. return 0 for min intensity) 60 | even if the image dtype allows negative values. 61 | 62 | Returns 63 | ------- 64 | imin, imax : tuple 65 | Lower and upper intensity limits. 
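
    Examples
    --------
    Intensity limits for two common integer dtypes:

    >>> import numpy as np
    >>> dtype_limits(np.zeros((2, 2), dtype=np.uint8))
    (0, 255)
    >>> dtype_limits(np.zeros((2, 2), dtype=np.int8), clip_negative=True)
    (0, 127)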
66 | """ 67 | imin, imax = dtype_range[image.dtype.type] 68 | if clip_negative: 69 | imin = 0 70 | return imin, imax 71 | -------------------------------------------------------------------------------- /src/lazyslide/cv/tiles_merger.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from itertools import combinations 4 | 5 | import geopandas as gpd 6 | import numpy as np 7 | from shapely.ops import unary_union 8 | from shapely.strtree import STRtree 9 | 10 | 11 | class PolygonMerger: 12 | """ 13 | Merge polygons from different tiles. 14 | 15 | If the polygons are overlapping/touching, the overlapping regions are merged. 16 | 17 | If probabilities exist, the probabilities are averaged weighted by the area of the polygons. 18 | 19 | Parameters 20 | ---------- 21 | gdf : `GeoDataFrame ` 22 | The GeoDataFrame containing the polygons. 23 | class_col : str, default: None 24 | The column that specify the names of the polygons. 25 | prob_col : str, default: None 26 | The column that specify the probabilities of the polygons. 27 | buffer_px : float, default: 0 28 | The buffer size for the polygons to test the intersection. 29 | drop_overlap : float, default: 0.9 30 | The ratio to drop the overlapping polygons. 31 | 32 | """ 33 | 34 | def __init__( 35 | self, 36 | gdf: gpd.GeoDataFrame, 37 | class_col: str = None, 38 | prob_col: str = None, 39 | buffer_px: float = 0, 40 | drop_overlap: float = 0.9, 41 | ): 42 | self.gdf = gdf 43 | self.class_col = class_col 44 | self.prob_col = prob_col 45 | self.buffer_px = buffer_px 46 | self.drop_overlap = drop_overlap 47 | 48 | self._has_class = class_col in gdf.columns if class_col else False 49 | self._has_prob = prob_col in gdf.columns if prob_col else False 50 | self._preprocessed_polygons = self._preprocess_polys() 51 | self._merged_polygons = None 52 | 53 | def _preprocess_polys(self): 54 | """Preprocess the polygons.""" 55 | new_gdf = self.gdf.copy() 56 | if self.buffer_px > 0: 57 | new_gdf["geometry"] = self.gdf["geometry"].buffer(self.buffer_px) 58 | # Filter out invalid and empty geometries efficiently 59 | return new_gdf[new_gdf["geometry"].is_valid & ~new_gdf["geometry"].is_empty] 60 | 61 | def _merge_overlap(self, gdf: gpd.GeoDataFrame): 62 | """ 63 | Merge the overlapping polygons recursively. 
64 | 65 | This function has no assumptions about the class or probability 66 | """ 67 | pass 68 | 69 | def _tree_merge(self, gdf: gpd.GeoDataFrame): 70 | polygons = gdf["geometry"].tolist() 71 | tree = STRtree(polygons) 72 | visited = set() 73 | merged = [] 74 | 75 | for geom in polygons: 76 | if geom in visited: 77 | continue 78 | 79 | groups_ix = tree.query(geom, predicate="intersects") 80 | groups_ix = set([g for g in groups_ix if g not in visited]) 81 | if len(groups_ix) == 0: 82 | continue 83 | else: 84 | # continue finding other polygons that intersect with the group 85 | # until the group size is stable 86 | current_group_size = len(groups_ix) 87 | while True: 88 | new_groups_ix = set() 89 | for ix in groups_ix: 90 | c_groups_ix = tree.query(polygons[ix], predicate="intersects") 91 | c_groups_ix = [g for g in c_groups_ix if g not in visited] 92 | new_groups_ix.update(c_groups_ix) 93 | groups_ix.update(new_groups_ix) 94 | if len(groups_ix) == current_group_size: 95 | break 96 | current_group_size = len(groups_ix) 97 | 98 | # Sort the group index 99 | groups_ix = np.sort(list(groups_ix)) 100 | 101 | # Merge the group 102 | merged_geoms = [] # (polygon, row_ix, groups_ix) 103 | 104 | if len(groups_ix) == 1: 105 | ix = groups_ix[0] 106 | m_geoms = polygons[ix] 107 | merged_geoms.append((m_geoms, ix, groups_ix)) 108 | else: 109 | m_geoms = [polygons[g] for g in groups_ix] 110 | if self._has_class: 111 | ref_df = gpd.GeoDataFrame( 112 | { 113 | "names": [gdf[self.class_col].values[g] for g in groups_ix], 114 | "index": groups_ix, 115 | "geometry": m_geoms, 116 | } 117 | ) 118 | 119 | # {class_name: polygon} 120 | named_polys = ( 121 | ref_df[["names", "geometry"]] 122 | .groupby("names") 123 | .apply(unary_union) 124 | .to_dict() 125 | ) 126 | 127 | if self.drop_overlap > 0: 128 | # If the two classes instances are more than 90% overlapping 129 | # The smaller one is removed 130 | while len(named_polys) > 1: 131 | names = list(named_polys.keys()) 132 | combs = combinations(names, 2) 133 | for n1, n2 in combs: 134 | if n1 in named_polys and n2 in named_polys: 135 | p1, p2 = named_polys[n1], named_polys[n2] 136 | if p1.intersection(p2).is_empty: 137 | continue 138 | area, drop = ( 139 | (p1.area, n1) 140 | if p1.area < p2.area 141 | else (p2.area, n2) 142 | ) 143 | union = p1.union(p2).area 144 | overlap_ratio = union / area 145 | if overlap_ratio > self.drop_overlap: 146 | del named_polys[drop] 147 | break 148 | for n, p in named_polys.items(): 149 | gs = ref_df[ref_df["names"] == n]["index"].tolist() 150 | merged_geoms.append((p, gs[0], gs)) 151 | else: 152 | m_geoms = unary_union(m_geoms) 153 | merged_geoms.append((m_geoms, groups_ix[0], groups_ix)) 154 | # Postprocess the merged polygon 155 | for m_geom, ix, gs_ix in merged_geoms: 156 | if self.buffer_px > 0: 157 | m_geom = m_geom.buffer(-self.buffer_px).buffer(0) 158 | if m_geom.is_valid & (m_geom.is_empty is False): 159 | m_data = gdf.iloc[ix].copy() 160 | m_data["geometry"] = m_geom 161 | if self._has_prob: 162 | gs_gdf = gdf.iloc[gs_ix] 163 | m_data[self.prob_col] = np.average( 164 | gs_gdf[self.prob_col], weights=gs_gdf["geometry"].area 165 | ) 166 | merged.append(m_data) 167 | for g in groups_ix: 168 | visited.add(g) 169 | return gpd.GeoDataFrame(merged) 170 | 171 | def merge(self): 172 | """Launch the merging process.""" 173 | self._merged_polygons = self._tree_merge(self._preprocessed_polygons) 174 | 175 | @property 176 | def merged_polygons(self): 177 | return self._merged_polygons 178 | 179 | 180 | def merge_polygons( 181 | 
gdf: gpd.GeoDataFrame, 182 | class_col: str = None, 183 | prob_col: str = None, 184 | buffer_px: float = 0, 185 | drop_overlap: float = 0.9, 186 | ): 187 | merger = PolygonMerger(gdf, class_col, prob_col, buffer_px, drop_overlap) 188 | merger.merge() 189 | return merger.merged_polygons 190 | 191 | 192 | merge_polygons.__doc__ = PolygonMerger.__doc__ 193 | -------------------------------------------------------------------------------- /src/lazyslide/cv/transform/__init__.py: -------------------------------------------------------------------------------- 1 | """This module is highly inspired by both torchvison and pathml""" 2 | 3 | from .compose import TissueDetectionHE 4 | 5 | from .mods import ( 6 | MedianBlur, 7 | GaussianBlur, 8 | BoxBlur, 9 | MorphOpen, 10 | MorphClose, 11 | BinaryThreshold, 12 | ArtifactFilterThreshold, 13 | Compose, 14 | ) 15 | -------------------------------------------------------------------------------- /src/lazyslide/cv/transform/compose.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | from shapely import Polygon 3 | 4 | from .mods import ( 5 | Transform, 6 | MedianBlur, 7 | MorphClose, 8 | ArtifactFilterThreshold, 9 | BinaryThreshold, 10 | ForegroundDetection, 11 | ) 12 | 13 | 14 | class TissueDetectionHE(Transform): 15 | """ 16 | Detect tissue regions from H&E stained slide. 17 | First applies a median blur, then binary thresholding, then morphological opening and closing, and finally 18 | foreground detection. 19 | 20 | Parameters 21 | ---------- 22 | use_saturation : bool 23 | Whether to convert to HSV and use saturation channel for tissue detection. 24 | If False, convert from RGB to greyscale and use greyscale image_ref for tissue detection. Defaults to True. 25 | blur_ksize : int 26 | kernel size used to apply median blurring. Defaults to 15. 27 | threshold : int 28 | threshold for binary thresholding. If None, uses Otsu's method. Defaults to None. 29 | morph_n_iter : int 30 | number of iterations of morphological opening and closing to apply. Defaults to 3. 31 | morph_k_size : int 32 | kernel size for morphological opening and closing. Defaults to 7. 
33 | min_region_size : int 34 | """ 35 | 36 | def __init__( 37 | self, 38 | use_saturation=False, 39 | blur_ksize=17, 40 | threshold=7, 41 | morph_n_iter=3, 42 | morph_k_size=7, 43 | min_tissue_area=0.01, 44 | min_hole_area=0.0001, 45 | detect_holes=True, 46 | filter_artifacts=True, 47 | ): 48 | self.set_params( 49 | use_saturation=use_saturation, 50 | blur_ksize=blur_ksize, 51 | threshold=threshold, 52 | morph_n_iter=morph_n_iter, 53 | morph_k_size=morph_k_size, 54 | min_tissue_area=min_tissue_area, 55 | min_hole_area=min_hole_area, 56 | detect_holes=detect_holes, 57 | filter_artifacts=filter_artifacts, 58 | ) 59 | 60 | if filter_artifacts: 61 | thresholder = ArtifactFilterThreshold(threshold=threshold) 62 | else: 63 | if threshold is None: 64 | thresholder = BinaryThreshold(use_otsu=True) 65 | else: 66 | thresholder = BinaryThreshold(use_otsu=False, threshold=threshold) 67 | 68 | foreground = ForegroundDetection( 69 | min_foreground_area=min_tissue_area, 70 | min_hole_area=min_hole_area, 71 | detect_holes=detect_holes, 72 | ) 73 | 74 | self.pipeline = [ 75 | MedianBlur(kernel_size=blur_ksize), 76 | thresholder, 77 | # MorphOpen(kernel_size=morph_k_size, n_iterations=morph_n_iter), 78 | MorphClose(kernel_size=morph_k_size, n_iterations=morph_n_iter), 79 | foreground, 80 | ] 81 | 82 | def apply(self, image): 83 | filter_artifacts = self.params["filter_artifacts"] 84 | use_saturation = self.params["use_saturation"] 85 | 86 | if not filter_artifacts: 87 | if use_saturation: 88 | image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)[:, :, 1] 89 | else: 90 | image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) 91 | 92 | for p in self.pipeline: 93 | image = p.apply(image) 94 | return image 95 | 96 | 97 | class Mask2Polygon(Transform): 98 | """ 99 | Convert binary mask to polygon. 100 | 101 | Parameters 102 | ---------- 103 | min_area : int 104 | Minimum area of detected regions to be included in the polygon. 
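
    Examples
    --------
    A minimal sketch; ``mask`` is assumed to be a 2D binary ``numpy`` array and
    the ``min_area`` value is illustrative only:

    >>> converter = Mask2Polygon(min_area=100)
    >>> polygons = converter.apply(mask)  # list of shapely Polygons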
105 | """ 106 | 107 | def __init__( 108 | self, 109 | min_area=0, 110 | morph_k_size=7, 111 | morph_n_iter=3, 112 | min_tissue_area=0.01, 113 | min_hole_area=0.0001, 114 | detect_holes=True, 115 | ): 116 | self.set_params(min_area=min_area) 117 | 118 | self.pipeline = [ 119 | # MorphOpen(kernel_size=morph_k_size, n_iterations=morph_n_iter), 120 | MorphClose(kernel_size=morph_k_size, n_iterations=morph_n_iter), 121 | ForegroundDetection( 122 | min_foreground_area=min_tissue_area, 123 | min_hole_area=min_hole_area, 124 | detect_holes=detect_holes, 125 | ), 126 | ] 127 | 128 | def apply(self, mask): 129 | min_area = self.params["min_area"] 130 | 131 | for p in self.pipeline: 132 | try: 133 | mask = p.apply(mask) 134 | except Exception as e: 135 | print(self.__class__.__name__, e) 136 | 137 | tissue_instances = mask 138 | polygons = [] 139 | if len(tissue_instances) == 0: 140 | return [] 141 | for tissue in tissue_instances: 142 | shell = tissue.contour 143 | if len(tissue.holes) == 0: 144 | tissue_poly = Polygon(shell) 145 | else: 146 | holes = [hole for hole in tissue.holes] 147 | tissue_poly = Polygon(shell, holes=holes) 148 | if tissue_poly.area < min_area: 149 | continue 150 | polygons.append(tissue_poly) 151 | return polygons 152 | -------------------------------------------------------------------------------- /src/lazyslide/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from ._sample import ( 2 | sample, 3 | gtex_artery, 4 | lung_carcinoma, 5 | ) 6 | -------------------------------------------------------------------------------- /src/lazyslide/datasets/_sample.py: -------------------------------------------------------------------------------- 1 | import pooch 2 | from wsidata import open_wsi 3 | 4 | ENTRY = pooch.create( 5 | path=pooch.os_cache("lazyslide"), 6 | base_url="https://lazyslide.blob.core.windows.net/lazyslide-data", 7 | registry={ 8 | "sample.svs": "sha256:ed92d5a9f2e86df67640d6f92ce3e231419ce127131697fbbce42ad5e002c8a7", 9 | "sample.zarr.zip": "sha256:075a3ab61e6958673d79612cc29796a92cf875ad049fc1fe5780587968635378", 10 | "GTEX-1117F-0526.svs": "sha256:222ab7f2bb42dcd0bcfaccd910cb13be452b453499e6117ab553aa6cd60a135e", 11 | "GTEX-1117F-0526.zarr.zip": "sha256:2323b656322d2dcc7e9d18aaf586b39a88bf8f2a3959f642f109eb54268f3732", 12 | "lung_carcinoma.ndpi": "sha256:3297b0a564f22940208c61caaca56d97ba81c9b6b7816ebc4042a087e557f85e", 13 | "lung_carcinoma.zarr.zip": "sha256:0a8ccfc608f55624b473c6711b55739c3279d3b6fc5b654395dfc23b010bf866", 14 | }, 15 | ) 16 | 17 | logger = pooch.get_logger() 18 | logger.setLevel("WARNING") 19 | 20 | 21 | def _load_dataset(slide_file, zarr_file, with_data=True, pbar=False): 22 | slide = ENTRY.fetch(slide_file) 23 | _ = ENTRY.fetch( 24 | zarr_file, 25 | progressbar=pbar, 26 | processor=pooch.Unzip(extract_dir=zarr_file.rstrip(".zip")), 27 | ) 28 | store = "auto" if with_data else None 29 | return open_wsi(slide, store=store) 30 | 31 | 32 | def sample(with_data: bool = True, pbar: bool = False): 33 | """ 34 | Load a small sample slide (~1.9 MB). 35 | 36 | Source: https://openslide.cs.cmu.edu/download/openslide-testdata/Aperio/CMU-1-Small-Region.svs 37 | 38 | Parameters 39 | ---------- 40 | with_data : bool, default: True 41 | Whether to load the associated zarr storage data. 42 | pbar : bool, default: False 43 | Whether to show the progress bar. 
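
    Examples
    --------
    A minimal sketch (the slide is downloaded into the pooch cache on first use):

    >>> from lazyslide import datasets
    >>> wsi = datasets.sample()  # returns a WSIData opened with wsidata.open_wsi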
44 | 45 | """ 46 | return _load_dataset( 47 | "sample.svs", "sample.zarr.zip", with_data=with_data, pbar=pbar 48 | ) 49 | 50 | 51 | def gtex_artery(with_data: bool = True, pbar: bool = False): 52 | """ 53 | A GTEX artery slide. 54 | 55 | Source: https://gtexportal.org/home/histologyPage, GTEX-1117F-0526 56 | 57 | Parameters 58 | ---------- 59 | with_data : bool, default: True 60 | Whether to load the associated zarr storage data. 61 | pbar : bool, default: False 62 | Whether to show the progress bar. 63 | 64 | """ 65 | return _load_dataset( 66 | "GTEX-1117F-0526.svs", 67 | "GTEX-1117F-0526.zarr.zip", 68 | with_data=with_data, 69 | pbar=pbar, 70 | ) 71 | 72 | 73 | def lung_carcinoma(with_data: bool = True, pbar: bool = False): 74 | """ 75 | A lung carcinoma slide. 76 | 77 | Source: https://idr.openmicroscopy.org/webclient/img_detail/9846318/?dataset=10801 78 | 79 | Parameters 80 | ---------- 81 | with_data : bool, default: True 82 | Whether to load the associated zarr storage data. 83 | pbar : bool, default: False 84 | Whether to show the progress bar. 85 | 86 | """ 87 | 88 | return _load_dataset( 89 | "lung_carcinoma.ndpi", "lung_carcinoma.zarr.zip", with_data=with_data, pbar=pbar 90 | ) 91 | -------------------------------------------------------------------------------- /src/lazyslide/io/__init__.py: -------------------------------------------------------------------------------- 1 | from ._annotaiton import load_annotations, export_annotations 2 | -------------------------------------------------------------------------------- /src/lazyslide/io/_annotaiton.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import json 4 | from itertools import cycle 5 | from pathlib import Path 6 | from typing import List, Literal, Mapping, Iterable 7 | 8 | import pandas as pd 9 | from geopandas import GeoDataFrame 10 | from wsidata import WSIData 11 | from wsidata.io import update_shapes_data, add_shapes 12 | 13 | from lazyslide._const import Key 14 | 15 | 16 | def _in_bounds_transform(wsi: WSIData, annos: GeoDataFrame, reverse: bool = False): 17 | from functools import partial 18 | from shapely.affinity import translate 19 | 20 | xoff, yoff, _, _ = wsi.properties.bounds 21 | if reverse: 22 | xoff, yoff = -xoff, -yoff 23 | trans = partial(translate, xoff=xoff, yoff=yoff) 24 | annos["geometry"] = annos["geometry"].apply(lambda x: trans(x)) 25 | return annos 26 | 27 | 28 | def load_annotations( 29 | wsi: WSIData, 30 | annotations: str | Path | GeoDataFrame = None, 31 | *, 32 | explode: bool = True, 33 | in_bounds: bool = False, 34 | join_with: str | List[str] = Key.tissue, 35 | join_to: str = None, 36 | json_flatten: str | List[str] = "classification", 37 | min_area: float = 1e2, 38 | key_added: str = "annotations", 39 | ): 40 | """Load the annotation file and add it to the WSIData 41 | 42 | Parameters 43 | ---------- 44 | wsi : :class:`WSIData ` 45 | The WSIData object to work on. 46 | annotations : str, Path, GeoDataFrame 47 | The path to the annotation file or the GeoDataFrame. 48 | explode : bool, default: True 49 | Whether to explode the annotations. 50 | in_bounds : bool, default: False 51 | Whether to move the annotations to the slide bounds. 52 | join_with : str, List[str], default: 'tissues' 53 | The key to join the annotations with. 54 | join_to : str, default: None 55 | The key to join the annotations to. 
56 | json_flatten : str, default: "classification" 57 | The column(s) to flatten the json data, if not exist, it will be ignored. 58 | "classification" is the default column for the QuPath annotations. 59 | min_area : float, default: 1e2 60 | The minimum area of the annotation. 61 | key_added : str, default: 'annotations' 62 | The key to store the annotations. 63 | 64 | """ 65 | import geopandas as gpd 66 | 67 | if isinstance(annotations, (str, Path)): 68 | geo_path = Path(annotations) 69 | anno_df = gpd.read_file(geo_path) 70 | elif isinstance(annotations, GeoDataFrame): 71 | anno_df = annotations 72 | else: 73 | raise ValueError(f"Invalid annotations: {annotations}") 74 | 75 | # remove crs 76 | anno_df.crs = None 77 | 78 | if explode: 79 | anno_df = ( 80 | anno_df.explode() 81 | .assign(**{"__area__": lambda x: x.geometry.area}) 82 | .query(f"__area__ > {min_area}") 83 | .drop(columns=["__area__"], errors="ignore") 84 | .reset_index(drop=True) 85 | ) 86 | 87 | if json_flatten is not None: 88 | 89 | def flatten_json(x): 90 | if isinstance(x, dict): 91 | return x 92 | elif isinstance(x, str): 93 | try: 94 | return json.loads(x) 95 | except json.JSONDecodeError: 96 | return {} 97 | 98 | if isinstance(json_flatten, str): 99 | json_flatten = [json_flatten] 100 | for col in json_flatten: 101 | if col in anno_df.columns: 102 | anno_df[col] = anno_df[col].apply(flatten_json) 103 | anno_df = anno_df.join( 104 | anno_df[col].apply(pd.Series).add_prefix(f"{col}_") 105 | ) 106 | anno_df.drop(columns=[col], inplace=True) 107 | 108 | if in_bounds: 109 | anno_df = _in_bounds_transform(wsi, anno_df) 110 | 111 | # get tiles 112 | if isinstance(join_with, str): 113 | join_with = [join_with] 114 | 115 | join_anno_df = anno_df.copy() 116 | for key in join_with: 117 | if key in wsi: 118 | shapes_df = wsi[key] 119 | # join the annotations with the tiles 120 | join_anno_df = ( 121 | gpd.sjoin(shapes_df, join_anno_df, how="right", predicate="intersects") 122 | .reset_index(drop=True) 123 | .drop(columns=["index_left"]) 124 | ) 125 | add_shapes(wsi, key_added, join_anno_df) 126 | 127 | # TODO: still Buggy 128 | if join_to is not None: 129 | if join_to in wsi: 130 | shapes_df = wsi[join_to] 131 | # join the annotations with the tiles 132 | shapes_df = ( 133 | gpd.sjoin( 134 | shapes_df[["geometry"]], anno_df, how="left", predicate="intersects" 135 | ) 136 | .reset_index(drop=True) 137 | .drop(columns=["index_right"], errors="ignore") 138 | ) 139 | update_shapes_data(wsi, join_to, shapes_df) 140 | 141 | 142 | def export_annotations( 143 | wsi: WSIData, 144 | key: str, 145 | *, 146 | in_bounds: bool = False, 147 | classes: str = None, 148 | colors: str | Mapping = None, 149 | format: Literal["qupath"] = "qupath", 150 | file: str | Path = None, 151 | ): 152 | """ 153 | Export the annotations 154 | 155 | Parameters 156 | ---------- 157 | wsi : :class:`WSIData ` 158 | The WSIData object to work on. 159 | key : str 160 | The key to export. 161 | in_bounds : bool, default: False 162 | Whether to move the annotations to the slide bounds. 163 | classes : str, default: None 164 | The column to use for the classification. 165 | If None, the classification will be ignored. 166 | colors : str, Mapping, default: None 167 | The column to use for the color. 168 | If None, the color will be ignored. 169 | format : str, default: 'qupath' 170 | The format to export. 171 | Currently only 'qupath' is supported. 172 | file : str, Path, default: None 173 | The file to save the annotations. 
174 | If None, the annotations will not be saved. 175 | 176 | 177 | """ 178 | gdf = wsi.shapes[key].copy() 179 | if in_bounds: 180 | gdf = _in_bounds_transform(wsi, gdf, reverse=True) 181 | 182 | if format == "qupath": 183 | # Prepare classification column 184 | import json 185 | 186 | if classes is not None: 187 | class_values = gdf[classes] 188 | 189 | if colors is None: 190 | # Assign default colors 191 | colors = cycle( 192 | [ 193 | "#1B9E77", # Teal Green 194 | "#D95F02", # Burnt Orange 195 | "#7570B3", # Deep Lavender 196 | "#E7298A", # Magenta 197 | "#66A61E", # Olive Green 198 | "#E6AB02", # Goldenrod 199 | "#A6761D", # Earthy Brown 200 | "#666666", # Charcoal Gray 201 | "#1F78B4", # Cool Blue 202 | ] 203 | ) 204 | 205 | if colors is not None: 206 | color_values = cycle([]) 207 | if isinstance(colors, str): 208 | color_values = gdf[colors] 209 | elif isinstance(colors, Iterable): 210 | # if sequence of colors, map to class values 211 | colors = dict(zip(pd.unique(class_values), colors)) 212 | else: 213 | raise ValueError(f"Invalid colors: {colors}") 214 | 215 | if isinstance(colors, Mapping): 216 | color_values = map(lambda x: colors.get(x, None), gdf[classes]) 217 | 218 | # covert color to rgb array 219 | from matplotlib.colors import to_rgb 220 | 221 | color_values = map( 222 | lambda x: tuple(int(255 * c) for c in to_rgb(x)) 223 | if x is not None 224 | else None, 225 | color_values, 226 | ) 227 | 228 | classifications = [] 229 | for class_value, color_value in zip(class_values, color_values): 230 | json_string = json.dumps({"name": class_value, "color": color_value}) 231 | classifications.append(json_string) 232 | gdf["classification"] = classifications 233 | 234 | if file is not None: 235 | gdf.to_file(file) 236 | 237 | return gdf 238 | -------------------------------------------------------------------------------- /src/lazyslide/metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from anndata import AnnData 3 | 4 | 5 | def topk_score( 6 | matrix: np.ndarray | AnnData, 7 | k: int = 5, 8 | agg_method: str = "max", 9 | ) -> np.ndarray: 10 | """ 11 | Get the top k score from a feature x class matrix. 12 | 13 | Parameters 14 | ---------- 15 | matrix : np.ndarray | AnnData 16 | The input matrix. Feature x class. 17 | k : int, default: 5 18 | The number of top scores to return. 19 | agg_method : str, default: "max" 20 | The method to use for aggregation. 21 | Can be "max", "mean", "median" or "sum". 22 | 23 | Returns 24 | ------- 25 | np.ndarray 26 | The top k scores. 27 | 28 | """ 29 | if isinstance(matrix, AnnData): 30 | matrix = matrix.X 31 | 32 | top_k_score = np.sort(matrix, axis=0)[-k:] 33 | score = getattr(np, agg_method)(top_k_score, axis=0) 34 | return score 35 | -------------------------------------------------------------------------------- /src/lazyslide/models/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Type 2 | 3 | from . import multimodal 4 | from . import segmentation 5 | from . 
import vision 6 | from .base import ( 7 | ModelBase, 8 | ImageModel, 9 | ImageTextModel, 10 | SegmentationModel, 11 | SlideEncoderModel, 12 | TimmModel, 13 | ) 14 | 15 | from ._model_registry import MODEL_REGISTRY, list_models 16 | -------------------------------------------------------------------------------- /src/lazyslide/models/_model_registry.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from enum import Enum 3 | from pathlib import Path 4 | from typing import Type, List 5 | 6 | import pandas as pd 7 | 8 | from . import ModelBase 9 | from . import multimodal 10 | from . import segmentation 11 | from . import vision 12 | 13 | 14 | class ModelTask(Enum): 15 | vision = "vision" 16 | segmentation = "segmentation" 17 | multimodal = "multimodal" 18 | 19 | 20 | @dataclass 21 | class ModelCard: 22 | name: str 23 | model_type: ModelTask 24 | module: Type[ModelBase] 25 | github_url: str = None 26 | hf_url: str = None 27 | paper_url: str = None 28 | description: str = None 29 | keys: List[str] = None 30 | 31 | def __post_init__(self): 32 | try: 33 | inject_doc = str(self) 34 | origin_doc = self.module.__doc__ 35 | if origin_doc is None: 36 | origin_doc = "" 37 | else: 38 | origin_doc = f"\n\n{origin_doc}" 39 | self.module.__doc__ = f"{inject_doc}{origin_doc}" 40 | except AttributeError: 41 | # If the module does not have a __doc__ attribute, skip the injection 42 | pass 43 | 44 | if self.keys is None: 45 | self.keys = [self.name.lower()] 46 | 47 | def __str__(self): 48 | skeleton = "" 49 | if self.github_url is not None: 50 | skeleton += f":octicon:`mark-github;1em;` `GitHub <{self.github_url}>`__ \\" 51 | if self.hf_url is not None: 52 | skeleton += f"🤗 `Hugging Face <{self.hf_url}>`__ \\" 53 | if self.paper_url is not None: 54 | skeleton += f" :octicon:`book;1em;` `Paper <{self.paper_url}>`__" 55 | if self.description is not None: 56 | skeleton += f"\n| {self.description}" 57 | 58 | return skeleton 59 | 60 | 61 | MODEL_REGISTRY = {} 62 | 63 | MODEL_DB = pd.read_csv(f"{Path(__file__).parent}/model_registry.csv") 64 | _modules = { 65 | ModelTask.vision: vision, 66 | ModelTask.segmentation: segmentation, 67 | ModelTask.multimodal: multimodal, 68 | } 69 | 70 | for _, row in MODEL_DB.iterrows(): 71 | model_type = ModelTask(row["model_type"]) 72 | card = ModelCard( 73 | name=row["name"], 74 | model_type=model_type, 75 | module=getattr(_modules[model_type], row["module"]), 76 | github_url=None if pd.isna(row["github_url"]) else row["github_url"], 77 | hf_url=None if pd.isna(row["hf_url"]) else row["hf_url"], 78 | paper_url=None if pd.isna(row["paper_url"]) else row["paper_url"], 79 | description=None if pd.isna(row["description"]) else row["description"], 80 | ) 81 | keys = [i.strip() for i in row["keys"].split(",")] if row["keys"] else [] 82 | for key in keys: 83 | MODEL_REGISTRY[key] = card 84 | 85 | 86 | def list_models(task: ModelTask = None): 87 | """List all available models. 88 | 89 | If you want to get models for feature extraction, 90 | you can use task='vision' or task='multimodal'. 91 | 92 | Parameters 93 | ---------- 94 | task : {'vision', 'segmentation', 'multimodal'}, default: None 95 | The task to filter the models. If None, return all models. 96 | 97 | Returns 98 | ------- 99 | list 100 | A list of model names. 
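
    Examples
    --------
    Listing and filtering the registered models:

    >>> from lazyslide.models import list_models
    >>> "uni" in list_models(task="vision")
    True
    >>> "conch" in list_models(task="multimodal")
    True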
101 | 102 | """ 103 | if task is None: 104 | return list(MODEL_REGISTRY.keys()) 105 | if task is not None: 106 | task = ModelTask(task) 107 | if task in ModelTask: 108 | return [ 109 | name 110 | for name, model in MODEL_REGISTRY.items() 111 | if model.model_type == task 112 | ] 113 | else: 114 | raise ValueError( 115 | f"Unknown task: {task}. " 116 | "Available tasks are: vision, segmentation, multimodal." 117 | ) 118 | -------------------------------------------------------------------------------- /src/lazyslide/models/_utils.py: -------------------------------------------------------------------------------- 1 | from contextlib import contextmanager 2 | 3 | import torch 4 | 5 | 6 | def _fake_class(name, deps, inject=""): 7 | def init(self, *args, **kwargs): 8 | raise ImportError( 9 | f"To use {name}, you need to install {', '.join(deps)}." 10 | f"{inject}" 11 | "Please restart the kernel after installation." 12 | ) 13 | 14 | # Dynamically create the class 15 | new_class = type(name, (object,), {"__init__": init}) 16 | 17 | return new_class 18 | 19 | 20 | @contextmanager 21 | def hf_access(name): 22 | """ 23 | Context manager for Hugging Face access. 24 | """ 25 | from huggingface_hub.errors import GatedRepoError 26 | 27 | try: 28 | yield 29 | except GatedRepoError as e: 30 | raise GatedRepoError( 31 | f"You don't have access to {name}. Please request access to the model on HuggingFace. " 32 | "After access granted, please login to HuggingFace with huggingface-cli on this machine " 33 | "with a token that has access to this model. " 34 | "You may also pass token as an argument in LazySlide, however, this is not recommended." 35 | ) from e 36 | 37 | 38 | def get_default_transform(): 39 | """The default transform for the model.""" 40 | from torchvision.transforms import InterpolationMode 41 | from torchvision.transforms.v2 import ( 42 | Compose, 43 | Normalize, 44 | CenterCrop, 45 | ToImage, 46 | ToDtype, 47 | Resize, 48 | ) 49 | 50 | transforms = [ 51 | ToImage(), 52 | Resize( 53 | size=(224, 224), 54 | interpolation=InterpolationMode.BICUBIC, 55 | max_size=None, 56 | antialias=True, 57 | ), 58 | CenterCrop(224), 59 | ToDtype(dtype=torch.float32, scale=True), 60 | Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)), 61 | ] 62 | return Compose(transforms) 63 | -------------------------------------------------------------------------------- /src/lazyslide/models/base.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from pathlib import Path 4 | from typing import Callable 5 | 6 | import numpy as np 7 | import torch 8 | 9 | from lazyslide.models._utils import hf_access, get_default_transform 10 | 11 | 12 | class ModelBase: 13 | model: torch.nn.Module 14 | name: str = "ModelBase" 15 | is_restricted: bool = False 16 | 17 | def get_transform(self): 18 | return None 19 | 20 | def to(self, device): 21 | self.model.to(device) 22 | return self 23 | 24 | @staticmethod 25 | def load_weights(url, progress=True): 26 | from timm.models.hub import download_cached_file 27 | 28 | return Path(download_cached_file(url, progress=progress)) 29 | 30 | 31 | class ImageModel(ModelBase): 32 | # TODO: Add a config that specify the recommended input tile size and mpp 33 | 34 | def get_transform(self): 35 | import torch 36 | from torchvision.transforms.v2 import ( 37 | Compose, 38 | ToImage, 39 | ToDtype, 40 | Resize, 41 | Normalize, 42 | ) 43 | 44 | return Compose( 45 | [ 46 | ToImage(), 47 | ToDtype(dtype=torch.float32, 
scale=True), 48 | Resize(size=(224, 224), antialias=False), 49 | Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)), 50 | ] 51 | ) 52 | 53 | def encode_image(self, image) -> np.ndarray[np.float32]: 54 | raise NotImplementedError 55 | 56 | def __call__(self, image): 57 | return self.encode_image(image) 58 | 59 | 60 | class TimmModel(ImageModel): 61 | def __init__(self, name, token=None, compile=False, compile_kws=None, **kwargs): 62 | import timm 63 | from huggingface_hub import login 64 | 65 | if token is not None: 66 | login(token) 67 | 68 | default_kws = {"pretrained": True, "num_classes": 0} 69 | default_kws.update(kwargs) 70 | 71 | with hf_access(name): 72 | self.model = timm.create_model(name, **default_kws) 73 | 74 | if compile: 75 | if compile_kws is None: 76 | compile_kws = {} 77 | self.compiled_model = torch.compile(self.model, **compile_kws) 78 | 79 | def get_transform(self): 80 | return get_default_transform() 81 | 82 | @torch.inference_mode() 83 | def encode_image(self, image): 84 | with torch.inference_mode(): 85 | return self.model(image).cpu().detach().numpy() 86 | 87 | 88 | class SlideEncoderModel(ModelBase): 89 | def encode_slide(self, embeddings, coords=None): 90 | raise NotImplementedError 91 | 92 | 93 | class ImageTextModel(ImageModel): 94 | def encode_image(self, image): 95 | """This should return the image feature before normalize.""" 96 | raise NotImplementedError 97 | 98 | def encode_text(self, text): 99 | raise NotImplementedError 100 | 101 | def tokenize(self, text): 102 | raise NotImplementedError 103 | 104 | 105 | class SegmentationModel(ModelBase): 106 | CLASS_MAPPING = None 107 | 108 | def get_transform(self): 109 | import torch 110 | from torchvision.transforms.v2 import Compose, ToImage, ToDtype, Normalize 111 | 112 | return Compose( 113 | [ 114 | ToImage(), 115 | ToDtype(dtype=torch.float32, scale=True), 116 | Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)), 117 | ] 118 | ) 119 | 120 | def segment(self, image): 121 | raise NotImplementedError 122 | 123 | def get_postprocess(self) -> Callable | None: 124 | return None 125 | -------------------------------------------------------------------------------- /src/lazyslide/models/model_registry.csv: -------------------------------------------------------------------------------- 1 | name,keys,model_type,module,github_url,hf_url,paper_url,description 2 | CONCH,conch,multimodal,CONCH,https://github.com/mahmoodlab/CONCH,https://huggingface.co/MahmoodLab/conch,https://doi.org/10.1038/s41591-024-02856-4,Multimodal foundation model 3 | PLIP,plip,multimodal,PLIP,https://github.com/PathologyFoundation/plip,https://huggingface.co/vinid/plip,https://doi.org/10.1038/s41591-023-02504-3,Multimodal foundation model 4 | Prism,prism,multimodal,Prism,https://github.com/mahmoodlab/PRISM,https://huggingface.co/paige-ai/Prism,https://doi.org/10.48550/arXiv.2405.10254,Slide-Level multimodal generative model 5 | Titan,"titan, conch_v1.5",multimodal,Titan,https://github.com/mahmoodlab/TITAN,https://huggingface.co/MahmoodLab/TITAN,https://doi.org/10.48550/arXiv.2411.19666,Multimodal foundation model 6 | Uni,uni,vision,UNI,https://github.com/mahmoodlab/UNI,https://huggingface.co/MahmoodLab/UNI,https://doi.org/10.1038/s41591-024-02857-3,Vision foundation model 7 | Uni2,uni2,vision,UNI2,https://github.com/mahmoodlab/UNI,https://huggingface.co/MahmoodLab/UNI2-h,https://doi.org/10.1038/s41591-024-02857-3,Vision foundation model 8 | 
GigaPath,gigapath,vision,GigaPath,https://github.com/prov-gigapath/prov-gigapath,https://huggingface.co/prov-gigapath/prov-gigapath,https://doi.org/10.1038/s41586-024-07441-w,Vision foundation model 9 | Virchow,virchow,vision,Virchow,,https://huggingface.co/paige-ai/Virchow,https://doi.org/10.1038/s41591-024-03141-0,Vision foundation model 10 | Virchow2,virchow2,vision,Virchow2,,https://huggingface.co/paige-ai/Virchow2,https://doi.org/10.48550/arXiv.2408.00738,Vision foundation model 11 | Phikon,phikon,vision,Phikon,https://github.com/owkin/HistoSSLscaling/,https://huggingface.co/owkin/phikon,https://doi.org/10.1101/2023.07.21.23292757,Vision foundation model 12 | PhikonV2,phikonv2,vision,PhikonV2,https://github.com/owkin,https://huggingface.co/owkin/phikon-v2,https://doi.org/10.48550/arXiv.2409.09173,Vision foundation model 13 | H-optimus-0,h-optimus-0,vision,HOptimus0,https://github.com/bioptimus,https://huggingface.co/bioptimus/H-optimus-0,,Vision foundation model 14 | H-optimus-1,h-optimus-1,vision,HOptimus1,https://github.com/bioptimus,https://huggingface.co/bioptimus/H-optimus-1,,Vision foundation model 15 | H0-mini,h0-mini,vision,H0Mini,https://github.com/bioptimus,https://huggingface.co/bioptimus/H0-mini,https://doi.org/10.48550/arXiv.2501.16239,Vision foundation model 16 | CONCHVision,conch_vision,vision,CONCHVision,https://github.com/mahmoodlab/CONCH,https://huggingface.co/MahmoodLab/conch,https://doi.org/10.1038/s41591-024-02856-4,Multimodal foundation model 17 | PLIPVision,plip_vision,vision,PLIPVision,https://github.com/PathologyFoundation/plip,https://huggingface.co/vinid/plip,https://doi.org/10.1038/s41591-023-02504-3,Multimodal foundation model 18 | NuLite,nulite,segmentation,NuLite,https://github.com/CosmoIknosLab/NuLite,,https://doi.org/10.48550/arXiv.2408.01797,Cell segmentation and classification 19 | InstanSeg,instanseg,segmentation,Instanseg,https://github.com/instanseg/instanseg,,https://doi.org/10.48550/arXiv.2408.15954,Cell segmentation 20 | GrandQC-Tissue,grandqc-tissue,segmentation,GrandQCTissue,https://github.com/cpath-ukk/grandqc,,https://doi.org/10.1038/s41467-024-54769-y,Tissue segmentation 21 | GrandQC-Artifact,grandqc-artifact,segmentation,GrandQCArtifact,https://github.com/cpath-ukk/grandqc,,https://doi.org/10.1038/s41467-024-54769-y,Artifact segmentation 22 | Midnight,midnight,vision,Midnight,https://github.com/kaiko-ai/midnight,https://huggingface.co/kaiko-ai/midnight,https://doi.org/10.48550/arXiv.2504.05186,Vision foundation model 23 | HibouB,hibou-b,vision,HibouB,https://github.com/HistAI/hibou/tree/main,https://huggingface.co/histai/hibou-b,https://doi.org/10.48550/arXiv.2406.05074,Foundation Vision Transformer 24 | HibouL,hibou-l,vision,HibouL,https://github.com/HistAI/hibou/tree/main,https://huggingface.co/histai/hibou-l,https://doi.org/10.48550/arXiv.2406.05074,Foundation Vision Transformer -------------------------------------------------------------------------------- /src/lazyslide/models/multimodal/__init__.py: -------------------------------------------------------------------------------- 1 | from .conch import CONCH 2 | from .plip import PLIP 3 | from .titan import Titan 4 | from .prism import Prism 5 | -------------------------------------------------------------------------------- /src/lazyslide/models/multimodal/conch.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .._utils import hf_access 4 | from ..base import ImageTextModel 5 | 6 | 7 | class CONCH(ImageTextModel): 8 | 
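    # A rough zero-shot sketch (hypothetical prompts; assumes the CONCH package is
    # installed, HuggingFace access is granted, and `image` is a preprocessed RGB
    # tensor of shape (3, H, W)):
    #
    #   model = CONCH()
    #   img_feat = model.encode_image(image)               # normalized image embedding
    #   txt_feat = model.encode_text(["tumor", "stroma"])  # text embeddings
    #   sims = img_feat @ txt_feat.T                       # similarity per prompt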
def __init__(self, model_path=None, token=None): 9 | try: 10 | from conch.open_clip_custom import create_model_from_pretrained 11 | from conch.open_clip_custom import get_tokenizer 12 | except ImportError: 13 | raise ImportError( 14 | "Conch is not installed. You can install it using " 15 | "`pip install git+https://github.com/mahmoodlab/CONCH.git`." 16 | ) 17 | 18 | if model_path is None: 19 | model_path = "hf_hub:MahmoodLab/conch" 20 | 21 | with hf_access(model_path): 22 | self.model, self.processor = create_model_from_pretrained( 23 | "conch_ViT-B-16", model_path, hf_auth_token=token 24 | ) 25 | self.tokenizer = get_tokenizer() 26 | 27 | @torch.inference_mode() 28 | def encode_image(self, image): 29 | if not isinstance(image, torch.Tensor): 30 | image = self.processor(image) 31 | if image.dim() == 3: 32 | image = image.unsqueeze(0) 33 | 34 | image_feature = self.model.encode_image( 35 | image, normalize=True, proj_contrast=True 36 | ) 37 | return image_feature 38 | 39 | def tokenize(self, text): 40 | from conch.open_clip_custom import tokenize 41 | 42 | return tokenize(self.tokenizer, text) 43 | 44 | @torch.inference_mode() 45 | def encode_text(self, text): 46 | encode_texts = self.tokenize(text) 47 | text_feature = self.model.encode_text(encode_texts) 48 | return text_feature 49 | -------------------------------------------------------------------------------- /src/lazyslide/models/multimodal/plip.py: -------------------------------------------------------------------------------- 1 | # Modified from https://github.com/PathologyFoundation/plip/blob/main/plip.py 2 | 3 | import torch 4 | 5 | from .._utils import hf_access 6 | from ..base import ImageTextModel 7 | 8 | 9 | class PLIP(ImageTextModel): 10 | def __init__(self, model_path=None, token=None): 11 | try: 12 | from transformers import CLIPModel, CLIPProcessor 13 | except ImportError: 14 | raise ImportError( 15 | "Please install the 'transformers' package to use the PLIP model" 16 | ) 17 | 18 | if model_path is None: 19 | model_path = "vinid/plip" 20 | 21 | with hf_access(model_path): 22 | self.model = CLIPModel.from_pretrained(model_path, use_auth_token=token) 23 | self.processor = CLIPProcessor.from_pretrained( 24 | model_path, use_auth_token=token 25 | ) 26 | 27 | def get_transform(self): 28 | return None 29 | 30 | @torch.inference_mode() 31 | def encode_image(self, image): 32 | inputs = self.processor(images=image, return_tensors="pt") 33 | inputs = {k: v.to(self.model.device) for k, v in inputs.items()} 34 | image_features = self.model.get_image_features(**inputs) 35 | image_features = torch.nn.functional.normalize(image_features, p=2, dim=-1) 36 | return image_features 37 | 38 | @torch.inference_mode() 39 | def encode_text(self, text): 40 | inputs = self.processor( 41 | text=text, 42 | return_tensors="pt", 43 | max_length=77, 44 | padding="max_length", 45 | truncation=True, 46 | ) 47 | inputs = {k: v.to(self.model.device) for k, v in inputs.items()} 48 | text_features = self.model.get_text_features(**inputs) 49 | return text_features 50 | -------------------------------------------------------------------------------- /src/lazyslide/models/multimodal/prism.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import torch 4 | 5 | from .._utils import hf_access 6 | from ..base import ModelBase 7 | 8 | 9 | class Prism(ModelBase): 10 | def __init__(self, model_path=None, token=None): 11 | from transformers import AutoModel 12 | 13 | # Suppress warnings from transformers 
14 | with warnings.catch_warnings(), hf_access(model_path): 15 | warnings.simplefilter("ignore") 16 | 17 | self.model = AutoModel.from_pretrained( 18 | "paige-ai/Prism", 19 | trust_remote_code=True, 20 | token=token, 21 | ) 22 | 23 | @torch.inference_mode() 24 | def encode_slide(self, embeddings, coords=None) -> dict: 25 | # Make sure the embeddings has a batch dimension 26 | if len(embeddings.shape) == 2: 27 | embeddings = embeddings.unsqueeze(0) 28 | return self.model.slide_representations(embeddings) 29 | 30 | @torch.inference_mode() 31 | def score( 32 | self, 33 | slide_embedding, 34 | prompts: list[list[str]], 35 | ): 36 | if len(prompts): 37 | pass 38 | 39 | device = self.model.device 40 | 41 | # Flatten all prompts and track indices for class reconstruction 42 | flat_prompts = [] 43 | group_lengths = [] 44 | for group in prompts: 45 | flat_prompts.extend(group) 46 | group_lengths.append(len(group)) 47 | 48 | token_ids = self.model.tokenize(flat_prompts)[:, :-1].to(device) 49 | 50 | dummy_image_latents = torch.empty( 51 | (len(flat_prompts), 1, self.model.text_decoder.context_dim), device=device 52 | ) 53 | decoder_out = self.model.text_decoder(token_ids, dummy_image_latents) 54 | 55 | text_proj = self.model.text_to_latents(decoder_out["text_embedding"]) 56 | image_proj = self.model.img_to_latents(slide_embedding) 57 | 58 | sim = torch.einsum("i d, j d -> i j", image_proj, text_proj) # (image, prompt) 59 | sim = sim * self.model.temperature.exp() 60 | zero_shot_probs = torch.softmax( 61 | sim.to(torch.float), dim=-1 62 | ) # (Bi, total_prompts) 63 | 64 | # Sum probabilities per group (class) 65 | class_probs = [] 66 | start = 0 67 | for length in group_lengths: 68 | end = start + length 69 | class_probs.append(zero_shot_probs[:, start:end].sum(dim=-1, keepdim=True)) 70 | start = end 71 | 72 | probs = torch.cat(class_probs, dim=-1) 73 | return probs.detach().cpu().numpy() 74 | 75 | @torch.inference_mode() 76 | def caption( 77 | self, 78 | img_latents, 79 | prompt: list[str], 80 | max_length: int = 100, 81 | ): 82 | genned_ids = self.model.generate( 83 | self.model.tokenize(prompt).to(self.model.device), 84 | key_value_states=img_latents, 85 | do_sample=False, 86 | num_beams=5, 87 | num_beam_groups=1, 88 | max_length=max_length, 89 | ) 90 | genned_caption = self.model.untokenize(genned_ids) 91 | 92 | return genned_caption 93 | -------------------------------------------------------------------------------- /src/lazyslide/models/multimodal/titan.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .._utils import hf_access 4 | from ..base import ImageModel 5 | 6 | 7 | class Titan(ImageModel): 8 | name = "titan" 9 | 10 | TEMPLATES = [ 11 | "CLASSNAME.", 12 | "an image of CLASSNAME.", 13 | "the image shows CLASSNAME.", 14 | "the image displays CLASSNAME.", 15 | "the image exhibits CLASSNAME.", 16 | "an example of CLASSNAME.", 17 | "CLASSNAME is shown.", 18 | "this is CLASSNAME.", 19 | "I observe CLASSNAME.", 20 | "the pathology image shows CLASSNAME.", 21 | "a pathology image shows CLASSNAME.", 22 | "the pathology slide shows CLASSNAME.", 23 | "shows CLASSNAME.", 24 | "contains CLASSNAME.", 25 | "presence of CLASSNAME.", 26 | "CLASSNAME is present.", 27 | "CLASSNAME is observed.", 28 | "the pathology image reveals CLASSNAME.", 29 | "a microscopic image of showing CLASSNAME.", 30 | "histology shows CLASSNAME.", 31 | "CLASSNAME can be seen.", 32 | "the tissue shows CLASSNAME.", 33 | "CLASSNAME is identified.", 34 | ] 35 | 36 | 
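    # The TEMPLATES above are zero-shot prompt templates: each class name passed to
    # `score()` is substituted for the "CLASSNAME" placeholder to build an ensemble
    # of text prompts for the zero-shot classifier (the substitution itself is
    # handled inside the TITAN model).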
def __init__(self, model_path=None, token=None): 37 | from transformers import AutoModel 38 | 39 | with hf_access(model_path): 40 | self.model = AutoModel.from_pretrained( 41 | "MahmoodLab/TITAN", 42 | add_pooling_layer=False, 43 | use_auth_token=token, 44 | trust_remote_code=True, 45 | ) 46 | self.conch, self.conch_transform = self.model.return_conch() 47 | 48 | def to(self, device): 49 | super().to(device) 50 | self.conch.to(device) 51 | 52 | def get_transform(self): 53 | from torchvision.transforms import InterpolationMode 54 | from torchvision.transforms.v2 import ( 55 | Resize, 56 | CenterCrop, 57 | ToImage, 58 | ToDtype, 59 | Normalize, 60 | Compose, 61 | ) 62 | 63 | return Compose( 64 | [ 65 | ToImage(), 66 | Resize(448, interpolation=InterpolationMode.BICUBIC, antialias=True), 67 | CenterCrop(448), 68 | ToDtype(dtype=torch.float32, scale=True), 69 | Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)), 70 | ] 71 | ) 72 | 73 | @torch.inference_mode() 74 | def encode_image(self, image): 75 | image_feature = self.conch(image) 76 | return image_feature.detach().cpu().numpy() 77 | 78 | @torch.inference_mode() 79 | def encode_slide(self, embeddings, coords=None, base_tile_size=None, **kwargs): 80 | slide_embeddings = self.model.encode_slide_from_patch_features( 81 | embeddings, coords, base_tile_size 82 | ) 83 | return slide_embeddings.detach().cpu().numpy() 84 | 85 | @torch.inference_mode() 86 | def score( 87 | self, slide_embeddings, prompts: list[str], template: str = None, **kwargs 88 | ): 89 | if template is None: 90 | template = self.TEMPLATES 91 | 92 | classifier = self.model.zero_shot_classifier(prompts, template) 93 | scores = self.model.zero_shot(slide_embeddings, classifier) 94 | return scores.squeeze(0).detach().cpu().numpy() 95 | -------------------------------------------------------------------------------- /src/lazyslide/models/segmentation/__init__.py: -------------------------------------------------------------------------------- 1 | from .instanseg import Instanseg 2 | from .nulite import NuLite 3 | from .grandqc import GrandQCTissue, GrandQCArtifact 4 | from .postprocess import ( 5 | instanseg_postprocess, 6 | semanticseg_postprocess, 7 | ) 8 | from .smp import SMPBase 9 | from .sam import SAM 10 | -------------------------------------------------------------------------------- /src/lazyslide/models/segmentation/cellpose.py: -------------------------------------------------------------------------------- 1 | from lazyslide.models.base import SegmentationModel 2 | 3 | 4 | class Cellpose(SegmentationModel): 5 | def __init__(self, model_type="nuclei"): 6 | from cellpose import models 7 | 8 | self.cellpose_model = models.Cellpose(model_type=model_type, gpu=False) 9 | 10 | def to(self, device): 11 | self.cellpose_model.device = device 12 | 13 | def get_transform(self): 14 | return None 15 | 16 | def segment(self, image): 17 | masks, flows, styles = self.cellpose_model.eval( 18 | image, diameter=30, channels=[0, 0] 19 | ) 20 | return masks 21 | -------------------------------------------------------------------------------- /src/lazyslide/models/segmentation/grandqc.py: -------------------------------------------------------------------------------- 1 | from typing import Literal 2 | 3 | import torch 4 | from lazyslide.models.base import SegmentationModel 5 | from lazyslide.models.segmentation.postprocess import semanticseg_postprocess 6 | from lazyslide.models.segmentation.smp import SMPBase 7 | 8 | 9 | class GrandQCArtifact(SegmentationModel): 10 | CLASS_MAPPING = { 11 | 0: 
"Background", 12 | 1: "Normal Tissue", 13 | 2: "Fold", 14 | 3: "Darkspot & Foreign Object", 15 | 4: "PenMarking", 16 | 5: "Edge & Air Bubble", 17 | 6: "Out of Focus", 18 | 7: "Background", 19 | } 20 | 21 | def __init__(self, model: Literal["5x", "7x", "10x"] = "7x"): 22 | from huggingface_hub import hf_hub_download 23 | 24 | weights_map = { 25 | "5x": "GrandQC_MPP2_traced.pt", 26 | "7x": "GrandQC_MPP15_traced.pt", 27 | "10x": "GrandQC_MPP1_traced.pt", 28 | } 29 | weights = hf_hub_download( 30 | "RendeiroLab/LazySlide-models", f"grandqc/{weights_map[model]}" 31 | ) 32 | 33 | self.model = torch.jit.load(weights) 34 | 35 | def get_transform(self): 36 | import torch 37 | from torchvision.transforms.v2 import ( 38 | Compose, 39 | ToImage, 40 | ToDtype, 41 | Normalize, 42 | ) 43 | 44 | return Compose( 45 | [ 46 | ToImage(), 47 | ToDtype(dtype=torch.float32, scale=True), 48 | Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)), 49 | ] 50 | ) 51 | 52 | @torch.inference_mode() 53 | def segment(self, image): 54 | out = self.model(image) 55 | return out.detach().cpu().numpy() 56 | 57 | def get_postprocess(self): 58 | return semanticseg_postprocess 59 | 60 | 61 | class GrandQCTissue(SMPBase): 62 | CLASS_MAPPING = { 63 | 0: "Background", 64 | 1: "Tissue", 65 | } 66 | 67 | def __init__(self): 68 | from huggingface_hub import hf_hub_download 69 | 70 | weights = hf_hub_download( 71 | "RendeiroLab/LazySlide-models", "grandqc/Tissue_Detection_MPP10.pth" 72 | ) 73 | 74 | super().__init__( 75 | arch="unetplusplus", 76 | encoder_name="timm-efficientnet-b0", 77 | encoder_weights="imagenet", 78 | in_channels=3, 79 | classes=2, 80 | activation=None, 81 | ) 82 | self.model.load_state_dict( 83 | torch.load(weights, map_location=torch.device("cpu"), weights_only=True) 84 | ) 85 | self.model.eval() 86 | 87 | @torch.inference_mode() 88 | def segment(self, image): 89 | return self.model.predict(image) 90 | -------------------------------------------------------------------------------- /src/lazyslide/models/segmentation/instanseg.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import Callable 4 | 5 | import numpy as np 6 | import torch 7 | 8 | from lazyslide.models.base import SegmentationModel 9 | from .postprocess import instanseg_postprocess 10 | 11 | 12 | class PercentileNormalize: 13 | def __call__(self, image: torch.Tensor) -> torch.Tensor: 14 | # image shape should be [C, H, W] 15 | for c in range(image.shape[0]): 16 | channel = image[c] 17 | min_i = torch.quantile(channel.flatten(), 0.001) 18 | max_i = torch.quantile(channel.flatten(), 0.999) 19 | image[c] = (channel - min_i) / max(1e-3, max_i - min_i) 20 | return image 21 | 22 | def __repr__(self): 23 | return self.__class__.__name__ + "()" 24 | 25 | 26 | class Instanseg(SegmentationModel): 27 | """Apply the InstaSeg model to the input image.""" 28 | 29 | _base_mpp = 0.5 30 | 31 | def __init__(self, model_file=None): 32 | from huggingface_hub import hf_hub_download 33 | 34 | model_file = hf_hub_download( 35 | "RendeiroLab/LazySlide-models", "instanseg/instanseg_v0_1_0.pt" 36 | ) 37 | 38 | self.model = torch.jit.load(model_file, map_location="cpu") 39 | 40 | def get_transform(self): 41 | from torchvision.transforms.v2 import ToImage, ToDtype, Compose 42 | 43 | return Compose( 44 | [ 45 | ToImage(), # Converts numpy or PIL to torch.Tensor in [C, H, W] format 46 | ToDtype(dtype=torch.float32, scale=False), 47 | PercentileNormalize(), 48 | ] 49 | ) 50 | 51 
| @torch.inference_mode() 52 | def segment(self, image): 53 | # with torch.inference_mode(): 54 | out = self.model(image) 55 | return out.squeeze().cpu().numpy().astype(np.uint16) 56 | 57 | def get_postprocess(self) -> Callable | None: 58 | return instanseg_postprocess 59 | -------------------------------------------------------------------------------- /src/lazyslide/models/segmentation/nulite/__init__.py: -------------------------------------------------------------------------------- 1 | from .api import NuLite 2 | -------------------------------------------------------------------------------- /src/lazyslide/models/segmentation/nulite/api.py: -------------------------------------------------------------------------------- 1 | from typing import Literal 2 | 3 | import cv2 4 | import numpy as np 5 | import torch 6 | import geopandas as gpd 7 | 8 | from lazyslide.cv import Mask 9 | from lazyslide.models.base import SegmentationModel 10 | 11 | from .model import NuLite as NuLiteModel 12 | 13 | 14 | class NuLite(SegmentationModel): 15 | def __init__( 16 | self, 17 | variant: Literal["H", "M", "T"] = "H", 18 | ): 19 | from huggingface_hub import hf_hub_download 20 | 21 | model_file = hf_hub_download( 22 | "RendeiroLab/LazySlide-models", f"nulite/NuLite-{variant}-Weights.pth" 23 | ) 24 | 25 | weights = torch.load(model_file, map_location="cpu") 26 | 27 | config = weights["config"] 28 | self.model = NuLiteModel( 29 | config["data.num_nuclei_classes"], 30 | config["data.num_tissue_classes"], 31 | config["model.backbone"], 32 | ) 33 | self.model.load_state_dict(weights["model_state_dict"]) 34 | 35 | def get_transform(self): 36 | from torchvision.transforms.v2 import ToImage, ToDtype, Normalize, Compose 37 | 38 | return Compose( 39 | [ 40 | ToImage(), 41 | ToDtype(dtype=torch.float32, scale=True), 42 | Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)), 43 | ] 44 | ) 45 | 46 | @torch.inference_mode() 47 | def segment(self, image): 48 | return self.model.forward(image, retrieve_tokens=True) 49 | 50 | def get_postprocess(self): 51 | return nulite_preprocess 52 | 53 | 54 | CLASS_MAPPING = { 55 | 0: "Background", 56 | 1: "Neoplastic", 57 | 2: "Inflammatory", 58 | 3: "Connective", 59 | 4: "Dead", 60 | 5: "Epithelial", 61 | } 62 | 63 | 64 | def nulite_preprocess( 65 | output, 66 | ksize: int = 11, 67 | min_object_size: int = 3, 68 | nucleus_size: (int, int) = (20, 5000), 69 | ) -> gpd.GeoDataFrame: 70 | """Preprocess the image for NuLite model.""" 71 | 72 | binary_mask = output["nuclei_binary_map"].softmax(0).detach().cpu().numpy()[1] 73 | hv_map = output["hv_map"].detach().cpu().numpy() 74 | type_prob_map = ( 75 | output["nuclei_type_map"].softmax(0).detach().cpu().numpy()[1::] 76 | ) # to skip background 77 | 78 | _, blb = cv2.threshold(binary_mask.astype(np.float32), 0.5, 1, cv2.THRESH_BINARY) 79 | blb = blb.astype(np.uint8) 80 | 81 | # Remove small objects based on connected components. 82 | # Use cv2.connectedComponentsWithStats to label regions and filter by area. 83 | num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(blb, connectivity=8) 84 | min_size = 3 # Minimum pixel area to keep an object 85 | blb_clean = np.zeros_like(blb) 86 | for label in range(1, num_labels): # label 0 is the background. 
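# stats[label, cv2.CC_STAT_AREA] holds the pixel area of this component;
# components smaller than min_size are treated as noise and dropped.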
87 | if stats[label, cv2.CC_STAT_AREA] >= min_size: 88 | blb_clean[labels == label] = 1 89 | 90 | h_map, v_map = hv_map 91 | # STEP 2: Normalize directional maps 92 | h_dir_norm = cv2.normalize( 93 | h_map, None, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX 94 | ).astype(np.float32) 95 | v_dir_norm = cv2.normalize( 96 | v_map, None, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX 97 | ).astype(np.float32) 98 | 99 | # STEP 3: Compute edges using Sobel operators 100 | # ksize = 11 # Kernel size for Sobel operators; adjust for edge sensitivity. 101 | sobelh = cv2.Sobel(h_dir_norm, cv2.CV_64F, dx=1, dy=0, ksize=ksize) 102 | sobelv = cv2.Sobel(v_dir_norm, cv2.CV_64F, dx=0, dy=1, ksize=ksize) 103 | 104 | # Normalize the edge responses and invert them to prepare for the "distance" map. 105 | sobelh_norm = 1 - cv2.normalize( 106 | sobelh, None, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX 107 | ) 108 | sobelv_norm = 1 - cv2.normalize( 109 | sobelv, None, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX 110 | ) 111 | 112 | # Combine edge images by taking the maximum value at each pixel. 113 | overall = np.maximum(sobelh_norm, sobelv_norm) 114 | 115 | # Remove non-nuclei regions from the edge map. 116 | overall = overall - (1 - blb_clean.astype(np.float32)) 117 | overall[overall < 0] = 0 # Set negative values to zero 118 | 119 | # STEP 4: Create an inverse “distance” map for watershed 120 | # The idea is to make the centers of nuclei correspond to local minima. 121 | # dist = (1.0 - overall) * blb_clean.astype(np.float32) 122 | # dist = -cv2.GaussianBlur(dist, (3, 3), 0) 123 | 124 | # STEP 5: Create markers for watershed (seed regions) 125 | # Identify the nucleus interior by thresholding the overall edge image. 126 | _, overall_bin = cv2.threshold(overall, 0.4, 1, cv2.THRESH_BINARY) 127 | overall_bin = overall_bin.astype(np.uint8) 128 | 129 | # Subtract the boundaries from the clean binary mask 130 | marker = blb_clean - overall_bin 131 | marker[marker < 0] = 0 132 | 133 | # Fill holes and do a morphological closing to smooth marker regions. 134 | kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5)) 135 | marker_closed = cv2.morphologyEx(marker, cv2.MORPH_CLOSE, kernel) 136 | 137 | # Again, remove tiny markers using connected component analysis. 138 | num_labels, markers, stats, _ = cv2.connectedComponentsWithStats( 139 | marker_closed, connectivity=8 140 | ) 141 | object_size = 10 # Minimum size (in pixels) for a marker region 142 | markers_clean = np.zeros_like(markers, dtype=np.int32) 143 | for label in range(1, num_labels): 144 | if stats[label, cv2.CC_STAT_AREA] >= object_size: 145 | markers_clean[markers == label] = label 146 | 147 | # STEP 6: Apply the Watershed algorithm using only OpenCV 148 | # The watershed function in OpenCV requires a 3-channel image. 149 | # Here, we build a dummy 3-channel (RGB) image from our binary mask (for visualization/masking purposes). 150 | dummy_img = cv2.cvtColor((blb_clean * 255).astype(np.uint8), cv2.COLOR_GRAY2BGR) 151 | 152 | # Watershed modifies the marker image in place. 153 | # The boundaries between segmented regions will be marked with -1. 
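# Input convention for cv2.watershed: positive integers in ``markers_clean`` are seed regions,
# 0 marks pixels still to be assigned; flooding starts from the seeds and fills the unknown pixels.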
154 | cv2.watershed(dummy_img, markers_clean) 155 | 156 | unique_labels = np.unique(markers_clean) 157 | final_seg = np.zeros_like(markers_clean, dtype=np.int32) 158 | cells = [] 159 | nucleus_size_min, nucleus_size_max = nucleus_size 160 | for lbl in unique_labels: 161 | if lbl <= 1: # Skip background (-1) and unknown (1) 162 | continue 163 | mask = markers_clean == lbl 164 | x, y = np.where(mask) 165 | area = len(x) 166 | 167 | if nucleus_size_min <= area <= nucleus_size_max: 168 | probs = type_prob_map[:, x, y].mean(1) 169 | class_ix = np.argmax(probs) 170 | class_prob = type_prob_map[class_ix, x, y].mean() 171 | m = Mask.from_array(mask.astype(np.uint8)) 172 | poly = m.to_polygons()[0] 173 | cells.append([CLASS_MAPPING[class_ix + 1], class_prob, poly]) 174 | final_seg[markers_clean == lbl] = lbl 175 | return gpd.GeoDataFrame(cells, columns=["name", "prob", "geometry"]) 176 | -------------------------------------------------------------------------------- /src/lazyslide/models/segmentation/postprocess.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import geopandas as gpd 3 | import numpy as np 4 | 5 | 6 | def instanseg_postprocess( 7 | mask: np.ndarray, 8 | ): 9 | """ 10 | Postprocess the mask to get the cell polygons. 11 | 12 | The feature of each cell is average-pooling the feature map within the cell's bounding box. 13 | 14 | Parameters 15 | ---------- 16 | mask: np.ndarray 17 | The mask array. 18 | 19 | """ 20 | from lazyslide.cv import MultiLabelMask 21 | 22 | mmask = MultiLabelMask(mask) 23 | polys = mmask.to_polygons(min_area=5, detect_holes=False) 24 | cells = [] 25 | for k, vs in polys.items(): 26 | if len(vs) == 0: 27 | continue 28 | elif len(vs) == 1: 29 | cell = vs[0] 30 | else: 31 | # Get the largest polygon 32 | svs = sorted(vs, key=lambda x: x.area) 33 | cell = svs[-1] 34 | 35 | cells.append(cell) 36 | 37 | container = {"geometry": cells} 38 | return gpd.GeoDataFrame(container) 39 | 40 | 41 | def semanticseg_postprocess( 42 | probs: np.ndarray, 43 | ignore_index: list[int] = None, 44 | min_area: int = 5, 45 | mapping: dict = None, 46 | ): 47 | from lazyslide.cv import MultiLabelMask 48 | 49 | mask = np.argmax(probs, axis=0).astype(np.uint8) 50 | mmask = MultiLabelMask(mask) 51 | polys = mmask.to_polygons(ignore_index=ignore_index, min_area=min_area) 52 | data = [] 53 | for k, vs in polys.items(): 54 | for v in vs: 55 | empty_mask = np.zeros_like(mask) 56 | 57 | cv2.drawContours( # noqa 58 | empty_mask, 59 | [np.array(v.exterior.coords).astype(np.int32)], 60 | -1, 61 | 1, 62 | thickness=cv2.FILLED, 63 | ) 64 | 65 | prob = np.mean(probs[k][empty_mask == 1]) 66 | class_name = k 67 | if mapping is not None: 68 | class_name = mapping[k] 69 | data.append([class_name, prob, v]) 70 | 71 | return gpd.GeoDataFrame(data, columns=["class", "prob", "geometry"]) 72 | -------------------------------------------------------------------------------- /src/lazyslide/models/segmentation/sam.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from ..base import SegmentationModel 4 | 5 | 6 | class SAM(SegmentationModel): 7 | SAM_VARIENTS = [ 8 | "facebook/sam-vit-base", 9 | "facebook/sam-vit-large", 10 | "facebook/sam-vit-huge", 11 | ] 12 | 13 | SAM_HQ_VARIENTS = [ 14 | "syscv-community/sam-hq-vit-base", 15 | "syscv-community/sam-hq-vit-large", 16 | "syscv-community/sam-hq-vit-huge", 17 | ] 18 | 19 | def __init__(self, variant="facebook/sam-vit-base", model_path=None, token=None): 
20 | self.variant = variant 21 | if variant in self.SAM_VARIENTS: 22 | from transformers import SamModel, SamProcessor 23 | 24 | self.model = SamModel.from_pretrained(variant, use_auth_token=token) 25 | self.processor = SamProcessor.from_pretrained(variant, use_auth_token=token) 26 | self._is_hq = False 27 | 28 | elif variant in self.SAM_HQ_VARIENTS: 29 | from transformers import SamHQModel, SamHQProcessor 30 | 31 | self.model = SamHQModel.from_pretrained(variant, use_auth_token=token) 32 | self.processor = SamHQProcessor.from_pretrained( 33 | variant, use_auth_token=token 34 | ) 35 | self._is_hq = True 36 | else: 37 | raise ValueError( 38 | f"Unsupported SAM variant: {variant}. " 39 | f"Choose from {self.SAM_VARIENTS + self.SAM_HQ_VARIENTS}." 40 | ) 41 | 42 | def get_transform(self): 43 | return self.processor.image_processor 44 | 45 | @torch.inference_mode() 46 | def get_image_embedding(self, image) -> torch.Tensor: 47 | """ 48 | Get the image embedding from the SAM model. 49 | 50 | Returns: 51 | torch.Tensor: Image embedding tensor of shape (1, C, H, W). 52 | 53 | """ 54 | img_inputs = self.processor(image, return_tensors="pt").to(self.model.device) 55 | 56 | with torch.inference_mode(): 57 | embeddings = self.model.get_image_embeddings(img_inputs["pixel_values"]) 58 | if self._is_hq: 59 | embeddings = embeddings[0] 60 | return embeddings.detach().cpu() 61 | 62 | @torch.inference_mode() 63 | def segment( 64 | self, 65 | image, 66 | image_embedding=None, 67 | input_points=None, 68 | input_labels=None, 69 | input_boxes=None, 70 | segmentation_maps=None, 71 | multimask_output=False, 72 | ) -> torch.Tensor: 73 | """ 74 | Segment the input image using the SAM model. 75 | 76 | Args: 77 | image (torch.Tensor): Input image tensor of shape (C, H, W). 78 | 79 | Returns: 80 | torch.Tensor: Segmentation mask tensor of shape (H, W). 
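Example (illustrative sketch; the input image and the point prompt coordinates are assumed):

>>> sam = SAM("facebook/sam-vit-base")
>>> masks = sam.segment(image, input_points=[[[450, 600]]])
>>> # ``masks`` holds the predicted masks resized back to the original image size.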
81 | """ 82 | inputs = self.processor( 83 | image, 84 | input_points=input_points, 85 | input_labels=input_labels, 86 | input_boxes=input_boxes, 87 | segmentation_maps=segmentation_maps, 88 | return_tensors="pt", 89 | ) 90 | if image_embedding is not None: 91 | del inputs["pixel_values"] 92 | inputs["image_embeddings"] = image_embedding 93 | 94 | for k, v in inputs.items(): 95 | if isinstance(v, torch.Tensor) and v.dtype == torch.float64: 96 | inputs[k] = v.to(dtype=torch.float32) 97 | 98 | inputs = inputs.to(self.model.device) 99 | outputs = self.model(**inputs, multimask_output=multimask_output) 100 | masks = self.processor.image_processor.post_process_masks( 101 | outputs.pred_masks.cpu(), 102 | inputs["original_sizes"].cpu(), 103 | inputs["reshaped_input_sizes"].cpu(), 104 | mask_threshold=0, 105 | ) 106 | return masks[0] 107 | -------------------------------------------------------------------------------- /src/lazyslide/models/segmentation/smp.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import Callable 4 | 5 | import torch 6 | 7 | from lazyslide.models.base import SegmentationModel 8 | from lazyslide.models.segmentation.postprocess import semanticseg_postprocess 9 | 10 | 11 | class SMPBase(SegmentationModel): 12 | """This is a base class for any models from segmentation models pytorch""" 13 | 14 | def __init__( 15 | self, 16 | arch: str = "unetplusplus", 17 | encoder_name: str = "timm-efficientnet-b0", 18 | encoder_weights: str = "imagenet", 19 | in_channels: int = 3, 20 | classes: int = 3, 21 | **kwargs, 22 | ): 23 | try: 24 | import segmentation_models_pytorch as smp 25 | except ModuleNotFoundError: 26 | raise ModuleNotFoundError( 27 | "Please install segmentation_models_pytorch to use this model." 
28 | ) 29 | 30 | self.encoder_name = encoder_name 31 | self.encoder_weights = encoder_weights 32 | 33 | self.model = smp.create_model( 34 | arch=arch, 35 | encoder_name=encoder_name, 36 | encoder_weights=encoder_weights, 37 | in_channels=in_channels, 38 | classes=classes, 39 | **kwargs, 40 | ) 41 | 42 | def get_transform(self): 43 | from torchvision.transforms.v2 import Compose, ToImage, ToDtype, Normalize 44 | 45 | # default_fn = smp.encoders.get_preprocessing_fn( 46 | # self.encoder_name, self.encoder_weights 47 | # ) 48 | 49 | return Compose( 50 | [ 51 | ToImage(), 52 | ToDtype(torch.float32, scale=True), 53 | Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)), 54 | # default_fn 55 | ] 56 | ) 57 | 58 | def get_postprocess(self) -> Callable: 59 | return semanticseg_postprocess 60 | -------------------------------------------------------------------------------- /src/lazyslide/models/vision/__init__.py: -------------------------------------------------------------------------------- 1 | from .conch import CONCHVision 2 | from .gigapath import GigaPath, GigaPathSlideEncoder 3 | from .plip import PLIPVision 4 | from .uni import UNI, UNI2 5 | from .virchow import Virchow, Virchow2 6 | from .phikon import Phikon, PhikonV2 7 | from .h_optimus import HOptimus0, HOptimus1, H0Mini 8 | from .midnight import Midnight 9 | from .hibou import HibouB, HibouL 10 | -------------------------------------------------------------------------------- /src/lazyslide/models/vision/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rendeirolab/LazySlide/f39634cc994b3098b0933075b9d25ecd99b9014e/src/lazyslide/models/vision/base.py -------------------------------------------------------------------------------- /src/lazyslide/models/vision/conch.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from lazyslide.models._utils import hf_access 4 | from lazyslide.models.base import ImageModel 5 | 6 | 7 | class CONCHVision(ImageModel): 8 | def __init__(self, model_path=None, token=None): 9 | try: 10 | from conch.open_clip_custom import create_model_from_pretrained 11 | except ImportError: 12 | raise ImportError( 13 | "Conch is not installed. You can install it using " 14 | "`pip install git+https://github.com/mahmoodlab/CONCH.git`." 
15 | ) 16 | 17 | with hf_access("conch_ViT-B-16"): 18 | self.model, self.processor = create_model_from_pretrained( 19 | "conch_ViT-B-16", model_path, hf_auth_token=token 20 | ) 21 | 22 | def get_transform(self): 23 | return None 24 | 25 | @torch.inference_mode() 26 | def encode_image(self, image): 27 | if not isinstance(image, torch.Tensor): 28 | image = self.processor(image) 29 | if image.dim() == 3: 30 | image = image.unsqueeze(0) 31 | 32 | image_feature = self.model.encode_image( 33 | image, normalize=False, proj_contrast=False 34 | ) 35 | return image_feature 36 | -------------------------------------------------------------------------------- /src/lazyslide/models/vision/gigapath.py: -------------------------------------------------------------------------------- 1 | from platformdirs import user_cache_path 2 | 3 | from lazyslide.models.base import SlideEncoderModel, TimmModel 4 | 5 | 6 | class GigaPath(TimmModel): 7 | name = "GigaPath" 8 | 9 | def __init__(self, model_path=None, token=None): 10 | # Version check 11 | import timm 12 | 13 | try: 14 | from packaging import version 15 | 16 | timm_version = version.parse(timm.__version__) 17 | minimum_version = version.parse("1.0.3") 18 | if timm_version < minimum_version: 19 | raise ImportError( 20 | f"Gigapath needs timm >= 1.0.3. You have version {timm_version}." 21 | f"Run `pip install --upgrade timm` to install the latest version." 22 | ) 23 | # If packaging is not installed, skip the version check 24 | except ModuleNotFoundError: 25 | pass 26 | 27 | super().__init__("hf_hub:prov-gigapath/prov-gigapath", token=token) 28 | 29 | 30 | class GigaPathSlideEncoder(SlideEncoderModel): 31 | def __init__(self, model_path=None, token=None): 32 | from huggingface_hub import login 33 | 34 | super().__init__() 35 | 36 | if token is not None: 37 | login(token) 38 | 39 | from gigapath.slide_encoder import create_model 40 | 41 | model = create_model( 42 | "hf_hub:prov-gigapath/prov-gigapath", 43 | "gigapath_slide_enc12l768d", 44 | 1536, 45 | local_dir=str(user_cache_path("lazyslide")), 46 | ) 47 | self.model = model 48 | 49 | def encode_slide(self, tile_embed, coordinates): 50 | return self.model(tile_embed, coordinates).squeeze() 51 | -------------------------------------------------------------------------------- /src/lazyslide/models/vision/h_optimus.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from lazyslide.models.base import TimmModel 4 | 5 | 6 | def get_hoptimus_transform(): 7 | from torchvision.transforms.v2 import ( 8 | Compose, 9 | ToImage, 10 | Resize, 11 | CenterCrop, 12 | ToDtype, 13 | Normalize, 14 | ) 15 | from torchvision.transforms import InterpolationMode 16 | 17 | return Compose( 18 | [ 19 | ToImage(), 20 | Resize( 21 | size=(224, 224), 22 | interpolation=InterpolationMode.BICUBIC, 23 | max_size=None, 24 | antialias=True, 25 | ), 26 | CenterCrop(224), 27 | ToDtype(dtype=torch.float32, scale=True), 28 | Normalize( 29 | mean=(0.707223, 0.578729, 0.703617), std=(0.211883, 0.230117, 0.177517) 30 | ), 31 | ] 32 | ) 33 | 34 | 35 | class HOptimus0(TimmModel): 36 | name = "H-optimus-0" 37 | 38 | def __init__(self, model_path=None, token=None): 39 | super().__init__( 40 | "hf-hub:bioptimus/H-optimus-0", 41 | pretrained=True, 42 | init_values=1e-5, 43 | dynamic_img_size=False, 44 | token=token, 45 | ) 46 | 47 | def get_transform(self): 48 | return get_hoptimus_transform() 49 | 50 | 51 | class HOptimus1(TimmModel): 52 | name = "H-optimus-1" 53 | 54 | def __init__(self, 
model_path=None, token=None): 55 | super().__init__( 56 | "hf-hub:bioptimus/H-optimus-1", 57 | pretrained=True, 58 | init_values=1e-5, 59 | dynamic_img_size=False, 60 | token=token, 61 | ) 62 | 63 | def get_transform(self): 64 | return get_hoptimus_transform() 65 | 66 | 67 | class H0Mini(TimmModel): 68 | name = "H0-mini" 69 | 70 | def __init__(self, model_path=None, token=None): 71 | import timm 72 | 73 | super().__init__( 74 | "hf-hub:bioptimus/H0-mini", 75 | pretrained=True, 76 | mlp_layer=timm.layers.SwiGLUPacked, 77 | act_layer=torch.nn.SiLU, 78 | token=token, 79 | ) 80 | 81 | def get_transform(self): 82 | return get_hoptimus_transform() 83 | 84 | @torch.inference_mode() 85 | def encode_image(self, image): 86 | output = self.model(image) 87 | # CLS token features (1, 768): 88 | cls_features = output[:, 0] 89 | # Patch token features (1, 256, 768): 90 | patch_token_features = output[:, self.model.num_prefix_tokens :] 91 | # Concatenate the CLS token features with the mean of the patch token 92 | # features (1, 1536): 93 | concatenated_features = torch.cat( 94 | [cls_features, patch_token_features.mean(1)], dim=-1 95 | ) 96 | return concatenated_features.cpu().detach().numpy() 97 | -------------------------------------------------------------------------------- /src/lazyslide/models/vision/hibou.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from lazyslide.models._utils import hf_access 4 | from lazyslide.models.base import ImageModel 5 | 6 | 7 | class Hibou(ImageModel): 8 | def __init__(self, hibou_version: str, model_path=None, token=None): 9 | try: 10 | from transformers import AutoModel 11 | except ImportError: 12 | raise ImportError( 13 | "transformers is not installed. You can install it using " 14 | "`pip install transformers`." 
15 | ) 16 | 17 | self.version = hibou_version 18 | 19 | with hf_access(f"histai/{self.version}"): 20 | self.model = AutoModel.from_pretrained( 21 | f"histai/{self.version}", trust_remote_code=True 22 | ) 23 | 24 | def get_transform(self): 25 | from torchvision.transforms.v2 import ( 26 | Compose, 27 | ToImage, 28 | Resize, 29 | CenterCrop, 30 | ToDtype, 31 | Normalize, 32 | ) 33 | from torchvision.transforms import InterpolationMode 34 | 35 | return Compose( 36 | [ 37 | ToImage(), 38 | Resize( 39 | size=(224, 224), 40 | interpolation=InterpolationMode.BICUBIC, 41 | max_size=None, 42 | antialias=True, 43 | ), 44 | CenterCrop(224), 45 | ToDtype(dtype=torch.float32, scale=True), 46 | Normalize(mean=(0.7068, 0.5755, 0.722), std=(0.195, 0.2316, 0.1816)), 47 | ] 48 | ) 49 | 50 | @torch.inference_mode() 51 | def encode_image(self, image): 52 | image_features = self.model(pixel_values=image) 53 | return image_features.pooler_output 54 | 55 | 56 | class HibouB(Hibou): 57 | def __init__(self, token=None, model_path=None): 58 | super().__init__(hibou_version="hibou-b", token=token) 59 | 60 | 61 | class HibouL(Hibou): 62 | def __init__(self, token=None, model_path=None): 63 | super().__init__(hibou_version="hibou-l", token=token) 64 | -------------------------------------------------------------------------------- /src/lazyslide/models/vision/midnight.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from lazyslide.models._utils import hf_access 4 | from lazyslide.models.base import ImageModel 5 | 6 | 7 | class Midnight(ImageModel): 8 | def __init__(self, model_path=None, token=None): 9 | try: 10 | from transformers import AutoImageProcessor, AutoModel 11 | except ImportError: 12 | raise ImportError( 13 | "transformers is not installed. You can install it using " 14 | "`pip install transformers`." 
15 | ) 16 | 17 | with hf_access("kaiko-ai/midnight"): 18 | self.model = AutoModel.from_pretrained("kaiko-ai/midnight") 19 | 20 | def get_transform(self): 21 | from torchvision.transforms import v2 22 | 23 | return v2.Compose( 24 | [ 25 | v2.ToImage(), 26 | v2.Resize(224), 27 | v2.CenterCrop(224), 28 | v2.ToDtype(dtype=torch.float32, scale=True), 29 | v2.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)), 30 | ] 31 | ) 32 | 33 | @staticmethod 34 | def extract_classification_embedding(tensor): 35 | cls_embedding = tensor[:, 0, :] 36 | patch_embedding = tensor[:, 1:, :].mean(dim=1) 37 | return torch.cat([cls_embedding, patch_embedding], dim=-1) 38 | 39 | @torch.inference_mode() 40 | def encode_image(self, image): 41 | output = self.model(image).last_hidden_state 42 | image_feature = self.extract_classification_embedding(output) 43 | return image_feature 44 | -------------------------------------------------------------------------------- /src/lazyslide/models/vision/phikon.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from lazyslide.models._utils import hf_access 5 | from lazyslide.models.base import ImageModel 6 | 7 | 8 | class Phikon(ImageModel): 9 | name = "phikon" 10 | 11 | def __init__(self, model_path=None, token=None): 12 | from transformers import AutoImageProcessor, ViTModel 13 | 14 | with hf_access("owkin/phikon"): 15 | self.model = ViTModel.from_pretrained( 16 | "owkin/phikon", 17 | add_pooling_layer=False, 18 | use_auth_token=token, 19 | ) 20 | self.img_processor = AutoImageProcessor.from_pretrained( 21 | "owkin/phikon", use_fast=True 22 | ) 23 | 24 | def get_transform(self): 25 | return None 26 | 27 | @torch.inference_mode() 28 | def encode_image(self, image) -> np.ndarray[np.float32]: 29 | inputs = self.img_processor(images=image, return_tensors="pt") 30 | inputs = {k: v.to(self.model.device) for k, v in inputs.items()} 31 | return self.model(**inputs).last_hidden_state[:, 0, :].cpu().detach().numpy() 32 | 33 | 34 | class PhikonV2(ImageModel): 35 | name = "phikon-v2" 36 | 37 | def __init__(self, model_path=None, token=None): 38 | from transformers import AutoImageProcessor, AutoModel 39 | 40 | with hf_access("owkin/phikon-v2"): 41 | self.model = AutoModel.from_pretrained( 42 | "owkin/phikon-v2", 43 | add_pooling_layer=False, 44 | use_auth_token=token, 45 | ) 46 | self.img_processor = AutoImageProcessor.from_pretrained( 47 | "owkin/phikon-v2", use_fast=True 48 | ) 49 | 50 | def get_transform(self): 51 | return None 52 | 53 | @torch.inference_mode() 54 | def encode_image(self, image) -> np.ndarray[np.float32]: 55 | inputs = self.img_processor(images=image, return_tensors="pt") 56 | inputs = {k: v.to(self.model.device) for k, v in inputs.items()} 57 | return self.model(**inputs).last_hidden_state[:, 0, :].cpu().detach().numpy() 58 | -------------------------------------------------------------------------------- /src/lazyslide/models/vision/plip.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from lazyslide.models._utils import hf_access 4 | from lazyslide.models.base import ImageModel 5 | 6 | 7 | class PLIPVision(ImageModel): 8 | def __init__(self, model_path=None, token=None): 9 | try: 10 | from transformers import CLIPVisionModelWithProjection, CLIPProcessor 11 | except ImportError: 12 | raise ImportError( 13 | "Please install the 'transformers' package to use the PLIP model" 14 | ) 15 | 16 | super().__init__() 17 | 18 | if 
model_path is None: 19 | model_path = "vinid/plip" 20 | 21 | with hf_access(model_path): 22 | self.model = CLIPVisionModelWithProjection.from_pretrained( 23 | model_path, use_auth_token=token 24 | ) 25 | self.processor = CLIPProcessor.from_pretrained( 26 | model_path, use_auth_token=token 27 | ) 28 | 29 | def get_transform(self): 30 | return None 31 | 32 | @torch.inference_mode() 33 | def encode_image(self, image): 34 | inputs = self.processor(images=image, return_tensors="pt") 35 | inputs = {k: v.to(self.model.device) for k, v in inputs.items()} 36 | image_features = self.model.get_image_features(**inputs) 37 | return image_features 38 | -------------------------------------------------------------------------------- /src/lazyslide/models/vision/uni.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from lazyslide.models.base import TimmModel 4 | 5 | 6 | class UNI(TimmModel): 7 | def __init__(self, model_path=None, token=None): 8 | # from huggingface_hub import hf_hub_download 9 | # model_path = hf_hub_download("MahmoodLab/UNI", filename="pytorch_model.bin") 10 | 11 | if model_path is not None: 12 | super().__init__( 13 | "vit_large_patch16_224", 14 | token=token, 15 | img_size=224, 16 | patch_size=16, 17 | init_values=1e-5, 18 | num_classes=0, 19 | dynamic_img_size=True, 20 | pretrained=False, 21 | ) 22 | self.model.load_state_dict(torch.load(model_path, map_location="cpu")) 23 | else: 24 | super().__init__( 25 | "hf-hub:MahmoodLab/uni", 26 | token=token, 27 | init_values=1e-5, 28 | dynamic_img_size=True, 29 | ) 30 | 31 | 32 | class UNI2(TimmModel): 33 | def __init__(self, model_path=None, token=None): 34 | import timm 35 | 36 | timm_kwargs = { 37 | "img_size": 224, 38 | "patch_size": 14, 39 | "depth": 24, 40 | "num_heads": 24, 41 | "init_values": 1e-5, 42 | "embed_dim": 1536, 43 | "mlp_ratio": 2.66667 * 2, 44 | "num_classes": 0, 45 | "no_embed_class": True, 46 | "mlp_layer": timm.layers.SwiGLUPacked, 47 | "act_layer": torch.nn.SiLU, 48 | "reg_tokens": 8, 49 | "dynamic_img_size": True, 50 | } 51 | 52 | # from huggingface_hub import hf_hub_download 53 | # model_path = hf_hub_download("MahmoodLab/UNI2-h", filename="pytorch_model.bin") 54 | 55 | if model_path is not None: 56 | super().__init__( 57 | "vit_giant_patch14_224", token=token, pretrained=False, **timm_kwargs 58 | ) 59 | self.model.load_state_dict( 60 | torch.load(model_path, map_location="cpu"), strict=True 61 | ) 62 | else: 63 | super().__init__("hf-hub:MahmoodLab/UNI2-h", **timm_kwargs) 64 | -------------------------------------------------------------------------------- /src/lazyslide/models/vision/virchow.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from lazyslide.models.base import TimmModel 4 | 5 | 6 | class Virchow(TimmModel): 7 | _hf_hub_id = "paige-ai/Virchow" 8 | 9 | def __init__(self, model_path=None, token=None): 10 | from timm.layers import SwiGLUPacked 11 | 12 | super().__init__( 13 | f"hf-hub:{self._hf_hub_id}", 14 | pretrained=True, 15 | mlp_layer=SwiGLUPacked, 16 | act_layer=torch.nn.SiLU, 17 | token=token, 18 | ) 19 | 20 | @torch.inference_mode() 21 | def encode_image(self, img): 22 | output = self.model(img) 23 | # CLS token features (1, 768): 24 | cls_features = output[:, 0] 25 | # Patch token features (1, 256, 768): 26 | patch_features = output[:, self.model.num_prefix_tokens :] 27 | return torch.cat((cls_features, patch_features.mean(1)), dim=-1) 28 | 29 | 30 | class Virchow2(Virchow): 
31 | _hf_hub_id = "paige-ai/Virchow2" 32 | -------------------------------------------------------------------------------- /src/lazyslide/plotting/__init__.py: -------------------------------------------------------------------------------- 1 | from ._api import tissue, tiles, annotations 2 | from ._wsi_viewer import WSIViewer 3 | -------------------------------------------------------------------------------- /src/lazyslide/preprocess/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ["find_tissues", "score_tissues", "tile_tissues", "score_tiles"] 2 | 3 | from ._graph import tile_graph 4 | from ._tiles import tile_tissues, score_tiles 5 | from ._tissue import find_tissues, score_tissues 6 | -------------------------------------------------------------------------------- /src/lazyslide/preprocess/_graph.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import warnings 4 | from itertools import chain 5 | 6 | import numpy as np 7 | import pandas as pd 8 | from anndata import AnnData 9 | from numba import njit 10 | from scipy.sparse import csr_matrix, spmatrix, isspmatrix_csr, SparseEfficiencyWarning 11 | from scipy.spatial import Delaunay 12 | from wsidata import WSIData 13 | from wsidata.io import add_table 14 | 15 | from lazyslide._const import Key 16 | 17 | 18 | def tile_graph( 19 | wsi: WSIData, 20 | n_neighs: int = 6, 21 | n_rings: int = 1, 22 | delaunay=False, 23 | transform: str = None, 24 | set_diag: bool = False, 25 | tile_key: str = Key.tiles, 26 | table_key: str = None, 27 | ): 28 | """ 29 | Compute the spatial graph of the tiles. 30 | 31 | Parameters 32 | ---------- 33 | wsi : :class:`WSIData ` 34 | The WSIData object to work on. 35 | n_neighs : int, default: 6 36 | The number of neighbors to consider. 37 | n_rings : int, default: 1 38 | The number of rings to consider. 39 | delaunay : bool, default: False 40 | Whether to use Delaunay triangulation. 41 | transform : str, default: None 42 | The transformation to apply to the graph. 43 | set_diag : bool, default: False 44 | Whether to set the diagonal to 1. 45 | tile_key : str, default: 'tiles' 46 | The tile key. 47 | table_key : str, default: None 48 | The table key to store the graph. 49 | 50 | Returns 51 | ------- 52 | The tiles with spatial connectivities and distances in an anndata format. 53 | 54 | - The feature spatial connectivities and distances will be added to :bdg-danger:`tables` slot of the spatial data object. 55 | 56 | Examples 57 | -------- 58 | .. 
code-block:: python 59 | 60 | >>> import lazyslide as zs 61 | >>> wsi = zs.datasets.sample() 62 | >>> zs.pp.find_tissues(wsi) 63 | >>> zs.pp.tile_graph(wsi) 64 | >>> wsi['tile_graph'] 65 | 66 | 67 | """ 68 | coords = wsi[tile_key].bounds[["minx", "miny"]].values 69 | Adj, Dst = _spatial_neighbor( 70 | coords, n_neighs, delaunay, n_rings, transform, set_diag 71 | ) 72 | 73 | conns_key = "spatial_connectivities" 74 | dists_key = "spatial_distances" 75 | neighbors_dict = { 76 | "connectivities_key": conns_key, 77 | "distances_key": dists_key, 78 | "params": { 79 | "n_neighbors": n_neighs, 80 | "transform": transform, 81 | }, 82 | } 83 | # TODO: Store in a anndata object 84 | if table_key is None: 85 | table_key = Key.tile_graph(tile_key) 86 | if table_key not in wsi: 87 | table = AnnData( 88 | obs=pd.DataFrame(index=np.arange(coords.shape[0], dtype=int).astype(str)), 89 | obsp={conns_key: Adj, dists_key: Dst}, 90 | uns={"spatial": neighbors_dict}, 91 | ) 92 | add_table(wsi, table_key, table) 93 | else: 94 | table = wsi[table_key] 95 | table.obsp[conns_key] = Adj 96 | table.obsp[dists_key] = Dst 97 | table.uns["spatial"] = neighbors_dict 98 | 99 | 100 | def _spatial_neighbor( 101 | coords, 102 | n_neighs: int = 6, 103 | delaunay: bool = False, 104 | n_rings: int = 1, 105 | transform: str = None, 106 | set_diag: bool = False, 107 | ) -> tuple[csr_matrix, csr_matrix]: 108 | with warnings.catch_warnings(): 109 | warnings.simplefilter("ignore", SparseEfficiencyWarning) 110 | Adj, Dst = _build_grid( 111 | coords, 112 | n_neighs=n_neighs, 113 | n_rings=n_rings, 114 | delaunay=delaunay, 115 | set_diag=set_diag, 116 | ) 117 | 118 | Adj.eliminate_zeros() 119 | Dst.eliminate_zeros() 120 | 121 | # check transform 122 | if transform == "spectral": 123 | Adj = _transform_a_spectral(Adj) 124 | elif transform == "cosine": 125 | Adj = _transform_a_cosine(Adj) 126 | elif transform == "none" or transform is None: 127 | pass 128 | else: 129 | raise NotImplementedError(f"Transform `{transform}` is not yet implemented.") 130 | 131 | return Adj, Dst 132 | 133 | 134 | def _build_grid( 135 | coords, 136 | n_neighs: int, 137 | n_rings: int, 138 | delaunay: bool = False, 139 | set_diag: bool = False, 140 | ) -> tuple[csr_matrix, csr_matrix]: 141 | if n_rings > 1: 142 | Adj: csr_matrix = _build_connectivity( 143 | coords, 144 | n_neighs=n_neighs, 145 | neigh_correct=True, 146 | set_diag=True, 147 | delaunay=delaunay, 148 | return_distance=False, 149 | ) 150 | Res, Walk = Adj, Adj 151 | for i in range(n_rings - 1): 152 | Walk = Walk @ Adj 153 | Walk[Res.nonzero()] = 0.0 154 | Walk.eliminate_zeros() 155 | Walk.data[:] = i + 2.0 156 | Res = Res + Walk 157 | Adj = Res 158 | Adj.setdiag(float(set_diag)) 159 | Adj.eliminate_zeros() 160 | 161 | Dst = Adj.copy() 162 | Adj.data[:] = 1.0 163 | else: 164 | Adj = _build_connectivity( 165 | coords, 166 | n_neighs=n_neighs, 167 | neigh_correct=True, 168 | delaunay=delaunay, 169 | set_diag=set_diag, 170 | ) 171 | Dst = Adj.copy() 172 | 173 | Dst.setdiag(0.0) 174 | 175 | return Adj, Dst 176 | 177 | 178 | def _build_connectivity( 179 | coords, 180 | n_neighs: int, 181 | radius: float | tuple[float, float] | None = None, 182 | delaunay: bool = False, 183 | neigh_correct: bool = False, 184 | set_diag: bool = False, 185 | return_distance: bool = False, 186 | ) -> csr_matrix | tuple[csr_matrix, csr_matrix]: 187 | from sklearn.metrics import euclidean_distances 188 | from sklearn.neighbors import NearestNeighbors 189 | 190 | N = coords.shape[0] 191 | if delaunay: 192 | tri = Delaunay(coords) 
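# ``vertex_neighbor_vertices`` is a CSR-style pair: the neighbours of point i are
# ``indices[indptr[i]:indptr[i + 1]]``, which is why the pair can be fed directly
# into ``csr_matrix`` below.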
193 | indptr, indices = tri.vertex_neighbor_vertices 194 | Adj = csr_matrix( 195 | (np.ones_like(indices, dtype=np.float64), indices, indptr), shape=(N, N) 196 | ) 197 | 198 | if return_distance: 199 | # fmt: off 200 | dists = np.array(list(chain(*( 201 | euclidean_distances(coords[indices[indptr[i]: indptr[i + 1]], :], coords[np.newaxis, i, :]) 202 | for i in range(N) 203 | if len(indices[indptr[i]: indptr[i + 1]]) 204 | )))).squeeze() 205 | Dst = csr_matrix((dists, indices, indptr), shape=(N, N)) 206 | # fmt: on 207 | else: 208 | r = ( 209 | 1 210 | if radius is None 211 | else radius 212 | if isinstance(radius, (int, float)) 213 | else max(radius) 214 | ) 215 | tree = NearestNeighbors(n_neighbors=n_neighs, radius=r, metric="euclidean") 216 | tree.fit(coords) 217 | 218 | if radius is None: 219 | dists, col_indices = tree.kneighbors() 220 | dists, col_indices = dists.reshape(-1), col_indices.reshape(-1) 221 | row_indices = np.repeat(np.arange(N), n_neighs) 222 | if neigh_correct: 223 | dist_cutoff = np.median(dists) * 1.3 # there's a small amount of sway 224 | mask = dists < dist_cutoff 225 | row_indices, col_indices, dists = ( 226 | row_indices[mask], 227 | col_indices[mask], 228 | dists[mask], 229 | ) 230 | else: 231 | dists, col_indices = tree.radius_neighbors() 232 | row_indices = np.repeat(np.arange(N), [len(x) for x in col_indices]) 233 | dists = np.concatenate(dists) 234 | col_indices = np.concatenate(col_indices) 235 | 236 | Adj = csr_matrix( 237 | (np.ones_like(row_indices, dtype=np.float64), (row_indices, col_indices)), 238 | shape=(N, N), 239 | ) 240 | if return_distance: 241 | Dst = csr_matrix((dists, (row_indices, col_indices)), shape=(N, N)) 242 | 243 | # radius-based filtering needs same indices/indptr: do not remove 0s 244 | Adj.setdiag(1.0 if set_diag else Adj.diagonal()) 245 | if return_distance: 246 | Dst.setdiag(0.0) 247 | return Adj, Dst 248 | 249 | return Adj 250 | 251 | 252 | @njit 253 | def outer(indices, indptr, degrees): 254 | res = np.empty_like(indices, dtype=np.float64) 255 | start = 0 256 | for i in range(len(indptr) - 1): 257 | ixs = indices[indptr[i] : indptr[i + 1]] 258 | res[start : start + len(ixs)] = degrees[i] * degrees[ixs] 259 | start += len(ixs) 260 | 261 | return res 262 | 263 | 264 | def _transform_a_spectral(a: spmatrix) -> spmatrix: 265 | if not isspmatrix_csr(a): 266 | a = a.tocsr() 267 | if not a.nnz: 268 | return a 269 | 270 | degrees = np.squeeze(np.array(np.sqrt(1.0 / a.sum(axis=0)))) 271 | a = a.multiply(outer(a.indices, a.indptr, degrees)) 272 | a.eliminate_zeros() 273 | 274 | return a 275 | 276 | 277 | def _transform_a_cosine(a: spmatrix) -> spmatrix: 278 | from sklearn.metrics.pairwise import cosine_similarity 279 | 280 | return cosine_similarity(a, dense_output=False) 281 | -------------------------------------------------------------------------------- /src/lazyslide/preprocess/_utils.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | from lazyslide.cv.scorer import ScorerBase 4 | 5 | Scorer = Union[ScorerBase, str] 6 | 7 | 8 | def get_scorer(scorers): 9 | from lazyslide.cv.scorer import ( 10 | ScorerBase, 11 | ComposeScorer, 12 | FocusLite, 13 | Contrast, 14 | Brightness, 15 | Redness, 16 | ) 17 | 18 | scorer_mapper = { 19 | "focus": FocusLite, 20 | "contrast": Contrast, 21 | "brightness": Brightness, 22 | "redness": Redness, 23 | } 24 | 25 | scorer_list = [] 26 | for s in scorers: 27 | if isinstance(s, ScorerBase): 28 | scorer_list.append(s) 29 | elif 
isinstance(s, str): 30 | scorer = scorer_mapper.get(s) 31 | if scorer is None: 32 | raise ValueError( 33 | f"Unknown scorer {s}, " 34 | f"available scorers are {'.'.join(scorer_mapper.keys())}" 35 | ) 36 | # The scorer should be initialized when used 37 | scorer_list.append(scorer()) 38 | else: 39 | raise TypeError(f"Unknown scorer type {type(s)}") 40 | compose_scorer = ComposeScorer(scorer_list) 41 | return compose_scorer 42 | -------------------------------------------------------------------------------- /src/lazyslide/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rendeirolab/LazySlide/f39634cc994b3098b0933075b9d25ecd99b9014e/src/lazyslide/py.typed -------------------------------------------------------------------------------- /src/lazyslide/segmentation/__init__.py: -------------------------------------------------------------------------------- 1 | from ._seg_runner import SegmentationRunner, semantic 2 | from ._cell import cells, nulite 3 | from ._artifact import artifact 4 | from ._tissue import tissue 5 | from ._zero_shot import zero_shot 6 | -------------------------------------------------------------------------------- /src/lazyslide/segmentation/_artifact.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import Literal 4 | 5 | from wsidata import WSIData 6 | from wsidata.io import add_shapes 7 | 8 | from lazyslide._const import Key 9 | from lazyslide._utils import get_torch_device 10 | from ._seg_runner import SegmentationRunner 11 | from ..models.segmentation import GrandQCArtifact 12 | 13 | # Define class mapping 14 | CLASS_MAPPING = { 15 | 1: "Normal Tissue", 16 | 2: "Fold", 17 | 3: "Dark spot & Foreign Object", 18 | 4: "PenMarking", 19 | 5: "Edge & Air Bubble", 20 | 6: "Out of Focus", 21 | 7: "Background", 22 | } 23 | 24 | 25 | def artifact( 26 | wsi: WSIData, 27 | tile_key: str, 28 | variants: Literal["grandqc_5x", "grandqc_7x", "grandqc_10x"] = "grandqc_7x", 29 | tissue_key: str = Key.tissue, 30 | batch_size: int = 4, 31 | num_workers: int = 0, 32 | device: str | None = None, 33 | key_added: str = "artifacts", 34 | ): 35 | """ 36 | Artifact segmentation for the whole slide image. 37 | 38 | Run GrandQC artifact segmentation model on the whole slide image. 39 | The model is trained on 512x512 tiles with mpp=1.5, 2, or 1. 40 | 41 | It can detect the following artifacts: 42 | - Fold 43 | - Darkspot & Foreign Object 44 | - Pen Marking 45 | - Edge & Air Bubble 46 | - Out of Focus 47 | 48 | Parameters 49 | ---------- 50 | wsi : WSIData 51 | The whole slide image data. 52 | tile_key : str 53 | The key of the tile table. 54 | variants : {"grandqc_5x", "grandqc_7x", "grandqc_10x"}, default: "grandqc_7x" 55 | The model variant to use for segmentation. 56 | tissue_key : str, default: Key.tissue 57 | The key of the tissue table. 58 | batch_size : int, default: 4 59 | The batch size for segmentation. 60 | num_workers : int, default: 0 61 | The number of workers for data loading. 62 | device : str, default: None 63 | The device for the model. 64 | key_added : str, default: "artifacts" 65 | The key for the added artifact shapes. 66 | 67 | """ 68 | if tissue_key not in wsi: 69 | raise ValueError( 70 | "Tissue segmentation is required before artifact segmentation." 71 | "Please run `pp.find_tissues` first." 
72 | ) 73 | 74 | if device is None: 75 | device = get_torch_device() 76 | 77 | model_mpp = { 78 | "grandqc_5x": 2, 79 | "grandqc_7x": 1.5, 80 | "grandqc_10x": 1, 81 | } 82 | 83 | mpp = model_mpp[variants] 84 | 85 | if tile_key is not None: 86 | # Check if the tile spec is compatible with the model 87 | spec = wsi.tile_spec(tile_key) 88 | if spec is None: 89 | raise ValueError(f"Tiles or tile spec for {tile_key} not found.") 90 | if spec.mpp != mpp: 91 | raise ValueError( 92 | f"Tile spec mpp {spec.mpp} is not " 93 | f"compatible with the model mpp {mpp}" 94 | ) 95 | if spec.width != 512 or spec.height != 512: 96 | raise ValueError("Tile should be 512x512.") 97 | 98 | model = GrandQCArtifact(model=variants.lstrip("grandqc_")) 99 | 100 | runner = SegmentationRunner( 101 | wsi, 102 | model, 103 | tile_key, 104 | transform=None, 105 | batch_size=batch_size, 106 | num_workers=num_workers, 107 | device=device, 108 | class_col="class", 109 | postprocess_kws={ 110 | "ignore_index": [0, 1, 7], # Ignore background, normal tissue 111 | "mapping": CLASS_MAPPING, 112 | }, 113 | ) 114 | arts = runner.run() 115 | add_shapes(wsi, key=key_added, shapes=arts) 116 | -------------------------------------------------------------------------------- /src/lazyslide/segmentation/_cell.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import warnings 4 | 5 | from wsidata import WSIData 6 | from wsidata.io import add_shapes 7 | 8 | from lazyslide.models import SegmentationModel 9 | from lazyslide.models.segmentation import Instanseg, NuLite 10 | from ._seg_runner import SegmentationRunner 11 | from .._const import Key 12 | 13 | 14 | def cells( 15 | wsi: WSIData, 16 | model: str | SegmentationModel = "instanseg", 17 | tile_key=Key.tiles, 18 | transform=None, 19 | batch_size=4, 20 | num_workers=0, 21 | device=None, 22 | key_added="cells", 23 | ): 24 | """Cell segmentation for the whole slide image. 25 | 26 | Tiles should be prepared before segmentation. 27 | 28 | Recommended tile setting: 29 | - **instanseg**: 512x512, mpp=0.5 30 | 31 | Parameters 32 | ---------- 33 | wsi : WSIData 34 | The whole slide image data. 35 | model : str | SegmentationModel, default: "instanseg" 36 | The cell segmentation model. 37 | tile_key : str, default: "tiles" 38 | The key of the tile table. 39 | transform : callable, default: None 40 | The transformation for the input tiles. 41 | batch_size : int, default: 4 42 | The batch size for segmentation. 43 | num_workers : int, default: 0 44 | The number of workers for data loading. 45 | device : str, default: None 46 | The device for the model. 47 | key_added : str, default: "cells" 48 | The key for the added cell shapes. 49 | 50 | """ 51 | if model == "instanseg": 52 | model = Instanseg() 53 | # Run tile check 54 | tile_spec = wsi.tile_spec(tile_key) 55 | check_mpp = tile_spec.mpp == 0.5 56 | check_size = tile_spec.height == 512 and tile_spec.width == 512 57 | if not check_mpp or not check_size: 58 | warnings.warn( 59 | f"To optimize the performance of Instanseg model, " 60 | f"the tile size should be 512x512 and the mpp should be 0.5. " 61 | f"Current tile size is {tile_spec.width}x{tile_spec.height} with {tile_spec.mpp} mpp." 
62 | ) 63 | 64 | runner = SegmentationRunner( 65 | wsi, 66 | model, 67 | tile_key, 68 | transform=transform, 69 | batch_size=batch_size, 70 | num_workers=num_workers, 71 | device=device, 72 | ) 73 | cells = runner.run() 74 | # Add cells to the WSIData 75 | add_shapes(wsi, key=key_added, shapes=cells) 76 | 77 | 78 | def nulite( 79 | wsi: WSIData, 80 | tile_key="tiles", 81 | transform=None, 82 | batch_size=4, 83 | num_workers=0, 84 | device=None, 85 | key_added="cell_types", 86 | ): 87 | """Cell type segmentation for the whole slide image. 88 | 89 | Tiles should be prepared before segmentation. 90 | 91 | Recommended tile setting: 92 | - **nulite**: 512x512, mpp=0.5 93 | 94 | Parameters 95 | ---------- 96 | wsi : WSIData 97 | The whole slide image data. 98 | tile_key : str, default: "tiles" 99 | The key of the tile table. 100 | transform : callable, default: None 101 | The transformation for the input tiles. 102 | batch_size : int, default: 4 103 | The batch size for segmentation. 104 | num_workers : int, default: 0 105 | The number of workers for data loading. 106 | device : str, default: None 107 | The device for the model. 108 | key_added : str, default: "cell_types" 109 | The key for the added cell type shapes. 110 | 111 | """ 112 | 113 | model = NuLite() 114 | 115 | runner = SegmentationRunner( 116 | wsi, 117 | model, 118 | tile_key, 119 | transform=transform, 120 | batch_size=batch_size, 121 | num_workers=num_workers, 122 | device=device, 123 | ) 124 | cells = runner.run() 125 | # Add cells to the WSIData 126 | add_shapes(wsi, key=key_added, shapes=cells) 127 | -------------------------------------------------------------------------------- /src/lazyslide/segmentation/_seg_runner.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from functools import partial 4 | from typing import Literal, Callable, Mapping 5 | 6 | import geopandas as gpd 7 | import numpy as np 8 | import pandas as pd 9 | import torch 10 | from shapely.affinity import scale, translate 11 | from torch.utils.data import DataLoader 12 | from wsidata import WSIData 13 | from wsidata.io import add_shapes 14 | 15 | from lazyslide._const import Key 16 | from lazyslide._utils import default_pbar, get_torch_device 17 | from lazyslide.cv import merge_polygons 18 | from lazyslide.models.base import SegmentationModel 19 | 20 | 21 | def semantic( 22 | wsi: WSIData, 23 | model: SegmentationModel, 24 | tile_key=Key.tiles, 25 | transform=None, 26 | batch_size=4, 27 | num_workers=0, 28 | device=None, 29 | key_added="anatomical_structures", 30 | ): 31 | """ 32 | Semantic segmentation for the whole slide image. 33 | 34 | Parameters 35 | ---------- 36 | wsi : WSIData 37 | The whole slide image data. 38 | model : SegmentationModel 39 | The segmentation model. 40 | tile_key : str, default: "tiles" 41 | The key of the tile table. 42 | transform : callable, default: None 43 | The transformation for the input tiles. 44 | batch_size : int, default: 4 45 | The batch size for segmentation. 46 | num_workers : int, default: 0 47 | The number of workers for data loading. 48 | device : str, default: None 49 | The device for the model. 50 | key_added : str, default: "anatomical_structures" 51 | The key for the added instance shapes. 
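Examples
--------
A minimal sketch (tissues and tiles are assumed to have been prepared with
``pp.find_tissues`` and ``pp.tile_tissues``; GrandQCTissue is used purely as an
illustration of a :class:`SegmentationModel`):

.. code-block:: python

>>> from lazyslide.segmentation import semantic
>>> from lazyslide.models.segmentation import GrandQCTissue
>>> semantic(wsi, GrandQCTissue(), tile_key="tiles")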
52 | 53 | """ 54 | runner = SegmentationRunner( 55 | wsi=wsi, 56 | model=model, 57 | tile_key=tile_key, 58 | transform=transform, 59 | batch_size=batch_size, 60 | num_workers=num_workers, 61 | device=device, 62 | ) 63 | shapes = runner.run() 64 | # Add the segmentation results to the WSIData 65 | add_shapes(wsi, key=key_added, shapes=shapes) 66 | 67 | 68 | class SegmentationRunner: 69 | """ 70 | Segmentation runner for the whole slide image. 71 | 72 | Parameters 73 | ---------- 74 | wsi : :class:`WSIData ` 75 | The whole slide image data. 76 | model : :class:`SegmentationModel ` 77 | The segmentation model. 78 | tile_key : str 79 | The key of the tile table. 80 | transform : callable, default: None 81 | The transformation for the input tiles. 82 | batch_size : int, default: 4 83 | The batch size for segmentation. 84 | num_workers : int, default: 0 85 | The number of workers for data loading. 86 | device : str, default: None 87 | The device for the model. 88 | postprocess_kws : dict, default: None 89 | The keyword arguments for the postprocess function defined in the model class 90 | dataloader_kws : dict, default: None 91 | The keyword arguments for the DataLoader. 92 | class_col : str, default: None 93 | The column name for the class in the output GeoDataFrame. 94 | prob_col : str, default: None 95 | The column name for the probability in the output GeoDataFrame. 96 | buffer_px : int, default: 0 97 | The buffer size in pixels for the polygons. 98 | drop_overlap : float, default: 0.9 99 | The overlap threshold for dropping polygons. 100 | pbar : bool, default: True 101 | Whether to show the progress bar. 102 | 103 | """ 104 | 105 | def __init__( 106 | self, 107 | wsi: WSIData, 108 | model: SegmentationModel, 109 | tile_key: str, 110 | transform: Callable = None, 111 | batch_size: int = 4, 112 | num_workers: int = 0, 113 | device: str = None, 114 | postprocess_kws: dict = None, 115 | dataloader_kws: dict = None, 116 | class_col: str = None, 117 | prob_col: str = None, 118 | buffer_px: int = 0, 119 | drop_overlap: float = 0.9, 120 | pbar: bool = True, 121 | ): 122 | self.wsi = wsi 123 | self.model = model 124 | if device is None: 125 | device = get_torch_device() 126 | self.device = device 127 | self.tile_key = tile_key 128 | self.downsample = wsi.tile_spec(tile_key).base_downsample 129 | 130 | if transform is None: 131 | transform = model.get_transform() 132 | self.transform = transform 133 | 134 | if postprocess_kws is None: 135 | postprocess_kws = {} 136 | postprocess_fn = model.get_postprocess() 137 | self.postprocess_fn = partial(postprocess_fn, **postprocess_kws) 138 | 139 | if dataloader_kws is None: 140 | dataloader_kws = {} 141 | dataloader_kws.setdefault("num_workers", num_workers) 142 | dataloader_kws.setdefault("batch_size", batch_size) 143 | self.dataloader_kws = dataloader_kws 144 | self.merge_kws = dict( 145 | class_col=class_col, 146 | prob_col=prob_col, 147 | buffer_px=buffer_px, 148 | drop_overlap=drop_overlap, 149 | ) 150 | 151 | self.pbar = pbar 152 | 153 | def _batch_postprocess(self, output, xs, ys): 154 | results = [] 155 | 156 | if isinstance(output, (torch.Tensor, np.ndarray)): 157 | batches = zip(output, xs, ys) 158 | elif isinstance(output, tuple): 159 | batches = zip(list(zip(*output)), xs, ys) 160 | elif isinstance(output, Mapping): 161 | flattened = [ 162 | dict(zip(output.keys(), values)) for values in zip(*output.values()) 163 | ] 164 | batches = zip(flattened, xs, ys) 165 | else: 166 | raise NotImplementedError(f"Unsupported model output type {type(output)}") 
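        # At this point `batches` yields one (per-tile output, x, y) triple per tile,
        # regardless of whether the model returned a single tensor/array, a tuple of
        # tensors, or a mapping of named tensors; each element is handed to the
        # model's postprocess function below.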
167 | 
168 |         for batch, x, y in batches:
169 |             result = self.postprocess_fn(batch)
170 |             # The output of postprocess_fn is a gpd.GeoDataFrame
171 |             # transform the polygons to the global coordinate
172 |             polys = []
173 |             for poly in result["geometry"]:
174 |                 poly = scale(
175 |                     poly, xfact=self.downsample, yfact=self.downsample, origin=(0, 0)
176 |                 )
177 |                 poly = translate(poly, xoff=x, yoff=y)
178 |                 polys.append(poly)
179 |             result["geometry"] = polys
180 |             if len(result) > 0:
181 |                 results.append(result)
182 | 
183 |         return results
184 | 
185 |     def __call__(self):
186 |         dataset = self.wsi.ds.tile_images(
187 |             tile_key=self.tile_key, transform=self.transform
188 |         )
189 |         dl = DataLoader(dataset, **self.dataloader_kws)
190 | 
191 |         # Move model to device
192 |         if self.device is not None:
193 |             self.model.to(self.device)
194 | 
195 |         with default_pbar(disable=not self.pbar) as progress_bar:
196 |             task = progress_bar.add_task("Segmentation", total=len(dataset))
197 | 
198 |             results = []
199 |             for chunk in dl:
200 |                 images = chunk["image"]
201 |                 xs, ys = np.asarray(chunk["x"]), np.asarray(chunk["y"])
202 |                 if self.device is not None:
203 |                     images = images.to(self.device)
204 |                 output = self.model.segment(images)
205 | 
206 |                 rs = self._batch_postprocess(output, xs, ys)
207 |                 # Empty per-tile results are already filtered out in _batch_postprocess
208 |                 results.extend(rs)
209 |                 progress_bar.update(task, advance=len(xs))
210 |             polys_df = gpd.GeoDataFrame(pd.concat(results).reset_index(drop=True))
211 |             progress_bar.update(task, description="Merging tiles...")
212 |             # === Merge the polygons ===
213 |             polys_df = merge_polygons(polys_df, **self.merge_kws)
214 |             # === Refresh the progress bar ===
215 |             progress_bar.update(task, description="Segmentation")
216 |             progress_bar.refresh()
217 | 
218 |         polys_df = polys_df.explode().reset_index(drop=True)
219 |         return polys_df
220 | 
221 |     def run(self):
222 |         """
223 |         Run the segmentation.
224 |         """
225 |         return self.__call__()
226 | 
--------------------------------------------------------------------------------
/src/lazyslide/segmentation/_tissue.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 | 
3 | import cv2
4 | import numpy as np
5 | import torch
6 | from shapely.affinity import scale
7 | from wsidata import WSIData
8 | from wsidata.io import add_tissues
9 | 
10 | from lazyslide._const import Key
11 | from lazyslide._utils import get_torch_device
12 | from lazyslide.cv import BinaryMask
13 | from lazyslide.models.segmentation import GrandQCTissue
14 | 
15 | 
16 | def tissue(
17 |     wsi: WSIData,
18 |     level: int = None,
19 |     device: str | None = None,
20 |     key_added: str = Key.tissue,
21 | ):
22 |     """
23 |     Segment the tissue regions of the whole slide image with the GrandQC tissue model.
24 | 
25 |     Parameters
26 |     ----------
27 |     wsi: :class:`wsidata.WSIData`
28 |         The whole slide image.
29 |     level : int, default: None
30 |         The level to segment the tissue.
31 |     device : str, default: None
32 |         The device to run the model.
33 |     key_added : str, default: 'tissues'
34 |         The key to add the tissue polygons.
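
    Examples
    --------
    A minimal usage sketch (the import below uses the module path of this file;
    the public alias may differ):

    .. code-block:: python

        >>> import lazyslide as zs
        >>> from lazyslide.segmentation._tissue import tissue
        >>> wsi = zs.datasets.sample()
        >>> tissue(wsi)
        >>> wsi["tissues"]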
35 | 36 | """ 37 | 38 | if device is None: 39 | device = get_torch_device() 40 | 41 | props = wsi.properties 42 | if level is None: 43 | level_mpp = np.array(props.level_downsample) * props.mpp 44 | # Get the nearest level that towards mpp=10 45 | level = np.argmin(np.abs(level_mpp - 10)) 46 | shape = props.level_shape[level] 47 | 48 | model = GrandQCTissue() 49 | transform = model.get_transform() 50 | 51 | model.to(device) 52 | 53 | # Ensure the image size is multiple of 32 54 | # Calculate the nearest multiples of 32 55 | height, width = shape 56 | new_height = (height + 31) // 32 * 32 57 | new_width = (width + 31) // 32 * 32 58 | img = wsi.reader.get_region(0, 0, width, height, level=level) 59 | downsample = props.level_downsample[level] 60 | 61 | # We cannot read the image directly from the reader. 62 | # The padding from image reader will introduce padding at only two sides 63 | # We need to pad the image on all four sides 64 | # without shifting the image equilibrium 65 | # Otherwise, this will introduce artifacts in the segmentation 66 | 67 | # # Compute padding amounts 68 | top_pad = (new_height - height) // 2 69 | bottom_pad = new_height - height - top_pad 70 | left_pad = (new_width - width) // 2 71 | right_pad = new_width - width - left_pad 72 | 73 | # Apply padding 74 | img = np.pad( 75 | img, 76 | pad_width=((top_pad, bottom_pad), (left_pad, right_pad), (0, 0)), 77 | mode="constant", 78 | constant_values=0, # Pad with black pixels 79 | ) 80 | 81 | # Simulate JPEG compression 82 | encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), 80] 83 | result, img = cv2.imencode(".jpg", img, encode_param) 84 | img = cv2.imdecode(img, 1) 85 | 86 | img = torch.tensor(img).permute(2, 0, 1) 87 | 88 | img_t = transform(img).unsqueeze(0) 89 | img_t = img_t.to(device) 90 | pred = model.segment(img_t) 91 | 92 | pred = pred.squeeze().detach().cpu().numpy() 93 | mask = np.argmax(pred, axis=0).astype(np.uint8) 94 | # Flip the mask 95 | mask = 1 - mask 96 | polygons = BinaryMask(mask).to_polygons( 97 | min_area=1e-3, 98 | min_hole_area=1e-5, 99 | detect_holes=True, 100 | ) 101 | polygons = [ 102 | scale(p, xfact=downsample, yfact=downsample, origin=(0, 0)) for p in polygons 103 | ] 104 | add_tissues(wsi, key_added, polygons) 105 | -------------------------------------------------------------------------------- /src/lazyslide/tools/__init__.py: -------------------------------------------------------------------------------- 1 | from ._domain import spatial_domain, tile_shaper 2 | from ._features import feature_extraction, feature_aggregation 3 | from ._signatures import RNALinker 4 | from ._text_annotate import text_embedding, text_image_similarity 5 | from ._tissue_props import tissue_props 6 | from ._spatial_features import spatial_features, feature_utag 7 | from ._zero_shot import zero_shot_score, slide_caption 8 | -------------------------------------------------------------------------------- /src/lazyslide/tools/_domain.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from wsidata import WSIData 3 | from wsidata.io import update_shapes_data, add_shapes 4 | 5 | from lazyslide._const import Key 6 | 7 | 8 | def spatial_domain( 9 | wsi: WSIData, 10 | feature_key: str, 11 | tile_key: str = Key.tiles, 12 | layer: str = None, 13 | resolution: float = 0.1, 14 | key_added: str = "domain", 15 | ): 16 | """Return the unsupervised domain of the WSI""" 17 | try: 18 | import scanpy as sc 19 | except ImportError: 20 | raise ImportError( 21 | "Please 
install scanpy to use this function, try `pip install scanpy`." 22 | ) 23 | feature_key = wsi._check_feature_key(feature_key, tile_key) 24 | adata = wsi.fetch.features_anndata(feature_key, tile_key, tile_graph=False) 25 | sc.pp.scale(adata, layer=layer) 26 | sc.pp.pca(adata, layer=layer) 27 | sc.pp.neighbors(adata) 28 | sc.tl.leiden(adata, flavor="igraph", key_added=key_added, resolution=resolution) 29 | # Add to tile table 30 | update_shapes_data(wsi, tile_key, {key_added: adata.obs[key_added].to_numpy()}) 31 | 32 | 33 | def tile_shaper( 34 | wsi: WSIData, 35 | groupby: str = "domain", 36 | tile_key: str = Key.tiles, 37 | key_added: str = "domain_shapes", 38 | ): 39 | # """Return the domain shapes of the WSI 40 | # Parameters 41 | # ---------- 42 | # wsi: :class:`WSIData ` 43 | # The WSIData object. 44 | # groupby: str 45 | # The groupby key. 46 | # tile_key: str 47 | # The tile key. 48 | # key_added: str 49 | # The key to add the shapes to. 50 | # 51 | # Returns 52 | # ------- 53 | # None 54 | # The shapes will be added to the WSIData object. 55 | # - The shapes will be added to the `domain_shapes` layer of the tile table. 56 | # 57 | # Examples 58 | # -------- 59 | # .. code-block:: python 60 | # 61 | # >>> import lazyslide as zs 62 | # >>> wsi = zs.datasets.sample() 63 | # >>> zs.pp.find_tissues(wsi) 64 | # >>> zs.pp.tile_tissues(wsi, 256, mpp=0.5) 65 | # >>> zs.tl.feature_extraction(wsi, "resnet50") 66 | # >>> zs.pp.tile_graph(wsi) 67 | # >>> zs.tl.spatial_domain(wsi, layer="utag", feature_key="resnet50", resolution=0.3) 68 | # >>> zs.tl.tile_shaper(wsi) 69 | # 70 | # """ 71 | import geopandas as gpd 72 | from lazyslide.cv import BinaryMask 73 | from shapely.affinity import scale, translate 74 | 75 | result = [] 76 | 77 | tile_table = wsi[tile_key] 78 | 79 | spec = wsi.tile_spec(tile_key) 80 | 81 | # To avoid large memory allocation of mask, get domain in each tissue 82 | for _, tissue_group in tile_table.groupby("tissue_id"): 83 | for name, group in tissue_group.groupby(groupby): 84 | bounds = (group.bounds / spec.base_height).astype(int) 85 | minx, miny, maxx, maxy = ( 86 | bounds["minx"].min(), 87 | bounds["miny"].min(), 88 | bounds["maxx"].max(), 89 | bounds["maxy"].max(), 90 | ) 91 | w, h = int(maxx - minx), int(maxy - miny) 92 | mask = np.zeros((h, w), dtype=np.uint8) 93 | for _, row in bounds.iterrows(): 94 | mask[row["miny"] - miny, row["minx"] - minx] = 1 95 | polys = BinaryMask(mask).to_polygons() 96 | # scale back 97 | polys = [ 98 | scale( 99 | poly, xfact=spec.base_height, yfact=spec.base_height, origin=(0, 0) 100 | ) 101 | for poly in polys 102 | ] 103 | # translate 104 | polys = [ 105 | translate( 106 | poly, xoff=minx * spec.base_height, yoff=miny * spec.base_height 107 | ) 108 | for poly in polys 109 | ] 110 | for poly in polys: 111 | result.append([name, poly]) 112 | 113 | domain_shapes = gpd.GeoDataFrame(data=result, columns=[groupby, "geometry"]) 114 | add_shapes(wsi, key_added, domain_shapes) 115 | # return domain_shapes 116 | -------------------------------------------------------------------------------- /src/lazyslide/tools/_spatial_features.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import numpy as np 4 | from wsidata import WSIData 5 | 6 | from lazyslide._const import Key 7 | from lazyslide._utils import find_stack_level 8 | 9 | 10 | def spatial_features( 11 | wsi: WSIData, 12 | feature_key: str, 13 | method: str = "smoothing", 14 | tile_key: str = Key.tiles, 15 | graph_key: str = None, 16 
| layer_key: str = "spatial_features", 17 | ): 18 | """ 19 | Integrate spatial tile context with vision features using spatial feature smoothing. 20 | 21 | Parameters 22 | ---------- 23 | wsi : :class:`WSIData ` 24 | The WSIData object. 25 | feature_key : str 26 | The feature key. 27 | method : str, default: 'smoothing' 28 | The method used for spatial feature smoothing. Currently only 'smoothing' is supported. 29 | tile_key : str, default: 'tiles' 30 | The tile key. 31 | graph_key : str, optional 32 | The graph key. If None, defaults to '{tile_key}_graph'. 33 | layer_key : str, default: 'spatial_features' 34 | The key for the output layer in the feature table. 35 | 36 | Returns 37 | ------- 38 | None. The transformed feature will be added to the `spatial_features` layer of the feature table. 39 | 40 | Examples 41 | -------- 42 | .. code-block:: python 43 | 44 | >>> import lazyslide as zs 45 | >>> wsi = zs.datasets.sample() 46 | >>> zs.pp.find_tissues(wsi) 47 | >>> zs.pp.tile_tissues(wsi, 256, mpp=0.5) 48 | >>> zs.tl.feature_extraction(wsi, "resnet50") 49 | >>> zs.pp.tile_graph(wsi) 50 | >>> zs.tl.spatial_features(wsi, "resnet50") 51 | >>> wsi["resnet50"].layers["spatial_features"] 52 | 53 | """ 54 | if method != "smoothing": 55 | raise ValueError(f"Unknown method '{method}'. Only 'smoothing' is currently supported.") 56 | 57 | # Get the spatial connectivity 58 | try: 59 | if graph_key is None: 60 | graph_key = f"{tile_key}_graph" 61 | A = wsi.tables[graph_key].obsp["spatial_connectivities"] 62 | except KeyError: 63 | raise ValueError( 64 | "The tile graph is needed to transform feature with spatial smoothing. Please run `pp.tile_graph` first." 65 | ) 66 | A = A + np.eye(A.shape[0]) 67 | # L1 norm for each row 68 | norms = np.sum(np.abs(A), axis=1) 69 | # Normalize the array 70 | A_norm = A / norms 71 | 72 | feature_key = wsi._check_feature_key(feature_key, tile_key) 73 | feature_X = wsi.tables[feature_key].X 74 | A_spatial = np.transpose(feature_X) @ A_norm 75 | A_spatial = np.transpose(A_spatial) 76 | wsi.tables[feature_key].layers[layer_key] = np.asarray(A_spatial) 77 | 78 | 79 | def feature_utag( 80 | wsi: WSIData, 81 | feature_key: str, 82 | tile_key: str = Key.tiles, 83 | graph_key: str = None, 84 | ): 85 | """ 86 | Deprecated. Use :func:`spatial_features` instead. 87 | """ 88 | warnings.warn( 89 | "`tl.feature_utag` is deprecated and will be removed after 0.8.0, " 90 | "please use `tl.spatial_features` instead.", 91 | stacklevel=find_stack_level(), 92 | ) 93 | return spatial_features(wsi, feature_key, method="smoothing", tile_key=tile_key, graph_key=graph_key, layer_key="spatial_features") 94 | -------------------------------------------------------------------------------- /src/lazyslide/tools/_text_annotate.py: -------------------------------------------------------------------------------- 1 | from typing import List, Literal 2 | 3 | import numpy as np 4 | import pandas as pd 5 | from wsidata import WSIData 6 | from wsidata.io import add_features 7 | 8 | from lazyslide._const import Key 9 | 10 | 11 | def text_embedding( 12 | texts: List[str], 13 | model: Literal["plip", "conch"] = "plip", 14 | ): 15 | """Embed the text into a vector in the text-vision co-embedding using 16 | `PLIP `_ or 17 | `CONCH `_. 18 | 19 | Parameters 20 | ---------- 21 | texts : List[str] 22 | The list of texts. 
23 |     model : Literal["plip", "conch"], default: "plip"
24 |         The text embedding model, either PLIP or CONCH.
25 | 
26 |     Returns
27 |     -------
28 |     pd.DataFrame
29 |         The embeddings of the texts, with texts as index.
30 | 
31 |     Examples
32 |     --------
33 |     .. code-block:: python
34 | 
35 |         >>> import lazyslide as zs
36 |         >>> wsi = zs.datasets.sample()
37 |         >>> zs.pp.find_tissues(wsi)
38 |         >>> zs.pp.tile_tissues(wsi, 256, mpp=0.5, key_added="text_tiles")
39 |         >>> zs.tl.feature_extraction(wsi, "plip", tile_key="text_tiles")
40 |         >>> terms = ["mucosa", "submucosa", "muscularis", "lymphocyte"]
41 |         >>> zs.tl.text_embedding(terms, model="plip")
42 | 
43 |     """
44 |     import torch
45 | 
46 |     if model == "plip":
47 |         from lazyslide.models.multimodal import PLIP
48 | 
49 |         model_ins = PLIP()
50 |     elif model == "conch":
51 |         from lazyslide.models.multimodal import CONCH
52 | 
53 |         model_ins = CONCH()
54 |     else:
55 |         raise ValueError(f"Invalid model: {model}")
56 | 
57 |     # use numpy record array to store the embeddings
58 |     with torch.inference_mode():
59 |         embeddings = model_ins.encode_text(texts).detach().cpu().numpy()
60 |     return pd.DataFrame(embeddings, index=texts)
61 | 
62 | 
63 | def text_image_similarity(
64 |     wsi: WSIData,
65 |     text_embeddings: pd.DataFrame,
66 |     model: Literal["plip", "conch"] = "plip",
67 |     tile_key: str = Key.tiles,
68 |     feature_key: str = None,
69 |     key_added: str = None,
70 | ):
71 |     """
72 |     Compute the similarity between text and image.
73 | 
74 |     Parameters
75 |     ----------
76 |     wsi : WSIData
77 |         The WSIData object.
78 |     text_embeddings : pd.DataFrame
79 |         The embeddings of the texts, with texts as index.
80 |         You can use :func:`zs.tl.text_embedding ` to get the embeddings.
81 |     model : Literal["plip", "conch"], default: "plip"
82 |         The text embedding model.
83 |     tile_key : str, default: 'tiles'
84 |         The tile key.
85 |     feature_key : str
86 |         The feature key.
87 |     key_added : str, default: '{feature_key}_text_similarity'
88 | 
89 |     Returns
90 |     -------
91 |     None
92 | 
93 |     - The similarity scores will be added to :bdg-danger:`tables` slot of the spatial data object.
94 | 
95 |     Examples
96 |     --------
97 |     .. code-block:: python
98 |         >>> import lazyslide as zs
99 |         >>> wsi = zs.datasets.sample()
100 |         >>> zs.pp.find_tissues(wsi)
101 |         >>> zs.pp.tile_tissues(wsi, 256, mpp=0.5, key_added="text_tiles")
102 |         >>> zs.tl.feature_extraction(wsi, "plip", tile_key="text_tiles")
103 |         >>> terms = ["mucosa", "submucosa", "muscularis", "lymphocyte"]
104 |         >>> embeddings = zs.tl.text_embedding(terms, model="plip")
105 |         >>> zs.tl.text_image_similarity(wsi, embeddings, model="plip", tile_key="text_tiles")
106 | 
107 |     """
108 | 
109 |     if feature_key is None:
110 |         feature_key = model
111 |     feature_key = wsi._check_feature_key(feature_key, tile_key)
112 |     key_added = key_added or f"{feature_key}_text_similarity"
113 | 
114 |     feature_X = wsi.tables[feature_key].X
115 |     similarity_score = np.dot(text_embeddings.values, feature_X.T).T
116 | 
117 |     add_features(
118 |         wsi,
119 |         key_added,
120 |         tile_key,
121 |         similarity_score,
122 |         var=pd.DataFrame(index=text_embeddings.index),
123 |     )
124 | 
--------------------------------------------------------------------------------
/src/lazyslide/tools/_tissue_props.py:
--------------------------------------------------------------------------------
1 | from functools import cached_property
2 | 
3 | import cv2
4 | import numpy as np
5 | import pandas as pd
6 | from wsidata import WSIData
7 | from wsidata.io import update_shapes_data
8 | 
9 | from lazyslide._const import Key
10 | 
11 | 
12 | def point2shape(
13 |     wsi: WSIData,
14 |     key: str = "tiles",
15 |     groupby: str = None,
16 | ):
17 |     pass
18 | 
19 | 
20 | def tissue_props(
21 |     wsi: WSIData,
22 |     key: str = Key.tissue,
23 | ):
24 |     """Compute a series of geometrical properties of tissue pieces.
25 | 
26 |     - "area"
27 |     - "area_filled"
28 |     - "convex_area"
29 |     - "solidity"
30 |     - "convexity"
31 |     - "axis_major_length"
32 |     - "axis_minor_length"
33 |     - "eccentricity"
34 |     - "orientation"
35 |     - "extent"
36 |     - "perimeter"
37 |     - "circularity"
38 | 
39 |     Parameters
40 |     ----------
41 |     wsi : :class:`WSIData `
42 |         The WSIData object.
43 |     key : str
44 |         The tissue key.
45 | 
46 |     Returns
47 |     -------
48 |     None
49 | 
50 |     - The tissue properties will be added to the same table as the tissue shapes.
51 | 
52 |     Examples
53 |     --------
54 |     ..
code-block:: python 55 | 56 | >>> import lazyslide as zs 57 | >>> wsi = zs.datasets.sample() 58 | >>> zs.pp.find_tissues(wsi) 59 | >>> zs.tl.tissue_props(wsi) 60 | >>> wsi['tissues'] 61 | 62 | """ 63 | 64 | props = [] 65 | cnts = [] 66 | for tissue_contour in wsi.iter.tissue_contours(key): 67 | cnt = tissue_contour.contour 68 | holes = tissue_contour.holes 69 | 70 | cnt_array = np.asarray(cnt.exterior.coords.xy, dtype=np.int32).T 71 | holes_array = [ 72 | np.asarray(h.exterior.coords.xy, dtype=np.int32).T for h in holes 73 | ] 74 | 75 | _props = contour_props(cnt_array, holes_array) 76 | cnts.append(cnt) 77 | props.append(_props) 78 | 79 | props = pd.DataFrame(props).to_dict(orient="list") 80 | update_shapes_data(wsi, key, props) 81 | 82 | 83 | class ContourProps: 84 | def __init__(self, cnt, holes=None): 85 | self.cnt = cnt 86 | self.holes = holes 87 | 88 | @cached_property 89 | def area_filled(self): 90 | return cv2.contourArea(self.cnt) 91 | 92 | @cached_property 93 | def area(self): 94 | """Area without holes.""" 95 | if self.holes is None: 96 | return self.area_filled 97 | else: 98 | area = self.area_filled 99 | for hole in self.holes: 100 | area -= cv2.contourArea(hole) 101 | return area 102 | 103 | @cached_property 104 | def bbox(self): 105 | x, y, w, h = cv2.boundingRect(self.cnt) 106 | return x, y, w, h 107 | 108 | @cached_property 109 | def centroid(self): 110 | M = self.moments 111 | cX = int(M["m10"] / M["m00"]) 112 | cY = int(M["m01"] / M["m00"]) 113 | return cX, cY 114 | 115 | @cached_property 116 | def convex_hull(self): 117 | return cv2.convexHull(self.cnt) 118 | 119 | @cached_property 120 | def convex_area(self): 121 | return cv2.contourArea(self.convex_hull) 122 | 123 | @cached_property 124 | def solidity(self): 125 | """Solidity is the ratio of the contour area to the convex area.""" 126 | if self.convex_area == 0: 127 | return 0 128 | return self.area / self.convex_area 129 | 130 | @cached_property 131 | def convexity(self): 132 | """Convexity is the ratio of the convex area to the contour area.""" 133 | if self.area == 0: 134 | return 0 135 | return self.convex_area / self.area 136 | 137 | @cached_property 138 | def ellipse(self): 139 | return cv2.fitEllipse(self.cnt) 140 | 141 | @cached_property 142 | def axis_major_length(self): 143 | x1, x2 = self.ellipse[1] 144 | if x1 < x2: 145 | return x2 146 | return x1 147 | 148 | @cached_property 149 | def axis_minor_length(self): 150 | x1, x2 = self.ellipse[1] 151 | if x1 < x2: 152 | return x1 153 | return x2 154 | 155 | @cached_property 156 | def eccentricity(self): 157 | if self.axis_major_length == 0: 158 | return 0 159 | return np.sqrt(1 - (self.axis_minor_length**2) / (self.axis_major_length**2)) 160 | 161 | @cached_property 162 | def orientation(self): 163 | return self.ellipse[2] 164 | 165 | @cached_property 166 | def extent(self): 167 | if self.area == 0: 168 | return 0 169 | return self.area / (self.bbox[2] * self.bbox[3]) 170 | 171 | @cached_property 172 | def perimeter(self): 173 | return cv2.arcLength(self.cnt, True) 174 | 175 | @cached_property 176 | def circularity(self): 177 | if self.perimeter == 0: 178 | return 0 179 | return 4 * np.pi * self.area / (self.perimeter**2) 180 | 181 | @cached_property 182 | def moments(self): 183 | return cv2.moments(self.cnt) 184 | 185 | @cached_property 186 | def moments_hu(self): 187 | return cv2.HuMoments(self.moments) 188 | 189 | def __call__(self): 190 | props = { 191 | "area": self.area, 192 | "area_filled": self.area_filled, 193 | "convex_area": self.convex_area, 194 | 
"solidity": self.solidity, 195 | "convexity": self.convexity, 196 | "axis_major_length": self.axis_major_length, 197 | "axis_minor_length": self.axis_minor_length, 198 | "eccentricity": self.eccentricity, 199 | "orientation": self.orientation, 200 | "extent": self.extent, 201 | "perimeter": self.perimeter, 202 | "circularity": self.circularity, 203 | } 204 | 205 | for ix, box in enumerate(self.bbox): 206 | props[f"bbox-{ix}"] = box 207 | 208 | for ix, c in enumerate(self.centroid): 209 | props[f"centroid-{ix}"] = c 210 | 211 | for i, hu in enumerate(self.moments_hu): 212 | props[f"hu-{i}"] = hu[0] 213 | 214 | for key, value in self.moments.items(): 215 | props[f"moment-{key}"] = value 216 | 217 | return props 218 | 219 | 220 | def contour_props(cnt: np.ndarray, holes=None): 221 | """Calculate the properties of a contour.""" 222 | return ContourProps(cnt, holes)() 223 | -------------------------------------------------------------------------------- /src/lazyslide/tools/_zero_shot.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import Sequence, List, Iterable 4 | 5 | import numpy as np 6 | import pandas as pd 7 | import torch 8 | from wsidata import WSIData 9 | 10 | from lazyslide._utils import get_torch_device 11 | 12 | 13 | def _preprocess_prompts(prompts: List[str | List[str]]) -> List[List[str]]: 14 | """ 15 | Preprocess the prompts to ensure they are in the correct format. 16 | """ 17 | processed_prompts = [] 18 | for prompt in prompts: 19 | if isinstance(prompt, str): 20 | processed_prompts.append([prompt]) 21 | elif isinstance(prompt, Iterable): 22 | processed_prompts.append(list(prompt)) 23 | else: 24 | raise ValueError(f"Invalid prompt type: {type(prompt)}") 25 | return processed_prompts 26 | 27 | 28 | def _get_agg_info( 29 | wsi: WSIData, 30 | feature_key, 31 | agg_key: str = None, 32 | agg_by: str | Sequence[str] = None, 33 | ): 34 | if agg_key is None: 35 | if agg_by is None: 36 | agg_key = "agg_slide" 37 | else: 38 | if isinstance(agg_by, str): 39 | agg_by = [agg_by] 40 | agg_key = f"agg_{'_'.join(agg_by)}" 41 | agg_info = wsi[feature_key].uns["agg_ops"][agg_key] 42 | annos = None 43 | if "keys" in agg_info: 44 | annos = pd.DataFrame( 45 | data=agg_info["values"], 46 | columns=agg_info["keys"], 47 | ) 48 | return agg_info, annos 49 | 50 | 51 | def zero_shot_score( 52 | wsi: WSIData, 53 | prompts: list[list[str]], 54 | feature_key, 55 | *, 56 | agg_key: str = None, 57 | agg_by: str | Sequence[str] = None, 58 | model: str = "prism", 59 | device: str = None, 60 | ): 61 | """ 62 | Perform zero-shot classification on the WSI 63 | 64 | Supported models: 65 | - prism: `Prism model `_. 66 | - titan: `Titan model `_. 67 | 68 | Corresponding slide-level features are required for the model. 69 | 70 | 71 | Parameters 72 | ---------- 73 | wsi : :class:`wsidata.WSIData` 74 | The WSI data object. 75 | prompts : array of str 76 | The text labels to classify. You can use a list of strings to 77 | add more information to one class. 78 | feature_key : str 79 | The tile features to be used. 80 | agg_key : str 81 | The aggregation key 82 | agg_by : str or list of str 83 | The aggregation keys that were used to create the slide features. 84 | model: {"prism", "titan"} 85 | The model to use for zero-shot classification. 86 | device : str 87 | The device to use for inference. If None, the default device will be used. 
88 | 
89 |     Returns
90 |     -------
91 |     pd.DataFrame
92 |         The classification results (probability). The columns are the text labels and the
93 |         rows are the slide features.
94 | 
95 |     # - The classification results will be added to :bdg-danger:`tables` slot of the spatial data object.
96 | 
97 |     Examples
98 |     --------
99 |     .. code-block:: python
100 | 
101 |         >>> import lazyslide as zs
102 |         >>> wsi = zs.datasets.lung_carcinoma(with_data=False)
103 |         >>> zs.pp.find_tissues(wsi)
104 |         >>> zs.pp.tile_tissues(wsi, 512, background_fraction=0.95, mpp=0.5)
105 |         >>> zs.tl.feature_extraction(wsi, "virchow")
106 |         >>> zs.tl.feature_aggregation(wsi, feature_key="virchow", encoder="prism")
107 |         >>> print(zs.tl.zero_shot_score(wsi, [["tumor"], ["normal tissue"]], feature_key="virchow_tiles"))
108 | 
109 |     """
110 |     if device is None:
111 |         device = get_torch_device()
112 | 
113 |     prompts = _preprocess_prompts(prompts)
114 | 
115 |     if model == "prism":
116 |         from lazyslide.models.multimodal import Prism
117 |         model = Prism()
118 |     elif model == "titan":
119 |         from lazyslide.models.multimodal import Titan
120 |         model = Titan()
121 |     else:
122 |         raise ValueError(f"Invalid model: {model}")
123 |     model.to(device)
124 |     # Get the embeddings from the WSI
125 |     agg_info, annos = _get_agg_info(
126 |         wsi,
127 |         feature_key,
128 |         agg_key=agg_key,
129 |         agg_by=agg_by,
130 |     )
131 | 
132 |     all_probs = []
133 |     for ix, f in enumerate(agg_info["features"]):
134 |         f = torch.tensor(f).unsqueeze(0).to(device)
135 |         probs = model.score(f, prompts=prompts)
136 |         all_probs.append(probs)
137 | 
138 |     all_probs = np.vstack(all_probs)
139 | 
140 |     named_prompts = [", ".join(p) for p in prompts]
141 |     results = pd.DataFrame(
142 |         data=all_probs,
143 |         columns=named_prompts,
144 |     )
145 |     if annos is not None:
146 |         results = pd.concat([annos, results], axis=1)
147 |     return results
148 | 
149 | 
150 | def slide_caption(
151 |     wsi: WSIData,
152 |     prompt: list[str],
153 |     feature_key,
154 |     *,
155 |     agg_key: str = None,
156 |     agg_by: str | Sequence[str] = None,
157 |     max_length: int = 100,
158 |     model: str = "prism",
159 |     device: str = None,
160 | ):
161 |     """
162 |     Generate captions for the slide.
163 | 
164 |     Parameters
165 |     ----------
166 |     wsi : :class:`wsidata.WSIData`
167 |         The WSI data object.
168 |     prompt : list of str
169 |         The text instruction to generate the caption.
170 |     feature_key : str
171 |         The slide features to be used.
172 |     agg_key : str
173 |         The aggregation key.
174 |     agg_by : str or list of str
175 |         The aggregation keys that were used to create the slide features.
176 |     max_length : int
177 |         The maximum length of the generated caption.
178 |     model : {"prism"}
179 |         The caption generation model to use.
180 |     device : str
181 |         The device to use for inference. If None, the default device will be used.
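
    Notes
    -----
    The current implementation always loads the Prism model for caption
    generation, regardless of the ``model`` argument.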
182 | 183 | """ 184 | 185 | if device is None: 186 | device = get_torch_device() 187 | 188 | from lazyslide.models.multimodal import Prism 189 | 190 | model = Prism() 191 | model.to(device) 192 | 193 | agg_info, annos = _get_agg_info( 194 | wsi, 195 | feature_key, 196 | agg_key=agg_key, 197 | agg_by=agg_by, 198 | ) 199 | 200 | captions = [] 201 | 202 | for ix, lat in enumerate(agg_info["latents"]): 203 | lat = torch.tensor(lat).unsqueeze(0).to(device) 204 | caption = model.caption( 205 | lat, 206 | prompt=prompt, 207 | max_length=max_length, 208 | ) 209 | captions.append(caption) 210 | 211 | results = pd.DataFrame( 212 | { 213 | "caption": captions, 214 | } 215 | ) 216 | if annos is not None: 217 | results = pd.concat([annos, results], axis=1) 218 | return results 219 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | import torch 5 | 6 | 7 | class MockNet(torch.nn.Module): 8 | def __init__(self): 9 | super().__init__() 10 | 11 | def forward(self, x): 12 | return torch.zeros(x.shape[0], 1000) 13 | 14 | 15 | @pytest.fixture(scope="session", autouse=True) 16 | def wsi(): 17 | import lazyslide as zs 18 | 19 | return zs.datasets.gtex_artery() 20 | 21 | 22 | @pytest.fixture(scope="session") 23 | def tmp_path_session(tmp_path_factory): 24 | return tmp_path_factory.mktemp("session_tmp") 25 | 26 | 27 | @pytest.fixture(scope="session", autouse=True) 28 | def torch_model_file(tmp_path_session): 29 | model = MockNet() 30 | torch.save(model, tmp_path_session / "model.pt") 31 | return tmp_path_session / "model.pt" 32 | 33 | 34 | @pytest.fixture(scope="session", autouse=True) 35 | def torch_jit_file(tmp_path_session): 36 | model = MockNet() 37 | torch.jit.script(model).save(tmp_path_session / "jit_model.pt") 38 | return tmp_path_session / "jit_model.pt" 39 | 40 | 41 | def pytest_collection_modifyitems(config, items): 42 | if os.getenv("GITHUB_ACTIONS") == "true": 43 | skip_on_ci = pytest.mark.skip(reason="Skipped on GitHub CI") 44 | for item in items: 45 | if "skip_on_ci" in item.keywords: 46 | item.add_marker(skip_on_ci) 47 | -------------------------------------------------------------------------------- /tests/data/CMU-1-Small-Region.svs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rendeirolab/LazySlide/f39634cc994b3098b0933075b9d25ecd99b9014e/tests/data/CMU-1-Small-Region.svs -------------------------------------------------------------------------------- /tests/test_cv.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | 5 | np.random.seed(42) 6 | 7 | H, W = 100, 100 8 | N_CLASS = 5 9 | binary_mask = np.random.randint(0, 2, (H, W), dtype=np.uint8) 10 | multilabel_mask = np.random.randint(0, N_CLASS, (H, W), dtype=np.uint8) 11 | multiclass_mask = np.random.randint( 12 | 0, 13 | 2, 14 | ( 15 | N_CLASS, 16 | H, 17 | W, 18 | ), 19 | dtype=np.uint8, 20 | ) 21 | 22 | 23 | class TestMask: 24 | @pytest.mark.parametrize("mask", [binary_mask, multilabel_mask, multiclass_mask]) 25 | def test_mask_to_polygon(self, mask): 26 | from lazyslide.cv.mask import Mask 27 | 28 | mask = Mask.from_array(mask) 29 | mask.to_polygons() 30 | -------------------------------------------------------------------------------- /tests/test_datasets.py: 
-------------------------------------------------------------------------------- 1 | import lazyslide as zs 2 | 3 | 4 | def test_load_sample(): 5 | wsi = zs.datasets.sample() 6 | assert wsi is not None 7 | 8 | 9 | def test_load_gtex_artery(): 10 | wsi = zs.datasets.gtex_artery() 11 | assert wsi is not None 12 | 13 | 14 | def test_load_lung_carcinoma(): 15 | wsi = zs.datasets.lung_carcinoma() 16 | assert wsi is not None 17 | -------------------------------------------------------------------------------- /tests/test_pp.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import lazyslide as zs 4 | 5 | 6 | @pytest.mark.parametrize("detect_holes", [True, False]) 7 | @pytest.mark.parametrize("key_added", ["tissue", "tissue2"]) 8 | def test_pp_find_tissues(wsi, detect_holes, key_added): 9 | zs.pp.find_tissues(wsi, detect_holes=detect_holes, key_added=key_added) 10 | 11 | assert key_added in wsi.shapes 12 | if not detect_holes: 13 | tissue = wsi[key_added].geometry[0] 14 | assert len(tissue.interiors) == 0 15 | 16 | 17 | class TestPPTileTissues: 18 | def test_tile_px(self, wsi): 19 | zs.pp.find_tissues(wsi) 20 | zs.pp.tile_tissues(wsi, 256, key_added="tiles") 21 | 22 | def test_mpp(self, wsi): 23 | zs.pp.tile_tissues(wsi, 256, mpp=1, key_added="tiles1") 24 | 25 | @pytest.mark.xfail(raises=ValueError) 26 | def test_slide_mpp(self, wsi): 27 | zs.pp.tile_tissues(wsi, 256, slide_mpp=1, key_added="tiles2") 28 | 29 | def test_assert(self, wsi): 30 | s0 = len(wsi["tiles"]) 31 | s1 = len(wsi["tiles1"]) 32 | 33 | assert s0 > 0 34 | assert s1 < s0 35 | -------------------------------------------------------------------------------- /tests/test_tl.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import lazyslide as zs 3 | 4 | TIMM_MODEL = "mobilenetv3_small_050" 5 | 6 | 7 | class TestFeatureExtraction: 8 | def test_load_model(self, wsi, torch_model_file): 9 | zs.pp.find_tissues(wsi) 10 | zs.pp.tile_tissues(wsi, 512) 11 | zs.tl.feature_extraction(wsi, model_path=torch_model_file) 12 | # Test feature aggregation 13 | zs.tl.feature_aggregation(wsi, feature_key="MockNet") 14 | 15 | def test_load_jit_model(self, wsi, torch_jit_file): 16 | zs.tl.feature_extraction(wsi, model_path=torch_jit_file) 17 | 18 | @pytest.mark.skip_on_ci 19 | def test_timm_model(self, wsi): 20 | zs.tl.feature_extraction(wsi, model=TIMM_MODEL) 21 | -------------------------------------------------------------------------------- /workflow/main.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | nextflow.enable.dsl = 2 3 | 4 | params.slide_table = null 5 | params.tile_px = 256 6 | params.report_dir = "reports" 7 | params.models = "resnet50" 8 | 9 | process PREPROCESS { 10 | publishDir params.report_dir, mode: 'move' 11 | // conda "${projectDir}/env.yaml" 12 | 13 | input: 14 | tuple val(wsi), val(storage) 15 | val tile_px 16 | 17 | output: 18 | path '*_report.txt', emit: report 19 | tuple val(wsi), val(storage), emit: slide 20 | 21 | script: 22 | 23 | def wsi_base = wsi.baseName 24 | 25 | """ 26 | lazyslide preprocess ${wsi} ${tile_px} --output ${storage} 27 | touch ${wsi_base}_report.txt 28 | """ 29 | } 30 | 31 | process FEATURE { 32 | // conda "${projectDir}/env.yaml" 33 | 34 | input: 35 | tuple val(wsi), val(storage) 36 | each model 37 | 38 | script: 39 | """ 40 | lazyslide feature ${wsi} ${model} --output ${storage} 41 | """ 42 | } 43 | 44 | 45 | 46 | workflow 
{
47 | 
48 |     log.info """
49 |     ██ █████ ███████ ██ ██ ███████ ██ ██ ██████ ███████
50 |     ██ ██ ██ ███ ██ ██ ██ ██ ██ ██ ██
51 |     ██ ███████ ███ ████ ███████ ██ ██ ██ ██ █████
52 |     ██ ██ ██ ███ ██ ██ ██ ██ ██ ██
53 |     ███████ ██ ██ ███████ ██ ███████ ███████ ██ ██████ ███████
54 | 
55 |     ===================================================================
56 | 
57 |     Workflow information:
58 |         Workflow: ${workflow.projectDir}
59 | 
60 |     Input parameters:
61 |         Slide table: ${file(params.slide_table)}
62 | 
63 |     """
64 | 
65 |     slides_ch = Channel
66 |         .fromPath( params.slide_table, checkIfExists: true )
67 |         .splitCsv( header: true )
68 |         .map { row ->
69 |             def slide_file = file(row.file, checkIfExists: true)
70 |             def slide_storage = row.storage
71 |             if (row.storage == null) { slide_storage = slide_file.parent / slide_file.baseName + ".zarr" }
72 |             return tuple(slide_file, slide_storage)
73 |         }
74 | 
75 |     // slides_ch.view()
76 | 
77 |     out_ch = PREPROCESS(slides_ch, params.tile_px)
78 | 
79 |     // println "Output of PREPROCESS: "
80 |     // out_ch.slide.view()
81 | 
82 |     models = Channel.of(params.models?.split(','))
83 | 
84 |     FEATURE(out_ch.slide, models)
85 | 
86 | }
--------------------------------------------------------------------------------
/workflow/modules/qc/main.nf:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env nextflow
2 | nextflow.enable.dsl = 2
3 | 
4 | process SlideQC {
5 | 
6 |     input:
7 |     val mpp
8 |     path slide
9 | 
10 |     output:
11 |     path("*.qc.csv"), emit: qc
12 | 
13 |     script:
14 |     """
15 |     lazyslide qc $slide
16 |     """
17 | }
--------------------------------------------------------------------------------