├── cxr-foundation ├── cxr_foundation │ ├── __init__.py │ ├── testdata │ │ ├── fake.dcm │ │ ├── expected.png │ │ ├── random.png │ │ ├── 00000001_000.tfrecord │ │ ├── 00000001_001.tfrecord │ │ ├── 00000001_002.tfrecord │ │ ├── 00000002_000.tfrecord │ │ └── 00000003_000.tfrecord │ ├── mimic.py │ ├── example_generator_lib_test.py │ ├── constants.py │ ├── embeddings_data.py │ ├── example_generator_lib.py │ └── inference.py ├── logo.png ├── cxr_foundation_interactive_demo_deps │ ├── fullscreen.gif │ ├── index.html │ └── cxr.css ├── Dockerfile ├── build_pip_package.sh ├── CONTRIBUTING.md ├── setup.py ├── CXR_Foundation_Interactive_Demo.ipynb ├── README.md ├── LICENSE └── MIMIC_Embeddings_Demo.ipynb ├── ct-foundation ├── API_specification.md ├── img │ ├── create-group.png │ └── create-identity.png ├── CONTRIBUTING.md ├── LICENSE └── README.md ├── wet-amd-prediction ├── CONTRIBUTING ├── README ├── LICENSE └── ex_amd_model.py ├── derm-foundation ├── create-group.png ├── create-identity.png ├── CONTRIBUTING.md ├── LICENSE └── README.md ├── path-foundation ├── img │ ├── create-group.png │ └── create-identity.png ├── CONTRIBUTING.md ├── LICENSE └── README.md ├── README.md └── .gitignore /cxr-foundation/cxr_foundation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ct-foundation/API_specification.md: -------------------------------------------------------------------------------- 1 | # API Specification 2 | -------------------------------------------------------------------------------- /wet-amd-prediction/CONTRIBUTING: -------------------------------------------------------------------------------- 1 | We are not accepting contributions for this project. 
2 | 3 | -------------------------------------------------------------------------------- /cxr-foundation/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Google-Health/imaging-research/HEAD/cxr-foundation/logo.png -------------------------------------------------------------------------------- /derm-foundation/create-group.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Google-Health/imaging-research/HEAD/derm-foundation/create-group.png -------------------------------------------------------------------------------- /ct-foundation/img/create-group.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Google-Health/imaging-research/HEAD/ct-foundation/img/create-group.png -------------------------------------------------------------------------------- /derm-foundation/create-identity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Google-Health/imaging-research/HEAD/derm-foundation/create-identity.png -------------------------------------------------------------------------------- /path-foundation/img/create-group.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Google-Health/imaging-research/HEAD/path-foundation/img/create-group.png -------------------------------------------------------------------------------- /ct-foundation/img/create-identity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Google-Health/imaging-research/HEAD/ct-foundation/img/create-identity.png -------------------------------------------------------------------------------- /path-foundation/img/create-identity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Google-Health/imaging-research/HEAD/path-foundation/img/create-identity.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This repo includes imaging research code shared by Google Health. 2 | Please see the README and LICENSE files in each project for more information. 
3 | -------------------------------------------------------------------------------- /cxr-foundation/cxr_foundation/testdata/fake.dcm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Google-Health/imaging-research/HEAD/cxr-foundation/cxr_foundation/testdata/fake.dcm -------------------------------------------------------------------------------- /cxr-foundation/cxr_foundation/testdata/expected.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Google-Health/imaging-research/HEAD/cxr-foundation/cxr_foundation/testdata/expected.png -------------------------------------------------------------------------------- /cxr-foundation/cxr_foundation/testdata/random.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Google-Health/imaging-research/HEAD/cxr-foundation/cxr_foundation/testdata/random.png -------------------------------------------------------------------------------- /cxr-foundation/cxr_foundation/testdata/00000001_000.tfrecord: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Google-Health/imaging-research/HEAD/cxr-foundation/cxr_foundation/testdata/00000001_000.tfrecord -------------------------------------------------------------------------------- /cxr-foundation/cxr_foundation/testdata/00000001_001.tfrecord: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Google-Health/imaging-research/HEAD/cxr-foundation/cxr_foundation/testdata/00000001_001.tfrecord -------------------------------------------------------------------------------- /cxr-foundation/cxr_foundation/testdata/00000001_002.tfrecord: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Google-Health/imaging-research/HEAD/cxr-foundation/cxr_foundation/testdata/00000001_002.tfrecord -------------------------------------------------------------------------------- /cxr-foundation/cxr_foundation/testdata/00000002_000.tfrecord: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Google-Health/imaging-research/HEAD/cxr-foundation/cxr_foundation/testdata/00000002_000.tfrecord -------------------------------------------------------------------------------- /cxr-foundation/cxr_foundation/testdata/00000003_000.tfrecord: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Google-Health/imaging-research/HEAD/cxr-foundation/cxr_foundation/testdata/00000003_000.tfrecord -------------------------------------------------------------------------------- /cxr-foundation/cxr_foundation_interactive_demo_deps/fullscreen.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Google-Health/imaging-research/HEAD/cxr-foundation/cxr_foundation_interactive_demo_deps/fullscreen.gif -------------------------------------------------------------------------------- /cxr-foundation/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM apache/beam_python3.7_sdk:2.35.0 2 | 3 | COPY . . 4 | 5 | RUN pip install cxr-foundation 6 | 7 | # Set the entrypoint to Apache Beam SDK launcher. 
8 | ENTRYPOINT ["/opt/apache/beam/boot"] -------------------------------------------------------------------------------- /cxr-foundation/build_pip_package.sh: -------------------------------------------------------------------------------- 1 | python -m venv .env 2 | source .env/bin/activate 3 | 4 | pip install --upgrade pip 5 | 6 | pip install --upgrade twine 7 | python setup.py sdist 8 | python setup.py bdist_wheel 9 | python -m twine check dist/* 10 | 11 | python -m twine upload dist/* 12 | -------------------------------------------------------------------------------- /wet-amd-prediction/README: -------------------------------------------------------------------------------- 1 | # Wet AMD prediction network architecture code 2 | 3 | This repo contains an implementation of the exAMD prediction network described in Yim J. et al. (2020) "Predicting conversion to wet age related macular 4 | degeneration using deep learning", Nature Medicine doi:10.1038/s41591-020-0867-7. 5 | 6 | Note this code illustrates only the network architecture and does not contain any data or training code. 7 | 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # Jupyter Notebook 30 | .ipynb_checkpoints 31 | -------------------------------------------------------------------------------- /cxr-foundation/cxr_foundation/mimic.py: -------------------------------------------------------------------------------- 1 | """Module for managing/parsing MIMIC data files""" 2 | 3 | import re 4 | 5 | 6 | # Example: 'files/p19/p19692222/s59566639/965b6053-a2c70d67-c0467ca6-02372346-fb7c6224.tfrecord' 7 | FILE_PATTERN = re.compile( 8 | r"files/(?:\w+)/p(?P\w+)/s(?P\w+)/(?P[\w-]+)\.tfrecord" 9 | ) 10 | 11 | 12 | def parse_embedding_file_pattern(file_path: str): 13 | """Extracts the subject_id, study_id, and dicom_id 14 | 15 | from the full file path string of a MIMIC CXR Embedding file: 16 | 17 | https://physionet.org/content/image-embeddings-mimic-cxr/ 18 | 19 | Example input: 20 | files/p19/p19692222/s59566639/965b6053-a2c70d67-c0467ca6-02372346-fb7c6224.tfrecord 21 | """ 22 | match = FILE_PATTERN.fullmatch(file_path) 23 | if not match: 24 | raise Exception(f"Failed to match file path: {file_path}") 25 | return (int(match[1]), int(match[2]), match[3]) 26 | -------------------------------------------------------------------------------- /derm-foundation/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement. You (or your employer) retain the copyright to your contribution; 10 | this simply gives us permission to use and redistribute your contributions as 11 | part of the project. Head over to to see 12 | your current agreements on file or to sign a new one. 
13 | 14 | You generally only need to submit a CLA once, so if you've already submitted one 15 | (even if it was for a different project), you probably don't need to do it 16 | again. 17 | 18 | ## Code Reviews 19 | 20 | All submissions, including submissions by project members, require review. We 21 | use GitHub pull requests for this purpose. Consult 22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 23 | information on using pull requests. 24 | 25 | ## Community Guidelines 26 | 27 | This project follows [Google's Open Source Community 28 | Guidelines](https://opensource.google/conduct/). -------------------------------------------------------------------------------- /cxr-foundation/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement. You (or your employer) retain the copyright to your contribution; 10 | this simply gives us permission to use and redistribute your contributions as 11 | part of the project. Head over to to see 12 | your current agreements on file or to sign a new one. 13 | 14 | You generally only need to submit a CLA once, so if you've already submitted one 15 | (even if it was for a different project), you probably don't need to do it 16 | again. 17 | 18 | ## Code Reviews 19 | 20 | All submissions, including submissions by project members, require review. We 21 | use GitHub pull requests for this purpose. Consult 22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 23 | information on using pull requests. 24 | 25 | ## Community Guidelines 26 | 27 | This project follows [Google's Open Source Community 28 | Guidelines](https://opensource.google/conduct/). 29 | -------------------------------------------------------------------------------- /cxr-foundation/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # 3 | # Copyright 2022 Google LLC 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | import setuptools 17 | 18 | setuptools.setup( 19 | name='cxr-foundation', 20 | version='1.0.0', 21 | description='CXR Foundation: chest x-ray embeddings generation.', 22 | install_requires=[ 23 | 'google-api-python-client', 24 | 'google-apitools', 25 | 'google-cloud-aiplatform', 26 | 'pandas', 27 | 'tensorflow >= 2.13.0', 28 | 'pillow', 29 | 'pypng', 30 | 'pydicom', 31 | 'typing-extensions', 32 | 'tensorflow_text', 33 | 'tensorflow-hub', 34 | ], 35 | packages=setuptools.find_packages(), 36 | ) 37 | -------------------------------------------------------------------------------- /ct-foundation/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. 4 | 5 | ## Before you begin 6 | 7 | ### Sign our Contributor License Agreement 8 | 9 | Contributions to this project must be accompanied by a 10 | [Contributor License Agreement](https://cla.developers.google.com/about) (CLA). 11 | You (or your employer) retain the copyright to your contribution; this simply 12 | gives us permission to use and redistribute your contributions as part of the 13 | project. 14 | 15 | If you or your current employer have already signed the Google CLA (even if it 16 | was for a different project), you probably don't need to do it again. 17 | 18 | Visit to see your current agreements or to 19 | sign a new one. 20 | 21 | ### Review our Community Guidelines 22 | 23 | This project follows [Google's Open Source Community 24 | Guidelines](https://opensource.google/conduct/). 25 | 26 | ## Contribution process 27 | 28 | ### Code Reviews 29 | 30 | All submissions, including submissions by project members, require review. We 31 | use GitHub pull requests for this purpose. Consult 32 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 33 | information on using pull requests. -------------------------------------------------------------------------------- /path-foundation/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. 4 | 5 | ## Before you begin 6 | 7 | ### Sign our Contributor License Agreement 8 | 9 | Contributions to this project must be accompanied by a 10 | [Contributor License Agreement](https://cla.developers.google.com/about) (CLA). 11 | You (or your employer) retain the copyright to your contribution; this simply 12 | gives us permission to use and redistribute your contributions as part of the 13 | project. 14 | 15 | If you or your current employer have already signed the Google CLA (even if it 16 | was for a different project), you probably don't need to do it again. 17 | 18 | Visit to see your current agreements or to 19 | sign a new one. 20 | 21 | ### Review our Community Guidelines 22 | 23 | This project follows [Google's Open Source Community 24 | Guidelines](https://opensource.google/conduct/). 25 | 26 | ## Contribution process 27 | 28 | ### Code Reviews 29 | 30 | All submissions, including submissions by project members, require review. We 31 | use GitHub pull requests for this purpose. Consult 32 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 33 | information on using pull requests. 
34 | -------------------------------------------------------------------------------- /cxr-foundation/cxr_foundation/example_generator_lib_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # 3 | # Copyright 2022 Google LLC 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | """Tests for example_generator_lib.""" 17 | import unittest 18 | import example_generator_lib 19 | import numpy as np 20 | import pydicom 21 | 22 | 23 | class ExampleGeneratorLibTest(unittest.TestCase): 24 | 25 | def test_create_example(self): 26 | """Test the creation of examples.""" 27 | # This is a DICOM with a grayscale fake image. 28 | dicom_path = './testdata/fake.dcm' 29 | dicom = pydicom.dcmread(dicom_path) 30 | 31 | test_example = example_generator_lib.dicom_to_tfexample(dicom) 32 | f_dict = test_example.features.feature 33 | self.assertEqual(f_dict['image/format'].bytes_list.value[:], [b'png']) 34 | self.assertEqual(len(f_dict['image/encoded'].bytes_list.value[0]), 23287) 35 | 36 | 37 | if __name__ == '__main__': 38 | unittest.main() 39 | -------------------------------------------------------------------------------- /wet-amd-prediction/LICENSE: -------------------------------------------------------------------------------- 1 | LIMITED LICENSE: 2 | 3 | Copyright (c) 2020 Google LLC 4 | Limited License: Under no circumstance is commercial use, reproduction, or 5 | distribution permitted. Use, reproduction, and distribution are permitted 6 | solely for academic use in evaluating and reviewing claims made in 7 | Yim J. et al. (2020) "Predicting conversion to wet age related macular 8 | degeneration using deep learning", Nature Medicine doi:10.1038/s41591-020-0867-7, 9 | provided that the following conditions are met: 10 | 11 | * Any reproduction or distribution of source code must retain the above 12 | copyright notice and the full text of this license including the Disclaimer, 13 | below.
 14 | 15 | * Any reproduction or distribution in binary form must reproduce the above 16 | copyright notice and the full text of this license including the Disclaimer 17 | below in the documentation and/or other materials provided with the 18 | Distribution. 19 | 20 | * Any publication that discloses findings arising from using this source 21 | code must cite Yim J. et al. (2020) "Predicting conversion to wet age 22 | related macular degeneration using deep learning", Nature Medicine 23 | doi:10.1038/s41591-020-0867-7. 24 | 25 | DISCLAIMER 26 | 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 28 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 29 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 30 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 31 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 32 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 33 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 34 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 35 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 36 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 37 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 38 | 39 | -------------------------------------------------------------------------------- /cxr-foundation/cxr_foundation/constants.py: -------------------------------------------------------------------------------- 1 | # /usr/bin/python 2 | # 3 | # Copyright 2023 Google LLC 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | import dataclasses 17 | 18 | # GCP project ID of the embeddings endpoint. 19 | _EMBEDDINGS_PROJECT_NAME = 'gh-rad-validation-cxrembd-deid' 20 | # Vertex was initially offered only in "us-central1". 21 | _LOCATION = 'us-central1' 22 | 23 | # ID of the V1 embeddings endpoint. 24 | _V1_ENDPOINT_ID = 8327848403333545984 25 | # IDs of the V2 embeddings endpoints. 26 | _V2_C_ENDPOINT_ID = 3169498397715464192 27 | _V2_B_ENDPOINT_ID = 2156540325277990912 28 | 29 | 30 | @dataclasses.dataclass(frozen=True) 31 | class VertexEndpoint: 32 | """Vertex AI end-point configuration. 33 | 34 | Fields 35 | ------ 36 | project_name 37 | Name of the GCP Project which hosts the model. 38 | endpoint_location 39 | GCP Location (Zone) where the Vertex AI end-point is hosted. 40 | endpoint_id 41 | Numeric ID for the hosted end-point. 42 | """ 43 | 44 | project_name: str 45 | endpoint_location: str 46 | endpoint_id: int 47 | 48 | 49 | # End-point configuration for the CXR Foundation V1 model. 50 | ENDPOINT_V1 = VertexEndpoint( 51 | project_name=_EMBEDDINGS_PROJECT_NAME, 52 | endpoint_location=_LOCATION, 53 | endpoint_id=_V1_ENDPOINT_ID, 54 | ) 55 | 56 | 57 | # End-point configurations for the CXR Foundation V2 model. 58 | ENDPOINT_V2_C = VertexEndpoint( 59 | project_name=_EMBEDDINGS_PROJECT_NAME, 60 | endpoint_location=_LOCATION, 61 | endpoint_id=_V2_C_ENDPOINT_ID, 62 | ) 63 | ENDPOINT_V2_B = VertexEndpoint( 64 | project_name=_EMBEDDINGS_PROJECT_NAME, 65 | endpoint_location=_LOCATION, 66 | endpoint_id=_V2_B_ENDPOINT_ID, 67 | ) 68 | 69 | 70 | GCS_PREFIX = 'gs://' 71 | 72 | # tf.Example feature keys required by the embeddings service 73 | IMAGE_KEY = 'image/encoded' 74 | IMAGE_FORMAT_KEY = 'image/format' 75 | 76 | # Key for this library to serialize generated embeddings 77 | EMBEDDING_KEY = 'embedding' 78 | 79 | # The size of the float vector embeddings generated by the CXR foundation API 80 | DEFAULT_EMBEDDINGS_SIZE = 1376 81 | -------------------------------------------------------------------------------- /cxr-foundation/CXR_Foundation_Interactive_Demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [], 7 | "private_outputs": true, 8 | "cell_execution_strategy": "setup" 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "language_info": { 15 | "name": "python" 16 | } 17 | }, 18 | "cells": [ 19 | { 20 | "cell_type": "markdown", 21 | "source": [ 22 | "\n", 23 | " \n", 26 | " \n", 29 | "
\n", 24 | " Run in Google Colab\n", 25 | " \n", 27 | " View source on GitHub\n", 28 | "
" 30 | ], 31 | "metadata": { 32 | "id": "SmaSUdnET28O" 33 | } 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "source": [ 38 | "# CXR Foundations Demo App\n", 39 | "\n", 40 | "Run the below code cell to test out CXR Foundations with a GUI. This allows you to retrieve embeddings for x-rays, label the x-rays for some binary classification task, split the data into train and test and then finally train a linear classifier for the model. The demo can load in [NIH Chest X-ray demo x-rays](https://nihcc.app.box.com/v/ChestXray-NIHCC) but you can also bring your own DICOMs." 41 | ], 42 | "metadata": { 43 | "id": "iBVn2ob9lr6e" 44 | } 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": { 50 | "id": "yoAb5R0fgH84", 51 | "cellView": "form" 52 | }, 53 | "outputs": [], 54 | "source": [ 55 | "#@title Authenticate and Render Demo App\n", 56 | "\n", 57 | "from google.colab import auth\n", 58 | "from google.auth import default\n", 59 | "from google.auth.transport.requests import Request\n", 60 | "\n", 61 | "auth.authenticate_user()\n", 62 | "credentials, _ = default()\n", 63 | "credentials.refresh(Request())\n", 64 | "bearer_token = credentials.token\n", 65 | "\n", 66 | "import requests\n", 67 | "response = requests.get('https://cdn.jsdelivr.net/gh/Google-Health/imaging-research@latest/cxr-foundation/cxr_foundation_interactive_demo_deps/index.html')\n", 68 | "html_content = response.text\n", 69 | "html_content = html_content.replace('ACCESS_TOKEN', bearer_token)\n", 70 | "\n", 71 | "from IPython.core.display import display, HTML\n", 72 | "display(HTML(html_content))\n", 73 | "\n" 74 | ] 75 | } 76 | ] 77 | } 78 | -------------------------------------------------------------------------------- /cxr-foundation/cxr_foundation/embeddings_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # 3 | # Copyright 2022 Google LLC 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | """Module for reading and creating datasets from embeddings files generated by the CXR Foundation service. 17 | 18 | Unless specified otherwise, these functions are NOT generalizable/usable on 19 | embeddings files 20 | or tfrecord files from other sources. 21 | 22 | 23 | Expected structure of the generated TFRecord proto: 24 | 25 | features { 26 | feature { 27 | key: "embedding" 28 | value { 29 | float_list { 30 | value: -1.3924500942230225 31 | value: 0.4983276426792145 32 | value: 1.1395248174667358 33 | value: 0.6487054228782654 34 | ... 35 | } 36 | } 37 | } 38 | } 39 | """ 40 | from typing import Iterable 41 | 42 | from cxr_foundation import constants 43 | import numpy as np 44 | import tensorflow as tf 45 | 46 | 47 | def read_tfrecord_example(filename: str) -> tf.train.Example: 48 | """Read the tf.Example data contained in a single TFRecord embedding file. 49 | 50 | Args: 51 | filename: The path of the .tfrecord file to read 52 | 53 | Returns: 54 | The `tf.Example` data contained in the TFRecord. 
55 | 56 | Note: This is a convenience function for exploring/exporting. Do not use this 57 | in TF pipelines. 58 | """ 59 | raw_dataset = tf.data.TFRecordDataset(filename) 60 | # Expect only one element in the TFRecord. 61 | for raw_record in raw_dataset.take(1): 62 | example = tf.train.Example() 63 | example.ParseFromString(raw_record.numpy()) 64 | 65 | return example 66 | 67 | 68 | def _parse_example_values(example_data: tf.train.Example) -> np.ndarray: 69 | """Extract the embeddings values contained in an Example object, extracted from a file 70 | 71 | generated by the CXR foundation service. Helper function for 72 | `read_record_values`. 73 | 74 | Args: 75 | example_data: The Example object to extract the values from 76 | 77 | Returns: 78 | The 1D float array of the embeddings values 79 | 80 | Note: This is a convenience function for exploring/exporting. Do not use this 81 | in TF pipelines. 82 | """ 83 | # Unpack nested proto and create np array from google.protobuf.pyext._message.RepeatedScalarContainer 84 | try: 85 | values = np.array( 86 | example_data.features.feature[constants.EMBEDDING_KEY].float_list.value, 87 | dtype="float32", 88 | ) 89 | return values 90 | except ValueError: 91 | print( 92 | f"Input Example does not contain expected CXR Foundation embedding" 93 | f" structure." 94 | ) 95 | raise 96 | 97 | 98 | def read_tfrecord_values(filename: str) -> np.ndarray: 99 | """Read the embeddings values contained in a .tfrecord embedding file, generated by this library. 100 | 101 | Args: 102 | filename: The path of the .tfrecord file to read 103 | 104 | Returns: 105 | The 1D float array of the embeddings values 106 | 107 | Note: This is a convenience function for exploring/exporting. Do not use this 108 | in TF pipelines. 109 | """ 110 | return _parse_example_values(read_tfrecord_example(filename)) 111 | 112 | 113 | def read_npz_values(filename: str) -> np.ndarray: 114 | """Read the embeddings values contained in a .npz embedding file, generated by this package. 115 | 116 | Args: 117 | filename: The path of the .npz file to read 118 | 119 | Returns: 120 | The 1D float array of the embeddings values 121 | 122 | Note: You can load the generated .npz files without installing this package 123 | and its dependencies, by copying this simple function. You would only need to 124 | install Numpy. 125 | """ 126 | data = np.load(filename) 127 | return data[constants.EMBEDDING_KEY] 128 | 129 | 130 | def parse_serialized_example_values( 131 | serialized_example: bytes, 132 | embeddings_size: int = constants.DEFAULT_EMBEDDINGS_SIZE 133 | ) -> tf.Tensor: 134 | """Parses and extracts the embeddings values from a serialized tf.Example generated by the CXR foundation service. 135 | 136 | Args: 137 | serialized_example: The bytes of the tf.Example to parse. 138 | 139 | Returns: 140 | The 1D Tensor of float embeddings 141 | """ 142 | features = { 143 | constants.EMBEDDING_KEY: tf.io.FixedLenFeature( 144 | [embeddings_size], 145 | tf.float32, 146 | default_value=tf.constant( 147 | 0.0, shape=[embeddings_size] 148 | ), 149 | ) 150 | } 151 | parsed_tensors = tf.io.parse_example(serialized_example, features=features) 152 | return parsed_tensors[constants.EMBEDDING_KEY] 153 | 154 | 155 | def get_dataset( 156 | filenames: Iterable[str], 157 | labels: Iterable[int], 158 | embeddings_size: int = constants.DEFAULT_EMBEDDINGS_SIZE 159 | ) -> tf.data.Dataset: 160 | """Create a tf.data.Dataset from the specified tfrecord files and labels. 161 | 162 | Args: 163 | filenames: The set of .tfrecord file names. 
164 | labels: The corresponding label for each record. 165 | 166 | Returns: 167 | The Dataset, containing for each element: (embeddings, label) 168 | """ 169 | ds_embeddings = tf.data.TFRecordDataset( 170 | filenames, num_parallel_reads=tf.data.AUTOTUNE 171 | ).map(lambda x: parse_serialized_example_values(x, embeddings_size)) 172 | ds_labels = tf.data.Dataset.from_tensor_slices(labels) 173 | 174 | return tf.data.Dataset.zip((ds_embeddings, ds_labels)) 175 | -------------------------------------------------------------------------------- /wet-amd-prediction/ex_amd_model.py: -------------------------------------------------------------------------------- 1 | """exAMD prediction network architecture. 2 | 3 | Implementation of exAMD prediction network described in Yim J. et al. (2020) "Predicting conversion to wet age related macular 4 | degeneration using deep learning", Nature Medicine doi:10.1038/s41591-020-0867-7, 5 | 6 | Copyright 2020 Google LLC 7 | Limited license: see LICENSE 8 | """ 9 | 10 | import sonnet as snt 11 | import tensorflow as tf 12 | 13 | 14 | class ExAmdNet(snt.AbstractModule): 15 | """Future exAMD prediction deep learning network. 16 | 17 | Takes as input either a grey-scale 3D OCT volume or a one-hot encoded 18 | segmentation map of a 3D OCT volume. See manuscript for architecture details. 19 | """ 20 | 21 | def __init__(self, 22 | name='ex_amd_net'): 23 | """Initializes the model and parameters. 24 | 25 | Args: 26 | name: Variable name of module. 27 | """ 28 | super(ExAmdNet, self).__init__(name=name) 29 | 30 | # Convolution parameters. 31 | self._filter_chs = 32 32 | self._bottleneck_chs = 32 33 | 34 | def _build(self, inputs, is_training=True): 35 | """Internal method to build the sonnet module. 36 | 37 | Args: 38 | inputs: tensor of batch input OCT or dense segmentation maps. 39 | OCT shape: [batch, 41, 450, 450, 1] 40 | Segmentation map shape: [batch, 41, 450, 450, 17] 41 | is_training: flag for model usage when training 42 | 43 | Returns: 44 | Output tensor of module. A tensor with size equal to 45 | number of classes. 46 | """ 47 | net = inputs 48 | 49 | # First level. 
50 | net = block(net, 'l1', self._filter_chs // 4, 51 | block_kernels=[(1, 3, 3), (1, 3, 3)]) 52 | net = max_pool3d(net, pool_size=(1, 2, 2), strides=(1, 2, 2), name='l1_out') 53 | print('Shape after L1: %s' % net.shape.as_list()) 54 | 55 | # Second level 56 | net = block(net, 'l2', 57 | channels_per_layer=self._filter_chs // 2) 58 | net = max_pool3d(net, pool_size=(1, 2, 2), strides=(1, 2, 2), name='l2_out') 59 | print('Shape after L2: %s' % net.shape.as_list()) 60 | 61 | # Third level 62 | net = conv_1x1x1(net, self._bottleneck_chs * 4, 'l3_1x1x1') 63 | net = block(net, 'l3', 64 | channels_per_layer=self._filter_chs // 2) 65 | net = max_pool3d(net, pool_size=(2, 2, 2), strides=(2, 2, 2), name='l3_out') 66 | print('Shape after L3 level: %s' % net.shape.as_list()) 67 | 68 | # Fourth level 69 | net = conv_1x1x1(net, self._bottleneck_chs * 4, 'l4_1x1x1') 70 | for i in range(2): 71 | net = block(net, 'l4_b%d' % (i+1), 72 | channels_per_layer=self._filter_chs) 73 | net = max_pool3d(net, pool_size=(2, 2, 2), strides=(2, 2, 2), name='l4_out') 74 | print('Shape after L4 level: %s' % net.shape.as_list()) 75 | 76 | # Fifth level 77 | net = conv_1x1x1(net, self._bottleneck_chs * 4, 'l5_1x1x1') 78 | for i in range(2): 79 | net = block(net, 'l5_b%d' % i, 80 | channels_per_layer=self._filter_chs) 81 | net = max_pool3d(net, pool_size=(2, 2, 2), strides=(2, 2, 2), name='l5_out') 82 | print('Shape after L5 level: %s' % net.shape.as_list()) 83 | 84 | # Sixth level 85 | net = conv_1x1x1(net, self._bottleneck_chs * 8, 'l6_1x1x1') 86 | for i in range(2): 87 | net = block(net, 'l6_b%d' % i, 88 | channels_per_layer=self._filter_chs) 89 | print('Shape after L6 level: %s' % net.shape.as_list()) 90 | 91 | # Output 92 | net = snt.Conv3D(output_channels=self._bottleneck_chs * 4, 93 | kernel_shape=(1, 1, 1), 94 | stride=1, 95 | padding=snt.SAME, 96 | name='final_1x1x1')(net) 97 | print('Output shape: %s' % net.shape.as_list()) 98 | return net 99 | 100 | 101 | def conv_3d(inputs, 102 | output_channels, 103 | kernel_shape, 104 | strides, 105 | name, 106 | activation=tf.nn.relu, 107 | use_bias=True): 108 | """Wraps sonnet 3D conv module with a nonlinear activation.""" 109 | conv_out = snt.Conv3D( 110 | output_channels=output_channels, 111 | kernel_shape=kernel_shape, 112 | stride=strides, 113 | use_bias=use_bias, 114 | name=name)( 115 | inputs) 116 | return activation(conv_out) 117 | 118 | 119 | def block(inputs, 120 | name_prefix, 121 | channels_per_layer, 122 | block_kernels=None, 123 | activation=tf.nn.relu, 124 | stride=1): 125 | """Consecutive convolution filters with skip connections.""" 126 | if not block_kernels: 127 | # Full block length if not specified. 128 | block_kernels = [(1, 3, 3), (1, 3, 3), (3, 1, 1), (1, 3, 3), (1, 3, 3), 129 | (3, 1, 1)] 130 | layer_stack = [inputs] 131 | for kernel in block_kernels: 132 | # Iterate through all kernels to construct a stack of intermediate 133 | # representations. 134 | layer_stack.append( 135 | conv_3d( 136 | inputs=layer_stack[-1], 137 | output_channels=channels_per_layer, 138 | kernel_shape=kernel, 139 | strides=stride, 140 | activation=activation, 141 | name='{}_{}'.format(name_prefix, 142 | 'x'.join([str(x) for x in kernel])))) 143 | # Concatenate all representations in the layer output as final output. 
144 | output = tf.concat(layer_stack, axis=-1) 145 | return output 146 | 147 | 148 | def max_pool3d(inputs, pool_size, strides, name): 149 | return tf.keras.layers.MaxPool3D( 150 | pool_size=pool_size, strides=strides, name=name)( 151 | inputs) 152 | 153 | 154 | def conv_1x1x1(inputs, channels, name): 155 | return snt.Conv3D(output_channels=channels, 156 | kernel_shape=(1, 1, 1), 157 | stride=1, 158 | padding=snt.SAME, 159 | name=name)(inputs) 160 | 161 | -------------------------------------------------------------------------------- /cxr-foundation/cxr_foundation_interactive_demo_deps/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
⭅ Click here for fullscreen
9 | 10 |
11 |
12 | 13 |

CXR Train Classifier Over Embeddings

14 |
15 | This model runs on an endpoint from CXR Foundation; please sign up before using it. 16 |
17 |
18 | Enter your label terminology: 19 |
20 | 24 | 28 |
29 |
30 |
31 | Dataset to use: 32 |
33 | 34 | 35 | 36 | 37 |
38 | The NIH ChestX-ray14 dataset consists of over 100,000 de-identified chest x-ray images with fourteen common disease labels, text-mined from the associated radiology reports using NLP techniques. 43 | 44 |
45 |
46 | 47 |
48 | 49 | Select multiple CXR DICOM P10 files on your local file system. 50 | Loaded images will be marked by default as: 51 |
53 |
absent
54 | 🔄 55 | (click to toggle) 56 |
57 |
58 |
59 |
60 |
61 | 62 | 63 | You can toggle the image labels between
present
and 65 |
absent
by clicking the GT badge.
66 |

67 |
73 |
74 |
75 |
76 |
77 |
78 | 87 |
88 |
89 | 118 |
119 | 120 |



121 |
122 |
123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | -------------------------------------------------------------------------------- /cxr-foundation/cxr_foundation/example_generator_lib.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # 3 | # Copyright 2022 Google LLC 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | """Methods to create tf.examples for model inference via pydicom.""" 17 | 18 | 19 | import io 20 | from typing import Iterable, Union 21 | 22 | from cxr_foundation import constants 23 | import numpy as np 24 | import png 25 | import pydicom 26 | from pydicom.pixel_data_handlers.util import apply_modality_lut 27 | import tensorflow as tf 28 | 29 | _BITS_PER_BYTE = 8 30 | _WINDOWWIDTH = 'WindowWidth' 31 | _WINDOWCENTER = 'WindowCenter' 32 | 33 | 34 | def _encode_png(array: np.ndarray) -> bytes: 35 | """Converts an unsigned integer 2-D NumPy array to a PNG-encoded string. 36 | 37 | Unsigned 8-bit and 16-bit images are supported. 38 | 39 | Args: 40 | array: Array to be encoded. 41 | 42 | Returns: 43 | PNG-encoded string. 44 | 45 | Raises: 46 | ValueError: If any of the following occurs: 47 | - `array` is not 2-D. 48 | - `array` data type is unsupported. 49 | """ 50 | supported_types = frozenset([np.uint8, np.uint16]) 51 | # Sanity checks. 52 | if array.ndim != 2: 53 | raise ValueError(f'Array must be 2-D. Actual dimensions: {array.ndim}') 54 | if array.dtype.type not in supported_types: 55 | raise ValueError( 56 | 'Pixels must be either `uint8` or `uint16`. ' 57 | f'Actual type: {array.dtype.name!r}' 58 | ) 59 | 60 | # Actual conversion. 61 | writer = png.Writer( 62 | width=array.shape[1], 63 | height=array.shape[0], 64 | greyscale=True, 65 | bitdepth=_BITS_PER_BYTE * array.dtype.itemsize, 66 | ) 67 | output_data = io.BytesIO() 68 | writer.write(output_data, array.tolist()) 69 | return output_data.getvalue() 70 | 71 | 72 | def _rescale_dynamic_range(image: np.ndarray) -> np.ndarray: 73 | """Rescales the dynamic range in an integer image to use the full bit range. 74 | 75 | Args: 76 | image: An image containing unsigned integer pixels. 77 | 78 | Returns: 79 | Rescaled copy of `image` that uses all the available bits per pixel. 80 | 81 | Raises: 82 | ValueError: If pixels are not of an integer type. 83 | """ 84 | if not np.issubdtype(image.dtype, np.integer): 85 | raise ValueError( 86 | 'Image pixels must be an integer type. ' 87 | f'Actual type: {image.dtype.name!r}' 88 | ) 89 | iinfo = np.iinfo(image.dtype) 90 | return np.interp( 91 | image, (image.min(), image.max()), (iinfo.min, iinfo.max) 92 | ).astype(iinfo) 93 | 94 | 95 | def _shift_to_unsigned(image: np.ndarray) -> np.ndarray: 96 | """Shifts values by the minimum value to an unsigned array suitible for PNG. 97 | 98 | This works with signed images and converts them to unsigned versions. 
It 99 | involves an inefficient step to convert to a larger data structure for 100 | shifting all values by the minimum value in the array. It also support float 101 | data by converting them into uint16. 102 | 103 | Args: 104 | image: An image containing signed integer pixels. 105 | 106 | Returns: 107 | Copy of `image` in an unsigned format. Note that the exact same image is 108 | returned when given an unsigned version. 109 | 110 | Raises: 111 | ValueError: If pixels are not of an integer type or float. 112 | """ 113 | if image.dtype == np.uint16 or image.dtype == np.uint8: 114 | return image 115 | elif image.dtype == np.int16: 116 | image = image.astype(np.int32) 117 | return (image - np.min(image)).astype(np.uint16) 118 | elif image.dtype == np.int8: 119 | image = image.astype(np.int16) 120 | return (image - np.min(image)).astype(np.uint8) 121 | elif image.dtype in (np.float32, np.float64): 122 | uint16_max = np.iinfo(np.uint16).max 123 | image = image - np.min(image) 124 | if np.max(image) > uint16_max: 125 | image = image * (uint16_max / np.max(image)) 126 | image[image > uint16_max] = uint16_max 127 | return image.astype(np.uint16) 128 | raise ValueError( 129 | 'Image pixels must be an 8, 16 bit integer or float type. ' 130 | f'Actual type: {image.dtype.name!r}' 131 | ) 132 | 133 | 134 | def _apply_pydicom_prep(ds: pydicom.Dataset) -> np.ndarray: 135 | """Prepares pixel data after applying data handling from pydicom.""" 136 | 137 | def window_u16( 138 | image: np.ndarray, window_center: int, window_width: int 139 | ) -> np.ndarray: 140 | max_window = np.iinfo(np.uint16).max 141 | top_clip = window_center - 1 + window_width / 2 142 | bottom_clip = window_center - window_width / 2 143 | return np.interp( 144 | image.clip(bottom_clip, top_clip), 145 | (bottom_clip, top_clip), 146 | (0, max_window), 147 | ) 148 | 149 | arr = ds.pixel_array 150 | pixel_array = apply_modality_lut(arr, ds) 151 | if _WINDOWWIDTH in ds and _WINDOWCENTER in ds: 152 | window_center = ds.WindowCenter 153 | window_width = ds.WindowWidth 154 | if isinstance(ds.WindowCenter, pydicom.multival.MultiValue): 155 | window_center = int(ds.WindowCenter[0]) 156 | if isinstance(ds.WindowWidth, pydicom.multival.MultiValue): 157 | window_width = int(ds.WindowWidth[0]) 158 | pixel_array = window_u16(pixel_array, window_center, window_width) 159 | if ds.PhotometricInterpretation == 'MONOCHROME1': 160 | pixel_array = np.max(pixel_array) - pixel_array 161 | pixel_array = _shift_to_unsigned(pixel_array) 162 | # Don't rescale dynamic range for 8-bit images like CXR14. 163 | if pixel_array.dtype != np.uint8: 164 | pixel_array = _rescale_dynamic_range(pixel_array) 165 | return pixel_array 166 | 167 | 168 | def _assign_bytes_feature( 169 | feature: tf.train.Feature, value: Union[bytes, Iterable[bytes]] 170 | ) -> None: 171 | """Assigns a bytes float value into feature.""" 172 | if isinstance(value, bytes): 173 | feature.bytes_list.value[:] = [value] 174 | else: 175 | assert not isinstance(value, str) 176 | feature.bytes_list.value[:] = list(value) 177 | 178 | 179 | def png_to_tfexample(image_array: np.ndarray) -> tf.train.Example: 180 | """Create a tf.example for inference. 181 | 182 | The image will be spread to the full bit-depth of 16-bit images. 183 | 184 | Args: 185 | image_array: An image to use to create the example. 186 | 187 | Returns: 188 | example: A tf.example for inference. 189 | """ 190 | pixel_array = _shift_to_unsigned(image_array) 191 | # Don't rescale dynamic range for 8-bit images like CXR14. 
192 | if pixel_array.dtype != np.uint8: 193 | pixel_array = _rescale_dynamic_range(pixel_array) 194 | png_bytes = _encode_png(pixel_array) 195 | example = tf.train.Example() 196 | features = example.features.feature 197 | _assign_bytes_feature(features[constants.IMAGE_KEY], png_bytes) 198 | _assign_bytes_feature(features[constants.IMAGE_FORMAT_KEY], b'png') 199 | return example 200 | 201 | 202 | def dicom_to_tfexample(single_dicom: pydicom.Dataset) -> tf.train.Example: 203 | """Create a tf.example for inference. 204 | 205 | Resulting images are spread to the full bit-depth of 16-bit images. 206 | Applies apply_modality_lut first followed by window/level if prresent. 207 | 208 | Args: 209 | single_dicom: A pydicom dataset used to create the example. 210 | 211 | Returns: 212 | example: A tf.example for inference. 213 | """ 214 | image_array = _apply_pydicom_prep(single_dicom) 215 | png_bytes = _encode_png(image_array) 216 | example = tf.train.Example() 217 | features = example.features.feature 218 | _assign_bytes_feature(features[constants.IMAGE_KEY], png_bytes) 219 | _assign_bytes_feature(features[constants.IMAGE_FORMAT_KEY], b'png') 220 | return example 221 | -------------------------------------------------------------------------------- /cxr-foundation/README.md: -------------------------------------------------------------------------------- 1 | CXR Foundation 2 | 3 | **CXR Foundation** is a tool to generate custom embeddings from chest x-ray (CXR) images. These embeddings can be used to develop custom machine learning models for CXR with less data and compute. You can read more about the research behind CXR Foundation in our recent publication: [Simplified Transfer Learning for Chest Radiography Models Using Less Data](https://doi.org/10.1148/radiol.212482). 4 | 5 | ## How to use CXR Foundation 6 | 7 | 1. Fill out the [API access form](https://forms.gle/SJBSawfDw19JZRA59). Your provided Gmail account will be used for access once approved for non-clinical use. 8 | 2. Once granted you’ll get an email and can use the CXR Foundation API with your own images. 9 | 3. If you want to get started in a no code environment, please run our [GUI-based demo](https://colab.research.google.com/github/Google-Health/imaging-research/blob/master/cxr-foundation/CXR_Foundation_Interactive_Demo.ipynb). This provides a web interface to: 10 | * import your own images in DICOM format and view these with windowing options 11 | * label them 12 | * Retrieve embeddings 13 | * Split data into train and eval sets 14 | * Train a linear probe 15 | * Evaluate performance on the eval set and pick an operating point 16 | * Running in Colab this app will let you: 17 | We’ve also linked it directly to CXR-14 data, so you can try it out on public data as well. 18 | 4. You also have access to this GitHub repository containing Python source code to: 19 | 1. Convert DICOM images into PNGs suitable for calling CXR Foundation 20 | 2. Call the API to generate embeddings from the PNG 21 | 5. [Install the gcloud CLI](https://cloud.google.com/sdk/docs/install) and [log in](https://cloud.google.com/sdk/gcloud/reference/auth/login): 22 | 23 | gcloud auth application-default login 24 | 25 | 6. Clone the Repository into a local directory 26 | 27 | git clone https://github.com/Google-Health/imaging-research.git 28 | cd imaging-research/cxr-foundation 29 | 30 | 7. Install the CXR Foundation package: 31 | 32 | pip install . 33 | 34 | 8. Run the CXR Foundation code: 35 | 1. 
Upload your chest x-ray DICOMs or PNGs to a cloud bucket or use a local directory. 36 | 2. Generate and save embeddings. 37 | 3. Read them and use them to train your model. 38 | 39 | See the notebooks for examples of how to use the embeddings service and this package. 40 | 41 | - [GUI-based demo](https://colab.research.google.com/github/Google-Health/imaging-research/blob/master/cxr-foundation/CXR_Foundation_Interactive_Demo.ipynb) allows you to get started exploring embeddings really quickly. 42 | - [CXR_Foundation_Demo.ipynb](./CXR_Foundation_Demo.ipynb) - Demonstrates how to use the CXR Foundation service to generate embeddings from DICOMs. Uses the generated embeddings in a model. 43 | - [MIMIC_Embeddings_Demo.ipynb](MIMIC_Embeddings_Demo.ipynb) - Uses the already generated embeddings from the [MIMIC CXR Embeddings Database](https://physionet.org/content/image-embeddings-mimic-cxr/1.0/) in a model. 44 | 45 | Have questions? Email [cxr-foundation@google.com](mailto:cxr-foundation@google.com). 46 | 47 | ## Third Party Apps 48 | 49 | CXR Foundation is also available on [Superbio.ai](https://app.superbio.ai/apps/247?id=640613393e3000de4dfb424d) as an online app. After agreeing to Google’s Terms for the CXR Foundation tool, you can access and utilize the app. 50 | 51 | ## Package APIs - Generating and Using Embeddings 52 | 53 | The following code block highlights the pertinent functions. See the notebooks for demo usage. 54 | 55 | ```python 56 | from cxr_foundation.inference import generate_embeddings 57 | from cxr_foundation.embeddings_data import read_tfrecord_values, read_npz_values, get_dataset 58 | 59 | 60 | help(generate_embeddings) 61 | help(read_tfrecord_values) 62 | help(read_npz_values) 63 | help(get_dataset) 64 | ``` 65 | 66 | Note: `.npz` format embeddings files generated by this package and the Foundation API, CAN be read without this package. If you want to use generated embeddings files in a Python environment, but don't want to install this package and its dependencies in the same environment, just copy the `embeddings_data.read_npz_values` function into one of your modules, which only requires numpy. 67 | 68 | ## General Notes 69 | 70 | - Google does not keep a copy of any images sent. 71 | - Google monitors daily query volume and aggregates on a per-user and per-organization basis. Access can be revoked if a user or organization exceeds a reasonable query volume. 72 | - If you consented to follow-up, Google may reach out for feedback. 73 | - Please use the following reference for any published work: 74 | - Sellergren AB, Chen C, Nabulsi Z, Li Y, Maschinot A, Sarna A, Huang J, Lau C, Kalidindi SR, Etemadi M, Garcia-Vicente F, Melnick D, Liu Y, Eswaran K, Tse D, Beladia N, Krishnan D, Shetty S. Simplified Transfer Learning for Chest Radiography Models Using Less Data. Radiology. 2022 Nov;305(2):454-465. doi: 10.1148/radiol.212482. Epub 2022 Jul 19. PMID: 35852426. 75 | 76 | ## Contributing 77 | 78 | See [`CONTRIBUTING.md`](CONTRIBUTING.md) for details. 79 | 80 | ## License 81 | 82 | See [`LICENSE`](LICENSE) for details. 83 | 84 | ## Disclaimer 85 | 86 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 87 | 88 | # Model Card for CXR Foundation 89 | 90 | ## Model Details 91 | 92 | ### Overview 93 | 94 | This model generates embeddings for chest x-rays (CXRs). Embeddings are n-dimensional vectors of floating points representing a projection of the original image into a compressed feature space capable of describing potential abnormalities that exist in the image. These embeddings are to be used by “downstream models” for final tasks such as detecting a particular abnormality in a CXR. The model uses the EfficientNet-L2 architecture (https://arxiv.org/pdf/1911.04252v4.pdf). It was trained on 821,544 CXRs from India and the US using abnormal vs. normal labels, i.e. the image contained any kind of abnormality, and the Supervised Contrastive loss (https://arxiv.org/abs/2004.11362v1). The abnormal vs. normal labels were obtained from more granular labels (e.g. pneumothorax, fracture) as well as regular expressions on radiology reports (https://pubmed.ncbi.nlm.nih.gov/34471144/). 95 | 96 | ### Version 97 | 98 | name: v1.0 99 | date: 2022-07-19 100 | 101 | ### Owners 102 | 103 | Andrew Sellergren, asellerg@google.com 104 | 105 | ### Licenses 106 | 107 | - See [CXR Foundation - Additional Terms of Service](https://forms.gle/SJBSawfDw19JZRA59). 108 | 109 | ### References 110 | 111 | - https://arxiv.org/pdf/1911.04252v4.pdf 112 | - https://arxiv.org/pdf/1912.11370.pdf 113 | - https://arxiv.org/abs/2004.11362v1 114 | - https://pubmed.ncbi.nlm.nih.gov/34471144/ 115 | 116 | ### Citations 117 | 118 | - Sellergren A, Chen C, et al. Simplified Transfer Learning for Chest Radiography Models Using Less Data. Radiology. 2022. 119 | 120 | ## Considerations 121 | 122 | ### Use Cases 123 | 124 | - Embeddings can reduce barriers to entry for training custom models with less data, setup, and compute. 125 | - Embeddings can allow for quick evaluation. 126 | 127 | ### Limitations 128 | 129 | - The model was trained using only data from the US and India and may not generalize well to data from other countries, patient populations, or manufacturers not used in training. 130 | - The model is only used to generate embeddings of the user-owned dataset. It does not generate any predictions or diagnosis on its own. 131 | 132 | ### Ethical Considerations 133 | 134 | - Risk: Although Google does not store permanently any data sent to this model, it is the data owner's responsibility to ensure that Personally identifiable information (PII) and Protected Health Information (PHI) are removed prior to being sent to the model. \ 135 | - Mitigation Strategy: Do not send data containing PII or PHI. 136 | -------------------------------------------------------------------------------- /ct-foundation/LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 
9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 
180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /cxr-foundation/LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. 
For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /derm-foundation/LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 
12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 
135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 
194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /path-foundation/LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. 
For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 
203 | -------------------------------------------------------------------------------- /cxr-foundation/cxr_foundation_interactive_demo_deps/cxr.css: -------------------------------------------------------------------------------- 1 | :root { 2 | --text-color: rgb(67 80 102); 3 | --body-bg: #f8f9fa; 4 | --border: #ced4da; 5 | } 6 | 7 | body { 8 | font-family: 'Roboto', sans-serif; 9 | font-size: 16px; 10 | line-height: 1.6; 11 | color: var(--text-color); 12 | background-color: var(--body-bg); 13 | -webkit-user-select: none; /* Safari */ 14 | -moz-user-select: none; /* Firefox */ 15 | -ms-user-select: none; /* Internet Explorer/Edge */ 16 | user-select: none; /* Supported by modern browsers */ 17 | } 18 | 19 | .container { 20 | display: flex; 21 | flex-direction: column; 22 | align-items: center; 23 | } 24 | 25 | h1, h2, h3, h4, h5, h6 { 26 | font-family: 'Roboto', sans-serif; 27 | font-weight: bold; 28 | margin-bottom: 1rem; 29 | } 30 | 31 | button { 32 | display: inline-block; 33 | padding: 0.75rem 1.5rem; 34 | font-size: 1rem; 35 | text-align: center; 36 | text-decoration: none; 37 | border-width: 1px; 38 | border-radius: 5px; 39 | cursor: pointer; 40 | transition: background-color 0.3s ease; 41 | } 42 | 43 | .module { 44 | background-color: white; 45 | border-radius: 5px; 46 | border: 1px solid var(--border); 47 | margin: 25px; 48 | padding: 15px; 49 | width: fit-content; 50 | display: flex; 51 | flex-direction: column; 52 | align-items: center; 53 | } 54 | 55 | .terminology-inputs { 56 | display: flex; 57 | flex-direction: row; 58 | justify-content: flex-start; 59 | } 60 | 61 | .terminology-label { 62 | display: flex; 63 | flex-direction: column; 64 | padding: 5px; 65 | margin: 5px; 66 | align-items: center; 67 | } 68 | 69 | .terminology-label span { 70 | font-size: 12px; 71 | } 72 | 73 | .terminology-label input[type="text"] { 74 | margin-top: 5px; 75 | padding: 5px; 76 | border: 1px solid #ced4da; 77 | border-radius: 5px; 78 | background-color: #eee; 79 | text-align: center; 80 | } 81 | 82 | .module.dataset { 83 | width: 500px 84 | } 85 | 86 | .module.dataset #loadDatasetButton { 87 | padding: 3px; 88 | } 89 | 90 | #selectFile { 91 | display: block; 92 | width: 100%; 93 | max-width: 500px; 94 | padding: 0.75rem 1.5rem; 95 | font-size: 1rem; 96 | line-height: 1.5; 97 | color: #495057; 98 | background-color: #fff; 99 | border: 1px solid #ced4da; 100 | border-radius: 5px; 101 | cursor: pointer; 102 | transition: background-color 0.3s ease, border-color 0.3s ease; 103 | margin-top: 30px; 104 | } 105 | 106 | #selectFile:hover { 107 | background-color: #e9ecef; 108 | border-color: #adb5bd; 109 | } 110 | 111 | #selectFile::-webkit-file-upload-button { 112 | visibility: hidden; 113 | } 114 | 115 | #selectFile::before { 116 | content: 'Choose Files'; 117 | display: inline-block; 118 | padding: 0.375rem 0.75rem; 119 | font-size: 1rem; 120 | font-weight: bold; 121 | line-height: 1.5; 122 | color: buttontext; 123 | background-color: buttonface; 124 | border: none; 125 | border-radius: 3px; 126 | cursor: pointer; 127 | margin-right: 1rem; 128 | } 129 | 130 | #selectFile:hover::before { 131 | background-color: #0056b3; 132 | } 133 | 134 | h1 { font-size: 2.5rem; } 135 | 136 | .viewer-container { 137 | width: 300px; 138 | height: 300px; 139 | margin: 3px; 140 | } 141 | 142 | .viewer-container:has(.popup) { 143 | background-image: linear-gradient( 144 | 45deg, 145 | black, 146 | gray 147 | ); 148 | 149 | } 150 | .viewer { 151 | position: relative; 152 | width: 300px; 153 | height: 300px; 154 | 
cursor: crosshair; 155 | } 156 | 157 | .zoomButton { 158 | position: absolute; 159 | color: white; 160 | text-shadow: -1px -1px 0 #000; 161 | 162 | width: 15px; 163 | height: 15px; 164 | bottom: 10px; 165 | right: 5px; 166 | } 167 | 168 | .popup .zoomButton { 169 | top: 0px; 170 | right: 0px; 171 | visibility: hidden; 172 | } 173 | 174 | .popup .zoomButton::before { 175 | visibility:visible; 176 | position: absolute; 177 | color: white; 178 | content: '✕'; 179 | z-index: 5; 180 | right: 5px; 181 | } 182 | 183 | /* hide zoom buttons of other images */ 184 | .childPopup :not(.popup) > .zoomButton { 185 | display: none; 186 | } 187 | 188 | #backdrop { 189 | position: fixed; 190 | background-color: #000; 191 | opacity: 0; 192 | top: 0; 193 | left: 0; 194 | width: 100%; 195 | height: 100%; 196 | z-index: -1; 197 | } 198 | 199 | #backdrop:has(+.childPopup) { 200 | position: fixed; 201 | background-color: #000; 202 | opacity: .6; 203 | transition: opacity 0.5s ease; 204 | z-index: 3; 205 | } 206 | 207 | .popup { 208 | position: fixed; 209 | top: 50%; 210 | left: 50%; 211 | transform: translate(-50%, -50%) scale(2); 212 | z-index: 3; 213 | } 214 | 215 | .viewList { 216 | display: flex; 217 | flex-direction: row; 218 | flex-wrap: wrap; 219 | } 220 | 221 | 222 | .badge { 223 | position: absolute; 224 | padding: 0px 5px; 225 | border-radius: 12px; 226 | color: black; 227 | font-size: 12px; 228 | font-weight: bold; 229 | text-align: center; 230 | display: none; 231 | transition: background-color 0.2s ease-in-out, color 0.2s ease-in-out; 232 | } 233 | 234 | .image-loader-container { 235 | display: inline-block; 236 | } 237 | 238 | .badge-in-text { 239 | position: unset; 240 | display: inline-flex; 241 | vertical-align: middle; 242 | margin: 0 3px; 243 | white-space: nowrap; 244 | width: fit-content; 245 | } 246 | 247 | .badge.ai { 248 | top: 30px; 249 | left: 5px; 250 | background: grey; 251 | } 252 | 253 | .ai-positive .badge.ai { 254 | background-color: #f0ad4e; 255 | display: block; 256 | } 257 | 258 | .ai-negative .badge.ai { 259 | background-color: #d3d3d3; 260 | display: block; 261 | } 262 | 263 | .ai-negative.positive .badge.ai { 264 | color: red; 265 | } 266 | 267 | .negative.ai-positive .badge.ai { 268 | color: red; 269 | } 270 | 271 | 272 | .badge.ai::before { 273 | content: "AI: "; 274 | } 275 | 276 | 277 | .badge.gt { 278 | top: 5px; 279 | left: 5px; 280 | background: grey; 281 | 282 | } 283 | 284 | .positive .badge.gt { 285 | background-color: #f0ad4e; 286 | display: block; 287 | } 288 | 289 | .negative .badge.gt { 290 | background-color: #d3d3d3; 291 | display: block; 292 | } 293 | 294 | .badge.gt::before { 295 | content: "GT: "; 296 | } 297 | 298 | .badge.dataset { 299 | top: 3px; 300 | right: 3px; 301 | border-radius: 2px; 302 | color: white; 303 | } 304 | 305 | .training .badge.dataset { 306 | background: #5bc0de; 307 | display: block; 308 | } 309 | 310 | .training .badge.dataset::before { 311 | content: "training set"; 312 | } 313 | 314 | .eval .badge.dataset { 315 | background: #5cb85c; 316 | display: block; 317 | } 318 | 319 | .eval .badge.dataset::before { 320 | content: "eval set"; 321 | } 322 | 323 | .badge-in-text .badge { 324 | position: unset; 325 | border-width: 1px; 326 | border-color: black; 327 | border-style: solid; 328 | } 329 | 330 | .embedding::before { 331 | content: "✅"; 332 | position: absolute; 333 | bottom: 10px; 334 | left: 10px; 335 | width: 20px; 336 | height: 20px; 337 | animation: moveToCorner 1s ease-in-out forwards; 338 | z-index: 1; 339 | color: 
lightyellow; 340 | } 341 | 342 | @keyframes moveToCorner { 343 | from { 344 | bottom: 50%; 345 | left: 50%; 346 | scale: 3; 347 | opacity: 1; 348 | } 349 | to { 350 | bottom: 8px; 351 | left: 5px; 352 | scale: 1; 353 | opacity: 0.7; 354 | } 355 | } 356 | 357 | .positive { 358 | position: relative; 359 | } 360 | 361 | .negative { 362 | position: relative; 363 | } 364 | 365 | .toggleButton { 366 | height: 12px; 367 | position: absolute; 368 | top: 3px; 369 | left: 90px; 370 | cursor: alias; 371 | font-size: 1rem; 372 | } 373 | 374 | #toast-container { 375 | position: fixed; 376 | top: 10px; 377 | right: 10px; 378 | z-index: 9999; 379 | } 380 | 381 | .toast { 382 | background-color: #FF5F6D; 383 | color: white; 384 | padding: 10px 20px; 385 | margin-top: 10px; 386 | border-radius: 5px; 387 | box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1); 388 | opacity: 0; 389 | transition: opacity 0.5s ease-in-out; 390 | } 391 | 392 | .center-child { 393 | position: relative; 394 | } 395 | 396 | .loader { 397 | position: fixed; 398 | left: 50%; 399 | bottom: 2em; 400 | font-size:48px; 401 | color: #F00; 402 | width: 1em; 403 | height: 1em; 404 | box-sizing: border-box; 405 | border-radius: 50%; 406 | transform: translateX(-50%) rotateX(-60deg) perspective(1000px); 407 | } 408 | .loader:before, 409 | .loader:after { 410 | content: ''; 411 | display: block; 412 | position: absolute; 413 | box-sizing: border-box; 414 | top: 0; 415 | left: 0; 416 | width: inherit; 417 | height: inherit; 418 | border-radius: inherit; 419 | animation: flowerFlow 1s ease-out infinite; 420 | } 421 | .loader:after { 422 | animation-delay: .4s; 423 | } 424 | 425 | @keyframes flowerFlow { 426 | 0% { 427 | opacity: 1; 428 | transform: rotate(0deg); 429 | box-shadow: 0 0 0 -.5em currentcolor, 430 | 0 0 0 -.5em currentcolor, 431 | 0 0 0 -.5em currentcolor, 432 | 0 0 0 -.5em currentcolor, 433 | 0 0 0 -.5em currentcolor, 434 | 0 0 0 -.5em currentcolor, 435 | 0 0 0 -.5em currentcolor, 436 | 0 0 0 -.5em currentcolor; 437 | } 438 | 100% { 439 | opacity: 0; 440 | transform: rotate(180deg); 441 | box-shadow: -1em -1em 0 -.35em currentcolor, 442 | 0 -1.5em 0 -.35em currentcolor, 443 | 1em -1em 0 -.35em currentcolor, 444 | -1.5em 0 0 -.35em currentcolor, 445 | 1.5em -0 0 -.35em currentcolor, 446 | -1em 1em 0 -.35em currentcolor, 447 | 0 1.5em 0 -.35em currentcolor, 448 | 1em 1em 0 -.35em currentcolor; 449 | } 450 | } 451 | 452 | .thumbnail { 453 | width: 100px; 454 | margin: 1px; 455 | } 456 | 457 | .loading { 458 | animation: pulse 2.5s infinite; 459 | } 460 | 461 | @keyframes pulse { 462 | 0%, 100% { 463 | background-color: white; 464 | } 465 | 50% { 466 | background-color: yellow; 467 | } 468 | } 469 | 470 | .loaded::before { 471 | content:'[add icon symbol here]'; 472 | display:inline-block; 473 | vertical-align: top; 474 | line-height: 1em; 475 | width: 1em; 476 | height:1em; 477 | margin-right: 0.3em; 478 | text-align: center; 479 | color: #999; 480 | 481 | } 482 | 483 | .step3 { 484 | position: fixed; 485 | right: 0px; 486 | bottom: 10px; 487 | background: lightskyblue; 488 | opacity: 90%; 489 | padding: 10px; 490 | border-color: black; 491 | border-width: 1px; 492 | border-style: solid; 493 | z-index: 2; 494 | } 495 | 496 | .step3-container { 497 | display: flex; 498 | justify-content: space-between; 499 | height: 300px; 500 | width: 100%; 501 | flex-direction: row; 502 | } 503 | 504 | .vflex { 505 | display: flex; 506 | flex-direction: column; 507 | justify-content: space-between; 508 | } 509 | 510 | #metrics p { 511 | margin: 0; 512 | } 513 | 
514 | .verticalSeperator { 515 | width: 10px; margin: 10px 50px; background-color: gray; 516 | } 517 | 518 | .verticalSeperator:has(+ div[style*="display: none"]) { 519 | display: none; 520 | } 521 | 522 | /**** tabs ****/ 523 | .tabs { 524 | width: 100%; 525 | } 526 | 527 | .tabs .content { 528 | display: none; 529 | opacity: 0; 530 | padding: 20px; 531 | border-top: 2px solid var(--border); 532 | } 533 | 534 | .tabs input[type='radio'] { 535 | width: 0; 536 | height: 0; 537 | opacity: 0; 538 | } 539 | 540 | .tabs label { 541 | cursor: pointer; 542 | display: inline-flex; 543 | justify-content: center; 544 | align-items: center; 545 | width: fit-content; 546 | height: 30px; 547 | background-color: #eee; 548 | border-style: solid solid none solid; 549 | border-width: 2px; 550 | border-color: transparent; 551 | border-radius: 5px 5px 0 0; 552 | padding: 0 4px; 553 | } 554 | 555 | #tab1:checked+label { 556 | border-color: var(--border); 557 | } 558 | 559 | #tab2:checked+label { 560 | border-color: var(--border); 561 | } 562 | 563 | #tab1:checked~#content1 { 564 | display: block; 565 | width: unset; 566 | opacity: 1; 567 | } 568 | 569 | #tab2:checked~#content2 { 570 | display: block; 571 | width: unset; 572 | opacity: 1; 573 | } 574 | 575 | .fullscreenNote { 576 | position: fixed; 577 | top: 5px; 578 | animation: disappear 0s ease-in 5s forwards; 579 | height: 100%; 580 | width: 100%; 581 | background: rgba(0, 0, 0, 0.6); 582 | color: yellow; 583 | } 584 | 585 | @keyframes disappear { 586 | to { 587 | opacity: 0; 588 | height: 0; 589 | width: 0; 590 | } 591 | } -------------------------------------------------------------------------------- /cxr-foundation/cxr_foundation/inference.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # 3 | # Copyright 2023 Google LLC 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | """Collection of functions to generate embeddings.""" 17 | import base64 18 | import enum 19 | import io 20 | import logging 21 | import os 22 | from typing import Any, Iterable, Sequence, Union 23 | 24 | from cxr_foundation import constants 25 | from cxr_foundation import example_generator_lib 26 | from google.api_core import exceptions 27 | from google.api_core.client_options import ClientOptions 28 | from google.api_core.retry import Retry 29 | from google.cloud import aiplatform 30 | import numpy as np 31 | from PIL import Image 32 | import pydicom 33 | import tensorflow as tf 34 | import tensorflow_text as tf_text 35 | import tensorflow_hub as hub 36 | 37 | _RETRIABLE_TYPES = ( 38 | exceptions.TooManyRequests, # HTTP 429 39 | exceptions.InternalServerError, # HTTP 500 40 | exceptions.BadGateway, # HTTP 502 41 | exceptions.ServiceUnavailable, # HTTP 503 42 | exceptions.DeadlineExceeded, # HTTP 504 43 | ) 44 | 45 | _API_ENDPOINT = 'us-central1-aiplatform.googleapis.com' 46 | _VIEW_POSITION = 'ViewPosition' 47 | _FRONTAL_VIEW_POSITIONS = ('AP', 'PA') 48 | 49 | _ELIXR_B_RESPONSE_SHAPE = { 50 | 'img_emb': (32, 768), 51 | 'all_contrastive_img_emb': (32, 128), 52 | 'contrastive_txt_emb': (128,), 53 | } 54 | _ELIXR_C_RESPONSE_SHAPE = (1, 8, 8, 1376) 55 | 56 | 57 | class ModelVersion(enum.Enum): 58 | V1 = enum.auto() # CXR Foundation model V1. 59 | V2 = enum.auto() # Data efficient classification output from 2-stage ELIXR model. 60 | V2_CONTRASTIVE = enum.auto() # Contrastive output from 2-stage ELIXR model. 61 | 62 | 63 | class InputFileType(enum.Enum): 64 | PNG = 'png' 65 | DICOM = 'dicom' 66 | 67 | def __str__(self): 68 | return self.value 69 | 70 | 71 | class OutputFileType(enum.Enum): 72 | TFRECORD = 'tfrecord' 73 | NPZ = 'npz' 74 | 75 | def __str__(self): 76 | return self.value 77 | 78 | 79 | def _image_id_to_filebase(image_id: str) -> str: 80 | filebase, _ = os.path.splitext(os.path.basename(image_id)) 81 | return filebase 82 | 83 | 84 | def _output_file_name( 85 | input_file: str, output_dir: str, format: OutputFileType 86 | ) -> str: 87 | filebase = _image_id_to_filebase(input_file) 88 | if format == OutputFileType.TFRECORD: 89 | return os.path.join(output_dir, f'{filebase}.tfrecord') 90 | elif format == OutputFileType.NPZ: 91 | return os.path.join(output_dir, f'{filebase}.npz') 92 | raise ValueError('Unknown file type.') 93 | 94 | 95 | def generate_embeddings( 96 | input_files: Iterable[str], 97 | output_dir: str, 98 | input_type: InputFileType, 99 | output_type: OutputFileType, 100 | overwrite_existing: bool = False, 101 | model_version: ModelVersion = ModelVersion.V1, 102 | ) -> None: 103 | """Generate embedding files from a set of input image files. 104 | 105 | Parameters 106 | ---------- 107 | input_files 108 | The set of image files to generate the embeddings from. 109 | output_dir 110 | The directory to write the embedding files to. The output file names will be 111 | constructed 112 | from the base name of the input files and the output file type. 113 | input_type 114 | The file type of the input images. DICOM or PNG. 115 | overwrite_existing 116 | If an output file already exists, whether to overwrite or skip inference. 117 | model_version 118 | The CXR foundation model version. 119 | 120 | Raises 121 | ------ 122 | ValueError 123 | If the `model_version` is unsupported. 
124 | """ 125 | if model_version == ModelVersion.V1: 126 | embeddings_fn = embeddings_v1 127 | elif model_version == ModelVersion.V2: 128 | embeddings_fn = lambda x: embeddings_v2(x, 'img_emb') 129 | elif model_version == ModelVersion.V2_CONTRASTIVE: 130 | embeddings_fn = lambda x: embeddings_v2(x, 'all_contrastive_img_emb') 131 | else: 132 | raise ValueError('Model version {model_version.name!r} is unsupported.') 133 | 134 | for file in input_files: 135 | output_file = _output_file_name( 136 | file, output_dir=output_dir, format=output_type 137 | ) 138 | 139 | if not overwrite_existing and os.path.exists(output_file): 140 | logging.info(f'Found existing output file. Skipping: {output_file!r}') 141 | continue 142 | 143 | image_example = create_example_from_image( 144 | image_file=file, input_type=input_type 145 | ) 146 | assert constants.IMAGE_KEY in image_example.features.feature 147 | 148 | embeddings = embeddings_fn(image_example) 149 | 150 | save_embeddings( 151 | embeddings, 152 | output_file=output_file, 153 | format=output_type, 154 | image_example=image_example, 155 | ) 156 | logging.info(f'Successfully generated {output_file!r}') 157 | 158 | 159 | def embeddings_v1(image_example: tf.train.Example) -> np.ndarray: 160 | """Create CXR Foundation V1 model embeddings. 161 | 162 | Parameters 163 | ---------- 164 | image_example: TF Example with image bytes. 165 | 166 | Returns 167 | ------- 168 | NumPy array of shape (1376,). 169 | """ 170 | instance = { 171 | 'b64': base64.b64encode(image_example.SerializeToString()).decode() 172 | } 173 | response = _embeddings_from_service( 174 | instance, 175 | constants.ENDPOINT_V1.project_name, 176 | constants.ENDPOINT_V1.endpoint_location, 177 | constants.ENDPOINT_V1.endpoint_id, 178 | ) 179 | assert len(response) == 1 180 | assert len(response[0]) == 1 181 | embeddings = np.array(response[0][0], dtype=np.float32) 182 | assert embeddings.shape == (1376,) 183 | return embeddings 184 | 185 | 186 | def embeddings_v2(image_example: tf.train.Example, fetch_key: str) -> np.ndarray: 187 | """Create CXR Foundation V2 model embeddings. 188 | 189 | This is a two-step process: 190 | - Query ELIXR-C for a 1x8x8x1376 dimension embedding. 191 | - Query ELIXR-B with the embedding from the previous step to obtain a semantic 192 | embedding for the text generation model. 193 | 194 | Parameters 195 | ---------- 196 | image_example: TF Example with image bytes. 197 | fetch_key: which output to fetch from the inference results. 198 | 199 | Returns 200 | ------- 201 | NumPy array of shape (32, 768). For data efficient learning features. OR 202 | NumPy array of shape (32, 128). For image-text aligned contrastive features. 
203 | """ 204 | instance = { 205 | 'b64': base64.b64encode(image_example.SerializeToString()).decode() 206 | } 207 | elixr_c_response = _embeddings_from_service( 208 | instance, 209 | constants.ENDPOINT_V2_C.project_name, 210 | constants.ENDPOINT_V2_C.endpoint_location, 211 | constants.ENDPOINT_V2_C.endpoint_id, 212 | ) 213 | elixr_c_embedding = np.expand_dims( 214 | np.array(elixr_c_response[0], dtype=np.float32), axis=0 215 | ) 216 | assert elixr_c_embedding.shape == _ELIXR_C_RESPONSE_SHAPE 217 | instance = { 218 | 'image_feature': elixr_c_embedding.tolist(), 219 | 'ids': np.zeros((1, 1, 128), dtype=np.int32).tolist(), 220 | 'paddings': np.zeros((1, 1, 128), dtype=np.float32).tolist(), 221 | } 222 | elixr_b_response = _embeddings_from_service( 223 | instance, 224 | constants.ENDPOINT_V2_B.project_name, 225 | constants.ENDPOINT_V2_B.endpoint_location, 226 | constants.ENDPOINT_V2_B.endpoint_id, 227 | ) 228 | assert len(elixr_b_response) == 1 229 | assert fetch_key in elixr_b_response[0] 230 | elixr_b_embedding = np.array( 231 | elixr_b_response[0][fetch_key], dtype=np.float32 232 | ) 233 | assert elixr_b_embedding.shape == _ELIXR_B_RESPONSE_SHAPE[fetch_key] 234 | return elixr_b_embedding 235 | 236 | 237 | def tokenize(preprocessor, text): 238 | out = preprocessor(tf.constant([text])) 239 | ids = out['input_word_ids'].numpy().astype(np.int32) 240 | masks = out['input_mask'].numpy().astype(np.float32) 241 | paddings = 1.0 - masks 242 | end_token_idx = ids == 102 243 | ids[end_token_idx] = 0 244 | paddings[end_token_idx] = 1.0 245 | ids = np.expand_dims(ids, axis=1) 246 | paddings = np.expand_dims(paddings, axis=1) 247 | assert ids.shape == (1, 1, 128) 248 | assert paddings.shape == (1, 1, 128) 249 | return ids, paddings 250 | 251 | 252 | def generate_elixr_text_embeddings(text): 253 | preprocessor = hub.KerasLayer( 254 | "https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3") 255 | text = text.lower() 256 | ids, paddings = tokenize(preprocessor, text) 257 | instance = { 258 | # dummy image input 259 | 'image_feature': np.zeros([1, 8, 8, 1376], dtype=np.float32).tolist(), 260 | 'ids': ids.tolist(), 261 | 'paddings': paddings.tolist(), 262 | } 263 | response = _embeddings_from_service( 264 | instance, 265 | constants.ENDPOINT_V2_B.project_name, 266 | constants.ENDPOINT_V2_B.endpoint_location, 267 | constants.ENDPOINT_V2_B.endpoint_id, 268 | ) 269 | assert len(response) == 1 270 | assert 'contrastive_txt_emb' in response[0] 271 | embedding = np.array( 272 | response[0]['contrastive_txt_emb'], dtype=np.float32 273 | ) 274 | assert embedding.shape == _ELIXR_B_RESPONSE_SHAPE['contrastive_txt_emb'] 275 | return embedding 276 | 277 | 278 | def create_example_from_image( 279 | image_file: str, input_type: InputFileType 280 | ) -> tf.train.Example: 281 | """Create a tf.train.Example from an image file.""" 282 | with open(image_file, 'rb') as f: 283 | if input_type == InputFileType.PNG: 284 | img = np.asarray(Image.open(io.BytesIO(f.read())).convert('L')) 285 | return example_generator_lib.png_to_tfexample(img) 286 | elif input_type == InputFileType.DICOM: 287 | dicom = pydicom.dcmread(io.BytesIO(f.read())) 288 | if ( 289 | _VIEW_POSITION in dicom 290 | and dicom.ViewPosition not in _FRONTAL_VIEW_POSITIONS 291 | ): 292 | raise RuntimeError( 293 | f'DICOM file: {image_file} - view position is not in accepted' 294 | ' set: ', 295 | _FRONTAL_VIEW_POSITIONS, 296 | ) 297 | return example_generator_lib.dicom_to_tfexample(dicom) 298 | 299 | raise ValueError('Unknown file type.') 300 | 301 | 302 | def 
_is_retryable(exc): 303 | return isinstance(exc, _RETRIABLE_TYPES) 304 | 305 | 306 | def _embeddings_from_service( 307 | instance: dict[Any, Any], 308 | project_name: str, 309 | location: str, 310 | endpoint_id: int, 311 | ) -> Any: 312 | """Returns embeddings from a Vertex (AI Platform) model prediction endpoint. 313 | 314 | Parameters 315 | ---------- 316 | instance 317 | dict type input instance for prediction. 318 | project_name 319 | The GCP project name that hosts embeddings API. 320 | location 321 | The GCP Location (Zone) where the model serving end-point is deployed. 322 | endpoint_id 323 | The numerical endpoint ID of the embeddings API. 324 | 325 | Returns 326 | ------ 327 | The embeddings generated by the service. Differences in Vertex 328 | end-point configurations may change the return type. The caller is 329 | responsible for interpreting this value and extracting the requisite 330 | data. 331 | """ 332 | api_client = aiplatform.gapic.PredictionServiceClient( 333 | client_options=ClientOptions(api_endpoint=_API_ENDPOINT) 334 | ) 335 | 336 | endpoint = api_client.endpoint_path( 337 | project=project_name, location=location, endpoint=endpoint_id 338 | ) 339 | retry_policy = Retry(predicate=_is_retryable) 340 | response = api_client.predict( 341 | endpoint=endpoint, instances=[instance], retry=retry_policy, timeout=60 342 | ) 343 | return response.predictions 344 | 345 | 346 | def save_embeddings( 347 | embeddings: np.ndarray, 348 | output_file: str, 349 | format: OutputFileType, 350 | image_example: tf.train.Example = None, 351 | ): 352 | """Save the embeddings values to a numpy or tfrecord file. 353 | 354 | Parameters 355 | --------- 356 | embeddings 357 | The vector embeddings values to save 358 | output_file 359 | The file path to save to 360 | format 361 | The format to save the embeddings to - .npz or .tfrecord. 362 | image_example 363 | The original Example generated from the image. This is only required if 364 | saving as .tfrecord. 365 | """ 366 | embeddings_array = embeddings.astype(np.float32).flatten() 367 | 368 | if format == OutputFileType.NPZ: 369 | # Keyed by "embedding" 370 | np.savez(output_file, embedding=embeddings_array) 371 | elif format == OutputFileType.TFRECORD: 372 | if image_example is None: 373 | raise RuntimeError( 374 | 'Missing image_example param required for saving as tfrecord.' 375 | ) 376 | 377 | # Add embeddings values to example 378 | image_example.features.feature[constants.EMBEDDING_KEY].float_list.value[ 379 | : 380 | ] = embeddings_array 381 | 382 | # Remove unnecessary existing fields to prevent serializing them 383 | for key in (constants.IMAGE_FORMAT_KEY, constants.IMAGE_KEY): 384 | if key in image_example.features.feature: 385 | del image_example.features.feature[key] 386 | 387 | with tf.io.TFRecordWriter(output_file) as w: 388 | w.write(image_example.SerializeToString()) 389 | 390 | else: 391 | raise ValueError('Unknown file type.') 392 | -------------------------------------------------------------------------------- /derm-foundation/README.md: -------------------------------------------------------------------------------- 1 | # Derm Foundation 2 | 3 | **Derm Foundation** is a tool to generate 4 | [embeddings](https://developers.google.com/machine-learning/crash-course/embeddings/video-lecture) 5 | from dermatological images. These embeddings can be used to develop custom 6 | machine learning models for dermatology use-cases with less data and compute 7 | compared to traditional model development methods. 
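To make that concrete, the sketch below trains a scikit-learn classifier head on precomputed embedding vectors. It is only an illustrative sketch, not the Demo Notebook's actual code: the file locations, the `.npz` key, and the use of the default `img_id`/`diagnostic` column names are all assumptions.

```python
# Illustrative sketch only: train a small classifier on precomputed
# Derm Foundation embeddings. Paths, the .npz key, and the column names
# ("img_id", "diagnostic") are assumptions, not the Demo Notebook's exact setup.
import os

import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

metadata = pd.read_csv("metadata.csv")  # one row per image

features, labels = [], []
for _, row in metadata.iterrows():
    npz_path = os.path.join("embeddings", f"{row['img_id']}.npz")
    if not os.path.exists(npz_path):
        continue  # skip images without a stored embedding
    features.append(np.load(npz_path)["embedding"])  # one float vector per image
    labels.append(row["diagnostic"])

x = np.stack(features)
y = np.array(labels)
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0)

classifier = LogisticRegression(max_iter=1000).fit(x_train, y_train)
print("Held-out accuracy:", classifier.score(x_test, y_test))
```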
8 | 
9 | ## How to use the Derm Foundation API
10 | 
11 | 1.  Decide if you want to get access as an individual or a group. For more information, see [Access Options](#access-options).
12 | 
13 | 1.  With the individual or group email identity at hand from the previous step,
14 |     fill out the [API access form](https://forms.gle/VBFuzSJXhQjNmF776).
15 | 
16 | 1.  Once access is granted, you’ll be notified via the provided email address
17 |     and can start using the API.
18 | 
19 | 1.  The [Demo Notebook](https://colab.research.google.com/github/Google-Health/imaging-research/blob/master/derm-foundation/derm_foundation_demo.ipynb) shows you how to use the API to
20 |     train a sample model [with our test data](#use-our-test-data). You can
21 |     modify the Demo Notebook to train a model using
22 |     [your own data](#use-your-own-data). This Notebook provides an example of
23 |     the following steps:
24 | 
25 |     * Generating a temporary access token to grant the API access to images in
26 |       GCS.
27 |     * Calling the API with a given GCS bucket name, GCS object path, and the
28 |       access token.
29 |     * Saving the embeddings.
30 |     * Using the embeddings to train a simple model.
31 |     * Evaluating the results of the model.
32 | 
33 | 1.  If you need support or have questions, please [contact us](#contact-us).
34 | 
35 | ## Use our test data
36 | 
37 | Upon gaining access to the API, you'll also have access to publicly available
38 | data we've curated specifically for testing. This is to help you get started
39 | with your initial experiments. The default state of the
40 | [Demo Notebook](https://colab.research.google.com/github/Google-Health/imaging-research/blob/master/derm-foundation/derm_foundation_demo.ipynb) is set to use this test data, which is
41 | stored in a
42 | [Cloud Storage (GCS) bucket](https://cloud.google.com/storage/docs/creating-buckets)
43 | managed by us for your convenience.
44 | 
45 | ## Use your own data
46 | 
47 | WARNING: You hold responsibility for the data stored in your GCS bucket that you
48 | use with the API. It's important to comply with all the terms of use any data is subject to.
49 | 
50 | NOTE: The [Demo Notebook](https://colab.research.google.com/github/Google-Health/imaging-research/blob/master/derm-foundation/derm_foundation_demo.ipynb) demonstrates how to call the
51 | API using short-lived access tokens. These tokens provide temporary access to
52 | the API for processing your images and are specific to the individual running
53 | the Colab. It's important to note that the API is stateless and does not store
54 | the images it processes.
55 | 
56 | 1.  If you don't have access to an existing
57 |     [GCP Project](https://cloud.google.com/storage/docs/projects), you need to
58 |     [create one](https://cloud.google.com/free).
59 | 
60 | 1.  [Create a GCS bucket](https://cloud.google.com/storage/docs/creating-buckets)
61 |     in the above project.
62 | 
63 | 1.  On your local machine
64 |     [install the gcloud SDK](https://cloud.google.com/sdk/docs/install) and
65 |     [log in](https://cloud.google.com/sdk/gcloud/reference/auth/login):
66 | 
67 |     ```
68 |     gcloud auth application-default login
69 |     ```
70 | 
71 | 1.  From your local machine use the
72 |     [gcloud storage commands](https://cloud.google.com/sdk/gcloud/reference/storage)
73 |     to transfer images in PNG format to the GCS bucket you set up in the
74 |     previous step. If you have a large number of files to upload, you may
75 |     consider using the
76 |     [`rsync` command](https://cloud.google.com/sdk/gcloud/reference/storage/rsync)
77 |     instead of `cp`.
78 | 
79 |     You should also include a path to a CSV file in gcs_metadata_csv. This CSV should contain a column with the file names of the images you're uploading (titled 'img_id' by default) and a label column for the task you want to train on (titled 'diagnostic' by default). These column titles are exposed as parameters, so you can adjust them when you modify the Demo Notebook to match your CSV.
80 | 
81 | 1.  Make sure that [the email identity you selected](#how-to-gain-access) has
82 |     the necessary permissions to view the images. The simplest method is to
83 |     assign the predefined role of `roles/storage.objectViewer` to the chosen
84 |     email identity. There are
85 |     [several ways to do this](https://cloud.google.com/storage/docs/access-control/using-iam-permissions#bucket-add).
86 |     You should familiarize yourself with
87 |     [GCS access control](https://cloud.google.com/storage/docs/access-control).
88 | 
89 | 1.  Modify the [Demo Notebook](https://colab.research.google.com/github/Google-Health/imaging-research/blob/master/derm-foundation/derm_foundation_demo.ipynb#scrollTo=OxzYsc8NDpwa) and replace the values for:
90 |     * gcp_project
91 |     * gcs_bucket_name
92 |     * gcs_metadata_csv
93 |     * gcs_image_dir (leave blank if the images are in the root directory)
94 |     * label_column (the name of the column for the label you're training on); and
95 |     * img_join_column (the name of the column you want to join your image files on)
96 | 
97 |     with the values from your GCS bucket.
98 | 
99 |     Also make sure you uncheck the "gcs_use_precomputed embeddings" flag.
100 | 
101 | ## Access Options
102 | 
103 | You have the option to request access to the API either as
104 | [an individual](#as-an-individual-non-gmail-account) or for [a group](#as-a-group-recommended).
105 | Choose the process that best aligns with your needs. Remember to note the email
106 | identifier for which you will be requesting access. It should be in one of these
107 | formats:
108 | 
109 | * YOUR-GROUP-NAME@YOUR-DOMAIN
110 | * INDIVIDUAL-ID@YOUR-DOMAIN
111 | * INDIVIDUAL-ID@gmail.com
112 | 
113 | ### As a group (recommended)
114 | 
115 | If your organization is a Google Workspace or Google Cloud Platform (GCP)
116 | customer, contact your Google admin and ask them to create a group with the list
117 | of individuals who will be using the API. Let them know that this group is used
118 | for contacting you and also as a security principal for authorizing your access
119 | to the API.
120 | 
121 | ![Create Google Group](img/create-group.png)
122 | 
123 | Otherwise,
124 | [create a free Cloud Identity Account](https://cloud.google.com/identity/docs/set-up-cloud-identity-admin)
125 | for your domain name and in the process become the interim Google admin for your
126 | organization. Visit [Google Admin console](https://admin.google.com/) and create
127 | the above-mentioned group. If your individual identities are unknown to Google,
128 | they will need to follow the process for the [individuals](#as-an-individual)
129 | before you can add them to the group.
130 | 
131 | ### As an individual (non-gmail account)
132 | This section applies for the INDIVIDUAL-ID@YOUR-DOMAIN case (e.g.
`person@university.org` or `person@company.com`) 133 | 134 | If your organization is a Google Workspace or GCP customer, identity federation 135 | is most likely set up between your corporate identity directory and 136 | [Google Identity and Access Management](https://cloud.google.com/security/products/iam) 137 | and therefore individuals already have Google identities in the form of their 138 | corporate emails. Check with your IT department to find out whether identity 139 | federation is already in place or will be established soon. 140 | 141 | Otherwise, 142 | [create a Google identity based on your email](https://accounts.google.com/signup/v2/webcreateaccount?flowName=GlifWebSignIn&flowEntry=SignUp). 143 | Opt for the "use my current email address instead" option, as shown in the 144 | screen capture below. 145 | 146 | IMPORTANT: You should choose a password that is different from your corporate 147 | password. 148 | 149 | ![Create Google Id](img/create-identity.png) 150 | 151 | ### As an individual (`@gmail.com` account) 152 | 153 | If you want to sign up as an individual with a gmail account, you can submit the form directly with your gmail address. 154 | 155 | 156 | ## General notes 157 | 158 | * Google does not keep a copy of any images sent. 159 | * Google monitors daily query volume and aggregates on a per-user and 160 | per-organization basis. Access can be revoked if a user or organization 161 | exceeds a reasonable query volume. 162 | 163 | ## Contributing 164 | 165 | See [`CONTRIBUTING.md`](docs/CONTRIBUTING.md) for details. 166 | 167 | ## License 168 | 169 | See [`LICENSE`](LICENSE) for details. 170 | 171 | ## Disclaimer 172 | 173 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 174 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 175 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 176 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 177 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 178 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 179 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 180 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 181 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 182 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 183 | 184 | ## Contact us 185 | 186 | Please reach out to us at 187 | [derm-foundation@google.com](mailto:derm-foundation@google.com]) for issues such 188 | as, but not limited to: 189 | 190 | - Seeking technical assistance 191 | - Providing feedback 192 | - Requesting permissions for publications 193 | - Discussing clinical use cases 194 | - Discussing enterprise requirements such as: 195 | - Fitting within strict security perimeters of your organization 196 | - Governing your data in GCS 197 | - Training and serving custom models at scale on 198 | [Vertex AI](https://cloud.google.com/vertex-ai?hl=en) 199 | 200 | # Model Card for Derm Foundation 201 | 202 | This tool uses an ML model to provide the embedding results. This section 203 | briefly overviews the background and limitations of that model. 204 | 205 | ## Model Details 206 | 207 | ### Overview 208 | 209 | This model generates embeddings for images of dermatological skin conditions. 
210 | Embeddings are n-dimensional vectors of floating points representing a
211 | projection of the original image into a compressed feature space capable of
212 | describing image features relevant to differentiating skin conditions and
213 | properties (age, body part, etc.). These embeddings are to be used by
214 | “downstream models” for final tasks such as condition category classification or
215 | body part identification. The model uses the BiT-101x3 architecture
216 | (https://arxiv.org/pdf/1912.11370.pdf). It was trained in two stages. The first
217 | pre-training stage used contrastive learning to train on a large number of
218 | public image-text pairs from the internet. The image component of this
219 | pre-trained model was then fine-tuned for condition classification and a couple
220 | of other downstream tasks using a number of clinical datasets (see below).
221 | 
222 | Training Data:
223 | 
224 | * Base model (pre-training): A large number of health-related image-text pairs
225 |   from the public web
226 | * SFT (supervised fine-tuned) model: tele-dermatology datasets from the United
227 |   States and Colombia, a skin cancer dataset from Australia, and additional
228 |   public images. The images come from a mix of device types, including images
229 |   from smartphone cameras, other cameras, and dermatoscopes. The images also
230 |   have a mix of image takers; images may have been taken by clinicians during
231 |   consultations or self-captured by patients.
232 | 
233 | ### Version
234 | 
235 | ```
236 | name: v1.0.0
237 | date: 2023-12-19
238 | ```
239 | 
240 | ### Owners
241 | 
242 | ```
243 | derm-foundation@google.com
244 | ```
245 | 
246 | ### Licenses
247 | 
248 | - See
249 |   [Derm Foundation - Additional Terms of Service](https://forms.gle/VBFuzSJXhQjNmF776).
250 | 
251 | ### References
252 | 
253 | - BiT: https://arxiv.org/pdf/1912.11370.pdf
254 | - CLIP: https://arxiv.org/abs/2103.00020
255 | 
256 | ## Considerations
257 | 
258 | ### Use Cases
259 | 
260 | - Embeddings can reduce barriers to entry for training custom models for
261 |   derm-specific tasks with less data, setup, and compute.
262 | - Embeddings can allow for quick evaluation.
263 | 
264 | ### Limitations
265 | 
266 | - The base model was trained using image-text pairs from the public web. These
267 |   images come from a variety of sources but may be noisy or low-quality. The
268 |   SFT (supervised fine-tuned) model was trained on data from a limited set of
269 |   countries (United States, Colombia, Australia, public images) and settings
270 |   (mostly clinical). It may not generalize well to data from other countries,
271 |   patient populations, or image types not used in training.
272 | - The model is only used to generate embeddings of the user-owned dataset. It
273 |   does not generate any predictions or diagnoses on its own.
274 | - Developers should ensure any downstream model developed using this tool is
275 |   validated to ensure performance is consistent across the intended demographics,
276 |   e.g., skin tone, age, sex, gender, etc.
277 | 
278 | ### Ethical Considerations
279 | 
280 | - Risk: Although Google does not permanently store any data sent to this
281 |   model, it is the data owner's responsibility to ensure that personally
282 |   identifiable information (PII) and Protected Health Information (PHI) are
283 |   removed prior to being sent to the model.
284 | - Mitigation Strategy: Do not send data containing PII or PHI.
285 | -------------------------------------------------------------------------------- /path-foundation/README.md: -------------------------------------------------------------------------------- 1 | # Path Foundation 2 | 3 | Path Foundation is a tool that enables users to transform pathology images into 4 | a machine learning representation of the images known as embeddings. Embeddings 5 | are a list of floating point values that represent a projection of the original 6 | image into a compressed feature space. This tool utilizes a model trained via 7 | self-supervised learning (see [model card](#model-card-for-path-foundation-model) below) in order to create embeddings 8 | for image patches from histopathology whole slide images (WSIs). These 9 | embeddings can be used to develop custom machine learning models for pathology 10 | use-cases using less data and compute compared to traditional model development 11 | methods. 12 | 13 | For more information please see this [video](https://www.youtube.com/watch?v=Q_09Kqv1y1E). 14 | 15 | You can read more about the research and underlying model in our 16 | manuscript: 17 | [Domain-specific optimization and diverse evaluation of self-supervised models for histopathology](https://arxiv.org/abs/2310.13259). 18 | 19 | ## How to use the Path Foundation API 20 | 21 | 1. Decide if you want to get access as an individual or a group. For more information see [Access Options](#access-options) 22 | 23 | 1. With the individual or group email identity at hand from the previous step, 24 | fill out the [API access form](http://bit.ly/fm-path-access-form). 25 | 26 | 1. Once access is granted, you’ll be notified via the provided email address 27 | and can start using the API. 28 | 29 | 1. The [Demo Colab](https://colab.research.google.com/github/Google-Health/imaging-research/blob/master/path-foundation/linear-classifier-demo.ipynb) shows you how 30 | to train a sample linear classifier. You can experiment with 31 | [our sample digitized pathology images & training labels](#use-our-test-data) 32 | to understand the API, then modify the Colab to use 33 | [your own data](#use-your-own-data). 34 | 35 | The Colab includes instructions for: 36 | 37 | * Generating training labels in JSON format from masks in PNG format. 38 | * Generating a temporary access token for the API to read the DICOM images 39 | from a [Cloud DICOM Store](https://cloud.google.com/healthcare-api/docs/concepts/dicom) 40 | on behalf of the person running the Colab. 41 | * Calling the API on WSI DICOMs stored in Cloud DICOM store to generate embeddings 42 | * Training a linear classifier using these embeddings and training labels from a 43 | [Cloud Storage (GCS) bucket](https://cloud.google.com/storage) and evaluating the result of this classifier. 44 | * An alternative way of calling the API to generate embeddings from digital pathology images (JPEG, TIFF or PNG) stored in GCS 45 | 46 | We have a [video walkthrough](https://www.youtube.com/watch?v=Q_09Kqv1y1E) of the demo if you'd like more information. 47 | [Contact us](#contact) if you find training your custom model is more 48 | involved and requires more advanced batching. We're happy to help! 49 | 50 | ## Use our test data 51 | 52 | Upon gaining access to the API, you'll also have access to publicly available 53 | data we've curated specifically for testing on the [Demo Colab](https://colab.research.google.com/github/Google-Health/imaging-research/blob/master/path-foundation/linear-classifier-demo.ipynb). 
This data comprises of DICOM images stored in a Google Cloud DICOM Store and training labels in PNG and JSON formats in a GCS bucket. The [Demo Colab](https://colab.research.google.com/github/Google-Health/imaging-research/blob/master/path-foundation/linear-classifier-demo.ipynb) 54 | uses this test data to train a tumor detector. 55 | 56 | ## Use your own data 57 | 58 | WARNING: You hold responsibility for the data stored in your GCS bucket that you 59 | use with the API. It's important to comply with all the terms of use any data is subject to. 60 | 61 | To use your own data with the API, you will need the following GCP resources: 62 | * A [GCP Project](https://cloud.google.com/storage/docs/projects) 63 | * A Cloud DICOM Store in the project for storing digitized pathology images 64 | * A GCS bucket in the project for storing data in file format (i.e. training 65 | labels, embeddings, and DICOM files) 66 | 67 | WARNING: While the API can read data from any 68 | [DICOMweb-compliant](https://www.dicomstandard.org/using/dicomweb) storage 69 | system, Google Cloud DICOM Store is optimized for the scale and latency required 70 | for handling 71 | [digitized pathology images](https://cloud.google.com/healthcare-api/docs/how-tos/dicom-digital-pathology). 72 | We cannot guarantee the same performance or functionality with other storage 73 | systems. 74 | 75 | NOTE: The demo Colab demonstrates how to call the API using short-lived access 76 | tokens. These tokens permit the API to read and process the images on behalf of 77 | the individual who is running the Colab. It's important to note that the API 78 | cannot access your data independently. The API processes images when you 79 | instruct it to using a time-limited access token and does not store the images 80 | after processing. 81 | 82 | 1. If you don't have access to an existing GCP Project, you will need to 83 | [create one](https://cloud.google.com/free). 84 | 85 | 1. Follow [these instructions](https://cloud.google.com/storage/docs/creating-buckets) 86 | to create the GCS bucket. 87 | 88 | 1. Follow [these instructions](https://cloud.google.com/healthcare-api/docs/how-tos/dicom) 89 | to create a Cloud DICOM Store. 90 | 91 | 1. Use [Google Cloud IAM panel](https://console.cloud.google.com/iam-admin) to 92 | grant the following permissions to the GCP resources: 93 | 94 | * Allow the individual running the rest of the steps to manage objects in the 95 | GCS bucket by granting them the predefined role `roles/storage.objectAdmin`. 96 | 97 | * Allow [the identity(ies) who have access to our API](#how-to-gain-access) to: 98 | * read training labels and persist embeddings in the GCS bucket by 99 | granting them the predefined role `roles/storage.objectAdmin`. 100 | * read DICOM images from the Cloud DICOM Store by granting them the 101 | predefined role `roles/healthcare.dicomViewer`. 102 | 103 | 1. On your local machine 104 | [install the gcloud SDK](https://cloud.google.com/sdk/docs/install) and 105 | [log in](https://cloud.google.com/sdk/gcloud/reference/auth/login): 106 | 107 | gcloud auth application-default login 108 | 109 | 1. From your local machine use the 110 | [gcloud storage commands](https://cloud.google.com/sdk/gcloud/reference/storage) 111 | to transfer training labels in PNG or JSON format and DICOM files to the GCS 112 | bucket. 
You may use the [`rsync` command](https://cloud.google.com/sdk/gcloud/reference/storage/rsync) 113 | instead of `cp` to handle the large volume of files that's typical for 114 | digitized pathology use cases. 115 | 116 | 1. Follow [these instructions](https://cloud.google.com/healthcare-api/docs/how-tos/dicom-import-export#gcloud) 117 | to bulk import DICOM files from the GCS bucket to your Cloud DICOM Store. 118 | 119 | 1. Modify the [Demo Notebook](https://github.com/Google-Health/imaging-research/blob/master/path-foundation/linear-classifier-demo.ipynb) to point to your data: 120 | 121 | 1 To use your training labels, replace `hai-cd3-foundations-pathology-vault-entry` 122 | with the name of your GCS bucket. 123 | 124 | 1 To use your DICOM images, change the the Cloud DICOM Store urls. They take 125 | the following format: 126 | `https://healthcare.googleapis.com/v1/projects/YOUR_PROJECT_ID/locations/YOUR_LOCATION/datasets/YOUR_DATASET_ID/dicomStores/YOUR_DICOM_STORE_ID/`. You need to substitute `YOUR_PROJECT_ID` with the project Id you obtained in step 127 | 1 and `YOUR_LOCATION`, `YOUR_DATASET_ID`, `YOUR_DICOM_STORE_ID` from step 3. 128 | 129 | ## Access Options 130 | 131 | You have the option to request access to the API either as 132 | [an individual](#as-an-individual-non-gmail-account) or for [a group](#as-a-group-recommended). 133 | Choose the process that best aligns with your needs. Remember to note the email 134 | identifier for which you will be requesting access. It should be in one of these 135 | formats: 136 | 137 | * YOUR-GROUP-NAME@YOUR-DOMAIN 138 | * INDIVIDUAL-ID@YOUR-DOMAIN 139 | * INDIVIDUAL-ID@gmail.com 140 | 141 | ### As a group (recommended) 142 | 143 | If your organization is a Google Workspace or Google Cloud Platform (GCP) 144 | customer, contact your Google admin and ask them to create a group with the list 145 | of individuals who will be using the API. Let them know that this group is used 146 | for contacting you and also as a security principal for authorizing your access 147 | to the API. 148 | 149 | ![Create Google Group](img/create-group.png) 150 | 151 | Otherwise, 152 | [create a free Cloud Identity Account](https://cloud.google.com/identity/docs/set-up-cloud-identity-admin) 153 | for your domain name and in the process become the interim Google admin for your 154 | organization. Visit [Google Admin console](https://admin.google.com/) and create 155 | the above-mentioned group. If your individual identities are unknown to Google, 156 | they will need to follow the process for the [individuals](#as-an-individual) 157 | before you can add them to the group. 158 | 159 | ### As an individual (non-gmail account) 160 | This section applies for the INDIVIDUAL-ID@YOUR-DOMAIN case (e.g. `person@university.org` or `person@company.com`) 161 | 162 | If your organization is a Google Workspace or GCP customer, identity federation 163 | is most likely set up between your corporate identity directory and 164 | [Google Identity and Access Management](https://cloud.google.com/security/products/iam) 165 | and therefore individuals already have Google identities in the form of their 166 | corporate emails. Check with your IT department to find out whether identity 167 | federation is already in place or will be established soon. 168 | 169 | Otherwise, 170 | [create a Google identity based on your email](https://accounts.google.com/signup/v2/webcreateaccount?flowName=GlifWebSignIn&flowEntry=SignUp). 
171 | Opt for the "use my current email address instead" option, as shown in the 172 | screen capture below. 173 | 174 | IMPORTANT: You should choose a password that is different from your corporate 175 | password. 176 | 177 | ![Create Google Id](img/create-identity.png) 178 | 179 | ### As an individual (`@gmail.com` account) 180 | 181 | If you want to sign up as an individual with a gmail account, you can submit the form directly with your gmail address. 182 | 183 | ## General notes 184 | 185 | * Google does not keep a copy of any DICOM images processed. 186 | * Google monitors daily query volume and aggregates on a per-user and 187 | per-organization basis. Access can be revoked if a user or organization 188 | exceeds a reasonable query volume. 189 | 190 | ## Contributing 191 | 192 | See [`CONTRIBUTING.md`](CONTRIBUTING.md) for details. 193 | 194 | ## License 195 | 196 | See [`LICENSE`](LICENSE) for details. 197 | 198 | # Model Card for Path Foundation Model 199 | 200 | This tool uses an ML model to provide the embedding results. This section 201 | briefly overviews the background and limitations of that model. 202 | 203 | ## Model Details 204 | 205 | This self-supervised model produces embeddings for image patches from 206 | histopathology whole slide images (WSIs). Embeddings are n-dimensional vectors 207 | of floating point values that represent a projection of the original image into 208 | a compressed feature space. The model uses the ViT-S architecture and was 209 | trained across magnifications with domain specific tuning and optimization. The 210 | resulting feature representations provided by the model offer robust input 211 | for downstream tasks in histopathology. Additional information can be found in 212 | the preprint [manuscript](https://arxiv.org/abs/2310.13259). 213 | 214 | ### Version 215 | * Version: 1.0.0 216 | * Date: 2023-12-19 217 | 218 | ### License 219 | Research use only. Not suitable for product development. 220 | - See [Path Foundation - Additional Terms of Service](https://docs.google.com/forms/d/1auyo2VkzlzuiAXavZy1AWUyQHAqO7T3BLK-7ofKUvug/viewform). 221 | 222 | ### Manuscript 223 | https://arxiv.org/abs/2310.13259 224 | 225 | ### Contact 226 | path-foundation@google.com 227 | 228 | 229 | ### Intended Use 230 | * Path Foundation can reduce the training data, compute, and technical 231 | expertise necessary to develop task-specific models for H&E pathology slides. 232 | * Embeddings from the model can be used for a variety of user-defined downstream 233 | tasks including, but not limited to: cancer detection, classification, and 234 | grading; metadata prediction (stain, tissue type, specimen type, etc.); and 235 | quality assessment (e.g., imaging artifacts). 236 | * The embeddings can also be used to explore the feature space of histopathology 237 | images for biomarker development associated with prognostic and predictive 238 | tasks. 239 | 240 | ### Training Data 241 | Training data consisted of hematoxylin and eosin stained (H&E) WSIs from The 242 | Cancer Genome Atlas (TCGA) accessed via https://portal.gdc.cancer.gov. 243 | Training was performed using 60 million patches across three magnifications 244 | (~2 µm/pixel, ~1 µm/pixel, ~0.5 µm/pixel) and 32 TCGA studies (representing 245 | different cancer types). 
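To put the pixel spacings above in physical terms: they correspond to roughly 5x, 10x, and 20x magnification (as noted under Limitations below), and the field of view covered by a patch scales accordingly. The arithmetic below uses a 224-pixel patch edge purely as an assumed example; it is not a statement about the API's actual patch size.

```python
# Back-of-the-envelope field-of-view calculation per magnification.
# The 224-pixel patch edge is an assumed example value only.
patch_edge_px = 224
for magnification, um_per_px in [("5x", 2.0), ("10x", 1.0), ("20x", 0.5)]:
    edge_um = patch_edge_px * um_per_px
    print(f"{magnification}: ~{um_per_px} um/pixel -> ~{edge_um:.0f} um per patch edge")
```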
246 | 247 | ### Performance & Validation 248 | Linear probe evaluation was conducted across a diverse set of benchmark tasks 249 | involving 17 unique tissue types and 12 unique cancer types and spanning 250 | different optimal magnifications and task types. 251 | See [preprint manuscript](https://arxiv.org/abs/2310.13259) for more details including performance on additional slide-level tasks (eg. tissue type classification and molecular findings), as well as results for data titration with fine tuning for select tasks. 252 | 253 | ### Risks 254 | Although Google does not store any data sent to this model, it is the data 255 | owner's responsibility to ensure that Personally identifiable information (PII) 256 | and Protected Health Information (PHI) are removed prior to being sent to the 257 | model. 258 | Mitigation Strategy: Do not send data containing PII or PHI. 259 | Training dataset is a de-identified public dataset and pathology imaging (pixel 260 | data) does not contain PHI. 261 | 262 | ### Limitations 263 | Intended for research purposes only. The model has only been validated for a 264 | limited number of the many potential downstream tasks involving H&E 265 | histopathology. This model version was trained and validated only on H&E images 266 | from a limited set of scanners and countries. Model output may not generalize 267 | well to data from other image types, patient populations, or scanner 268 | manufacturers not used in training. Task-specific validation remains an 269 | important aspect of model development by the end-user. Training and validation 270 | was performed on patches corresponding to 5x, 10x, and 20x magnification 271 | (~2 µm/pixel, ~1 µm/pixel, ~0.5 µm/pixel, respectively). Using input patches 272 | corresponding to magnifications other than these has not been evaluated. The 273 | model is only used to generate embeddings of user-owned data or the provided, 274 | publicly available data. It does not generate any predictions or diagnosis on 275 | its own. As with any research, developers should ensure any downstream 276 | application is validated to understand performance using data that is 277 | appropriately representative of the intended use setting (e.g., age, sex, 278 | gender, condition, scanner, etc.). 279 | -------------------------------------------------------------------------------- /ct-foundation/README.md: -------------------------------------------------------------------------------- 1 | # CT Foundation 2 | 3 | **CT Foundation** is a tool that enables users to transform Computed Tomography 4 | (CT) volumes comprised of axial slices into an information-rich vector 5 | representation known as an 6 | [embedding](https://developers.google.com/machine-learning/crash-course/embeddings/video-lecture). 7 | These embeddings can be used to develop custom machine learning models for task 8 | specific use-cases using less data and compute compared to traditional model 9 | development methods. 10 | 11 | If you use any part of CT Foundation or this repository, please cite our paper: 12 | 13 | ``` 14 | @article{yang2024advancing, 15 | title={Advancing multimodal medical capabilities of Gemini}, 16 | author={Yang, Lin and Xu, Shawn and Sellergren, Andrew and Kohlberger, Timo and Zhou, Yuchen and Ktena, Ira and Kiraly, Atilla and Ahmed, Faruk and Hormozdiari, Farhad and Jaroensri, Tiam and others}, 17 | journal={arXiv preprint arXiv:2405.03162}, 18 | year={2024} 19 | } 20 | ``` 21 | ## How to use the CT Foundation API 22 | 23 | 1. 
Decide if you want to get access as an individual or a group. For more 24 | information see [Access Options](#access-options) 25 | 26 | 1. With the Google identity from the previous step at hand, fill out the 27 | [API access form](https://docs.google.com/forms/d/e/1FAIpQLSfkSBbCi5dOlJxuDB3t6biFEBIA9JL66A99YRZa8qR2Fn5mUA/viewform?resourcekey=0-vPNR0VQ-vibGDJ564j4mCA). 28 | 29 | 1. Once access is granted, you’ll be notified via the provided email address 30 | and can start using the API. 31 | 32 | 1. Use the 33 | [Demo Notebook](https://colab.research.google.com/github/google-health/imaging-research/blob/master/ct-foundation/CT_Foundation_Demo.ipynb) 34 | to see how to use the API to compute embeddings and how to train a sample 35 | classifier. You can experiment with 36 | [our sample CT images & training labels](#use-our-test-data) to understand 37 | the API, then modify the Colab to use [your own data](#use-your-own-data). 38 | 39 | The demo Colab includes instructions for: 40 | 41 | * Generating a temporary access token for the API to read the DICOM images 42 | from a 43 | [Cloud DICOM Store](https://cloud.google.com/healthcare-api/docs/concepts/dicom) 44 | on behalf of the person running the Colab. 45 | * Calling the API on CT scans stored in Cloud DICOM Store to generate 46 | embeddings 47 | * Training a linear classifier for lung cancer using pre-computed embeddings 48 | on [NLST](https://www.cancerimagingarchive.net/collection/nlst/) 49 | * Evaluating the result of this classifier. 50 | 51 | [Contact us](#contact) if you have questions or need help. 52 | 53 | ## Use our test data 54 | 55 | Upon gaining access to CT Foundation, you'll also have access to publicly 56 | available data we've curated specifically for testing. This includes CT studies 57 | from the [LIDC-IDRI](https://www.cancerimagingarchive.net/collection/lidc/) 58 | dataset stored in a 59 | [Google Cloud DICOM Store](https://cloud.google.com/healthcare-api/docs/concepts/dicom). 60 | We also store pre-computed embeddings run on CT Foundation from 61 | [NLST](https://www.cancerimagingarchive.net/collection/nlst/) Our 62 | [Demo Notebook](https://colab.research.google.com/github/google-health/imaging-research/blob/master/ct-foundation/CT_Foundation_Demo.ipynb) 63 | shows you how to call CT Foundation on the LIDC_IDRI DICOMS and also how to 64 | train a performant model using the precomputed NLST embeddings. 65 | 66 | ## Use your own data 67 | 68 | WARNING: You hold responsibility for the data that you use with the API. It's 69 | important to comply with all the terms of use your data is subject to. 70 | 71 | NOTE: The current version of the API expects the CT images in a 72 | [Google Cloud DICOM Store](https://cloud.google.com/healthcare-api/docs/concepts/dicom). 73 | This section provides instructions how to get your data to a Cloud DICOM Store 74 | that you own. If you need support for complying with your Cloud enterprise 75 | policies, [Contact us](#contact). We're happy to help! 76 | 77 | NOTE: The demo Colab demonstrates how to call the API using short-lived access 78 | tokens. These tokens permit the API to read and process the images on behalf of 79 | the individual who is running the Colab. It's important to note that the API 80 | cannot access your data independently. The API processes images when you 81 | instruct it to using a time-limited access token and does not store the images 82 | after processing. 
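As an illustration of what such a short-lived token is, the snippet below mints an OAuth2 access token from application-default credentials using the `google-auth` library. Treat it as a sketch only; the demo Colab has its own helper for this step.

```python
# Sketch: mint a short-lived OAuth2 access token from application-default
# credentials (e.g. after running `gcloud auth application-default login`,
# as described in the steps below).
import google.auth
from google.auth.transport.requests import Request

credentials, _ = google.auth.default(
    scopes=["https://www.googleapis.com/auth/cloud-platform"]
)
credentials.refresh(Request())
access_token = credentials.token  # expires after a short period
```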
83 | 84 | To use your own data with the API, you will need the following GCP resources: 85 | 86 | * A [GCP Project](https://cloud.google.com/storage/docs/projects) 87 | * A 88 | [Google Cloud DICOM Store](https://cloud.google.com/healthcare-api/docs/concepts/dicom) 89 | in the project for storing CT images 90 | * A GCS bucket in the project for staging dicoms before adding them to DICOM 91 | Store. This can also be used to store data labels to train your downstream 92 | model. 93 | 94 | 1. If you don't have access to an existing GCP Project, you will need to 95 | [create one](https://cloud.google.com/free). 96 | 97 | 1. Follow 98 | [these instructions](https://cloud.google.com/storage/docs/creating-buckets) 99 | to create the GCS bucket. 100 | 101 | 1. Follow 102 | [these instructions](https://cloud.google.com/healthcare-api/docs/how-tos/dicom) 103 | to create a Cloud DICOM Store. 104 | 105 | 1. Use [Google Cloud IAM panel](https://console.cloud.google.com/iam-admin) 106 | to grant the following permissions to the GCP resources: 107 | 108 | * Allow the individual running the rest of the steps to manage objects in 109 | the GCS bucket by granting them the predefined role 110 | `roles/storage.objectAdmin`. 111 | 112 | * Allow 113 | [the identity(ies) who have access to our API](#how-to-gain-access) to: 114 | 115 | * read training labels and persist embeddings in the GCS bucket by 116 | granting them the predefined role `roles/storage.objectAdmin`. 117 | * read DICOM images from the Cloud DICOM Store by granting them the 118 | predefined role `roles/healthcare.dicomViewer`. 119 | 120 | 1. On your local machine 121 | [install the gcloud SDK](https://cloud.google.com/sdk/docs/install) and 122 | [log in](https://cloud.google.com/sdk/gcloud/reference/auth/login): 123 | 124 | ``` 125 | gcloud auth application-default login 126 | ``` 127 | 128 | 1. From your local machine use the 129 | [gcloud storage commands](https://cloud.google.com/sdk/gcloud/reference/storage) 130 | to transfer DICOM files in .dcm format to 131 | the GCS bucket. You may use the 132 | [`rsync` command](https://cloud.google.com/sdk/gcloud/reference/storage/rsync) 133 | instead of `cp` to handle larger volume of files. You can also optionally add labels to train downstream models. 134 | 135 | 1. Follow 136 | [these instructions](https://cloud.google.com/healthcare-api/docs/how-tos/dicom-import-export#gcloud) 137 | to bulk import DICOM files from the GCS bucket to your Cloud DICOM Store. 138 | 139 | 1. Modify the 140 | [Demo Notebook](https://colab.research.google.com/github/google-health/imaging-research/blob/master/ct-foundation/CT_Foundation_Demo.ipynb) 141 | to point to your data: 142 | 143 | 1. If storing your labels in GCS replace `hai-cd3-foundations-ct3d-vault-entry` 144 | with the name of your GCS bucket. Otherwise import them into the Notebook from wherever you are storing them. 145 | 146 | 1. To use your DICOM images, change the the Cloud DICOM Store urls. They take 147 | the following format: 148 | `https://healthcare.googleapis.com/v1/projects/YOUR_PROJECT_ID/locations/YOUR_LOCATION/datasets/YOUR_DATASET_ID/dicomStores/YOUR_DICOM_STORE_ID/`. 149 | You need to substitute `YOUR_PROJECT_ID` with the project Id you obtained in 150 | step 1 and `YOUR_LOCATION`, `YOUR_DATASET_ID`, `YOUR_DICOM_STORE_ID` from 151 | step 3. 
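For clarity, the substitution described in the last step is plain string formatting; the sketch below assembles the DICOMweb base URL from placeholder resource IDs.

```python
# Assemble the Cloud DICOM Store base URL described above.
# All values below are placeholders; substitute your own resource IDs.
project_id = "YOUR_PROJECT_ID"
location = "YOUR_LOCATION"
dataset_id = "YOUR_DATASET_ID"
dicom_store_id = "YOUR_DICOM_STORE_ID"

dicomweb_url = (
    "https://healthcare.googleapis.com/v1/"
    f"projects/{project_id}/locations/{location}/"
    f"datasets/{dataset_id}/dicomStores/{dicom_store_id}/"
)
print(dicomweb_url)
```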
152 | 153 | ## Access Options 154 | 155 | You have the option to request access to the API either as 156 | [an individual](#as-an-individual-non-gmail-account) or for 157 | [a group](#as-a-group-recommended). Choose the process that best aligns with 158 | your needs. Remember to note the email identifier for which you will be 159 | requesting access. It should be in one of these formats: 160 | 161 | * YOUR-GROUP-NAME@YOUR-DOMAIN 162 | * INDIVIDUAL-ID@YOUR-DOMAIN 163 | * INDIVIDUAL-ID@gmail.com 164 | 165 | ### As a group (recommended) 166 | 167 | If your organization is a Google Workspace or Google Cloud Platform (GCP) 168 | customer, contact your Google admin and ask them to create a group with the list 169 | of individuals who will be using the API. Let them know that this group is used 170 | for contacting you and also as a security principal for authorizing your access 171 | to the API. 172 | 173 | ![Create Google Group](img/create-group.png) 174 | 175 | Otherwise, 176 | [create a free Cloud Identity Account](https://cloud.google.com/identity/docs/set-up-cloud-identity-admin) 177 | for your domain name and in the process become the interim Google admin for your 178 | organization. Visit [Google Admin console](https://admin.google.com/) and create 179 | the above-mentioned group. If your individual identities are unknown to Google, 180 | they will need to follow the process for the [individuals](#as-an-individual) 181 | before you can add them to the group. 182 | 183 | ### As an individual (non-gmail account) 184 | 185 | This section applies for the INDIVIDUAL-ID@YOUR-DOMAIN case (e.g. 186 | `person@university.org` or `person@company.com`) 187 | 188 | If your organization is a Google Workspace or GCP customer, identity federation 189 | is most likely set up between your corporate identity directory and 190 | [Google Identity and Access Management](https://cloud.google.com/security/products/iam) 191 | and therefore individuals already have Google identities in the form of their 192 | corporate emails. Check with your IT department to find out whether identity 193 | federation is already in place or will be established soon. 194 | 195 | Otherwise, 196 | [create a Google identity based on your email](https://accounts.google.com/signup/v2/webcreateaccount?flowName=GlifWebSignIn&flowEntry=SignUp). 197 | Opt for the "use my current email address instead" option, as shown in the 198 | screen capture below. 199 | 200 | IMPORTANT: You should choose a password that is different from the password you 201 | use for the email account. 202 | 203 | ![Create Google Id](img/create-identity.png) 204 | 205 | ### As an individual (`@gmail.com` account) 206 | 207 | If you want to sign up as an individual with a gmail account, you can submit the 208 | form directly with your gmail address. 209 | 210 | ## General notes 211 | 212 | * Google does not keep a copy of any DICOM images processed. 213 | * Google monitors daily query volume and aggregates on a per-user and 214 | per-organization basis. Access can be revoked if a user or organization 215 | exceeds a reasonable query volume. 216 | 217 | ## Contributing 218 | 219 | See [`CONTRIBUTING.md`](CONTRIBUTING.md) for details. 220 | 221 | ## License 222 | 223 | See [`LICENSE`](LICENSE) for details. 
224 | 225 | ## Contact 226 | 227 | Please reach out to us at 228 | [ct-foundation@google.com](mailto:ct-foundation@google.com]) for issues such as, 229 | but not limited to: 230 | 231 | - Seeking technical assistance 232 | - Providing feedback 233 | - Requesting permissions for publications 234 | - Discussing clinical use cases 235 | - Discussing enterprise requirements such as: 236 | - Fitting within strict security perimeters of your organization 237 | - Governing your data in GCS 238 | - Training and serving custom models at scale on 239 | [Vertex AI](https://cloud.google.com/vertex-ai?hl=en) 240 | 241 | ## Data Attribution 242 | 243 | The included demo notebook makes use of two public datasets provided by the Cancer Imaging Archive which is managed by the United States National Cancer Institute 244 | 245 | ### NLST Radiology CT Images CC BY 4.0 246 | [https://www.cancerimagingarchive.net/collection/nlst/](https://www.cancerimagingarchive.net/collection/nlst/) 247 | 248 | #### NLST Data Citation 249 | National Lung Screening Trial Research Team. (2013). Data from the National Lung Screening Trial (NLST) [Data set]. The Cancer Imaging Archive. https://doi.org/10.7937/TCIA.HMQ8-J677 250 | ### LIDC-IDRI Data Access CC BY 3.0 251 | https://www.cancerimagingarchive.net/collection/lidc-idri/ 252 | 253 | #### LIDC-IDRI Data Citation 254 | 255 | Armato III, S. G., McLennan, G., Bidaut, L., McNitt-Gray, M. F., Meyer, C. R., Reeves, A. P., Zhao, B., Aberle, D. R., Henschke, C. I., Hoffman, E. A., Kazerooni, E. A., MacMahon, H., Van Beek, E. J. R., Yankelevitz, D., Biancardi, A. M., Bland, P. H., Brown, M. S., Engelmann, R. M., Laderach, G. E., Max, D., Pais, R. C. , Qing, D. P. Y. , Roberts, R. Y., Smith, A. R., Starkey, A., Batra, P., Caligiuri, P., Farooqi, A., Gladish, G. W., Jude, C. M., Munden, R. F., Petkovska, I., Quint, L. E., Schwartz, L. H., Sundaram, B., Dodd, L. E., Fenimore, C., Gur, D., Petrick, N., Freymann, J., Kirby, J., Hughes, B., Casteele, A. V., Gupte, S., Sallam, M., Heath, M. D., Kuhn, M. H., Dharaiya, E., Burns, R., Fryd, D. S., Salganicoff, M., Anand, V., Shreter, U., Vastagh, S., Croft, B. Y., Clarke, L. P. (2015). Data From LIDC-IDRI [Data set]. The Cancer Imaging Archive. https://doi.org/10.7937/K9/TCIA.2015.LO9QL9SX 256 | 257 | 258 | # Model Card for CT Foundation 259 | 260 | This section briefly overviews the background and limitations of CT Foundation. 261 | 262 | ## Model Details 263 | ### Overview 264 | 265 | CT Foundation produces embeddings of size 1408 from a CT volume. Embeddings are 266 | n-dimensional vectors of floating points representing a projection of the 267 | original image into a compressed feature space capable of describing image 268 | features relevant to CT image analysis. The model is based on the 269 | [Video CoCa architecture](https://arxiv.org/abs/2212.04979). CT Foundation was 270 | trained in two stages. 271 | 272 | * Firstly train a medical image–specific 2D CoCa model 273 | * Use this 2D model as a basis for VideoCoCa. Training on specifically 274 | prepared axial CT slices (series of CT slices in a scan) coupled with 275 | radiology reports. 276 | 277 | The resulting feature representations provided by CT Foundation offer robust 278 | input for downstream tasks in CT image analysis. Additional information on our 279 | evaluation tasks can be found in our 280 | [blog post](https://research.google/blog/taking-medical-imaging-embeddings-3d). 
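The non-linear heads referenced in the Validation section below are, in essence, small multilayer perceptrons trained on top of the 1408-dimensional embedding. A minimal Keras sketch of such a head is shown here; the layer sizes, dropout, and binary objective are illustrative assumptions rather than the exact configuration used.

```python
# Illustrative sketch of an MLP head on a 1408-dimensional CT Foundation
# embedding. Layer sizes, dropout, and the binary objective are assumptions.
import tensorflow as tf

inputs = tf.keras.Input(shape=(1408,))
x = tf.keras.layers.Dense(256, activation="relu")(inputs)
x = tf.keras.layers.Dropout(0.2)(x)
outputs = tf.keras.layers.Dense(1, activation="sigmoid")(x)  # e.g. finding present / absent
mlp_head = tf.keras.Model(inputs, outputs)

mlp_head.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=[tf.keras.metrics.AUC(name="auc")],
)
# mlp_head.fit(train_embeddings, train_labels, epochs=20, validation_split=0.1)
```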
281 | 282 | ### Version 283 | 284 | ``` 285 | name: v1.0.0 286 | date: 2024-10-18 287 | ``` 288 | 289 | ### Owners 290 | 291 | ``` 292 | ct-foundation@google.com 293 | ``` 294 | 295 | ### License 296 | 297 | Research use only. Not suitable for product development. - See 298 | [CT Foundation - Additional Terms of Service](https://docs.google.com/forms/d/e/1FAIpQLSfkSBbCi5dOlJxuDB3t6biFEBIA9JL66A99YRZa8qR2Fn5mUA/viewform?resourcekey=0-vPNR0VQ-vibGDJ564j4mCA). 299 | 300 | ### Intended Use 301 | 302 | * CT Foundation can reduce the training data, compute, and technical expertise 303 | necessary to develop task-specific models based on Computed Tomography 304 | Scans. 305 | * Embeddings from the model can be used for a variety of user-defined 306 | downstream tasks across different CT studies of various body parts. Validation includes lung 307 | cancer identification within chest CTs, aortic aneurysm in abdominal CTs, 308 | hemorrhage within head CTs. The model can also be used to classify different 309 | CT studies by body part or image quality. 310 | 311 | ### Training Data 312 | 313 | A comprehensive private dataset comprising 527,078 CT studies with associated 314 | radiology reports from 430,772 patients was obtained from three major hospital 315 | regions in the United States. 316 | 317 | ### Validation 318 | 319 | Evaluation was conducted across a diverse set of 7 benchmarking tests using 320 | non-linear 321 | [multilayer perceptrons](https://en.wikipedia.org/wiki/Multilayer_perceptron). 322 | These tasks were related to classifying: intracranial hemorrhage, calcifications 323 | in the chest and heart, lung cancer prediction in the chest, suspicious 324 | abdominal lesions, urolithiasis, and abdominal aortic aneurysm in abdominopelvic 325 | CTs. Results can be found in our 326 | [blog post](https://research.google/blog/taking-medical-imaging-embeddings-3d). 327 | 328 | ### Risks 329 | 330 | Although Google does not store any data sent to this model, it is the data 331 | owner's responsibility to ensure that Personally identifiable information (PII) 332 | and Protected Health Information (PHI) are removed prior to being sent to the 333 | model. Mitigation Strategy: Do not send data containing PII or PHI. Training 334 | dataset is a de-identified public dataset and CT imaging (pixel data) does not 335 | contain PHI. 336 | 337 | ### Limitations 338 | 339 | This is a research model and is intended for research purposes only. It has not 340 | been extensively validated across different scanner manufacturers. As with any 341 | research, developers should ensure any downstream application is validated to 342 | understand performance using data that is appropriately representative of the 343 | intended use setting (e.g., age, sex, gender, condition, scanner, etc.). 344 | -------------------------------------------------------------------------------- /cxr-foundation/MIMIC_Embeddings_Demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "\n", 9 | " \n", 12 | " \n", 15 | "
\n", 10 | " Run in Google Colab\n", 11 | " \n", 13 | " View source on GitHub\n", 14 | "
" 16 | ] 17 | }, 18 | { 19 | "attachments": {}, 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "# MIMIC CXR Embeddings Demo\n", 24 | "\n", 25 | "## Overview\n", 26 | "\n", 27 | "This notebook demonstrates how to train a simple neural network for a supervised classification task, using a set of Chest X-ray image embeddings.\n", 28 | "\n", 29 | "The datasets leveraged in this notebook are both derived from the [MIMIC-CXR Dataset](https://physionet.org/content/mimic-cxr/2.0.0/), which contains over 300,000 DICOMs and radiology reports:\n", 30 | "1. [The MIMIC-CXR JPG Dataset](https://physionet.org/content/mimic-cxr-jpg/2.0.0/) - contains JPG files derived from the DICOM images and structured labels derived from the free-text reports.\n", 31 | "2. [The MIMIC-CXR Image Embeddings Dataset](https://physionet.org/content/image-embeddings-mimic-cxr/1.0/) - which was generated from MIMIC-CXR using the Google Health [CXR Foundation tool](https://github.com/Google-Health/imaging-research/blob/master/cxr-foundation/README.md).\n", 32 | "\n", 33 | "## Prerequisites\n", 34 | "\n", 35 | "1. **Data access** - the MIMIC datasets are access-controlled. Follow the instructions on the [files](https://physionet.org/content/image-embeddings-mimic-cxr/1.0/#files) section to get access to the data. Overall, you must:\n", 36 | " - Be a credentialled PhysioNet user\n", 37 | " - Complete the appropriate institutional research training and get it verified by PhysioNet\n", 38 | " - Ensure the email you use to access Google Cloud is [selected](https://physionet.org/settings/cloud/) in your PhysioNet profile.\n", 39 | " - Sign the data use agreement for each dataset\n", 40 | " - Request access to the dataset's GCS bucket\n", 41 | "2. **Billing** - this notebook downloads data directly from PhysioNet's GCS buckets, which are set to [requester pays](https://cloud.google.com/storage/docs/requester-pays). Therefore you must have a Google Cloud project with an associated billing account. (The download cost in this notebook should be < $1)\n", 42 | "\n", 43 | "Note: PhysioNet hosts its data on its on-prem servers, which can be downloaded free of charge. Some of its databases are copied onto GCS buckets, which have much faster download speeds." 44 | ] 45 | }, 46 | { 47 | "attachments": {}, 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "# Installation\n", 52 | "\n", 53 | "Install the CXR Foundation package" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "!git clone https://github.com/Google-Health/imaging-research.git\n", 63 | "!pip install imaging-research/cxr-foundation/\n", 64 | "\n", 65 | "# Notebook specific dependencies\n", 66 | "!pip install tf-models-official>=2.13.0 google-cloud-storage" 67 | ] 68 | }, 69 | { 70 | "attachments": {}, 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "**IMPORTANT**: If you are using Colab, you must restart the runtime after installing new packages.\n", 75 | "\n", 76 | "NOTE: There will be some ERROR messages due to the protobuf library - this is normal." 77 | ] 78 | }, 79 | { 80 | "attachments": {}, 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "# Authenticate to Access Data\n", 85 | "\n", 86 | "The following cell is for Colab only. If running elsewhere, authenticate with the [gcloud CLI](https://cloud.google.com/sdk/gcloud/reference/auth/login)." 
87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "from google.colab import auth\n", 96 | "\n", 97 | "# Authenticate user for access. There will be a popup asking you to sign in with your user and approve access.\n", 98 | "auth.authenticate_user()" 99 | ] 100 | }, 101 | { 102 | "attachments": {}, 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "# Download and Process Metadata" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 1, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "import os\n", 116 | "\n", 117 | "from google.cloud import storage\n", 118 | "from google.cloud.storage import Bucket\n", 119 | "import pandas as pd\n", 120 | "\n", 121 | "from cxr_foundation.mimic import parse_embedding_file_pattern\n", 122 | "\n", 123 | "\n", 124 | "def download_blob(bucket: Bucket, source_blob_name: str, destination_file_name: str, print_name : str = None):\n", 125 | " \"\"\"\n", 126 | " Downloads a blob from the bucket.\n", 127 | "\n", 128 | " https://cloud.google.com/storage/docs/downloading-objects\n", 129 | "\n", 130 | " Params:\n", 131 | " print_name : Print the file name when downloaded. Options: \"source\" or \"dest\" or None.\n", 132 | " \"\"\"\n", 133 | " blob = bucket.blob(source_blob_name)\n", 134 | " try:\n", 135 | " blob.download_to_filename(destination_file_name)\n", 136 | " except Exception as e:\n", 137 | " print('Error during download - do you have the right permissions?')\n", 138 | " print(e)\n", 139 | " return\n", 140 | "\n", 141 | " if print_name == \"source\":\n", 142 | " print(f\"Downloaded: {source_blob_name}\")\n", 143 | " elif print_name == \"dest\":\n", 144 | " print(f\"Downloaded: {destination_file_name}\")\n", 145 | "\n", 146 | "\n", 147 | "DATA_DIR = \"data\"\n", 148 | "EMBEDDINGS_DATA_DIR = os.path.abspath(os.path.join(DATA_DIR, \"mimic-embeddings-files\"))\n", 149 | "\n", 150 | "\n", 151 | "# Make a directory to download the data\n", 152 | "if not os.path.exists(DATA_DIR):\n", 153 | " os.mkdir(DATA_DIR)\n", 154 | "\n", 155 | "if not os.path.exists(EMBEDDINGS_DATA_DIR):\n", 156 | " os.mkdir(EMBEDDINGS_DATA_DIR)" 157 | ] 158 | }, 159 | { 160 | "attachments": {}, 161 | "cell_type": "markdown", 162 | "metadata": {}, 163 | "source": [ 164 | "## Embeddings Metadata\n", 165 | "\n", 166 | "Data source:\n", 167 | "- https://physionet.org/content/image-embeddings-mimic-cxr/1.0/\n", 168 | "- https://console.cloud.google.com/storage/browser/image-embeddings-mimic-cxr-1.0.physionet.org\n", 169 | "\n", 170 | "Download the checksums file which contains a list of the embeddings files. Extract the data components from the file names." 
171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": null, 176 | "metadata": {}, 177 | "outputs": [], 178 | "source": [ 179 | "storage_client = storage.Client()\n", 180 | "\n", 181 | "embeddings_bucket = storage_client.bucket(\n", 182 | " 'image-embeddings-mimic-cxr-1.0.physionet.org') \n", 183 | "\n", 184 | "# Download the checksums file which contains a records list\n", 185 | "download_blob(embeddings_bucket, \"SHA256SUMS.txt\", \"data/SHA256SUMS.txt\")" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [ 194 | "df_embeddings = pd.read_csv(\"data/SHA256SUMS.txt\", delimiter=\" \", header=None, skiprows=[0]) # Skip the license file entry\n", 195 | "display(df_embeddings.head())" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": null, 201 | "metadata": {}, 202 | "outputs": [], 203 | "source": [ 204 | "SOURCE_COL_NAME = \"embeddings_file\" # Remote bucket embedding file location\n", 205 | "DL_COL_NAME = \"local_embeddings_file\" # Download file to this location\n", 206 | "\n", 207 | "# Create additional columns from file path components\n", 208 | "df_embeddings = df_embeddings[[1]]\n", 209 | "df_embeddings.rename(columns={1: \"embeddings_file\"}, inplace=True)\n", 210 | "df_embeddings[[\"subject_id\",\"study_id\", \"dicom_id\"]] = df_embeddings.apply(\n", 211 | " lambda x: parse_embedding_file_pattern(x[SOURCE_COL_NAME]), axis=1, result_type=\"expand\")\n", 212 | "df_embeddings[DL_COL_NAME] = df_embeddings[SOURCE_COL_NAME].apply(lambda x: os.path.join(EMBEDDINGS_DATA_DIR, os.path.basename(x))) # For download\n", 213 | "\n", 214 | "display(df_embeddings)" 215 | ] 216 | }, 217 | { 218 | "attachments": {}, 219 | "cell_type": "markdown", 220 | "metadata": {}, 221 | "source": [ 222 | "## CXR Metadata\n", 223 | "\n", 224 | "Data source:\n", 225 | "- https://physionet.org/content/mimic-cxr-jpg/2.0.0/\n", 226 | "- https://console.cloud.google.com/storage/browser/mimic-cxr-jpg-2.0.0.physionet.org\n", 227 | "\n", 228 | "Download and visualize three metadata files:\n", 229 | "1. `mimic-cxr-2.0.0-metadata.csv`: Meta-data derived from the original DICOM files\n", 230 | "2. `mimic-cxr-2.0.0-split.csv`: A reference dataset split for studies using MIMIC-CXR-JPG\n", 231 | "3. `mimic-cxr-2.0.0-chexpert.csv`: Lists all studies with labels generated by the CheXpert labeler.\n", 232 | "\n", 233 | "The first two files were used to generate the embeddings database. 
Embeddings files were only generated for the frontal view CXRs, so there are fewer embeddings files than there are original DICOMs/JPGs.\n" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "metadata": {}, 240 | "outputs": [], 241 | "source": [ 242 | "cxr_jpg_bucket = storage_client.bucket(\n", 243 | " 'mimic-cxr-jpg-2.0.0.physionet.org')\n", 244 | "\n", 245 | "CXR_JPG_METADATA_FILES = (\n", 246 | " \"mimic-cxr-2.0.0-metadata.csv.gz\",\n", 247 | " \"mimic-cxr-2.0.0-split.csv.gz\",\n", 248 | " \"mimic-cxr-2.0.0-chexpert.csv.gz\")\n", 249 | "\n", 250 | "for fname in CXR_JPG_METADATA_FILES:\n", 251 | " download_blob(cxr_jpg_bucket, fname, f\"{DATA_DIR}/{fname}\")" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": null, 257 | "metadata": {}, 258 | "outputs": [], 259 | "source": [ 260 | "CXR_JPG_METADATA_FILES = (\n", 261 | " \"mimic-cxr-2.0.0-metadata.csv.gz\",\n", 262 | " \"mimic-cxr-2.0.0-split.csv.gz\",\n", 263 | " \"mimic-cxr-2.0.0-chexpert.csv.gz\")\n", 264 | "\n", 265 | "df_metadata = pd.read_csv(f\"data/{CXR_JPG_METADATA_FILES[0]}\", compression=\"gzip\")\n", 266 | "df_split = pd.read_csv(f\"data/{CXR_JPG_METADATA_FILES[1]}\", compression=\"gzip\")\n", 267 | "df_labels_chexpert = pd.read_csv(f\"data/{CXR_JPG_METADATA_FILES[2]}\", compression=\"gzip\")\n", 268 | "\n", 269 | "display(df_metadata.head())\n", 270 | "display(df_split.head())\n", 271 | "display(df_labels_chexpert.head())" 272 | ] 273 | }, 274 | { 275 | "attachments": {}, 276 | "cell_type": "markdown", 277 | "metadata": {}, 278 | "source": [ 279 | "## Create the full labels file\n", 280 | "\n", 281 | "Join embeddings list with Chexpert metadata files" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": null, 287 | "metadata": {}, 288 | "outputs": [], 289 | "source": [ 290 | "# Each study contains one or more DICOMs\n", 291 | "# Chexpert labels df does not contain DICOM ID. 
Must join on (subject_id + study_id)\n", 292 | "df_labels_all = df_split.merge(df_labels_chexpert, on=['subject_id', 'study_id'])\n", 293 | "df_labels_all = df_labels_all.merge(df_metadata, on=['dicom_id'])\n", 294 | "df_labels_all = df_embeddings.merge(df_labels_all, on=['dicom_id'], how='left')\n", 295 | "\n", 296 | "display(df_labels_all)" 297 | ] 298 | }, 299 | { 300 | "attachments": {}, 301 | "cell_type": "markdown", 302 | "metadata": {}, 303 | "source": [ 304 | "## Make Labels files for Individual Diagnoses" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": null, 310 | "metadata": {}, 311 | "outputs": [], 312 | "source": [ 313 | "# Dict of data frames for individual diagnoses\n", 314 | "diagnoses_dataframes = {}\n", 315 | "\n", 316 | "# Choose some of the Chexpert generated diagnoses\n", 317 | "for diagnosis in ('Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema', 'Fracture'):\n", 318 | " # Remove missing/unsure labels\n", 319 | " df_diagnosis = df_labels_all[df_labels_all[diagnosis].isin((0, 1))]\n", 320 | " # Only extract required columns for the ML model\n", 321 | " df_diagnosis = df_diagnosis[[diagnosis, SOURCE_COL_NAME, DL_COL_NAME, 'split']]\n", 322 | " \n", 323 | " diagnoses_dataframes[diagnosis] = df_diagnosis\n", 324 | " df_diagnosis.to_csv(f'data/{diagnosis}.csv', index=False)\n", 325 | " print(f\"Created {diagnosis}.csv with {len(df_diagnosis)} rows\")\n", 326 | " display(df_diagnosis.nunique())\n", 327 | " \n", 328 | " # Show label and split value distributions\n", 329 | " display(df_diagnosis[diagnosis].value_counts())\n", 330 | " display(df_diagnosis['split'].value_counts())\n", 331 | " print(\"\\n\")" 332 | ] 333 | }, 334 | { 335 | "attachments": {}, 336 | "cell_type": "markdown", 337 | "metadata": {}, 338 | "source": [ 339 | "# Download Embeddings Files for Model Training\n", 340 | "\n", 341 | "There are many labels for Cardiomegaly. We will train our model using the embeddings with this label." 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": null, 347 | "metadata": {}, 348 | "outputs": [], 349 | "source": [ 350 | "DIAGNOSIS = 'Cardiomegaly'\n", 351 | "LABELS_CSV = f\"data/{DIAGNOSIS}.csv\"\n", 352 | "MAX_TRAINING_SAMPLES = 500\n", 353 | "MAX_VALIDATION_SAMPLES = 200\n", 354 | "\n", 355 | "df_diagnosis = pd.read_csv(LABELS_CSV)\n", 356 | "\n", 357 | "df_train = df_diagnosis[df_diagnosis[\"split\"] == \"train\"][:MAX_TRAINING_SAMPLES]\n", 358 | "df_validate = df_diagnosis[df_diagnosis[\"split\"] == \"validate\"][:MAX_VALIDATION_SAMPLES]\n", 359 | " \n", 360 | "\n", 361 | "display(df_train)\n", 362 | "display(df_validate)" 363 | ] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "execution_count": null, 368 | "metadata": {}, 369 | "outputs": [], 370 | "source": [ 371 | "# Takes ~2m\n", 372 | "for i, row in df_train.iterrows():\n", 373 | " download_blob(embeddings_bucket, row[SOURCE_COL_NAME], row[DL_COL_NAME], print_name=\"dest\")\n", 374 | "\n", 375 | "for i, row in df_validate.iterrows():\n", 376 | " download_blob(embeddings_bucket, row[SOURCE_COL_NAME], row[DL_COL_NAME], print_name=\"dest\")" 377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": null, 382 | "metadata": {}, 383 | "outputs": [], 384 | "source": [ 385 | "# Inspect an embedding files. 
A single file is only 5.6kb\n", 386 | "from cxr_foundation import embeddings_data\n", 387 | "\n", 388 | "\n", 389 | "filename = df_train[DL_COL_NAME][0]\n", 390 | "\n", 391 | "# Read the tf.train.Example object from the first tfrecord file\n", 392 | "example = embeddings_data.read_tfrecord_example(filename)\n", 393 | "print(example)\n", 394 | "\n", 395 | "# If you don't care about the structure of the .tfrecord file, and/or if\n", 396 | "# you don't use Tensorflow, you can use the following function to read\n", 397 | "# the values directly into a numpy array.\n", 398 | "values = embeddings_data.read_tfrecord_values(filename)\n", 399 | "print(values)" 400 | ] 401 | }, 402 | { 403 | "attachments": {}, 404 | "cell_type": "markdown", 405 | "metadata": {}, 406 | "source": [ 407 | "# Create and Train Model\n" 408 | ] 409 | }, 410 | { 411 | "cell_type": "code", 412 | "execution_count": null, 413 | "metadata": {}, 414 | "outputs": [], 415 | "source": [ 416 | "import tensorflow as tf\n", 417 | "import tensorflow_models as tfm\n", 418 | "\n", 419 | "\n", 420 | "def create_model(heads,\n", 421 | " embeddings_size=1376,\n", 422 | " learning_rate=0.1,\n", 423 | " end_lr_factor=1.0,\n", 424 | " dropout=0.0,\n", 425 | " decay_steps=1000,\n", 426 | " loss_weights=None,\n", 427 | " hidden_layer_sizes=[512, 256],\n", 428 | " weight_decay=0.0,\n", 429 | " seed=None) -> tf.keras.Model:\n", 430 | " \"\"\"\n", 431 | " Creates linear probe or multilayer perceptron using LARS + cosine decay.\n", 432 | "\n", 433 | " \"\"\"\n", 434 | " inputs = tf.keras.Input(shape=(embeddings_size,))\n", 435 | " hidden = inputs\n", 436 | " # If no hidden_layer_sizes are provided, model will be a linear probe.\n", 437 | " for size in hidden_layer_sizes:\n", 438 | " hidden = tf.keras.layers.Dense(\n", 439 | " size,\n", 440 | " activation='relu',\n", 441 | " kernel_initializer=tf.keras.initializers.HeUniform(seed=seed),\n", 442 | " kernel_regularizer=tf.keras.regularizers.l2(l2=weight_decay),\n", 443 | " bias_regularizer=tf.keras.regularizers.l2(l2=weight_decay))(\n", 444 | " hidden)\n", 445 | " hidden = tf.keras.layers.BatchNormalization()(hidden)\n", 446 | " hidden = tf.keras.layers.Dropout(dropout, seed=seed)(hidden)\n", 447 | " output = tf.keras.layers.Dense(\n", 448 | " units=len(heads),\n", 449 | " activation='sigmoid',\n", 450 | " kernel_initializer=tf.keras.initializers.HeUniform(seed=seed))(\n", 451 | " hidden)\n", 452 | "\n", 453 | " outputs = {}\n", 454 | " for i, head in enumerate(heads):\n", 455 | " outputs[head] = tf.keras.layers.Lambda(\n", 456 | " lambda x: x[..., i:i + 1], name=head.lower())(\n", 457 | " output)\n", 458 | "\n", 459 | " model = tf.keras.Model(inputs, outputs)\n", 460 | "\n", 461 | " learning_rate_fn = tf.keras.experimental.CosineDecay(\n", 462 | " tf.cast(learning_rate, tf.float32),\n", 463 | " tf.cast(decay_steps, tf.float32),\n", 464 | " alpha=tf.cast(end_lr_factor, tf.float32))\n", 465 | " \n", 466 | " model.compile(\n", 467 | " optimizer=tfm.optimization.lars.LARS(\n", 468 | " learning_rate=learning_rate_fn),\n", 469 | " loss=dict([(head, 'binary_crossentropy') for head in heads]),\n", 470 | " loss_weights=loss_weights or dict([(head, 1.) 
for head in heads]),\n", 471 | " weighted_metrics=['AUC'])\n", 472 | " return model" 473 | ] 474 | }, 475 | { 476 | "cell_type": "code", 477 | "execution_count": null, 478 | "metadata": {}, 479 | "outputs": [], 480 | "source": [ 481 | "# Create training and validation Datasets\n", 482 | "training_data = embeddings_data.get_dataset(filenames=df_train[DL_COL_NAME].values,\n", 483 | " labels=df_train[DIAGNOSIS].values)\n", 484 | "\n", 485 | "\n", 486 | "validation_data = embeddings_data.get_dataset(filenames=df_validate[DL_COL_NAME].values,\n", 487 | " labels=df_validate[DIAGNOSIS].values)\n", 488 | "\n", 489 | "# Create and train the model\n", 490 | "model = create_model([DIAGNOSIS])\n", 491 | "\n", 492 | "model.fit(\n", 493 | " x=training_data.batch(512).prefetch(tf.data.AUTOTUNE).cache(),\n", 494 | " validation_data=validation_data.batch(1).cache(),\n", 495 | " epochs=20,\n", 496 | ")" 497 | ] 498 | }, 499 | { 500 | "cell_type": "code", 501 | "execution_count": null, 502 | "metadata": {}, 503 | "outputs": [], 504 | "source": [ 505 | "model.summary()" 506 | ] 507 | }, 508 | { 509 | "cell_type": "code", 510 | "execution_count": null, 511 | "metadata": {}, 512 | "outputs": [], 513 | "source": [ 514 | "# Optional: serialize model for later use\n", 515 | "# model.save(\"embeddings_model\", include_optimizer=False)" 516 | ] 517 | } 518 | ], 519 | "metadata": { 520 | "kernelspec": { 521 | "display_name": "cxr", 522 | "language": "python", 523 | "name": "python3" 524 | }, 525 | "language_info": { 526 | "codemirror_mode": { 527 | "name": "ipython", 528 | "version": 3 529 | }, 530 | "file_extension": ".py", 531 | "mimetype": "text/x-python", 532 | "name": "python", 533 | "nbconvert_exporter": "python", 534 | "pygments_lexer": "ipython3", 535 | "version": "3.9.16" 536 | }, 537 | "orig_nbformat": 4, 538 | "vscode": { 539 | "interpreter": { 540 | "hash": "d3ac608b8f9188be2227ae82298dfd5de684cbdc4496f362d4b3b9040509447c" 541 | } 542 | } 543 | }, 544 | "nbformat": 4, 545 | "nbformat_minor": 2 546 | } 547 | --------------------------------------------------------------------------------