├── cxr-foundation ├── cxr_foundation │ ├── __init__.py │ ├── testdata │ │ ├── fake.dcm │ │ ├── expected.png │ │ ├── random.png │ │ ├── 00000001_000.tfrecord │ │ ├── 00000001_001.tfrecord │ │ ├── 00000001_002.tfrecord │ │ ├── 00000002_000.tfrecord │ │ └── 00000003_000.tfrecord │ ├── mimic.py │ ├── example_generator_lib_test.py │ ├── constants.py │ ├── embeddings_data.py │ ├── example_generator_lib.py │ └── inference.py ├── logo.png ├── cxr_foundation_interactive_demo_deps │ ├── fullscreen.gif │ ├── index.html │ └── cxr.css ├── Dockerfile ├── build_pip_package.sh ├── CONTRIBUTING.md ├── setup.py ├── CXR_Foundation_Interactive_Demo.ipynb ├── README.md ├── LICENSE └── MIMIC_Embeddings_Demo.ipynb ├── ct-foundation ├── API_specification.md ├── img │ ├── create-group.png │ └── create-identity.png ├── CONTRIBUTING.md ├── LICENSE └── README.md ├── wet-amd-prediction ├── CONTRIBUTING ├── README ├── LICENSE └── ex_amd_model.py ├── derm-foundation ├── create-group.png ├── create-identity.png ├── CONTRIBUTING.md ├── LICENSE └── README.md ├── path-foundation ├── img │ ├── create-group.png │ └── create-identity.png ├── CONTRIBUTING.md ├── LICENSE └── README.md ├── README.md └── .gitignore /cxr-foundation/cxr_foundation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ct-foundation/API_specification.md: -------------------------------------------------------------------------------- 1 | # API Specification 2 | -------------------------------------------------------------------------------- /wet-amd-prediction/CONTRIBUTING: -------------------------------------------------------------------------------- 1 | We are not accepting contributions for this project. 
2 | 3 | -------------------------------------------------------------------------------- /cxr-foundation/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Google-Health/imaging-research/HEAD/cxr-foundation/logo.png -------------------------------------------------------------------------------- /derm-foundation/create-group.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Google-Health/imaging-research/HEAD/derm-foundation/create-group.png -------------------------------------------------------------------------------- /ct-foundation/img/create-group.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Google-Health/imaging-research/HEAD/ct-foundation/img/create-group.png -------------------------------------------------------------------------------- /derm-foundation/create-identity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Google-Health/imaging-research/HEAD/derm-foundation/create-identity.png -------------------------------------------------------------------------------- /path-foundation/img/create-group.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Google-Health/imaging-research/HEAD/path-foundation/img/create-group.png -------------------------------------------------------------------------------- /ct-foundation/img/create-identity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Google-Health/imaging-research/HEAD/ct-foundation/img/create-identity.png -------------------------------------------------------------------------------- /path-foundation/img/create-identity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Google-Health/imaging-research/HEAD/path-foundation/img/create-identity.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This repo includes imaging research code shared by Google Health. 2 | Please see the README and LICENSE files in each project for more information. 
3 | -------------------------------------------------------------------------------- /cxr-foundation/cxr_foundation/testdata/fake.dcm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Google-Health/imaging-research/HEAD/cxr-foundation/cxr_foundation/testdata/fake.dcm -------------------------------------------------------------------------------- /cxr-foundation/cxr_foundation/testdata/expected.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Google-Health/imaging-research/HEAD/cxr-foundation/cxr_foundation/testdata/expected.png -------------------------------------------------------------------------------- /cxr-foundation/cxr_foundation/testdata/random.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Google-Health/imaging-research/HEAD/cxr-foundation/cxr_foundation/testdata/random.png -------------------------------------------------------------------------------- /cxr-foundation/cxr_foundation/testdata/00000001_000.tfrecord: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Google-Health/imaging-research/HEAD/cxr-foundation/cxr_foundation/testdata/00000001_000.tfrecord -------------------------------------------------------------------------------- /cxr-foundation/cxr_foundation/testdata/00000001_001.tfrecord: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Google-Health/imaging-research/HEAD/cxr-foundation/cxr_foundation/testdata/00000001_001.tfrecord -------------------------------------------------------------------------------- /cxr-foundation/cxr_foundation/testdata/00000001_002.tfrecord: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Google-Health/imaging-research/HEAD/cxr-foundation/cxr_foundation/testdata/00000001_002.tfrecord -------------------------------------------------------------------------------- /cxr-foundation/cxr_foundation/testdata/00000002_000.tfrecord: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Google-Health/imaging-research/HEAD/cxr-foundation/cxr_foundation/testdata/00000002_000.tfrecord -------------------------------------------------------------------------------- /cxr-foundation/cxr_foundation/testdata/00000003_000.tfrecord: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Google-Health/imaging-research/HEAD/cxr-foundation/cxr_foundation/testdata/00000003_000.tfrecord -------------------------------------------------------------------------------- /cxr-foundation/cxr_foundation_interactive_demo_deps/fullscreen.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Google-Health/imaging-research/HEAD/cxr-foundation/cxr_foundation_interactive_demo_deps/fullscreen.gif -------------------------------------------------------------------------------- /cxr-foundation/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM apache/beam_python3.7_sdk:2.35.0 2 | 3 | COPY . . 4 | 5 | RUN pip install cxr-foundation 6 | 7 | # Set the entrypoint to Apache Beam SDK launcher. 
8 | ENTRYPOINT ["/opt/apache/beam/boot"] -------------------------------------------------------------------------------- /cxr-foundation/build_pip_package.sh: -------------------------------------------------------------------------------- 1 | python -m venv .env 2 | source .env/bin/activate 3 | 4 | pip install --upgrade pip 5 | 6 | pip install --upgrade twine 7 | python setup.py sdist 8 | python setup.py bdist_wheel 9 | python -m twine check dist/* 10 | 11 | python -m twine upload dist/* 12 | -------------------------------------------------------------------------------- /wet-amd-prediction/README: -------------------------------------------------------------------------------- 1 | # Wet AMD prediction network architecture code 2 | 3 | This repo contains an implementation of the exAMD prediction network described in Yim J. et al. (2020) "Predicting conversion to wet age related macular 4 | degeneration using deep learning", Nature Medicine doi:10.1038/s41591-020-0867-7. 5 | 6 | Note this code illustrates only the network architecture and does not contain any data or training code. 7 | 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # Jupyter Notebook 30 | .ipynb_checkpoints 31 | -------------------------------------------------------------------------------- /cxr-foundation/cxr_foundation/mimic.py: -------------------------------------------------------------------------------- 1 | """Module for managing/parsing MIMIC data files""" 2 | 3 | import re 4 | 5 | 6 | # Example: 'files/p19/p19692222/s59566639/965b6053-a2c70d67-c0467ca6-02372346-fb7c6224.tfrecord' 7 | FILE_PATTERN = re.compile( 8 | r"files/(?:\w+)/p(?P\w+)/s(?P\w+)/(?P[\w-]+)\.tfrecord" 9 | ) 10 | 11 | 12 | def parse_embedding_file_pattern(file_path: str): 13 | """Extracts the subject_id, study_id, and dicom_id 14 | 15 | from the full file path string of a MIMIC CXR Embedding file: 16 | 17 | https://physionet.org/content/image-embeddings-mimic-cxr/ 18 | 19 | Example input: 20 | files/p19/p19692222/s59566639/965b6053-a2c70d67-c0467ca6-02372346-fb7c6224.tfrecord 21 | """ 22 | match = FILE_PATTERN.fullmatch(file_path) 23 | if not match: 24 | raise Exception(f"Failed to match file path: {file_path}") 25 | return (int(match[1]), int(match[2]), match[3]) 26 | -------------------------------------------------------------------------------- /derm-foundation/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement. You (or your employer) retain the copyright to your contribution; 10 | this simply gives us permission to use and redistribute your contributions as 11 | part of the project. Head over to to see 12 | your current agreements on file or to sign a new one. 
13 | 14 | You generally only need to submit a CLA once, so if you've already submitted one 15 | (even if it was for a different project), you probably don't need to do it 16 | again. 17 | 18 | ## Code Reviews 19 | 20 | All submissions, including submissions by project members, require review. We 21 | use GitHub pull requests for this purpose. Consult 22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 23 | information on using pull requests. 24 | 25 | ## Community Guidelines 26 | 27 | This project follows [Google's Open Source Community 28 | Guidelines](https://opensource.google/conduct/). -------------------------------------------------------------------------------- /cxr-foundation/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement. You (or your employer) retain the copyright to your contribution; 10 | this simply gives us permission to use and redistribute your contributions as 11 | part of the project. Head over to to see 12 | your current agreements on file or to sign a new one. 13 | 14 | You generally only need to submit a CLA once, so if you've already submitted one 15 | (even if it was for a different project), you probably don't need to do it 16 | again. 17 | 18 | ## Code Reviews 19 | 20 | All submissions, including submissions by project members, require review. We 21 | use GitHub pull requests for this purpose. Consult 22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 23 | information on using pull requests. 24 | 25 | ## Community Guidelines 26 | 27 | This project follows [Google's Open Source Community 28 | Guidelines](https://opensource.google/conduct/). 29 | -------------------------------------------------------------------------------- /cxr-foundation/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # 3 | # Copyright 2022 Google LLC 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | import setuptools 17 | 18 | setuptools.setup( 19 | name='cxr-foundation', 20 | version='1.0.0', 21 | description='CXR Foundation: chest x-ray embeddings generation.', 22 | install_requires=[ 23 | 'google-api-python-client', 24 | 'google-apitools', 25 | 'google-cloud-aiplatform', 26 | 'pandas', 27 | 'tensorflow >= 2.13.0', 28 | 'pillow', 29 | 'pypng', 30 | 'pydicom', 31 | 'typing-extensions', 32 | 'tensorflow_text', 33 | 'tensorflow-hub', 34 | ], 35 | packages=setuptools.find_packages(), 36 | ) 37 | -------------------------------------------------------------------------------- /ct-foundation/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. 4 | 5 | ## Before you begin 6 | 7 | ### Sign our Contributor License Agreement 8 | 9 | Contributions to this project must be accompanied by a 10 | [Contributor License Agreement](https://cla.developers.google.com/about) (CLA). 11 | You (or your employer) retain the copyright to your contribution; this simply 12 | gives us permission to use and redistribute your contributions as part of the 13 | project. 14 | 15 | If you or your current employer have already signed the Google CLA (even if it 16 | was for a different project), you probably don't need to do it again. 17 | 18 | Visit to see your current agreements or to 19 | sign a new one. 20 | 21 | ### Review our Community Guidelines 22 | 23 | This project follows [Google's Open Source Community 24 | Guidelines](https://opensource.google/conduct/). 25 | 26 | ## Contribution process 27 | 28 | ### Code Reviews 29 | 30 | All submissions, including submissions by project members, require review. We 31 | use GitHub pull requests for this purpose. Consult 32 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 33 | information on using pull requests. -------------------------------------------------------------------------------- /path-foundation/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. 4 | 5 | ## Before you begin 6 | 7 | ### Sign our Contributor License Agreement 8 | 9 | Contributions to this project must be accompanied by a 10 | [Contributor License Agreement](https://cla.developers.google.com/about) (CLA). 11 | You (or your employer) retain the copyright to your contribution; this simply 12 | gives us permission to use and redistribute your contributions as part of the 13 | project. 14 | 15 | If you or your current employer have already signed the Google CLA (even if it 16 | was for a different project), you probably don't need to do it again. 17 | 18 | Visit to see your current agreements or to 19 | sign a new one. 20 | 21 | ### Review our Community Guidelines 22 | 23 | This project follows [Google's Open Source Community 24 | Guidelines](https://opensource.google/conduct/). 25 | 26 | ## Contribution process 27 | 28 | ### Code Reviews 29 | 30 | All submissions, including submissions by project members, require review. We 31 | use GitHub pull requests for this purpose. Consult 32 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 33 | information on using pull requests. 
34 | -------------------------------------------------------------------------------- /cxr-foundation/cxr_foundation/example_generator_lib_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # 3 | # Copyright 2022 Google LLC 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | """Tests for example_generator_lib.""" 17 | import unittest 18 | import example_generator_lib 19 | import numpy as np 20 | import pydicom 21 | 22 | 23 | class ExampleGeneratorLibTest(unittest.TestCase): 24 | 25 | def test_create_example(self): 26 | """Test the creation of examples.""" 27 | # This is a DICOM with a grayscale fake image. 28 | dicom_path = './testdata/fake.dcm' 29 | dicom = pydicom.dcmread(dicom_path) 30 | 31 | test_example = example_generator_lib.dicom_to_tfexample(dicom) 32 | f_dict = test_example.features.feature 33 | self.assertEqual(f_dict['image/format'].bytes_list.value[:], [b'png']) 34 | self.assertEqual(len(f_dict['image/encoded'].bytes_list.value[0]), 23287) 35 | 36 | 37 | if __name__ == '__main__': 38 | unittest.main() 39 | -------------------------------------------------------------------------------- /wet-amd-prediction/LICENSE: -------------------------------------------------------------------------------- 1 | LIMITED LICENSE: 2 | 3 | Copyright (c) 2020 Google LLC 4 | Limited License: Under no circumstance is commercial use, reproduction, or 5 | distribution permitted. Use, reproduction, and distribution are permitted 6 | solely for academic use in evaluating and reviewing claims made in 7 | Yim J. et al. (2020) "Predicting conversion to wet age related macular 8 | degeneration using deep learning", Nature Medicine doi:10.1038/s41591-020-0867-7, 9 | provided that the following conditions are met: 10 | 11 | * Any reproduction or distribution of source code must retain the above 12 | copyright notice and the full text of this license including the Disclaimer, 13 | below.
 14 | 15 | * Any reproduction or distribution in binary form must reproduce the above 16 | copyright notice and the full text of this license including the Disclaimer 17 | below in the documentation and/or other materials provided with the 18 | Distribution. 19 | 20 | * Any publication that discloses findings arising from using this source 21 | code must cite Yim J. et al. (2020) "Predicting conversion to wet age 22 | related macular degeneration using deep learning", Nature Medicine 23 | doi:10.1038/s41591-020-0867-7. 24 | 25 | DISCLAIMER 26 | 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 28 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 29 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 30 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 31 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 32 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 33 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 34 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 35 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 36 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 37 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 38 | 39 | -------------------------------------------------------------------------------- /cxr-foundation/cxr_foundation/constants.py: -------------------------------------------------------------------------------- 1 | # /usr/bin/python 2 | # 3 | # Copyright 2023 Google LLC 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | import dataclasses 17 | 18 | # GCP project ID of the embeddings endpoint. 19 | _EMBEDDINGS_PROJECT_NAME = 'gh-rad-validation-cxrembd-deid' 20 | # Vertex was initially offered only in "us-central1". 21 | _LOCATION = 'us-central1' 22 | 23 | # ID of the V1 embeddings endpoint. 24 | _V1_ENDPOINT_ID = 8327848403333545984 25 | # IDs of the V2 embeddings endpoints. 26 | _V2_C_ENDPOINT_ID = 3169498397715464192 27 | _V2_B_ENDPOINT_ID = 2156540325277990912 28 | 29 | 30 | @dataclasses.dataclass(frozen=True) 31 | class VertexEndpoint: 32 | """Vertex AI end-point configuration. 33 | 34 | Fields 35 | ------ 36 | project_name 37 | Name of the GCP Project which hosts the model. 38 | endpoint_location 39 | GCP Location (Zone) where the Vertex AI end-point is hosted. 40 | endpoint_id 41 | Numeric ID for the hosted end-point. 42 | """ 43 | 44 | project_name: str 45 | endpoint_location: str 46 | endpoint_id: int 47 | 48 | 49 | # End-point configuration for the CXR Foundation V1 model. 50 | ENDPOINT_V1 = VertexEndpoint( 51 | project_name=_EMBEDDINGS_PROJECT_NAME, 52 | endpoint_location=_LOCATION, 53 | endpoint_id=_V1_ENDPOINT_ID, 54 | ) 55 | 56 | 57 | # End-point configurations for the CXR Foundation V2 model. 58 | ENDPOINT_V2_C = VertexEndpoint( 59 | project_name=_EMBEDDINGS_PROJECT_NAME, 60 | endpoint_location=_LOCATION, 61 | endpoint_id=_V2_C_ENDPOINT_ID, 62 | ) 63 | ENDPOINT_V2_B = VertexEndpoint( 64 | project_name=_EMBEDDINGS_PROJECT_NAME, 65 | endpoint_location=_LOCATION, 66 | endpoint_id=_V2_B_ENDPOINT_ID, 67 | ) 68 | 69 | 70 | GCS_PREFIX = 'gs://' 71 | 72 | # tf.Example feature keys required by the embeddings service 73 | IMAGE_KEY = 'image/encoded' 74 | IMAGE_FORMAT_KEY = 'image/format' 75 | 76 | # Key for this library to serialize generated embeddings 77 | EMBEDDING_KEY = 'embedding' 78 | 79 | # The size of the float vector embeddings generated by the CXR foundation API 80 | DEFAULT_EMBEDDINGS_SIZE = 1376 81 | -------------------------------------------------------------------------------- /cxr-foundation/CXR_Foundation_Interactive_Demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [], 7 | "private_outputs": true, 8 | "cell_execution_strategy": "setup" 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "language_info": { 15 | "name": "python" 16 | } 17 | }, 18 | "cells": [ 19 | { 20 | "cell_type": "markdown", 21 | "source": [ 22 | "\n", 23 | " \n", 26 | " \n", 29 | "
\n", 24 | " Run in Google Colab\n", 25 | " \n", 27 | " View source on GitHub\n", 28 | "
" 30 | ], 31 | "metadata": { 32 | "id": "SmaSUdnET28O" 33 | } 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "source": [ 38 | "# CXR Foundations Demo App\n", 39 | "\n", 40 | "Run the below code cell to test out CXR Foundations with a GUI. This allows you to retrieve embeddings for x-rays, label the x-rays for some binary classification task, split the data into train and test and then finally train a linear classifier for the model. The demo can load in [NIH Chest X-ray demo x-rays](https://nihcc.app.box.com/v/ChestXray-NIHCC) but you can also bring your own DICOMs." 41 | ], 42 | "metadata": { 43 | "id": "iBVn2ob9lr6e" 44 | } 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": { 50 | "id": "yoAb5R0fgH84", 51 | "cellView": "form" 52 | }, 53 | "outputs": [], 54 | "source": [ 55 | "#@title Authenticate and Render Demo App\n", 56 | "\n", 57 | "from google.colab import auth\n", 58 | "from google.auth import default\n", 59 | "from google.auth.transport.requests import Request\n", 60 | "\n", 61 | "auth.authenticate_user()\n", 62 | "credentials, _ = default()\n", 63 | "credentials.refresh(Request())\n", 64 | "bearer_token = credentials.token\n", 65 | "\n", 66 | "import requests\n", 67 | "response = requests.get('https://cdn.jsdelivr.net/gh/Google-Health/imaging-research@latest/cxr-foundation/cxr_foundation_interactive_demo_deps/index.html')\n", 68 | "html_content = response.text\n", 69 | "html_content = html_content.replace('ACCESS_TOKEN', bearer_token)\n", 70 | "\n", 71 | "from IPython.core.display import display, HTML\n", 72 | "display(HTML(html_content))\n", 73 | "\n" 74 | ] 75 | } 76 | ] 77 | } 78 | -------------------------------------------------------------------------------- /cxr-foundation/cxr_foundation/embeddings_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # 3 | # Copyright 2022 Google LLC 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | """Module for reading and creating datasets from embeddings files generated by the CXR Foundation service. 17 | 18 | Unless specified otherwise, these functions are NOT generalizable/usable on 19 | embeddings files 20 | or tfrecord files from other sources. 21 | 22 | 23 | Expected structure of the generated TFRecord proto: 24 | 25 | features { 26 | feature { 27 | key: "embedding" 28 | value { 29 | float_list { 30 | value: -1.3924500942230225 31 | value: 0.4983276426792145 32 | value: 1.1395248174667358 33 | value: 0.6487054228782654 34 | ... 35 | } 36 | } 37 | } 38 | } 39 | """ 40 | from typing import Iterable 41 | 42 | from cxr_foundation import constants 43 | import numpy as np 44 | import tensorflow as tf 45 | 46 | 47 | def read_tfrecord_example(filename: str) -> tf.train.Example: 48 | """Read the tf.Example data contained in a single TFRecord embedding file. 49 | 50 | Args: 51 | filename: The path of the .tfrecord file to read 52 | 53 | Returns: 54 | The `tf.Example` data contained in the TFRecord. 
55 | 56 | Note: This is a convenience function for exploring/exporting. Do not use this 57 | in TF pipelines. 58 | """ 59 | raw_dataset = tf.data.TFRecordDataset(filename) 60 | # Expect only one element in the TFRecord. 61 | for raw_record in raw_dataset.take(1): 62 | example = tf.train.Example() 63 | example.ParseFromString(raw_record.numpy()) 64 | 65 | return example 66 | 67 | 68 | def _parse_example_values(example_data: tf.train.Example) -> np.ndarray: 69 | """Extract the embeddings values contained in an Example object, extracted from a file 70 | 71 | generated by the CXR foundation service. Helper function for 72 | `read_record_values`. 73 | 74 | Args: 75 | example_data: The Example object to extract the values from 76 | 77 | Returns: 78 | The 1D float array of the embeddings values 79 | 80 | Note: This is a convenience function for exploring/exporting. Do not use this 81 | in TF pipelines. 82 | """ 83 | # Unpack nested proto and create np array from google.protobuf.pyext._message.RepeatedScalarContainer 84 | try: 85 | values = np.array( 86 | example_data.features.feature[constants.EMBEDDING_KEY].float_list.value, 87 | dtype="float32", 88 | ) 89 | return values 90 | except ValueError: 91 | print( 92 | f"Input Example does not contain expected CXR Foundation embedding" 93 | f" structure." 94 | ) 95 | raise 96 | 97 | 98 | def read_tfrecord_values(filename: str) -> np.ndarray: 99 | """Read the embeddings values contained in a .tfrecord embedding file, generated by this library. 100 | 101 | Args: 102 | filename: The path of the .tfrecord file to read 103 | 104 | Returns: 105 | The 1D float array of the embeddings values 106 | 107 | Note: This is a convenience function for exploring/exporting. Do not use this 108 | in TF pipelines. 109 | """ 110 | return _parse_example_values(read_tfrecord_example(filename)) 111 | 112 | 113 | def read_npz_values(filename: str) -> np.ndarray: 114 | """Read the embeddings values contained in a .npz embedding file, generated by this package. 115 | 116 | Args: 117 | filename: The path of the .npz file to read 118 | 119 | Returns: 120 | The 1D float array of the embeddings values 121 | 122 | Note: You can load the generated .npz files without installing this package 123 | and its dependencies, by copying this simple function. You would only need to 124 | install Numpy. 125 | """ 126 | data = np.load(filename) 127 | return data[constants.EMBEDDING_KEY] 128 | 129 | 130 | def parse_serialized_example_values( 131 | serialized_example: bytes, 132 | embeddings_size: int = constants.DEFAULT_EMBEDDINGS_SIZE 133 | ) -> tf.Tensor: 134 | """Parses and extracts the embeddings values from a serialized tf.Example generated by the CXR foundation service. 135 | 136 | Args: 137 | serialized_example: The bytes of the tf.Example to parse. 138 | 139 | Returns: 140 | The 1D Tensor of float embeddings 141 | """ 142 | features = { 143 | constants.EMBEDDING_KEY: tf.io.FixedLenFeature( 144 | [embeddings_size], 145 | tf.float32, 146 | default_value=tf.constant( 147 | 0.0, shape=[embeddings_size] 148 | ), 149 | ) 150 | } 151 | parsed_tensors = tf.io.parse_example(serialized_example, features=features) 152 | return parsed_tensors[constants.EMBEDDING_KEY] 153 | 154 | 155 | def get_dataset( 156 | filenames: Iterable[str], 157 | labels: Iterable[int], 158 | embeddings_size: int = constants.DEFAULT_EMBEDDINGS_SIZE 159 | ) -> tf.data.Dataset: 160 | """Create a tf.data.Dataset from the specified tfrecord files and labels. 161 | 162 | Args: 163 | filenames: The set of .tfrecord file names. 
164 | labels: The corresponding label for each record. 165 | 166 | Returns: 167 | The Dataset, containing for each element: (embeddings, label) 168 | """ 169 | ds_embeddings = tf.data.TFRecordDataset( 170 | filenames, num_parallel_reads=tf.data.AUTOTUNE 171 | ).map(lambda x: parse_serialized_example_values(x, embeddings_size)) 172 | ds_labels = tf.data.Dataset.from_tensor_slices(labels) 173 | 174 | return tf.data.Dataset.zip((ds_embeddings, ds_labels)) 175 | -------------------------------------------------------------------------------- /wet-amd-prediction/ex_amd_model.py: -------------------------------------------------------------------------------- 1 | """exAMD prediction network architecture. 2 | 3 | Implementation of exAMD prediction network described in Yim J. et al. (2020) "Predicting conversion to wet age related macular 4 | degeneration using deep learning", Nature Medicine doi:10.1038/s41591-020-0867-7, 5 | 6 | Copyright 2020 Google LLC 7 | Limited license: see LICENSE 8 | """ 9 | 10 | import sonnet as snt 11 | import tensorflow as tf 12 | 13 | 14 | class ExAmdNet(snt.AbstractModule): 15 | """Future exAMD prediction deep learning network. 16 | 17 | Takes as input either a grey-scale 3D OCT volume or a one-hot encoded 18 | segmentation map of a 3D OCT volume. See manuscript for architecture details. 19 | """ 20 | 21 | def __init__(self, 22 | name='ex_amd_net'): 23 | """Initializes the model and parameters. 24 | 25 | Args: 26 | name: Variable name of module. 27 | """ 28 | super(ExAmdNet, self).__init__(name=name) 29 | 30 | # Convolution parameters. 31 | self._filter_chs = 32 32 | self._bottleneck_chs = 32 33 | 34 | def _build(self, inputs, is_training=True): 35 | """Internal method to build the sonnet module. 36 | 37 | Args: 38 | inputs: tensor of batch input OCT or dense segmentation maps. 39 | OCT shape: [batch, 41, 450, 450, 1] 40 | Segmentation map shape: [batch, 41, 450, 450, 17] 41 | is_training: flag for model usage when training 42 | 43 | Returns: 44 | Output tensor of module. A tensor with size equal to 45 | number of classes. 46 | """ 47 | net = inputs 48 | 49 | # First level. 
50 | net = block(net, 'l1', self._filter_chs // 4, 51 | block_kernels=[(1, 3, 3), (1, 3, 3)]) 52 | net = max_pool3d(net, pool_size=(1, 2, 2), strides=(1, 2, 2), name='l1_out') 53 | print('Shape after L1: %s' % net.shape.as_list()) 54 | 55 | # Second level 56 | net = block(net, 'l2', 57 | channels_per_layer=self._filter_chs // 2) 58 | net = max_pool3d(net, pool_size=(1, 2, 2), strides=(1, 2, 2), name='l2_out') 59 | print('Shape after L2: %s' % net.shape.as_list()) 60 | 61 | # Third level 62 | net = conv_1x1x1(net, self._bottleneck_chs * 4, 'l3_1x1x1') 63 | net = block(net, 'l3', 64 | channels_per_layer=self._filter_chs // 2) 65 | net = max_pool3d(net, pool_size=(2, 2, 2), strides=(2, 2, 2), name='l3_out') 66 | print('Shape after L3 level: %s' % net.shape.as_list()) 67 | 68 | # Fourth level 69 | net = conv_1x1x1(net, self._bottleneck_chs * 4, 'l4_1x1x1') 70 | for i in range(2): 71 | net = block(net, 'l4_b%d' % (i+1), 72 | channels_per_layer=self._filter_chs) 73 | net = max_pool3d(net, pool_size=(2, 2, 2), strides=(2, 2, 2), name='l4_out') 74 | print('Shape after L4 level: %s' % net.shape.as_list()) 75 | 76 | # Fifth level 77 | net = conv_1x1x1(net, self._bottleneck_chs * 4, 'l5_1x1x1') 78 | for i in range(2): 79 | net = block(net, 'l5_b%d' % i, 80 | channels_per_layer=self._filter_chs) 81 | net = max_pool3d(net, pool_size=(2, 2, 2), strides=(2, 2, 2), name='l5_out') 82 | print('Shape after L5 level: %s' % net.shape.as_list()) 83 | 84 | # Sixth level 85 | net = conv_1x1x1(net, self._bottleneck_chs * 8, 'l6_1x1x1') 86 | for i in range(2): 87 | net = block(net, 'l6_b%d' % i, 88 | channels_per_layer=self._filter_chs) 89 | print('Shape after L6 level: %s' % net.shape.as_list()) 90 | 91 | # Output 92 | net = snt.Conv3D(output_channels=self._bottleneck_chs * 4, 93 | kernel_shape=(1, 1, 1), 94 | stride=1, 95 | padding=snt.SAME, 96 | name='final_1x1x1')(net) 97 | print('Output shape: %s' % net.shape.as_list()) 98 | return net 99 | 100 | 101 | def conv_3d(inputs, 102 | output_channels, 103 | kernel_shape, 104 | strides, 105 | name, 106 | activation=tf.nn.relu, 107 | use_bias=True): 108 | """Wraps sonnet 3D conv module with a nonlinear activation.""" 109 | conv_out = snt.Conv3D( 110 | output_channels=output_channels, 111 | kernel_shape=kernel_shape, 112 | stride=strides, 113 | use_bias=use_bias, 114 | name=name)( 115 | inputs) 116 | return activation(conv_out) 117 | 118 | 119 | def block(inputs, 120 | name_prefix, 121 | channels_per_layer, 122 | block_kernels=None, 123 | activation=tf.nn.relu, 124 | stride=1): 125 | """Consecutive convolution filters with skip connections.""" 126 | if not block_kernels: 127 | # Full block length if not specified. 128 | block_kernels = [(1, 3, 3), (1, 3, 3), (3, 1, 1), (1, 3, 3), (1, 3, 3), 129 | (3, 1, 1)] 130 | layer_stack = [inputs] 131 | for kernel in block_kernels: 132 | # Iterate through all kernels to construct a stack of intermediate 133 | # representations. 134 | layer_stack.append( 135 | conv_3d( 136 | inputs=layer_stack[-1], 137 | output_channels=channels_per_layer, 138 | kernel_shape=kernel, 139 | strides=stride, 140 | activation=activation, 141 | name='{}_{}'.format(name_prefix, 142 | 'x'.join([str(x) for x in kernel])))) 143 | # Concatenate all representations in the layer output as final output. 
144 | output = tf.concat(layer_stack, axis=-1) 145 | return output 146 | 147 | 148 | def max_pool3d(inputs, pool_size, strides, name): 149 | return tf.keras.layers.MaxPool3D( 150 | pool_size=pool_size, strides=strides, name=name)( 151 | inputs) 152 | 153 | 154 | def conv_1x1x1(inputs, channels, name): 155 | return snt.Conv3D(output_channels=channels, 156 | kernel_shape=(1, 1, 1), 157 | stride=1, 158 | padding=snt.SAME, 159 | name=name)(inputs) 160 | 161 | -------------------------------------------------------------------------------- /cxr-foundation/cxr_foundation_interactive_demo_deps/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
⭅ Click here for fullscreen
9 | 10 |
11 |
12 | 13 |

CXR Train Classifier Over Embeddings

14 |
15 | This model runs on an endpoint from CXR Foundation; please sign up before using it. 16 |
17 |
18 | Enter your label terminology: 19 |
20 | 24 | 28 |
29 |
30 |
31 | Dataset to use: 32 |
33 | 34 | 35 | 36 | 37 |
38 | The NIH ChestX-ray14 dataset consists of over 100,000 de-identified chest x-ray images with fourteen common disease labels, text-mined from the associated radiology reports using NLP techniques. 43 | 44 |
45 |
46 | 47 |
48 | 49 | Select multiple CXR DICOM P10 files on your local file system. 50 | Loaded images will be marked by default as: 51 |
53 |
absent
54 | 🔄 55 | (click to toggle) 56 |
57 |
58 |
59 |
60 |
61 | 62 | 63 | You can toggle the image labels between
present
and 65 |
absent
by clicking the GT badge.
66 |

67 |
73 |
74 |
75 |
76 |
77 |
78 | 87 |
88 |
89 | 118 |
119 | 120 |



121 |
122 |
123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | -------------------------------------------------------------------------------- /cxr-foundation/cxr_foundation/example_generator_lib.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # 3 | # Copyright 2022 Google LLC 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | """Methods to create tf.examples for model inference via pydicom.""" 17 | 18 | 19 | import io 20 | from typing import Iterable, Union 21 | 22 | from cxr_foundation import constants 23 | import numpy as np 24 | import png 25 | import pydicom 26 | from pydicom.pixel_data_handlers.util import apply_modality_lut 27 | import tensorflow as tf 28 | 29 | _BITS_PER_BYTE = 8 30 | _WINDOWWIDTH = 'WindowWidth' 31 | _WINDOWCENTER = 'WindowCenter' 32 | 33 | 34 | def _encode_png(array: np.ndarray) -> bytes: 35 | """Converts an unsigned integer 2-D NumPy array to a PNG-encoded string. 36 | 37 | Unsigned 8-bit and 16-bit images are supported. 38 | 39 | Args: 40 | array: Array to be encoded. 41 | 42 | Returns: 43 | PNG-encoded string. 44 | 45 | Raises: 46 | ValueError: If any of the following occurs: 47 | - `array` is not 2-D. 48 | - `array` data type is unsupported. 49 | """ 50 | supported_types = frozenset([np.uint8, np.uint16]) 51 | # Sanity checks. 52 | if array.ndim != 2: 53 | raise ValueError(f'Array must be 2-D. Actual dimensions: {array.ndim}') 54 | if array.dtype.type not in supported_types: 55 | raise ValueError( 56 | 'Pixels must be either `uint8` or `uint16`. ' 57 | f'Actual type: {array.dtype.name!r}' 58 | ) 59 | 60 | # Actual conversion. 61 | writer = png.Writer( 62 | width=array.shape[1], 63 | height=array.shape[0], 64 | greyscale=True, 65 | bitdepth=_BITS_PER_BYTE * array.dtype.itemsize, 66 | ) 67 | output_data = io.BytesIO() 68 | writer.write(output_data, array.tolist()) 69 | return output_data.getvalue() 70 | 71 | 72 | def _rescale_dynamic_range(image: np.ndarray) -> np.ndarray: 73 | """Rescales the dynamic range in an integer image to use the full bit range. 74 | 75 | Args: 76 | image: An image containing unsigned integer pixels. 77 | 78 | Returns: 79 | Rescaled copy of `image` that uses all the available bits per pixel. 80 | 81 | Raises: 82 | ValueError: If pixels are not of an integer type. 83 | """ 84 | if not np.issubdtype(image.dtype, np.integer): 85 | raise ValueError( 86 | 'Image pixels must be an integer type. ' 87 | f'Actual type: {image.dtype.name!r}' 88 | ) 89 | iinfo = np.iinfo(image.dtype) 90 | return np.interp( 91 | image, (image.min(), image.max()), (iinfo.min, iinfo.max) 92 | ).astype(iinfo) 93 | 94 | 95 | def _shift_to_unsigned(image: np.ndarray) -> np.ndarray: 96 | """Shifts values by the minimum value to an unsigned array suitible for PNG. 97 | 98 | This works with signed images and converts them to unsigned versions. 
It 99 | involves an inefficient step to convert to a larger data structure for 100 | shifting all values by the minimum value in the array. It also support float 101 | data by converting them into uint16. 102 | 103 | Args: 104 | image: An image containing signed integer pixels. 105 | 106 | Returns: 107 | Copy of `image` in an unsigned format. Note that the exact same image is 108 | returned when given an unsigned version. 109 | 110 | Raises: 111 | ValueError: If pixels are not of an integer type or float. 112 | """ 113 | if image.dtype == np.uint16 or image.dtype == np.uint8: 114 | return image 115 | elif image.dtype == np.int16: 116 | image = image.astype(np.int32) 117 | return (image - np.min(image)).astype(np.uint16) 118 | elif image.dtype == np.int8: 119 | image = image.astype(np.int16) 120 | return (image - np.min(image)).astype(np.uint8) 121 | elif image.dtype in (np.float32, np.float64): 122 | uint16_max = np.iinfo(np.uint16).max 123 | image = image - np.min(image) 124 | if np.max(image) > uint16_max: 125 | image = image * (uint16_max / np.max(image)) 126 | image[image > uint16_max] = uint16_max 127 | return image.astype(np.uint16) 128 | raise ValueError( 129 | 'Image pixels must be an 8, 16 bit integer or float type. ' 130 | f'Actual type: {image.dtype.name!r}' 131 | ) 132 | 133 | 134 | def _apply_pydicom_prep(ds: pydicom.Dataset) -> np.ndarray: 135 | """Prepares pixel data after applying data handling from pydicom.""" 136 | 137 | def window_u16( 138 | image: np.ndarray, window_center: int, window_width: int 139 | ) -> np.ndarray: 140 | max_window = np.iinfo(np.uint16).max 141 | top_clip = window_center - 1 + window_width / 2 142 | bottom_clip = window_center - window_width / 2 143 | return np.interp( 144 | image.clip(bottom_clip, top_clip), 145 | (bottom_clip, top_clip), 146 | (0, max_window), 147 | ) 148 | 149 | arr = ds.pixel_array 150 | pixel_array = apply_modality_lut(arr, ds) 151 | if _WINDOWWIDTH in ds and _WINDOWCENTER in ds: 152 | window_center = ds.WindowCenter 153 | window_width = ds.WindowWidth 154 | if isinstance(ds.WindowCenter, pydicom.multival.MultiValue): 155 | window_center = int(ds.WindowCenter[0]) 156 | if isinstance(ds.WindowWidth, pydicom.multival.MultiValue): 157 | window_width = int(ds.WindowWidth[0]) 158 | pixel_array = window_u16(pixel_array, window_center, window_width) 159 | if ds.PhotometricInterpretation == 'MONOCHROME1': 160 | pixel_array = np.max(pixel_array) - pixel_array 161 | pixel_array = _shift_to_unsigned(pixel_array) 162 | # Don't rescale dynamic range for 8-bit images like CXR14. 163 | if pixel_array.dtype != np.uint8: 164 | pixel_array = _rescale_dynamic_range(pixel_array) 165 | return pixel_array 166 | 167 | 168 | def _assign_bytes_feature( 169 | feature: tf.train.Feature, value: Union[bytes, Iterable[bytes]] 170 | ) -> None: 171 | """Assigns a bytes float value into feature.""" 172 | if isinstance(value, bytes): 173 | feature.bytes_list.value[:] = [value] 174 | else: 175 | assert not isinstance(value, str) 176 | feature.bytes_list.value[:] = list(value) 177 | 178 | 179 | def png_to_tfexample(image_array: np.ndarray) -> tf.train.Example: 180 | """Create a tf.example for inference. 181 | 182 | The image will be spread to the full bit-depth of 16-bit images. 183 | 184 | Args: 185 | image_array: An image to use to create the example. 186 | 187 | Returns: 188 | example: A tf.example for inference. 189 | """ 190 | pixel_array = _shift_to_unsigned(image_array) 191 | # Don't rescale dynamic range for 8-bit images like CXR14. 
192 | if pixel_array.dtype != np.uint8: 193 | pixel_array = _rescale_dynamic_range(pixel_array) 194 | png_bytes = _encode_png(pixel_array) 195 | example = tf.train.Example() 196 | features = example.features.feature 197 | _assign_bytes_feature(features[constants.IMAGE_KEY], png_bytes) 198 | _assign_bytes_feature(features[constants.IMAGE_FORMAT_KEY], b'png') 199 | return example 200 | 201 | 202 | def dicom_to_tfexample(single_dicom: pydicom.Dataset) -> tf.train.Example: 203 | """Create a tf.example for inference. 204 | 205 | Resulting images are spread to the full bit-depth of 16-bit images. 206 | Applies apply_modality_lut first followed by window/level if prresent. 207 | 208 | Args: 209 | single_dicom: A pydicom dataset used to create the example. 210 | 211 | Returns: 212 | example: A tf.example for inference. 213 | """ 214 | image_array = _apply_pydicom_prep(single_dicom) 215 | png_bytes = _encode_png(image_array) 216 | example = tf.train.Example() 217 | features = example.features.feature 218 | _assign_bytes_feature(features[constants.IMAGE_KEY], png_bytes) 219 | _assign_bytes_feature(features[constants.IMAGE_FORMAT_KEY], b'png') 220 | return example 221 | -------------------------------------------------------------------------------- /cxr-foundation/README.md: -------------------------------------------------------------------------------- 1 | CXR Foundation 2 | 3 | **CXR Foundation** is a tool to generate custom embeddings from chest x-ray (CXR) images. These embeddings can be used to develop custom machine learning models for CXR with less data and compute. You can read more about the research behind CXR Foundation in our recent publication: [Simplified Transfer Learning for Chest Radiography Models Using Less Data](https://doi.org/10.1148/radiol.212482). 4 | 5 | ## How to use CXR Foundation 6 | 7 | 1. Fill out the [API access form](https://forms.gle/SJBSawfDw19JZRA59). Your provided Gmail account will be used for access once approved for non-clinical use. 8 | 2. Once granted you’ll get an email and can use the CXR Foundation API with your own images. 9 | 3. If you want to get started in a no code environment, please run our [GUI-based demo](https://colab.research.google.com/github/Google-Health/imaging-research/blob/master/cxr-foundation/CXR_Foundation_Interactive_Demo.ipynb). This provides a web interface to: 10 | * import your own images in DICOM format and view these with windowing options 11 | * label them 12 | * Retrieve embeddings 13 | * Split data into train and eval sets 14 | * Train a linear probe 15 | * Evaluate performance on the eval set and pick an operating point 16 | * Running in Colab this app will let you: 17 | We’ve also linked it directly to CXR-14 data, so you can try it out on public data as well. 18 | 4. You also have access to this GitHub repository containing Python source code to: 19 | 1. Convert DICOM images into PNGs suitable for calling CXR Foundation 20 | 2. Call the API to generate embeddings from the PNG 21 | 5. [Install the gcloud CLI](https://cloud.google.com/sdk/docs/install) and [log in](https://cloud.google.com/sdk/gcloud/reference/auth/login): 22 | 23 | gcloud auth application-default login 24 | 25 | 6. Clone the Repository into a local directory 26 | 27 | git clone https://github.com/Google-Health/imaging-research.git 28 | cd imaging-research/cxr-foundation 29 | 30 | 7. Install the CXR Foundation package: 31 | 32 | pip install . 33 | 34 | 8. Run the CXR Foundation code: 35 | 1. 
Upload your chest x-ray DICOMs or PNGs to a cloud bucket or use a local directory. 36 | 2. Generate and save embeddings. 37 | 3. Read them and use them to train your model. 38 | 39 | See the notebooks for examples of how to use the embeddings service and this package. 40 | 41 | - [GUI-based demo](https://colab.research.google.com/github/Google-Health/imaging-research/blob/master/cxr-foundation/CXR_Foundation_Interactive_Demo.ipynb) allows you to get started exploring embeddings really quickly. 42 | - [CXR_Foundation_Demo.ipynb](./CXR_Foundation_Demo.ipynb) - Demonstrates how to use the CXR Foundation service to generate embeddings from DICOMs. Uses the generated embeddings in a model. 43 | - [MIMIC_Embeddings_Demo.ipynb](MIMIC_Embeddings_Demo.ipynb) - Uses the already generated embeddings from the [MIMIC CXR Embeddings Database](https://physionet.org/content/image-embeddings-mimic-cxr/1.0/) in a model. 44 | 45 | Have questions? Email [cxr-foundation@google.com](mailto:cxr-foundation@google.com). 46 | 47 | ## Third Party Apps 48 | 49 | CXR Foundation is also available on [Superbio.ai](https://app.superbio.ai/apps/247?id=640613393e3000de4dfb424d) as an online app. After agreeing to Google’s Terms for the CXR Foundation tool, you can access and utilize the app. 50 | 51 | ## Package APIs - Generating and Using Embeddings 52 | 53 | The following code block highlights the pertinent functions. See the notebooks for demo usage. 54 | 55 | ```python 56 | from cxr_foundation.inference import generate_embeddings 57 | from cxr_foundation.embeddings_data import read_tfrecord_values, read_npz_values, get_dataset 58 | 59 | 60 | help(generate_embeddings) 61 | help(read_tfrecord_values) 62 | help(read_npz_values) 63 | help(get_dataset) 64 | ``` 65 | 66 | Note: `.npz` format embeddings files generated by this package and the Foundation API, CAN be read without this package. If you want to use generated embeddings files in a Python environment, but don't want to install this package and its dependencies in the same environment, just copy the `embeddings_data.read_npz_values` function into one of your modules, which only requires numpy. 67 | 68 | ## General Notes 69 | 70 | - Google does not keep a copy of any images sent. 71 | - Google monitors daily query volume and aggregates on a per-user and per-organization basis. Access can be revoked if a user or organization exceeds a reasonable query volume. 72 | - If you consented to follow-up, Google may reach out for feedback. 73 | - Please use the following reference for any published work: 74 | - Sellergren AB, Chen C, Nabulsi Z, Li Y, Maschinot A, Sarna A, Huang J, Lau C, Kalidindi SR, Etemadi M, Garcia-Vicente F, Melnick D, Liu Y, Eswaran K, Tse D, Beladia N, Krishnan D, Shetty S. Simplified Transfer Learning for Chest Radiography Models Using Less Data. Radiology. 2022 Nov;305(2):454-465. doi: 10.1148/radiol.212482. Epub 2022 Jul 19. PMID: 35852426. 75 | 76 | ## Contributing 77 | 78 | See [`CONTRIBUTING.md`](CONTRIBUTING.md) for details. 79 | 80 | ## License 81 | 82 | See [`LICENSE`](LICENSE) for details. 83 | 84 | ## Disclaimer 85 | 86 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 87 | 88 | # Model Card for CXR Foundation 89 | 90 | ## Model Details 91 | 92 | ### Overview 93 | 94 | This model generates embeddings for chest x-rays (CXRs). Embeddings are n-dimensional vectors of floating points representing a projection of the original image into a compressed feature space capable of describing potential abnormalities that exist in the image. These embeddings are to be used by “downstream models” for final tasks such as detecting a particular abnormality in a CXR. The model uses the EfficientNet-L2 architecture (https://arxiv.org/pdf/1911.04252v4.pdf). It was trained on 821,544 CXRs from India and the US using abnormal vs. normal labels, i.e. the image contained any kind of abnormality, and the Supervised Contrastive loss (https://arxiv.org/abs/2004.11362v1). The abnormal vs. normal labels were obtained from more granular labels (e.g. pneumothorax, fracture) as well as regular expressions on radiology reports (https://pubmed.ncbi.nlm.nih.gov/34471144/). 95 | 96 | ### Version 97 | 98 | name: v1.0 99 | date: 2022-07-19 100 | 101 | ### Owners 102 | 103 | Andrew Sellergren, asellerg@google.com 104 | 105 | ### Licenses 106 | 107 | - See [CXR Foundation - Additional Terms of Service](https://forms.gle/SJBSawfDw19JZRA59). 108 | 109 | ### References 110 | 111 | - https://arxiv.org/pdf/1911.04252v4.pdf 112 | - https://arxiv.org/pdf/1912.11370.pdf 113 | - https://arxiv.org/abs/2004.11362v1 114 | - https://pubmed.ncbi.nlm.nih.gov/34471144/ 115 | 116 | ### Citations 117 | 118 | - Sellergren A, Chen C, et al. Simplified Transfer Learning for Chest Radiography Models Using Less Data. Radiology. 2022. 119 | 120 | ## Considerations 121 | 122 | ### Use Cases 123 | 124 | - Embeddings can reduce barriers to entry for training custom models with less data, setup, and compute. 125 | - Embeddings can allow for quick evaluation. 126 | 127 | ### Limitations 128 | 129 | - The model was trained using only data from the US and India and may not generalize well to data from other countries, patient populations, or manufacturers not used in training. 130 | - The model is only used to generate embeddings of the user-owned dataset. It does not generate any predictions or diagnosis on its own. 131 | 132 | ### Ethical Considerations 133 | 134 | - Risk: Although Google does not store permanently any data sent to this model, it is the data owner's responsibility to ensure that Personally identifiable information (PII) and Protected Health Information (PHI) are removed prior to being sent to the model. \ 135 | - Mitigation Strategy: Do not send data containing PII or PHI. 136 | -------------------------------------------------------------------------------- /ct-foundation/LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 
9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 
180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /cxr-foundation/LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. 
For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /derm-foundation/LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 
12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 
135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 
194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /path-foundation/LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. 
For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 
203 | -------------------------------------------------------------------------------- /cxr-foundation/cxr_foundation_interactive_demo_deps/cxr.css: -------------------------------------------------------------------------------- 1 | :root { 2 | --text-color: rgb(67 80 102); 3 | --body-bg: #f8f9fa; 4 | --border: #ced4da; 5 | } 6 | 7 | body { 8 | font-family: 'Roboto', sans-serif; 9 | font-size: 16px; 10 | line-height: 1.6; 11 | color: var(--text-color); 12 | background-color: var(--body-bg); 13 | -webkit-user-select: none; /* Safari */ 14 | -moz-user-select: none; /* Firefox */ 15 | -ms-user-select: none; /* Internet Explorer/Edge */ 16 | user-select: none; /* Supported by modern browsers */ 17 | } 18 | 19 | .container { 20 | display: flex; 21 | flex-direction: column; 22 | align-items: center; 23 | } 24 | 25 | h1, h2, h3, h4, h5, h6 { 26 | font-family: 'Roboto', sans-serif; 27 | font-weight: bold; 28 | margin-bottom: 1rem; 29 | } 30 | 31 | button { 32 | display: inline-block; 33 | padding: 0.75rem 1.5rem; 34 | font-size: 1rem; 35 | text-align: center; 36 | text-decoration: none; 37 | border-width: 1px; 38 | border-radius: 5px; 39 | cursor: pointer; 40 | transition: background-color 0.3s ease; 41 | } 42 | 43 | .module { 44 | background-color: white; 45 | border-radius: 5px; 46 | border: 1px solid var(--border); 47 | margin: 25px; 48 | padding: 15px; 49 | width: fit-content; 50 | display: flex; 51 | flex-direction: column; 52 | align-items: center; 53 | } 54 | 55 | .terminology-inputs { 56 | display: flex; 57 | flex-direction: row; 58 | justify-content: flex-start; 59 | } 60 | 61 | .terminology-label { 62 | display: flex; 63 | flex-direction: column; 64 | padding: 5px; 65 | margin: 5px; 66 | align-items: center; 67 | } 68 | 69 | .terminology-label span { 70 | font-size: 12px; 71 | } 72 | 73 | .terminology-label input[type="text"] { 74 | margin-top: 5px; 75 | padding: 5px; 76 | border: 1px solid #ced4da; 77 | border-radius: 5px; 78 | background-color: #eee; 79 | text-align: center; 80 | } 81 | 82 | .module.dataset { 83 | width: 500px 84 | } 85 | 86 | .module.dataset #loadDatasetButton { 87 | padding: 3px; 88 | } 89 | 90 | #selectFile { 91 | display: block; 92 | width: 100%; 93 | max-width: 500px; 94 | padding: 0.75rem 1.5rem; 95 | font-size: 1rem; 96 | line-height: 1.5; 97 | color: #495057; 98 | background-color: #fff; 99 | border: 1px solid #ced4da; 100 | border-radius: 5px; 101 | cursor: pointer; 102 | transition: background-color 0.3s ease, border-color 0.3s ease; 103 | margin-top: 30px; 104 | } 105 | 106 | #selectFile:hover { 107 | background-color: #e9ecef; 108 | border-color: #adb5bd; 109 | } 110 | 111 | #selectFile::-webkit-file-upload-button { 112 | visibility: hidden; 113 | } 114 | 115 | #selectFile::before { 116 | content: 'Choose Files'; 117 | display: inline-block; 118 | padding: 0.375rem 0.75rem; 119 | font-size: 1rem; 120 | font-weight: bold; 121 | line-height: 1.5; 122 | color: buttontext; 123 | background-color: buttonface; 124 | border: none; 125 | border-radius: 3px; 126 | cursor: pointer; 127 | margin-right: 1rem; 128 | } 129 | 130 | #selectFile:hover::before { 131 | background-color: #0056b3; 132 | } 133 | 134 | h1 { font-size: 2.5rem; } 135 | 136 | .viewer-container { 137 | width: 300px; 138 | height: 300px; 139 | margin: 3px; 140 | } 141 | 142 | .viewer-container:has(.popup) { 143 | background-image: linear-gradient( 144 | 45deg, 145 | black, 146 | gray 147 | ); 148 | 149 | } 150 | .viewer { 151 | position: relative; 152 | width: 300px; 153 | height: 300px; 154 | 
cursor: crosshair; 155 | } 156 | 157 | .zoomButton { 158 | position: absolute; 159 | color: white; 160 | text-shadow: -1px -1px 0 #000; 161 | 162 | width: 15px; 163 | height: 15px; 164 | bottom: 10px; 165 | right: 5px; 166 | } 167 | 168 | .popup .zoomButton { 169 | top: 0px; 170 | right: 0px; 171 | visibility: hidden; 172 | } 173 | 174 | .popup .zoomButton::before { 175 | visibility:visible; 176 | position: absolute; 177 | color: white; 178 | content: '✕'; 179 | z-index: 5; 180 | right: 5px; 181 | } 182 | 183 | /* hide zoom buttons of other images */ 184 | .childPopup :not(.popup) > .zoomButton { 185 | display: none; 186 | } 187 | 188 | #backdrop { 189 | position: fixed; 190 | background-color: #000; 191 | opacity: 0; 192 | top: 0; 193 | left: 0; 194 | width: 100%; 195 | height: 100%; 196 | z-index: -1; 197 | } 198 | 199 | #backdrop:has(+.childPopup) { 200 | position: fixed; 201 | background-color: #000; 202 | opacity: .6; 203 | transition: opacity 0.5s ease; 204 | z-index: 3; 205 | } 206 | 207 | .popup { 208 | position: fixed; 209 | top: 50%; 210 | left: 50%; 211 | transform: translate(-50%, -50%) scale(2); 212 | z-index: 3; 213 | } 214 | 215 | .viewList { 216 | display: flex; 217 | flex-direction: row; 218 | flex-wrap: wrap; 219 | } 220 | 221 | 222 | .badge { 223 | position: absolute; 224 | padding: 0px 5px; 225 | border-radius: 12px; 226 | color: black; 227 | font-size: 12px; 228 | font-weight: bold; 229 | text-align: center; 230 | display: none; 231 | transition: background-color 0.2s ease-in-out, color 0.2s ease-in-out; 232 | } 233 | 234 | .image-loader-container { 235 | display: inline-block; 236 | } 237 | 238 | .badge-in-text { 239 | position: unset; 240 | display: inline-flex; 241 | vertical-align: middle; 242 | margin: 0 3px; 243 | white-space: nowrap; 244 | width: fit-content; 245 | } 246 | 247 | .badge.ai { 248 | top: 30px; 249 | left: 5px; 250 | background: grey; 251 | } 252 | 253 | .ai-positive .badge.ai { 254 | background-color: #f0ad4e; 255 | display: block; 256 | } 257 | 258 | .ai-negative .badge.ai { 259 | background-color: #d3d3d3; 260 | display: block; 261 | } 262 | 263 | .ai-negative.positive .badge.ai { 264 | color: red; 265 | } 266 | 267 | .negative.ai-positive .badge.ai { 268 | color: red; 269 | } 270 | 271 | 272 | .badge.ai::before { 273 | content: "AI: "; 274 | } 275 | 276 | 277 | .badge.gt { 278 | top: 5px; 279 | left: 5px; 280 | background: grey; 281 | 282 | } 283 | 284 | .positive .badge.gt { 285 | background-color: #f0ad4e; 286 | display: block; 287 | } 288 | 289 | .negative .badge.gt { 290 | background-color: #d3d3d3; 291 | display: block; 292 | } 293 | 294 | .badge.gt::before { 295 | content: "GT: "; 296 | } 297 | 298 | .badge.dataset { 299 | top: 3px; 300 | right: 3px; 301 | border-radius: 2px; 302 | color: white; 303 | } 304 | 305 | .training .badge.dataset { 306 | background: #5bc0de; 307 | display: block; 308 | } 309 | 310 | .training .badge.dataset::before { 311 | content: "training set"; 312 | } 313 | 314 | .eval .badge.dataset { 315 | background: #5cb85c; 316 | display: block; 317 | } 318 | 319 | .eval .badge.dataset::before { 320 | content: "eval set"; 321 | } 322 | 323 | .badge-in-text .badge { 324 | position: unset; 325 | border-width: 1px; 326 | border-color: black; 327 | border-style: solid; 328 | } 329 | 330 | .embedding::before { 331 | content: "✅"; 332 | position: absolute; 333 | bottom: 10px; 334 | left: 10px; 335 | width: 20px; 336 | height: 20px; 337 | animation: moveToCorner 1s ease-in-out forwards; 338 | z-index: 1; 339 | color: 
lightyellow; 340 | } 341 | 342 | @keyframes moveToCorner { 343 | from { 344 | bottom: 50%; 345 | left: 50%; 346 | scale: 3; 347 | opacity: 1; 348 | } 349 | to { 350 | bottom: 8px; 351 | left: 5px; 352 | scale: 1; 353 | opacity: 0.7; 354 | } 355 | } 356 | 357 | .positive { 358 | position: relative; 359 | } 360 | 361 | .negative { 362 | position: relative; 363 | } 364 | 365 | .toggleButton { 366 | height: 12px; 367 | position: absolute; 368 | top: 3px; 369 | left: 90px; 370 | cursor: alias; 371 | font-size: 1rem; 372 | } 373 | 374 | #toast-container { 375 | position: fixed; 376 | top: 10px; 377 | right: 10px; 378 | z-index: 9999; 379 | } 380 | 381 | .toast { 382 | background-color: #FF5F6D; 383 | color: white; 384 | padding: 10px 20px; 385 | margin-top: 10px; 386 | border-radius: 5px; 387 | box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1); 388 | opacity: 0; 389 | transition: opacity 0.5s ease-in-out; 390 | } 391 | 392 | .center-child { 393 | position: relative; 394 | } 395 | 396 | .loader { 397 | position: fixed; 398 | left: 50%; 399 | bottom: 2em; 400 | font-size:48px; 401 | color: #F00; 402 | width: 1em; 403 | height: 1em; 404 | box-sizing: border-box; 405 | border-radius: 50%; 406 | transform: translateX(-50%) rotateX(-60deg) perspective(1000px); 407 | } 408 | .loader:before, 409 | .loader:after { 410 | content: ''; 411 | display: block; 412 | position: absolute; 413 | box-sizing: border-box; 414 | top: 0; 415 | left: 0; 416 | width: inherit; 417 | height: inherit; 418 | border-radius: inherit; 419 | animation: flowerFlow 1s ease-out infinite; 420 | } 421 | .loader:after { 422 | animation-delay: .4s; 423 | } 424 | 425 | @keyframes flowerFlow { 426 | 0% { 427 | opacity: 1; 428 | transform: rotate(0deg); 429 | box-shadow: 0 0 0 -.5em currentcolor, 430 | 0 0 0 -.5em currentcolor, 431 | 0 0 0 -.5em currentcolor, 432 | 0 0 0 -.5em currentcolor, 433 | 0 0 0 -.5em currentcolor, 434 | 0 0 0 -.5em currentcolor, 435 | 0 0 0 -.5em currentcolor, 436 | 0 0 0 -.5em currentcolor; 437 | } 438 | 100% { 439 | opacity: 0; 440 | transform: rotate(180deg); 441 | box-shadow: -1em -1em 0 -.35em currentcolor, 442 | 0 -1.5em 0 -.35em currentcolor, 443 | 1em -1em 0 -.35em currentcolor, 444 | -1.5em 0 0 -.35em currentcolor, 445 | 1.5em -0 0 -.35em currentcolor, 446 | -1em 1em 0 -.35em currentcolor, 447 | 0 1.5em 0 -.35em currentcolor, 448 | 1em 1em 0 -.35em currentcolor; 449 | } 450 | } 451 | 452 | .thumbnail { 453 | width: 100px; 454 | margin: 1px; 455 | } 456 | 457 | .loading { 458 | animation: pulse 2.5s infinite; 459 | } 460 | 461 | @keyframes pulse { 462 | 0%, 100% { 463 | background-color: white; 464 | } 465 | 50% { 466 | background-color: yellow; 467 | } 468 | } 469 | 470 | .loaded::before { 471 | content:'[add icon symbol here]'; 472 | display:inline-block; 473 | vertical-align: top; 474 | line-height: 1em; 475 | width: 1em; 476 | height:1em; 477 | margin-right: 0.3em; 478 | text-align: center; 479 | color: #999; 480 | 481 | } 482 | 483 | .step3 { 484 | position: fixed; 485 | right: 0px; 486 | bottom: 10px; 487 | background: lightskyblue; 488 | opacity: 90%; 489 | padding: 10px; 490 | border-color: black; 491 | border-width: 1px; 492 | border-style: solid; 493 | z-index: 2; 494 | } 495 | 496 | .step3-container { 497 | display: flex; 498 | justify-content: space-between; 499 | height: 300px; 500 | width: 100%; 501 | flex-direction: row; 502 | } 503 | 504 | .vflex { 505 | display: flex; 506 | flex-direction: column; 507 | justify-content: space-between; 508 | } 509 | 510 | #metrics p { 511 | margin: 0; 512 | } 513 | 
514 | .verticalSeperator { 515 | width: 10px; margin: 10px 50px; background-color: gray; 516 | } 517 | 518 | .verticalSeperator:has(+ div[style*="display: none"]) { 519 | display: none; 520 | } 521 | 522 | /**** tabs ****/ 523 | .tabs { 524 | width: 100%; 525 | } 526 | 527 | .tabs .content { 528 | display: none; 529 | opacity: 0; 530 | padding: 20px; 531 | border-top: 2px solid var(--border); 532 | } 533 | 534 | .tabs input[type='radio'] { 535 | width: 0; 536 | height: 0; 537 | opacity: 0; 538 | } 539 | 540 | .tabs label { 541 | cursor: pointer; 542 | display: inline-flex; 543 | justify-content: center; 544 | align-items: center; 545 | width: fit-content; 546 | height: 30px; 547 | background-color: #eee; 548 | border-style: solid solid none solid; 549 | border-width: 2px; 550 | border-color: transparent; 551 | border-radius: 5px 5px 0 0; 552 | padding: 0 4px; 553 | } 554 | 555 | #tab1:checked+label { 556 | border-color: var(--border); 557 | } 558 | 559 | #tab2:checked+label { 560 | border-color: var(--border); 561 | } 562 | 563 | #tab1:checked~#content1 { 564 | display: block; 565 | width: unset; 566 | opacity: 1; 567 | } 568 | 569 | #tab2:checked~#content2 { 570 | display: block; 571 | width: unset; 572 | opacity: 1; 573 | } 574 | 575 | .fullscreenNote { 576 | position: fixed; 577 | top: 5px; 578 | animation: disappear 0s ease-in 5s forwards; 579 | height: 100%; 580 | width: 100%; 581 | background: rgba(0, 0, 0, 0.6); 582 | color: yellow; 583 | } 584 | 585 | @keyframes disappear { 586 | to { 587 | opacity: 0; 588 | height: 0; 589 | width: 0; 590 | } 591 | } -------------------------------------------------------------------------------- /cxr-foundation/cxr_foundation/inference.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # 3 | # Copyright 2023 Google LLC 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | """Collection of functions to generate embeddings.""" 17 | import base64 18 | import enum 19 | import io 20 | import logging 21 | import os 22 | from typing import Any, Iterable, Sequence, Union 23 | 24 | from cxr_foundation import constants 25 | from cxr_foundation import example_generator_lib 26 | from google.api_core import exceptions 27 | from google.api_core.client_options import ClientOptions 28 | from google.api_core.retry import Retry 29 | from google.cloud import aiplatform 30 | import numpy as np 31 | from PIL import Image 32 | import pydicom 33 | import tensorflow as tf 34 | import tensorflow_text as tf_text 35 | import tensorflow_hub as hub 36 | 37 | _RETRIABLE_TYPES = ( 38 | exceptions.TooManyRequests, # HTTP 429 39 | exceptions.InternalServerError, # HTTP 500 40 | exceptions.BadGateway, # HTTP 502 41 | exceptions.ServiceUnavailable, # HTTP 503 42 | exceptions.DeadlineExceeded, # HTTP 504 43 | ) 44 | 45 | _API_ENDPOINT = 'us-central1-aiplatform.googleapis.com' 46 | _VIEW_POSITION = 'ViewPosition' 47 | _FRONTAL_VIEW_POSITIONS = ('AP', 'PA') 48 | 49 | _ELIXR_B_RESPONSE_SHAPE = { 50 | 'img_emb': (32, 768), 51 | 'all_contrastive_img_emb': (32, 128), 52 | 'contrastive_txt_emb': (128,), 53 | } 54 | _ELIXR_C_RESPONSE_SHAPE = (1, 8, 8, 1376) 55 | 56 | 57 | class ModelVersion(enum.Enum): 58 | V1 = enum.auto() # CXR Foundation model V1. 59 | V2 = enum.auto() # Data efficient classification output from 2-stage ELIXR model. 60 | V2_CONTRASTIVE = enum.auto() # Contrastive output from 2-stage ELIXR model. 61 | 62 | 63 | class InputFileType(enum.Enum): 64 | PNG = 'png' 65 | DICOM = 'dicom' 66 | 67 | def __str__(self): 68 | return self.value 69 | 70 | 71 | class OutputFileType(enum.Enum): 72 | TFRECORD = 'tfrecord' 73 | NPZ = 'npz' 74 | 75 | def __str__(self): 76 | return self.value 77 | 78 | 79 | def _image_id_to_filebase(image_id: str) -> str: 80 | filebase, _ = os.path.splitext(os.path.basename(image_id)) 81 | return filebase 82 | 83 | 84 | def _output_file_name( 85 | input_file: str, output_dir: str, format: OutputFileType 86 | ) -> str: 87 | filebase = _image_id_to_filebase(input_file) 88 | if format == OutputFileType.TFRECORD: 89 | return os.path.join(output_dir, f'{filebase}.tfrecord') 90 | elif format == OutputFileType.NPZ: 91 | return os.path.join(output_dir, f'{filebase}.npz') 92 | raise ValueError('Unknown file type.') 93 | 94 | 95 | def generate_embeddings( 96 | input_files: Iterable[str], 97 | output_dir: str, 98 | input_type: InputFileType, 99 | output_type: OutputFileType, 100 | overwrite_existing: bool = False, 101 | model_version: ModelVersion = ModelVersion.V1, 102 | ) -> None: 103 | """Generate embedding files from a set of input image files. 104 | 105 | Parameters 106 | ---------- 107 | input_files 108 | The set of image files to generate the embeddings from. 109 | output_dir 110 | The directory to write the embedding files to. The output file names will be 111 | constructed 112 | from the base name of the input files and the output file type. 113 | input_type 114 | The file type of the input images. DICOM or PNG. 115 | overwrite_existing 116 | If an output file already exists, whether to overwrite or skip inference. 117 | model_version 118 | The CXR foundation model version. 119 | 120 | Raises 121 | ------ 122 | ValueError 123 | If the `model_version` is unsupported. 
124 | """ 125 | if model_version == ModelVersion.V1: 126 | embeddings_fn = embeddings_v1 127 | elif model_version == ModelVersion.V2: 128 | embeddings_fn = lambda x: embeddings_v2(x, 'img_emb') 129 | elif model_version == ModelVersion.V2_CONTRASTIVE: 130 | embeddings_fn = lambda x: embeddings_v2(x, 'all_contrastive_img_emb') 131 | else: 132 | raise ValueError('Model version {model_version.name!r} is unsupported.') 133 | 134 | for file in input_files: 135 | output_file = _output_file_name( 136 | file, output_dir=output_dir, format=output_type 137 | ) 138 | 139 | if not overwrite_existing and os.path.exists(output_file): 140 | logging.info(f'Found existing output file. Skipping: {output_file!r}') 141 | continue 142 | 143 | image_example = create_example_from_image( 144 | image_file=file, input_type=input_type 145 | ) 146 | assert constants.IMAGE_KEY in image_example.features.feature 147 | 148 | embeddings = embeddings_fn(image_example) 149 | 150 | save_embeddings( 151 | embeddings, 152 | output_file=output_file, 153 | format=output_type, 154 | image_example=image_example, 155 | ) 156 | logging.info(f'Successfully generated {output_file!r}') 157 | 158 | 159 | def embeddings_v1(image_example: tf.train.Example) -> np.ndarray: 160 | """Create CXR Foundation V1 model embeddings. 161 | 162 | Parameters 163 | ---------- 164 | image_example: TF Example with image bytes. 165 | 166 | Returns 167 | ------- 168 | NumPy array of shape (1376,). 169 | """ 170 | instance = { 171 | 'b64': base64.b64encode(image_example.SerializeToString()).decode() 172 | } 173 | response = _embeddings_from_service( 174 | instance, 175 | constants.ENDPOINT_V1.project_name, 176 | constants.ENDPOINT_V1.endpoint_location, 177 | constants.ENDPOINT_V1.endpoint_id, 178 | ) 179 | assert len(response) == 1 180 | assert len(response[0]) == 1 181 | embeddings = np.array(response[0][0], dtype=np.float32) 182 | assert embeddings.shape == (1376,) 183 | return embeddings 184 | 185 | 186 | def embeddings_v2(image_example: tf.train.Example, fetch_key: str) -> np.ndarray: 187 | """Create CXR Foundation V2 model embeddings. 188 | 189 | This is a two-step process: 190 | - Query ELIXR-C for a 1x8x8x1376 dimension embedding. 191 | - Query ELIXR-B with the embedding from the previous step to obtain a semantic 192 | embedding for the text generation model. 193 | 194 | Parameters 195 | ---------- 196 | image_example: TF Example with image bytes. 197 | fetch_key: which output to fetch from the inference results. 198 | 199 | Returns 200 | ------- 201 | NumPy array of shape (32, 768). For data efficient learning features. OR 202 | NumPy array of shape (32, 128). For image-text aligned contrastive features. 
203 | """ 204 | instance = { 205 | 'b64': base64.b64encode(image_example.SerializeToString()).decode() 206 | } 207 | elixr_c_response = _embeddings_from_service( 208 | instance, 209 | constants.ENDPOINT_V2_C.project_name, 210 | constants.ENDPOINT_V2_C.endpoint_location, 211 | constants.ENDPOINT_V2_C.endpoint_id, 212 | ) 213 | elixr_c_embedding = np.expand_dims( 214 | np.array(elixr_c_response[0], dtype=np.float32), axis=0 215 | ) 216 | assert elixr_c_embedding.shape == _ELIXR_C_RESPONSE_SHAPE 217 | instance = { 218 | 'image_feature': elixr_c_embedding.tolist(), 219 | 'ids': np.zeros((1, 1, 128), dtype=np.int32).tolist(), 220 | 'paddings': np.zeros((1, 1, 128), dtype=np.float32).tolist(), 221 | } 222 | elixr_b_response = _embeddings_from_service( 223 | instance, 224 | constants.ENDPOINT_V2_B.project_name, 225 | constants.ENDPOINT_V2_B.endpoint_location, 226 | constants.ENDPOINT_V2_B.endpoint_id, 227 | ) 228 | assert len(elixr_b_response) == 1 229 | assert fetch_key in elixr_b_response[0] 230 | elixr_b_embedding = np.array( 231 | elixr_b_response[0][fetch_key], dtype=np.float32 232 | ) 233 | assert elixr_b_embedding.shape == _ELIXR_B_RESPONSE_SHAPE[fetch_key] 234 | return elixr_b_embedding 235 | 236 | 237 | def tokenize(preprocessor, text): 238 | out = preprocessor(tf.constant([text])) 239 | ids = out['input_word_ids'].numpy().astype(np.int32) 240 | masks = out['input_mask'].numpy().astype(np.float32) 241 | paddings = 1.0 - masks 242 | end_token_idx = ids == 102 243 | ids[end_token_idx] = 0 244 | paddings[end_token_idx] = 1.0 245 | ids = np.expand_dims(ids, axis=1) 246 | paddings = np.expand_dims(paddings, axis=1) 247 | assert ids.shape == (1, 1, 128) 248 | assert paddings.shape == (1, 1, 128) 249 | return ids, paddings 250 | 251 | 252 | def generate_elixr_text_embeddings(text): 253 | preprocessor = hub.KerasLayer( 254 | "https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3") 255 | text = text.lower() 256 | ids, paddings = tokenize(preprocessor, text) 257 | instance = { 258 | # dummy image input 259 | 'image_feature': np.zeros([1, 8, 8, 1376], dtype=np.float32).tolist(), 260 | 'ids': ids.tolist(), 261 | 'paddings': paddings.tolist(), 262 | } 263 | response = _embeddings_from_service( 264 | instance, 265 | constants.ENDPOINT_V2_B.project_name, 266 | constants.ENDPOINT_V2_B.endpoint_location, 267 | constants.ENDPOINT_V2_B.endpoint_id, 268 | ) 269 | assert len(response) == 1 270 | assert 'contrastive_txt_emb' in response[0] 271 | embedding = np.array( 272 | response[0]['contrastive_txt_emb'], dtype=np.float32 273 | ) 274 | assert embedding.shape == _ELIXR_B_RESPONSE_SHAPE['contrastive_txt_emb'] 275 | return embedding 276 | 277 | 278 | def create_example_from_image( 279 | image_file: str, input_type: InputFileType 280 | ) -> tf.train.Example: 281 | """Create a tf.train.Example from an image file.""" 282 | with open(image_file, 'rb') as f: 283 | if input_type == InputFileType.PNG: 284 | img = np.asarray(Image.open(io.BytesIO(f.read())).convert('L')) 285 | return example_generator_lib.png_to_tfexample(img) 286 | elif input_type == InputFileType.DICOM: 287 | dicom = pydicom.dcmread(io.BytesIO(f.read())) 288 | if ( 289 | _VIEW_POSITION in dicom 290 | and dicom.ViewPosition not in _FRONTAL_VIEW_POSITIONS 291 | ): 292 | raise RuntimeError( 293 | f'DICOM file: {image_file} - view position is not in accepted' 294 | ' set: ', 295 | _FRONTAL_VIEW_POSITIONS, 296 | ) 297 | return example_generator_lib.dicom_to_tfexample(dicom) 298 | 299 | raise ValueError('Unknown file type.') 300 | 301 | 302 | def 
_is_retryable(exc): 303 | return isinstance(exc, _RETRIABLE_TYPES) 304 | 305 | 306 | def _embeddings_from_service( 307 | instance: dict[Any, Any], 308 | project_name: str, 309 | location: str, 310 | endpoint_id: int, 311 | ) -> Any: 312 | """Returns embeddings from a Vertex (AI Platform) model prediction endpoint. 313 | 314 | Parameters 315 | ---------- 316 | instance 317 | dict type input instance for prediction. 318 | project_name 319 | The GCP project name that hosts embeddings API. 320 | location 321 | The GCP Location (Zone) where the model serving end-point is deployed. 322 | endpoint_id 323 | The numerical endpoint ID of the embeddings API. 324 | 325 | Returns 326 | ------ 327 | The embeddings generated by the service. Differences in Vertex 328 | end-point configurations may change the return type. The caller is 329 | responsible for interpreting this value and extracting the requisite 330 | data. 331 | """ 332 | api_client = aiplatform.gapic.PredictionServiceClient( 333 | client_options=ClientOptions(api_endpoint=_API_ENDPOINT) 334 | ) 335 | 336 | endpoint = api_client.endpoint_path( 337 | project=project_name, location=location, endpoint=endpoint_id 338 | ) 339 | retry_policy = Retry(predicate=_is_retryable) 340 | response = api_client.predict( 341 | endpoint=endpoint, instances=[instance], retry=retry_policy, timeout=60 342 | ) 343 | return response.predictions 344 | 345 | 346 | def save_embeddings( 347 | embeddings: np.ndarray, 348 | output_file: str, 349 | format: OutputFileType, 350 | image_example: tf.train.Example = None, 351 | ): 352 | """Save the embeddings values to a numpy or tfrecord file. 353 | 354 | Parameters 355 | --------- 356 | embeddings 357 | The vector embeddings values to save 358 | output_file 359 | The file path to save to 360 | format 361 | The format to save the embeddings to - .npz or .tfrecord. 362 | image_example 363 | The original Example generated from the image. This is only required if 364 | saving as .tfrecord. 365 | """ 366 | embeddings_array = embeddings.astype(np.float32).flatten() 367 | 368 | if format == OutputFileType.NPZ: 369 | # Keyed by "embedding" 370 | np.savez(output_file, embedding=embeddings_array) 371 | elif format == OutputFileType.TFRECORD: 372 | if image_example is None: 373 | raise RuntimeError( 374 | 'Missing image_example param required for saving as tfrecord.' 375 | ) 376 | 377 | # Add embeddings values to example 378 | image_example.features.feature[constants.EMBEDDING_KEY].float_list.value[ 379 | : 380 | ] = embeddings_array 381 | 382 | # Remove unnecessary existing fields to prevent serializing them 383 | for key in (constants.IMAGE_FORMAT_KEY, constants.IMAGE_KEY): 384 | if key in image_example.features.feature: 385 | del image_example.features.feature[key] 386 | 387 | with tf.io.TFRecordWriter(output_file) as w: 388 | w.write(image_example.SerializeToString()) 389 | 390 | else: 391 | raise ValueError('Unknown file type.') 392 | -------------------------------------------------------------------------------- /derm-foundation/README.md: -------------------------------------------------------------------------------- 1 | # Derm Foundation 2 | 3 | **Derm Foundation** is a tool to generate 4 | [embeddings](https://developers.google.com/machine-learning/crash-course/embeddings/video-lecture) 5 | from dermatological images. These embeddings can be used to develop custom 6 | machine learning models for dermatology use-cases with less data and compute 7 | compared to traditional model development methods. 
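To make that concrete, the sketch below trains a scikit-learn classifier head on precomputed embedding vectors. It is only an illustrative sketch, not the Demo Notebook's actual code: the file locations, the `.npz` key, and the use of the default `img_id`/`diagnostic` column names are all assumptions.

```python
# Illustrative sketch only: train a small classifier on precomputed
# Derm Foundation embeddings. Paths, the .npz key, and the column names
# ("img_id", "diagnostic") are assumptions, not the Demo Notebook's exact setup.
import os

import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

metadata = pd.read_csv("metadata.csv")  # one row per image

features, labels = [], []
for _, row in metadata.iterrows():
    npz_path = os.path.join("embeddings", f"{row['img_id']}.npz")
    if not os.path.exists(npz_path):
        continue  # skip images without a stored embedding
    features.append(np.load(npz_path)["embedding"])  # one float vector per image
    labels.append(row["diagnostic"])

x = np.stack(features)
y = np.array(labels)
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0)

classifier = LogisticRegression(max_iter=1000).fit(x_train, y_train)
print("Held-out accuracy:", classifier.score(x_test, y_test))
```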
8 | 
9 | ## How to use the Derm Foundation API
10 | 
11 | 1.  Decide if you want to get access as an individual or a group. For more information, see [Access Options](#access-options).
12 | 
13 | 1.  With the individual or group email identity at hand from the previous step,
14 |     fill out the [API access form](https://forms.gle/VBFuzSJXhQjNmF776).
15 | 
16 | 1.  Once access is granted, you’ll be notified via the provided email address
17 |     and can start using the API.
18 | 
19 | 1.  The [Demo Notebook](https://colab.research.google.com/github/Google-Health/imaging-research/blob/master/derm-foundation/derm_foundation_demo.ipynb) shows you how to use the API to
20 |     train a sample model [with our test data](#use-our-test-data). You can
21 |     modify the Demo Notebook to train a model using
22 |     [your own data](#use-your-own-data). This Notebook provides an example of
23 |     the following steps:
24 | 
25 |     * Generating a temporary access token to grant the API access to images in
26 |       GCS.
27 |     * Calling the API with a given GCS bucket name, GCS object path, and the
28 |       access token.
29 |     * Saving the embeddings.
30 |     * Using the embeddings to train a simple model.
31 |     * Evaluating the results of the model.
32 | 
33 | 1.  If you need support or have questions, please [contact us](#contact-us).
34 | 
35 | ## Use our test data
36 | 
37 | Upon gaining access to the API, you'll also have access to publicly available
38 | data we've curated specifically for testing. This is to help you get started
39 | with your initial experiments. The default state of the
40 | [Demo Notebook](https://colab.research.google.com/github/Google-Health/imaging-research/blob/master/derm-foundation/derm_foundation_demo.ipynb) is set to use this test data, which is
41 | stored in a
42 | [Cloud Storage (GCS) bucket](https://cloud.google.com/storage/docs/creating-buckets)
43 | managed by us for your convenience.
44 | 
45 | ## Use your own data
46 | 
47 | WARNING: You hold responsibility for the data stored in your GCS bucket that you
48 | use with the API. It's important to comply with all the terms of use any data is subject to.
49 | 
50 | NOTE: The [Demo Notebook](https://colab.research.google.com/github/Google-Health/imaging-research/blob/master/derm-foundation/derm_foundation_demo.ipynb) demonstrates how to call the
51 | API using short-lived access tokens. These tokens provide temporary access to
52 | the API for processing your images and are specific to the individual running
53 | the Colab. It's important to note that the API is stateless and does not store
54 | the images it processes.
55 | 
56 | 1.  If you don't have access to an existing
57 |     [GCP Project](https://cloud.google.com/storage/docs/projects), you need to
58 |     [create one](https://cloud.google.com/free).
59 | 
60 | 1.  [Create a GCS bucket](https://cloud.google.com/storage/docs/creating-buckets)
61 |     in the above project.
62 | 
63 | 1.  On your local machine
64 |     [install the gcloud SDK](https://cloud.google.com/sdk/docs/install) and
65 |     [log in](https://cloud.google.com/sdk/gcloud/reference/auth/login):
66 | 
67 |     ```
68 |     gcloud auth application-default login
69 |     ```
70 | 
71 | 1.  From your local machine use the
72 |     [gcloud storage commands](https://cloud.google.com/sdk/gcloud/reference/storage)
73 |     to transfer images in PNG format to the GCS bucket you set up in the
74 |     previous step. If you have a large number of files to upload, you may
75 |     consider using the
76 |     [`rsync` command](https://cloud.google.com/sdk/gcloud/reference/storage/rsync)
77 |     instead of `cp`.
78 | 
79 |     You should also include a path to a CSV file in gcs_metadata_csv. This CSV should contain a column with the file names of the images you're uploading (titled 'img_id' by default) and a label column for the task you want to train on (titled 'diagnostic' by default). These column titles are exposed as parameters, so you can adjust them when you modify the Demo Notebook to match your CSV.
80 | 
81 | 1.  Make sure that [the email identity you selected](#how-to-gain-access) has
82 |     the necessary permissions to view the images. The simplest method is to
83 |     assign the predefined role of `roles/storage.objectViewer` to the chosen
84 |     email identity. There are
85 |     [several ways to do this](https://cloud.google.com/storage/docs/access-control/using-iam-permissions#bucket-add).
86 |     You should familiarize yourself with
87 |     [GCS access control](https://cloud.google.com/storage/docs/access-control).
88 | 
89 | 1.  Modify the [Demo Notebook](https://colab.research.google.com/github/Google-Health/imaging-research/blob/master/derm-foundation/derm_foundation_demo.ipynb#scrollTo=OxzYsc8NDpwa) and replace the values for:
90 |     * gcp_project
91 |     * gcs_bucket_name
92 |     * gcs_metadata_csv
93 |     * gcs_image_dir (leave blank if the images are in the root directory)
94 |     * label_column (the name of the column for the label you're training on); and
95 |     * img_join_column (the name of the column you want to join your image files on)
96 | 
97 |     with the values from your GCS bucket.
98 | 
99 |     Also make sure you uncheck the "gcs_use_precomputed embeddings" flag.
100 | 
101 | ## Access Options
102 | 
103 | You have the option to request access to the API either as
104 | [an individual](#as-an-individual-non-gmail-account) or for [a group](#as-a-group-recommended).
105 | Choose the process that best aligns with your needs. Remember to note the email
106 | identifier for which you will be requesting access. It should be in one of these
107 | formats:
108 | 
109 | * YOUR-GROUP-NAME@YOUR-DOMAIN
110 | * INDIVIDUAL-ID@YOUR-DOMAIN
111 | * INDIVIDUAL-ID@gmail.com
112 | 
113 | ### As a group (recommended)
114 | 
115 | If your organization is a Google Workspace or Google Cloud Platform (GCP)
116 | customer, contact your Google admin and ask them to create a group with the list
117 | of individuals who will be using the API. Let them know that this group is used
118 | for contacting you and also as a security principal for authorizing your access
119 | to the API.
120 | 
121 | ![Create Google Group](img/create-group.png)
122 | 
123 | Otherwise,
124 | [create a free Cloud Identity Account](https://cloud.google.com/identity/docs/set-up-cloud-identity-admin)
125 | for your domain name and in the process become the interim Google admin for your
126 | organization. Visit [Google Admin console](https://admin.google.com/) and create
127 | the above-mentioned group. If your individual identities are unknown to Google,
128 | they will need to follow the process for the [individuals](#as-an-individual)
129 | before you can add them to the group.
130 | 
131 | ### As an individual (non-gmail account)
132 | This section applies for the INDIVIDUAL-ID@YOUR-DOMAIN case (e.g.
`person@university.org` or `person@company.com`) 133 | 134 | If your organization is a Google Workspace or GCP customer, identity federation 135 | is most likely set up between your corporate identity directory and 136 | [Google Identity and Access Management](https://cloud.google.com/security/products/iam) 137 | and therefore individuals already have Google identities in the form of their 138 | corporate emails. Check with your IT department to find out whether identity 139 | federation is already in place or will be established soon. 140 | 141 | Otherwise, 142 | [create a Google identity based on your email](https://accounts.google.com/signup/v2/webcreateaccount?flowName=GlifWebSignIn&flowEntry=SignUp). 143 | Opt for the "use my current email address instead" option, as shown in the 144 | screen capture below. 145 | 146 | IMPORTANT: You should choose a password that is different from your corporate 147 | password. 148 | 149 | ![Create Google Id](img/create-identity.png) 150 | 151 | ### As an individual (`@gmail.com` account) 152 | 153 | If you want to sign up as an individual with a gmail account, you can submit the form directly with your gmail address. 154 | 155 | 156 | ## General notes 157 | 158 | * Google does not keep a copy of any images sent. 159 | * Google monitors daily query volume and aggregates on a per-user and 160 | per-organization basis. Access can be revoked if a user or organization 161 | exceeds a reasonable query volume. 162 | 163 | ## Contributing 164 | 165 | See [`CONTRIBUTING.md`](docs/CONTRIBUTING.md) for details. 166 | 167 | ## License 168 | 169 | See [`LICENSE`](LICENSE) for details. 170 | 171 | ## Disclaimer 172 | 173 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 174 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 175 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 176 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 177 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 178 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 179 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 180 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 181 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 182 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 183 | 184 | ## Contact us 185 | 186 | Please reach out to us at 187 | [derm-foundation@google.com](mailto:derm-foundation@google.com]) for issues such 188 | as, but not limited to: 189 | 190 | - Seeking technical assistance 191 | - Providing feedback 192 | - Requesting permissions for publications 193 | - Discussing clinical use cases 194 | - Discussing enterprise requirements such as: 195 | - Fitting within strict security perimeters of your organization 196 | - Governing your data in GCS 197 | - Training and serving custom models at scale on 198 | [Vertex AI](https://cloud.google.com/vertex-ai?hl=en) 199 | 200 | # Model Card for Derm Foundation 201 | 202 | This tool uses an ML model to provide the embedding results. This section 203 | briefly overviews the background and limitations of that model. 204 | 205 | ## Model Details 206 | 207 | ### Overview 208 | 209 | This model generates embeddings for images of dermatological skin conditions. 
210 | Embeddings are n-dimensional vectors of floating points representing a
211 | projection of the original image into a compressed feature space capable of
212 | describing image features relevant to differentiating skin conditions and
213 | properties (age, body part, etc.). These embeddings are to be used by
214 | “downstream models” for final tasks such as condition category classification or
215 | body part identification. The model uses the BiT-101x3 architecture
216 | (https://arxiv.org/pdf/1912.11370.pdf). It was trained in two stages. The first
217 | pre-training stage used contrastive learning to train on a large number of
218 | public image-text pairs from the internet. The image component of this
219 | pre-trained model was then fine-tuned for condition classification and a couple
220 | of other downstream tasks using a number of clinical datasets (see below).
221 | 
222 | Training Data:
223 | 
224 | * Base model (pre-training): A large number of health-related image-text pairs
225 |   from the public web
226 | * SFT (supervised fine-tuned) model: tele-dermatology datasets from the United
227 |   States and Colombia, a skin cancer dataset from Australia, and additional
228 |   public images. The images come from a mix of device types, including images
229 |   from smartphone cameras, other cameras, and dermatoscopes. The images also
230 |   have a mix of image takers; images may have been taken by clinicians during
231 |   consultations or self-captured by patients.
232 | 
233 | ### Version
234 | 
235 | ```
236 | name: v1.0.0
237 | date: 2023-12-19
238 | ```
239 | 
240 | ### Owners
241 | 
242 | ```
243 | derm-foundation@google.com
244 | ```
245 | 
246 | ### Licenses
247 | 
248 | - See
249 |   [Derm Foundation - Additional Terms of Service](https://forms.gle/VBFuzSJXhQjNmF776).
250 | 
251 | ### References
252 | 
253 | - BiT: https://arxiv.org/pdf/1912.11370.pdf
254 | - CLIP: https://arxiv.org/abs/2103.00020
255 | 
256 | ## Considerations
257 | 
258 | ### Use Cases
259 | 
260 | - Embeddings can reduce barriers to entry for training custom models for
261 |   derm-specific tasks with less data, setup, and compute.
262 | - Embeddings can allow for quick evaluation.
263 | 
264 | ### Limitations
265 | 
266 | - The base model was trained using image-text pairs from the public web. These
267 |   images come from a variety of sources but may be noisy or low-quality. The
268 |   SFT (supervised fine-tuned) model was trained on data from a limited set of
269 |   countries (United States, Colombia, Australia, public images) and settings
270 |   (mostly clinical). It may not generalize well to data from other countries,
271 |   patient populations, or image types not used in training.
272 | - The model is only used to generate embeddings of the user-owned dataset. It
273 |   does not generate any predictions or diagnoses on its own.
274 | - Developers should ensure any downstream model developed using this tool is
275 |   validated to ensure performance is consistent across the intended demographics,
276 |   e.g., skin tone, age, sex, gender, etc.
277 | 
278 | ### Ethical Considerations
279 | 
280 | - Risk: Although Google does not permanently store any data sent to this
281 |   model, it is the data owner's responsibility to ensure that personally
282 |   identifiable information (PII) and Protected Health Information (PHI) are
283 |   removed prior to being sent to the model.
284 | - Mitigation Strategy: Do not send data containing PII or PHI.
285 | -------------------------------------------------------------------------------- /path-foundation/README.md: -------------------------------------------------------------------------------- 1 | # Path Foundation 2 | 3 | Path Foundation is a tool that enables users to transform pathology images into 4 | a machine learning representation of the images known as embeddings. Embeddings 5 | are a list of floating point values that represent a projection of the original 6 | image into a compressed feature space. This tool utilizes a model trained via 7 | self-supervised learning (see [model card](#model-card-for-path-foundation-model) below) in order to create embeddings 8 | for image patches from histopathology whole slide images (WSIs). These 9 | embeddings can be used to develop custom machine learning models for pathology 10 | use-cases using less data and compute compared to traditional model development 11 | methods. 12 | 13 | For more information please see this [video](https://www.youtube.com/watch?v=Q_09Kqv1y1E). 14 | 15 | You can read more about the research and underlying model in our 16 | manuscript: 17 | [Domain-specific optimization and diverse evaluation of self-supervised models for histopathology](https://arxiv.org/abs/2310.13259). 18 | 19 | ## How to use the Path Foundation API 20 | 21 | 1. Decide if you want to get access as an individual or a group. For more information see [Access Options](#access-options) 22 | 23 | 1. With the individual or group email identity at hand from the previous step, 24 | fill out the [API access form](http://bit.ly/fm-path-access-form). 25 | 26 | 1. Once access is granted, you’ll be notified via the provided email address 27 | and can start using the API. 28 | 29 | 1. The [Demo Colab](https://colab.research.google.com/github/Google-Health/imaging-research/blob/master/path-foundation/linear-classifier-demo.ipynb) shows you how 30 | to train a sample linear classifier. You can experiment with 31 | [our sample digitized pathology images & training labels](#use-our-test-data) 32 | to understand the API, then modify the Colab to use 33 | [your own data](#use-your-own-data). 34 | 35 | The Colab includes instructions for: 36 | 37 | * Generating training labels in JSON format from masks in PNG format. 38 | * Generating a temporary access token for the API to read the DICOM images 39 | from a [Cloud DICOM Store](https://cloud.google.com/healthcare-api/docs/concepts/dicom) 40 | on behalf of the person running the Colab. 41 | * Calling the API on WSI DICOMs stored in Cloud DICOM store to generate embeddings 42 | * Training a linear classifier using these embeddings and training labels from a 43 | [Cloud Storage (GCS) bucket](https://cloud.google.com/storage) and evaluating the result of this classifier. 44 | * An alternative way of calling the API to generate embeddings from digital pathology images (JPEG, TIFF or PNG) stored in GCS 45 | 46 | We have a [video walkthrough](https://www.youtube.com/watch?v=Q_09Kqv1y1E) of the demo if you'd like more information. 47 | [Contact us](#contact) if you find training your custom model is more 48 | involved and requires more advanced batching. We're happy to help! 49 | 50 | ## Use our test data 51 | 52 | Upon gaining access to the API, you'll also have access to publicly available 53 | data we've curated specifically for testing on the [Demo Colab](https://colab.research.google.com/github/Google-Health/imaging-research/blob/master/path-foundation/linear-classifier-demo.ipynb). 
This data comprises of DICOM images stored in a Google Cloud DICOM Store and training labels in PNG and JSON formats in a GCS bucket. The [Demo Colab](https://colab.research.google.com/github/Google-Health/imaging-research/blob/master/path-foundation/linear-classifier-demo.ipynb) 54 | uses this test data to train a tumor detector. 55 | 56 | ## Use your own data 57 | 58 | WARNING: You hold responsibility for the data stored in your GCS bucket that you 59 | use with the API. It's important to comply with all the terms of use any data is subject to. 60 | 61 | To use your own data with the API, you will need the following GCP resources: 62 | * A [GCP Project](https://cloud.google.com/storage/docs/projects) 63 | * A Cloud DICOM Store in the project for storing digitized pathology images 64 | * A GCS bucket in the project for storing data in file format (i.e. training 65 | labels, embeddings, and DICOM files) 66 | 67 | WARNING: While the API can read data from any 68 | [DICOMweb-compliant](https://www.dicomstandard.org/using/dicomweb) storage 69 | system, Google Cloud DICOM Store is optimized for the scale and latency required 70 | for handling 71 | [digitized pathology images](https://cloud.google.com/healthcare-api/docs/how-tos/dicom-digital-pathology). 72 | We cannot guarantee the same performance or functionality with other storage 73 | systems. 74 | 75 | NOTE: The demo Colab demonstrates how to call the API using short-lived access 76 | tokens. These tokens permit the API to read and process the images on behalf of 77 | the individual who is running the Colab. It's important to note that the API 78 | cannot access your data independently. The API processes images when you 79 | instruct it to using a time-limited access token and does not store the images 80 | after processing. 81 | 82 | 1. If you don't have access to an existing GCP Project, you will need to 83 | [create one](https://cloud.google.com/free). 84 | 85 | 1. Follow [these instructions](https://cloud.google.com/storage/docs/creating-buckets) 86 | to create the GCS bucket. 87 | 88 | 1. Follow [these instructions](https://cloud.google.com/healthcare-api/docs/how-tos/dicom) 89 | to create a Cloud DICOM Store. 90 | 91 | 1. Use [Google Cloud IAM panel](https://console.cloud.google.com/iam-admin) to 92 | grant the following permissions to the GCP resources: 93 | 94 | * Allow the individual running the rest of the steps to manage objects in the 95 | GCS bucket by granting them the predefined role `roles/storage.objectAdmin`. 96 | 97 | * Allow [the identity(ies) who have access to our API](#how-to-gain-access) to: 98 | * read training labels and persist embeddings in the GCS bucket by 99 | granting them the predefined role `roles/storage.objectAdmin`. 100 | * read DICOM images from the Cloud DICOM Store by granting them the 101 | predefined role `roles/healthcare.dicomViewer`. 102 | 103 | 1. On your local machine 104 | [install the gcloud SDK](https://cloud.google.com/sdk/docs/install) and 105 | [log in](https://cloud.google.com/sdk/gcloud/reference/auth/login): 106 | 107 | gcloud auth application-default login 108 | 109 | 1. From your local machine use the 110 | [gcloud storage commands](https://cloud.google.com/sdk/gcloud/reference/storage) 111 | to transfer training labels in PNG or JSON format and DICOM files to the GCS 112 | bucket. 
You may use the [`rsync` command](https://cloud.google.com/sdk/gcloud/reference/storage/rsync) 113 | instead of `cp` to handle the large volume of files that's typical for 114 | digitized pathology use cases. 115 | 116 | 1. Follow [these instructions](https://cloud.google.com/healthcare-api/docs/how-tos/dicom-import-export#gcloud) 117 | to bulk import DICOM files from the GCS bucket to your Cloud DICOM Store. 118 | 119 | 1. Modify the [Demo Notebook](https://github.com/Google-Health/imaging-research/blob/master/path-foundation/linear-classifier-demo.ipynb) to point to your data: 120 | 121 | 1 To use your training labels, replace `hai-cd3-foundations-pathology-vault-entry` 122 | with the name of your GCS bucket. 123 | 124 | 1 To use your DICOM images, change the the Cloud DICOM Store urls. They take 125 | the following format: 126 | `https://healthcare.googleapis.com/v1/projects/YOUR_PROJECT_ID/locations/YOUR_LOCATION/datasets/YOUR_DATASET_ID/dicomStores/YOUR_DICOM_STORE_ID/`. You need to substitute `YOUR_PROJECT_ID` with the project Id you obtained in step 127 | 1 and `YOUR_LOCATION`, `YOUR_DATASET_ID`, `YOUR_DICOM_STORE_ID` from step 3. 128 | 129 | ## Access Options 130 | 131 | You have the option to request access to the API either as 132 | [an individual](#as-an-individual-non-gmail-account) or for [a group](#as-a-group-recommended). 133 | Choose the process that best aligns with your needs. Remember to note the email 134 | identifier for which you will be requesting access. It should be in one of these 135 | formats: 136 | 137 | * YOUR-GROUP-NAME@YOUR-DOMAIN 138 | * INDIVIDUAL-ID@YOUR-DOMAIN 139 | * INDIVIDUAL-ID@gmail.com 140 | 141 | ### As a group (recommended) 142 | 143 | If your organization is a Google Workspace or Google Cloud Platform (GCP) 144 | customer, contact your Google admin and ask them to create a group with the list 145 | of individuals who will be using the API. Let them know that this group is used 146 | for contacting you and also as a security principal for authorizing your access 147 | to the API. 148 | 149 | ![Create Google Group](img/create-group.png) 150 | 151 | Otherwise, 152 | [create a free Cloud Identity Account](https://cloud.google.com/identity/docs/set-up-cloud-identity-admin) 153 | for your domain name and in the process become the interim Google admin for your 154 | organization. Visit [Google Admin console](https://admin.google.com/) and create 155 | the above-mentioned group. If your individual identities are unknown to Google, 156 | they will need to follow the process for the [individuals](#as-an-individual) 157 | before you can add them to the group. 158 | 159 | ### As an individual (non-gmail account) 160 | This section applies for the INDIVIDUAL-ID@YOUR-DOMAIN case (e.g. `person@university.org` or `person@company.com`) 161 | 162 | If your organization is a Google Workspace or GCP customer, identity federation 163 | is most likely set up between your corporate identity directory and 164 | [Google Identity and Access Management](https://cloud.google.com/security/products/iam) 165 | and therefore individuals already have Google identities in the form of their 166 | corporate emails. Check with your IT department to find out whether identity 167 | federation is already in place or will be established soon. 168 | 169 | Otherwise, 170 | [create a Google identity based on your email](https://accounts.google.com/signup/v2/webcreateaccount?flowName=GlifWebSignIn&flowEntry=SignUp). 
171 | Opt for the "use my current email address instead" option, as shown in the 172 | screen capture below. 173 | 174 | IMPORTANT: You should choose a password that is different from your corporate 175 | password. 176 | 177 | ![Create Google Id](img/create-identity.png) 178 | 179 | ### As an individual (`@gmail.com` account) 180 | 181 | If you want to sign up as an individual with a gmail account, you can submit the form directly with your gmail address. 182 | 183 | ## General notes 184 | 185 | * Google does not keep a copy of any DICOM images processed. 186 | * Google monitors daily query volume and aggregates on a per-user and 187 | per-organization basis. Access can be revoked if a user or organization 188 | exceeds a reasonable query volume. 189 | 190 | ## Contributing 191 | 192 | See [`CONTRIBUTING.md`](CONTRIBUTING.md) for details. 193 | 194 | ## License 195 | 196 | See [`LICENSE`](LICENSE) for details. 197 | 198 | # Model Card for Path Foundation Model 199 | 200 | This tool uses an ML model to provide the embedding results. This section 201 | briefly overviews the background and limitations of that model. 202 | 203 | ## Model Details 204 | 205 | This self-supervised model produces embeddings for image patches from 206 | histopathology whole slide images (WSIs). Embeddings are n-dimensional vectors 207 | of floating point values that represent a projection of the original image into 208 | a compressed feature space. The model uses the ViT-S architecture and was 209 | trained across magnifications with domain specific tuning and optimization. The 210 | resulting feature representations provided by the model offer robust input 211 | for downstream tasks in histopathology. Additional information can be found in 212 | the preprint [manuscript](https://arxiv.org/abs/2310.13259). 213 | 214 | ### Version 215 | * Version: 1.0.0 216 | * Date: 2023-12-19 217 | 218 | ### License 219 | Research use only. Not suitable for product development. 220 | - See [Path Foundation - Additional Terms of Service](https://docs.google.com/forms/d/1auyo2VkzlzuiAXavZy1AWUyQHAqO7T3BLK-7ofKUvug/viewform). 221 | 222 | ### Manuscript 223 | https://arxiv.org/abs/2310.13259 224 | 225 | ### Contact 226 | path-foundation@google.com 227 | 228 | 229 | ### Intended Use 230 | * Path Foundation can reduce the training data, compute, and technical 231 | expertise necessary to develop task-specific models for H&E pathology slides. 232 | * Embeddings from the model can be used for a variety of user-defined downstream 233 | tasks including, but not limited to: cancer detection, classification, and 234 | grading; metadata prediction (stain, tissue type, specimen type, etc.); and 235 | quality assessment (e.g., imaging artifacts). 236 | * The embeddings can also be used to explore the feature space of histopathology 237 | images for biomarker development associated with prognostic and predictive 238 | tasks. 239 | 240 | ### Training Data 241 | Training data consisted of hematoxylin and eosin stained (H&E) WSIs from The 242 | Cancer Genome Atlas (TCGA) accessed via https://portal.gdc.cancer.gov. 243 | Training was performed using 60 million patches across three magnifications 244 | (~2 µm/pixel, ~1 µm/pixel, ~0.5 µm/pixel) and 32 TCGA studies (representing 245 | different cancer types). 
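To put the pixel spacings above in physical terms: they correspond to roughly 5x, 10x, and 20x magnification (as noted under Limitations below), and the field of view covered by a patch scales accordingly. The arithmetic below uses a 224-pixel patch edge purely as an assumed example; it is not a statement about the API's actual patch size.

```python
# Back-of-the-envelope field-of-view calculation per magnification.
# The 224-pixel patch edge is an assumed example value only.
patch_edge_px = 224
for magnification, um_per_px in [("5x", 2.0), ("10x", 1.0), ("20x", 0.5)]:
    edge_um = patch_edge_px * um_per_px
    print(f"{magnification}: ~{um_per_px} um/pixel -> ~{edge_um:.0f} um per patch edge")
```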
246 | 247 | ### Performance & Validation 248 | Linear probe evaluation was conducted across a diverse set of benchmark tasks 249 | involving 17 unique tissue types and 12 unique cancer types and spanning 250 | different optimal magnifications and task types. 251 | See [preprint manuscript](https://arxiv.org/abs/2310.13259) for more details including performance on additional slide-level tasks (eg. tissue type classification and molecular findings), as well as results for data titration with fine tuning for select tasks. 252 | 253 | ### Risks 254 | Although Google does not store any data sent to this model, it is the data 255 | owner's responsibility to ensure that Personally identifiable information (PII) 256 | and Protected Health Information (PHI) are removed prior to being sent to the 257 | model. 258 | Mitigation Strategy: Do not send data containing PII or PHI. 259 | Training dataset is a de-identified public dataset and pathology imaging (pixel 260 | data) does not contain PHI. 261 | 262 | ### Limitations 263 | Intended for research purposes only. The model has only been validated for a 264 | limited number of the many potential downstream tasks involving H&E 265 | histopathology. This model version was trained and validated only on H&E images 266 | from a limited set of scanners and countries. Model output may not generalize 267 | well to data from other image types, patient populations, or scanner 268 | manufacturers not used in training. Task-specific validation remains an 269 | important aspect of model development by the end-user. Training and validation 270 | was performed on patches corresponding to 5x, 10x, and 20x magnification 271 | (~2 µm/pixel, ~1 µm/pixel, ~0.5 µm/pixel, respectively). Using input patches 272 | corresponding to magnifications other than these has not been evaluated. The 273 | model is only used to generate embeddings of user-owned data or the provided, 274 | publicly available data. It does not generate any predictions or diagnosis on 275 | its own. As with any research, developers should ensure any downstream 276 | application is validated to understand performance using data that is 277 | appropriately representative of the intended use setting (e.g., age, sex, 278 | gender, condition, scanner, etc.). 279 | -------------------------------------------------------------------------------- /ct-foundation/README.md: -------------------------------------------------------------------------------- 1 | # CT Foundation 2 | 3 | **CT Foundation** is a tool that enables users to transform Computed Tomography 4 | (CT) volumes comprised of axial slices into an information-rich vector 5 | representation known as an 6 | [embedding](https://developers.google.com/machine-learning/crash-course/embeddings/video-lecture). 7 | These embeddings can be used to develop custom machine learning models for task 8 | specific use-cases using less data and compute compared to traditional model 9 | development methods. 10 | 11 | If you use any part of CT Foundation or this repository, please cite our paper: 12 | 13 | ``` 14 | @article{yang2024advancing, 15 | title={Advancing multimodal medical capabilities of Gemini}, 16 | author={Yang, Lin and Xu, Shawn and Sellergren, Andrew and Kohlberger, Timo and Zhou, Yuchen and Ktena, Ira and Kiraly, Atilla and Ahmed, Faruk and Hormozdiari, Farhad and Jaroensri, Tiam and others}, 17 | journal={arXiv preprint arXiv:2405.03162}, 18 | year={2024} 19 | } 20 | ``` 21 | ## How to use the CT Foundation API 22 | 23 | 1. 
Decide if you want to get access as an individual or a group. For more 24 | information see [Access Options](#access-options) 25 | 26 | 1. With the Google identity from the previous step at hand, fill out the 27 | [API access form](https://docs.google.com/forms/d/e/1FAIpQLSfkSBbCi5dOlJxuDB3t6biFEBIA9JL66A99YRZa8qR2Fn5mUA/viewform?resourcekey=0-vPNR0VQ-vibGDJ564j4mCA). 28 | 29 | 1. Once access is granted, you’ll be notified via the provided email address 30 | and can start using the API. 31 | 32 | 1. Use the 33 | [Demo Notebook](https://colab.research.google.com/github/google-health/imaging-research/blob/master/ct-foundation/CT_Foundation_Demo.ipynb) 34 | to see how to use the API to compute embeddings and how to train a sample 35 | classifier. You can experiment with 36 | [our sample CT images & training labels](#use-our-test-data) to understand 37 | the API, then modify the Colab to use [your own data](#use-your-own-data). 38 | 39 | The demo Colab includes instructions for: 40 | 41 | * Generating a temporary access token for the API to read the DICOM images 42 | from a 43 | [Cloud DICOM Store](https://cloud.google.com/healthcare-api/docs/concepts/dicom) 44 | on behalf of the person running the Colab. 45 | * Calling the API on CT scans stored in Cloud DICOM Store to generate 46 | embeddings 47 | * Training a linear classifier for lung cancer using pre-computed embeddings 48 | on [NLST](https://www.cancerimagingarchive.net/collection/nlst/) 49 | * Evaluating the result of this classifier. 50 | 51 | [Contact us](#contact) if you have questions or need help. 52 | 53 | ## Use our test data 54 | 55 | Upon gaining access to CT Foundation, you'll also have access to publicly 56 | available data we've curated specifically for testing. This includes CT studies 57 | from the [LIDC-IDRI](https://www.cancerimagingarchive.net/collection/lidc/) 58 | dataset stored in a 59 | [Google Cloud DICOM Store](https://cloud.google.com/healthcare-api/docs/concepts/dicom). 60 | We also store pre-computed embeddings run on CT Foundation from 61 | [NLST](https://www.cancerimagingarchive.net/collection/nlst/) Our 62 | [Demo Notebook](https://colab.research.google.com/github/google-health/imaging-research/blob/master/ct-foundation/CT_Foundation_Demo.ipynb) 63 | shows you how to call CT Foundation on the LIDC_IDRI DICOMS and also how to 64 | train a performant model using the precomputed NLST embeddings. 65 | 66 | ## Use your own data 67 | 68 | WARNING: You hold responsibility for the data that you use with the API. It's 69 | important to comply with all the terms of use your data is subject to. 70 | 71 | NOTE: The current version of the API expects the CT images in a 72 | [Google Cloud DICOM Store](https://cloud.google.com/healthcare-api/docs/concepts/dicom). 73 | This section provides instructions how to get your data to a Cloud DICOM Store 74 | that you own. If you need support for complying with your Cloud enterprise 75 | policies, [Contact us](#contact). We're happy to help! 76 | 77 | NOTE: The demo Colab demonstrates how to call the API using short-lived access 78 | tokens. These tokens permit the API to read and process the images on behalf of 79 | the individual who is running the Colab. It's important to note that the API 80 | cannot access your data independently. The API processes images when you 81 | instruct it to using a time-limited access token and does not store the images 82 | after processing. 
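As an illustration of what such a short-lived token is, the snippet below mints an OAuth2 access token from application-default credentials using the `google-auth` library. Treat it as a sketch only; the demo Colab has its own helper for this step.

```python
# Sketch: mint a short-lived OAuth2 access token from application-default
# credentials (e.g. after running `gcloud auth application-default login`,
# as described in the steps below).
import google.auth
from google.auth.transport.requests import Request

credentials, _ = google.auth.default(
    scopes=["https://www.googleapis.com/auth/cloud-platform"]
)
credentials.refresh(Request())
access_token = credentials.token  # expires after a short period
```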
83 | 84 | To use your own data with the API, you will need the following GCP resources: 85 | 86 | * A [GCP Project](https://cloud.google.com/storage/docs/projects) 87 | * A 88 | [Google Cloud DICOM Store](https://cloud.google.com/healthcare-api/docs/concepts/dicom) 89 | in the project for storing CT images 90 | * A GCS bucket in the project for staging dicoms before adding them to DICOM 91 | Store. This can also be used to store data labels to train your downstream 92 | model. 93 | 94 | 1. If you don't have access to an existing GCP Project, you will need to 95 | [create one](https://cloud.google.com/free). 96 | 97 | 1. Follow 98 | [these instructions](https://cloud.google.com/storage/docs/creating-buckets) 99 | to create the GCS bucket. 100 | 101 | 1. Follow 102 | [these instructions](https://cloud.google.com/healthcare-api/docs/how-tos/dicom) 103 | to create a Cloud DICOM Store. 104 | 105 | 1. Use [Google Cloud IAM panel](https://console.cloud.google.com/iam-admin) 106 | to grant the following permissions to the GCP resources: 107 | 108 | * Allow the individual running the rest of the steps to manage objects in 109 | the GCS bucket by granting them the predefined role 110 | `roles/storage.objectAdmin`. 111 | 112 | * Allow 113 | [the identity(ies) who have access to our API](#how-to-gain-access) to: 114 | 115 | * read training labels and persist embeddings in the GCS bucket by 116 | granting them the predefined role `roles/storage.objectAdmin`. 117 | * read DICOM images from the Cloud DICOM Store by granting them the 118 | predefined role `roles/healthcare.dicomViewer`. 119 | 120 | 1. On your local machine 121 | [install the gcloud SDK](https://cloud.google.com/sdk/docs/install) and 122 | [log in](https://cloud.google.com/sdk/gcloud/reference/auth/login): 123 | 124 | ``` 125 | gcloud auth application-default login 126 | ``` 127 | 128 | 1. From your local machine use the 129 | [gcloud storage commands](https://cloud.google.com/sdk/gcloud/reference/storage) 130 | to transfer DICOM files in .dcm format to 131 | the GCS bucket. You may use the 132 | [`rsync` command](https://cloud.google.com/sdk/gcloud/reference/storage/rsync) 133 | instead of `cp` to handle larger volume of files. You can also optionally add labels to train downstream models. 134 | 135 | 1. Follow 136 | [these instructions](https://cloud.google.com/healthcare-api/docs/how-tos/dicom-import-export#gcloud) 137 | to bulk import DICOM files from the GCS bucket to your Cloud DICOM Store. 138 | 139 | 1. Modify the 140 | [Demo Notebook](https://colab.research.google.com/github/google-health/imaging-research/blob/master/ct-foundation/CT_Foundation_Demo.ipynb) 141 | to point to your data: 142 | 143 | 1. If storing your labels in GCS replace `hai-cd3-foundations-ct3d-vault-entry` 144 | with the name of your GCS bucket. Otherwise import them into the Notebook from wherever you are storing them. 145 | 146 | 1. To use your DICOM images, change the the Cloud DICOM Store urls. They take 147 | the following format: 148 | `https://healthcare.googleapis.com/v1/projects/YOUR_PROJECT_ID/locations/YOUR_LOCATION/datasets/YOUR_DATASET_ID/dicomStores/YOUR_DICOM_STORE_ID/`. 149 | You need to substitute `YOUR_PROJECT_ID` with the project Id you obtained in 150 | step 1 and `YOUR_LOCATION`, `YOUR_DATASET_ID`, `YOUR_DICOM_STORE_ID` from 151 | step 3. 
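For clarity, the substitution described in the last step is plain string formatting; the sketch below assembles the DICOMweb base URL from placeholder resource IDs.

```python
# Assemble the Cloud DICOM Store base URL described above.
# All values below are placeholders; substitute your own resource IDs.
project_id = "YOUR_PROJECT_ID"
location = "YOUR_LOCATION"
dataset_id = "YOUR_DATASET_ID"
dicom_store_id = "YOUR_DICOM_STORE_ID"

dicomweb_url = (
    "https://healthcare.googleapis.com/v1/"
    f"projects/{project_id}/locations/{location}/"
    f"datasets/{dataset_id}/dicomStores/{dicom_store_id}/"
)
print(dicomweb_url)
```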
152 | 153 | ## Access Options 154 | 155 | You have the option to request access to the API either as 156 | [an individual](#as-an-individual-non-gmail-account) or for 157 | [a group](#as-a-group-recommended). Choose the process that best aligns with 158 | your needs. Remember to note the email identifier for which you will be 159 | requesting access. It should be in one of these formats: 160 | 161 | * YOUR-GROUP-NAME@YOUR-DOMAIN 162 | * INDIVIDUAL-ID@YOUR-DOMAIN 163 | * INDIVIDUAL-ID@gmail.com 164 | 165 | ### As a group (recommended) 166 | 167 | If your organization is a Google Workspace or Google Cloud Platform (GCP) 168 | customer, contact your Google admin and ask them to create a group with the list 169 | of individuals who will be using the API. Let them know that this group is used 170 | for contacting you and also as a security principal for authorizing your access 171 | to the API. 172 | 173 | ![Create Google Group](img/create-group.png) 174 | 175 | Otherwise, 176 | [create a free Cloud Identity Account](https://cloud.google.com/identity/docs/set-up-cloud-identity-admin) 177 | for your domain name and in the process become the interim Google admin for your 178 | organization. Visit [Google Admin console](https://admin.google.com/) and create 179 | the above-mentioned group. If your individual identities are unknown to Google, 180 | they will need to follow the process for the [individuals](#as-an-individual) 181 | before you can add them to the group. 182 | 183 | ### As an individual (non-gmail account) 184 | 185 | This section applies for the INDIVIDUAL-ID@YOUR-DOMAIN case (e.g. 186 | `person@university.org` or `person@company.com`) 187 | 188 | If your organization is a Google Workspace or GCP customer, identity federation 189 | is most likely set up between your corporate identity directory and 190 | [Google Identity and Access Management](https://cloud.google.com/security/products/iam) 191 | and therefore individuals already have Google identities in the form of their 192 | corporate emails. Check with your IT department to find out whether identity 193 | federation is already in place or will be established soon. 194 | 195 | Otherwise, 196 | [create a Google identity based on your email](https://accounts.google.com/signup/v2/webcreateaccount?flowName=GlifWebSignIn&flowEntry=SignUp). 197 | Opt for the "use my current email address instead" option, as shown in the 198 | screen capture below. 199 | 200 | IMPORTANT: You should choose a password that is different from the password you 201 | use for the email account. 202 | 203 | ![Create Google Id](img/create-identity.png) 204 | 205 | ### As an individual (`@gmail.com` account) 206 | 207 | If you want to sign up as an individual with a gmail account, you can submit the 208 | form directly with your gmail address. 209 | 210 | ## General notes 211 | 212 | * Google does not keep a copy of any DICOM images processed. 213 | * Google monitors daily query volume and aggregates on a per-user and 214 | per-organization basis. Access can be revoked if a user or organization 215 | exceeds a reasonable query volume. 216 | 217 | ## Contributing 218 | 219 | See [`CONTRIBUTING.md`](CONTRIBUTING.md) for details. 220 | 221 | ## License 222 | 223 | See [`LICENSE`](LICENSE) for details. 
224 | 225 | ## Contact 226 | 227 | Please reach out to us at 228 | [ct-foundation@google.com](mailto:ct-foundation@google.com]) for issues such as, 229 | but not limited to: 230 | 231 | - Seeking technical assistance 232 | - Providing feedback 233 | - Requesting permissions for publications 234 | - Discussing clinical use cases 235 | - Discussing enterprise requirements such as: 236 | - Fitting within strict security perimeters of your organization 237 | - Governing your data in GCS 238 | - Training and serving custom models at scale on 239 | [Vertex AI](https://cloud.google.com/vertex-ai?hl=en) 240 | 241 | ## Data Attribution 242 | 243 | The included demo notebook makes use of two public datasets provided by the Cancer Imaging Archive which is managed by the United States National Cancer Institute 244 | 245 | ### NLST Radiology CT Images CC BY 4.0 246 | [https://www.cancerimagingarchive.net/collection/nlst/](https://www.cancerimagingarchive.net/collection/nlst/) 247 | 248 | #### NLST Data Citation 249 | National Lung Screening Trial Research Team. (2013). Data from the National Lung Screening Trial (NLST) [Data set]. The Cancer Imaging Archive. https://doi.org/10.7937/TCIA.HMQ8-J677 250 | ### LIDC-IDRI Data Access CC BY 3.0 251 | https://www.cancerimagingarchive.net/collection/lidc-idri/ 252 | 253 | #### LIDC-IDRI Data Citation 254 | 255 | Armato III, S. G., McLennan, G., Bidaut, L., McNitt-Gray, M. F., Meyer, C. R., Reeves, A. P., Zhao, B., Aberle, D. R., Henschke, C. I., Hoffman, E. A., Kazerooni, E. A., MacMahon, H., Van Beek, E. J. R., Yankelevitz, D., Biancardi, A. M., Bland, P. H., Brown, M. S., Engelmann, R. M., Laderach, G. E., Max, D., Pais, R. C. , Qing, D. P. Y. , Roberts, R. Y., Smith, A. R., Starkey, A., Batra, P., Caligiuri, P., Farooqi, A., Gladish, G. W., Jude, C. M., Munden, R. F., Petkovska, I., Quint, L. E., Schwartz, L. H., Sundaram, B., Dodd, L. E., Fenimore, C., Gur, D., Petrick, N., Freymann, J., Kirby, J., Hughes, B., Casteele, A. V., Gupte, S., Sallam, M., Heath, M. D., Kuhn, M. H., Dharaiya, E., Burns, R., Fryd, D. S., Salganicoff, M., Anand, V., Shreter, U., Vastagh, S., Croft, B. Y., Clarke, L. P. (2015). Data From LIDC-IDRI [Data set]. The Cancer Imaging Archive. https://doi.org/10.7937/K9/TCIA.2015.LO9QL9SX 256 | 257 | 258 | # Model Card for CT Foundation 259 | 260 | This section briefly overviews the background and limitations of CT Foundation. 261 | 262 | ## Model Details 263 | ### Overview 264 | 265 | CT Foundation produces embeddings of size 1408 from a CT volume. Embeddings are 266 | n-dimensional vectors of floating points representing a projection of the 267 | original image into a compressed feature space capable of describing image 268 | features relevant to CT image analysis. The model is based on the 269 | [Video CoCa architecture](https://arxiv.org/abs/2212.04979). CT Foundation was 270 | trained in two stages. 271 | 272 | * Firstly train a medical image–specific 2D CoCa model 273 | * Use this 2D model as a basis for VideoCoCa. Training on specifically 274 | prepared axial CT slices (series of CT slices in a scan) coupled with 275 | radiology reports. 276 | 277 | The resulting feature representations provided by CT Foundation offer robust 278 | input for downstream tasks in CT image analysis. Additional information on our 279 | evaluation tasks can be found in our 280 | [blog post](https://research.google/blog/taking-medical-imaging-embeddings-3d). 
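The non-linear heads referenced in the Validation section below are, in essence, small multilayer perceptrons trained on top of the 1408-dimensional embedding. A minimal Keras sketch of such a head is shown here; the layer sizes, dropout, and binary objective are illustrative assumptions rather than the exact configuration used.

```python
# Illustrative sketch of an MLP head on a 1408-dimensional CT Foundation
# embedding. Layer sizes, dropout, and the binary objective are assumptions.
import tensorflow as tf

inputs = tf.keras.Input(shape=(1408,))
x = tf.keras.layers.Dense(256, activation="relu")(inputs)
x = tf.keras.layers.Dropout(0.2)(x)
outputs = tf.keras.layers.Dense(1, activation="sigmoid")(x)  # e.g. finding present / absent
mlp_head = tf.keras.Model(inputs, outputs)

mlp_head.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=[tf.keras.metrics.AUC(name="auc")],
)
# mlp_head.fit(train_embeddings, train_labels, epochs=20, validation_split=0.1)
```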
281 | 282 | ### Version 283 | 284 | ``` 285 | name: v1.0.0 286 | date: 2024-10-18 287 | ``` 288 | 289 | ### Owners 290 | 291 | ``` 292 | ct-foundation@google.com 293 | ``` 294 | 295 | ### License 296 | 297 | Research use only. Not suitable for product development. - See 298 | [CT Foundation - Additional Terms of Service](https://docs.google.com/forms/d/e/1FAIpQLSfkSBbCi5dOlJxuDB3t6biFEBIA9JL66A99YRZa8qR2Fn5mUA/viewform?resourcekey=0-vPNR0VQ-vibGDJ564j4mCA). 299 | 300 | ### Intended Use 301 | 302 | * CT Foundation can reduce the training data, compute, and technical expertise 303 | necessary to develop task-specific models based on Computed Tomography 304 | Scans. 305 | * Embeddings from the model can be used for a variety of user-defined 306 | downstream tasks across different CT studies of various body parts. Validation includes lung 307 | cancer identification within chest CTs, aortic aneurysm in abdominal CTs, 308 | hemorrhage within head CTs. The model can also be used to classify different 309 | CT studies by body part or image quality. 310 | 311 | ### Training Data 312 | 313 | A comprehensive private dataset comprising 527,078 CT studies with associated 314 | radiology reports from 430,772 patients was obtained from three major hospital 315 | regions in the United States. 316 | 317 | ### Validation 318 | 319 | Evaluation was conducted across a diverse set of 7 benchmarking tests using 320 | non-linear 321 | [multilayer perceptrons](https://en.wikipedia.org/wiki/Multilayer_perceptron). 322 | These tasks were related to classifying: intracranial hemorrhage, calcifications 323 | in the chest and heart, lung cancer prediction in the chest, suspicious 324 | abdominal lesions, urolithiasis, and abdominal aortic aneurysm in abdominopelvic 325 | CTs. Results can be found in our 326 | [blog post](https://research.google/blog/taking-medical-imaging-embeddings-3d). 327 | 328 | ### Risks 329 | 330 | Although Google does not store any data sent to this model, it is the data 331 | owner's responsibility to ensure that Personally identifiable information (PII) 332 | and Protected Health Information (PHI) are removed prior to being sent to the 333 | model. Mitigation Strategy: Do not send data containing PII or PHI. Training 334 | dataset is a de-identified public dataset and CT imaging (pixel data) does not 335 | contain PHI. 336 | 337 | ### Limitations 338 | 339 | This is a research model and is intended for research purposes only. It has not 340 | been extensively validated across different scanner manufacturers. As with any 341 | research, developers should ensure any downstream application is validated to 342 | understand performance using data that is appropriately representative of the 343 | intended use setting (e.g., age, sex, gender, condition, scanner, etc.). 344 | -------------------------------------------------------------------------------- /cxr-foundation/MIMIC_Embeddings_Demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "\n", 9 | " \n", 12 | " \n", 15 | "
\n", 10 | " Run in Google Colab\n", 11 | " \n", 13 | " View source on GitHub\n", 14 | "
" 16 | ] 17 | }, 18 | { 19 | "attachments": {}, 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "# MIMIC CXR Embeddings Demo\n", 24 | "\n", 25 | "## Overview\n", 26 | "\n", 27 | "This notebook demonstrates how to train a simple neural network for a supervised classification task, using a set of Chest X-ray image embeddings.\n", 28 | "\n", 29 | "The datasets leveraged in this notebook are both derived from the [MIMIC-CXR Dataset](https://physionet.org/content/mimic-cxr/2.0.0/), which contains over 300,000 DICOMs and radiology reports:\n", 30 | "1. [The MIMIC-CXR JPG Dataset](https://physionet.org/content/mimic-cxr-jpg/2.0.0/) - contains JPG files derived from the DICOM images and structured labels derived from the free-text reports.\n", 31 | "2. [The MIMIC-CXR Image Embeddings Dataset](https://physionet.org/content/image-embeddings-mimic-cxr/1.0/) - which was generated from MIMIC-CXR using the Google Health [CXR Foundation tool](https://github.com/Google-Health/imaging-research/blob/master/cxr-foundation/README.md).\n", 32 | "\n", 33 | "## Prerequisites\n", 34 | "\n", 35 | "1. **Data access** - the MIMIC datasets are access-controlled. Follow the instructions on the [files](https://physionet.org/content/image-embeddings-mimic-cxr/1.0/#files) section to get access to the data. Overall, you must:\n", 36 | " - Be a credentialled PhysioNet user\n", 37 | " - Complete the appropriate institutional research training and get it verified by PhysioNet\n", 38 | " - Ensure the email you use to access Google Cloud is [selected](https://physionet.org/settings/cloud/) in your PhysioNet profile.\n", 39 | " - Sign the data use agreement for each dataset\n", 40 | " - Request access to the dataset's GCS bucket\n", 41 | "2. **Billing** - this notebook downloads data directly from PhysioNet's GCS buckets, which are set to [requester pays](https://cloud.google.com/storage/docs/requester-pays). Therefore you must have a Google Cloud project with an associated billing account. (The download cost in this notebook should be < $1)\n", 42 | "\n", 43 | "Note: PhysioNet hosts its data on its on-prem servers, which can be downloaded free of charge. Some of its databases are copied onto GCS buckets, which have much faster download speeds." 44 | ] 45 | }, 46 | { 47 | "attachments": {}, 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "# Installation\n", 52 | "\n", 53 | "Install the CXR Foundation package" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "!git clone https://github.com/Google-Health/imaging-research.git\n", 63 | "!pip install imaging-research/cxr-foundation/\n", 64 | "\n", 65 | "# Notebook specific dependencies\n", 66 | "!pip install tf-models-official>=2.13.0 google-cloud-storage" 67 | ] 68 | }, 69 | { 70 | "attachments": {}, 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "**IMPORTANT**: If you are using Colab, you must restart the runtime after installing new packages.\n", 75 | "\n", 76 | "NOTE: There will be some ERROR messages due to the protobuf library - this is normal." 77 | ] 78 | }, 79 | { 80 | "attachments": {}, 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "# Authenticate to Access Data\n", 85 | "\n", 86 | "The following cell is for Colab only. If running elsewhere, authenticate with the [gcloud CLI](https://cloud.google.com/sdk/gcloud/reference/auth/login)." 
87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "from google.colab import auth\n", 96 | "\n", 97 | "# Authenticate user for access. There will be a popup asking you to sign in with your user and approve access.\n", 98 | "auth.authenticate_user()" 99 | ] 100 | }, 101 | { 102 | "attachments": {}, 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "# Download and Process Metadata" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 1, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "import os\n", 116 | "\n", 117 | "from google.cloud import storage\n", 118 | "from google.cloud.storage import Bucket\n", 119 | "import pandas as pd\n", 120 | "\n", 121 | "from cxr_foundation.mimic import parse_embedding_file_pattern\n", 122 | "\n", 123 | "\n", 124 | "def download_blob(bucket: Bucket, source_blob_name: str, destination_file_name: str, print_name : str = None):\n", 125 | " \"\"\"\n", 126 | " Downloads a blob from the bucket.\n", 127 | "\n", 128 | " https://cloud.google.com/storage/docs/downloading-objects\n", 129 | "\n", 130 | " Params:\n", 131 | " print_name : Print the file name when downloaded. Options: \"source\" or \"dest\" or None.\n", 132 | " \"\"\"\n", 133 | " blob = bucket.blob(source_blob_name)\n", 134 | " try:\n", 135 | " blob.download_to_filename(destination_file_name)\n", 136 | " except Exception as e:\n", 137 | " print('Error during download - do you have the right permissions?')\n", 138 | " print(e)\n", 139 | " return\n", 140 | "\n", 141 | " if print_name == \"source\":\n", 142 | " print(f\"Downloaded: {source_blob_name}\")\n", 143 | " elif print_name == \"dest\":\n", 144 | " print(f\"Downloaded: {destination_file_name}\")\n", 145 | "\n", 146 | "\n", 147 | "DATA_DIR = \"data\"\n", 148 | "EMBEDDINGS_DATA_DIR = os.path.abspath(os.path.join(DATA_DIR, \"mimic-embeddings-files\"))\n", 149 | "\n", 150 | "\n", 151 | "# Make a directory to download the data\n", 152 | "if not os.path.exists(DATA_DIR):\n", 153 | " os.mkdir(DATA_DIR)\n", 154 | "\n", 155 | "if not os.path.exists(EMBEDDINGS_DATA_DIR):\n", 156 | " os.mkdir(EMBEDDINGS_DATA_DIR)" 157 | ] 158 | }, 159 | { 160 | "attachments": {}, 161 | "cell_type": "markdown", 162 | "metadata": {}, 163 | "source": [ 164 | "## Embeddings Metadata\n", 165 | "\n", 166 | "Data source:\n", 167 | "- https://physionet.org/content/image-embeddings-mimic-cxr/1.0/\n", 168 | "- https://console.cloud.google.com/storage/browser/image-embeddings-mimic-cxr-1.0.physionet.org\n", 169 | "\n", 170 | "Download the checksums file which contains a list of the embeddings files. Extract the data components from the file names." 
171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": null, 176 | "metadata": {}, 177 | "outputs": [], 178 | "source": [ 179 | "storage_client = storage.Client()\n", 180 | "\n", 181 | "embeddings_bucket = storage_client.bucket(\n", 182 | " 'image-embeddings-mimic-cxr-1.0.physionet.org') \n", 183 | "\n", 184 | "# Download the checksums file which contains a records list\n", 185 | "download_blob(embeddings_bucket, \"SHA256SUMS.txt\", \"data/SHA256SUMS.txt\")" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [ 194 | "df_embeddings = pd.read_csv(\"data/SHA256SUMS.txt\", delimiter=\" \", header=None, skiprows=[0]) # Skip the license file entry\n", 195 | "display(df_embeddings.head())" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": null, 201 | "metadata": {}, 202 | "outputs": [], 203 | "source": [ 204 | "SOURCE_COL_NAME = \"embeddings_file\" # Remote bucket embedding file location\n", 205 | "DL_COL_NAME = \"local_embeddings_file\" # Download file to this location\n", 206 | "\n", 207 | "# Create additional columns from file path components\n", 208 | "df_embeddings = df_embeddings[[1]]\n", 209 | "df_embeddings.rename(columns={1: \"embeddings_file\"}, inplace=True)\n", 210 | "df_embeddings[[\"subject_id\",\"study_id\", \"dicom_id\"]] = df_embeddings.apply(\n", 211 | " lambda x: parse_embedding_file_pattern(x[SOURCE_COL_NAME]), axis=1, result_type=\"expand\")\n", 212 | "df_embeddings[DL_COL_NAME] = df_embeddings[SOURCE_COL_NAME].apply(lambda x: os.path.join(EMBEDDINGS_DATA_DIR, os.path.basename(x))) # For download\n", 213 | "\n", 214 | "display(df_embeddings)" 215 | ] 216 | }, 217 | { 218 | "attachments": {}, 219 | "cell_type": "markdown", 220 | "metadata": {}, 221 | "source": [ 222 | "## CXR Metadata\n", 223 | "\n", 224 | "Data source:\n", 225 | "- https://physionet.org/content/mimic-cxr-jpg/2.0.0/\n", 226 | "- https://console.cloud.google.com/storage/browser/mimic-cxr-jpg-2.0.0.physionet.org\n", 227 | "\n", 228 | "Download and visualize three metadata files:\n", 229 | "1. `mimic-cxr-2.0.0-metadata.csv`: Meta-data derived from the original DICOM files\n", 230 | "2. `mimic-cxr-2.0.0-split.csv`: A reference dataset split for studies using MIMIC-CXR-JPG\n", 231 | "3. `mimic-cxr-2.0.0-chexpert.csv`: Lists all studies with labels generated by the CheXpert labeler.\n", 232 | "\n", 233 | "The first two files were used to generate the embeddings database. 
Embeddings files were only generated for the frontal view CXRs, so there are fewer embeddings files than there are original DICOMs/JPGs.\n" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "metadata": {}, 240 | "outputs": [], 241 | "source": [ 242 | "cxr_jpg_bucket = storage_client.bucket(\n", 243 | " 'mimic-cxr-jpg-2.0.0.physionet.org')\n", 244 | "\n", 245 | "CXR_JPG_METADATA_FILES = (\n", 246 | " \"mimic-cxr-2.0.0-metadata.csv.gz\",\n", 247 | " \"mimic-cxr-2.0.0-split.csv.gz\",\n", 248 | " \"mimic-cxr-2.0.0-chexpert.csv.gz\")\n", 249 | "\n", 250 | "for fname in CXR_JPG_METADATA_FILES:\n", 251 | " download_blob(cxr_jpg_bucket, fname, f\"{DATA_DIR}/{fname}\")" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": null, 257 | "metadata": {}, 258 | "outputs": [], 259 | "source": [ 260 | "CXR_JPG_METADATA_FILES = (\n", 261 | " \"mimic-cxr-2.0.0-metadata.csv.gz\",\n", 262 | " \"mimic-cxr-2.0.0-split.csv.gz\",\n", 263 | " \"mimic-cxr-2.0.0-chexpert.csv.gz\")\n", 264 | "\n", 265 | "df_metadata = pd.read_csv(f\"data/{CXR_JPG_METADATA_FILES[0]}\", compression=\"gzip\")\n", 266 | "df_split = pd.read_csv(f\"data/{CXR_JPG_METADATA_FILES[1]}\", compression=\"gzip\")\n", 267 | "df_labels_chexpert = pd.read_csv(f\"data/{CXR_JPG_METADATA_FILES[2]}\", compression=\"gzip\")\n", 268 | "\n", 269 | "display(df_metadata.head())\n", 270 | "display(df_split.head())\n", 271 | "display(df_labels_chexpert.head())" 272 | ] 273 | }, 274 | { 275 | "attachments": {}, 276 | "cell_type": "markdown", 277 | "metadata": {}, 278 | "source": [ 279 | "## Create the full labels file\n", 280 | "\n", 281 | "Join embeddings list with Chexpert metadata files" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": null, 287 | "metadata": {}, 288 | "outputs": [], 289 | "source": [ 290 | "# Each study contains one or more DICOMs\n", 291 | "# Chexpert labels df does not contain DICOM ID. 
Must join on (subject_id + study_id)\n", 292 | "df_labels_all = df_split.merge(df_labels_chexpert, on=['subject_id', 'study_id'])\n", 293 | "df_labels_all = df_labels_all.merge(df_metadata, on=['dicom_id'])\n", 294 | "df_labels_all = df_embeddings.merge(df_labels_all, on=['dicom_id'], how='left')\n", 295 | "\n", 296 | "display(df_labels_all)" 297 | ] 298 | }, 299 | { 300 | "attachments": {}, 301 | "cell_type": "markdown", 302 | "metadata": {}, 303 | "source": [ 304 | "## Make Labels files for Individual Diagnoses" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": null, 310 | "metadata": {}, 311 | "outputs": [], 312 | "source": [ 313 | "# Dict of data frames for individual diagnoses\n", 314 | "diagnoses_dataframes = {}\n", 315 | "\n", 316 | "# Choose some of the Chexpert generated diagnoses\n", 317 | "for diagnosis in ('Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema', 'Fracture'):\n", 318 | " # Remove missing/unsure labels\n", 319 | " df_diagnosis = df_labels_all[df_labels_all[diagnosis].isin((0, 1))]\n", 320 | " # Only extract required columns for the ML model\n", 321 | " df_diagnosis = df_diagnosis[[diagnosis, SOURCE_COL_NAME, DL_COL_NAME, 'split']]\n", 322 | " \n", 323 | " diagnoses_dataframes[diagnosis] = df_diagnosis\n", 324 | " df_diagnosis.to_csv(f'data/{diagnosis}.csv', index=False)\n", 325 | " print(f\"Created {diagnosis}.csv with {len(df_diagnosis)} rows\")\n", 326 | " display(df_diagnosis.nunique())\n", 327 | " \n", 328 | " # Show label and split value distributions\n", 329 | " display(df_diagnosis[diagnosis].value_counts())\n", 330 | " display(df_diagnosis['split'].value_counts())\n", 331 | " print(\"\\n\")" 332 | ] 333 | }, 334 | { 335 | "attachments": {}, 336 | "cell_type": "markdown", 337 | "metadata": {}, 338 | "source": [ 339 | "# Download Embeddings Files for Model Training\n", 340 | "\n", 341 | "There are many labels for Cardiomegaly. We will train our model using the embeddings with this label." 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": null, 347 | "metadata": {}, 348 | "outputs": [], 349 | "source": [ 350 | "DIAGNOSIS = 'Cardiomegaly'\n", 351 | "LABELS_CSV = f\"data/{DIAGNOSIS}.csv\"\n", 352 | "MAX_TRAINING_SAMPLES = 500\n", 353 | "MAX_VALIDATION_SAMPLES = 200\n", 354 | "\n", 355 | "df_diagnosis = pd.read_csv(LABELS_CSV)\n", 356 | "\n", 357 | "df_train = df_diagnosis[df_diagnosis[\"split\"] == \"train\"][:MAX_TRAINING_SAMPLES]\n", 358 | "df_validate = df_diagnosis[df_diagnosis[\"split\"] == \"validate\"][:MAX_VALIDATION_SAMPLES]\n", 359 | " \n", 360 | "\n", 361 | "display(df_train)\n", 362 | "display(df_validate)" 363 | ] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "execution_count": null, 368 | "metadata": {}, 369 | "outputs": [], 370 | "source": [ 371 | "# Takes ~2m\n", 372 | "for i, row in df_train.iterrows():\n", 373 | " download_blob(embeddings_bucket, row[SOURCE_COL_NAME], row[DL_COL_NAME], print_name=\"dest\")\n", 374 | "\n", 375 | "for i, row in df_validate.iterrows():\n", 376 | " download_blob(embeddings_bucket, row[SOURCE_COL_NAME], row[DL_COL_NAME], print_name=\"dest\")" 377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": null, 382 | "metadata": {}, 383 | "outputs": [], 384 | "source": [ 385 | "# Inspect an embedding files. 
A single file is only 5.6kb\n", 386 | "from cxr_foundation import embeddings_data\n", 387 | "\n", 388 | "\n", 389 | "filename = df_train[DL_COL_NAME][0]\n", 390 | "\n", 391 | "# Read the tf.train.Example object from the first tfrecord file\n", 392 | "example = embeddings_data.read_tfrecord_example(filename)\n", 393 | "print(example)\n", 394 | "\n", 395 | "# If you don't care about the structure of the .tfrecord file, and/or if\n", 396 | "# you don't use Tensorflow, you can use the following function to read\n", 397 | "# the values directly into a numpy array.\n", 398 | "values = embeddings_data.read_tfrecord_values(filename)\n", 399 | "print(values)" 400 | ] 401 | }, 402 | { 403 | "attachments": {}, 404 | "cell_type": "markdown", 405 | "metadata": {}, 406 | "source": [ 407 | "# Create and Train Model\n" 408 | ] 409 | }, 410 | { 411 | "cell_type": "code", 412 | "execution_count": null, 413 | "metadata": {}, 414 | "outputs": [], 415 | "source": [ 416 | "import tensorflow as tf\n", 417 | "import tensorflow_models as tfm\n", 418 | "\n", 419 | "\n", 420 | "def create_model(heads,\n", 421 | " embeddings_size=1376,\n", 422 | " learning_rate=0.1,\n", 423 | " end_lr_factor=1.0,\n", 424 | " dropout=0.0,\n", 425 | " decay_steps=1000,\n", 426 | " loss_weights=None,\n", 427 | " hidden_layer_sizes=[512, 256],\n", 428 | " weight_decay=0.0,\n", 429 | " seed=None) -> tf.keras.Model:\n", 430 | " \"\"\"\n", 431 | " Creates linear probe or multilayer perceptron using LARS + cosine decay.\n", 432 | "\n", 433 | " \"\"\"\n", 434 | " inputs = tf.keras.Input(shape=(embeddings_size,))\n", 435 | " hidden = inputs\n", 436 | " # If no hidden_layer_sizes are provided, model will be a linear probe.\n", 437 | " for size in hidden_layer_sizes:\n", 438 | " hidden = tf.keras.layers.Dense(\n", 439 | " size,\n", 440 | " activation='relu',\n", 441 | " kernel_initializer=tf.keras.initializers.HeUniform(seed=seed),\n", 442 | " kernel_regularizer=tf.keras.regularizers.l2(l2=weight_decay),\n", 443 | " bias_regularizer=tf.keras.regularizers.l2(l2=weight_decay))(\n", 444 | " hidden)\n", 445 | " hidden = tf.keras.layers.BatchNormalization()(hidden)\n", 446 | " hidden = tf.keras.layers.Dropout(dropout, seed=seed)(hidden)\n", 447 | " output = tf.keras.layers.Dense(\n", 448 | " units=len(heads),\n", 449 | " activation='sigmoid',\n", 450 | " kernel_initializer=tf.keras.initializers.HeUniform(seed=seed))(\n", 451 | " hidden)\n", 452 | "\n", 453 | " outputs = {}\n", 454 | " for i, head in enumerate(heads):\n", 455 | " outputs[head] = tf.keras.layers.Lambda(\n", 456 | " lambda x: x[..., i:i + 1], name=head.lower())(\n", 457 | " output)\n", 458 | "\n", 459 | " model = tf.keras.Model(inputs, outputs)\n", 460 | "\n", 461 | " learning_rate_fn = tf.keras.experimental.CosineDecay(\n", 462 | " tf.cast(learning_rate, tf.float32),\n", 463 | " tf.cast(decay_steps, tf.float32),\n", 464 | " alpha=tf.cast(end_lr_factor, tf.float32))\n", 465 | " \n", 466 | " model.compile(\n", 467 | " optimizer=tfm.optimization.lars.LARS(\n", 468 | " learning_rate=learning_rate_fn),\n", 469 | " loss=dict([(head, 'binary_crossentropy') for head in heads]),\n", 470 | " loss_weights=loss_weights or dict([(head, 1.) 
for head in heads]),\n", 471 | " weighted_metrics=['AUC'])\n", 472 | " return model" 473 | ] 474 | }, 475 | { 476 | "cell_type": "code", 477 | "execution_count": null, 478 | "metadata": {}, 479 | "outputs": [], 480 | "source": [ 481 | "# Create training and validation Datasets\n", 482 | "training_data = embeddings_data.get_dataset(filenames=df_train[DL_COL_NAME].values,\n", 483 | " labels=df_train[DIAGNOSIS].values)\n", 484 | "\n", 485 | "\n", 486 | "validation_data = embeddings_data.get_dataset(filenames=df_validate[DL_COL_NAME].values,\n", 487 | " labels=df_validate[DIAGNOSIS].values)\n", 488 | "\n", 489 | "# Create and train the model\n", 490 | "model = create_model([DIAGNOSIS])\n", 491 | "\n", 492 | "model.fit(\n", 493 | " x=training_data.batch(512).prefetch(tf.data.AUTOTUNE).cache(),\n", 494 | " validation_data=validation_data.batch(1).cache(),\n", 495 | " epochs=20,\n", 496 | ")" 497 | ] 498 | }, 499 | { 500 | "cell_type": "code", 501 | "execution_count": null, 502 | "metadata": {}, 503 | "outputs": [], 504 | "source": [ 505 | "model.summary()" 506 | ] 507 | }, 508 | { 509 | "cell_type": "code", 510 | "execution_count": null, 511 | "metadata": {}, 512 | "outputs": [], 513 | "source": [ 514 | "# Optional: serialize model for later use\n", 515 | "# model.save(\"embeddings_model\", include_optimizer=False)" 516 | ] 517 | } 518 | ], 519 | "metadata": { 520 | "kernelspec": { 521 | "display_name": "cxr", 522 | "language": "python", 523 | "name": "python3" 524 | }, 525 | "language_info": { 526 | "codemirror_mode": { 527 | "name": "ipython", 528 | "version": 3 529 | }, 530 | "file_extension": ".py", 531 | "mimetype": "text/x-python", 532 | "name": "python", 533 | "nbconvert_exporter": "python", 534 | "pygments_lexer": "ipython3", 535 | "version": "3.9.16" 536 | }, 537 | "orig_nbformat": 4, 538 | "vscode": { 539 | "interpreter": { 540 | "hash": "d3ac608b8f9188be2227ae82298dfd5de684cbdc4496f362d4b3b9040509447c" 541 | } 542 | } 543 | }, 544 | "nbformat": 4, 545 | "nbformat_minor": 2 546 | } 547 | --------------------------------------------------------------------------------