├── data_conversion ├── dicom_ctpt_to_nifti_conversion_file.csv ├── dicom_rtstruct_to_nifti_conversion_file.csv ├── resample_ct2pt.py └── dicom_to_nifti.py ├── documentation ├── assets │ ├── all_logos.png │ ├── cohort_table.png │ ├── segmentation_performance_visualization.png │ ├── autopet_data_zenodo.svg │ └── monai_zenodo.svg ├── conda_env.md ├── calculate_test_metrics.md ├── generate_lesion_measures.md ├── inference.md ├── dataset_format.md ├── trainddp.md ├── dicom_to_nifti_conversion.md └── results_format.md ├── segmentation ├── predict.sh ├── calculate_test_metrics.sh ├── generate_lesion_measures.sh ├── train.sh ├── plot_logs.py ├── inference.py ├── generate_lesion_measures.py ├── calculate_test_metrics.py ├── trainddp.py └── initialize_train.py ├── CODE_OF_CONDUCT.md ├── config.py ├── LICENSE ├── SUPPORT.md ├── SECURITY.md ├── environment.yml ├── README.md └── metrics └── metrics.py /data_conversion/dicom_ctpt_to_nifti_conversion_file.csv: -------------------------------------------------------------------------------- 1 | PatientID,CT_dir,PET_dir,convert -------------------------------------------------------------------------------- /data_conversion/dicom_rtstruct_to_nifti_conversion_file.csv: -------------------------------------------------------------------------------- 1 | PatientID,RTSTRUCT_dir,REF_dir,convert -------------------------------------------------------------------------------- /documentation/assets/all_logos.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/lymphoma-segmentation-dnn/HEAD/documentation/assets/all_logos.png -------------------------------------------------------------------------------- /documentation/assets/cohort_table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/lymphoma-segmentation-dnn/HEAD/documentation/assets/cohort_table.png -------------------------------------------------------------------------------- /documentation/assets/segmentation_performance_visualization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/lymphoma-segmentation-dnn/HEAD/documentation/assets/segmentation_performance_visualization.png -------------------------------------------------------------------------------- /segmentation/predict.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License. 3 | 4 | python inference.py --fold=1 --network-name='unet' --input-patch-size=192 --num_workers=2 --sw-bs=2 -------------------------------------------------------------------------------- /segmentation/calculate_test_metrics.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License. 3 | 4 | python calculate_test_metrics.py --fold=0 --network-name='unet' --input-patch-size=192 5 | -------------------------------------------------------------------------------- /segmentation/generate_lesion_measures.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License. 
3 | 4 | python generate_lesion_measures.py --fold=1 --network-name='unet' --input-patch-size=192 -------------------------------------------------------------------------------- /segmentation/train.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License. 3 | 4 | torchrun --standalone --nproc_per_node=1 trainddp.py --fold=1 --network-name='unet' --epochs=4 --input-patch-size=192 --train-bs=1 --num_workers=2 --lr=2e-4 --wd=1e-5 --val-interval=2 --sw-bs=2 --cache-rate=1 -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | #%% 2 | ''' 3 | Copyright (c) Microsoft Corporation. All rights reserved. 4 | Licensed under the MIT License. 5 | ''' 6 | import os 7 | 8 | LYMPHOMA_SEGMENTATION_FOLDER = '' # path to the directory containing `data` and `results` (this will be created by the pipeline) folders. 9 | 10 | DATA_FOLDER = os.path.join(LYMPHOMA_SEGMENTATION_FOLDER, 'data') 11 | RESULTS_FOLDER = os.path.join(LYMPHOMA_SEGMENTATION_FOLDER, 'results') 12 | os.makedirs(RESULTS_FOLDER, exist_ok=True) 13 | WORKING_FOLDER = os.path.dirname(os.path.abspath(__file__)) 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Shadab Ahamed 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /documentation/assets/autopet_data_zenodo.svg: -------------------------------------------------------------------------------- 1 | DOI badge: 10.7937/gkr0-xv29 -------------------------------------------------------------------------------- /SUPPORT.md: -------------------------------------------------------------------------------- 1 | # TODO: The maintainer of this repo has not yet edited this file 2 | 3 | **REPO OWNER**: Do you want Customer Service & Support (CSS) support for this product/project? 4 | 5 | - **No CSS support:** Fill out this template with information about how to file issues and get help. 6 | - **Yes CSS support:** Fill out an intake form at [aka.ms/onboardsupport](https://aka.ms/onboardsupport). CSS will work with/help you to determine next steps. 7 | - **Not sure?** Fill out an intake as though the answer were "Yes". CSS will help you decide. 8 | 9 | *Then remove this first heading from this SUPPORT.MD file before publishing your repo.* 10 | 11 | # Support 12 | 13 | ## How to file issues and get help 14 | 15 | This project uses GitHub Issues to track bugs and feature requests. Please search the existing 16 | issues before filing new issues to avoid duplicates. For new issues, file your bug or 17 | feature request as a new Issue. 18 | 19 | For help and questions about using this project, please **REPO MAINTAINER: INSERT INSTRUCTIONS HERE 20 | FOR HOW TO ENGAGE REPO OWNERS OR COMMUNITY FOR HELP. COULD BE A STACK OVERFLOW TAG OR OTHER 21 | CHANNEL. WHERE WILL YOU HELP PEOPLE?**. 22 | 23 | ## Microsoft Support Policy 24 | 25 | Support for this **PROJECT or PRODUCT** is limited to the resources listed above. 26 | -------------------------------------------------------------------------------- /documentation/assets/monai_zenodo.svg: -------------------------------------------------------------------------------- 1 | DOI badge: 10.5281/zenodo.7459814 -------------------------------------------------------------------------------- /documentation/conda_env.md: -------------------------------------------------------------------------------- 1 | # Getting started 2 | 3 | Welcome to our GitHub codebase for lymphoma lesion segmentation from PET/CT images. 4 | 5 | ## Cloning the repository 6 | To get started, the first step is to clone this repository to your local machine and navigate inside the resulting git directory: 7 | 8 | ``` 9 | git clone 'https://github.com/microsoft/lymphoma-segmentation-dnn.git' 10 | cd lymphoma-segmentation-dnn 11 | ``` 12 | 13 | ## Installing packages from `environment.yml` file 14 | This code base was developed primarily using python=3.8.10, PyTorch=1.11.0, monai=1.2.0, with CUDA 11.3 on an Ubuntu 20.04 virtual machine, so the codebase has been tested only with these configurations. We hope that it will run in other suitable combinations of different versions of python, PyTorch, monai, and CUDA, but we cannot guarantee that. Proceed with caution! 15 | 16 | First, we will use the [environment.yml](/environment.yml) file to create a conda environment (`lymphoma_seg`) and install all the required packages listed in it.
For this step, run, 17 | 18 | ``` 19 | conda env create --file environment.yml 20 | ``` 21 | 22 | If the above step is completed successfully without errors, you will have a new conda environment called `lymphoma_seg`. To activate this environment, use 23 | 24 | ``` 25 | conda activate lymphoma_seg 26 | ``` 27 | 28 | The environment can be deactivated using 29 | 30 | ``` 31 | conda deactivate 32 | ``` 33 | 34 | With the conda environment set up, you have all the necessary tools to start a training or inference experiment, except the training/test dataset. The next step is to get your dataset into the format used by our codebase, as explained in [/documentation/dataset_format.md](/documentation/dataset_format.md). 35 | -------------------------------------------------------------------------------- /documentation/calculate_test_metrics.md: -------------------------------------------------------------------------------- 1 | # How to calculate test metrics on the test set predicted masks? 2 | Once you have trained some models (as described in [trainddp.md](./trainddp.md)) and used them to perform inference (as described in [inference.md](./inference.md)) to generate predicted masks on the test images, you can proceed with the computation of test metrics. We compute three segmentation metrics: `Dice similarity coefficient (DSC)`, `false positive volume (FPV) in ml`, `false negative volume (FNV) in ml`. We also compute detection metrics such as `true positive (TP)`, `false positive (FP)`, and `false negative (FN)` lesion detections via three different criteria, labeled `Criterion1`, `Criterion2`, and `Criterion3`. These metrics have been defined in [metrics/metrics.py](./../metrics/metrics.py); a toy sketch of the patient-level Dice computation is shown at the end of this page. 3 | 4 | ## Step 1: Activate the required conda environment (`lymphoma_seg`) and navigate to `segmentation` folder 5 | First, activate the conda environment `lymphoma_seg` using (created as described in [conda_env.md](./conda_env.md)): 6 | 7 | ``` 8 | conda activate lymphoma_seg 9 | cd segmentation 10 | ``` 11 | 12 | ## Step 2: Run the script to compute test metrics 13 | After this, run the following script in your terminal, 14 | ``` 15 | python calculate_test_metrics.py --fold=0 --network-name='unet' --input-patch-size=192 16 | ``` 17 | 18 | Alternatively, modify the [segmentation/calculate_test_metrics.sh](./../segmentation/calculate_test_metrics.sh) for your use-case (which contains the same bash script as above) and run: 19 | 20 | ``` 21 | bash calculate_test_metrics.sh 22 | ``` 23 | 24 | The test metrics will be written to `LYMPHOMA_SEGMENTATION_FOLDER/results/test_metrics/fold{fold}/{network_name}/{experiment_code}/testmetrics.csv`, as described in the [results_format.md](./results_format.md) file. The relevant directory structure may then look like: 25 | 26 | └───lymphoma.segmentation/ 27 | ├── data 28 | └── results 29 | ├── logs 30 | ├── models 31 | ├── predictions 32 | └── test_metrics 33 | ├── fold0 34 | │ └── unet 35 | │ └── unet_fold0_randcrop192 36 | │ └── testmetrics.csv 37 | └── fold1 38 | └── unet 39 | └── unet_fold1_randcrop192 40 | └── testmetrics.csv
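For a concrete picture of the first metric, here is a minimal, illustrative sketch of a patient-level Dice score computed from two binary masks. It is a simplified stand-in, not the exact implementation in [metrics/metrics.py](./../metrics/metrics.py), and the function name `dice_score` is used only for illustration:

```
import numpy as np

def dice_score(gt: np.ndarray, pred: np.ndarray) -> float:
    # gt and pred are binary (0/1) 3D arrays with identical shapes
    intersection = np.sum((gt == 1) & (pred == 1))
    denominator = np.sum(gt == 1) + np.sum(pred == 1)
    # return NaN when both masks are empty (no lesions in either mask)
    return 2.0 * intersection / denominator if denominator > 0 else np.nan
```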
-------------------------------------------------------------------------------- /documentation/generate_lesion_measures.md: -------------------------------------------------------------------------------- 1 | # How to generate lesion measures from the test set predicted masks? 2 | Once you have performed the inference and saved the network predicted masks in NIFTI format (as described in [inference.md](./inference.md)), you can proceed with the generation of lesion measures from the test set predicted and ground truth lesion masks. We compute six patient-level lesion measures: lesion SUVmean, lesion SUVmax, number of lesions, total metabolic tumor volume (TMTV) in ml, total lesion glycolysis (TLG) in ml, and lesion dissemination (Dmax) in cm. These measures have been defined in [metrics/metrics.py](./../metrics/metrics.py) and have been shown to be prognostic biomarkers in lymphoma; a toy sketch of the TMTV computation is shown at the end of this page. 3 | 4 | ## Step 1: Activate the required conda environment (`lymphoma_seg`) and navigate to `segmentation` folder 5 | First, activate the conda environment `lymphoma_seg` using (created as described in [conda_env.md](./conda_env.md)): 6 | 7 | ``` 8 | conda activate lymphoma_seg 9 | cd segmentation 10 | ``` 11 | 12 | ## Step 2: Run the script to generate lesion measures 13 | After this, run the following script in your terminal, 14 | ``` 15 | python generate_lesion_measures.py --fold=0 --network-name='unet' --input-patch-size=192 16 | ``` 17 | 18 | Alternatively, modify the [segmentation/generate_lesion_measures.sh](./../segmentation/generate_lesion_measures.sh) for your use-case (which contains the same bash script as above) and run: 19 | 20 | ``` 21 | bash generate_lesion_measures.sh 22 | ``` 23 | 24 | The ground truth and predicted lesion measures on the test set will be written to `LYMPHOMA_SEGMENTATION_FOLDER/results/test_lesion_measures/fold{fold}/{network_name}/{experiment_code}/testlesionmeasures.csv`, as described in the [results_format.md](./results_format.md) file. The relevant directory structure may then look like: 25 | 26 | └───lymphoma.segmentation/ 27 | ├── data 28 | └── results 29 | ├── logs 30 | ├── models 31 | ├── predictions 32 | ├── test_metrics 33 | └── test_lesion_measures 34 | ├── fold0 35 | │ └── unet 36 | │ └── unet_fold0_randcrop192 37 | │ └── testlesionmeasures.csv 38 | └── fold1 39 | └── unet 40 | └── unet_fold1_randcrop192 41 | └── testlesionmeasures.csv
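As a rough guide to how the volumetric measures are obtained, below is a minimal, illustrative sketch of a TMTV computation from a binary lesion mask and the voxel spacing. It is a simplified stand-in, not the exact implementation in [metrics/metrics.py](./../metrics/metrics.py), and the function name `compute_tmtv_ml` is only illustrative:

```
import numpy as np

def compute_tmtv_ml(mask: np.ndarray, spacing_mm: tuple) -> float:
    # mask is a binary (0/1) 3D array; spacing_mm is the (x, y, z) voxel spacing in mm,
    # e.g. as returned by SimpleITK's GetSpacing()
    voxel_volume_ml = float(np.prod(spacing_mm)) / 1000.0  # mm^3 -> ml
    return float(np.sum(mask == 1)) * voxel_volume_ml
```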
26 | """ 27 | ctimg = sitk.ReadImage(ctpath) 28 | ptimg = sitk.ReadImage(ptpath) 29 | resampled_ctimg = sitk.Resample(ctimg, ptimg, interpolator=sitk.sitkLinear, defaultPixelValue=-1024) 30 | resampled_ct_filepath = os.path.join(savedir, os.path.basename(ctpath)) 31 | 32 | sitk.WriteImage(resampled_ctimg, resampled_ct_filepath) 33 | print('Resampled CT to PET geometry') 34 | print(f'Saving the low-resolution CT NIFTI image at {resampled_ct_filepath}') 35 | 36 | def resample_gt_to_pt_geometry( 37 | gtpath: str, 38 | ptpath: str, 39 | savedir: str = '' 40 | ): 41 | """ Function to resample GT images (if applicable) to the corresponding PET image geometry. 42 | You may or may not need to do this resampling. Do this if your ground truth segmentations 43 | were performed on CT images, and hence your GT masks are in the geometry of CT instead of PET. 44 | If the annoatations were performed on PET, then the GT mask and PET should (ideally) be in the 45 | same geometry and hence this step may not be required. 46 | 47 | Args: 48 | gtpath (str): path to NIFTI file for (high-resolution) GT image 49 | ptpath (str): path to NIFTI file for PET image 50 | savedir (str, optional): Directory to write the downsampled GT NIFTI image. Defaults to ''. 51 | """ 52 | gtimg = sitk.ReadImage(gtpath) 53 | ptimg = sitk.ReadImage(ptpath) 54 | resampled_gtimg = sitk.Resample(gtimg, ptimg, interpolator=sitk.sitkNearestNeighbor, defaultPixelValue=0) 55 | resampled_gt_filepath = os.path.join(savedir, os.path.basename(gtpath)) 56 | 57 | sitk.WriteImage(resampled_gtimg, resampled_gt_filepath) 58 | print('Resampled GT to PET geometry') 59 | print(f'Saving the low-resolution CT NIFTI image at {resampled_gt_filepath}') -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet) and [Xamarin](https://github.com/xamarin). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/security.md/definition), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/security.md/msrc/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/security.md/msrc/pgp). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 
18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/security.md/msrc/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/security.md/cvd). 40 | 41 | 42 | -------------------------------------------------------------------------------- /segmentation/plot_logs.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) Microsoft Corporation. All rights reserved. 3 | Licensed under the MIT License. 4 | ''' 5 | import pandas as pd 6 | import matplotlib.pyplot as plt 7 | import numpy as np 8 | import os 9 | import glob 10 | import sys 11 | config_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..") 12 | sys.path.append(config_dir) 13 | from config import RESULTS_FOLDER 14 | 15 | # %% 16 | def plot_train_logs(train_fpaths, valid_fpaths, network_names): 17 | train_dfs = [pd.read_csv(path) for path in train_fpaths] 18 | valid_dfs = [pd.read_csv(path) for path in valid_fpaths] 19 | 20 | train_losses = [df['Loss'].values for df in train_dfs] 21 | valid_metrics = [df['Metric'].values for df in valid_dfs] 22 | train_epochs = [np.arange(len(train_loss))+1 for train_loss in train_losses] 23 | valid_epochs = [2*(np.arange(len(valid_metric))+1) for valid_metric in valid_metrics] 24 | min_losses = [np.min(train_loss) for train_loss in train_losses] 25 | min_losses_epoch = [np.argmin(train_loss) + 1 for train_loss in train_losses] 26 | max_dscs = [np.max(valid_metric) for valid_metric in valid_metrics] 27 | max_dscs_epoch = [2*(np.argmax(valid_metric)+1) for valid_metric in valid_metrics] 28 | fig, ax = plt.subplots(1,2, figsize=(20,10)) 29 | fig.patch.set_facecolor('white') 30 | fig.patch.set_alpha(1) 31 | 32 | for i in range(len(train_losses)): 33 | ax[0].plot(train_epochs[i], train_losses[i]) 34 | ax[1].plot(valid_epochs[i], valid_metrics[i]) 35 | ax[0].plot(min_losses_epoch[i], min_losses[i], '-o', color='red') 36 | ax[1].plot(max_dscs_epoch[i], max_dscs[i], '-o', color='red') 37 | 38 | ax[0].text(np.min(train_epochs[i]), np.min(train_losses[i]), f'Total epochs: {len(train_epochs[i])}', fontsize=15) 39 | 40 | legend_labels_trainloss = [f"{network_names[i]}; Min loss: {round(min_losses[i], 4)} ({len(train_epochs[i])})" for i in range(len(network_names))] 41 | legend_labels_validdice = [f"{network_names[i]}; Max DSC: {round(max_dscs[i], 4)} ({len(valid_epochs[i])})" for i in range(len(network_names))] 42 | 43 | ax[0].legend(legend_labels_trainloss, fontsize=16) 44 | 
ax[1].legend(legend_labels_validdice, fontsize=16) 45 | ax[0].set_title('Train loss', fontsize=25) 46 | ax[1].set_title('Valid DSC', fontsize=25) 47 | ax[0].set_ylabel('Dice loss', fontsize=20) 48 | ax[1].set_ylabel('Dice score', fontsize=20) 49 | ax[0].grid(True) 50 | ax[1].grid(True) 51 | plt.show() 52 | 53 | #%% 54 | fold = 0 55 | network = ['unet'] 56 | inputsize = [192, 192, 160, 128] 57 | p = 2 58 | inputsize_dict = { 59 | 'unet': 192, 60 | 'attentionunet': 192, 61 | 'segresnet': 192, 62 | 'dynunet': 160, 63 | 'unetr': 160, 64 | 'swinunetr': 128 65 | } 66 | 67 | experiment_code = [f"{network[i]}_fold{fold}_randcrop{inputsize[i]}" for i in range(len(network))] 68 | save_logs_dir = os.path.join(RESULTS_FOLDER, 'logs') 69 | save_logs_folders = [os.path.join(save_logs_dir, 'fold'+str(fold), network[i], experiment_code[i]) for i in range(len(experiment_code))] 70 | train_fpaths = [os.path.join(save_logs_folders[i], 'trainlog_gpu0.csv') for i in range(len(save_logs_folders))] 71 | valid_fpaths = [os.path.join(save_logs_folders[i], 'validlog_gpu0.csv') for i in range(len(save_logs_folders))] 72 | legend_lbls = [f'{network[i]}, N = {inputsize[i]}' for i in range(len(network))] 73 | plot_train_logs(train_fpaths, valid_fpaths, legend_lbls) 74 | 75 | -------------------------------------------------------------------------------- /documentation/inference.md: -------------------------------------------------------------------------------- 1 | # How to run inference on test images using your trained model? 2 | 3 | Once your have trained some models using the training script described in [trainddp.md](./trainddp.md), you have model(s) that could be used for predicting the segmentation masks for test images. Running inference primarily uses three files from this codebase: [config.py](./../config.py), [segmentation/initialize_train.py](./../segmentation/initialize_train.py), and [segmentation/inference.py](./../segmentation/inference.py). Ensure that the [config.py](./../config.py) is correctly initialized (as described in [trainddp.md](./trainddp.md)) so that the inference code can find the path to the test images. 4 | 5 | ## Step 1: Activate the required conda environment (`lymphoma_seg`) and navigate to `segmentation` folder 6 | First, activate the conda environment `lymphoma_seg` using (created as described in [conda_env.md](./conda_env.md)): 7 | 8 | ``` 9 | conda activate lymphoma_seg 10 | cd segmentation 11 | ``` 12 | 13 | ## Step 2: Run the inference script 14 | After this, run the following script in your terminal. Note: we run the inference only on one GPU (denoted by `cuda:0` in your machine). 15 | ``` 16 | python inference.py --fold=0 --network-name='unet' --input-patch-size=192 --num_workers=2 --sw-bs=2 17 | ``` 18 | 19 | - `inference.py` is the inference code that this script is using. 20 | 21 | - `--fold` defines which fold's trained model you want to use for inference. When training script is run for the first time, two files, namely, `train_filepaths.csv` and `test_filepaths.csv` gets created within the folder `WORKING_FOLDER/data_split`, where the former contains the filepaths (CT, PT, mask) for training images (from `imagesTr` and `labelsTr` folders as described in `dataset_format.md`), and the latter contains the filepaths for test images (from `imagesTs` and `labelsTs`), respectively. The purpose of setting `fold` in this case is not to point to the specific fold dataset (since we are only using the test set for inference), but to define which fold's trained model to use. 
Defaults to 0. 22 | 23 | - `--network-name` defines the name of the network. In this work, we have trained UNet, SegResNet, DynUNet and SwinUNETR (adapted from MONAI [LINK]). Hence, the `--network-name` should be set to one of `unet`, `segresnet`, `dynunet`, or `swinunetr`. Defaults to `unet`. 24 | 25 | - `--input-patch-size` defines the size of the cubic input patch that is cropped from the input images during training. We used `input-patch-size` of 224 for UNet, 192 for SegResNet, 160 for DynUNet and 128 for SwinUNETR. Defaults to 192. 26 | 27 | - `--num-workers` defines the `num_workers` argument inside training and validation DataLoaders. Defaults to 2. 28 | 29 | - `--sw-bs` defines the batch size for performing the sliding-window inference via `monai.inferers.sliding_window_inference` on the test inputs. Defaults to 2. 30 | 31 | 32 | Alternatively, modify the [segmentation/predict.sh](./../segmentation/predict.sh) script for your use-case (which contains the same bash script as above) and run: 33 | 34 | ``` 35 | bash predict.sh 36 | ``` 37 | 38 | The predicted masks will be written to `LYMPHOMA_SEGMENTATION_FOLDER/results/predictions/fold{fold}/{network_name}/{experiment_code}`, as described in the [results_format.md](./results_format.md) file. The predicted masks are assigned the same filenames as the corresponding original ground truth segmentation masks. The relevant directory structure may then look like: 39 | 40 | └───lymphoma.segmentation/ 41 | ├── data 42 | └── results 43 | ├── logs 44 | ├── models 45 | └── predictions 46 | └── fold0 47 | └── unet 48 | └── unet_fold0_randcrop192 49 | ├── Patient0003_20190402.nii.gz 50 | ├── Patient0004_20160204.nii.gz 51 | ├── ... 52 | 53 | -------------------------------------------------------------------------------- /documentation/dataset_format.md: -------------------------------------------------------------------------------- 1 | # Dataset format 2 | In this work, the dataset consists of three components: CT and PET images and the corresponding lesion segmentation mask, all in NIFTI file format. If your dataset is in DICOM format, you can convert them to NIFTI using the method described in [dicom_to_nifti_conversion.md](./dicom_to_nifti_conversion.md). After converting DICOM images to NIFTI format, you may have to resample your CT (and/or GT) images to PET geometry (if your CT or GT images are not in PET geometry). If this is the case, use the functions `resample_ct_to_pt_geometry()` and `resample_gt_to_pt_geometry()` in [data_conversion/resample_ct2pt.py](./../data_conversion/resample_ct2pt.py). 3 | 4 | ## Training cases filenaming convention 5 | We follow a similar filenaming convention as used by [nnUNet](https://github.com/MIC-DKFZ/nnUNet/blob/master/documentation/dataset_format.md). Each training case is associated with a unique identifier, which is a unique name for that case. This identifier is used by our code to connect images (PET/CT) with the correct segmentation mask. We suggest using `{PatientID}_{StudyDate}` as the unique identifier. 6 | 7 | A training case consists of images and their corresponding segmentation masks. 8 | 9 | **Images**: Our networks utilize two-channel 3D images, the first channel being the CT image and the second channel being the PET image. Both CT and PET **MUST** have the same geometry (same size, spacing, origin, direction) and must be (approximately) coregistered (if applicable). To resample CT images to PET resolution, use the function `resample_ct_to_pt_geometry()` in [data_conversion/resample_ct2pt.py](./../data_conversion/resample_ct2pt.py). Within a training case, all image geometries (input channels, corresponding segmentation) must match. Between training cases, they can of course differ.
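If you are unsure whether a given CT/PET pair already shares the same geometry, a quick sanity check along the following lines can help. This is an illustrative sketch using SimpleITK (already a dependency of this codebase); the filenames are hypothetical, and in practice small floating-point tolerances may be needed when comparing spacing, origin, and direction:

```
import SimpleITK as sitk

ct = sitk.ReadImage('Patient0001_20110502_0000.nii.gz')  # CT channel (hypothetical example file)
pt = sitk.ReadImage('Patient0001_20110502_0001.nii.gz')  # PET channel (hypothetical example file)

# exact comparison of the four geometry attributes referred to throughout this page
same_geometry = (
    ct.GetSize() == pt.GetSize()
    and ct.GetSpacing() == pt.GetSpacing()
    and ct.GetOrigin() == pt.GetOrigin()
    and ct.GetDirection() == pt.GetDirection()
)
print('CT and PET share the same geometry:', same_geometry)
```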
10 | 11 | **Segmentations** must share the same geometry as their corresponding images (same size, spacing, origin, direction). Segmentations are 12 | integer maps with each value representing a semantic class; the background is represented by 0. In our work, we used segmentation masks with two classes: 0 for background and 1 for lesions. All masks in the training set **MUST** have 0s and 1s; the current version of the code cannot handle negative images (images with no lesions) without changing some of the preprocessing transforms (like `RandCropByPosNegLabeld`, etc.) applied to the images before giving them as inputs to the network. 13 | 14 | Given a unique identifier for a case, {PatientID}_{StudyDate}, the CT, PET and GT image filenames should be: 15 | CT image: `{PatientID}_{StudyDate}_0000.nii.gz`, 16 | PET image: `{PatientID}_{StudyDate}_0001.nii.gz`, 17 | GT image: `{PatientID}_{StudyDate}.nii.gz`, 18 | 19 | **Important:** The input channels must be consistent! Concretely, **all images need the same input channels in the same 20 | order and all input channels have to be present every time**. This is also true for inference! 21 | 22 | 23 | ## Dataset folder structure 24 | Create a folder named `lymphoma.segmentation` in the location of your choice. This is the master folder that stores all your datasets, the trained models, training/validation logs, predictions, and any other results based on predictions. Go to the file [config.py](./../config.py) and update the variable `LYMPHOMA_SEGMENTATION_FOLDER` to the absolute path of the folder `lymphoma.segmentation`. Within `lymphoma.segmentation`, create a folder named `data`, which should be the location of your training and test datasets. After these steps, your directory structure is expected to look like this: 25 | 26 | └───lymphoma.segmentation/data 27 | ├── imagesTr 28 | ├── imagesTs # optional 29 | ├── labelsTr 30 | └── labelsTs # optional 31 | 32 | - `imagesTr` contains the images (CT and PET) belonging to the training cases. Each corresponding CT and PET image pair should be in the same geometry (same size, spacing, origin, direction) in this folder. 33 | - `imagesTs` (optional) contains the images that belong to the test cases. Each corresponding CT and PET image pair should be in the same geometry (same size, spacing, origin, direction) in this folder. 34 | - `labelsTr` contains the images with the ground truth segmentation maps for the training cases. These should be in the same geometry (same size, spacing, origin, direction) as their corresponding PET/CT images in `imagesTr`. 35 | - `labelsTs` (optional) contains the images with the ground truth segmentation maps for the test cases. These should be in the same geometry (same size, spacing, origin, direction) as their corresponding PET/CT images in `imagesTs`. 36 | 37 | 38 | After moving all the training and test images and masks into the respective folders, the directory structure should look like this: 39 | 40 | └───lymphoma.segmentation/data/ 41 | ├── imagesTr 42 | │ ├── Patient0001_20110502_0000.nii.gz 43 | │ ├── Patient0001_20110502_0001.nii.gz 44 | │ ├── Patient0002_20150514_0000.nii.gz 45 | │ ├── Patient0002_20150514_0001.nii.gz 46 | │ ├── ...
47 | ├── imagesTs # optional 48 | │ ├── Patient0003_20190402_0000.nii.gz 49 | │ ├── Patient0003_20190402_0001.nii.gz 50 | │ ├── Patient0004_20150514_0000.nii.gz 51 | │ ├── Patient0004_20150514_0001.nii.gz 52 | │ ├── ... 53 | ├── labelsTr 54 | │ ├── Patient0001_20110502.nii.gz 55 | │ ├── Patient0002_20110502.nii.gz 56 | │ ├── ... 57 | └── labelsTs # optional 58 | ├── Patient0003_20190402.nii.gz 59 | ├── Patient0004_20160204.nii.gz 60 | ├── ... 61 | -------------------------------------------------------------------------------- /segmentation/inference.py: -------------------------------------------------------------------------------- 1 | #%% 2 | ''' 3 | Copyright (c) Microsoft Corporation. All rights reserved. 4 | Licensed under the MIT License. 5 | ''' 6 | import numpy as np 7 | import glob 8 | import os 9 | import pandas as pd 10 | import SimpleITK as sitk 11 | import sys 12 | import argparse 13 | from monai.inferers import sliding_window_inference 14 | from monai.data import DataLoader, Dataset, decollate_batch 15 | import torch 16 | import os 17 | import glob 18 | import pandas as pd 19 | import numpy as np 20 | import torch.nn as nn 21 | import time 22 | from initialize_train import ( 23 | get_validation_sliding_window_size, 24 | get_model, 25 | get_test_data_in_dict_format, 26 | get_valid_transforms, 27 | get_post_transforms 28 | ) 29 | config_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..") 30 | sys.path.append(config_dir) 31 | from config import RESULTS_FOLDER 32 | #%% 33 | def convert_to_4digits(str_num): 34 | if len(str_num) == 1: 35 | new_num = '000' + str_num 36 | elif len(str_num) == 2: 37 | new_num = '00' + str_num 38 | elif len(str_num) == 3: 39 | new_num = '0' + str_num 40 | else: 41 | new_num = str_num 42 | return new_num 43 | 44 | def create_dictionary_ctptgt(ctpaths, ptpaths, gtpaths): 45 | data = [] 46 | for i in range(len(gtpaths)): 47 | ctpath = ctpaths[i] 48 | ptpath = ptpaths[i] 49 | gtpath = gtpaths[i] 50 | data.append({'CT':ctpath, 'PT':ptpath, 'GT':gtpath}) 51 | return data 52 | 53 | def read_image_array(path): 54 | img = sitk.ReadImage(path) 55 | array = np.transpose(sitk.GetArrayFromImage(img), (2,1,0)) 56 | return array 57 | 58 | #%% 59 | def main(args): 60 | # initialize inference 61 | fold = args.fold 62 | network = args.network_name 63 | inputsize = args.input_patch_size 64 | experiment_code = f"{network}_fold{fold}_randcrop{inputsize}" 65 | sw_roi_size = get_validation_sliding_window_size(inputsize) # get sliding_window inference size for given input patch size 66 | 67 | # find the best model for this experiment from the training/validation logs 68 | # best model is the model with the best validation `Metric` (DSC) 69 | save_logs_dir = os.path.join(RESULTS_FOLDER, 'logs') 70 | validlog_fname = os.path.join(save_logs_dir, 'fold'+str(fold), network, experiment_code, 'validlog_gpu0.csv') 71 | validlog = pd.read_csv(validlog_fname) 72 | best_epoch = 2*(np.argmax(validlog['Metric']) + 1) 73 | best_metric = np.max(validlog['Metric']) 74 | print(f"Using the {network} model at epoch={best_epoch} with mean valid DSC = {round(best_metric, 4)}") 75 | 76 | # get the best model and push it to device=cuda:0 77 | save_models_dir = os.path.join(RESULTS_FOLDER,'models') 78 | save_models_dir = os.path.join(save_models_dir, 'fold'+str(fold), network, experiment_code) 79 | best_model_fname = 'model_ep=' + convert_to_4digits(str(best_epoch)) +'.pth' 80 | model_path = os.path.join(save_models_dir, best_model_fname) 81 | device = torch.device(f"cuda:0") 82 | 
model = get_model(network, input_patch_size=inputsize) 83 | model.load_state_dict(torch.load(model_path, map_location=device)) 84 | model.to(device) 85 | 86 | # initialize the location to save predicted masks 87 | save_preds_dir = os.path.join(RESULTS_FOLDER, f'predictions') 88 | save_preds_dir = os.path.join(save_preds_dir, 'fold'+str(fold), network, experiment_code) 89 | os.makedirs(save_preds_dir, exist_ok=True) 90 | 91 | # get test data (in dictionary format for MONAI dataloader), test_transforms and post_transforms 92 | test_data = get_test_data_in_dict_format() 93 | test_transforms = get_valid_transforms() 94 | post_transforms = get_post_transforms(test_transforms, save_preds_dir) 95 | 96 | # initalize PyTorch dataset and Dataloader 97 | dataset_test = Dataset(data=test_data, transform=test_transforms) 98 | dataloader_test = DataLoader(dataset_test, batch_size=1, shuffle=False, num_workers=args.num_workers) 99 | 100 | model.eval() 101 | with torch.no_grad(): 102 | for data in dataloader_test: 103 | inputs = data['CTPT'].to(device) 104 | sw_batch_size = args.sw_bs 105 | print(sw_batch_size) 106 | data['Pred'] = sliding_window_inference(inputs, sw_roi_size, sw_batch_size, model) 107 | data = [post_transforms(i) for i in decollate_batch(data)] 108 | 109 | 110 | if __name__ == "__main__": 111 | parser = argparse.ArgumentParser(description='Lymphoma PET/CT lesion segmentation using MONAI-PyTorch') 112 | parser.add_argument('--fold', type=int, default=0, metavar='fold', 113 | help='validation fold (default: 0), remaining folds will be used for training') 114 | parser.add_argument('--network-name', type=str, default='unet', metavar='netname', 115 | help='network name for training (default: unet)') 116 | parser.add_argument('--input-patch-size', type=int, default=192, metavar='inputsize', 117 | help='size of cropped input patch for training (default: 192)') 118 | parser.add_argument('--num_workers', type=int, default=2, metavar='nw', 119 | help='num_workers for train and validation dataloaders (default: 2)') 120 | parser.add_argument('--sw-bs', type=int, default=2, metavar='sw-bs', 121 | help='batchsize for sliding window inference (default=2)') 122 | args = parser.parse_args() 123 | 124 | main(args) 125 | 126 | -------------------------------------------------------------------------------- /documentation/trainddp.md: -------------------------------------------------------------------------------- 1 | # How to train a model using this codebase? 2 | 3 | The models in this work are trained on a single-node with `torch.cuda.device_count()` GPUs. In our work, we had `torch.cuda.device_count() == 4` on a single Microsoft Azure VM (node). Each GPU consisted of 16 GiB of RAM. The machine consisted of 24 vCPUs and 448 GiB of RAM. 4 | 5 | Running a training experiment primarily uses only three files from this codebase: [config.py](./../config.py), [segmentation/trainddp.py](./../segmentation/trainddp.py) and [segmentation/initialize_train.py](./../segmentation/initialize_train.py). The first step is to initialize the correct values for the variable `LYMPHOMA_SEGMENTATION_FOLDER` in the [config.py](./../config.py). Put all the training (and test, if applicable) data inside the `LYMPHOMA_SEGMENTATION_FOLDER/data` folder, as described in [dataset_format.md](./dataset_format.md). 
6 | 7 | ``` 8 | import os 9 | 10 | LYMPHOMA_SEGMENTATION_FOLDER = '/path/to/lymphoma.segmentation/folder/for/data/and/results' # path to the directory containing `data` and `results` (this will be created by the pipeline) folders. 11 | 12 | DATA_FOLDER = os.path.join(LYMPHOMA_SEGMENTATION_FOLDER, 'data') 13 | RESULTS_FOLDER = os.path.join(LYMPHOMA_SEGMENTATION_FOLDER, 'results') 14 | os.makedirs(RESULTS_FOLDER, exist_ok=True) 15 | WORKING_FOLDER = os.path.dirname(os.path.abspath(__file__)) 16 | ``` 17 | 18 | If the dataset is correctly configured based on the explanations in [dataset_format.md](./dataset_format.md) and the [config.py](./../config.py) is correctly initialized as well, you are all set to initiate the training script. 19 | 20 | ## Step 1: Activate the required conda environment (`lymphoma_seg`) and navigate to `segmentation` folder 21 | First, activate the conda environment `lymphoma_seg` using (created as described in [conda_env.md](./conda_env.md)): 22 | 23 | ``` 24 | conda activate lymphoma_seg 25 | cd segmentation 26 | ``` 27 | 28 | ## Step 2: Run the training script 29 | After this, run the following script in your terminal: 30 | 31 | ``` 32 | torchrun --standalone --nproc_per_node=1 trainddp.py --fold=0 --network-name='unet' --epochs=500 --input-patch-size=192 --train-bs=1 --num_workers=2 --cache-rate=0.5 --lr=2e-4 --wd=1e-5 --val-interval=2 --sw-bs=2 33 | ``` 34 | 35 | Here, we are using PyTorch's `torchrun` to start multi-GPU training. The `--standalone` flag indicates that we are using just one node. 36 | 37 | - `--nproc_per_node` defines the number of processes per node; in this case it represents the number of GPUs you want to use to train your model. We used `--nproc_per_node=4`, but feel free to set this variable to the number of GPUs available in your machine. 38 | 39 | - `trainddp.py` is the file containing the code for training that uses `torch.nn.parallel.DistributedDataParallel`. 40 | 41 | - `--fold` defines the fold for which you want to run training. When the above script is run for the first time, two files, namely, `train_filepaths.csv` and `test_filepaths.csv`, get created within the folder `WORKING_FOLDER/data_split`, where the former contains the filepaths (CT, PT, mask) for training images (from `imagesTr` and `labelsTr` folders as described in `dataset_format.md`), and the latter contains the filepaths for test images (from `imagesTs` and `labelsTs`), respectively. The `train_filepaths.csv` contains a column named `FoldID` with values in `{0, 1, 2, 3, 4}` defining which fold the data in that row belongs to. When `--fold=0` (for example), the code uses all the data with `FoldID == 0` for validation and the data with `FoldID != 0` for training. Defaults to 0. 42 | 43 | - `--network-name` defines the name of the network. In this work, we have trained UNet, SegResNet, DynUNet and SwinUNETR (adapted from MONAI [LINK]). Hence, the `--network-name` should be set to one of `unet`, `segresnet`, `dynunet`, or `swinunetr`. Defaults to `unet`. 44 | 45 | - `--epochs` is the total number of epochs for running the training. Defaults to 500. 46 | 47 | - `--input-patch-size` defines the size of the cubic input patch that is cropped from the input images during training. The code uses `monai.transforms.RandCropByPosNegLabeld` (used inside `segmentation/initialize_train.py`) for creating these cropped patches. We used `input-patch-size` of 224 for UNet, 192 for SegResNet, 160 for DynUNet and 128 for SwinUNETR. Defaults to 192.
48 | 49 | - `--train-bs` is the training batch size. We used `--train-bs = 1` for all our experiments in this work, since for the given `input-patch-size` for the networks above, we couldn't accommodate larger batch sizes for SegResNet, DynUNet, and SwinUNETR. Defaults to 1. 50 | 51 | - `--num-workers` defines the `num_workers` argument inside training and validation DataLoaders. Defaults to 2. 52 | 53 | - `--cache-rate` defines the percentage of data to be cached by `monai.data.CacheDataset`. This type of dataset (unlike `torch.utils.data.Dataset`) can load and cache the results of the deterministic transforms during training. A cache-rate of 1 caches all the data into memory, while a cache-rate of 0 doesn't cache anything into memory. A higher cache rate leads to faster training (but more memory consumption). Defaults to 0.1. 54 | 55 | - `--lr` defines the initial learning rate. A cosine annealing scheduler is used to update the learning rate from the initial value to 0 in `epochs` epochs. Defaults to 2e-4. 56 | 57 | - `--wd` defines the weight-decay for the AdamW optimizer used in this work. Defaults to 1e-5. 58 | 59 | - `--val-interval` defines the interval for performing validation and saving the model being trained. Defaults to 2. 60 | 61 | - `--sw-bs` defines the batch size for performing the sliding-window inference via `monai.inferers.sliding_window_inference` on the validation inputs. Defaults to 2. 62 | 63 | 64 | 65 | Alternatively, modify the [segmentation/train.sh](./../segmentation/train.sh) script for your use-case (which contains the same bash script as above) and run: 66 | 67 | ``` 68 | bash train.sh 69 | ``` -------------------------------------------------------------------------------- /segmentation/generate_lesion_measures.py: -------------------------------------------------------------------------------- 1 | #%% 2 | ''' 3 | Copyright (c) Microsoft Corporation. All rights reserved. 4 | Licensed under the MIT License.
5 | ''' 6 | import pandas as pd 7 | import numpy as np 8 | import SimpleITK as sitk 9 | import os 10 | from glob import glob 11 | import sys 12 | import argparse 13 | config_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..") 14 | sys.path.append(config_dir) 15 | from config import RESULTS_FOLDER 16 | from metrics.metrics import * 17 | 18 | def get_spacing_from_niftipath(path): 19 | spacing = sitk.ReadImage(path).GetSpacing() 20 | return spacing 21 | 22 | 23 | def main(args): 24 | fold = args.fold 25 | network = args.network_name 26 | inputsize = args.input_patch_size 27 | experiment_code = f"{network}_fold{fold}_randcrop{inputsize}" 28 | preddir = os.path.join(RESULTS_FOLDER, 'predictions', f'fold{fold}', network, experiment_code) 29 | predpaths = sorted(glob(os.path.join(preddir, '*.nii.gz'))) 30 | gtpaths = sorted(list(pd.read_csv('./../data_split/test_filepaths.csv')['GTPATH'])) 31 | ptpaths = sorted(list(pd.read_csv('./../data_split/test_filepaths.csv')['PTPATH'])) # PET image paths (ptpaths) for calculating the detection metrics using criterion3 32 | 33 | imageids = [os.path.basename(path)[:-7] for path in gtpaths] 34 | DSC = [] 35 | SUVmean_orig, SUVmean_pred = [], [] 36 | SUVmax_orig, SUVmax_pred = [], [] 37 | LesionCount_orig, LesionCount_pred = [], [] 38 | TMTV_orig, TMTV_pred = [], [] 39 | TLG_orig, TLG_pred = [], [] 40 | Dmax_orig, Dmax_pred = [], [] 41 | 42 | for i in range(len(gtpaths)): 43 | ptpath = ptpaths[i] 44 | gtpath = gtpaths[i] 45 | predpath = predpaths[i] 46 | 47 | ptarray = get_3darray_from_niftipath(ptpath) 48 | gtarray = get_3darray_from_niftipath(gtpath) 49 | predarray = get_3darray_from_niftipath(predpath) 50 | spacing = get_spacing_from_niftipath(gtpath) 51 | 52 | # Dice score between mask gt and pred 53 | dsc = calculate_patient_level_dice_score(gtarray, predarray) 54 | # Lesion SUVmean 55 | suvmean_orig = calculate_patient_level_lesion_suvmean_suvmax(ptarray, gtarray, marker='SUVmean') 56 | suvmean_pred = calculate_patient_level_lesion_suvmean_suvmax(ptarray, predarray, marker='SUVmean') 57 | # Lesion SUVmax 58 | suvmax_orig = calculate_patient_level_lesion_suvmean_suvmax(ptarray, gtarray, marker='SUVmax') 59 | suvmax_pred = calculate_patient_level_lesion_suvmean_suvmax(ptarray, predarray, marker='SUVmax') 60 | # Lesion Count 61 | lesioncount_orig = calculate_patient_level_lesion_count(gtarray) 62 | lesioncount_pred = calculate_patient_level_lesion_count(predarray) 63 | # TMTV 64 | tmtv_orig = calculate_patient_level_tmtv(gtarray, spacing) 65 | tmtv_pred = calculate_patient_level_tmtv(predarray, spacing) 66 | # TLG 67 | tlg_orig = calculate_patient_level_tlg(ptarray, gtarray, spacing) 68 | tlg_pred = calculate_patient_level_tlg(ptarray, predarray, spacing) 69 | # Dmax 70 | dmax_orig = calculate_patient_level_dissemination(gtarray, spacing) 71 | dmax_pred = calculate_patient_level_dissemination(predarray, spacing) 72 | 73 | DSC.append(dsc) 74 | SUVmean_orig.append(suvmean_orig) 75 | SUVmean_pred.append(suvmean_pred) 76 | SUVmax_orig.append(suvmax_orig) 77 | SUVmax_pred.append(suvmax_pred) 78 | LesionCount_orig.append(lesioncount_orig) 79 | LesionCount_pred.append(lesioncount_pred) 80 | TMTV_orig.append(tmtv_orig) 81 | TMTV_pred.append(tmtv_pred) 82 | TLG_orig.append(tlg_orig) 83 | TLG_pred.append(tlg_pred) 84 | Dmax_orig.append(dmax_orig) 85 | Dmax_pred.append(dmax_pred) 86 | 87 | 88 | print(f"{i}: {imageids[i]}") 89 | print(f"Dice Score: {round(dsc,4)}") 90 | print(f"SUVmean: GT: {suvmean_orig}, Pred: {suvmean_pred}") 91 | print(f"SUVmax: 
GT: {suvmax_orig}, Pred: {suvmax_pred}") 92 | print(f"LesionCount: GT: {lesioncount_orig}, Pred: {lesioncount_pred}") 93 | print(f"TMTV: GT: {tmtv_orig} ml, Pred: {tmtv_pred} ml") 94 | print(f"TLG: GT: {tlg_orig} ml, Pred: {tlg_pred} ml") 95 | print(f"Dmax: GT: {dmax_orig} cm, Pred: {dmax_pred} cm") 96 | print("\n") 97 | 98 | save_lesionmeasures_dir = os.path.join(RESULTS_FOLDER, f'test_lesion_measures', 'fold'+str(fold), network, experiment_code) 99 | os.makedirs(save_lesionmeasures_dir, exist_ok=True) 100 | filepath = os.path.join(save_lesionmeasures_dir, f'testlesionmeasures.csv') 101 | 102 | data = np.column_stack( 103 | [ 104 | imageids, 105 | DSC, 106 | SUVmean_orig, 107 | SUVmean_pred, 108 | SUVmax_orig, 109 | SUVmax_pred, 110 | LesionCount_orig, 111 | LesionCount_pred, 112 | TMTV_orig, 113 | TMTV_pred, 114 | TLG_orig, 115 | TLG_pred, 116 | Dmax_orig, 117 | Dmax_pred 118 | ] 119 | ) 120 | 121 | data_df = pd.DataFrame( 122 | data=data, 123 | columns=[ 124 | 'PatientID', 125 | 'DSC', 126 | 'SUVmean_orig', 127 | 'SUVmean_pred', 128 | 'SUVmax_orig', 129 | 'SUVmax_pred', 130 | 'LesionCount_orig', 131 | 'LesionCount_pred', 132 | 'TMTV_orig', 133 | 'TMTV_pred', 134 | 'TLG_orig', 135 | 'TLG_pred', 136 | 'Dmax_orig', 137 | 'Dmax_pred' 138 | ] 139 | ) 140 | data_df.to_csv(filepath, index=False) 141 | 142 | 143 | if __name__ == "__main__": 144 | parser = argparse.ArgumentParser(description='Lymphoma PET/CT lesion segmentation using MONAI-PyTorch') 145 | parser.add_argument('--fold', type=int, default=0, metavar='fold', 146 | help='validation fold (default: 0), remaining folds will be used for training') 147 | parser.add_argument('--network-name', type=str, default='unet', metavar='netname', 148 | help='network name for training (default: unet)') 149 | parser.add_argument('--input-patch-size', type=int, default=192, metavar='inputsize', 150 | help='size of cropped input patch for training (default: 192)') 151 | args = parser.parse_args() 152 | main(args) 153 | -------------------------------------------------------------------------------- /data_conversion/dicom_to_nifti.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) Microsoft Corporation. All rights reserved. 3 | Licensed under the MIT License. 4 | ''' 5 | 6 | ''' 7 | This code does the following: 8 | (1) converts PET DICOM images in units Bq/ml to decay-corrected SUV and saved as 3D NIFTI files 9 | (2) converts CT DICOM images to NIFTI 10 | (3) converts DICOM RTSTRUCT images to NIFTI (using rt-utils) 11 | ''' 12 | #%% 13 | import SimpleITK as sitk 14 | from pydicom import dcmread, FileDataset 15 | from rt_utils import RTStructBuilder, RTStruct 16 | import numpy as np 17 | import dateutil 18 | import pandas as pd 19 | import os 20 | import time 21 | 22 | #%% 23 | ''' 24 | Script to convert PET and CT dicom series to niftii files. Works under 25 | the assumption that the rescale slope and intercept in the PET dicom 26 | series map image intensities to Bq/mL. Saved PET files will have image 27 | intensities of SUVbw, and saved CT files will have HU units. 
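In other words, the conversion factor computed by `bqml_to_suv` below is suv_factor = (patient_weight_kg * 1000) / (injected_dose_Bq * 2**(-decay_time_s / half_life_s)), where decay_time_s is the time elapsed between radionuclide injection and the series start; multiplying the Bq/mL voxel values by this factor yields SUVbw.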
28 | 29 | ''' 30 | def bqml_to_suv(dcm_file: FileDataset) -> float: 31 | 32 | # Calculates the SUV conversion factor from Bq/mL to SUVbw using 33 | # the dicom header information in one of the images from a dicom series 34 | 35 | nuclide_dose = dcm_file[0x054, 0x0016][0][0x0018, 0x1074].value # Total injected dose (Bq) 36 | weight = dcm_file[0x0010, 0x1030].value # Patient weight (Kg) 37 | half_life = float(dcm_file[0x054, 0x0016][0][0x0018, 0x1075].value) # Radionuclide half life (s) 38 | 39 | parse = lambda x: dateutil.parser.parse(x) 40 | 41 | series_time = str(dcm_file[0x0008, 0x00031].value) # Series start time (hh:mm:ss) 42 | series_date = str(dcm_file[0x0008, 0x00021].value) # Series start date (yyy:mm:dd) 43 | series_datetime_str = series_date + ' ' + series_time 44 | series_dt = parse(series_datetime_str) 45 | 46 | nuclide_time = str(dcm_file[0x054, 0x0016][0][0x0018, 0x1072].value) # Radionuclide time of injection (hh:mm:ss) 47 | nuclide_datetime_str = series_date + ' ' + nuclide_time 48 | nuclide_dt = parse(nuclide_datetime_str) 49 | 50 | delta_time = (series_dt - nuclide_dt).total_seconds() 51 | decay_correction = 2 ** (-1 * delta_time/half_life) 52 | suv_factor = (weight * 1000) / (decay_correction * nuclide_dose) 53 | 54 | return(suv_factor) 55 | 56 | def get_filtered_roi_list(rois): 57 | filtered_rois = [] 58 | for roi in rois: 59 | if roi.endswith('PETEdge'): 60 | filtered_rois.append(roi) 61 | else: 62 | pass 63 | return filtered_rois 64 | 65 | 66 | def load_merge_masks(rtstruct: RTStruct) -> np.ndarray: 67 | ''' 68 | Load and merge masks from a dicom RTStruct. All of the 69 | masks in the RTStruct will be merged. Add an extra line 70 | of code if you want to filter for/out certain masks. 71 | ''' 72 | rois = rtstruct.get_roi_names() 73 | rois = get_filtered_roi_list(rois) 74 | masks = [] 75 | for roi in rois: 76 | print(roi) 77 | mask_3d = rtstruct.get_roi_mask_by_name(roi).astype(int) 78 | masks.append(mask_3d) 79 | 80 | final_mask = sum(masks) # sums element-wise 81 | final_mask = np.where(final_mask>=1, 1, 0) 82 | # Reorient the mask to line up with the reference image 83 | final_mask = np.moveaxis(final_mask, [0, 1, 2], [1, 2, 0]) 84 | 85 | return final_mask 86 | 87 | ############################################################################################ 88 | ######## Update the three variables below with the locations of your choice ############## 89 | ############################################################################################ 90 | save_dir_ct = '' # path to directory where your new CT files in NIFTI format will be written 91 | save_dir_pt = '' # path to directory where your new PET files in NIFTI format will be written 92 | save_dir_gt = '' # path to directory where your new GT files in NIFTI format will be written 93 | ############################################################################################ 94 | ############################################################################################ 95 | ############################################################################################ 96 | 97 | cases = pd.read_csv('dicom_ctpt_to_nifti_conversion_file.csv') 98 | cases = list(cases.itertuples(index=False, name=None)) 99 | structs = pd.read_csv('dicom_rtstruct_to_nifti_conversion_file.csv') 100 | structs = list(structs.itertuples(index=False, name=None)) 101 | # Execution 102 | start = time.time() 103 | 104 | for case in cases: 105 | patient_id, ct_folder, pet_folder, convert = case 106 | if convert=='N': 107 | continue 108 | 
print(f'Converting patient Id: {patient_id}') 109 | 110 | # Convert CT series 111 | ct_reader = sitk.ImageSeriesReader() 112 | ct_series_names = ct_reader.GetGDCMSeriesFileNames(ct_folder) 113 | ct_reader.SetFileNames(ct_series_names) 114 | ct = ct_reader.Execute() 115 | sitk.WriteImage(ct, os.path.join(save_dir_ct, f"{patient_id}_0000.nii.gz"), imageIO='NiftiImageIO') 116 | print('Saved nifti CT') 117 | 118 | # Convert PET series 119 | pet_reader = sitk.ImageSeriesReader() 120 | pet_series_names = pet_reader.GetGDCMSeriesFileNames(pet_folder) 121 | pet_reader.SetFileNames(pet_series_names) 122 | pet = pet_reader.Execute() 123 | 124 | pet_img = dcmread(pet_series_names[0]) # read one of the images for header info 125 | suv_factor = bqml_to_suv(pet_img) 126 | pet = sitk.Multiply(pet, suv_factor) 127 | sitk.WriteImage(pet, os.path.join(save_dir_pt, f"{patient_id}_0001.nii.gz"), imageIO='NiftiImageIO') 128 | print('Saved nifti PET') 129 | 130 | # Execution 131 | for struct in structs: 132 | patient_id, struct_folder, ref_folder, convert = struct 133 | if convert=='N': 134 | continue 135 | 136 | # print('Converting RTStruct for patient {}'.format(num)) 137 | # Get all the paths in order 138 | struct_file = os.listdir(struct_folder)[0] 139 | struct_path = os.path.join(struct_folder, struct_file) 140 | 141 | # Create the mask 142 | rtstruct = RTStructBuilder.create_from(dicom_series_path= ref_folder, rt_struct_path=struct_path) 143 | final_mask = load_merge_masks(rtstruct) 144 | 145 | # Load original DICOM image for reference 146 | reader = sitk.ImageSeriesReader() 147 | dicom_names = reader.GetGDCMSeriesFileNames(ref_folder) 148 | reader.SetFileNames(dicom_names) 149 | ref_img = reader.Execute() 150 | 151 | # Properly reference and convert the mask to an image object 152 | mask_img = sitk.GetImageFromArray(final_mask) 153 | mask_img.CopyInformation(ref_img) 154 | sitk.WriteImage(mask_img, os.path.join(save_dir_gt, f"{patient_id}.nii.gz"), imageIO="NiftiImageIO") 155 | 156 | print('Patient {} mask saved'.format(patient_id)) 157 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: lymphoma_seg 2 | channels: 3 | - pytorch 4 | - conda-forge 5 | - defaults 6 | dependencies: 7 | - _libgcc_mutex=0.1=main 8 | - _openmp_mutex=5.1=1_gnu 9 | - asttokens=2.4.0=pyhd8ed1ab_0 10 | - backcall=0.2.0=pyh9f0ad1d_0 11 | - backports=1.0=pyhd8ed1ab_3 12 | - backports.functools_lru_cache=1.6.5=pyhd8ed1ab_0 13 | - blas=1.0=mkl 14 | - brotlipy=0.7.0=py38h27cfd23_1003 15 | - bzip2=1.0.8=h7b6447c_0 16 | - ca-certificates=2023.7.22=hbcca054_0 17 | - certifi=2023.7.22=pyhd8ed1ab_0 18 | - cffi=1.15.1=py38h74dc2b5_0 19 | - charset-normalizer=2.0.4=pyhd3eb1b0_0 20 | - comm=0.1.4=pyhd8ed1ab_0 21 | - cryptography=41.0.3=py38h130f0dd_0 22 | - cudatoolkit=11.3.1=h2bc3f7f_2 23 | - debugpy=1.6.7=py38h6a678d5_0 24 | - decorator=5.1.1=pyhd8ed1ab_0 25 | - entrypoints=0.4=pyhd8ed1ab_0 26 | - executing=1.2.0=pyhd8ed1ab_0 27 | - ffmpeg=4.3=hf484d3e_0 28 | - freetype=2.12.1=h4a9f257_0 29 | - giflib=5.2.1=h5eee18b_3 30 | - gmp=6.2.1=h295c915_3 31 | - gnutls=3.6.15=he1e5248_0 32 | - idna=3.4=py38h06a4308_0 33 | - intel-openmp=2021.4.0=h06a4308_3561 34 | - ipykernel=6.25.2=pyh2140261_0 35 | - ipython=8.12.0=pyh41d4057_0 36 | - jedi=0.19.1=pyhd8ed1ab_0 37 | - jpeg=9e=h5eee18b_1 38 | - jupyter_client=7.3.4=pyhd8ed1ab_0 39 | - jupyter_core=4.12.0=py38h578d9bd_0 40 | - lame=3.100=h7b6447c_0 41 | - 
lcms2=2.12=h3be6417_0 42 | - ld_impl_linux-64=2.38=h1181459_1 43 | - lerc=3.0=h295c915_0 44 | - libdeflate=1.17=h5eee18b_1 45 | - libffi=3.3=he6710b0_2 46 | - libgcc-ng=11.2.0=h1234567_1 47 | - libgomp=11.2.0=h1234567_1 48 | - libiconv=1.16=h7f8727e_2 49 | - libidn2=2.3.4=h5eee18b_0 50 | - libpng=1.6.39=h5eee18b_0 51 | - libsodium=1.0.18=h36c2ea0_1 52 | - libstdcxx-ng=11.2.0=h1234567_1 53 | - libtasn1=4.19.0=h5eee18b_0 54 | - libtiff=4.5.1=h6a678d5_0 55 | - libunistring=0.9.10=h27cfd23_0 56 | - libuv=1.44.2=h5eee18b_0 57 | - libwebp=1.3.2=h11a3e52_0 58 | - libwebp-base=1.3.2=h5eee18b_0 59 | - lz4-c=1.9.4=h6a678d5_0 60 | - matplotlib-inline=0.1.6=pyhd8ed1ab_0 61 | - mkl=2021.4.0=h06a4308_640 62 | - mkl-service=2.4.0=py38h7f8727e_0 63 | - mkl_fft=1.3.1=py38hd3c417c_0 64 | - mkl_random=1.2.2=py38h51133e4_0 65 | - ncurses=6.4=h6a678d5_0 66 | - nest-asyncio=1.5.6=pyhd8ed1ab_0 67 | - nettle=3.7.3=hbbd107a_1 68 | - numpy=1.24.3=py38h14f4228_0 69 | - numpy-base=1.24.3=py38h31eccc5_0 70 | - openh264=2.1.1=h4ff587b_0 71 | - openssl=1.1.1w=h7f8727e_0 72 | - packaging=23.2=pyhd8ed1ab_0 73 | - parso=0.8.3=pyhd8ed1ab_0 74 | - pexpect=4.8.0=pyh1a96a4e_2 75 | - pickleshare=0.7.5=py_1003 76 | - pillow=9.4.0=py38h6a678d5_1 77 | - pip=23.2.1=py38h06a4308_0 78 | - prompt-toolkit=3.0.39=pyha770c72_0 79 | - prompt_toolkit=3.0.39=hd8ed1ab_0 80 | - ptyprocess=0.7.0=pyhd3deb0d_0 81 | - pure_eval=0.2.2=pyhd8ed1ab_0 82 | - pycparser=2.21=pyhd3eb1b0_0 83 | - pygments=2.16.1=pyhd8ed1ab_0 84 | - pyopenssl=23.2.0=py38h06a4308_0 85 | - pysocks=1.7.1=py38h06a4308_0 86 | - python=3.8.10=h12debd9_8 87 | - python-dateutil=2.8.2=pyhd8ed1ab_0 88 | - python_abi=3.8=2_cp38 89 | - pytorch=1.11.0=py3.8_cuda11.3_cudnn8.2.0_0 90 | - pytorch-mutex=1.0=cuda 91 | - pyzmq=25.1.0=py38h6a678d5_0 92 | - readline=8.2=h5eee18b_0 93 | - requests=2.31.0=py38h06a4308_0 94 | - setuptools=68.0.0=py38h06a4308_0 95 | - six=1.16.0=pyhd3eb1b0_1 96 | - sqlite=3.41.2=h5eee18b_0 97 | - stack_data=0.6.2=pyhd8ed1ab_0 98 | - tk=8.6.12=h1ccaba5_0 99 | - torchaudio=0.11.0=py38_cu113 100 | - torchvision=0.12.0=py38_cu113 101 | - tornado=6.1=py38h0a891b7_3 102 | - traitlets=5.10.1=pyhd8ed1ab_0 103 | - typing_extensions=4.7.1=py38h06a4308_0 104 | - urllib3=1.26.16=py38h06a4308_0 105 | - wcwidth=0.2.8=pyhd8ed1ab_0 106 | - wheel=0.41.2=py38h06a4308_0 107 | - xz=5.4.2=h5eee18b_0 108 | - zeromq=4.3.4=h9c3ff4c_1 109 | - zlib=1.2.13=h5eee18b_0 110 | - zstd=1.5.5=hc292b87_0 111 | - pip: 112 | - absl-py==2.0.0 113 | - alembic==1.12.0 114 | - astor==0.8.1 115 | - attrs==23.1.0 116 | - beautifulsoup4==4.12.2 117 | - blinker==1.6.2 118 | - cachetools==5.3.1 119 | - clearml==1.13.1 120 | - click==8.1.7 121 | - cloudpickle==2.2.1 122 | - cmaes==0.10.0 123 | - colorama==0.4.6 124 | - coloredlogs==15.0.1 125 | - colorlog==6.7.0 126 | - connected-components-3d==3.12.3 127 | - contextlib2==21.6.0 128 | - contourpy==1.1.1 129 | - cucim==23.8.0 130 | - cycler==0.12.0 131 | - databricks-cli==0.17.8 132 | - dataclasses==0.6 133 | - docker==6.1.3 134 | - einops==0.7.0 135 | - filelock==3.11.0 136 | - fire==0.5.0 137 | - flask==2.3.3 138 | - flatbuffers==23.5.26 139 | - fonttools==4.43.0 140 | - fsspec==2023.9.2 141 | - furl==2.1.3 142 | - gdown==4.7.1 143 | - gitdb==4.0.10 144 | - gitpython==3.1.37 145 | - google-auth==2.23.2 146 | - google-auth-oauthlib==1.0.0 147 | - greenlet==3.0.0 148 | - grpcio==1.59.0 149 | - gunicorn==21.2.0 150 | - h5py==3.9.0 151 | - huggingface-hub==0.17.3 152 | - humanfriendly==10.0 153 | - imagecodecs==2023.3.16 154 | - imageio==2.31.5 155 | - 
importlib-metadata==6.8.0 156 | - importlib-resources==6.1.0 157 | - itk==5.3.0 158 | - itk-core==5.3.0 159 | - itk-filtering==5.3.0 160 | - itk-io==5.3.0 161 | - itk-numerics==5.3.0 162 | - itk-registration==5.3.0 163 | - itk-segmentation==5.3.0 164 | - itsdangerous==2.1.2 165 | - jinja2==3.1.2 166 | - joblib==1.3.2 167 | - json-tricks==3.17.3 168 | - jsonschema==4.19.1 169 | - jsonschema-specifications==2023.7.1 170 | - kiwisolver==1.4.5 171 | - lazy-loader==0.3 172 | - lmdb==1.4.1 173 | - mako==1.2.4 174 | - markdown==3.4.4 175 | - markupsafe==2.1.3 176 | - matplotlib==3.7.3 177 | - mlflow==2.7.1 178 | - monai==1.2.0 179 | - mpmath==1.3.0 180 | - networkx==3.1 181 | - nibabel==5.1.0 182 | - ninja==1.11.1 183 | - nni==3.0 184 | - nptyping==2.5.0 185 | - nvidia-ml-py==12.535.108 186 | - oauthlib==3.2.2 187 | - onnx==1.14.1 188 | - onnxruntime==1.16.0 189 | - opencv-python==4.8.1.78 190 | - openslide-python==1.1.2 191 | - optuna==3.3.0 192 | - orderedmultidict==1.0.1 193 | - pandas==2.0.3 194 | - pathlib2==2.3.7.post1 195 | - pkgutil-resolve-name==1.3.10 196 | - prettytable==3.9.0 197 | - protobuf==4.24.3 198 | - psutil==5.9.5 199 | - pyarrow==13.0.0 200 | - pyasn1==0.5.0 201 | - pyasn1-modules==0.3.0 202 | - pydicom==2.4.3 203 | - pyjwt==2.4.0 204 | - pynrrd==1.0.0 205 | - pyparsing==3.1.1 206 | - pythonwebhdfs==0.2.3 207 | - pytorch-ignite==0.4.11 208 | - pytz==2023.3.post1 209 | - pywavelets==1.4.1 210 | - pyyaml==6.0.1 211 | - querystring-parser==1.2.4 212 | - referencing==0.30.2 213 | - regex==2023.10.3 214 | - requests-oauthlib==1.3.1 215 | - responses==0.23.3 216 | - rpds-py==0.10.3 217 | - rsa==4.9 218 | - rt-utils==1.2.7 219 | - schema==0.7.5 220 | - scikit-image==0.21.0 221 | - scikit-learn==1.3.1 222 | - scipy==1.10.1 223 | - simpleitk==2.3.0 224 | - simplejson==3.19.1 225 | - smmap==5.0.1 226 | - soupsieve==2.5 227 | - sqlalchemy==2.0.21 228 | - sqlparse==0.4.4 229 | - sympy==1.12 230 | - tabulate==0.9.0 231 | - tensorboard==2.14.0 232 | - tensorboard-data-server==0.7.1 233 | - tensorboardx==2.6.2.2 234 | - termcolor==2.3.0 235 | - threadpoolctl==3.2.0 236 | - tifffile==2023.7.10 237 | - tokenizers==0.12.1 238 | - tqdm==4.66.1 239 | - transformers==4.21.3 240 | - typeguard==4.1.2 241 | - types-pyyaml==6.0.12.12 242 | - tzdata==2023.3 243 | - websocket-client==1.6.3 244 | - websockets==11.0.3 245 | - werkzeug==3.0.0 246 | - zipp==3.17.0 247 | prefix: /anaconda/envs/lymphoma_seg 248 | -------------------------------------------------------------------------------- /documentation/dicom_to_nifti_conversion.md: -------------------------------------------------------------------------------- 1 | # Converting DICOM series to 3D NIFTI files 2 | 3 | PET/CT images are usually stored in DICOM format (the format from hell). In our work, we have converted DICOM PET/CT and RTSTRUCT images as NIFIT images for use by our networks. Unlike the DICOM series that consists of several (axial) `.dcm` images within a folder for one case, the NIFTI images are just one file (`.nii.gz`) which stores the entire 3D array + associated metadata. Hence, NIFTI images are much easier to handle and suitable format to use in deep learning applications. 4 | 5 | Here, we provide the script [dicom_to_nifti.py](./../data_conversion/dicom_to_nifti.py) for converting DICOM series (for PET and CT) and DICOM RTSTRUCT (for segmentation masks in DICOM format) to 3D NIFTI files. 
Before using this code, you need to create two specific files: `dicom_ctpt_to_nifti_conversion_file.csv` and `dicom_rtstruct_to_nifti_conversion_file.csv`. Examples of these files are given [here](./../data_conversion/dicom_ctpt_to_nifti_conversion_file.csv) and [here](./../data_conversion/dicom_rtstruct_to_nifti_conversion_file.csv), respectively. Both these files are used by [dicom_to_nifti.py](./../data_conversion/dicom_to_nifti.py) for performing the required conversions. **DO NOT FORGET TO READ THE `VERY IMPORTANT NOTES` SECTION AT THE BOTTOM OF THIS DOCUMENT**. 6 | 7 | ## Creating the `dicom_ctpt_to_nifti_conversion_file.csv` file 8 | `dicom_ctpt_to_nifti_conversion_file.csv` must be a .csv file and its contents must look like this: 9 | 10 | | PatientID | CT_dir | PET_dir | convert | 11 | | ----------|--------|---------|---------| 12 | | Patient00001_28071996 | path/to/ct/dicom/series/directory/for/Patient00001_28071996 | path/to/pet/dicom/series/directory/for/Patient00001_28071996 | Y | 13 | | Patient00002_02021996 | path/to/ct/dicom/series/directory/for/Patient00002_02021996 | path/to/pet/dicom/series/directory/for/Patient00002_02021996 | Y | 14 | 15 | Here, the first column is `PatientID`. For the purpose of illustration, we are using the unique identifier `{PatientID}_{StudyDate}`, as described in [dataset_format.md](./dataset_format.md), but you can use any other naming convention too. The second and third columns should be the paths to the DICOM series directories for CT and PET, respectively, for the patient with ID `PatientID`. The last column should be either `Y` or `N`, for whether to convert (to NIFTI) or not. Rows with `convert=N` are ignored by [dicom_to_nifti.py](./../data_conversion/dicom_to_nifti.py) during conversion. Populate this .csv file with the corresponding information for your custom DICOM data. 16 | 17 | 18 | ## Creating the `dicom_rtstruct_to_nifti_conversion_file.csv` file 19 | `dicom_rtstruct_to_nifti_conversion_file.csv` must be a .csv file and its contents must look like this: 20 | 21 | | PatientID | RTSTRUCT_dir | REF_dir | convert | 22 | | ----------|--------------|---------|---------| 23 | | Patient00001_28071996 | path/to/dicom/rtstruct/directory/for/Patient00001_28071996 | path/to/reference/dicom/series/for/Patient00001_28071996 | Y | 24 | | Patient00002_02021996 | path/to/dicom/rtstruct/directory/for/Patient00002_02021996 | path/to/reference/dicom/series/for/Patient00002_02021996 | Y | 25 | 26 | Here, the first column is the `PatientID`. The second column is the path to the RTSTRUCT directory for the patient with ID `PatientID`. The third column is the path to the directory that stores the reference image on which the RTSTRUCT was created. This reference image could be either PET or CT depending on which image was used to create the RTSTRUCT annotations. BE CAREFUL with assigning the correct reference image, otherwise the code [dicom_to_nifti.py](./../data_conversion/dicom_to_nifti.py) will fail. The last column is the same as in the previous step. 27 | 28 | 29 | ## Updating the `save_dir_ct`, `save_dir_pt`, and `save_dir_gt` in `dicom_to_nifti.py` 30 | Go to the middle of the [dicom_to_nifti.py](./../data_conversion/dicom_to_nifti.py) file (around line numbers 87-95) and update the values of the variables `save_dir_ct`, `save_dir_pt`, and `save_dir_gt` with the paths to the directories (on your local machine) where you want the converted NIFTI images to be written, corresponding to the CT, PET, and GT (ground truth) masks, respectively.
31 | ``` 32 | ############################################################################################ 33 | ######## Update the three variables below with the locations of your choice ############## 34 | ############################################################################################ 35 | save_dir_ct = '' # path to directory where your new CT files in NIFTI format will be written 36 | save_dir_pt = '' # path to directory where your new PET files in NIFTI format will be written 37 | save_dir_gt = '' # path to directory where your new GT files in NIFTI format will be written 38 | ############################################################################################ 39 | ############################################################################################ 40 | ############################################################################################ 41 | ``` 42 | 43 | ## Running conversion script `dicom_to_nifti.py` 44 | This step assumes that you have already cloned this repository and created a conda environment `lymphoma_seg` with all the necessary packages installed from the [environment.yml](./../environment.yml) file. If you haven't done these steps, first finish them using [conda_env.md](./conda_env.md) before proceeding further. Also, read the next section `VERY IMPORTANT NOTES` before running the conversion script below (as you might have to update `dicom_to_nifti.py` further): 45 | ``` 46 | conda activate lymphoma_seg 47 | cd data_conversion 48 | python dicom_to_nifti.py 49 | ``` 50 | 51 | 52 | ## VERY IMPORTANT NOTES 53 | - [dicom_to_nifti.py](./../data_conversion/dicom_to_nifti.py) uses [rt-utils](https://github.com/qurit/rt-utils) for converting DICOM RTSTRUCT to 3D numpy arrays which are eventually saved as 3D NIFTI masks. The code contains a function `get_filtered_roi_list(.)`, as given below: 54 | ``` 55 | def get_filtered_roi_list(rois): 56 | filtered_rois = [] 57 | for roi in rois: 58 | if roi.endswith('PETEdge'): 59 | filtered_rois.append(roi) 60 | else: 61 | pass 62 | return filtered_rois 63 | ``` 64 | 65 | The `rois` argument passed to this function is the list of ROIs within the RTSTRUCT (as extracted by `rt_utils.RTStructBuilder`). In our datasets, all the ROIs in the RTSTRUCT files ending with the string `PETEdge` corresponded to lesions, hence we use `get_filtered_roi_list(.)` to keep only the ROIs for lesions. Your dataset may or may not be like this, so **BE VERY CAUTIOUS WHILE USING THIS CODE!!!!!!!! Update the code accordingly depending on your use-case**. 66 | 67 | - This code also assumes that the original DICOM PET series intensities were in units of Bq/ml. The code performs decay correction of the PET intensities and converts them to SUV values before writing the NIFTI images. The CT image intensities remain the same (i.e., Hounsfield Units (HU)) before and after conversion to NIFTI. 68 | 69 | - After converting to NIFTI format, your CT, PET, and GT still might not be in the same geometry. For example, your high-resolution CT images could have a matrix size much larger than your lower-resolution PET images. You must resample (and resave) the CT images (and also the GT masks, if applicable) to the geometry of the PET images. The final PET, CT and GT mask for a specific `PatientID` should all have the same size, spacing, origin, and direction.
To perform this resampling, use the functions `resample_ct_to_pt_geometry()` and/or `resample_gt_to_pt_geometry()` in [resample_ct2pt.py](./../data_conversion/resample_ct2pt.py). If you do not perform this final resampling of PET/CT/GT images to the same geometry, the subsequent training code will fail, as described in [dataset_format.md](./dataset_format.md). 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /segmentation/calculate_test_metrics.py: -------------------------------------------------------------------------------- 1 | #%% 2 | ''' 3 | Copyright (c) Microsoft Corporation. All rights reserved. 4 | Licensed under the MIT License. 5 | ''' 6 | import numpy as np 7 | import pandas as pd 8 | import SimpleITK as sitk 9 | import os 10 | from glob import glob 11 | import sys 12 | import argparse 13 | config_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..") 14 | sys.path.append(config_dir) 15 | from config import RESULTS_FOLDER 16 | from metrics.metrics import ( 17 | get_3darray_from_niftipath, 18 | calculate_patient_level_dice_score, 19 | calculate_patient_level_false_positive_volume, 20 | calculate_patient_level_false_negative_volume, 21 | calculate_patient_level_tp_fp_fn 22 | ) 23 | 24 | def get_spacing_from_niftipath(path): 25 | image = sitk.ReadImage(path) 26 | return image.GetSpacing() 27 | 28 | def get_column_statistics(col): 29 | mean = col.mean() 30 | std = col.std() 31 | median = col.median() 32 | quantile25 = col.quantile(q=0.25) 33 | quantile75 = col.quantile(q=0.75) 34 | return (mean, std, median, quantile25, quantile75) 35 | 36 | def get_prediction_statistics(data_df): 37 | dsc_stats = get_column_statistics(data_df['DSC'].astype(float)) 38 | fpv_stats = get_column_statistics(data_df['FPV'].astype(float)) 39 | fnv_stats = get_column_statistics(data_df['FNV'].astype(float)) 40 | 41 | c1_sensitivity = data_df[f'TP_C1']/(data_df[f'TP_C1'] + data_df[f'FN_C1']) 42 | c2_sensitivity = data_df[f'TP_C2']/(data_df[f'TP_C2'] + data_df[f'FN_C2']) 43 | c3_sensitivity = data_df[f'TP_C3']/(data_df[f'TP_C3'] + data_df[f'FN_C3']) 44 | sens_c1_stats = get_column_statistics(c1_sensitivity) 45 | sens_c2_stats = get_column_statistics(c2_sensitivity) 46 | sens_c3_stats = get_column_statistics(c3_sensitivity) 47 | 48 | fp_c1_stats = get_column_statistics(data_df['FP_M1'].astype(float)) 49 | fp_c2_stats = get_column_statistics(data_df['FP_M2'].astype(float)) 50 | fp_c3_stats = get_column_statistics(data_df['FP_M3'].astype(float)) 51 | 52 | dsc_stats = [round(d, 2) for d in dsc_stats] 53 | fpv_stats = [round(d, 2) for d in fpv_stats] 54 | fnv_stats = [round(d, 2) for d in fnv_stats] 55 | sens_c1_stats = [round(d, 2) for d in sens_c1_stats] 56 | sens_c2_stats = [round(d, 2) for d in sens_c2_stats] 57 | sens_c3_stats = [round(d, 2) for d in sens_c3_stats] 58 | fp_c1_stats = [round(d, 0) for d in fp_c1_stats] 59 | fp_c2_stats = [round(d, 0) for d in fp_c2_stats] 60 | fp_c3_stats = [round(d, 0) for d in fp_c3_stats] 61 | 62 | print(f"DSC (Mean): {dsc_stats[0]} +/- {dsc_stats[1]}") 63 | print(f"DSC (Median): {dsc_stats[2]} [{dsc_stats[3]}, {dsc_stats[4]}]") 64 | print(f"FPV (Median): {fpv_stats[2]} [{fpv_stats[3]}, {fpv_stats[4]}]") 65 | print(f"FNV (Median): {fnv_stats[2]} [{fnv_stats[3]}, {fnv_stats[4]}]") 66 | print(f"Sensitivity - Criterion1 (Median): {sens_c1_stats[2]} [{sens_c1_stats[3]}, {sens_c1_stats[4]}]") 67 | print(f"FP - Criterion1 (Median): {fp_c1_stats[2]} [{fp_c1_stats[3]}, {fp_c1_stats[4]}]") 68 | print(f"Sensitivity 
- Criterion2 (Median): {sens_c2_stats[2]} [{sens_c2_stats[3]}, {sens_c2_stats[4]}]") 69 | print(f"FP - Criterion1 (Median): {fp_c2_stats[2]} [{fp_c2_stats[3]}, {fp_c2_stats[4]}]") 70 | print(f"Sensitivity - Criterion3 (Median): {sens_c3_stats[2]} [{sens_c3_stats[3]}, {sens_c3_stats[4]}]") 71 | print(f"FP - Criterion3 (Median): {fp_c3_stats[2]} [{fp_c3_stats[3]}, {fp_c3_stats[4]}]") 72 | print('\n') 73 | 74 | #%% 75 | def main(args): 76 | fold = args.fold 77 | network = args.network_name 78 | inputsize = args.input_patch_size 79 | experiment_code = f"{network}_fold{fold}_randcrop{inputsize}" 80 | preddir = os.path.join(RESULTS_FOLDER, 'predictions', f'fold{fold}', network, experiment_code) 81 | predpaths = sorted(glob(os.path.join(preddir, '*.nii.gz'))) 82 | gtpaths = sorted(list(pd.read_csv('./../data_split/test_filepaths.csv')['GTPATH'])) 83 | ptpaths = sorted(list(pd.read_csv('./../data_split/test_filepaths.csv')['PTPATH'])) # PET image paths (ptpaths) for calculating the detection metrics using criterion3 84 | 85 | imageids = [os.path.basename(path)[:-7] for path in gtpaths] 86 | TEST_DSCs, TEST_FPVs, TEST_FNVs = [], [], [] 87 | TEST_TP_criterion1, TEST_FP_criterion1, TEST_FN_criterion1 = [], [], [] 88 | TEST_TP_criterion2, TEST_FP_criterion2, TEST_FN_criterion2 = [], [], [] 89 | TEST_TP_criterion3, TEST_FP_criterion3, TEST_FN_criterion3 = [], [], [] 90 | 91 | 92 | for i in range(len(gtpaths)): 93 | gtpath = gtpaths[i] 94 | ptpath = ptpaths[i] 95 | predpath = predpaths[i] 96 | 97 | gtarray = get_3darray_from_niftipath(gtpath) 98 | ptarray = get_3darray_from_niftipath(ptpath) 99 | predarray = get_3darray_from_niftipath(predpath) 100 | spacing = get_spacing_from_niftipath(gtpath) 101 | 102 | dsc = calculate_patient_level_dice_score(gtarray, predarray) 103 | fpv = calculate_patient_level_false_positive_volume(gtarray, predarray, spacing) 104 | fnv = calculate_patient_level_false_negative_volume(gtarray, predarray, spacing) 105 | tp_c1, fp_c1, fn_c1 = calculate_patient_level_tp_fp_fn(gtarray, predarray, criterion='criterion1') 106 | tp_c2, fp_c2, fn_c2 = calculate_patient_level_tp_fp_fn(gtarray, predarray, criterion='criterion2', threshold=0.5) 107 | tp_c3, fp_c3, fn_c3 = calculate_patient_level_tp_fp_fn(gtarray, predarray, criterion='criterion3', ptarray=ptarray) 108 | 109 | TEST_DSCs.append(dsc) 110 | TEST_FPVs.append(fpv) 111 | TEST_FNVs.append(fnv) 112 | TEST_TP_criterion1.append(tp_c1) 113 | TEST_FP_criterion1.append(fp_c1) 114 | TEST_FN_criterion1.append(fn_c1) 115 | 116 | TEST_TP_criterion2.append(tp_c2) 117 | TEST_FP_criterion2.append(fp_c2) 118 | TEST_FN_criterion2.append(fn_c2) 119 | 120 | TEST_TP_criterion3.append(tp_c3) 121 | TEST_FP_criterion3.append(fp_c3) 122 | TEST_FN_criterion3.append(fn_c3) 123 | print(f"{imageids[i]}: DSC = {round(dsc, 4)}\nFPV = {round(fpv, 4)} ml\nFNV = {round(fnv, 4)} ml") 124 | 125 | save_testmetrics_dir = os.path.join(RESULTS_FOLDER, 'test_metrics', 'fold'+str(fold), network, experiment_code) 126 | os.makedirs(save_testmetrics_dir, exist_ok=True) 127 | save_testmetrics_fpath = os.path.join(save_testmetrics_dir, 'testmetrics.csv') 128 | 129 | data = np.column_stack( 130 | ( 131 | imageids, TEST_DSCs, TEST_FPVs, TEST_FNVs, 132 | TEST_TP_criterion1, TEST_FP_criterion1, TEST_FN_criterion1, 133 | TEST_TP_criterion2, TEST_FP_criterion2, TEST_FN_criterion2, 134 | TEST_TP_criterion3, TEST_FP_criterion3, TEST_FN_criterion3 135 | ) 136 | ) 137 | column_names = [ 138 | 'PatientID', 'DSC', 'FPV', 'FNV', 139 | 'TP_C1', 'FP_C1', 'FN_C1', 140 | 'TP_C2', 'FP_C2', 
'FN_C2', 141 | 'TP_C3', 'FP_C3', 'FN_C3', 142 | ] 143 | data_df = pd.DataFrame(data=data, columns=column_names) 144 | data_df.to_csv(save_testmetrics_fpath, index=False) 145 | 146 | 147 | 148 | 149 | if __name__ == "__main__": 150 | parser = argparse.ArgumentParser(description='Lymphoma PET/CT lesion segmentation using MONAI-PyTorch') 151 | parser.add_argument('--fold', type=int, default=0, metavar='fold', 152 | help='validation fold (default: 0), remaining folds will be used for training') 153 | parser.add_argument('--network-name', type=str, default='unet', metavar='netname', 154 | help='network name for training (default: unet)') 155 | parser.add_argument('--input-patch-size', type=int, default=192, metavar='inputsize', 156 | help='size of cropped input patch for training (default: 192)') 157 | args = parser.parse_args() 158 | main(args) 159 | 160 | # %% 161 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Lymphoma lesion segmentation and quantitation from FDG PET/CT images using deep neural networks 2 | 3 | ## Introduction 4 | 5 | Please cite the following paper when you use our code:
6 | > S. Ahamed, Y. Xu, C. Gowdy, I. Bloise, D. Wilson, P. Martineau, F. Bénard, F. Yousefirizi, R. Dodhia, J. M. Lavista, W. B. Weeks, C. F. Uribe, A. Rahmim, 7 | > _Comprehensive Evaluation and Insights into the Use of Deep Neural Networks to Detect and Quantify Lymphoma Lesions in PET/CT Images_, 8 | > [arXiv:2311.09614](https://arxiv.org/pdf/2311.09614.pdf). 9 | 10 | 11 | Lymphoma lesion segmentation and quantitation play a pivotal role in the diagnosis, treatment planning, and monitoring of lymphoma patients. Accurate segmentation allows for the precise delineation of pathological regions, aiding clinicians in assessing disease extent and progression. Moreover, lesion quantitation, such as measuring lesion size and metabolic activity, provides critical information for treatment response evaluation. Deep learning-based segmentation methods have emerged as a game-changer in this domain, offering the potential to automate and standardize lesion delineation, reducing inter-observer variability and saving valuable clinical time. 12 | 13 |
14 | (Figure 1 image) 15 |
16 | 17 | 18 | Figure 1: Visualization of performances of networks, UNet, SegResNet, DynUNet, and SwinUNETR on the coronal maximum intensity projection views for 8 representative cases. (a)-(d) show cases where all the networks had similar performances, while (e)-(h) show cases where the networks had dissimilar performances, often due to some of them predicting large false positive volumes (FPVs). Some of the prominent FPVs have been indicated with blue arrows. The number on the bottom-right of each plot shows the DSC between the 3D predicted mask and the ground truth. 19 | 20 | 21 |
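As a point of reference, the patient-level DSC used throughout this work can be computed in a few lines of NumPy once a predicted mask and its ground truth are loaded as binary 3D arrays; the repository's own implementation is in [/metrics/metrics.py](/metrics/metrics.py). The snippet below is only a minimal sketch with hypothetical file paths, and it assumes both masks already share the same geometry.

```
import numpy as np
import SimpleITK as sitk

def dice_score(gt, pred):
    # patient-level Dice similarity coefficient between two binary 3D masks
    gt, pred = gt.astype(bool), pred.astype(bool)
    denom = gt.sum() + pred.sum()
    return 2.0 * np.logical_and(gt, pred).sum() / denom if denom > 0 else 1.0

# hypothetical paths for one patient's ground truth and predicted masks
gt = sitk.GetArrayFromImage(sitk.ReadImage('Patient0003_20190402_gt.nii.gz'))
pred = sitk.GetArrayFromImage(sitk.ReadImage('Patient0003_20190402_pred.nii.gz'))
print(f'DSC = {dice_score(gt, pred):.4f}')
```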
22 | 23 | In this work, we trained four deep neural networks, **UNet**, **SegResNet**, **DynUNet**, and **SwinUNETR** (adapted from MONAI [1]), for the segmentation of lesions from multi-institutional FDG PET/CT images. We used a large and diverse whole-body PET/CT dataset with a total of **611 cases** coming from four retrospective cohorts, as given in Table 1 below. The first three cohorts (internal cohorts) are privately-owned lymphoma datasets, while the fourth cohort is a subset of the public dataset acquired from [2]. Our models were trained/validated on 80% of the data from the former three cohorts. The remaining 20% of the data from these three cohorts was used as the internal test set, while the fourth cohort was solely used as an external (unseen) test set. 24 | 25 |
26 | (Table 1 image) 27 |
28 | 29 | 30 | 31 | ## How to get started? 32 | Follow the instructions given below to set up the necessary conda environment, install packages, process your dataset into the required format so that it can be accepted as input by our code, train models, perform inference with trained models, compute evaluation metrics (for segmentation and detection) on the test set, and calculate lesion measures from the predicted 3D lesion masks. 33 | 34 | - **Clone the repository, create conda environment and install necessary packages:** 35 | The first step is to clone this GitHub codebase to your local machine, create a conda environment, and install all the necessary packages. For this step, follow the detailed instructions in [/documentation/conda_env.md](/documentation/conda_env.md). 36 | 37 | - **Get your dataset in the required format:** 38 | After you have successfully executed the previous step, it is time to get your PET/CT and binary mask data into a format that our code can use as input. We have tested this codebase only with NIFTI images (.nii.gz), and provide a script for converting your DICOM PET/CT volumes and RTSTRUCTs to NIFTI format. This data directory setup is loosely based on [nnUNet](https://github.com/MIC-DKFZ/nnUNet/tree/master). For this step, follow the detailed instructions in [/documentation/dataset_format.md](/documentation/dataset_format.md). 39 | 40 | - **Train a model:** 41 | Once you have the data folders and data set up as described in the previous step, you can proceed with training a model. In this work, we have used UNet [], SegResNet [], DynUNet [], and SwinUNETR [] adapted from the MONAI package. For detailed instructions on initiating and running the training script, read [/documentation/trainddp.md](/documentation/trainddp.md). 42 | 43 | This repository is capable of performing 5-fold cross-validation training, followed by inference on the unseen test set, although in the paper above, we only used a single split of train, validation and test images. It is also worth reading [/documentation/results_format.md](/documentation/results_format.md) to understand the overall data and result directories created at different steps of the pipeline and how the different types of results are stored. 44 | 45 | - **Perform inference on test images using a trained model:** 46 | Once you have trained your segmentation model(s), you can use them to perform inference on the test set images. In this step, you will save the network predictions as NIFTI files to your local machine. The predicted masks will be in the same geometry as the corresponding ground truth segmentation masks. For this step, follow the detailed instructions given in [/documentation/inference.md](/documentation/inference.md). 47 | 48 | - **Compute test set evaluation metrics (for detection and segmentation):** 49 | Once the test set predicted masks have been saved, you can proceed to compute the evaluation metrics between the predicted and ground truth segmentation masks. In this work, we use three segmentation metrics: Dice similarity coefficient (DSC), false positive volume (FPV) in ml, and false negative volume (FNV) in ml. We also define three detection-based criteria, `Criterion1`, `Criterion2`, and `Criterion3`. Briefly, `Criterion1` labels a predicted lesion as true positive (TP) if it has a non-zero overlap with any of the ground truth lesions. `Criterion2` labels a predicted lesion as TP if it has an intersection-over-union (IoU) > 0.5 with any of the ground truth lesions.
`Criterion3` labels a predicted lesion as TP if it overlaps with a ground truth lesion's SUVmax voxel. For `Criterion2` and `Criterion3`, we first perform a matching to pair up a ground truth lesion with a predicted lesion via IoU maximization. These metrics have been defined in [/metrics/metrics.py](/metrics/metrics.py). 50 | 51 | To run the evaluation script, follow the detailed instructions given in [/documentation/calculate_test_metrics.md](/documentation/calculate_test_metrics.md). 52 | 53 | 54 | - **Generate lesion measures from the predicted lesion masks:** 55 | We further use the predicted lesion masks to compute predicted lesion measures. In this work, we focus on six patient-level lesion measures, namely lesion SUVmean, lesion SUVmax, number of lesions, total metabolic tumor volume (TMTV) in ml, total lesion glycolysis (TLG) in ml, and lesion dissemination (Dmax) in cm. All these lesion measures have been shown to be prognostic biomarkers in lymphoma patients in several studies [3-5]. These predicted lesion measures can be correlated with the ground truth lesion measures to assess how good the trained models are at predicting these clinically-relevant metrics of interest. 56 | 57 | To run the lesion measures generation script, follow the detailed instructions given in [/documentation/generate_lesion_measures.md](/documentation/generate_lesion_measures.md). 58 | 59 | 60 | # Acknowledgments 61 |
62 | (acknowledgments image) 63 |
64 | 65 | 66 | # References 67 | [1] 68 | MONAI: Medical Open Network for AI, 69 | *AI Toolkit for Healthcare Imaging* 70 | [![DOI](/documentation/assets/monai_zenodo.svg)](https://zenodo.org/record/7459814) 71 | 72 | [2] 73 | Gatidis S, Kuestner T., "A whole-body FDG-PET/CT dataset with manually annotated tumor lesions (FDG-PET-CT-Lesions)" [Dataset] (2022), The Cancer Imaging Archive. 74 | [![DOI](/documentation/assets/autopet_data_zenodo.svg)](https://doi.org/10.7937/gkr0-xv29) 75 | 76 | [3] 77 | K. Okuyucu et al. "Prognosis estimation under the light of metabolic tumor parameters on initial FDG-PET/CT 78 | in patients with primary extranodal lymphoma". en. In: Radiol. Oncol. 50.4 (2016), pp. 360–369 79 | [(doi)](https://doi.org/10.1515/raon-2016-0045) 80 | 81 | [4] 82 | X. Xia et al. "Baseline SUVmax of 18F-FDG PET-CT indicates prognosis of extranodal natural killer/T-cell 83 | lymphoma", en. In: Medicine (Baltimore) 99.37 (2020), e22143. 84 | [(doi)](https://doi.org/10.1097%2FMD.0000000000022143) 85 | 86 | [5] 87 | A.-S. Cottereau et al. "18F-FDG PET dissemination features in diffuse large B-cell lymphoma are predictive of outcome". en. In: J. Nucl. Med. 61.1 (2020),pp. 40–45. 88 | [(doi)](https://doi.org/10.2967/jnumed.119.229450) 89 | -------------------------------------------------------------------------------- /documentation/results_format.md: -------------------------------------------------------------------------------- 1 | # Results folder format 2 | In this work, results include: trained models, training and validation logs, predicted masks, metrics on the test set, etc. These will all be written to a folder called `results` as defined in the variable `RESULTS_FOLDER` in the `config.py` (`./../config.py`) file. This folder will be next to the `data` folder, as explained in [dataset_format.md](LINK). 3 | 4 | ## Results folder/filenaming convention 5 | 6 | ### `logs` and `models` folders 7 | While a model is training (see `trainddp.md` for details), the following two folders will be created within `results` folder: `logs` and `models` and the directory structure may look like this: 8 | 9 | └───lymphoma.segmentation/ 10 | ├── data 11 | └── results 12 | ├── logs 13 | │ ├── fold0 14 | │ │ └── unet 15 | │ │ └── unet_fold0_rancrop192 16 | │ │ ├── trainlog_gpu0.csv 17 | │ │ ├── trainlog_gpu1.csv 18 | │ │ ├── validlog_gpu0.csv 19 | │ │ └── validlog_gpu1.csv 20 | │ └── fold1 21 | │ └── unet 22 | │ └── unet_fold1_rancrop192 23 | │ ├── trainlog_gpu0.csv 24 | │ ├── trainlog_gpu1.csv 25 | │ ├── validlog_gpu0.csv 26 | │ └── validlog_gpu1.csv 27 | ├── models 28 | │ ├── fold0 29 | │ │ └── unet 30 | │ │ └── unet_fold0_rancrop192 31 | │ │ ├── model_ep=0002.csv 32 | │ │ ├── model_ep=0004.csv 33 | │ │ ├── model_ep=0006.csv 34 | │ │ ├── model_ep=0008.csv 35 | │ │ ├── ... 36 | │ └── fold1 37 | │ └── unet 38 | │ └── unet_fold1_rancrop192 39 | │ ├── model_ep=0002.csv 40 | │ ├── model_ep=0004.csv 41 | │ ├── model_ep=0006.csv 42 | │ ├── model_ep=0008.csv 43 | │ ├── ... 44 | ├── ... 45 | 46 | 47 | This directory stucture shows that so far, the model `unet` has been (or is being) trained on two folds: `fold0` and `fold1`. Within the `logs` or `models` folder, the directory structure is `{logs_or_models}/fold{fold}/{network_name}/{experiment_code}`, where the `experiment_code` is defined as `{network_name}_fold{fold}_randcrop{input_patch_size}`. 
The above directory structure shows that for both folds `fold0` and `fold1`, the `experiment_code` is `{unet}_fold{0 or 1}_randcrop{192}`, meaning we trained/are training `unet` for fold 0 or 1 with an `input_patch_size = 192`. If you train other networks (like `segresnet`, `dynunet`, or `swinunetr`, as was the case in this work), they will appear accordingly within the framework of the above directory structure. 48 | 49 | Since the training in this work was carried out using PyTorch's `torch.nn.parallel.DistributedDataParallel`, the `trainlog_gpu0.csv`, `trainlog_gpu1.csv`, `validlog_gpu0.csv`, and `validlog_gpu1.csv` files store the training and validation logs accumulated on the GPUs with device IDs 0 and 1. All the `validlog_gpu[i].csv` files are identical and hence redundant, so you can use any one of them for analysis (we will resolve this to save only one file in later versions). All the `trainlog_gpu[i].csv` files are NOT identical; each file separately stores the loss accumulated from the data distributed to its GPU. In our work, we used 4 GPUs, but the above directory structure only shows training on 2 GPUs for the purpose of illustration. A typical `trainlog_gpu[i].csv` file looks like this: 50 | 51 | ``` 52 | Loss 53 | 0.6536665889951918 54 | 0.6449973914358351 55 | 0.6385666595564948 56 | 0.6357755064964294 57 | ... 58 | ``` 59 | 60 | where each line shows the mean `DiceLoss` on the training inputs (averaged over all batches) at epoch `j+1` with `j` in the range `np.arange(0, epochs)`; `epochs` is the total number of epochs for which we are running the training. Similarly, a typical `validlog_gpu[i].csv` file looks like this: 61 | 62 | ``` 63 | Metric 64 | 0.0011193332029506564 65 | 0.001015653251670301 66 | ... 67 | ``` 68 | where each line shows the mean `DiceMetric` on the validation inputs at epoch `j` with `j` in the range `np.arange(2, epochs+1, val_interval)`, where `epochs` is the total number of epochs for which we are running the training and `val_interval` (default=2) is the epoch interval at which we run validation, compute the Dice metric, and save the trained model. The variables `val_interval`, `epochs`, etc. can be set in the `train.sh` script which is used for running the training. 69 | 70 | The trained models are saved in a similar way under the corresponding /fold/network/experiment_code folder with filenames `model_ep=0002.pth`, `model_ep=0004.pth`, etc. In this case, `val_interval = 2` (for example), so the models are saved at an interval of 2 starting from the second epoch. 71 | 72 | 73 | ### `predictions` and `test_metrics` folders 74 | After the trained models are used for predicting the segmentation masks on test images (see `inference.md` for details), based on the `fold`, `network_name` and `experiment_code`, the predicted masks will be written to `LYMPHOMA_SEGMENTATION_FOLDER/results/predictions/fold{fold}/{network_name}/{experiment_code}`. Once the predicted masks have been generated and saved, the metrics computed on the test set using the test ground truth and predicted masks will be written to `LYMPHOMA_SEGMENTATION_FOLDER/results/test_metrics/fold{fold}/{network_name}/{experiment_code}/testmetrics.csv`. We compute three segmentation metrics: `Dice similarity coefficient (DSC)`, `false positive volume (FPV) in ml`, and `false negative volume (FNV) in ml`. We also compute detection metrics such as `true positive (TP)`, `false positive (FP)`, and `false negative (FN)` lesion detections via three different criteria labeled as `Criterion1`, `Criterion2`, and `Criterion3`.
These metrics have been defined in [metrics/metrics.py](./../metrics/metrics.py). After running inference and calculating the test metrics, the (relevant) directory structure may look like: 75 | 76 | └───lymphoma.segmentation/ 77 | ├── data 78 | └── results 79 | ├── logs 80 | ├── models 81 | ├── predictions 82 | │ ├── fold0 83 | │ │ └── unet 84 | │ │ └── unet_fold0_randcrop192 85 | │ │ ├── Patient0003_20190402.nii.gz 86 | │ │ ├── Patient0004_20160204.nii.gz 87 | │ │ ├── ... 88 | │ └── fold1 89 | │ └── unet 90 | │ └── unet_fold1_randcrop192 91 | │ ├── Patient0003_20190402.nii.gz 92 | │ ├── Patient0004_20160204.nii.gz 93 | │ ├── ... 94 | │ 95 | └── test_metrics 96 | ├── fold0 97 | │ └── unet 98 | │ └── unet_fold0_randcrop192 99 | │ └── testmetrics.csv 100 | └── fold1 101 | └── unet 102 | └── unet_fold1_randcrop192 103 | └── testmetrics.csv 104 | 105 | The predicted masks are in the same geometry (same size, spacing, origin, direction) as their corresponding ground truth masks. A typical `testmetrics.csv` file looks like: 106 | 107 | | PatientID | DSC | FPV | FNV | TP_C1 | FP_C1 | FN_C1 | TP_C2 | FP_C2 | FN_C2 | TP_C3 | FP_C3 | FN_C3 | 108 | |-----------|-----|-----|-----|-------|-------|-------|-------|-------|-------|-------|-------|-------| 109 | | Patient0003_20190402 | 0.7221043699618158 | 17.5164623503173 | 1.173559512304143 | 3 | 6 | 2 | 2 | 7 | 3 | 3 | 6 | 2 | 110 | | Patient0004_20160204 | 0.0807955251709131 | 53.4186903933997 | 5.563541391664086 | 2 | 8 | 1 | 0 | 10 | 3 | 2 | 8 | 1 | 111 | 112 | Here, all the metrics are at the patient level and FPV and FNV are expressed in ml. 113 | 114 | ### `test_lesion_measures` folder 115 | In this work, we have performed further analyses on the predicted segmentation masks on the test set and compared them to the ground truth masks. These include comparing the patient-level lesion SUVmean, lesion SUVmax, number of lesions, total metabolic tumor volume (TMTV) in ml, total lesion glycolysis (TLG) in ml, lesion dissemination (Dmax) in cm. These metrics have been defined in [metrics/metrics.py](./../metrics/metrics.py). The test set predicted lesion measures are written to `LYMPHOMA_SEGMENTATION_FOLDER/results/test_lesion_measures/fold{fold}/{network_name}/{experiment_code}/testlesionmeasures.csv`. 
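For orientation, the snippet below is a minimal sketch of how a few of these patient-level measures (lesion SUVmean, SUVmax, TMTV, and TLG) can be derived from a predicted binary mask and the corresponding PET image in SUV units; the reference implementations used to produce `testlesionmeasures.csv` are in [metrics/metrics.py](./../metrics/metrics.py). The file paths here are placeholders, both images are assumed to share the same geometry, and lesion counting and Dmax additionally require a connected-component analysis (e.g., via the `connected-components-3d` package listed in [environment.yml](./../environment.yml)) that is not shown.

```
import numpy as np
import SimpleITK as sitk

pet_img = sitk.ReadImage('Patient0003_20190402_0001.nii.gz')   # PET in SUV units (placeholder path)
mask_img = sitk.ReadImage('Patient0003_20190402.nii.gz')       # predicted binary mask (placeholder path)

pet = sitk.GetArrayFromImage(pet_img)
mask = sitk.GetArrayFromImage(mask_img).astype(bool)
voxel_volume_ml = np.prod(pet_img.GetSpacing()) / 1000.0       # mm^3 per voxel -> ml

suv_mean = float(pet[mask].mean())       # patient-level lesion SUVmean
suv_max = float(pet[mask].max())         # patient-level lesion SUVmax
tmtv_ml = mask.sum() * voxel_volume_ml   # total metabolic tumor volume (ml)
tlg = suv_mean * tmtv_ml                 # total lesion glycolysis
print(suv_mean, suv_max, tmtv_ml, tlg)
```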
After generating `testlesionmeasures.csv` files, the relevant directory structure may look like: 116 | 117 | └───lymphoma.segmentation/ 118 | ├── data 119 | └── results 120 | ├── logs 121 | ├── models 122 | ├── predictions 123 | ├── test_metrics 124 | └── test_lesion_measures 125 | ├── fold0 126 | │ └── unet 127 | │ └── unet_fold0_randcrop192 128 | │ └── testlesionmeasures.csv 129 | └── fold1 130 | └── unet 131 | └── unet_fold1_randcrop192 132 | └── testlesionmeasures.csv 133 | 134 | A typical `testlesionmeasures.csv` file looks like: 135 | 136 | | PatientID | DSC | SUVmean_orig | SUVmean_pred | SUVmax_orig | SUVmax_pred | LesionCount_orig | LesionCount_pred | TMTV_orig | TMTV_pred | TLG_orig | TLG_pred | Dmax_orig | Dmax_pred | 137 | |-----------|-----|--------------|--------------|-------------|-------------|------------------|------------------|-----------|-----------|----------|----------|----------|-----------| 138 | | Patient0003_20190402 | 0.7221043699618158 | 2.935304139385291 | 4.362726242681123 | 6.1822732035904515 | 7.827266273892102 | 3 | 4 | 13.691527643548337 | 18.6272625128359097 | 40.18879776661558 | 50.2728492927217289 | 15.837606584884108 | 25.82763813918739 | 139 | | Patient0004_20160204 | 0.0807955251709131 | 8.72882540822585 | 12.71524350987 | 40.294842200490244 | 45.9483628492382 | 9 | 6 | 20.732884717373196 | 16.756373846353748 | 180.9737309068245 | 120.2387139879348 | 14.737477375372881 | 7.652628627281008 | 140 | 141 | Here, all the lesion measures are at the patient level. TMTV and TLG are expressed in ml and Dmax in cm. 142 | -------------------------------------------------------------------------------- /segmentation/trainddp.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) Microsoft Corporation. All rights reserved. 3 | Licensed under the MIT License. 
4 | ''' 5 | 6 | from monai.transforms import ( 7 | AsDiscrete, 8 | Compose, 9 | ) 10 | import argparse 11 | from monai.inferers import sliding_window_inference 12 | from monai.data import CacheDataset, DataLoader, decollate_batch 13 | import torch 14 | import matplotlib.pyplot as plt 15 | import os 16 | import pandas as pd 17 | import time 18 | from torch.utils.data.distributed import DistributedSampler 19 | from torch.nn.parallel import DistributedDataParallel as DDP 20 | import torch.distributed as dist 21 | import os 22 | from initialize_train import ( 23 | create_data_split_files, 24 | get_train_valid_data_in_dict_format, 25 | get_train_transforms, 26 | get_valid_transforms, 27 | get_model, 28 | get_loss_function, 29 | get_optimizer, 30 | get_scheduler, 31 | get_metric, 32 | get_validation_sliding_window_size 33 | ) 34 | 35 | import sys 36 | config_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..") 37 | sys.path.append(config_dir) 38 | from config import RESULTS_FOLDER 39 | torch.backends.cudnn.benchmark = True 40 | #%% 41 | def ddp_setup(): 42 | dist.init_process_group(backend='nccl', init_method="env://") 43 | 44 | def convert_to_4digits(str_num): 45 | if len(str_num) == 1: 46 | new_num = '000' + str_num 47 | elif len(str_num) == 2: 48 | new_num = '00' + str_num 49 | elif len(str_num) == 3: 50 | new_num = '0' + str_num 51 | else: 52 | new_num = str_num 53 | return new_num 54 | 55 | #%% 56 | def load_train_objects(args): 57 | train_data, valid_data = get_train_valid_data_in_dict_format(args.fold) 58 | train_transforms = get_train_transforms(args.input_patch_size) 59 | valid_transforms = get_valid_transforms() 60 | model = get_model(args.network_name, args.input_patch_size) 61 | optimizer = get_optimizer(model, learning_rate=args.lr, weight_decay=args.wd) 62 | loss_function = get_loss_function() 63 | scheduler = get_scheduler(optimizer, args.epochs) 64 | metric = get_metric() 65 | 66 | return ( 67 | train_data, 68 | valid_data, 69 | train_transforms, 70 | valid_transforms, 71 | model, 72 | loss_function, 73 | optimizer, 74 | scheduler, 75 | metric 76 | ) 77 | 78 | 79 | def prepare_dataset(data, transforms, args): 80 | dataset = CacheDataset(data=data, transform=transforms, cache_rate=args.cache_rate, num_workers=args.num_workers) 81 | return dataset 82 | 83 | 84 | def main_worker(save_models_dir, save_logs_dir, args): 85 | # init_process_group 86 | ddp_setup() 87 | # get local rank on the GPU 88 | local_rank = int(dist.get_rank()) 89 | if local_rank == 0: 90 | print(f"Training {args.network_name} on fold {args.fold}") 91 | print(f"The models will be saved in {save_models_dir}") 92 | print(f"The training/validation logs will be saved in {save_logs_dir}") 93 | 94 | # get all training and validation objects 95 | train_data, valid_data, train_transforms, valid_transforms, model, loss_function, optimizer, scheduler, metric = load_train_objects(args) 96 | 97 | # get dataset of object-type CacheDataset 98 | train_dataset = prepare_dataset(train_data, train_transforms, args) 99 | valid_dataset = prepare_dataset(valid_data, valid_transforms, args) 100 | 101 | # get DistributedSampler instances for both training and validation dataloader 102 | # this will be used to split data into different GPUs 103 | train_sampler = DistributedSampler(dataset=train_dataset, shuffle=True) 104 | valid_sampler = DistributedSampler(dataset=valid_dataset, shuffle=False) 105 | 106 | # initializing train and valid dataloaders 107 | train_dataloader = DataLoader( 108 | train_dataset, 109 | 
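        # shuffle is False here because shuffling is delegated to the DistributedSampler
        # created above with shuffle=True; train_sampler.set_epoch() is called at every
        # epoch in the training loop so that each GPU sees a different shuffled shard.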
batch_size=args.train_bs, 110 | pin_memory=True, 111 | shuffle=False, 112 | sampler=train_sampler, 113 | num_workers=args.num_workers 114 | ) 115 | valid_dataloader = DataLoader( 116 | valid_dataset, 117 | batch_size=1, 118 | pin_memory=True, 119 | shuffle=False, 120 | sampler=valid_sampler, 121 | num_workers=args.num_workers 122 | ) 123 | 124 | post_pred = Compose([AsDiscrete(argmax=True, to_onehot=2)]) 125 | post_label = Compose([AsDiscrete(to_onehot=2)]) 126 | 127 | # filepaths for storing training and validation logs from different GPUs 128 | trainlog_fpath = os.path.join(save_logs_dir, f'trainlog_gpu{local_rank}.csv') 129 | validlog_fpath = os.path.join(save_logs_dir, f'validlog_gpu{local_rank}.csv') 130 | 131 | # initialize the GPU device 132 | device = torch.device(f"cuda:{local_rank}") 133 | torch.cuda.set_device(device) 134 | 135 | # number of epochs and epoch interval for running validation 136 | max_epochs = args.epochs 137 | val_interval = args.val_interval 138 | 139 | # push models to device 140 | model = model.to(device) 141 | 142 | epoch_loss_values = [] 143 | metric_values = [] 144 | 145 | # wrap the model with DDP 146 | model = DDP(model, device_ids=[device]) 147 | 148 | experiment_start_time = time.time() 149 | 150 | for epoch in range(max_epochs): 151 | epoch_start_time = time.time() 152 | print(f"[GPU{local_rank}]: Running training: epoch = {epoch + 1}") 153 | model.train() 154 | epoch_loss = 0 155 | step = 0 156 | train_sampler.set_epoch(epoch) 157 | for batch_data in train_dataloader: 158 | step += 1 159 | inputs, labels = ( 160 | batch_data['CTPT'].to(device), 161 | batch_data['GT'].to(device), 162 | ) 163 | optimizer.zero_grad() 164 | outputs = model(inputs) 165 | loss = loss_function(outputs, labels) 166 | loss.backward() 167 | optimizer.step() 168 | epoch_loss += loss.item() 169 | epoch_loss /= step 170 | print(f"[GPU:{local_rank}]: epoch {epoch + 1}/{max_epochs}: average loss: {epoch_loss:.4f}") 171 | epoch_loss_values.append(epoch_loss) 172 | 173 | # steps forward the CosineAnnealingLR scheduler 174 | scheduler.step() 175 | 176 | # update the training log file 177 | epoch_loss_values_df = pd.DataFrame(data=epoch_loss_values, columns=['Loss']) 178 | epoch_loss_values_df.to_csv(trainlog_fpath, index=False) 179 | 180 | 181 | if (epoch + 1) % val_interval == 0: 182 | print(f"[GPU{local_rank}]: Running validation") 183 | model.eval() 184 | with torch.no_grad(): 185 | for val_data in valid_dataloader: 186 | val_inputs, val_labels = ( 187 | val_data['CTPT'].to(device), 188 | val_data['GT'].to(device), 189 | ) 190 | roi_size = get_validation_sliding_window_size(args.input_patch_size) 191 | sw_batch_size = args.sw_bs 192 | val_outputs = sliding_window_inference( 193 | val_inputs, roi_size, sw_batch_size, model) 194 | val_outputs = [post_pred(i) for i in decollate_batch(val_outputs)] 195 | val_labels = [post_label(i) for i in decollate_batch(val_labels)] 196 | # compute metric for current iteration 197 | metric(y_pred=val_outputs, y=val_labels) 198 | 199 | # aggregate the final mean dice result 200 | metric_val = metric.aggregate().item() 201 | metric.reset() 202 | metric_values.append(metric_val) 203 | metric_values_df = pd.DataFrame(data=metric_values, columns=['Metric']) 204 | metric_values_df.to_csv(validlog_fpath, index=False) 205 | 206 | print(f"[GPU:{local_rank}] SAVING MODEL at epoch: {epoch + 1}; Mean DSC: {metric_val:.4f}") 207 | savepath = os.path.join(save_models_dir, "model_ep="+convert_to_4digits(str(int(epoch + 1)))+".pth") 208 | 
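            # the model is wrapped in DistributedDataParallel, so only model.module's
            # weights are saved; this lets the checkpoint be loaded into a plain,
            # un-wrapped network at inference time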
torch.save(model.module.state_dict(), savepath) 209 | 210 | epoch_end_time = (time.time() - epoch_start_time)/60 211 | print(f"[GPU:{local_rank}]: Epoch {epoch + 1} time: {round(epoch_end_time,2)} min") 212 | 213 | experiment_end_time = (time.time() - experiment_start_time)/(60*60) 214 | print(f"[GPU:{local_rank}]: Total time: {round(experiment_end_time,2)} hr") 215 | 216 | dist.destroy_process_group() 217 | 218 | def main(args): 219 | os.environ['OMP_NUM_THREADS'] = '6' 220 | fold = args.fold 221 | network = args.network_name 222 | inputsize = f'randcrop{args.input_patch_size}' 223 | 224 | experiment_code = f"{network}_fold{fold}_{inputsize}" 225 | 226 | #save models folder 227 | save_models_dir = os.path.join(RESULTS_FOLDER,'models') 228 | save_models_dir = os.path.join(save_models_dir, 'fold'+str(fold), network, experiment_code) 229 | os.makedirs(save_models_dir, exist_ok=True) 230 | 231 | # save train and valid logs folder 232 | save_logs_dir = os.path.join(RESULTS_FOLDER,'logs') 233 | save_logs_dir = os.path.join(save_logs_dir, 'fold'+str(fold), network, experiment_code) 234 | os.makedirs(save_logs_dir, exist_ok=True) 235 | 236 | main_worker(save_models_dir, save_logs_dir, args) 237 | 238 | 239 | 240 | if __name__ == "__main__": 241 | # create datasplit files for train and test images 242 | # follow all the instructions for dataset directory creation and images/labels file names as given in: LINK 243 | create_data_split_files() 244 | parser = argparse.ArgumentParser(description='Lymphoma PET/CT lesion segmentation using MONAI-PyTorch') 245 | parser.add_argument('--fold', type=int, default=0, metavar='fold', 246 | help='validation fold (default: 0), remaining folds will be used for training') 247 | parser.add_argument('--network-name', type=str, default='unet', metavar='netname', 248 | help='network name for training (default: unet)') 249 | parser.add_argument('--epochs', type=int, default=500, metavar='epochs', 250 | help='number of epochs to train (default: 10)') 251 | parser.add_argument('--input-patch-size', type=int, default=192, metavar='inputsize', 252 | help='size of cropped input patch for training (default: 192)') 253 | parser.add_argument('--train-bs', type=int, default=1, metavar='train-bs', 254 | help='mini-batchsize for training (default: 1)') 255 | parser.add_argument('--num_workers', type=int, default=2, metavar='nw', 256 | help='num_workers for train and validation dataloaders (default: 2)') 257 | parser.add_argument('--cache-rate', type=float, default=0.1, metavar='cr', 258 | help='cache_rate for CacheDataset from MONAI (default=0.1)') 259 | parser.add_argument('--lr', type=float, default=2e-4, metavar='lr', 260 | help='initial learning rate for AdamW optimizer (default=2e-4); Cosine scheduler will decrease this to 0 in args.epochs epochs') 261 | parser.add_argument('--wd', type=float, default=1e-5, metavar='wd', 262 | help='weight-decay for AdamW optimizer (default=1e-5)') 263 | parser.add_argument('--val-interval', type=int, default=2, metavar='val-interval', 264 | help='epochs interval for which validation will be performed (default=2)') 265 | parser.add_argument('--sw-bs', type=int, default=2, metavar='sw-bs', 266 | help='batchsize for sliding window inference (default=2)') 267 | args = parser.parse_args() 268 | 269 | main(args) 270 | 271 | -------------------------------------------------------------------------------- /segmentation/initialize_train.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 
Microsoft Corporation. All rights reserved. 3 | Licensed under the MIT License. 4 | ''' 5 | from monai.transforms import ( 6 | EnsureChannelFirstd, 7 | Compose, 8 | CropForegroundd, 9 | LoadImaged, 10 | Orientationd, 11 | RandCropByPosNegLabeld, 12 | DeleteItemsd, 13 | Spacingd, 14 | RandAffined, 15 | ConcatItemsd, 16 | ScaleIntensityRanged, 17 | ResizeWithPadOrCropd, 18 | Invertd, 19 | AsDiscreted, 20 | SaveImaged, 21 | 22 | ) 23 | from monai.networks.nets import UNet, SegResNet, DynUNet, SwinUNETR, UNETR, AttentionUnet 24 | from monai.networks.layers import Norm 25 | from monai.metrics import DiceMetric 26 | from monai.losses import DiceLoss 27 | import torch 28 | import matplotlib.pyplot as plt 29 | from glob import glob 30 | import pandas as pd 31 | import numpy as np 32 | from torch.optim.lr_scheduler import CosineAnnealingLR 33 | import os 34 | import sys 35 | config_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..") 36 | sys.path.append(config_dir) 37 | from config import DATA_FOLDER, WORKING_FOLDER 38 | #%% 39 | def convert_to_4digits(str_num): 40 | if len(str_num) == 1: 41 | new_num = '000' + str_num 42 | elif len(str_num) == 2: 43 | new_num = '00' + str_num 44 | elif len(str_num) == 3: 45 | new_num = '0' + str_num 46 | else: 47 | new_num = str_num 48 | return new_num 49 | 50 | def create_dictionary_ctptgt(ctpaths, ptpaths, gtpaths): 51 | data = [] 52 | for i in range(len(gtpaths)): 53 | ctpath = ctpaths[i] 54 | ptpath = ptpaths[i] 55 | gtpath = gtpaths[i] 56 | data.append({'CT':ctpath, 'PT':ptpath, 'GT':gtpath}) 57 | return data 58 | 59 | def remove_all_extensions(filename): 60 | while True: 61 | name, ext = os.path.splitext(filename) 62 | if ext == '': 63 | return name 64 | filename = name 65 | #%% 66 | def create_data_split_files(): 67 | """Creates filepaths data for training/validation and test images and saves 68 | them as `train_filepaths.csv` and `test_filepaths.csv` files under WORKING_FOLDER/data_split/; 69 | all training images will be assigned a FoldID specifying which fold (out of the 5 folds) 70 | the image belongs to. 
If the `train_filepaths.csv` and `test_filepaths.csv` already exist, 71 | this function is skipped 72 | """ 73 | train_filepaths = os.path.join(WORKING_FOLDER, 'data_split', 'train_filepaths.csv') 74 | test_filepaths = os.path.join(WORKING_FOLDER, 'data_split', 'test_filepaths.csv') 75 | if os.path.exists(train_filepaths) and os.path.exists(test_filepaths): 76 | return 77 | else: 78 | data_split_folder = os.path.join(WORKING_FOLDER, 'data_split') 79 | os.makedirs(data_split_folder, exist_ok=True) 80 | 81 | imagesTr = os.path.join(DATA_FOLDER, 'imagesTr') 82 | labelsTr = os.path.join(DATA_FOLDER, 'labelsTr') 83 | 84 | ctpaths = sorted(glob(os.path.join(imagesTr, '*0000.nii.gz'))) 85 | ptpaths = sorted(glob(os.path.join(imagesTr, '*0001.nii.gz'))) 86 | gtpaths = sorted(glob(os.path.join(labelsTr, '*.nii.gz'))) 87 | imageids = [remove_all_extensions(os.path.basename(path)) for path in gtpaths] 88 | 89 | n_folds = 5 90 | part_size = len(imageids) // n_folds 91 | remaining_elements = len(imageids) % n_folds 92 | start = 0 93 | train_folds = [] 94 | for i in range(n_folds): 95 | end = start + part_size + (1 if i < remaining_elements else 0) 96 | train_folds.append(imageids[start:end]) 97 | start = end 98 | 99 | fold_sizes = [len(fold) for fold in train_folds] 100 | foldids = [fold_sizes[i]*[i] for i in range(len(fold_sizes))] 101 | foldids = [item for sublist in foldids for item in sublist] 102 | 103 | trainfolds_data = np.column_stack((imageids, foldids, ctpaths, ptpaths, gtpaths)) 104 | train_df = pd.DataFrame(trainfolds_data, columns=['ImageID', 'FoldID', 'CTPATH', 'PTPATH', 'GTPATH']) 105 | 106 | train_df.to_csv(train_filepaths, index=False) 107 | 108 | imagesTs = os.path.join(DATA_FOLDER, 'imagesTs') 109 | labelsTs = os.path.join(DATA_FOLDER, 'labelsTs') 110 | ctpaths_test = sorted(glob(os.path.join(imagesTs, '*0000.nii.gz'))) 111 | ptpaths_test = sorted(glob(os.path.join(imagesTs, '*0001.nii.gz'))) 112 | gtpaths_test = sorted(glob(os.path.join(labelsTs, '*.nii.gz'))) 113 | imageids_test = [remove_all_extensions(os.path.basename(path)) for path in gtpaths_test] 114 | test_data = np.column_stack((imageids_test, ctpaths_test, ptpaths_test, gtpaths_test)) 115 | test_df = pd.DataFrame(test_data, columns=['ImageID', 'CTPATH', 'PTPATH', 'GTPATH']) 116 | test_df.to_csv(test_filepaths, index=False) 117 | 118 | #%% 119 | def get_train_valid_data_in_dict_format(fold): 120 | trainvalid_fpath = os.path.join(WORKING_FOLDER, 'data_split/train_filepaths.csv') 121 | trainvalid_df = pd.read_csv(trainvalid_fpath) 122 | train_df = trainvalid_df[trainvalid_df['FoldID'] != fold] 123 | valid_df = trainvalid_df[trainvalid_df['FoldID'] == fold] 124 | 125 | ctpaths_train, ptpaths_train, gtpaths_train = list(train_df['CTPATH'].values), list(train_df['PTPATH'].values), list(train_df['GTPATH'].values) 126 | ctpaths_valid, ptpaths_valid, gtpaths_valid = list(valid_df['CTPATH'].values), list(valid_df['PTPATH'].values), list(valid_df['GTPATH'].values) 127 | 128 | train_data = create_dictionary_ctptgt(ctpaths_train, ptpaths_train, gtpaths_train) 129 | valid_data = create_dictionary_ctptgt(ctpaths_valid, ptpaths_valid, gtpaths_valid) 130 | 131 | return train_data, valid_data 132 | 133 | #%% 134 | def get_test_data_in_dict_format(): 135 | test_fpaths = os.path.join(WORKING_FOLDER, 'data_split/test_filepaths.csv') 136 | test_df = pd.read_csv(test_fpaths) 137 | ctpaths_test, ptpaths_test, gtpaths_test = list(test_df['CTPATH'].values), list(test_df['PTPATH'].values), list(test_df['GTPATH'].values) 138 | test_data = 
create_dictionary_ctptgt(ctpaths_test, ptpaths_test, gtpaths_test)
    return test_data

def get_spatial_size(input_patch_size=192):
    trsz = input_patch_size
    return (trsz, trsz, trsz)

def get_spacing():
    spc = 2
    return (spc, spc, spc)

def get_train_transforms(input_patch_size=192):
    spatialsize = get_spatial_size(input_patch_size)
    spacing = get_spacing()
    mod_keys = ['CT', 'PT', 'GT']
    train_transforms = Compose(
        [
            LoadImaged(keys=mod_keys, image_only=True),
            EnsureChannelFirstd(keys=mod_keys),
            CropForegroundd(keys=mod_keys, source_key='CT'),
            ScaleIntensityRanged(keys=['CT'], a_min=-154, a_max=325, b_min=0, b_max=1, clip=True),
            Orientationd(keys=mod_keys, axcodes="RAS"),
            Spacingd(keys=mod_keys, pixdim=spacing, mode=('bilinear', 'bilinear', 'nearest')),
            RandCropByPosNegLabeld(
                keys=mod_keys,
                label_key='GT',
                spatial_size=spatialsize,
                pos=2,
                neg=1,
                num_samples=1,
                image_key='PT',
                image_threshold=0,
                allow_smaller=True,
            ),
            ResizeWithPadOrCropd(
                keys=mod_keys,
                spatial_size=spatialsize,
                mode='constant'
            ),
            RandAffined(
                keys=mod_keys,
                mode=('bilinear', 'bilinear', 'nearest'),
                prob=0.5,
                spatial_size=spatialsize,
                translate_range=(10, 10, 10),
                rotate_range=(0, 0, np.pi/15),
                scale_range=(0.1, 0.1, 0.1)),
            ConcatItemsd(keys=['CT', 'PT'], name='CTPT', dim=0),
            DeleteItemsd(keys=['CT', 'PT'])
        ])

    return train_transforms

#%%
def get_valid_transforms():
    spacing = get_spacing()
    mod_keys = ['CT', 'PT', 'GT']
    valid_transforms = Compose(
        [
            LoadImaged(keys=mod_keys),
            EnsureChannelFirstd(keys=mod_keys),
            CropForegroundd(keys=mod_keys, source_key='CT'),
            ScaleIntensityRanged(keys=['CT'], a_min=-154, a_max=325, b_min=0, b_max=1, clip=True),
            Orientationd(keys=mod_keys, axcodes="RAS"),
            Spacingd(keys=mod_keys, pixdim=spacing, mode=('bilinear', 'bilinear', 'nearest')),
            ConcatItemsd(keys=['CT', 'PT'], name='CTPT', dim=0),
            DeleteItemsd(keys=['CT', 'PT'])
        ])

    return valid_transforms


def get_post_transforms(test_transforms, save_preds_dir):
    post_transforms = Compose([
        Invertd(
            keys="Pred",
            transform=test_transforms,
            orig_keys="GT",
            meta_keys="pred_meta_dict",
            orig_meta_keys="image_meta_dict",
            meta_key_postfix="meta_dict",
            nearest_interp=False,
            to_tensor=True,
        ),
        AsDiscreted(keys="Pred", argmax=True),
        SaveImaged(keys="Pred", meta_keys="pred_meta_dict", output_dir=save_preds_dir, output_postfix="", separate_folder=False, resample=False),
    ])
    return post_transforms
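#%%
# Illustrative usage sketch (not one of the pipeline entry points): how the dict-format
# file lists and the transform pipelines defined above are typically combined into a
# MONAI dataset and dataloader. The training/inference scripts may wire these up
# differently (e.g., with caching or distributed samplers); treat this purely as an
# example of calling the helpers in this module.
def _example_build_loaders(fold=0, input_patch_size=192, train_bs=1, num_workers=2):
    from monai.data import Dataset, DataLoader  # local import to keep the sketch self-contained
    train_data, valid_data = get_train_valid_data_in_dict_format(fold)
    train_ds = Dataset(data=train_data, transform=get_train_transforms(input_patch_size))
    valid_ds = Dataset(data=valid_data, transform=get_valid_transforms())
    # after the transforms, each item carries a 2-channel 'CTPT' image and the 'GT' label
    train_loader = DataLoader(train_ds, batch_size=train_bs, shuffle=True, num_workers=num_workers)
    valid_loader = DataLoader(valid_ds, batch_size=1, shuffle=False, num_workers=num_workers)
    return train_loader, valid_loader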
234 | """ 235 | sizes, spacings = patch_size, spacings 236 | input_size = sizes 237 | strides, kernels = [], [] 238 | while True: 239 | spacing_ratio = [sp / min(spacings) for sp in spacings] 240 | stride = [2 if ratio <= 2 and size >= 8 else 1 for (ratio, size) in zip(spacing_ratio, sizes)] 241 | kernel = [3 if ratio <= 2 else 1 for ratio in spacing_ratio] 242 | if all(s == 1 for s in stride): 243 | break 244 | for idx, (i, j) in enumerate(zip(sizes, stride)): 245 | if i % j != 0: 246 | raise ValueError( 247 | f"Patch size is not supported, please try to modify the size {input_size[idx]} in the spatial dimension {idx}." 248 | ) 249 | sizes = [i / j for i, j in zip(sizes, stride)] 250 | spacings = [i * j for i, j in zip(spacings, stride)] 251 | kernels.append(kernel) 252 | strides.append(stride) 253 | 254 | strides.insert(0, len(spacings) * [1]) 255 | kernels.append(len(spacings) * [3]) 256 | return kernels, strides 257 | #%% 258 | def get_model(network_name = 'unet', input_patch_size=192): 259 | if network_name == 'unet': 260 | model = UNet( 261 | spatial_dims=3, 262 | in_channels=2, 263 | out_channels=2, 264 | channels=(16, 32, 64, 128, 256, 512), 265 | strides=(2, 2, 2, 2, 2), 266 | num_res_units=2, 267 | norm=Norm.BATCH 268 | ) 269 | elif network_name == 'swinunetr': 270 | spatialsize = get_spatial_size(input_patch_size) 271 | model = SwinUNETR( 272 | img_size=spatialsize, 273 | in_channels=2, 274 | out_channels=2, 275 | feature_size=12, 276 | use_checkpoint=False, 277 | ) 278 | elif network_name =='segresnet': 279 | model = SegResNet( 280 | spatial_dims=3, 281 | blocks_down=[1, 2, 2, 4], 282 | blocks_up=[1, 1, 1], 283 | init_filters=16, 284 | in_channels=2, 285 | out_channels=2, 286 | ) 287 | elif network_name == 'dynunet': 288 | spatialsize = get_spatial_size(input_patch_size) 289 | spacing = get_spacing() 290 | krnls, strds = get_kernels_strides(spatialsize, spacing) 291 | model = DynUNet( 292 | spatial_dims=3, 293 | in_channels=2, 294 | out_channels=2, 295 | kernel_size=krnls, 296 | strides=strds, 297 | upsample_kernel_size=strds[1:], 298 | ) 299 | else: 300 | pass 301 | return model 302 | 303 | 304 | #%% 305 | def get_loss_function(): 306 | loss_function = DiceLoss(to_onehot_y=True, softmax=True) 307 | return loss_function 308 | 309 | def get_optimizer(model, learning_rate=2e-4, weight_decay=1e-5): 310 | optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay) 311 | return optimizer 312 | 313 | def get_metric(): 314 | metric = DiceMetric(include_background=False, reduction="mean") 315 | return metric 316 | 317 | def get_scheduler(optimizer, max_epochs=500): 318 | scheduler = CosineAnnealingLR(optimizer, T_max=max_epochs, eta_min=0) 319 | return scheduler 320 | 321 | def get_validation_sliding_window_size(input_patch_size=192): 322 | dict_W_for_N = { 323 | 96:128, 324 | 128:160, 325 | 160:192, 326 | 192:192, 327 | 224:224, 328 | 256:256 329 | } 330 | vlsz = dict_W_for_N[input_patch_size] 331 | return (vlsz, vlsz, vlsz) -------------------------------------------------------------------------------- /metrics/metrics.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) Microsoft Corporation. All rights reserved. 3 | Licensed under the MIT License. 
/metrics/metrics.py:
--------------------------------------------------------------------------------
'''
Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.
'''

import SimpleITK as sitk
import numpy as np
import cc3d

#%%
def get_3darray_from_niftipath(
    path: str,
) -> np.ndarray:
    """Get a numpy array of a NIfTI image from its filepath

    Args:
        path (str): path of the NIfTI file

    Returns:
        np.ndarray: 3D numpy array for the image
    """
    image = sitk.ReadImage(path)
    array = np.transpose(sitk.GetArrayFromImage(image), (2, 1, 0))
    return array

def calculate_patient_level_lesion_suvmean_suvmax(
    ptarray: np.ndarray,
    maskarray: np.ndarray,
    marker: str = 'SUVmean'
) -> np.float64:
    """Function to return the lesion SUVmean or SUVmax over all lesions in
    a 3D PET image using the corresponding 3D segmentation mask

    Args:
        ptarray (np.ndarray): numpy ndarray for 3D PET image
        maskarray (np.ndarray): numpy ndarray for 3D mask image
        marker (str, optional): whether to calculate 'SUVmean' or 'SUVmax'.
        Defaults to 'SUVmean'.

    Returns:
        np.float64: patient-level SUVmean or SUVmax
    """
    prod = np.multiply(ptarray, maskarray)
    num_nonzero_voxels = len(np.nonzero(maskarray)[0])

    if num_nonzero_voxels == 0:
        return 0.0
    else:
        if marker == 'SUVmean':
            return np.sum(prod)/num_nonzero_voxels
        elif marker == 'SUVmax':
            return np.max(prod)

#%%
def calculate_patient_level_tmtv(
    maskarray: np.ndarray,
    spacing: tuple
) -> np.float64:
    """Function to return the total metabolic tumor volume (TMTV) in cm^3 using a
    3D mask containing 0s for background and 1s for lesions/tumors

    Args:
        maskarray (np.ndarray): numpy ndarray for 3D mask image
        spacing (tuple): voxel spacing in mm along the three axes

    Returns:
        np.float64: TMTV in cm^3
    """
    voxel_volume_cc = np.prod(spacing)/1000  # voxel volume in cm^3

    num_lesion_voxels = len(np.nonzero(maskarray)[0])
    tmtv_cc = voxel_volume_cc*num_lesion_voxels
    return tmtv_cc

#%%

def calculate_patient_level_lesion_count(
    maskarray: np.ndarray,
) -> int:
    """Function to return the total number of lesions using the 3D segmentation mask

    Args:
        maskarray (np.ndarray): numpy ndarray for 3D mask image

    Returns:
        int: number of connected components (lesions) in the mask
    """
    _, num_lesions = cc3d.connected_components(maskarray, connectivity=18, return_N=True)
    return num_lesions

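#%%
# Illustrative check of the helpers above on a tiny synthetic volume (not part of the
# evaluation pipeline; the uptake values and the 2 mm isotropic spacing are made up).
def _demo_uptake_and_volume_metrics():
    ptarray = np.zeros((10, 10, 10))
    ptarray[2:4, 2:4, 2:4] = 5.0                        # fake SUV uptake
    maskarray = np.zeros((10, 10, 10), dtype=np.int8)
    maskarray[2:4, 2:4, 2:4] = 1                        # one 8-voxel lesion
    spacing = (2.0, 2.0, 2.0)                           # in mm, as from SimpleITK GetSpacing()
    suvmean = calculate_patient_level_lesion_suvmean_suvmax(ptarray, maskarray, marker='SUVmean')  # -> 5.0
    suvmax = calculate_patient_level_lesion_suvmean_suvmax(ptarray, maskarray, marker='SUVmax')    # -> 5.0
    tmtv = calculate_patient_level_tmtv(maskarray, spacing)        # 8 voxels x 0.008 cm^3 = 0.064 cm^3
    num_lesions = calculate_patient_level_lesion_count(maskarray)  # -> 1
    return suvmean, suvmax, tmtv, num_lesions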
#%%
def calculate_patient_level_tlg(
    ptarray: np.ndarray,
    maskarray: np.ndarray,
    spacing: tuple
) -> np.float64:
    """Function to return the total lesion glycolysis (TLG) using a 3D PET image
    and the corresponding 3D segmentation mask (containing 0s for background and
    1s for lesion/tumor)
    TLG = SUV1*V1 + SUV2*V2 + ... + SUVn*Vn, where SUV1...SUVn are the SUVmean
    values of lesions 1...n with volumes V1...Vn, respectively

    Args:
        ptarray (np.ndarray): numpy ndarray for 3D PET image
        maskarray (np.ndarray): numpy ndarray for 3D mask image
        spacing (tuple): voxel spacing in mm along the three axes

    Returns:
        np.float64: total lesion glycolysis in cm^3 (assuming SUV is unitless)
    """
    voxel_volume_cc = np.prod(spacing)/1000  # voxel volume in cm^3

    labels_out, num_lesions = cc3d.connected_components(maskarray, connectivity=18, return_N=True)
    if num_lesions == 0:
        return 0.0
    else:
        _, lesion_num_voxels = np.unique(labels_out, return_counts=True)
        lesion_num_voxels = lesion_num_voxels[1:]
        lesion_mtvs = voxel_volume_cc*lesion_num_voxels
        lesion_suvmeans = []

        for i in range(1, num_lesions+1):
            mask = np.zeros_like(labels_out)
            mask[labels_out == i] = 1
            prod = np.multiply(mask, ptarray)
            num_nonzero_voxels = len(np.nonzero(mask)[0])
            lesion_suvmeans.append(np.sum(prod)/num_nonzero_voxels)

        tlg = np.sum(np.multiply(lesion_mtvs, lesion_suvmeans))
        return tlg
#%%
def calculate_patient_level_dissemination(
    maskarray: np.ndarray,
    spacing: tuple
) -> np.float64:
    """Function to return the tumor dissemination (Dmax) using the 3D segmentation mask
    Dmax = maximum possible distance between any two foreground voxels in a patient;
    these two voxels can come from the same lesion (in the case of a single lesion)
    or from different lesions (in the case of multiple lesions)

    Args:
        maskarray (np.ndarray): numpy ndarray for 3D mask image
        spacing (tuple): voxel spacing in mm along the three axes

    Returns:
        np.float64: dissemination value in cm
    """
    maskarray = maskarray.astype(np.int8)
    nonzero_voxels = np.argwhere(maskarray == 1)
    distances = np.sqrt(np.sum(((nonzero_voxels[:, None] - nonzero_voxels) * spacing)**2, axis=2))
    farthest_indices = np.unravel_index(np.argmax(distances), distances.shape)
    dmax = distances[farthest_indices]/10  # converting mm to cm
    del maskarray
    del nonzero_voxels
    del distances
    return dmax

#%%
def calculate_patient_level_dice_score(
    gtarray: np.ndarray,
    predarray: np.ndarray,
) -> np.float64:
    """Function to return the Dice similarity coefficient (Dice score) between
    2 segmentation masks (containing 0s for background and 1s for lesions/tumors)

    Args:
        gtarray (np.ndarray): numpy ndarray for the ground-truth mask
        predarray (np.ndarray): numpy ndarray for the predicted mask

    Returns:
        np.float64: Dice score
    """
    dice_score = 2.0*np.sum(predarray[gtarray == 1])/(np.sum(gtarray) + np.sum(predarray))
    return dice_score
#%%
def calculate_patient_level_iou(
    gtarray: np.ndarray,
    predarray: np.ndarray,
) -> np.float64:
    """Function to return the Intersection-over-Union (IoU) between
    2 segmentation masks (containing 0s for background and 1s for lesions/tumors)

    Args:
        gtarray (np.ndarray): numpy ndarray for the ground-truth mask
        predarray (np.ndarray): numpy ndarray for the predicted mask

    Returns:
        np.float64: IoU
    """
    intersection = np.sum(predarray[gtarray == 1])
    union = np.sum(gtarray) + np.sum(predarray) - intersection
    iou = intersection/union
    return iou

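#%%
# Illustrative sketch comparing a synthetic ground-truth mask with a prediction shifted
# by one voxel (not part of the test-metric scripts; spacing is again a made-up 2 mm).
def _demo_overlap_metrics():
    gtarray = np.zeros((12, 12, 12), dtype=np.int8)
    predarray = np.zeros((12, 12, 12), dtype=np.int8)
    gtarray[2:6, 2:6, 2:6] = 1        # 4 x 4 x 4 = 64-voxel lesion
    predarray[3:7, 2:6, 2:6] = 1      # same size, shifted by one voxel along x
    spacing = (2.0, 2.0, 2.0)
    dice = calculate_patient_level_dice_score(gtarray, predarray)   # 2*48/(64+64) = 0.75
    iou = calculate_patient_level_iou(gtarray, predarray)           # 48/(64+64-48) = 0.60
    dmax = calculate_patient_level_dissemination(gtarray, spacing)  # longest voxel-to-voxel distance, in cm
    return dice, iou, dmax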
def calculate_patient_level_intersection(
    gtarray: np.ndarray,
    predarray: np.ndarray,
) -> np.float64:
    """Function to return the intersection between
    2 segmentation masks (containing 0s for background and 1s for lesions/tumors)

    Args:
        gtarray (np.ndarray): numpy ndarray for the ground-truth mask
        predarray (np.ndarray): numpy ndarray for the predicted mask

    Returns:
        np.float64: number of foreground voxels common to both masks
    """
    intersection = np.sum(predarray[gtarray == 1])
    return intersection
#%%

def calculate_patient_level_false_positive_volume(
    gtarray: np.ndarray,
    predarray: np.ndarray,
    spacing: tuple
) -> np.float64:
    # compute the volume (in cm^3) of false positive connected components of the prediction mask,
    # i.e., predicted lesions that do not overlap with any ground-truth foreground
    pred_connected_components = cc3d.connected_components(predarray, connectivity=18)

    false_positive = 0
    for idx in range(1, pred_connected_components.max()+1):
        comp_mask = np.isin(pred_connected_components, idx)
        if (comp_mask*gtarray).sum() == 0:
            false_positive += comp_mask.sum()

    voxel_volume_cc = np.prod(spacing)/1000
    return false_positive*voxel_volume_cc

#%%
def calculate_patient_level_false_negative_volume(
    gtarray: np.ndarray,
    predarray: np.ndarray,
    spacing: tuple
) -> np.float64:
    # compute the volume (in cm^3) of false negative connected components of the ground-truth mask,
    # i.e., ground-truth lesions that do not overlap with any predicted foreground
    gt_connected_components = cc3d.connected_components(gtarray, connectivity=18)

    false_negative = 0
    for idx in range(1, gt_connected_components.max()+1):
        comp_mask = np.isin(gt_connected_components, idx)
        if (comp_mask*predarray).sum() == 0:
            false_negative += comp_mask.sum()

    voxel_volume_cc = np.prod(spacing)/1000
    return false_negative*voxel_volume_cc

#%%
def is_suvmax_detected(
    gtarray: np.ndarray,
    predarray: np.ndarray,
    ptarray: np.ndarray,
) -> bool:
    # returns True if the predicted mask covers the SUVmax voxel of the ground-truth lesion(s)
    prod = np.multiply(gtarray, ptarray)
    max_index = np.unravel_index(np.argmax(prod), prod.shape)
    if predarray[max_index] == 1:
        return True
    else:
        return False

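#%%
# Illustrative sketch for the detection-volume helpers above (synthetic masks,
# made-up 2 mm spacing; not part of the evaluation scripts).
def _demo_detection_volumes():
    gtarray = np.zeros((16, 16, 16), dtype=np.int8)
    predarray = np.zeros((16, 16, 16), dtype=np.int8)
    gtarray[1:3, 1:3, 1:3] = 1            # one ground-truth lesion (8 voxels)
    predarray[10:12, 10:12, 10:12] = 1    # one spurious prediction far from the GT lesion
    spacing = (2.0, 2.0, 2.0)
    fpv = calculate_patient_level_false_positive_volume(gtarray, predarray, spacing)  # -> 0.064 cm^3
    fnv = calculate_patient_level_false_negative_volume(gtarray, predarray, spacing)  # -> 0.064 cm^3
    return fpv, fnv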
def calculate_patient_level_tp_fp_fn(
    gtarray: np.ndarray,
    predarray: np.ndarray,
    criterion: str,
    threshold: np.float64 = None,
    ptarray: np.ndarray = None,
) -> (int, int, int):
    """Calculate patient-level TP, FP, and FN (for detection-based metrics)
    via 3 criteria:

    criterion1: A predicted lesion is TP if any one of its foreground voxels
    overlaps with GT foreground. A predicted lesion that doesn't overlap with any
    GT foreground is FP. A GT lesion that doesn't overlap with any predicted
    foreground is FN. `criterion1` is the weakest detection criterion.

    criterion2: Each predicted lesion is matched to the GT lesion with which it has
    the highest IoU. A predicted lesion is TP if this best IoU is at least `threshold`,
    and FP otherwise. As soon as a lesion is predicted as TP, the matched GT lesion is
    removed from the set of GT lesions. The lesions that remain in the end in the set
    of GT lesions are FN. `criterion2` can be a hard or weak criterion depending on
    the value of `threshold`.

    criterion3: A predicted lesion is TP if it covers the SUVmax voxel of its
    best-matching GT lesion, hence this criterion requires the use of PET data
    (`ptarray`). A predicted lesion that doesn't cover any GT lesion's SUVmax voxel is
    considered FP. As soon as a lesion is predicted as TP, the matched GT lesion is
    removed from the set of GT lesions. The lesions that remain in the end in the set
    of GT lesions are FN. `criterion3` is likely an easy criterion since a network is
    more likely to segment high(er)-uptake regions.

    Args:
        gtarray (np.ndarray): numpy ndarray for the ground-truth mask
        predarray (np.ndarray): numpy ndarray for the predicted mask
        criterion (str): one of 'criterion1', 'criterion2', or 'criterion3'
        threshold (np.float64, optional): IoU threshold used by criterion2. Defaults to None.
        ptarray (np.ndarray, optional): numpy ndarray for the 3D PET image, required by
        criterion3. Defaults to None.

    Returns:
        (int, int, int): patient-level TP, FP, and FN counts
    """

    gtarray_labeled_mask, num_lesions_gt = cc3d.connected_components(gtarray, connectivity=18, return_N=True)
    predarray_labeled_mask, num_lesions_pred = cc3d.connected_components(predarray, connectivity=18, return_N=True)
    gt_lesions_list = list(np.arange(1, num_lesions_gt+1))
    # initial values for TP, FP, FN
    TP = 0
    FP = 0
    FN = num_lesions_gt

    if criterion == 'criterion1':
        FN = 0  # for this criterion the FNs are counted up from 0, hence the reassignment
        for i in range(1, num_lesions_pred+1):
            pred_lesion_mask = np.where(predarray_labeled_mask == i, 1, 0)
            if np.any(pred_lesion_mask & (gtarray_labeled_mask > 0)):
                TP += 1
            else:
                FP += 1
        for j in range(1, num_lesions_gt+1):
            gt_lesion_mask = np.where(gtarray_labeled_mask == j, 1, 0)
            if not np.any(gt_lesion_mask & (predarray_labeled_mask > 0)):
                FN += 1

    elif criterion == 'criterion2':
        for i in range(1, num_lesions_pred+1):
            max_iou = 0
            match_gt_lesion = None
            pred_lesion_mask = np.where(predarray_labeled_mask == i, 1, 0)
            for j in range(1, num_lesions_gt+1):
                gt_lesion_mask = np.where(gtarray_labeled_mask == j, 1, 0)
                iou = calculate_patient_level_iou(gt_lesion_mask, pred_lesion_mask)
                if iou > max_iou:
                    max_iou = iou
                    match_gt_lesion = j
            if max_iou >= threshold:
                TP += 1
                if match_gt_lesion in gt_lesions_list:  # guard: a GT lesion may already have been matched
                    gt_lesions_list.remove(match_gt_lesion)
            else:
                FP += 1
        FN = len(gt_lesions_list)

    elif criterion == 'criterion3':
        for i in range(1, num_lesions_pred+1):
            max_iou = 0
            match_gt_lesion = None
            pred_lesion_mask = np.where(predarray_labeled_mask == i, 1, 0)
            for j in range(1, num_lesions_gt+1):
                gt_lesion_mask = np.where(gtarray_labeled_mask == j, 1, 0)
                iou = calculate_patient_level_iou(gt_lesion_mask, pred_lesion_mask)
                if iou > max_iou:
                    max_iou = iou
                    match_gt_lesion = j

            # match_gt_lesion is the GT lesion with the maximum IoU with predicted lesion i
            # (None if the predicted lesion overlaps with no GT lesion)
            if match_gt_lesion is not None:
                arr_gt_lesion = np.where(gtarray_labeled_mask == match_gt_lesion, 1, 0)
                suvmax_hit = is_suvmax_detected(arr_gt_lesion, pred_lesion_mask, ptarray)
            else:
                suvmax_hit = False
            if suvmax_hit:
                TP += 1
                if match_gt_lesion in gt_lesions_list:  # guard: a GT lesion may already have been matched
                    gt_lesions_list.remove(match_gt_lesion)
            else:
                FP += 1

        FN = len(gt_lesions_list)

    else:
        raise ValueError('Invalid criterion. Choose between criterion1, criterion2, or criterion3')

    return TP, FP, FN

--------------------------------------------------------------------------------
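For reference, a minimal end-to-end sketch of how these patient-level helpers are typically combined. The NIfTI paths below are placeholders, it assumes the repository root is on the Python path, and the threshold value is only an example; the actual evaluation loops live in segmentation/calculate_test_metrics.py and segmentation/generate_lesion_measures.py.

from metrics.metrics import (
    get_3darray_from_niftipath,
    calculate_patient_level_dice_score,
    calculate_patient_level_tp_fp_fn,
)

gtpath, predpath, ptpath = 'gt.nii.gz', 'pred.nii.gz', 'pt.nii.gz'   # placeholder file paths
gtarray = get_3darray_from_niftipath(gtpath)
predarray = get_3darray_from_niftipath(predpath)
ptarray = get_3darray_from_niftipath(ptpath)

dice = calculate_patient_level_dice_score(gtarray, predarray)
tp1, fp1, fn1 = calculate_patient_level_tp_fp_fn(gtarray, predarray, criterion='criterion1')
tp2, fp2, fn2 = calculate_patient_level_tp_fp_fn(gtarray, predarray, criterion='criterion2', threshold=0.5)
tp3, fp3, fn3 = calculate_patient_level_tp_fp_fn(gtarray, predarray, criterion='criterion3', ptarray=ptarray)
print(dice, (tp1, fp1, fn1), (tp2, fp2, fn2), (tp3, fp3, fn3))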