├── data_conversion ├── dicom_ctpt_to_nifti_conversion_file.csv ├── dicom_rtstruct_to_nifti_conversion_file.csv ├── resample_ct2pt.py └── dicom_to_nifti.py ├── documentation ├── assets │ ├── all_logos.png │ ├── cohort_table.png │ ├── segmentation_performance_visualization.png │ ├── autopet_data_zenodo.svg │ └── monai_zenodo.svg ├── conda_env.md ├── calculate_test_metrics.md ├── generate_lesion_measures.md ├── inference.md ├── dataset_format.md ├── trainddp.md ├── dicom_to_nifti_conversion.md └── results_format.md ├── segmentation ├── predict.sh ├── calculate_test_metrics.sh ├── generate_lesion_measures.sh ├── train.sh ├── plot_logs.py ├── inference.py ├── generate_lesion_measures.py ├── calculate_test_metrics.py ├── trainddp.py └── initialize_train.py ├── CODE_OF_CONDUCT.md ├── config.py ├── LICENSE ├── SUPPORT.md ├── SECURITY.md ├── environment.yml ├── README.md └── metrics └── metrics.py /data_conversion/dicom_ctpt_to_nifti_conversion_file.csv: -------------------------------------------------------------------------------- 1 | PatientID,CT_dir,PET_dir,convert -------------------------------------------------------------------------------- /data_conversion/dicom_rtstruct_to_nifti_conversion_file.csv: -------------------------------------------------------------------------------- 1 | PatientID,RTSTRUCT_dir,REF_dir,convert -------------------------------------------------------------------------------- /documentation/assets/all_logos.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/lymphoma-segmentation-dnn/HEAD/documentation/assets/all_logos.png -------------------------------------------------------------------------------- /documentation/assets/cohort_table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/lymphoma-segmentation-dnn/HEAD/documentation/assets/cohort_table.png -------------------------------------------------------------------------------- /documentation/assets/segmentation_performance_visualization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/lymphoma-segmentation-dnn/HEAD/documentation/assets/segmentation_performance_visualization.png -------------------------------------------------------------------------------- /segmentation/predict.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License. 3 | 4 | python inference.py --fold=1 --network-name='unet' --input-patch-size=192 --num_workers=2 --sw-bs=2 -------------------------------------------------------------------------------- /segmentation/calculate_test_metrics.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License. 3 | 4 | python calculate_test_metrics.py --fold=0 --network-name='unet' --input-patch-size=192 5 | -------------------------------------------------------------------------------- /segmentation/generate_lesion_measures.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License. 
3 | 4 | python generate_lesion_measures.py --fold=1 --network-name='unet' --input-patch-size=192 -------------------------------------------------------------------------------- /segmentation/train.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License. 3 | 4 | torchrun --standalone --nproc_per_node=1 trainddp.py --fold=1 --network-name='unet' --epochs=4 --input-patch-size=192 --train-bs=1 --num_workers=2 --lr=2e-4 --wd=1e-5 --val-interval=2 --sw-bs=2 --cache-rate=1 -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | #%% 2 | ''' 3 | Copyright (c) Microsoft Corporation. All rights reserved. 4 | Licensed under the MIT License. 5 | ''' 6 | import os 7 | 8 | LYMPHOMA_SEGMENTATION_FOLDER = '' # path to the directory containing `data` and `results` (this will be created by the pipeline) folders. 9 | 10 | DATA_FOLDER = os.path.join(LYMPHOMA_SEGMENTATION_FOLDER, 'data') 11 | RESULTS_FOLDER = os.path.join(LYMPHOMA_SEGMENTATION_FOLDER, 'results') 12 | os.makedirs(RESULTS_FOLDER, exist_ok=True) 13 | WORKING_FOLDER = os.path.dirname(os.path.abspath(__file__)) 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Shadab Ahamed 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /documentation/assets/autopet_data_zenodo.svg: -------------------------------------------------------------------------------- 1 | DOI badge: 10.7937/gkr0-xv29 -------------------------------------------------------------------------------- /SUPPORT.md: -------------------------------------------------------------------------------- 1 | # TODO: The maintainer of this repo has not yet edited this file 2 | 3 | **REPO OWNER**: Do you want Customer Service & Support (CSS) support for this product/project? 4 | 5 | - **No CSS support:** Fill out this template with information about how to file issues and get help. 6 | - **Yes CSS support:** Fill out an intake form at [aka.ms/onboardsupport](https://aka.ms/onboardsupport). CSS will work with/help you to determine next steps. 7 | - **Not sure?** Fill out an intake as though the answer were "Yes". CSS will help you decide. 8 | 9 | *Then remove this first heading from this SUPPORT.MD file before publishing your repo.* 10 | 11 | # Support 12 | 13 | ## How to file issues and get help 14 | 15 | This project uses GitHub Issues to track bugs and feature requests. Please search the existing 16 | issues before filing new issues to avoid duplicates. For new issues, file your bug or 17 | feature request as a new Issue. 18 | 19 | For help and questions about using this project, please **REPO MAINTAINER: INSERT INSTRUCTIONS HERE 20 | FOR HOW TO ENGAGE REPO OWNERS OR COMMUNITY FOR HELP. COULD BE A STACK OVERFLOW TAG OR OTHER 21 | CHANNEL. WHERE WILL YOU HELP PEOPLE?**. 22 | 23 | ## Microsoft Support Policy 24 | 25 | Support for this **PROJECT or PRODUCT** is limited to the resources listed above. 26 | -------------------------------------------------------------------------------- /documentation/assets/monai_zenodo.svg: -------------------------------------------------------------------------------- 1 | DOI badge: 10.5281/zenodo.7459814 -------------------------------------------------------------------------------- /documentation/conda_env.md: -------------------------------------------------------------------------------- 1 | # Getting started 2 | 3 | Welcome to our GitHub codebase for lymphoma lesion segmentation from PET/CT images. 4 | 5 | ## Cloning the repository 6 | To get started, the first step is to clone this repository to your local machine and navigate inside the resulting git directory: 7 | 8 | ``` 9 | git clone 'https://github.com/microsoft/lymphoma-segmentation-dnn.git' 10 | cd lymphoma-segmentation-dnn 11 | ``` 12 | 13 | ## Installing packages from `environment.yml` file 14 | This code base was developed primarily using python=3.8.10, PyTorch=1.11.0, monai=1.2.0, with CUDA 11.3 on an Ubuntu 20.04 virtual machine, so the codebase has been tested only with these configurations. We hope that it will run in other suitable combinations of different versions of python, PyTorch, monai, and CUDA, but we cannot guarantee that. Proceed with caution! 15 | 16 | First, we will use the [environment.yml](/environment.yml) file to create a conda environment (`lymphoma_seg`) and install all the required packages listed in it.
For this step, run, 17 | 18 | ``` 19 | conda env create --file environment.yml 20 | ``` 21 | 22 | If the above step is completed successfully without errors, you will have a new conda environment called `lymphoma_seg`. To activate this environment, use 23 | 24 | ``` 25 | conda activate lymphoma_seg 26 | ``` 27 | 28 | The environment can be deactivated using 29 | 30 | ``` 31 | conda deactivate 32 | ``` 33 | 34 | With the conda environment set up, you have all the necessary tools to start a training or inference experiment, except the training/test dataset. The next step is to get your dataset into the format used by our codebase, as explained in [/documentation/dataset_format.md](/documentation/dataset_format.md). 35 | -------------------------------------------------------------------------------- /documentation/calculate_test_metrics.md: -------------------------------------------------------------------------------- 1 | # How to calculate test metrics on the test set predicted masks? 2 | Once you have trained some models (as described in [trainddp.md](./trainddp.md)) and used them to perform inference (as described in [inference.md](./inference.md)) to generate predicted masks on the test images, you can proceed with the computation of test metrics. We compute three segmentation metrics: `Dice similarity coefficient (DSC)`, `false positive volume (FPV) in ml`, `false negative volume (FNV) in ml`. We also compute detection metrics such as `true positive (TP)`, `false positive (FP)`, and `false negative (FN)` lesion detections via three different criteria, labeled `Criterion1`, `Criterion2`, and `Criterion3`. These metrics have been defined in [metrics/metrics.py](./../metrics/metrics.py); a toy sketch of the patient-level Dice computation is shown at the end of this page. 3 | 4 | ## Step 1: Activate the required conda environment (`lymphoma_seg`) and navigate to `segmentation` folder 5 | First, activate the conda environment `lymphoma_seg` using (created as described in [conda_env.md](./conda_env.md)): 6 | 7 | ``` 8 | conda activate lymphoma_seg 9 | cd segmentation 10 | ``` 11 | 12 | ## Step 2: Run the script to compute test metrics 13 | After this, run the following script in your terminal, 14 | ``` 15 | python calculate_test_metrics.py --fold=0 --network-name='unet' --input-patch-size=192 16 | ``` 17 | 18 | Alternatively, modify the [segmentation/calculate_test_metrics.sh](./../segmentation/calculate_test_metrics.sh) for your use-case (which contains the same bash script as above) and run: 19 | 20 | ``` 21 | bash calculate_test_metrics.sh 22 | ``` 23 | 24 | The test metrics will be written to `LYMPHOMA_SEGMENTATION_FOLDER/results/test_metrics/fold{fold}/{network_name}/{experiment_code}/testmetrics.csv`, as described in the [results_format.md](./results_format.md) file. The relevant directory structure may then look like: 25 | 26 | └───lymphoma.segmentation/ 27 | ├── data 28 | └── results 29 | ├── logs 30 | ├── models 31 | ├── predictions 32 | └── test_metrics 33 | ├── fold0 34 | │ └── unet 35 | │ └── unet_fold0_randcrop192 36 | │ └── testmetrics.csv 37 | └── fold1 38 | └── unet 39 | └── unet_fold1_randcrop192 40 | └── testmetrics.csv
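For a concrete picture of the first metric, here is a minimal, illustrative sketch of a patient-level Dice score computed from two binary masks. It is a simplified stand-in, not the exact implementation in [metrics/metrics.py](./../metrics/metrics.py), and the function name `dice_score` is used only for illustration:

```
import numpy as np

def dice_score(gt: np.ndarray, pred: np.ndarray) -> float:
    # gt and pred are binary (0/1) 3D arrays with identical shapes
    intersection = np.sum((gt == 1) & (pred == 1))
    denominator = np.sum(gt == 1) + np.sum(pred == 1)
    # return NaN when both masks are empty (no lesions in either mask)
    return 2.0 * intersection / denominator if denominator > 0 else np.nan
```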
-------------------------------------------------------------------------------- /documentation/generate_lesion_measures.md: -------------------------------------------------------------------------------- 1 | # How to generate lesion measures from the test set predicted masks? 2 | Once you have performed the inference and saved the network predicted masks in NIFTI format (as described in [inference.md](./inference.md)), you can proceed with the generation of lesion measures from the test set predicted and ground truth lesion masks. We compute six patient-level lesion measures: lesion SUVmean, lesion SUVmax, number of lesions, total metabolic tumor volume (TMTV) in ml, total lesion glycolysis (TLG) in ml, and lesion dissemination (Dmax) in cm. These measures have been defined in [metrics/metrics.py](./../metrics/metrics.py) and have been shown to be prognostic biomarkers in lymphoma; a toy sketch of the TMTV computation is shown at the end of this page. 3 | 4 | ## Step 1: Activate the required conda environment (`lymphoma_seg`) and navigate to `segmentation` folder 5 | First, activate the conda environment `lymphoma_seg` using (created as described in [conda_env.md](./conda_env.md)): 6 | 7 | ``` 8 | conda activate lymphoma_seg 9 | cd segmentation 10 | ``` 11 | 12 | ## Step 2: Run the script to generate lesion measures 13 | After this, run the following script in your terminal, 14 | ``` 15 | python generate_lesion_measures.py --fold=0 --network-name='unet' --input-patch-size=192 16 | ``` 17 | 18 | Alternatively, modify the [segmentation/generate_lesion_measures.sh](./../segmentation/generate_lesion_measures.sh) for your use-case (which contains the same bash script as above) and run: 19 | 20 | ``` 21 | bash generate_lesion_measures.sh 22 | ``` 23 | 24 | The ground truth and predicted lesion measures on the test set will be written to `LYMPHOMA_SEGMENTATION_FOLDER/results/test_lesion_measures/fold{fold}/{network_name}/{experiment_code}/testlesionmeasures.csv`, as described in the [results_format.md](./results_format.md) file. The relevant directory structure may then look like: 25 | 26 | └───lymphoma.segmentation/ 27 | ├── data 28 | └── results 29 | ├── logs 30 | ├── models 31 | ├── predictions 32 | ├── test_metrics 33 | └── test_lesion_measures 34 | ├── fold0 35 | │ └── unet 36 | │ └── unet_fold0_randcrop192 37 | │ └── testlesionmeasures.csv 38 | └── fold1 39 | └── unet 40 | └── unet_fold1_randcrop192 41 | └── testlesionmeasures.csv
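As a rough guide to how the volumetric measures are obtained, below is a minimal, illustrative sketch of a TMTV computation from a binary lesion mask and the voxel spacing. It is a simplified stand-in, not the exact implementation in [metrics/metrics.py](./../metrics/metrics.py), and the function name `compute_tmtv_ml` is only illustrative:

```
import numpy as np

def compute_tmtv_ml(mask: np.ndarray, spacing_mm: tuple) -> float:
    # mask is a binary (0/1) 3D array; spacing_mm is the (x, y, z) voxel spacing in mm,
    # e.g. as returned by SimpleITK's GetSpacing()
    voxel_volume_ml = float(np.prod(spacing_mm)) / 1000.0  # mm^3 -> ml
    return float(np.sum(mask == 1)) * voxel_volume_ml
```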
26 | """ 27 | ctimg = sitk.ReadImage(ctpath) 28 | ptimg = sitk.ReadImage(ptpath) 29 | resampled_ctimg = sitk.Resample(ctimg, ptimg, interpolator=sitk.sitkLinear, defaultPixelValue=-1024) 30 | resampled_ct_filepath = os.path.join(savedir, os.path.basename(ctpath)) 31 | 32 | sitk.WriteImage(resampled_ctimg, resampled_ct_filepath) 33 | print('Resampled CT to PET geometry') 34 | print(f'Saving the low-resolution CT NIFTI image at {resampled_ct_filepath}') 35 | 36 | def resample_gt_to_pt_geometry( 37 | gtpath: str, 38 | ptpath: str, 39 | savedir: str = '' 40 | ): 41 | """ Function to resample GT images (if applicable) to the corresponding PET image geometry. 42 | You may or may not need to do this resampling. Do this if your ground truth segmentations 43 | were performed on CT images, and hence your GT masks are in the geometry of CT instead of PET. 44 | If the annoatations were performed on PET, then the GT mask and PET should (ideally) be in the 45 | same geometry and hence this step may not be required. 46 | 47 | Args: 48 | gtpath (str): path to NIFTI file for (high-resolution) GT image 49 | ptpath (str): path to NIFTI file for PET image 50 | savedir (str, optional): Directory to write the downsampled GT NIFTI image. Defaults to ''. 51 | """ 52 | gtimg = sitk.ReadImage(gtpath) 53 | ptimg = sitk.ReadImage(ptpath) 54 | resampled_gtimg = sitk.Resample(gtimg, ptimg, interpolator=sitk.sitkNearestNeighbor, defaultPixelValue=0) 55 | resampled_gt_filepath = os.path.join(savedir, os.path.basename(gtpath)) 56 | 57 | sitk.WriteImage(resampled_gtimg, resampled_gt_filepath) 58 | print('Resampled GT to PET geometry') 59 | print(f'Saving the low-resolution CT NIFTI image at {resampled_gt_filepath}') -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet) and [Xamarin](https://github.com/xamarin). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/security.md/definition), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/security.md/msrc/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/security.md/msrc/pgp). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 
18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/security.md/msrc/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/security.md/cvd). 40 | 41 | 42 | -------------------------------------------------------------------------------- /segmentation/plot_logs.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) Microsoft Corporation. All rights reserved. 3 | Licensed under the MIT License. 4 | ''' 5 | import pandas as pd 6 | import matplotlib.pyplot as plt 7 | import numpy as np 8 | import os 9 | import glob 10 | import sys 11 | config_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..") 12 | sys.path.append(config_dir) 13 | from config import RESULTS_FOLDER 14 | 15 | # %% 16 | def plot_train_logs(train_fpaths, valid_fpaths, network_names): 17 | train_dfs = [pd.read_csv(path) for path in train_fpaths] 18 | valid_dfs = [pd.read_csv(path) for path in valid_fpaths] 19 | 20 | train_losses = [df['Loss'].values for df in train_dfs] 21 | valid_metrics = [df['Metric'].values for df in valid_dfs] 22 | train_epochs = [np.arange(len(train_loss))+1 for train_loss in train_losses] 23 | valid_epochs = [2*(np.arange(len(valid_metric))+1) for valid_metric in valid_metrics] 24 | min_losses = [np.min(train_loss) for train_loss in train_losses] 25 | min_losses_epoch = [np.argmin(train_loss) + 1 for train_loss in train_losses] 26 | max_dscs = [np.max(valid_metric) for valid_metric in valid_metrics] 27 | max_dscs_epoch = [2*(np.argmax(valid_metric)+1) for valid_metric in valid_metrics] 28 | fig, ax = plt.subplots(1,2, figsize=(20,10)) 29 | fig.patch.set_facecolor('white') 30 | fig.patch.set_alpha(1) 31 | 32 | for i in range(len(train_losses)): 33 | ax[0].plot(train_epochs[i], train_losses[i]) 34 | ax[1].plot(valid_epochs[i], valid_metrics[i]) 35 | ax[0].plot(min_losses_epoch[i], min_losses[i], '-o', color='red') 36 | ax[1].plot(max_dscs_epoch[i], max_dscs[i], '-o', color='red') 37 | 38 | ax[0].text(np.min(train_epochs[i]), np.min(train_losses[i]), f'Total epochs: {len(train_epochs[i])}', fontsize=15) 39 | 40 | legend_labels_trainloss = [f"{network_names[i]}; Min loss: {round(min_losses[i], 4)} ({len(train_epochs[i])})" for i in range(len(network_names))] 41 | legend_labels_validdice = [f"{network_names[i]}; Max DSC: {round(max_dscs[i], 4)} ({len(valid_epochs[i])})" for i in range(len(network_names))] 42 | 43 | ax[0].legend(legend_labels_trainloss, fontsize=16) 44 | 
ax[1].legend(legend_labels_validdice, fontsize=16) 45 | ax[0].set_title('Train loss', fontsize=25) 46 | ax[1].set_title('Valid DSC', fontsize=25) 47 | ax[0].set_ylabel('Dice loss', fontsize=20) 48 | ax[1].set_ylabel('Dice score', fontsize=20) 49 | ax[0].grid(True) 50 | ax[1].grid(True) 51 | plt.show() 52 | 53 | #%% 54 | fold = 0 55 | network = ['unet'] 56 | inputsize = [192, 192, 160, 128] 57 | p = 2 58 | inputsize_dict = { 59 | 'unet': 192, 60 | 'attentionunet': 192, 61 | 'segresnet': 192, 62 | 'dynunet': 160, 63 | 'unetr': 160, 64 | 'swinunetr': 128 65 | } 66 | 67 | experiment_code = [f"{network[i]}_fold{fold}_randcrop{inputsize[i]}" for i in range(len(network))] 68 | save_logs_dir = os.path.join(RESULTS_FOLDER, 'logs') 69 | save_logs_folders = [os.path.join(save_logs_dir, 'fold'+str(fold), network[i], experiment_code[i]) for i in range(len(experiment_code))] 70 | train_fpaths = [os.path.join(save_logs_folders[i], 'trainlog_gpu0.csv') for i in range(len(save_logs_folders))] 71 | valid_fpaths = [os.path.join(save_logs_folders[i], 'validlog_gpu0.csv') for i in range(len(save_logs_folders))] 72 | legend_lbls = [f'{network[i]}, N = {inputsize[i]}' for i in range(len(network))] 73 | plot_train_logs(train_fpaths, valid_fpaths, legend_lbls) 74 | 75 | -------------------------------------------------------------------------------- /documentation/inference.md: -------------------------------------------------------------------------------- 1 | # How to run inference on test images using your trained model? 2 | 3 | Once your have trained some models using the training script described in [trainddp.md](./trainddp.md), you have model(s) that could be used for predicting the segmentation masks for test images. Running inference primarily uses three files from this codebase: [config.py](./../config.py), [segmentation/initialize_train.py](./../segmentation/initialize_train.py), and [segmentation/inference.py](./../segmentation/inference.py). Ensure that the [config.py](./../config.py) is correctly initialized (as described in [trainddp.md](./trainddp.md)) so that the inference code can find the path to the test images. 4 | 5 | ## Step 1: Activate the required conda environment (`lymphoma_seg`) and navigate to `segmentation` folder 6 | First, activate the conda environment `lymphoma_seg` using (created as described in [conda_env.md](./conda_env.md)): 7 | 8 | ``` 9 | conda activate lymphoma_seg 10 | cd segmentation 11 | ``` 12 | 13 | ## Step 2: Run the inference script 14 | After this, run the following script in your terminal. Note: we run the inference only on one GPU (denoted by `cuda:0` in your machine). 15 | ``` 16 | python inference.py --fold=0 --network-name='unet' --input-patch-size=192 --num_workers=2 --sw-bs=2 17 | ``` 18 | 19 | - `inference.py` is the inference code that this script is using. 20 | 21 | - `--fold` defines which fold's trained model you want to use for inference. When training script is run for the first time, two files, namely, `train_filepaths.csv` and `test_filepaths.csv` gets created within the folder `WORKING_FOLDER/data_split`, where the former contains the filepaths (CT, PT, mask) for training images (from `imagesTr` and `labelsTr` folders as described in `dataset_format.md`), and the latter contains the filepaths for test images (from `imagesTs` and `labelsTs`), respectively. The purpose of setting `fold` in this case is not to point to the specific fold dataset (since we are only using the test set for inference), but to define which fold's trained model to use. 
Defaults to 0. 22 | 23 | - `--network-name` defines the name of the network. In this work, we have trained UNet, SegResNet, DynUNet and SwinUNETR (adapted from MONAI [LINK]). Hence, the `--network-name` should be set to one of `unet`, `segresnet`, `dynunet`, or `swinunetr`. Defaults to `unet`. 24 | 25 | - `--input-patch-size` defines the size of the cubic input patch that is cropped from the input images during training. We used `input-patch-size` of 224 for UNet, 192 for SegResNet, 160 for DynUNet and 128 for SwinUNETR. Defaults to 192. 26 | 27 | - `--num-workers` defines the `num_workers` argument inside training and validation DataLoaders. Defaults to 2. 28 | 29 | - `--sw-bs` defines the batch size for performing the sliding-window inference via `monai.inferers.sliding_window_inference` on the test inputs. Defaults to 2. 30 | 31 | 32 | Alternatively, modify the [segmentation/predict.sh](./../segmentation/predict.sh) script for your use-case (which contains the same bash script as above) and run: 33 | 34 | ``` 35 | bash predict.sh 36 | ``` 37 | 38 | The predicted masks will be written to `LYMPHOMA_SEGMENTATION_FOLDER/results/predictions/fold{fold}/{network_name}/{experiment_code}`, as described in the [results_format.md](./results_format.md) file. The predicted masks are assigned the same filenames as the corresponding original ground truth segmentation masks. The relevant directory structure may then look like: 39 | 40 | └───lymphoma.segmentation/ 41 | ├── data 42 | └── results 43 | ├── logs 44 | ├── models 45 | └── predictions 46 | └── fold0 47 | └── unet 48 | └── unet_fold0_randcrop192 49 | ├── Patient0003_20190402.nii.gz 50 | ├── Patient0004_20160204.nii.gz 51 | ├── ... 52 | 53 | -------------------------------------------------------------------------------- /documentation/dataset_format.md: -------------------------------------------------------------------------------- 1 | # Dataset format 2 | In this work, the dataset consists of three components: CT and PET images and the corresponding lesion segmentation mask, all in NIFTI file format. If your dataset is in DICOM format, you can convert them to NIFTI using the method described in [dicom_to_nifti_conversion.md](./dicom_to_nifti_conversion.md). After converting DICOM images to NIFTI format, you may have to resample your CT (and/or GT) images to PET geometry (if your CT or GT images are not in PET geometry). If this is the case, use the functions `resample_ct_to_pt_geometry()` and `resample_gt_to_pt_geometry()` in [data_conversion/resample_ct2pt.py](./../data_conversion/resample_ct2pt.py). 3 | 4 | ## Training cases filenaming convention 5 | We follow a similar filenaming convention as used by [nnUNet](https://github.com/MIC-DKFZ/nnUNet/blob/master/documentation/dataset_format.md). Each training case is associated with a unique identifier, which is a unique name for that case. This identifier is used by our code to connect images (PET/CT) with the correct segmentation mask. We suggest using `{PatientID}_{StudyDate}` as the unique identifier. 6 | 7 | A training case consists of images and their corresponding segmentation masks. 8 | 9 | **Images**: Our networks utilize two-channel 3D images, the first channel being the CT image and the second channel being the PET image. Both CT and PET **MUST** have the same geometry (same size, spacing, origin, direction) and must be (approximately) coregistered (if applicable). To resample CT images to PET resolution, use the function `resample_ct_to_pt_geometry()` in [data_conversion/resample_ct2pt.py](./../data_conversion/resample_ct2pt.py). Within a training case, all image geometries (input channels, corresponding segmentation) must match. Between training cases, they can of course differ.
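If you are unsure whether a given CT/PET pair already shares the same geometry, a quick sanity check along the following lines can help. This is an illustrative sketch using SimpleITK (already a dependency of this codebase); the filenames are hypothetical, and in practice small floating-point tolerances may be needed when comparing spacing, origin, and direction:

```
import SimpleITK as sitk

ct = sitk.ReadImage('Patient0001_20110502_0000.nii.gz')  # CT channel (hypothetical example file)
pt = sitk.ReadImage('Patient0001_20110502_0001.nii.gz')  # PET channel (hypothetical example file)

# exact comparison of the four geometry attributes referred to throughout this page
same_geometry = (
    ct.GetSize() == pt.GetSize()
    and ct.GetSpacing() == pt.GetSpacing()
    and ct.GetOrigin() == pt.GetOrigin()
    and ct.GetDirection() == pt.GetDirection()
)
print('CT and PET share the same geometry:', same_geometry)
```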
10 | 11 | **Segmentations** must share the same geometry as their corresponding images (same size, spacing, origin, direction). Segmentations are 12 | integer maps with each value representing a semantic class; the background is represented by 0. In our work, we used segmentation masks with two classes: 0 for background and 1 for lesions. All masks in the training set **MUST** have 0s and 1s; the current version of the code cannot handle negative images (images with no lesions) without changing some of the preprocessing transforms (like `RandCropByPosNegLabeld`, etc.) applied to the images before giving them as inputs to the network. 13 | 14 | Given a unique identifier for a case, {PatientID}_{StudyDate}, the CT, PET and GT image filenames should be: 15 | CT image: `{PatientID}_{StudyDate}_0000.nii.gz`, 16 | PET image: `{PatientID}_{StudyDate}_0001.nii.gz`, 17 | GT image: `{PatientID}_{StudyDate}.nii.gz`, 18 | 19 | **Important:** The input channels must be consistent! Concretely, **all images need the same input channels in the same 20 | order and all input channels have to be present every time**. This is also true for inference! 21 | 22 | 23 | ## Dataset folder structure 24 | Create a folder named `lymphoma.segmentation` in the location of your choice. This is the master folder that stores all your datasets, the trained models, training/validation logs, predictions, and any other results based on predictions. Go to the file [config.py](./../config.py) and update the variable `LYMPHOMA_SEGMENTATION_FOLDER` to the absolute path of the folder `lymphoma.segmentation`. Within `lymphoma.segmentation`, create a folder named `data`, which should be the location of your training and test datasets. After these steps, your directory structure is expected to look like this: 25 | 26 | └───lymphoma.segmentation/data 27 | ├── imagesTr 28 | ├── imagesTs # optional 29 | ├── labelsTr 30 | └── labelsTs # optional 31 | 32 | - `imagesTr` contains the images (CT and PET) belonging to the training cases. Each corresponding CT and PET image pair should be in the same geometry (same size, spacing, origin, direction) in this folder. 33 | - `imagesTs` (optional) contains the images that belong to the test cases. Each corresponding CT and PET image pair should be in the same geometry (same size, spacing, origin, direction) in this folder. 34 | - `labelsTr` contains the images with the ground truth segmentation maps for the training cases. These should be in the same geometry (same size, spacing, origin, direction) as their corresponding PET/CT images in `imagesTr`. 35 | - `labelsTs` (optional) contains the images with the ground truth segmentation maps for the test cases. These should be in the same geometry (same size, spacing, origin, direction) as their corresponding PET/CT images in `imagesTs`. 36 | 37 | 38 | After moving all the training and test images and masks into the respective folders, the directory structure should look like this: 39 | 40 | └───lymphoma.segmentation/data/ 41 | ├── imagesTr 42 | │ ├── Patient0001_20110502_0000.nii.gz 43 | │ ├── Patient0001_20110502_0001.nii.gz 44 | │ ├── Patient0002_20150514_0000.nii.gz 45 | │ ├── Patient0002_20150514_0001.nii.gz 46 | │ ├── ...
47 | ├── imagesTs # optional 48 | │ ├── Patient0003_20190402_0000.nii.gz 49 | │ ├── Patient0003_20190402_0001.nii.gz 50 | │ ├── Patient0004_20150514_0000.nii.gz 51 | │ ├── Patient0004_20150514_0001.nii.gz 52 | │ ├── ... 53 | ├── labelsTr 54 | │ ├── Patient0001_20110502.nii.gz 55 | │ ├── Patient0002_20110502.nii.gz 56 | │ ├── ... 57 | └── labelsTs # optional 58 | ├── Patient0003_20190402.nii.gz 59 | ├── Patient0004_20160204.nii.gz 60 | ├── ... 61 | -------------------------------------------------------------------------------- /segmentation/inference.py: -------------------------------------------------------------------------------- 1 | #%% 2 | ''' 3 | Copyright (c) Microsoft Corporation. All rights reserved. 4 | Licensed under the MIT License. 5 | ''' 6 | import numpy as np 7 | import glob 8 | import os 9 | import pandas as pd 10 | import SimpleITK as sitk 11 | import sys 12 | import argparse 13 | from monai.inferers import sliding_window_inference 14 | from monai.data import DataLoader, Dataset, decollate_batch 15 | import torch 16 | import os 17 | import glob 18 | import pandas as pd 19 | import numpy as np 20 | import torch.nn as nn 21 | import time 22 | from initialize_train import ( 23 | get_validation_sliding_window_size, 24 | get_model, 25 | get_test_data_in_dict_format, 26 | get_valid_transforms, 27 | get_post_transforms 28 | ) 29 | config_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..") 30 | sys.path.append(config_dir) 31 | from config import RESULTS_FOLDER 32 | #%% 33 | def convert_to_4digits(str_num): 34 | if len(str_num) == 1: 35 | new_num = '000' + str_num 36 | elif len(str_num) == 2: 37 | new_num = '00' + str_num 38 | elif len(str_num) == 3: 39 | new_num = '0' + str_num 40 | else: 41 | new_num = str_num 42 | return new_num 43 | 44 | def create_dictionary_ctptgt(ctpaths, ptpaths, gtpaths): 45 | data = [] 46 | for i in range(len(gtpaths)): 47 | ctpath = ctpaths[i] 48 | ptpath = ptpaths[i] 49 | gtpath = gtpaths[i] 50 | data.append({'CT':ctpath, 'PT':ptpath, 'GT':gtpath}) 51 | return data 52 | 53 | def read_image_array(path): 54 | img = sitk.ReadImage(path) 55 | array = np.transpose(sitk.GetArrayFromImage(img), (2,1,0)) 56 | return array 57 | 58 | #%% 59 | def main(args): 60 | # initialize inference 61 | fold = args.fold 62 | network = args.network_name 63 | inputsize = args.input_patch_size 64 | experiment_code = f"{network}_fold{fold}_randcrop{inputsize}" 65 | sw_roi_size = get_validation_sliding_window_size(inputsize) # get sliding_window inference size for given input patch size 66 | 67 | # find the best model for this experiment from the training/validation logs 68 | # best model is the model with the best validation `Metric` (DSC) 69 | save_logs_dir = os.path.join(RESULTS_FOLDER, 'logs') 70 | validlog_fname = os.path.join(save_logs_dir, 'fold'+str(fold), network, experiment_code, 'validlog_gpu0.csv') 71 | validlog = pd.read_csv(validlog_fname) 72 | best_epoch = 2*(np.argmax(validlog['Metric']) + 1) 73 | best_metric = np.max(validlog['Metric']) 74 | print(f"Using the {network} model at epoch={best_epoch} with mean valid DSC = {round(best_metric, 4)}") 75 | 76 | # get the best model and push it to device=cuda:0 77 | save_models_dir = os.path.join(RESULTS_FOLDER,'models') 78 | save_models_dir = os.path.join(save_models_dir, 'fold'+str(fold), network, experiment_code) 79 | best_model_fname = 'model_ep=' + convert_to_4digits(str(best_epoch)) +'.pth' 80 | model_path = os.path.join(save_models_dir, best_model_fname) 81 | device = torch.device(f"cuda:0") 82 | 
model = get_model(network, input_patch_size=inputsize) 83 | model.load_state_dict(torch.load(model_path, map_location=device)) 84 | model.to(device) 85 | 86 | # initialize the location to save predicted masks 87 | save_preds_dir = os.path.join(RESULTS_FOLDER, f'predictions') 88 | save_preds_dir = os.path.join(save_preds_dir, 'fold'+str(fold), network, experiment_code) 89 | os.makedirs(save_preds_dir, exist_ok=True) 90 | 91 | # get test data (in dictionary format for MONAI dataloader), test_transforms and post_transforms 92 | test_data = get_test_data_in_dict_format() 93 | test_transforms = get_valid_transforms() 94 | post_transforms = get_post_transforms(test_transforms, save_preds_dir) 95 | 96 | # initalize PyTorch dataset and Dataloader 97 | dataset_test = Dataset(data=test_data, transform=test_transforms) 98 | dataloader_test = DataLoader(dataset_test, batch_size=1, shuffle=False, num_workers=args.num_workers) 99 | 100 | model.eval() 101 | with torch.no_grad(): 102 | for data in dataloader_test: 103 | inputs = data['CTPT'].to(device) 104 | sw_batch_size = args.sw_bs 105 | print(sw_batch_size) 106 | data['Pred'] = sliding_window_inference(inputs, sw_roi_size, sw_batch_size, model) 107 | data = [post_transforms(i) for i in decollate_batch(data)] 108 | 109 | 110 | if __name__ == "__main__": 111 | parser = argparse.ArgumentParser(description='Lymphoma PET/CT lesion segmentation using MONAI-PyTorch') 112 | parser.add_argument('--fold', type=int, default=0, metavar='fold', 113 | help='validation fold (default: 0), remaining folds will be used for training') 114 | parser.add_argument('--network-name', type=str, default='unet', metavar='netname', 115 | help='network name for training (default: unet)') 116 | parser.add_argument('--input-patch-size', type=int, default=192, metavar='inputsize', 117 | help='size of cropped input patch for training (default: 192)') 118 | parser.add_argument('--num_workers', type=int, default=2, metavar='nw', 119 | help='num_workers for train and validation dataloaders (default: 2)') 120 | parser.add_argument('--sw-bs', type=int, default=2, metavar='sw-bs', 121 | help='batchsize for sliding window inference (default=2)') 122 | args = parser.parse_args() 123 | 124 | main(args) 125 | 126 | -------------------------------------------------------------------------------- /documentation/trainddp.md: -------------------------------------------------------------------------------- 1 | # How to train a model using this codebase? 2 | 3 | The models in this work are trained on a single-node with `torch.cuda.device_count()` GPUs. In our work, we had `torch.cuda.device_count() == 4` on a single Microsoft Azure VM (node). Each GPU consisted of 16 GiB of RAM. The machine consisted of 24 vCPUs and 448 GiB of RAM. 4 | 5 | Running a training experiment primarily uses only three files from this codebase: [config.py](./../config.py), [segmentation/trainddp.py](./../segmentation/trainddp.py) and [segmentation/initialize_train.py](./../segmentation/initialize_train.py). The first step is to initialize the correct values for the variable `LYMPHOMA_SEGMENTATION_FOLDER` in the [config.py](./../config.py). Put all the training (and test, if applicable) data inside the `LYMPHOMA_SEGMENTATION_FOLDER/data` folder, as described in [dataset_format.md](./dataset_format.md). 
6 | 7 | ``` 8 | import os 9 | 10 | LYMPHOMA_SEGMENTATION_FOLDER = '/path/to/lymphoma.segmentation/folder/for/data/and/results' # path to the directory containing `data` and `results` (this will be created by the pipeline) folders. 11 | 12 | DATA_FOLDER = os.path.join(LYMPHOMA_SEGMENTATION_FOLDER, 'data') 13 | RESULTS_FOLDER = os.path.join(LYMPHOMA_SEGMENTATION_FOLDER, 'results') 14 | os.makedirs(RESULTS_FOLDER, exist_ok=True) 15 | WORKING_FOLDER = os.path.dirname(os.path.abspath(__file__)) 16 | ``` 17 | 18 | If the dataset is correctly configured based on the explanations in [dataset_format.md](./dataset_format.md) and the [config.py](./../config.py) is correctly initialized as well, you are all set to initiate the training script. 19 | 20 | ## Step 1: Activate the required conda environment (`lymphoma_seg`) and navigate to `segmentation` folder 21 | First, activate the conda environment `lymphoma_seg` using (created as described in [conda_env.md](./conda_env.md)): 22 | 23 | ``` 24 | conda activate lymphoma_seg 25 | cd segmentation 26 | ``` 27 | 28 | ## Step 2: Run the training script 29 | After this, run the following script in your terminal: 30 | 31 | ``` 32 | torchrun --standalone --nproc_per_node=1 trainddp.py --fold=0 --network-name='unet' --epochs=500 --input-patch-size=192 --train-bs=1 --num_workers=2 --cache-rate=0.5 --lr=2e-4 --wd=1e-5 --val-interval=2 --sw-bs=2 33 | ``` 34 | 35 | Here, we are using PyTorch's `torchrun` to start multi-GPU training. The `--standalone` flag indicates that we are using just one node. 36 | 37 | - `--nproc_per_node` defines the number of processes per node; in this case it represents the number of GPUs you want to use to train your model. We used `--nproc_per_node=4`, but feel free to set this variable to the number of GPUs available in your machine. 38 | 39 | - `trainddp.py` is the file containing the code for training that uses `torch.nn.parallel.DistributedDataParallel`. 40 | 41 | - `--fold` defines the fold for which you want to run training. When the above script is run for the first time, two files, namely, `train_filepaths.csv` and `test_filepaths.csv`, get created within the folder `WORKING_FOLDER/data_split`, where the former contains the filepaths (CT, PT, mask) for training images (from `imagesTr` and `labelsTr` folders as described in `dataset_format.md`), and the latter contains the filepaths for test images (from `imagesTs` and `labelsTs`), respectively. The `train_filepaths.csv` contains a column named `FoldID` with values in `{0, 1, 2, 3, 4}` defining which fold the data in that row belongs to. When `--fold=0` (for example), the code uses all the data with `FoldID == 0` for validation and the data with `FoldID != 0` for training. Defaults to 0. 42 | 43 | - `--network-name` defines the name of the network. In this work, we have trained UNet, SegResNet, DynUNet and SwinUNETR (adapted from MONAI [LINK]). Hence, the `--network-name` should be set to one of `unet`, `segresnet`, `dynunet`, or `swinunetr`. Defaults to `unet`. 44 | 45 | - `--epochs` is the total number of epochs for running the training. Defaults to 500. 46 | 47 | - `--input-patch-size` defines the size of the cubic input patch that is cropped from the input images during training. The code uses `monai.transforms.RandCropByPosNegLabeld` (used inside `segmentation/initialize_train.py`) for creating these cropped patches. We used `input-patch-size` of 224 for UNet, 192 for SegResNet, 160 for DynUNet and 128 for SwinUNETR. Defaults to 192.
48 | 49 | - `--train-bs` is the training batch size. We used `--train-bs = 1` for all our experiments in this work, since for the given `input-patch-size` for the networks above, we couldn't accommodate larger batch sizes for SegResNet, DynUNet, and SwinUNETR. Defaults to 1. 50 | 51 | - `--num-workers` defines the `num_workers` argument inside training and validation DataLoaders. Defaults to 2. 52 | 53 | - `--cache-rate` defines the percentage of data to be cached by `monai.data.CacheDataset`. This type of dataset (unlike `torch.utils.data.Dataset`) can load and cache the results of the deterministic transforms during training. A cache-rate of 1 caches all the data into memory, while a cache-rate of 0 doesn't cache anything into memory. A higher cache rate leads to faster training (but more memory consumption). Defaults to 0.1. 54 | 55 | - `--lr` defines the initial learning rate. A cosine annealing scheduler is used to update the learning rate from the initial value to 0 in `epochs` epochs. Defaults to 2e-4. 56 | 57 | - `--wd` defines the weight-decay for the AdamW optimizer used in this work. Defaults to 1e-5. 58 | 59 | - `--val-interval` defines the interval for performing validation and saving the model being trained. Defaults to 2. 60 | 61 | - `--sw-bs` defines the batch size for performing the sliding-window inference via `monai.inferers.sliding_window_inference` on the validation inputs. Defaults to 2. 62 | 63 | 64 | 65 | Alternatively, modify the [segmentation/train.sh](./../segmentation/train.sh) script for your use-case (which contains the same bash script as above) and run: 66 | 67 | ``` 68 | bash train.sh 69 | ``` -------------------------------------------------------------------------------- /segmentation/generate_lesion_measures.py: -------------------------------------------------------------------------------- 1 | #%% 2 | ''' 3 | Copyright (c) Microsoft Corporation. All rights reserved. 4 | Licensed under the MIT License.
5 | ''' 6 | import pandas as pd 7 | import numpy as np 8 | import SimpleITK as sitk 9 | import os 10 | from glob import glob 11 | import sys 12 | import argparse 13 | config_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..") 14 | sys.path.append(config_dir) 15 | from config import RESULTS_FOLDER 16 | from metrics.metrics import * 17 | 18 | def get_spacing_from_niftipath(path): 19 | spacing = sitk.ReadImage(path).GetSpacing() 20 | return spacing 21 | 22 | 23 | def main(args): 24 | fold = args.fold 25 | network = args.network_name 26 | inputsize = args.input_patch_size 27 | experiment_code = f"{network}_fold{fold}_randcrop{inputsize}" 28 | preddir = os.path.join(RESULTS_FOLDER, 'predictions', f'fold{fold}', network, experiment_code) 29 | predpaths = sorted(glob(os.path.join(preddir, '*.nii.gz'))) 30 | gtpaths = sorted(list(pd.read_csv('./../data_split/test_filepaths.csv')['GTPATH'])) 31 | ptpaths = sorted(list(pd.read_csv('./../data_split/test_filepaths.csv')['PTPATH'])) # PET image paths (ptpaths) for calculating the detection metrics using criterion3 32 | 33 | imageids = [os.path.basename(path)[:-7] for path in gtpaths] 34 | DSC = [] 35 | SUVmean_orig, SUVmean_pred = [], [] 36 | SUVmax_orig, SUVmax_pred = [], [] 37 | LesionCount_orig, LesionCount_pred = [], [] 38 | TMTV_orig, TMTV_pred = [], [] 39 | TLG_orig, TLG_pred = [], [] 40 | Dmax_orig, Dmax_pred = [], [] 41 | 42 | for i in range(len(gtpaths)): 43 | ptpath = ptpaths[i] 44 | gtpath = gtpaths[i] 45 | predpath = predpaths[i] 46 | 47 | ptarray = get_3darray_from_niftipath(ptpath) 48 | gtarray = get_3darray_from_niftipath(gtpath) 49 | predarray = get_3darray_from_niftipath(predpath) 50 | spacing = get_spacing_from_niftipath(gtpath) 51 | 52 | # Dice score between mask gt and pred 53 | dsc = calculate_patient_level_dice_score(gtarray, predarray) 54 | # Lesion SUVmean 55 | suvmean_orig = calculate_patient_level_lesion_suvmean_suvmax(ptarray, gtarray, marker='SUVmean') 56 | suvmean_pred = calculate_patient_level_lesion_suvmean_suvmax(ptarray, predarray, marker='SUVmean') 57 | # Lesion SUVmax 58 | suvmax_orig = calculate_patient_level_lesion_suvmean_suvmax(ptarray, gtarray, marker='SUVmax') 59 | suvmax_pred = calculate_patient_level_lesion_suvmean_suvmax(ptarray, predarray, marker='SUVmax') 60 | # Lesion Count 61 | lesioncount_orig = calculate_patient_level_lesion_count(gtarray) 62 | lesioncount_pred = calculate_patient_level_lesion_count(predarray) 63 | # TMTV 64 | tmtv_orig = calculate_patient_level_tmtv(gtarray, spacing) 65 | tmtv_pred = calculate_patient_level_tmtv(predarray, spacing) 66 | # TLG 67 | tlg_orig = calculate_patient_level_tlg(ptarray, gtarray, spacing) 68 | tlg_pred = calculate_patient_level_tlg(ptarray, predarray, spacing) 69 | # Dmax 70 | dmax_orig = calculate_patient_level_dissemination(gtarray, spacing) 71 | dmax_pred = calculate_patient_level_dissemination(predarray, spacing) 72 | 73 | DSC.append(dsc) 74 | SUVmean_orig.append(suvmean_orig) 75 | SUVmean_pred.append(suvmean_pred) 76 | SUVmax_orig.append(suvmax_orig) 77 | SUVmax_pred.append(suvmax_pred) 78 | LesionCount_orig.append(lesioncount_orig) 79 | LesionCount_pred.append(lesioncount_pred) 80 | TMTV_orig.append(tmtv_orig) 81 | TMTV_pred.append(tmtv_pred) 82 | TLG_orig.append(tlg_orig) 83 | TLG_pred.append(tlg_pred) 84 | Dmax_orig.append(dmax_orig) 85 | Dmax_pred.append(dmax_pred) 86 | 87 | 88 | print(f"{i}: {imageids[i]}") 89 | print(f"Dice Score: {round(dsc,4)}") 90 | print(f"SUVmean: GT: {suvmean_orig}, Pred: {suvmean_pred}") 91 | print(f"SUVmax: 
GT: {suvmax_orig}, Pred: {suvmax_pred}") 92 | print(f"LesionCount: GT: {lesioncount_orig}, Pred: {lesioncount_pred}") 93 | print(f"TMTV: GT: {tmtv_orig} ml, Pred: {tmtv_pred} ml") 94 | print(f"TLG: GT: {tlg_orig} ml, Pred: {tlg_pred} ml") 95 | print(f"Dmax: GT: {dmax_orig} cm, Pred: {dmax_pred} cm") 96 | print("\n") 97 | 98 | save_lesionmeasures_dir = os.path.join(RESULTS_FOLDER, f'test_lesion_measures', 'fold'+str(fold), network, experiment_code) 99 | os.makedirs(save_lesionmeasures_dir, exist_ok=True) 100 | filepath = os.path.join(save_lesionmeasures_dir, f'testlesionmeasures.csv') 101 | 102 | data = np.column_stack( 103 | [ 104 | imageids, 105 | DSC, 106 | SUVmean_orig, 107 | SUVmean_pred, 108 | SUVmax_orig, 109 | SUVmax_pred, 110 | LesionCount_orig, 111 | LesionCount_pred, 112 | TMTV_orig, 113 | TMTV_pred, 114 | TLG_orig, 115 | TLG_pred, 116 | Dmax_orig, 117 | Dmax_pred 118 | ] 119 | ) 120 | 121 | data_df = pd.DataFrame( 122 | data=data, 123 | columns=[ 124 | 'PatientID', 125 | 'DSC', 126 | 'SUVmean_orig', 127 | 'SUVmean_pred', 128 | 'SUVmax_orig', 129 | 'SUVmax_pred', 130 | 'LesionCount_orig', 131 | 'LesionCount_pred', 132 | 'TMTV_orig', 133 | 'TMTV_pred', 134 | 'TLG_orig', 135 | 'TLG_pred', 136 | 'Dmax_orig', 137 | 'Dmax_pred' 138 | ] 139 | ) 140 | data_df.to_csv(filepath, index=False) 141 | 142 | 143 | if __name__ == "__main__": 144 | parser = argparse.ArgumentParser(description='Lymphoma PET/CT lesion segmentation using MONAI-PyTorch') 145 | parser.add_argument('--fold', type=int, default=0, metavar='fold', 146 | help='validation fold (default: 0), remaining folds will be used for training') 147 | parser.add_argument('--network-name', type=str, default='unet', metavar='netname', 148 | help='network name for training (default: unet)') 149 | parser.add_argument('--input-patch-size', type=int, default=192, metavar='inputsize', 150 | help='size of cropped input patch for training (default: 192)') 151 | args = parser.parse_args() 152 | main(args) 153 | -------------------------------------------------------------------------------- /data_conversion/dicom_to_nifti.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) Microsoft Corporation. All rights reserved. 3 | Licensed under the MIT License. 4 | ''' 5 | 6 | ''' 7 | This code does the following: 8 | (1) converts PET DICOM images in units Bq/ml to decay-corrected SUV and saved as 3D NIFTI files 9 | (2) converts CT DICOM images to NIFTI 10 | (3) converts DICOM RTSTRUCT images to NIFTI (using rt-utils) 11 | ''' 12 | #%% 13 | import SimpleITK as sitk 14 | from pydicom import dcmread, FileDataset 15 | from rt_utils import RTStructBuilder, RTStruct 16 | import numpy as np 17 | import dateutil 18 | import pandas as pd 19 | import os 20 | import time 21 | 22 | #%% 23 | ''' 24 | Script to convert PET and CT dicom series to niftii files. Works under 25 | the assumption that the rescale slope and intercept in the PET dicom 26 | series map image intensities to Bq/mL. Saved PET files will have image 27 | intensities of SUVbw, and saved CT files will have HU units. 
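In other words, the conversion factor computed by `bqml_to_suv` below is suv_factor = (patient_weight_kg * 1000) / (injected_dose_Bq * 2**(-decay_time_s / half_life_s)), where decay_time_s is the time elapsed between radionuclide injection and the series start; multiplying the Bq/mL voxel values by this factor yields SUVbw.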
28 | 29 | ''' 30 | def bqml_to_suv(dcm_file: FileDataset) -> float: 31 | 32 | # Calculates the SUV conversion factor from Bq/mL to SUVbw using 33 | # the dicom header information in one of the images from a dicom series 34 | 35 | nuclide_dose = dcm_file[0x054, 0x0016][0][0x0018, 0x1074].value # Total injected dose (Bq) 36 | weight = dcm_file[0x0010, 0x1030].value # Patient weight (Kg) 37 | half_life = float(dcm_file[0x054, 0x0016][0][0x0018, 0x1075].value) # Radionuclide half life (s) 38 | 39 | parse = lambda x: dateutil.parser.parse(x) 40 | 41 | series_time = str(dcm_file[0x0008, 0x00031].value) # Series start time (hh:mm:ss) 42 | series_date = str(dcm_file[0x0008, 0x00021].value) # Series start date (yyy:mm:dd) 43 | series_datetime_str = series_date + ' ' + series_time 44 | series_dt = parse(series_datetime_str) 45 | 46 | nuclide_time = str(dcm_file[0x054, 0x0016][0][0x0018, 0x1072].value) # Radionuclide time of injection (hh:mm:ss) 47 | nuclide_datetime_str = series_date + ' ' + nuclide_time 48 | nuclide_dt = parse(nuclide_datetime_str) 49 | 50 | delta_time = (series_dt - nuclide_dt).total_seconds() 51 | decay_correction = 2 ** (-1 * delta_time/half_life) 52 | suv_factor = (weight * 1000) / (decay_correction * nuclide_dose) 53 | 54 | return(suv_factor) 55 | 56 | def get_filtered_roi_list(rois): 57 | filtered_rois = [] 58 | for roi in rois: 59 | if roi.endswith('PETEdge'): 60 | filtered_rois.append(roi) 61 | else: 62 | pass 63 | return filtered_rois 64 | 65 | 66 | def load_merge_masks(rtstruct: RTStruct) -> np.ndarray: 67 | ''' 68 | Load and merge masks from a dicom RTStruct. All of the 69 | masks in the RTStruct will be merged. Add an extra line 70 | of code if you want to filter for/out certain masks. 71 | ''' 72 | rois = rtstruct.get_roi_names() 73 | rois = get_filtered_roi_list(rois) 74 | masks = [] 75 | for roi in rois: 76 | print(roi) 77 | mask_3d = rtstruct.get_roi_mask_by_name(roi).astype(int) 78 | masks.append(mask_3d) 79 | 80 | final_mask = sum(masks) # sums element-wise 81 | final_mask = np.where(final_mask>=1, 1, 0) 82 | # Reorient the mask to line up with the reference image 83 | final_mask = np.moveaxis(final_mask, [0, 1, 2], [1, 2, 0]) 84 | 85 | return final_mask 86 | 87 | ############################################################################################ 88 | ######## Update the three variables below with the locations of your choice ############## 89 | ############################################################################################ 90 | save_dir_ct = '' # path to directory where your new CT files in NIFTI format will be written 91 | save_dir_pt = '' # path to directory where your new PET files in NIFTI format will be written 92 | save_dir_gt = '' # path to directory where your new GT files in NIFTI format will be written 93 | ############################################################################################ 94 | ############################################################################################ 95 | ############################################################################################ 96 | 97 | cases = pd.read_csv('dicom_ctpt_to_nifti_conversion_file.csv') 98 | cases = list(cases.itertuples(index=False, name=None)) 99 | structs = pd.read_csv('dicom_rtstruct_to_nifti_conversion_file.csv') 100 | structs = list(structs.itertuples(index=False, name=None)) 101 | # Execution 102 | start = time.time() 103 | 104 | for case in cases: 105 | patient_id, ct_folder, pet_folder, convert = case 106 | if convert=='N': 107 | continue 108 | 
print(f'Converting patient Id: {patient_id}') 109 | 110 | # Convert CT series 111 | ct_reader = sitk.ImageSeriesReader() 112 | ct_series_names = ct_reader.GetGDCMSeriesFileNames(ct_folder) 113 | ct_reader.SetFileNames(ct_series_names) 114 | ct = ct_reader.Execute() 115 | sitk.WriteImage(ct, os.path.join(save_dir_ct, f"{patient_id}_0000.nii.gz"), imageIO='NiftiImageIO') 116 | print('Saved nifti CT') 117 | 118 | # Convert PET series 119 | pet_reader = sitk.ImageSeriesReader() 120 | pet_series_names = pet_reader.GetGDCMSeriesFileNames(pet_folder) 121 | pet_reader.SetFileNames(pet_series_names) 122 | pet = pet_reader.Execute() 123 | 124 | pet_img = dcmread(pet_series_names[0]) # read one of the images for header info 125 | suv_factor = bqml_to_suv(pet_img) 126 | pet = sitk.Multiply(pet, suv_factor) 127 | sitk.WriteImage(pet, os.path.join(save_dir_pt, f"{patient_id}_0001.nii.gz"), imageIO='NiftiImageIO') 128 | print('Saved nifti PET') 129 | 130 | # Execution 131 | for struct in structs: 132 | patient_id, struct_folder, ref_folder, convert = struct 133 | if convert=='N': 134 | continue 135 | 136 | # print('Converting RTStruct for patient {}'.format(num)) 137 | # Get all the paths in order 138 | struct_file = os.listdir(struct_folder)[0] 139 | struct_path = os.path.join(struct_folder, struct_file) 140 | 141 | # Create the mask 142 | rtstruct = RTStructBuilder.create_from(dicom_series_path= ref_folder, rt_struct_path=struct_path) 143 | final_mask = load_merge_masks(rtstruct) 144 | 145 | # Load original DICOM image for reference 146 | reader = sitk.ImageSeriesReader() 147 | dicom_names = reader.GetGDCMSeriesFileNames(ref_folder) 148 | reader.SetFileNames(dicom_names) 149 | ref_img = reader.Execute() 150 | 151 | # Properly reference and convert the mask to an image object 152 | mask_img = sitk.GetImageFromArray(final_mask) 153 | mask_img.CopyInformation(ref_img) 154 | sitk.WriteImage(mask_img, os.path.join(save_dir_gt, f"{patient_id}.nii.gz"), imageIO="NiftiImageIO") 155 | 156 | print('Patient {} mask saved'.format(patient_id)) 157 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: lymphoma_seg 2 | channels: 3 | - pytorch 4 | - conda-forge 5 | - defaults 6 | dependencies: 7 | - _libgcc_mutex=0.1=main 8 | - _openmp_mutex=5.1=1_gnu 9 | - asttokens=2.4.0=pyhd8ed1ab_0 10 | - backcall=0.2.0=pyh9f0ad1d_0 11 | - backports=1.0=pyhd8ed1ab_3 12 | - backports.functools_lru_cache=1.6.5=pyhd8ed1ab_0 13 | - blas=1.0=mkl 14 | - brotlipy=0.7.0=py38h27cfd23_1003 15 | - bzip2=1.0.8=h7b6447c_0 16 | - ca-certificates=2023.7.22=hbcca054_0 17 | - certifi=2023.7.22=pyhd8ed1ab_0 18 | - cffi=1.15.1=py38h74dc2b5_0 19 | - charset-normalizer=2.0.4=pyhd3eb1b0_0 20 | - comm=0.1.4=pyhd8ed1ab_0 21 | - cryptography=41.0.3=py38h130f0dd_0 22 | - cudatoolkit=11.3.1=h2bc3f7f_2 23 | - debugpy=1.6.7=py38h6a678d5_0 24 | - decorator=5.1.1=pyhd8ed1ab_0 25 | - entrypoints=0.4=pyhd8ed1ab_0 26 | - executing=1.2.0=pyhd8ed1ab_0 27 | - ffmpeg=4.3=hf484d3e_0 28 | - freetype=2.12.1=h4a9f257_0 29 | - giflib=5.2.1=h5eee18b_3 30 | - gmp=6.2.1=h295c915_3 31 | - gnutls=3.6.15=he1e5248_0 32 | - idna=3.4=py38h06a4308_0 33 | - intel-openmp=2021.4.0=h06a4308_3561 34 | - ipykernel=6.25.2=pyh2140261_0 35 | - ipython=8.12.0=pyh41d4057_0 36 | - jedi=0.19.1=pyhd8ed1ab_0 37 | - jpeg=9e=h5eee18b_1 38 | - jupyter_client=7.3.4=pyhd8ed1ab_0 39 | - jupyter_core=4.12.0=py38h578d9bd_0 40 | - lame=3.100=h7b6447c_0 41 | - 
lcms2=2.12=h3be6417_0 42 | - ld_impl_linux-64=2.38=h1181459_1 43 | - lerc=3.0=h295c915_0 44 | - libdeflate=1.17=h5eee18b_1 45 | - libffi=3.3=he6710b0_2 46 | - libgcc-ng=11.2.0=h1234567_1 47 | - libgomp=11.2.0=h1234567_1 48 | - libiconv=1.16=h7f8727e_2 49 | - libidn2=2.3.4=h5eee18b_0 50 | - libpng=1.6.39=h5eee18b_0 51 | - libsodium=1.0.18=h36c2ea0_1 52 | - libstdcxx-ng=11.2.0=h1234567_1 53 | - libtasn1=4.19.0=h5eee18b_0 54 | - libtiff=4.5.1=h6a678d5_0 55 | - libunistring=0.9.10=h27cfd23_0 56 | - libuv=1.44.2=h5eee18b_0 57 | - libwebp=1.3.2=h11a3e52_0 58 | - libwebp-base=1.3.2=h5eee18b_0 59 | - lz4-c=1.9.4=h6a678d5_0 60 | - matplotlib-inline=0.1.6=pyhd8ed1ab_0 61 | - mkl=2021.4.0=h06a4308_640 62 | - mkl-service=2.4.0=py38h7f8727e_0 63 | - mkl_fft=1.3.1=py38hd3c417c_0 64 | - mkl_random=1.2.2=py38h51133e4_0 65 | - ncurses=6.4=h6a678d5_0 66 | - nest-asyncio=1.5.6=pyhd8ed1ab_0 67 | - nettle=3.7.3=hbbd107a_1 68 | - numpy=1.24.3=py38h14f4228_0 69 | - numpy-base=1.24.3=py38h31eccc5_0 70 | - openh264=2.1.1=h4ff587b_0 71 | - openssl=1.1.1w=h7f8727e_0 72 | - packaging=23.2=pyhd8ed1ab_0 73 | - parso=0.8.3=pyhd8ed1ab_0 74 | - pexpect=4.8.0=pyh1a96a4e_2 75 | - pickleshare=0.7.5=py_1003 76 | - pillow=9.4.0=py38h6a678d5_1 77 | - pip=23.2.1=py38h06a4308_0 78 | - prompt-toolkit=3.0.39=pyha770c72_0 79 | - prompt_toolkit=3.0.39=hd8ed1ab_0 80 | - ptyprocess=0.7.0=pyhd3deb0d_0 81 | - pure_eval=0.2.2=pyhd8ed1ab_0 82 | - pycparser=2.21=pyhd3eb1b0_0 83 | - pygments=2.16.1=pyhd8ed1ab_0 84 | - pyopenssl=23.2.0=py38h06a4308_0 85 | - pysocks=1.7.1=py38h06a4308_0 86 | - python=3.8.10=h12debd9_8 87 | - python-dateutil=2.8.2=pyhd8ed1ab_0 88 | - python_abi=3.8=2_cp38 89 | - pytorch=1.11.0=py3.8_cuda11.3_cudnn8.2.0_0 90 | - pytorch-mutex=1.0=cuda 91 | - pyzmq=25.1.0=py38h6a678d5_0 92 | - readline=8.2=h5eee18b_0 93 | - requests=2.31.0=py38h06a4308_0 94 | - setuptools=68.0.0=py38h06a4308_0 95 | - six=1.16.0=pyhd3eb1b0_1 96 | - sqlite=3.41.2=h5eee18b_0 97 | - stack_data=0.6.2=pyhd8ed1ab_0 98 | - tk=8.6.12=h1ccaba5_0 99 | - torchaudio=0.11.0=py38_cu113 100 | - torchvision=0.12.0=py38_cu113 101 | - tornado=6.1=py38h0a891b7_3 102 | - traitlets=5.10.1=pyhd8ed1ab_0 103 | - typing_extensions=4.7.1=py38h06a4308_0 104 | - urllib3=1.26.16=py38h06a4308_0 105 | - wcwidth=0.2.8=pyhd8ed1ab_0 106 | - wheel=0.41.2=py38h06a4308_0 107 | - xz=5.4.2=h5eee18b_0 108 | - zeromq=4.3.4=h9c3ff4c_1 109 | - zlib=1.2.13=h5eee18b_0 110 | - zstd=1.5.5=hc292b87_0 111 | - pip: 112 | - absl-py==2.0.0 113 | - alembic==1.12.0 114 | - astor==0.8.1 115 | - attrs==23.1.0 116 | - beautifulsoup4==4.12.2 117 | - blinker==1.6.2 118 | - cachetools==5.3.1 119 | - clearml==1.13.1 120 | - click==8.1.7 121 | - cloudpickle==2.2.1 122 | - cmaes==0.10.0 123 | - colorama==0.4.6 124 | - coloredlogs==15.0.1 125 | - colorlog==6.7.0 126 | - connected-components-3d==3.12.3 127 | - contextlib2==21.6.0 128 | - contourpy==1.1.1 129 | - cucim==23.8.0 130 | - cycler==0.12.0 131 | - databricks-cli==0.17.8 132 | - dataclasses==0.6 133 | - docker==6.1.3 134 | - einops==0.7.0 135 | - filelock==3.11.0 136 | - fire==0.5.0 137 | - flask==2.3.3 138 | - flatbuffers==23.5.26 139 | - fonttools==4.43.0 140 | - fsspec==2023.9.2 141 | - furl==2.1.3 142 | - gdown==4.7.1 143 | - gitdb==4.0.10 144 | - gitpython==3.1.37 145 | - google-auth==2.23.2 146 | - google-auth-oauthlib==1.0.0 147 | - greenlet==3.0.0 148 | - grpcio==1.59.0 149 | - gunicorn==21.2.0 150 | - h5py==3.9.0 151 | - huggingface-hub==0.17.3 152 | - humanfriendly==10.0 153 | - imagecodecs==2023.3.16 154 | - imageio==2.31.5 155 | - 
importlib-metadata==6.8.0 156 | - importlib-resources==6.1.0 157 | - itk==5.3.0 158 | - itk-core==5.3.0 159 | - itk-filtering==5.3.0 160 | - itk-io==5.3.0 161 | - itk-numerics==5.3.0 162 | - itk-registration==5.3.0 163 | - itk-segmentation==5.3.0 164 | - itsdangerous==2.1.2 165 | - jinja2==3.1.2 166 | - joblib==1.3.2 167 | - json-tricks==3.17.3 168 | - jsonschema==4.19.1 169 | - jsonschema-specifications==2023.7.1 170 | - kiwisolver==1.4.5 171 | - lazy-loader==0.3 172 | - lmdb==1.4.1 173 | - mako==1.2.4 174 | - markdown==3.4.4 175 | - markupsafe==2.1.3 176 | - matplotlib==3.7.3 177 | - mlflow==2.7.1 178 | - monai==1.2.0 179 | - mpmath==1.3.0 180 | - networkx==3.1 181 | - nibabel==5.1.0 182 | - ninja==1.11.1 183 | - nni==3.0 184 | - nptyping==2.5.0 185 | - nvidia-ml-py==12.535.108 186 | - oauthlib==3.2.2 187 | - onnx==1.14.1 188 | - onnxruntime==1.16.0 189 | - opencv-python==4.8.1.78 190 | - openslide-python==1.1.2 191 | - optuna==3.3.0 192 | - orderedmultidict==1.0.1 193 | - pandas==2.0.3 194 | - pathlib2==2.3.7.post1 195 | - pkgutil-resolve-name==1.3.10 196 | - prettytable==3.9.0 197 | - protobuf==4.24.3 198 | - psutil==5.9.5 199 | - pyarrow==13.0.0 200 | - pyasn1==0.5.0 201 | - pyasn1-modules==0.3.0 202 | - pydicom==2.4.3 203 | - pyjwt==2.4.0 204 | - pynrrd==1.0.0 205 | - pyparsing==3.1.1 206 | - pythonwebhdfs==0.2.3 207 | - pytorch-ignite==0.4.11 208 | - pytz==2023.3.post1 209 | - pywavelets==1.4.1 210 | - pyyaml==6.0.1 211 | - querystring-parser==1.2.4 212 | - referencing==0.30.2 213 | - regex==2023.10.3 214 | - requests-oauthlib==1.3.1 215 | - responses==0.23.3 216 | - rpds-py==0.10.3 217 | - rsa==4.9 218 | - rt-utils==1.2.7 219 | - schema==0.7.5 220 | - scikit-image==0.21.0 221 | - scikit-learn==1.3.1 222 | - scipy==1.10.1 223 | - simpleitk==2.3.0 224 | - simplejson==3.19.1 225 | - smmap==5.0.1 226 | - soupsieve==2.5 227 | - sqlalchemy==2.0.21 228 | - sqlparse==0.4.4 229 | - sympy==1.12 230 | - tabulate==0.9.0 231 | - tensorboard==2.14.0 232 | - tensorboard-data-server==0.7.1 233 | - tensorboardx==2.6.2.2 234 | - termcolor==2.3.0 235 | - threadpoolctl==3.2.0 236 | - tifffile==2023.7.10 237 | - tokenizers==0.12.1 238 | - tqdm==4.66.1 239 | - transformers==4.21.3 240 | - typeguard==4.1.2 241 | - types-pyyaml==6.0.12.12 242 | - tzdata==2023.3 243 | - websocket-client==1.6.3 244 | - websockets==11.0.3 245 | - werkzeug==3.0.0 246 | - zipp==3.17.0 247 | prefix: /anaconda/envs/lymphoma_seg 248 | -------------------------------------------------------------------------------- /documentation/dicom_to_nifti_conversion.md: -------------------------------------------------------------------------------- 1 | # Converting DICOM series to 3D NIFTI files 2 | 3 | PET/CT images are usually stored in DICOM format (the format from hell). In our work, we have converted DICOM PET/CT and RTSTRUCT images as NIFIT images for use by our networks. Unlike the DICOM series that consists of several (axial) `.dcm` images within a folder for one case, the NIFTI images are just one file (`.nii.gz`) which stores the entire 3D array + associated metadata. Hence, NIFTI images are much easier to handle and suitable format to use in deep learning applications. 4 | 5 | Here, we provide the script [dicom_to_nifti.py](./../data_conversion/dicom_to_nifti.py) for converting DICOM series (for PET and CT) and DICOM RTSTRUCT (for segmentation masks in DICOM format) to 3D NIFTI files. 
Before using this code, you need to create two specific files: `dicom_ctpt_to_nifti_conversion_file.csv` and `dicom_rtstruct_to_nifti_conversion_file.csv`. Examples of these files are given [here](./../data_conversion/dicom_ctpt_to_nifti_conversion_file.csv) and [here](./../data_conversion/dicom_rtstruct_to_nifti_conversion_file.csv), respectively. Both these files are used by [dicom_to_nifti.py](./../data_conversion/dicom_to_nifti.py) for performing the required conversions. **DO NOT FORGET TO READ THE `VERY IMPORTANT NOTES` SECTION AT THE BOTTOM OF THIS DOCUMENT**. 6 | 7 | ## Creating the `dicom_ctpt_to_nifti_conversion_file.csv` file 8 | `dicom_ctpt_to_nifti_conversion_file.csv` must be a .csv file and its contents must look like this: 9 | 10 | | PatientID | CT_dir | PET_dir | convert | 11 | | ----------|--------|---------|---------| 12 | | Patient00001_28071996 | path/to/ct/dicom/series/directory/for/Patient00001_28071996 | path/to/pet/dicom/series/directory/for/Patient00001_28071996 | Y | 13 | | Patient00002_02021996 | path/to/ct/dicom/series/directory/for/Patient00002_02021996 | path/to/pet/dicom/series/directory/for/Patient00002_02021996 | Y | 14 | 15 | Here, the first column is `PatientID`. For the purpose of illustration, we are using the unique identifier `{PatientID}_{StudyDate}`, as described in [dataset_format.md](./dataset_format.md), but you can use any other naming convention too. The second and third columns should be the paths to the DICOM series directories for CT and PET, respectively, for the patient with ID `PatientID`. The last column should be either `Y` or `N`, for whether to convert (to NIFTI) or not. Rows with `convert=N` are ignored by [dicom_to_nifti.py](./../data_conversion/dicom_to_nifti.py) during conversion. Populate this .csv file with the corresponding information for your custom DICOM data. 16 | 17 | 18 | ## Creating the `dicom_rtstruct_to_nifti_conversion_file.csv` file 19 | `dicom_rtstruct_to_nifti_conversion_file.csv` must be a .csv file and its contents must look like this: 20 | 21 | | PatientID | RTSTRUCT_dir | REF_dir | convert | 22 | | ----------|--------------|---------|---------| 23 | | Patient00001_28071996 | path/to/dicom/rtstruct/directory/for/Patient00001_28071996 | path/to/reference/dicom/series/for/Patient00001_28071996 | Y | 24 | | Patient00002_02021996 | path/to/dicom/rtstruct/directory/for/Patient00002_02021996 | path/to/reference/dicom/series/for/Patient00002_02021996 | Y | 25 | 26 | Here, the first column is the `PatientID`. The second column is the path to the RTSTRUCT directory for the patient with ID `PatientID`. The third column is the path to the directory that stores the reference image on which the RTSTRUCT was created. This reference image could be either PET or CT depending on which image was used to create the RTSTRUCT annotations. BE CAREFUL with assigning the correct reference image, otherwise the code [dicom_to_nifti.py](./../data_conversion/dicom_to_nifti.py) will fail. The last column is the same as in the previous step. 27 | 28 | 29 | ## Updating the `save_dir_ct`, `save_dir_pt`, and `save_dir_gt` in `dicom_to_nifti.py` 30 | Go to the middle of the [dicom_to_nifti.py](./../data_conversion/dicom_to_nifti.py) file (around line numbers 87-95) and update the values of the variables `save_dir_ct`, `save_dir_pt`, and `save_dir_gt` with the paths to the directories (on your local machine) where you want the converted NIFTI images to be written, corresponding to the CT, PET, and GT (ground truth) masks, respectively.
31 | ``` 32 | ############################################################################################ 33 | ######## Update the three variables below with the locations of your choice ############## 34 | ############################################################################################ 35 | save_dir_ct = '' # path to directory where your new CT files in NIFTI format will be written 36 | save_dir_pt = '' # path to directory where your new PET files in NIFTI format will be written 37 | save_dir_gt = '' # path to directory where your new GT files in NIFTI format will be written 38 | ############################################################################################ 39 | ############################################################################################ 40 | ############################################################################################ 41 | ``` 42 | 43 | ## Running conversion script `dicom_to_nifti.py` 44 | This step assumes that you have already cloned this repository and created a conda environment `lymphoma_seg` with all the necessary packages installed from the [environment.yml](./../environment.yml) file. If you haven't done these steps, first finish them using [conda_env.md](./conda_env.md) before proceeding further. Also, read the next section `VERY IMPORTANT NOTES` before running the conversion script below (as you might have to update `dicom_to_nifti.py` further): 45 | ``` 46 | conda activate lymphoma_seg 47 | cd data_conversion 48 | python dicom_to_nifti.py 49 | ``` 50 | 51 | 52 | ## VERY IMPORTANT NOTES 53 | - [dicom_to_nifti.py](./../data_conversion/dicom_to_nifti.py) uses [rt-utils](https://github.com/qurit/rt-utils) for converting DICOM RTSTRUCT to 3D numpy arrays which are eventually saved as 3D NIFTI masks. The code contains a function `get_filtered_roi_list(.)`, as given below: 54 | ``` 55 | def get_filtered_roi_list(rois): 56 | filtered_rois = [] 57 | for roi in rois: 58 | if roi.endswith('PETEdge'): 59 | filtered_rois.append(roi) 60 | else: 61 | pass 62 | return filtered_rois 63 | ``` 64 | 65 | The `rois` argument passed to this function is the list of ROIs within the RTSTRUCT (as extracted by `rt_utils.RTStructBuilder`). In our datasets, all the ROIs in the RTSTRUCT files ending with the string `PETEdge` corresponded to lesions, hence we use `get_filtered_roi_list(.)` to keep only the ROIs for lesions. Your dataset may or may not be like this, so **BE VERY CAUTIOUS WHILE USING THIS CODE!!!!!!!! Update the code accordingly depending on your use-case**. 66 | 67 | - This code also assumes that the original DICOM PET series intensities were in units of Bq/ml. The code performs decay correction of the PET intensities and converts them to SUV values before writing the NIFTI images. The CT image intensities remain the same (i.e., Hounsfield Units (HU)) before and after conversion to NIFTI. 68 | 69 | - After converting to NIFTI format, your CT, PET, and GT still might not be in the same geometry. For example, your high-resolution CT images could have a matrix size much larger than your lower-resolution PET images. You must resample (and resave) the CT images (and also the GT masks, if applicable) to the geometry of the PET images. The final PET, CT and GT mask for a specific `PatientID` should all have the same size, spacing, origin, and direction.
To perform this resampling, use the functions `resample_ct_to_pt_geometry()` and/or `resample_gt_to_pt_geometry()` in [resample_ct2pt.py](./../data_conversion/resample_ct2pt.py). If you do not perform this final resampling of PET/CT/GT images to the same geometry, the subsequent training code will fail, as described in [dataset_format.md](./dataset_format.md). 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /segmentation/calculate_test_metrics.py: -------------------------------------------------------------------------------- 1 | #%% 2 | ''' 3 | Copyright (c) Microsoft Corporation. All rights reserved. 4 | Licensed under the MIT License. 5 | ''' 6 | import numpy as np 7 | import pandas as pd 8 | import SimpleITK as sitk 9 | import os 10 | from glob import glob 11 | import sys 12 | import argparse 13 | config_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..") 14 | sys.path.append(config_dir) 15 | from config import RESULTS_FOLDER 16 | from metrics.metrics import ( 17 | get_3darray_from_niftipath, 18 | calculate_patient_level_dice_score, 19 | calculate_patient_level_false_positive_volume, 20 | calculate_patient_level_false_negative_volume, 21 | calculate_patient_level_tp_fp_fn 22 | ) 23 | 24 | def get_spacing_from_niftipath(path): 25 | image = sitk.ReadImage(path) 26 | return image.GetSpacing() 27 | 28 | def get_column_statistics(col): 29 | mean = col.mean() 30 | std = col.std() 31 | median = col.median() 32 | quantile25 = col.quantile(q=0.25) 33 | quantile75 = col.quantile(q=0.75) 34 | return (mean, std, median, quantile25, quantile75) 35 | 36 | def get_prediction_statistics(data_df): 37 | dsc_stats = get_column_statistics(data_df['DSC'].astype(float)) 38 | fpv_stats = get_column_statistics(data_df['FPV'].astype(float)) 39 | fnv_stats = get_column_statistics(data_df['FNV'].astype(float)) 40 | 41 | c1_sensitivity = data_df[f'TP_C1']/(data_df[f'TP_C1'] + data_df[f'FN_C1']) 42 | c2_sensitivity = data_df[f'TP_C2']/(data_df[f'TP_C2'] + data_df[f'FN_C2']) 43 | c3_sensitivity = data_df[f'TP_C3']/(data_df[f'TP_C3'] + data_df[f'FN_C3']) 44 | sens_c1_stats = get_column_statistics(c1_sensitivity) 45 | sens_c2_stats = get_column_statistics(c2_sensitivity) 46 | sens_c3_stats = get_column_statistics(c3_sensitivity) 47 | 48 | fp_c1_stats = get_column_statistics(data_df['FP_M1'].astype(float)) 49 | fp_c2_stats = get_column_statistics(data_df['FP_M2'].astype(float)) 50 | fp_c3_stats = get_column_statistics(data_df['FP_M3'].astype(float)) 51 | 52 | dsc_stats = [round(d, 2) for d in dsc_stats] 53 | fpv_stats = [round(d, 2) for d in fpv_stats] 54 | fnv_stats = [round(d, 2) for d in fnv_stats] 55 | sens_c1_stats = [round(d, 2) for d in sens_c1_stats] 56 | sens_c2_stats = [round(d, 2) for d in sens_c2_stats] 57 | sens_c3_stats = [round(d, 2) for d in sens_c3_stats] 58 | fp_c1_stats = [round(d, 0) for d in fp_c1_stats] 59 | fp_c2_stats = [round(d, 0) for d in fp_c2_stats] 60 | fp_c3_stats = [round(d, 0) for d in fp_c3_stats] 61 | 62 | print(f"DSC (Mean): {dsc_stats[0]} +/- {dsc_stats[1]}") 63 | print(f"DSC (Median): {dsc_stats[2]} [{dsc_stats[3]}, {dsc_stats[4]}]") 64 | print(f"FPV (Median): {fpv_stats[2]} [{fpv_stats[3]}, {fpv_stats[4]}]") 65 | print(f"FNV (Median): {fnv_stats[2]} [{fnv_stats[3]}, {fnv_stats[4]}]") 66 | print(f"Sensitivity - Criterion1 (Median): {sens_c1_stats[2]} [{sens_c1_stats[3]}, {sens_c1_stats[4]}]") 67 | print(f"FP - Criterion1 (Median): {fp_c1_stats[2]} [{fp_c1_stats[3]}, {fp_c1_stats[4]}]") 68 | print(f"Sensitivity 
- Criterion2 (Median): {sens_c2_stats[2]} [{sens_c2_stats[3]}, {sens_c2_stats[4]}]") 69 | print(f"FP - Criterion1 (Median): {fp_c2_stats[2]} [{fp_c2_stats[3]}, {fp_c2_stats[4]}]") 70 | print(f"Sensitivity - Criterion3 (Median): {sens_c3_stats[2]} [{sens_c3_stats[3]}, {sens_c3_stats[4]}]") 71 | print(f"FP - Criterion3 (Median): {fp_c3_stats[2]} [{fp_c3_stats[3]}, {fp_c3_stats[4]}]") 72 | print('\n') 73 | 74 | #%% 75 | def main(args): 76 | fold = args.fold 77 | network = args.network_name 78 | inputsize = args.input_patch_size 79 | experiment_code = f"{network}_fold{fold}_randcrop{inputsize}" 80 | preddir = os.path.join(RESULTS_FOLDER, 'predictions', f'fold{fold}', network, experiment_code) 81 | predpaths = sorted(glob(os.path.join(preddir, '*.nii.gz'))) 82 | gtpaths = sorted(list(pd.read_csv('./../data_split/test_filepaths.csv')['GTPATH'])) 83 | ptpaths = sorted(list(pd.read_csv('./../data_split/test_filepaths.csv')['PTPATH'])) # PET image paths (ptpaths) for calculating the detection metrics using criterion3 84 | 85 | imageids = [os.path.basename(path)[:-7] for path in gtpaths] 86 | TEST_DSCs, TEST_FPVs, TEST_FNVs = [], [], [] 87 | TEST_TP_criterion1, TEST_FP_criterion1, TEST_FN_criterion1 = [], [], [] 88 | TEST_TP_criterion2, TEST_FP_criterion2, TEST_FN_criterion2 = [], [], [] 89 | TEST_TP_criterion3, TEST_FP_criterion3, TEST_FN_criterion3 = [], [], [] 90 | 91 | 92 | for i in range(len(gtpaths)): 93 | gtpath = gtpaths[i] 94 | ptpath = ptpaths[i] 95 | predpath = predpaths[i] 96 | 97 | gtarray = get_3darray_from_niftipath(gtpath) 98 | ptarray = get_3darray_from_niftipath(ptpath) 99 | predarray = get_3darray_from_niftipath(predpath) 100 | spacing = get_spacing_from_niftipath(gtpath) 101 | 102 | dsc = calculate_patient_level_dice_score(gtarray, predarray) 103 | fpv = calculate_patient_level_false_positive_volume(gtarray, predarray, spacing) 104 | fnv = calculate_patient_level_false_negative_volume(gtarray, predarray, spacing) 105 | tp_c1, fp_c1, fn_c1 = calculate_patient_level_tp_fp_fn(gtarray, predarray, criterion='criterion1') 106 | tp_c2, fp_c2, fn_c2 = calculate_patient_level_tp_fp_fn(gtarray, predarray, criterion='criterion2', threshold=0.5) 107 | tp_c3, fp_c3, fn_c3 = calculate_patient_level_tp_fp_fn(gtarray, predarray, criterion='criterion3', ptarray=ptarray) 108 | 109 | TEST_DSCs.append(dsc) 110 | TEST_FPVs.append(fpv) 111 | TEST_FNVs.append(fnv) 112 | TEST_TP_criterion1.append(tp_c1) 113 | TEST_FP_criterion1.append(fp_c1) 114 | TEST_FN_criterion1.append(fn_c1) 115 | 116 | TEST_TP_criterion2.append(tp_c2) 117 | TEST_FP_criterion2.append(fp_c2) 118 | TEST_FN_criterion2.append(fn_c2) 119 | 120 | TEST_TP_criterion3.append(tp_c3) 121 | TEST_FP_criterion3.append(fp_c3) 122 | TEST_FN_criterion3.append(fn_c3) 123 | print(f"{imageids[i]}: DSC = {round(dsc, 4)}\nFPV = {round(fpv, 4)} ml\nFNV = {round(fnv, 4)} ml") 124 | 125 | save_testmetrics_dir = os.path.join(RESULTS_FOLDER, 'test_metrics', 'fold'+str(fold), network, experiment_code) 126 | os.makedirs(save_testmetrics_dir, exist_ok=True) 127 | save_testmetrics_fpath = os.path.join(save_testmetrics_dir, 'testmetrics.csv') 128 | 129 | data = np.column_stack( 130 | ( 131 | imageids, TEST_DSCs, TEST_FPVs, TEST_FNVs, 132 | TEST_TP_criterion1, TEST_FP_criterion1, TEST_FN_criterion1, 133 | TEST_TP_criterion2, TEST_FP_criterion2, TEST_FN_criterion2, 134 | TEST_TP_criterion3, TEST_FP_criterion3, TEST_FN_criterion3 135 | ) 136 | ) 137 | column_names = [ 138 | 'PatientID', 'DSC', 'FPV', 'FNV', 139 | 'TP_C1', 'FP_C1', 'FN_C1', 140 | 'TP_C2', 'FP_C2', 
'FN_C2', 141 | 'TP_C3', 'FP_C3', 'FN_C3', 142 | ] 143 | data_df = pd.DataFrame(data=data, columns=column_names) 144 | data_df.to_csv(save_testmetrics_fpath, index=False) 145 | 146 | 147 | 148 | 149 | if __name__ == "__main__": 150 | parser = argparse.ArgumentParser(description='Lymphoma PET/CT lesion segmentation using MONAI-PyTorch') 151 | parser.add_argument('--fold', type=int, default=0, metavar='fold', 152 | help='validation fold (default: 0), remaining folds will be used for training') 153 | parser.add_argument('--network-name', type=str, default='unet', metavar='netname', 154 | help='network name for training (default: unet)') 155 | parser.add_argument('--input-patch-size', type=int, default=192, metavar='inputsize', 156 | help='size of cropped input patch for training (default: 192)') 157 | args = parser.parse_args() 158 | main(args) 159 | 160 | # %% 161 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Lymphoma lesion segmentation and quantitation from FDG PET/CT images using deep neural networks 2 | 3 | ## Introduction 4 | 5 | Please cite the following paper when you use our code:
6 | > S. Ahamed, Y. Xu, C. Gowdy, I. Bloise, D. Wilson, P. Martineau, F. Bénard, F. Yousefirizi, R. Dodhia, J. M. Lavista, W. B. Weeks, C. F. Uribe, A. Rahmim, 7 | > _Comprehensive Evaluation and Insights into the Use of Deep Neural Networks to Detect and Quantify Lymphoma Lesions in PET/CT Images_, 8 | > [arXiv:2311.09614](https://arxiv.org/pdf/2311.09614.pdf). 9 | 10 | 11 | Lymphoma lesion segmentation and quantitation play a pivotal role in the diagnosis, treatment planning, and monitoring of lymphoma patients. Accurate segmentation allows for the precise delineation of pathological regions, aiding clinicians in assessing disease extent and progression. Moreover, lesion quantitation, such as measuring lesion size and metabolic activity, provides critical information for treatment response evaluation. Deep learning-based segmentation methods have emerged as a game-changer in this domain, offering the potential to automate and standardize lesion delineation, reducing inter-observer variability and saving valuable clinical time. 12 | 13 |
14 | (Figure 1 image) 15 |
16 | 17 | 18 | Figure 1: Visualization of performances of networks, UNet, SegResNet, DynUNet, and SwinUNETR on the coronal maximum intensity projection views for 8 representative cases. (a)-(d) show cases where all the networks had similar performances, while (e)-(h) show cases where the networks had dissimilar performances, often due to some of them predicting large false positive volumes (FPVs). Some of the prominent FPVs have been indicated with blue arrows. The number on the bottom-right of each plot shows the DSC between the 3D predicted mask and the ground truth. 19 | 20 | 21 |
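As a point of reference, the patient-level DSC used throughout this work can be computed in a few lines of NumPy once a predicted mask and its ground truth are loaded as binary 3D arrays; the repository's own implementation is in [/metrics/metrics.py](/metrics/metrics.py). The snippet below is only a minimal sketch with hypothetical file paths, and it assumes both masks already share the same geometry.

```
import numpy as np
import SimpleITK as sitk

def dice_score(gt, pred):
    # patient-level Dice similarity coefficient between two binary 3D masks
    gt, pred = gt.astype(bool), pred.astype(bool)
    denom = gt.sum() + pred.sum()
    return 2.0 * np.logical_and(gt, pred).sum() / denom if denom > 0 else 1.0

# hypothetical paths for one patient's ground truth and predicted masks
gt = sitk.GetArrayFromImage(sitk.ReadImage('Patient0003_20190402_gt.nii.gz'))
pred = sitk.GetArrayFromImage(sitk.ReadImage('Patient0003_20190402_pred.nii.gz'))
print(f'DSC = {dice_score(gt, pred):.4f}')
```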
22 | 23 | In this work, we trained four deep neural networks, **UNet**, **SegResNet**, **DynUNet**, and **SwinUNETR** (adapted from MONAI [1]), for the segmentation of lesions from multi-institutional FDG PET/CT images. We used a large and diverse whole-body PET/CT dataset with a total of **611 cases** coming from four retrospective cohorts, as given in Table 1 below. The first three cohorts (internal cohorts) are privately-owned lymphoma datasets, while the fourth cohort is a subset of the public dataset acquired from [2]. Our models were trained/validated on 80% of the data from the former three cohorts. The remaining 20% of the data from these three cohorts was used as the internal test set, while the fourth cohort was solely used as an external (unseen) test set. 24 | 25 |
26 | (Table 1 image) 27 |
28 | 29 | 30 | 31 | ## How to get started? 32 | Follow the instructions given below to set up the necessary conda environment, install packages, process your dataset into the required format so that it can be accepted as input by our code, train models, perform inference with trained models, compute evaluation metrics (for segmentation and detection) on the test set, and calculate lesion measures from the predicted 3D lesion masks. 33 | 34 | - **Clone the repository, create conda environment and install necessary packages:** 35 | The first step is to clone this GitHub codebase to your local machine, create a conda environment, and install all the necessary packages. For this step, follow the detailed instructions in [/documentation/conda_env.md](/documentation/conda_env.md). 36 | 37 | - **Get your dataset in the required format:** 38 | After you have successfully executed the previous step, it is time to get your PET/CT and binary mask data into a format that our code can use as input. We have tested this codebase only with NIFTI images (.nii.gz), and provide a script for converting your DICOM PET/CT volumes and RTSTRUCTs to NIFTI format. This data directory setup is loosely based on [nnUNet](https://github.com/MIC-DKFZ/nnUNet/tree/master). For this step, follow the detailed instructions in [/documentation/dataset_format.md](/documentation/dataset_format.md). 39 | 40 | - **Train a model:** 41 | Once you have the data folders and data set up as described in the previous step, you can proceed with training a model. In this work, we have used UNet [], SegResNet [], DynUNet [], and SwinUNETR [] adapted from the MONAI package. For detailed instructions on initiating and running the training script, read [/documentation/trainddp.md](/documentation/trainddp.md). 42 | 43 | This repository is capable of performing 5-fold cross-validation training, followed by inference on the unseen test set, although in the paper above, we only used a single split of train, validation and test images. It is also worth reading [/documentation/results_format.md](/documentation/results_format.md) to understand the overall data and result directories created at different steps of the pipeline and how the different types of results are stored. 44 | 45 | - **Perform inference on test images using a trained model:** 46 | Once you have trained your segmentation model(s), you can use them to perform inference on the test set images. In this step, you will save the network predictions as NIFTI files to your local machine. The predicted masks will be in the same geometry as the corresponding ground truth segmentation masks. For this step, follow the detailed instructions given in [/documentation/inference.md](/documentation/inference.md). 47 | 48 | - **Compute test set evaluation metrics (for detection and segmentation):** 49 | Once the test set predicted masks have been saved, you can proceed to compute the evaluation metrics between the predicted and ground truth segmentation masks. In this work, we use three segmentation metrics: Dice similarity coefficient (DSC), false positive volume (FPV) in ml, and false negative volume (FNV) in ml. We also define three detection-based criteria, `Criterion1`, `Criterion2`, and `Criterion3`. Briefly, `Criterion1` labels a predicted lesion as true positive (TP) if it has a non-zero overlap with any of the ground truth lesions. `Criterion2` labels a predicted lesion as TP if it has an intersection-over-union (IoU) > 0.5 with any of the ground truth lesions.
`Criterion3` labels a predicted lesion as TP if it overlaps with a ground truth lesion's SUVmax voxel. For `Criterion2` and `Criterion3`, we first perform a matching to pair up a ground truth lesion with a predicted lesion via IoU maximization. These metrics have been defined in [/metrics/metrics.py](/metrics/metrics.py). 50 | 51 | To run the evaluation script, follow the detailed instructions given in [/documentation/calculate_test_metrics.md](/documentation/calculate_test_metrics.md). 52 | 53 | 54 | - **Generate lesion measures from the predicted lesion masks:** 55 | We further use the predicted lesion masks to compute predicted lesion measures. In this work, we focus on six patient-level lesion measures, namely lesion SUVmean, lesion SUVmax, number of lesions, total metabolic tumor volume (TMTV) in ml, total lesion glycolysis (TLG) in ml, and lesion dissemination (Dmax) in cm. All these lesion measures have been shown to be prognostic biomarkers in lymphoma patients in several studies [3-5]. These predicted lesion measures can be correlated with the ground truth lesion measures to assess how good the trained models are at predicting these clinically-relevant metrics of interest. 56 | 57 | To run the lesion measures generation script, follow the detailed instructions given in [/documentation/generate_lesion_measures.md](/documentation/generate_lesion_measures.md). 58 | 59 | 60 | # Acknowledgments 61 |
62 | (acknowledgments image) 63 |
64 | 65 | 66 | # References 67 | [1] 68 | MONAI: Medical Open Network for AI, 69 | *AI Toolkit for Healthcare Imaging* 70 | [![DOI](/documentation/assets/monai_zenodo.svg)](https://zenodo.org/record/7459814) 71 | 72 | [2] 73 | Gatidis S, Kuestner T., "A whole-body FDG-PET/CT dataset with manually annotated tumor lesions (FDG-PET-CT-Lesions)" [Dataset] (2022), The Cancer Imaging Archive. 74 | [![DOI](/documentation/assets/autopet_data_zenodo.svg)](https://doi.org/10.7937/gkr0-xv29) 75 | 76 | [3] 77 | K. Okuyucu et al. "Prognosis estimation under the light of metabolic tumor parameters on initial FDG-PET/CT 78 | in patients with primary extranodal lymphoma". en. In: Radiol. Oncol. 50.4 (2016), pp. 360–369 79 | [(doi)](https://doi.org/10.1515/raon-2016-0045) 80 | 81 | [4] 82 | X. Xia et al. "Baseline SUVmax of 18F-FDG PET-CT indicates prognosis of extranodal natural killer/T-cell 83 | lymphoma", en. In: Medicine (Baltimore) 99.37 (2020), e22143. 84 | [(doi)](https://doi.org/10.1097%2FMD.0000000000022143) 85 | 86 | [5] 87 | A.-S. Cottereau et al. "18F-FDG PET dissemination features in diffuse large B-cell lymphoma are predictive of outcome". en. In: J. Nucl. Med. 61.1 (2020),pp. 40–45. 88 | [(doi)](https://doi.org/10.2967/jnumed.119.229450) 89 | -------------------------------------------------------------------------------- /documentation/results_format.md: -------------------------------------------------------------------------------- 1 | # Results folder format 2 | In this work, results include: trained models, training and validation logs, predicted masks, metrics on the test set, etc. These will all be written to a folder called `results` as defined in the variable `RESULTS_FOLDER` in the `config.py` (`./../config.py`) file. This folder will be next to the `data` folder, as explained in [dataset_format.md](LINK). 3 | 4 | ## Results folder/filenaming convention 5 | 6 | ### `logs` and `models` folders 7 | While a model is training (see `trainddp.md` for details), the following two folders will be created within `results` folder: `logs` and `models` and the directory structure may look like this: 8 | 9 | └───lymphoma.segmentation/ 10 | ├── data 11 | └── results 12 | ├── logs 13 | │ ├── fold0 14 | │ │ └── unet 15 | │ │ └── unet_fold0_rancrop192 16 | │ │ ├── trainlog_gpu0.csv 17 | │ │ ├── trainlog_gpu1.csv 18 | │ │ ├── validlog_gpu0.csv 19 | │ │ └── validlog_gpu1.csv 20 | │ └── fold1 21 | │ └── unet 22 | │ └── unet_fold1_rancrop192 23 | │ ├── trainlog_gpu0.csv 24 | │ ├── trainlog_gpu1.csv 25 | │ ├── validlog_gpu0.csv 26 | │ └── validlog_gpu1.csv 27 | ├── models 28 | │ ├── fold0 29 | │ │ └── unet 30 | │ │ └── unet_fold0_rancrop192 31 | │ │ ├── model_ep=0002.csv 32 | │ │ ├── model_ep=0004.csv 33 | │ │ ├── model_ep=0006.csv 34 | │ │ ├── model_ep=0008.csv 35 | │ │ ├── ... 36 | │ └── fold1 37 | │ └── unet 38 | │ └── unet_fold1_rancrop192 39 | │ ├── model_ep=0002.csv 40 | │ ├── model_ep=0004.csv 41 | │ ├── model_ep=0006.csv 42 | │ ├── model_ep=0008.csv 43 | │ ├── ... 44 | ├── ... 45 | 46 | 47 | This directory stucture shows that so far, the model `unet` has been (or is being) trained on two folds: `fold0` and `fold1`. Within the `logs` or `models` folder, the directory structure is `{logs_or_models}/fold{fold}/{network_name}/{experiment_code}`, where the `experiment_code` is defined as `{network_name}_fold{fold}_randcrop{input_patch_size}`. 
The above directory structure shows that for both folds `fold0` and `fold1`, the `experiment_code` is `{unet}_fold{0 or 1}_randcrop{192}`, meaning we trained/are training `unet` for fold 0 or 1 with an `input_patch_size = 192`. If you train other networks (like `segresnet`, `dynunet`, or `swinunetr`, as was the case in this work), they will appear accordingly within the framework of the above directory structure. 48 | 49 | Since the training in this work was carried out using PyTorch's `torch.nn.parallel.DistributedDataParallel`, the `trainlog_gpu0.csv`, `trainlog_gpu1.csv`, `validlog_gpu0.csv`, and `validlog_gpu1.csv` files store the training and validation logs accumulated on the GPUs with device IDs 0 and 1. All the `validlog_gpu[i].csv` files are identical and hence redundant, so you can use any one of them for analysis (we will resolve this to save only one file in later versions). All the `trainlog_gpu[i].csv` files are NOT identical; each file separately stores the loss accumulated from the data distributed to its GPU. In our work, we used 4 GPUs, but the above directory structure only shows training on 2 GPUs for the purpose of illustration. A typical `trainlog_gpu[i].csv` file looks like this: 50 | 51 | ``` 52 | Loss 53 | 0.6536665889951918 54 | 0.6449973914358351 55 | 0.6385666595564948 56 | 0.6357755064964294 57 | ... 58 | ``` 59 | 60 | where each line shows the mean `DiceLoss` on the training inputs (averaged over all batches) at epoch `j+1` with `j` in the range `np.arange(0, epochs)`; `epochs` is the total number of epochs for which we are running the training. Similarly, a typical `validlog_gpu[i].csv` file looks like this: 61 | 62 | ``` 63 | Metric 64 | 0.0011193332029506564 65 | 0.001015653251670301 66 | ... 67 | ``` 68 | where each line shows the mean `DiceMetric` on the validation inputs at epoch `j` with `j` in the range `np.arange(2, epochs+1, val_interval)`, where `epochs` is the total number of epochs for which we are running the training and `val_interval` (default=2) is the epoch interval at which we run validation, compute the Dice metric, and save the trained model. The variables `val_interval`, `epochs`, etc. can be set in the `train.sh` script which is used for running the training. 69 | 70 | The trained models are saved in a similar way under the corresponding /fold/network/experiment_code folder with filenames `model_ep=0002.pth`, `model_ep=0004.pth`, etc. In this case, `val_interval = 2` (for example), so the models are saved at an interval of 2 starting from the second epoch. 71 | 72 | 73 | ### `predictions` and `test_metrics` folders 74 | After the trained models are used for predicting the segmentation masks on test images (see `inference.md` for details), based on the `fold`, `network_name` and `experiment_code`, the predicted masks will be written to `LYMPHOMA_SEGMENTATION_FOLDER/results/predictions/fold{fold}/{network_name}/{experiment_code}`. Once the predicted masks have been generated and saved, the metrics computed on the test set using the test ground truth and predicted masks will be written to `LYMPHOMA_SEGMENTATION_FOLDER/results/test_metrics/fold{fold}/{network_name}/{experiment_code}/testmetrics.csv`. We compute three segmentation metrics: `Dice similarity coefficient (DSC)`, `false positive volume (FPV) in ml`, and `false negative volume (FNV) in ml`. We also compute detection metrics such as `true positive (TP)`, `false positive (FP)`, and `false negative (FN)` lesion detections via three different criteria labeled as `Criterion1`, `Criterion2`, and `Criterion3`.
These metrics have been defined in [metrics/metrics.py](./../metrics/metrics.py). After running inference and calculating the test metrics, the (relevant) directory structure may look like: 75 | 76 | └───lymphoma.segmentation/ 77 | ├── data 78 | └── results 79 | ├── logs 80 | ├── models 81 | ├── predictions 82 | │ ├── fold0 83 | │ │ └── unet 84 | │ │ └── unet_fold0_randcrop192 85 | │ │ ├── Patient0003_20190402.nii.gz 86 | │ │ ├── Patient0004_20160204.nii.gz 87 | │ │ ├── ... 88 | │ └── fold1 89 | │ └── unet 90 | │ └── unet_fold1_randcrop192 91 | │ ├── Patient0003_20190402.nii.gz 92 | │ ├── Patient0004_20160204.nii.gz 93 | │ ├── ... 94 | │ 95 | └── test_metrics 96 | ├── fold0 97 | │ └── unet 98 | │ └── unet_fold0_randcrop192 99 | │ └── testmetrics.csv 100 | └── fold1 101 | └── unet 102 | └── unet_fold1_randcrop192 103 | └── testmetrics.csv 104 | 105 | The predicted masks are in the same geometry (same size, spacing, origin, direction) as their corresponding ground truth masks. A typical `testmetrics.csv` file looks like: 106 | 107 | | PatientID | DSC | FPV | FNV | TP_C1 | FP_C1 | FN_C1 | TP_C2 | FP_C2 | FN_C2 | TP_C3 | FP_C3 | FN_C3 | 108 | |-----------|-----|-----|-----|-------|-------|-------|-------|-------|-------|-------|-------|-------| 109 | | Patient0003_20190402 | 0.7221043699618158 | 17.5164623503173 | 1.173559512304143 | 3 | 6 | 2 | 2 | 7 | 3 | 3 | 6 | 2 | 110 | | Patient0004_20160204 | 0.0807955251709131 | 53.4186903933997 | 5.563541391664086 | 2 | 8 | 1 | 0 | 10 | 3 | 2 | 8 | 1 | 111 | 112 | Here, all the metrics are at the patient level and FPV and FNV are expressed in ml. 113 | 114 | ### `test_lesion_measures` folder 115 | In this work, we have performed further analyses on the predicted segmentation masks on the test set and compared them to the ground truth masks. These include comparing the patient-level lesion SUVmean, lesion SUVmax, number of lesions, total metabolic tumor volume (TMTV) in ml, total lesion glycolysis (TLG) in ml, lesion dissemination (Dmax) in cm. These metrics have been defined in [metrics/metrics.py](./../metrics/metrics.py). The test set predicted lesion measures are written to `LYMPHOMA_SEGMENTATION_FOLDER/results/test_lesion_measures/fold{fold}/{network_name}/{experiment_code}/testlesionmeasures.csv`. 
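For orientation, the snippet below is a minimal sketch of how a few of these patient-level measures (lesion SUVmean, SUVmax, TMTV, and TLG) can be derived from a predicted binary mask and the corresponding PET image in SUV units; the reference implementations used to produce `testlesionmeasures.csv` are in [metrics/metrics.py](./../metrics/metrics.py). The file paths here are placeholders, both images are assumed to share the same geometry, and lesion counting and Dmax additionally require a connected-component analysis (e.g., via the `connected-components-3d` package listed in [environment.yml](./../environment.yml)) that is not shown.

```
import numpy as np
import SimpleITK as sitk

pet_img = sitk.ReadImage('Patient0003_20190402_0001.nii.gz')   # PET in SUV units (placeholder path)
mask_img = sitk.ReadImage('Patient0003_20190402.nii.gz')       # predicted binary mask (placeholder path)

pet = sitk.GetArrayFromImage(pet_img)
mask = sitk.GetArrayFromImage(mask_img).astype(bool)
voxel_volume_ml = np.prod(pet_img.GetSpacing()) / 1000.0       # mm^3 per voxel -> ml

suv_mean = float(pet[mask].mean())       # patient-level lesion SUVmean
suv_max = float(pet[mask].max())         # patient-level lesion SUVmax
tmtv_ml = mask.sum() * voxel_volume_ml   # total metabolic tumor volume (ml)
tlg = suv_mean * tmtv_ml                 # total lesion glycolysis
print(suv_mean, suv_max, tmtv_ml, tlg)
```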
After generating `testlesionmeasures.csv` files, the relevant directory structure may look like: 116 | 117 | └───lymphoma.segmentation/ 118 | ├── data 119 | └── results 120 | ├── logs 121 | ├── models 122 | ├── predictions 123 | ├── test_metrics 124 | └── test_lesion_measures 125 | ├── fold0 126 | │ └── unet 127 | │ └── unet_fold0_randcrop192 128 | │ └── testlesionmeasures.csv 129 | └── fold1 130 | └── unet 131 | └── unet_fold1_randcrop192 132 | └── testlesionmeasures.csv 133 | 134 | A typical `testlesionmeasures.csv` file looks like: 135 | 136 | | PatientID | DSC | SUVmean_orig | SUVmean_pred | SUVmax_orig | SUVmax_pred | LesionCount_orig | LesionCount_pred | TMTV_orig | TMTV_pred | TLG_orig | TLG_pred | Dmax_orig | Dmax_pred | 137 | |-----------|-----|--------------|--------------|-------------|-------------|------------------|------------------|-----------|-----------|----------|----------|----------|-----------| 138 | | Patient0003_20190402 | 0.7221043699618158 | 2.935304139385291 | 4.362726242681123 | 6.1822732035904515 | 7.827266273892102 | 3 | 4 | 13.691527643548337 | 18.6272625128359097 | 40.18879776661558 | 50.2728492927217289 | 15.837606584884108 | 25.82763813918739 | 139 | | Patient0004_20160204 | 0.0807955251709131 | 8.72882540822585 | 12.71524350987 | 40.294842200490244 | 45.9483628492382 | 9 | 6 | 20.732884717373196 | 16.756373846353748 | 180.9737309068245 | 120.2387139879348 | 14.737477375372881 | 7.652628627281008 | 140 | 141 | Here, all the lesion measures are at the patient level. TMTV and TLG are expressed in ml and Dmax in cm. 142 | -------------------------------------------------------------------------------- /segmentation/trainddp.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) Microsoft Corporation. All rights reserved. 3 | Licensed under the MIT License. 
4 | ''' 5 | 6 | from monai.transforms import ( 7 | AsDiscrete, 8 | Compose, 9 | ) 10 | import argparse 11 | from monai.inferers import sliding_window_inference 12 | from monai.data import CacheDataset, DataLoader, decollate_batch 13 | import torch 14 | import matplotlib.pyplot as plt 15 | import os 16 | import pandas as pd 17 | import time 18 | from torch.utils.data.distributed import DistributedSampler 19 | from torch.nn.parallel import DistributedDataParallel as DDP 20 | import torch.distributed as dist 21 | import os 22 | from initialize_train import ( 23 | create_data_split_files, 24 | get_train_valid_data_in_dict_format, 25 | get_train_transforms, 26 | get_valid_transforms, 27 | get_model, 28 | get_loss_function, 29 | get_optimizer, 30 | get_scheduler, 31 | get_metric, 32 | get_validation_sliding_window_size 33 | ) 34 | 35 | import sys 36 | config_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..") 37 | sys.path.append(config_dir) 38 | from config import RESULTS_FOLDER 39 | torch.backends.cudnn.benchmark = True 40 | #%% 41 | def ddp_setup(): 42 | dist.init_process_group(backend='nccl', init_method="env://") 43 | 44 | def convert_to_4digits(str_num): 45 | if len(str_num) == 1: 46 | new_num = '000' + str_num 47 | elif len(str_num) == 2: 48 | new_num = '00' + str_num 49 | elif len(str_num) == 3: 50 | new_num = '0' + str_num 51 | else: 52 | new_num = str_num 53 | return new_num 54 | 55 | #%% 56 | def load_train_objects(args): 57 | train_data, valid_data = get_train_valid_data_in_dict_format(args.fold) 58 | train_transforms = get_train_transforms(args.input_patch_size) 59 | valid_transforms = get_valid_transforms() 60 | model = get_model(args.network_name, args.input_patch_size) 61 | optimizer = get_optimizer(model, learning_rate=args.lr, weight_decay=args.wd) 62 | loss_function = get_loss_function() 63 | scheduler = get_scheduler(optimizer, args.epochs) 64 | metric = get_metric() 65 | 66 | return ( 67 | train_data, 68 | valid_data, 69 | train_transforms, 70 | valid_transforms, 71 | model, 72 | loss_function, 73 | optimizer, 74 | scheduler, 75 | metric 76 | ) 77 | 78 | 79 | def prepare_dataset(data, transforms, args): 80 | dataset = CacheDataset(data=data, transform=transforms, cache_rate=args.cache_rate, num_workers=args.num_workers) 81 | return dataset 82 | 83 | 84 | def main_worker(save_models_dir, save_logs_dir, args): 85 | # init_process_group 86 | ddp_setup() 87 | # get local rank on the GPU 88 | local_rank = int(dist.get_rank()) 89 | if local_rank == 0: 90 | print(f"Training {args.network_name} on fold {args.fold}") 91 | print(f"The models will be saved in {save_models_dir}") 92 | print(f"The training/validation logs will be saved in {save_logs_dir}") 93 | 94 | # get all training and validation objects 95 | train_data, valid_data, train_transforms, valid_transforms, model, loss_function, optimizer, scheduler, metric = load_train_objects(args) 96 | 97 | # get dataset of object-type CacheDataset 98 | train_dataset = prepare_dataset(train_data, train_transforms, args) 99 | valid_dataset = prepare_dataset(valid_data, valid_transforms, args) 100 | 101 | # get DistributedSampler instances for both training and validation dataloader 102 | # this will be used to split data into different GPUs 103 | train_sampler = DistributedSampler(dataset=train_dataset, shuffle=True) 104 | valid_sampler = DistributedSampler(dataset=valid_dataset, shuffle=False) 105 | 106 | # initializing train and valid dataloaders 107 | train_dataloader = DataLoader( 108 | train_dataset, 109 | 
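        # shuffle is False here because shuffling is delegated to the DistributedSampler
        # created above with shuffle=True; train_sampler.set_epoch() is called at every
        # epoch in the training loop so that each GPU sees a different shuffled shard.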
batch_size=args.train_bs, 110 | pin_memory=True, 111 | shuffle=False, 112 | sampler=train_sampler, 113 | num_workers=args.num_workers 114 | ) 115 | valid_dataloader = DataLoader( 116 | valid_dataset, 117 | batch_size=1, 118 | pin_memory=True, 119 | shuffle=False, 120 | sampler=valid_sampler, 121 | num_workers=args.num_workers 122 | ) 123 | 124 | post_pred = Compose([AsDiscrete(argmax=True, to_onehot=2)]) 125 | post_label = Compose([AsDiscrete(to_onehot=2)]) 126 | 127 | # filepaths for storing training and validation logs from different GPUs 128 | trainlog_fpath = os.path.join(save_logs_dir, f'trainlog_gpu{local_rank}.csv') 129 | validlog_fpath = os.path.join(save_logs_dir, f'validlog_gpu{local_rank}.csv') 130 | 131 | # initialize the GPU device 132 | device = torch.device(f"cuda:{local_rank}") 133 | torch.cuda.set_device(device) 134 | 135 | # number of epochs and epoch interval for running validation 136 | max_epochs = args.epochs 137 | val_interval = args.val_interval 138 | 139 | # push models to device 140 | model = model.to(device) 141 | 142 | epoch_loss_values = [] 143 | metric_values = [] 144 | 145 | # wrap the model with DDP 146 | model = DDP(model, device_ids=[device]) 147 | 148 | experiment_start_time = time.time() 149 | 150 | for epoch in range(max_epochs): 151 | epoch_start_time = time.time() 152 | print(f"[GPU{local_rank}]: Running training: epoch = {epoch + 1}") 153 | model.train() 154 | epoch_loss = 0 155 | step = 0 156 | train_sampler.set_epoch(epoch) 157 | for batch_data in train_dataloader: 158 | step += 1 159 | inputs, labels = ( 160 | batch_data['CTPT'].to(device), 161 | batch_data['GT'].to(device), 162 | ) 163 | optimizer.zero_grad() 164 | outputs = model(inputs) 165 | loss = loss_function(outputs, labels) 166 | loss.backward() 167 | optimizer.step() 168 | epoch_loss += loss.item() 169 | epoch_loss /= step 170 | print(f"[GPU:{local_rank}]: epoch {epoch + 1}/{max_epochs}: average loss: {epoch_loss:.4f}") 171 | epoch_loss_values.append(epoch_loss) 172 | 173 | # steps forward the CosineAnnealingLR scheduler 174 | scheduler.step() 175 | 176 | # update the training log file 177 | epoch_loss_values_df = pd.DataFrame(data=epoch_loss_values, columns=['Loss']) 178 | epoch_loss_values_df.to_csv(trainlog_fpath, index=False) 179 | 180 | 181 | if (epoch + 1) % val_interval == 0: 182 | print(f"[GPU{local_rank}]: Running validation") 183 | model.eval() 184 | with torch.no_grad(): 185 | for val_data in valid_dataloader: 186 | val_inputs, val_labels = ( 187 | val_data['CTPT'].to(device), 188 | val_data['GT'].to(device), 189 | ) 190 | roi_size = get_validation_sliding_window_size(args.input_patch_size) 191 | sw_batch_size = args.sw_bs 192 | val_outputs = sliding_window_inference( 193 | val_inputs, roi_size, sw_batch_size, model) 194 | val_outputs = [post_pred(i) for i in decollate_batch(val_outputs)] 195 | val_labels = [post_label(i) for i in decollate_batch(val_labels)] 196 | # compute metric for current iteration 197 | metric(y_pred=val_outputs, y=val_labels) 198 | 199 | # aggregate the final mean dice result 200 | metric_val = metric.aggregate().item() 201 | metric.reset() 202 | metric_values.append(metric_val) 203 | metric_values_df = pd.DataFrame(data=metric_values, columns=['Metric']) 204 | metric_values_df.to_csv(validlog_fpath, index=False) 205 | 206 | print(f"[GPU:{local_rank}] SAVING MODEL at epoch: {epoch + 1}; Mean DSC: {metric_val:.4f}") 207 | savepath = os.path.join(save_models_dir, "model_ep="+convert_to_4digits(str(int(epoch + 1)))+".pth") 208 | 
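            # the model is wrapped in DistributedDataParallel, so only model.module's
            # weights are saved; this lets the checkpoint be loaded into a plain,
            # un-wrapped network at inference time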
torch.save(model.module.state_dict(), savepath) 209 | 210 | epoch_end_time = (time.time() - epoch_start_time)/60 211 | print(f"[GPU:{local_rank}]: Epoch {epoch + 1} time: {round(epoch_end_time,2)} min") 212 | 213 | experiment_end_time = (time.time() - experiment_start_time)/(60*60) 214 | print(f"[GPU:{local_rank}]: Total time: {round(experiment_end_time,2)} hr") 215 | 216 | dist.destroy_process_group() 217 | 218 | def main(args): 219 | os.environ['OMP_NUM_THREADS'] = '6' 220 | fold = args.fold 221 | network = args.network_name 222 | inputsize = f'randcrop{args.input_patch_size}' 223 | 224 | experiment_code = f"{network}_fold{fold}_{inputsize}" 225 | 226 | #save models folder 227 | save_models_dir = os.path.join(RESULTS_FOLDER,'models') 228 | save_models_dir = os.path.join(save_models_dir, 'fold'+str(fold), network, experiment_code) 229 | os.makedirs(save_models_dir, exist_ok=True) 230 | 231 | # save train and valid logs folder 232 | save_logs_dir = os.path.join(RESULTS_FOLDER,'logs') 233 | save_logs_dir = os.path.join(save_logs_dir, 'fold'+str(fold), network, experiment_code) 234 | os.makedirs(save_logs_dir, exist_ok=True) 235 | 236 | main_worker(save_models_dir, save_logs_dir, args) 237 | 238 | 239 | 240 | if __name__ == "__main__": 241 | # create datasplit files for train and test images 242 | # follow all the instructions for dataset directory creation and images/labels file names as given in: LINK 243 | create_data_split_files() 244 | parser = argparse.ArgumentParser(description='Lymphoma PET/CT lesion segmentation using MONAI-PyTorch') 245 | parser.add_argument('--fold', type=int, default=0, metavar='fold', 246 | help='validation fold (default: 0), remaining folds will be used for training') 247 | parser.add_argument('--network-name', type=str, default='unet', metavar='netname', 248 | help='network name for training (default: unet)') 249 | parser.add_argument('--epochs', type=int, default=500, metavar='epochs', 250 | help='number of epochs to train (default: 10)') 251 | parser.add_argument('--input-patch-size', type=int, default=192, metavar='inputsize', 252 | help='size of cropped input patch for training (default: 192)') 253 | parser.add_argument('--train-bs', type=int, default=1, metavar='train-bs', 254 | help='mini-batchsize for training (default: 1)') 255 | parser.add_argument('--num_workers', type=int, default=2, metavar='nw', 256 | help='num_workers for train and validation dataloaders (default: 2)') 257 | parser.add_argument('--cache-rate', type=float, default=0.1, metavar='cr', 258 | help='cache_rate for CacheDataset from MONAI (default=0.1)') 259 | parser.add_argument('--lr', type=float, default=2e-4, metavar='lr', 260 | help='initial learning rate for AdamW optimizer (default=2e-4); Cosine scheduler will decrease this to 0 in args.epochs epochs') 261 | parser.add_argument('--wd', type=float, default=1e-5, metavar='wd', 262 | help='weight-decay for AdamW optimizer (default=1e-5)') 263 | parser.add_argument('--val-interval', type=int, default=2, metavar='val-interval', 264 | help='epochs interval for which validation will be performed (default=2)') 265 | parser.add_argument('--sw-bs', type=int, default=2, metavar='sw-bs', 266 | help='batchsize for sliding window inference (default=2)') 267 | args = parser.parse_args() 268 | 269 | main(args) 270 | 271 | -------------------------------------------------------------------------------- /segmentation/initialize_train.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 
Microsoft Corporation. All rights reserved. 3 | Licensed under the MIT License. 4 | ''' 5 | from monai.transforms import ( 6 | EnsureChannelFirstd, 7 | Compose, 8 | CropForegroundd, 9 | LoadImaged, 10 | Orientationd, 11 | RandCropByPosNegLabeld, 12 | DeleteItemsd, 13 | Spacingd, 14 | RandAffined, 15 | ConcatItemsd, 16 | ScaleIntensityRanged, 17 | ResizeWithPadOrCropd, 18 | Invertd, 19 | AsDiscreted, 20 | SaveImaged, 21 | 22 | ) 23 | from monai.networks.nets import UNet, SegResNet, DynUNet, SwinUNETR, UNETR, AttentionUnet 24 | from monai.networks.layers import Norm 25 | from monai.metrics import DiceMetric 26 | from monai.losses import DiceLoss 27 | import torch 28 | import matplotlib.pyplot as plt 29 | from glob import glob 30 | import pandas as pd 31 | import numpy as np 32 | from torch.optim.lr_scheduler import CosineAnnealingLR 33 | import os 34 | import sys 35 | config_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..") 36 | sys.path.append(config_dir) 37 | from config import DATA_FOLDER, WORKING_FOLDER 38 | #%% 39 | def convert_to_4digits(str_num): 40 | if len(str_num) == 1: 41 | new_num = '000' + str_num 42 | elif len(str_num) == 2: 43 | new_num = '00' + str_num 44 | elif len(str_num) == 3: 45 | new_num = '0' + str_num 46 | else: 47 | new_num = str_num 48 | return new_num 49 | 50 | def create_dictionary_ctptgt(ctpaths, ptpaths, gtpaths): 51 | data = [] 52 | for i in range(len(gtpaths)): 53 | ctpath = ctpaths[i] 54 | ptpath = ptpaths[i] 55 | gtpath = gtpaths[i] 56 | data.append({'CT':ctpath, 'PT':ptpath, 'GT':gtpath}) 57 | return data 58 | 59 | def remove_all_extensions(filename): 60 | while True: 61 | name, ext = os.path.splitext(filename) 62 | if ext == '': 63 | return name 64 | filename = name 65 | #%% 66 | def create_data_split_files(): 67 | """Creates filepaths data for training/validation and test images and saves 68 | them as `train_filepaths.csv` and `test_filepaths.csv` files under WORKING_FOLDER/data_split/; 69 | all training images will be assigned a FoldID specifying which fold (out of the 5 folds) 70 | the image belongs to. 
If the `train_filepaths.csv` and `test_filepaths.csv` already exist, 71 | this function is skipped 72 | """ 73 | train_filepaths = os.path.join(WORKING_FOLDER, 'data_split', 'train_filepaths.csv') 74 | test_filepaths = os.path.join(WORKING_FOLDER, 'data_split', 'test_filepaths.csv') 75 | if os.path.exists(train_filepaths) and os.path.exists(test_filepaths): 76 | return 77 | else: 78 | data_split_folder = os.path.join(WORKING_FOLDER, 'data_split') 79 | os.makedirs(data_split_folder, exist_ok=True) 80 | 81 | imagesTr = os.path.join(DATA_FOLDER, 'imagesTr') 82 | labelsTr = os.path.join(DATA_FOLDER, 'labelsTr') 83 | 84 | ctpaths = sorted(glob(os.path.join(imagesTr, '*0000.nii.gz'))) 85 | ptpaths = sorted(glob(os.path.join(imagesTr, '*0001.nii.gz'))) 86 | gtpaths = sorted(glob(os.path.join(labelsTr, '*.nii.gz'))) 87 | imageids = [remove_all_extensions(os.path.basename(path)) for path in gtpaths] 88 | 89 | n_folds = 5 90 | part_size = len(imageids) // n_folds 91 | remaining_elements = len(imageids) % n_folds 92 | start = 0 93 | train_folds = [] 94 | for i in range(n_folds): 95 | end = start + part_size + (1 if i < remaining_elements else 0) 96 | train_folds.append(imageids[start:end]) 97 | start = end 98 | 99 | fold_sizes = [len(fold) for fold in train_folds] 100 | foldids = [fold_sizes[i]*[i] for i in range(len(fold_sizes))] 101 | foldids = [item for sublist in foldids for item in sublist] 102 | 103 | trainfolds_data = np.column_stack((imageids, foldids, ctpaths, ptpaths, gtpaths)) 104 | train_df = pd.DataFrame(trainfolds_data, columns=['ImageID', 'FoldID', 'CTPATH', 'PTPATH', 'GTPATH']) 105 | 106 | train_df.to_csv(train_filepaths, index=False) 107 | 108 | imagesTs = os.path.join(DATA_FOLDER, 'imagesTs') 109 | labelsTs = os.path.join(DATA_FOLDER, 'labelsTs') 110 | ctpaths_test = sorted(glob(os.path.join(imagesTs, '*0000.nii.gz'))) 111 | ptpaths_test = sorted(glob(os.path.join(imagesTs, '*0001.nii.gz'))) 112 | gtpaths_test = sorted(glob(os.path.join(labelsTs, '*.nii.gz'))) 113 | imageids_test = [remove_all_extensions(os.path.basename(path)) for path in gtpaths_test] 114 | test_data = np.column_stack((imageids_test, ctpaths_test, ptpaths_test, gtpaths_test)) 115 | test_df = pd.DataFrame(test_data, columns=['ImageID', 'CTPATH', 'PTPATH', 'GTPATH']) 116 | test_df.to_csv(test_filepaths, index=False) 117 | 118 | #%% 119 | def get_train_valid_data_in_dict_format(fold): 120 | trainvalid_fpath = os.path.join(WORKING_FOLDER, 'data_split/train_filepaths.csv') 121 | trainvalid_df = pd.read_csv(trainvalid_fpath) 122 | train_df = trainvalid_df[trainvalid_df['FoldID'] != fold] 123 | valid_df = trainvalid_df[trainvalid_df['FoldID'] == fold] 124 | 125 | ctpaths_train, ptpaths_train, gtpaths_train = list(train_df['CTPATH'].values), list(train_df['PTPATH'].values), list(train_df['GTPATH'].values) 126 | ctpaths_valid, ptpaths_valid, gtpaths_valid = list(valid_df['CTPATH'].values), list(valid_df['PTPATH'].values), list(valid_df['GTPATH'].values) 127 | 128 | train_data = create_dictionary_ctptgt(ctpaths_train, ptpaths_train, gtpaths_train) 129 | valid_data = create_dictionary_ctptgt(ctpaths_valid, ptpaths_valid, gtpaths_valid) 130 | 131 | return train_data, valid_data 132 | 133 | #%% 134 | def get_test_data_in_dict_format(): 135 | test_fpaths = os.path.join(WORKING_FOLDER, 'data_split/test_filepaths.csv') 136 | test_df = pd.read_csv(test_fpaths) 137 | ctpaths_test, ptpaths_test, gtpaths_test = list(test_df['CTPATH'].values), list(test_df['PTPATH'].values), list(test_df['GTPATH'].values) 138 | test_data = 
create_dictionary_ctptgt(ctpaths_test, ptpaths_test, gtpaths_test)
    return test_data

def get_spatial_size(input_patch_size=192):
    trsz = input_patch_size
    return (trsz, trsz, trsz)

def get_spacing():
    spc = 2
    return (spc, spc, spc)

def get_train_transforms(input_patch_size=192):
    spatialsize = get_spatial_size(input_patch_size)
    spacing = get_spacing()
    mod_keys = ['CT', 'PT', 'GT']
    train_transforms = Compose(
        [
            LoadImaged(keys=mod_keys, image_only=True),
            EnsureChannelFirstd(keys=mod_keys),
            CropForegroundd(keys=mod_keys, source_key='CT'),
            ScaleIntensityRanged(keys=['CT'], a_min=-154, a_max=325, b_min=0, b_max=1, clip=True),
            Orientationd(keys=mod_keys, axcodes="RAS"),
            Spacingd(keys=mod_keys, pixdim=spacing, mode=('bilinear', 'bilinear', 'nearest')),
            RandCropByPosNegLabeld(
                keys=mod_keys,
                label_key='GT',
                spatial_size=spatialsize,
                pos=2,
                neg=1,
                num_samples=1,
                image_key='PT',
                image_threshold=0,
                allow_smaller=True,
            ),
            ResizeWithPadOrCropd(
                keys=mod_keys,
                spatial_size=spatialsize,
                mode='constant'
            ),
            RandAffined(
                keys=mod_keys,
                mode=('bilinear', 'bilinear', 'nearest'),
                prob=0.5,
                spatial_size=spatialsize,
                translate_range=(10, 10, 10),
                rotate_range=(0, 0, np.pi/15),
                scale_range=(0.1, 0.1, 0.1)),
            ConcatItemsd(keys=['CT', 'PT'], name='CTPT', dim=0),
            DeleteItemsd(keys=['CT', 'PT'])
        ])

    return train_transforms

#%%
def get_valid_transforms():
    spacing = get_spacing()
    mod_keys = ['CT', 'PT', 'GT']
    valid_transforms = Compose(
        [
            LoadImaged(keys=mod_keys),
            EnsureChannelFirstd(keys=mod_keys),
            CropForegroundd(keys=mod_keys, source_key='CT'),
            ScaleIntensityRanged(keys=['CT'], a_min=-154, a_max=325, b_min=0, b_max=1, clip=True),
            Orientationd(keys=mod_keys, axcodes="RAS"),
            Spacingd(keys=mod_keys, pixdim=spacing, mode=('bilinear', 'bilinear', 'nearest')),
            ConcatItemsd(keys=['CT', 'PT'], name='CTPT', dim=0),
            DeleteItemsd(keys=['CT', 'PT'])
        ])

    return valid_transforms


def get_post_transforms(test_transforms, save_preds_dir):
    post_transforms = Compose([
        Invertd(
            keys="Pred",
            transform=test_transforms,
            orig_keys="GT",
            meta_keys="pred_meta_dict",
            orig_meta_keys="image_meta_dict",
            meta_key_postfix="meta_dict",
            nearest_interp=False,
            to_tensor=True,
        ),
        AsDiscreted(keys="Pred", argmax=True),
        SaveImaged(keys="Pred", meta_keys="pred_meta_dict", output_dir=save_preds_dir, output_postfix="", separate_folder=False, resample=False),
    ])
    return post_transforms
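#%%
# Illustrative usage sketch (not one of the pipeline entry points): how the dict-format
# file lists and the transform pipelines defined above are typically combined into a
# MONAI dataset and dataloader. The training/inference scripts may wire these up
# differently (e.g., with caching or distributed samplers); treat this purely as an
# example of calling the helpers in this module.
def _example_build_loaders(fold=0, input_patch_size=192, train_bs=1, num_workers=2):
    from monai.data import Dataset, DataLoader  # local import to keep the sketch self-contained
    train_data, valid_data = get_train_valid_data_in_dict_format(fold)
    train_ds = Dataset(data=train_data, transform=get_train_transforms(input_patch_size))
    valid_ds = Dataset(data=valid_data, transform=get_valid_transforms())
    # after the transforms, each item carries a 2-channel 'CTPT' image and the 'GT' label
    train_loader = DataLoader(train_ds, batch_size=train_bs, shuffle=True, num_workers=num_workers)
    valid_loader = DataLoader(valid_ds, batch_size=1, shuffle=False, num_workers=num_workers)
    return train_loader, valid_loader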
234 | """ 235 | sizes, spacings = patch_size, spacings 236 | input_size = sizes 237 | strides, kernels = [], [] 238 | while True: 239 | spacing_ratio = [sp / min(spacings) for sp in spacings] 240 | stride = [2 if ratio <= 2 and size >= 8 else 1 for (ratio, size) in zip(spacing_ratio, sizes)] 241 | kernel = [3 if ratio <= 2 else 1 for ratio in spacing_ratio] 242 | if all(s == 1 for s in stride): 243 | break 244 | for idx, (i, j) in enumerate(zip(sizes, stride)): 245 | if i % j != 0: 246 | raise ValueError( 247 | f"Patch size is not supported, please try to modify the size {input_size[idx]} in the spatial dimension {idx}." 248 | ) 249 | sizes = [i / j for i, j in zip(sizes, stride)] 250 | spacings = [i * j for i, j in zip(spacings, stride)] 251 | kernels.append(kernel) 252 | strides.append(stride) 253 | 254 | strides.insert(0, len(spacings) * [1]) 255 | kernels.append(len(spacings) * [3]) 256 | return kernels, strides 257 | #%% 258 | def get_model(network_name = 'unet', input_patch_size=192): 259 | if network_name == 'unet': 260 | model = UNet( 261 | spatial_dims=3, 262 | in_channels=2, 263 | out_channels=2, 264 | channels=(16, 32, 64, 128, 256, 512), 265 | strides=(2, 2, 2, 2, 2), 266 | num_res_units=2, 267 | norm=Norm.BATCH 268 | ) 269 | elif network_name == 'swinunetr': 270 | spatialsize = get_spatial_size(input_patch_size) 271 | model = SwinUNETR( 272 | img_size=spatialsize, 273 | in_channels=2, 274 | out_channels=2, 275 | feature_size=12, 276 | use_checkpoint=False, 277 | ) 278 | elif network_name =='segresnet': 279 | model = SegResNet( 280 | spatial_dims=3, 281 | blocks_down=[1, 2, 2, 4], 282 | blocks_up=[1, 1, 1], 283 | init_filters=16, 284 | in_channels=2, 285 | out_channels=2, 286 | ) 287 | elif network_name == 'dynunet': 288 | spatialsize = get_spatial_size(input_patch_size) 289 | spacing = get_spacing() 290 | krnls, strds = get_kernels_strides(spatialsize, spacing) 291 | model = DynUNet( 292 | spatial_dims=3, 293 | in_channels=2, 294 | out_channels=2, 295 | kernel_size=krnls, 296 | strides=strds, 297 | upsample_kernel_size=strds[1:], 298 | ) 299 | else: 300 | pass 301 | return model 302 | 303 | 304 | #%% 305 | def get_loss_function(): 306 | loss_function = DiceLoss(to_onehot_y=True, softmax=True) 307 | return loss_function 308 | 309 | def get_optimizer(model, learning_rate=2e-4, weight_decay=1e-5): 310 | optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay) 311 | return optimizer 312 | 313 | def get_metric(): 314 | metric = DiceMetric(include_background=False, reduction="mean") 315 | return metric 316 | 317 | def get_scheduler(optimizer, max_epochs=500): 318 | scheduler = CosineAnnealingLR(optimizer, T_max=max_epochs, eta_min=0) 319 | return scheduler 320 | 321 | def get_validation_sliding_window_size(input_patch_size=192): 322 | dict_W_for_N = { 323 | 96:128, 324 | 128:160, 325 | 160:192, 326 | 192:192, 327 | 224:224, 328 | 256:256 329 | } 330 | vlsz = dict_W_for_N[input_patch_size] 331 | return (vlsz, vlsz, vlsz) -------------------------------------------------------------------------------- /metrics/metrics.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) Microsoft Corporation. All rights reserved. 3 | Licensed under the MIT License. 
/metrics/metrics.py:
--------------------------------------------------------------------------------
'''
Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.
'''

import SimpleITK as sitk
import numpy as np
import cc3d

#%%
def get_3darray_from_niftipath(
    path: str,
) -> np.ndarray:
    """Get a numpy array of a NIfTI image from its filepath

    Args:
        path (str): path of the NIfTI file

    Returns:
        np.ndarray: 3D numpy array for the image
    """
    image = sitk.ReadImage(path)
    array = np.transpose(sitk.GetArrayFromImage(image), (2, 1, 0))
    return array

def calculate_patient_level_lesion_suvmean_suvmax(
    ptarray: np.ndarray,
    maskarray: np.ndarray,
    marker: str = 'SUVmean'
) -> np.float64:
    """Function to return the lesion SUVmean or SUVmax over all lesions in
    a 3D PET image using the corresponding 3D segmentation mask

    Args:
        ptarray (np.ndarray): numpy ndarray for 3D PET image
        maskarray (np.ndarray): numpy ndarray for 3D mask image
        marker (str, optional): whether to calculate 'SUVmean' or 'SUVmax'.
        Defaults to 'SUVmean'.

    Returns:
        np.float64: patient-level SUVmean or SUVmax
    """
    prod = np.multiply(ptarray, maskarray)
    num_nonzero_voxels = len(np.nonzero(maskarray)[0])

    if num_nonzero_voxels == 0:
        return 0.0
    else:
        if marker == 'SUVmean':
            return np.sum(prod)/num_nonzero_voxels
        elif marker == 'SUVmax':
            return np.max(prod)

#%%
def calculate_patient_level_tmtv(
    maskarray: np.ndarray,
    spacing: tuple
) -> np.float64:
    """Function to return the total metabolic tumor volume (TMTV) in cm^3 using a
    3D mask containing 0s for background and 1s for lesions/tumors

    Args:
        maskarray (np.ndarray): numpy ndarray for 3D mask image
        spacing (tuple): voxel spacing in mm along the three axes

    Returns:
        np.float64: TMTV in cm^3
    """
    voxel_volume_cc = np.prod(spacing)/1000  # voxel volume in cm^3

    num_lesion_voxels = len(np.nonzero(maskarray)[0])
    tmtv_cc = voxel_volume_cc*num_lesion_voxels
    return tmtv_cc

#%%

def calculate_patient_level_lesion_count(
    maskarray: np.ndarray,
) -> int:
    """Function to return the total number of lesions using the 3D segmentation mask

    Args:
        maskarray (np.ndarray): numpy ndarray for 3D mask image

    Returns:
        int: number of connected components (lesions) in the mask
    """
    _, num_lesions = cc3d.connected_components(maskarray, connectivity=18, return_N=True)
    return num_lesions

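#%%
# Illustrative check of the helpers above on a tiny synthetic volume (not part of the
# evaluation pipeline; the uptake values and the 2 mm isotropic spacing are made up).
def _demo_uptake_and_volume_metrics():
    ptarray = np.zeros((10, 10, 10))
    ptarray[2:4, 2:4, 2:4] = 5.0                        # fake SUV uptake
    maskarray = np.zeros((10, 10, 10), dtype=np.int8)
    maskarray[2:4, 2:4, 2:4] = 1                        # one 8-voxel lesion
    spacing = (2.0, 2.0, 2.0)                           # in mm, as from SimpleITK GetSpacing()
    suvmean = calculate_patient_level_lesion_suvmean_suvmax(ptarray, maskarray, marker='SUVmean')  # -> 5.0
    suvmax = calculate_patient_level_lesion_suvmean_suvmax(ptarray, maskarray, marker='SUVmax')    # -> 5.0
    tmtv = calculate_patient_level_tmtv(maskarray, spacing)        # 8 voxels x 0.008 cm^3 = 0.064 cm^3
    num_lesions = calculate_patient_level_lesion_count(maskarray)  # -> 1
    return suvmean, suvmax, tmtv, num_lesions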
#%%
def calculate_patient_level_tlg(
    ptarray: np.ndarray,
    maskarray: np.ndarray,
    spacing: tuple
) -> np.float64:
    """Function to return the total lesion glycolysis (TLG) using a 3D PET image
    and the corresponding 3D segmentation mask (containing 0s for background and
    1s for lesion/tumor)
    TLG = SUV1*V1 + SUV2*V2 + ... + SUVn*Vn, where SUV1...SUVn are the SUVmean
    values of lesions 1...n with volumes V1...Vn, respectively

    Args:
        ptarray (np.ndarray): numpy ndarray for 3D PET image
        maskarray (np.ndarray): numpy ndarray for 3D mask image
        spacing (tuple): voxel spacing in mm along the three axes

    Returns:
        np.float64: total lesion glycolysis in cm^3 (assuming SUV is unitless)
    """
    voxel_volume_cc = np.prod(spacing)/1000  # voxel volume in cm^3

    labels_out, num_lesions = cc3d.connected_components(maskarray, connectivity=18, return_N=True)
    if num_lesions == 0:
        return 0.0
    else:
        _, lesion_num_voxels = np.unique(labels_out, return_counts=True)
        lesion_num_voxels = lesion_num_voxels[1:]
        lesion_mtvs = voxel_volume_cc*lesion_num_voxels
        lesion_suvmeans = []

        for i in range(1, num_lesions+1):
            mask = np.zeros_like(labels_out)
            mask[labels_out == i] = 1
            prod = np.multiply(mask, ptarray)
            num_nonzero_voxels = len(np.nonzero(mask)[0])
            lesion_suvmeans.append(np.sum(prod)/num_nonzero_voxels)

        tlg = np.sum(np.multiply(lesion_mtvs, lesion_suvmeans))
        return tlg
#%%
def calculate_patient_level_dissemination(
    maskarray: np.ndarray,
    spacing: tuple
) -> np.float64:
    """Function to return the tumor dissemination (Dmax) using the 3D segmentation mask
    Dmax = maximum possible distance between any two foreground voxels in a patient;
    these two voxels can come from the same lesion (in the case of a single lesion)
    or from different lesions (in the case of multiple lesions)

    Args:
        maskarray (np.ndarray): numpy ndarray for 3D mask image
        spacing (tuple): voxel spacing in mm along the three axes

    Returns:
        np.float64: dissemination value in cm
    """
    maskarray = maskarray.astype(np.int8)
    nonzero_voxels = np.argwhere(maskarray == 1)
    distances = np.sqrt(np.sum(((nonzero_voxels[:, None] - nonzero_voxels) * spacing)**2, axis=2))
    farthest_indices = np.unravel_index(np.argmax(distances), distances.shape)
    dmax = distances[farthest_indices]/10  # converting mm to cm
    del maskarray
    del nonzero_voxels
    del distances
    return dmax

#%%
def calculate_patient_level_dice_score(
    gtarray: np.ndarray,
    predarray: np.ndarray,
) -> np.float64:
    """Function to return the Dice similarity coefficient (Dice score) between
    2 segmentation masks (containing 0s for background and 1s for lesions/tumors)

    Args:
        gtarray (np.ndarray): numpy ndarray for the ground-truth mask
        predarray (np.ndarray): numpy ndarray for the predicted mask

    Returns:
        np.float64: Dice score
    """
    dice_score = 2.0*np.sum(predarray[gtarray == 1])/(np.sum(gtarray) + np.sum(predarray))
    return dice_score
#%%
def calculate_patient_level_iou(
    gtarray: np.ndarray,
    predarray: np.ndarray,
) -> np.float64:
    """Function to return the Intersection-over-Union (IoU) between
    2 segmentation masks (containing 0s for background and 1s for lesions/tumors)

    Args:
        gtarray (np.ndarray): numpy ndarray for the ground-truth mask
        predarray (np.ndarray): numpy ndarray for the predicted mask

    Returns:
        np.float64: IoU
    """
    intersection = np.sum(predarray[gtarray == 1])
    union = np.sum(gtarray) + np.sum(predarray) - intersection
    iou = intersection/union
    return iou

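#%%
# Illustrative sketch comparing a synthetic ground-truth mask with a prediction shifted
# by one voxel (not part of the test-metric scripts; spacing is again a made-up 2 mm).
def _demo_overlap_metrics():
    gtarray = np.zeros((12, 12, 12), dtype=np.int8)
    predarray = np.zeros((12, 12, 12), dtype=np.int8)
    gtarray[2:6, 2:6, 2:6] = 1        # 4 x 4 x 4 = 64-voxel lesion
    predarray[3:7, 2:6, 2:6] = 1      # same size, shifted by one voxel along x
    spacing = (2.0, 2.0, 2.0)
    dice = calculate_patient_level_dice_score(gtarray, predarray)   # 2*48/(64+64) = 0.75
    iou = calculate_patient_level_iou(gtarray, predarray)           # 48/(64+64-48) = 0.60
    dmax = calculate_patient_level_dissemination(gtarray, spacing)  # longest voxel-to-voxel distance, in cm
    return dice, iou, dmax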
def calculate_patient_level_intersection(
    gtarray: np.ndarray,
    predarray: np.ndarray,
) -> np.float64:
    """Function to return the intersection between
    2 segmentation masks (containing 0s for background and 1s for lesions/tumors)

    Args:
        gtarray (np.ndarray): numpy ndarray for the ground-truth mask
        predarray (np.ndarray): numpy ndarray for the predicted mask

    Returns:
        np.float64: number of foreground voxels common to both masks
    """
    intersection = np.sum(predarray[gtarray == 1])
    return intersection
#%%

def calculate_patient_level_false_positive_volume(
    gtarray: np.ndarray,
    predarray: np.ndarray,
    spacing: tuple
) -> np.float64:
    # compute the volume (in cm^3) of false positive connected components of the prediction mask,
    # i.e., predicted lesions that do not overlap with any ground-truth foreground
    pred_connected_components = cc3d.connected_components(predarray, connectivity=18)

    false_positive = 0
    for idx in range(1, pred_connected_components.max()+1):
        comp_mask = np.isin(pred_connected_components, idx)
        if (comp_mask*gtarray).sum() == 0:
            false_positive += comp_mask.sum()

    voxel_volume_cc = np.prod(spacing)/1000
    return false_positive*voxel_volume_cc

#%%
def calculate_patient_level_false_negative_volume(
    gtarray: np.ndarray,
    predarray: np.ndarray,
    spacing: tuple
) -> np.float64:
    # compute the volume (in cm^3) of false negative connected components of the ground-truth mask,
    # i.e., ground-truth lesions that do not overlap with any predicted foreground
    gt_connected_components = cc3d.connected_components(gtarray, connectivity=18)

    false_negative = 0
    for idx in range(1, gt_connected_components.max()+1):
        comp_mask = np.isin(gt_connected_components, idx)
        if (comp_mask*predarray).sum() == 0:
            false_negative += comp_mask.sum()

    voxel_volume_cc = np.prod(spacing)/1000
    return false_negative*voxel_volume_cc

#%%
def is_suvmax_detected(
    gtarray: np.ndarray,
    predarray: np.ndarray,
    ptarray: np.ndarray,
) -> bool:
    # returns True if the predicted mask covers the SUVmax voxel of the ground-truth lesion(s)
    prod = np.multiply(gtarray, ptarray)
    max_index = np.unravel_index(np.argmax(prod), prod.shape)
    if predarray[max_index] == 1:
        return True
    else:
        return False

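#%%
# Illustrative sketch for the detection-volume helpers above (synthetic masks,
# made-up 2 mm spacing; not part of the evaluation scripts).
def _demo_detection_volumes():
    gtarray = np.zeros((16, 16, 16), dtype=np.int8)
    predarray = np.zeros((16, 16, 16), dtype=np.int8)
    gtarray[1:3, 1:3, 1:3] = 1            # one ground-truth lesion (8 voxels)
    predarray[10:12, 10:12, 10:12] = 1    # one spurious prediction far from the GT lesion
    spacing = (2.0, 2.0, 2.0)
    fpv = calculate_patient_level_false_positive_volume(gtarray, predarray, spacing)  # -> 0.064 cm^3
    fnv = calculate_patient_level_false_negative_volume(gtarray, predarray, spacing)  # -> 0.064 cm^3
    return fpv, fnv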
def calculate_patient_level_tp_fp_fn(
    gtarray: np.ndarray,
    predarray: np.ndarray,
    criterion: str,
    threshold: np.float64 = None,
    ptarray: np.ndarray = None,
) -> (int, int, int):
    """Calculate patient-level TP, FP, and FN (for detection-based metrics)
    via 3 criteria:

    criterion1: A predicted lesion is TP if any one of its foreground voxels
    overlaps with GT foreground. A predicted lesion that doesn't overlap with any
    GT foreground is FP. A GT lesion that doesn't overlap with any predicted
    foreground is FN. `criterion1` is the weakest detection criterion.

    criterion2: Each predicted lesion is matched to the GT lesion with which it has
    the highest IoU. A predicted lesion is TP if this best IoU is at least `threshold`,
    and FP otherwise. As soon as a lesion is predicted as TP, the matched GT lesion is
    removed from the set of GT lesions. The lesions that remain in the end in the set
    of GT lesions are FN. `criterion2` can be a hard or weak criterion depending on
    the value of `threshold`.

    criterion3: A predicted lesion is TP if it covers the SUVmax voxel of its
    best-matching GT lesion, hence this criterion requires the use of PET data
    (`ptarray`). A predicted lesion that doesn't cover any GT lesion's SUVmax voxel is
    considered FP. As soon as a lesion is predicted as TP, the matched GT lesion is
    removed from the set of GT lesions. The lesions that remain in the end in the set
    of GT lesions are FN. `criterion3` is likely an easy criterion since a network is
    more likely to segment high(er)-uptake regions.

    Args:
        gtarray (np.ndarray): numpy ndarray for the ground-truth mask
        predarray (np.ndarray): numpy ndarray for the predicted mask
        criterion (str): one of 'criterion1', 'criterion2', or 'criterion3'
        threshold (np.float64, optional): IoU threshold used by criterion2. Defaults to None.
        ptarray (np.ndarray, optional): numpy ndarray for the 3D PET image, required by
        criterion3. Defaults to None.

    Returns:
        (int, int, int): patient-level TP, FP, and FN counts
    """

    gtarray_labeled_mask, num_lesions_gt = cc3d.connected_components(gtarray, connectivity=18, return_N=True)
    predarray_labeled_mask, num_lesions_pred = cc3d.connected_components(predarray, connectivity=18, return_N=True)
    gt_lesions_list = list(np.arange(1, num_lesions_gt+1))
    # initial values for TP, FP, FN
    TP = 0
    FP = 0
    FN = num_lesions_gt

    if criterion == 'criterion1':
        FN = 0  # for this criterion the FNs are counted up from 0, hence the reassignment
        for i in range(1, num_lesions_pred+1):
            pred_lesion_mask = np.where(predarray_labeled_mask == i, 1, 0)
            if np.any(pred_lesion_mask & (gtarray_labeled_mask > 0)):
                TP += 1
            else:
                FP += 1
        for j in range(1, num_lesions_gt+1):
            gt_lesion_mask = np.where(gtarray_labeled_mask == j, 1, 0)
            if not np.any(gt_lesion_mask & (predarray_labeled_mask > 0)):
                FN += 1

    elif criterion == 'criterion2':
        for i in range(1, num_lesions_pred+1):
            max_iou = 0
            match_gt_lesion = None
            pred_lesion_mask = np.where(predarray_labeled_mask == i, 1, 0)
            for j in range(1, num_lesions_gt+1):
                gt_lesion_mask = np.where(gtarray_labeled_mask == j, 1, 0)
                iou = calculate_patient_level_iou(gt_lesion_mask, pred_lesion_mask)
                if iou > max_iou:
                    max_iou = iou
                    match_gt_lesion = j
            if max_iou >= threshold:
                TP += 1
                if match_gt_lesion in gt_lesions_list:  # guard: a GT lesion may already have been matched
                    gt_lesions_list.remove(match_gt_lesion)
            else:
                FP += 1
        FN = len(gt_lesions_list)

    elif criterion == 'criterion3':
        for i in range(1, num_lesions_pred+1):
            max_iou = 0
            match_gt_lesion = None
            pred_lesion_mask = np.where(predarray_labeled_mask == i, 1, 0)
            for j in range(1, num_lesions_gt+1):
                gt_lesion_mask = np.where(gtarray_labeled_mask == j, 1, 0)
                iou = calculate_patient_level_iou(gt_lesion_mask, pred_lesion_mask)
                if iou > max_iou:
                    max_iou = iou
                    match_gt_lesion = j

            # match_gt_lesion is the GT lesion with the maximum IoU with predicted lesion i
            # (None if the predicted lesion overlaps with no GT lesion)
            if match_gt_lesion is not None:
                arr_gt_lesion = np.where(gtarray_labeled_mask == match_gt_lesion, 1, 0)
                suvmax_hit = is_suvmax_detected(arr_gt_lesion, pred_lesion_mask, ptarray)
            else:
                suvmax_hit = False
            if suvmax_hit:
                TP += 1
                if match_gt_lesion in gt_lesions_list:  # guard: a GT lesion may already have been matched
                    gt_lesions_list.remove(match_gt_lesion)
            else:
                FP += 1

        FN = len(gt_lesions_list)

    else:
        raise ValueError('Invalid criterion. Choose between criterion1, criterion2, or criterion3')

    return TP, FP, FN

--------------------------------------------------------------------------------
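For reference, a minimal end-to-end sketch of how these patient-level helpers are typically combined. The NIfTI paths below are placeholders, it assumes the repository root is on the Python path, and the threshold value is only an example; the actual evaluation loops live in segmentation/calculate_test_metrics.py and segmentation/generate_lesion_measures.py.

from metrics.metrics import (
    get_3darray_from_niftipath,
    calculate_patient_level_dice_score,
    calculate_patient_level_tp_fp_fn,
)

gtpath, predpath, ptpath = 'gt.nii.gz', 'pred.nii.gz', 'pt.nii.gz'   # placeholder file paths
gtarray = get_3darray_from_niftipath(gtpath)
predarray = get_3darray_from_niftipath(predpath)
ptarray = get_3darray_from_niftipath(ptpath)

dice = calculate_patient_level_dice_score(gtarray, predarray)
tp1, fp1, fn1 = calculate_patient_level_tp_fp_fn(gtarray, predarray, criterion='criterion1')
tp2, fp2, fn2 = calculate_patient_level_tp_fp_fn(gtarray, predarray, criterion='criterion2', threshold=0.5)
tp3, fp3, fn3 = calculate_patient_level_tp_fp_fn(gtarray, predarray, criterion='criterion3', ptarray=ptarray)
print(dice, (tp1, fp1, fn1), (tp2, fp2, fn2), (tp3, fp3, fn3))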