├── tests ├── __init__.py ├── conftest.py ├── test_config.py ├── test_utils.py ├── test_pipeline.py ├── test_compare.py ├── test_quarantine.py ├── test_segmentation.py ├── test_convert.py ├── test_structure_set.py ├── test_input.py └── test_generate.py ├── pydicer ├── cli │ ├── __init__.py │ ├── contants.py │ ├── run.py │ └── input.py ├── analyse │ └── __init__.py ├── convert │ ├── __init__.py │ ├── headers.py │ └── rtstruct.py ├── dataset │ ├── __init__.py │ ├── preparation.py │ └── structureset.py ├── generate │ ├── __init__.py │ ├── mhubconfigs │ │ ├── platipy.yml │ │ ├── lungmask.yml │ │ ├── totalsegmentator.yml │ │ ├── nnunet_liver.yml │ │ ├── casust.yml │ │ └── nnunet_pancreas.yml │ └── models.py ├── input │ ├── __init__.py │ ├── test.py │ ├── filesystem.py │ ├── base.py │ ├── web.py │ ├── tcia.py │ ├── pacs.py │ └── orthanc.py ├── preprocess │ └── __init__.py ├── visualise │ └── __init__.py ├── __init__.py ├── constants.py ├── logger.py ├── quarantine.py └── config.py ├── docs ├── contributing.rst ├── code_of_conduct.rst ├── utils.rst ├── nnunet.rst ├── tool.rst ├── config.rst ├── preprocess.rst ├── visualise.rst ├── analyse.rst ├── generate.rst ├── dataset.rst ├── _static │ └── custom.css ├── convert.rst ├── input.rst ├── Makefile ├── make.bat ├── index.rst └── conf.py ├── docker-compose.yml ├── .coveragerc ├── .devcontainer ├── install-dev-tools.sh ├── docker-compose.yml └── devcontainer.json ├── assets └── pydicer-working-directory-structure.png ├── requirements.txt ├── requirements-dev.txt ├── .vscode └── settings.json ├── Makefile ├── .github └── workflows │ ├── docs.yml │ └── pull-request.yml ├── examples ├── ASMIRTWorkshop │ └── README.md ├── VisualiseData.ipynb ├── DoseMetrics.ipynb ├── Configuration.ipynb ├── WorkingWithData.ipynb ├── GettingStarted.ipynb ├── ConvertingData.ipynb └── Radiomics.ipynb ├── pyproject.toml ├── CITATION.cff ├── .gitignore ├── CONTRIBUTING.md ├── CODE_OF_CONDUCT.md └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydicer/cli/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydicer/analyse/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydicer/convert/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydicer/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydicer/generate/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydicer/input/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydicer/preprocess/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydicer/visualise/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/contributing.rst: -------------------------------------------------------------------------------- 1 | .. mdinclude:: ../CONTRIBUTING.md -------------------------------------------------------------------------------- /docs/code_of_conduct.rst: -------------------------------------------------------------------------------- 1 | .. mdinclude:: ../CODE_OF_CONDUCT.md -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | pydicer-dev: 3 | build: 4 | context: . 5 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | # Omit the CLI directory from coverage 3 | omit = 4 | pydicer/cli/* -------------------------------------------------------------------------------- /docs/utils.rst: -------------------------------------------------------------------------------- 1 | ##################### 2 | Utils 3 | ##################### 4 | 5 | .. automodule:: pydicer.utils 6 | :members: 7 | -------------------------------------------------------------------------------- /.devcontainer/install-dev-tools.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | sudo apt update && sudo apt install -y libtbb-dev 4 | poetry install --with=dev 5 | -------------------------------------------------------------------------------- /docs/nnunet.rst: -------------------------------------------------------------------------------- 1 | ##################### 2 | nnUNet 3 | ##################### 4 | 5 | .. autoclass:: pydicer.dataset.nnunet.NNUNetDataset 6 | :members: -------------------------------------------------------------------------------- /docs/tool.rst: -------------------------------------------------------------------------------- 1 | ##################### 2 | PyDicer 3 | ##################### 4 | 5 | 6 | .. autoclass:: pydicer.tool.PyDicer 7 | :members: 8 | -------------------------------------------------------------------------------- /docs/config.rst: -------------------------------------------------------------------------------- 1 | ##################### 2 | Configuration 3 | ##################### 4 | 5 | 6 | .. autoclass:: pydicer.config.PyDicerConfig 7 | :members: -------------------------------------------------------------------------------- /docs/preprocess.rst: -------------------------------------------------------------------------------- 1 | ##################### 2 | Preprocessing 3 | ##################### 4 | 5 | .. automodule:: pydicer.preprocess.data 6 | :members: 7 | -------------------------------------------------------------------------------- /docs/visualise.rst: -------------------------------------------------------------------------------- 1 | ##################### 2 | Visualisation 3 | ##################### 4 | 5 | .. 
automodule:: pydicer.visualise.data 6 | :members: 7 | -------------------------------------------------------------------------------- /assets/pydicer-working-directory-structure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AustralianCancerDataNetwork/pydicer/HEAD/assets/pydicer-working-directory-structure.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pydicom >= 2.1.2 2 | SimpleITK >= 2.0.2 3 | platipy >= 0.5.0 4 | pyorthanc >= 0.2.14 5 | pyradiomics >= 3.1.0 6 | argparse >= 1.4.0 7 | tqdm >= 4.55.1 8 | -------------------------------------------------------------------------------- /docs/analyse.rst: -------------------------------------------------------------------------------- 1 | ##################### 2 | Analyse 3 | ##################### 4 | 5 | .. automodule:: pydicer.analyse.data 6 | :members: 7 | 8 | .. automodule:: pydicer.analyse.compare 9 | :members: 10 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | pylint 2 | black 3 | pytest 4 | numpy 5 | sphinx==5.0.2 6 | m2r2==0.3.2 7 | nbstripout==0.5.0 8 | furo==2022.6.4.1 9 | sphinxcontrib-napoleon==0.7 10 | sphinx-theme==1.0 11 | nbstripout==0.5.0 12 | furo==2022.6.4.1 13 | nbsphinx==0.8.9 14 | MarkupSafe==2.0.1 15 | recommonmark==0.7.1 16 | -------------------------------------------------------------------------------- /docs/generate.rst: -------------------------------------------------------------------------------- 1 | ##################### 2 | Generation 3 | ##################### 4 | 5 | Objects 6 | ======= 7 | 8 | .. automodule:: pydicer.generate.object 9 | :members: 10 | 11 | Auto-segmentation 12 | ================= 13 | 14 | .. automodule:: pydicer.generate.segmentation 15 | :members: 16 | 17 | 18 | Models 19 | ====== 20 | 21 | .. automodule:: pydicer.generate.models 22 | :members: 23 | 24 | -------------------------------------------------------------------------------- /docs/dataset.rst: -------------------------------------------------------------------------------- 1 | ##################### 2 | Dataset Preparation 3 | ##################### 4 | 5 | Prepare Dataset 6 | =============== 7 | 8 | .. automodule:: pydicer.dataset.preparation 9 | :members: 10 | 11 | Preparation Functions 12 | ===================== 13 | 14 | .. automodule:: pydicer.dataset.functions 15 | :members: 16 | 17 | Structure Sets 18 | ============== 19 | 20 | .. 
automodule:: pydicer.dataset.structureset 21 | :members: 22 | -------------------------------------------------------------------------------- /pydicer/__init__.py: -------------------------------------------------------------------------------- 1 | from .tool import PyDicer 2 | 3 | __project__ = "pydicer" 4 | __version__ = "0.2.0" 5 | __keywords__ = [ 6 | "medical imaging", 7 | "visualisation", 8 | "conversion", 9 | "DICOM", 10 | "radiotherapy", 11 | "image analysis", 12 | ] 13 | __author__ = "Ingham Medical Physics" 14 | __author_email__ = "phillip.chlap@unsw.edu.au" 15 | __url__ = "https://australiancancerdatanetwork.github.io/pydicer/" 16 | __platforms__ = "ALL" 17 | -------------------------------------------------------------------------------- /pydicer/generate/mhubconfigs/platipy.yml: -------------------------------------------------------------------------------- 1 | general: 2 | data_base_dir: /app/data 3 | version: 1.0.0 4 | description: custom pipeline from nifti to nifti 5 | 6 | execute: 7 | - FileStructureImporter 8 | - PlatipyRunner 9 | - DataOrganizer 10 | 11 | modules: 12 | FileStructureImporter: 13 | input_dir: "input_data" 14 | structures: 15 | - image.nii.gz@instance@nifti:mod=ct 16 | import_id: _instance 17 | 18 | DataOrganizer: 19 | targets: 20 | - nifti:mod=seg-->/app/data/output_data/[d:roi].nii.gz 21 | -------------------------------------------------------------------------------- /docs/_static/custom.css: -------------------------------------------------------------------------------- 1 | table { 2 | color: var(--color-foreground-primary) !important; 3 | } 4 | 5 | table thead { 6 | background-color: var(--color-card-marginals-background) !important; 7 | } 8 | 9 | table tr:nth-child(odd) { 10 | background: var(--color-background-border) !important; 11 | } 12 | 13 | table tr:nth-child(even) { 14 | background: var(--color-card-marginals-background) !important; 15 | } 16 | 17 | div.nboutput.container div.output_area.stderr { 18 | background: var(--color-background-primary) !important; 19 | } -------------------------------------------------------------------------------- /docs/convert.rst: -------------------------------------------------------------------------------- 1 | ##################### 2 | Conversion 3 | ##################### 4 | 5 | Data Conversion 6 | =============== 7 | 8 | .. automodule:: pydicer.convert.data 9 | :members: 10 | 11 | Header Conversion 12 | ================= 13 | 14 | .. automodule:: pydicer.convert.headers 15 | :members: 16 | 17 | RTSTRUCT Conversion 18 | =================== 19 | 20 | .. automodule:: pydicer.convert.rtstruct 21 | :members: 22 | 23 | PET Conversion 24 | ============== 25 | 26 | .. 
automodule:: pydicer.convert.pt 27 | :members: 28 | 29 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from pydicer.utils import fetch_converted_test_data 4 | 5 | 6 | @pytest.fixture 7 | def test_data_converted(): 8 | """Fixture to grab the test data with already converted into PyDicer format""" 9 | 10 | return fetch_converted_test_data("./testdata_hnscc", dataset="HNSCC") 11 | 12 | 13 | @pytest.fixture 14 | def test_data_autoseg(): 15 | """Fixture to grab the test data in PyDicer format for auto-seg tests""" 16 | 17 | return fetch_converted_test_data("./testdata_lctsc", dataset="LCTSC") 18 | -------------------------------------------------------------------------------- /pydicer/generate/mhubconfigs/lungmask.yml: -------------------------------------------------------------------------------- 1 | general: 2 | data_base_dir: /app/data 3 | version: 1.0.0 4 | description: custom pipeline from nifti to nifti 5 | 6 | execute: 7 | - FileStructureImporter 8 | - LungMaskRunner 9 | - DataOrganizer 10 | 11 | modules: 12 | FileStructureImporter: 13 | input_dir: "input_data" 14 | structures: 15 | - image.nii.gz@instance@nifti:mod=ct 16 | import_id: _instance 17 | 18 | LungMaskRunner: 19 | batchsize: 64 20 | 21 | DataOrganizer: 22 | targets: 23 | - nifti:mod=seg-->/app/data/output_data/[d:roi].nii.gz 24 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.linting.enabled": true, 3 | "python.linting.pylintEnabled": true, 4 | "python.linting.pylintArgs": [ 5 | "--rcfile=.pylintrc" 6 | ], 7 | "python.formatting.blackArgs": [ 8 | "--line-length", 9 | "99" 10 | ], 11 | "python.formatting.provider": "black", 12 | "editor.rulers": [ 13 | 99 14 | ], 15 | "python.testing.pytestArgs": [ 16 | "." 17 | ], 18 | "python.testing.unittestEnabled": false, 19 | "python.testing.nosetestsEnabled": false, 20 | "python.testing.pytestEnabled": true 21 | } -------------------------------------------------------------------------------- /docs/input.rst: -------------------------------------------------------------------------------- 1 | ##################### 2 | Input 3 | ##################### 4 | 5 | 6 | .. automodule:: pydicer.input.filesystem 7 | :members: 8 | 9 | ---- 10 | 11 | 12 | .. automodule:: pydicer.input.pacs 13 | :members: 14 | 15 | 16 | ---- 17 | 18 | 19 | .. automodule:: pydicer.input.orthanc 20 | :members: 21 | 22 | 23 | ---- 24 | 25 | 26 | .. automodule:: pydicer.input.tcia 27 | :members: 28 | 29 | 30 | ---- 31 | 32 | 33 | 34 | .. automodule:: pydicer.input.test 35 | :members: 36 | 37 | 38 | ---- 39 | 40 | 41 | 42 | .. 
automodule:: pydicer.input.web 43 | :members: 44 | -------------------------------------------------------------------------------- /pydicer/generate/mhubconfigs/totalsegmentator.yml: -------------------------------------------------------------------------------- 1 | general: 2 | data_base_dir: /app/data 3 | version: 1.0.0 4 | description: custom pipeline from nifti to nifti 5 | 6 | execute: 7 | - FileStructureImporter 8 | - TotalSegmentatorRunner 9 | - DataOrganizer 10 | 11 | modules: 12 | FileStructureImporter: 13 | input_dir: "input_data" 14 | structures: 15 | - image.nii.gz@instance@nifti:mod=ct 16 | import_id: _instance 17 | 18 | TotalSegmentatorRunner: 19 | use_fast_mode: true 20 | 21 | DataOrganizer: 22 | targets: 23 | - nifti:mod=seg-->/app/data/output_data/[d:roi].nii.gz 24 | -------------------------------------------------------------------------------- /pydicer/constants.py: -------------------------------------------------------------------------------- 1 | RT_STRUCTURE_STORAGE_UID = "1.2.840.10008.5.1.4.1.1.481.3" 2 | RT_DOSE_STORAGE_UID = "1.2.840.10008.5.1.4.1.1.481.2" 3 | RT_PLAN_STORAGE_UID = "1.2.840.10008.5.1.4.1.1.481.5" 4 | CT_IMAGE_STORAGE_UID = "1.2.840.10008.5.1.4.1.1.2" 5 | PET_IMAGE_STORAGE_UID = "1.2.840.10008.5.1.4.1.1.128" 6 | MR_IMAGE_STORAGE_UID = "1.2.840.10008.5.1.4.1.1.4" 7 | 8 | PYDICER_DIR_NAME = ".pydicer" 9 | CONVERTED_DIR_NAME = "data" 10 | QUARANTINE_DIR_NAME = "quarantine" 11 | DEFAULT_MAPPING_ID = "default" 12 | 13 | DICOM_FILE_EXTENSIONS = [ 14 | "dcm", 15 | "DCM", 16 | "dcim", 17 | "DCIM", 18 | "dicom", 19 | "DICOM", 20 | ] 21 | -------------------------------------------------------------------------------- /pydicer/generate/mhubconfigs/nnunet_liver.yml: -------------------------------------------------------------------------------- 1 | general: 2 | data_base_dir: /app/data 3 | version: 1.0.0 4 | description: custom pipeline from nifti to nifti 5 | 6 | execute: 7 | - FileStructureImporter 8 | - NNUnetRunner 9 | - DataOrganizer 10 | 11 | modules: 12 | FileStructureImporter: 13 | input_dir: "input_data" 14 | structures: 15 | - image.nii.gz@instance@nifti:mod=ct 16 | import_id: _instance 17 | 18 | NNUnetRunner: 19 | nnunet_task: "Task003_Liver" 20 | nnunet_model: "3d_lowres" 21 | roi: LIVER,LIVER+NEOPLASM_MALIGNANT_PRIMARY 22 | 23 | DataOrganizer: 24 | targets: 25 | - nifti:mod=seg-->/app/data/output_data/[d:roi].nii.gz 26 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = docs/source 9 | BUILDDIR = docs 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /pydicer/input/test.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | from pydicer.input.web import WebInput 4 | 5 | 6 | class TestInput(WebInput): 7 | __test__ = False # pytest will try to use this as a test class without this 8 | 9 | def __init__(self, working_directory: Union[str, list] = None): 10 | """ 11 | A test input class to download example data from zenodo 12 | 13 | Args: 14 | working_directory (str|pathlib.Path, optional): The working directory in which to 15 | store the data fetched. Defaults to a temp directory. 16 | """ 17 | 18 | data_url = "https://zenodo.org/record/5276878/files/HNSCC.zip" 19 | 20 | super().__init__(data_url, working_directory) 21 | -------------------------------------------------------------------------------- /pydicer/generate/mhubconfigs/casust.yml: -------------------------------------------------------------------------------- 1 | general: 2 | data_base_dir: /app/data 3 | version: 1.0.0 4 | description: custom pipeline from nifti to nifti 5 | 6 | execute: 7 | - FileStructureImporter 8 | - NNUnetRunner 9 | - CasustRunner 10 | - DataOrganizer 11 | 12 | modules: 13 | FileStructureImporter: 14 | input_dir: "input_data" 15 | structures: 16 | - image.nii.gz@instance@nifti:mod=ct 17 | import_id: _instance 18 | 19 | NNUnetRunner: 20 | folds: all 21 | nnunet_task: Task400_OPEN_HEART_1FOLD 22 | nnunet_model: 3d_lowres 23 | roi: HEART 24 | 25 | CasustRunner: 26 | test_time_augmentation: 0 27 | 28 | DataOrganizer: 29 | targets: 30 | - nifti:mod=seg-->/app/data/output_data/[d:roi].nii.gz 31 | -------------------------------------------------------------------------------- /pydicer/input/filesystem.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import Union 3 | 4 | from pydicer.input.base import InputBase 5 | 6 | 7 | class FileSystemInput(InputBase): 8 | def __init__(self, directory: Union[str, Path]): 9 | """ 10 | Class for inputing files from the file system 11 | 12 | Args: 13 | directory (str|pathlib.Path): The directory in which to find DICOM files. 
14 | """ 15 | 16 | super().__init__(directory) 17 | 18 | if not self.working_directory.exists(): 19 | raise FileNotFoundError("The directory provided does not exist") 20 | 21 | if not self.working_directory.is_dir(): 22 | raise AttributeError("Ensure that the path specified is a directory") 23 | -------------------------------------------------------------------------------- /.devcontainer/docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | # Update this to the name of the service you want to work with in your docker-compose.yml file 3 | pydicer-dev: 4 | # Use a development image for dev 5 | build: !reset "null" 6 | image: mcr.microsoft.com/devcontainers/python:1-3.9-bookworm 7 | 8 | volumes: 9 | # Update this to wherever you want VS Code to mount the folder of your project 10 | - ..:/workspaces:cached 11 | 12 | # Uncomment the next four lines if you will use a ptrace-based debugger like C++, Go, and Rust. 13 | # cap_add: 14 | # - SYS_PTRACE 15 | # security_opt: 16 | # - seccomp:unconfined 17 | 18 | # Overrides default command so things don't shut down after the process ends. 19 | command: /bin/sh -c "while sleep 1000; do :; done" 20 | -------------------------------------------------------------------------------- /pydicer/input/base.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | import logging 3 | from typing import Union 4 | 5 | import abc 6 | from pathlib import Path 7 | 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | class InputBase(abc.ABC): 13 | def __init__(self, working_directory: Union[str, Path] = None): 14 | """ 15 | Base class for input modules. 16 | 17 | Args: 18 | working_directory (str|pathlib.Path, optional): The working directory in which to 19 | store the data fetched. Defaults to a temp directory. 20 | """ 21 | 22 | if working_directory is None: 23 | working_directory = tempfile.mkdtemp() 24 | 25 | self.working_directory = Path(working_directory) 26 | 27 | logger.debug("Working directory set to: %s", self.working_directory) 28 | -------------------------------------------------------------------------------- /pydicer/generate/mhubconfigs/nnunet_pancreas.yml: -------------------------------------------------------------------------------- 1 | general: 2 | data_base_dir: /app/data 3 | version: 1.0.0 4 | description: custom pipeline from nifti to nifti 5 | 6 | execute: 7 | - FileStructureImporter 8 | - NNUnetRunner 9 | - DataOrganizer 10 | 11 | modules: 12 | FileStructureImporter: 13 | input_dir: "input_data" 14 | structures: 15 | - image.nii.gz@instance@nifti:mod=ct 16 | import_id: _instance 17 | 18 | NNUnetRunner: 19 | input_data_type: nifti:mod=ct 20 | nnunet_task: Task007_Pancreas 21 | nnunet_model: 3d_lowres 22 | export_prob_maps: False 23 | roi: PANCREAS,PANCREAS+NEOPLASM_MALIGNANT_PRIMARY 24 | prob_map_segments: [Background, Pancreas, Pancreatic_cancer] 25 | 26 | DataOrganizer: 27 | targets: 28 | - nifti:mod=seg-->/app/data/output_data/[d:roi].nii.gz 29 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 
16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: Build Docs 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: ["main"] 7 | 8 | jobs: 9 | docs: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - uses: actions/checkout@v2 14 | - name: Set up Python 15 | uses: actions/setup-python@v2 16 | with: 17 | python-version: "3.9" 18 | - name: Install dependencies 19 | run: | 20 | curl -sSL https://install.python-poetry.org | python - --version 1.3.2 21 | poetry install --with docs --all-extras 22 | echo "PYTHONPATH=`pwd`" >> $GITHUB_ENV 23 | - name: Build Docs 24 | run: | 25 | sudo apt-get update -y && sudo apt-get install -y pandoc python3-pkg-resources python3-setuptools 26 | poetry run sphinx-build -b html -a docs docs/site 27 | - name: Deploy docs 28 | uses: peaceiris/actions-gh-pages@v3 29 | with: 30 | github_token: ${{ secrets.GITHUB_TOKEN }} 31 | publish_dir: ./docs/site 32 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. mdinclude:: ../README.md 2 | 3 | .. toctree:: 4 | :caption: Examples 5 | :maxdepth: 2 6 | :hidden: 7 | 8 | _examples/GettingStarted 9 | _examples/ConvertingData 10 | _examples/VisualiseData 11 | _examples/Radiomics 12 | _examples/DoseMetrics 13 | _examples/DatasetPreparation 14 | _examples/AutoSegmentation 15 | _examples/nnUNet 16 | 17 | .. toctree:: 18 | :caption: Guides 19 | :maxdepth: 2 20 | :hidden: 21 | 22 | _examples/WorkingWithData 23 | _examples/WorkingWithStructures 24 | _examples/ObjectGeneration 25 | _examples/Configuration 26 | 27 | .. toctree:: 28 | :caption: Developers 29 | :maxdepth: 2 30 | :hidden: 31 | 32 | contributing 33 | code_of_conduct 34 | 35 | .. toctree:: 36 | :caption: Reference 37 | :maxdepth: 5 38 | :hidden: 39 | 40 | tool 41 | input 42 | config 43 | utils 44 | preprocess 45 | convert 46 | visualise 47 | dataset 48 | analyse 49 | generate 50 | nnunet 51 | -------------------------------------------------------------------------------- /pydicer/convert/headers.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import json 3 | from typing import Union 4 | from pathlib import Path 5 | 6 | import pydicom 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | def convert_dicom_headers( 12 | dcm_file: Union[str, Path], binary_path: str, json_file: Union[str, Path] 13 | ): 14 | """Save the DICOM Headers as a JSON file 15 | 16 | Args: 17 | dcm_file (str|pathlib.Path): The files from which to save the headers. 18 | binary_path (str): Relative path to binary data which will be placed into JSON. 19 | json_file (str|pathlib.Path): Path to JSON file to save output. 
20 | """ 21 | 22 | # Write the DICOM headers (of the first slice) to JSON 23 | dcm_ds = pydicom.read_file(dcm_file, force=True) 24 | dcm_dict = dcm_ds.to_json_dict( 25 | bulk_data_threshold=4096, bulk_data_element_handler=lambda _: binary_path 26 | ) 27 | 28 | with open(json_file, "w", encoding="utf8") as jsonfile: 29 | json.dump(dcm_dict, jsonfile, indent=2) 30 | 31 | logger.debug("DICOM Headers written to: %s", json_file) 32 | -------------------------------------------------------------------------------- /tests/test_config.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=redefined-outer-name,missing-function-docstring 2 | 3 | import tempfile 4 | 5 | import pytest 6 | from pydicer.config import PyDicerConfig 7 | 8 | 9 | def test_generate_nrrd_config(): 10 | 11 | with tempfile.TemporaryDirectory() as directory: 12 | 13 | config = PyDicerConfig(directory) 14 | 15 | # Assert that generate NRRD is True (default) 16 | assert config.get_config("generate_nrrd") 17 | 18 | # Update the config 19 | config.set_config("generate_nrrd", False) 20 | 21 | # Assert that it is now False 22 | assert not config.get_config("generate_nrrd") 23 | 24 | 25 | def test_config_not_exists(): 26 | 27 | with tempfile.TemporaryDirectory() as directory: 28 | 29 | config = PyDicerConfig(directory) 30 | 31 | with pytest.raises(AttributeError): 32 | config.get_config("doesn't_exist") 33 | 34 | with pytest.raises(AttributeError): 35 | config.set_config("doesn't_exist", 123) 36 | 37 | 38 | def test_config_invalid_value(): 39 | 40 | with tempfile.TemporaryDirectory() as directory: 41 | 42 | config = PyDicerConfig(directory) 43 | 44 | with pytest.raises(ValueError): 45 | config.set_config("generate_nrrd", 123) 46 | -------------------------------------------------------------------------------- /pydicer/input/web.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from typing import Union 4 | 5 | from pydicer.input.base import InputBase 6 | from pydicer.utils import download_and_extract_zip_file 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | class WebInput(InputBase): 12 | def __init__(self, data_url: str, working_directory: Union[str, Path] = None): 13 | """ 14 | Class for downloading and saving input data off the internet 15 | 16 | Args: 17 | data_url (str): The URL of where the data is stored. For now, it must be a link to a 18 | zip file 19 | working_directory (str|pathlib.Path, optional): The working directory in which to 20 | store the data fetched. Defaults to a temp directory. 21 | """ 22 | super().__init__(working_directory) 23 | self.data_url = data_url 24 | 25 | def fetch_data(self): 26 | """Download the data.""" 27 | 28 | files_in_directory = list(self.working_directory.glob("*")) 29 | if len(files_in_directory) > 0: 30 | logger.warning("Directory not empty, won't download files") 31 | return 32 | 33 | logger.info("Downloading files from %s", self.data_url) 34 | download_and_extract_zip_file(self.data_url, self.working_directory) 35 | -------------------------------------------------------------------------------- /examples/ASMIRTWorkshop/README.md: -------------------------------------------------------------------------------- 1 | # ASMIRT Workshop 2023 2 | 3 | This directory contains a series of Jupyter notebooks prepared for the **Radiotherapy image data 4 | analysis using Python** Workshop at ASMIRT 2023 in Sydney, Australia. 
5 | 6 | ## Part 1: Python Basics 7 | 8 | [Slides](https://unsw-my.sharepoint.com/:p:/g/personal/z3523015_ad_unsw_edu_au/EdvxEVKSZV1Glpb6pHn55mgBD7xC5Whu_SPoFrfJBITEYg?e=DKiv6G) 9 | 10 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/AustralianCancerDataNetwork/pydicer/blob/main/examples/ASMIRTWorkshop/1_Python_Intro.ipynb) 11 | 12 | ## Part 2: Working with DICOM 13 | 14 | [Slides](https://unsw-my.sharepoint.com/:p:/g/personal/z3523015_ad_unsw_edu_au/EXyvFauxn4FBhFXiSskhzd0BYaE-Q3xPzI_PlxoTtC6_8Q?e=qFp3zm) 15 | 16 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/AustralianCancerDataNetwork/pydicer/blob/main/examples/ASMIRTWorkshop/2_DICOM.ipynb) 17 | 18 | ## Part 3: Converting and analysing data 19 | 20 | [Slides](https://unsw-my.sharepoint.com/:p:/g/personal/z3523015_ad_unsw_edu_au/ETopzSi06zxNqQgpqkHbR94B2DIuWy8TflxFB8ozfw1R-g?e=eq4hIC) 21 | 22 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/AustralianCancerDataNetwork/pydicer/blob/main/examples/ASMIRTWorkshop/3_RT_Data_Analysis.ipynb) 23 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "pydicer" 3 | version = "0.2.0" 4 | description = "PYthon Dicom Image ConvertER" 5 | authors = ["Ingham Medical Physics"] 6 | license = "Apache License 2.0" 7 | classifiers = [ 8 | "Programming Language :: Python :: 3", 9 | "License :: OSI Approved :: Apache Software License", 10 | "Operating System :: OS Independent", 11 | "Topic :: Scientific/Engineering :: Image Processing", 12 | "Topic :: Scientific/Engineering :: Medical Science Apps.", 13 | "Topic :: Scientific/Engineering :: Visualization", 14 | "Development Status :: 4 - Beta", 15 | ] 16 | readme = "README.md" 17 | 18 | [tool.poetry.dependencies] 19 | python = "^3.8" 20 | pydicom = ">=2.1.2" 21 | SimpleITK = ">=2.0.2" 22 | pyorthanc = ">=1.11.2" 23 | platipy = ">=0.5.0" 24 | argparse = ">=1.4.0" 25 | seaborn = "^0.12.0" 26 | tqdm = "^4.55.1" 27 | scikit-learn = "^1.2.2" 28 | pyradiomics = ">=3.0.1" 29 | 30 | [tool.poetry.group.dev.dependencies] 31 | pylint = "^2.13.5" 32 | black = "^22.3.0" 33 | pytest = "6.2.5" 34 | mypy = "^1.14.0" 35 | pytest-mock = "^3.14.0" 36 | 37 | [tool.poetry.group.docs.dependencies] 38 | sphinx = "^5.1.1" 39 | sphinxcontrib-napoleon = "^0.7" 40 | sphinx-theme = "^1.0" 41 | sphinx-click = "^4.3.0" 42 | furo = "^2022.6.21" 43 | nbsphinx = "^0.8.9" 44 | m2r2 = "^0.3.3" 45 | notebook = "^6.5.4" 46 | 47 | 48 | [build-system] 49 | requires = ["poetry-core>=1.0.0"] 50 | build-backend = "poetry.core.masonry.api" 51 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=redefined-outer-name,missing-function-docstring 2 | 3 | from pathlib import Path 4 | 5 | import pytest 6 | import pydicom 7 | 8 | from pydicer.input.test import TestInput 9 | from pydicer.utils import determine_dcm_datetime 10 | 11 | 12 | @pytest.fixture 13 | def test_data(): 14 | """Fixture to grab the test data""" 15 | 16 | directory = Path("./testdata") 17 | directory.mkdir(exist_ok=True, parents=True) 18 | 19 | working_directory = directory.joinpath("dicom") 20 | 
working_directory.mkdir(exist_ok=True, parents=True) 21 | 22 | test_input = TestInput(working_directory) 23 | test_input.fetch_data() 24 | 25 | return working_directory 26 | 27 | 28 | def test_fetch_ds_datetime(test_data): 29 | 30 | rt_struct_file = test_data.joinpath( 31 | "HNSCC", 32 | "HNSCC-01-0019", 33 | "07-04-1998-NA-RT SIMULATION-48452", 34 | "1.000000-NA-10361", 35 | "1-1.dcm", 36 | ) 37 | 38 | ds = pydicom.read_file(rt_struct_file) 39 | ds_datetime = determine_dcm_datetime(ds) 40 | assert ds_datetime.year == 1998 41 | assert ds_datetime.month == 7 42 | assert ds_datetime.day == 4 43 | assert ds_datetime.hour == 0 44 | assert ds_datetime.minute == 0 45 | 46 | ds_datetime = determine_dcm_datetime(ds, require_time=True) 47 | assert ds_datetime.year == 2001 48 | assert ds_datetime.month == 10 49 | assert ds_datetime.day == 28 50 | assert ds_datetime.hour == 12 51 | assert ds_datetime.minute == 48 52 | -------------------------------------------------------------------------------- /pydicer/input/tcia.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | from platipy.dicom.download import tcia 4 | 5 | from pydicer.input.base import InputBase 6 | 7 | 8 | class TCIAInput(InputBase): 9 | def __init__( 10 | self, 11 | collection: str, 12 | patient_ids: list, 13 | modalities: list = None, 14 | working_directory: Union[str, list] = None, 15 | ): 16 | """ 17 | Input class that interfaces with the TCIA API 18 | 19 | Args: 20 | collection (str): The TCIA collection to fetch from 21 | patient_ids (list, optional): The patient IDs to fetch. If not set all patients are 22 | fetched 23 | modalities (list, optional): A list of strings defining the modalites to fetch. Will 24 | fetch all modalities available if not specified. 25 | working_directory (str|pathlib.Path, optional): The working directory in which 26 | to store the data fetched. Defaults to a temp directory. 27 | """ 28 | super().__init__(working_directory) 29 | self.collection = collection 30 | self.patient_ids = patient_ids 31 | self.modalities = modalities 32 | 33 | def fetch_data(self): 34 | """ 35 | Function to download the data from TCIA and write locally 36 | """ 37 | 38 | tcia.fetch_data( 39 | self.collection, 40 | self.patient_ids, 41 | self.modalities, 42 | nifti=False, 43 | output_directory=self.working_directory, 44 | ) 45 | -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | // For format details, see https://aka.ms/devcontainer.json. For config options, see the 2 | // README at: https://github.com/devcontainers/templates/tree/main/src/docker-existing-docker-compose 3 | { 4 | "name": "Existing Docker Compose (Extend)", 5 | // Update the 'dockerComposeFile' list if you have more compose files or use different names. 6 | // The .devcontainer/docker-compose.yml file contains any overrides you need/want to make. 7 | "dockerComposeFile": [ 8 | "../docker-compose.yml", 9 | "docker-compose.yml" 10 | ], 11 | // The 'service' property is the name of the service for the container that VS Code should 12 | // use. Update this value and .devcontainer/docker-compose.yml to the real service name. 13 | "service": "pydicer-dev", 14 | // The optional 'workspaceFolder' property is the path VS Code should open by default when 15 | // connected. 
This is typically a file mount in .devcontainer/docker-compose.yml 16 | "workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}", 17 | // Features to add to the dev container. More info: https://containers.dev/features. 18 | "features": { 19 | "ghcr.io/devcontainers-extra/features/poetry:2": {}, 20 | "ghcr.io/nikobockerman/devcontainer-features/poetry-persistent-cache:1": {}, 21 | }, 22 | // "features": {}, 23 | // Use 'forwardPorts' to make a list of ports inside the container available locally. 24 | // "forwardPorts": [], 25 | // Uncomment the next line if you want start specific services in your Docker Compose config. 26 | // "runServices": [], 27 | // Uncomment the next line if you want to keep your containers running after VS Code shuts down. 28 | // "shutdownAction": "none", 29 | // Uncomment the next line to run commands after the container is created. 30 | // "postCreateCommand": "cat /etc/os-release", 31 | "updateContentCommand": ".devcontainer/install-dev-tools.sh", 32 | // Configure tool-specific properties. 33 | "customizations": { 34 | "vscode": { 35 | "extensions": [ 36 | "ms-toolsai.jupyter" // <-- Added the Microsoft Jupyter extension 37 | ] 38 | } 39 | }, 40 | // Uncomment to connect as an existing user other than the container default. More info: https://aka.ms/dev-containers-non-root. 41 | // "remoteUser": "devcontainer" 42 | } -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | # This CITATION.cff file was generated with cffinit. 2 | # Visit https://bit.ly/cffinit to generate yours today! 3 | 4 | cff-version: 1.2.0 5 | title: PyDicer 6 | message: >- 7 | If you use this software, please cite our SoftwareX article. 8 | type: software 9 | authors: 10 | - given-names: Phillip 11 | family-names: Chlap 12 | email: phillip.chlap@unsw.edu.au 13 | affiliation: University of New South Wales 14 | orcid: "https://orcid.org/0000-0002-6517-8745" 15 | - given-names: Daniel 16 | family-names: Al Mouiee 17 | affiliation: Ingham Institute 18 | - given-names: Robert N. 19 | family-names: Finnegan 20 | affiliation: University of Sydney 21 | orcid: "https://orcid.org/0000-0003-4728-8462" 22 | - given-names: Xinyi 23 | family-names: Cui 24 | affiliation: University of New South Wales 25 | - given-names: Shrikant 26 | family-names: Deshpande 27 | affiliation: South Western Sydney Local Health District 28 | - given-names: Vicky 29 | family-names: Chin 30 | affiliation: University of New South Wales 31 | - given-names: Lois 32 | family-names: Holloway 33 | affiliation: University of New South Wales 34 | repository-code: "https://github.com/AustralianCancerDataNetwork/pydicer" 35 | url: "https://australiancancerdatanetwork.github.io/pydicer/" 36 | keywords: 37 | - medical imaging 38 | - DICOM 39 | - radiotherapy 40 | license: Apache-2.0 41 | version: 0.2.0 42 | date-released: "2023-12-20" 43 | preferred-citation: 44 | authors: 45 | - family-names: Chlap 46 | given-names: Phillip 47 | orcid: "https://orcid.org/0000-0002-6517-8745" 48 | - given-names: Daniel 49 | family-names: Al Mouiee 50 | - family-names: Finnegan 51 | given-names: Robert N. 
52 | orcid: "https://orcid.org/0000-0003-4728-8462" 53 | - given-names: Janet 54 | family-names: Cui 55 | - given-names: Vicky 56 | family-names: Chin 57 | - given-names: Shrikant 58 | family-names: Deshpande 59 | - given-names: Lois 60 | family-names: Holloway 61 | date-published: "2024-12-14" 62 | doi: 10.1016/j.softx.2024.102010 63 | journal: SoftwareX 64 | issn: 2352-7110 65 | publisher: 66 | name: Elsevier 67 | title: "PyDicer: An open-source python library for conversion and analysis of radiotherapy DICOM data" 68 | type: article 69 | url: "https://doi.org/10.1016/j.softx.2024.102010" 70 | volume: 29 71 | year: 2025 72 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | .DS_Store 132 | 133 | # Nifti 134 | *.nii.gz 135 | *.dcm 136 | 137 | testdata/ 138 | pydicer_testdata/ 139 | testdata*/ 140 | 141 | examples/data/ 142 | 143 | docs/**/*.md 144 | docs/**/*.png 145 | docs/**/*.jpg 146 | docs/**/*.gif 147 | docs/site/ 148 | docs/_examples/ 149 | -------------------------------------------------------------------------------- /pydicer/logger.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime as dt 2 | import pandas as pd 3 | 4 | SUMMARY_CSV_COLS = ["module", "hashed_uid", "success", "log", "start_time", "end_time"] 5 | 6 | 7 | class PatientLogger: 8 | """Class to document a patient's pipeline progress in a personalised CSV file""" 9 | 10 | def __init__(self, pat_id, data_directory, force=True): 11 | self.pat_id = pat_id 12 | self.data_directory = data_directory 13 | self.start_time = dt.now() 14 | 15 | # create pat dir if not yet created 16 | pat_directory = self.data_directory.joinpath(pat_id) 17 | pat_directory.mkdir(exist_ok=True) 18 | 19 | self.summary_csv_path = pat_directory.joinpath("summary.csv") 20 | 21 | # create patient csv if not already created 22 | if not self.summary_csv_path.exists() or force: 23 | df_pat_log = pd.DataFrame(columns=SUMMARY_CSV_COLS) 24 | df_pat_log.to_csv(self.summary_csv_path, index=False) 25 | 26 | def log_module_error(self, module: str, hashed_uid: str, error_log: str): 27 | """Function to log errors for a specific pydicer module 28 | 29 | Args: 30 | module (str): pydicer module to log error for in CSV 31 | hashed_uid (str): hashed UID of the patient being logged to the error CSV 32 | error_log (str): error to log in CSV 33 | """ 34 | end_time = dt.now() 35 | df_error = pd.DataFrame( 36 | [[module, hashed_uid, 1, error_log, self.start_time, end_time]], 37 | columns=SUMMARY_CSV_COLS, 38 | ) 39 | df_error.to_csv(self.summary_csv_path, header=False, mode="a", index=False) 40 | 41 | def eval_module_process(self, module: str, hashed_uid: str): 42 | """Function to log if any patient had issues for a specific pydicer module 43 | 44 | Args: 45 | module (str): pydicer module to check if no errors were generated for all patients 46 | hashed_uid (str): hashed UID of the patient being logged to the error CSV 47 | """ 48 | 49 | end_time = dt.now() 50 | df_summary = pd.read_csv(self.summary_csv_path) 51 | df_summary_mod = df_summary[ 52 | (df_summary.module == module) 53 | & (df_summary.success == 1) 54 | & (df_summary.hashed_uid == hashed_uid) 55 | ] 56 | if len(df_summary_mod) == 0: 57 | df_final_summary = pd.DataFrame( 58 | [[module, hashed_uid, 0, "", self.start_time, end_time]], 59 | columns=SUMMARY_CSV_COLS, 60 | ) 61 | df_final_summary.to_csv( 62 | self.summary_csv_path, 63 | header=False, 64 | mode="a", 65 | index=False, 66 | ) 67 | -------------------------------------------------------------------------------- /pydicer/quarantine.py: 
-------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import shutil 3 | import datetime 4 | 5 | import pandas as pd 6 | import pydicom 7 | 8 | # Attempt to store the following meta data keys in the quarantine summary DataFrame 9 | QUARATINE_DICOM_KEYS = ["PatientID", "Modality", "SOPInstanceUID", "SeriesDescription"] 10 | 11 | 12 | def copy_file_to_quarantine(file: Path, working_directory: Path, error_msg: str): 13 | """Move a DICOM file that couldn't be processed into the quarantine directory 14 | 15 | Args: 16 | file (pathlib.Path): DICOM path to be moved into quarantine 17 | working_directory (pathlib.Path): Main working directory for pydicer 18 | error_msg (str): error message associated with the quarantined file 19 | """ 20 | 21 | # Attempt to get some header information from the DICOM object to write into the summary 22 | 23 | summary_dict = { 24 | "file": file, 25 | "error": error_msg, 26 | "quarantine_dttm": datetime.datetime.now(), 27 | } 28 | 29 | ds = pydicom.read_file(file, force=True) 30 | for k in QUARATINE_DICOM_KEYS: 31 | val = None 32 | if k in ds: 33 | val = ds[k].value 34 | 35 | summary_dict[k] = val 36 | 37 | pat_id = "UNKNOWN" 38 | if "PatientID" in ds: 39 | pat_id = ds.PatientID 40 | 41 | df_this_summary = pd.DataFrame([summary_dict]) 42 | 43 | quaran_dir = Path(working_directory).joinpath("quarantine") 44 | file_dir = quaran_dir.joinpath(pat_id, file.parent.name) 45 | summary_file = quaran_dir.joinpath("summary.csv") 46 | 47 | df_summary = None 48 | if summary_file.exists(): 49 | df_summary = pd.read_csv(summary_file, index_col=0) 50 | df_summary = pd.concat([df_summary, df_this_summary], ignore_index=True) 51 | else: 52 | df_summary = df_this_summary 53 | 54 | # Create "quarantine/PATH_TO_DCM" directory 55 | file_dir.mkdir(exist_ok=True, parents=True) 56 | 57 | # Copy original DCM file to quarantine area 58 | shutil.copyfile(file, file_dir.joinpath(file.name)) 59 | 60 | # Create (if doesn't exist) summary file to hold info about file error 61 | df_summary.to_csv(summary_file) 62 | 63 | 64 | def read_quarantined_data(working_directory: Path) -> pd.DataFrame: 65 | """A function to read the data from the quarantine summary. 66 | 67 | Args: 68 | working_directory (pathlib.Path): The PyDicer working directory 69 | 70 | Returns: 71 | pd.DataFrame: A DataFrame summarising the contents of the quarantine. 72 | """ 73 | 74 | quarantine_dir = Path(working_directory).joinpath("quarantine") 75 | 76 | summary_file = quarantine_dir.joinpath("summary.csv") 77 | 78 | df_summary = pd.read_csv(summary_file, index_col=0) 79 | 80 | return df_summary 81 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | PyDicer welcomes any and all contributions in the way of new functionality, bug fixes or documentation. This document provides some guidance to developers who would like to contribute to the project. 4 | 5 | ## Git 6 | 7 | Create a branch off of **main** while you make your changes or implement your new tool. 8 | Once complete, head to [GitHub to create a pull 9 | request](https://github.com/australiancancerdatanetwork/pydicer/compare) to merge your changes 10 | into the **main** branch. At this point the automated tests will run and maintainers will review 11 | your submission before merging. 12 | 13 | ## Poetry 14 | 15 | PyDicer uses poetry to manage dependencies. 
Instructions for installing poetry are available 16 | [here](https://python-poetry.org/docs/#installation). Once installed, you can easily install the 17 | libraries required to develop for PyDicer using the following command: 18 | 19 | ```bash 20 | poetry install --with dev,docs --all-extras 21 | ``` 22 | 23 | This will automatically create a virtual environment managed by poetry. To run a script within this 24 | environment, use the `poetry run` followed by what to run. For example, to run a test.py script: 25 | 26 | ```bash 27 | poetry run python test.py 28 | ``` 29 | 30 | ## VSC Devcontainer 31 | 32 | You may setup a Visual Studio Code development container (Devcontainer) to ensure a standardised 33 | development and testing environment, without the need to perform overhead installation. This 34 | assumes that Docker and VSC are installed on your system. 35 | 36 | To set this up, you may perform the VSC shortcut `ctrl + shift + p` (or `cmd + shift p` on Mac) and 37 | select the `Reopen in devcontainer` option. This will create a Docker container with Python 3.9 38 | and its dependencies installed, along with other tools we use for development (eg. git, pytest). 39 | 40 | ## Coding standards 41 | 42 | Code in PyDicer must conform to Python's PEP-8 standards to ensure consistent formatting between contributors. To ensure this, pylint is used to check code conforms to these standards before a Pull Request can be merged. You can run pylint from the command line using the following command: 43 | 44 | ```bash 45 | pylint pydicer 46 | ``` 47 | 48 | But a better idea is to ensure you are using a Python IDE which supports linting (such as [VSCode](https://code.visualstudio.com/docs/python/linting) or PyCharm). Make sure you resolve all suggestions from pylint before submitting your pull request. 49 | 50 | If you're new to using pylint, you may like to [read this guide](https://docs.pylint.org/en/v2.11.1/tutorial.html). 51 | 52 | ## Automated tests 53 | 54 | A test suite is included in PyDicer which ensures that code contributed to the repository functions as expected and continues to function as further development takes place. Any code submitted via a pull request should include appropriate automated tests for the new code. 55 | 56 | pytest is used as a testing library. Running the tests from the command line is really easy: 57 | 58 | ```bash 59 | pytest 60 | ``` 61 | 62 | Add your tests to the appropriate file in the `tests/` directory. See the [pytest documention](https://docs.pytest.org/en/6.2.x/getting-started.html) for more information. 63 | -------------------------------------------------------------------------------- /pydicer/cli/contants.py: -------------------------------------------------------------------------------- 1 | def get_sub_help_mesg(input_commands, command): 2 | # pylint: disable=missing-function-docstring 3 | 4 | help_mesg = f"""Subcommand of the following: {input_commands} 5 | 6 | test WORKING_DIRECTORY_PATH 7 | 8 | Runs the command using the default test data. Check pydicer.input.test for more info 9 | 10 | - WORKING_DIRECTORY_PATH: The working directory in which to 11 | store the data fetched. Defaults to a temp directory. 
12 | 13 | Example usage: 14 | python -m pydicer.cli.run input|pipeline --type test cli_test 15 | 16 | pacs WORKING_DIRECTORY_PATH HOST_IP PORT AE_TITLE MODALITY [PATIENT_IDs] 17 | 18 | Runs the command by querying a DICOM PACS server and storing the data locally on the 19 | filesystem 20 | 21 | - WORKING_DIRECTORY_PATH: The working directory in which to 22 | store the data fetched. Defaults to a temp directory. 23 | - HOST_IP (optional): The IP address or host name of the DICOM PACS. Defaults to 24 | 'www.dicomserver.co.uk'. 25 | - PORT (optional): The port to use to communicate on. Defaults to 11112. 26 | - AE_TITLE (optional): AE Title to provide the DICOM service. Defaults to 27 | None. 28 | - MODALITY (optional): The modality to retrieve DICOMs for. Defaults 29 | to 'GM'. 30 | - PATIENT_IDs (required): a string-list of patient IDs (IDs separated by spaces) 31 | to retrieve the DICOMs for. 32 | 33 | Example usage: 34 | python -m pydicer.cli.run input|pipeline --type pacs www.dicomserver.co.uk 11112 35 | DCMQUERY cli_test GM PAT004 PAT005 36 | 37 | 38 | web WORKING_DIRECTORY_PATH DATA_URL 39 | 40 | Runs the command by downloading data from a provided URL and storing it locally on the 41 | filesystem 42 | 43 | - WORKING_DIRECTORY_PATH: The working directory in which to 44 | store the data fetched. Defaults to a temp directory. 45 | - DATA_URL: URL of the dataset to be downloaded from the internet 46 | 47 | Example usage: 48 | python -m pydicer.cli.run input|pipeline --type web 49 | https://zenodo.org/record/5276878/files/HNSCC.zip cli_test 50 | """ 51 | if command == "pipeline": 52 | help_mesg += """ 53 | 54 | filesystem WORKING_DIRECTORY_PATH 55 | 56 | Runs the pipeline using a filesystem working directory which contains DICOM images as input 57 | 58 | - WORKING_DIRECTORY_PATH: The working directory in which to 59 | store the data fetched. Defaults to a temp directory. 60 | 61 | Example usage: 62 | python -m pydicer.cli.run pipeline --type filesystem cli_test 63 | 64 | 65 | e2e 66 | 67 | Runs the entire pipeline using the default settings. 
Check pydicer.pipeline for more info 68 | 69 | Example usage: 70 | python -m pydicer.cli.run pipeline 71 | or 72 | python -m pydicer.cli.run pipeline --type e2e 73 | 74 | """ 75 | 76 | return help_mesg 77 | -------------------------------------------------------------------------------- /.github/workflows/pull-request.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | # Pipeline that checks branches that have been pushed to "Main" OR are the source branch in a newly created pull request into "Main" 5 | # Fails the test if there are Python syntax errors or undefined names OR pytest fails 6 | 7 | name: Pydicer Pytest and Pylint Validaiton 8 | 9 | on: 10 | push: 11 | branches: [main] 12 | pull_request: 13 | branches: [main] 14 | 15 | jobs: 16 | build: 17 | runs-on: ubuntu-latest 18 | strategy: 19 | fail-fast: false 20 | matrix: 21 | python-version: ["3.8", "3.9"] 22 | poetry-version: [1.7.1] 23 | # os: [ubuntu-20.04, macos-latest, windows-latest] 24 | steps: 25 | - uses: actions/checkout@v2 26 | - name: Set up Python ${{ matrix.python-version }} 27 | uses: actions/setup-python@v2 28 | with: 29 | python-version: ${{ matrix.python-version }} 30 | - name: Install Poetry 31 | uses: abatilo/actions-poetry@v2.0.0 32 | with: 33 | poetry-version: ${{ matrix.poetry-version }} 34 | - name: Install Python modules with poetry 35 | run: | 36 | poetry run pip install --upgrade pip 37 | poetry install 38 | poetry run pip install TotalSegmentator 39 | - name: Lint with Pylint 40 | run: | 41 | poetry run pylint pydicer 42 | - name: MyPy type checking 43 | run: | 44 | # TODO poetry run mypy 45 | echo "Skipping MyPy type checking..." 46 | - name: Conditional Pytest coverage 47 | run: | 48 | if [[ "${{ matrix.python-version }}" == "3.9" ]]; then 49 | echo "Running Pytest with coverage..." 50 | poetry run pip install pytest-cov coverage 51 | # Omit CLI from coverage report since it's not fully developed 52 | poetry run pytest --cov=pydicer --cov-report=xml --cov-config=.coveragerc 53 | poetry run coverage report --fail-under=70 # Fail if coverage is less than 70% 54 | else 55 | echo "Running Pytest without coverage..." 
56 | poetry run pytest 57 | fi 58 | # Commit the coverage badge back to repo (only on main branch & for a specific Python version) 59 | - name: Generate and commit coverage badge 60 | if: github.ref == 'refs/heads/main' && matrix.python-version == '3.9' 61 | run: | 62 | poetry run pip install coverage-badge # These only work with python >=3.9 63 | # Generate an SVG coverage badge 64 | # poetry run coverage-badge -o coverage.svg 65 | 66 | # # Configure git 67 | # git config user.name "github-actions" 68 | # git config user.email "github-actions@github.com" 69 | 70 | # # Pull latest changes to avoid conflicts 71 | # git pull --rebase 72 | 73 | # # Stage and commit coverage.svg 74 | # git add coverage.svg 75 | # git commit -m "Update coverage badge" || echo "No changes to commit" 76 | 77 | # # Push commit 78 | # git push 79 | env: 80 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 81 | -------------------------------------------------------------------------------- /pydicer/cli/run.py: -------------------------------------------------------------------------------- 1 | """ 2 | Command Line Interface tool to run pydicer pipeline or specific modules on their own 3 | 4 | usage: python -m pydicer.cli.run ['pipeline'|'input'] 5 | 6 | pydicer CLI (Command Line Interface) 7 | 8 | positional arguments: 9 | command One of the following COMMANDS: ['pipeline'|'input'] 10 | 11 | optional arguments: 12 | -h, --help show help message 13 | """ 14 | import argparse 15 | from argparse import RawTextHelpFormatter 16 | import sys 17 | 18 | from pydicer.cli.contants import get_sub_help_mesg 19 | from pydicer.cli.input import testinput_cli, pacs_cli, tcia_cli, web_cli, run_pipeline 20 | 21 | # Sub command types for the Input command 22 | INPUT_TOOLS = { 23 | "test": testinput_cli, 24 | "pacs": pacs_cli, 25 | "tcia": tcia_cli, 26 | "web": web_cli, 27 | } 28 | 29 | PIPELINE_TOOLS = { 30 | # "e2e": run_test, TODO This broke due to some changes. Either we need to fix or remove. 
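    # Each remaining entry maps a --type value to the generic run_pipeline handler in
    # pydicer.cli.input, which constructs the corresponding input object and then runs
    # the full pipeline on the data it fetches.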
31 | "filesystem": run_pipeline, 32 | "test": run_pipeline, 33 | "pacs": run_pipeline, 34 | "tcia": run_pipeline, 35 | "web": run_pipeline, 36 | } 37 | 38 | 39 | def parse_sub_input(command): 40 | """function to parse the input command args""" 41 | parse_sub_command(command, "Run the Input module only", INPUT_TOOLS, "test", INPUT_COMMANDS) 42 | 43 | 44 | def parse_sub_pipeline(command): 45 | """function to parse the pipeline command args""" 46 | parse_sub_command( 47 | command, 48 | "Run the pipeline with a specific input method", 49 | PIPELINE_TOOLS, 50 | "e2e", 51 | PIPELINE_COMMANDS, 52 | ) 53 | 54 | 55 | INPUT_COMMANDS = str(list(INPUT_TOOLS.keys())).replace(", ", "|") 56 | PIPELINE_COMMANDS = str(list(PIPELINE_TOOLS.keys())).replace(", ", "|") 57 | MODULES = {"pipeline": parse_sub_pipeline, "input": parse_sub_input} 58 | COMMANDS = str(list(MODULES.keys())).replace(", ", "|") 59 | 60 | 61 | def parse_sub_command(command, desc, tools, default_choice, help_commands): 62 | """Generic function to take in dynamic input and trigger the respective sub commands 63 | 64 | Args: 65 | desc (str): help description of what the sub command does 66 | tools (dict): dictionary of which sub command type can be run 67 | default_choice (str): default sub command type that will be run in the case no input is 68 | received from the user 69 | """ 70 | parser = argparse.ArgumentParser(description=desc, formatter_class=RawTextHelpFormatter) 71 | parser.add_argument( 72 | "--type", 73 | help=get_sub_help_mesg(help_commands, command), 74 | default=default_choice, 75 | choices=tools, 76 | ) 77 | 78 | args = parser.parse_args(sys.argv[2:4]) 79 | if command == "pipeline": 80 | tools[args.type](args.type, *sys.argv[4:]) 81 | else: 82 | tools[args.type](*sys.argv[4:]) 83 | 84 | 85 | def pydicer_cli(): 86 | """ 87 | Trigger pydicer CLI 88 | """ 89 | 90 | parser = argparse.ArgumentParser( 91 | description="pydicer CLI (Command Line Interface)", 92 | usage=f"python -m pydicer.cli.run {COMMANDS}", 93 | ) 94 | 95 | # Default to "pipeline" option without input 96 | parser.add_argument( 97 | "command", 98 | help=f"One of the following COMMANDS: {COMMANDS}", 99 | ) 100 | 101 | args = parser.parse_args(sys.argv[1:2]) 102 | MODULES[args.command](sys.argv[1]) 103 | 104 | 105 | if __name__ == "__main__": 106 | pydicer_cli() 107 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | import os 17 | import sys 18 | import shutil 19 | import datetime 20 | from pathlib import Path 21 | 22 | sys.path.insert(0, os.path.abspath("..")) 23 | 24 | # -- Project information ----------------------------------------------------- 25 | 26 | project = "PyDicer" 27 | year = datetime.datetime.now().year 28 | copyright = f"{year}, Ingham Medical Physics" 29 | author = "Ingham Medical Physics" 30 | 31 | 32 | # -- General configuration --------------------------------------------------- 33 | 34 | # Add any Sphinx extension module names here, as strings. They can be 35 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 36 | # ones. 37 | extensions = [ 38 | "sphinx.ext.autodoc", 39 | "sphinx.ext.coverage", 40 | "sphinx.ext.napoleon", 41 | "nbsphinx", 42 | "m2r2", 43 | ] 44 | 45 | # Add any paths that contain templates here, relative to this directory. 46 | templates_path = ["_templates"] 47 | 48 | # List of patterns, relative to source directory, that match files and 49 | # directories to ignore when looking for source files. 50 | # This pattern also affects html_static_path and html_extra_path. 51 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "site"] 52 | 53 | 54 | # -- Options for HTML output ------------------------------------------------- 55 | 56 | # The theme to use for HTML and HTML Help pages. See the documentation for 57 | # a list of builtin themes. 58 | # 59 | html_theme = "furo" 60 | 61 | 62 | def setup(app): 63 | app.add_css_file("custom.css") 64 | 65 | 66 | # Add any paths that contain custom static files (such as style sheets) here, 67 | # relative to this directory. They are copied after the builtin static files, 68 | # so a file named "default.css" will overwrite the builtin "default.css". 
69 | html_static_path = ["_static"] 70 | 71 | html_show_sphinx = False 72 | 73 | # Copy in the files from the other repository directories to have them 74 | # be rendered by Sphinx 75 | examples_path = Path("_examples") 76 | for notebook_path in examples_path.glob("*.ipynb"): 77 | os.remove(notebook_path) 78 | 79 | examples_path.mkdir(exist_ok=True) 80 | shutil.copy("../examples/GettingStarted.ipynb", "_examples/GettingStarted.ipynb") 81 | shutil.copy("../examples/ConvertingData.ipynb", "_examples/ConvertingData.ipynb") 82 | shutil.copy("../examples/VisualiseData.ipynb", "_examples/VisualiseData.ipynb") 83 | shutil.copy("../examples/DoseMetrics.ipynb", "_examples/DoseMetrics.ipynb") 84 | shutil.copy("../examples/Radiomics.ipynb", "_examples/Radiomics.ipynb") 85 | shutil.copy( 86 | "../examples/DatasetPreparation.ipynb", "_examples/DatasetPreparation.ipynb" 87 | ) 88 | shutil.copy("../examples/WorkingWithData.ipynb", "_examples/WorkingWithData.ipynb") 89 | shutil.copy( 90 | "../examples/WorkingWithStructures.ipynb", "_examples/WorkingWithStructures.ipynb" 91 | ) 92 | shutil.copy("../examples/Configuration.ipynb", "_examples/Configuration.ipynb") 93 | shutil.copy("../examples/ObjectGeneration.ipynb", "_examples/ObjectGeneration.ipynb") 94 | shutil.copy("../examples/AutoSegmentation.ipynb", "_examples/AutoSegmentation.ipynb") 95 | shutil.copy("../examples/nnUNet.ipynb", "_examples/nnUNet.ipynb") 96 | 97 | shutil.rmtree("site/assets", ignore_errors=True) 98 | os.makedirs("site", exist_ok=True) 99 | shutil.copytree("../assets", "site/assets") 100 | -------------------------------------------------------------------------------- /tests/test_pipeline.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=redefined-outer-name,missing-function-docstring 2 | 3 | import tempfile 4 | from pathlib import Path 5 | import numpy as np 6 | 7 | import pytest 8 | 9 | from pydicer import PyDicer 10 | from pydicer.input.test import TestInput 11 | 12 | 13 | @pytest.fixture 14 | def test_data(): 15 | """Fixture to grab the test data""" 16 | 17 | directory = Path("./testdata") 18 | directory.mkdir(exist_ok=True, parents=True) 19 | 20 | dicom_directory = directory.joinpath("dicom") 21 | dicom_directory.mkdir(exist_ok=True, parents=True) 22 | 23 | test_input = TestInput(dicom_directory) 24 | test_input.fetch_data() 25 | 26 | return directory 27 | 28 | 29 | def test_pipeline(test_data): 30 | """End-to-end test of the entire pipeline""" 31 | 32 | with tempfile.TemporaryDirectory() as directory: 33 | 34 | directory = Path(directory) 35 | 36 | dicom_directory = directory.joinpath("dicom") 37 | dicom_directory.symlink_to(test_data.absolute(), target_is_directory=True) 38 | 39 | pydicer = PyDicer(directory) 40 | pydicer.add_input(dicom_directory) 41 | 42 | # Preprocess the data fetch to prepare it for conversion 43 | pydicer.preprocess() 44 | 45 | # Convert the data into the output directory 46 | pydicer.convert.convert(patient="HNSCC-01-0199") 47 | 48 | # Visualise the converted data 49 | pydicer.visualise.visualise(patient="HNSCC-01-0199") 50 | 51 | # Dataset selection and preparation 52 | pydicer.dataset.prepare("clean", "rt_latest_dose") 53 | 54 | # Analysis computing Radiomics and DVH 55 | pydicer.analyse.compute_radiomics("clean") 56 | df_rad = pydicer.analyse.get_all_computed_radiomics_for_dataset() 57 | 58 | # Do some spot checks on the radiomics computed for the dataset to confirm the end-to-end 59 | # test worked 60 | assert np.isclose( 61 | ( 62 | df_rad.loc[ 
63 | (df_rad.Contour == "Cord") & (df_rad.Patient == "HNSCC-01-0199"), 64 | "firstorder|Energy", 65 | ].iloc[0] 66 | ), 67 | 18025962.0, 68 | ) 69 | 70 | assert np.isclose( 71 | ( 72 | df_rad.loc[ 73 | (df_rad.Contour == "post_neck") & (df_rad.Patient == "HNSCC-01-0199"), 74 | "firstorder|Median", 75 | ].iloc[0] 76 | ), 77 | 45.0, 78 | ) 79 | 80 | assert np.isclose( 81 | ( 82 | df_rad.loc[ 83 | (df_rad.Contour == "PTV_63_Gy") & (df_rad.Patient == "HNSCC-01-0199"), 84 | "firstorder|Skewness", 85 | ].iloc[0] 86 | ), 87 | -0.0053863391917069, 88 | ) 89 | 90 | pydicer.analyse.compute_dvh() 91 | df_dose_metrics = pydicer.analyse.compute_dose_metrics( 92 | d_point=[50, 95, 99], v_point=[1, 10], d_cc_point=[1, 5, 10] 93 | ) 94 | 95 | assert np.isclose( 96 | (df_dose_metrics.loc[df_dose_metrics.label == "Brainstem", "V10"].iloc[0]), 97 | 29.68311309814453, 98 | ) 99 | 100 | assert np.isclose( 101 | (df_dose_metrics.loc[df_dose_metrics.label == "PTV_57_Gy", "cc"].iloc[0]), 102 | 145.16115188598633, 103 | ) 104 | 105 | assert np.isclose( 106 | (df_dose_metrics.loc[df_dose_metrics.label == "Lt_Parotid", "D95"].iloc[0]), 107 | 8.310638297872341, 108 | ) 109 | 110 | assert np.isclose( 111 | (df_dose_metrics.loc[df_dose_metrics.label == "GTV", "D99"].iloc[0]), 112 | 70.23906832298137, 113 | ) 114 | 115 | assert np.isclose( 116 | (df_dose_metrics.loc[df_dose_metrics.label == "Rt_Parotid", "D5cc"].iloc[0]), 117 | 70.45179733333333, 118 | ) 119 | -------------------------------------------------------------------------------- /tests/test_compare.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=redefined-outer-name,missing-function-docstring 2 | 3 | import tempfile 4 | 5 | from pathlib import Path 6 | import pandas as pd 7 | 8 | from pydicer import PyDicer 9 | from pydicer.analyse.compare import ( 10 | compute_contour_similarity_metrics, 11 | get_all_similarity_metrics_for_dataset, 12 | prepare_similarity_metric_analysis, 13 | ) 14 | from pydicer.utils import read_converted_data 15 | 16 | 17 | def test_compare_auto_segmentations(test_data_autoseg): 18 | working_directory = test_data_autoseg 19 | df = read_converted_data(working_directory=working_directory) 20 | 21 | # We'll test this by comparing the structures against themselves, 22 | # hence we expect perfect metrics 23 | df_target = df[df.modality == "RTSTRUCT"] 24 | df_reference = df[df.modality == "RTSTRUCT"] 25 | 26 | PyDicer(working_directory) 27 | segment_id = "test_seg" 28 | compute_contour_similarity_metrics(df_target, df_reference, segment_id) 29 | 30 | df_stats = get_all_similarity_metrics_for_dataset(working_directory) 31 | 32 | assert len(df_stats) == 200 33 | 34 | df_dsc = df_stats[df_stats["metric"] == "DSC"] 35 | assert df_dsc.value.min() == 1.0 36 | assert df_dsc.value.max() == 1.0 37 | 38 | 39 | def test_compaare_metrics_analysis(test_data_autoseg): 40 | working_directory = test_data_autoseg 41 | df = read_converted_data(working_directory=working_directory) 42 | 43 | # We'll test this by comparing the structures against themselves, 44 | # hence we expect perfect metrics 45 | df_target = df[df.modality == "RTSTRUCT"] 46 | df_reference = df[df.modality == "RTSTRUCT"] 47 | 48 | PyDicer(working_directory) 49 | segment_id = "test_seg" 50 | compute_contour_similarity_metrics(df_target, df_reference, segment_id) 51 | 52 | with tempfile.TemporaryDirectory() as analysis_dir: 53 | analysis_dir = Path(analysis_dir) 54 | 55 | prepare_similarity_metric_analysis( 56 | 
working_directory=working_directory, 57 | analysis_output_directory=analysis_dir, 58 | segment_id=segment_id, 59 | ) 60 | 61 | # Check that the output files exist 62 | raw_metrics_file = analysis_dir.joinpath("raw_test_seg_default.csv") 63 | assert raw_metrics_file.exists() 64 | stats_metrics_file = analysis_dir.joinpath("stats_test_seg_default.csv") 65 | assert stats_metrics_file.exists() 66 | plot_dsc_file = analysis_dir.joinpath("plot_DSC_test_seg_default.png") 67 | assert plot_dsc_file.exists() 68 | plot_hd_file = analysis_dir.joinpath("plot_hausdorffDistance_test_seg_default.png") 69 | assert plot_hd_file.exists() 70 | plot_msd_file = analysis_dir.joinpath("plot_meanSurfaceDistance_test_seg_default.png") 71 | assert plot_msd_file.exists() 72 | plot_sdsc_file = analysis_dir.joinpath("plot_surfaceDSC_test_seg_default.png") 73 | assert plot_sdsc_file.exists() 74 | 75 | # Read in the raw metrics file and do some checks 76 | df_raw = pd.read_csv(raw_metrics_file, index_col=0) 77 | assert len(df_raw) == 200 78 | 79 | # Since these structures compared against themselves, expect perfect metrics 80 | assert df_raw[df_raw.metric == "DSC"].value.min() == 1.0 81 | assert df_raw[df_raw.metric == "surfaceDSC"].value.min() == 1.0 82 | assert df_raw[df_raw.metric == "hausdorffDistance"].value.max() == 0.0 83 | assert df_raw[df_raw.metric == "meanSurfaceDistance"].value.max() == 0.0 84 | 85 | # Read in the stats metrics file and do some checks 86 | df_stats = pd.read_csv(stats_metrics_file, index_col=0) 87 | assert len(df_stats) == 36 88 | 89 | # Check one fo the rows 90 | row_check = df_stats[ 91 | (df_stats.structure == "Esophagus") & (df_stats.metric == "surfaceDSC") 92 | ].iloc[0] 93 | assert row_check["mean"] == 1.0 94 | assert row_check["std"] == 0.0 95 | assert row_check["max"] == 1.0 96 | assert row_check["min"] == 1.0 97 | assert row_check["count"] == 10 98 | -------------------------------------------------------------------------------- /tests/test_quarantine.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | import pytest 4 | import shutil 5 | import tempfile 6 | 7 | import numpy as np 8 | import pandas as pd 9 | 10 | from pydicer.quarantine import copy_file_to_quarantine, read_quarantined_data 11 | 12 | 13 | def test_copy_file_to_quarantine(): 14 | """Test that copy_file_to_quarantine correctly copies the file to quarantine and 15 | writes the summary entry. 16 | """ 17 | with tempfile.TemporaryDirectory() as tmpdir: 18 | tmpdir_path = Path(tmpdir) 19 | 20 | # Create a dummy DICOM file 21 | dummy_file = tmpdir_path / "dummy.dcm" 22 | dummy_file.write_text("Some dummy content") 23 | 24 | # Invoke the function 25 | copy_file_to_quarantine(dummy_file, tmpdir_path, "Test error message") 26 | 27 | quarantine_dir = tmpdir_path / "quarantine" 28 | summary_file = quarantine_dir / "summary.csv" 29 | 30 | # Assert quarantine directory was created 31 | assert quarantine_dir.is_dir(), "Quarantine directory was not created." 32 | 33 | # Assert summary file was created 34 | assert summary_file.exists(), "Summary CSV file was not created." 35 | 36 | # Read the summary CSV 37 | df_summary = pd.read_csv(summary_file, index_col=0) 38 | 39 | # Check that exactly one entry is in the summary 40 | assert len( 41 | df_summary) == 1, "There should be exactly one entry in the summary." 
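        # Beyond the row count, the summary row is expected to record the original file path,
        # the error message passed in above, and whichever DICOM header fields could be read
        # from the quarantined file.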
42 | 43 | # Check the summary row 44 | row = df_summary.iloc[0] 45 | assert row["error"] == "Test error message" 46 | assert "file" in row, "'file' column is missing in the summary DataFrame." 47 | assert "PatientID" in row, "'PatientID' column is missing in the summary DataFrame." 48 | 49 | # Because this is not a valid DICOM, the code defaults PatientID to UNKNOWN 50 | assert pd.isna(row["PatientID"]) 51 | 52 | # The quarantined file is placed under: quarantine_dir / "UNKNOWN" / / dummy.dcm 53 | quarantined_file_path = quarantine_dir.joinpath( 54 | "UNKNOWN", dummy_file.parent.name, dummy_file.name) 55 | assert quarantined_file_path.exists( 56 | ), "Quarantined file was not copied to the correct location." 57 | 58 | 59 | def test_read_quarantined_data(): 60 | """Test that read_quarantined_data reads data from an existing quarantine summary CSV.""" 61 | with tempfile.TemporaryDirectory() as tmpdir: 62 | tmpdir_path = Path(tmpdir) 63 | quarantine_dir = tmpdir_path / "quarantine" 64 | quarantine_dir.mkdir(exist_ok=True) 65 | 66 | # Create a fake summary CSV 67 | summary_file = quarantine_dir / "summary.csv" 68 | df_expected = pd.DataFrame([ 69 | { 70 | "file": "somefile.dcm", 71 | "error": "some_error", 72 | "quarantine_dttm": "2024-01-01 00:00:00", 73 | "PatientID": "UNKNOWN", 74 | "Modality": None, 75 | "SOPInstanceUID": None, 76 | "SeriesDescription": None, 77 | } 78 | ]) 79 | df_expected.to_csv(summary_file) 80 | 81 | # Use the function to read it 82 | df_summary = read_quarantined_data(tmpdir_path) 83 | 84 | # Assert the DataFrame matches our expectations 85 | assert len(df_summary) == 1, "Expected one record in the summary." 86 | row = df_summary.iloc[0] 87 | assert row["file"] == "somefile.dcm" 88 | assert row["error"] == "some_error" 89 | assert row["PatientID"] == "UNKNOWN" 90 | 91 | 92 | def test_read_quarantined_data_no_summary(): 93 | """Test that reading quarantine data raises an error or fails gracefully if summary.csv is missing.""" 94 | with tempfile.TemporaryDirectory() as tmpdir: 95 | tmpdir_path = Path(tmpdir) 96 | quarantine_dir = tmpdir_path / "quarantine" 97 | quarantine_dir.mkdir(exist_ok=True) 98 | 99 | # summary.csv does NOT exist here 100 | with pytest.raises(FileNotFoundError): 101 | read_quarantined_data(tmpdir_path) 102 | -------------------------------------------------------------------------------- /pydicer/convert/rtstruct.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | import pydicom 4 | import SimpleITK as sitk 5 | import matplotlib 6 | 7 | from platipy.dicom.io.rtstruct_to_nifti import transform_point_set_from_dicom_struct 8 | from platipy.imaging.utils.io import write_nrrd_structure_set 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | def convert_rtstruct( 14 | dcm_img, 15 | dcm_rt_file, 16 | prefix="Struct_", 17 | output_dir=".", 18 | output_img=None, 19 | spacing=None, 20 | ): 21 | """Convert a DICOM RTSTRUCT to NIFTI masks. 22 | 23 | The masks are stored as NIFTI files in the output directory 24 | 25 | Args: 26 | dcm_img (list|SimpleITK.Image): List of DICOM paths (as str) to use as the reference image 27 | series or a SimpleITK image of the already converted image. 28 | dcm_rt_file (str|pathlib.Path): Path to the DICOM RTSTRUCT file 29 | prefix (str, optional): The prefix to give the output files. Defaults to "Struct" + 30 | underscore. 31 | output_dir (str|pathlib.Path, optional): Path to the output directory. Defaults to ".". 
32 | output_img (str|pathlib.Path, optional): If set, write the reference image to this file as 33 | in NIFTI format. Defaults to None. 34 | spacing (list, optional): Values of image spacing to override. Defaults to None. 35 | """ 36 | 37 | logger.debug("Converting RTStruct: %s", dcm_rt_file) 38 | logger.debug("Output file prefix: %s", prefix) 39 | 40 | if isinstance(dcm_img, list): 41 | dicom_image = sitk.ReadImage(dcm_img) 42 | elif isinstance(dcm_img, sitk.Image): 43 | dicom_image = dcm_img 44 | else: 45 | raise ValueError("dcm_img must be list or SimpleITK.Image") 46 | 47 | dicom_struct = pydicom.read_file(dcm_rt_file, force=True) 48 | 49 | if not isinstance(output_dir, Path): 50 | output_dir = Path(output_dir) 51 | 52 | if output_dir.exists(): 53 | output_dir.mkdir(exist_ok=True, parents=True) 54 | 55 | image_output_path = None 56 | if output_img is not None: 57 | if not isinstance(output_img, Path): 58 | if not output_img.endswith(".nii.gz"): 59 | output_img = f"{output_img}.nii.gz" 60 | output_img = output_dir.joinpath(output_img) 61 | 62 | image_output_path = output_img 63 | logger.debug("Image series to be converted to: %s", image_output_path) 64 | 65 | if spacing: 66 | if isinstance(spacing, str): 67 | spacing = [float(i) for i in spacing.split(",")] 68 | logger.debug("Overriding image spacing with: %s", spacing) 69 | 70 | struct_list, struct_name_sequence = transform_point_set_from_dicom_struct( 71 | dicom_image, dicom_struct, spacing 72 | ) 73 | 74 | for struct_index, struct_image in enumerate(struct_list): 75 | out_name = f"{prefix}{struct_name_sequence[struct_index]}.nii.gz" 76 | out_name = output_dir.joinpath(out_name) 77 | logger.debug("Writing file to: %s", out_name) 78 | sitk.WriteImage(struct_image, str(out_name)) 79 | 80 | if image_output_path is not None: 81 | sitk.WriteImage(dicom_image, str(image_output_path)) 82 | 83 | 84 | def write_nrrd_from_mask_directory( 85 | mask_directory, output_file, colormap=matplotlib.colormaps.get_cmap("rainbow") 86 | ): 87 | """Produce a NRRD file from a directory of masks in Nifti format 88 | 89 | Args: 90 | mask_directory (pathlib.Path|str): Path object of directory containing masks 91 | output_file (pathlib.Path|str): The output NRRD file to write to. 92 | color_map (matplotlib.colors.Colormap | dict, optional): Colormap to use for output. 93 | Defaults to matplotlib.colormaps.get_cmap("rainbow"). 94 | """ 95 | 96 | if isinstance(mask_directory, str): 97 | mask_directory = Path(mask_directory) 98 | 99 | masks = { 100 | p.name.replace(".nii.gz", ""): sitk.ReadImage(str(p)) 101 | for p in mask_directory.glob("*.nii.gz") 102 | } 103 | 104 | write_nrrd_structure_set(masks, output_file=output_file, colormap=colormap) 105 | logger.debug("Writing NRRD Structure Set to: %s", output_file) 106 | -------------------------------------------------------------------------------- /pydicer/input/pacs.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | from typing import Union 4 | 5 | import pydicom 6 | 7 | from platipy.dicom.communication.connector import DicomConnector 8 | 9 | from pydicer.input.base import InputBase 10 | 11 | 12 | class DICOMPACSInput(InputBase): 13 | def __init__( 14 | self, 15 | host: str, 16 | port: str, 17 | ae_title: str = None, 18 | working_directory: Union[str, Path] = None, 19 | ): 20 | """Class for fetching files from DICOM PACS. Currently only supports C-GET commands to 21 | fetch the data. 
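
        Example (illustrative sketch, using the public test PACS referenced in the CLI help;
        the query will only succeed if that server is reachable):

            pacs_input = DICOMPACSInput("www.dicomserver.co.uk", 11112, "DCMQUERY")
            pacs_input.fetch_data(["PAT004", "PAT005"], modalities=["GM"])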
22 | 23 | Args: 24 | host (str): The IP address of host name of DICOM PACS. 25 | port (int): The port to use to communicate on. 26 | ae_title (str, optional): AE Title to provide the DICOM service. 27 | working_directory (str|pathlib.Path, optional): The working directory in which to 28 | store the data fetched. Defaults to a temp directory. 29 | 30 | Raises: 31 | ConnectionError: Raises a connection error if unable to verify the connection to the 32 | PACS. 33 | """ 34 | 35 | super().__init__(working_directory) 36 | 37 | self.dicom_connector = DicomConnector( 38 | host=host, 39 | port=port, 40 | ae_title=ae_title, 41 | output_directory=self.working_directory, 42 | ) 43 | 44 | if not self.dicom_connector.verify(): 45 | raise ConnectionError("Unable to connect to DICOM PACS.") 46 | 47 | def fetch_data( 48 | self, patients: Union[list, str], modalities: Union[list, str] = None 49 | ): 50 | """Download the DICOM data from the PACS. 51 | 52 | Args: 53 | patients (list|str): A list of patient IDs, or a single patient ID. Wildcard matching 54 | based on the DICOM standard is supported. 55 | modalities (list|str, optional): List of modalities or a single modality to fetch. 56 | Defaults to None where all modalities would be fetched. 57 | """ 58 | 59 | if not isinstance(patients, list) and not isinstance(patients, tuple): 60 | patients = [patients] 61 | 62 | if modalities is None: 63 | modalities = [""] 64 | 65 | if not isinstance(modalities, list) and not isinstance(modalities, tuple): 66 | modalities = [modalities] 67 | 68 | for patient in patients: 69 | dataset = pydicom.Dataset() 70 | dataset.PatientID = patient 71 | dataset.PatientName = "" 72 | dataset.StudyInstanceUID = "" 73 | dataset.ModalitiesInStudy = "" 74 | dataset.QueryRetrieveLevel = "STUDY" 75 | 76 | studies = self.dicom_connector.do_find(dataset) 77 | 78 | for study in studies: 79 | if not study: 80 | continue # These lists often contain a None study, so just skip that 81 | 82 | for modality in modalities: 83 | dataset = pydicom.Dataset() 84 | dataset.PatientID = patient 85 | dataset.StudyInstanceUID = study.StudyInstanceUID 86 | dataset.Modality = modality 87 | dataset.SeriesInstanceUID = "" 88 | dataset.QueryRetrieveLevel = "SERIES" 89 | 90 | series = self.dicom_connector.do_find(dataset) 91 | for s in series: 92 | if not s: 93 | continue # Again, safe to skip this if None 94 | 95 | if not s.PatientID == patient: 96 | continue 97 | 98 | # Download the series 99 | self.dicom_connector.download_series(s.SeriesInstanceUID) 100 | 101 | # Finally, just make sure all files end with the .dcm extension 102 | for f in self.working_directory.glob("**/*"): 103 | if f.is_dir(): 104 | continue 105 | 106 | if f.name.endswith(".dcm"): 107 | continue 108 | 109 | target = f.parent.joinpath(f"{f.name}.dcm") 110 | os.rename(f, target) 111 | -------------------------------------------------------------------------------- /pydicer/cli/input.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | from pathlib import Path 4 | 5 | from pydicer.input.pacs import DICOMPACSInput 6 | from pydicer.input.test import TestInput 7 | from pydicer.input.web import WebInput 8 | from pydicer.input.filesystem import FileSystemInput 9 | from pydicer import PyDicer 10 | 11 | 12 | logger = logging.getLogger(__name__) 13 | 14 | 15 | def run_pipeline(input_method, *args): 16 | """Run the pipeline using a specific input methodthe test data provided 17 | 18 | Args: 19 | working_dir (str, optional): Path to 
store test data. 20 | input_method (str): the input method chosen to run this pipeline 21 | """ 22 | 23 | logging.basicConfig(format="%(name)s\t%(levelname)s\t%(message)s", level=logging.DEBUG) 24 | 25 | logger.info("Running Pipeline with Test Input") 26 | print(args[0]) 27 | directory = Path(args[0]) 28 | directory.mkdir(exist_ok=True, parents=True) 29 | 30 | dicom_dir = directory.joinpath("dicom") 31 | dicom_dir.mkdir(exist_ok=True, parents=True) 32 | 33 | if input_method == "test": 34 | input_obj = testinput_cli(*args) 35 | if input_method == "web": 36 | input_obj = web_cli(*args) 37 | elif input_method == "pacs": 38 | input_obj = pacs_cli(*args) 39 | else: 40 | input_obj = FileSystemInput(*args) 41 | 42 | input_obj.fetch_data() 43 | 44 | pydicer = PyDicer(directory) 45 | pydicer.add_input(input_obj) 46 | 47 | # Preprocess the data fetch to prepare it for conversion 48 | logger.info("Running Pipeline") 49 | pydicer.run_pipeline() 50 | 51 | 52 | def testinput_cli(working_dir): 53 | """Trigger the test input as a mini pipeline for the CLI tool 54 | 55 | Example usage: 56 | python -m pydicer.cli.run input --type test ./cli_test 57 | 58 | Args: 59 | working_dir (str|pathlib.Path, optional): The working directory in which to 60 | store the data fetched. 61 | """ 62 | logging.basicConfig(format="%(name)s\t%(levelname)s\t%(message)s", level=logging.DEBUG) 63 | 64 | logger.info("Running Test Input sub command") 65 | test_input = TestInput(working_dir) 66 | test_input.fetch_data() 67 | return test_input 68 | 69 | 70 | def pacs_cli( 71 | working_dir, 72 | host="www.dicomserver.co.uk", 73 | port=11112, 74 | ae_title=None, 75 | modalities="GM", 76 | *patients 77 | ): 78 | """Trigger the DICOM PACS input as a mini pipeline for the CLI tool. If no inputs received, 79 | then by default it will retrieve some test data 80 | 81 | Example usage: 82 | python -m pydicer.cli.run input --type pacs ./cli_test www.dicomserver.co.uk 11112 DCMQUERY 83 | GM PAT004 PAT005 84 | 85 | Args: 86 | working_dir (str|pathlib.Path, optional): The working directory in which to 87 | store the data fetched. 88 | host (str, optional): The IP address of host name of DICOM PACS. Defaults to 89 | "www.dicomserver.co.uk". 90 | port (int, optional): The port to use to communicate on. Defaults to 11112. 91 | ae_title (str, optional): AE Title to provide the DICOM service. Defaults to None. 92 | modalities (str, optional): The modalities to retrieve DICOMs for. Defaults to "GM". 93 | patients (str, required): a string-list of patient IDs (IDs seperated by spaces) to 94 | retrieve the DICOMs for. 95 | """ 96 | if not patients: 97 | logger.error( 98 | "No patient IDs provided, please provided a list-string separated by spaces of " 99 | "patients IDs to query for " 100 | ) 101 | sys.exit() 102 | logger.info("Running DICOM PACS Input sub command") 103 | pacs_input = DICOMPACSInput(host, int(port), ae_title, working_dir) 104 | pacs_input.fetch_data(patients, [modalities]) 105 | return pacs_input 106 | 107 | 108 | def tcia_cli(): 109 | """Trigger the TCIA input as a mini pipeline for the CLI tool.""" 110 | return 111 | 112 | 113 | def web_cli(working_dir, data_url): 114 | """Trigger the web input as a mini pipeline for the CLI tool. 115 | 116 | Example usage: 117 | python -m pydicer.cli.run input --type web ./cli_test 118 | https://zenodo.org/record/5276878/files/HNSCC.zip 119 | 120 | Args: 121 | working_dir (str|pathlib.Path): The working directory in which to 122 | store the data fetched. 
122 | store the data fetched.
123 | data_url (str): URL of the dataset to be downloaded from the internet 124 | """ 125 | 126 | logger.info("Running web Input sub command") 127 | web_input = WebInput(data_url, working_dir) 128 | web_input.fetch_data() 129 | return web_input 130 | -------------------------------------------------------------------------------- /tests/test_segmentation.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=redefined-outer-name,missing-function-docstring 2 | 3 | import pytest 4 | 5 | import pandas as pd 6 | import SimpleITK as sitk 7 | 8 | from pydicer.generate.segmentation import segment_image, segment_dataset, read_segmentation_log 9 | from pydicer.utils import read_converted_data 10 | 11 | 12 | @pytest.fixture 13 | def test_data_path(tmp_path_factory): 14 | """Fixture to generate a pydicer style file structure. Along with a few dummy images to 15 | run a dummy auto-semgentation on.""" 16 | 17 | working_directory = tmp_path_factory.mktemp("data") 18 | 19 | cols = [ 20 | "", 21 | "sop_instance_uid", 22 | "hashed_uid", 23 | "modality", 24 | "patient_id", 25 | "series_uid", 26 | "for_uid", 27 | "referenced_sop_instance_uid", 28 | "path", 29 | ] 30 | rows = [ 31 | [ 32 | 0, 33 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.418136430763474248173140712714", 34 | "b281ea", 35 | "CT", 36 | "HNSCC-01-0019", 37 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.233510441938368266923995238976", 38 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.290727775603409136366833989550", 39 | "", 40 | "data/HNSCC-01-0019/images/b281ea", 41 | ], 42 | [ 43 | 1, 44 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.41813643076347424817314071123", 45 | "b28321", 46 | "CT", 47 | "HNSCC-01-0019", 48 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.233510441938368266923995238123", 49 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.290727775603409136366833989123", 50 | "", 51 | "data/HNSCC-01-0019/images/b28321", 52 | ], 53 | ] 54 | 55 | df_converted = pd.DataFrame(rows, columns=cols) 56 | for _, row in df_converted.iterrows(): 57 | 58 | data_obj_path = working_directory.joinpath(row.path) 59 | data_obj_path.mkdir(parents=True, exist_ok=True) 60 | 61 | img_path = data_obj_path.joinpath("CT.nii.gz") 62 | sitk.WriteImage(sitk.Image(10, 10, 10, sitk.sitkUInt8), str(img_path)) 63 | 64 | converted_path = working_directory.joinpath("data", "HNSCC-01-0019", "converted.csv") 65 | df_converted.to_csv(converted_path) 66 | 67 | # Also create a dataset directory with converted sub-set 68 | dataset_path = working_directory.joinpath("test_dataset", "HNSCC-01-0019") 69 | dataset_path.mkdir(parents=True) 70 | converted_path = dataset_path.joinpath("converted.csv") 71 | df_converted[:1].to_csv(converted_path) 72 | 73 | return working_directory 74 | 75 | 76 | def test_segment_image_incorrect_function_input(test_data_path): 77 | def seg_func(_, __): 78 | return {"test": sitk.Image(10, 10, 10, sitk.sitkUInt8)} 79 | 80 | df = read_converted_data(test_data_path) 81 | img_row = df.iloc[0] 82 | 83 | segment_image(test_data_path, img_row, "test_seg_fail_input", seg_func) 84 | 85 | df_log = read_segmentation_log(image_row=img_row) 86 | assert len(df_log) == 1 87 | assert not df_log.iloc[0].success_flag 88 | 89 | 90 | def test_segment_image_incorrect_function_output(test_data_path): 91 | def seg_func(img): 92 | return img 93 | 94 | df = read_converted_data(test_data_path) 95 | img_row = df.iloc[0] 96 | 97 | segment_image(test_data_path, img_row, "test_seg_fail_output", seg_func) 98 | 99 | df_log = read_segmentation_log(image_row=img_row) 100 | assert len(df_log) 
== 1 101 | assert not df_log.iloc[0].success_flag 102 | 103 | 104 | def test_segment_image(test_data_path): 105 | def seg_func(img): 106 | return {"struct_a": img > 0, "struct_b": img > 1} 107 | 108 | df = read_converted_data(test_data_path) 109 | img_row = df.iloc[0] 110 | 111 | segment_image(test_data_path, img_row, "test_seg", seg_func) 112 | 113 | df_log = read_segmentation_log(image_row=img_row) 114 | assert len(df_log) == 1 115 | assert df_log.iloc[0].success_flag 116 | 117 | 118 | def test_segment_dataset(test_data_path): 119 | def seg_func(img): 120 | return {"struct_a": img > 0, "struct_b": img > 1} 121 | 122 | df = read_converted_data(test_data_path) 123 | assert len(df) == 2 124 | 125 | segment_dataset(test_data_path, "test_seg", seg_func) 126 | 127 | df = read_converted_data(test_data_path) 128 | assert len(df) == 4 129 | 130 | 131 | def test_segment_dataset_subset(test_data_path): 132 | def seg_func(img): 133 | return {"struct_a": img > 0, "struct_b": img > 1} 134 | 135 | segment_dataset(test_data_path, "test_seg", seg_func, dataset_name="test_dataset") 136 | 137 | df = read_converted_data(test_data_path, dataset_name="test_dataset") 138 | assert len(df) == 2 139 | -------------------------------------------------------------------------------- /tests/test_convert.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=redefined-outer-name,missing-function-docstring 2 | 3 | import tempfile 4 | import json 5 | from pathlib import Path 6 | import numpy as np 7 | 8 | import pytest 9 | 10 | import SimpleITK as sitk 11 | import pydicom 12 | 13 | from pydicer.input.test import TestInput 14 | from pydicer.input.web import WebInput 15 | 16 | from pydicer.convert.headers import convert_dicom_headers 17 | from pydicer.convert.rtstruct import convert_rtstruct 18 | from pydicer.convert.pt import convert_dicom_to_nifti_pt 19 | 20 | 21 | @pytest.fixture 22 | def test_data(): 23 | """Fixture to grab the test data""" 24 | 25 | directory = Path("./testdata") 26 | directory.mkdir(exist_ok=True, parents=True) 27 | 28 | working_directory = directory.joinpath("dicom") 29 | working_directory.mkdir(exist_ok=True, parents=True) 30 | 31 | test_input = TestInput(working_directory) 32 | test_input.fetch_data() 33 | 34 | return working_directory 35 | 36 | 37 | @pytest.fixture 38 | def test_data_all(): 39 | """Fixture to grab the test data with more modalities""" 40 | 41 | directory = Path("./testdata") 42 | directory.mkdir(exist_ok=True, parents=True) 43 | 44 | working_directory = directory.joinpath("working2") 45 | working_directory.mkdir(exist_ok=True, parents=True) 46 | 47 | data_url = "https://zenodo.org/record/5574640/files/HNSCC-01-0019.zip" 48 | web_input = WebInput(data_url, working_directory) 49 | web_input.fetch_data() 50 | 51 | return working_directory 52 | 53 | 54 | def test_convert_rt_struct(test_data): 55 | 56 | img_files = [ 57 | str(f) 58 | for f in test_data.joinpath( 59 | "HNSCC", "HNSCC-01-0199", "10-26-2002-NA-RT SIMULATION-18560", "3.000000-NA-58373" 60 | ).glob("*.dcm") 61 | ] 62 | 63 | img_files.sort() 64 | 65 | rt_struct_file = test_data.joinpath( 66 | "HNSCC", 67 | "HNSCC-01-0199", 68 | "10-26-2002-NA-RT SIMULATION-18560", 69 | "1.000000-NA-59395", 70 | "1-1.dcm", 71 | ) 72 | 73 | with tempfile.TemporaryDirectory() as output_dir: 74 | 75 | output_path = Path(output_dir) 76 | 77 | convert_rtstruct( 78 | img_files, 79 | rt_struct_file, 80 | prefix="", 81 | output_dir=output_path, 82 | output_img=None, 83 | spacing=None, 84 | ) 85 | 
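        # convert_rtstruct writes one NIfTI mask per ROI into output_path, named
        # "<prefix><structure name>.nii.gz" (with an empty prefix in this test), so the
        # checks below spot-check the mask count and one structure's geometry and voxel sum.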
86 | # Make sure there are the correct number of structures 87 | assert len(list(output_path.glob("*"))) == 38 88 | 89 | # Open a random structure and check that it is correct 90 | brainstem_path = output_path.joinpath("Brainstem.nii.gz") 91 | brainstem = sitk.ReadImage(str(brainstem_path)) 92 | 93 | assert brainstem.GetSize() == (512, 512, 174) 94 | assert brainstem.GetSpacing() == (0.9765625, 0.9765625, 2.5) 95 | assert sitk.GetArrayFromImage(brainstem).sum() == 12450 96 | 97 | 98 | def test_convert_pet(test_data_all): 99 | 100 | pet_dir = test_data_all.joinpath( 101 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.995469920533091641707578194770" 102 | ) 103 | 104 | pet_files = [str(f) for f in pet_dir.glob("*.dcm")] 105 | 106 | with tempfile.TemporaryDirectory() as output_dir: 107 | 108 | output_path = Path(output_dir) 109 | output_file = output_path.joinpath("pet.nii.gz") 110 | convert_dicom_to_nifti_pt(pet_files, str(output_file)) 111 | 112 | assert output_file.exists() 113 | 114 | pet_img = sitk.ReadImage(str(output_file)) 115 | assert pet_img.GetSize() == (128, 128, 91) 116 | 117 | pet_arr = sitk.GetArrayFromImage(pet_img) 118 | assert np.allclose(pet_arr.max(), 11.9479, atol=0.001) 119 | 120 | 121 | def test_save_dicom_headers(test_data_all): 122 | 123 | dicom_dir = test_data_all.joinpath( 124 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.995469920533091641707578194770" 125 | ) 126 | 127 | dicom_file = [str(f) for f in dicom_dir.glob("*.dcm")][0] 128 | 129 | with tempfile.TemporaryDirectory() as output_dir: 130 | 131 | # Save off the headers for this file 132 | output_path = Path(output_dir) 133 | output_file = output_path.joinpath("test.json") 134 | convert_dicom_headers(dicom_file, "", output_file) 135 | 136 | # Check that we can read them again from the JSON 137 | with open(output_file, "r", encoding="utf8") as json_file: 138 | ds_dict = json.load(json_file) 139 | 140 | loaded_ds = pydicom.Dataset.from_json(ds_dict, bulk_data_uri_handler=lambda _: None) 141 | original_ds = pydicom.read_file(dicom_file) 142 | 143 | # Check that some key header values are the same in the original DICOM and the one loaded 144 | # from JSON 145 | assert loaded_ds.SeriesInstanceUID == original_ds.SeriesInstanceUID 146 | assert loaded_ds.PatientID == original_ds.PatientID 147 | assert loaded_ds.Modality == original_ds.Modality 148 | assert loaded_ds.SeriesDate == original_ds.SeriesDate 149 | assert loaded_ds.FrameOfReferenceUID == original_ds.FrameOfReferenceUID 150 | -------------------------------------------------------------------------------- /examples/VisualiseData.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Visualise Data\n", 8 | "\n", 9 | "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/AustralianCancerDataNetwork/pydicer/blob/main/examples/VisualiseData.ipynb)\n", 10 | "\n", 11 | "PyDicer's [visualise module](https://australiancancerdatanetwork.github.io/pydicer/visualise.html) will produce cross-sections of data objects and store them\n", 12 | "in `.png` format within the data object directory. This is particularly useful for fast inspection\n", 13 | "of the data to ensure that nothing has gone wrong during conversion.\n", 14 | "\n", 15 | "The visualise module can be run at any time after conversion. 
If you are using advanced features\n", 16 | "of PyDicer, such as [auto-segmentation inference](https://australiancancerdatanetwork.github.io/pydicer/_examples/AutoSegmentation.html) and [object generation](https://australiancancerdatanetwork.github.io/pydicer/_examples/ObjectGeneration.html), you can run the\n", 17 | "visualise module following the generation of the new data objects to produce the cross-section\n", 18 | "`.png` files." 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "try:\n", 28 | " from pydicer import PyDicer\n", 29 | "except ImportError:\n", 30 | " !pip install pydicer\n", 31 | " from pydicer import PyDicer\n", 32 | "\n", 33 | "from pathlib import Path\n", 34 | "\n", 35 | "from pydicer.utils import fetch_converted_test_data" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "## Setup PyDicer\n", 43 | "\n", 44 | "HNSCC data prepared for this example are downloaded and stored into a `testdata_hnscc` directory.\n", 45 | "We will use this for our PyDicer working directory. We also initialise our PyDicer object." 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "working_directory = fetch_converted_test_data(\"./testdata_hnscc\", dataset=\"HNSCC\")\n", 55 | "\n", 56 | "pydicer = PyDicer(working_directory)" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "## Visualise Data\n", 64 | "\n", 65 | "We simply call the [visualise](https://australiancancerdatanetwork.github.io/pydicer/visualise.html#pydicer.visualise.data.VisualiseData.visualise) function of the `visualise` module to produce the cross-sections.\n", 66 | "\n", 67 | "Inspect some of the data object directories in `testdata_hnscc/data` and look for the `.png`\n", 68 | "cross-sections. The `{hashed_uid}` in files named `vis_{hashed_uid}.png` refers to a UID hash\n", 69 | "linking to the image being visualised. Visualisations are produced for:\n", 70 | "- Images\n", 71 | "- RT Structure Sets\n", 72 | "- RT Dose Grids" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "pydicer.visualise.visualise()" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "### Run for a single patient\n", 89 | "\n", 90 | "You can run the visualisation for only a single patient (or list of specific patients) by providing\n", 91 | "the `patient` argument." 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "pydicer.visualise.visualise(patient=\"HNSCC-01-0199\")" 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "### Avoid Re-generating Visualisation\n", 108 | "\n", 109 | "If you've added more data to your dataset, and want to avoid re-generating visualisations, set the\n", 110 | "`force` argument to `False`." 
111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "pydicer.visualise.visualise(force=False)" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [] 128 | } 129 | ], 130 | "metadata": { 131 | "interpreter": { 132 | "hash": "814af119db7f8f2860617be3dcd1d37c560587d11c65bd58c45b1679d3ee6ea4" 133 | }, 134 | "kernelspec": { 135 | "display_name": "Python 3.8.0 64-bit ('pydicer': pyenv)", 136 | "language": "python", 137 | "name": "python3" 138 | }, 139 | "language_info": { 140 | "codemirror_mode": { 141 | "name": "ipython", 142 | "version": 3 143 | }, 144 | "file_extension": ".py", 145 | "mimetype": "text/x-python", 146 | "name": "python", 147 | "nbconvert_exporter": "python", 148 | "pygments_lexer": "ipython3", 149 | "version": "3.9.16" 150 | }, 151 | "orig_nbformat": 4 152 | }, 153 | "nbformat": 4, 154 | "nbformat_minor": 2 155 | } 156 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 
50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | d.almouiee@unsw.edu.au. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.0, available at 119 | . 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 123 | 124 | [homepage]: https://www.contributor-covenant.org 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | . Translations are available at 128 | . 
129 | -------------------------------------------------------------------------------- /examples/DoseMetrics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Dose Metrics\n", 9 | "\n", 10 | "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/AustralianCancerDataNetwork/pydicer/blob/main/examples/DoseMetrics.ipynb)\n", 11 | "\n", 12 | "In this example notebook we will compute [Dose Volume Histograms (DVH)](https://pyplati.github.io/platipy/dose.html#module-platipy.imaging.dose.dvh) for our `RTDOSE` objects\n", 13 | "across structures found in `RTSTRUCT` objects in our dataset. We use\n", 14 | "[HNSCC](https://wiki.cancerimagingarchive.net/display/Public/HNSCC) data from the Cancer Imaging\n", 15 | "Archive which has already been converted using PyDicer for demonstration purposes." 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "try:\n", 25 | " from pydicer import PyDicer\n", 26 | "except ImportError:\n", 27 | " !pip install pydicer\n", 28 | " from pydicer import PyDicer\n", 29 | "\n", 30 | "from pathlib import Path\n", 31 | "\n", 32 | "from pydicer.utils import fetch_converted_test_data" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "## Fetch data\n", 40 | "\n", 41 | "HNSCC data prepared for this example are downloaded and stored into a `testdata_hnscc` directory.\n", 42 | "We will use this for our PyDicer working directory." 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "working_directory = fetch_converted_test_data(\"./testdata_hnscc\", dataset=\"HNSCC\")" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "## Initialise PyDicer object\n", 59 | "\n", 60 | "Using the working directory containing the test data." 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "pydicer = PyDicer(working_directory)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "## Compute DVH\n", 77 | "\n", 78 | "Before we can extract dose metrics, we must compute Dose Volume Histograms for all dose objects and\n", 79 | "structure sets. This is done using the [compute_dvh](https://australiancancerdatanetwork.github.io/pydicer/analyse.html#pydicer.analyse.data.AnalyseData.compute_dvh) function." 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "pydicer.analyse.compute_dvh()" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "## Inspect DVH\n", 96 | "\n", 97 | "DVHs computed are stored in the respective dose object directories on the file system. Inspect a\n", 98 | "dose object directory (e.g. `testdata_hnscc/data/HNSCC-01-0019/doses/309e1a`). Here you will find\n", 99 | "a `.png` file which plots the DVH for each of the linked structures. 
In addition a `.csv` file \n", 100 | "stores the raw DVH values.\n", 101 | "\n", 102 | "The DVHs can for this dataset can be loaded into a pandas DataFrame with the\n", 103 | "[get_all_dvhs_for_dataset](https://australiancancerdatanetwork.github.io/pydicer/analyse.html#pydicer.analyse.data.AnalyseData.get_all_dvhs_for_dataset) function." 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "df_dvh = pydicer.analyse.get_all_dvhs_for_dataset()\n", 113 | "df_dvh.head()" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "## Compute Dose Metrics\n", 121 | "\n", 122 | "The [compute_dose_metrics](https://australiancancerdatanetwork.github.io/pydicer/analyse.html#pydicer.analyse.data.AnalyseData.compute_dose_metrics) function in the `analyse` module can compute **D**, **V** and **Dcc**\n", 123 | "metrics. Specify the points at which to compute those values. For example, the following cell\n", 124 | "computes the **D95**, **D50**, **V5** and **Dcc10**." 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": null, 130 | "metadata": {}, 131 | "outputs": [], 132 | "source": [ 133 | "df_dose_metrics = pydicer.analyse.compute_dose_metrics(\n", 134 | " d_point=[95, 50],\n", 135 | " v_point=[5],\n", 136 | " d_cc_point=[10]\n", 137 | ")\n", 138 | "df_dose_metrics.head()" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [] 147 | } 148 | ], 149 | "metadata": { 150 | "interpreter": { 151 | "hash": "814af119db7f8f2860617be3dcd1d37c560587d11c65bd58c45b1679d3ee6ea4" 152 | }, 153 | "kernelspec": { 154 | "display_name": "Python 3.8.0 64-bit ('pydicer': pyenv)", 155 | "language": "python", 156 | "name": "python3" 157 | }, 158 | "language_info": { 159 | "codemirror_mode": { 160 | "name": "ipython", 161 | "version": 3 162 | }, 163 | "file_extension": ".py", 164 | "mimetype": "text/x-python", 165 | "name": "python", 166 | "nbconvert_exporter": "python", 167 | "pygments_lexer": "ipython3", 168 | "version": "3.9.16" 169 | }, 170 | "orig_nbformat": 4 171 | }, 172 | "nbformat": 4, 173 | "nbformat_minor": 2 174 | } 175 | -------------------------------------------------------------------------------- /pydicer/dataset/preparation.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from pathlib import Path 4 | from typing import Callable, Union 5 | 6 | import pandas as pd 7 | 8 | from pydicer.constants import CONVERTED_DIR_NAME 9 | 10 | from pydicer.dataset import functions 11 | from pydicer.utils import read_converted_data 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | 16 | class PrepareDataset: 17 | """ 18 | Class that provides functionality for prepartion of subsets of data. 19 | 20 | Args: 21 | - working_directory (str|pathlib.Path, optional): Main working directory for pydicer. 22 | Defaults to ".". 23 | """ 24 | 25 | def __init__(self, working_directory: Union[str, Path] = "."): 26 | self.working_directory = Path(working_directory) 27 | 28 | def add_object_to_dataset(self, dataset_name: str, data_object_row: pd.Series): 29 | """Add one data object to a dataset. 30 | 31 | Args: 32 | dataset_name (str): The name of the dataset to add the object to. 33 | data_object_row (pd.Series): The DataFrame row of the converted object. 
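        Example:
            An illustrative sketch only -- ``./testdata`` and the dataset name ``clean`` are
            placeholders, and this assumes the working directory already contains converted
            data::

                from pathlib import Path
                from pydicer.utils import read_converted_data

                working_dir = Path("./testdata")
                df = read_converted_data(working_dir)

                prepare = PrepareDataset(working_dir)
                prepare.add_object_to_dataset("clean", df[df.modality == "RTSTRUCT"].iloc[0])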
34 | """ 35 | 36 | dataset_dir = self.working_directory.joinpath(dataset_name) 37 | 38 | # Create a copy so that we aren't manuipulating the original entry 39 | data_object_row = data_object_row.copy() 40 | 41 | object_path = Path(data_object_row.path) 42 | if object_path.is_absolute(): 43 | data_object_row.path = str(object_path.relative_to(self.working_directory)) 44 | object_path = Path(data_object_row.path) 45 | 46 | object_path = Path(data_object_row.path) 47 | symlink_path = dataset_dir.joinpath(object_path.relative_to(CONVERTED_DIR_NAME)) 48 | 49 | rel_part = os.sep.join( 50 | [ 51 | ".." 52 | for _ in symlink_path.parent.relative_to(self.working_directory).parts 53 | ] 54 | ) 55 | src_path = Path(f"{rel_part}{os.sep}{object_path}") 56 | 57 | symlink_path.parent.mkdir(parents=True, exist_ok=True) 58 | 59 | if symlink_path.exists(): 60 | logger.debug("Symlink path already exists: %s", symlink_path) 61 | else: 62 | symlink_path.symlink_to(src_path) 63 | 64 | pat_id = data_object_row.patient_id 65 | pat_dir = dataset_dir.joinpath(pat_id) 66 | pat_converted_csv = pat_dir.joinpath("converted.csv") 67 | df_pat = pd.DataFrame([data_object_row]) 68 | if pat_converted_csv.exists(): 69 | col_types = {"patient_id": str, "hashed_uid": str} 70 | df_converted = pd.read_csv(pat_converted_csv, index_col=0, dtype=col_types) 71 | 72 | # Check if this object already exists in the converted dataframe 73 | if ( 74 | len(df_converted[df_converted.hashed_uid == data_object_row.hashed_uid]) 75 | == 0 76 | ): 77 | # If not add it 78 | df_pat = pd.concat([df_converted, df_pat]) 79 | else: 80 | # Otherwise just leave the converted data as is 81 | df_pat = df_converted 82 | 83 | df_pat = df_pat.reset_index(drop=True) 84 | df_pat.to_csv(pat_dir.joinpath("converted.csv")) 85 | 86 | def prepare_from_dataframe(self, dataset_name: str, df_prepare: pd.DataFrame): 87 | """Prepare a dataset from a filtered converted dataframe 88 | 89 | Args: 90 | dataset_name (str): The name of the dataset to generate 91 | df_prepare (pd.DataFrame): Filtered Pandas DataFrame containing rows of converted data. 92 | """ 93 | 94 | dataset_dir = self.working_directory.joinpath(dataset_name) 95 | if dataset_dir.exists(): 96 | logger.warning( 97 | "Dataset directory already exists. Consider using a different dataset name or " 98 | "remove the existing directory" 99 | ) 100 | 101 | # Create a copy of df_prepare 102 | df_prepare = df_prepare.copy() 103 | 104 | # Remove the working directory part for when we re-save off the filtered converted csv 105 | df_prepare.path = df_prepare.path.apply( 106 | lambda p: str(Path(p).relative_to(self.working_directory)) 107 | ) 108 | 109 | # For each data object prepare the data in the dataset directory 110 | for _, row in df_prepare.iterrows(): 111 | self.add_object_to_dataset(dataset_name, row) 112 | 113 | def prepare( 114 | self, dataset_name: str, preparation_function: Callable, patients=None, **kwargs 115 | ): 116 | """Calls upon an appropriate preparation function to generate a clean dataset ready for 117 | use. Additional keyword arguments are passed through to the preparation_function. 118 | 119 | Args: 120 | dataset_name (str): The name of the dataset to generate 121 | preparation_function (function|str): the function use for preparation 122 | patients (list): The list of patient IDs to use for dataset. If None then all patients 123 | will be considered. Defaults to None. 
124 | 125 | Raises: 126 | AttributeError: Raised if preparation_function is not a function or a string defining 127 | a known preparation function. 128 | """ 129 | 130 | if isinstance(preparation_function, str): 131 | preparation_function = getattr(functions, preparation_function) 132 | 133 | if not callable(preparation_function): 134 | raise AttributeError( 135 | "preparation_function must be a function or a str defined in pydicer.dataset" 136 | ) 137 | 138 | logger.info( 139 | "Preparing dataset %s using function: %s", 140 | dataset_name, 141 | preparation_function, 142 | ) 143 | 144 | # Grab the DataFrame containing all the converted data 145 | df_converted = read_converted_data(self.working_directory, patients=patients) 146 | 147 | # Send to the prepare function which will return a DataFrame of the data objects to use for 148 | # the dataset 149 | df_clean_data = preparation_function(df_converted, **kwargs) 150 | 151 | self.prepare_from_dataframe(dataset_name, df_clean_data) 152 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PyDicer: PYthon Dicom Image ConvertER 2 | 3 | [![SoftwareX](https://img.shields.io/badge/SoftwareX-10.1016/j.softx.2024.102010-green.svg)](https://doi.org/10.1016/j.softx.2024.102010) 4 | 5 | Welcome to PyDicer, a tool to ease the process of converting Radiotherapy DICOM data objects into a format typically used for research purposes. In addition to data conversion, functionality is provided to help analyse the data. This includes computing radiomic features, radiotherapy dose metrics and auto-segmentation metrics. PyDicer uses the NIfTI format to store data is a well defined file system structure. Tracking of these data objects in CSV files, also stored on the file system, provides an easy and flexible way to work with the converted data in your research. 6 | 7 | The [PyDicer documentation](https://australiancancerdatanetwork.github.io/pydicer/index.html) provides several examples and guides to help you get started with the tool. Here are a few **PyDicer principles** to keep in mind as you get started: 8 | 9 | - The [working directory structure](https://australiancancerdatanetwork.github.io/pydicer/index.html#directory-structure) is standardised and generalisable for use with any DICOM dataset. 10 | - Use [Pandas DataFrame's](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html) to work with converted data objects. 11 | - [SimpleITK](https://simpleitk.readthedocs.io/en/master/) and [PlatiPy](https://pyplati.github.io/platipy/) are used under the hood for the image conversion, visualisation and analysis tasks. 12 | - Always inspect visualisations, plots and metrics produced by PyDicer in your working directory. Remember, PyDicer is a research tool so only use it for research purposes and expect the unexpected! 13 | 14 | ## Installation 15 | 16 | PyDicer currently supports Python version 3.8, 3.9 and 3.10. Install PyDicer in your Python 17 | environment using `pip`: 18 | 19 | ```bash 20 | pip install pydicer 21 | ``` 22 | 23 | ## Supported Modalities 24 | 25 | PyDicer currently supports converting and analysing the following DICOM modalities: 26 | 27 | - CT 28 | - MR 29 | - PT (Experimental) 30 | - RTSTRUCT 31 | - RTPLAN (Not converted since this only consists of meta data) 32 | - RTDOSE 33 | 34 | ## Directory Structure 35 | 36 | PyDicer will place converted and intermediate files into a specific directory structure. 
Visualisation, metrics computed and plots are also stored alongside the converted data objects. Within the configured working directory `[working]`, the following directories will be generated: 37 | 38 | - `[working]/data`: Directory in which converted data will be placed 39 | - `[working]/quarantine`: Files which couldn't be preprocessed or converted will be placed in here for you to investigate further 40 | - `[working]/.pydicer`: Intermediate files as well as log output will be stored in here 41 | - `[working]/[dataset_name]`: Clean datasets prepared using the Dataset Preparation Module will be stored in a directory with their name and will symbolically link to the converted data in the `[working]/data` directory 42 | 43 | ![PyDicer Working Directory structure](assets/pydicer-working-directory-structure.png) 44 | PyDicer working directory structure. Ref. [Chlap, P. et al. SoftwareX](https://doi.org/10.1016/j.softx.2024.102010) 45 | 46 | ## Pipeline 47 | 48 | The pipeline handles everything from fetching the DICOM data through to conversion and preparation of your research dataset. Here are the key steps of the pipeline: 49 | 50 | 1. **Input**: Various classes are provided to fetch DICOM files from the file system, DICOM PACS, TCIA or Orthanc. A TestInput class is also provided to supply test data for development/testing. 51 | 52 | 2. **Preprocess**: The DICOM files are sorted and linked. Error checking is performed and issues are resolved where possible. 53 | 54 | 3. **Conversion**: The DICOM files are converted to the target format (NIfTI). 55 | 56 | 4. **Visualisation**: Visualisations of the converted data are prepared to assist with data selection. 57 | 58 | 5. **Dataset Preparation**: The appropriate files from the converted data are selected to prepare a clean dataset ready for use in your research project! 59 | 60 | 6. **Analysis**: Radiomics and Dose Metrics are computed on the converted data. 61 | 62 | ## Getting Started 63 | 64 | Running the pipeline is easy. The following script will get you started: 65 | 66 | ```python 67 | from pathlib import Path 68 | 69 | from pydicer.input.test import TestInput 70 | from pydicer import PyDicer 71 | 72 | # Configure working directory 73 | directory = Path("./testdata") 74 | directory.mkdir(exist_ok=True, parents=True) 75 | 76 | # Fetch some test DICOM data to convert 77 | dicom_directory = directory.joinpath("dicom") 78 | dicom_directory.mkdir(exist_ok=True, parents=True) 79 | test_input = TestInput(dicom_directory) 80 | test_input.fetch_data() 81 | 82 | # Create the PyDicer tool object and add the dicom directory as an input location 83 | pydicer = PyDicer(directory) 84 | pydicer.add_input(dicom_directory) 85 | 86 | # Run the pipeline 87 | pydicer.run_pipeline() 88 | ``` 89 | 90 | ## How to Cite 91 | 92 | If you make use of PyDicer within your research work, please consider citing our SoftwareX paper: 93 | 94 | Chlap P, Al Mouiee D, Finnegan RN, et al. PyDicer: An open-source python library for conversion and analysis of radiotherapy DICOM data. *SoftwareX*. 2025;29:102010. [doi:10.1016/j.softx.2024.102010](https://doi.org/10.1016/j.softx.2024.102010) 95 | 96 | ## Contributing 97 | 98 | PyDicer is an open-source tool and contributions are welcome! Here are some ways you might consider contributing to the project: 99 | 100 | - Reporting issues on GitHub. 101 | - Correcting/extending the documentation. 102 | - Contributing a bug fix or extending some functionality. 103 | - Providing functionality to support additional DICOM modalities. 
104 | - Giving the [PyDicer project](https://github.com/AustralianCancerDataNetwork/pydicer) a star on GitHub. 105 | 106 | For more information, see the [Contributing documentation](https://australiancancerdatanetwork.github.io/pydicer/contributing.html). 107 | 108 | ## Authors 109 | 110 | PyDicer was developed by the [Ingham Medical Physics team](https://www.unsw.edu.au/medicine-health/our-schools/clinical-medicine/research-impact/research-groups/cancer/ingham-medical-physics) in South-Western Sydney. It was developed as part of the [Australian Cancer Data Network](https://australian-cancer-data.network/) supported by the [Australian Research Data Commons](https://ardc.edu.au/). 111 | 112 | - **Phillip Chlap** - [phillip.chlap@unsw.edu.au](phillip.chlap@unsw.edu.au) 113 | - **Daniel Al Mouiee** - [d.almouiee@gmail.com](d.almouiee@gmail.com) 114 | -------------------------------------------------------------------------------- /pydicer/input/orthanc.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | import logging 3 | from typing import Union 4 | from pathlib import Path 5 | 6 | import pydicom 7 | from pyorthanc.deprecated.client import Orthanc 8 | 9 | from pydicer.utils import get_iterator 10 | from pydicer.input.base import InputBase 11 | 12 | logger = logging.getLogger(__name__) 13 | 14 | 15 | def adapt_dataset_from_bytes(blob: bytes) -> pydicom.Dataset: 16 | """Convert bytes coming from Orthanc to DICOM dataset 17 | 18 | Args: 19 | blob (bytes): The bytes to convert 20 | 21 | Returns: 22 | pydicom.Dataset: The DICOM dataset 23 | """ 24 | dataset = pydicom.dcmread(BytesIO(blob)) 25 | return dataset 26 | 27 | 28 | class OrthancInput(InputBase): 29 | def __init__( 30 | self, 31 | host: str, 32 | port: int, 33 | username: str = None, 34 | password: str = None, 35 | working_directory: Union[str, Path] = None, 36 | ): 37 | """Class for fetching files from Orthanc. 38 | 39 | Args: 40 | host (str): The IP address or host name of the Orthanc. 41 | port (int): The port to use to communicate on. 42 | username (str, optional): Orthanc username. 43 | password (str, optional): Orthanc password. 44 | working_directory (str|pathlib.Path, optional): The working directory in which to 45 | store the data fetched. Defaults to a temp directory. 46 | 47 | Raises: 48 | ConnectionError: Raises a connection error if unable to verify the connection to 49 | Orthanc. 50 | """ 51 | 52 | super().__init__(working_directory) 53 | 54 | if not host.startswith("http"): 55 | host = f"http://{host}" 56 | 57 | self.orthanc = Orthanc(f"{host}:{port}") 58 | 59 | if username is not None and password is not None: 60 | self.orthanc.setup_credentials(username, password) 61 | 62 | # Do a dummy lookup to check that we can reach the Orthanc host, this will throw a 63 | # connection error if we can't connect to the Orthanc 64 | self.orthanc.c_find({"Level": "Patient", "Query": {"PatientID": "XXX"}}) 65 | 66 | def fetch_data( 67 | self, patients: Union[list, str], modalities: Union[list, str] = None 68 | ): 69 | """Download the DICOM data from Orthanc 70 | 71 | Args: 72 | patients (list|str): A list of patient IDs, or a single patient ID. 73 | modalities (list|str, optional): List of modalities or a single modality to fetch. 74 | Defaults to None where all modalities would be fetched. 
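        Example:
            An illustrative sketch only -- the host, port and patient ID below are placeholders
            for your own Orthanc instance::

                orthanc_input = OrthancInput("127.0.0.1", 8042)
                orthanc_input.fetch_data("HNSCC-01-0019", modalities=["CT", "RTSTRUCT"])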
75 | """ 76 | 77 | if not isinstance(patients, list) and not isinstance(patients, tuple): 78 | patients = [patients] 79 | 80 | if ( 81 | modalities is not None 82 | and not isinstance(modalities, list) 83 | and not isinstance(modalities, tuple) 84 | ): 85 | modalities = [modalities] 86 | 87 | for patient in get_iterator(patients, unit="patients", name="Orthanc Fetch"): 88 | # Find the Orthanc ID for this patient 89 | orthanc_patient_ids = self.orthanc.c_find( 90 | {"Level": "Patient", "Query": {"PatientID": patient}} 91 | ) 92 | 93 | if len(orthanc_patient_ids) == 0: 94 | logger.warning("Patient not found in Orthanc: %s", patient) 95 | continue 96 | 97 | if len(orthanc_patient_ids) > 1: 98 | logger.warning( 99 | "Patient returned multple Orthanc IDs: %s. Selecting first only", 100 | patient, 101 | ) 102 | 103 | orthanc_patient_id = orthanc_patient_ids[0] 104 | 105 | patient_information = self.orthanc.get_patient_information( 106 | orthanc_patient_id 107 | ) 108 | patient_id = patient_information["MainDicomTags"]["PatientID"] 109 | 110 | # Loop over each study for this patient 111 | study_identifiers = patient_information["Studies"] 112 | for study_identifier in study_identifiers: 113 | # Loop over each series in this study 114 | study_information = self.orthanc.get_study_information(study_identifier) 115 | series_identifiers = study_information["Series"] 116 | for series_identifier in series_identifiers: 117 | series_information = self.orthanc.get_series_information( 118 | series_identifier 119 | ) 120 | 121 | # Skip if this isn't one of the modalities we want 122 | modality = series_information["MainDicomTags"]["Modality"] 123 | if modalities is not None and not modality in modalities: 124 | continue 125 | 126 | series_information = self.orthanc.get_series_information( 127 | series_identifier 128 | ) 129 | series_instance_uid = series_information["MainDicomTags"][ 130 | "SeriesInstanceUID" 131 | ] 132 | 133 | # Create the output directory for this series 134 | series_path = self.working_directory.joinpath( 135 | patient_id, series_instance_uid 136 | ) 137 | series_path.mkdir(exist_ok=True, parents=True) 138 | 139 | # Loop over each instance in this series 140 | instance_identifiers = series_information["Instances"] 141 | for instance_identifier in instance_identifiers: 142 | instance_information = self.orthanc.get_instance_information( 143 | instance_identifier 144 | ) 145 | 146 | # Download the DICOM instance 147 | f = self.orthanc.get_instance_file(instance_identifier) 148 | ds = adapt_dataset_from_bytes(f) 149 | 150 | sop_instance_uid = instance_information["MainDicomTags"][ 151 | "SOPInstanceUID" 152 | ] 153 | ds_file_name = f"{modality}.{sop_instance_uid}.dcm" 154 | ds_path = series_path.joinpath(ds_file_name) 155 | 156 | # Save the DICOM dataset 157 | ds.save_as(ds_path) 158 | logger.debug("Saving DICOM dataset to %s", ds_path) 159 | -------------------------------------------------------------------------------- /pydicer/dataset/structureset.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | from pathlib import Path 4 | 5 | import SimpleITK as sitk 6 | import pandas as pd 7 | 8 | from pydicer.constants import DEFAULT_MAPPING_ID 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | def get_mapping_for_structure_set( 14 | structure_set_row: pd.Series, mapping_id: str 15 | ) -> dict: 16 | """Searches the folder hierarchy to find a structure name mapping file with the given ID. 
17 | 18 | Args: 19 | structure_set_row (pd.Series): The converted dataframe row entry for the structure set. 20 | mapping_id (str): The ID of the mapping to find. 21 | 22 | Returns: 23 | dict: The structure name mapping 24 | """ 25 | structure_set_path = Path(structure_set_row.path) 26 | 27 | potential_mapping_paths = [ 28 | # First look in the structure_set_path folder for the structure mapping 29 | structure_set_path.joinpath(".structure_set_mappings"), 30 | # Next look in the patient folder 31 | structure_set_path.parent.joinpath(".structure_set_mappings"), 32 | # Finally look for the project wide mapping 33 | structure_set_path.parent.parent.parent.parent.joinpath( 34 | ".pydicer", ".structure_set_mappings" 35 | ), 36 | ] 37 | 38 | for mapping_path in potential_mapping_paths: 39 | mapping_file = mapping_path.joinpath(f"{mapping_id}.json") 40 | if mapping_file.exists(): 41 | logger.debug("Using mapping file in %s", mapping_file) 42 | with open(mapping_file, encoding="utf-8") as json_file: 43 | return json.load(json_file) 44 | 45 | return None 46 | 47 | 48 | class StructureSet(dict): 49 | def __init__(self, structure_set_row, mapping_id=DEFAULT_MAPPING_ID): 50 | if not structure_set_row.modality == "RTSTRUCT": 51 | raise AttributeError("structure_set_row modality must be RTSTRUCT") 52 | 53 | self.structure_set_path = Path(structure_set_row.path) 54 | self.structure_set_id = structure_set_row.hashed_uid 55 | 56 | self.structure_names = [ 57 | s.name.replace(".nii.gz", "") 58 | for s in self.structure_set_path.glob("*.nii.gz") 59 | ] 60 | self.unmapped_structure_names = self.structure_names 61 | 62 | self.structure_mapping = None 63 | 64 | # Check if we can find a mapping for this structure set, if not we'll just used the 65 | # unmapped structure names 66 | if mapping_id is not None: 67 | self.structure_mapping = get_mapping_for_structure_set( 68 | structure_set_row, mapping_id 69 | ) 70 | 71 | if self.structure_mapping is None: 72 | logger.warning("No mapping file found with id %s", mapping_id) 73 | 74 | if self.structure_mapping is not None: 75 | self.structure_names = list(self.structure_mapping.keys()) 76 | 77 | self.cache = {} 78 | 79 | def get_mapped_structure_name(self, item: str) -> str: 80 | """Get the structure set specific name for a structure that may have been mapped. 81 | 82 | Args: 83 | item (str): The standardised name to look up. 84 | 85 | Returns: 86 | str: The structure set specific name if it could be mapped (returns the original name 87 | otherwise). 88 | """ 89 | structure_name = item 90 | 91 | if self.structure_mapping is not None: 92 | if item in self.structure_mapping: 93 | for variation in self.structure_mapping[item]: 94 | variation_path = self.structure_set_path.joinpath( 95 | f"{variation}.nii.gz" 96 | ) 97 | if variation_path.exists(): 98 | # Found variation, let's use that file... 99 | # TODO an issue would occur if there were multiple files that would match 100 | # this mapping. In that case we should probably throw an error (or at 101 | # a warning?). 102 | structure_name = variation 103 | 104 | return structure_name 105 | 106 | def get_standardised_structure_name(self, item: str) -> str: 107 | """Get the standardised name for a structure that is present in this structure set. 108 | 109 | Args: 110 | item (str): The name of the structure in this structure set. 111 | 112 | Returns: 113 | str: The standardised name if it could be mapped (returns the original name 114 | otherwise). 
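        Example:
            With a mapping such as ``{"SpinalCord": ["Cord", "copy_of_cord"]}`` in place
            (illustrative names only), ``get_standardised_structure_name("Cord")`` returns
            ``"SpinalCord"``, while a name with no mapping entry is returned unchanged.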
115 | """ 116 | 117 | structure_name = item 118 | 119 | if self.structure_mapping is not None: 120 | for standardised_name in self.structure_mapping: 121 | for variation in self.structure_mapping[standardised_name]: 122 | if variation == item: 123 | return standardised_name 124 | 125 | return structure_name 126 | 127 | def __getitem__(self, item): 128 | structure_name = self.get_mapped_structure_name(item) 129 | 130 | if item not in self.structure_names: 131 | raise KeyError( 132 | f"Structure name {item} not found in structure set {self.structure_set_id}." 133 | ) 134 | 135 | if item in self.cache: 136 | return self.cache[item] 137 | 138 | structure_path = self.structure_set_path.joinpath(f"{structure_name}.nii.gz") 139 | 140 | if not structure_path.exists(): 141 | raise FileExistsError( 142 | f"No structure file found for {structure_name} in structure " 143 | f"set {self.structure_set_id}" 144 | ) 145 | 146 | result = sitk.ReadImage(str(structure_path)) 147 | 148 | self.cache[item] = result 149 | return result 150 | 151 | def keys(self): 152 | return self.structure_names 153 | 154 | def values(self): 155 | return [self[s] for s in self.structure_names] 156 | 157 | def items(self): 158 | return [(s, self[s]) for s in self.structure_names] 159 | 160 | def get_unmapped_structures(self) -> list: 161 | """Get a list of structures for which no structure was found based on the mapping. If no 162 | mapping is being used this will always be empty. 163 | 164 | Returns: 165 | list: Names of structures that can't be found using a mapping 166 | """ 167 | missing_mappings = [] 168 | for k in self.keys(): 169 | structure_name = self.get_mapped_structure_name(k) 170 | structure_path = self.structure_set_path.joinpath( 171 | f"{structure_name}.nii.gz" 172 | ) 173 | if not structure_path.exists(): 174 | missing_mappings.append(k) 175 | 176 | return missing_mappings 177 | -------------------------------------------------------------------------------- /pydicer/config.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import json 3 | 4 | from pathlib import Path 5 | 6 | from pydicer.constants import PYDICER_DIR_NAME 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | PYDICER_CONFIG = { 11 | "verbosity": { 12 | "module": "general", 13 | "description": "Level of output for standard out. Value indicates the Python built-in log " 14 | "level. A value of 0 (not set) will display the process bar. Logs of all levels are " 15 | "available in the .pydicer directory.", 16 | "type": int, 17 | "default": 0, 18 | "choices": [ 19 | logging.NOTSET, 20 | logging.DEBUG, 21 | logging.INFO, 22 | logging.WARNING, 23 | logging.ERROR, 24 | ], 25 | }, 26 | "for_fallback_linkage": { 27 | "module": "general", 28 | "description": "Determine whether to fallback on linking objects via their Frame of " 29 | "Reference if no more stable link exists.", 30 | "type": bool, 31 | "default": True, 32 | "choices": None, 33 | }, 34 | "enforce_dcm_ext": { 35 | "module": "preprocess", 36 | "description": "If True only files with the .dcm or .DCM extension will be preprocessed. " 37 | "otherwise any file in the DICOM directory will be preprocessed.", 38 | "type": bool, 39 | "default": True, 40 | "choices": None, 41 | }, 42 | "interp_missing_slices": { 43 | "module": "convert", 44 | "description": "When missing slices are detected these will be interpolated if True. 
" 45 | "otherwise these cases will be sent to quarantine.", 46 | "type": bool, 47 | "default": True, 48 | "choices": None, 49 | }, 50 | "ignore_duplicate_slices": { 51 | "module": "convert", 52 | "description": "If two slices at the same location with different pixel data are found " 53 | "then the first slice is used if ignore_duplicate_slices is True. Otherwise an error is" 54 | "raised and these images are sent to quarantine", 55 | "type": bool, 56 | "default": False, 57 | "choices": None, 58 | }, 59 | "default_patient_weight": { 60 | "module": "convert", 61 | "description": "Default patient weight to use for PET conversion if it cannot be " 62 | "determined from the DICOM headers. If None, those cases will be sent to " 63 | "quarantine.", 64 | "type": float, 65 | "default": None, 66 | "choices": None, 67 | }, 68 | "generate_nrrd": { 69 | "module": "convert", 70 | "description": "Whether or not to generate an additional NRRD file when converting " 71 | "RTSTRUCT. This allows loading easily into 3D slicer.", 72 | "type": bool, 73 | "default": True, 74 | "choices": None, 75 | }, 76 | "nrrd_colormap": { 77 | "module": "convert", 78 | "description": "Matplotlib colormap to use when saving NRRD file of structures.", 79 | "type": str, 80 | "default": "rainbow", 81 | "choices": None, 82 | }, 83 | } 84 | 85 | 86 | class PyDicerConfig: 87 | class __PyDicerConfig: # pylint: disable=invalid-name 88 | def __init__(self, working_dir=None): 89 | if working_dir is None: 90 | raise ValueError("working_dir must be set on config init") 91 | self.working_dir = Path(working_dir) 92 | 93 | pydicer_dir = self.working_dir.joinpath(PYDICER_DIR_NAME) 94 | self.config_path = pydicer_dir.joinpath("config.json") 95 | 96 | self.pydicer_config = {} 97 | 98 | if self.config_path.exists(): 99 | # Read existing config if exists 100 | with open(self.config_path, "r", encoding="utf-8") as cp: 101 | self.pydicer_config = json.load(cp) 102 | 103 | # Add config items from config object. 104 | # Like this if new items are added in future versions of pydicer, new config items 105 | # will be added in 106 | for key, item in PYDICER_CONFIG.items(): 107 | if not key in self.pydicer_config: 108 | self.pydicer_config[key] = item["default"] 109 | 110 | instance = None 111 | 112 | def __init__(self, working_dir=None): 113 | """Return the singleton instance of PyDicerConfig 114 | 115 | Args: 116 | working_dir (str|pathlib.Path, optional): The working directory for project. Required 117 | on first initialisation. Defaults to None. 118 | """ 119 | 120 | if working_dir is not None and PyDicerConfig.instance is not None: 121 | # If we already have a config instance, but the working directory has changed, we will 122 | # recreate the instance with the new working directory. 123 | if not working_dir == PyDicerConfig.instance.working_dir: 124 | PyDicerConfig.instance = PyDicerConfig.__PyDicerConfig(working_dir) 125 | elif PyDicerConfig.instance is None: 126 | PyDicerConfig.instance = PyDicerConfig.__PyDicerConfig(working_dir) 127 | 128 | def get_working_dir(self): 129 | """Get the working directory configured for the project. 
130 | 131 | Returns: 132 | pathlib.Path: The working directory 133 | """ 134 | return self.instance.working_dir 135 | 136 | def get_config(self, name: str) -> object: 137 | """Get the value of the config item with the specified name 138 | 139 | Args: 140 | name (str): Config item name 141 | 142 | Raises: 143 | AttributeError: Config value with name doesn't exist 144 | 145 | Returns: 146 | object: Value of the config with the given name 147 | """ 148 | 149 | if not name in self.instance.pydicer_config: 150 | raise AttributeError(f"{name} does not exist in config") 151 | 152 | return self.instance.pydicer_config[name] 153 | 154 | def set_config(self, name: str, value: object): 155 | """Set the value for the config with the given name 156 | 157 | Args: 158 | name (str): The name of the config to set 159 | value (object): The value of the config 160 | 161 | Raises: 162 | AttributeError: Config value with name doesn't exist 163 | ValueError: Config value is of the wrong type 164 | """ 165 | 166 | if not name in self.instance.pydicer_config: 167 | raise AttributeError(f"{name} does not exist in config") 168 | 169 | if not isinstance(value, PYDICER_CONFIG[name]["type"]) and not value is None: 170 | raise ValueError( 171 | f"Config {name} must be of type " 172 | f"{type(self.instance.pydicer_config[name])}" 173 | ) 174 | 175 | self.instance.pydicer_config[name] = value 176 | self.save_config() 177 | 178 | def save_config(self): 179 | """Save the config to the pydicer directory""" 180 | 181 | if not self.instance.config_path.parent.exists(): 182 | self.instance.config_path.parent.mkdir() 183 | 184 | with open(self.instance.config_path, "w", encoding="utf-8") as fp: 185 | json.dump(self.instance.pydicer_config, fp, indent=2) 186 | -------------------------------------------------------------------------------- /pydicer/generate/models.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | import logging 3 | from pathlib import Path 4 | 5 | import SimpleITK as sitk 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | def load_output_nifti(output_dir: Path) -> dict: 11 | """Loads segmentation masks saved as Nifti's in an output directory into a dictionary for use 12 | in PyDicer. 13 | 14 | Args: 15 | output_dir (Path): The output directory of a segmentation model. 16 | 17 | Returns: 18 | dict: Dictionary of segmentation masks with the structure name as key and sitk.Image mask 19 | as value. 20 | """ 21 | results = {} 22 | 23 | # Load the output masks into a dict to return 24 | for mask_file in output_dir.glob("*.nii.gz"): 25 | mask = sitk.ReadImage(str(mask_file)) 26 | 27 | structure_name = mask_file.name.replace(".nii.gz", "") 28 | 29 | # Check if the mask is empty, total segmentator stores empty mask files for structures 30 | # that aren't within FOV 31 | if sitk.GetArrayFromImage(mask).sum() == 0: 32 | logger.debug("Segmentation mask for %s is empty, skipping...", structure_name) 33 | continue 34 | 35 | logger.debug("Loading segmentation mask for %s", structure_name) 36 | results[structure_name] = mask 37 | 38 | return results 39 | 40 | 41 | def run_total_segmentator(input_image: sitk.Image) -> dict: 42 | """Run Total Segmentator on a given input image. Ensure the Total Segmentator is installed: 43 | 44 | ``` 45 | pip install TotalSegmentator 46 | ``` 47 | 48 | See https://github.com/wasserth/TotalSegmentator for more information. 49 | 50 | Args: 51 | input_image (sitk.Image): Input image (should be CT) to segment. 
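    Example:
        An illustrative sketch only -- the file names are placeholders and TotalSegmentator
        must be installed for this to run::

            import SimpleITK as sitk

            img = sitk.ReadImage("ct.nii.gz")
            masks = run_total_segmentator(img)
            for name, mask in masks.items():
                sitk.WriteImage(mask, f"{name}.nii.gz")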
52 | 53 | Returns: 54 | dict: Dictionary of segmentations with structure name as key and sitk.Image mask as value. 55 | """ 56 | 57 | # Import within function since this is an optional dependency 58 | # pylint: disable=import-outside-toplevel 59 | from totalsegmentator.python_api import totalsegmentator 60 | 61 | results = {} 62 | 63 | with tempfile.TemporaryDirectory() as working_dir: 64 | logger.debug("Running TotalSegmentator in temporary directory: %s", working_dir) 65 | 66 | working_dir = Path(working_dir) 67 | 68 | # Save the temporary image file for total segmentator to find 69 | input_dir = working_dir.joinpath("input") 70 | input_dir.mkdir() 71 | input_file = input_dir.joinpath("img.nii.gz") 72 | sitk.WriteImage(input_image, str(input_file)) 73 | 74 | # Prepare a temporary folder for total segmentator to store the output 75 | output_dir = working_dir.joinpath("output") 76 | output_dir.mkdir() 77 | 78 | # Run total segmentator 79 | totalsegmentator(input_file, output_dir) 80 | 81 | # Load the output masks into a dict to return 82 | results = load_output_nifti(output_dir) 83 | 84 | logger.debug("TotalSegmentator complete") 85 | 86 | return results 87 | 88 | 89 | def get_available_mhub_models() -> dict: 90 | """Determine which mHub models have been configured for use in PyDicer. 91 | 92 | Returns: 93 | dict: A dictionary with mhub model id as key and the path to the config file as value. 94 | """ 95 | 96 | available_models = {} 97 | model_config_directory = Path(__file__).parent.joinpath("mhubconfigs") 98 | logger.debug("Loading mHub model configs from %s", model_config_directory) 99 | for model_config in model_config_directory.glob("*.yml"): 100 | available_models[model_config.name.replace(".yml", "")] = model_config.absolute() 101 | 102 | logger.debug("Found available configs: %s", available_models) 103 | return available_models 104 | 105 | 106 | def run_mhub_model( 107 | input_image: sitk.Image, 108 | mhub_model: str, 109 | mhub_config_file: Path = None, 110 | gpu: bool = True, 111 | ) -> dict: 112 | """Use Docker to run a model made available through mHub: https://mhub.ai/ 113 | 114 | Args: 115 | input_image (sitk.Image): The SimpleITK image to segment. 116 | mhub_model (str): The name of the model to run. Must be configured 117 | (check `get_available_mhub_models`) or a custom mhub_config_file should be provided. 118 | mhub_config_file (Path, optional): Path to a custom config file to use. Defaults to None. 119 | gpu (bool, optional): If True, all gpus will be requested when running the Docker image. 120 | Defaults to True. 121 | 122 | Raises: 123 | ImportError: Raised if the Python Docker SDK is not installed. 124 | ValueError: Raised if an mHub model which has not been configured for use in PyDicer is 125 | requested. Use the `get_available_mhub_models` function to determine available models. 126 | 127 | Returns: 128 | dict: Dictionary of segmentations with structure name as key and sitk.Image mask as value. 129 | """ 130 | 131 | try: 132 | # pylint: disable=import-outside-toplevel 133 | import docker 134 | except ImportError as ie: 135 | raise ImportError( 136 | "Docker Python package is required to run mHub models. 
Install with: " 137 | "pip install docker" 138 | ) from ie 139 | 140 | client = docker.from_env() 141 | 142 | mhub_image = f"mhubai/{mhub_model}" 143 | 144 | # Try pulling the image 145 | try: 146 | client.images.pull(mhub_image) 147 | except docker.errors.ImageNotFound as inf: 148 | raise docker.errors.ImageNotFound( 149 | f"The mhub image {mhub_image} could not be pulled. " 150 | "Check if this model is available using the get_available_mhub_models function." 151 | ) from inf 152 | 153 | if mhub_config_file is None: 154 | available_mhub_models = get_available_mhub_models() 155 | 156 | if not mhub_model in available_mhub_models: 157 | raise ValueError(f"mHub model {mhub_model} not configured for use in PyDicer.") 158 | 159 | mhub_config_file = available_mhub_models[mhub_model] 160 | 161 | with tempfile.TemporaryDirectory() as working_dir: 162 | logger.info("Running mHub model %s in temporary %s", mhub_model, working_dir) 163 | working_dir = Path(working_dir) 164 | input_dir = working_dir.joinpath("input") 165 | input_dir.mkdir() 166 | input_file = input_dir.joinpath("image.nii.gz") 167 | sitk.WriteImage(input_image, str(input_file)) 168 | 169 | output_dir = working_dir.joinpath("output") 170 | output_dir.mkdir() 171 | 172 | device_requests = [] 173 | if gpu: 174 | # Request all GPUs 175 | device_requests = [docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] 176 | 177 | volumes = { 178 | input_dir.absolute(): {"bind": "/app/data/input_data", "mode": "rw"}, 179 | output_dir.absolute(): {"bind": "/app/data/output_data", "mode": "rw"}, 180 | mhub_config_file: {"bind": "/app/data/config.yml", "mode": "rw"}, 181 | } 182 | 183 | client.containers.run( 184 | mhub_image, 185 | command="--config /app/data/config.yml", 186 | remove=True, 187 | volumes=volumes, 188 | device_requests=device_requests, 189 | ) 190 | 191 | # Load the output masks into a dict to return 192 | results = load_output_nifti(output_dir) 193 | 194 | logger.debug("mHub segmentation complete") 195 | 196 | return results 197 | -------------------------------------------------------------------------------- /examples/Configuration.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Configuration\n", 9 | "\n", 10 | "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/AustralianCancerDataNetwork/pydicer/blob/main/examples/Configuration.ipynb)\n", 11 | "\n", 12 | "PyDicer provides various options which you may configure to change the behaviour of the tool." 
13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "try:\n", 22 | " from pydicer import PyDicer\n", 23 | "except ImportError:\n", 24 | " !pip install pydicer\n", 25 | " from pydicer import PyDicer\n", 26 | "\n", 27 | "import logging\n", 28 | "\n", 29 | "from pydicer.utils import fetch_converted_test_data\n", 30 | "\n", 31 | "working_directory = fetch_converted_test_data(\"./testdata_lctsc\", dataset=\"LCTSC\")\n", 32 | "\n", 33 | "pydicer = PyDicer(working_directory)" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "## Getting and Setting Options\n", 41 | "\n", 42 | "Use the `get_config` and `set_config` functions of the [config module](https://australiancancerdatanetwork.github.io/pydicer/config.html) to get and set configuration options respectively." 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "logging_verbosity = pydicer.config.get_config(\"verbosity\")\n", 52 | "print(f\"Current logging verbosity: {logging_verbosity}\")\n", 53 | "\n", 54 | "# Set to logging level DEBUG\n", 55 | "pydicer.config.set_config(\"verbosity\", logging.DEBUG)\n", 56 | "\n", 57 | "logging_verbosity = pydicer.config.get_config(\"verbosity\")\n", 58 | "print(f\"New logging verbosity: {logging_verbosity}\")" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "## Options Available\n", 66 | "\n", 67 | "### Logging Verbosity\n", 68 | "\n", 69 | "Level of output for standard out. Value indicates the [Python built-in log level](https://docs.python.org/3/library/logging.html#logging-levels). A value of 0\n", 70 | "(not set) will display the process bar. Logs of all levels are available in the .pydicer directory.\n", 71 | "\n", 72 | "Valid options are: `[logging.NOTSET, logging.DEBUG, logging.INFO, logging.WARNING, logging.ERROR]`" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "pydicer.config.set_config(\"verbosity\", logging.DEBUG)" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "### Frame of Reference Fallback Linkage\n", 89 | "\n", 90 | "Determine whether to fallback on linking objects via their [Frame of Reference UID](https://dicom.innolitics.com/ciods/ct-image/frame-of-reference/00200052) if no more stable\n", 91 | "link exists.\n", 92 | "\n", 93 | "Valid options are: `True` or `False`" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | "pydicer.config.set_config(\"for_fallback_linkage\", True)" 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "metadata": {}, 108 | "source": [ 109 | "### Enforce `.dcm` file extension\n", 110 | "\n", 111 | "If True only files with the .dcm or .DCM extension will be preprocessed. 
Otherwise any file in the\n", 112 | "DICOM directory will be preprocessed.\n", 113 | "\n", 114 | "Valid options are: `True` or `False`" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": {}, 121 | "outputs": [], 122 | "source": [ 123 | "pydicer.config.set_config(\"enforce_dcm_ext\", True)" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "### Interpolate Missing Slices\n", 131 | "\n", 132 | "When missing slices are detected these will be interpolated if True. Otherwise these cases will be\n", 133 | "sent to quarantine.\n", 134 | "\n", 135 | "Valid options are: `True` or `False`" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "pydicer.config.set_config(\"interp_missing_slices\", True)" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "### Ignore Duplicate Slices\n", 152 | "\n", 153 | "If two slices at the same location with different pixel data are found then the first slice is used\n", 154 | "if ignore_duplicate_slices is True. Otherwise an error is raised and these images are sent to\n", 155 | "quarantine.\n", 156 | "\n", 157 | "Valid options are: `True` or `False`" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "metadata": {}, 164 | "outputs": [], 165 | "source": [ 166 | "pydicer.config.set_config(\"ignore_duplicate_slices\", False)" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": {}, 172 | "source": [ 173 | "### Generate NRRD\n", 174 | "\n", 175 | "Whether or not to generate an additional NRRD file when converting RTSTRUCT. This allows loading\n", 176 | "easily into [3D slicer](https://www.slicer.org/), but it takes up more disk space and takes time to generate the file.\n", 177 | "\n", 178 | "Valid options are: `True` or `False`" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": null, 184 | "metadata": {}, 185 | "outputs": [], 186 | "source": [ 187 | "pydicer.config.set_config(\"generate_nrrd\", False)" 188 | ] 189 | }, 190 | { 191 | "cell_type": "markdown", 192 | "metadata": {}, 193 | "source": [ 194 | "### NRRD Colormap\n", 195 | "\n", 196 | "If NRRD files are to be generated, this defines the Matplotlib colormap to use when saving NRRD\n", 197 | "file of structures.\n", 198 | "\n", 199 | "Valid options are any [Matplotlib colormap](https://matplotlib.org/stable/users/explain/colors/colormaps.html)." 
200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "pydicer.config.set_config(\"nrrd_colormap\", \"rainbow\")" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": {}, 215 | "outputs": [], 216 | "source": [] 217 | } 218 | ], 219 | "metadata": { 220 | "interpreter": { 221 | "hash": "814af119db7f8f2860617be3dcd1d37c560587d11c65bd58c45b1679d3ee6ea4" 222 | }, 223 | "kernelspec": { 224 | "display_name": "Python 3.8.0 64-bit ('pydicer': pyenv)", 225 | "language": "python", 226 | "name": "python3" 227 | }, 228 | "language_info": { 229 | "codemirror_mode": { 230 | "name": "ipython", 231 | "version": 3 232 | }, 233 | "file_extension": ".py", 234 | "mimetype": "text/x-python", 235 | "name": "python", 236 | "nbconvert_exporter": "python", 237 | "pygments_lexer": "ipython3", 238 | "version": "3.9.16" 239 | }, 240 | "orig_nbformat": 4 241 | }, 242 | "nbformat": 4, 243 | "nbformat_minor": 2 244 | } 245 | -------------------------------------------------------------------------------- /examples/WorkingWithData.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Working with Data\n", 9 | "\n", 10 | "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/AustralianCancerDataNetwork/pydicer/blob/main/examples/WorkingWithData.ipynb)\n", 11 | "\n", 12 | "Here we present some useful tips & tricks which to help working with data which has been converted\n", 13 | "using PyDicer. As you will see, working with data in PyDicer is heavily oriented around DataFrames\n", 14 | "provided by the Pandas library. If you aren't familiar with Pandas, we recommend working through \n", 15 | "the [Pandas Getting Started Tutorials](https://pandas.pydata.org/docs/getting_started/index.html)." 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "try:\n", 25 | " from pydicer import PyDicer\n", 26 | "except ImportError:\n", 27 | " !pip install pydicer\n", 28 | " from pydicer import PyDicer\n", 29 | "\n", 30 | "from pathlib import Path\n", 31 | "\n", 32 | "from pydicer.utils import (\n", 33 | " fetch_converted_test_data,\n", 34 | " load_object_metadata,\n", 35 | " determine_dcm_datetime,\n", 36 | " read_simple_itk_image\n", 37 | ")" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "## Setup PyDicer\n", 45 | "\n", 46 | "Here we load the LCTSC data which has already been converted. This is downloaded into the\n", 47 | "`testdata_lctsc` directory. We also initialise a `PyDicer` object." 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "working_directory = fetch_converted_test_data(\"./testdata_lctsc\", dataset=\"LCTSC\")\n", 57 | "\n", 58 | "pydicer = PyDicer(working_directory)" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "## Read Converted Data\n", 66 | "\n", 67 | "To obtain a DataFrame of the converted data, use the [read_converted_data](https://australiancancerdatanetwork.github.io/pydicer/utils.html#pydicer.utils.read_converted_data) function." 
68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "df = pydicer.read_converted_data()\n", 77 | "df" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "## Iterating Over Objects\n", 85 | "\n", 86 | "If you want to perform some operation on (for example) all images in your dataset, you can iterate\n", 87 | "over each image row like this. Within each loop we load each image as a `SimpleITK` image (just\n", 88 | "for demonstration purposes).)" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "for idx, ct_row in df[df.modality==\"CT\"].iterrows():\n", 98 | "\n", 99 | " print(f\"Loading image with hashed UID: {ct_row.hashed_uid}...\", end=\"\")\n", 100 | "\n", 101 | " img = read_simple_itk_image(ct_row)\n", 102 | "\n", 103 | " print(\" Complete\")" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "## Loading Object Metadata\n", 111 | "\n", 112 | "The metadata from the DICOM headers is stored by PyDicer and can be easily loaded using the\n", 113 | "[load_object_metadata](https://australiancancerdatanetwork.github.io/pydicer/utils.html#pydicer.utils.load_object_metadata) function. Simply pass a row from the converted DataFrame into this function\n", 114 | "to load the metadata for that object." 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": {}, 121 | "outputs": [], 122 | "source": [ 123 | "first_row = df.iloc[0]\n", 124 | "ds = load_object_metadata(first_row)\n", 125 | "ds" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": [ 132 | "### Keep only specific header tags\n", 133 | "\n", 134 | "Loading object metadata can be slow, especially when doing this for many objects at once. So, you\n", 135 | "can specify the `keep_tags` argument if you know which header attributes you want to use. This\n", 136 | "speeds up loading metadata significantly.\n", 137 | "\n", 138 | "Here we load only the `StudyDate`, `PatientSex` and `Manufacturer`.\n", 139 | "\n", 140 | "> Tip: These tags are defined by the DICOM standard, and we use `pydicom` to load this metadata. In\n", 141 | "> fact, the metadata returned is a `pydicom` Dataset. Check out the [pydicom documentation](https://pydicom.github.io/pydicom/dev/old/pydicom_user_guide.html) for more information." 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "ds = load_object_metadata(first_row, keep_tags=[\"StudyDate\", \"PatientSex\", \"Manufacturer\"])\n", 151 | "ds" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "### Loading metadata for all data objects\n", 159 | "\n", 160 | "You can use the Pandas `apply` function to load metadata for all rows and add it as a column to the\n", 161 | "converted DataFrame." 
162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "df[\"StudyDescription\"] = df.apply(lambda row: load_object_metadata(row, keep_tags=\"StudyDescription\").StudyDescription, axis=1)\n", 171 | "df" 172 | ] 173 | }, 174 | { 175 | "cell_type": "markdown", 176 | "metadata": {}, 177 | "source": [ 178 | "### Determine Date of Object\n", 179 | "\n", 180 | "There are several DICOM header tags which could define the date of an object. The DICOM standard\n", 181 | "doesn't require all of these to be set within the metadata. PyDicer provides the \n", 182 | "[determine_dcm_datetime](https://australiancancerdatanetwork.github.io/pydicer/utils.html#pydicer.utils.determine_dcm_datetime) function to extract the date from the DICOM header." 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "ds = load_object_metadata(first_row)\n", 192 | "obj_datetime = determine_dcm_datetime(ds)\n", 193 | "print(obj_datetime)" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": null, 199 | "metadata": {}, 200 | "outputs": [], 201 | "source": [] 202 | } 203 | ], 204 | "metadata": { 205 | "interpreter": { 206 | "hash": "814af119db7f8f2860617be3dcd1d37c560587d11c65bd58c45b1679d3ee6ea4" 207 | }, 208 | "kernelspec": { 209 | "display_name": "Python 3.8.0 64-bit ('pydicer': pyenv)", 210 | "language": "python", 211 | "name": "python3" 212 | }, 213 | "language_info": { 214 | "codemirror_mode": { 215 | "name": "ipython", 216 | "version": 3 217 | }, 218 | "file_extension": ".py", 219 | "mimetype": "text/x-python", 220 | "name": "python", 221 | "nbconvert_exporter": "python", 222 | "pygments_lexer": "ipython3", 223 | "version": "3.9.16" 224 | }, 225 | "orig_nbformat": 4 226 | }, 227 | "nbformat": 4, 228 | "nbformat_minor": 2 229 | } 230 | -------------------------------------------------------------------------------- /examples/GettingStarted.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Getting Started\n", 9 | "\n", 10 | "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/AustralianCancerDataNetwork/pydicer/blob/main/examples/GettingStarted.ipynb)\n", 11 | "\n", 12 | "This notebook provides a basic example to run the PyDicer pipeline using some test data." 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "try:\n", 22 | " from pydicer import PyDicer\n", 23 | "except ImportError:\n", 24 | " !pip install pydicer\n", 25 | " from pydicer import PyDicer\n", 26 | "\n", 27 | "from pathlib import Path\n", 28 | "\n", 29 | "from pydicer.input.test import TestInput" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "## Setup working directory\n", 37 | "\n", 38 | "First we'll create a directory for our project. Change the `directory` location to a folder on your\n", 39 | "system where you'd like PyDicer to work with this data." 
40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "directory = Path(\"./data\")" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "## Create a PyDicer object\n", 56 | "\n", 57 | "The [PyDicer class](https://australiancancerdatanetwork.github.io/pydicer/tool.html) provides all functionlity to run the pipeline and work with the data stored and\n", 58 | "converted in your project directory" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "pydicer = PyDicer(directory)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "## Fetch some data\n", 75 | "\n", 76 | "A [TestInput class](https://australiancancerdatanetwork.github.io/pydicer/input.html#pydicer.input.test.TestInput) is provided in pydicer to download some sample data to work with. Several other\n", 77 | "input classes exist if you'd like to retrieve DICOM data for conversion from somewhere else, [see \n", 78 | "the docs for information on how these work](https://australiancancerdatanetwork.github.io/pydicer/html/input.html)." 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "dicom_directory = directory.joinpath(\"dicom\")\n", 88 | "test_input = TestInput(dicom_directory)\n", 89 | "test_input.fetch_data()\n", 90 | "\n", 91 | "# Add the input DICOM location to the pydicer object\n", 92 | "pydicer.add_input(dicom_directory)" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "## Run the pipeline\n", 100 | "\n", 101 | "The function runs the entire PyDicer pipeline on the test DICOM data. This includes:\n", 102 | "- [Preprocessing](https://australiancancerdatanetwork.github.io/pydicer/preprocess.html) the DICOM data (data which can't be handled or is corrupt will be placed in Quarantine)\n", 103 | "- [Convert](https://australiancancerdatanetwork.github.io/pydicer/convert.html) the data to Nifti format (see the output in the `data` directory)\n", 104 | "- [Visualise](https://australiancancerdatanetwork.github.io/pydicer/visualise.html) the data (png files will be placed alongside the converted Nifti files)\n", 105 | "- [Compute Radiomics features](https://australiancancerdatanetwork.github.io/pydicer/analyse.html#pydicer.analyse.data.AnalyseData.compute_radiomics) (Results are stored in a csv alongside the converted structures)\n", 106 | "- [Compute Dose Volume Histograms](https://australiancancerdatanetwork.github.io/pydicer/analyse.html#pydicer.analyse.data.AnalyseData.compute_dvh) (results are stored alongside converted dose data)\n", 107 | "\n", 108 | "> Note that the entire Pipeline can be quite time consuming to run. Depending on your project's\n", 109 | "> dataset you will likely want to run only portions of the pipeline with finer control over each\n", 110 | "> step. For this reason we only run the pipeline for one patient here as a demonstration." 
111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "pydicer.run_pipeline(patient=\"HNSCC-01-0019\")" 120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": {}, 125 | "source": [ 126 | "## Prepare a dataset\n", 127 | "\n", 128 | "Datasets which are extracted in DICOM format can often be a bit messy and require some cleaning up\n", 129 | "after conversion. Exactly what data objects to extract for the clean dataset will differ by project\n", 130 | "but here we use a somewhat common approach of extracting the latest structure set for each patient\n", 131 | "and the image linked to that.\n", 132 | "\n", 133 | "The resulting dataset is stored in a folder with your dataset name (`clean` for this example).\n", 134 | "\n", 135 | "See the [dataset preparation example](https://australiancancerdatanetwork.github.io/pydicer/_examples/DatasetPreparation.html) for a more detailed description on how this works.\n" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "pydicer.dataset.prepare(dataset_name=\"clean\", preparation_function=\"rt_latest_dose\")" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "## Analyse the dataset\n", 152 | "\n", 153 | "The pipeline computes first-order radiomics features by default, as well as dose volume histograms.\n", 154 | "Here we can extract out the results easily into a Pandas DataFrame for analysis.\n", 155 | "\n", 156 | "Check out the [Compute Radiomics](https://australiancancerdatanetwork.github.io/pydicer/_examples/Radiomics.html) and the [Dose Metrics](https://australiancancerdatanetwork.github.io/pydicer/_examples/DoseMetrics.html) examples for further details on how to use these functions." 
157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": {}, 163 | "outputs": [], 164 | "source": [ 165 | "# Display the DataFrame of radiomics computed\n", 166 | "df_radiomics = pydicer.analyse.get_all_computed_radiomics_for_dataset(dataset_name=\"clean\")\n", 167 | "df_radiomics" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": null, 173 | "metadata": {}, 174 | "outputs": [], 175 | "source": [ 176 | "# Extract the D95, D50 and V3 dose metrics\n", 177 | "df_dose_metrics = pydicer.analyse.compute_dose_metrics(dataset_name=\"clean\", d_point=[95, 50], v_point=[3])\n", 178 | "df_dose_metrics" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": null, 184 | "metadata": {}, 185 | "outputs": [], 186 | "source": [] 187 | } 188 | ], 189 | "metadata": { 190 | "interpreter": { 191 | "hash": "814af119db7f8f2860617be3dcd1d37c560587d11c65bd58c45b1679d3ee6ea4" 192 | }, 193 | "kernelspec": { 194 | "display_name": "Python 3.8.0 64-bit ('pydicer': pyenv)", 195 | "language": "python", 196 | "name": "python3" 197 | }, 198 | "language_info": { 199 | "codemirror_mode": { 200 | "name": "ipython", 201 | "version": 3 202 | }, 203 | "file_extension": ".py", 204 | "mimetype": "text/x-python", 205 | "name": "python", 206 | "nbconvert_exporter": "python", 207 | "pygments_lexer": "ipython3", 208 | "version": "3.9.16" 209 | }, 210 | "orig_nbformat": 4 211 | }, 212 | "nbformat": 4, 213 | "nbformat_minor": 2 214 | } 215 | -------------------------------------------------------------------------------- /tests/test_structure_set.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=redefined-outer-name,missing-function-docstring 2 | 3 | import json 4 | 5 | import SimpleITK as sitk 6 | 7 | from pydicer import PyDicer 8 | from pydicer.utils import add_structure_name_mapping, read_converted_data 9 | from pydicer.dataset.structureset import StructureSet 10 | from pydicer.constants import CONVERTED_DIR_NAME 11 | 12 | 13 | def test_add_project_mapping(test_data_converted): 14 | working_directory = test_data_converted 15 | 16 | mapping_id = "test_mapping" 17 | mapping = { 18 | "Brain": ["brain", "BRAIN"], 19 | "SpinalCord": ["Cord", "copy_of_cord"], 20 | } 21 | add_structure_name_mapping(mapping, mapping_id=mapping_id, working_directory=working_directory) 22 | 23 | # Confirm that the mapping file exists 24 | mapping_file = working_directory.joinpath( 25 | ".pydicer", ".structure_set_mappings", f"{mapping_id}.json" 26 | ) 27 | assert mapping_file.exists() 28 | 29 | # Read the file and confirm it contains the same contents as the mapping we provided 30 | with open(mapping_file, encoding="utf-8") as json_file: 31 | mapping_loaded = json.load(json_file) 32 | 33 | assert mapping == mapping_loaded 34 | 35 | 36 | def test_add_structure_set_mapping(test_data_converted): 37 | working_directory = test_data_converted 38 | 39 | df = read_converted_data(working_directory) 40 | 41 | # Pick one structure set to supply mapping for 42 | struct_hash = "6d2934" 43 | struct_row = df[df.hashed_uid == struct_hash].iloc[0] 44 | 45 | mapping_id = "structure_set_mapping" 46 | mapping = { 47 | "Brain": ["brain", "BRAIN"], 48 | "SpinalCord": ["Cord", "copy_of_cord"], 49 | } 50 | add_structure_name_mapping(mapping, mapping_id=mapping_id, structure_set_row=struct_row) 51 | 52 | # Confirm that the mapping file exists 53 | mapping_file = working_directory.joinpath( 54 | CONVERTED_DIR_NAME, 55 | 
struct_row.patient_id, 56 | "structures", 57 | struct_hash, 58 | ".structure_set_mappings", 59 | f"{mapping_id}.json", 60 | ) 61 | assert mapping_file.exists() 62 | 63 | # Read the file and confirm it contains the same contents as the mapping we provided 64 | with open(mapping_file, encoding="utf-8") as json_file: 65 | mapping_loaded = json.load(json_file) 66 | 67 | assert mapping == mapping_loaded 68 | 69 | 70 | def test_add_patient_mapping(test_data_converted): 71 | working_directory = test_data_converted 72 | 73 | mapping_id = "test_mapping" 74 | mapping = { 75 | "Brain": ["brain", "BRAIN"], 76 | "SpinalCord": ["Cord", "copy_of_cord"], 77 | } 78 | patient_id = "HNSCC-01-0199" 79 | add_structure_name_mapping( 80 | mapping, 81 | mapping_id=mapping_id, 82 | working_directory=working_directory, 83 | patient_id=patient_id, 84 | ) 85 | 86 | # Confirm that the mapping file exists 87 | mapping_file = working_directory.joinpath( 88 | CONVERTED_DIR_NAME, 89 | patient_id, 90 | "structures", 91 | ".structure_set_mappings", 92 | f"{mapping_id}.json", 93 | ) 94 | assert mapping_file.exists() 95 | 96 | # Read the file and confirm it contains the same contents as the mapping we provided 97 | with open(mapping_file, encoding="utf-8") as json_file: 98 | mapping_loaded = json.load(json_file) 99 | 100 | assert mapping == mapping_loaded 101 | 102 | 103 | def test_structure_set_class(test_data_converted): 104 | working_directory = test_data_converted 105 | 106 | df = read_converted_data(working_directory) 107 | 108 | # Pick one structure set to test mapping for 109 | struct_hash = "06e49c" 110 | struct_row = df[df.hashed_uid == struct_hash].iloc[0] 111 | 112 | # Check that we look up the correct structure name 113 | ss = StructureSet(struct_row) 114 | 115 | # Check that all structures are loaded 116 | assert len(ss.structure_names) == 38 117 | 118 | # Load a structure, confirm the values are as expected 119 | spinal_cord = ss["Cord"] 120 | spinal_cord_arr = sitk.GetArrayFromImage(spinal_cord) 121 | assert spinal_cord_arr.sum() == 7880 122 | 123 | 124 | def test_structure_set_mapping(test_data_converted): 125 | working_directory = test_data_converted 126 | 127 | df = read_converted_data(working_directory) 128 | 129 | # Add a mapping 130 | mapping_id = "ss_mapping" 131 | mapping = { 132 | "SpinalCord": ["Cord", "copy_of_cord"], 133 | "Parotid_L": ["Left_parotid", "Lt_Parotid"], 134 | "Parotid_R": ["Right_parotid", "Rt_Parotid"], 135 | "Brain": ["BRAIN"], 136 | } 137 | add_structure_name_mapping(mapping, mapping_id=mapping_id, working_directory=working_directory) 138 | 139 | # Pick one structure set to test mapping for 140 | struct_hash = "06e49c" 141 | struct_row = df[df.hashed_uid == struct_hash].iloc[0] 142 | 143 | # Check that we look up the correct structure name 144 | ss = StructureSet(struct_row, mapping_id=mapping_id) 145 | assert ss.get_mapped_structure_name("SpinalCord") == "Cord" 146 | assert ss.get_mapped_structure_name("Parotid_L") == "Lt_Parotid" 147 | assert ss.get_mapped_structure_name("Parotid_R") == "Rt_Parotid" 148 | 149 | # Check that the correct standardised name is mapped 150 | assert ss.get_standardised_structure_name("Cord") == "SpinalCord" 151 | assert ss.get_standardised_structure_name("Lt_Parotid") == "Parotid_L" 152 | assert ss.get_standardised_structure_name("Rt_Parotid") == "Parotid_R" 153 | 154 | # Check that we can read a structure by standardised name 155 | spinal_cord = ss["SpinalCord"] 156 | spinal_cord_arr = sitk.GetArrayFromImage(spinal_cord) 157 | assert 
spinal_cord_arr.sum() == 7880 158 | 159 | # Check that brain is detected as not mapped for this case (as the structure isn't available) 160 | assert len(ss.get_unmapped_structures()) == 1 161 | assert ss.get_unmapped_structures()[0] == "Brain" 162 | 163 | 164 | def test_radiomics_structure_names_standardised(test_data_converted): 165 | working_directory = test_data_converted 166 | pydicer = PyDicer(working_directory) 167 | 168 | # Add a mapping 169 | mapping_id = "rad_mapping" 170 | mapping = { 171 | "SpinalCord": ["Cord", "copy_of_cord", "cord"], 172 | "Parotid_L": ["Left_parotid", "Lt_Parotid", "L_parotid", "LT_Parotid"], 173 | "Parotid_R": ["Right_parotid", "Rt_Parotid", "R_parotid", "RT_Parotid"], 174 | "Brain": ["BRAIN"], 175 | } 176 | add_structure_name_mapping(mapping, mapping_id=mapping_id, working_directory=working_directory) 177 | 178 | # Check the radiomics without mapping 179 | df_radiomics = pydicer.analyse.get_all_computed_radiomics_for_dataset() 180 | assert len(df_radiomics.Contour.unique()) == 128 181 | 182 | # Check the radiomics with mapping 183 | df_radiomics = pydicer.analyse.get_all_computed_radiomics_for_dataset( 184 | structure_mapping_id=mapping_id 185 | ) 186 | assert len(df_radiomics) == 13 187 | assert len(df_radiomics.Contour.unique()) == 4 188 | 189 | 190 | def test_dose_metrics_structure_names_standardised(test_data_converted): 191 | working_directory = test_data_converted 192 | pydicer = PyDicer(working_directory) 193 | 194 | # Add a mapping 195 | mapping_id = "dose_mapping" 196 | mapping = { 197 | "SpinalCord": ["Cord", "copy_of_cord", "cord"], 198 | "Parotid_L": ["Left_parotid", "Lt_Parotid", "L_parotid", "LT_Parotid"], 199 | "Parotid_R": ["Right_parotid", "Rt_Parotid", "R_parotid", "RT_Parotid"], 200 | "Brain": ["BRAIN"], 201 | } 202 | add_structure_name_mapping(mapping, mapping_id=mapping_id, working_directory=working_directory) 203 | 204 | # Check the dose metrics without mapping 205 | df_dose_metrics = pydicer.analyse.compute_dose_metrics(d_point=[95, 50], v_point=[3]) 206 | assert len(df_dose_metrics.label.unique()) == 128 207 | 208 | # Check the dose metrics with mapping 209 | df_dose_metrics = pydicer.analyse.compute_dose_metrics( 210 | d_point=[95, 50], v_point=[3], structure_mapping_id=mapping_id 211 | ) 212 | assert len(df_dose_metrics) == 13 213 | assert len(df_dose_metrics.label.unique()) == 4 214 | -------------------------------------------------------------------------------- /examples/ConvertingData.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Converting Data\n", 9 | "\n", 10 | "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/AustralianCancerDataNetwork/pydicer/blob/main/examples/ConvertingData.ipynb)\n", 11 | "\n", 12 | "In this example, the preprocessing and conversion of DICOM data is demonstrated. These are\n", 13 | "essential first steps before data can be analysed using PyDicer." 
14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "try:\n", 23 | " from pydicer import PyDicer\n", 24 | "except ImportError:\n", 25 | " !pip install pydicer\n", 26 | " from pydicer import PyDicer\n", 27 | "\n", 28 | "from pathlib import Path\n", 29 | "\n", 30 | "from pydicer.input.test import TestInput" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "## Setup PyDicer\n", 38 | "\n", 39 | "As in the [Getting Started example](https://australiancancerdatanetwork.github.io/pydicer/_examples/GettingStarted.html), we must first define a working directory for our dataset. We\n", 40 | "also create a `PyDicer` object." 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "directory = Path(\"./working\")\n", 50 | "pydicer = PyDicer(directory)" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "## Fetch some data\n", 58 | "\n", 59 | "A [TestInput class](https://australiancancerdatanetwork.github.io/pydicer/input.html#pydicer.input.test.TestInput) is provided in pydicer to download some sample data to work with. Several other\n", 60 | "input classes exist if you'd like to retrieve DICOM data for conversion from somewhere else. See \n", 61 | "the [docs for information](https://australiancancerdatanetwork.github.io/pydicer/html/input.html)\n", 62 | "on how the PyDicer input classes work.\n", 63 | "\n", 64 | "Most commonly, if you have DICOM files stored within a folder on your file system you can simply\n", 65 | "pass the path to your DICOM directory to the `pydicer.add_input()` function." 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "dicom_directory = directory.joinpath(\"dicom\")\n", 75 | "test_input = TestInput(dicom_directory)\n", 76 | "test_input.fetch_data()\n", 77 | "\n", 78 | "# Add the input DICOM location to the pydicer object\n", 79 | "pydicer.add_input(dicom_directory)" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "## Preprocess\n", 87 | "\n", 88 | "With some DICOM data ready to work with, we must first use the PyDicer [preprocess module](https://australiancancerdatanetwork.github.io/pydicer/preprocess.html). This\n", 89 | "module will crawl over all DICOM data available and will index all information required for\n", 90 | "conversion of the data." 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "pydicer.preprocess()" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "### Inspect Preprocessed Data\n", 107 | "\n", 108 | "Here we load the data that was indexed during preprocessing and output the first rows. This data\n", 109 | "will be used by the following step of data conversion." 
110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "df_preprocessed = pydicer.read_preprocessed_data()\n", 119 | "df_preprocessed.head()" 120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": {}, 125 | "source": [ 126 | "## Convert Data\n", 127 | "\n", 128 | "With the DICOM data having been indexed during preprocessing, we are now ready to convert this data\n", 129 | "into NIfTI format, which will be stored within the PyDicer standard directory structure.\n", 130 | "\n", 131 | "Running the following cell will begin the conversion process. While this cell is running, take a\n", 132 | "look inside the `working/data` directory to see how the converted data is being stored.\n", 133 | "\n", 134 | "Notice the `converted.csv` file stored for each patient. This tracks each converted data object.\n", 135 | "This will be loaded as a [Pandas DataFrame](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html) for use throughout PyDicer.\n" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "pydicer.convert.convert()" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "### Load Converted DataFrame\n", 152 | "\n", 153 | "Once data is converted, we can load a Pandas DataFrame which contains a description of each object\n", 154 | "converted.\n", 155 | "\n", 156 | "The most useful columns in the DataFrame for working with this data in PyDicer are:\n", 157 | "- `hashed_uid`: This is a 6-character hexadecimal hash of the associated DICOM SeriesInstanceUID.\n", 158 | "  PyDicer refers to objects using this hashed identifier for a more concise representation.\n", 159 | "- `modality`: The modality of the data object.\n", 160 | "- `patient_id`: The ID of the patient this data object belongs to.\n", 161 | "- `path`: The path within the working directory where files for this data object are stored." 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "df = pydicer.read_converted_data()\n", 171 | "df" 172 | ] 173 | }, 174 | { 175 | "cell_type": "markdown", 176 | "metadata": {}, 177 | "source": [ 178 | "## Data Quarantine\n", 179 | "\n", 180 | "If anything goes wrong while converting a DICOM object during either the preprocess step or the\n", 181 | "conversion step, the problematic DICOM data will be copied to the `working/quarantine` directory.\n", 182 | "\n", 183 | "It's a good idea to regularly check your quarantine directory to ensure that no critical data\n", 184 | "objects are being quarantined. If so, you may want to consider rectifying the issue and running the\n", 185 | "preprocess and conversion steps again.\n", 186 | "\n", 187 | "As can be seen by running the cell below, there were several DICOM objects moved to the quarantine\n", 188 | "for our test dataset. This was due to there being multiple slices at the same location with\n", 189 | "differing pixel data in one CT image series."
190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "metadata": {}, 196 | "outputs": [], 197 | "source": [ 198 | "df_quarantine = pydicer.read_quarantined_data()\n", 199 | "df_quarantine" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [] 208 | } 209 | ], 210 | "metadata": { 211 | "interpreter": { 212 | "hash": "814af119db7f8f2860617be3dcd1d37c560587d11c65bd58c45b1679d3ee6ea4" 213 | }, 214 | "kernelspec": { 215 | "display_name": "Python 3.8.0 64-bit ('pydicer': pyenv)", 216 | "language": "python", 217 | "name": "python3" 218 | }, 219 | "language_info": { 220 | "codemirror_mode": { 221 | "name": "ipython", 222 | "version": 3 223 | }, 224 | "file_extension": ".py", 225 | "mimetype": "text/x-python", 226 | "name": "python", 227 | "nbconvert_exporter": "python", 228 | "pygments_lexer": "ipython3", 229 | "version": "3.9.16" 230 | }, 231 | "orig_nbformat": 4 232 | }, 233 | "nbformat": 4, 234 | "nbformat_minor": 2 235 | } 236 | -------------------------------------------------------------------------------- /examples/Radiomics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Compute Radiomics\n", 9 | "\n", 10 | "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/AustralianCancerDataNetwork/pydicer/blob/main/examples/Radiomics.ipynb)\n", 11 | "\n", 12 | "In this example notebook we use [PyRadiomics](https://github.com/AIM-Harvard/pyradiomics) to\n", 13 | "compute various types of radiomics features. We use some\n", 14 | "[LCTSC](https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=24284539) data from the\n", 15 | "Cancer Imaging Archive which has already been converted using PyDicer for demonstration purposes." 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "try:\n", 25 | " from pydicer import PyDicer\n", 26 | "except ImportError:\n", 27 | " !pip install pydicer\n", 28 | " from pydicer import PyDicer\n", 29 | "\n", 30 | "from pathlib import Path\n", 31 | "\n", 32 | "from pydicer.utils import fetch_converted_test_data\n", 33 | "\n", 34 | "from pydicer.utils import load_object_metadata" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "## Fetch data\n", 42 | "\n", 43 | "LCTSC data prepared for this example are downloaded and stored in a `testdata_lctsc` directory.\n", 44 | "We will use this for our PyDicer working directory." 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "working_directory = fetch_converted_test_data(\"./testdata_lctsc\", dataset=\"LCTSC\")" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "## Initialise PyDicer object\n", 61 | "\n", 62 | "Using the working directory containing the LCTSC test data."
63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "pydicer = PyDicer(working_directory)" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "## Compute Default Radiomics\n", 79 | "\n", 80 | "By default, PyDicer will compute only first-order radiomics features. Radiomics are computed for\n", 81 | "each structure available in the dataset using the image data of images linked to those structures." 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "pydicer.analyse.compute_radiomics()" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "## Fetch Computed Radiomics\n", 98 | "\n", 99 | "Use the [get_all_computed_radiomics_for_dataset](https://australiancancerdatanetwork.github.io/pydicer/analyse.html#pydicer.analyse.data.AnalyseData.get_all_computed_radiomics_for_dataset) function to fetch all radiomics features computed\n", 100 | "in the last step.\n", 101 | "\n", 102 | "The `.head()` function on a Pandas DataFrame outputs the first 5 rows for inspection." 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "# Display the DataFrame of radiomics computed\n", 112 | "df_radiomics = pydicer.analyse.get_all_computed_radiomics_for_dataset()\n", 113 | "df_radiomics.head()" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "## Specify Radiomics to Compute\n", 121 | "\n", 122 | "PyDicer uses the popular `pyradiomics` library to compute radiomics. So, you may specify any\n", 123 | "radiomics features provided in that library to be computed. See the [pyradiomics documentation for\n", 124 | "a list of radiomics features\n", 125 | "available](https://pyradiomics.readthedocs.io/en/latest/features.html).\n", 126 | "\n", 127 | "In this example, we specify all `shape` features as well as `first-order` features to be computed."
128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [ 136 | "# Import pyradiomics feature modules\n", 137 | "from radiomics import (\n", 138 | " firstorder,\n", 139 | " shape,\n", 140 | ")\n", 141 | "\n", 142 | "# Prepare a dict of features to compute grouped by class (excluding deprecated features)\n", 143 | "first_order_features = firstorder.RadiomicsFirstOrder.getFeatureNames()\n", 144 | "shape_features = shape.RadiomicsShape.getFeatureNames()\n", 145 | "compute_radiomics = {\n", 146 | " \"firstorder\": [f for f in first_order_features if not first_order_features[f]],\n", 147 | " \"shape\": [f for f in shape_features if not shape_features[f]],\n", 148 | "}\n", 149 | "\n", 150 | "# Pass the dict to compute the radiomics\n", 151 | "pydicer.analyse.compute_radiomics(radiomics=compute_radiomics)\n", 152 | "\n", 153 | "# Fetch the computed radiomics and output the first few rows\n", 154 | "df_radiomics = pydicer.analyse.get_all_computed_radiomics_for_dataset()\n", 155 | "df_radiomics.head()" 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "metadata": {}, 161 | "source": [ 162 | "You can also set a specific subset of features like this:" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [ 171 | "# Specify features to compute grouped by class\n", 172 | "compute_radiomics = {\n", 173 | " \"firstorder\": [\"Maximum\", \"Minimum\", \"Mean\", \"Median\"],\n", 174 | " \"shape\": [\"SurfaceArea\", \"VoxelVolume\"],\n", 175 | "}\n", 176 | "\n", 177 | "# Pass the dict to compute the radiomics\n", 178 | "pydicer.analyse.compute_radiomics(radiomics=compute_radiomics)\n", 179 | "\n", 180 | "df_radiomics = pydicer.analyse.get_all_computed_radiomics_for_dataset()\n", 181 | "df_radiomics.head()" 182 | ] 183 | }, 184 | { 185 | "cell_type": "markdown", 186 | "metadata": {}, 187 | "source": [ 188 | "## Track Metadata\n", 189 | "\n", 190 | "When analysing your radiomic features, it may be useful to have certain metadata available from\n", 191 | "either the image or structure set. You can specify which DICOM header tags to extract metadata for,\n", 192 | "and these will be stored alongside the radiomic feature values.\n", 193 | "\n", 194 | "In the cell below, we recompute our radiomics and store the `PatientSex` header value from the\n", 195 | "image series and the `StudyDate` value from the structure set."
196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": null, 201 | "metadata": {}, 202 | "outputs": [], 203 | "source": [ 204 | "# Compute the radiomics specifying the meta data to keep\n", 205 | "pydicer.analyse.compute_radiomics(\n", 206 | " radiomics=compute_radiomics,\n", 207 | " image_meta_data=[\"PatientSex\"],\n", 208 | " structure_meta_data=[\"StudyDate\"]\n", 209 | ")\n", 210 | "\n", 211 | "# Fetch the results and display the first rows\n", 212 | "df_radiomics = pydicer.analyse.get_all_computed_radiomics_for_dataset()\n", 213 | "df_radiomics.head()" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": null, 219 | "metadata": {}, 220 | "outputs": [], 221 | "source": [] 222 | } 223 | ], 224 | "metadata": { 225 | "interpreter": { 226 | "hash": "814af119db7f8f2860617be3dcd1d37c560587d11c65bd58c45b1679d3ee6ea4" 227 | }, 228 | "kernelspec": { 229 | "display_name": "Python 3.8.0 64-bit ('pydicer': pyenv)", 230 | "language": "python", 231 | "name": "python3" 232 | }, 233 | "language_info": { 234 | "codemirror_mode": { 235 | "name": "ipython", 236 | "version": 3 237 | }, 238 | "file_extension": ".py", 239 | "mimetype": "text/x-python", 240 | "name": "python", 241 | "nbconvert_exporter": "python", 242 | "pygments_lexer": "ipython3", 243 | "version": "3.9.16" 244 | }, 245 | "orig_nbformat": 4 246 | }, 247 | "nbformat": 4, 248 | "nbformat_minor": 2 249 | } 250 | -------------------------------------------------------------------------------- /tests/test_input.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | import tempfile 4 | 5 | import pytest 6 | from unittest.mock import MagicMock, patch 7 | 8 | from pydicer.input.web import WebInput 9 | from pydicer.input.test import TestInput 10 | from pydicer.input.filesystem import FileSystemInput 11 | from pydicer.input.pacs import DICOMPACSInput 12 | from pydicer.input.tcia import TCIAInput 13 | 14 | 15 | def test_input_valid_working_dir(): 16 | valid_test_input = WebInput(data_url="") 17 | # Assert path to DICOMs exists 18 | assert valid_test_input.working_directory.is_dir() 19 | 20 | valid_filesystem_input = FileSystemInput( 21 | valid_test_input.working_directory) 22 | # Assert path to DICOMs exists 23 | assert valid_filesystem_input.working_directory.is_dir() 24 | 25 | valid_tcia_input = TCIAInput(collection="", patient_ids=[], modalities=[]) 26 | # Assert path to DICOMs exists 27 | assert valid_tcia_input.working_directory.is_dir() 28 | 29 | 30 | def assert_invalid_tcia_input(invalid_tcia_input): 31 | """ 32 | Assert path to DICOMs does exist, but it contains no files 33 | """ 34 | invalid_tcia_input.fetch_data() 35 | assert invalid_tcia_input.working_directory.is_dir() 36 | assert ( 37 | len( 38 | [ 39 | name 40 | for name in os.listdir(invalid_tcia_input.working_directory) 41 | if os.path.isfile(os.path.join(invalid_tcia_input.working_directory, name)) 42 | ] 43 | ) 44 | == 0 45 | ) 46 | 47 | 48 | def test_input_invalid_working_dir(): 49 | invalid_test_input = WebInput( 50 | data_url="", working_directory="INVALID_PATH") 51 | # Assert path to DICOMs does not exist 52 | assert not invalid_test_input.working_directory.is_dir() 53 | 54 | with pytest.raises(FileNotFoundError): 55 | FileSystemInput("INVALID_PATH") 56 | 57 | invalid_work_dir_tcia_input = TCIAInput( 58 | collection="TCGA-GBM", 59 | patient_ids=["TCGA-08-0244"], 60 | modalities=["MR"], 61 | working_directory="INVALID_PATH", 62 | ) 63 | # Assert path to 
DICOMs does not exist 64 | assert not invalid_work_dir_tcia_input.working_directory.is_dir() 65 | 66 | 67 | @pytest.mark.skip 68 | def test_tcia_input(): 69 | invalid_collection_tcia_input = TCIAInput( 70 | collection="INVALID_COLLECTION", patient_ids=[], modalities=[] 71 | ) 72 | invalid_patient_id_tcia_input = TCIAInput( 73 | collection="TCGA-GBM", patient_ids=["INVALID_PATIENT_ID"], modalities=[] 74 | ) 75 | 76 | assert_invalid_tcia_input(invalid_collection_tcia_input) 77 | assert_invalid_tcia_input(invalid_patient_id_tcia_input) 78 | 79 | 80 | def test_test_input(): 81 | test_input = TestInput() 82 | test_input.fetch_data() 83 | output_directory = test_input.working_directory.joinpath("HNSCC") 84 | 85 | # Assert that the 3 directories now exist on the system filepath 86 | assert output_directory.joinpath("HNSCC-01-0019").is_dir() 87 | assert output_directory.joinpath("HNSCC-01-0176").is_dir() 88 | assert output_directory.joinpath("HNSCC-01-0199").is_dir() 89 | 90 | 91 | def test_dicom_pacs_invalid_host(): 92 | # Using a presumably incorrect host/port to force a ConnectionError 93 | with pytest.raises(ConnectionError): 94 | DICOMPACSInput("INCORRECT_HOST", 1234) 95 | 96 | 97 | def test_dicom_pacs_valid_host(mocker): 98 | """ 99 | Test creating a DICOMPACSInput instance with a valid host where verify() returns True. 100 | """ 101 | # Patch the DicomConnector to return True for verify() 102 | mock_connector_class = mocker.patch("pydicer.input.pacs.DicomConnector") 103 | mock_connector_instance = mock_connector_class.return_value 104 | mock_connector_instance.verify.return_value = True 105 | 106 | # Should not raise ConnectionError 107 | dicompacs_input = DICOMPACSInput("VALID_HOST", 11112, "AE_TITLE") 108 | 109 | # Assert the underlying connector was indeed created 110 | assert dicompacs_input.dicom_connector is not None 111 | assert dicompacs_input.working_directory.is_dir() 112 | # Verify that verify() was called exactly once on initialization 113 | mock_connector_instance.verify.assert_called_once() 114 | 115 | 116 | def test_dicom_pacs_fetch_data_success(mocker): 117 | """ 118 | Test fetching data when the connection is valid, ensuring that we: 119 | 1) Convert single string patients/modalities to lists 120 | 2) Skip 'None' returns from do_find 121 | 3) Skip series whose patient ID doesn't match 122 | 4) Renames downloaded files to .dcm 123 | """ 124 | mock_connector_class = mocker.patch("pydicer.input.pacs.DicomConnector") 125 | mock_connector_instance = mock_connector_class.return_value 126 | mock_connector_instance.verify.return_value = True 127 | 128 | # Mock do_find to return "studies" and then "series" 129 | # The top-level do_find returns a list of "study" datasets (some None), 130 | # then the second do_find returns a list of "series" datasets (some None). 131 | # Each dataset is just a MagicMock or simple object with needed attributes. 
132 | mock_study_1 = MagicMock() 133 | mock_study_1.StudyInstanceUID = "STUDY_UID_1" 134 | mock_study_2 = MagicMock() 135 | mock_study_2.StudyInstanceUID = "STUDY_UID_2" 136 | mock_study_none = None # Should be skipped 137 | 138 | mock_series_1 = MagicMock() 139 | mock_series_1.SeriesInstanceUID = "SERIES_UID_1" 140 | mock_series_1.PatientID = "PATIENT_1" 141 | mock_series_2 = MagicMock() 142 | mock_series_2.SeriesInstanceUID = "SERIES_UID_2" 143 | mock_series_2.PatientID = "SOME_OTHER_PATIENT" # Should be skipped 144 | mock_series_none = None 145 | 146 | # The "find" calls for studies: 147 | mock_connector_instance.do_find.side_effect = [ 148 | [mock_study_1, mock_study_none, mock_study_2], # Studies 149 | [mock_series_1, mock_series_none, mock_series_2], # Series for STUDY_UID_1 150 | # Series for STUDY_UID_2 (just re-using same object to keep it simple) 151 | [mock_series_1], 152 | ] 153 | 154 | dicompacs_input = DICOMPACSInput("VALID_HOST", 11112, "AE_TITLE") 155 | 156 | # Create a dummy file that doesn't end with .dcm so we can test rename 157 | with tempfile.TemporaryDirectory() as tmpdir: 158 | tmpdir_path = Path(tmpdir) 159 | # Force working directory to our temp dir 160 | dicompacs_input.working_directory = tmpdir_path 161 | 162 | dummy_file_path = tmpdir_path / "dummy_no_ext" 163 | dummy_file_path.write_text("test file content") 164 | 165 | # Single patient, single modality as strings 166 | dicompacs_input.fetch_data("PATIENT_1", "CT") 167 | 168 | # Ensure do_find was called multiple times 169 | # The first call: study-level (QueryRetrieveLevel="STUDY") 170 | # The next calls: series-level (QueryRetrieveLevel="SERIES"), once for each study 171 | assert mock_connector_instance.do_find.call_count == 3 172 | 173 | # Ensure download_series was called for the valid series only 174 | # The second series had a mismatched patient ID, so skip 175 | # The third call is a new do_find -> leads to another series (mock_series_1 with same patient) 176 | # So we should have downloaded 2 times 177 | assert mock_connector_instance.download_series.call_count == 2 178 | call_args_list = mock_connector_instance.download_series.call_args_list 179 | # We expect the arguments to match "SERIES_UID_1" each time in this example 180 | # (in practice, could differ if you had different series objects) 181 | assert call_args_list[0][0][0] == "SERIES_UID_1" 182 | assert call_args_list[1][0][0] == "SERIES_UID_1" 183 | 184 | # Check that the file without extension was renamed to .dcm 185 | renamed_file = tmpdir_path / "dummy_no_ext.dcm" 186 | assert renamed_file.exists(), "File without .dcm extension should have been renamed." 187 | assert not dummy_file_path.exists(), "Original file without extension should be renamed." 188 | 189 | 190 | @pytest.mark.skip 191 | def test_dicom_pacs_fetch(): 192 | """ 193 | Example real test that tries to actually fetch from a public DICOM PACS. 194 | This might be skipped because it depends on external availability. 
195 | """ 196 | pacs_input = DICOMPACSInput("www.dicomserver.co.uk", 11112, "DCMQUERY") 197 | pacs_input.fetch_data("PAT004", modalities=["GM"]) 198 | 199 | assert pacs_input.working_directory.is_dir() 200 | assert len(list(pacs_input.working_directory.glob("*/*"))) > 0 201 | -------------------------------------------------------------------------------- /tests/test_generate.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=redefined-outer-name,missing-function-docstring 2 | 3 | import pytest 4 | 5 | import pandas as pd 6 | import SimpleITK as sitk 7 | 8 | from pydicer.generate.object import add_object, add_dose_object, add_structure_object 9 | from pydicer.utils import read_converted_data 10 | 11 | 12 | @pytest.fixture 13 | def test_data_path(tmp_path_factory): 14 | """Fixture to generate a pydicer style file structure. For the purposes of these tests, it 15 | doesn't really matter what the files themselves contain. Only the converted.csv will be used 16 | here.""" 17 | 18 | working_directory = tmp_path_factory.mktemp("data") 19 | 20 | cols = [ 21 | "", 22 | "sop_instance_uid", 23 | "hashed_uid", 24 | "modality", 25 | "patient_id", 26 | "series_uid", 27 | "for_uid", 28 | "referenced_sop_instance_uid", 29 | "path", 30 | ] 31 | rows = [ 32 | [ 33 | 0, 34 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.418136430763474248173140712714", 35 | "b281ea", 36 | "CT", 37 | "HNSCC-01-0019", 38 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.233510441938368266923995238976", 39 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.290727775603409136366833989550", 40 | "", 41 | "data/HNSCC-01-0019/images/b281ea", 42 | ], 43 | [ 44 | 0, 45 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.168221415040968580239112565792", 46 | "7cdcd9", 47 | "RTSTRUCT", 48 | "HNSCC-01-0019", 49 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.103450757970418393826743010361", 50 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.290727775603409136366833989550", 51 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.418136430763474248173140712714", 52 | "data/HNSCC-01-0019/structures/7cdcd9", 53 | ], 54 | [ 55 | 0, 56 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.254865609982571308239859201936", 57 | "57b99f", 58 | "RTPLAN", 59 | "HNSCC-01-0019", 60 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.202542618630321306831779497186", 61 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.290727775603409136366833989550", 62 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.168221415040968580239112565792", 63 | "data/HNSCC-01-0019/plans/57b99f", 64 | ], 65 | [ 66 | 0, 67 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.242809596262952988524850819667", 68 | "309e1a", 69 | "RTDOSE", 70 | "HNSCC-01-0019", 71 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.777975715563610987698151746284", 72 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.290727775603409136366833989550", 73 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.254865609982571308239859201936", 74 | "data/HNSCC-01-0019/doses/309e1a", 75 | ], 76 | ] 77 | 78 | df_converted = pd.DataFrame(rows, columns=cols) 79 | for _, row in df_converted.iterrows(): 80 | 81 | data_obj_path = working_directory.joinpath(row.path) 82 | data_obj_path.mkdir(parents=True, exist_ok=True) 83 | 84 | converted_path = working_directory.joinpath("data", "HNSCC-01-0019", "converted.csv") 85 | df_converted.to_csv(converted_path) 86 | 87 | # Also create a dataset directory with converted sub-set 88 | dataset_path = working_directory.joinpath("test_dataset", "HNSCC-01-0019") 89 | dataset_path.mkdir(parents=True) 90 | converted_path = dataset_path.joinpath("converted.csv") 91 | df_converted[:2].to_csv(converted_path) 92 | 93 | return 
working_directory 94 | 95 | 96 | def test_generate_patient_id_does_not_exist(test_data_path): 97 | 98 | with pytest.raises(ValueError): 99 | add_object(test_data_path, "test_id", "test_pat", "image", "CT") 100 | 101 | 102 | def test_generate_incorrect_image_type(test_data_path): 103 | 104 | with pytest.raises(ValueError): 105 | add_object(test_data_path, "test_id", "HNSCC-01-0019", "oops", "CT") 106 | 107 | 108 | def test_generate_object_does_not_exist(test_data_path): 109 | 110 | with pytest.raises(SystemError): 111 | add_object(test_data_path, "test_id", "HNSCC-01-0019", "image", "CT") 112 | 113 | 114 | def test_generate_object_already_exists(test_data_path): 115 | 116 | with pytest.raises(SystemError): 117 | add_object(test_data_path, "b281ea", "HNSCC-01-0019", "image", "CT") 118 | 119 | 120 | def test_generate_object(test_data_path): 121 | 122 | test_obj_path = test_data_path.joinpath("data", "HNSCC-01-0019", "images", "test_id") 123 | test_obj_path.mkdir() 124 | 125 | # Confirm the data object isn't there yet 126 | df_converted = read_converted_data(test_data_path, patients=["HNSCC-01-0019"]) 127 | assert len(df_converted[df_converted.hashed_uid == "test_id"]) == 0 128 | 129 | # Add the object 130 | add_object(test_data_path, "test_id", "HNSCC-01-0019", "image", "CT") 131 | 132 | # Now make sure it's there 133 | df_converted = read_converted_data(test_data_path, patients=["HNSCC-01-0019"]) 134 | assert len(df_converted[df_converted.hashed_uid == "test_id"]) == 1 135 | 136 | 137 | def test_generate_object_add_to_dataset(test_data_path): 138 | 139 | test_obj_path = test_data_path.joinpath("data", "HNSCC-01-0019", "images", "test_id") 140 | test_obj_path.mkdir() 141 | 142 | # Confirm the data object isn't there yet 143 | df_converted = read_converted_data( 144 | test_data_path, dataset_name="test_dataset", patients=["HNSCC-01-0019"] 145 | ) 146 | assert len(df_converted[df_converted.hashed_uid == "test_id"]) == 0 147 | 148 | add_object( 149 | test_data_path, "test_id", "HNSCC-01-0019", "image", "CT", datasets=["test_dataset"] 150 | ) 151 | # Now make sure it's there 152 | df_converted = read_converted_data( 153 | test_data_path, dataset_name="test_dataset", patients=["HNSCC-01-0019"] 154 | ) 155 | assert len(df_converted[df_converted.hashed_uid == "test_id"]) == 1 156 | 157 | 158 | def test_generate_dose_object(test_data_path): 159 | 160 | # Confirm the data object isn't there yet 161 | df_converted = read_converted_data(test_data_path, patients=["HNSCC-01-0019"]) 162 | assert len(df_converted[df_converted.hashed_uid == "dose_id"]) == 0 163 | 164 | test_dose = sitk.Image(20, 20, 20, sitk.sitkFloat32) 165 | linked_structure_hash = "7cdcd9" 166 | add_dose_object(test_data_path, test_dose, "dose_id", "HNSCC-01-0019", linked_structure_hash) 167 | 168 | # Now make sure it's there 169 | df_converted = read_converted_data(test_data_path, patients=["HNSCC-01-0019"]) 170 | assert len(df_converted[df_converted.hashed_uid == "dose_id"]) == 1 171 | 172 | # Also make sure the for_uid and reference sop instance uid are correct 173 | linked_row = df_converted[df_converted.hashed_uid == "dose_id"].iloc[0] 174 | assert linked_row.for_uid == "1.3.6.1.4.1.14519.5.2.1.1706.8040.290727775603409136366833989550" 175 | assert ( 176 | linked_row.referenced_sop_instance_uid 177 | == "1.3.6.1.4.1.14519.5.2.1.1706.8040.168221415040968580239112565792" 178 | ) 179 | 180 | # And that the dose file exists 181 | assert test_data_path.joinpath( 182 | "data", "HNSCC-01-0019", "doses", "dose_id", "RTDOSE.nii.gz" 183 | 
).exists() 184 | 185 | 186 | def test_generate_structure_object(test_data_path): 187 | 188 | # Confirm the data object isn't there yet 189 | df_converted = read_converted_data(test_data_path, patients=["HNSCC-01-0019"]) 190 | assert len(df_converted[df_converted.hashed_uid == "structure_id"]) == 0 191 | 192 | test_structure_set = { 193 | "test_struct1": sitk.Image(20, 20, 20, sitk.sitkFloat32), 194 | "test_struct2": sitk.Image(20, 20, 20, sitk.sitkFloat32), 195 | } 196 | linked_image_hash = "b281ea" 197 | add_structure_object( 198 | test_data_path, test_structure_set, "structure_id", "HNSCC-01-0019", linked_image_hash 199 | ) 200 | 201 | # Now make sure it's there 202 | df_converted = read_converted_data(test_data_path, patients=["HNSCC-01-0019"]) 203 | assert len(df_converted[df_converted.hashed_uid == "structure_id"]) == 1 204 | 205 | # Also make sure the for_uid and reference sop instance uid are correct 206 | linked_row = df_converted[df_converted.hashed_uid == "structure_id"].iloc[0] 207 | assert linked_row.for_uid == "1.3.6.1.4.1.14519.5.2.1.1706.8040.290727775603409136366833989550" 208 | assert ( 209 | linked_row.referenced_sop_instance_uid 210 | == "1.3.6.1.4.1.14519.5.2.1.1706.8040.418136430763474248173140712714" 211 | ) 212 | 213 | # And that the structure files actually exist 214 | assert test_data_path.joinpath( 215 | "data", "HNSCC-01-0019", "structures", "structure_id", "test_struct1.nii.gz" 216 | ).exists() 217 | --------------------------------------------------------------------------------