├── tests ├── __init__.py ├── conftest.py ├── test_config.py ├── test_utils.py ├── test_pipeline.py ├── test_compare.py ├── test_quarantine.py ├── test_segmentation.py ├── test_convert.py ├── test_structure_set.py ├── test_input.py └── test_generate.py ├── pydicer ├── cli │ ├── __init__.py │ ├── contants.py │ ├── run.py │ └── input.py ├── analyse │ └── __init__.py ├── convert │ ├── __init__.py │ ├── headers.py │ └── rtstruct.py ├── dataset │ ├── __init__.py │ ├── preparation.py │ └── structureset.py ├── generate │ ├── __init__.py │ ├── mhubconfigs │ │ ├── platipy.yml │ │ ├── lungmask.yml │ │ ├── totalsegmentator.yml │ │ ├── nnunet_liver.yml │ │ ├── casust.yml │ │ └── nnunet_pancreas.yml │ └── models.py ├── input │ ├── __init__.py │ ├── test.py │ ├── filesystem.py │ ├── base.py │ ├── web.py │ ├── tcia.py │ ├── pacs.py │ └── orthanc.py ├── preprocess │ └── __init__.py ├── visualise │ └── __init__.py ├── __init__.py ├── constants.py ├── logger.py ├── quarantine.py └── config.py ├── docs ├── contributing.rst ├── code_of_conduct.rst ├── utils.rst ├── nnunet.rst ├── tool.rst ├── config.rst ├── preprocess.rst ├── visualise.rst ├── analyse.rst ├── generate.rst ├── dataset.rst ├── _static │ └── custom.css ├── convert.rst ├── input.rst ├── Makefile ├── make.bat ├── index.rst └── conf.py ├── docker-compose.yml ├── .coveragerc ├── .devcontainer ├── install-dev-tools.sh ├── docker-compose.yml └── devcontainer.json ├── assets └── pydicer-working-directory-structure.png ├── requirements.txt ├── requirements-dev.txt ├── .vscode └── settings.json ├── Makefile ├── .github └── workflows │ ├── docs.yml │ └── pull-request.yml ├── examples ├── ASMIRTWorkshop │ └── README.md ├── VisualiseData.ipynb ├── DoseMetrics.ipynb ├── Configuration.ipynb ├── WorkingWithData.ipynb ├── GettingStarted.ipynb ├── ConvertingData.ipynb └── Radiomics.ipynb ├── pyproject.toml ├── CITATION.cff ├── .gitignore ├── CONTRIBUTING.md ├── CODE_OF_CONDUCT.md └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydicer/cli/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydicer/analyse/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydicer/convert/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydicer/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydicer/generate/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydicer/input/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydicer/preprocess/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydicer/visualise/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/contributing.rst: -------------------------------------------------------------------------------- 1 | .. mdinclude:: ../CONTRIBUTING.md -------------------------------------------------------------------------------- /docs/code_of_conduct.rst: -------------------------------------------------------------------------------- 1 | .. mdinclude:: ../CODE_OF_CONDUCT.md -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | pydicer-dev: 3 | build: 4 | context: . 5 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | # Omit the CLI directory from coverage 3 | omit = 4 | pydicer/cli/* -------------------------------------------------------------------------------- /docs/utils.rst: -------------------------------------------------------------------------------- 1 | ##################### 2 | Utils 3 | ##################### 4 | 5 | .. automodule:: pydicer.utils 6 | :members: 7 | -------------------------------------------------------------------------------- /.devcontainer/install-dev-tools.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | sudo apt update && sudo apt install -y libtbb-dev 4 | poetry install --with=dev 5 | -------------------------------------------------------------------------------- /docs/nnunet.rst: -------------------------------------------------------------------------------- 1 | ##################### 2 | nnUNet 3 | ##################### 4 | 5 | .. autoclass:: pydicer.dataset.nnunet.NNUNetDataset 6 | :members: -------------------------------------------------------------------------------- /docs/tool.rst: -------------------------------------------------------------------------------- 1 | ##################### 2 | PyDicer 3 | ##################### 4 | 5 | 6 | .. autoclass:: pydicer.tool.PyDicer 7 | :members: 8 | -------------------------------------------------------------------------------- /docs/config.rst: -------------------------------------------------------------------------------- 1 | ##################### 2 | Configuration 3 | ##################### 4 | 5 | 6 | .. autoclass:: pydicer.config.PyDicerConfig 7 | :members: -------------------------------------------------------------------------------- /docs/preprocess.rst: -------------------------------------------------------------------------------- 1 | ##################### 2 | Preprocessing 3 | ##################### 4 | 5 | .. automodule:: pydicer.preprocess.data 6 | :members: 7 | -------------------------------------------------------------------------------- /docs/visualise.rst: -------------------------------------------------------------------------------- 1 | ##################### 2 | Visualisation 3 | ##################### 4 | 5 | .. 
automodule:: pydicer.visualise.data 6 | :members: 7 | -------------------------------------------------------------------------------- /assets/pydicer-working-directory-structure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AustralianCancerDataNetwork/pydicer/HEAD/assets/pydicer-working-directory-structure.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pydicom >= 2.1.2 2 | SimpleITK >= 2.0.2 3 | platipy >= 0.5.0 4 | pyorthanc >= 0.2.14 5 | pyradiomics >= 3.1.0 6 | argparse >= 1.4.0 7 | tqdm >= 4.55.1 8 | -------------------------------------------------------------------------------- /docs/analyse.rst: -------------------------------------------------------------------------------- 1 | ##################### 2 | Analyse 3 | ##################### 4 | 5 | .. automodule:: pydicer.analyse.data 6 | :members: 7 | 8 | .. automodule:: pydicer.analyse.compare 9 | :members: 10 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | pylint 2 | black 3 | pytest 4 | numpy 5 | sphinx==5.0.2 6 | m2r2==0.3.2 7 | nbstripout==0.5.0 8 | furo==2022.6.4.1 9 | sphinxcontrib-napoleon==0.7 10 | sphinx-theme==1.0 11 | nbstripout==0.5.0 12 | furo==2022.6.4.1 13 | nbsphinx==0.8.9 14 | MarkupSafe==2.0.1 15 | recommonmark==0.7.1 16 | -------------------------------------------------------------------------------- /docs/generate.rst: -------------------------------------------------------------------------------- 1 | ##################### 2 | Generation 3 | ##################### 4 | 5 | Objects 6 | ======= 7 | 8 | .. automodule:: pydicer.generate.object 9 | :members: 10 | 11 | Auto-segmentation 12 | ================= 13 | 14 | .. automodule:: pydicer.generate.segmentation 15 | :members: 16 | 17 | 18 | Models 19 | ====== 20 | 21 | .. automodule:: pydicer.generate.models 22 | :members: 23 | 24 | -------------------------------------------------------------------------------- /docs/dataset.rst: -------------------------------------------------------------------------------- 1 | ##################### 2 | Dataset Preparation 3 | ##################### 4 | 5 | Prepare Dataset 6 | =============== 7 | 8 | .. automodule:: pydicer.dataset.preparation 9 | :members: 10 | 11 | Preparation Functions 12 | ===================== 13 | 14 | .. automodule:: pydicer.dataset.functions 15 | :members: 16 | 17 | Structure Sets 18 | ============== 19 | 20 | .. 
automodule:: pydicer.dataset.structureset 21 | :members: 22 | -------------------------------------------------------------------------------- /pydicer/__init__.py: -------------------------------------------------------------------------------- 1 | from .tool import PyDicer 2 | 3 | __project__ = "pydicer" 4 | __version__ = "0.2.0" 5 | __keywords__ = [ 6 | "medical imaging", 7 | "visualisation", 8 | "conversion", 9 | "DICOM", 10 | "radiotherapy", 11 | "image analysis", 12 | ] 13 | __author__ = "Ingham Medical Physics" 14 | __author_email__ = "phillip.chlap@unsw.edu.au" 15 | __url__ = "https://australiancancerdatanetwork.github.io/pydicer/" 16 | __platforms__ = "ALL" 17 | -------------------------------------------------------------------------------- /pydicer/generate/mhubconfigs/platipy.yml: -------------------------------------------------------------------------------- 1 | general: 2 | data_base_dir: /app/data 3 | version: 1.0.0 4 | description: custom pipeline from nifti to nifti 5 | 6 | execute: 7 | - FileStructureImporter 8 | - PlatipyRunner 9 | - DataOrganizer 10 | 11 | modules: 12 | FileStructureImporter: 13 | input_dir: "input_data" 14 | structures: 15 | - image.nii.gz@instance@nifti:mod=ct 16 | import_id: _instance 17 | 18 | DataOrganizer: 19 | targets: 20 | - nifti:mod=seg-->/app/data/output_data/[d:roi].nii.gz 21 | -------------------------------------------------------------------------------- /docs/_static/custom.css: -------------------------------------------------------------------------------- 1 | table { 2 | color: var(--color-foreground-primary) !important; 3 | } 4 | 5 | table thead { 6 | background-color: var(--color-card-marginals-background) !important; 7 | } 8 | 9 | table tr:nth-child(odd) { 10 | background: var(--color-background-border) !important; 11 | } 12 | 13 | table tr:nth-child(even) { 14 | background: var(--color-card-marginals-background) !important; 15 | } 16 | 17 | div.nboutput.container div.output_area.stderr { 18 | background: var(--color-background-primary) !important; 19 | } -------------------------------------------------------------------------------- /docs/convert.rst: -------------------------------------------------------------------------------- 1 | ##################### 2 | Conversion 3 | ##################### 4 | 5 | Data Conversion 6 | =============== 7 | 8 | .. automodule:: pydicer.convert.data 9 | :members: 10 | 11 | Header Conversion 12 | ================= 13 | 14 | .. automodule:: pydicer.convert.headers 15 | :members: 16 | 17 | RTSTRUCT Conversion 18 | =================== 19 | 20 | .. automodule:: pydicer.convert.rtstruct 21 | :members: 22 | 23 | PET Conversion 24 | ============== 25 | 26 | .. 
automodule:: pydicer.convert.pt 27 | :members: 28 | 29 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from pydicer.utils import fetch_converted_test_data 4 | 5 | 6 | @pytest.fixture 7 | def test_data_converted(): 8 | """Fixture to grab the test data with already converted into PyDicer format""" 9 | 10 | return fetch_converted_test_data("./testdata_hnscc", dataset="HNSCC") 11 | 12 | 13 | @pytest.fixture 14 | def test_data_autoseg(): 15 | """Fixture to grab the test data in PyDicer format for auto-seg tests""" 16 | 17 | return fetch_converted_test_data("./testdata_lctsc", dataset="LCTSC") 18 | -------------------------------------------------------------------------------- /pydicer/generate/mhubconfigs/lungmask.yml: -------------------------------------------------------------------------------- 1 | general: 2 | data_base_dir: /app/data 3 | version: 1.0.0 4 | description: custom pipeline from nifti to nifti 5 | 6 | execute: 7 | - FileStructureImporter 8 | - LungMaskRunner 9 | - DataOrganizer 10 | 11 | modules: 12 | FileStructureImporter: 13 | input_dir: "input_data" 14 | structures: 15 | - image.nii.gz@instance@nifti:mod=ct 16 | import_id: _instance 17 | 18 | LungMaskRunner: 19 | batchsize: 64 20 | 21 | DataOrganizer: 22 | targets: 23 | - nifti:mod=seg-->/app/data/output_data/[d:roi].nii.gz 24 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.linting.enabled": true, 3 | "python.linting.pylintEnabled": true, 4 | "python.linting.pylintArgs": [ 5 | "--rcfile=.pylintrc" 6 | ], 7 | "python.formatting.blackArgs": [ 8 | "--line-length", 9 | "99" 10 | ], 11 | "python.formatting.provider": "black", 12 | "editor.rulers": [ 13 | 99 14 | ], 15 | "python.testing.pytestArgs": [ 16 | "." 17 | ], 18 | "python.testing.unittestEnabled": false, 19 | "python.testing.nosetestsEnabled": false, 20 | "python.testing.pytestEnabled": true 21 | } -------------------------------------------------------------------------------- /docs/input.rst: -------------------------------------------------------------------------------- 1 | ##################### 2 | Input 3 | ##################### 4 | 5 | 6 | .. automodule:: pydicer.input.filesystem 7 | :members: 8 | 9 | ---- 10 | 11 | 12 | .. automodule:: pydicer.input.pacs 13 | :members: 14 | 15 | 16 | ---- 17 | 18 | 19 | .. automodule:: pydicer.input.orthanc 20 | :members: 21 | 22 | 23 | ---- 24 | 25 | 26 | .. automodule:: pydicer.input.tcia 27 | :members: 28 | 29 | 30 | ---- 31 | 32 | 33 | 34 | .. automodule:: pydicer.input.test 35 | :members: 36 | 37 | 38 | ---- 39 | 40 | 41 | 42 | .. 
automodule:: pydicer.input.web 43 | :members: 44 | -------------------------------------------------------------------------------- /pydicer/generate/mhubconfigs/totalsegmentator.yml: -------------------------------------------------------------------------------- 1 | general: 2 | data_base_dir: /app/data 3 | version: 1.0.0 4 | description: custom pipeline from nifti to nifti 5 | 6 | execute: 7 | - FileStructureImporter 8 | - TotalSegmentatorRunner 9 | - DataOrganizer 10 | 11 | modules: 12 | FileStructureImporter: 13 | input_dir: "input_data" 14 | structures: 15 | - image.nii.gz@instance@nifti:mod=ct 16 | import_id: _instance 17 | 18 | TotalSegmentatorRunner: 19 | use_fast_mode: true 20 | 21 | DataOrganizer: 22 | targets: 23 | - nifti:mod=seg-->/app/data/output_data/[d:roi].nii.gz 24 | -------------------------------------------------------------------------------- /pydicer/constants.py: -------------------------------------------------------------------------------- 1 | RT_STRUCTURE_STORAGE_UID = "1.2.840.10008.5.1.4.1.1.481.3" 2 | RT_DOSE_STORAGE_UID = "1.2.840.10008.5.1.4.1.1.481.2" 3 | RT_PLAN_STORAGE_UID = "1.2.840.10008.5.1.4.1.1.481.5" 4 | CT_IMAGE_STORAGE_UID = "1.2.840.10008.5.1.4.1.1.2" 5 | PET_IMAGE_STORAGE_UID = "1.2.840.10008.5.1.4.1.1.128" 6 | MR_IMAGE_STORAGE_UID = "1.2.840.10008.5.1.4.1.1.4" 7 | 8 | PYDICER_DIR_NAME = ".pydicer" 9 | CONVERTED_DIR_NAME = "data" 10 | QUARANTINE_DIR_NAME = "quarantine" 11 | DEFAULT_MAPPING_ID = "default" 12 | 13 | DICOM_FILE_EXTENSIONS = [ 14 | "dcm", 15 | "DCM", 16 | "dcim", 17 | "DCIM", 18 | "dicom", 19 | "DICOM", 20 | ] 21 | -------------------------------------------------------------------------------- /pydicer/generate/mhubconfigs/nnunet_liver.yml: -------------------------------------------------------------------------------- 1 | general: 2 | data_base_dir: /app/data 3 | version: 1.0.0 4 | description: custom pipeline from nifti to nifti 5 | 6 | execute: 7 | - FileStructureImporter 8 | - NNUnetRunner 9 | - DataOrganizer 10 | 11 | modules: 12 | FileStructureImporter: 13 | input_dir: "input_data" 14 | structures: 15 | - image.nii.gz@instance@nifti:mod=ct 16 | import_id: _instance 17 | 18 | NNUnetRunner: 19 | nnunet_task: "Task003_Liver" 20 | nnunet_model: "3d_lowres" 21 | roi: LIVER,LIVER+NEOPLASM_MALIGNANT_PRIMARY 22 | 23 | DataOrganizer: 24 | targets: 25 | - nifti:mod=seg-->/app/data/output_data/[d:roi].nii.gz 26 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = docs/source 9 | BUILDDIR = docs 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /pydicer/input/test.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | from pydicer.input.web import WebInput 4 | 5 | 6 | class TestInput(WebInput): 7 | __test__ = False # pytest will try to use this as a test class without this 8 | 9 | def __init__(self, working_directory: Union[str, list] = None): 10 | """ 11 | A test input class to download example data from zenodo 12 | 13 | Args: 14 | working_directory (str|pathlib.Path, optional): The working directory in which to 15 | store the data fetched. Defaults to a temp directory. 16 | """ 17 | 18 | data_url = "https://zenodo.org/record/5276878/files/HNSCC.zip" 19 | 20 | super().__init__(data_url, working_directory) 21 | -------------------------------------------------------------------------------- /pydicer/generate/mhubconfigs/casust.yml: -------------------------------------------------------------------------------- 1 | general: 2 | data_base_dir: /app/data 3 | version: 1.0.0 4 | description: custom pipeline from nifti to nifti 5 | 6 | execute: 7 | - FileStructureImporter 8 | - NNUnetRunner 9 | - CasustRunner 10 | - DataOrganizer 11 | 12 | modules: 13 | FileStructureImporter: 14 | input_dir: "input_data" 15 | structures: 16 | - image.nii.gz@instance@nifti:mod=ct 17 | import_id: _instance 18 | 19 | NNUnetRunner: 20 | folds: all 21 | nnunet_task: Task400_OPEN_HEART_1FOLD 22 | nnunet_model: 3d_lowres 23 | roi: HEART 24 | 25 | CasustRunner: 26 | test_time_augmentation: 0 27 | 28 | DataOrganizer: 29 | targets: 30 | - nifti:mod=seg-->/app/data/output_data/[d:roi].nii.gz 31 | -------------------------------------------------------------------------------- /pydicer/input/filesystem.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import Union 3 | 4 | from pydicer.input.base import InputBase 5 | 6 | 7 | class FileSystemInput(InputBase): 8 | def __init__(self, directory: Union[str, Path]): 9 | """ 10 | Class for inputing files from the file system 11 | 12 | Args: 13 | directory (str|pathlib.Path): The directory in which to find DICOM files. 
14 | """ 15 | 16 | super().__init__(directory) 17 | 18 | if not self.working_directory.exists(): 19 | raise FileNotFoundError("The directory provided does not exist") 20 | 21 | if not self.working_directory.is_dir(): 22 | raise AttributeError("Ensure that the path specified is a directory") 23 | -------------------------------------------------------------------------------- /.devcontainer/docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | # Update this to the name of the service you want to work with in your docker-compose.yml file 3 | pydicer-dev: 4 | # Use a development image for dev 5 | build: !reset "null" 6 | image: mcr.microsoft.com/devcontainers/python:1-3.9-bookworm 7 | 8 | volumes: 9 | # Update this to wherever you want VS Code to mount the folder of your project 10 | - ..:/workspaces:cached 11 | 12 | # Uncomment the next four lines if you will use a ptrace-based debugger like C++, Go, and Rust. 13 | # cap_add: 14 | # - SYS_PTRACE 15 | # security_opt: 16 | # - seccomp:unconfined 17 | 18 | # Overrides default command so things don't shut down after the process ends. 19 | command: /bin/sh -c "while sleep 1000; do :; done" 20 | -------------------------------------------------------------------------------- /pydicer/input/base.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | import logging 3 | from typing import Union 4 | 5 | import abc 6 | from pathlib import Path 7 | 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | class InputBase(abc.ABC): 13 | def __init__(self, working_directory: Union[str, Path] = None): 14 | """ 15 | Base class for input modules. 16 | 17 | Args: 18 | working_directory (str|pathlib.Path, optional): The working directory in which to 19 | store the data fetched. Defaults to a temp directory. 20 | """ 21 | 22 | if working_directory is None: 23 | working_directory = tempfile.mkdtemp() 24 | 25 | self.working_directory = Path(working_directory) 26 | 27 | logger.debug("Working directory set to: %s", self.working_directory) 28 | -------------------------------------------------------------------------------- /pydicer/generate/mhubconfigs/nnunet_pancreas.yml: -------------------------------------------------------------------------------- 1 | general: 2 | data_base_dir: /app/data 3 | version: 1.0.0 4 | description: custom pipeline from nifti to nifti 5 | 6 | execute: 7 | - FileStructureImporter 8 | - NNUnetRunner 9 | - DataOrganizer 10 | 11 | modules: 12 | FileStructureImporter: 13 | input_dir: "input_data" 14 | structures: 15 | - image.nii.gz@instance@nifti:mod=ct 16 | import_id: _instance 17 | 18 | NNUnetRunner: 19 | input_data_type: nifti:mod=ct 20 | nnunet_task: Task007_Pancreas 21 | nnunet_model: 3d_lowres 22 | export_prob_maps: False 23 | roi: PANCREAS,PANCREAS+NEOPLASM_MALIGNANT_PRIMARY 24 | prob_map_segments: [Background, Pancreas, Pancreatic_cancer] 25 | 26 | DataOrganizer: 27 | targets: 28 | - nifti:mod=seg-->/app/data/output_data/[d:roi].nii.gz 29 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 
16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: Build Docs 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: ["main"] 7 | 8 | jobs: 9 | docs: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - uses: actions/checkout@v2 14 | - name: Set up Python 15 | uses: actions/setup-python@v2 16 | with: 17 | python-version: "3.9" 18 | - name: Install dependencies 19 | run: | 20 | curl -sSL https://install.python-poetry.org | python - --version 1.3.2 21 | poetry install --with docs --all-extras 22 | echo "PYTHONPATH=`pwd`" >> $GITHUB_ENV 23 | - name: Build Docs 24 | run: | 25 | sudo apt-get update -y && sudo apt-get install -y pandoc python3-pkg-resources python3-setuptools 26 | poetry run sphinx-build -b html -a docs docs/site 27 | - name: Deploy docs 28 | uses: peaceiris/actions-gh-pages@v3 29 | with: 30 | github_token: ${{ secrets.GITHUB_TOKEN }} 31 | publish_dir: ./docs/site 32 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. mdinclude:: ../README.md 2 | 3 | .. toctree:: 4 | :caption: Examples 5 | :maxdepth: 2 6 | :hidden: 7 | 8 | _examples/GettingStarted 9 | _examples/ConvertingData 10 | _examples/VisualiseData 11 | _examples/Radiomics 12 | _examples/DoseMetrics 13 | _examples/DatasetPreparation 14 | _examples/AutoSegmentation 15 | _examples/nnUNet 16 | 17 | .. toctree:: 18 | :caption: Guides 19 | :maxdepth: 2 20 | :hidden: 21 | 22 | _examples/WorkingWithData 23 | _examples/WorkingWithStructures 24 | _examples/ObjectGeneration 25 | _examples/Configuration 26 | 27 | .. toctree:: 28 | :caption: Developers 29 | :maxdepth: 2 30 | :hidden: 31 | 32 | contributing 33 | code_of_conduct 34 | 35 | .. toctree:: 36 | :caption: Reference 37 | :maxdepth: 5 38 | :hidden: 39 | 40 | tool 41 | input 42 | config 43 | utils 44 | preprocess 45 | convert 46 | visualise 47 | dataset 48 | analyse 49 | generate 50 | nnunet 51 | -------------------------------------------------------------------------------- /pydicer/convert/headers.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import json 3 | from typing import Union 4 | from pathlib import Path 5 | 6 | import pydicom 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | def convert_dicom_headers( 12 | dcm_file: Union[str, Path], binary_path: str, json_file: Union[str, Path] 13 | ): 14 | """Save the DICOM Headers as a JSON file 15 | 16 | Args: 17 | dcm_file (str|pathlib.Path): The files from which to save the headers. 18 | binary_path (str): Relative path to binary data which will be placed into JSON. 19 | json_file (str|pathlib.Path): Path to JSON file to save output. 
20 | """ 21 | 22 | # Write the DICOM headers (of the first slice) to JSON 23 | dcm_ds = pydicom.read_file(dcm_file, force=True) 24 | dcm_dict = dcm_ds.to_json_dict( 25 | bulk_data_threshold=4096, bulk_data_element_handler=lambda _: binary_path 26 | ) 27 | 28 | with open(json_file, "w", encoding="utf8") as jsonfile: 29 | json.dump(dcm_dict, jsonfile, indent=2) 30 | 31 | logger.debug("DICOM Headers written to: %s", json_file) 32 | -------------------------------------------------------------------------------- /tests/test_config.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=redefined-outer-name,missing-function-docstring 2 | 3 | import tempfile 4 | 5 | import pytest 6 | from pydicer.config import PyDicerConfig 7 | 8 | 9 | def test_generate_nrrd_config(): 10 | 11 | with tempfile.TemporaryDirectory() as directory: 12 | 13 | config = PyDicerConfig(directory) 14 | 15 | # Assert that generate NRRD is True (default) 16 | assert config.get_config("generate_nrrd") 17 | 18 | # Update the config 19 | config.set_config("generate_nrrd", False) 20 | 21 | # Assert that it is now False 22 | assert not config.get_config("generate_nrrd") 23 | 24 | 25 | def test_config_not_exists(): 26 | 27 | with tempfile.TemporaryDirectory() as directory: 28 | 29 | config = PyDicerConfig(directory) 30 | 31 | with pytest.raises(AttributeError): 32 | config.get_config("doesn't_exist") 33 | 34 | with pytest.raises(AttributeError): 35 | config.set_config("doesn't_exist", 123) 36 | 37 | 38 | def test_config_invalid_value(): 39 | 40 | with tempfile.TemporaryDirectory() as directory: 41 | 42 | config = PyDicerConfig(directory) 43 | 44 | with pytest.raises(ValueError): 45 | config.set_config("generate_nrrd", 123) 46 | -------------------------------------------------------------------------------- /pydicer/input/web.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from typing import Union 4 | 5 | from pydicer.input.base import InputBase 6 | from pydicer.utils import download_and_extract_zip_file 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | class WebInput(InputBase): 12 | def __init__(self, data_url: str, working_directory: Union[str, Path] = None): 13 | """ 14 | Class for downloading and saving input data off the internet 15 | 16 | Args: 17 | data_url (str): The URL of where the data is stored. For now, it must be a link to a 18 | zip file 19 | working_directory (str|pathlib.Path, optional): The working directory in which to 20 | store the data fetched. Defaults to a temp directory. 21 | """ 22 | super().__init__(working_directory) 23 | self.data_url = data_url 24 | 25 | def fetch_data(self): 26 | """Download the data.""" 27 | 28 | files_in_directory = list(self.working_directory.glob("*")) 29 | if len(files_in_directory) > 0: 30 | logger.warning("Directory not empty, won't download files") 31 | return 32 | 33 | logger.info("Downloading files from %s", self.data_url) 34 | download_and_extract_zip_file(self.data_url, self.working_directory) 35 | -------------------------------------------------------------------------------- /examples/ASMIRTWorkshop/README.md: -------------------------------------------------------------------------------- 1 | # ASMIRT Workshop 2023 2 | 3 | This directory contains a series of Jupyter notebooks prepared for the **Radiotherapy image data 4 | analysis using Python** Workshop at ASMIRT 2023 in Sydney, Australia. 
5 | 6 | ## Part 1: Python Basics 7 | 8 | [Slides](https://unsw-my.sharepoint.com/:p:/g/personal/z3523015_ad_unsw_edu_au/EdvxEVKSZV1Glpb6pHn55mgBD7xC5Whu_SPoFrfJBITEYg?e=DKiv6G) 9 | 10 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/AustralianCancerDataNetwork/pydicer/blob/main/examples/ASMIRTWorkshop/1_Python_Intro.ipynb) 11 | 12 | ## Part 2: Working with DICOM 13 | 14 | [Slides](https://unsw-my.sharepoint.com/:p:/g/personal/z3523015_ad_unsw_edu_au/EXyvFauxn4FBhFXiSskhzd0BYaE-Q3xPzI_PlxoTtC6_8Q?e=qFp3zm) 15 | 16 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/AustralianCancerDataNetwork/pydicer/blob/main/examples/ASMIRTWorkshop/2_DICOM.ipynb) 17 | 18 | ## Part 3: Converting and analysing data 19 | 20 | [Slides](https://unsw-my.sharepoint.com/:p:/g/personal/z3523015_ad_unsw_edu_au/ETopzSi06zxNqQgpqkHbR94B2DIuWy8TflxFB8ozfw1R-g?e=eq4hIC) 21 | 22 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/AustralianCancerDataNetwork/pydicer/blob/main/examples/ASMIRTWorkshop/3_RT_Data_Analysis.ipynb) 23 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "pydicer" 3 | version = "0.2.0" 4 | description = "PYthon Dicom Image ConvertER" 5 | authors = ["Ingham Medical Physics"] 6 | license = "Apache License 2.0" 7 | classifiers = [ 8 | "Programming Language :: Python :: 3", 9 | "License :: OSI Approved :: Apache Software License", 10 | "Operating System :: OS Independent", 11 | "Topic :: Scientific/Engineering :: Image Processing", 12 | "Topic :: Scientific/Engineering :: Medical Science Apps.", 13 | "Topic :: Scientific/Engineering :: Visualization", 14 | "Development Status :: 4 - Beta", 15 | ] 16 | readme = "README.md" 17 | 18 | [tool.poetry.dependencies] 19 | python = "^3.8" 20 | pydicom = ">=2.1.2" 21 | SimpleITK = ">=2.0.2" 22 | pyorthanc = ">=1.11.2" 23 | platipy = ">=0.5.0" 24 | argparse = ">=1.4.0" 25 | seaborn = "^0.12.0" 26 | tqdm = "^4.55.1" 27 | scikit-learn = "^1.2.2" 28 | pyradiomics = ">=3.0.1" 29 | 30 | [tool.poetry.group.dev.dependencies] 31 | pylint = "^2.13.5" 32 | black = "^22.3.0" 33 | pytest = "6.2.5" 34 | mypy = "^1.14.0" 35 | pytest-mock = "^3.14.0" 36 | 37 | [tool.poetry.group.docs.dependencies] 38 | sphinx = "^5.1.1" 39 | sphinxcontrib-napoleon = "^0.7" 40 | sphinx-theme = "^1.0" 41 | sphinx-click = "^4.3.0" 42 | furo = "^2022.6.21" 43 | nbsphinx = "^0.8.9" 44 | m2r2 = "^0.3.3" 45 | notebook = "^6.5.4" 46 | 47 | 48 | [build-system] 49 | requires = ["poetry-core>=1.0.0"] 50 | build-backend = "poetry.core.masonry.api" 51 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=redefined-outer-name,missing-function-docstring 2 | 3 | from pathlib import Path 4 | 5 | import pytest 6 | import pydicom 7 | 8 | from pydicer.input.test import TestInput 9 | from pydicer.utils import determine_dcm_datetime 10 | 11 | 12 | @pytest.fixture 13 | def test_data(): 14 | """Fixture to grab the test data""" 15 | 16 | directory = Path("./testdata") 17 | directory.mkdir(exist_ok=True, parents=True) 18 | 19 | working_directory = directory.joinpath("dicom") 20 | 
working_directory.mkdir(exist_ok=True, parents=True) 21 | 22 | test_input = TestInput(working_directory) 23 | test_input.fetch_data() 24 | 25 | return working_directory 26 | 27 | 28 | def test_fetch_ds_datetime(test_data): 29 | 30 | rt_struct_file = test_data.joinpath( 31 | "HNSCC", 32 | "HNSCC-01-0019", 33 | "07-04-1998-NA-RT SIMULATION-48452", 34 | "1.000000-NA-10361", 35 | "1-1.dcm", 36 | ) 37 | 38 | ds = pydicom.read_file(rt_struct_file) 39 | ds_datetime = determine_dcm_datetime(ds) 40 | assert ds_datetime.year == 1998 41 | assert ds_datetime.month == 7 42 | assert ds_datetime.day == 4 43 | assert ds_datetime.hour == 0 44 | assert ds_datetime.minute == 0 45 | 46 | ds_datetime = determine_dcm_datetime(ds, require_time=True) 47 | assert ds_datetime.year == 2001 48 | assert ds_datetime.month == 10 49 | assert ds_datetime.day == 28 50 | assert ds_datetime.hour == 12 51 | assert ds_datetime.minute == 48 52 | -------------------------------------------------------------------------------- /pydicer/input/tcia.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | from platipy.dicom.download import tcia 4 | 5 | from pydicer.input.base import InputBase 6 | 7 | 8 | class TCIAInput(InputBase): 9 | def __init__( 10 | self, 11 | collection: str, 12 | patient_ids: list, 13 | modalities: list = None, 14 | working_directory: Union[str, list] = None, 15 | ): 16 | """ 17 | Input class that interfaces with the TCIA API 18 | 19 | Args: 20 | collection (str): The TCIA collection to fetch from 21 | patient_ids (list, optional): The patient IDs to fetch. If not set all patients are 22 | fetched 23 | modalities (list, optional): A list of strings defining the modalites to fetch. Will 24 | fetch all modalities available if not specified. 25 | working_directory (str|pathlib.Path, optional): The working directory in which 26 | to store the data fetched. Defaults to a temp directory. 27 | """ 28 | super().__init__(working_directory) 29 | self.collection = collection 30 | self.patient_ids = patient_ids 31 | self.modalities = modalities 32 | 33 | def fetch_data(self): 34 | """ 35 | Function to download the data from TCIA and write locally 36 | """ 37 | 38 | tcia.fetch_data( 39 | self.collection, 40 | self.patient_ids, 41 | self.modalities, 42 | nifti=False, 43 | output_directory=self.working_directory, 44 | ) 45 | -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | // For format details, see https://aka.ms/devcontainer.json. For config options, see the 2 | // README at: https://github.com/devcontainers/templates/tree/main/src/docker-existing-docker-compose 3 | { 4 | "name": "Existing Docker Compose (Extend)", 5 | // Update the 'dockerComposeFile' list if you have more compose files or use different names. 6 | // The .devcontainer/docker-compose.yml file contains any overrides you need/want to make. 7 | "dockerComposeFile": [ 8 | "../docker-compose.yml", 9 | "docker-compose.yml" 10 | ], 11 | // The 'service' property is the name of the service for the container that VS Code should 12 | // use. Update this value and .devcontainer/docker-compose.yml to the real service name. 13 | "service": "pydicer-dev", 14 | // The optional 'workspaceFolder' property is the path VS Code should open by default when 15 | // connected. 
This is typically a file mount in .devcontainer/docker-compose.yml 16 | "workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}", 17 | // Features to add to the dev container. More info: https://containers.dev/features. 18 | "features": { 19 | "ghcr.io/devcontainers-extra/features/poetry:2": {}, 20 | "ghcr.io/nikobockerman/devcontainer-features/poetry-persistent-cache:1": {}, 21 | }, 22 | // "features": {}, 23 | // Use 'forwardPorts' to make a list of ports inside the container available locally. 24 | // "forwardPorts": [], 25 | // Uncomment the next line if you want start specific services in your Docker Compose config. 26 | // "runServices": [], 27 | // Uncomment the next line if you want to keep your containers running after VS Code shuts down. 28 | // "shutdownAction": "none", 29 | // Uncomment the next line to run commands after the container is created. 30 | // "postCreateCommand": "cat /etc/os-release", 31 | "updateContentCommand": ".devcontainer/install-dev-tools.sh", 32 | // Configure tool-specific properties. 33 | "customizations": { 34 | "vscode": { 35 | "extensions": [ 36 | "ms-toolsai.jupyter" // <-- Added the Microsoft Jupyter extension 37 | ] 38 | } 39 | }, 40 | // Uncomment to connect as an existing user other than the container default. More info: https://aka.ms/dev-containers-non-root. 41 | // "remoteUser": "devcontainer" 42 | } -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | # This CITATION.cff file was generated with cffinit. 2 | # Visit https://bit.ly/cffinit to generate yours today! 3 | 4 | cff-version: 1.2.0 5 | title: PyDicer 6 | message: >- 7 | If you use this software, please cite our SoftwareX article. 8 | type: software 9 | authors: 10 | - given-names: Phillip 11 | family-names: Chlap 12 | email: phillip.chlap@unsw.edu.au 13 | affiliation: University of New South Wales 14 | orcid: "https://orcid.org/0000-0002-6517-8745" 15 | - given-names: Daniel 16 | family-names: Al Mouiee 17 | affiliation: Ingham Institute 18 | - given-names: Robert N. 19 | family-names: Finnegan 20 | affiliation: University of Sydney 21 | orcid: "https://orcid.org/0000-0003-4728-8462" 22 | - given-names: Xinyi 23 | family-names: Cui 24 | affiliation: University of New South Wales 25 | - given-names: Shrikant 26 | family-names: Deshpande 27 | affiliation: South Western Sydney Local Health District 28 | - given-names: Vicky 29 | family-names: Chin 30 | affiliation: University of New South Wales 31 | - given-names: Lois 32 | family-names: Holloway 33 | affiliation: University of New South Wales 34 | repository-code: "https://github.com/AustralianCancerDataNetwork/pydicer" 35 | url: "https://australiancancerdatanetwork.github.io/pydicer/" 36 | keywords: 37 | - medical imaging 38 | - DICOM 39 | - radiotherapy 40 | license: Apache-2.0 41 | version: 0.2.0 42 | date-released: "2023-12-20" 43 | preferred-citation: 44 | authors: 45 | - family-names: Chlap 46 | given-names: Phillip 47 | orcid: "https://orcid.org/0000-0002-6517-8745" 48 | - given-names: Daniel 49 | family-names: Al Mouiee 50 | - family-names: Finnegan 51 | given-names: Robert N. 
52 | orcid: "https://orcid.org/0000-0003-4728-8462" 53 | - given-names: Janet 54 | family-names: Cui 55 | - given-names: Vicky 56 | family-names: Chin 57 | - given-names: Shrikant 58 | family-names: Deshpande 59 | - given-names: Lois 60 | family-names: Holloway 61 | date-published: "2024-12-14" 62 | doi: 10.1016/j.softx.2024.102010 63 | journal: SoftwareX 64 | issn: 2352-7110 65 | publisher: 66 | name: Elsevier 67 | title: "PyDicer: An open-source python library for conversion and analysis of radiotherapy DICOM data" 68 | type: article 69 | url: "https://doi.org/10.1016/j.softx.2024.102010" 70 | volume: 29 71 | year: 2025 72 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | .DS_Store 132 | 133 | # Nifti 134 | *.nii.gz 135 | *.dcm 136 | 137 | testdata/ 138 | pydicer_testdata/ 139 | testdata*/ 140 | 141 | examples/data/ 142 | 143 | docs/**/*.md 144 | docs/**/*.png 145 | docs/**/*.jpg 146 | docs/**/*.gif 147 | docs/site/ 148 | docs/_examples/ 149 | -------------------------------------------------------------------------------- /pydicer/logger.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime as dt 2 | import pandas as pd 3 | 4 | SUMMARY_CSV_COLS = ["module", "hashed_uid", "success", "log", "start_time", "end_time"] 5 | 6 | 7 | class PatientLogger: 8 | """Class to document a patient's pipeline progress in a personalised CSV file""" 9 | 10 | def __init__(self, pat_id, data_directory, force=True): 11 | self.pat_id = pat_id 12 | self.data_directory = data_directory 13 | self.start_time = dt.now() 14 | 15 | # create pat dir if not yet created 16 | pat_directory = self.data_directory.joinpath(pat_id) 17 | pat_directory.mkdir(exist_ok=True) 18 | 19 | self.summary_csv_path = pat_directory.joinpath("summary.csv") 20 | 21 | # create patient csv if not already created 22 | if not self.summary_csv_path.exists() or force: 23 | df_pat_log = pd.DataFrame(columns=SUMMARY_CSV_COLS) 24 | df_pat_log.to_csv(self.summary_csv_path, index=False) 25 | 26 | def log_module_error(self, module: str, hashed_uid: str, error_log: str): 27 | """Function to log errors for a specific pydicer module 28 | 29 | Args: 30 | module (str): pydicer module to log error for in CSV 31 | hashed_uid (str): hashed UID of the patient being logged to the error CSV 32 | error_log (str): error to log in CSV 33 | """ 34 | end_time = dt.now() 35 | df_error = pd.DataFrame( 36 | [[module, hashed_uid, 1, error_log, self.start_time, end_time]], 37 | columns=SUMMARY_CSV_COLS, 38 | ) 39 | df_error.to_csv(self.summary_csv_path, header=False, mode="a", index=False) 40 | 41 | def eval_module_process(self, module: str, hashed_uid: str): 42 | """Function to log if any patient had issues for a specific pydicer module 43 | 44 | Args: 45 | module (str): pydicer module to check if no errors were generated for all patients 46 | hashed_uid (str): hashed UID of the patient being logged to the error CSV 47 | """ 48 | 49 | end_time = dt.now() 50 | df_summary = pd.read_csv(self.summary_csv_path) 51 | df_summary_mod = df_summary[ 52 | (df_summary.module == module) 53 | & (df_summary.success == 1) 54 | & (df_summary.hashed_uid == hashed_uid) 55 | ] 56 | if len(df_summary_mod) == 0: 57 | df_final_summary = pd.DataFrame( 58 | [[module, hashed_uid, 0, "", self.start_time, end_time]], 59 | columns=SUMMARY_CSV_COLS, 60 | ) 61 | df_final_summary.to_csv( 62 | self.summary_csv_path, 63 | header=False, 64 | mode="a", 65 | index=False, 66 | ) 67 | -------------------------------------------------------------------------------- /pydicer/quarantine.py: 
-------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import shutil 3 | import datetime 4 | 5 | import pandas as pd 6 | import pydicom 7 | 8 | # Attempt to store the following meta data keys in the quarantine summary DataFrame 9 | QUARATINE_DICOM_KEYS = ["PatientID", "Modality", "SOPInstanceUID", "SeriesDescription"] 10 | 11 | 12 | def copy_file_to_quarantine(file: Path, working_directory: Path, error_msg: str): 13 | """Move a DICOM file that couldn't be processed into the quarantine directory 14 | 15 | Args: 16 | file (pathlib.Path): DICOM path to be moved into quarantine 17 | working_directory (pathlib.Path): Main working directory for pydicer 18 | error_msg (str): error message associated with the quarantined file 19 | """ 20 | 21 | # Attempt to get some header information from the DICOM object to write into the summary 22 | 23 | summary_dict = { 24 | "file": file, 25 | "error": error_msg, 26 | "quarantine_dttm": datetime.datetime.now(), 27 | } 28 | 29 | ds = pydicom.read_file(file, force=True) 30 | for k in QUARATINE_DICOM_KEYS: 31 | val = None 32 | if k in ds: 33 | val = ds[k].value 34 | 35 | summary_dict[k] = val 36 | 37 | pat_id = "UNKNOWN" 38 | if "PatientID" in ds: 39 | pat_id = ds.PatientID 40 | 41 | df_this_summary = pd.DataFrame([summary_dict]) 42 | 43 | quaran_dir = Path(working_directory).joinpath("quarantine") 44 | file_dir = quaran_dir.joinpath(pat_id, file.parent.name) 45 | summary_file = quaran_dir.joinpath("summary.csv") 46 | 47 | df_summary = None 48 | if summary_file.exists(): 49 | df_summary = pd.read_csv(summary_file, index_col=0) 50 | df_summary = pd.concat([df_summary, df_this_summary], ignore_index=True) 51 | else: 52 | df_summary = df_this_summary 53 | 54 | # Create "quarantine/PATH_TO_DCM" directory 55 | file_dir.mkdir(exist_ok=True, parents=True) 56 | 57 | # Copy original DCM file to quarantine area 58 | shutil.copyfile(file, file_dir.joinpath(file.name)) 59 | 60 | # Create (if doesn't exist) summary file to hold info about file error 61 | df_summary.to_csv(summary_file) 62 | 63 | 64 | def read_quarantined_data(working_directory: Path) -> pd.DataFrame: 65 | """A function to read the data from the quarantine summary. 66 | 67 | Args: 68 | working_directory (pathlib.Path): The PyDicer working directory 69 | 70 | Returns: 71 | pd.DataFrame: A DataFrame summarising the contents of the quarantine. 72 | """ 73 | 74 | quarantine_dir = Path(working_directory).joinpath("quarantine") 75 | 76 | summary_file = quarantine_dir.joinpath("summary.csv") 77 | 78 | df_summary = pd.read_csv(summary_file, index_col=0) 79 | 80 | return df_summary 81 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | PyDicer welcomes any and all contributions in the way of new functionality, bug fixes or documentation. This document provides some guidance to developers who would like to contribute to the project. 4 | 5 | ## Git 6 | 7 | Create a branch off of **main** while you make your changes or implement your new tool. 8 | Once complete, head to [GitHub to create a pull 9 | request](https://github.com/australiancancerdatanetwork/pydicer/compare) to merge your changes 10 | into the **main** branch. At this point the automated tests will run and maintainers will review 11 | your submission before merging. 12 | 13 | ## Poetry 14 | 15 | PyDicer uses poetry to manage dependencies. 
Instructions for installing poetry are available 16 | [here](https://python-poetry.org/docs/#installation). Once installed, you can easily install the 17 | libraries required to develop for PyDicer using the following command: 18 | 19 | ```bash 20 | poetry install --with dev,docs --all-extras 21 | ``` 22 | 23 | This will automatically create a virtual environment managed by poetry. To run a script within this 24 | environment, use the `poetry run` followed by what to run. For example, to run a test.py script: 25 | 26 | ```bash 27 | poetry run python test.py 28 | ``` 29 | 30 | ## VSC Devcontainer 31 | 32 | You may setup a Visual Studio Code development container (Devcontainer) to ensure a standardised 33 | development and testing environment, without the need to perform overhead installation. This 34 | assumes that Docker and VSC are installed on your system. 35 | 36 | To set this up, you may perform the VSC shortcut `ctrl + shift + p` (or `cmd + shift p` on Mac) and 37 | select the `Reopen in devcontainer` option. This will create a Docker container with Python 3.9 38 | and its dependencies installed, along with other tools we use for development (eg. git, pytest). 39 | 40 | ## Coding standards 41 | 42 | Code in PyDicer must conform to Python's PEP-8 standards to ensure consistent formatting between contributors. To ensure this, pylint is used to check code conforms to these standards before a Pull Request can be merged. You can run pylint from the command line using the following command: 43 | 44 | ```bash 45 | pylint pydicer 46 | ``` 47 | 48 | But a better idea is to ensure you are using a Python IDE which supports linting (such as [VSCode](https://code.visualstudio.com/docs/python/linting) or PyCharm). Make sure you resolve all suggestions from pylint before submitting your pull request. 49 | 50 | If you're new to using pylint, you may like to [read this guide](https://docs.pylint.org/en/v2.11.1/tutorial.html). 51 | 52 | ## Automated tests 53 | 54 | A test suite is included in PyDicer which ensures that code contributed to the repository functions as expected and continues to function as further development takes place. Any code submitted via a pull request should include appropriate automated tests for the new code. 55 | 56 | pytest is used as a testing library. Running the tests from the command line is really easy: 57 | 58 | ```bash 59 | pytest 60 | ``` 61 | 62 | Add your tests to the appropriate file in the `tests/` directory. See the [pytest documention](https://docs.pytest.org/en/6.2.x/getting-started.html) for more information. 63 | -------------------------------------------------------------------------------- /pydicer/cli/contants.py: -------------------------------------------------------------------------------- 1 | def get_sub_help_mesg(input_commands, command): 2 | # pylint: disable=missing-function-docstring 3 | 4 | help_mesg = f"""Subcommand of the following: {input_commands} 5 | 6 | test WORKING_DIRECTORY_PATH 7 | 8 | Runs the command using the default test data. Check pydicer.input.test for more info 9 | 10 | - WORKING_DIRECTORY_PATH: The working directory in which to 11 | store the data fetched. Defaults to a temp directory. 
12 | 13 | Example usage: 14 | python -m pydicer.cli.run input|pipeline --type test cli_test 15 | 16 | pacs WORKING_DIRECTORY_PATH HOST_IP PORT AE_TITLE MODALITY [PATIENT_IDs] 17 | 18 | Runs the command by querying a DICOM PACS server and storing the data locally on the 19 | filesystem 20 | 21 | - WORKING_DIRECTORY_PATH: The working directory in which to 22 | store the data fetched. Defaults to a temp directory. 23 | - HOST_IP (optional): The IP address or host name of the DICOM PACS. Defaults to 24 | 'www.dicomserver.co.uk'. 25 | - PORT (optional): The port to use to communicate on. Defaults to 11112. 26 | - AE_TITLE (optional): AE Title to provide the DICOM service. Defaults to 27 | None. 28 | - MODALITY (optional): The modality to retrieve DICOMs for. Defaults 29 | to 'GM'. 30 | - PATIENT_IDs (required): a string-list of patient IDs (IDs separated by spaces) 31 | to retrieve the DICOMs for. 32 | 33 | Example usage: 34 | python -m pydicer.cli.run input|pipeline --type pacs www.dicomserver.co.uk 11112 35 | DCMQUERY cli_test GM PAT004 PAT005 36 | 37 | 38 | web WORKING_DIRECTORY_PATH DATA_URL 39 | 40 | Runs the command by downloading data from a provided URL and storing it locally on the 41 | filesystem 42 | 43 | - WORKING_DIRECTORY_PATH: The working directory in which to 44 | store the data fetched. Defaults to a temp directory. 45 | - DATA_URL: URL of the dataset to be downloaded from the internet 46 | 47 | Example usage: 48 | python -m pydicer.cli.run input|pipeline --type web 49 | https://zenodo.org/record/5276878/files/HNSCC.zip cli_test 50 | """ 51 | if command == "pipeline": 52 | help_mesg += """ 53 | 54 | filesystem WORKING_DIRECTORY_PATH 55 | 56 | Runs the pipeline using a filesystem working directory which contains DICOM images as input 57 | 58 | - WORKING_DIRECTORY_PATH: The working directory in which to 59 | store the data fetched. Defaults to a temp directory. 60 | 61 | Example usage: 62 | python -m pydicer.cli.run pipeline --type filesystem cli_test 63 | 64 | 65 | e2e 66 | 67 | Runs the entire pipeline using the default settings. 
Check pydicer.pipeline for more info 68 | 69 | Example usage: 70 | python -m pydicer.cli.run pipeline 71 | or 72 | python -m pydicer.cli.run pipeline --type e2e 73 | 74 | """ 75 | 76 | return help_mesg 77 | -------------------------------------------------------------------------------- /.github/workflows/pull-request.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | # Pipeline that checks branches that have been pushed to "Main" OR are the source branch in a newly created pull request into "Main" 5 | # Fails the test if there are Python syntax errors or undefined names OR pytest fails 6 | 7 | name: Pydicer Pytest and Pylint Validaiton 8 | 9 | on: 10 | push: 11 | branches: [main] 12 | pull_request: 13 | branches: [main] 14 | 15 | jobs: 16 | build: 17 | runs-on: ubuntu-latest 18 | strategy: 19 | fail-fast: false 20 | matrix: 21 | python-version: ["3.8", "3.9"] 22 | poetry-version: [1.7.1] 23 | # os: [ubuntu-20.04, macos-latest, windows-latest] 24 | steps: 25 | - uses: actions/checkout@v2 26 | - name: Set up Python ${{ matrix.python-version }} 27 | uses: actions/setup-python@v2 28 | with: 29 | python-version: ${{ matrix.python-version }} 30 | - name: Install Poetry 31 | uses: abatilo/actions-poetry@v2.0.0 32 | with: 33 | poetry-version: ${{ matrix.poetry-version }} 34 | - name: Install Python modules with poetry 35 | run: | 36 | poetry run pip install --upgrade pip 37 | poetry install 38 | poetry run pip install TotalSegmentator 39 | - name: Lint with Pylint 40 | run: | 41 | poetry run pylint pydicer 42 | - name: MyPy type checking 43 | run: | 44 | # TODO poetry run mypy 45 | echo "Skipping MyPy type checking..." 46 | - name: Conditional Pytest coverage 47 | run: | 48 | if [[ "${{ matrix.python-version }}" == "3.9" ]]; then 49 | echo "Running Pytest with coverage..." 50 | poetry run pip install pytest-cov coverage 51 | # Omit CLI from coverage report since it's not fully developed 52 | poetry run pytest --cov=pydicer --cov-report=xml --cov-config=.coveragerc 53 | poetry run coverage report --fail-under=70 # Fail if coverage is less than 70% 54 | else 55 | echo "Running Pytest without coverage..." 
56 | poetry run pytest 57 | fi 58 | # Commit the coverage badge back to repo (only on main branch & for a specific Python version) 59 | - name: Generate and commit coverage badge 60 | if: github.ref == 'refs/heads/main' && matrix.python-version == '3.9' 61 | run: | 62 | poetry run pip install coverage-badge # These only work with python >=3.9 63 | # Generate an SVG coverage badge 64 | # poetry run coverage-badge -o coverage.svg 65 | 66 | # # Configure git 67 | # git config user.name "github-actions" 68 | # git config user.email "github-actions@github.com" 69 | 70 | # # Pull latest changes to avoid conflicts 71 | # git pull --rebase 72 | 73 | # # Stage and commit coverage.svg 74 | # git add coverage.svg 75 | # git commit -m "Update coverage badge" || echo "No changes to commit" 76 | 77 | # # Push commit 78 | # git push 79 | env: 80 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 81 | -------------------------------------------------------------------------------- /pydicer/cli/run.py: -------------------------------------------------------------------------------- 1 | """ 2 | Command Line Interface tool to run pydicer pipeline or specific modules on their own 3 | 4 | usage: python -m pydicer.cli.run ['pipeline'|'input'] 5 | 6 | pydicer CLI (Command Line Interface) 7 | 8 | positional arguments: 9 | command One of the following COMMANDS: ['pipeline'|'input'] 10 | 11 | optional arguments: 12 | -h, --help show help message 13 | """ 14 | import argparse 15 | from argparse import RawTextHelpFormatter 16 | import sys 17 | 18 | from pydicer.cli.contants import get_sub_help_mesg 19 | from pydicer.cli.input import testinput_cli, pacs_cli, tcia_cli, web_cli, run_pipeline 20 | 21 | # Sub command types for the Input command 22 | INPUT_TOOLS = { 23 | "test": testinput_cli, 24 | "pacs": pacs_cli, 25 | "tcia": tcia_cli, 26 | "web": web_cli, 27 | } 28 | 29 | PIPELINE_TOOLS = { 30 | # "e2e": run_test, TODO This broke due to some changes. Either we need to fix or remove. 
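    # Each remaining entry maps a --type value to the generic run_pipeline handler in
    # pydicer.cli.input, which constructs the corresponding input object and then runs
    # the full pipeline on the data it fetches.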
31 | "filesystem": run_pipeline, 32 | "test": run_pipeline, 33 | "pacs": run_pipeline, 34 | "tcia": run_pipeline, 35 | "web": run_pipeline, 36 | } 37 | 38 | 39 | def parse_sub_input(command): 40 | """function to parse the input command args""" 41 | parse_sub_command(command, "Run the Input module only", INPUT_TOOLS, "test", INPUT_COMMANDS) 42 | 43 | 44 | def parse_sub_pipeline(command): 45 | """function to parse the pipeline command args""" 46 | parse_sub_command( 47 | command, 48 | "Run the pipeline with a specific input method", 49 | PIPELINE_TOOLS, 50 | "e2e", 51 | PIPELINE_COMMANDS, 52 | ) 53 | 54 | 55 | INPUT_COMMANDS = str(list(INPUT_TOOLS.keys())).replace(", ", "|") 56 | PIPELINE_COMMANDS = str(list(PIPELINE_TOOLS.keys())).replace(", ", "|") 57 | MODULES = {"pipeline": parse_sub_pipeline, "input": parse_sub_input} 58 | COMMANDS = str(list(MODULES.keys())).replace(", ", "|") 59 | 60 | 61 | def parse_sub_command(command, desc, tools, default_choice, help_commands): 62 | """Generic function to take in dynamic input and trigger the respective sub commands 63 | 64 | Args: 65 | desc (str): help description of what the sub command does 66 | tools (dict): dictionary of which sub command type can be run 67 | default_choice (str): default sub command type that will be run in the case no input is 68 | received from the user 69 | """ 70 | parser = argparse.ArgumentParser(description=desc, formatter_class=RawTextHelpFormatter) 71 | parser.add_argument( 72 | "--type", 73 | help=get_sub_help_mesg(help_commands, command), 74 | default=default_choice, 75 | choices=tools, 76 | ) 77 | 78 | args = parser.parse_args(sys.argv[2:4]) 79 | if command == "pipeline": 80 | tools[args.type](args.type, *sys.argv[4:]) 81 | else: 82 | tools[args.type](*sys.argv[4:]) 83 | 84 | 85 | def pydicer_cli(): 86 | """ 87 | Trigger pydicer CLI 88 | """ 89 | 90 | parser = argparse.ArgumentParser( 91 | description="pydicer CLI (Command Line Interface)", 92 | usage=f"python -m pydicer.cli.run {COMMANDS}", 93 | ) 94 | 95 | # Default to "pipeline" option without input 96 | parser.add_argument( 97 | "command", 98 | help=f"One of the following COMMANDS: {COMMANDS}", 99 | ) 100 | 101 | args = parser.parse_args(sys.argv[1:2]) 102 | MODULES[args.command](sys.argv[1]) 103 | 104 | 105 | if __name__ == "__main__": 106 | pydicer_cli() 107 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | import os 17 | import sys 18 | import shutil 19 | import datetime 20 | from pathlib import Path 21 | 22 | sys.path.insert(0, os.path.abspath("..")) 23 | 24 | # -- Project information ----------------------------------------------------- 25 | 26 | project = "PyDicer" 27 | year = datetime.datetime.now().year 28 | copyright = f"{year}, Ingham Medical Physics" 29 | author = "Ingham Medical Physics" 30 | 31 | 32 | # -- General configuration --------------------------------------------------- 33 | 34 | # Add any Sphinx extension module names here, as strings. They can be 35 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 36 | # ones. 37 | extensions = [ 38 | "sphinx.ext.autodoc", 39 | "sphinx.ext.coverage", 40 | "sphinx.ext.napoleon", 41 | "nbsphinx", 42 | "m2r2", 43 | ] 44 | 45 | # Add any paths that contain templates here, relative to this directory. 46 | templates_path = ["_templates"] 47 | 48 | # List of patterns, relative to source directory, that match files and 49 | # directories to ignore when looking for source files. 50 | # This pattern also affects html_static_path and html_extra_path. 51 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "site"] 52 | 53 | 54 | # -- Options for HTML output ------------------------------------------------- 55 | 56 | # The theme to use for HTML and HTML Help pages. See the documentation for 57 | # a list of builtin themes. 58 | # 59 | html_theme = "furo" 60 | 61 | 62 | def setup(app): 63 | app.add_css_file("custom.css") 64 | 65 | 66 | # Add any paths that contain custom static files (such as style sheets) here, 67 | # relative to this directory. They are copied after the builtin static files, 68 | # so a file named "default.css" will overwrite the builtin "default.css". 
69 | html_static_path = ["_static"] 70 | 71 | html_show_sphinx = False 72 | 73 | # Copy in the files from the other repository directories to have them 74 | # be rendered by Sphinx 75 | examples_path = Path("_examples") 76 | for notebook_path in examples_path.glob("*.ipynb"): 77 | os.remove(notebook_path) 78 | 79 | examples_path.mkdir(exist_ok=True) 80 | shutil.copy("../examples/GettingStarted.ipynb", "_examples/GettingStarted.ipynb") 81 | shutil.copy("../examples/ConvertingData.ipynb", "_examples/ConvertingData.ipynb") 82 | shutil.copy("../examples/VisualiseData.ipynb", "_examples/VisualiseData.ipynb") 83 | shutil.copy("../examples/DoseMetrics.ipynb", "_examples/DoseMetrics.ipynb") 84 | shutil.copy("../examples/Radiomics.ipynb", "_examples/Radiomics.ipynb") 85 | shutil.copy( 86 | "../examples/DatasetPreparation.ipynb", "_examples/DatasetPreparation.ipynb" 87 | ) 88 | shutil.copy("../examples/WorkingWithData.ipynb", "_examples/WorkingWithData.ipynb") 89 | shutil.copy( 90 | "../examples/WorkingWithStructures.ipynb", "_examples/WorkingWithStructures.ipynb" 91 | ) 92 | shutil.copy("../examples/Configuration.ipynb", "_examples/Configuration.ipynb") 93 | shutil.copy("../examples/ObjectGeneration.ipynb", "_examples/ObjectGeneration.ipynb") 94 | shutil.copy("../examples/AutoSegmentation.ipynb", "_examples/AutoSegmentation.ipynb") 95 | shutil.copy("../examples/nnUNet.ipynb", "_examples/nnUNet.ipynb") 96 | 97 | shutil.rmtree("site/assets", ignore_errors=True) 98 | os.makedirs("site", exist_ok=True) 99 | shutil.copytree("../assets", "site/assets") 100 | -------------------------------------------------------------------------------- /tests/test_pipeline.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=redefined-outer-name,missing-function-docstring 2 | 3 | import tempfile 4 | from pathlib import Path 5 | import numpy as np 6 | 7 | import pytest 8 | 9 | from pydicer import PyDicer 10 | from pydicer.input.test import TestInput 11 | 12 | 13 | @pytest.fixture 14 | def test_data(): 15 | """Fixture to grab the test data""" 16 | 17 | directory = Path("./testdata") 18 | directory.mkdir(exist_ok=True, parents=True) 19 | 20 | dicom_directory = directory.joinpath("dicom") 21 | dicom_directory.mkdir(exist_ok=True, parents=True) 22 | 23 | test_input = TestInput(dicom_directory) 24 | test_input.fetch_data() 25 | 26 | return directory 27 | 28 | 29 | def test_pipeline(test_data): 30 | """End-to-end test of the entire pipeline""" 31 | 32 | with tempfile.TemporaryDirectory() as directory: 33 | 34 | directory = Path(directory) 35 | 36 | dicom_directory = directory.joinpath("dicom") 37 | dicom_directory.symlink_to(test_data.absolute(), target_is_directory=True) 38 | 39 | pydicer = PyDicer(directory) 40 | pydicer.add_input(dicom_directory) 41 | 42 | # Preprocess the data fetch to prepare it for conversion 43 | pydicer.preprocess() 44 | 45 | # Convert the data into the output directory 46 | pydicer.convert.convert(patient="HNSCC-01-0199") 47 | 48 | # Visualise the converted data 49 | pydicer.visualise.visualise(patient="HNSCC-01-0199") 50 | 51 | # Dataset selection and preparation 52 | pydicer.dataset.prepare("clean", "rt_latest_dose") 53 | 54 | # Analysis computing Radiomics and DVH 55 | pydicer.analyse.compute_radiomics("clean") 56 | df_rad = pydicer.analyse.get_all_computed_radiomics_for_dataset() 57 | 58 | # Do some spot checks on the radiomics computed for the dataset to confirm the end-to-end 59 | # test worked 60 | assert np.isclose( 61 | ( 62 | df_rad.loc[ 
63 | (df_rad.Contour == "Cord") & (df_rad.Patient == "HNSCC-01-0199"), 64 | "firstorder|Energy", 65 | ].iloc[0] 66 | ), 67 | 18025962.0, 68 | ) 69 | 70 | assert np.isclose( 71 | ( 72 | df_rad.loc[ 73 | (df_rad.Contour == "post_neck") & (df_rad.Patient == "HNSCC-01-0199"), 74 | "firstorder|Median", 75 | ].iloc[0] 76 | ), 77 | 45.0, 78 | ) 79 | 80 | assert np.isclose( 81 | ( 82 | df_rad.loc[ 83 | (df_rad.Contour == "PTV_63_Gy") & (df_rad.Patient == "HNSCC-01-0199"), 84 | "firstorder|Skewness", 85 | ].iloc[0] 86 | ), 87 | -0.0053863391917069, 88 | ) 89 | 90 | pydicer.analyse.compute_dvh() 91 | df_dose_metrics = pydicer.analyse.compute_dose_metrics( 92 | d_point=[50, 95, 99], v_point=[1, 10], d_cc_point=[1, 5, 10] 93 | ) 94 | 95 | assert np.isclose( 96 | (df_dose_metrics.loc[df_dose_metrics.label == "Brainstem", "V10"].iloc[0]), 97 | 29.68311309814453, 98 | ) 99 | 100 | assert np.isclose( 101 | (df_dose_metrics.loc[df_dose_metrics.label == "PTV_57_Gy", "cc"].iloc[0]), 102 | 145.16115188598633, 103 | ) 104 | 105 | assert np.isclose( 106 | (df_dose_metrics.loc[df_dose_metrics.label == "Lt_Parotid", "D95"].iloc[0]), 107 | 8.310638297872341, 108 | ) 109 | 110 | assert np.isclose( 111 | (df_dose_metrics.loc[df_dose_metrics.label == "GTV", "D99"].iloc[0]), 112 | 70.23906832298137, 113 | ) 114 | 115 | assert np.isclose( 116 | (df_dose_metrics.loc[df_dose_metrics.label == "Rt_Parotid", "D5cc"].iloc[0]), 117 | 70.45179733333333, 118 | ) 119 | -------------------------------------------------------------------------------- /tests/test_compare.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=redefined-outer-name,missing-function-docstring 2 | 3 | import tempfile 4 | 5 | from pathlib import Path 6 | import pandas as pd 7 | 8 | from pydicer import PyDicer 9 | from pydicer.analyse.compare import ( 10 | compute_contour_similarity_metrics, 11 | get_all_similarity_metrics_for_dataset, 12 | prepare_similarity_metric_analysis, 13 | ) 14 | from pydicer.utils import read_converted_data 15 | 16 | 17 | def test_compare_auto_segmentations(test_data_autoseg): 18 | working_directory = test_data_autoseg 19 | df = read_converted_data(working_directory=working_directory) 20 | 21 | # We'll test this by comparing the structures against themselves, 22 | # hence we expect perfect metrics 23 | df_target = df[df.modality == "RTSTRUCT"] 24 | df_reference = df[df.modality == "RTSTRUCT"] 25 | 26 | PyDicer(working_directory) 27 | segment_id = "test_seg" 28 | compute_contour_similarity_metrics(df_target, df_reference, segment_id) 29 | 30 | df_stats = get_all_similarity_metrics_for_dataset(working_directory) 31 | 32 | assert len(df_stats) == 200 33 | 34 | df_dsc = df_stats[df_stats["metric"] == "DSC"] 35 | assert df_dsc.value.min() == 1.0 36 | assert df_dsc.value.max() == 1.0 37 | 38 | 39 | def test_compaare_metrics_analysis(test_data_autoseg): 40 | working_directory = test_data_autoseg 41 | df = read_converted_data(working_directory=working_directory) 42 | 43 | # We'll test this by comparing the structures against themselves, 44 | # hence we expect perfect metrics 45 | df_target = df[df.modality == "RTSTRUCT"] 46 | df_reference = df[df.modality == "RTSTRUCT"] 47 | 48 | PyDicer(working_directory) 49 | segment_id = "test_seg" 50 | compute_contour_similarity_metrics(df_target, df_reference, segment_id) 51 | 52 | with tempfile.TemporaryDirectory() as analysis_dir: 53 | analysis_dir = Path(analysis_dir) 54 | 55 | prepare_similarity_metric_analysis( 56 | 
working_directory=working_directory, 57 | analysis_output_directory=analysis_dir, 58 | segment_id=segment_id, 59 | ) 60 | 61 | # Check that the output files exist 62 | raw_metrics_file = analysis_dir.joinpath("raw_test_seg_default.csv") 63 | assert raw_metrics_file.exists() 64 | stats_metrics_file = analysis_dir.joinpath("stats_test_seg_default.csv") 65 | assert stats_metrics_file.exists() 66 | plot_dsc_file = analysis_dir.joinpath("plot_DSC_test_seg_default.png") 67 | assert plot_dsc_file.exists() 68 | plot_hd_file = analysis_dir.joinpath("plot_hausdorffDistance_test_seg_default.png") 69 | assert plot_hd_file.exists() 70 | plot_msd_file = analysis_dir.joinpath("plot_meanSurfaceDistance_test_seg_default.png") 71 | assert plot_msd_file.exists() 72 | plot_sdsc_file = analysis_dir.joinpath("plot_surfaceDSC_test_seg_default.png") 73 | assert plot_sdsc_file.exists() 74 | 75 | # Read in the raw metrics file and do some checks 76 | df_raw = pd.read_csv(raw_metrics_file, index_col=0) 77 | assert len(df_raw) == 200 78 | 79 | # Since these structures compared against themselves, expect perfect metrics 80 | assert df_raw[df_raw.metric == "DSC"].value.min() == 1.0 81 | assert df_raw[df_raw.metric == "surfaceDSC"].value.min() == 1.0 82 | assert df_raw[df_raw.metric == "hausdorffDistance"].value.max() == 0.0 83 | assert df_raw[df_raw.metric == "meanSurfaceDistance"].value.max() == 0.0 84 | 85 | # Read in the stats metrics file and do some checks 86 | df_stats = pd.read_csv(stats_metrics_file, index_col=0) 87 | assert len(df_stats) == 36 88 | 89 | # Check one fo the rows 90 | row_check = df_stats[ 91 | (df_stats.structure == "Esophagus") & (df_stats.metric == "surfaceDSC") 92 | ].iloc[0] 93 | assert row_check["mean"] == 1.0 94 | assert row_check["std"] == 0.0 95 | assert row_check["max"] == 1.0 96 | assert row_check["min"] == 1.0 97 | assert row_check["count"] == 10 98 | -------------------------------------------------------------------------------- /tests/test_quarantine.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | import pytest 4 | import shutil 5 | import tempfile 6 | 7 | import numpy as np 8 | import pandas as pd 9 | 10 | from pydicer.quarantine import copy_file_to_quarantine, read_quarantined_data 11 | 12 | 13 | def test_copy_file_to_quarantine(): 14 | """Test that copy_file_to_quarantine correctly copies the file to quarantine and 15 | writes the summary entry. 16 | """ 17 | with tempfile.TemporaryDirectory() as tmpdir: 18 | tmpdir_path = Path(tmpdir) 19 | 20 | # Create a dummy DICOM file 21 | dummy_file = tmpdir_path / "dummy.dcm" 22 | dummy_file.write_text("Some dummy content") 23 | 24 | # Invoke the function 25 | copy_file_to_quarantine(dummy_file, tmpdir_path, "Test error message") 26 | 27 | quarantine_dir = tmpdir_path / "quarantine" 28 | summary_file = quarantine_dir / "summary.csv" 29 | 30 | # Assert quarantine directory was created 31 | assert quarantine_dir.is_dir(), "Quarantine directory was not created." 32 | 33 | # Assert summary file was created 34 | assert summary_file.exists(), "Summary CSV file was not created." 35 | 36 | # Read the summary CSV 37 | df_summary = pd.read_csv(summary_file, index_col=0) 38 | 39 | # Check that exactly one entry is in the summary 40 | assert len( 41 | df_summary) == 1, "There should be exactly one entry in the summary." 
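        # Beyond the row count, the summary row is expected to record the original file path,
        # the error message passed in above, and whichever DICOM header fields could be read
        # from the quarantined file.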
42 | 43 | # Check the summary row 44 | row = df_summary.iloc[0] 45 | assert row["error"] == "Test error message" 46 | assert "file" in row, "'file' column is missing in the summary DataFrame." 47 | assert "PatientID" in row, "'PatientID' column is missing in the summary DataFrame." 48 | 49 | # Because this is not a valid DICOM, the code defaults PatientID to UNKNOWN 50 | assert pd.isna(row["PatientID"]) 51 | 52 | # The quarantined file is placed under: quarantine_dir / "UNKNOWN" / / dummy.dcm 53 | quarantined_file_path = quarantine_dir.joinpath( 54 | "UNKNOWN", dummy_file.parent.name, dummy_file.name) 55 | assert quarantined_file_path.exists( 56 | ), "Quarantined file was not copied to the correct location." 57 | 58 | 59 | def test_read_quarantined_data(): 60 | """Test that read_quarantined_data reads data from an existing quarantine summary CSV.""" 61 | with tempfile.TemporaryDirectory() as tmpdir: 62 | tmpdir_path = Path(tmpdir) 63 | quarantine_dir = tmpdir_path / "quarantine" 64 | quarantine_dir.mkdir(exist_ok=True) 65 | 66 | # Create a fake summary CSV 67 | summary_file = quarantine_dir / "summary.csv" 68 | df_expected = pd.DataFrame([ 69 | { 70 | "file": "somefile.dcm", 71 | "error": "some_error", 72 | "quarantine_dttm": "2024-01-01 00:00:00", 73 | "PatientID": "UNKNOWN", 74 | "Modality": None, 75 | "SOPInstanceUID": None, 76 | "SeriesDescription": None, 77 | } 78 | ]) 79 | df_expected.to_csv(summary_file) 80 | 81 | # Use the function to read it 82 | df_summary = read_quarantined_data(tmpdir_path) 83 | 84 | # Assert the DataFrame matches our expectations 85 | assert len(df_summary) == 1, "Expected one record in the summary." 86 | row = df_summary.iloc[0] 87 | assert row["file"] == "somefile.dcm" 88 | assert row["error"] == "some_error" 89 | assert row["PatientID"] == "UNKNOWN" 90 | 91 | 92 | def test_read_quarantined_data_no_summary(): 93 | """Test that reading quarantine data raises an error or fails gracefully if summary.csv is missing.""" 94 | with tempfile.TemporaryDirectory() as tmpdir: 95 | tmpdir_path = Path(tmpdir) 96 | quarantine_dir = tmpdir_path / "quarantine" 97 | quarantine_dir.mkdir(exist_ok=True) 98 | 99 | # summary.csv does NOT exist here 100 | with pytest.raises(FileNotFoundError): 101 | read_quarantined_data(tmpdir_path) 102 | -------------------------------------------------------------------------------- /pydicer/convert/rtstruct.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | import pydicom 4 | import SimpleITK as sitk 5 | import matplotlib 6 | 7 | from platipy.dicom.io.rtstruct_to_nifti import transform_point_set_from_dicom_struct 8 | from platipy.imaging.utils.io import write_nrrd_structure_set 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | def convert_rtstruct( 14 | dcm_img, 15 | dcm_rt_file, 16 | prefix="Struct_", 17 | output_dir=".", 18 | output_img=None, 19 | spacing=None, 20 | ): 21 | """Convert a DICOM RTSTRUCT to NIFTI masks. 22 | 23 | The masks are stored as NIFTI files in the output directory 24 | 25 | Args: 26 | dcm_img (list|SimpleITK.Image): List of DICOM paths (as str) to use as the reference image 27 | series or a SimpleITK image of the already converted image. 28 | dcm_rt_file (str|pathlib.Path): Path to the DICOM RTSTRUCT file 29 | prefix (str, optional): The prefix to give the output files. Defaults to "Struct" + 30 | underscore. 31 | output_dir (str|pathlib.Path, optional): Path to the output directory. Defaults to ".". 
32 | output_img (str|pathlib.Path, optional): If set, write the reference image to this file as 33 | in NIFTI format. Defaults to None. 34 | spacing (list, optional): Values of image spacing to override. Defaults to None. 35 | """ 36 | 37 | logger.debug("Converting RTStruct: %s", dcm_rt_file) 38 | logger.debug("Output file prefix: %s", prefix) 39 | 40 | if isinstance(dcm_img, list): 41 | dicom_image = sitk.ReadImage(dcm_img) 42 | elif isinstance(dcm_img, sitk.Image): 43 | dicom_image = dcm_img 44 | else: 45 | raise ValueError("dcm_img must be list or SimpleITK.Image") 46 | 47 | dicom_struct = pydicom.read_file(dcm_rt_file, force=True) 48 | 49 | if not isinstance(output_dir, Path): 50 | output_dir = Path(output_dir) 51 | 52 | if output_dir.exists(): 53 | output_dir.mkdir(exist_ok=True, parents=True) 54 | 55 | image_output_path = None 56 | if output_img is not None: 57 | if not isinstance(output_img, Path): 58 | if not output_img.endswith(".nii.gz"): 59 | output_img = f"{output_img}.nii.gz" 60 | output_img = output_dir.joinpath(output_img) 61 | 62 | image_output_path = output_img 63 | logger.debug("Image series to be converted to: %s", image_output_path) 64 | 65 | if spacing: 66 | if isinstance(spacing, str): 67 | spacing = [float(i) for i in spacing.split(",")] 68 | logger.debug("Overriding image spacing with: %s", spacing) 69 | 70 | struct_list, struct_name_sequence = transform_point_set_from_dicom_struct( 71 | dicom_image, dicom_struct, spacing 72 | ) 73 | 74 | for struct_index, struct_image in enumerate(struct_list): 75 | out_name = f"{prefix}{struct_name_sequence[struct_index]}.nii.gz" 76 | out_name = output_dir.joinpath(out_name) 77 | logger.debug("Writing file to: %s", out_name) 78 | sitk.WriteImage(struct_image, str(out_name)) 79 | 80 | if image_output_path is not None: 81 | sitk.WriteImage(dicom_image, str(image_output_path)) 82 | 83 | 84 | def write_nrrd_from_mask_directory( 85 | mask_directory, output_file, colormap=matplotlib.colormaps.get_cmap("rainbow") 86 | ): 87 | """Produce a NRRD file from a directory of masks in Nifti format 88 | 89 | Args: 90 | mask_directory (pathlib.Path|str): Path object of directory containing masks 91 | output_file (pathlib.Path|str): The output NRRD file to write to. 92 | color_map (matplotlib.colors.Colormap | dict, optional): Colormap to use for output. 93 | Defaults to matplotlib.colormaps.get_cmap("rainbow"). 94 | """ 95 | 96 | if isinstance(mask_directory, str): 97 | mask_directory = Path(mask_directory) 98 | 99 | masks = { 100 | p.name.replace(".nii.gz", ""): sitk.ReadImage(str(p)) 101 | for p in mask_directory.glob("*.nii.gz") 102 | } 103 | 104 | write_nrrd_structure_set(masks, output_file=output_file, colormap=colormap) 105 | logger.debug("Writing NRRD Structure Set to: %s", output_file) 106 | -------------------------------------------------------------------------------- /pydicer/input/pacs.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | from typing import Union 4 | 5 | import pydicom 6 | 7 | from platipy.dicom.communication.connector import DicomConnector 8 | 9 | from pydicer.input.base import InputBase 10 | 11 | 12 | class DICOMPACSInput(InputBase): 13 | def __init__( 14 | self, 15 | host: str, 16 | port: str, 17 | ae_title: str = None, 18 | working_directory: Union[str, Path] = None, 19 | ): 20 | """Class for fetching files from DICOM PACS. Currently only supports C-GET commands to 21 | fetch the data. 
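
        Example (illustrative sketch, using the public test PACS referenced in the CLI help;
        the query will only succeed if that server is reachable):

            pacs_input = DICOMPACSInput("www.dicomserver.co.uk", 11112, "DCMQUERY")
            pacs_input.fetch_data(["PAT004", "PAT005"], modalities=["GM"])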
22 | 23 | Args: 24 | host (str): The IP address of host name of DICOM PACS. 25 | port (int): The port to use to communicate on. 26 | ae_title (str, optional): AE Title to provide the DICOM service. 27 | working_directory (str|pathlib.Path, optional): The working directory in which to 28 | store the data fetched. Defaults to a temp directory. 29 | 30 | Raises: 31 | ConnectionError: Raises a connection error if unable to verify the connection to the 32 | PACS. 33 | """ 34 | 35 | super().__init__(working_directory) 36 | 37 | self.dicom_connector = DicomConnector( 38 | host=host, 39 | port=port, 40 | ae_title=ae_title, 41 | output_directory=self.working_directory, 42 | ) 43 | 44 | if not self.dicom_connector.verify(): 45 | raise ConnectionError("Unable to connect to DICOM PACS.") 46 | 47 | def fetch_data( 48 | self, patients: Union[list, str], modalities: Union[list, str] = None 49 | ): 50 | """Download the DICOM data from the PACS. 51 | 52 | Args: 53 | patients (list|str): A list of patient IDs, or a single patient ID. Wildcard matching 54 | based on the DICOM standard is supported. 55 | modalities (list|str, optional): List of modalities or a single modality to fetch. 56 | Defaults to None where all modalities would be fetched. 57 | """ 58 | 59 | if not isinstance(patients, list) and not isinstance(patients, tuple): 60 | patients = [patients] 61 | 62 | if modalities is None: 63 | modalities = [""] 64 | 65 | if not isinstance(modalities, list) and not isinstance(modalities, tuple): 66 | modalities = [modalities] 67 | 68 | for patient in patients: 69 | dataset = pydicom.Dataset() 70 | dataset.PatientID = patient 71 | dataset.PatientName = "" 72 | dataset.StudyInstanceUID = "" 73 | dataset.ModalitiesInStudy = "" 74 | dataset.QueryRetrieveLevel = "STUDY" 75 | 76 | studies = self.dicom_connector.do_find(dataset) 77 | 78 | for study in studies: 79 | if not study: 80 | continue # These lists often contain a None study, so just skip that 81 | 82 | for modality in modalities: 83 | dataset = pydicom.Dataset() 84 | dataset.PatientID = patient 85 | dataset.StudyInstanceUID = study.StudyInstanceUID 86 | dataset.Modality = modality 87 | dataset.SeriesInstanceUID = "" 88 | dataset.QueryRetrieveLevel = "SERIES" 89 | 90 | series = self.dicom_connector.do_find(dataset) 91 | for s in series: 92 | if not s: 93 | continue # Again, safe to skip this if None 94 | 95 | if not s.PatientID == patient: 96 | continue 97 | 98 | # Download the series 99 | self.dicom_connector.download_series(s.SeriesInstanceUID) 100 | 101 | # Finally, just make sure all files end with the .dcm extension 102 | for f in self.working_directory.glob("**/*"): 103 | if f.is_dir(): 104 | continue 105 | 106 | if f.name.endswith(".dcm"): 107 | continue 108 | 109 | target = f.parent.joinpath(f"{f.name}.dcm") 110 | os.rename(f, target) 111 | -------------------------------------------------------------------------------- /pydicer/cli/input.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | from pathlib import Path 4 | 5 | from pydicer.input.pacs import DICOMPACSInput 6 | from pydicer.input.test import TestInput 7 | from pydicer.input.web import WebInput 8 | from pydicer.input.filesystem import FileSystemInput 9 | from pydicer import PyDicer 10 | 11 | 12 | logger = logging.getLogger(__name__) 13 | 14 | 15 | def run_pipeline(input_method, *args): 16 | """Run the pipeline using a specific input methodthe test data provided 17 | 18 | Args: 19 | working_dir (str, optional): Path to 
store test data. 20 | input_method (str): the input method chosen to run this pipeline 21 | """ 22 | 23 | logging.basicConfig(format="%(name)s\t%(levelname)s\t%(message)s", level=logging.DEBUG) 24 | 25 | logger.info("Running Pipeline with Test Input") 26 | print(args[0]) 27 | directory = Path(args[0]) 28 | directory.mkdir(exist_ok=True, parents=True) 29 | 30 | dicom_dir = directory.joinpath("dicom") 31 | dicom_dir.mkdir(exist_ok=True, parents=True) 32 | 33 | if input_method == "test": 34 | input_obj = testinput_cli(*args) 35 | if input_method == "web": 36 | input_obj = web_cli(*args) 37 | elif input_method == "pacs": 38 | input_obj = pacs_cli(*args) 39 | else: 40 | input_obj = FileSystemInput(*args) 41 | 42 | input_obj.fetch_data() 43 | 44 | pydicer = PyDicer(directory) 45 | pydicer.add_input(input_obj) 46 | 47 | # Preprocess the data fetch to prepare it for conversion 48 | logger.info("Running Pipeline") 49 | pydicer.run_pipeline() 50 | 51 | 52 | def testinput_cli(working_dir): 53 | """Trigger the test input as a mini pipeline for the CLI tool 54 | 55 | Example usage: 56 | python -m pydicer.cli.run input --type test ./cli_test 57 | 58 | Args: 59 | working_dir (str|pathlib.Path, optional): The working directory in which to 60 | store the data fetched. 61 | """ 62 | logging.basicConfig(format="%(name)s\t%(levelname)s\t%(message)s", level=logging.DEBUG) 63 | 64 | logger.info("Running Test Input sub command") 65 | test_input = TestInput(working_dir) 66 | test_input.fetch_data() 67 | return test_input 68 | 69 | 70 | def pacs_cli( 71 | working_dir, 72 | host="www.dicomserver.co.uk", 73 | port=11112, 74 | ae_title=None, 75 | modalities="GM", 76 | *patients 77 | ): 78 | """Trigger the DICOM PACS input as a mini pipeline for the CLI tool. If no inputs received, 79 | then by default it will retrieve some test data 80 | 81 | Example usage: 82 | python -m pydicer.cli.run input --type pacs ./cli_test www.dicomserver.co.uk 11112 DCMQUERY 83 | GM PAT004 PAT005 84 | 85 | Args: 86 | working_dir (str|pathlib.Path, optional): The working directory in which to 87 | store the data fetched. 88 | host (str, optional): The IP address of host name of DICOM PACS. Defaults to 89 | "www.dicomserver.co.uk". 90 | port (int, optional): The port to use to communicate on. Defaults to 11112. 91 | ae_title (str, optional): AE Title to provide the DICOM service. Defaults to None. 92 | modalities (str, optional): The modalities to retrieve DICOMs for. Defaults to "GM". 93 | patients (str, required): a string-list of patient IDs (IDs seperated by spaces) to 94 | retrieve the DICOMs for. 95 | """ 96 | if not patients: 97 | logger.error( 98 | "No patient IDs provided, please provided a list-string separated by spaces of " 99 | "patients IDs to query for " 100 | ) 101 | sys.exit() 102 | logger.info("Running DICOM PACS Input sub command") 103 | pacs_input = DICOMPACSInput(host, int(port), ae_title, working_dir) 104 | pacs_input.fetch_data(patients, [modalities]) 105 | return pacs_input 106 | 107 | 108 | def tcia_cli(): 109 | """Trigger the TCIA input as a mini pipeline for the CLI tool.""" 110 | return 111 | 112 | 113 | def web_cli(working_dir, data_url): 114 | """Trigger the web input as a mini pipeline for the CLI tool. 115 | 116 | Example usage: 117 | python -m pydicer.cli.run input --type web ./cli_test 118 | https://zenodo.org/record/5276878/files/HNSCC.zip 119 | 120 | Args: 121 | working_dir (str|pathlib.Path): The working directory in which to 122 | store the data fetched. 
122 | store the data fetched.
123 | data_url (str): URL of the dataset to be downloaded from the internet 124 | """ 125 | 126 | logger.info("Running web Input sub command") 127 | web_input = WebInput(data_url, working_dir) 128 | web_input.fetch_data() 129 | return web_input 130 | -------------------------------------------------------------------------------- /tests/test_segmentation.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=redefined-outer-name,missing-function-docstring 2 | 3 | import pytest 4 | 5 | import pandas as pd 6 | import SimpleITK as sitk 7 | 8 | from pydicer.generate.segmentation import segment_image, segment_dataset, read_segmentation_log 9 | from pydicer.utils import read_converted_data 10 | 11 | 12 | @pytest.fixture 13 | def test_data_path(tmp_path_factory): 14 | """Fixture to generate a pydicer style file structure. Along with a few dummy images to 15 | run a dummy auto-semgentation on.""" 16 | 17 | working_directory = tmp_path_factory.mktemp("data") 18 | 19 | cols = [ 20 | "", 21 | "sop_instance_uid", 22 | "hashed_uid", 23 | "modality", 24 | "patient_id", 25 | "series_uid", 26 | "for_uid", 27 | "referenced_sop_instance_uid", 28 | "path", 29 | ] 30 | rows = [ 31 | [ 32 | 0, 33 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.418136430763474248173140712714", 34 | "b281ea", 35 | "CT", 36 | "HNSCC-01-0019", 37 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.233510441938368266923995238976", 38 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.290727775603409136366833989550", 39 | "", 40 | "data/HNSCC-01-0019/images/b281ea", 41 | ], 42 | [ 43 | 1, 44 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.41813643076347424817314071123", 45 | "b28321", 46 | "CT", 47 | "HNSCC-01-0019", 48 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.233510441938368266923995238123", 49 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.290727775603409136366833989123", 50 | "", 51 | "data/HNSCC-01-0019/images/b28321", 52 | ], 53 | ] 54 | 55 | df_converted = pd.DataFrame(rows, columns=cols) 56 | for _, row in df_converted.iterrows(): 57 | 58 | data_obj_path = working_directory.joinpath(row.path) 59 | data_obj_path.mkdir(parents=True, exist_ok=True) 60 | 61 | img_path = data_obj_path.joinpath("CT.nii.gz") 62 | sitk.WriteImage(sitk.Image(10, 10, 10, sitk.sitkUInt8), str(img_path)) 63 | 64 | converted_path = working_directory.joinpath("data", "HNSCC-01-0019", "converted.csv") 65 | df_converted.to_csv(converted_path) 66 | 67 | # Also create a dataset directory with converted sub-set 68 | dataset_path = working_directory.joinpath("test_dataset", "HNSCC-01-0019") 69 | dataset_path.mkdir(parents=True) 70 | converted_path = dataset_path.joinpath("converted.csv") 71 | df_converted[:1].to_csv(converted_path) 72 | 73 | return working_directory 74 | 75 | 76 | def test_segment_image_incorrect_function_input(test_data_path): 77 | def seg_func(_, __): 78 | return {"test": sitk.Image(10, 10, 10, sitk.sitkUInt8)} 79 | 80 | df = read_converted_data(test_data_path) 81 | img_row = df.iloc[0] 82 | 83 | segment_image(test_data_path, img_row, "test_seg_fail_input", seg_func) 84 | 85 | df_log = read_segmentation_log(image_row=img_row) 86 | assert len(df_log) == 1 87 | assert not df_log.iloc[0].success_flag 88 | 89 | 90 | def test_segment_image_incorrect_function_output(test_data_path): 91 | def seg_func(img): 92 | return img 93 | 94 | df = read_converted_data(test_data_path) 95 | img_row = df.iloc[0] 96 | 97 | segment_image(test_data_path, img_row, "test_seg_fail_output", seg_func) 98 | 99 | df_log = read_segmentation_log(image_row=img_row) 100 | assert len(df_log) 
== 1 101 | assert not df_log.iloc[0].success_flag 102 | 103 | 104 | def test_segment_image(test_data_path): 105 | def seg_func(img): 106 | return {"struct_a": img > 0, "struct_b": img > 1} 107 | 108 | df = read_converted_data(test_data_path) 109 | img_row = df.iloc[0] 110 | 111 | segment_image(test_data_path, img_row, "test_seg", seg_func) 112 | 113 | df_log = read_segmentation_log(image_row=img_row) 114 | assert len(df_log) == 1 115 | assert df_log.iloc[0].success_flag 116 | 117 | 118 | def test_segment_dataset(test_data_path): 119 | def seg_func(img): 120 | return {"struct_a": img > 0, "struct_b": img > 1} 121 | 122 | df = read_converted_data(test_data_path) 123 | assert len(df) == 2 124 | 125 | segment_dataset(test_data_path, "test_seg", seg_func) 126 | 127 | df = read_converted_data(test_data_path) 128 | assert len(df) == 4 129 | 130 | 131 | def test_segment_dataset_subset(test_data_path): 132 | def seg_func(img): 133 | return {"struct_a": img > 0, "struct_b": img > 1} 134 | 135 | segment_dataset(test_data_path, "test_seg", seg_func, dataset_name="test_dataset") 136 | 137 | df = read_converted_data(test_data_path, dataset_name="test_dataset") 138 | assert len(df) == 2 139 | -------------------------------------------------------------------------------- /tests/test_convert.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=redefined-outer-name,missing-function-docstring 2 | 3 | import tempfile 4 | import json 5 | from pathlib import Path 6 | import numpy as np 7 | 8 | import pytest 9 | 10 | import SimpleITK as sitk 11 | import pydicom 12 | 13 | from pydicer.input.test import TestInput 14 | from pydicer.input.web import WebInput 15 | 16 | from pydicer.convert.headers import convert_dicom_headers 17 | from pydicer.convert.rtstruct import convert_rtstruct 18 | from pydicer.convert.pt import convert_dicom_to_nifti_pt 19 | 20 | 21 | @pytest.fixture 22 | def test_data(): 23 | """Fixture to grab the test data""" 24 | 25 | directory = Path("./testdata") 26 | directory.mkdir(exist_ok=True, parents=True) 27 | 28 | working_directory = directory.joinpath("dicom") 29 | working_directory.mkdir(exist_ok=True, parents=True) 30 | 31 | test_input = TestInput(working_directory) 32 | test_input.fetch_data() 33 | 34 | return working_directory 35 | 36 | 37 | @pytest.fixture 38 | def test_data_all(): 39 | """Fixture to grab the test data with more modalities""" 40 | 41 | directory = Path("./testdata") 42 | directory.mkdir(exist_ok=True, parents=True) 43 | 44 | working_directory = directory.joinpath("working2") 45 | working_directory.mkdir(exist_ok=True, parents=True) 46 | 47 | data_url = "https://zenodo.org/record/5574640/files/HNSCC-01-0019.zip" 48 | web_input = WebInput(data_url, working_directory) 49 | web_input.fetch_data() 50 | 51 | return working_directory 52 | 53 | 54 | def test_convert_rt_struct(test_data): 55 | 56 | img_files = [ 57 | str(f) 58 | for f in test_data.joinpath( 59 | "HNSCC", "HNSCC-01-0199", "10-26-2002-NA-RT SIMULATION-18560", "3.000000-NA-58373" 60 | ).glob("*.dcm") 61 | ] 62 | 63 | img_files.sort() 64 | 65 | rt_struct_file = test_data.joinpath( 66 | "HNSCC", 67 | "HNSCC-01-0199", 68 | "10-26-2002-NA-RT SIMULATION-18560", 69 | "1.000000-NA-59395", 70 | "1-1.dcm", 71 | ) 72 | 73 | with tempfile.TemporaryDirectory() as output_dir: 74 | 75 | output_path = Path(output_dir) 76 | 77 | convert_rtstruct( 78 | img_files, 79 | rt_struct_file, 80 | prefix="", 81 | output_dir=output_path, 82 | output_img=None, 83 | spacing=None, 84 | ) 85 | 
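        # convert_rtstruct writes one NIfTI mask per ROI into output_path, named
        # "<prefix><structure name>.nii.gz" (with an empty prefix in this test), so the
        # checks below spot-check the mask count and one structure's geometry and voxel sum.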
86 | # Make sure there are the correct number of structures 87 | assert len(list(output_path.glob("*"))) == 38 88 | 89 | # Open a random structure and check that it is correct 90 | brainstem_path = output_path.joinpath("Brainstem.nii.gz") 91 | brainstem = sitk.ReadImage(str(brainstem_path)) 92 | 93 | assert brainstem.GetSize() == (512, 512, 174) 94 | assert brainstem.GetSpacing() == (0.9765625, 0.9765625, 2.5) 95 | assert sitk.GetArrayFromImage(brainstem).sum() == 12450 96 | 97 | 98 | def test_convert_pet(test_data_all): 99 | 100 | pet_dir = test_data_all.joinpath( 101 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.995469920533091641707578194770" 102 | ) 103 | 104 | pet_files = [str(f) for f in pet_dir.glob("*.dcm")] 105 | 106 | with tempfile.TemporaryDirectory() as output_dir: 107 | 108 | output_path = Path(output_dir) 109 | output_file = output_path.joinpath("pet.nii.gz") 110 | convert_dicom_to_nifti_pt(pet_files, str(output_file)) 111 | 112 | assert output_file.exists() 113 | 114 | pet_img = sitk.ReadImage(str(output_file)) 115 | assert pet_img.GetSize() == (128, 128, 91) 116 | 117 | pet_arr = sitk.GetArrayFromImage(pet_img) 118 | assert np.allclose(pet_arr.max(), 11.9479, atol=0.001) 119 | 120 | 121 | def test_save_dicom_headers(test_data_all): 122 | 123 | dicom_dir = test_data_all.joinpath( 124 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.995469920533091641707578194770" 125 | ) 126 | 127 | dicom_file = [str(f) for f in dicom_dir.glob("*.dcm")][0] 128 | 129 | with tempfile.TemporaryDirectory() as output_dir: 130 | 131 | # Save off the headers for this file 132 | output_path = Path(output_dir) 133 | output_file = output_path.joinpath("test.json") 134 | convert_dicom_headers(dicom_file, "", output_file) 135 | 136 | # Check that we can read them again from the JSON 137 | with open(output_file, "r", encoding="utf8") as json_file: 138 | ds_dict = json.load(json_file) 139 | 140 | loaded_ds = pydicom.Dataset.from_json(ds_dict, bulk_data_uri_handler=lambda _: None) 141 | original_ds = pydicom.read_file(dicom_file) 142 | 143 | # Check that some key header values are the same in the original DICOM and the one loaded 144 | # from JSON 145 | assert loaded_ds.SeriesInstanceUID == original_ds.SeriesInstanceUID 146 | assert loaded_ds.PatientID == original_ds.PatientID 147 | assert loaded_ds.Modality == original_ds.Modality 148 | assert loaded_ds.SeriesDate == original_ds.SeriesDate 149 | assert loaded_ds.FrameOfReferenceUID == original_ds.FrameOfReferenceUID 150 | -------------------------------------------------------------------------------- /examples/VisualiseData.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Visualise Data\n", 8 | "\n", 9 | "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/AustralianCancerDataNetwork/pydicer/blob/main/examples/VisualiseData.ipynb)\n", 10 | "\n", 11 | "PyDicer's [visualise module](https://australiancancerdatanetwork.github.io/pydicer/visualise.html) will produce cross-sections of data objects and store them\n", 12 | "in `.png` format within the data object directory. This is particularly useful for fast inspection\n", 13 | "of the data to ensure that nothing has gone wrong during conversion.\n", 14 | "\n", 15 | "The visualise module can be run at any time after conversion. 
If you are using advanced features\n", 16 | "of PyDicer, such as [auto-segmentation inference](https://australiancancerdatanetwork.github.io/pydicer/_examples/AutoSegmentation.html) and [object generation](https://australiancancerdatanetwork.github.io/pydicer/_examples/ObjectGeneration.html), you can run the\n", 17 | "visualise module following the generation of the new data objects to produce the cross-section\n", 18 | "`.png` files." 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "try:\n", 28 | " from pydicer import PyDicer\n", 29 | "except ImportError:\n", 30 | " !pip install pydicer\n", 31 | " from pydicer import PyDicer\n", 32 | "\n", 33 | "from pathlib import Path\n", 34 | "\n", 35 | "from pydicer.utils import fetch_converted_test_data" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "## Setup PyDicer\n", 43 | "\n", 44 | "HNSCC data prepared for this example are downloaded and stored into a `testdata_hnscc` directory.\n", 45 | "We will use this for our PyDicer working directory. We also initialise our PyDicer object." 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "working_directory = fetch_converted_test_data(\"./testdata_hnscc\", dataset=\"HNSCC\")\n", 55 | "\n", 56 | "pydicer = PyDicer(working_directory)" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "## Visualise Data\n", 64 | "\n", 65 | "We simply call the [visualise](https://australiancancerdatanetwork.github.io/pydicer/visualise.html#pydicer.visualise.data.VisualiseData.visualise) function of the `visualise` module to produce the cross-sections.\n", 66 | "\n", 67 | "Inspect some of the data object directories in `testdata_hnscc/data` and look for the `.png`\n", 68 | "cross-sections. The `{hashed_uid}` in files named `vis_{hashed_uid}.png` refers to a UID hash\n", 69 | "linking to the image being visualised. Visualisations are produced for:\n", 70 | "- Images\n", 71 | "- RT Structure Sets\n", 72 | "- RT Dose Grids" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "pydicer.visualise.visualise()" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "### Run for a single patient\n", 89 | "\n", 90 | "You can run the visualisation for only a single patient (or list of specific patients) by providing\n", 91 | "the `patient` argument." 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "pydicer.visualise.visualise(patient=\"HNSCC-01-0199\")" 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "### Avoid Re-generating Visualisation\n", 108 | "\n", 109 | "If you've added more data to your dataset, and want to avoid re-generating visualisations, set the\n", 110 | "`force` argument to `False`." 
111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "pydicer.visualise.visualise(force=False)" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [] 128 | } 129 | ], 130 | "metadata": { 131 | "interpreter": { 132 | "hash": "814af119db7f8f2860617be3dcd1d37c560587d11c65bd58c45b1679d3ee6ea4" 133 | }, 134 | "kernelspec": { 135 | "display_name": "Python 3.8.0 64-bit ('pydicer': pyenv)", 136 | "language": "python", 137 | "name": "python3" 138 | }, 139 | "language_info": { 140 | "codemirror_mode": { 141 | "name": "ipython", 142 | "version": 3 143 | }, 144 | "file_extension": ".py", 145 | "mimetype": "text/x-python", 146 | "name": "python", 147 | "nbconvert_exporter": "python", 148 | "pygments_lexer": "ipython3", 149 | "version": "3.9.16" 150 | }, 151 | "orig_nbformat": 4 152 | }, 153 | "nbformat": 4, 154 | "nbformat_minor": 2 155 | } 156 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 
50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | d.almouiee@unsw.edu.au. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.0, available at 119 | . 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 123 | 124 | [homepage]: https://www.contributor-covenant.org 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | . Translations are available at 128 | . 
129 | -------------------------------------------------------------------------------- /examples/DoseMetrics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Dose Metrics\n", 9 | "\n", 10 | "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/AustralianCancerDataNetwork/pydicer/blob/main/examples/DoseMetrics.ipynb)\n", 11 | "\n", 12 | "In this example notebook we will compute [Dose Volume Histograms (DVH)](https://pyplati.github.io/platipy/dose.html#module-platipy.imaging.dose.dvh) for our `RTDOSE` objects\n", 13 | "across structures found in `RTSTRUCT` objects in our dataset. We use\n", 14 | "[HNSCC](https://wiki.cancerimagingarchive.net/display/Public/HNSCC) data from the Cancer Imaging\n", 15 | "Archive which has already been converted using PyDicer for demonstration purposes." 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "try:\n", 25 | " from pydicer import PyDicer\n", 26 | "except ImportError:\n", 27 | " !pip install pydicer\n", 28 | " from pydicer import PyDicer\n", 29 | "\n", 30 | "from pathlib import Path\n", 31 | "\n", 32 | "from pydicer.utils import fetch_converted_test_data" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "## Fetch data\n", 40 | "\n", 41 | "HNSCC data prepared for this example are downloaded and stored into a `testdata_hnscc` directory.\n", 42 | "We will use this for our PyDicer working directory." 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "working_directory = fetch_converted_test_data(\"./testdata_hnscc\", dataset=\"HNSCC\")" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "## Initialise PyDicer object\n", 59 | "\n", 60 | "Using the working directory containing the test data." 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "pydicer = PyDicer(working_directory)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "## Compute DVH\n", 77 | "\n", 78 | "Before we can extract dose metrics, we must compute Dose Volume Histograms for all dose objects and\n", 79 | "structure sets. This is done using the [compute_dvh](https://australiancancerdatanetwork.github.io/pydicer/analyse.html#pydicer.analyse.data.AnalyseData.compute_dvh) function." 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "pydicer.analyse.compute_dvh()" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "## Inspect DVH\n", 96 | "\n", 97 | "DVHs computed are stored in the respective dose object directories on the file system. Inspect a\n", 98 | "dose object directory (e.g. `testdata_hnscc/data/HNSCC-01-0019/doses/309e1a`). Here you will find\n", 99 | "a `.png` file which plots the DVH for each of the linked structures. 
In addition a `.csv` file \n", 100 | "stores the raw DVH values.\n", 101 | "\n", 102 | "The DVHs can for this dataset can be loaded into a pandas DataFrame with the\n", 103 | "[get_all_dvhs_for_dataset](https://australiancancerdatanetwork.github.io/pydicer/analyse.html#pydicer.analyse.data.AnalyseData.get_all_dvhs_for_dataset) function." 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "df_dvh = pydicer.analyse.get_all_dvhs_for_dataset()\n", 113 | "df_dvh.head()" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "## Compute Dose Metrics\n", 121 | "\n", 122 | "The [compute_dose_metrics](https://australiancancerdatanetwork.github.io/pydicer/analyse.html#pydicer.analyse.data.AnalyseData.compute_dose_metrics) function in the `analyse` module can compute **D**, **V** and **Dcc**\n", 123 | "metrics. Specify the points at which to compute those values. For example, the following cell\n", 124 | "computes the **D95**, **D50**, **V5** and **Dcc10**." 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": null, 130 | "metadata": {}, 131 | "outputs": [], 132 | "source": [ 133 | "df_dose_metrics = pydicer.analyse.compute_dose_metrics(\n", 134 | " d_point=[95, 50],\n", 135 | " v_point=[5],\n", 136 | " d_cc_point=[10]\n", 137 | ")\n", 138 | "df_dose_metrics.head()" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [] 147 | } 148 | ], 149 | "metadata": { 150 | "interpreter": { 151 | "hash": "814af119db7f8f2860617be3dcd1d37c560587d11c65bd58c45b1679d3ee6ea4" 152 | }, 153 | "kernelspec": { 154 | "display_name": "Python 3.8.0 64-bit ('pydicer': pyenv)", 155 | "language": "python", 156 | "name": "python3" 157 | }, 158 | "language_info": { 159 | "codemirror_mode": { 160 | "name": "ipython", 161 | "version": 3 162 | }, 163 | "file_extension": ".py", 164 | "mimetype": "text/x-python", 165 | "name": "python", 166 | "nbconvert_exporter": "python", 167 | "pygments_lexer": "ipython3", 168 | "version": "3.9.16" 169 | }, 170 | "orig_nbformat": 4 171 | }, 172 | "nbformat": 4, 173 | "nbformat_minor": 2 174 | } 175 | -------------------------------------------------------------------------------- /pydicer/dataset/preparation.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from pathlib import Path 4 | from typing import Callable, Union 5 | 6 | import pandas as pd 7 | 8 | from pydicer.constants import CONVERTED_DIR_NAME 9 | 10 | from pydicer.dataset import functions 11 | from pydicer.utils import read_converted_data 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | 16 | class PrepareDataset: 17 | """ 18 | Class that provides functionality for prepartion of subsets of data. 19 | 20 | Args: 21 | - working_directory (str|pathlib.Path, optional): Main working directory for pydicer. 22 | Defaults to ".". 23 | """ 24 | 25 | def __init__(self, working_directory: Union[str, Path] = "."): 26 | self.working_directory = Path(working_directory) 27 | 28 | def add_object_to_dataset(self, dataset_name: str, data_object_row: pd.Series): 29 | """Add one data object to a dataset. 30 | 31 | Args: 32 | dataset_name (str): The name of the dataset to add the object to. 33 | data_object_row (pd.Series): The DataFrame row of the converted object. 
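        Example:
            An illustrative sketch only -- ``./testdata`` and the dataset name ``clean`` are
            placeholders, and this assumes the working directory already contains converted
            data::

                from pathlib import Path
                from pydicer.utils import read_converted_data

                working_dir = Path("./testdata")
                df = read_converted_data(working_dir)

                prepare = PrepareDataset(working_dir)
                prepare.add_object_to_dataset("clean", df[df.modality == "RTSTRUCT"].iloc[0])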
34 | """ 35 | 36 | dataset_dir = self.working_directory.joinpath(dataset_name) 37 | 38 | # Create a copy so that we aren't manuipulating the original entry 39 | data_object_row = data_object_row.copy() 40 | 41 | object_path = Path(data_object_row.path) 42 | if object_path.is_absolute(): 43 | data_object_row.path = str(object_path.relative_to(self.working_directory)) 44 | object_path = Path(data_object_row.path) 45 | 46 | object_path = Path(data_object_row.path) 47 | symlink_path = dataset_dir.joinpath(object_path.relative_to(CONVERTED_DIR_NAME)) 48 | 49 | rel_part = os.sep.join( 50 | [ 51 | ".." 52 | for _ in symlink_path.parent.relative_to(self.working_directory).parts 53 | ] 54 | ) 55 | src_path = Path(f"{rel_part}{os.sep}{object_path}") 56 | 57 | symlink_path.parent.mkdir(parents=True, exist_ok=True) 58 | 59 | if symlink_path.exists(): 60 | logger.debug("Symlink path already exists: %s", symlink_path) 61 | else: 62 | symlink_path.symlink_to(src_path) 63 | 64 | pat_id = data_object_row.patient_id 65 | pat_dir = dataset_dir.joinpath(pat_id) 66 | pat_converted_csv = pat_dir.joinpath("converted.csv") 67 | df_pat = pd.DataFrame([data_object_row]) 68 | if pat_converted_csv.exists(): 69 | col_types = {"patient_id": str, "hashed_uid": str} 70 | df_converted = pd.read_csv(pat_converted_csv, index_col=0, dtype=col_types) 71 | 72 | # Check if this object already exists in the converted dataframe 73 | if ( 74 | len(df_converted[df_converted.hashed_uid == data_object_row.hashed_uid]) 75 | == 0 76 | ): 77 | # If not add it 78 | df_pat = pd.concat([df_converted, df_pat]) 79 | else: 80 | # Otherwise just leave the converted data as is 81 | df_pat = df_converted 82 | 83 | df_pat = df_pat.reset_index(drop=True) 84 | df_pat.to_csv(pat_dir.joinpath("converted.csv")) 85 | 86 | def prepare_from_dataframe(self, dataset_name: str, df_prepare: pd.DataFrame): 87 | """Prepare a dataset from a filtered converted dataframe 88 | 89 | Args: 90 | dataset_name (str): The name of the dataset to generate 91 | df_prepare (pd.DataFrame): Filtered Pandas DataFrame containing rows of converted data. 92 | """ 93 | 94 | dataset_dir = self.working_directory.joinpath(dataset_name) 95 | if dataset_dir.exists(): 96 | logger.warning( 97 | "Dataset directory already exists. Consider using a different dataset name or " 98 | "remove the existing directory" 99 | ) 100 | 101 | # Create a copy of df_prepare 102 | df_prepare = df_prepare.copy() 103 | 104 | # Remove the working directory part for when we re-save off the filtered converted csv 105 | df_prepare.path = df_prepare.path.apply( 106 | lambda p: str(Path(p).relative_to(self.working_directory)) 107 | ) 108 | 109 | # For each data object prepare the data in the dataset directory 110 | for _, row in df_prepare.iterrows(): 111 | self.add_object_to_dataset(dataset_name, row) 112 | 113 | def prepare( 114 | self, dataset_name: str, preparation_function: Callable, patients=None, **kwargs 115 | ): 116 | """Calls upon an appropriate preparation function to generate a clean dataset ready for 117 | use. Additional keyword arguments are passed through to the preparation_function. 118 | 119 | Args: 120 | dataset_name (str): The name of the dataset to generate 121 | preparation_function (function|str): the function use for preparation 122 | patients (list): The list of patient IDs to use for dataset. If None then all patients 123 | will be considered. Defaults to None. 
124 | 125 | Raises: 126 | AttributeError: Raised if preparation_function is not a function or a string defining 127 | a known preparation function. 128 | """ 129 | 130 | if isinstance(preparation_function, str): 131 | preparation_function = getattr(functions, preparation_function) 132 | 133 | if not callable(preparation_function): 134 | raise AttributeError( 135 | "preparation_function must be a function or a str defined in pydicer.dataset" 136 | ) 137 | 138 | logger.info( 139 | "Preparing dataset %s using function: %s", 140 | dataset_name, 141 | preparation_function, 142 | ) 143 | 144 | # Grab the DataFrame containing all the converted data 145 | df_converted = read_converted_data(self.working_directory, patients=patients) 146 | 147 | # Send to the prepare function which will return a DataFrame of the data objects to use for 148 | # the dataset 149 | df_clean_data = preparation_function(df_converted, **kwargs) 150 | 151 | self.prepare_from_dataframe(dataset_name, df_clean_data) 152 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PyDicer: PYthon Dicom Image ConvertER 2 | 3 | [![SoftwareX](https://img.shields.io/badge/SoftwareX-10.1016/j.softx.2024.102010-green.svg)](https://doi.org/10.1016/j.softx.2024.102010) 4 | 5 | Welcome to PyDicer, a tool to ease the process of converting Radiotherapy DICOM data objects into a format typically used for research purposes. In addition to data conversion, functionality is provided to help analyse the data. This includes computing radiomic features, radiotherapy dose metrics and auto-segmentation metrics. PyDicer uses the NIfTI format to store data is a well defined file system structure. Tracking of these data objects in CSV files, also stored on the file system, provides an easy and flexible way to work with the converted data in your research. 6 | 7 | The [PyDicer documentation](https://australiancancerdatanetwork.github.io/pydicer/index.html) provides several examples and guides to help you get started with the tool. Here are a few **PyDicer principles** to keep in mind as you get started: 8 | 9 | - The [working directory structure](https://australiancancerdatanetwork.github.io/pydicer/index.html#directory-structure) is standardised and generalisable for use with any DICOM dataset. 10 | - Use [Pandas DataFrame's](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html) to work with converted data objects. 11 | - [SimpleITK](https://simpleitk.readthedocs.io/en/master/) and [PlatiPy](https://pyplati.github.io/platipy/) are used under the hood for the image conversion, visualisation and analysis tasks. 12 | - Always inspect visualisations, plots and metrics produced by PyDicer in your working directory. Remember, PyDicer is a research tool so only use it for research purposes and expect the unexpected! 13 | 14 | ## Installation 15 | 16 | PyDicer currently supports Python version 3.8, 3.9 and 3.10. Install PyDicer in your Python 17 | environment using `pip`: 18 | 19 | ```bash 20 | pip install pydicer 21 | ``` 22 | 23 | ## Supported Modalities 24 | 25 | PyDicer currently supports converting and analysing the following DICOM modalities: 26 | 27 | - CT 28 | - MR 29 | - PT (Experimental) 30 | - RTSTRUCT 31 | - RTPLAN (Not converted since this only consists of meta data) 32 | - RTDOSE 33 | 34 | ## Directory Structure 35 | 36 | PyDicer will place converted and intermediate files into a specific directory structure. 
Visualisation, metrics computed and plots are also stored alongside the converted data objects. Within the configured working directory `[working]`, the following directories will be generated: 37 | 38 | - `[working]/data`: Directory in which converted data will be placed 39 | - `[working]/quarantine`: Files which couldn't be preprocessed or converted will be placed in here for you to investigate further 40 | - `[working]/.pydicer`: Intermediate files as well as log output will be stored in here 41 | - `[working]/[dataset_name]`: Clean datasets prepared using the Dataset Preparation Module will be stored in a directory with their name and will symbolically link to the converted data in the `[working]/data` directory 42 | 43 | ![PyDicer Working Directory structure](assets/pydicer-working-directory-structure.png) 44 | PyDicer working directory structure. Ref. [Chlap, P. et al. SoftwareX](https://doi.org/10.1016/j.softx.2024.102010) 45 | 46 | ## Pipeline 47 | 48 | The pipeline handles everything from fetching the DICOM data through to conversion and preparation of your research dataset. Here are the key steps of the pipeline: 49 | 50 | 1. **Input**: Various classes are provided to fetch DICOM files from the file system, DICOM PACS, TCIA or Orthanc. A TestInput class is also provided to supply test data for development/testing. 51 | 52 | 2. **Preprocess**: The DICOM files are sorted and linked. Error checking is performed and issues are resolved where possible. 53 | 54 | 3. **Conversion**: The DICOM files are converted to the target format (NIfTI). 55 | 56 | 4. **Visualisation**: Visualisations of the converted data are prepared to assist with data selection. 57 | 58 | 5. **Dataset Preparation**: The appropriate files from the converted data are selected to prepare a clean dataset ready for use in your research project! 59 | 60 | 6. **Analysis**: Radiomics and Dose Metrics are computed on the converted data. 61 | 62 | ## Getting Started 63 | 64 | Running the pipeline is easy. The following script will get you started: 65 | 66 | ```python 67 | from pathlib import Path 68 | 69 | from pydicer.input.test import TestInput 70 | from pydicer import PyDicer 71 | 72 | # Configure working directory 73 | directory = Path("./testdata") 74 | directory.mkdir(exist_ok=True, parents=True) 75 | 76 | # Fetch some test DICOM data to convert 77 | dicom_directory = directory.joinpath("dicom") 78 | dicom_directory.mkdir(exist_ok=True, parents=True) 79 | test_input = TestInput(dicom_directory) 80 | test_input.fetch_data() 81 | 82 | # Create the PyDicer tool object and add the dicom directory as an input location 83 | pydicer = PyDicer(directory) 84 | pydicer.add_input(dicom_directory) 85 | 86 | # Run the pipeline 87 | pydicer.run_pipeline() 88 | ``` 89 | 90 | ## How to Cite 91 | 92 | If you make use of PyDicer within your research work, please consider citing our SoftwareX paper: 93 | 94 | Chlap P, Al Mouiee D, Finnegan RN, et al. PyDicer: An open-source python library for conversion and analysis of radiotherapy DICOM data. *SoftwareX*. 2025;29:102010. [doi:10.1016/j.softx.2024.102010](https://doi.org/10.1016/j.softx.2024.102010) 95 | 96 | ## Contributing 97 | 98 | PyDicer is an open-source tool and contributions are welcome! Here are some ways you might consider contributing to the project: 99 | 100 | - Reporting issues on GitHub. 101 | - Correcting/extending the documentation. 102 | - Contributing a bug fix or extending some functionality. 103 | - Providing functionality to support additional DICOM modalities. 
104 | - Giving the [PyDicer project](https://github.com/AustralianCancerDataNetwork/pydicer) a star on GitHub. 105 | 106 | For more information, see the [Contributing documentation](https://australiancancerdatanetwork.github.io/pydicer/contributing.html). 107 | 108 | ## Authors 109 | 110 | PyDicer was developed by the [Ingham Medical Physics team](https://www.unsw.edu.au/medicine-health/our-schools/clinical-medicine/research-impact/research-groups/cancer/ingham-medical-physics) in South-Western Sydney. It was developed as part of the [Australian Cancer Data Network](https://australian-cancer-data.network/) supported by the [Australian Research Data Commons](https://ardc.edu.au/). 111 | 112 | - **Phillip Chlap** - [phillip.chlap@unsw.edu.au](phillip.chlap@unsw.edu.au) 113 | - **Daniel Al Mouiee** - [d.almouiee@gmail.com](d.almouiee@gmail.com) 114 | -------------------------------------------------------------------------------- /pydicer/input/orthanc.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | import logging 3 | from typing import Union 4 | from pathlib import Path 5 | 6 | import pydicom 7 | from pyorthanc.deprecated.client import Orthanc 8 | 9 | from pydicer.utils import get_iterator 10 | from pydicer.input.base import InputBase 11 | 12 | logger = logging.getLogger(__name__) 13 | 14 | 15 | def adapt_dataset_from_bytes(blob: bytes) -> pydicom.Dataset: 16 | """Convert bytes coming from Orthanc to DICOM dataset 17 | 18 | Args: 19 | blob (bytes): The bytes to convert 20 | 21 | Returns: 22 | pydicom.Dataset: The DICOM dataset 23 | """ 24 | dataset = pydicom.dcmread(BytesIO(blob)) 25 | return dataset 26 | 27 | 28 | class OrthancInput(InputBase): 29 | def __init__( 30 | self, 31 | host: str, 32 | port: int, 33 | username: str = None, 34 | password: str = None, 35 | working_directory: Union[str, Path] = None, 36 | ): 37 | """Class for fetching files from Orthanc. 38 | 39 | Args: 40 | host (str): The IP address or host name of the Orthanc. 41 | port (int): The port to use to communicate on. 42 | username (str, optional): Orthanc username. 43 | password (str, optional): Orthanc password. 44 | working_directory (str|pathlib.Path, optional): The working directory in which to 45 | store the data fetched. Defaults to a temp directory. 46 | 47 | Raises: 48 | ConnectionError: Raises a connection error if unable to verify the connection to 49 | Orthanc. 50 | """ 51 | 52 | super().__init__(working_directory) 53 | 54 | if not host.startswith("http"): 55 | host = f"http://{host}" 56 | 57 | self.orthanc = Orthanc(f"{host}:{port}") 58 | 59 | if username is not None and password is not None: 60 | self.orthanc.setup_credentials(username, password) 61 | 62 | # Do a dummy lookup to check that we can reach the Orthanc host, this will throw a 63 | # connection error if we can't connect to the Orthanc 64 | self.orthanc.c_find({"Level": "Patient", "Query": {"PatientID": "XXX"}}) 65 | 66 | def fetch_data( 67 | self, patients: Union[list, str], modalities: Union[list, str] = None 68 | ): 69 | """Download the DICOM data from Orthanc 70 | 71 | Args: 72 | patients (list|str): A list of patient IDs, or a single patient ID. 73 | modalities (list|str, optional): List of modalities or a single modality to fetch. 74 | Defaults to None where all modalities would be fetched. 
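        Example:
            An illustrative sketch only -- the host, port and patient ID below are placeholders
            for your own Orthanc instance::

                orthanc_input = OrthancInput("127.0.0.1", 8042)
                orthanc_input.fetch_data("HNSCC-01-0019", modalities=["CT", "RTSTRUCT"])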
75 | """ 76 | 77 | if not isinstance(patients, list) and not isinstance(patients, tuple): 78 | patients = [patients] 79 | 80 | if ( 81 | modalities is not None 82 | and not isinstance(modalities, list) 83 | and not isinstance(modalities, tuple) 84 | ): 85 | modalities = [modalities] 86 | 87 | for patient in get_iterator(patients, unit="patients", name="Orthanc Fetch"): 88 | # Find the Orthanc ID for this patient 89 | orthanc_patient_ids = self.orthanc.c_find( 90 | {"Level": "Patient", "Query": {"PatientID": patient}} 91 | ) 92 | 93 | if len(orthanc_patient_ids) == 0: 94 | logger.warning("Patient not found in Orthanc: %s", patient) 95 | continue 96 | 97 | if len(orthanc_patient_ids) > 1: 98 | logger.warning( 99 | "Patient returned multple Orthanc IDs: %s. Selecting first only", 100 | patient, 101 | ) 102 | 103 | orthanc_patient_id = orthanc_patient_ids[0] 104 | 105 | patient_information = self.orthanc.get_patient_information( 106 | orthanc_patient_id 107 | ) 108 | patient_id = patient_information["MainDicomTags"]["PatientID"] 109 | 110 | # Loop over each study for this patient 111 | study_identifiers = patient_information["Studies"] 112 | for study_identifier in study_identifiers: 113 | # Loop over each series in this study 114 | study_information = self.orthanc.get_study_information(study_identifier) 115 | series_identifiers = study_information["Series"] 116 | for series_identifier in series_identifiers: 117 | series_information = self.orthanc.get_series_information( 118 | series_identifier 119 | ) 120 | 121 | # Skip if this isn't one of the modalities we want 122 | modality = series_information["MainDicomTags"]["Modality"] 123 | if modalities is not None and not modality in modalities: 124 | continue 125 | 126 | series_information = self.orthanc.get_series_information( 127 | series_identifier 128 | ) 129 | series_instance_uid = series_information["MainDicomTags"][ 130 | "SeriesInstanceUID" 131 | ] 132 | 133 | # Create the output directory for this series 134 | series_path = self.working_directory.joinpath( 135 | patient_id, series_instance_uid 136 | ) 137 | series_path.mkdir(exist_ok=True, parents=True) 138 | 139 | # Loop over each instance in this series 140 | instance_identifiers = series_information["Instances"] 141 | for instance_identifier in instance_identifiers: 142 | instance_information = self.orthanc.get_instance_information( 143 | instance_identifier 144 | ) 145 | 146 | # Download the DICOM instance 147 | f = self.orthanc.get_instance_file(instance_identifier) 148 | ds = adapt_dataset_from_bytes(f) 149 | 150 | sop_instance_uid = instance_information["MainDicomTags"][ 151 | "SOPInstanceUID" 152 | ] 153 | ds_file_name = f"{modality}.{sop_instance_uid}.dcm" 154 | ds_path = series_path.joinpath(ds_file_name) 155 | 156 | # Save the DICOM dataset 157 | ds.save_as(ds_path) 158 | logger.debug("Saving DICOM dataset to %s", ds_path) 159 | -------------------------------------------------------------------------------- /pydicer/dataset/structureset.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | from pathlib import Path 4 | 5 | import SimpleITK as sitk 6 | import pandas as pd 7 | 8 | from pydicer.constants import DEFAULT_MAPPING_ID 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | def get_mapping_for_structure_set( 14 | structure_set_row: pd.Series, mapping_id: str 15 | ) -> dict: 16 | """Searches the folder hierarchy to find a structure name mapping file with the given ID. 
17 | 18 | Args: 19 | structure_set_row (pd.Series): The converted dataframe row entry for the structure set. 20 | mapping_id (str): The ID of the mapping to find. 21 | 22 | Returns: 23 | dict: The structure name mapping 24 | """ 25 | structure_set_path = Path(structure_set_row.path) 26 | 27 | potential_mapping_paths = [ 28 | # First look in the structure_set_path folder for the structure mapping 29 | structure_set_path.joinpath(".structure_set_mappings"), 30 | # Next look in the patient folder 31 | structure_set_path.parent.joinpath(".structure_set_mappings"), 32 | # Finally look for the project wide mapping 33 | structure_set_path.parent.parent.parent.parent.joinpath( 34 | ".pydicer", ".structure_set_mappings" 35 | ), 36 | ] 37 | 38 | for mapping_path in potential_mapping_paths: 39 | mapping_file = mapping_path.joinpath(f"{mapping_id}.json") 40 | if mapping_file.exists(): 41 | logger.debug("Using mapping file in %s", mapping_file) 42 | with open(mapping_file, encoding="utf-8") as json_file: 43 | return json.load(json_file) 44 | 45 | return None 46 | 47 | 48 | class StructureSet(dict): 49 | def __init__(self, structure_set_row, mapping_id=DEFAULT_MAPPING_ID): 50 | if not structure_set_row.modality == "RTSTRUCT": 51 | raise AttributeError("structure_set_row modality must be RTSTRUCT") 52 | 53 | self.structure_set_path = Path(structure_set_row.path) 54 | self.structure_set_id = structure_set_row.hashed_uid 55 | 56 | self.structure_names = [ 57 | s.name.replace(".nii.gz", "") 58 | for s in self.structure_set_path.glob("*.nii.gz") 59 | ] 60 | self.unmapped_structure_names = self.structure_names 61 | 62 | self.structure_mapping = None 63 | 64 | # Check if we can find a mapping for this structure set, if not we'll just used the 65 | # unmapped structure names 66 | if mapping_id is not None: 67 | self.structure_mapping = get_mapping_for_structure_set( 68 | structure_set_row, mapping_id 69 | ) 70 | 71 | if self.structure_mapping is None: 72 | logger.warning("No mapping file found with id %s", mapping_id) 73 | 74 | if self.structure_mapping is not None: 75 | self.structure_names = list(self.structure_mapping.keys()) 76 | 77 | self.cache = {} 78 | 79 | def get_mapped_structure_name(self, item: str) -> str: 80 | """Get the structure set specific name for a structure that may have been mapped. 81 | 82 | Args: 83 | item (str): The standardised name to look up. 84 | 85 | Returns: 86 | str: The structure set specific name if it could be mapped (returns the original name 87 | otherwise). 88 | """ 89 | structure_name = item 90 | 91 | if self.structure_mapping is not None: 92 | if item in self.structure_mapping: 93 | for variation in self.structure_mapping[item]: 94 | variation_path = self.structure_set_path.joinpath( 95 | f"{variation}.nii.gz" 96 | ) 97 | if variation_path.exists(): 98 | # Found variation, let's use that file... 99 | # TODO an issue would occur if there were multiple files that would match 100 | # this mapping. In that case we should probably throw an error (or at 101 | # a warning?). 102 | structure_name = variation 103 | 104 | return structure_name 105 | 106 | def get_standardised_structure_name(self, item: str) -> str: 107 | """Get the standardised name for a structure that is present in this structure set. 108 | 109 | Args: 110 | item (str): The name of the structure in this structure set. 111 | 112 | Returns: 113 | str: The standardised name if it could be mapped (returns the original name 114 | otherwise). 
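        Example:
            With a mapping such as ``{"SpinalCord": ["Cord", "copy_of_cord"]}`` in place
            (illustrative names only), ``get_standardised_structure_name("Cord")`` returns
            ``"SpinalCord"``, while a name with no mapping entry is returned unchanged.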
115 | """ 116 | 117 | structure_name = item 118 | 119 | if self.structure_mapping is not None: 120 | for standardised_name in self.structure_mapping: 121 | for variation in self.structure_mapping[standardised_name]: 122 | if variation == item: 123 | return standardised_name 124 | 125 | return structure_name 126 | 127 | def __getitem__(self, item): 128 | structure_name = self.get_mapped_structure_name(item) 129 | 130 | if item not in self.structure_names: 131 | raise KeyError( 132 | f"Structure name {item} not found in structure set {self.structure_set_id}." 133 | ) 134 | 135 | if item in self.cache: 136 | return self.cache[item] 137 | 138 | structure_path = self.structure_set_path.joinpath(f"{structure_name}.nii.gz") 139 | 140 | if not structure_path.exists(): 141 | raise FileExistsError( 142 | f"No structure file found for {structure_name} in structure " 143 | f"set {self.structure_set_id}" 144 | ) 145 | 146 | result = sitk.ReadImage(str(structure_path)) 147 | 148 | self.cache[item] = result 149 | return result 150 | 151 | def keys(self): 152 | return self.structure_names 153 | 154 | def values(self): 155 | return [self[s] for s in self.structure_names] 156 | 157 | def items(self): 158 | return [(s, self[s]) for s in self.structure_names] 159 | 160 | def get_unmapped_structures(self) -> list: 161 | """Get a list of structures for which no structure was found based on the mapping. If no 162 | mapping is being used this will always be empty. 163 | 164 | Returns: 165 | list: Names of structures that can't be found using a mapping 166 | """ 167 | missing_mappings = [] 168 | for k in self.keys(): 169 | structure_name = self.get_mapped_structure_name(k) 170 | structure_path = self.structure_set_path.joinpath( 171 | f"{structure_name}.nii.gz" 172 | ) 173 | if not structure_path.exists(): 174 | missing_mappings.append(k) 175 | 176 | return missing_mappings 177 | -------------------------------------------------------------------------------- /pydicer/config.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import json 3 | 4 | from pathlib import Path 5 | 6 | from pydicer.constants import PYDICER_DIR_NAME 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | PYDICER_CONFIG = { 11 | "verbosity": { 12 | "module": "general", 13 | "description": "Level of output for standard out. Value indicates the Python built-in log " 14 | "level. A value of 0 (not set) will display the process bar. Logs of all levels are " 15 | "available in the .pydicer directory.", 16 | "type": int, 17 | "default": 0, 18 | "choices": [ 19 | logging.NOTSET, 20 | logging.DEBUG, 21 | logging.INFO, 22 | logging.WARNING, 23 | logging.ERROR, 24 | ], 25 | }, 26 | "for_fallback_linkage": { 27 | "module": "general", 28 | "description": "Determine whether to fallback on linking objects via their Frame of " 29 | "Reference if no more stable link exists.", 30 | "type": bool, 31 | "default": True, 32 | "choices": None, 33 | }, 34 | "enforce_dcm_ext": { 35 | "module": "preprocess", 36 | "description": "If True only files with the .dcm or .DCM extension will be preprocessed. " 37 | "otherwise any file in the DICOM directory will be preprocessed.", 38 | "type": bool, 39 | "default": True, 40 | "choices": None, 41 | }, 42 | "interp_missing_slices": { 43 | "module": "convert", 44 | "description": "When missing slices are detected these will be interpolated if True. 
" 45 | "otherwise these cases will be sent to quarantine.", 46 | "type": bool, 47 | "default": True, 48 | "choices": None, 49 | }, 50 | "ignore_duplicate_slices": { 51 | "module": "convert", 52 | "description": "If two slices at the same location with different pixel data are found " 53 | "then the first slice is used if ignore_duplicate_slices is True. Otherwise an error is" 54 | "raised and these images are sent to quarantine", 55 | "type": bool, 56 | "default": False, 57 | "choices": None, 58 | }, 59 | "default_patient_weight": { 60 | "module": "convert", 61 | "description": "Default patient weight to use for PET conversion if it cannot be " 62 | "determined from the DICOM headers. If None, those cases will be sent to " 63 | "quarantine.", 64 | "type": float, 65 | "default": None, 66 | "choices": None, 67 | }, 68 | "generate_nrrd": { 69 | "module": "convert", 70 | "description": "Whether or not to generate an additional NRRD file when converting " 71 | "RTSTRUCT. This allows loading easily into 3D slicer.", 72 | "type": bool, 73 | "default": True, 74 | "choices": None, 75 | }, 76 | "nrrd_colormap": { 77 | "module": "convert", 78 | "description": "Matplotlib colormap to use when saving NRRD file of structures.", 79 | "type": str, 80 | "default": "rainbow", 81 | "choices": None, 82 | }, 83 | } 84 | 85 | 86 | class PyDicerConfig: 87 | class __PyDicerConfig: # pylint: disable=invalid-name 88 | def __init__(self, working_dir=None): 89 | if working_dir is None: 90 | raise ValueError("working_dir must be set on config init") 91 | self.working_dir = Path(working_dir) 92 | 93 | pydicer_dir = self.working_dir.joinpath(PYDICER_DIR_NAME) 94 | self.config_path = pydicer_dir.joinpath("config.json") 95 | 96 | self.pydicer_config = {} 97 | 98 | if self.config_path.exists(): 99 | # Read existing config if exists 100 | with open(self.config_path, "r", encoding="utf-8") as cp: 101 | self.pydicer_config = json.load(cp) 102 | 103 | # Add config items from config object. 104 | # Like this if new items are added in future versions of pydicer, new config items 105 | # will be added in 106 | for key, item in PYDICER_CONFIG.items(): 107 | if not key in self.pydicer_config: 108 | self.pydicer_config[key] = item["default"] 109 | 110 | instance = None 111 | 112 | def __init__(self, working_dir=None): 113 | """Return the singleton instance of PyDicerConfig 114 | 115 | Args: 116 | working_dir (str|pathlib.Path, optional): The working directory for project. Required 117 | on first initialisation. Defaults to None. 118 | """ 119 | 120 | if working_dir is not None and PyDicerConfig.instance is not None: 121 | # If we already have a config instance, but the working directory has changed, we will 122 | # recreate the instance with the new working directory. 123 | if not working_dir == PyDicerConfig.instance.working_dir: 124 | PyDicerConfig.instance = PyDicerConfig.__PyDicerConfig(working_dir) 125 | elif PyDicerConfig.instance is None: 126 | PyDicerConfig.instance = PyDicerConfig.__PyDicerConfig(working_dir) 127 | 128 | def get_working_dir(self): 129 | """Get the working directory configured for the project. 
130 | 131 | Returns: 132 | pathlib.Path: The working directory 133 | """ 134 | return self.instance.working_dir 135 | 136 | def get_config(self, name: str) -> object: 137 | """Get the value of the config item with the specified name 138 | 139 | Args: 140 | name (str): Config item name 141 | 142 | Raises: 143 | AttributeError: Config value with name doesn't exist 144 | 145 | Returns: 146 | object: Value of the config with the given name 147 | """ 148 | 149 | if not name in self.instance.pydicer_config: 150 | raise AttributeError(f"{name} does not exist in config") 151 | 152 | return self.instance.pydicer_config[name] 153 | 154 | def set_config(self, name: str, value: object): 155 | """Set the value for the config with the given name 156 | 157 | Args: 158 | name (str): The name of the config to set 159 | value (object): The value of the config 160 | 161 | Raises: 162 | AttributeError: Config value with name doesn't exist 163 | ValueError: Config value is of the wrong type 164 | """ 165 | 166 | if not name in self.instance.pydicer_config: 167 | raise AttributeError(f"{name} does not exist in config") 168 | 169 | if not isinstance(value, PYDICER_CONFIG[name]["type"]) and not value is None: 170 | raise ValueError( 171 | f"Config {name} must be of type " 172 | f"{type(self.instance.pydicer_config[name])}" 173 | ) 174 | 175 | self.instance.pydicer_config[name] = value 176 | self.save_config() 177 | 178 | def save_config(self): 179 | """Save the config to the pydicer directory""" 180 | 181 | if not self.instance.config_path.parent.exists(): 182 | self.instance.config_path.parent.mkdir() 183 | 184 | with open(self.instance.config_path, "w", encoding="utf-8") as fp: 185 | json.dump(self.instance.pydicer_config, fp, indent=2) 186 | -------------------------------------------------------------------------------- /pydicer/generate/models.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | import logging 3 | from pathlib import Path 4 | 5 | import SimpleITK as sitk 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | def load_output_nifti(output_dir: Path) -> dict: 11 | """Loads segmentation masks saved as Nifti's in an output directory into a dictionary for use 12 | in PyDicer. 13 | 14 | Args: 15 | output_dir (Path): The output directory of a segmentation model. 16 | 17 | Returns: 18 | dict: Dictionary of segmentation masks with the structure name as key and sitk.Image mask 19 | as value. 20 | """ 21 | results = {} 22 | 23 | # Load the output masks into a dict to return 24 | for mask_file in output_dir.glob("*.nii.gz"): 25 | mask = sitk.ReadImage(str(mask_file)) 26 | 27 | structure_name = mask_file.name.replace(".nii.gz", "") 28 | 29 | # Check if the mask is empty, total segmentator stores empty mask files for structures 30 | # that aren't within FOV 31 | if sitk.GetArrayFromImage(mask).sum() == 0: 32 | logger.debug("Segmentation mask for %s is empty, skipping...", structure_name) 33 | continue 34 | 35 | logger.debug("Loading segmentation mask for %s", structure_name) 36 | results[structure_name] = mask 37 | 38 | return results 39 | 40 | 41 | def run_total_segmentator(input_image: sitk.Image) -> dict: 42 | """Run Total Segmentator on a given input image. Ensure the Total Segmentator is installed: 43 | 44 | ``` 45 | pip install TotalSegmentator 46 | ``` 47 | 48 | See https://github.com/wasserth/TotalSegmentator for more information. 49 | 50 | Args: 51 | input_image (sitk.Image): Input image (should be CT) to segment. 
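    Example:
        An illustrative sketch only -- the file names are placeholders and TotalSegmentator
        must be installed for this to run::

            import SimpleITK as sitk

            img = sitk.ReadImage("ct.nii.gz")
            masks = run_total_segmentator(img)
            for name, mask in masks.items():
                sitk.WriteImage(mask, f"{name}.nii.gz")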
52 | 53 | Returns: 54 | dict: Dictionary of segmentations with structure name as key and sitk.Image mask as value. 55 | """ 56 | 57 | # Import within function since this is an optional dependency 58 | # pylint: disable=import-outside-toplevel 59 | from totalsegmentator.python_api import totalsegmentator 60 | 61 | results = {} 62 | 63 | with tempfile.TemporaryDirectory() as working_dir: 64 | logger.debug("Running TotalSegmentator in temporary directory: %s", working_dir) 65 | 66 | working_dir = Path(working_dir) 67 | 68 | # Save the temporary image file for total segmentator to find 69 | input_dir = working_dir.joinpath("input") 70 | input_dir.mkdir() 71 | input_file = input_dir.joinpath("img.nii.gz") 72 | sitk.WriteImage(input_image, str(input_file)) 73 | 74 | # Prepare a temporary folder for total segmentator to store the output 75 | output_dir = working_dir.joinpath("output") 76 | output_dir.mkdir() 77 | 78 | # Run total segmentator 79 | totalsegmentator(input_file, output_dir) 80 | 81 | # Load the output masks into a dict to return 82 | results = load_output_nifti(output_dir) 83 | 84 | logger.debug("TotalSegmentator complete") 85 | 86 | return results 87 | 88 | 89 | def get_available_mhub_models() -> dict: 90 | """Determine which mHub models have been configured for use in PyDicer. 91 | 92 | Returns: 93 | dict: A dictionary with mhub model id as key and the path to the config file as value. 94 | """ 95 | 96 | available_models = {} 97 | model_config_directory = Path(__file__).parent.joinpath("mhubconfigs") 98 | logger.debug("Loading mHub model configs from %s", model_config_directory) 99 | for model_config in model_config_directory.glob("*.yml"): 100 | available_models[model_config.name.replace(".yml", "")] = model_config.absolute() 101 | 102 | logger.debug("Found available configs: %s", available_models) 103 | return available_models 104 | 105 | 106 | def run_mhub_model( 107 | input_image: sitk.Image, 108 | mhub_model: str, 109 | mhub_config_file: Path = None, 110 | gpu: bool = True, 111 | ) -> dict: 112 | """Use Docker to run a model made available through mHub: https://mhub.ai/ 113 | 114 | Args: 115 | input_image (sitk.Image): The SimpleITK image to segment. 116 | mhub_model (str): The name of the model to run. Must be configured 117 | (check `get_available_mhub_models`) or a custom mhub_config_file should be provided. 118 | mhub_config_file (Path, optional): Path to a custom config file to use. Defaults to None. 119 | gpu (bool, optional): If True, all gpus will be requested when running the Docker image. 120 | Defaults to True. 121 | 122 | Raises: 123 | ImportError: Raised if the Python Docker SDK is not installed. 124 | ValueError: Raised if an mHub model which has not been configured for use in PyDicer is 125 | requested. Use the `get_available_mhub_models` function to determine available models. 126 | 127 | Returns: 128 | dict: Dictionary of segmentations with structure name as key and sitk.Image mask as value. 129 | """ 130 | 131 | try: 132 | # pylint: disable=import-outside-toplevel 133 | import docker 134 | except ImportError as ie: 135 | raise ImportError( 136 | "Docker Python package is required to run mHub models. 
Install with: " 137 | "pip install docker" 138 | ) from ie 139 | 140 | client = docker.from_env() 141 | 142 | mhub_image = f"mhubai/{mhub_model}" 143 | 144 | # Try pulling the image 145 | try: 146 | client.images.pull(mhub_image) 147 | except docker.errors.ImageNotFound as inf: 148 | raise docker.errors.ImageNotFound( 149 | f"The mhub image {mhub_image} could not be pulled. " 150 | "Check if this model is available using the get_available_mhub_models function." 151 | ) from inf 152 | 153 | if mhub_config_file is None: 154 | available_mhub_models = get_available_mhub_models() 155 | 156 | if not mhub_model in available_mhub_models: 157 | raise ValueError(f"mHub model {mhub_model} not configured for use in PyDicer.") 158 | 159 | mhub_config_file = available_mhub_models[mhub_model] 160 | 161 | with tempfile.TemporaryDirectory() as working_dir: 162 | logger.info("Running mHub model %s in temporary %s", mhub_model, working_dir) 163 | working_dir = Path(working_dir) 164 | input_dir = working_dir.joinpath("input") 165 | input_dir.mkdir() 166 | input_file = input_dir.joinpath("image.nii.gz") 167 | sitk.WriteImage(input_image, str(input_file)) 168 | 169 | output_dir = working_dir.joinpath("output") 170 | output_dir.mkdir() 171 | 172 | device_requests = [] 173 | if gpu: 174 | # Request all GPUs 175 | device_requests = [docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] 176 | 177 | volumes = { 178 | input_dir.absolute(): {"bind": "/app/data/input_data", "mode": "rw"}, 179 | output_dir.absolute(): {"bind": "/app/data/output_data", "mode": "rw"}, 180 | mhub_config_file: {"bind": "/app/data/config.yml", "mode": "rw"}, 181 | } 182 | 183 | client.containers.run( 184 | mhub_image, 185 | command="--config /app/data/config.yml", 186 | remove=True, 187 | volumes=volumes, 188 | device_requests=device_requests, 189 | ) 190 | 191 | # Load the output masks into a dict to return 192 | results = load_output_nifti(output_dir) 193 | 194 | logger.debug("mHub segmentation complete") 195 | 196 | return results 197 | -------------------------------------------------------------------------------- /examples/Configuration.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Configuration\n", 9 | "\n", 10 | "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/AustralianCancerDataNetwork/pydicer/blob/main/examples/Configuration.ipynb)\n", 11 | "\n", 12 | "PyDicer provides various options which you may configure to change the behaviour of the tool." 
13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "try:\n", 22 | " from pydicer import PyDicer\n", 23 | "except ImportError:\n", 24 | " !pip install pydicer\n", 25 | " from pydicer import PyDicer\n", 26 | "\n", 27 | "import logging\n", 28 | "\n", 29 | "from pydicer.utils import fetch_converted_test_data\n", 30 | "\n", 31 | "working_directory = fetch_converted_test_data(\"./testdata_lctsc\", dataset=\"LCTSC\")\n", 32 | "\n", 33 | "pydicer = PyDicer(working_directory)" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "## Getting and Setting Options\n", 41 | "\n", 42 | "Use the `get_config` and `set_config` functions of the [config module](https://australiancancerdatanetwork.github.io/pydicer/config.html) to get and set configuration options respectively." 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "logging_verbosity = pydicer.config.get_config(\"verbosity\")\n", 52 | "print(f\"Current logging verbosity: {logging_verbosity}\")\n", 53 | "\n", 54 | "# Set to logging level DEBUG\n", 55 | "pydicer.config.set_config(\"verbosity\", logging.DEBUG)\n", 56 | "\n", 57 | "logging_verbosity = pydicer.config.get_config(\"verbosity\")\n", 58 | "print(f\"New logging verbosity: {logging_verbosity}\")" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "## Options Available\n", 66 | "\n", 67 | "### Logging Verbosity\n", 68 | "\n", 69 | "Level of output for standard out. Value indicates the [Python built-in log level](https://docs.python.org/3/library/logging.html#logging-levels). A value of 0\n", 70 | "(not set) will display the process bar. Logs of all levels are available in the .pydicer directory.\n", 71 | "\n", 72 | "Valid options are: `[logging.NOTSET, logging.DEBUG, logging.INFO, logging.WARNING, logging.ERROR]`" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "pydicer.config.set_config(\"verbosity\", logging.DEBUG)" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "### Frame of Reference Fallback Linkage\n", 89 | "\n", 90 | "Determine whether to fallback on linking objects via their [Frame of Reference UID](https://dicom.innolitics.com/ciods/ct-image/frame-of-reference/00200052) if no more stable\n", 91 | "link exists.\n", 92 | "\n", 93 | "Valid options are: `True` or `False`" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | "pydicer.config.set_config(\"for_fallback_linkage\", True)" 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "metadata": {}, 108 | "source": [ 109 | "### Enforce `.dcm` file extension\n", 110 | "\n", 111 | "If True only files with the .dcm or .DCM extension will be preprocessed. 
Otherwise any file in the\n", 112 | "DICOM directory will be preprocessed.\n", 113 | "\n", 114 | "Valid options are: `True` or `False`" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": {}, 121 | "outputs": [], 122 | "source": [ 123 | "pydicer.config.set_config(\"enforce_dcm_ext\", True)" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "### Interpolate Missing Slices\n", 131 | "\n", 132 | "When missing slices are detected these will be interpolated if True. Otherwise these cases will be\n", 133 | "sent to quarantine.\n", 134 | "\n", 135 | "Valid options are: `True` or `False`" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "pydicer.config.set_config(\"interp_missing_slices\", True)" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "### Ignore Duplicate Slices\n", 152 | "\n", 153 | "If two slices at the same location with different pixel data are found then the first slice is used\n", 154 | "if ignore_duplicate_slices is True. Otherwise an error is raised and these images are sent to\n", 155 | "quarantine.\n", 156 | "\n", 157 | "Valid options are: `True` or `False`" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "metadata": {}, 164 | "outputs": [], 165 | "source": [ 166 | "pydicer.config.set_config(\"ignore_duplicate_slices\", False)" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": {}, 172 | "source": [ 173 | "### Generate NRRD\n", 174 | "\n", 175 | "Whether or not to generate an additional NRRD file when converting RTSTRUCT. This allows loading\n", 176 | "easily into [3D slicer](https://www.slicer.org/), but it takes up more disk space and takes time to generate the file.\n", 177 | "\n", 178 | "Valid options are: `True` or `False`" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": null, 184 | "metadata": {}, 185 | "outputs": [], 186 | "source": [ 187 | "pydicer.config.set_config(\"generate_nrrd\", False)" 188 | ] 189 | }, 190 | { 191 | "cell_type": "markdown", 192 | "metadata": {}, 193 | "source": [ 194 | "### NRRD Colormap\n", 195 | "\n", 196 | "If NRRD files are to be generated, this defines the Matplotlib colormap to use when saving NRRD\n", 197 | "file of structures.\n", 198 | "\n", 199 | "Valid options are any [Matplotlib colormap](https://matplotlib.org/stable/users/explain/colors/colormaps.html)." 
200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "pydicer.config.set_config(\"nrrd_colormap\", \"rainbow\")" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": {}, 215 | "outputs": [], 216 | "source": [] 217 | } 218 | ], 219 | "metadata": { 220 | "interpreter": { 221 | "hash": "814af119db7f8f2860617be3dcd1d37c560587d11c65bd58c45b1679d3ee6ea4" 222 | }, 223 | "kernelspec": { 224 | "display_name": "Python 3.8.0 64-bit ('pydicer': pyenv)", 225 | "language": "python", 226 | "name": "python3" 227 | }, 228 | "language_info": { 229 | "codemirror_mode": { 230 | "name": "ipython", 231 | "version": 3 232 | }, 233 | "file_extension": ".py", 234 | "mimetype": "text/x-python", 235 | "name": "python", 236 | "nbconvert_exporter": "python", 237 | "pygments_lexer": "ipython3", 238 | "version": "3.9.16" 239 | }, 240 | "orig_nbformat": 4 241 | }, 242 | "nbformat": 4, 243 | "nbformat_minor": 2 244 | } 245 | -------------------------------------------------------------------------------- /examples/WorkingWithData.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Working with Data\n", 9 | "\n", 10 | "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/AustralianCancerDataNetwork/pydicer/blob/main/examples/WorkingWithData.ipynb)\n", 11 | "\n", 12 | "Here we present some useful tips & tricks which to help working with data which has been converted\n", 13 | "using PyDicer. As you will see, working with data in PyDicer is heavily oriented around DataFrames\n", 14 | "provided by the Pandas library. If you aren't familiar with Pandas, we recommend working through \n", 15 | "the [Pandas Getting Started Tutorials](https://pandas.pydata.org/docs/getting_started/index.html)." 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "try:\n", 25 | " from pydicer import PyDicer\n", 26 | "except ImportError:\n", 27 | " !pip install pydicer\n", 28 | " from pydicer import PyDicer\n", 29 | "\n", 30 | "from pathlib import Path\n", 31 | "\n", 32 | "from pydicer.utils import (\n", 33 | " fetch_converted_test_data,\n", 34 | " load_object_metadata,\n", 35 | " determine_dcm_datetime,\n", 36 | " read_simple_itk_image\n", 37 | ")" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "## Setup PyDicer\n", 45 | "\n", 46 | "Here we load the LCTSC data which has already been converted. This is downloaded into the\n", 47 | "`testdata_lctsc` directory. We also initialise a `PyDicer` object." 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "working_directory = fetch_converted_test_data(\"./testdata_lctsc\", dataset=\"LCTSC\")\n", 57 | "\n", 58 | "pydicer = PyDicer(working_directory)" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "## Read Converted Data\n", 66 | "\n", 67 | "To obtain a DataFrame of the converted data, use the [read_converted_data](https://australiancancerdatanetwork.github.io/pydicer/utils.html#pydicer.utils.read_converted_data) function." 
68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "df = pydicer.read_converted_data()\n", 77 | "df" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "## Iterating Over Objects\n", 85 | "\n", 86 | "If you want to perform some operation on (for example) all images in your dataset, you can iterate\n", 87 | "over each image row like this. Within each loop we load each image as a `SimpleITK` image (just\n", 88 | "for demonstration purposes).)" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "for idx, ct_row in df[df.modality==\"CT\"].iterrows():\n", 98 | "\n", 99 | " print(f\"Loading image with hashed UID: {ct_row.hashed_uid}...\", end=\"\")\n", 100 | "\n", 101 | " img = read_simple_itk_image(ct_row)\n", 102 | "\n", 103 | " print(\" Complete\")" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "## Loading Object Metadata\n", 111 | "\n", 112 | "The metadata from the DICOM headers is stored by PyDicer and can be easily loaded using the\n", 113 | "[load_object_metadata](https://australiancancerdatanetwork.github.io/pydicer/utils.html#pydicer.utils.load_object_metadata) function. Simply pass a row from the converted DataFrame into this function\n", 114 | "to load the metadata for that object." 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": {}, 121 | "outputs": [], 122 | "source": [ 123 | "first_row = df.iloc[0]\n", 124 | "ds = load_object_metadata(first_row)\n", 125 | "ds" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": [ 132 | "### Keep only specific header tags\n", 133 | "\n", 134 | "Loading object metadata can be slow, especially when doing this for many objects at once. So, you\n", 135 | "can specify the `keep_tags` argument if you know which header attributes you want to use. This\n", 136 | "speeds up loading metadata significantly.\n", 137 | "\n", 138 | "Here we load only the `StudyDate`, `PatientSex` and `Manufacturer`.\n", 139 | "\n", 140 | "> Tip: These tags are defined by the DICOM standard, and we use `pydicom` to load this metadata. In\n", 141 | "> fact, the metadata returned is a `pydicom` Dataset. Check out the [pydicom documentation](https://pydicom.github.io/pydicom/dev/old/pydicom_user_guide.html) for more information." 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "ds = load_object_metadata(first_row, keep_tags=[\"StudyDate\", \"PatientSex\", \"Manufacturer\"])\n", 151 | "ds" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "### Loading metadata for all data objects\n", 159 | "\n", 160 | "You can use the Pandas `apply` function to load metadata for all rows and add it as a column to the\n", 161 | "converted DataFrame." 
162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "df[\"StudyDescription\"] = df.apply(lambda row: load_object_metadata(row, keep_tags=\"StudyDescription\").StudyDescription, axis=1)\n", 171 | "df" 172 | ] 173 | }, 174 | { 175 | "cell_type": "markdown", 176 | "metadata": {}, 177 | "source": [ 178 | "### Determine Date of Object\n", 179 | "\n", 180 | "There are several DICOM header tags which could define the date of an object. The DICOM standard\n", 181 | "doesn't require all of these to be set within the metadata. PyDicer provides the \n", 182 | "[determine_dcm_datetime](https://australiancancerdatanetwork.github.io/pydicer/utils.html#pydicer.utils.determine_dcm_datetime) function to extract the date from the DICOM header." 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "ds = load_object_metadata(first_row)\n", 192 | "obj_datetime = determine_dcm_datetime(ds)\n", 193 | "print(obj_datetime)" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": null, 199 | "metadata": {}, 200 | "outputs": [], 201 | "source": [] 202 | } 203 | ], 204 | "metadata": { 205 | "interpreter": { 206 | "hash": "814af119db7f8f2860617be3dcd1d37c560587d11c65bd58c45b1679d3ee6ea4" 207 | }, 208 | "kernelspec": { 209 | "display_name": "Python 3.8.0 64-bit ('pydicer': pyenv)", 210 | "language": "python", 211 | "name": "python3" 212 | }, 213 | "language_info": { 214 | "codemirror_mode": { 215 | "name": "ipython", 216 | "version": 3 217 | }, 218 | "file_extension": ".py", 219 | "mimetype": "text/x-python", 220 | "name": "python", 221 | "nbconvert_exporter": "python", 222 | "pygments_lexer": "ipython3", 223 | "version": "3.9.16" 224 | }, 225 | "orig_nbformat": 4 226 | }, 227 | "nbformat": 4, 228 | "nbformat_minor": 2 229 | } 230 | -------------------------------------------------------------------------------- /examples/GettingStarted.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Getting Started\n", 9 | "\n", 10 | "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/AustralianCancerDataNetwork/pydicer/blob/main/examples/GettingStarted.ipynb)\n", 11 | "\n", 12 | "This notebook provides a basic example to run the PyDicer pipeline using some test data." 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "try:\n", 22 | " from pydicer import PyDicer\n", 23 | "except ImportError:\n", 24 | " !pip install pydicer\n", 25 | " from pydicer import PyDicer\n", 26 | "\n", 27 | "from pathlib import Path\n", 28 | "\n", 29 | "from pydicer.input.test import TestInput" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "## Setup working directory\n", 37 | "\n", 38 | "First we'll create a directory for our project. Change the `directory` location to a folder on your\n", 39 | "system where you'd like PyDicer to work with this data." 
40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "directory = Path(\"./data\")" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "## Create a PyDicer object\n", 56 | "\n", 57 | "The [PyDicer class](https://australiancancerdatanetwork.github.io/pydicer/tool.html) provides all functionlity to run the pipeline and work with the data stored and\n", 58 | "converted in your project directory" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "pydicer = PyDicer(directory)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "## Fetch some data\n", 75 | "\n", 76 | "A [TestInput class](https://australiancancerdatanetwork.github.io/pydicer/input.html#pydicer.input.test.TestInput) is provided in pydicer to download some sample data to work with. Several other\n", 77 | "input classes exist if you'd like to retrieve DICOM data for conversion from somewhere else, [see \n", 78 | "the docs for information on how these work](https://australiancancerdatanetwork.github.io/pydicer/html/input.html)." 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "dicom_directory = directory.joinpath(\"dicom\")\n", 88 | "test_input = TestInput(dicom_directory)\n", 89 | "test_input.fetch_data()\n", 90 | "\n", 91 | "# Add the input DICOM location to the pydicer object\n", 92 | "pydicer.add_input(dicom_directory)" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "## Run the pipeline\n", 100 | "\n", 101 | "The function runs the entire PyDicer pipeline on the test DICOM data. This includes:\n", 102 | "- [Preprocessing](https://australiancancerdatanetwork.github.io/pydicer/preprocess.html) the DICOM data (data which can't be handled or is corrupt will be placed in Quarantine)\n", 103 | "- [Convert](https://australiancancerdatanetwork.github.io/pydicer/convert.html) the data to Nifti format (see the output in the `data` directory)\n", 104 | "- [Visualise](https://australiancancerdatanetwork.github.io/pydicer/visualise.html) the data (png files will be placed alongside the converted Nifti files)\n", 105 | "- [Compute Radiomics features](https://australiancancerdatanetwork.github.io/pydicer/analyse.html#pydicer.analyse.data.AnalyseData.compute_radiomics) (Results are stored in a csv alongside the converted structures)\n", 106 | "- [Compute Dose Volume Histograms](https://australiancancerdatanetwork.github.io/pydicer/analyse.html#pydicer.analyse.data.AnalyseData.compute_dvh) (results are stored alongside converted dose data)\n", 107 | "\n", 108 | "> Note that the entire Pipeline can be quite time consuming to run. Depending on your project's\n", 109 | "> dataset you will likely want to run only portions of the pipeline with finer control over each\n", 110 | "> step. For this reason we only run the pipeline for one patient here as a demonstration." 
111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "pydicer.run_pipeline(patient=\"HNSCC-01-0019\")" 120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": {}, 125 | "source": [ 126 | "## Prepare a dataset\n", 127 | "\n", 128 | "Datasets which are extracted in DICOM format can often be a bit messy and require some cleaning up\n", 129 | "after conversion. Exactly what data objects to extract for the clean dataset will differ by project\n", 130 | "but here we use a somewhat common approach of extracting the latest structure set for each patient\n", 131 | "and the image linked to that.\n", 132 | "\n", 133 | "The resulting dataset is stored in a folder with your dataset name (`clean` for this example).\n", 134 | "\n", 135 | "See the [dataset preparation example](https://australiancancerdatanetwork.github.io/pydicer/_examples/DatasetPreparation.html) for a more detailed description on how this works.\n" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "pydicer.dataset.prepare(dataset_name=\"clean\", preparation_function=\"rt_latest_dose\")" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "## Analyse the dataset\n", 152 | "\n", 153 | "The pipeline computes first-order radiomics features by default, as well as dose volume histograms.\n", 154 | "Here we can extract out the results easily into a Pandas DataFrame for analysis.\n", 155 | "\n", 156 | "Check out the [Compute Radiomics](https://australiancancerdatanetwork.github.io/pydicer/_examples/Radiomics.html) and the [Dose Metrics](https://australiancancerdatanetwork.github.io/pydicer/_examples/DoseMetrics.html) examples for further details on how to use these functions." 
157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": {}, 163 | "outputs": [], 164 | "source": [ 165 | "# Display the DataFrame of radiomics computed\n", 166 | "df_radiomics = pydicer.analyse.get_all_computed_radiomics_for_dataset(dataset_name=\"clean\")\n", 167 | "df_radiomics" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": null, 173 | "metadata": {}, 174 | "outputs": [], 175 | "source": [ 176 | "# Extract the D95, D50 and V3 dose metrics\n", 177 | "df_dose_metrics = pydicer.analyse.compute_dose_metrics(dataset_name=\"clean\", d_point=[95, 50], v_point=[3])\n", 178 | "df_dose_metrics" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": null, 184 | "metadata": {}, 185 | "outputs": [], 186 | "source": [] 187 | } 188 | ], 189 | "metadata": { 190 | "interpreter": { 191 | "hash": "814af119db7f8f2860617be3dcd1d37c560587d11c65bd58c45b1679d3ee6ea4" 192 | }, 193 | "kernelspec": { 194 | "display_name": "Python 3.8.0 64-bit ('pydicer': pyenv)", 195 | "language": "python", 196 | "name": "python3" 197 | }, 198 | "language_info": { 199 | "codemirror_mode": { 200 | "name": "ipython", 201 | "version": 3 202 | }, 203 | "file_extension": ".py", 204 | "mimetype": "text/x-python", 205 | "name": "python", 206 | "nbconvert_exporter": "python", 207 | "pygments_lexer": "ipython3", 208 | "version": "3.9.16" 209 | }, 210 | "orig_nbformat": 4 211 | }, 212 | "nbformat": 4, 213 | "nbformat_minor": 2 214 | } 215 | -------------------------------------------------------------------------------- /tests/test_structure_set.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=redefined-outer-name,missing-function-docstring 2 | 3 | import json 4 | 5 | import SimpleITK as sitk 6 | 7 | from pydicer import PyDicer 8 | from pydicer.utils import add_structure_name_mapping, read_converted_data 9 | from pydicer.dataset.structureset import StructureSet 10 | from pydicer.constants import CONVERTED_DIR_NAME 11 | 12 | 13 | def test_add_project_mapping(test_data_converted): 14 | working_directory = test_data_converted 15 | 16 | mapping_id = "test_mapping" 17 | mapping = { 18 | "Brain": ["brain", "BRAIN"], 19 | "SpinalCord": ["Cord", "copy_of_cord"], 20 | } 21 | add_structure_name_mapping(mapping, mapping_id=mapping_id, working_directory=working_directory) 22 | 23 | # Confirm that the mapping file exists 24 | mapping_file = working_directory.joinpath( 25 | ".pydicer", ".structure_set_mappings", f"{mapping_id}.json" 26 | ) 27 | assert mapping_file.exists() 28 | 29 | # Read the file and confirm it contains the same contents as the mapping we provided 30 | with open(mapping_file, encoding="utf-8") as json_file: 31 | mapping_loaded = json.load(json_file) 32 | 33 | assert mapping == mapping_loaded 34 | 35 | 36 | def test_add_structure_set_mapping(test_data_converted): 37 | working_directory = test_data_converted 38 | 39 | df = read_converted_data(working_directory) 40 | 41 | # Pick one structure set to supply mapping for 42 | struct_hash = "6d2934" 43 | struct_row = df[df.hashed_uid == struct_hash].iloc[0] 44 | 45 | mapping_id = "structure_set_mapping" 46 | mapping = { 47 | "Brain": ["brain", "BRAIN"], 48 | "SpinalCord": ["Cord", "copy_of_cord"], 49 | } 50 | add_structure_name_mapping(mapping, mapping_id=mapping_id, structure_set_row=struct_row) 51 | 52 | # Confirm that the mapping file exists 53 | mapping_file = working_directory.joinpath( 54 | CONVERTED_DIR_NAME, 55 | 
struct_row.patient_id, 56 | "structures", 57 | struct_hash, 58 | ".structure_set_mappings", 59 | f"{mapping_id}.json", 60 | ) 61 | assert mapping_file.exists() 62 | 63 | # Read the file and confirm it contains the same contents as the mapping we provided 64 | with open(mapping_file, encoding="utf-8") as json_file: 65 | mapping_loaded = json.load(json_file) 66 | 67 | assert mapping == mapping_loaded 68 | 69 | 70 | def test_add_patient_mapping(test_data_converted): 71 | working_directory = test_data_converted 72 | 73 | mapping_id = "test_mapping" 74 | mapping = { 75 | "Brain": ["brain", "BRAIN"], 76 | "SpinalCord": ["Cord", "copy_of_cord"], 77 | } 78 | patient_id = "HNSCC-01-0199" 79 | add_structure_name_mapping( 80 | mapping, 81 | mapping_id=mapping_id, 82 | working_directory=working_directory, 83 | patient_id=patient_id, 84 | ) 85 | 86 | # Confirm that the mapping file exists 87 | mapping_file = working_directory.joinpath( 88 | CONVERTED_DIR_NAME, 89 | patient_id, 90 | "structures", 91 | ".structure_set_mappings", 92 | f"{mapping_id}.json", 93 | ) 94 | assert mapping_file.exists() 95 | 96 | # Read the file and confirm it contains the same contents as the mapping we provided 97 | with open(mapping_file, encoding="utf-8") as json_file: 98 | mapping_loaded = json.load(json_file) 99 | 100 | assert mapping == mapping_loaded 101 | 102 | 103 | def test_structure_set_class(test_data_converted): 104 | working_directory = test_data_converted 105 | 106 | df = read_converted_data(working_directory) 107 | 108 | # Pick one structure set to test mapping for 109 | struct_hash = "06e49c" 110 | struct_row = df[df.hashed_uid == struct_hash].iloc[0] 111 | 112 | # Check that we look up the correct structure name 113 | ss = StructureSet(struct_row) 114 | 115 | # Check that all structures are loaded 116 | assert len(ss.structure_names) == 38 117 | 118 | # Load a structure, confirm the values are as expected 119 | spinal_cord = ss["Cord"] 120 | spinal_cord_arr = sitk.GetArrayFromImage(spinal_cord) 121 | assert spinal_cord_arr.sum() == 7880 122 | 123 | 124 | def test_structure_set_mapping(test_data_converted): 125 | working_directory = test_data_converted 126 | 127 | df = read_converted_data(working_directory) 128 | 129 | # Add a mapping 130 | mapping_id = "ss_mapping" 131 | mapping = { 132 | "SpinalCord": ["Cord", "copy_of_cord"], 133 | "Parotid_L": ["Left_parotid", "Lt_Parotid"], 134 | "Parotid_R": ["Right_parotid", "Rt_Parotid"], 135 | "Brain": ["BRAIN"], 136 | } 137 | add_structure_name_mapping(mapping, mapping_id=mapping_id, working_directory=working_directory) 138 | 139 | # Pick one structure set to test mapping for 140 | struct_hash = "06e49c" 141 | struct_row = df[df.hashed_uid == struct_hash].iloc[0] 142 | 143 | # Check that we look up the correct structure name 144 | ss = StructureSet(struct_row, mapping_id=mapping_id) 145 | assert ss.get_mapped_structure_name("SpinalCord") == "Cord" 146 | assert ss.get_mapped_structure_name("Parotid_L") == "Lt_Parotid" 147 | assert ss.get_mapped_structure_name("Parotid_R") == "Rt_Parotid" 148 | 149 | # Check that the correct standardised name is mapped 150 | assert ss.get_standardised_structure_name("Cord") == "SpinalCord" 151 | assert ss.get_standardised_structure_name("Lt_Parotid") == "Parotid_L" 152 | assert ss.get_standardised_structure_name("Rt_Parotid") == "Parotid_R" 153 | 154 | # Check that we can read a structure by standardised name 155 | spinal_cord = ss["SpinalCord"] 156 | spinal_cord_arr = sitk.GetArrayFromImage(spinal_cord) 157 | assert 
spinal_cord_arr.sum() == 7880 158 | 159 | # Check that brain is detected as not mapped for this case (as the structure isn't available) 160 | assert len(ss.get_unmapped_structures()) == 1 161 | assert ss.get_unmapped_structures()[0] == "Brain" 162 | 163 | 164 | def test_radiomics_structure_names_standardised(test_data_converted): 165 | working_directory = test_data_converted 166 | pydicer = PyDicer(working_directory) 167 | 168 | # Add a mapping 169 | mapping_id = "rad_mapping" 170 | mapping = { 171 | "SpinalCord": ["Cord", "copy_of_cord", "cord"], 172 | "Parotid_L": ["Left_parotid", "Lt_Parotid", "L_parotid", "LT_Parotid"], 173 | "Parotid_R": ["Right_parotid", "Rt_Parotid", "R_parotid", "RT_Parotid"], 174 | "Brain": ["BRAIN"], 175 | } 176 | add_structure_name_mapping(mapping, mapping_id=mapping_id, working_directory=working_directory) 177 | 178 | # Check the radiomics without mapping 179 | df_radiomics = pydicer.analyse.get_all_computed_radiomics_for_dataset() 180 | assert len(df_radiomics.Contour.unique()) == 128 181 | 182 | # Check the radiomics with mapping 183 | df_radiomics = pydicer.analyse.get_all_computed_radiomics_for_dataset( 184 | structure_mapping_id=mapping_id 185 | ) 186 | assert len(df_radiomics) == 13 187 | assert len(df_radiomics.Contour.unique()) == 4 188 | 189 | 190 | def test_dose_metrics_structure_names_standardised(test_data_converted): 191 | working_directory = test_data_converted 192 | pydicer = PyDicer(working_directory) 193 | 194 | # Add a mapping 195 | mapping_id = "dose_mapping" 196 | mapping = { 197 | "SpinalCord": ["Cord", "copy_of_cord", "cord"], 198 | "Parotid_L": ["Left_parotid", "Lt_Parotid", "L_parotid", "LT_Parotid"], 199 | "Parotid_R": ["Right_parotid", "Rt_Parotid", "R_parotid", "RT_Parotid"], 200 | "Brain": ["BRAIN"], 201 | } 202 | add_structure_name_mapping(mapping, mapping_id=mapping_id, working_directory=working_directory) 203 | 204 | # Check the dose metrics without mapping 205 | df_dose_metrics = pydicer.analyse.compute_dose_metrics(d_point=[95, 50], v_point=[3]) 206 | assert len(df_dose_metrics.label.unique()) == 128 207 | 208 | # Check the dose metrics with mapping 209 | df_dose_metrics = pydicer.analyse.compute_dose_metrics( 210 | d_point=[95, 50], v_point=[3], structure_mapping_id=mapping_id 211 | ) 212 | assert len(df_dose_metrics) == 13 213 | assert len(df_dose_metrics.label.unique()) == 4 214 | -------------------------------------------------------------------------------- /examples/ConvertingData.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Converting Data\n", 9 | "\n", 10 | "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/AustralianCancerDataNetwork/pydicer/blob/main/examples/ConvertingData.ipynb)\n", 11 | "\n", 12 | "In this example, the preprocessing and conversion of DICOM data is demonstrated. These are\n", 13 | "essential first steps before data can be analysed using PyDicer." 
14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "try:\n", 23 | " from pydicer import PyDicer\n", 24 | "except ImportError:\n", 25 | " !pip install pydicer\n", 26 | " from pydicer import PyDicer\n", 27 | "\n", 28 | "from pathlib import Path\n", 29 | "\n", 30 | "from pydicer.input.test import TestInput" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "## Setup PyDicer\n", 38 | "\n", 39 | "As in the [Getting Started example](https://australiancancerdatanetwork.github.io/pydicer/_examples/GettingStarted.html), we must first define a working directory for our dataset. We\n", 40 | "also create a `PyDicer` object." 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "directory = Path(\"./working\")\n", 50 | "pydicer = PyDicer(directory)" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "## Fetch some data\n", 58 | "\n", 59 | "A [TestInput class](https://australiancancerdatanetwork.github.io/pydicer/input.html#pydicer.input.test.TestInput) is provided in pydicer to download some sample data to work with. Several other\n", 60 | "input classes exist if you'd like to retrieve DICOM data for conversion from somewhere else. See \n", 61 | "the [docs for information](https://australiancancerdatanetwork.github.io/pydicer/html/input.html)\n", 62 | "on how the PyDicer input classes work.\n", 63 | "\n", 64 | "Most commonly, if you have DICOM files stored within a folder on your file system you can simply\n", 65 | "pass the path to your DICOM directory to the `pydicer.add_input()` function." 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "dicom_directory = directory.joinpath(\"dicom\")\n", 75 | "test_input = TestInput(dicom_directory)\n", 76 | "test_input.fetch_data()\n", 77 | "\n", 78 | "# Add the input DICOM location to the pydicer object\n", 79 | "pydicer.add_input(dicom_directory)" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "## Preprocess\n", 87 | "\n", 88 | "With some DICOM data ready to work with, we must first use the PyDicer [preprocess module](https://australiancancerdatanetwork.github.io/pydicer/preprocess.html). This\n", 89 | "module will crawl over all DICOM data available and will index all information required for\n", 90 | "conversion of the data." 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "pydicer.preprocess()" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "### Inspect Preprocessed Data\n", 107 | "\n", 108 | "Here we load the data that was indexed during preprocessing and output the first rows. This data\n", 109 | "will be used by the following step of data conversion." 
110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "df_preprocessed = pydicer.read_preprocessed_data()\n", 119 | "df_preprocessed.head()" 120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": {}, 125 | "source": [ 126 | "## Convert Data\n", 127 | "\n", 128 | "With the DICOM data having been indexed during preprocessing, we are now ready to convert this data\n", 129 | "into NIfTI format, which will be stored within the PyDicer standard directory structure.\n", 130 | "\n", 131 | "Running the following cell will begin the conversion process. While this cell is running, take a\n", 132 | "look inside the `working/data` directory to see how the converted data is being stored.\n", 133 | "\n", 134 | "Notice the `converted.csv` file stored for each patient. This tracks each converted data object.\n", 135 | "This will be loaded as a [Pandas DataFrame](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html) for use throughout PyDicer.\n" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "pydicer.convert.convert()" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "### Load Converted DataFrame\n", 152 | "\n", 153 | "Once data is converted, we can load a Pandas DataFrame which contains a description of each object\n", 154 | "converted.\n", 155 | "\n", 156 | "The most useful columns in the DataFrame for working with this data in PyDicer are:\n", 157 | "- `hashed_uid`: This is a 6-character hexadecimal hash of the associated DICOM SeriesInstanceUID.\n", 158 | "  PyDicer refers to objects using this hashed identifier for a more concise representation.\n", 159 | "- `modality`: The modality of the data object.\n", 160 | "- `patient_id`: The ID of the patient this data object belongs to.\n", 161 | "- `path`: The path within the working directory where files for this data object are stored." 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "df = pydicer.read_converted_data()\n", 171 | "df" 172 | ] 173 | }, 174 | { 175 | "cell_type": "markdown", 176 | "metadata": {}, 177 | "source": [ 178 | "## Data Quarantine\n", 179 | "\n", 180 | "If anything goes wrong while converting a DICOM object during either the preprocess step or the\n", 181 | "conversion step, the problematic DICOM data will be copied to the `working/quarantine` directory.\n", 182 | "\n", 183 | "It's a good idea to regularly check your quarantine directory to ensure that no critical data\n", 184 | "objects are being quarantined. If so, you may want to consider rectifying the issue and running the\n", 185 | "preprocess and conversion steps again.\n", 186 | "\n", 187 | "As can be seen by running the cell below, there were several DICOM objects moved to the quarantine\n", 188 | "for our test dataset. This was due to there being multiple slices at the same location with\n", 189 | "differing pixel data in one CT image series."
190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "metadata": {}, 196 | "outputs": [], 197 | "source": [ 198 | "df_quarantine = pydicer.read_quarantined_data()\n", 199 | "df_quarantine" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [] 208 | } 209 | ], 210 | "metadata": { 211 | "interpreter": { 212 | "hash": "814af119db7f8f2860617be3dcd1d37c560587d11c65bd58c45b1679d3ee6ea4" 213 | }, 214 | "kernelspec": { 215 | "display_name": "Python 3.8.0 64-bit ('pydicer': pyenv)", 216 | "language": "python", 217 | "name": "python3" 218 | }, 219 | "language_info": { 220 | "codemirror_mode": { 221 | "name": "ipython", 222 | "version": 3 223 | }, 224 | "file_extension": ".py", 225 | "mimetype": "text/x-python", 226 | "name": "python", 227 | "nbconvert_exporter": "python", 228 | "pygments_lexer": "ipython3", 229 | "version": "3.9.16" 230 | }, 231 | "orig_nbformat": 4 232 | }, 233 | "nbformat": 4, 234 | "nbformat_minor": 2 235 | } 236 | -------------------------------------------------------------------------------- /examples/Radiomics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Compute Radiomics\n", 9 | "\n", 10 | "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/AustralianCancerDataNetwork/pydicer/blob/main/examples/Radiomics.ipynb)\n", 11 | "\n", 12 | "In this example notebook we use [PyRadiomics](https://github.com/AIM-Harvard/pyradiomics) to\n", 13 | "compute various types of radiomics features. We use some\n", 14 | "[LCTSC](https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=24284539) data from the\n", 15 | "Cancer Imaging Archive which has already been converted using PyDicer for demonstration purposes." 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "try:\n", 25 | " from pydicer import PyDicer\n", 26 | "except ImportError:\n", 27 | " !pip install pydicer\n", 28 | " from pydicer import PyDicer\n", 29 | "\n", 30 | "from pathlib import Path\n", 31 | "\n", 32 | "from pydicer.utils import fetch_converted_test_data\n", 33 | "\n", 34 | "from pydicer.utils import load_object_metadata" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "## Fetch data\n", 42 | "\n", 43 | "LCTSC data prepared for this example are downloaded and stored in a `testdata_lctsc` directory.\n", 44 | "We will use this for our PyDicer working directory." 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "working_directory = fetch_converted_test_data(\"./testdata_lctsc\", dataset=\"LCTSC\")" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "## Initialise PyDicer object\n", 61 | "\n", 62 | "Using the working directory containing the LCTSC test data."
63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "pydicer = PyDicer(working_directory)" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "## Compute Default Radiomics\n", 79 | "\n", 80 | "By default, PyDicer will compute only first-order radiomics features. Radiomics are computed for\n", 81 | "each structure available in the dataset using the image data of images linked to those structures." 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "pydicer.analyse.compute_radiomics()" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "## Fetch Computed Radiomics\n", 98 | "\n", 99 | "Use the [get_all_computed_radiomics_for_dataset](https://australiancancerdatanetwork.github.io/pydicer/analyse.html#pydicer.analyse.data.AnalyseData.get_all_computed_radiomics_for_dataset) function to fetch all radiomics features computed\n", 100 | "in the last step.\n", 101 | "\n", 102 | "The `.head()` function on a Pandas DataFrame outputs the first 5 rows for inspection." 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "# Display the DataFrame of radiomics computed\n", 112 | "df_radiomics = pydicer.analyse.get_all_computed_radiomics_for_dataset()\n", 113 | "df_radiomics.head()" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "## Specify Radiomics to Compute\n", 121 | "\n", 122 | "PyDicer uses the popular `pyradiomics` library to compute radiomics. So, you may specify any\n", 123 | "radiomics features provided in that library to be computed. See the [pyradiomics documentation for\n", 124 | "a list of radiomics features\n", 125 | "available](https://pyradiomics.readthedocs.io/en/latest/features.html).\n", 126 | "\n", 127 | "In this example, we specify all `shape` features as well as `first-order` features to be computed."
128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [ 136 | "# Import pyradiomics feature modules\n", 137 | "from radiomics import (\n", 138 | " firstorder,\n", 139 | " shape,\n", 140 | ")\n", 141 | "\n", 142 | "# Prepare a dict of features to compute grouped by class (excluding deprecated features)\n", 143 | "first_order_features = firstorder.RadiomicsFirstOrder.getFeatureNames()\n", 144 | "shape_features = shape.RadiomicsShape.getFeatureNames()\n", 145 | "compute_radiomics = {\n", 146 | " \"firstorder\": [f for f in first_order_features if not first_order_features[f]],\n", 147 | " \"shape\": [f for f in shape_features if not shape_features[f]],\n", 148 | "}\n", 149 | "\n", 150 | "# Pass the dict to compute the radiomics\n", 151 | "pydicer.analyse.compute_radiomics(radiomics=compute_radiomics)\n", 152 | "\n", 153 | "# Fetch the computed radiomics and output the first few rows\n", 154 | "df_radiomics = pydicer.analyse.get_all_computed_radiomics_for_dataset()\n", 155 | "df_radiomics.head()" 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "metadata": {}, 161 | "source": [ 162 | "You can also set a specific subset of features like this:" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [ 171 | "# Specify features to compute grouped by class\n", 172 | "compute_radiomics = {\n", 173 | " \"firstorder\": [\"Maximum\", \"Minimum\", \"Mean\", \"Median\"],\n", 174 | " \"shape\": [\"SurfaceArea\", \"VoxelVolume\"],\n", 175 | "}\n", 176 | "\n", 177 | "# Pass the dict to compute the radiomics\n", 178 | "pydicer.analyse.compute_radiomics(radiomics=compute_radiomics)\n", 179 | "\n", 180 | "df_radiomics = pydicer.analyse.get_all_computed_radiomics_for_dataset()\n", 181 | "df_radiomics.head()" 182 | ] 183 | }, 184 | { 185 | "cell_type": "markdown", 186 | "metadata": {}, 187 | "source": [ 188 | "## Track Metadata\n", 189 | "\n", 190 | "When analysing your radiomic features, it may be useful to have certain metadata available from\n", 191 | "either the image or structure set. You can specify which DICOM header tags to extract metadata for,\n", 192 | "and these will be stored alongside the radiomic feature values.\n", 193 | "\n", 194 | "In the cell below, we recompute our radiomics and store the `PatientSex` header value from the\n", 195 | "image series and the `StudyDate` value from the structure set."
196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": null, 201 | "metadata": {}, 202 | "outputs": [], 203 | "source": [ 204 | "# Compute the radiomics specifying the meta data to keep\n", 205 | "pydicer.analyse.compute_radiomics(\n", 206 | " radiomics=compute_radiomics,\n", 207 | " image_meta_data=[\"PatientSex\"],\n", 208 | " structure_meta_data=[\"StudyDate\"]\n", 209 | ")\n", 210 | "\n", 211 | "# Fetch the results and display the first rows\n", 212 | "df_radiomics = pydicer.analyse.get_all_computed_radiomics_for_dataset()\n", 213 | "df_radiomics.head()" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": null, 219 | "metadata": {}, 220 | "outputs": [], 221 | "source": [] 222 | } 223 | ], 224 | "metadata": { 225 | "interpreter": { 226 | "hash": "814af119db7f8f2860617be3dcd1d37c560587d11c65bd58c45b1679d3ee6ea4" 227 | }, 228 | "kernelspec": { 229 | "display_name": "Python 3.8.0 64-bit ('pydicer': pyenv)", 230 | "language": "python", 231 | "name": "python3" 232 | }, 233 | "language_info": { 234 | "codemirror_mode": { 235 | "name": "ipython", 236 | "version": 3 237 | }, 238 | "file_extension": ".py", 239 | "mimetype": "text/x-python", 240 | "name": "python", 241 | "nbconvert_exporter": "python", 242 | "pygments_lexer": "ipython3", 243 | "version": "3.9.16" 244 | }, 245 | "orig_nbformat": 4 246 | }, 247 | "nbformat": 4, 248 | "nbformat_minor": 2 249 | } 250 | -------------------------------------------------------------------------------- /tests/test_input.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | import tempfile 4 | 5 | import pytest 6 | from unittest.mock import MagicMock, patch 7 | 8 | from pydicer.input.web import WebInput 9 | from pydicer.input.test import TestInput 10 | from pydicer.input.filesystem import FileSystemInput 11 | from pydicer.input.pacs import DICOMPACSInput 12 | from pydicer.input.tcia import TCIAInput 13 | 14 | 15 | def test_input_valid_working_dir(): 16 | valid_test_input = WebInput(data_url="") 17 | # Assert path to DICOMs exists 18 | assert valid_test_input.working_directory.is_dir() 19 | 20 | valid_filesystem_input = FileSystemInput( 21 | valid_test_input.working_directory) 22 | # Assert path to DICOMs exists 23 | assert valid_filesystem_input.working_directory.is_dir() 24 | 25 | valid_tcia_input = TCIAInput(collection="", patient_ids=[], modalities=[]) 26 | # Assert path to DICOMs exists 27 | assert valid_tcia_input.working_directory.is_dir() 28 | 29 | 30 | def assert_invalid_tcia_input(invalid_tcia_input): 31 | """ 32 | Assert path to DICOMs does exist, but it contains no files 33 | """ 34 | invalid_tcia_input.fetch_data() 35 | assert invalid_tcia_input.working_directory.is_dir() 36 | assert ( 37 | len( 38 | [ 39 | name 40 | for name in os.listdir(invalid_tcia_input.working_directory) 41 | if os.path.isfile(os.path.join(invalid_tcia_input.working_directory, name)) 42 | ] 43 | ) 44 | == 0 45 | ) 46 | 47 | 48 | def test_input_invalid_working_dir(): 49 | invalid_test_input = WebInput( 50 | data_url="", working_directory="INVALID_PATH") 51 | # Assert path to DICOMs does not exist 52 | assert not invalid_test_input.working_directory.is_dir() 53 | 54 | with pytest.raises(FileNotFoundError): 55 | FileSystemInput("INVALID_PATH") 56 | 57 | invalid_work_dir_tcia_input = TCIAInput( 58 | collection="TCGA-GBM", 59 | patient_ids=["TCGA-08-0244"], 60 | modalities=["MR"], 61 | working_directory="INVALID_PATH", 62 | ) 63 | # Assert path to 
DICOMs does not exist 64 | assert not invalid_work_dir_tcia_input.working_directory.is_dir() 65 | 66 | 67 | @pytest.mark.skip 68 | def test_tcia_input(): 69 | invalid_collection_tcia_input = TCIAInput( 70 | collection="INVALID_COLLECTION", patient_ids=[], modalities=[] 71 | ) 72 | invalid_patient_id_tcia_input = TCIAInput( 73 | collection="TCGA-GBM", patient_ids=["INVALID_PATIENT_ID"], modalities=[] 74 | ) 75 | 76 | assert_invalid_tcia_input(invalid_collection_tcia_input) 77 | assert_invalid_tcia_input(invalid_patient_id_tcia_input) 78 | 79 | 80 | def test_test_input(): 81 | test_input = TestInput() 82 | test_input.fetch_data() 83 | output_directory = test_input.working_directory.joinpath("HNSCC") 84 | 85 | # Assert that the 3 directories now exist on the system filepath 86 | assert output_directory.joinpath("HNSCC-01-0019").is_dir() 87 | assert output_directory.joinpath("HNSCC-01-0176").is_dir() 88 | assert output_directory.joinpath("HNSCC-01-0199").is_dir() 89 | 90 | 91 | def test_dicom_pacs_invalid_host(): 92 | # Using a presumably incorrect host/port to force a ConnectionError 93 | with pytest.raises(ConnectionError): 94 | DICOMPACSInput("INCORRECT_HOST", 1234) 95 | 96 | 97 | def test_dicom_pacs_valid_host(mocker): 98 | """ 99 | Test creating a DICOMPACSInput instance with a valid host where verify() returns True. 100 | """ 101 | # Patch the DicomConnector to return True for verify() 102 | mock_connector_class = mocker.patch("pydicer.input.pacs.DicomConnector") 103 | mock_connector_instance = mock_connector_class.return_value 104 | mock_connector_instance.verify.return_value = True 105 | 106 | # Should not raise ConnectionError 107 | dicompacs_input = DICOMPACSInput("VALID_HOST", 11112, "AE_TITLE") 108 | 109 | # Assert the underlying connector was indeed created 110 | assert dicompacs_input.dicom_connector is not None 111 | assert dicompacs_input.working_directory.is_dir() 112 | # Verify that verify() was called exactly once on initialization 113 | mock_connector_instance.verify.assert_called_once() 114 | 115 | 116 | def test_dicom_pacs_fetch_data_success(mocker): 117 | """ 118 | Test fetching data when the connection is valid, ensuring that we: 119 | 1) Convert single string patients/modalities to lists 120 | 2) Skip 'None' returns from do_find 121 | 3) Skip series whose patient ID doesn't match 122 | 4) Renames downloaded files to .dcm 123 | """ 124 | mock_connector_class = mocker.patch("pydicer.input.pacs.DicomConnector") 125 | mock_connector_instance = mock_connector_class.return_value 126 | mock_connector_instance.verify.return_value = True 127 | 128 | # Mock do_find to return "studies" and then "series" 129 | # The top-level do_find returns a list of "study" datasets (some None), 130 | # then the second do_find returns a list of "series" datasets (some None). 131 | # Each dataset is just a MagicMock or simple object with needed attributes. 
132 | mock_study_1 = MagicMock() 133 | mock_study_1.StudyInstanceUID = "STUDY_UID_1" 134 | mock_study_2 = MagicMock() 135 | mock_study_2.StudyInstanceUID = "STUDY_UID_2" 136 | mock_study_none = None # Should be skipped 137 | 138 | mock_series_1 = MagicMock() 139 | mock_series_1.SeriesInstanceUID = "SERIES_UID_1" 140 | mock_series_1.PatientID = "PATIENT_1" 141 | mock_series_2 = MagicMock() 142 | mock_series_2.SeriesInstanceUID = "SERIES_UID_2" 143 | mock_series_2.PatientID = "SOME_OTHER_PATIENT" # Should be skipped 144 | mock_series_none = None 145 | 146 | # The "find" calls for studies: 147 | mock_connector_instance.do_find.side_effect = [ 148 | [mock_study_1, mock_study_none, mock_study_2], # Studies 149 | [mock_series_1, mock_series_none, mock_series_2], # Series for STUDY_UID_1 150 | # Series for STUDY_UID_2 (just re-using same object to keep it simple) 151 | [mock_series_1], 152 | ] 153 | 154 | dicompacs_input = DICOMPACSInput("VALID_HOST", 11112, "AE_TITLE") 155 | 156 | # Create a dummy file that doesn't end with .dcm so we can test rename 157 | with tempfile.TemporaryDirectory() as tmpdir: 158 | tmpdir_path = Path(tmpdir) 159 | # Force working directory to our temp dir 160 | dicompacs_input.working_directory = tmpdir_path 161 | 162 | dummy_file_path = tmpdir_path / "dummy_no_ext" 163 | dummy_file_path.write_text("test file content") 164 | 165 | # Single patient, single modality as strings 166 | dicompacs_input.fetch_data("PATIENT_1", "CT") 167 | 168 | # Ensure do_find was called multiple times 169 | # The first call: study-level (QueryRetrieveLevel="STUDY") 170 | # The next calls: series-level (QueryRetrieveLevel="SERIES"), once for each study 171 | assert mock_connector_instance.do_find.call_count == 3 172 | 173 | # Ensure download_series was called for the valid series only 174 | # The second series had a mismatched patient ID, so skip 175 | # The third call is a new do_find -> leads to another series (mock_series_1 with same patient) 176 | # So we should have downloaded 2 times 177 | assert mock_connector_instance.download_series.call_count == 2 178 | call_args_list = mock_connector_instance.download_series.call_args_list 179 | # We expect the arguments to match "SERIES_UID_1" each time in this example 180 | # (in practice, could differ if you had different series objects) 181 | assert call_args_list[0][0][0] == "SERIES_UID_1" 182 | assert call_args_list[1][0][0] == "SERIES_UID_1" 183 | 184 | # Check that the file without extension was renamed to .dcm 185 | renamed_file = tmpdir_path / "dummy_no_ext.dcm" 186 | assert renamed_file.exists(), "File without .dcm extension should have been renamed." 187 | assert not dummy_file_path.exists(), "Original file without extension should be renamed." 188 | 189 | 190 | @pytest.mark.skip 191 | def test_dicom_pacs_fetch(): 192 | """ 193 | Example real test that tries to actually fetch from a public DICOM PACS. 194 | This might be skipped because it depends on external availability. 
195 | """ 196 | pacs_input = DICOMPACSInput("www.dicomserver.co.uk", 11112, "DCMQUERY") 197 | pacs_input.fetch_data("PAT004", modalities=["GM"]) 198 | 199 | assert pacs_input.working_directory.is_dir() 200 | assert len(list(pacs_input.working_directory.glob("*/*"))) > 0 201 | -------------------------------------------------------------------------------- /tests/test_generate.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=redefined-outer-name,missing-function-docstring 2 | 3 | import pytest 4 | 5 | import pandas as pd 6 | import SimpleITK as sitk 7 | 8 | from pydicer.generate.object import add_object, add_dose_object, add_structure_object 9 | from pydicer.utils import read_converted_data 10 | 11 | 12 | @pytest.fixture 13 | def test_data_path(tmp_path_factory): 14 | """Fixture to generate a pydicer style file structure. For the purposes of these tests, it 15 | doesn't really matter what the files themselves contain. Only the converted.csv will be used 16 | here.""" 17 | 18 | working_directory = tmp_path_factory.mktemp("data") 19 | 20 | cols = [ 21 | "", 22 | "sop_instance_uid", 23 | "hashed_uid", 24 | "modality", 25 | "patient_id", 26 | "series_uid", 27 | "for_uid", 28 | "referenced_sop_instance_uid", 29 | "path", 30 | ] 31 | rows = [ 32 | [ 33 | 0, 34 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.418136430763474248173140712714", 35 | "b281ea", 36 | "CT", 37 | "HNSCC-01-0019", 38 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.233510441938368266923995238976", 39 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.290727775603409136366833989550", 40 | "", 41 | "data/HNSCC-01-0019/images/b281ea", 42 | ], 43 | [ 44 | 0, 45 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.168221415040968580239112565792", 46 | "7cdcd9", 47 | "RTSTRUCT", 48 | "HNSCC-01-0019", 49 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.103450757970418393826743010361", 50 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.290727775603409136366833989550", 51 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.418136430763474248173140712714", 52 | "data/HNSCC-01-0019/structures/7cdcd9", 53 | ], 54 | [ 55 | 0, 56 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.254865609982571308239859201936", 57 | "57b99f", 58 | "RTPLAN", 59 | "HNSCC-01-0019", 60 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.202542618630321306831779497186", 61 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.290727775603409136366833989550", 62 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.168221415040968580239112565792", 63 | "data/HNSCC-01-0019/plans/57b99f", 64 | ], 65 | [ 66 | 0, 67 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.242809596262952988524850819667", 68 | "309e1a", 69 | "RTDOSE", 70 | "HNSCC-01-0019", 71 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.777975715563610987698151746284", 72 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.290727775603409136366833989550", 73 | "1.3.6.1.4.1.14519.5.2.1.1706.8040.254865609982571308239859201936", 74 | "data/HNSCC-01-0019/doses/309e1a", 75 | ], 76 | ] 77 | 78 | df_converted = pd.DataFrame(rows, columns=cols) 79 | for _, row in df_converted.iterrows(): 80 | 81 | data_obj_path = working_directory.joinpath(row.path) 82 | data_obj_path.mkdir(parents=True, exist_ok=True) 83 | 84 | converted_path = working_directory.joinpath("data", "HNSCC-01-0019", "converted.csv") 85 | df_converted.to_csv(converted_path) 86 | 87 | # Also create a dataset directory with converted sub-set 88 | dataset_path = working_directory.joinpath("test_dataset", "HNSCC-01-0019") 89 | dataset_path.mkdir(parents=True) 90 | converted_path = dataset_path.joinpath("converted.csv") 91 | df_converted[:2].to_csv(converted_path) 92 | 93 | return 
working_directory 94 | 95 | 96 | def test_generate_patient_id_does_not_exist(test_data_path): 97 | 98 | with pytest.raises(ValueError): 99 | add_object(test_data_path, "test_id", "test_pat", "image", "CT") 100 | 101 | 102 | def test_generate_incorrect_image_type(test_data_path): 103 | 104 | with pytest.raises(ValueError): 105 | add_object(test_data_path, "test_id", "HNSCC-01-0019", "oops", "CT") 106 | 107 | 108 | def test_generate_object_does_not_exist(test_data_path): 109 | 110 | with pytest.raises(SystemError): 111 | add_object(test_data_path, "test_id", "HNSCC-01-0019", "image", "CT") 112 | 113 | 114 | def test_generate_object_already_exists(test_data_path): 115 | 116 | with pytest.raises(SystemError): 117 | add_object(test_data_path, "b281ea", "HNSCC-01-0019", "image", "CT") 118 | 119 | 120 | def test_generate_object(test_data_path): 121 | 122 | test_obj_path = test_data_path.joinpath("data", "HNSCC-01-0019", "images", "test_id") 123 | test_obj_path.mkdir() 124 | 125 | # Confirm the data object isn't there yet 126 | df_converted = read_converted_data(test_data_path, patients=["HNSCC-01-0019"]) 127 | assert len(df_converted[df_converted.hashed_uid == "test_id"]) == 0 128 | 129 | # Add the object 130 | add_object(test_data_path, "test_id", "HNSCC-01-0019", "image", "CT") 131 | 132 | # Now make sure it's there 133 | df_converted = read_converted_data(test_data_path, patients=["HNSCC-01-0019"]) 134 | assert len(df_converted[df_converted.hashed_uid == "test_id"]) == 1 135 | 136 | 137 | def test_generate_object_add_to_dataset(test_data_path): 138 | 139 | test_obj_path = test_data_path.joinpath("data", "HNSCC-01-0019", "images", "test_id") 140 | test_obj_path.mkdir() 141 | 142 | # Confirm the data object isn't there yet 143 | df_converted = read_converted_data( 144 | test_data_path, dataset_name="test_dataset", patients=["HNSCC-01-0019"] 145 | ) 146 | assert len(df_converted[df_converted.hashed_uid == "test_id"]) == 0 147 | 148 | add_object( 149 | test_data_path, "test_id", "HNSCC-01-0019", "image", "CT", datasets=["test_dataset"] 150 | ) 151 | # Now make sure it's there 152 | df_converted = read_converted_data( 153 | test_data_path, dataset_name="test_dataset", patients=["HNSCC-01-0019"] 154 | ) 155 | assert len(df_converted[df_converted.hashed_uid == "test_id"]) == 1 156 | 157 | 158 | def test_generate_dose_object(test_data_path): 159 | 160 | # Confirm the data object isn't there yet 161 | df_converted = read_converted_data(test_data_path, patients=["HNSCC-01-0019"]) 162 | assert len(df_converted[df_converted.hashed_uid == "dose_id"]) == 0 163 | 164 | test_dose = sitk.Image(20, 20, 20, sitk.sitkFloat32) 165 | linked_structure_hash = "7cdcd9" 166 | add_dose_object(test_data_path, test_dose, "dose_id", "HNSCC-01-0019", linked_structure_hash) 167 | 168 | # Now make sure it's there 169 | df_converted = read_converted_data(test_data_path, patients=["HNSCC-01-0019"]) 170 | assert len(df_converted[df_converted.hashed_uid == "dose_id"]) == 1 171 | 172 | # Also make sure the for_uid and reference sop instance uid are correct 173 | linked_row = df_converted[df_converted.hashed_uid == "dose_id"].iloc[0] 174 | assert linked_row.for_uid == "1.3.6.1.4.1.14519.5.2.1.1706.8040.290727775603409136366833989550" 175 | assert ( 176 | linked_row.referenced_sop_instance_uid 177 | == "1.3.6.1.4.1.14519.5.2.1.1706.8040.168221415040968580239112565792" 178 | ) 179 | 180 | # And that the dose file exists 181 | assert test_data_path.joinpath( 182 | "data", "HNSCC-01-0019", "doses", "dose_id", "RTDOSE.nii.gz" 183 | 
).exists() 184 | 185 | 186 | def test_generate_structure_object(test_data_path): 187 | 188 | # Confirm the data object isn't there yet 189 | df_converted = read_converted_data(test_data_path, patients=["HNSCC-01-0019"]) 190 | assert len(df_converted[df_converted.hashed_uid == "structure_id"]) == 0 191 | 192 | test_structure_set = { 193 | "test_struct1": sitk.Image(20, 20, 20, sitk.sitkFloat32), 194 | "test_struct2": sitk.Image(20, 20, 20, sitk.sitkFloat32), 195 | } 196 | linked_image_hash = "b281ea" 197 | add_structure_object( 198 | test_data_path, test_structure_set, "structure_id", "HNSCC-01-0019", linked_image_hash 199 | ) 200 | 201 | # Now make sure it's there 202 | df_converted = read_converted_data(test_data_path, patients=["HNSCC-01-0019"]) 203 | assert len(df_converted[df_converted.hashed_uid == "structure_id"]) == 1 204 | 205 | # Also make sure the for_uid and reference sop instance uid are correct 206 | linked_row = df_converted[df_converted.hashed_uid == "structure_id"].iloc[0] 207 | assert linked_row.for_uid == "1.3.6.1.4.1.14519.5.2.1.1706.8040.290727775603409136366833989550" 208 | assert ( 209 | linked_row.referenced_sop_instance_uid 210 | == "1.3.6.1.4.1.14519.5.2.1.1706.8040.418136430763474248173140712714" 211 | ) 212 | 213 | # And that the structure files actually exist 214 | assert test_data_path.joinpath( 215 | "data", "HNSCC-01-0019", "structures", "structure_id", "test_struct1.nii.gz" 216 | ).exists() 217 | --------------------------------------------------------------------------------