├── lint.sh ├── amid ├── __version__.py ├── amos │ ├── __init__.py │ └── dataset.py ├── mslub │ ├── __init__.py │ └── dataset.py ├── ribfrac │ ├── __init__.py │ └── dataset.py ├── upenn_gbm │ ├── __init__.py │ ├── data_classes.py │ └── upenn_gbm.py ├── cancer_500 │ ├── __init__.py │ ├── typing.py │ ├── dataset.py │ └── nodules.py ├── rsna_bc │ ├── __init__.py │ ├── utils.py │ └── dataset.py ├── totalsegmentator │ ├── __init__.py │ ├── utils.py │ ├── const.py │ └── dataset.py ├── lits │ ├── __init__.py │ ├── transforms.py │ └── dataset.py ├── internals │ ├── __init__.py │ ├── dataset.py │ ├── licenses.py │ └── registry.py ├── vs_seg │ ├── __init__.py │ └── transforms.py ├── cc359 │ ├── __init__.py │ ├── transforms.py │ └── dataset.py ├── lidc │ ├── __init__.py │ ├── transforms.py │ ├── typing.py │ ├── nodules.py │ └── dataset.py ├── transforms.py ├── __init__.py ├── hcp.py ├── kits.py ├── covid_1110.py ├── cl_detection.py ├── tbad.py ├── liver_medseg.py ├── medseg9.py ├── curvas.py ├── nlst.py ├── utils.py ├── crlm.py ├── luna25.py ├── brats2021.py ├── egd.py ├── flare2022.py ├── crossmoda.py ├── ct_ich.py ├── verse.py ├── mood.py ├── deeplesion.py └── msd.py ├── tests ├── requirements.txt └── test_consistency.py ├── .gitignore ├── MANIFEST.in ├── lint-requirements.txt ├── docs ├── javascript │ ├── tablesort.js │ └── tablesort.filesize.js ├── requirements.txt ├── index.md ├── datasets-api.md ├── fill_readme.py ├── fill_docs.py ├── mkdocstrings_handlers │ └── python_connectome.py ├── CONTRIBUTING.md └── recipes │ └── RSNABreastCancer.ipynb ├── requirements.txt ├── .github └── workflows │ ├── lint.yml │ ├── docs.yml │ ├── release.yml │ └── tests.yml ├── .flake8 ├── LICENSE ├── pyproject.toml └── mkdocs.yml /lint.sh: -------------------------------------------------------------------------------- 1 | flake8 .; black .; isort . 
2 | -------------------------------------------------------------------------------- /amid/__version__.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.15.0' 2 | -------------------------------------------------------------------------------- /amid/amos/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset import AMOS 2 | -------------------------------------------------------------------------------- /amid/mslub/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset import MSLUB 2 | -------------------------------------------------------------------------------- /amid/ribfrac/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset import RibFrac 2 | -------------------------------------------------------------------------------- /amid/upenn_gbm/__init__.py: -------------------------------------------------------------------------------- 1 | from .upenn_gbm import UPENN_GBM 2 | -------------------------------------------------------------------------------- /amid/cancer_500/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset import MoscowCancer500 2 | -------------------------------------------------------------------------------- /amid/rsna_bc/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset import RSNABreastCancer 2 | -------------------------------------------------------------------------------- /tests/requirements.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | pytest-cov 3 | pytest-subtests 4 | -------------------------------------------------------------------------------- /amid/totalsegmentator/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset import Totalsegmentator 2 | -------------------------------------------------------------------------------- /amid/lits/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset import LiTS 2 | from .transforms import CanonicalCTOrientation, Rescale 3 | -------------------------------------------------------------------------------- /amid/internals/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset import Dataset, field 2 | from .registry import gather_datasets, register 3 | -------------------------------------------------------------------------------- /amid/vs_seg/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset import VSSEG 2 | from .transforms import CanonicalMRIOrientation, Rescale 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | __pycache__/ 3 | .pytest_cache/ 4 | *.egg-info/ 5 | .coverage 6 | docs/build/ 7 | docs/source/_*/ 8 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include requirements.txt 3 | include LICENSE 4 | recursive-include amid *.py .bev.yml *.hash 5 | 
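The `__init__.py` files above re-export each dataset class (and, where present, its transforms) from its subpackage, and `amid/__init__.py` (further below) re-exports them again at the top level. A minimal sketch of the resulting import surface, assuming the raw files have already been downloaded:

```python
# Sketch only: '/path/to/LiTS' is a hypothetical location of the downloaded raw files.
from amid import LiTS                                   # top-level re-export
from amid.lits import CanonicalCTOrientation, Rescale   # per-dataset transforms

ds = LiTS(root='/path/to/LiTS')
print(len(ds.ids))
```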
-------------------------------------------------------------------------------- /amid/cc359/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset import CC359, open_nii_gz_file 2 | from .transforms import CanonicalMRIOrientation, Rescale 3 | -------------------------------------------------------------------------------- /lint-requirements.txt: -------------------------------------------------------------------------------- 1 | black 2 | flake8<=5 3 | flake8-tidy-imports 4 | flake8-quotes 5 | flake8-bugbear 6 | flake8-comprehensions 7 | isort 8 | -------------------------------------------------------------------------------- /amid/lidc/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset import LIDC 2 | from .transforms import CanonicalCTOrientation, Rescale 3 | 4 | 5 | # TODO: remove pylidc dependency 6 | -------------------------------------------------------------------------------- /docs/javascript/tablesort.js: -------------------------------------------------------------------------------- 1 | document$.subscribe(function() { 2 | var tables = document.querySelectorAll("article table:not([class])") 3 | tables.forEach(function(table) { 4 | new Tablesort(table) 5 | }) 6 | }) 7 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | connectome>=0.10.0,<1.0.0 2 | numpy 3 | nibabel 4 | more-itertools 5 | dicom-csv 6 | tqdm 7 | pandas 8 | pylidc 9 | joblib 10 | deli<1.0.0 11 | scipy 12 | scikit-image 13 | pydicom 14 | imops 15 | highdicom 16 | SimpleITK 17 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | mkdocs==1.5.3 2 | mkdocs-material==9.4.1 3 | mkdocstrings[python]==0.22.0 4 | mkdocs-jupyter==0.24.2 5 | mkdocs-exclude==1.0.2 6 | mkdocs-autorefs==0.4.1 7 | mike 8 | pandas 9 | tabulate 10 | ipython-genutils 11 | griffe==0.29.1 12 | mkdocs-material-extensions==1.2 13 | mkdocstrings-python==1.1.2 14 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: Linters 2 | 3 | on: [ pull_request ] 4 | 5 | jobs: 6 | test: 7 | runs-on: ubuntu-20.04 8 | steps: 9 | - uses: actions/checkout@v3 10 | - name: Set up Python 11 | uses: actions/setup-python@v4 12 | with: 13 | python-version: '3.11' 14 | 15 | - name: Check code style 16 | run: | 17 | pip install -r lint-requirements.txt 18 | flake8 . 19 | isort --check . 20 | black --check . 
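The `Linters` workflow above runs the same tools listed in `lint-requirements.txt`, but in check-only mode, whereas `lint.sh` (shown earlier) applies the fixes in place. A sketch of the local equivalent of the CI job:

```shell
# Local reproduction of the CI lint job (tool versions pinned by lint-requirements.txt)
pip install -r lint-requirements.txt
flake8 .           # style and import rules configured in .flake8
isort --check .    # drop --check to apply the import ordering, as lint.sh does
black --check .    # drop --check to apply the formatting, as lint.sh does
```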
21 | -------------------------------------------------------------------------------- /amid/cancer_500/typing.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from typing import NamedTuple, Optional, Sequence 3 | 4 | 5 | class Texture(Enum): 6 | Solid, PartSolid, GroundGlass, Other = 0, 1, 2, 3 7 | 8 | 9 | class Review(Enum): 10 | Confirmed, ConfirmedPartially, Doubt, Rejected = 0, 1, 2, 3 11 | 12 | 13 | class Comment(Enum): 14 | Fibrosis, LymphNode, Calcium, Calcified, Bronchiectasis, Vessel = 0, 1, 2, 3, 4, 5 15 | 16 | 17 | class Cancer500Nodule(NamedTuple): 18 | center_voxel: Sequence[int] 19 | review: Review 20 | comment: Optional[Comment] = None 21 | diameter_mm: Optional[float] = None 22 | texture: Optional[Texture] = None 23 | malignancy: Optional[bool] = None 24 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | # W503: line break before binary operator is actually considered best-practice 3 | # E203: spaces around complex variables in slices are pep-right 4 | # F401: unused imports in __init__.py-s 5 | # I251: allow absolute imports in upper files 6 | # B019: @lru_cache for static methods is fine 7 | # B008: calling the function as default value is just part of the typer's interface 8 | # C408: for identifier-like fields dict(x=y) is just more concise 9 | ignore = W503,E203,B019,B028,C408,Q000 10 | per-file-ignores = 11 | **/__init__.py:F401 12 | scripts/*:I251 13 | tests/*:I251 14 | docs/*:I251 15 | amid/internals/cli.py:B008 16 | max-line-length = 120 17 | banned-modules = 18 | amid.* = Use relative imports 19 | -------------------------------------------------------------------------------- /amid/rsna_bc/utils.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import zipfile 3 | from pathlib import Path 4 | 5 | import pandas as pd 6 | 7 | from ..internals.dataset import register_field 8 | 9 | 10 | def csv_field(name, cast): 11 | def _loader(self, i): 12 | value = self._meta[i].get(name) 13 | if pd.isnull(value): 14 | return None 15 | if cast is not None: 16 | return cast(value) 17 | return value 18 | 19 | register_field('RSNABreastCancer', name, _loader) 20 | return _loader 21 | 22 | 23 | @contextlib.contextmanager 24 | def unpack(root: str, relative: str): 25 | unpacked = Path(root) / relative 26 | 27 | if unpacked.exists(): 28 | yield unpacked, True 29 | else: 30 | with zipfile.Path(root, relative).open('rb') as unpacked: 31 | yield unpacked, False 32 | -------------------------------------------------------------------------------- /amid/internals/dataset.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import Sequence 3 | 4 | from connectome import ExternalBase 5 | 6 | from ..utils import PathOrStr 7 | 8 | 9 | class Dataset(ExternalBase): 10 | _path: str 11 | _fields: Sequence[str] = None 12 | 13 | def __init__(self, root: PathOrStr): 14 | fields = None 15 | if hasattr(self, '_fields'): 16 | fields = self._fields 17 | 18 | super().__init__(fields=fields, inputs=['id'], properties=['ids'], inherit=['id']) 19 | self.root = Path(root) 20 | 21 | @classmethod 22 | def __getversion__(cls): 23 | return 0 24 | 25 | 26 | _Fields = {} 27 | 28 | 29 | def register_field(cls, name, func): 30 | _Fields.setdefault(cls, {})[name] = func 31 | 32 | 33 
| def field(func): 34 | cls, name = func.__qualname__.split('.') 35 | register_field(cls, name, func) 36 | return func 37 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | Awesome Medical Imaging Datasets (AMID) - a curated list of medical imaging datasets with unified interfaces 2 | 3 | ## Getting started 4 | 5 | Just import a dataset and start using it! 6 | 7 | Note that for some datasets you must manually download the raw files first. 8 | 9 | ```python 10 | from amid.verse import VerSe 11 | 12 | ds = VerSe() 13 | # get the available ids 14 | print(len(ds.ids)) 15 | i = ds.ids[0] 16 | 17 | # use the available methods: 18 | # load the image and vertebrae masks 19 | x, y = ds.image(i), ds.masks(i) 20 | print(ds.split(i), ds.patient(i)) 21 | 22 | # or get a namedTuple-like object: 23 | entry = ds(i) 24 | x, y = entry.image, entry.masks 25 | print(entry.split, entry.patient) 26 | ``` 27 | 28 | ## Install 29 | 30 | Just get it from PyPi: 31 | 32 | ```shell 33 | pip install amid 34 | ``` 35 | 36 | Or if you want to use version control features: 37 | 38 | ```shell 39 | git clone https://github.com/neuro-ml/amid.git 40 | cd amid && pip install -e . 41 | ``` 42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022-2024 NeuroML Group 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /amid/transforms.py: -------------------------------------------------------------------------------- 1 | import nibabel 2 | import numpy as np 3 | from connectome import Output, Transform 4 | 5 | 6 | class SpacingFromAffine(Transform): 7 | __inherit__ = True 8 | 9 | def spacing(affine): 10 | return nibabel.affines.voxel_sizes(affine) 11 | 12 | 13 | class ParseAffineMatrix(Transform): 14 | """Splits affine matrix into separate methods for more convenient usage. 
15 | 16 | Examples 17 | -------- 18 | >>> dataset = Dataset() 19 | >>> dataset.voxel_spacing(id_) 20 | # FieldError 21 | >>> dataset = dataset >> ParseAffineMatrix() 22 | >>> dataset.voxel_spacing(id_) 23 | # array([1.5, 1.5, 1.5]) 24 | """ 25 | 26 | __inherit__ = True 27 | 28 | def origin(affine): 29 | """Constructs an origin tensor from the given affine matrix.""" 30 | return affine[:-1, -1] 31 | 32 | def spacing(affine): 33 | """Constructs a voxel spacing tensor from the given orientation matrix.""" 34 | return np.linalg.norm(affine[:3, :3], axis=0) 35 | 36 | def orientation(affine, spacing: Output): 37 | """Constructs an orientation matrix from the given affine matrix.""" 38 | return np.divide(affine[:3, :3], spacing) 39 | -------------------------------------------------------------------------------- /amid/__init__.py: -------------------------------------------------------------------------------- 1 | from connectome.cache import unstable_module 2 | 3 | from .__version__ import __version__ 4 | from .amos import AMOS 5 | from .bimcv import BIMCVCovid19 6 | from .brats2021 import BraTS2021 7 | from .cancer_500 import MoscowCancer500 8 | from .cc359 import CC359 9 | from .cl_detection import CLDetection2023 10 | from .covid_1110 import MoscowCovid1110 11 | from .crlm import CRLM 12 | from .crossmoda import CrossMoDA 13 | from .ct_ich import CT_ICH 14 | from .curvas import CURVAS 15 | from .deeplesion import DeepLesion 16 | from .egd import EGD 17 | from .flare2022 import FLARE2022 18 | from .hcp import HCP 19 | from .lidc import LIDC 20 | from .lits import LiTS 21 | from .liver_medseg import LiverMedseg 22 | from .luna25 import LUNA25 23 | from .medseg9 import Medseg9 24 | from .midrc import MIDRC 25 | from .mood import MOOD 26 | from .mslub import MSLUB 27 | from .nlst import NLST 28 | from .nsclc import NSCLC 29 | from .ribfrac import RibFrac 30 | from .rsna_bc import RSNABreastCancer 31 | from .stanford_coca import StanfordCoCa 32 | from .totalsegmentator import Totalsegmentator 33 | from .upenn_gbm import UPENN_GBM 34 | from .verse import VerSe 35 | from .vs_seg import VSSEG 36 | 37 | 38 | unstable_module(__name__) 39 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: Docs 2 | 3 | on: 4 | release: 5 | types: [ released ] 6 | push: 7 | branches: 8 | - dev 9 | 10 | jobs: 11 | deploy: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v3 15 | - uses: fregante/setup-git-user@v1 16 | - run: git fetch origin gh-pages --depth=1 17 | - uses: actions/setup-python@v2 18 | with: 19 | python-version: '3.10' 20 | - name: Install 21 | run: | 22 | pip install -e . 23 | pip install -r docs/requirements.txt 24 | 25 | # release 26 | - id: get_version 27 | if: github.event_name == 'release' 28 | name: Get the release version 29 | uses: battila7/get-version-action@v2 30 | 31 | - name: Deploy the docs 32 | if: github.event_name == 'release' 33 | run: | 34 | PYTHONPATH=$PYTHONPATH:./docs mike deploy ${{ steps.get_version.outputs.version-without-v }} latest --push --update-aliases 35 | 36 | # dev 37 | # - name: Deploy the docs 38 | # if: ${{ github.ref == 'refs/heads/dev' }} 39 | # run: | 40 | # cd docs 41 | # VERSION=dev python fill_docs.py 42 | # cd .. 
43 | # PYTHONPATH=$PYTHONPATH:./docs mike deploy dev --push 44 | -------------------------------------------------------------------------------- /docs/datasets-api.md: -------------------------------------------------------------------------------- 1 | # Datasets API 2 | 3 | ::: amid.amos.dataset.AMOS 4 | 5 | ::: amid.bimcv.BIMCVCovid19 6 | 7 | ::: amid.brats2021.BraTS2021 8 | 9 | ::: amid.cc359.dataset.CC359 10 | 11 | ::: amid.cl_detection.CLDetection2023 12 | 13 | ::: amid.crlm.CRLM 14 | 15 | ::: amid.ct_ich.CT_ICH 16 | 17 | ::: amid.curvas.CURVAS 18 | 19 | ::: amid.crossmoda.CrossMoDA 20 | 21 | ::: amid.deeplesion.DeepLesion 22 | 23 | ::: amid.egd.EGD 24 | 25 | ::: amid.flare2022.FLARE2022 26 | 27 | ::: amid.hcp.HCP 28 | 29 | ::: amid.kits.KiTS23 30 | 31 | ::: amid.lidc.dataset.LIDC 32 | 33 | ::: amid.lits.dataset.LiTS 34 | 35 | ::: amid.liver_medseg.LiverMedseg 36 | 37 | ::: amid.midrc.MIDRC 38 | 39 | ::: amid.mood.MOOD 40 | 41 | ::: amid.msd.MSD 42 | 43 | ::: amid.mslub.dataset.MSLUB 44 | 45 | ::: amid.medseg9.Medseg9 46 | 47 | ::: amid.cancer_500.dataset.MoscowCancer500 48 | 49 | ::: amid.covid_1110.MoscowCovid1110 50 | 51 | ::: amid.nlst.NLST 52 | 53 | ::: amid.nsclc.NSCLC 54 | 55 | ::: amid.rsna_bc.dataset.RSNABreastCancer 56 | 57 | ::: amid.ribfrac.dataset.RibFrac 58 | 59 | ::: amid.stanford_coca.StanfordCoCa 60 | 61 | ::: amid.tbad.TBAD 62 | 63 | ::: amid.totalsegmentator.dataset.Totalsegmentator 64 | 65 | ::: amid.upenn_gbm.upenn_gbm.UPENN_GBM 66 | 67 | ::: amid.vs_seg.dataset.VSSEG 68 | 69 | ::: amid.verse.VerSe 70 | 71 | -------------------------------------------------------------------------------- /amid/totalsegmentator/utils.py: -------------------------------------------------------------------------------- 1 | import nibabel 2 | import numpy as np 3 | 4 | from ..internals.dataset import register_field 5 | from ..utils import open_nii_gz_file, unpack 6 | from .const import ANATOMICAL_STRUCTURES, LABELS 7 | 8 | 9 | ARCHIVE_ROOT = 'Totalsegmentator_dataset' 10 | 11 | 12 | def label_loader(name): 13 | def loader(self, i): 14 | return self._meta[self._meta['image_id'] == i][name].item() 15 | 16 | register_field('Totalsegmentator', name, loader) 17 | return loader 18 | 19 | 20 | def mask_loader(name): 21 | def loader(self, i): 22 | file = f'{i}/segmentations/{name}.nii.gz' 23 | 24 | with unpack(self.root, file, ARCHIVE_ROOT, '.zip') as (unpacked, is_unpacked): 25 | if is_unpacked: 26 | return np.asarray(nibabel.load(unpacked).dataobj) 27 | else: 28 | with open_nii_gz_file(unpacked) as image: 29 | return np.asarray(image.dataobj) 30 | 31 | register_field('Totalsegmentator', name, loader) 32 | return loader 33 | 34 | 35 | def add_labels(scope): 36 | for label in LABELS: 37 | scope[label] = label_loader(label) 38 | 39 | 40 | def add_masks(scope): 41 | for anatomical_structure in ANATOMICAL_STRUCTURES: 42 | scope[anatomical_structure] = mask_loader(anatomical_structure) 43 | -------------------------------------------------------------------------------- /docs/fill_readme.py: -------------------------------------------------------------------------------- 1 | import re 2 | from pathlib import Path 3 | 4 | import deli 5 | import pandas as pd 6 | from tqdm import tqdm 7 | 8 | from amid.internals.registry import gather_datasets, prepare_for_table 9 | 10 | 11 | file = Path(__file__).resolve().parent.parent / 'README.md' 12 | with open(file, 'r') as fd: 13 | content = fd.read() 14 | 15 | start = re.search(r'# Available datasets', content).end() 16 | stop = re.search(r'Check out \[our 
docs\]', content).start() 17 | raw_data = deli.load('/shared/amid/raw.json') 18 | cache_path = '/shared/amid/cache.json' 19 | cache = deli.load(cache_path) 20 | 21 | records = [] 22 | for name, (cls, module, description) in tqdm(list(gather_datasets().items())): # noqa 23 | if name in cache: 24 | count = cache[name] 25 | else: 26 | count = len(cls(root=raw_data[name]).ids) 27 | cache[name] = count 28 | deli.save(cache, cache_path) 29 | records.append(prepare_for_table(name, count, module, description, 'latest')) 30 | 31 | table = pd.DataFrame.from_records(records).fillna('') 32 | table.columns = [x.replace('_', ' ').capitalize() for x in table.columns] 33 | table = table[['Name', 'Entries', 'Body region', 'Modality']].to_markdown(index=False) 34 | content = f'{content[:start]}\n\n{table}\n\n{content[stop:]}' 35 | 36 | with open(file, 'w') as fd: 37 | fd.write(content) 38 | -------------------------------------------------------------------------------- /amid/lidc/transforms.py: -------------------------------------------------------------------------------- 1 | from typing import Sequence, Union 2 | 3 | import numpy as np 4 | from connectome import Transform 5 | from imops import zoom 6 | 7 | from ..utils import Numeric 8 | 9 | 10 | class CanonicalCTOrientation(Transform): 11 | __exclude__ = ('nodules', 'nodules_masks') 12 | 13 | def image(image): 14 | return image[..., ::-1] 15 | 16 | def cancer(cancer): 17 | return cancer[..., ::-1] 18 | 19 | 20 | class Rescale(Transform): 21 | __exclude__ = ('pixel_spacing', 'slice_locations', 'voxel_spacing', 'orientation_matrix') 22 | 23 | _new_spacing: Union[Sequence[Numeric], Numeric] 24 | _order: int = 1 25 | 26 | def _spacing(spacing, _new_spacing): 27 | _new_spacing = np.broadcast_to(_new_spacing, len(spacing)).copy() 28 | _new_spacing[np.isnan(_new_spacing)] = np.array(spacing)[np.isnan(_new_spacing)] 29 | return tuple(_new_spacing.tolist()) 30 | 31 | def _scale_factor(spacing, _spacing): 32 | return np.float32(spacing) / np.float32(_spacing) 33 | 34 | def spacing(_spacing): 35 | return _spacing 36 | 37 | def image(image, _scale_factor, _order): 38 | return zoom(image.astype(np.float32), _scale_factor, order=_order) 39 | 40 | def cancer(cancer, _scale_factor, _order): 41 | return zoom(cancer.astype(np.float32), _scale_factor, order=_order) > 0.5 42 | -------------------------------------------------------------------------------- /docs/fill_docs.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import deli 4 | import pandas as pd 5 | from tqdm import tqdm 6 | 7 | from amid.__version__ import __version__ as version 8 | from amid.internals.registry import gather_datasets, prepare_for_table 9 | 10 | 11 | # version = os.environ.get('VERSION') 12 | # if not version: 13 | # raise RuntimeError('Please define the "VERSION" env variable') 14 | raw_data = deli.load('/shared/amid/raw.json') 15 | cache_path = '/shared/amid/cache.json' 16 | cache = deli.load(cache_path) 17 | 18 | records = [] 19 | root = Path(__file__).resolve().parent 20 | with open(root / 'datasets-api.md', 'w') as file: 21 | file.write('# Datasets API\n\n') 22 | for name, (cls, module, description) in tqdm(list(gather_datasets().items())): 23 | file.write(f'::: {module}.{name}\n\n') 24 | if name in cache: 25 | count = cache[name] 26 | else: 27 | count = len(cls(root=raw_data[name]).ids) 28 | cache[name] = count 29 | deli.save(cache, cache_path) 30 | 31 | records.append(prepare_for_table(name, count, module, 
description, version)) 32 | 33 | table = pd.DataFrame.from_records(records).fillna('') 34 | table.columns = [x.replace('_', ' ').capitalize() for x in table.columns] 35 | with open(root / 'datasets.md', 'w') as file: 36 | file.write('# Datasets\n\n') 37 | file.write(table.to_markdown(index=False)) 38 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | release: 5 | types: [ released ] 6 | 7 | env: 8 | MODULE_NAME: amid 9 | 10 | jobs: 11 | release: 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v3 16 | - name: Set up Python 17 | uses: actions/setup-python@v4 18 | with: 19 | python-version: '3.10' 20 | 21 | - id: get_version 22 | name: Get the release version 23 | uses: battila7/get-version-action@v2 24 | 25 | - name: Check the version and build the package 26 | run: | 27 | RELEASE=${{ steps.get_version.outputs.version-without-v }} 28 | VERSION=$(python -c "from pathlib import Path; import runpy; folder, = {d.parent for d in Path().resolve().glob('*/__init__.py') if d.parent.is_dir() and (d.parent / '__version__.py').exists()}; print(runpy.run_path(folder / '__version__.py')['__version__'])") 29 | MATCH=$(pip index versions $MODULE_NAME | grep "Available versions:" | grep $VERSION) || echo 30 | echo $MATCH 31 | if [ "$GITHUB_BASE_REF" = "master" ] && [ "$MATCH" != "" ]; then echo "Version $VERSION already present" && exit 1; fi 32 | if [ "$VERSION" != "$RELEASE" ]; then echo "$VERSION vs $RELEASE" && exit 1; fi 33 | pip install build 34 | python -m build --sdist 35 | 36 | - name: Publish to PyPi 37 | uses: pypa/gh-action-pypi-publish@master 38 | with: 39 | password: ${{ secrets.PYPI_API_TOKEN }} 40 | -------------------------------------------------------------------------------- /amid/internals/licenses.py: -------------------------------------------------------------------------------- 1 | from typing import NamedTuple 2 | 3 | 4 | class License(NamedTuple): 5 | name: str 6 | url: str 7 | 8 | 9 | CC0_10 = License(name='CC0 1.0', url='https://creativecommons.org/publicdomain/zero/1.0/') 10 | CC_BY_30 = License( 11 | name='CC BY 3.0', 12 | url='https://creativecommons.org/licenses/by/3.0/', 13 | ) 14 | CC_BY_40 = License( 15 | name='CC BY 4.0', 16 | url='https://creativecommons.org/licenses/by/4.0/', 17 | ) 18 | CC_BYNC_40 = License( 19 | name='CC BY-NC 4.0', 20 | url='https://creativecommons.org/licenses/by-nc/4.0/', 21 | ) 22 | CC_BYND_40 = License( 23 | name='CC BY-ND 4.0', 24 | url='https://creativecommons.org/licenses/by-nd/4.0/', 25 | ) 26 | CC_BYNCND_40 = License( 27 | name='CC BY-NC-ND 4.0', 28 | url='https://creativecommons.org/licenses/by-nc-nd/4.0/', 29 | ) 30 | CC_BYSA_40 = License( 31 | name='CC BY-SA 4.0', 32 | url='https://creativecommons.org/licenses/by-sa/4.0/', 33 | ) 34 | CC_BYNCSA_40 = License( 35 | name='CC BY-NC-SA 4.0', 36 | url='https://creativecommons.org/licenses/by-nc-sa/4.0/', 37 | ) 38 | 39 | PhysioNet_RHD_150 = License( 40 | name='PhysioNet Restricted Health Data License 1.5.0', 41 | url='https://www.physionet.org/about/licenses/physionet-restricted-health-data-license-150/', 42 | ) 43 | 44 | StanfordDSResearch = License( 45 | name='Stanford University Dataset Research Use Agreement', 46 | url='https://stanfordaimi.azurewebsites.net/datasets/e8ca74dc-8dd4-4340-815a-60b41f6cb2aa', # TODO: separate link 47 | ) 48 | 
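Each `License` above is a plain `NamedTuple`; datasets pass one of these constants to `register()` (see `amid/internals/registry.py` and `amid/hcp.py` below), and the registry renders only its `name` in the generated tables. A minimal sketch of that pattern, using a hypothetical dataset and link:

```python
from amid.internals import Dataset, licenses, register

lic = licenses.CC_BY_40
print(lic.name)  # 'CC BY 4.0'
print(lic.url)   # 'https://creativecommons.org/licenses/by/4.0/'


@register(
    body_region='Head',
    license=licenses.CC_BY_40,           # a License NamedTuple; tables show license.name
    link='https://example.org/dataset',  # hypothetical source link
    modality='MRI',
)
class ToyDataset(Dataset):  # hypothetical dataset, for illustration only
    @property
    def ids(self):
        return ('case-0', 'case-1')
```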
-------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "amid" 3 | dynamic = ["version", "dependencies"] 4 | description = "A curated list of medical imaging datasets with unified interfaces" 5 | readme = "README.md" 6 | requires-python = ">=3.8" 7 | license = { file = "LICENSE" } 8 | keywords = ["medical imaging", "dataset"] 9 | authors = [ 10 | { name = "NeuroML Group", email = "max@ira-labs.com" } 11 | ] 12 | classifiers = [ 13 | "Development Status :: 4 - Beta", 14 | "License :: OSI Approved :: MIT License", 15 | "Programming Language :: Python :: 3", 16 | "Programming Language :: Python :: 3.8", 17 | "Programming Language :: Python :: 3.9", 18 | "Programming Language :: Python :: 3.10", 19 | "Programming Language :: Python :: 3.11", 20 | "Programming Language :: Python :: 3 :: Only", 21 | ] 22 | 23 | [project.urls] 24 | "Homepage" = "https://github.com/neuro-ml/amid" 25 | "Issues" = "https://github.com/neuro-ml/amid/issues" 26 | "Source" = "https://github.com/neuro-ml/amid" 27 | "Docs" = "https://neuro-ml.github.io/amid" 28 | 29 | [build-system] 30 | requires = ["setuptools>=43.0.0", "wheel"] 31 | build-backend = "setuptools.build_meta" 32 | 33 | [tool.setuptools.packages.find] 34 | include = ["amid"] 35 | 36 | [tool.setuptools.dynamic] 37 | version = { attr = "amid.__version__.__version__" } 38 | dependencies = { file = "requirements.txt" } 39 | 40 | [tool.pytest.ini_options] 41 | markers = [ 42 | "raw: tests that require the raw files storage", 43 | ] 44 | 45 | [tool.black] 46 | line-length = 120 47 | skip-string-normalization = true 48 | 49 | [tool.isort] 50 | line_length = 120 51 | lines_after_imports = 2 52 | profile = 'black' 53 | combine_as_imports = true 54 | -------------------------------------------------------------------------------- /amid/lits/transforms.py: -------------------------------------------------------------------------------- 1 | from typing import Sequence, Union 2 | 3 | import numpy as np 4 | from connectome import Transform 5 | from imops import zoom 6 | 7 | from ..utils import Numeric, propagate_none 8 | 9 | 10 | class CanonicalCTOrientation(Transform): 11 | __inherit__ = True 12 | 13 | def image(image): 14 | return np.transpose(image, (1, 0, 2))[::-1, :, ::-1] 15 | 16 | def mask(mask): 17 | return np.transpose(mask, (1, 0, 2))[::-1, :, ::-1] 18 | 19 | def spacing(spacing): 20 | return tuple(np.array(spacing)[[1, 0, 2]].tolist()) 21 | 22 | 23 | class Rescale(Transform): 24 | __exclude__ = ( 25 | 'voxel_spacing', 26 | 'affine', 27 | ) 28 | 29 | _new_spacing: Union[Sequence[Numeric], Numeric] 30 | _order: int = 1 31 | 32 | def _spacing(spacing, _new_spacing): 33 | _new_spacing = np.broadcast_to(_new_spacing, len(spacing)).copy() 34 | _new_spacing[np.isnan(_new_spacing)] = np.array(spacing)[np.isnan(_new_spacing)] 35 | return tuple(_new_spacing.tolist()) 36 | 37 | def _scale_factor(spacing, _spacing): 38 | return np.float32(spacing) / np.float32(_spacing) 39 | 40 | def spacing(_spacing): 41 | return _spacing 42 | 43 | def image(image, _scale_factor, _order): 44 | return zoom(image.astype(np.float32), _scale_factor, order=_order) 45 | 46 | @propagate_none 47 | def mask(mask, _scale_factor, _order): 48 | onehot = np.arange(mask.max() + 1) == mask[..., None] 49 | onehot = onehot.astype(mask.dtype).transpose(3, 0, 1, 2) 50 | out = np.array(zoom(onehot.astype(np.float32), _scale_factor, axis=(1, 2, 3)) > 0.5, 
dtype=mask.dtype) 51 | labels = out.argmax(axis=0) 52 | return labels 53 | -------------------------------------------------------------------------------- /amid/lidc/typing.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from typing import NamedTuple, Optional, Sequence 3 | 4 | import numpy as np 5 | 6 | 7 | class Calcification(Enum): 8 | Popcorn, Laminated, Solid, NonCentral, Central, Absent = 1, 2, 3, 4, 5, 6 9 | 10 | 11 | class InternalStructure(Enum): 12 | SoftTissue, Fluid, Fat, Air = 1, 2, 3, 4 13 | 14 | 15 | class Lobulation(Enum): 16 | NoLobulation, NearlyNoLobulation, MediumLobulation, NearMarkedLobulation, MarkedLobulation = 1, 2, 3, 4, 5 17 | 18 | 19 | class Malignancy(Enum): 20 | HighlyUnlikely, ModeratelyUnlikely, Indeterminate, ModeratelySuspicious, HighlySuspicious = 1, 2, 3, 4, 5 21 | 22 | 23 | class Sphericity(Enum): 24 | Linear, OvoidLinear, Ovoid, OvoidRound, Round = 1, 2, 3, 4, 5 25 | 26 | 27 | class Spiculation(Enum): 28 | NoSpiculation, NearlyNoSpiculation, MediumSpiculation, NearMarkedSpiculation, MarkedSpiculation = 1, 2, 3, 4, 5 29 | 30 | 31 | class Subtlety(Enum): 32 | ExtremelySubtle, ModeratelySubtle, FairlySubtle, ModeratelyObvious, Obvious = 1, 2, 3, 4, 5 33 | 34 | 35 | class Texture(Enum): 36 | NonSolidGGO, NonSolidMixed, PartSolidMixed, SolidMixed, Solid = 1, 2, 3, 4, 5 37 | 38 | 39 | class LIDCNodule(NamedTuple): 40 | center_voxel: Sequence[float] 41 | bbox: np.ndarray 42 | diameter_mm: float 43 | surface_area_mm2: float 44 | volume_mm3: float 45 | calcification: Optional[Calcification] = None 46 | internal_structure: Optional[InternalStructure] = None 47 | lobulation: Optional[Lobulation] = None 48 | malignancy: Optional[Malignancy] = None 49 | sphericity: Optional[Sphericity] = None 50 | spiculation: Optional[Spiculation] = None 51 | subtlety: Optional[Subtlety] = None 52 | texture: Optional[Texture] = None 53 | -------------------------------------------------------------------------------- /amid/lidc/nodules.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from pylidc import Annotation 3 | 4 | from .typing import ( 5 | Calcification, 6 | InternalStructure, 7 | LIDCNodule, 8 | Lobulation, 9 | Malignancy, 10 | Sphericity, 11 | Spiculation, 12 | Subtlety, 13 | Texture, 14 | ) 15 | 16 | 17 | def get_nodule(ann: Annotation) -> LIDCNodule: 18 | def init_enum(enum_class, value): 19 | try: 20 | return enum_class(value) 21 | except ValueError: 22 | pass 23 | 24 | bbox = ann.bbox_matrix().T 25 | bbox[1] = bbox[1] + 1 26 | 27 | return LIDCNodule( 28 | center_voxel=ann.centroid, 29 | bbox=bbox, 30 | diameter_mm=ann.diameter, 31 | surface_area_mm2=ann.surface_area, 32 | volume_mm3=ann.volume, 33 | calcification=init_enum(Calcification, ann.calcification), 34 | internal_structure=init_enum(InternalStructure, ann.internalStructure), 35 | lobulation=init_enum(Lobulation, ann.lobulation), 36 | malignancy=init_enum(Malignancy, ann.malignancy), 37 | sphericity=init_enum(Sphericity, ann.sphericity), 38 | spiculation=init_enum(Spiculation, ann.spiculation), 39 | subtlety=init_enum(Subtlety, ann.subtlety), 40 | texture=init_enum(Texture, ann.texture), 41 | ) 42 | 43 | 44 | def flip_nodule(nodule: LIDCNodule, n_slices: int) -> LIDCNodule: 45 | bbox = nodule.bbox.copy() 46 | start_slice, stop_slice = bbox[:, -1] 47 | bbox[:, -1] = np.array([n_slices - stop_slice, n_slices - start_slice]) 48 | 49 | center_voxel = nodule.center_voxel 50 | 
center_voxel[-1] = n_slices - center_voxel[-1] 51 | 52 | return nodule._replace( 53 | center_voxel=center_voxel, 54 | bbox=bbox, 55 | ) 56 | -------------------------------------------------------------------------------- /amid/upenn_gbm/data_classes.py: -------------------------------------------------------------------------------- 1 | from typing import NamedTuple 2 | 3 | 4 | class ClinicalInfo(NamedTuple): 5 | gender: str 6 | age_at_scan_years: float 7 | survival_from_surgery_days: int 8 | idh1: str 9 | mgmt: str 10 | kps: str 11 | gtr_over90percent: str 12 | time_since_baseline_preop: int 13 | psp_tp_score: float 14 | 15 | 16 | class AcquisitionInfo(NamedTuple): 17 | manufacturer: str 18 | model: str 19 | magnetic_field_strength: float 20 | t1_imaging_frequency: float 21 | t1_repetition_time: float 22 | t1_echo_time: float 23 | t1_inversion_time: float 24 | t1_flip_angle: float 25 | t1_pixel_spacing: str 26 | t1_slice_thickness: float 27 | t1gd_imaging_frequency: float 28 | t1gd_repetition_time: float 29 | t1gd_echo_time: float 30 | t1gd_inversion_time: float 31 | t1gd_flip_angle: float 32 | t1gd_pixel_spacing: str 33 | t1gd_slice_thickness: float 34 | t2_imaging_frequency: float 35 | t2_repetition_time: float 36 | t2_echo_time: float 37 | t2_flip_angle: float 38 | t2_pixel_spacing: str 39 | t2_slice_thickness: float 40 | flair_imaging_frequency: float 41 | flair_repetition_time: float 42 | flair_echo_time: float 43 | flair_inversion_time: float 44 | flair_flip_angle: float 45 | flair_pixel_spacing: str 46 | flair_slice_thickness: float 47 | dti_imaging_frequency: float 48 | dti_repetition_time: float 49 | dti_echo_time: float 50 | dti_flip_angle: float 51 | dti_pixel_spacing: str 52 | dti_slice_thickness: float 53 | dsc_imaging_frequency: float 54 | dsc_repetition_time: float 55 | dsc_echo_time: float 56 | dsc_flip_angle: float 57 | dsc_pixel_spacing: str 58 | dsc_slice_thickness: float 59 | -------------------------------------------------------------------------------- /tests/test_consistency.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | import numpy as np 4 | import pytest 5 | 6 | from amid.internals import gather_datasets 7 | 8 | 9 | MAPPING = gather_datasets() 10 | DATASETS = [x[0] for x in MAPPING.values()] 11 | NAMES = list(MAPPING) 12 | 13 | 14 | @pytest.mark.raw 15 | @pytest.mark.parametrize('cls', DATASETS, ids=NAMES) 16 | def test_ids_availability(cls): 17 | assert len(cls().ids) > 0 18 | 19 | 20 | @pytest.mark.raw 21 | @pytest.mark.parametrize('cls', DATASETS, ids=NAMES) 22 | def test_pickleable(cls): 23 | raw = cls()[0] 24 | cached = cls() 25 | fields = dir(raw) 26 | 27 | for ds in raw, cached: 28 | loader = ds._compile(fields) 29 | pickle.dumps(loader) 30 | 31 | f = cached._compile('ids') 32 | raw = pickle.dumps(f) 33 | g = pickle.loads(raw) 34 | assert f() == g() 35 | 36 | 37 | # @pytest.mark.raw 38 | # @pytest.mark.parametrize('cls', ROOT_MAPPING, ids=[cls.__name__ for cls in ROOT_MAPPING]) 39 | # def test_cache_consistency(cls): 40 | # raw = cls(root=ROOT_MAPPING[cls]) 41 | # cached = raw.cached() 42 | # fields = {x.name for x in raw._container.outputs} - {'ids', 'id', 'cached'} 43 | # 44 | # ids = raw.ids 45 | # assert ids == cached.ids 46 | # for i in ids: 47 | # for field in fields: 48 | # compare(getattr(raw, field)(i), getattr(cached, field)(i)) 49 | 50 | 51 | # TODO: find a package for this 52 | def compare(x, y): 53 | assert type(x) == type(y) 54 | if isinstance(x, (str, int, float, bytes)): 55 | 
assert x == y 56 | elif isinstance(x, (np.ndarray, np.generic)): 57 | np.testing.assert_allclose(x, y) 58 | elif isinstance(x, (list, tuple)): 59 | list(map(compare, x, y)) 60 | else: 61 | raise TypeError(type(x)) 62 | -------------------------------------------------------------------------------- /docs/javascript/tablesort.filesize.js: -------------------------------------------------------------------------------- 1 | // Filesizes. e.g. '5.35 K', '10 MB', '12.45 GB', or '4.67 TiB' 2 | (function(){ 3 | var compareNumber = function(a, b) { 4 | a = parseFloat(a); 5 | b = parseFloat(b); 6 | 7 | a = isNaN(a) ? 0 : a; 8 | b = isNaN(b) ? 0 : b; 9 | 10 | return a - b; 11 | }, 12 | 13 | cleanNumber = function(i) { 14 | return i.replace(',', '.').replace(/[^\-?0-9.]/g, ''); 15 | }, 16 | 17 | // Returns suffix multiplier 18 | // Ex. suffix2num('KB') -> 1000 19 | // Ex. suffix2num('KiB') -> 1024 20 | suffix2num = function(suffix) { 21 | suffix = suffix.toLowerCase(); 22 | var base = suffix[1] === 'i' ? 1024 : 1000; 23 | 24 | switch(suffix[0]) { 25 | case 'k': 26 | return Math.pow(base, 2); 27 | case 'm': 28 | return Math.pow(base, 3); 29 | case 'g': 30 | return Math.pow(base, 4); 31 | case 't': 32 | return Math.pow(base, 5); 33 | case 'p': 34 | return Math.pow(base, 6); 35 | case 'e': 36 | return Math.pow(base, 7); 37 | case 'z': 38 | return Math.pow(base, 8); 39 | case 'y': 40 | return Math.pow(base, 9); 41 | default: 42 | return base; 43 | } 44 | }, 45 | 46 | // Converts filesize to bytes 47 | // Ex. filesize2num('123 KB') -> 123000 48 | // Ex. filesize2num('123 KiB') -> 125952 49 | filesize2num = function(filesize) { 50 | var matches = filesize.match(/^(\d+([.,]\d+)?) ?((K|M|G|T|P|E|Z|Y|B$)i?B?)$/i); 51 | 52 | var num = parseFloat(cleanNumber(matches[1])), 53 | suffix = matches[3]; 54 | 55 | return num * suffix2num(suffix); 56 | }; 57 | 58 | Tablesort.extend('filesize', function(item) { 59 | return /^\d+([.,]\d+)? 
?(K|M|G|T|P|E|Z|Y|B$)i?B?$/i.test(item); 60 | }, function(a, b) { 61 | a = filesize2num(a); 62 | b = filesize2num(b); 63 | 64 | return compareNumber(b, a); 65 | }); 66 | }()); 67 | -------------------------------------------------------------------------------- /amid/vs_seg/transforms.py: -------------------------------------------------------------------------------- 1 | from typing import Sequence, Union 2 | 3 | import numpy as np 4 | from connectome import Transform 5 | from imops import zoom 6 | 7 | from ..utils import Numeric, propagate_none 8 | 9 | 10 | class CanonicalMRIOrientation(Transform): 11 | __inherit__ = True 12 | 13 | def image(image): 14 | return np.transpose(image, (1, 0, 2))[..., ::-1] 15 | 16 | def spacing(spacing): 17 | return tuple(np.array(spacing)[[1, 0, 2]].tolist()) 18 | 19 | @propagate_none 20 | def schwannoma(schwannoma): 21 | return np.transpose(schwannoma, (1, 0, 2))[..., ::-1] 22 | 23 | @propagate_none 24 | def cochlea(cochlea): 25 | return np.transpose(cochlea, (1, 0, 2))[..., ::-1] 26 | 27 | @propagate_none 28 | def meningioma(meningioma): 29 | return np.transpose(meningioma, (1, 0, 2))[..., ::-1] 30 | 31 | 32 | class Rescale(Transform): 33 | __inherit__ = True 34 | 35 | _new_spacing: Union[Sequence[Numeric], Numeric] 36 | _order: int = 1 37 | 38 | def _spacing(spacing, _new_spacing): 39 | _new_spacing = np.broadcast_to(_new_spacing, len(spacing)).copy() 40 | _new_spacing[np.isnan(_new_spacing)] = np.array(spacing)[np.isnan(_new_spacing)] 41 | return tuple(_new_spacing.tolist()) 42 | 43 | def _scale_factor(spacing, _spacing): 44 | return np.float32(spacing) / np.float32(_spacing) 45 | 46 | def spacing(_spacing): 47 | return _spacing 48 | 49 | def image(image, _scale_factor, _order): 50 | return zoom(image.astype(np.float32), _scale_factor, order=_order) 51 | 52 | @propagate_none 53 | def schwannoma(schwannoma, _scale_factor, _order): 54 | return zoom(schwannoma.astype(np.float32), _scale_factor, order=_order) > 0.5 55 | 56 | @propagate_none 57 | def cochlea(cochlea, _scale_factor, _order): 58 | return zoom(cochlea.astype(np.float32), _scale_factor, order=_order) > 0.5 59 | 60 | @propagate_none 61 | def meningioma(meningioma, _scale_factor, _order): 62 | return zoom(meningioma.astype(np.float32), _scale_factor, order=_order) > 0.5 63 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: 'AMID: Awesome Medical Imaging Datasets' 2 | site_url: https://neuro-ml.github.io/amid 3 | repo_url: https://github.com/neuro-ml/amid 4 | 5 | plugins: 6 | - mkdocs-jupyter 7 | - search 8 | - autorefs 9 | - mike: 10 | canonical_version: latest 11 | - mkdocstrings: 12 | default_handler: python_connectome 13 | handlers: 14 | python_connectome: 15 | options: 16 | docstring_style: numpy 17 | merge_init_into_class: true 18 | members_order: source 19 | show_if_no_docstring: true 20 | show_bases: false 21 | show_signature_annotations: true 22 | show_root_heading: true 23 | show_source: false 24 | heading_level: 3 25 | - exclude: 26 | glob: 27 | - '**/python_connectome.py' 28 | - 'fill_docs.py' 29 | - 'fill_readme.py' 30 | 31 | theme: 32 | name: material 33 | icon: 34 | repo: fontawesome/brands/github-alt 35 | palette: 36 | - media: "(prefers-color-scheme: dark)" 37 | scheme: slate 38 | toggle: 39 | icon: material/lightbulb-outline 40 | name: Switch to light mode 41 | - media: "(prefers-color-scheme: light)" 42 | scheme: default 43 | toggle: 44 | 
icon: material/lightbulb 45 | name: Switch to dark mode 46 | 47 | markdown_extensions: 48 | - admonition 49 | - tables 50 | - pymdownx.highlight: 51 | anchor_linenums: true 52 | - pymdownx.inlinehilite 53 | - pymdownx.snippets 54 | - pymdownx.details 55 | - pymdownx.superfences 56 | - toc: 57 | toc_depth: 3 58 | 59 | extra: 60 | version: 61 | provider: mike 62 | 63 | extra_javascript: 64 | - https://unpkg.com/tablesort@5.3.0/dist/tablesort.min.js 65 | - https://unpkg.com/tablesort@5.3.0/dist/sorts/tablesort.number.min.js 66 | - https://unpkg.com/tablesort@5.3.0/dist/sorts/tablesort.date.min.js 67 | - https://unpkg.com/tablesort@5.3.0/dist/sorts/tablesort.dotsep.min.js 68 | - https://unpkg.com/tablesort@5.3.0/dist/sorts/tablesort.monthname.min.js 69 | - javascript/tablesort.filesize.js 70 | - javascript/tablesort.js 71 | -------------------------------------------------------------------------------- /amid/cc359/transforms.py: -------------------------------------------------------------------------------- 1 | from typing import Sequence, Union 2 | 3 | import numpy as np 4 | from connectome import Transform 5 | from imops import zoom 6 | 7 | from ..utils import Numeric, propagate_none 8 | 9 | 10 | class CanonicalMRIOrientation(Transform): 11 | __inherit__ = True 12 | 13 | def image(image): 14 | return np.transpose(image, (1, 0, 2))[::-1, :, ::-1] 15 | 16 | def spacing(spacing): 17 | return tuple(np.array(spacing)[[1, 0, 2]].tolist()) 18 | 19 | def brain(brain): 20 | return np.transpose(brain, (1, 0, 2))[::-1, :, ::-1] 21 | 22 | @propagate_none 23 | def hippocampus(hippocampus): 24 | return np.transpose(hippocampus, (1, 0, 2))[::-1, :, ::-1] 25 | 26 | @propagate_none 27 | def wm_gm_csf(wm_gm_csf): 28 | return np.transpose(wm_gm_csf, (1, 0, 2))[::-1, :, ::-1] 29 | 30 | 31 | class Rescale(Transform): 32 | __inherit__ = True 33 | 34 | _new_spacing: Union[Sequence[Numeric], Numeric] 35 | _order: int = 1 36 | 37 | def _spacing(spacing, _new_spacing): 38 | _new_spacing = np.broadcast_to(_new_spacing, len(spacing)).copy() 39 | _new_spacing[np.isnan(_new_spacing)] = np.array(spacing)[np.isnan(_new_spacing)] 40 | return tuple(_new_spacing.tolist()) 41 | 42 | def _scale_factor(spacing, _spacing): 43 | return np.float32(spacing) / np.float32(_spacing) 44 | 45 | def spacing(_spacing): 46 | return _spacing 47 | 48 | def image(image, _scale_factor, _order): 49 | return zoom(image.astype(np.float32), _scale_factor, order=_order) 50 | 51 | def brain(brain, _scale_factor, _order): 52 | return zoom(brain.astype(np.float32), _scale_factor, order=_order) > 0.5 53 | 54 | @propagate_none 55 | def hippocampus(hippocampus, _scale_factor, _order): 56 | return zoom(hippocampus.astype(np.float32), _scale_factor, order=_order) > 0.5 57 | 58 | @propagate_none 59 | def wm_gm_csf(wm_gm_csf, _scale_factor, _order): 60 | onehot = np.arange(wm_gm_csf.max() + 1) == wm_gm_csf[..., None] 61 | onehot = onehot.astype(wm_gm_csf.dtype).transpose(3, 0, 1, 2) 62 | out = np.array(zoom(onehot.astype(np.float32), _scale_factor, axis=(1, 2, 3)) > 0.5, dtype=wm_gm_csf.dtype) 63 | labels = out.argmax(axis=0) 64 | return labels 65 | -------------------------------------------------------------------------------- /amid/internals/registry.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import inspect 3 | from collections import OrderedDict 4 | from pathlib import Path 5 | from typing import NamedTuple, Type 6 | 7 | import pandas as pd 8 | 9 | from .licenses import License 10 | 11 | 
12 | _REGISTRY = {} 13 | 14 | 15 | class Description(NamedTuple): 16 | body_region: str = None 17 | license: str = None 18 | link: str = None 19 | modality: str = None 20 | prep_data_size: str = None 21 | raw_data_size: str = None 22 | task: str = None 23 | 24 | 25 | def register(**kwargs): 26 | def decorator(cls: Type): 27 | _register(cls, cls.__name__, description, 2) 28 | # cls._path = path 29 | return cls 30 | 31 | # path = kwargs.pop('path') 32 | description = Description(**kwargs) 33 | return decorator 34 | 35 | 36 | def _register(cls, name, description, level): 37 | module = inspect.getmodule(inspect.stack()[level][0]).__name__ 38 | assert name not in _REGISTRY, name 39 | _REGISTRY[name] = cls, module, description 40 | 41 | 42 | def gather_datasets(): 43 | for f in Path(__file__).resolve().parent.parent.iterdir(): 44 | module_name = f'amid.{f.stem}' 45 | importlib.import_module(module_name) 46 | 47 | return OrderedDict((k, _REGISTRY[k]) for k in sorted(_REGISTRY)) 48 | 49 | 50 | def prepare_for_table(name, count, module, description, version): 51 | def stringify(x): 52 | if pd.isnull(x): 53 | return '' 54 | if isinstance(x, str): 55 | return x 56 | if isinstance(x, (list, tuple)): 57 | return ', '.join(x) 58 | return x 59 | 60 | entry = {'name': name, 'entries': count} 61 | entry.update({k: v for k, v in description._asdict().items() if not pd.isnull(v)}) 62 | license_ = entry.get('license', None) 63 | if license_: 64 | if isinstance(license_, License): 65 | license_ = f'{license_.name}' 66 | entry['license'] = license_ 67 | 68 | link = entry.pop('link', None) 69 | if link is not None: 70 | entry['link'] = f'Source' 71 | 72 | entry['name'] = f'{name}' 73 | return {k: stringify(v) for k, v in entry.items()} 74 | -------------------------------------------------------------------------------- /amid/hcp.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | import zipfile 3 | from pathlib import Path 4 | from zipfile import ZipFile 5 | 6 | import nibabel as nb 7 | import numpy as np 8 | 9 | from .internals import Dataset, field, licenses, register 10 | 11 | 12 | @register( 13 | body_region='Head', 14 | license=licenses.CC_BYNCND_40, 15 | link='https://www.humanconnectome.org/study/hcp-young-adult/document/1200-subjects-data-release', 16 | modality='MRI', 17 | prep_data_size='125G', 18 | raw_data_size='125G', 19 | task='Segmentation', 20 | ) 21 | class HCP(Dataset): 22 | @property 23 | def ids(self): 24 | result = set() 25 | for archive in self.root.glob('*.zip'): 26 | with ZipFile(archive) as zf: 27 | for zipinfo in zf.infolist(): 28 | if zipinfo.is_dir(): 29 | continue 30 | result.add(zipinfo.filename.split('/')[0]) 31 | 32 | return tuple(sorted(result)) 33 | 34 | def _file(self, i): 35 | for archive in self.root.glob('*.zip'): 36 | with ZipFile(archive) as zf: 37 | for zipinfo in zf.infolist(): 38 | if zipinfo.is_dir(): 39 | continue 40 | file = Path(zipinfo.filename) 41 | if (i in file.stem) and ('T1w_MPR1' in file.stem): 42 | return zipfile.Path(str(archive), str(file)) 43 | 44 | @field 45 | def image(self, i) -> np.ndarray: 46 | with self._file(i).open('rb') as opened: 47 | with gzip.GzipFile(fileobj=opened) as nii: 48 | nii = nb.FileHolder(fileobj=nii) 49 | image = nb.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 50 | return np.int16(image.get_fdata()) 51 | 52 | @field 53 | def affine(self, i) -> np.ndarray: 54 | with self._file(i).open('rb') as opened: 55 | with gzip.GzipFile(fileobj=opened) as nii: 56 | nii = 
nb.FileHolder(fileobj=nii) 57 | image = nb.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 58 | return image.affine 59 | 60 | def spacing(self, i): 61 | with self._file(i).open('rb') as opened: 62 | with gzip.GzipFile(fileobj=opened) as nii: 63 | nii = nb.FileHolder(fileobj=nii) 64 | image = nb.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 65 | return tuple(image.header['pixdim'][1:4]) 66 | -------------------------------------------------------------------------------- /amid/mslub/dataset.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import nibabel as nb 4 | 5 | from ..internals import Dataset, licenses, register 6 | 7 | 8 | @register( 9 | body_region='Head', 10 | license=licenses.CC_BY_30, 11 | link='https://github.com/muschellij2/open_ms_data?tab=readme-ov-file', 12 | modality='MRI', 13 | prep_data_size='18G', 14 | raw_data_size='5.9G', 15 | task='Anomaly segmentation', 16 | ) 17 | class MSLUB(Dataset): 18 | @property 19 | def ids(self): 20 | result = set() 21 | for file in self.root.glob('**/*.gz'): 22 | if ('raw' not in str(file)) or ('gt' in str(file)): 23 | continue 24 | patient = file.parent.name 25 | plane = file.parent.parent.parent.name 26 | ind = f'{plane}-{patient}' 27 | if 'longitudinal' in str(file): 28 | filename = file.name 29 | study_number = filename.split('_')[0] 30 | ind = f'{ind}-{study_number}' 31 | result.add(ind) 32 | return list(result) 33 | 34 | def _file(self, i): 35 | plane = i.split('-')[0] 36 | patient = i.split('-')[1] 37 | path = self.root / plane / 'raw' / patient 38 | if 'longitudinal' in i: 39 | study_number = i.split('-')[2] 40 | return path / study_number 41 | return path 42 | 43 | def image(self, i): 44 | file = self._file(i) 45 | if 'longitudinal' in str(file): 46 | study_number = file.stem 47 | file_name = file.parent / f'{study_number}_FLAIR.nii.gz' 48 | else: 49 | file_name = file / 'FLAIR.nii.gz' 50 | image = nb.load(file_name).get_fdata() 51 | return image 52 | 53 | def mask(self, i): 54 | file = self._file(i) 55 | if 'longitudinal' in str(file): 56 | file_name = file.parent / 'gt.nii.gz' 57 | else: 58 | file_name = file / 'consensus_gt.nii.gz' 59 | image = nb.load(file_name).get_fdata() 60 | return image 61 | 62 | def patient(self, i): 63 | file = self._file(i) 64 | if 'longitudinal' in str(file): 65 | return Path(file).parent.name 66 | else: 67 | return Path(file).name 68 | 69 | def affine(self, i): 70 | file = self._file(i) 71 | if 'longitudinal' in str(file): 72 | study_number = file.stem 73 | file_name = file.parent / f'{study_number}_FLAIR.nii.gz' 74 | else: 75 | file_name = file / 'FLAIR.nii.gz' 76 | return nb.load(file_name).affine 77 | -------------------------------------------------------------------------------- /docs/mkdocstrings_handlers/python_connectome.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | from collections import OrderedDict 3 | 4 | from griffe.dataclasses import Alias, Attribute, Class, Function, Parameter, ParameterKind, Parameters 5 | from mkdocstrings_handlers.python.handler import PythonHandler 6 | 7 | 8 | class PythonConnectomeHandler(PythonHandler): 9 | def get_templates_dir(self, handler: str): 10 | return super().get_templates_dir('python') 11 | 12 | def collect(self, identifier: str, config: dict): 13 | result = super().collect(identifier, config) 14 | m, p = result.path.rsplit('.', 1) 15 | v = getattr(importlib.import_module(m), p) 16 | if hasattr(v, 
'__origin__'): 17 | origin = v.__origin__ 18 | if origin.__qualname__ != result.name: 19 | origin = super().collect(f'{origin.__module__}.{origin.__qualname__}', config) 20 | origin.name = result.name 21 | result = origin 22 | 23 | if isinstance(result, Alias): 24 | result.target = self.patch_class(result.target) 25 | else: 26 | result = self.patch_class(result) 27 | return result 28 | 29 | @staticmethod 30 | def patch_class(x: Class): 31 | members = OrderedDict() 32 | for name, v in x.members.items(): 33 | if not name.startswith('_'): 34 | if isinstance(v, Function): 35 | if name == 'ids': 36 | v.parameters = Parameters() 37 | else: 38 | v.parameters = Parameters( 39 | Parameter('id', annotation='str', kind=ParameterKind.positional_or_keyword) 40 | ) 41 | 42 | elif isinstance(v, Attribute): 43 | v = Function( 44 | name, 45 | parameters=Parameters( 46 | Parameter('id', annotation='str', kind=ParameterKind.positional_or_keyword) 47 | ), 48 | parent=x, 49 | ) 50 | 51 | else: 52 | raise TypeError(v) 53 | 54 | members[name] = v 55 | 56 | x.members = members 57 | return x 58 | 59 | 60 | def get_handler(theme: str, custom_templates=None, config_file_path=None, paths=None, **config): 61 | return PythonConnectomeHandler( 62 | handler='python_connectome', 63 | theme=theme, 64 | custom_templates=custom_templates, 65 | config_file_path=config_file_path, 66 | paths=paths, 67 | ) 68 | -------------------------------------------------------------------------------- /amid/kits.py: -------------------------------------------------------------------------------- 1 | import nibabel as nb 2 | import numpy as np 3 | 4 | from .internals import Dataset, field, register 5 | from .utils import PathOrStr 6 | 7 | 8 | @register( 9 | body_region='thorax', 10 | license=None, # todo 11 | link='https://kits-challenge.org/kits23/', 12 | modality='CT', 13 | prep_data_size='50G', 14 | raw_data_size='12G', 15 | task='Kidney Tumor Segmentation', 16 | ) 17 | class KiTS23(Dataset): 18 | """Kidney and Kidney Tumor Segmentation Challenge, 19 | The 2023 Kidney and Kidney Tumor Segmentation challenge (abbreviated KiTS23) 20 | is a competition in which teams compete to develop the best system for 21 | automatic semantic segmentation of kidneys, renal tumors, and renal cysts. 22 | 23 | Competition page is https://kits-challenge.org/kits23/, official competition repository is 24 | https://github.com/neheller/kits23/. 25 | 26 | For usage, clone the repository https://github.com/neheller/kits23/, install and run `kits23_download_data`. 27 | 28 | Parameters 29 | ---------- 30 | root: str, Path 31 | Absolute path to the root containing the downloaded archive and meta. 32 | If not provided, the cache is assumed to be already populated. 
33 | """ 34 | 35 | def __init__(self, root: PathOrStr): 36 | super().__init__(root) 37 | if not (self.root / "dataset").exists(): 38 | raise FileNotFoundError(f"Dataset not found in {self.root}") 39 | 40 | @property 41 | def ids(self): 42 | return tuple(sorted(sub.name for sub in (self.root / 'dataset').glob('*'))) 43 | 44 | @field 45 | def image(self, i): 46 | # CT images are integer-valued, this will help us improve compression rates 47 | image_file = nb.load(self.root / 'dataset' / i / 'imaging.nii.gz') 48 | return np.int16(image_file.get_fdata()[...]) 49 | 50 | # TODO add multiple segmentations 51 | @field 52 | def mask(self, i): 53 | """Combined annotation for kidneys, tumor and cyst (if present).""" 54 | ct_scan_nifti = nb.load(self.root / 'dataset' / i / 'segmentation.nii.gz') 55 | return np.int8(ct_scan_nifti.get_fdata()) 56 | 57 | @field 58 | def affine(self, i): 59 | """The 4x4 matrix that gives the image's spatial orientation.""" 60 | image_file = nb.load(self.root / 'dataset' / i / 'imaging.nii.gz') 61 | return image_file.affine 62 | 63 | @property 64 | def labels_names(self): 65 | """Indicates which label correspond to which mask, consistent accross all samples.""" 66 | return KITS_LABEL_NAMES 67 | 68 | 69 | KITS_LABEL_NAMES = { 70 | # https://github.com/neheller/kits23/blob/063d4c00afd383fc68145a00c0aa6a4e2a3c0f50/kits23/configuration/labels.py#L23 71 | 1: 'kidney', 72 | 2: 'tumor', 73 | 3: 'cyst', 74 | } 75 | -------------------------------------------------------------------------------- /amid/covid_1110.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | from typing import Union 3 | 4 | import nibabel 5 | import numpy as np 6 | 7 | from .internals import Dataset, field, register 8 | 9 | 10 | @register( 11 | body_region='Thorax', 12 | modality='CT', 13 | task='COVID-19 Segmentation', 14 | link='https://mosmed.ai/en/datasets/covid191110/', 15 | raw_data_size='21G', 16 | ) 17 | class MoscowCovid1110(Dataset): 18 | """ 19 | The Moscow Radiology COVID-19 dataset. 20 | 21 | Parameters 22 | ---------- 23 | root : str, Path, optional 24 | path to the folder containing the raw downloaded files. 25 | If not provided, the cache is assumed to be already populated. 
26 | 27 | Notes 28 | ----- 29 | Download links: 30 | https://mosmed.ai/en/datasets/covid191110/ 31 | 32 | Examples 33 | -------- 34 | >>> # Place the downloaded files in any folder and pass the path to the constructor: 35 | >>> ds = MoscowCovid1110(root='/path/to/files/root') 36 | >>> print(len(ds.ids)) 37 | # 1110 38 | >>> print(ds.image(ds.ids[0]).shape) 39 | # (512, 512, 43) 40 | """ 41 | 42 | @property 43 | def ids(self): 44 | return sorted({f.name[:-7] for f in self.root.glob('CT-*/*')}) 45 | 46 | def _file(self, i): 47 | return next(self.root.glob(f'CT-*/{i}.nii.gz')) 48 | 49 | @field 50 | def image(self, i) -> np.ndarray: 51 | with self._file(i).open('rb') as opened: 52 | with gzip.GzipFile(fileobj=opened) as nii: 53 | nii = nibabel.FileHolder(fileobj=nii) 54 | image = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 55 | # most ct scans are integer-valued, this will help us improve compression rates 56 | # (instead of using `image.get_fdata()`) 57 | return np.asarray(image.dataobj) 58 | 59 | @field 60 | def affine(self, i) -> np.ndarray: 61 | with self._file(i).open('rb') as opened: 62 | with gzip.GzipFile(fileobj=opened) as nii: 63 | nii = nibabel.FileHolder(fileobj=nii) 64 | image = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 65 | return image.affine 66 | 67 | @field 68 | def label(self, i) -> str: 69 | return self._file(i).parent.name[3:] 70 | 71 | @field 72 | def mask(self, i) -> Union[np.ndarray, None]: 73 | path = self.root / 'masks' / f'{i}_mask.nii.gz' 74 | if not path.exists(): 75 | return 76 | 77 | with path.open('rb') as opened: 78 | with gzip.GzipFile(fileobj=opened) as nii: 79 | nii = nibabel.FileHolder(fileobj=nii) 80 | image = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 81 | return np.asarray(image.dataobj) > 0.5 82 | -------------------------------------------------------------------------------- /amid/totalsegmentator/const.py: -------------------------------------------------------------------------------- 1 | ANATOMICAL_STRUCTURES = [ 2 | 'adrenal_gland_left', 3 | 'adrenal_gland_right', 4 | 'aorta', 5 | 'autochthon_left', 6 | 'autochthon_right', 7 | 'brain', 8 | 'clavicula_left', 9 | 'clavicula_right', 10 | 'colon', 11 | 'duodenum', 12 | 'esophagus', 13 | 'face', 14 | 'femur_left', 15 | 'femur_right', 16 | 'gallbladder', 17 | 'gluteus_maximus_left', 18 | 'gluteus_maximus_right', 19 | 'gluteus_medius_left', 20 | 'gluteus_medius_right', 21 | 'gluteus_minimus_left', 22 | 'gluteus_minimus_right', 23 | 'heart_atrium_left', 24 | 'heart_atrium_right', 25 | 'heart_myocardium', 26 | 'heart_ventricle_left', 27 | 'heart_ventricle_right', 28 | 'hip_left', 29 | 'hip_right', 30 | 'humerus_left', 31 | 'humerus_right', 32 | 'iliac_artery_left', 33 | 'iliac_artery_right', 34 | 'iliac_vena_left', 35 | 'iliac_vena_right', 36 | 'iliopsoas_left', 37 | 'iliopsoas_right', 38 | 'inferior_vena_cava', 39 | 'kidney_left', 40 | 'kidney_right', 41 | 'liver', 42 | 'lung_lower_lobe_left', 43 | 'lung_lower_lobe_right', 44 | 'lung_middle_lobe_right', 45 | 'lung_upper_lobe_left', 46 | 'lung_upper_lobe_right', 47 | 'pancreas', 48 | 'portal_vein_and_splenic_vein', 49 | 'pulmonary_artery', 50 | 'rib_left_1', 51 | 'rib_left_10', 52 | 'rib_left_11', 53 | 'rib_left_12', 54 | 'rib_left_2', 55 | 'rib_left_3', 56 | 'rib_left_4', 57 | 'rib_left_5', 58 | 'rib_left_6', 59 | 'rib_left_7', 60 | 'rib_left_8', 61 | 'rib_left_9', 62 | 'rib_right_1', 63 | 'rib_right_10', 64 | 'rib_right_11', 65 | 'rib_right_12', 66 | 'rib_right_2', 67 | 'rib_right_3', 68 | 
'rib_right_4', 69 | 'rib_right_5', 70 | 'rib_right_6', 71 | 'rib_right_7', 72 | 'rib_right_8', 73 | 'rib_right_9', 74 | 'sacrum', 75 | 'scapula_left', 76 | 'scapula_right', 77 | 'small_bowel', 78 | 'spleen', 79 | 'stomach', 80 | 'trachea', 81 | 'urinary_bladder', 82 | 'vertebrae_C1', 83 | 'vertebrae_C2', 84 | 'vertebrae_C3', 85 | 'vertebrae_C4', 86 | 'vertebrae_C5', 87 | 'vertebrae_C6', 88 | 'vertebrae_C7', 89 | 'vertebrae_L1', 90 | 'vertebrae_L2', 91 | 'vertebrae_L3', 92 | 'vertebrae_L4', 93 | 'vertebrae_L5', 94 | 'vertebrae_T1', 95 | 'vertebrae_T10', 96 | 'vertebrae_T11', 97 | 'vertebrae_T12', 98 | 'vertebrae_T2', 99 | 'vertebrae_T3', 100 | 'vertebrae_T4', 101 | 'vertebrae_T5', 102 | 'vertebrae_T6', 103 | 'vertebrae_T7', 104 | 'vertebrae_T8', 105 | 'vertebrae_T9', 106 | ] 107 | 108 | LABELS = ['age', 'gender', 'institute', 'study_type', 'split'] 109 | -------------------------------------------------------------------------------- /amid/rsna_bc/dataset.py: -------------------------------------------------------------------------------- 1 | from contextlib import suppress 2 | from functools import cached_property 3 | 4 | import pandas as pd 5 | import pydicom 6 | 7 | from ..internals import Dataset, field, register 8 | from .utils import csv_field, unpack 9 | 10 | 11 | @register( 12 | body_region='Thorax', 13 | license='Non-Commercial Use', 14 | link='https://www.kaggle.com/competitions/rsna-breast-cancer-detection/data', 15 | modality='MG', 16 | raw_data_size='271G', 17 | prep_data_size='294G', 18 | task='Breast cancer classification', 19 | ) 20 | class RSNABreastCancer(Dataset): 21 | @cached_property 22 | def _meta(self): 23 | dfs = [] 24 | for part in 'train', 'test': 25 | with suppress(FileNotFoundError): 26 | with unpack(self.root, f'{part}.csv') as (file, _): 27 | df = pd.read_csv(file) 28 | df['part'] = part 29 | dfs.append(df) 30 | 31 | if not dfs: 32 | raise FileNotFoundError('No metadata found') 33 | dfs = pd.concat(dfs, ignore_index=True) 34 | for name in 'image_id', 'patient_id', 'site_id': 35 | dfs[name] = dfs[name].astype(str) 36 | 37 | raw = list(map(str, dfs.image_id.tolist())) 38 | ids = set(raw) 39 | if len(ids) != len(raw): 40 | raise ValueError('The image ids are not unique') 41 | 42 | return {row.image_id: row for _, row in dfs.iterrows()} 43 | 44 | # csv fields 45 | site_id = csv_field('site_id', str) 46 | patient_id = csv_field('patient_id', str) 47 | image_id = csv_field('image_id', str) 48 | laterality = csv_field('laterality', None) 49 | view = csv_field('view', None) 50 | age = csv_field('age', None) 51 | cancer = csv_field('cancer', bool) 52 | biopsy = csv_field('biopsy', bool) 53 | invasive = csv_field('invasive', bool) 54 | BIRADS = csv_field('BIRADS', int) 55 | implant = csv_field('implant', bool) 56 | density = csv_field('density', None) 57 | machine_id = csv_field('machine_id', str) 58 | prediction_id = csv_field('prediction_id', str) 59 | difficult_negative_case = csv_field('difficult_negative_case', bool) 60 | 61 | @property 62 | def ids(self): 63 | return tuple(sorted(self._meta)) 64 | 65 | def _dicom(self, i): 66 | row = self._meta[i] 67 | with unpack(self.root, f'{row.part}_images/{row.patient_id}/{row.image_id}.dcm') as (file, _): 68 | return pydicom.dcmread(file) 69 | 70 | @field 71 | def image(self, i): 72 | return self._dicom(i).pixel_array 73 | 74 | @field 75 | def padding_value(self, i): 76 | return getattr(self._dicom(i), 'PixelPaddingValue', None) 77 | 78 | @field 79 | def intensity_sign(self, i): 80 | return getattr(self._dicom(i), 
'PixelIntensityRelationshipSign', None) 81 | -------------------------------------------------------------------------------- /amid/cl_detection.py: -------------------------------------------------------------------------------- 1 | from functools import cached_property 2 | from typing import Dict, Tuple 3 | 4 | import numpy as np 5 | import SimpleITK 6 | from connectome import Transform 7 | from deli import load 8 | from imops import crop_to_box 9 | 10 | from .internals import Dataset, field, licenses, register 11 | from .utils import mask_to_box 12 | 13 | 14 | @register( 15 | body_region='Head', 16 | license=licenses.CC_BYNC_40, 17 | link='https://github.com/cwwang1979/CL-detection2023/', 18 | modality='X-ray', 19 | prep_data_size='1.8G', 20 | raw_data_size='1.5G', 21 | task='Keypoint detection', 22 | ) 23 | class CLDetection2023(Dataset): 24 | """ 25 | The data for the "Cephalometric Landmark Detection in Lateral X-ray Images" Challenge, 26 | held with the MICCAI-2023 conference. 27 | 28 | Notes 29 | ----- 30 | The data can only be obtained by contacting the organizers by email. 31 | See the [challenge home page](https://cl-detection2023.grand-challenge.org/) for details. 32 | 33 | Parameters 34 | ---------- 35 | root : str, Path, optional 36 | path to the folder containing the raw downloaded and unarchived data. 37 | If not provided, the cache is assumed to be already populated. 38 | 39 | Examples 40 | -------- 41 | >>> # Place the downloaded archives in any folder and pass the path to the constructor: 42 | >>> ds = CLDetection2023(root='/path/to/data/root/folder') 43 | >>> print(len(ds.ids)) 44 | # 400 45 | >>> print(ds.image(ds.ids[0]).shape) 46 | # (2400, 1935) 47 | """ 48 | 49 | @cached_property 50 | def _images(self): 51 | return SimpleITK.GetArrayFromImage(SimpleITK.ReadImage(self.root / 'train_stack.mha')) 52 | 53 | @cached_property 54 | def _points(self): 55 | return load(self.root / 'train-gt.json')['points'] 56 | 57 | @property 58 | def ids(self): 59 | return tuple(map(str, range(1, len(self._images) + 1))) 60 | 61 | @field 62 | def image(self, i) -> np.ndarray: 63 | i = int(i) 64 | return self._images[i - 1] 65 | 66 | @field 67 | def points(self, i) -> Dict[str, np.ndarray]: 68 | i = int(i) 69 | return {x['name']: np.array(x['point'][:2]) for x in self._points if x['point'][-1] == i} 70 | 71 | @field 72 | def spacing(self, i) -> Tuple[float, float]: 73 | i = int(i) 74 | (scale,) = {x['scale'] for x in self._points if x['point'][-1] == i} 75 | scale = float(scale) 76 | return scale, scale 77 | 78 | 79 | class CropPadding(Transform): 80 | __inherit__ = 'spacing' 81 | 82 | def _box(image): 83 | return mask_to_box(image[..., 0] != 0) 84 | 85 | def image(image, _box): 86 | return crop_to_box(image[..., 0], _box) 87 | 88 | def points(points, _box): 89 | return {k: v - _box[0] for k, v in points.items()} 90 | 91 | 92 | class FlipPoints(Transform): 93 | __inherit__ = True 94 | 95 | def points(points): 96 | return {name: pt[::-1] for name, pt in points.items()} 97 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: [ pull_request ] 4 | 5 | env: 6 | MODULE_NAME: amid 7 | 8 | jobs: 9 | test: 10 | runs-on: ubuntu-20.04 11 | strategy: 12 | matrix: 13 | python-version: [ '3.8', '3.9', '3.10', '3.11', '3.12' ] 14 | 15 | steps: 16 | - uses: actions/checkout@v3 17 | - name: Set up Python ${{ matrix.python-version }} 18 | 
uses: actions/setup-python@v4 19 | with: 20 | python-version: ${{ matrix.python-version }} 21 | 22 | - name: Check the version 23 | if: "! github.event.pull_request.head.repo.fork " 24 | run: | 25 | VERSION=$(python -c "from pathlib import Path; import runpy; folder, = {d.parent for d in Path().resolve().glob('*/__init__.py') if d.parent.is_dir() and (d.parent / '__version__.py').exists()}; print(runpy.run_path(folder / '__version__.py')['__version__'])") 26 | MATCH=$(pip index versions $MODULE_NAME | grep "Available versions:" | grep $VERSION) || echo 27 | echo $MATCH 28 | if [ "$GITHUB_BASE_REF" = "master" ] && [ "$MATCH" != "" ]; then exit 1; fi 29 | - name: Build the package 30 | run: | 31 | pip install build 32 | python -m build --sdist 33 | 34 | - name: Install 35 | run: | 36 | pip install dist/* 37 | pip install -r tests/requirements.txt 38 | 39 | cd tests 40 | export MODULE_PARENT=$(python -c "import $MODULE_NAME, os; print(os.path.dirname($MODULE_NAME.__path__[0]))") 41 | export MODULE_PARENT=${MODULE_PARENT%"/"} 42 | cd .. 43 | echo $MODULE_PARENT 44 | echo "MODULE_PARENT=$(echo $MODULE_PARENT)" >> $GITHUB_ENV 45 | 46 | - name: Test with pytest 47 | if: "! github.event.pull_request.head.repo.fork " 48 | run: | 49 | # pytest tests -m "not raw" --junitxml=reports/junit-${{ matrix.python-version }}.xml --cov="$MODULE_PARENT/$MODULE_NAME" --cov-report=xml --cov-branch 50 | # for now we only test that everything is importable 51 | pip install setuptools # needed for pylidc to work 52 | python -c "from $MODULE_NAME import *" 53 | # - name: Generate coverage report 54 | # if: "! github.event.pull_request.head.repo.fork " 55 | # run: | 56 | # coverage xml -o reports/coverage-${{ matrix.python-version }}.xml 57 | # sed -i -e "s|$MODULE_PARENT/||g" reports/coverage-${{ matrix.python-version }}.xml 58 | # sed -i -e "s|$(echo $MODULE_PARENT/ | tr "/" .)||g" reports/coverage-${{ matrix.python-version }}.xml 59 | # 60 | # - name: Upload artifacts 61 | # if: "! github.event.pull_request.head.repo.fork " 62 | # uses: actions/upload-artifact@v3 63 | # with: 64 | # name: reports-${{ matrix.python-version }} 65 | # path: reports/*-${{ matrix.python-version }}.xml 66 | 67 | # TODO: coverage is not informative in the CI anyway 68 | # - name: Upload coverage results 69 | # if: "! github.event.pull_request.head.repo.fork " 70 | # uses: codecov/codecov-action@v3 71 | # with: 72 | # fail_ci_if_error: true 73 | # files: reports/coverage-${{ matrix.python-version }}.xml 74 | # verbose: true 75 | -------------------------------------------------------------------------------- /amid/tbad.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | from pathlib import Path 3 | 4 | import nibabel as nb 5 | import numpy as np 6 | 7 | from .internals import Dataset, field, licenses, register 8 | 9 | 10 | @register( 11 | body_region='Chest', 12 | license=licenses.CC_BYNC_40, 13 | link='https://github.com/XiaoweiXu/Dataset_Type-B-Aortic-Dissection', 14 | modality='CT', 15 | prep_data_size='14G', 16 | raw_data_size='14G', 17 | task='Aortic dissection segmentation', 18 | ) 19 | class TBAD(Dataset): 20 | """ 21 | A dataset of 3D Computed Tomography (CT) images for Type-B Aortic Dissection segmentation. 22 | 23 | Notes 24 | ----- 25 | The data can only be obtained by contacting the authors by email. 26 | See the [dataset home page](https://github.com/XiaoweiXu/Dataset_Type-B-Aortic-Dissection) for details. 
27 | 28 | Parameters 29 | ---------- 30 | root : str, Path, optional 31 | path to the folder containing the raw downloaded files. 32 | If not provided, the cache is assumed to be already populated. 33 | 34 | Examples 35 | -------- 36 | >>> # Place the downloaded files in any folder and pass the path to the constructor: 37 | >>> ds = TBAD(root='/path/to/files/root') 38 | >>> print(len(ds.ids)) 39 | # 100 40 | >>> print(ds.image(ds.ids[0]).shape) 41 | # (512, 512, 327) 42 | 43 | References 44 | ---------- 45 | .. [1] Yao, Zeyang & Xie, Wen & Zhang, Jiawei & Dong, Yuhao & Qiu, Hailong & Haiyun, Yuan & Jia, 46 | Qianjun & Tianchen, Wang & Shi, Yiyi & Zhuang, Jian & Que, Lifeng & Xu, Xiaowei & Huang, Meiping. 47 | (2021). ImageTBAD: A 3D Computed Tomography Angiography Image Dataset for Automatic Segmentation 48 | of Type-B Aortic Dissection. Frontiers in Physiology. 12. 732711. 10.3389/fphys.2021.732711. 49 | """ 50 | 51 | @property 52 | def ids(self): 53 | result = set() 54 | 55 | for file in self.root.glob('*_image.nii.gz'): 56 | result.add(file.stem.split('_')[0]) 57 | 58 | return tuple(sorted(result)) 59 | 60 | def _fname(self, i): 61 | return self.root / f'{i}_image.nii.gz' 62 | 63 | def image(self, i) -> np.ndarray: 64 | with self._fname(i).open('rb') as opened: 65 | with gzip.GzipFile(fileobj=opened) as nii: 66 | nii = nb.FileHolder(fileobj=nii) 67 | image = nb.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 68 | return np.int16(image.get_fdata()) 69 | 70 | def affine(self, i) -> np.ndarray: 71 | """The 4x4 matrix that gives the image's spatial orientation.""" 72 | with self._fname(i).open('rb') as opened: 73 | with gzip.GzipFile(fileobj=opened) as nii: 74 | nii = nb.FileHolder(fileobj=nii) 75 | image = nb.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 76 | return image.affine 77 | 78 | @field 79 | def mask(self, i) -> np.ndarray: 80 | with Path(self.root / f'{i}_label.nii.gz').open('rb') as opened: 81 | with gzip.GzipFile(fileobj=opened) as nii: 82 | nii = nb.FileHolder(fileobj=nii) 83 | label = nb.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 84 | return np.uint8(label.get_fdata()) 85 | -------------------------------------------------------------------------------- /amid/ribfrac/dataset.py: -------------------------------------------------------------------------------- 1 | from functools import cached_property 2 | 3 | import nibabel 4 | import numpy as np 5 | 6 | from ..internals import Dataset, licenses, register 7 | 8 | 9 | @register( 10 | body_region='Chest', 11 | license=licenses.CC_BYNC_40, 12 | link='https://ribfrac.grand-challenge.org', 13 | modality='CT', 14 | raw_data_size='77.8 G', 15 | task='Segmentation', 16 | ) 17 | class RibFrac(Dataset): 18 | """ 19 | RibFrac dataset is a benchmark for developping algorithms on rib fracture detection, 20 | segmentation and classification. We hope this large-scale dataset could facilitate 21 | both clinical research for automatic rib fracture detection and diagnoses, 22 | and engineering research for 3D detection, segmentation and classification. 23 | 24 | 25 | Parameters 26 | ---------- 27 | root : str, Path, optional 28 | path to the folder containing the raw downloaded archives. 29 | If not provided, the cache is assumed to be already populated. 
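
    Examples
    --------
    A minimal usage sketch: the root path is a placeholder, and the total number of ids
    (300 + 120 + 80 + 160 = 660) is inferred from the download notes below, not verified.

    >>> ds = RibFrac(root='/path/to/ribfrac')  # must contain Part1, Part2, ribfrac-val-images, ribfrac-test-images
    >>> print(len(ds.ids))
    # 660 (expected)
    >>> image = ds.image(ds.ids[0])  # int16 volume
    >>> label = ds.label(ds.ids[0])  # int16 mask, or None for test-split cases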
30 | 31 | 32 | Notes 33 | ----- 34 | Data downloaded from here: 35 | https://doi.org/10.5281/zenodo.3893507 -- train Part1 (300 images) 36 | https://doi.org/10.5281/zenodo.3893497 -- train Part2 (120 images) 37 | https://doi.org/10.5281/zenodo.3893495 -- val (80 images) 38 | https://zenodo.org/record/3993380 -- test (160 images without annotation) 39 | 40 | 41 | 42 | References 43 | ---------- 44 | Jiancheng Yang, Liang Jin, Bingbing Ni, & Ming Li. (2020). 45 | RibFrac Dataset: A Benchmark for Rib Fracture Detection, 46 | Segmentation and Classification 47 | """ 48 | 49 | @property 50 | def ids(self): 51 | result = set() 52 | for folder in ['Part1', 'Part2', 'ribfrac-val-images', 'ribfrac-test-images']: 53 | result |= {v.name.split('-')[0] for v in (self.root / folder).iterdir()} 54 | 55 | return tuple(sorted(result)) 56 | 57 | @cached_property 58 | def _id2folder(self): 59 | folders = [item for item in self.root.iterdir() if item.is_dir()] 60 | result_dict = {} 61 | for folder in folders: 62 | p = self.root / folder 63 | folder_ids = [v.name.split('-')[0] for v in p.iterdir()] 64 | folder_dict = {_id: p for _id in folder_ids} 65 | result_dict = {**result_dict, **folder_dict} 66 | 67 | return result_dict 68 | 69 | def image(self, i): 70 | image_path = self._id2folder[i] / f'{i}-image.nii.gz' 71 | image = nibabel.load(image_path).get_fdata() 72 | return image.astype(np.int16) 73 | 74 | def label(self, i): 75 | folder_path = self._id2folder[i] 76 | folder = folder_path.name 77 | if folder != 'ribfrac-test-images': 78 | if folder.startswith('Part'): 79 | label_path = folder_path / f'{i}-label.nii.gz' 80 | elif folder == 'ribfrac-val-images': 81 | dir = folder_path.parent / 'ribfrac-val-labels' 82 | label_path = dir / f'{i}-label.nii.gz' 83 | 84 | label = nibabel.load(label_path).get_fdata() 85 | return label.astype(np.int16) 86 | 87 | def affine(self, i): 88 | """The 4x4 matrix that gives the image's spatial orientation""" 89 | image_path = self._id2folder[i] / f'{i}-image.nii.gz' 90 | return nibabel.load(image_path).affine 91 | -------------------------------------------------------------------------------- /amid/liver_medseg.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import gzip 3 | import re 4 | import zipfile 5 | from pathlib import Path 6 | from zipfile import ZipFile 7 | 8 | import nibabel as nb 9 | import numpy as np 10 | 11 | from .internals import Dataset, field, licenses, register 12 | 13 | 14 | @register( 15 | body_region=('Chest', 'Abdomen'), 16 | license=licenses.CC_BYSA_40, 17 | link='https://www.medseg.ai/database/liver-segments-50-cases', 18 | modality='CT', 19 | prep_data_size='1,88G', 20 | raw_data_size='616M', 21 | task='Segmentation', 22 | ) 23 | class LiverMedseg(Dataset): 24 | """ 25 | LiverMedseg is a public CT segmentation dataset with 50 annotated images. 26 | Case collection of 50 livers with their segments. 27 | Images obtained from Decathlon Medical Segmentation competition 28 | 29 | Parameters 30 | ---------- 31 | root : str, Path, optional 32 | path to the folder containing the raw downloaded archives. 33 | If not provided, the cache is assumed to be already populated. 
34 | 35 | Notes 36 | ----- 37 | Download links: 38 | https://www.medseg.ai/database/liver-segments-50-cases 39 | 40 | Examples 41 | -------- 42 | >>> # Place the downloaded archives in any folder and pass the path to the constructor: 43 | >>> ds = LiverMedseg(root='/path/to/archives/root') 44 | >>> print(len(ds.ids)) 45 | # 50 46 | >>> print(ds.image(ds.ids[0]).shape) 47 | # (512, 512, 38) 48 | 49 | References 50 | ---------- 51 | """ 52 | 53 | @property 54 | def ids(self): 55 | result = set() 56 | with ZipFile(self.root / 'img.zip') as zf: 57 | for zipinfo in zf.infolist(): 58 | if zipinfo.is_dir(): 59 | continue 60 | file_stem = Path(zipinfo.filename).stem 61 | result.add('liver_medseg_' + re.findall(r'\d+', file_stem)[0]) 62 | 63 | return tuple(sorted(result)) 64 | 65 | def _file(self, i): 66 | num_id = i.split('_')[-1] 67 | return zipfile.Path(self.root / 'img.zip', f'img{num_id}.nii.gz') 68 | 69 | @field 70 | def image(self, i) -> np.ndarray: 71 | with open_nii_gz_file(self._file(i)) as nii_file: 72 | return np.asarray(nii_file.dataobj) 73 | 74 | @field 75 | def affine(self, i) -> np.ndarray: 76 | """The 4x4 matrix that gives the image's spatial orientation.""" 77 | with open_nii_gz_file(self._file(i)) as nii_file: 78 | return nii_file.affine 79 | 80 | def spacing(self, i) -> tuple: 81 | with open_nii_gz_file(self._file(i)) as nii_file: 82 | return tuple(nii_file.header['pixdim'][1:4]) 83 | 84 | @field 85 | def mask(self, i) -> np.ndarray: 86 | path = Path(str(self._file(i)).replace('img', 'mask')) 87 | folder, image = path.parent, path.name 88 | _file = zipfile.Path(folder, image) 89 | with open_nii_gz_file(_file) as nii_file: 90 | return np.asarray(nii_file.dataobj).astype(np.uint8) 91 | 92 | 93 | # TODO: sync with amid.utils 94 | @contextlib.contextmanager 95 | def open_nii_gz_file(file): 96 | with file.open('rb') as opened: 97 | with gzip.GzipFile(fileobj=opened) as nii: 98 | nii = nb.FileHolder(fileobj=nii) 99 | yield nb.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 100 | -------------------------------------------------------------------------------- /docs/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contribution Guidelines 2 | 3 | ## Preparing the environment 4 | 5 | 1\. First, set up a cache storage. Create the file `~/.config/amid/.bev.yml` with the following content: 6 | 7 | ```yaml 8 | main: 9 | storage: /path/to/storage 10 | cache: /path/to/cache 11 | ``` 12 | 13 | where `/path/to/storage` and `/path/to/cache` are some paths in your filesystem. 14 | 15 | 2\. Run 16 | 17 | ```shell 18 | amid init 19 | ``` 20 | 21 | The full command could look something like this: 22 | 23 | ```shell 24 | mkdir -p ~/.config/amid 25 | cat >~/.config/amid/.bev.yml < The dataset should be written in such a way, that making a submission to a contest would work out of the box. 55 | 56 | !!! note 57 | In case of DICOM files, make sure to transpose the first 2 image axes. 58 | This way, the image axes will be consistent with the potential contour coordinates. 59 | 60 | !!! tip 61 | If some value is missing for a given id, it is preferable to return `None` instead of raising an exception. 62 | 63 | !!! tip 64 | The dataset must have a docstring which describes it and provides a link to the original data. 65 | 66 | !!! tip 67 | If the raw data contains a table with metadata, it is preferable to split the metadata columns into separate fields. 68 | 69 | 4\. 
Register the dataset like so: 70 | 71 | ```python 72 | from amid.internals import register 73 | 74 | @register( 75 | ..., 76 | ) 77 | class LiTS(Dataset): 78 | ... 79 | ``` 80 | 81 | where `...` stands for the following arguments: 82 | 83 | - `modality` — the images' modality/modalities, e.g., CT, MRI 84 | - `body_region` — the anatomical regions present in the dataset, e.g., Head, Thorax, Abdomen 85 | - `license` — the dataset's license, if any 86 | - `link` — the link to the original data 87 | - `raw_data_size` — the total size, required for the raw data, e.g., 10G, 500M 88 | - `task` — the dataset's downstream task if any. 89 | E.g., Supervised Learning, Domain Adaptation, Self-supervised Learning, Tumor Segmentation, etc. 90 | 91 | 5\. Make sure all the methods are working as expected: 92 | 93 | ```python 94 | from amid.lits import LiTS 95 | 96 | dataset = LiTS(root="/datasets/LiTS") 97 | 98 | print(len(dataset.ids)) 99 | 100 | id_ = dataset.ids[0] 101 | print(dataset.image(id_).shape) 102 | ``` 103 | 104 | 6\. Check the codestyle using the `lint.sh` script in the repository's root and make changes if flake8 is not happy: 105 | 106 | ```shell 107 | pip install -r lint-requirements.txt # only for the first time 108 | ./lint.sh 109 | ``` -------------------------------------------------------------------------------- /amid/medseg9.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import gzip 3 | import zipfile 4 | from pathlib import Path 5 | from zipfile import ZipFile 6 | 7 | import nibabel as nb 8 | import numpy as np 9 | 10 | from .internals import Dataset, field, licenses, register 11 | 12 | 13 | @register( 14 | body_region='Chest', 15 | license=licenses.CC0_10, 16 | link='http://medicalsegmentation.com/covid19/', 17 | modality='CT', 18 | prep_data_size='300M', 19 | raw_data_size='310M', 20 | task='COVID-19 segmentation', 21 | ) 22 | class Medseg9(Dataset): 23 | """ 24 | 25 | Medseg9 is a public COVID-19 CT segmentation dataset with 9 annotated images. 26 | 27 | Parameters 28 | ---------- 29 | root : str, Path, optional 30 | path to the folder containing the raw downloaded archives. 31 | If not provided, the cache is assumed to be already populated. 32 | 33 | Notes 34 | ----- 35 | Data can be downloaded here: http://medicalsegmentation.com/covid19/. 36 | 37 | Then, the folder with raw downloaded data should contain three zip archives with data and masks 38 | (`rp_im.zip`, `rp_lung_msk.zip`, `rp_msk.zip`). 
39 | 40 | Examples 41 | -------- 42 | >>> # Place the downloaded archives in any folder and pass the path to the constructor: 43 | >>> ds = Medseg9(root='/path/to/downloaded/data/folder/') 44 | >>> print(len(ds.ids)) 45 | # 9 46 | >>> print(ds.image(ds.ids[0]).shape) 47 | # (630, 630, 45) 48 | >>> print(ds.covid(ds.ids[0]).shape) 49 | # (630, 630, 45) 50 | 51 | """ 52 | 53 | @property 54 | def ids(self): 55 | result = set() 56 | 57 | with ZipFile(self.root / 'rp_msk.zip') as zf: 58 | for zipinfo in zf.infolist(): 59 | if zipinfo.is_dir(): 60 | continue 61 | file_stem = Path(zipinfo.filename).stem 62 | result.add('medseg9_' + file_stem.split('.nii')[0]) 63 | 64 | return tuple(sorted(result)) 65 | 66 | @staticmethod 67 | def _filename(i): 68 | num_id = i.split('_')[-1] 69 | return f'{num_id}.nii.gz' 70 | 71 | def _file(self, i): 72 | return zipfile.Path(self.root / 'rp_im.zip', f'rp_im/{self._filename(i)}') 73 | 74 | @field 75 | def image(self, i): 76 | with open_nii_gz_file(self._file(i)) as nii_image: 77 | # most CT/MRI scans are integer-valued, this will help us improve compression rates 78 | return np.int16(nii_image.get_fdata()) 79 | 80 | @field 81 | def affine(self, i): 82 | """The 4x4 matrix that gives the image's spatial orientation.""" 83 | with open_nii_gz_file(self._file(i)) as nii_image: 84 | return nii_image.affine 85 | 86 | @field 87 | def lungs(self, i): 88 | mask_file = zipfile.Path(self.root / 'rp_lung_msk.zip', f'rp_lung_msk/{self._filename(i)}') 89 | with open_nii_gz_file(mask_file) as nii_image: 90 | return np.bool_(nii_image.get_fdata()) 91 | 92 | @field 93 | def covid(self, i): 94 | """ 95 | int16 mask. 96 | 0 - normal, 1 - ground-glass opacities (матовое стекло), 2 - consolidation (консолидация). 97 | """ 98 | mask_file = zipfile.Path(self.root / 'rp_msk.zip', f'rp_msk/{self._filename(i)}') 99 | with open_nii_gz_file(mask_file) as nii_image: 100 | # most CT/MRI scans are integer-valued, this will help us improve compression rates 101 | return np.uint8(nii_image.get_fdata()) 102 | 103 | 104 | # TODO: sync with amid.utils 105 | @contextlib.contextmanager 106 | def open_nii_gz_file(file): 107 | with file.open('rb') as opened: 108 | with gzip.GzipFile(fileobj=opened) as nii: 109 | nii = nb.FileHolder(fileobj=nii) 110 | yield nb.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 111 | -------------------------------------------------------------------------------- /amid/curvas.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | import zipfile 3 | from typing import Dict 4 | from zipfile import ZipFile 5 | 6 | import nibabel 7 | import numpy as np 8 | 9 | from .internals import Dataset, field, licenses, register 10 | 11 | 12 | @register( 13 | body_region='Abdomen', 14 | license=licenses.CC_BY_40, 15 | link='https://zenodo.org/records/13767408', 16 | modality='CT', 17 | prep_data_size='30G', 18 | raw_data_size='30G', 19 | task='Abdominal organ pathologies segmentation', 20 | ) 21 | class CURVAS(Dataset): 22 | """ 23 | Pancreas, liver and kidney cysts segmentation from multi-rater annotated data. 24 | 25 | The dataset was used at the MICCAI 2024 CURVAS challenge. 26 | 27 | Parameters 28 | ---------- 29 | root : str, Path, optional 30 | path to the folder containing the raw downloaded archives. 31 | If not provided, the cache is assumed to be already populated. 
32 | 33 | Notes 34 | ----- 35 | Download link: https://zenodo.org/records/13767408 36 | 37 | The `root` folder should contain the three downloaded .zip archives, namely: 38 | `training_set.zip`, `validation_set.zip` and `testing_set.zip`. 39 | 40 | Examples 41 | -------- 42 | >>> # Place the downloaded folders in any folder and pass the path to the constructor: 43 | >>> ds = CURVAS(root='/path/to/downloaded/data/folder/') 44 | >>> print(len(ds.ids)) 45 | # 90 46 | >>> print(ds.image(ds.ids[5]).shape) 47 | # (512, 512, 1045) 48 | >>> print(ds.mask(ds.ids[35]).shape) 49 | # (512, 512, 992) 50 | 51 | """ 52 | 53 | @property 54 | def ids(self): 55 | def _extract(split): 56 | archive = self.root / f'{split}_set.zip' 57 | with ZipFile(archive) as zf: 58 | namelist = [x for x in zf.namelist() if len(x.rstrip('/').split('/')) == 2] 59 | ids = [f'{x.split("/")[1]}-{split}' for x in namelist] 60 | return ids 61 | 62 | return sorted( 63 | [ 64 | *_extract('training'), # 20 Training cases 65 | *_extract('validation'), # 5 Validation cases 66 | *_extract('testing'), # 65 Testing cases 67 | ] 68 | ) 69 | 70 | def _file(self, i, obj): 71 | uid, split = i.split('-') 72 | 73 | archive = self.root / f'{split}_set.zip' 74 | file = f'{split}_set/{uid}/{obj}.nii.gz' 75 | 76 | return zipfile.Path(archive, file) 77 | 78 | @field 79 | def image(self, i) -> np.ndarray: 80 | with self._file(i, 'image').open('rb') as opened: 81 | with gzip.GzipFile(fileobj=opened) as nii: 82 | nii = nibabel.FileHolder(fileobj=nii) 83 | image = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 84 | return np.asarray(image.dataobj).astype(np.int16) 85 | 86 | @field 87 | def affine(self, i) -> np.ndarray: 88 | """The 4x4 matrix that gives the image's spatial orientation""" 89 | with self._file(i, 'image').open('rb') as opened: 90 | with gzip.GzipFile(fileobj=opened) as nii: 91 | nii = nibabel.FileHolder(fileobj=nii) 92 | image = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 93 | return image.affine 94 | 95 | @field 96 | def masks(self, i) -> Dict[str, np.ndarray]: 97 | masks = {} 98 | for x in range(1, 4): 99 | with self._file(i, f'annotation_{x}').open('rb') as opened: 100 | with gzip.GzipFile(fileobj=opened) as nii: 101 | nii = nibabel.FileHolder(fileobj=nii) 102 | image = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 103 | 104 | masks[f'annotation_{x}'] = np.asarray(image.dataobj).astype(np.uint8) 105 | 106 | return masks 107 | -------------------------------------------------------------------------------- /amid/totalsegmentator/dataset.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | from contextlib import suppress 3 | from pathlib import Path 4 | from zipfile import ZipFile 5 | 6 | import nibabel 7 | import numpy as np 8 | import pandas as pd 9 | 10 | from ..internals import Dataset, field, licenses, register 11 | from ..utils import PathOrStr, open_nii_gz_file, unpack 12 | from .utils import ARCHIVE_ROOT, add_labels, add_masks 13 | 14 | 15 | @register( 16 | body_region=('Head', 'Thorax', 'Abdomen', 'Pelvis', 'Legs'), 17 | license=licenses.CC_BY_40, 18 | link='https://zenodo.org/record/6802614#.Y6M2MxXP1D8', 19 | modality='CT', 20 | raw_data_size='35G', 21 | prep_data_size='35G', 22 | task='Supervised anatomical structures segmentation', 23 | ) 24 | class Totalsegmentator(Dataset): 25 | """ 26 | In 1204 CT images we segmented 104 anatomical structures (27 organs, 59 bones, 10 muscles, 8 vessels) 27 | covering a majority of 
relevant classes for most use cases. 28 | 29 | The CT images were randomly sampled from clinical routine, thus representing a real world dataset which 30 | generalizes to clinical application. 31 | 32 | The dataset contains a wide range of different pathologies, scanners, sequences and institutions. [1] 33 | 34 | Parameters 35 | ---------- 36 | root : str, Path, optional 37 | absolute path to the downloaded archive. 38 | If not provided, the cache is assumed to be already populated. 39 | 40 | Notes 41 | ----- 42 | Download link: https://zenodo.org/record/6802614/files/Totalsegmentator_dataset.zip 43 | 44 | Examples 45 | -------- 46 | >>> # Download the archive to any folder and pass the path to the constructor: 47 | >>> ds = Totalsegmentator(root='/path/to/the/downloaded/archive') 48 | >>> print(len(ds.ids)) 49 | # 1204 50 | >>> print(ds.image(ds.ids[0]).shape) 51 | # (294, 192, 179) 52 | >>> print(ds.aorta(ds.ids[25]).shape) 53 | # (320, 320, 145) 54 | 55 | References 56 | ---------- 57 | .. [1] Jakob Wasserthal (2022) Dataset with segmentations of 104 important anatomical structures in 1204 CT images. 58 | Available at: https://zenodo.org/record/6802614#.Y6M2MxXP1D8 59 | """ 60 | 61 | add_masks(locals()) 62 | add_labels(locals()) 63 | 64 | def __init__(self, root: PathOrStr): 65 | root = Path(root) 66 | if root.is_dir(): 67 | if root / ARCHIVE_ROOT in list(root.iterdir()): 68 | root = root / ARCHIVE_ROOT 69 | 70 | file = 'meta.csv' 71 | with unpack(root, file, ARCHIVE_ROOT, '.zip') as (unpacked, _): 72 | self._meta = pd.read_csv(unpacked, sep=';') 73 | 74 | super().__init__(root) 75 | 76 | @property 77 | def ids(self): 78 | if self.root.is_dir(): 79 | return sorted({x.name for x in self.root.iterdir() if x.name != 'meta.csv'}) 80 | else: 81 | with ZipFile(self.root) as zf: 82 | parsed_namelist = [x.strip('/').split('/') for x in zf.namelist()] 83 | return sorted({x[-1] for x in parsed_namelist if len(x) == 2 and x[-1] != 'meta.csv'}) 84 | 85 | @field 86 | def image(self, i): 87 | file = f'{i}/ct.nii.gz' 88 | 89 | with suppress(gzip.BadGzipFile): 90 | with unpack(self.root, file, ARCHIVE_ROOT, '.zip') as (unpacked, is_unpacked): 91 | if is_unpacked: 92 | return np.asarray(nibabel.load(unpacked).dataobj) 93 | else: 94 | with open_nii_gz_file(unpacked) as image: 95 | return np.asarray(image.dataobj) 96 | 97 | @field 98 | def affine(self, i): 99 | """The 4x4 matrix that gives the image's spatial orientation""" 100 | file = f'{i}/ct.nii.gz' 101 | 102 | with unpack(self.root, file, ARCHIVE_ROOT, '.zip') as (unpacked, is_unpacked): 103 | if is_unpacked: 104 | return nibabel.load(unpacked).affine 105 | else: 106 | with open_nii_gz_file(unpacked) as image: 107 | return image.affine 108 | -------------------------------------------------------------------------------- /amid/nlst.py: -------------------------------------------------------------------------------- 1 | import deli 2 | import numpy as np 3 | import pydicom 4 | from dicom_csv import ( 5 | Plane, 6 | drop_duplicated_slices, 7 | expand_volumetric, 8 | get_common_tag, 9 | get_orientation_matrix, 10 | get_pixel_spacing, 11 | get_slice_locations, 12 | get_slices_plane, 13 | get_tag, 14 | order_series, 15 | stack_images, 16 | ) 17 | from tqdm.auto import tqdm 18 | 19 | from .internals import Dataset, field, licenses, register 20 | from .utils import get_series_date 21 | 22 | 23 | @register( 24 | body_region='Thorax', 25 | license=licenses.CC_BY_30, 26 | link='https://wiki.cancerimagingarchive.net/display/NLST/National+Lung+Screening+Trial', 
27 | modality='CT', 28 | prep_data_size=None, # TODO: should be measured... 29 | raw_data_size=None, # TODO: should be measured... 30 | task=None, 31 | ) 32 | class NLST(Dataset): 33 | """ 34 | 35 | Dataset with low-dose CT scans of 26,254 patients acquired during National Lung Screening Trial. 36 | 37 | Parameters 38 | ---------- 39 | root : str, Path, optional 40 | path to the folder (usually called NLST) containing the patient subfolders (like 101426). 41 | If not provided, the cache is assumed to be already populated. 42 | 43 | Notes 44 | ----- 45 | Follow the download instructions at 46 | https://wiki.cancerimagingarchive.net/display/NLST/National+Lung+Screening+Trial. 47 | The dicoms should be placed under the following folders' structure: 48 | <...>//////*.dcm 49 | 50 | Examples 51 | -------- 52 | >>> ds = NLST(root='/path/to/NLST/') 53 | >>> print(len(ds.ids)) 54 | ... 55 | >>> print(ds.image(ds.ids[0]).shape) 56 | ... 57 | >>> print(ds.mask(ds.ids[80]).shape) 58 | ... 59 | 60 | References 61 | ---------- 62 | """ 63 | 64 | @property 65 | def ids(self): 66 | ids = [] 67 | for path in tqdm(list(self.root.iterdir())): 68 | series_uid2num_slices = {p.stem: int(deli.load(p)['Total'][5]) for p in path.glob('*/*/*.json')} 69 | ids.append(max(series_uid2num_slices, key=series_uid2num_slices.get)) 70 | 71 | return ids 72 | 73 | def _series(self, i): 74 | (folder,) = self.root.glob(f'**/{i}') 75 | series = list(map(pydicom.dcmread, folder.iterdir())) 76 | series = expand_volumetric(series) 77 | assert get_common_tag(series, 'Modality') == 'CT' 78 | assert get_slices_plane(series) == Plane.Axial 79 | series = drop_duplicated_slices(series) 80 | series = order_series(series, decreasing=False) 81 | return series 82 | 83 | @field 84 | def image(self, i): 85 | return np.moveaxis(stack_images(self._series(i), -1).astype(np.int16), 0, 1) 86 | 87 | @field 88 | def study_uid(self, i): 89 | return get_common_tag(self._series(i), 'StudyInstanceUID') 90 | 91 | @field 92 | def series_uid(self, i): 93 | return get_common_tag(self._series(i), 'SeriesInstanceUID') 94 | 95 | @field 96 | def sop_uids(self, i): 97 | return [str(get_tag(i, 'SOPInstanceUID')) for i in self._series(i)] 98 | 99 | @field 100 | def pixel_spacing(self, i): 101 | return get_pixel_spacing(self._series(i)).tolist() 102 | 103 | @field 104 | def slice_locations(self, i): 105 | return get_slice_locations(self._series(i)) 106 | 107 | @field 108 | def orientation_matrix(self, i): 109 | return get_orientation_matrix(self._series(i)) 110 | 111 | @field 112 | def conv_kernel(self, i): 113 | return get_common_tag(self._series(i), 'ConvolutionKernel', default=None) 114 | 115 | @field 116 | def kvp(self, i): 117 | return get_common_tag(self._series(i), 'KVP', default=None) 118 | 119 | @field 120 | def patient_id(self, i): 121 | return get_common_tag(self._series(i), 'PatientID', default=None) 122 | 123 | @field 124 | def study_date(self, i): 125 | return get_series_date(self._series(i)) 126 | 127 | @field 128 | def accession_number(self, i): 129 | return get_common_tag(self._series(i), 'AccessionNumber', default=None) 130 | -------------------------------------------------------------------------------- /amid/utils.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import datetime 3 | import functools 4 | import itertools 5 | import zipfile 6 | from gzip import GzipFile 7 | from os import PathLike 8 | from pathlib import Path 9 | from typing import List, Union 10 | 11 | import nibabel 12 | import numpy as np 13
| from dicom_csv import get_common_tag, order_series, stack_images 14 | from dicom_csv.exceptions import ConsistencyError, TagTypeError 15 | from pydicom import Dataset, dcmread 16 | 17 | 18 | Numeric = Union[float, int] 19 | PathOrStr = Union[str, PathLike] 20 | 21 | 22 | @contextlib.contextmanager 23 | def unpack(root: PathOrStr, relative: str, archive_root_name: str = None, archive_ext: str = None): 24 | """Provides the absolute path to the file in both scenarios: inside archive or inside folder. 25 | 26 | Parameters 27 | ---------- 28 | root : str, Path 29 | Absolute path to the downloaded archive or the unpacked archive root. 30 | relative : str, Path 31 | Relative file path inside the archive. The archive's root folder should be omitted. 32 | archive_root_name : str, Path, optional 33 | If `root` is an archive, its root folder name should be given. 34 | archive_ext: {'.zip'}, optional 35 | Compression algorithm used to create the archive. 36 | 37 | Returns 38 | ------- 39 | unpacked : Path 40 | Absolute file path to be opened. 41 | is_unpacked : {True, False} 42 | State of the reached file: `True` if it was found unpacked on disk, `False` if it is read from inside the archive. 43 | """ 44 | unpacked = Path(root) / relative 45 | 46 | if unpacked.exists(): 47 | yield unpacked, True 48 | elif archive_ext == '.zip': 49 | with zipfile.Path(root, str(Path(archive_root_name, relative))).open('rb') as unpacked: 50 | yield unpacked, False 51 | else: 52 | raise ValueError('Unexpected file path or unsupported compression algorithm.') 53 | 54 | 55 | @contextlib.contextmanager 56 | def open_nii_gz_file(unpacked): 57 | """Opens a ``.nii.gz`` file when it is packed inside an archive 58 | 59 | Examples 60 | -------- 61 | >>> with unpack('/path/to/archive.zip', 'relative/file/path', 'root', '.zip') as (unpacked, is_unpacked): 62 | >>> with open_nii_gz_file(unpacked) as image: 63 | >>> print(np.asarray(image.dataobj).shape) 64 | # (512, 512, 256) 65 | """ 66 | with GzipFile(fileobj=unpacked) as nii: 67 | nii = nibabel.FileHolder(fileobj=nii) 68 | yield nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 69 | 70 | 71 | def get_series_date(series): 72 | try: 73 | study_date = get_common_tag(series, 'StudyDate') 74 | except (TagTypeError, ConsistencyError): 75 | return 76 | 77 | if not isinstance(study_date, str) or not study_date.isnumeric() or len(study_date) != 8: 78 | return 79 | 80 | try: 81 | year = int(study_date[:4]) 82 | month = int(study_date[4:6]) 83 | day = int(study_date[6:]) 84 | except TypeError: 85 | return 86 | 87 | if year < 1972: # the year of creation of the first CT scanner 88 | return 89 | 90 | return datetime.date(year, month, day) 91 | 92 | 93 | def propagate_none(func): 94 | @functools.wraps(func) 95 | def wrapper(x, *args, **kwargs): 96 | return None if (x is None) else func(x, *args, **kwargs) 97 | 98 | return wrapper 99 | 100 | 101 | def deprecate(message=None): 102 | def decorator(func): 103 | return functools.wraps(func)(np.deprecate(message=message)(func)) 104 | 105 | return decorator 106 | 107 | 108 | def image_from_dicom_folder(folder: Union[str, Path]) -> np.ndarray: 109 | return stack_images(series_from_dicom_folder(folder)) 110 | 111 | 112 | def series_from_dicom_folder(folder: Union[str, Path]) -> List[Dataset]: 113 | return order_series([dcmread(p) for p in Path(folder).glob('*.dcm')]) 114 | 115 | 116 | # TODO: stolen from dpipe for now 117 | def mask_to_box(mask: np.ndarray): 118 | """ 119 | Find the smallest box that contains all true values of the ``mask``.
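
    A tiny illustrative sketch (the result is shown as a comment because the exact
    scalar types inside the returned lists depend on the numpy version):

    >>> mask = np.zeros((5, 5), dtype=bool)
    >>> mask[1:3, 2:4] = True
    >>> start, stop = mask_to_box(mask)
    >>> # start == [1, 2], stop == [3, 4], i.e. mask[start[0]:stop[0], start[1]:stop[1]] covers all True values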
120 | """ 121 | if not mask.any(): 122 | raise ValueError('The mask is empty.') 123 | 124 | start, stop = [], [] 125 | for ax in itertools.combinations(range(mask.ndim), mask.ndim - 1): 126 | nonzero = np.any(mask, axis=ax) 127 | if np.any(nonzero): 128 | left, right = np.where(nonzero)[0][[0, -1]] 129 | else: 130 | left, right = 0, 0 131 | start.insert(0, left) 132 | stop.insert(0, right + 1) 133 | return start, stop 134 | -------------------------------------------------------------------------------- /amid/crlm.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | from typing import Dict 3 | 4 | import highdicom 5 | import numpy as np 6 | from dicom_csv import get_orientation_matrix, get_slice_locations, get_voxel_spacing, stack_images 7 | from imops import restore_crop 8 | from more_itertools import locate 9 | 10 | from .internals import Dataset, licenses, register 11 | from .utils import series_from_dicom_folder 12 | 13 | 14 | @register( 15 | body_region='Abdomen', 16 | license=licenses.CC_BY_40, 17 | link='https://wiki.cancerimagingarchive.net/pages/viewpage.action?' 18 | 'pageId=89096268#89096268412b832037484784bd78caf58e052641', 19 | modality=('CT, SEG'), 20 | prep_data_size='11G', 21 | raw_data_size='11G', 22 | task=('Segmentation', 'Classification'), 23 | ) 24 | class CRLM(Dataset): 25 | """ 26 | Parameters 27 | ---------- 28 | root : str, Path, optional 29 | path to the folder containing the raw downloaded archives. 30 | If not provided, the cache is assumed to be already populated. 31 | 32 | 33 | Notes 34 | ----- 35 | Download links: 36 | https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=89096268#89096268b2cc35fce0664a2b875b5ec675ba9446 37 | 38 | This collection consists of DICOM images and DICOM Segmentation Objects (DSOs) 39 | for 197 patients with Colorectal Liver Metastases (CRLM). 40 | Comprised of Original DICOM CTs and Segmentations for each subject. 
41 | The segmentations include 'Liver', 'Liver_Remnant' 42 | (liver that will remain after surgery based on a preoperative CT plan), 43 | 'Hepatic' and 'Portal' veins, 44 | and 'Tumor_x', where 'x' denotes the various tumor occurrences in the case 45 | 46 | Examples 47 | -------- 48 | >>> # Place the downloaded archives in any folder and pass the path to the constructor: 49 | >>> ds = CRLM(root='/path/to/archives/root') 50 | >>> print(len(ds.ids)) 51 | # 197 52 | >>> print(ds.image(ds.ids[0]).shape) 53 | # (512, 512, 52) 54 | 55 | References 56 | ---------- 57 | """ 58 | 59 | @property 60 | def ids(self): 61 | return sorted(d.name for d in self.root.iterdir()) 62 | 63 | def _folders(self, i): 64 | case = self.root / i 65 | folders = tuple({p.parent for p in case.glob('*/*/*/*.dcm')}) 66 | return tuple(sorted(folders, key=lambda f: len(list(f.iterdir())))) 67 | 68 | def _series(self, i): 69 | return series_from_dicom_folder(self._folders(i)[1]) 70 | 71 | def image(self, i): 72 | return stack_images(self._series(i)) 73 | 74 | def mask(self, i) -> Dict[str, np.ndarray]: 75 | """Returns dict: {'liver': ..., 'hepatic': ..., 'tumor_x': ...}""" 76 | dicom_seg = highdicom.seg.segread(next(self._folders(i)[0].glob('*.dcm'))) 77 | series = self._series(i) 78 | image_sops = [s.SOPInstanceUID for s in series] 79 | seg_sops = [sop_uid for _, _, sop_uid in dicom_seg.get_source_image_uids()] 80 | 81 | sops = [sop for sop in image_sops if sop in set(seg_sops).intersection(image_sops)] 82 | seg_box_start = list(locate(image_sops, lambda i: i == sops[0]))[0] 83 | seg_box_stop = list(locate(image_sops, lambda i: i == sops[-1]))[0] 84 | 85 | image = self.image(i) 86 | seg_box = np.asarray(((0, 0, seg_box_start), (*np.atleast_1d(image.shape[:-1]), seg_box_stop + 1))) 87 | 88 | raw_masks = np.swapaxes( 89 | dicom_seg.get_pixels_by_source_instance( 90 | sops, 91 | ignore_spatial_locations=True, 92 | segment_numbers=dicom_seg.get_segment_numbers(), 93 | ), 94 | -1, 95 | 0, 96 | ) 97 | masks = list(map(partial(restore_crop, box=seg_box, shape=image.shape), raw_masks)) 98 | 99 | liver_mask = {'liver': masks[0].astype(bool)} 100 | # skip liver remnant 101 | veins = {'hepatic': masks[2].astype(bool), 'portal': masks[3].astype(bool)} 102 | tumors = {f'tumor_{i}': array.astype(bool) for i, array in enumerate(masks[4:])} 103 | 104 | return {**liver_mask, **veins, **tumors} 105 | 106 | def spacing(self, i): 107 | """Returns the voxel spacing along axes (x, y, z).""" 108 | return get_voxel_spacing(self._series(i)) 109 | 110 | def slice_locations(self, i): 111 | return get_slice_locations(self._series(i)) 112 | 113 | def affine(self, i): 114 | """Returns 4x4 matrix that gives the image's spatial orientation.""" 115 | return get_orientation_matrix(self._series(i)) 116 | -------------------------------------------------------------------------------- /amid/luna25.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from functools import cached_property 3 | from typing import NamedTuple, Sequence 4 | 5 | import numpy as np 6 | import pandas as pd 7 | import SimpleITK as sitk 8 | 9 | from .internals import Dataset, field, licenses, register 10 | 11 | 12 | class LUNA25Nodule(NamedTuple): 13 | coords: Sequence[float] 14 | lesion_id: int 15 | annotation_id: str 16 | nodule_id: str 17 | malignancy: bool 18 | center_voxel: Sequence[float] 19 | bbox: np.ndarray 20 | 21 | 22 | @register( 23 | body_region='Chest', 24 | license=licenses.CC_BY_40, 25 | 
link='https://luna25.grand-challenge.org/', 26 | modality='CT', 27 | prep_data_size='214G', 28 | raw_data_size='205G', 29 | task='Lung nodule malignancy risk estimation', 30 | ) 31 | class LUNA25(Dataset): 32 | """ 33 | The LUNA25 Challenge dataset is a comprehensive collection designed to support 34 | the development and validation of AI algorithms for lung nodule malignancy risk 35 | estimation using low-dose chest CT scans. In total, it contains 2120 patients 36 | and 4069 low-dose chest CT scans, with 555 annotated malignant nodules and 37 | 5608 benign nodules (3762 unique nodules, 348 of them are malignant). 38 | The dataset was acquired in participants who enrolled in the 39 | National Lung Cancer Screening Trial (NLST) between 2002 and 2004 in 40 | one of the 33 centers in the United States. 41 | 42 | Parameters 43 | ---------- 44 | root : str, Path, optional 45 | path to the folder containing `luna25_images` and `luna25_nodule_blocks` folders and 46 | `LUNA25_Public_Training_Development_Data.csv` file obtained by the instruction at 47 | https://luna25.grand-challenge.org/datasets/. 48 | If not provided, the cache is assumed to be already populated. 49 | 50 | Notes 51 | ----- 52 | Join the challenge at https://luna25.grand-challenge.org/. 53 | Then follow the download and extraction instructions at https://luna25.grand-challenge.org/datasets/. 54 | """ 55 | 56 | @property 57 | def ids(self): 58 | return [file.name[: -len('.mha')] for file in (self.root / 'luna25_images').iterdir()] 59 | 60 | def _sitk_image(self, i): 61 | return sitk.ReadImage(self.root / f'luna25_images/{i}.mha') 62 | 63 | @field 64 | def image(self, i): 65 | return sitk.GetArrayFromImage(self._sitk_image(i)) 66 | 67 | @field 68 | def spacing(self, i): 69 | return self._sitk_image(i).GetSpacing()[::-1] 70 | 71 | @cached_property 72 | def _data(self): 73 | return pd.read_csv(self.root / 'LUNA25_Public_Training_Development_Data.csv') 74 | 75 | def _data_rows(self, i): 76 | return self._data[self._data['SeriesInstanceUID'] == i] 77 | 78 | def _data_column_value(self, i, column_name): 79 | values = self._data_rows(i).get(column_name).unique() 80 | assert len(values) == 1 81 | value = values[0] 82 | assert not pd.isnull(value) 83 | return value 84 | 85 | @field 86 | def patient_id(self, i): 87 | return str(self._data_column_value(i, 'PatientID')) 88 | 89 | @field 90 | def study_date(self, i): 91 | study_date = str(self._data_column_value(i, 'StudyDate')) 92 | return datetime.strptime(study_date, "%Y%m%d").date() 93 | 94 | @field 95 | def age(self, i): 96 | return self._data_column_value(i, 'Age_at_StudyDate') 97 | 98 | @field 99 | def gender(self, i): 100 | return self._data_column_value(i, 'Gender') 101 | 102 | @field 103 | def nodules(self, i): 104 | nodules = [] 105 | sitk_image = self._sitk_image(i) 106 | shape = self.image(i).shape 107 | bbox_size = np.array([64, 128, 128]) # all nodule blocks in LUNA25 are of the same size 108 | for row in self._data_rows(i).itertuples(): 109 | coords = (row.CoordX, row.CoordY, row.CoordZ) 110 | center_voxel = sitk_image.TransformPhysicalPointToIndex(map(int, coords))[::-1] 111 | 112 | nodule_block_origin = self.get_nodule_block_metadata(row.AnnotationID)['origin'][::-1] 113 | bbox_start_point = sitk_image.TransformPhysicalPointToIndex(map(int, nodule_block_origin))[::-1] 114 | bbox = np.array([bbox_start_point, np.minimum(bbox_start_point + bbox_size, shape)]) 115 | nodules.append( 116 | LUNA25Nodule( 117 | coords=coords, 118 | lesion_id=row.LesionID, 119 | 
annotation_id=str(row.AnnotationID), 120 | nodule_id=str(row.NoduleID), 121 | malignancy=row.label, 122 | center_voxel=center_voxel, 123 | bbox=bbox, 124 | ) 125 | ) 126 | return nodules 127 | 128 | def get_nodule_block_image(self, annotation_id): 129 | return np.load(self.root / f'luna25_nodule_blocks/image/{annotation_id}.npy') 130 | 131 | def get_nodule_block_metadata(self, annotation_id): 132 | metadata = np.load(self.root / f'luna25_nodule_blocks/metadata/{annotation_id}.npy', allow_pickle=True) 133 | assert metadata.shape == () 134 | return metadata.item() 135 | -------------------------------------------------------------------------------- /amid/brats2021.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | from pathlib import Path 3 | from typing import Union 4 | from zipfile import ZipFile 5 | 6 | import nibabel 7 | import numpy as np 8 | import pandas as pd 9 | 10 | from .internals import Dataset, field, licenses, register 11 | from .utils import open_nii_gz_file, unpack 12 | 13 | 14 | @register( 15 | body_region='Head', 16 | license=licenses.CC_BYNCSA_40, 17 | link='http://www.braintumorsegmentation.org/', 18 | modality=('MRI T1', 'MRI T1Gd', 'MRI T2', 'MRI T2-FLAIR'), 19 | prep_data_size='8,96G', 20 | raw_data_size='15G', 21 | task=('Segmentation', 'Classification', 'Domain Adaptation'), 22 | ) 23 | class BraTS2021(Dataset): 24 | """ 25 | Parameters 26 | ---------- 27 | root : str, Path, optional 28 | path to the folder containing the raw downloaded archives. 29 | If not provided, the cache is assumed to be already populated. 30 | 31 | Notes 32 | ----- 33 | Download links: 34 | 2021: http://www.braintumorsegmentation.org/ 35 | 36 | Examples 37 | -------- 38 | >>> # Place the downloaded archives in any folder and pass the path to the constructor: 39 | >>> ds = BraTS2021(root='/path/to/archives/root') 40 | >>> print(len(ds.ids)) 41 | # 5880 42 | >>> print(ds.image(ds.ids[0]).shape) 43 | # (240, 240, 155) 44 | 45 | References 46 | ---------- 47 | """ 48 | 49 | @property 50 | def ids(self): 51 | return sorted(_get_ids_or_file(self.root, 'TrainingData') + _get_ids_or_file(self.root, 'ValidationData')) 52 | 53 | @field 54 | def fold(self, i) -> str: 55 | return 'ValidationData' if _get_ids_or_file(self.root, 'ValidationData', check_id=i) else 'TrainingData' 56 | 57 | @property 58 | def mapping21_17(self) -> pd.DataFrame: 59 | return pd.read_csv(self.root / 'BraTS21-17_Mapping.csv') 60 | 61 | @field 62 | def subject_id(self, i) -> str: 63 | return i.rsplit('_', 1)[0] 64 | 65 | @field 66 | def modality(self, i) -> str: 67 | return i.rsplit('_', 1)[1] 68 | 69 | @field 70 | def image(self, i) -> np.ndarray: 71 | root, relative = _get_ids_or_file(self.root, self.fold(i), check_id=i, return_image=True) 72 | with _load_nibabel_probably_from_zip(root, relative, '.', '.zip') as nii_image: 73 | return np.asarray(nii_image.dataobj) 74 | 75 | def mask(self, i) -> Union[np.ndarray, None]: 76 | if self.fold(i) == 'ValidationData': 77 | return None 78 | else: 79 | root, relative = _get_ids_or_file(self.root, self.fold(i), check_id=i, return_segm=True) 80 | with _load_nibabel_probably_from_zip(root, relative, '.', '.zip') as nii_image: 81 | return np.asarray(nii_image.dataobj) 82 | 83 | def spacing(self, i): 84 | """Returns the voxel spacing along axes (x, y, z).""" 85 | root, relative = _get_ids_or_file(self.root, self.fold(i), check_id=i, return_image=True) 86 | with _load_nibabel_probably_from_zip(root, relative, '.', '.zip') as 
nii_image: 87 | return tuple(nii_image.header['pixdim'][1:4]) 88 | 89 | @field 90 | def affine(self, i) -> np.ndarray: 91 | """Returns 4x4 matrix that gives the image's spatial orientation.""" 92 | root, relative = _get_ids_or_file(self.root, self.fold(i), check_id=i, return_image=True) 93 | with _load_nibabel_probably_from_zip(root, relative, '.', '.zip') as nii_image: 94 | return nii_image.affine 95 | 96 | 97 | def _get_ids_or_file( 98 | base_path, 99 | archive_name_part: str = 'TrainingData', 100 | check_id: str = None, 101 | return_image: bool = False, 102 | return_segm: bool = False, 103 | ): 104 | # TODO: implement the same functionality for folder extraction. 105 | ids = [] 106 | for archive in base_path.glob('*.zip'): 107 | if archive_name_part in archive.name: 108 | with ZipFile(archive) as zf: 109 | for zipinfo in zf.infolist(): 110 | if not zipinfo.is_dir(): 111 | file = Path(zipinfo.filename) 112 | _id = file.stem.replace('.nii', '') 113 | 114 | if 'seg' not in _id: 115 | ids.append(_id) 116 | 117 | if (check_id is not None) and (check_id == _id): 118 | if return_segm: 119 | return str(archive), str(file)[: -len('.nii.gz')].rsplit('_', 1)[0] + '_seg.nii.gz' 120 | 121 | if return_image: 122 | return str(archive), str(file) 123 | 124 | return True # if check_id in archive 125 | 126 | return ids if (check_id is None) else False # if check_id not in archive 127 | 128 | 129 | @contextlib.contextmanager 130 | def _load_nibabel_probably_from_zip(root: str, relative: str, archive_root_name: str = None, archive_ext: str = None): 131 | with unpack(root, relative, archive_root_name, archive_ext) as (unpacked, is_unpacked): 132 | if is_unpacked: 133 | yield nibabel.load(unpacked) 134 | else: 135 | with open_nii_gz_file(unpacked) as nii_image: 136 | yield nii_image 137 | -------------------------------------------------------------------------------- /amid/egd.py: -------------------------------------------------------------------------------- 1 | import nibabel as nb 2 | import numpy as np 3 | from deli import load 4 | 5 | from .internals import Dataset, field as _field, register 6 | 7 | 8 | @register( 9 | body_region='Head', 10 | license='EGD data license', 11 | link='https://xnat.bmia.nl/data/archive/projects/egd', 12 | modality=('FLAIR', 'MRI T1', 'MRI T1GD', 'MRI T2'), 13 | prep_data_size='107,49G', 14 | raw_data_size='40G', 15 | task='Segmentation', 16 | ) 17 | class EGD(Dataset): 18 | """ 19 | The Erasmus Glioma Database (EGD): Structural MRI scans, WHO 2016 subtypes, 20 | and segmentations of 774 patients with glioma [1]_. 21 | 22 | Parameters 23 | ---------- 24 | root : str, Path, optional 25 | path to the folder containing the raw downloaded archives. 26 | If not provided, the cache is assumed to be already populated. 27 | 28 | Notes 29 | ----- 30 | The access to the dataset could be requested at XNAT portal [https://xnat.bmia.nl/data/archive/projects/egd]. 31 | 32 | To download the data in the compatible structure we recommend to use 33 | egd-downloader script [https://zenodo.org/record/4761089#.YtZpLtJBxhF]. 34 | Please, refer to its README for further information. 35 | 36 | Examples 37 | -------- 38 | >>> # Place the downloaded archives in any folder and pass the path to the constructor: 39 | >>> egd = EGD(root='/path/to/downloaded/data/folder/') 40 | >>> print(len(egd.ids)) 41 | # 774 42 | >>> print(egd.t1gd(egd.ids[215]).shape) 43 | # (197, 233, 189) 44 | >>> print(egd.manufacturer(egd.ids[444])) 45 | # Philips Medical Systems 46 | 47 | References 48 | ---------- 49 | .. 
[1] van der Voort, Sebastian R., et al. "The Erasmus Glioma Database (EGD): Structural MRI scans, 50 | WHO 2016 subtypes, and segmentations of 774 patients with glioma." 51 | Data in brief 37 (2021): 107191. 52 | https://www.sciencedirect.com/science/article/pii/S2352340921004753 53 | 54 | """ 55 | 56 | @property 57 | def ids(self): 58 | result = [] 59 | for folder in (self.root / 'SUBJECTS').iterdir(): 60 | for suffix in 'FLAIR', 'T1', 'T1GD', 'T2': 61 | result.append(f'{folder.name}-{suffix}') 62 | 63 | return tuple(sorted(result)) 64 | 65 | @_field 66 | def brain_mask(self, i) -> np.ndarray: 67 | return nb.load(self.root / 'METADATA' / 'Brain_mask.nii.gz').get_fdata().astype(bool) 68 | 69 | @_field 70 | def deface_mask(self, i) -> np.ndarray: 71 | return nb.load(self.root / 'METADATA' / 'Deface_mask.nii.gz').get_fdata().astype(bool) 72 | 73 | def _image_file(self, i): 74 | i, suffix = i.rsplit('-', 1) 75 | return nb.load(self.root / 'SUBJECTS' / i / f'{suffix}.nii.gz') 76 | 77 | @_field 78 | def modality(self, i) -> str: 79 | _, suffix = i.rsplit('-', 1) 80 | return suffix 81 | 82 | @_field 83 | def subject_id(self, i) -> str: 84 | subject, _ = i.rsplit('-', 1) 85 | return subject 86 | 87 | @_field 88 | def affine(self, i) -> np.ndarray: 89 | return self._image_file(i).affine 90 | 91 | def spacing(self, i): 92 | # voxel spacing is [1, 1, 1] for all images in this dataset... 93 | return tuple(self._image_file(i).header['pixdim'][1:4]) 94 | 95 | @_field 96 | def image(self, i) -> np.ndarray: 97 | # intensities are not integer-valued in this dataset... 98 | return np.asarray(self._image_file(i).dataobj) 99 | 100 | def _metadata(self, i): 101 | i, _ = i.rsplit('-', 1) 102 | return load(self.root / 'SUBJECTS' / i / 'metadata.json') 103 | 104 | @_field 105 | def genetic_and_histological_label_idh(self, i) -> str: 106 | return self._metadata(i)['Genetic_and_Histological_labels']['IDH'] 107 | 108 | @_field 109 | def genetic_and_histological_label_1p19q(self, i) -> str: 110 | return self._metadata(i)['Genetic_and_Histological_labels']['1p19q'] 111 | 112 | @_field 113 | def genetic_and_histological_label_grade(self, i) -> str: 114 | return self._metadata(i)['Genetic_and_Histological_labels']['Grade'] 115 | 116 | @_field 117 | def age(self, i) -> float: 118 | return self._metadata(i)['Clinical_data']['Age'] 119 | 120 | @_field 121 | def sex(self, i) -> str: 122 | return self._metadata(i)['Clinical_data']['Sex'] 123 | 124 | @_field 125 | def observer(self, i) -> str: 126 | return self._metadata(i)['Segmentation_source']['Observer'] 127 | 128 | @_field 129 | def original_scan(self, i) -> str: 130 | return self._metadata(i)['Segmentation_source']['Original scan'] 131 | 132 | @_field 133 | def manufacturer(self, i) -> str: 134 | return self._metadata(i)['Scan_characteristics']['Manufacturer'] 135 | 136 | @_field 137 | def system(self, i) -> str: 138 | return self._metadata(i)['Scan_characteristics']['System'] 139 | 140 | @_field 141 | def field(self, i) -> str: 142 | return self._metadata(i)['Scan_characteristics']['Field'] 143 | 144 | @_field 145 | def mask(self, i) -> np.ndarray: 146 | i, _ = i.rsplit('-', 1) 147 | return nb.load(self.root / 'SUBJECTS' / i / 'MASK.nii.gz').get_fdata().astype(bool) 148 | -------------------------------------------------------------------------------- /amid/flare2022.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | import zipfile 3 | from pathlib import Path 4 | from typing import Union 5 | from zipfile import 
ZipFile 6 | 7 | import nibabel 8 | import numpy as np 9 | 10 | from .internals import Dataset, field, register 11 | 12 | 13 | @register( 14 | body_region='Abdomen', 15 | license=None, 16 | link='https://flare22.grand-challenge.org/', 17 | modality='CT', 18 | prep_data_size='347G', 19 | raw_data_size='247G', 20 | task='Semi-supervised abdominal organ segmentation', 21 | ) 22 | class FLARE2022(Dataset): 23 | """ 24 | An abdominal organ segmentation dataset for semi-supervised learning [1]_. 25 | 26 | The dataset was used at the MICCAI FLARE 2022 challenge. 27 | 28 | Parameters 29 | ---------- 30 | root : str, Path, optional 31 | path to the folder containing the raw downloaded archives. 32 | If not provided, the cache is assumed to be already populated. 33 | 34 | Notes 35 | ----- 36 | Download link: https://flare22.grand-challenge.org/Dataset/ 37 | 38 | The `root` folder should contain the two downloaded folders, namely: "Training" and "Validation". 39 | 40 | Examples 41 | -------- 42 | >>> # Place the downloaded folders in any folder and pass the path to the constructor: 43 | >>> ds = FLARE2022(root='/path/to/downloaded/data/folder/') 44 | >>> print(len(ds.ids)) 45 | # 2100 46 | >>> print(ds.image(ds.ids[0]).shape) 47 | # (512, 512, 110) 48 | >>> print(ds.mask(ds.ids[25]).shape) 49 | # (512, 512, 104) 50 | 51 | References 52 | ---------- 53 | .. [1] Ma, Jun, et al. "Fast and Low-GPU-memory abdomen CT organ segmentation: The FLARE challenge." 54 | Medical Image Analysis 82 (2022): 102616. 55 | """ 56 | 57 | @property 58 | def ids(self): 59 | result = set() 60 | 61 | # 50 Training Labeled cases 62 | archive = self.root / 'Training' / 'FLARE22_LabeledCase50' / 'images.zip' 63 | with ZipFile(archive) as zf: 64 | for file in zf.namelist(): 65 | result.add(f"TL{file.split('_')[-2]}") 66 | 67 | # 2000 Training Unlabeled cases 68 | for archive in (self.root / 'Training').glob('*.zip'): 69 | with ZipFile(archive) as zf: 70 | for file in zf.namelist(): 71 | if not file.endswith('.nii.gz'): 72 | continue 73 | 74 | file = Path(file) 75 | result.add(f"TU{file.name.split('_')[-2]}") 76 | 77 | # 50 Validation Unlabeled cases 78 | for file in (self.root / 'Validation').glob('*'): 79 | if not file.name.endswith('.nii.gz'): 80 | continue 81 | 82 | result.add(f"VU{file.name.split('_')[-2]}") 83 | 84 | return sorted(result) 85 | 86 | def _file(self, i): 87 | # 50 Training Labeled cases 88 | if i.startswith('TL'): 89 | archive = self.root / 'Training' / 'FLARE22_LabeledCase50' / 'images.zip' 90 | with ZipFile(archive) as zf: 91 | for file in zf.namelist(): 92 | if i[2:] in file: 93 | return zipfile.Path(archive, file) 94 | 95 | # 2000 Training Unlabeled cases 96 | for archive in (self.root / 'Training').glob('*.zip'): 97 | with ZipFile(archive) as zf: 98 | for file in zf.namelist(): 99 | if i[2:] in file: 100 | return zipfile.Path(archive, file) 101 | 102 | # 50 Validation Unlabeled cases 103 | if i.startswith('VU'): 104 | file = self.root / 'Validation' / f'FLARETs_{i[2:]}_0000.nii.gz' 105 | return file 106 | 107 | raise ValueError(f'Id "{i}" not found') 108 | 109 | @field 110 | def image(self, i) -> np.ndarray: 111 | with self._file(i).open('rb') as opened: 112 | with gzip.GzipFile(fileobj=opened) as nii: 113 | nii = nibabel.FileHolder(fileobj=nii) 114 | image = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 115 | return np.asarray(image.dataobj) 116 | 117 | @field 118 | def affine(self, i) -> np.ndarray: 119 | """The 4x4 matrix that gives the image's spatial orientation""" 120 | with 
self._file(i).open('rb') as opened: 121 | with gzip.GzipFile(fileobj=opened) as nii: 122 | nii = nibabel.FileHolder(fileobj=nii) 123 | image = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 124 | return image.affine 125 | 126 | @field 127 | def mask(self, i) -> Union[np.ndarray, None]: 128 | if not i.startswith('TL'): 129 | return None 130 | 131 | archive = self.root / 'Training' / 'FLARE22_LabeledCase50' / 'labels.zip' 132 | with ZipFile(archive) as zf: 133 | for file in zf.namelist(): 134 | if i[2:] in file: 135 | with zipfile.Path(archive, file).open('rb') as opened: 136 | with gzip.GzipFile(fileobj=opened) as nii: 137 | nii = nibabel.FileHolder(fileobj=nii) 138 | mask = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 139 | return np.asarray(mask.dataobj) 140 | -------------------------------------------------------------------------------- /amid/crossmoda.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import gzip 3 | import zipfile 4 | from pathlib import Path 5 | from typing import Union 6 | from zipfile import ZipFile 7 | 8 | import nibabel as nb 9 | import numpy as np 10 | import pandas as pd 11 | 12 | from .internals import Dataset, licenses, register 13 | 14 | 15 | @register( 16 | body_region='Head', 17 | license=licenses.CC_BYNCSA_40, 18 | link='https://zenodo.org/record/6504722#.YsgwnNJByV4', 19 | modality=('MRI T1c', 'MRI T2hr'), 20 | prep_data_size='8,96G', 21 | raw_data_size='17G', 22 | task=('Segmentation', 'Classification', 'Domain Adaptation'), 23 | ) 24 | class CrossMoDA(Dataset): 25 | """ 26 | Parameters 27 | ---------- 28 | root : str, Path, optional 29 | path to the folder containing the raw downloaded archives. 30 | If not provided, the cache is assumed to be already populated. 
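A minimal access sketch (the root path is a placeholder; `split`, `masks` and `koos_grade` are defined further below, and the latter two are only populated for source-domain training cases):
>>> ds = CrossMoDA(root='/path/to/archives/root')
>>> i = ds.ids[0]
>>> if ds.split(i) == 'training_source':
...     mask = ds.masks(i)        # combined mask: schwannoma == 1, cochlea == 2
...     grade = ds.koos_grade(i)  # Koos grade in [1..4], or -1 for post-operative cases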
31 | 32 | Notes 33 | ----- 34 | Download links: 35 | 2021 & 2022: https://zenodo.org/record/6504722#.YsgwnNJByV4 36 | 37 | Examples 38 | -------- 39 | >>> # Place the downloaded archives in any folder and pass the path to the constructor: 40 | >>> ds = CrossMoDA(root='/path/to/archives/root') 41 | >>> print(len(ds.ids)) 42 | # 484 43 | >>> print(ds.image(ds.ids[0]).shape) 44 | # (512, 512, 214) 45 | 46 | References 47 | ---------- 48 | """ 49 | 50 | @property 51 | def ids(self): 52 | result = set() 53 | for archive in self.root.glob('*.zip'): 54 | with ZipFile(archive) as zf: 55 | for zipinfo in zf.infolist(): 56 | if zipinfo.is_dir(): 57 | continue 58 | 59 | file = Path(zipinfo.filename) 60 | assert file.stem not in result, file.stem 61 | 62 | if 'Label' not in file.stem and file.suffix == '.gz': 63 | result.add('_'.join(file.stem.split('_')[:-1])) 64 | else: 65 | continue 66 | 67 | return sorted(result) 68 | 69 | @property 70 | def train_source_df(self): 71 | return pd.read_csv(self.root / 'infos_source_training.csv', index_col='crossmoda_name') 72 | 73 | def _file(self, i): 74 | for archive in self.root.glob('*.zip'): 75 | with ZipFile(archive) as zf: 76 | for zipinfo in zf.infolist(): 77 | if i == '_'.join(Path(zipinfo.filename).stem.split('_')[:-1]) and 'Label' not in zipinfo.filename: 78 | return zipfile.Path(archive, zipinfo.filename) 79 | 80 | raise ValueError(f'Id "{i}" not found') 81 | 82 | def image(self, i) -> Union[np.ndarray, None]: 83 | with open_nii_gz_file(self._file(i)) as nii_image: 84 | return np.asarray(nii_image.dataobj) 85 | 86 | def spacing(self, i): 87 | """Returns pixel spacing along axes (x, y, z)""" 88 | with open_nii_gz_file(self._file(i)) as nii_image: 89 | return tuple(nii_image.header['pixdim'][1:4]) 90 | 91 | def affine(self, i): 92 | """The 4x4 matrix that gives the image's spatial orientation""" 93 | with open_nii_gz_file(self._file(i)) as nii_image: 94 | return nii_image.affine 95 | 96 | def split(self, i) -> str: 97 | """The split in which this entry is contained: training_source, training_target, validation""" 98 | file = self._file(i) 99 | idx = int(file.name.split('_')[2]) 100 | dataset = file.name.split('_')[1] 101 | 102 | if dataset == 'ldn': 103 | if 1 <= idx < 106: 104 | return 'training_source' 105 | elif 106 <= idx < 211: 106 | return 'training_target' 107 | elif 211 <= idx < 243: 108 | return 'validation' 109 | 110 | elif dataset == 'etz': 111 | if 0 <= idx < 105: 112 | return 'training_source' 113 | elif 105 <= idx < 210: 114 | return 'training_target' 115 | elif 210 <= idx < 242: 116 | return 'validation' 117 | 118 | raise ValueError(f'Cannot find split for the file: {file}') 119 | 120 | def year(self, i) -> int: 121 | """The year in which this entry was published: 2021 or 2022""" 122 | return int(self._file(i).name[9:13]) 123 | 124 | def masks(self, i): 125 | """Combined mask of schwannoma and cochlea (1 and 2 respectively)""" 126 | file = self._file(i) 127 | if 'T2' not in file.name: 128 | with open_nii_gz_file(file.parent / file.name.replace('ceT1', 'Label')) as nii_image: 129 | return nii_image.get_fdata().astype(np.uint8) 130 | 131 | def koos_grade(self, i): 132 | """VS Tumour characteristic according to Koos grading scale: [1..4] or (-1 - post operative)""" 133 | if self.split(i) == 'training_source': 134 | grade = self.train_source_df.loc[i, 'koos'] 135 | return -1 if (grade == 'post-operative-london') else int(grade) 136 | 137 | 138 | # TODO: sync with amid.utils 139 | @contextlib.contextmanager 140 | def open_nii_gz_file(file): 141 
| with file.open('rb') as opened: 142 | with gzip.GzipFile(fileobj=opened) as nii: 143 | nii = nb.FileHolder(fileobj=nii) 144 | yield nb.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 145 | -------------------------------------------------------------------------------- /docs/recipes/RSNABreastCancer.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "source": [ 6 | "## Normalization" 7 | ], 8 | "metadata": { 9 | "collapsed": false 10 | } 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "outputs": [], 16 | "source": [ 17 | "from connectome import Transform\n", 18 | "\n", 19 | "\n", 20 | "class Normalize(Transform):\n", 21 | " __inherit__ = True\n", 22 | "\n", 23 | " def image(image, padding_value, intensity_sign):\n", 24 | " if padding_value is not None:\n", 25 | " if padding_value > 0:\n", 26 | " return padding_value - image\n", 27 | " return image\n", 28 | "\n", 29 | " if intensity_sign == 1:\n", 30 | " return image.max() - image\n", 31 | "\n", 32 | " return image" 33 | ], 34 | "metadata": { 35 | "collapsed": false 36 | } 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "source": [ 41 | "## Zoom to reduce image size" 42 | ], 43 | "metadata": { 44 | "collapsed": false 45 | } 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "outputs": [], 51 | "source": [ 52 | "from connectome import Apply\n", 53 | "from scipy.ndimage import zoom\n", 54 | "\n", 55 | "# 0.25 - is the downsample factor. It should probably be tuned via cross-validation\n", 56 | "Zoom = Apply(image=lambda x: zoom(np.float32(x), 0.25, order=1))" 57 | ], 58 | "metadata": { 59 | "collapsed": false 60 | } 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "source": [ 65 | "## Artifacts and background removal" 66 | ], 67 | "metadata": { 68 | "collapsed": false 69 | } 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "outputs": [], 75 | "source": [ 76 | "from connectome import Transform\n", 77 | "from skimage.morphology import label\n", 78 | "\n", 79 | "\n", 80 | "class GreatestComponent(Transform):\n", 81 | " __inherit__ = True\n", 82 | "\n", 83 | " def image(image):\n", 84 | " lbl = label(image > 0)\n", 85 | " values, counts = np.unique(lbl, return_counts=True)\n", 86 | " foreground = values != 0\n", 87 | " component = values[foreground][counts[foreground].argmax()]\n", 88 | " # select all the components greater than the background\n", 89 | " # + the greatest foreground component\n", 90 | " components = set(values[counts > counts[~foreground]]) | {component}\n", 91 | " if len(components) > 1:\n", 92 | " # if there are several components - pick the one with the greatest intensity\n", 93 | " component = max(components, key=lambda c: image[lbl == c].mean())\n", 94 | "\n", 95 | " return image * (lbl == component)\n", 96 | "\n", 97 | "\n", 98 | "class CropBackground(Transform):\n", 99 | " __inherit__ = True\n", 100 | "\n", 101 | " def image(image):\n", 102 | " mask = image > 0\n", 103 | " xs, = mask.any(0).nonzero()\n", 104 | " ys, = mask.any(1).nonzero()\n", 105 | " return image[ys.min():ys.max() + 1, xs.min():xs.max() + 1]" 106 | ], 107 | "metadata": { 108 | "collapsed": false 109 | } 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "source": [ 114 | "## Data augmentation" 115 | ], 116 | "metadata": { 117 | "collapsed": false 118 | } 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "outputs": [], 124 | "source": [ 
125 | "from connectome import Transform, impure\n", 126 | "import numpy as np\n", 127 | "\n", 128 | "\n", 129 | "class RandomFlip(Transform):\n", 130 | " __inherit__ = True\n", 131 | "\n", 132 | " @impure\n", 133 | " def _flip():\n", 134 | " return np.random.binomial(1, 0.5)\n", 135 | "\n", 136 | " def image(image, _flip):\n", 137 | " if _flip:\n", 138 | " return np.flip(image, axis=1)\n", 139 | " return image" 140 | ], 141 | "metadata": { 142 | "collapsed": false 143 | } 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "source": [ 148 | "## Combining it all together" 149 | ], 150 | "metadata": { 151 | "collapsed": false 152 | } 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "outputs": [], 158 | "source": [ 159 | "from amid.rsna_bc import RSNABreastCancer\n", 160 | "from connectome import Chain\n", 161 | "\n", 162 | "ds = Chain(\n", 163 | " RSNABreastCancer('/path/to/downloaded/folder'),\n", 164 | " Normalize(),\n", 165 | " Apply(image=lambda x: zoom(np.float32(x), 0.25, order=1)),\n", 166 | " GreatestComponent(),\n", 167 | " CropBackground(),\n", 168 | "\n", 169 | " # aug\n", 170 | " RandomFlip(),\n", 171 | ")" 172 | ], 173 | "metadata": { 174 | "collapsed": false 175 | } 176 | } 177 | ], 178 | "metadata": { 179 | "kernelspec": { 180 | "display_name": "Python 3", 181 | "language": "python", 182 | "name": "python3" 183 | }, 184 | "language_info": { 185 | "codemirror_mode": { 186 | "name": "ipython", 187 | "version": 2 188 | }, 189 | "file_extension": ".py", 190 | "mimetype": "text/x-python", 191 | "name": "python", 192 | "nbconvert_exporter": "python", 193 | "pygments_lexer": "ipython2", 194 | "version": "2.7.6" 195 | } 196 | }, 197 | "nbformat": 4, 198 | "nbformat_minor": 0 199 | } 200 | -------------------------------------------------------------------------------- /amid/ct_ich.py: -------------------------------------------------------------------------------- 1 | import nibabel as nb 2 | import numpy as np 3 | import pandas as pd 4 | 5 | from .internals import Dataset, field, licenses, register 6 | 7 | 8 | @register( 9 | body_region='Head', 10 | license=licenses.PhysioNet_RHD_150, 11 | link='https://physionet.org/content/ct-ich/1.3.1/', 12 | modality='CT', 13 | prep_data_size='661M', 14 | raw_data_size='2,8G', 15 | task='Intracranial hemorrhage segmentation', 16 | ) 17 | class CT_ICH(Dataset): 18 | """ 19 | (C)omputed (T)omography Images for (I)ntracranial (H)emorrhage Detection and (S)egmentation. 20 | 21 | This dataset contains 75 head CT scans including 36 scans for patients diagnosed with 22 | intracranial hemorrhage with the following types: 23 | Intraventricular, Intraparenchymal, Subarachnoid, Epidural and Subdural. 24 | 25 | Parameters 26 | ---------- 27 | root : str, Path, optional 28 | path to the folder containing the raw downloaded archives. 29 | If not provided, the cache is assumed to be already populated. 30 | 31 | Notes 32 | ----- 33 | Data can be downloaded here: https://physionet.org/content/ct-ich/1.3.1/. 34 | Then, the folder with raw downloaded data should contain folders `ct_scans` and `masks` along with other files. 
35 | 36 | Examples 37 | -------- 38 | >>> # Place the downloaded archives in any folder and pass the path to the constructor: 39 | >>> ds = CT_ICH(root='/path/to/downloaded/data/folder/') 40 | >>> print(len(ds.ids)) 41 | # 75 42 | >>> print(ds.image(ds.ids[0]).shape) 43 | # (512, 512, 39) 44 | >>> print(ds.mask(ds.ids[0]).shape) 45 | # (512, 512, 39) 46 | """ 47 | 48 | @property 49 | def ids(self): 50 | result = [f'ct_ich_{uid:0=3d}' for uid in [*range(49, 59), *range(66, 131)]] 51 | return tuple(sorted(result)) 52 | 53 | def _image_file(self, i): 54 | num_id = i.split('_')[-1] 55 | return nb.load(self.root / 'ct_scans' / f'{num_id}.nii') 56 | 57 | @field 58 | def image(self, i) -> np.ndarray: 59 | # most CT/MRI scans are integer-valued, this will help us improve compression rates 60 | return np.int16(self._image_file(i).get_fdata()) 61 | 62 | @field 63 | def mask(self, i) -> np.ndarray: 64 | num_id = i.split('_')[-1] 65 | mask_path = self.root / 'masks' / f'{num_id}.nii' 66 | ct_scan_nifti = nb.load(mask_path) 67 | return ct_scan_nifti.get_fdata().astype(bool) 68 | 69 | @field 70 | def affine(self, i) -> np.ndarray: 71 | """The 4x4 matrix that gives the image's spatial orientation.""" 72 | return self._image_file(i).affine 73 | 74 | def spacing(self, i): 75 | """Returns voxel spacing along axes (x, y, z).""" 76 | return tuple(self._image_file(i).header['pixdim'][1:4]) 77 | 78 | @property 79 | def _patient_metadata(self): 80 | return pd.read_csv(self.root / 'Patient_demographics.csv', index_col='Patient Number') 81 | 82 | @property 83 | def _diagnosis_metadata(self): 84 | return pd.read_csv(self.root / 'hemorrhage_diagnosis_raw_ct.csv') 85 | 86 | def _row(self, i): 87 | patient_id = int(i.split('_')[-1]) 88 | return self._patient_metadata.loc[patient_id] 89 | 90 | @field 91 | def age(self, i) -> float: 92 | return self._row(i)['Age\n(years)'] 93 | 94 | @field 95 | def sex(self, i) -> str: 96 | return self._row(i)['Gender'] 97 | 98 | @field 99 | def intraventricular_hemorrhage(self, i) -> bool: 100 | """Returns True if hemorrhage exists and its type is intraventricular.""" 101 | num_id = int(i.split('_')[-1]) 102 | return str(self._patient_metadata['Hemorrhage type based on the radiologists diagnosis '].loc[num_id]) != 'nan' 103 | 104 | @field 105 | def intraparenchymal_hemorrhage(self, i) -> bool: 106 | """Returns True if hemorrhage was diagnosed and its type is intraparenchymal.""" 107 | num_id = int(i.split('_')[-1]) 108 | return str(self._patient_metadata['Unnamed: 4'].loc[num_id]) != 'nan' 109 | 110 | @field 111 | def subarachnoid_hemorrhage(self, i) -> bool: 112 | """Returns True if hemorrhage was diagnosed and its type is subarachnoid.""" 113 | num_id = int(i.split('_')[-1]) 114 | return str(self._patient_metadata['Unnamed: 5'].loc[num_id]) != 'nan' 115 | 116 | @field 117 | def epidural_hemorrhage(self, i) -> bool: 118 | """Returns True if hemorrhage was diagnosed and its type is epidural.""" 119 | num_id = int(i.split('_')[-1]) 120 | return str(self._patient_metadata['Unnamed: 6'].loc[num_id]) != 'nan' 121 | 122 | @field 123 | def subdural_hemorrhage(self, i) -> bool: 124 | """Returns True if hemorrhage was diagnosed and its type is subdural.""" 125 | num_id = int(i.split('_')[-1]) 126 | return str(self._patient_metadata['Unnamed: 7'].loc[num_id]) != 'nan' 127 | 128 | @field 129 | def fracture(self, i) -> bool: 130 | """Returns True if skull fracture was diagnosed.""" 131 | num_id = int(i.split('_')[-1]) 132 | return str(self._patient_metadata['Fracture (yes 1/no 0)'].loc[num_id]) 
!= 'nan' 133 | 134 | @field 135 | def notes(self, i) -> str: 136 | """Returns special notes if they exist.""" 137 | num_id = int(i.split('_')[-1]) 138 | result = str(self._patient_metadata['Note1'].loc[num_id]) 139 | return result if result != 'nan' else None 140 | 141 | @field 142 | def hemorrhage_diagnosis_raw_metadata(self, i): 143 | num_id = int(i.split('_')[-1]) 144 | return self._diagnosis_metadata[self._diagnosis_metadata['PatientNumber'] == num_id] 145 | -------------------------------------------------------------------------------- /amid/verse.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | import json 3 | import zipfile 4 | from pathlib import Path 5 | from typing import Dict, Tuple, Union 6 | from zipfile import ZipFile 7 | 8 | import nibabel 9 | import numpy as np 10 | 11 | from .internals import Dataset, field, licenses, register 12 | 13 | 14 | @register( 15 | body_region=('Thorax', 'Abdomen'), 16 | modality='CT', 17 | task='Vertebrae Segmentation', 18 | link='https://osf.io/4skx2/', 19 | raw_data_size='97G', 20 | license=licenses.CC_BYSA_40, 21 | ) 22 | class VerSe(Dataset): 23 | """ 24 | A Vertebral Segmentation Dataset with Fracture Grading [1]_ 25 | 26 | The dataset was used in the MICCAI-2019 and MICCAI-2020 Vertebrae Segmentation Challenges. 27 | 28 | Parameters 29 | ---------- 30 | root : str, Path, optional 31 | path to the folder containing the raw downloaded archives. 32 | If not provided, the cache is assumed to be already populated. 33 | 34 | Notes 35 | ----- 36 | Download links: 37 | 2019: https://osf.io/jtfa5/ 38 | 2020: https://osf.io/4skx2/ 39 | 40 | Examples 41 | -------- 42 | >>> # Place the downloaded archives in any folder and pass the path to the constructor: 43 | >>> ds = VerSe(root='/path/to/archives/root') 44 | >>> print(len(ds.ids)) 45 | # 374 46 | >>> print(ds.image(ds.ids[0]).shape) 47 | # (512, 512, 214) 48 | 49 | References 50 | ---------- 51 | .. [1] Löffler MT, Sekuboyina A, Jacob A, et al. A Vertebral Segmentation Dataset with Fracture Grading. 52 | Radiol Artif Intell. 2020;2(4):e190138. Published 2020 Jul 29. 
doi:10.1148/ryai.2020190138 53 | """ 54 | 55 | @property 56 | def ids(self): 57 | result = set() 58 | for archive in self.root.glob('*.zip'): 59 | with ZipFile(archive) as zf: 60 | for file in zf.namelist(): 61 | if '/rawdata/' not in file: 62 | continue 63 | 64 | file = Path(file) 65 | patient = file.parent.name[4:] 66 | name = file.name 67 | if 'split' in name: 68 | i = name.split('split')[1][1:] 69 | i = i.split('_')[0] 70 | else: 71 | i = patient 72 | 73 | assert i not in result, i 74 | result.add(i) 75 | 76 | return sorted(result) 77 | 78 | def _file(self, i): 79 | for archive in self.root.glob('*.zip'): 80 | with ZipFile(archive) as zf: 81 | for file in zf.namelist(): 82 | if '/rawdata/' in file and i in file: 83 | return zipfile.Path(archive, file) 84 | 85 | raise ValueError(f'Id "{i}" not found') 86 | 87 | @field 88 | def image(self, i) -> np.ndarray: 89 | with self._file(i).open('rb') as opened: 90 | with gzip.GzipFile(fileobj=opened) as nii: 91 | nii = nibabel.FileHolder(fileobj=nii) 92 | image = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 93 | # most ct scans are integer-valued, this will help us improve compression rates 94 | # (instead of using `image.get_fdata()`) 95 | return np.asarray(image.dataobj) 96 | 97 | @field 98 | def affine(self, i) -> np.ndarray: 99 | """The 4x4 matrix that gives the image's spatial orientation""" 100 | with self._file(i).open('rb') as opened: 101 | with gzip.GzipFile(fileobj=opened) as nii: 102 | nii = nibabel.FileHolder(fileobj=nii) 103 | image = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 104 | return image.affine 105 | 106 | @field 107 | def split(self, i) -> str: 108 | """The split in which this entry is contained: training, validate, test""" 109 | # it's ugly, but it gets the job done (; 110 | return self._file(i).parent.parent.parent.name.split('_')[-1].split('9')[-1] 111 | 112 | @field 113 | def patient(self, i) -> str: 114 | """The unique patient id""" 115 | return self._file(i).parent.name[4:] 116 | 117 | @field 118 | def year(self, i) -> int: 119 | """The year in which this entry was published: 2019, 2020""" 120 | year = self._file(i).parent.parent.parent.name 121 | if year.startswith('dataset-verse'): 122 | assert '19' in year 123 | return 2019 124 | return 2020 125 | 126 | def _derivatives(self, i): 127 | file = self._file(i) 128 | return file.parent.parent.parent / 'derivatives' / file.parent.name 129 | 130 | @field 131 | def centers(self, i) -> Dict[str, Tuple[int, int, int]]: 132 | """Vertebrae centers in format {label: [x, y, z]}""" 133 | ann = [f for f in self._derivatives(i).iterdir() if f.name.endswith('.json') and i in f.name] 134 | if not ann: 135 | return {} 136 | assert len(ann) == 1 137 | (ann,) = ann 138 | 139 | with ann.open() as file: 140 | ann = json.load(file) 141 | 142 | return {k['label']: (k['X'], k['Y'], k['Z']) for k in ann[1:]} 143 | 144 | @field 145 | def masks(self, i) -> Union[np.ndarray, None]: 146 | """Vertebrae masks""" 147 | ann = [f for f in self._derivatives(i).iterdir() if f.name.endswith('.nii.gz') and i in f.name] 148 | if not ann: 149 | return 150 | assert len(ann) == 1 151 | (ann,) = ann 152 | 153 | with ann.open('rb') as opened: 154 | with gzip.GzipFile(fileobj=opened) as nii: 155 | nii = nibabel.FileHolder(fileobj=nii) 156 | mask = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 157 | return mask.get_fdata().astype(np.uint8) 158 | -------------------------------------------------------------------------------- /amid/mood.py: 
-------------------------------------------------------------------------------- 1 | import contextlib 2 | import gzip 3 | import zipfile 4 | from pathlib import Path 5 | from zipfile import ZipFile 6 | 7 | import nibabel as nb 8 | import numpy as np 9 | 10 | from .internals import Dataset, field, register 11 | 12 | 13 | @register( 14 | body_region=('Head', 'Abdominal'), 15 | license=None, # FIXME: inherit licenses from the original datasets... 16 | link='http://medicalood.dkfz.de/web/', 17 | modality=('MRI', 'CT'), 18 | prep_data_size='405G', 19 | raw_data_size='120G', 20 | task='Out-of-distribution detection', 21 | ) 22 | class MOOD(Dataset): 23 | """ 24 | A (M)edival (O)ut-(O)f-(D)istribution analysis challenge [1]_ 25 | 26 | This dataset contains raw brain MRI and abdominal CT images. 27 | 28 | Number of training samples: 29 | - Brain: 800 scans ( 256 x 256 x 256 ) 30 | - Abdominal: 550 scans ( 512 x 512 x 512 ) 31 | 32 | For each setup there are 4 toy test samples with OOD cases. 33 | 34 | Parameters 35 | ---------- 36 | root : str, Path, optional 37 | path to the folder containing the raw downloaded archives. 38 | If not provided, the cache is assumed to be already populated. 39 | 40 | Notes 41 | ----- 42 | Follow the download instructions at https://www.synapse.org/#!Synapse:syn21343101/wiki/599515. 43 | 44 | Then, the folder with raw downloaded data should contain four zip archives with data 45 | (`abdom_toy.zip`, `abdom_train.zip`, `brain_toy.zip` and `brain_train.zip`). 46 | 47 | Examples 48 | -------- 49 | >>> # Place the downloaded archives in any folder and pass the path to the constructor: 50 | >>> ds = MOOD(root='/path/to/downloaded/data/folder/') 51 | >>> print(len(ds.ids)) 52 | # 1358 53 | >>> print(ds.image(ds.ids[0]).shape) 54 | # (512, 512, 512) 55 | >>> print(ds.pixel_label(ds.ids[0]).shape) 56 | # (512, 512, 512) 57 | 58 | References 59 | ---------- 60 | .. [1] Zimmerer, Petersen, et al. "Medical Out-of-Distribution Analysis Challenge 2022." 61 | doi: 10.5281/zenodo.6362313 (2022). 
62 | """ 63 | 64 | @property 65 | def ids(self): 66 | result = set() 67 | # zip archives for train images: 68 | for archive in self.root.glob('*.zip'): 69 | if 'brain' in str(archive): # define whether it is brain (MRI) or abdominal (CT) 70 | task = 'brain' 71 | else: 72 | task = 'abdom' 73 | 74 | if 'toy' in str(archive): # fold - train or toy test 75 | fold = 'toy' 76 | else: 77 | fold = 'train' 78 | 79 | with ZipFile(archive) as zf: 80 | for zipinfo in zf.infolist(): 81 | if zipinfo.is_dir(): 82 | continue 83 | 84 | file_stem = Path(zipinfo.filename).stem 85 | if '.nii' in file_stem: 86 | if fold == 'train': 87 | result.add(f'mood_{task}_{fold}_{file_stem.split(".nii")[0]}') 88 | # fold == 'toy' 89 | else: 90 | result.add(f'mood_{task}_{file_stem.split(".nii")[0]}') 91 | 92 | return tuple(sorted(result)) 93 | 94 | @field 95 | def fold(self, i): 96 | """Returns fold: train or toy (test).""" 97 | if 'train' in i: 98 | return 'train' 99 | # if 'toy' in i: 100 | return 'toy' 101 | 102 | @field 103 | def task(self, i): 104 | """Returns task: brain (MRI) or abdominal (CT).""" 105 | if 'brain' in i: 106 | return 'brain' 107 | # if 'abdom' in i: 108 | return 'abdom' 109 | 110 | def _file(self, i): 111 | task, fold, num_id = i.split('_')[-3:] 112 | if fold == 'train': 113 | return zipfile.Path(self.root / f'{task}_{fold}.zip', f'{task}_{fold}/{num_id}.nii.gz') 114 | return zipfile.Path(self.root / f'{task}_{fold}.zip', f'toy/toy_{num_id}.nii.gz') 115 | 116 | @field 117 | def image(self, i): 118 | with open_nii_gz_file(self._file(i)) as nii_image: 119 | return np.asarray(nii_image.dataobj) 120 | 121 | @field 122 | def affine(self, i): 123 | """The 4x4 matrix that gives the image's spatial orientation.""" 124 | with open_nii_gz_file(self._file(i)) as nii_image: 125 | return nii_image.affine 126 | 127 | def spacing(self, i): 128 | """Returns voxel spacing along axes (x, y, z).""" 129 | with open_nii_gz_file(self._file(i)) as nii_image: 130 | return tuple(nii_image.header['pixdim'][1:4]) 131 | 132 | @field 133 | def sample_label(self, i): 134 | """ 135 | Returns sample-level OOD score for toy examples and None otherwise. 136 | 0 indicates no abnormality and 1 indicates abnormal input. 137 | """ 138 | file = self._file(i) 139 | if 'toy' in file.name: 140 | with (file.parent.parent / 'toy_label/sample' / f'{file.name}.txt').open('r') as nii: 141 | return int(nii.read()) 142 | 143 | @field 144 | def pixel_label(self, i): 145 | """ 146 | Returns voxel-level OOD scores for toy examples and None otherwise. 147 | 0 indicates no abnormality and 1 indicates abnormal input. 
148 | """ 149 | file = self._file(i) 150 | if 'toy' in file.name: 151 | with open_nii_gz_file(file.parent.parent / 'toy_label/pixel' / file.name) as nii_image: 152 | return np.bool_(nii_image.get_fdata()) 153 | 154 | 155 | # TODO: sync with amid.utils 156 | @contextlib.contextmanager 157 | def open_nii_gz_file(file): 158 | with file.open('rb') as opened: 159 | with gzip.GzipFile(fileobj=opened) as nii: 160 | nii = nb.FileHolder(fileobj=nii) 161 | yield nb.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 162 | -------------------------------------------------------------------------------- /amid/cancer_500/dataset.py: -------------------------------------------------------------------------------- 1 | import codecs 2 | import json 3 | import warnings 4 | from functools import cached_property 5 | from pathlib import Path 6 | 7 | import numpy as np 8 | import pydicom 9 | from dicom_csv import ( 10 | get_common_tag, 11 | get_orientation_matrix, 12 | get_pixel_spacing, 13 | get_slice_locations, 14 | get_tag, 15 | order_series, 16 | stack_images, 17 | ) 18 | from dicom_csv.exceptions import TagMissingError 19 | from tqdm.auto import tqdm 20 | 21 | from ..internals import Dataset, field, register 22 | from ..utils import get_series_date 23 | from .nodules import get_nodules 24 | 25 | 26 | @register( 27 | body_region='Thorax', 28 | modality='CT', 29 | task='Lung Cancer Detection', 30 | link='https://mosmed.ai/en/datasets/mosmeddata-kt-s-priznakami-raka-legkogo-tip-viii/', 31 | prep_data_size='103G', 32 | raw_data_size='187G', 33 | ) 34 | class MoscowCancer500(Dataset): 35 | """ 36 | The Moscow Radiology Cancer-500 dataset. 37 | 38 | Parameters 39 | ---------- 40 | root : str, Path, optional 41 | path to the folder containing the raw downloaded files. 42 | If not provided, the cache is assumed to be already populated. 43 | 44 | 45 | Notes 46 | ----- 47 | Download links: 48 | https://mosmed.ai/en/datasets/mosmeddata-kt-s-priznakami-raka-legkogo-tip-viii/ 49 | After pressing the `download` button you will have to provide an email address to which further instructions 50 | will be sent. 
51 | 52 | Examples 53 | -------- 54 | >>> # Place the downloaded files in any folder and pass the path to the constructor: 55 | >>> ds = MoscowCancer500(root='/path/to/files/root') 56 | >>> print(len(ds.ids)) 57 | # 979 58 | >>> print(ds.image(ds.ids[0]).shape) 59 | # (512, 512, 67) 60 | """ 61 | 62 | @cached_property 63 | def _mapping(self): 64 | path = self.root / 'series-to-files.json' 65 | if not path.exists(): 66 | mapping = {} 67 | for file in tqdm( 68 | self.root.rglob('*'), total=sum(1 for _ in self.root.rglob('*')), desc='Analyzing folder structure' 69 | ): 70 | if file.is_dir(): 71 | continue 72 | 73 | series = pydicom.dcmread(file, specific_tags=[(0x0020, 0x000E)]).SeriesInstanceUID 74 | mapping[series].append(str(file.relative_to(self.root))) 75 | 76 | with open(path, 'w') as file: 77 | json.dump(mapping, file) 78 | return mapping 79 | 80 | with open(path) as file: 81 | return json.load(file) 82 | 83 | @property 84 | def ids(self): 85 | # this id has an undefined image orientation 86 | ignore = {'1.2.643.5.1.13.13.12.2.77.8252.604378326291403.583548115656123.'} 87 | return tuple(sorted(set(self._mapping) - ignore)) 88 | 89 | def _series(self, i): 90 | series = [pydicom.dcmread(Path(self.root, 'dicom', f)) for f in self._mapping[i]] 91 | series = order_series(series, decreasing=False) 92 | return series 93 | 94 | @field 95 | def image(self, i): 96 | x = stack_images(self._series(i), -1).astype(np.int16) 97 | # DICOM specifies that the first 2 axes are (y, x). let's fix that 98 | return np.moveaxis(x, 0, 1) 99 | 100 | @field 101 | def study_uid(self, i): 102 | return get_common_tag(self._series(i), 'StudyInstanceUID') 103 | 104 | @field 105 | def series_uid(self, i): 106 | return get_common_tag(self._series(i), 'SeriesInstanceUID') 107 | 108 | @field 109 | def sop_uids(self, i): 110 | return [str(get_tag(i, 'SOPInstanceUID')) for i in self._series(i)] 111 | 112 | @field 113 | def pixel_spacing(self, i): 114 | return get_pixel_spacing(self._series(i)).tolist() 115 | 116 | @field 117 | def slice_locations(self, i): 118 | return get_slice_locations(self._series(i)) 119 | 120 | @field 121 | def orientation_matrix(self, i): 122 | return get_orientation_matrix(self._series(i)) 123 | 124 | @field 125 | def instance_numbers(self, i): 126 | try: 127 | instance_numbers = [int(get_tag(i, 'InstanceNumber')) for i in self._series(i)] 128 | if not _is_monotonic(instance_numbers): 129 | warnings.warn('Ordered series has non-monotonic instance numbers.') 130 | 131 | return instance_numbers 132 | except TagMissingError: 133 | pass 134 | 135 | @field 136 | def conv_kernel(self, i): 137 | return get_common_tag(self._series(i), 'ConvolutionKernel', default=None) 138 | 139 | @field 140 | def kvp(self, i): 141 | return get_common_tag(self._series(i), 'KVP', default=None) 142 | 143 | @field 144 | def patient_id(self, i): 145 | return get_common_tag(self._series(i), 'PatientID', default=None) 146 | 147 | @field 148 | def study_date(self, i): 149 | return get_series_date(self._series(i)) 150 | 151 | @field 152 | def accession_number(self, i): 153 | return get_common_tag(self._series(i), 'AccessionNumber', default=None) 154 | 155 | @field 156 | def nodules(self, i): 157 | folders = {Path(f).parent.name for f in self._mapping[i]} 158 | if len(folders) != 1: 159 | # can't determine protocol filename 160 | return 161 | 162 | (filename,) = folders 163 | protocol = json.load(codecs.open(str(self.root / 'protocols' / f'{filename}.json'), 'r', 'utf-8-sig')) 164 | 165 | series_number = 
get_common_tag(self._series(i), 'SeriesNumber') 166 | try: 167 | return get_nodules(protocol, series_number, self.slice_locations(i)) 168 | except ValueError: 169 | pass 170 | 171 | 172 | def _is_monotonic(sequence): 173 | sequence = list(sequence) 174 | return sequence == sorted(sequence) or sequence == sorted(sequence)[::-1] 175 | -------------------------------------------------------------------------------- /amid/cancer_500/nodules.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import numpy as np 4 | 5 | from .typing import Cancer500Nodule, Comment, Review, Texture 6 | 7 | 8 | def get_nodules(protocol, series_number, slice_locations): 9 | if protocol['nodules'] is None: 10 | num_doctors = len(protocol['doctors']) 11 | assert num_doctors in [3, 6] 12 | 13 | if len([d for d in protocol['doctors'] if definetely_no_nodules(d['comment'])]) > num_doctors / 2: 14 | return [] 15 | else: 16 | raise ValueError 17 | 18 | assert protocol['nodules'] 19 | 20 | nodules = [] 21 | for nodule in protocol['nodules']: 22 | annotations = dict(get_nodule_annotations(nodule[-1], series_number, slice_locations)) 23 | if not annotations: 24 | raise ValueError 25 | 26 | nodules.append(annotations) 27 | 28 | return nodules 29 | 30 | 31 | def definetely_no_nodules(overall_comment): 32 | overall_comment = overall_comment.lower() 33 | prefixes = ['нет очагов', 'очагов нет', 'очаги не выявлены', 'достоверно очагов нет'] 34 | return any(overall_comment.startswith(p) for p in prefixes) 35 | 36 | 37 | def get_nodule_annotations(nodule: dict, series_number: int, slice_locations: list): 38 | for rater, ann in nodule.items(): 39 | if ann is None: 40 | continue 41 | 42 | if 'series_no' in ann and str(series_number) not in ann['series_no']: 43 | warnings.warn('Cannot check that annotation belongs to this particular series.') 44 | continue 45 | 46 | try: 47 | yield rater, parse_nodule_annotation(ann, slice_locations) 48 | except ValueError as e: 49 | warnings.warn(str(e)) 50 | continue 51 | 52 | 53 | def parse_nodule_annotation(ann: dict, slice_locations: list): 54 | return Cancer500Nodule( 55 | center_voxel=parse_center_voxel(ann, slice_locations), 56 | review=parse_review(ann), 57 | comment=parse_comment(ann), 58 | diameter_mm=parse_diameter_mm(ann), 59 | texture=parse_texture(ann), 60 | malignancy=parse_malignancy(ann), 61 | ) 62 | 63 | 64 | def parse_center_voxel(ann: dict, slice_locations: list): 65 | i, j = int(ann['x']), int(ann['y']) 66 | assert i == ann['x'] 67 | assert j == ann['y'] 68 | 69 | assert 'z type' in ann 70 | assert ann['z type'].strip() == 'mm' 71 | diff = np.abs(np.array(slice_locations) - ann['z']) 72 | if np.min(diff) >= 1: 73 | raise ValueError('Cannot determine slice.') 74 | slc = np.argmin(diff) 75 | 76 | comments = [review['comment'] for review in ann['expert decision']] 77 | if 'z = 258 = -151,6 ' in comments: 78 | slc = 258 79 | elif 'не 134 а 143 по оси Х' in comments: 80 | i = 143 81 | elif ( 82 | 'неправильная координата х (должно быть 73, а не 734). 
сосуд, несовпадение типа (другое), неверный размер' 83 | in comments 84 | ): 85 | i = 73 86 | elif 'ошибка в координате Y - должно быть 296, тогда очаг есть' in comments: 87 | j = 296 88 | elif 'срез съехал на два ниже' in comments: 89 | slc -= 2 90 | elif set(comments) & { 91 | 'очага нет', 92 | 'промахно', 93 | 'промахнулись с координатой х', 94 | 'часть координат не совпадает с топикой очага', 95 | 'часть координат не совпадает с топикой очага, неверный размер', 96 | }: 97 | raise ValueError('Cannot detetmine center voxel') 98 | 99 | return i, j, slc 100 | 101 | 102 | def parse_review(ann: dict): 103 | decisions = {review['decision'] for review in ann['expert decision']} 104 | if 'confirmed' in decisions: 105 | return Review.Confirmed 106 | elif 'confirmed_partially' in decisions: 107 | return Review.ConfirmedPartially 108 | elif 'doubt' in decisions: 109 | return Review.Doubt 110 | elif 'rejected' in decisions: 111 | return Review.Rejected 112 | else: 113 | raise ValueError(decisions) 114 | 115 | 116 | def parse_comment(ann: dict): 117 | comments = {review['comment'] for review in ann['expert decision']} 118 | if 'кальцинат, несовпадение типа (другое)' in comments: 119 | return Comment.Calcium 120 | elif 'фиброз' in comments: 121 | return Comment.Fibrosis 122 | elif 'внутрилегочный л\\у' in comments: 123 | return Comment.LymphNode 124 | elif 'очаг с кальцинацией, несовпадение типа (другое)' in comments: 125 | return Comment.Calcified 126 | elif 'бронхоэктаз с содержимым, несовпадение типа (другое)' in comments: 127 | return Comment.Bronchiectasis 128 | elif 'сосуд' in comments: 129 | return Comment.Vessel 130 | 131 | 132 | def parse_diameter_mm(ann: dict): 133 | if any('неверный размер' in review['comment'].lower() for review in ann['expert decision']): 134 | return 135 | 136 | return round(ann['diameter (mm)'], 2) 137 | 138 | 139 | def parse_texture(ann: dict): 140 | nodule_types = {review['type'] for review in ann['expert decision']} & {'#0S', '#1PS', '#2GG', 'другое'} 141 | if nodule_types: 142 | assert len(nodule_types) == 1 143 | (nodule_type,) = nodule_types 144 | elif parse_review(ann) in [Review.Confirmed, Review.ConfirmedPartially, Review.Doubt]: 145 | assert ann['type'] in ['#0S', '#1PS', '#2GG'] 146 | nodule_type = ann['type'] 147 | else: 148 | return 149 | 150 | if nodule_type == '#0S': 151 | return Texture.Solid 152 | elif nodule_type == '#1PS': 153 | return Texture.PartSolid 154 | elif nodule_type == '#2GG': 155 | return Texture.GroundGlass 156 | elif nodule_type == 'другое': 157 | return Texture.Other 158 | 159 | 160 | def parse_malignancy(ann: dict): 161 | malignant = [review['malignant'] for review in ann['expert decision']] 162 | if all(malignant): 163 | return True 164 | elif not any(malignant): 165 | return False 166 | -------------------------------------------------------------------------------- /amid/lits/dataset.py: -------------------------------------------------------------------------------- 1 | import zipfile 2 | from pathlib import Path 3 | from zipfile import ZipFile 4 | 5 | import nibabel as nb 6 | import numpy as np 7 | 8 | from ..internals import Dataset, licenses, register 9 | 10 | 11 | @register( 12 | body_region='Abdominal', 13 | license=licenses.CC_BYNCND_40, 14 | link='https://competitions.codalab.org/competitions/17094', 15 | modality='CT', 16 | prep_data_size='24,7G', 17 | raw_data_size='35G', 18 | task='Segmentation', 19 | ) 20 | class LiTS(Dataset): 21 | """ 22 | A (Li)ver (T)umor (S)egmentation dataset [1]_ from Medical Segmentation 
Decathlon [2]_ 23 | 24 | There are two segmentation tasks on this dataset: liver and liver tumor segmentation. 25 | 26 | Parameters 27 | ---------- 28 | root : str, Path, optional 29 | path to the folder containing the raw downloaded archives. 30 | If not provided, the cache is assumed to be already populated. 31 | 32 | 33 | Notes 34 | ----- 35 | Follow the download instructions at https://competitions.codalab.org/competitions/17094. 36 | 37 | Then, the folder with raw downloaded data should contain two zip archives with the train data 38 | (`Training_Batch1.zip` and `Training_Batch2.zip`) 39 | and a folder with the test data 40 | (`LITS-Challenge-Test-Data`). 41 | 42 | The folder with test data should have original structure: 43 | <...>/LITS-Challenge-Test-Data/test-volume-0.nii 44 | <...>/LITS-Challenge-Test-Data/test-volume-1.nii 45 | ... 46 | 47 | P.S. Organs boxes are also provided from a separate source https://github.com/superxuang/caffe_3d_faster_rcnn. 48 | 49 | Examples 50 | -------- 51 | >>> # Place the downloaded archives in any folder and pass the path to the constructor: 52 | >>> ds = LiTS(root='/path/to/downloaded/data/folder/') 53 | >>> print(len(ds.ids)) 54 | # 201 55 | >>> print(ds.image(ds.ids[0]).shape) 56 | # (512, 512, 163) 57 | >>> print(ds.tumor_mask(ds.ids[80]).shape) 58 | # (512, 512, 771) 59 | 60 | References 61 | ---------- 62 | .. [1] Bilic, Patrick, et al. "The liver tumor segmentation benchmark (lits)." 63 | arXiv preprint arXiv:1901.04056 (2019). 64 | .. [2] Antonelli, Michela, et al. "The medical segmentation decathlon." 65 | arXiv preprint arXiv:2106.05735 (2021). 66 | """ 67 | 68 | @property 69 | def ids(self): 70 | result = set() 71 | # zip archives for train images: 72 | for archive in self.root.glob('*.zip'): 73 | with ZipFile(archive) as zf: 74 | for zipinfo in zf.infolist(): 75 | if zipinfo.is_dir(): 76 | continue 77 | 78 | file_stem = Path(zipinfo.filename).stem 79 | if 'volume' in file_stem: 80 | result.add('lits-train-' + file_stem.split('-')[-1]) 81 | 82 | # folder for test images: 83 | for file in (self.root / 'LITS-Challenge-Test-Data').glob('*'): 84 | result.add('lits-test-' + file.stem.split('-')[-1]) 85 | 86 | return tuple(sorted(result)) 87 | 88 | def fold(self, i): 89 | num_id = i.split('-')[-1] 90 | 91 | if 'train' in i: 92 | for archive in self.root.glob('*.zip'): 93 | batch = '1' if ('1' in archive.stem) else '2' 94 | 95 | with ZipFile(archive) as zf: 96 | for zipinfo in zf.infolist(): 97 | if zipinfo.is_dir(): 98 | continue 99 | 100 | if num_id == Path(zipinfo.filename).stem.split('-')[-1]: 101 | return f'train_batch_{batch}' 102 | 103 | else: # if 'test' in i: 104 | return 'test' 105 | 106 | def _file(self, i): 107 | num_id = i.split('-')[-1] 108 | 109 | if 'train' in i: 110 | for archive in self.root.glob('*.zip'): 111 | with ZipFile(archive) as zf: 112 | for zipinfo in zf.infolist(): 113 | if zipinfo.is_dir(): 114 | continue 115 | 116 | file = Path(zipinfo.filename) 117 | if ('volume' in file.stem) and (num_id == file.stem.split('-')[-1]): 118 | return zipfile.Path(str(archive), str(file)) 119 | 120 | else: # if 'test' in i: 121 | return self.root / 'LITS-Challenge-Test-Data' / f'test-volume-{num_id}.nii' 122 | 123 | raise KeyError(f'Id "{i}" not found') 124 | 125 | def image(self, i): 126 | with self._file(i).open('rb') as nii: 127 | nii = nb.FileHolder(fileobj=nii) 128 | image = nb.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 129 | # most ct scans are integer-valued, this will help us improve compression rates 130 | 
return np.int16(image.get_fdata()) 131 | 132 | def affine(self, i): 133 | """The 4x4 matrix that gives the image's spatial orientation.""" 134 | with self._file(i).open('rb') as nii: 135 | nii = nb.FileHolder(fileobj=nii) 136 | image = nb.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 137 | return image.affine 138 | 139 | def spacing(self, i): 140 | """Returns voxel spacing along axes (x, y, z).""" 141 | with self._file(i).open('rb') as nii: 142 | nii = nb.FileHolder(fileobj=nii) 143 | image = nb.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 144 | return tuple(image.header['pixdim'][1:4]) 145 | 146 | def mask(self, i): 147 | file = self._file(i) 148 | if 'test' not in file.name: 149 | with (file.parent / file.name.replace('volume', 'segmentation')).open('rb') as nii: 150 | nii = nb.FileHolder(fileobj=nii) 151 | image = nb.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 152 | return np.uint8(image.get_fdata()) 153 | -------------------------------------------------------------------------------- /amid/upenn_gbm/upenn_gbm.py: -------------------------------------------------------------------------------- 1 | from functools import cached_property 2 | 3 | import nibabel as nb 4 | import numpy as np 5 | import pandas as pd 6 | 7 | from ..internals import Dataset, licenses, register 8 | from .data_classes import AcquisitionInfo, ClinicalInfo 9 | 10 | 11 | @register( 12 | body_region='Head', 13 | license=licenses.CC_BY_40, 14 | link='https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=70225642', 15 | modality=('FLAIR', 'MRI T1', 'MRI T1GD', 'MRI T2', 'DSC MRI', 'DTI MRI'), 16 | prep_data_size='70G', 17 | raw_data_size='69G', 18 | task='Segmentation', 19 | ) 20 | class UPENN_GBM(Dataset): 21 | """ 22 | Multi-parametric magnetic resonance imaging (mpMRI) scans for de novo Glioblastoma 23 | (GBM) patients from the University of Pennsylvania Health System (UPENN-GBM). 24 | Dataset contains 630 patients. 25 | 26 | All samples are registered to a common atlas (SRI) 27 | using a uniform preprocessing and the segmentation are aligned with them. 28 | 29 | 30 | Parameters 31 | ---------- 32 | root : str, Path, optional 33 | path to the folder containing the raw downloaded archives. 34 | If not provided, the cache is assumed to be already populated. 35 | 36 | Notes 37 | ----- 38 | Follow the download instructions at https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=70225642 39 | Download to the root folder nifti images and metadata. Organise folder as folows: 40 | 41 | 42 | <...>//NIfTI-files/images_segm/UPENN-GBM-00054_11_segm.nii.gz 43 | <...>//NIfTI-files/... 44 | 45 | <...>//UPENN-GBM_clinical_info_v1.0.csv 46 | <...>//UPENN-GBM_acquisition.csv 47 | 48 | 49 | Examples 50 | -------- 51 | >>> # Place the downloaded archives in any folder and pass the path to the constructor: 52 | >>> ds = UPENN_GBM(root='/path/to/downloaded/data/folder/') 53 | >>> print(len(ds.ids)) 54 | # 671 55 | >>> print(ds.image(ds.ids[215]).shape) 56 | # (4, 240, 240, 155) 57 | >>> print(d.acqusition_info(d.ids[215]).manufacturer) 58 | # SIEMENS 59 | 60 | References 61 | ---------- 62 | .. [1] Bakas, S., Sako, C., Akbari, H., Bilello, M., Sotiras, A., Shukla, G., Rudie, 63 | J. D., Flores Santamaria, N., Fathi Kazerooni, A., Pati, S., Rathore, S., 64 | Mamourian, E., Ha, S. M., Parker, W., Doshi, J., Baid, U., Bergman, M., Binder, Z. A., Verma, R., … Davatzikos, 65 | C. (2021). 
Multi-parametric magnetic resonance imaging (mpMRI) scans for de novo 66 | Glioblastoma (GBM) patients from the University of Pennsylvania Health System (UPENN-GBM) 67 | (Version 2) [Data set]. The Cancer Imaging Archive. 68 | https://doi.org/10.7937/TCIA.709X-DN49 69 | 70 | """ 71 | 72 | @property 73 | def ids(self): 74 | ids = [x.name for x in (self.root / 'NIfTI-files/images_structural').iterdir()] 75 | return tuple(sorted(ids)) 76 | 77 | @property 78 | def modalities(self): 79 | return ['T1', 'T1GD', 'T2', 'FLAIR'] 80 | 81 | @property 82 | def dsc_modalities(self): 83 | return ['', 'ap-rCBV', 'PH', 'PSR'] 84 | 85 | @property 86 | def dti_modalities(self): 87 | return ['AD', 'FA', 'RD', 'TR'] 88 | 89 | def _mask_path(self, i): 90 | p1 = self.root / 'NIfTI-files/images_segm' 91 | p2 = self.root / 'NIfTI-files/automated_segm' 92 | p1 = list(p1.glob(i + '*')) 93 | p2 = list(p2.glob(i + '*')) 94 | return p1[0] if p1 else p2[0] if p2 else None 95 | 96 | def mask(self, i): 97 | path = self._mask_path(i) 98 | if not path: 99 | return None 100 | return np.asarray(nb.load(path).get_fdata()) 101 | 102 | def is_mask_automated(self, i): 103 | path = self._mask_path(i) 104 | if path is None: 105 | return None 106 | return path.parent.name == 'automated_segm' 107 | 108 | def image(self, i): 109 | path = self.root / f'NIfTI-files/images_structural/{i}' 110 | image_pathes = [path / f'{i}_{mod}.nii.gz' for mod in self.modalities] 111 | images = [np.asarray(nb.load(p).dataobj) for p in image_pathes] 112 | return np.stack(images) 113 | 114 | def image_unstripped(self, i): 115 | path = self.root / f'NIfTI-files/images_structural_unstripped/{i}' 116 | image_pathes = [path / f'{i}_{mod}_unstripped.nii.gz' for mod in self.modalities] 117 | images = [np.asarray(nb.load(p).dataobj) for p in image_pathes] 118 | return np.stack(images) 119 | 120 | def image_DTI(self, i): 121 | path = self.root / f'NIfTI-files/images_DTI/{i}' 122 | if not path.exists(): 123 | return None 124 | image_pathes = [path / f'{i}_DTI_{mod}.nii.gz' for mod in self.dti_modalities] 125 | images = [np.asarray(nb.load(p).dataobj) for p in image_pathes] 126 | return np.stack(images) 127 | 128 | def image_DSC(self, i): 129 | path = self.root / f'NIfTI-files/images_DSC/{i}' 130 | if not path.exists(): 131 | return None 132 | image_pathes = [path / (f'{i}_DSC_{mod}.nii.gz' if mod else f'{i}_DSC.nii.gz') for mod in self.dsc_modalities] 133 | images = [np.asarray(nb.load(p).dataobj) for p in image_pathes] 134 | return images 135 | 136 | @cached_property 137 | def _clinical_info(self): 138 | return pd.read_csv(self.root / 'UPENN-GBM_clinical_info_v1.0.csv') 139 | 140 | @cached_property 141 | def _acqusition_info(self): 142 | return pd.read_csv(self.root / 'UPENN-GBM_acquisition.csv') 143 | 144 | def clinical_info(self, i): 145 | row = self._clinical_info[self._clinical_info.ID == i] 146 | return ClinicalInfo(*row.iloc[0, 1:]) 147 | 148 | def acqusition_info(self, i): 149 | row = self._acqusition_info[self._acqusition_info.ID == i] 150 | return AcquisitionInfo(*row.iloc[0, 1:]) 151 | 152 | def subject_id(self, i): 153 | return i.split('_')[0] 154 | 155 | def affine(self, i): 156 | return np.array([[-1.0, 0.0, 0.0, -0.0], [0.0, -1.0, 0.0, 239.0], [0.0, 0.0, 1.0, 0.0], [0.0, 0.0, 0.0, 1.0]]) 157 | 158 | def spacing(self, i): 159 | return (1, 1, 1) 160 | -------------------------------------------------------------------------------- /amid/deeplesion.py: -------------------------------------------------------------------------------- 1 | from functools 
import cached_property 2 | 3 | import deli 4 | import nibabel 5 | import numpy as np 6 | 7 | from .internals import Dataset, register 8 | 9 | 10 | @register( 11 | body_region=('Abdomen', 'Thorax'), 12 | link='https://nihcc.app.box.com/v/DeepLesion', 13 | modality='CT', 14 | prep_data_size='259G', 15 | raw_data_size='259G', 16 | task=('Localisation', 'Detection', 'Classification'), 17 | ) 18 | class DeepLesion(Dataset): 19 | """ 20 | DeepLesion is composed of 33,688 bookmarked radiology images from 21 | 10,825 studies of 4,477 unique patients. For every bookmarked image, a bound- 22 | ing box is created to cover the target lesion based on its measured diameters [1]. 23 | 24 | Parameters 25 | ---------- 26 | root : str, Path, optional 27 | path to the folder containing `DL_info.csv` file and a subfolder `Images_nifti` with 20094 nii.gz files. 28 | 29 | Notes 30 | ----- 31 | Dataset is available at https://nihcc.app.box.com/v/DeepLesion 32 | 33 | To download the data we recommend using a Python script provided by the authors `batch_download_zips.py`. 34 | Once you download the data and unarchive all 56 zip archives, you should run `DL_save_nifti.py` 35 | provided by the authors to convert 2D PNGs into 20094 nii.gz files. 36 | 37 | Example 38 | -------- 39 | >>> ds = DeepLesion(root='/path/to/folder') 40 | >>> print(len(ds.ids)) 41 | # 20094 42 | 43 | References 44 | ---------- 45 | .. [1] Yan, Ke, Xiaosong Wang, Le Lu, and Ronald M. Summers. "Deeplesion: Automated deep mining, 46 | categorization and detection of significant radiology image findings using large-scale clinical 47 | lesion annotations." arXiv preprint arXiv:1710.01766 (2017). 48 | 49 | """ 50 | 51 | @property 52 | def ids(self): 53 | return tuple(sorted(file.name.replace('.nii.gz', '') for file in (self.root / 'Images_nifti').glob('*.nii.gz'))) 54 | 55 | def _image_file(self, i): 56 | return nibabel.load(self.root / 'Images_nifti' / f'{i}.nii.gz') 57 | 58 | @cached_property 59 | def _metadata(self): 60 | df = deli.load(self.root / 'DL_info.csv') 61 | 62 | cols_to_transform = [ 63 | 'Measurement_coordinates', 64 | 'Bounding_boxes', 65 | 'Lesion_diameters_Pixel_', 66 | 'Normalized_lesion_location', 67 | ] 68 | for col in cols_to_transform: 69 | df[col] = df[col].apply(lambda x: list(map(float, x.split(',')))) 70 | 71 | df['Slice_range_list'] = df['Slice_range'].apply(lambda x: list(map(int, x.split(',')))) 72 | 73 | def get_ids(x): 74 | patient_study_series = '_'.join(x.File_name.split('_')[:3]) 75 | slice_range_list = list(map(str, x.Slice_range_list)) 76 | slice_range_list = [num.zfill(3) for num in slice_range_list] 77 | slice_range_list = '-'.join(slice_range_list) 78 | return f'{patient_study_series}_{slice_range_list}' 79 | 80 | df['ids'] = df.apply(get_ids, axis=1) 81 | return df 82 | 83 | def _row(self, i): 84 | # funny story, f-string does not work for pandas.query, 85 | # @ syntax does not work for linter, use # noqa 86 | return self._metadata.query('ids==@i') 87 | 88 | def patient_id(self, i): 89 | patient, study, series = map(int, i.split('_')[:3]) 90 | return patient 91 | 92 | def study_id(self, i): 93 | patient, study, series = map(int, i.split('_')[:3]) 94 | return study 95 | 96 | def series_id(self, i): 97 | patient, study, series = map(int, i.split('_')[:3]) 98 | return series 99 | 100 | def sex(self, i): 101 | return self._row(i).Patient_gender.iloc[0] 102 | 103 | def age(self, i): 104 | """Patient Age might be different for different studies (dataset contains longitudinal records).""" 105 | return 
self._row(i).Patient_age.iloc[0] 106 | 107 | def ct_window(self, i): 108 | """CT window extracted from DICOMs. Recall that these are min-max values for windowing, not width-level.""" 109 | return self._row(i).DICOM_windows.iloc[0] 110 | 111 | def affine(self, i): 112 | return self._image_file(i).affine 113 | 114 | def spacing(self, i): 115 | return tuple(self._image_file(i).header['pixdim'][1:4]) 116 | 117 | def image(self, i): 118 | """Some 3D volumes are stored as separate subvolumes, e.g. ds.ids[15000] and ds.ids[15001].""" 119 | return np.asarray(self._image_file(i).dataobj) 120 | 121 | def train_val_test(self, i): 122 | """Authors-defined, randomly generated patient-level data split: train=1, validation=2, test=3, 123 | 70/15/15 ratio.""" 124 | return int(self._row(i).Train_Val_Test.iloc[0]) 125 | 126 | def lesion_position(self, i): 127 | """Lesion measurements as they appear in DL_info.csv, for details see 128 | https://nihcc.app.box.com/v/DeepLesion/file/306056134060 .""" 129 | position = self._row(i)[ 130 | [ 131 | 'Slice_range_list', 132 | 'Key_slice_index', 133 | 'Measurement_coordinates', 134 | 'Bounding_boxes', 135 | 'Lesion_diameters_Pixel_', 136 | 'Normalized_lesion_location', 137 | ] 138 | ].to_dict('list') 139 | position['Slice_range_list'] = position['Slice_range_list'][0] 140 | return position 141 | 142 | def mask(self, i): 143 | """Mask of the provided bounding boxes. Recall that the bounding box annotation 144 | is very coarse: it only covers a single 2D slice.""" 145 | mask = np.zeros_like(self.image(i)) 146 | lesion_position = self.lesion_position(i) 147 | min_index = lesion_position['Slice_range_list'][0] 148 | for idx, slice_index in enumerate(lesion_position['Key_slice_index']): 149 | image_index = slice_index - min_index 150 | top_left_x, top_left_y, bot_right_x, bot_right_y = lesion_position['Bounding_boxes'][idx] 151 | mask[ 152 | int(np.floor(top_left_y)) : int(np.ceil(bot_right_y)), 153 | int(np.floor(top_left_x)) : int(np.ceil(bot_right_x)), 154 | image_index, 155 | ] = 1 156 | return mask 157 | -------------------------------------------------------------------------------- /amid/cc359/dataset.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import gzip 3 | import zipfile 4 | from pathlib import Path 5 | from zipfile import ZipFile 6 | 7 | import nibabel as nb 8 | import numpy as np 9 | 10 | from ..internals import Dataset, licenses, register 11 | 12 | 13 | @register( 14 | body_region='Head', 15 | license=licenses.CC_BYND_40, 16 | link='https://sites.google.com/view/calgary-campinas-dataset/home', 17 | modality='MRI T1', 18 | prep_data_size='14,66G', 19 | raw_data_size='4,1G', 20 | task='Segmentation', 21 | ) 22 | class CC359(Dataset): 23 | """ 24 | A (C)algary-(C)ampinas public brain MR dataset with (359) volumetric images [1]_. 25 | 26 | There are three segmentation tasks on this dataset: (i) brain, (ii) hippocampus, and 27 | (iii) White-Matter (WM), Gray-Matter (GM), and Cerebrospinal Fluid (CSF) segmentation. 28 | 29 | Parameters 30 | ---------- 31 | root : str, Path, optional 32 | path to the folder containing the raw downloaded archives. 33 | If not provided, the cache is assumed to be already populated.
34 | 35 | 36 | Notes 37 | ----- 38 | homepage (upd): https://sites.google.com/view/calgary-campinas-dataset/home 39 | homepage (old): https://miclab.fee.unicamp.br/calgary-campinas-359-updated-05092017 40 | 41 | To obtain the MR images and the brain and hippocampus segmentation masks, please follow the instructions 42 | at the download platform: https://portal.conp.ca/dataset?id=projects/calgary-campinas. 43 | 44 | Via the `datalad` library you need to download three zip archives: 45 | - `Original.zip` (the original MR images) 46 | - `hippocampus_staple.zip` (Silver-standard hippocampus masks generated using STAPLE) 47 | - `Silver-standard-machine-learning.zip` (Silver-standard brain masks generated using a machine learning method) 48 | 49 | To date, the WM, GM, and CSF masks can be downloaded only from Google Drive: 50 | https://drive.google.com/drive/u/0/folders/0BxLb0NB2MjVZNm9JY1pWNFp6WTA?resourcekey=0-2sXMr8q-n2Nn6iY3PbBAdA. 51 | 52 | Here you need to manually download the folder (relative to the Google Drive root above) 53 | `CC359/Reconstructed/CC359/WM-GM-CSF/` 54 | 55 | So the root folder to pass to this dataset class should contain four objects: 56 | - three zip archives (`Original.zip`, `hippocampus_staple.zip`, and `Silver-standard-machine-learning.zip`) 57 | - one folder `WM-GM-CSF` with the original structure: 58 | <...>/WM-GM-CSF/CC0319_ge_3_45_M.nii.gz 59 | <...>/WM-GM-CSF/CC0324_ge_3_56_M.nii.gz 60 | ... 61 | 62 | Examples 63 | -------- 64 | >>> # Place the downloaded archives in any folder and pass the path to the constructor: 65 | >>> cc359 = CC359(root='/path/to/downloaded/data/folder/') 66 | >>> print(len(cc359.ids)) 67 | # 359 68 | >>> print(cc359.image(cc359.ids[0]).shape) 69 | # (171, 256, 256) 70 | >>> print(cc359.wm_gm_csf(cc359.ids[80]).shape) 71 | # (180, 240, 240) 72 | 73 | References 74 | ---------- 75 | .. [1] Souza, Roberto, et al. "An open, multi-vendor, multi-field-strength brain MR dataset 76 | and analysis of publicly available skull stripping methods agreement." 77 | NeuroImage 170 (2018): 482-494.
78 | https://www.sciencedirect.com/science/article/pii/S1053811917306687 79 | 80 | """ 81 | 82 | @property 83 | def ids(self): 84 | result = set() 85 | with ZipFile(self.root / 'Original.zip') as zf: 86 | for zipinfo in zf.infolist(): 87 | if zipinfo.is_dir(): 88 | continue 89 | 90 | file_name = Path(zipinfo.filename).name 91 | if file_name.startswith('CC'): 92 | result.add(file_name.split('_')[0]) 93 | 94 | return tuple(sorted(result)) 95 | 96 | def _image_file(self, i): 97 | return get_zipfile(i, 'Original.zip', self.root) 98 | 99 | def vendor(self, i): 100 | return zipfile2meta(self._image_file(i))['vendor'] 101 | 102 | def field(self, i): 103 | return zipfile2meta(self._image_file(i))['field'] 104 | 105 | def age(self, i): 106 | return zipfile2meta(self._image_file(i))['age'] 107 | 108 | def sex(self, i): 109 | return zipfile2meta(self._image_file(i))['gender'] 110 | 111 | def image(self, i): 112 | with open_nii_gz_file(self._image_file(i)) as nii_image: 113 | return np.asarray(nii_image.dataobj) 114 | 115 | def affine(self, i): 116 | """The 4x4 matrix that gives the image's spatial orientation.""" 117 | with open_nii_gz_file(self._image_file(i)) as nii_image: 118 | return nii_image.affine 119 | 120 | def spacing(self, i): 121 | """Returns voxel spacing along axes (x, y, z).""" 122 | with open_nii_gz_file(self._image_file(i)) as nii_image: 123 | return tuple(nii_image.header['pixdim'][1:4]) 124 | 125 | # masks: 126 | 127 | def brain(self, i): 128 | zf = get_zipfile(i, 'Silver-standard-machine-learning.zip', self.root) 129 | with open_nii_gz_file(zf) as nii_image: 130 | return np.uint8(nii_image.get_fdata()) 131 | 132 | def hippocampus(self, i): 133 | try: 134 | zf = get_zipfile(i, 'hippocampus_staple.zip', self.root) 135 | except KeyError: 136 | return None 137 | 138 | with open_nii_gz_file(zf) as nii_image: 139 | return np.uint8(nii_image.get_fdata()) 140 | 141 | def wm_gm_csf(self, i): 142 | for file in (self.root / 'WM-GM-CSF').glob('*'): 143 | if file.name.startswith(i): 144 | with open_nii_gz_file(file) as nii_image: 145 | return np.uint8(nii_image.get_fdata()) 146 | 147 | 148 | # TODO: sync with amid.utils 149 | @contextlib.contextmanager 150 | def open_nii_gz_file(file): 151 | with file.open('rb') as opened: 152 | with gzip.GzipFile(fileobj=opened) as nii: 153 | nii = nb.FileHolder(fileobj=nii) 154 | yield nb.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 155 | 156 | 157 | def get_zipfile(_id, archive_name, root): 158 | archive = Path(root) / archive_name 159 | with ZipFile(archive) as zf: 160 | for zipinfo in zf.infolist(): 161 | if Path(zipinfo.filename).name.startswith(_id) and not zipinfo.is_dir(): 162 | return zipfile.Path(str(archive), zipinfo.filename) 163 | 164 | raise KeyError(f'Id "{_id}" not found') 165 | 166 | 167 | def zipfile2meta(zf): 168 | return dict(zip(['id', 'vendor', 'field', 'age', 'gender'], zf.name[: -len('.nii.gz')].split('_'))) 169 | -------------------------------------------------------------------------------- /amid/msd.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import gzip 3 | import json 4 | import tarfile 5 | from pathlib import Path 6 | 7 | import nibabel as nb 8 | import numpy as np 9 | 10 | from .internals import Dataset, register 11 | 12 | 13 | @register( 14 | body_region=('Chest', 'Abdominal', 'Head'), 15 | link='http://medicaldecathlon.com/', 16 | modality=('CT', 'CE CT', 'MRI', 'MRI FLAIR', 'MRI T1w', 'MRI t1gd', 'MRI T2w', 'MRI T2', 'MRI ADC'), 17 | 
raw_data_size='97.8G', 18 | task='Image segmentation', 19 | ) 20 | class MSD(Dataset): 21 | """ 22 | MSD is the Medical Segmentation Decathlon challenge with 10 tasks. 23 | Parameters 24 | ---------- 25 | root : str, Path, optional 26 | path to the folder containing the raw downloaded archives. 27 | If not provided, the cache is assumed to be already populated. 28 | 29 | Notes 30 | ----- 31 | Data can be downloaded here: http://medicaldecathlon.com/ 32 | or here: https://msd-for-monai.s3-us-west-2.amazonaws.com/ 33 | or here: https://drive.google.com/drive/folders/1HqEgzS8BV2c7xYNrZdEAnrHk7osJJ--2/ 34 | Then, the folder with the raw downloaded data should contain the tar archives with data and masks 35 | (e.g. `Task03_Liver.tar`). 36 | """ 37 | 38 | @property 39 | def ids(self): 40 | ids_all = [] 41 | for folder in self.root.glob('*'): 42 | if folder.name.endswith('.tar'): 43 | ids_folder = ids_from_tar(folder) 44 | else: 45 | ids_folder = ids_from_folder(folder) 46 | ids_all.extend(ids_folder) 47 | return tuple(ids_all) 48 | 49 | def train_test(self, i) -> str: 50 | fold = 'train' if 'train' in i else 'test' 51 | return fold 52 | 53 | def task(self, i) -> str: 54 | return NAME_TO_TASK[i.split('_')[1]] 55 | 56 | def _relative(self, i): 57 | name = i.removeprefix('train_').removeprefix('test_') 58 | return Path(self.task(i)), Path('imagesTr' if 'train' in i else 'imagesTs') / f'{name}.nii.gz' 59 | 60 | def image(self, i): 61 | with open_nii_gz(self.root, self._relative(i)) as (file, unpacked): 62 | if unpacked: 63 | return np.int16(nb.load(file).get_fdata()) 64 | else: 65 | with gzip.GzipFile(fileobj=file) as nii_gz: 66 | nii = nb.FileHolder(fileobj=nii_gz) 67 | return np.int16(nb.Nifti1Image.from_file_map({'header': nii, 'image': nii}).get_fdata()) 68 | 69 | def affine(self, i): 70 | """The 4x4 matrix that gives the image's spatial orientation.""" 71 | with open_nii_gz(self.root, self._relative(i)) as (file, unpacked): 72 | if unpacked: 73 | return nb.load(file).affine 74 | else: 75 | with gzip.GzipFile(fileobj=file) as nii_gz: 76 | nii = nb.FileHolder(fileobj=nii_gz) 77 | return nb.Nifti1Image.from_file_map({'header': nii, 'image': nii}).affine 78 | 79 | def image_modality(self, i): 80 | task = self.task(i) 81 | if (self.root / task).is_dir(): 82 | with open(self.root / task / 'dataset.json', 'r') as file: 83 | return json.loads(file.read())['modality'] 84 | 85 | with tarfile.open(self.root / f'{task}.tar') as tf: 86 | member = tf.getmember(f'{task}/dataset.json') 87 | file = tf.extractfile(member) 88 | return json.loads(file.read())['modality'] 89 | 90 | def segmentation_labels(self, i): 91 | """Returns segmentation labels for the task.""" 92 | task = self.task(i) 93 | if (self.root / task).is_dir(): 94 | with open(self.root / task / 'dataset.json', 'r') as file: 95 | return json.loads(file.read())['labels'] 96 | 97 | with tarfile.open(self.root / f'{task}.tar') as tf: 98 | member = tf.getmember(f'{task}/dataset.json') 99 | file = tf.extractfile(member) 100 | return json.loads(file.read())['labels'] 101 | 102 | def mask(self, i): 103 | task, relative = self._relative(i) 104 | if 'imagesTs' not in str(relative): 105 | with open_nii_gz(self.root, (task, str(relative).replace('images', 'labels'))) as (file, unpacked): 106 | if unpacked: 107 | return np.uint8(nb.load(file).get_fdata()) 108 | else: 109 | with gzip.GzipFile(fileobj=file) as nii_gz: 110 | nii = nb.FileHolder(fileobj=nii_gz) 111 | return np.uint8(nb.Nifti1Image.from_file_map({'header': nii, 'image': nii}).get_fdata()) 112 | 113 | 114 |
TASK_TO_NAME: dict = { 115 | 'Task01_BrainTumour': 'BRATS', 116 | 'Task02_Heart': 'la', 117 | 'Task03_Liver': 'liver', 118 | 'Task04_Hippocampus': 'hippocampus', 119 | 'Task05_Prostate': 'prostate', 120 | 'Task06_Lung': 'lung', 121 | 'Task07_Pancreas': 'pancreas', 122 | 'Task08_HepaticVessel': 'hepaticvessel', 123 | 'Task09_Spleen': 'spleen', 124 | 'Task10_Colon': 'colon', 125 | } 126 | 127 | NAME_TO_TASK = dict(zip(TASK_TO_NAME.values(), TASK_TO_NAME.keys())) 128 | 129 | 130 | @contextlib.contextmanager 131 | def open_nii_gz(path, nii_gz_path): 132 | """Opens a .nii.gz file either from an unpacked task folder or from inside a .tar archive. 133 | 134 | Parameters: 135 | - path: path to the folder containing the unpacked task folders and/or the .tar archives 136 | - nii_gz_path: a (task, relative path) pair pointing to the .nii.gz file. 137 | 138 | Yields: 139 | - a (file, unpacked) pair: the filesystem path and True if the file is already unpacked, otherwise an extracted file object and False. 140 | """ 141 | task, relative = nii_gz_path 142 | if (path / task / relative).exists(): 143 | yield path / task / relative, True 144 | else: 145 | with tarfile.open(path / f'{task}.tar', 'r') as tar: 146 | yield tar.extractfile(str(task / relative)), False 147 | 148 | 149 | def get_id(filename: Path): 150 | fold = 'test' if 'imagesTs' in str(filename) else 'train' 151 | name = filename.name.removesuffix('.nii.gz') 152 | return '_'.join([fold, name]) 153 | 154 | 155 | def ids_from_tar(tar_folder): 156 | ids = [] 157 | with tarfile.open(tar_folder, 'r') as tf: 158 | for file in tf.getmembers(): 159 | filename = Path(file.name) 160 | if not filename.name.startswith('._') and filename.suffix == '.gz' and 'images' in filename.parent.name: 161 | ids.append(get_id(filename)) 162 | return sorted(ids) 163 | 164 | 165 | def ids_from_folder(folder): 166 | ids = [] 167 | for filename in folder.rglob('*.nii.gz'): 168 | if not filename.name.startswith('._') and filename.suffix == '.gz' and 'images' in filename.parent.name: 169 | ids.append(get_id(filename)) 170 | return sorted(ids) 171 | -------------------------------------------------------------------------------- /amid/amos/dataset.py: -------------------------------------------------------------------------------- 1 | from functools import cached_property 2 | from zipfile import ZipFile 3 | 4 | import nibabel 5 | import numpy as np 6 | import pandas as pd 7 | from jboc import composed 8 | 9 | from ..internals import Dataset, field, licenses, register 10 | from ..utils import open_nii_gz_file, unpack 11 | 12 | 13 | ARCHIVE_NAME_SEG = 'amos22.zip' 14 | ARCHIVE_ROOT_NAME = 'amos22' 15 | ERRORS = ['5514', '5437'] # these ids are damaged in the zip archives 16 | 17 | 18 | # TODO: add MRI 19 | 20 | 21 | @register( 22 | body_region='Abdomen', 23 | license=licenses.CC_BY_40, 24 | link='https://zenodo.org/record/7262581', 25 | modality=('CT', 'MRI'), 26 | raw_data_size='23G', # TODO: update size with unlabelled 27 | prep_data_size='89,5G', 28 | task='Supervised multi-modality abdominal multi-organ segmentation', 29 | ) 30 | class AMOS(Dataset): 31 | """ 32 | AMOS provides 500 CT and 100 MRI scans collected from multi-center, multi-vendor, multi-modality, multi-phase, 33 | multi-disease patients, each with voxel-level annotations of 15 abdominal organs, providing challenging examples 34 | and a test-bed for studying robust segmentation algorithms under diverse targets and scenarios. [1] 35 | 36 | Parameters 37 | ---------- 38 | root : str, Path, optional 39 | Absolute path to the root containing the downloaded archive and meta. 40 | If not provided, the cache is assumed to be already populated.
41 | 42 | Notes 43 | ----- 44 | Download link: https://zenodo.org/record/7262581/files/amos22.zip 45 | 46 | Examples 47 | -------- 48 | >>> # Download the archive and meta to any folder and pass the path to the constructor: 49 | >>> ds = AMOS(root='/path/to/the/downloaded/files') 50 | >>> print(len(ds.ids)) 51 | # 961 52 | >>> print(ds.image(ds.ids[0]).shape) 53 | # (768, 768, 90) 54 | >>> print(ds.mask(ds.ids[26]).shape) 55 | # (512, 512, 124) 56 | 57 | References 58 | ---------- 59 | .. [1] JI YUANFENG. (2022). Amos: A large-scale abdominal multi-organ benchmark for 60 | versatile medical image segmentation [Data set]. Zenodo. https://doi.org/10.5281/zenodo.7262581 61 | """ 62 | 63 | @property 64 | def ids(self): 65 | ids = list(self._id2split) 66 | 67 | for archive in [ 68 | 'amos22_unlabeled_ct_5000_5399.zip', 69 | 'amos22_unlabeled_ct_5400_5899.zip', 70 | 'amos22_unlabeled_ct_5900_6199.zip', 71 | 'amos22_unlabeled_ct_6200_6899.zip', 72 | ]: 73 | file = self.root / archive 74 | if not file.exists(): 75 | continue 76 | 77 | with ZipFile(file) as zf: 78 | for x in zf.namelist(): 79 | if x.endswith('.nii.gz'): 80 | file = x.split('/')[-1] 81 | 82 | ids.append(file.split('.')[0].split('_')[-1]) 83 | 84 | return sorted(ids) 85 | 86 | @field 87 | def image(self, i): 88 | """Corresponding 3D image.""" 89 | if i in ERRORS: 90 | return None # this image is damaged in the archive 91 | 92 | archive_name, archive_root, file = self._archive_name(i) 93 | with unpack(self.root / archive_name, file, archive_root, '.zip') as (unpacked, is_unpacked): 94 | if is_unpacked: 95 | return np.asarray(nibabel.load(unpacked).dataobj) 96 | else: 97 | with open_nii_gz_file(unpacked) as image: 98 | return np.asarray(image.dataobj) 99 | 100 | @field 101 | def affine(self, i): 102 | """The 4x4 matrix that gives the image's spatial orientation.""" 103 | if i in ERRORS: 104 | return None # this image is damaged in the archive 105 | 106 | archive_name, archive_root, file = self._archive_name(i) 107 | with unpack(self.root / archive_name, file, archive_root, '.zip') as (unpacked, is_unpacked): 108 | if is_unpacked: 109 | return nibabel.load(unpacked).affine 110 | else: 111 | with open_nii_gz_file(unpacked) as image: 112 | return image.affine 113 | 114 | @field 115 | def mask(self, i): 116 | if i not in self._id2split: 117 | return 118 | 119 | file = f'labels{self._id2split[i]}/amos_{i}.nii.gz' 120 | try: 121 | with unpack(self.root / ARCHIVE_NAME_SEG, file, ARCHIVE_ROOT_NAME, '.zip') as (unpacked, is_unpacked): 122 | if is_unpacked: 123 | return np.asarray(nibabel.load(unpacked).dataobj) 124 | else: 125 | with open_nii_gz_file(unpacked) as image: 126 | return np.asarray(image.dataobj) 127 | except FileNotFoundError: 128 | return 129 | 130 | @field 131 | def image_modality(self, i): 132 | """Returns image modality, `CT` or `MRI`.""" 133 | if 500 < int(i) <= 600: 134 | return 'MRI' 135 | return 'CT' 136 | 137 | # labels 138 | @field 139 | def birth_date(self, i): 140 | return self._label(i, "Patient's Birth Date") 141 | 142 | @field 143 | def sex(self, i): 144 | return self._label(i, "Patient's Sex") 145 | 146 | @field 147 | def age(self, i): 148 | return self._label(i, "Patient's Age") 149 | 150 | @field 151 | def manufacturer_model(self, i): 152 | return self._label(i, "Manufacturer's Model Name") 153 | 154 | @field 155 | def manufacturer(self, i): 156 | return self._label(i, 'Manufacturer') 157 | 158 | @field 159 | def acquisition_date(self, i): 160 | return self._label(i, 'Acquisition Date') 161 | 162 | @field 163 | 
def site(self, i): 164 | return self._label(i, 'Site') 165 | 166 | @cached_property 167 | @composed(dict) 168 | def _id2split(self): 169 | with ZipFile(self.root / ARCHIVE_NAME_SEG) as zf: 170 | for x in zf.namelist(): 171 | if (len(x.strip('/').split('/')) == 3) and x.endswith('.nii.gz'): 172 | file, split = x.split('/')[-1], x.split('/')[-2][-2:] 173 | id_ = file.split('.')[0].split('_')[-1] 174 | 175 | yield id_, split 176 | 177 | @cached_property 178 | def _meta(self): 179 | files = [ 180 | 'labeled_data_meta_0000_0599.csv', 181 | 'unlabeled_data_meta_5400_5899.csv', 182 | 'unlabeled_data_meta_5000_5399.csv', 183 | 'unlabeled_data_meta_5900_6199.csv', 184 | ] 185 | 186 | dfs = [] 187 | for file in files: 188 | with unpack(self.root, file) as (unpacked, _): 189 | dfs.append(pd.read_csv(unpacked)) 190 | return pd.concat(dfs) 191 | 192 | def _archive_name(self, i): 193 | if i in self._id2split: 194 | return ARCHIVE_NAME_SEG, ARCHIVE_ROOT_NAME, f'images{self._id2split[i]}/amos_{i}.nii.gz' 195 | 196 | i = int(i) 197 | file = f'amos_{i}.nii.gz' 198 | if 5000 <= i < 5400: 199 | return 'amos22_unlabeled_ct_5000_5399.zip', 'amos_unlabeled_ct_5000_5399', file 200 | elif 5400 <= i < 5900: 201 | return 'amos22_unlabeled_ct_5400_5899.zip', 'amos_unlabeled_ct_5400_5899', file 202 | elif 5900 <= i < 6200: 203 | return 'amos22_unlabeled_ct_5900_6199.zip', 'amos22_unlabeled_ct_5900_6199', file 204 | else: 205 | return 'amos22_unlabeled_ct_6200_6899.zip', 'amos22_unlabeled_6200_6899', file 206 | 207 | def _label(self, i, column): 208 | # ambiguous data in meta 209 | if int(i) in [500, 600]: 210 | return None 211 | elif int(i) not in self._meta['amos_id'].values: # `in` on a Series checks the index, so compare against the values 212 | return None 213 | 214 | return self._meta[self._meta['amos_id'] == int(i)][column].item() 215 | -------------------------------------------------------------------------------- /amid/lidc/dataset.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | from typing import List, Tuple, Union 4 | 5 | import numpy as np 6 | import pylidc as pl 7 | from dicom_csv import ( 8 | Series, 9 | expand_volumetric, 10 | get_common_tag, 11 | get_orientation_matrix, 12 | get_tag, 13 | order_series, 14 | stack_images, 15 | ) 16 | from pylidc.utils import consensus 17 | from scipy import stats 18 | 19 | from ..internals import Dataset, field, licenses, register 20 | from ..utils import PathOrStr, get_series_date 21 | from .nodules import get_nodule 22 | from .typing import LIDCNodule 23 | 24 | 25 | @register( 26 | body_region='Chest', 27 | license=licenses.CC_BY_30, 28 | link='https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=1966254', 29 | modality='CT', 30 | prep_data_size='71,2G', 31 | raw_data_size='126G', 32 | task='Lung nodules segmentation', 33 | ) 34 | class LIDC(Dataset): 35 | """ 36 | The (L)ung (I)mage (D)atabase (C)onsortium image collection (LIDC-IDRI) [1]_ 37 | consists of diagnostic and lung cancer screening thoracic computed tomography (CT) scans 38 | with marked-up annotated lesions and a lung nodule segmentation task. 39 | Scans contain multiple expert annotations. 40 | 41 | Number of CT scans: 1018. 42 | 43 | Parameters 44 | ---------- 45 | root : str, Path, optional 46 | path to the folder containing the raw downloaded archives. 47 | If not provided, the cache is assumed to be already populated. 48 | 49 | Notes 50 | ----- 51 | Follow the download instructions at https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=1966254.
52 | 53 | Then, the folder with raw downloaded data should contain folder `LIDC-IDRI`, 54 | which contains folders `LIDC-IDRI-*`. 55 | 56 | Examples 57 | -------- 58 | >>> # Place the downloaded archives in any folder and pass the path to the constructor: 59 | >>> ds = LIDC(root='/path/to/downloaded/data/folder/') 60 | >>> print(len(ds.ids)) 61 | # 1018 62 | >>> print(ds.image(ds.ids[0]).shape) 63 | # (512, 512, 194) 64 | >>> print(ds.cancer(ds.ids[0]).shape) 65 | # (512, 512, 194) 66 | 67 | References 68 | ---------- 69 | .. [1] Armato III, McLennan, et al. "The lung image database consortium (lidc) and image database 70 | resource initiative (idri): a completed reference database of lung nodules on ct scans." 71 | Medical physics 38(2) (2011): 915–931. 72 | https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3041807/ 73 | """ 74 | 75 | def __init__(self, root: PathOrStr): 76 | super().__init__(root) 77 | self._check_config() 78 | 79 | def _check_config(self): 80 | pylidc_config_start = '[dicom]\npath = ' 81 | if os.path.exists(os.path.expanduser('~/.pylidcrc')): 82 | with open(os.path.expanduser('~/.pylidcrc'), 'r') as config_file: 83 | content = config_file.read() 84 | if content == f'{pylidc_config_start}{self.root}': 85 | return 86 | 87 | # save _root path to ~/.pylidcrc file for pylidc 88 | with open(os.path.expanduser('~/.pylidcrc'), 'w') as config_file: 89 | config_file.write(f'{pylidc_config_start}{self.root}') 90 | 91 | @property 92 | def ids(self): 93 | result = [scan.series_instance_uid for scan in pl.query(pl.Scan).all()] 94 | return tuple(sorted(result)) 95 | 96 | def _scan(self, i) -> pl.Scan: 97 | _id = i.split('_')[-1] 98 | return pl.query(pl.Scan).filter(pl.Scan.series_instance_uid == _id).first() 99 | 100 | def _series(self, i) -> Series: 101 | series = expand_volumetric(self._scan(i).load_all_dicom_images(verbose=False)) 102 | series = order_series(series) 103 | return series 104 | 105 | def _shape(self, i) -> Tuple[int, int, int]: 106 | return stack_images(self._series(i), -1).shape 107 | 108 | @field 109 | def image(self, i) -> np.ndarray: 110 | return self._scan(i).to_volume(verbose=False) 111 | 112 | @field 113 | def study_uid(self, i) -> str: 114 | return self._scan(i).study_instance_uid 115 | 116 | @field 117 | def series_uid(self, i) -> str: 118 | return self._scan(i).series_instance_uid 119 | 120 | @field 121 | def patient_id(self, i) -> str: 122 | return self._scan(i).patient_id 123 | 124 | @field 125 | def sop_uids(self, i) -> List[str]: 126 | return [str(get_tag(i, 'SOPInstanceUID')) for i in self._series(i)] 127 | 128 | @field 129 | def pixel_spacing(self, i) -> List[float]: 130 | spacing = self._scan(i).pixel_spacing 131 | return [spacing, spacing] 132 | 133 | @field 134 | def slice_locations(self, i) -> np.ndarray: 135 | return self._scan(i).slice_zvals 136 | 137 | # @field 138 | def spacing(self, i) -> Tuple[float, float, float]: 139 | """ 140 | Volumetric spacing of the image. 141 | The maximum relative difference in `slice_locations` < 1e-3 142 | (except 4 images listed below), 143 | so we allow ourselves to use the common spacing for the whole 3D image. 144 | 145 | Note 146 | ---- 147 | The `slice_locations` attribute typically (but not always!) has the constant step. 
148 | In LIDC dataset, only 4 images have difference in `slice_locations` > 1e-3: 149 | 1.3.6.1.4.1.14519.5.2.1.6279.6001.526570782606728516388531252230 150 | 1.3.6.1.4.1.14519.5.2.1.6279.6001.329334252028672866365623335798 151 | 1.3.6.1.4.1.14519.5.2.1.6279.6001.245181799370098278918756923992 152 | 1.3.6.1.4.1.14519.5.2.1.6279.6001.103115201714075993579787468219 153 | And these differences appear in the maximum of 3 slices. 154 | Therefore, we consider their impact negligible. 155 | """ 156 | return (*self.pixel_spacing(i), stats.mode(np.diff(self.slice_locations(i)))[0].item()) 157 | 158 | @field 159 | def contrast_used(self, i) -> bool: 160 | """If the DICOM file for the scan had any Contrast tag, this is marked as `True`.""" 161 | return self._scan(i).contrast_used 162 | 163 | @field 164 | def is_from_initial(self, i) -> bool: 165 | """ 166 | Indicates whether or not this PatientID was tagged as 167 | part of the initial 399 release. 168 | """ 169 | return self._scan(i).is_from_initial 170 | 171 | @field 172 | def orientation_matrix(self, i) -> np.ndarray: 173 | return get_orientation_matrix(self._series(i)) 174 | 175 | @field 176 | def sex(self, i) -> Union[str, None]: 177 | return get_common_tag(self._series(i), 'PatientSex', default=None) 178 | 179 | @field 180 | def age(self, i) -> Union[str, None]: 181 | return get_common_tag(self._series(i), 'PatientAge', default=None) 182 | 183 | @field 184 | def conv_kernel(self, i) -> Union[str, None]: 185 | return get_common_tag(self._series(i), 'ConvolutionKernel', default=None) 186 | 187 | @field 188 | def kvp(self, i) -> Union[str, None]: 189 | return get_common_tag(self._series(i), 'KVP', default=None) 190 | 191 | @field 192 | def tube_current(self, i) -> Union[str, None]: 193 | return get_common_tag(self._series(i), 'XRayTubeCurrent', default=None) 194 | 195 | @field 196 | def study_date(self, i) -> Union[datetime.date, None]: 197 | return get_series_date(self._series(i)) 198 | 199 | @field 200 | def accession_number(self, i) -> Union[str, None]: 201 | return get_common_tag(self._series(i), 'AccessionNumber', default=None) 202 | 203 | @field 204 | def nodules(self, i) -> List[List[LIDCNodule]]: 205 | nodules = [] 206 | for anns in self._scan(i).cluster_annotations(): 207 | nodule_annotations = [] 208 | for ann in anns: 209 | nodule_annotations.append(get_nodule(ann)) 210 | nodules.append(nodule_annotations) 211 | return nodules 212 | 213 | @field 214 | def nodules_masks(self, i) -> List[List[np.ndarray]]: 215 | nodules = [] 216 | for anns in self._scan(i).cluster_annotations(): 217 | nodule_annotations = [] 218 | for ann in anns: 219 | nodule_annotations.append(ann.boolean_mask()) 220 | nodules.append(nodule_annotations) 221 | return nodules 222 | 223 | @field 224 | def cancer(self, i) -> np.ndarray: 225 | cancer = np.zeros(self._shape(i), dtype=bool) 226 | for anns in self._scan(i).cluster_annotations(): 227 | cancer |= consensus(anns, pad=np.inf)[0] 228 | 229 | return cancer 230 | --------------------------------------------------------------------------------
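A minimal usage sketch for the `LIDC` class above (an editor's illustration, not part of the repository). It assumes the `amid` package is installed and that the raw LIDC-IDRI DICOM folders were downloaded under `root` as described in the class Notes; the path below is a placeholder.

    from amid.lidc.dataset import LIDC  # the module shown directly above

    lidc = LIDC(root='/path/to/folder/containing/LIDC-IDRI')  # also writes the ~/.pylidcrc config used by pylidc
    i = lidc.ids[0]
    image = lidc.image(i)      # 3D CT volume, e.g. (512, 512, 194)
    cancer = lidc.cancer(i)    # boolean consensus mask of the same shape
    spacing = lidc.spacing(i)  # (x, y, z) voxel spacing
    nodules = lidc.nodules(i)  # per-nodule lists of expert annotations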