├── lint.sh
├── amid
│   ├── __version__.py
│   ├── amos
│   │   ├── __init__.py
│   │   └── dataset.py
│   ├── mslub
│   │   ├── __init__.py
│   │   └── dataset.py
│   ├── ribfrac
│   │   ├── __init__.py
│   │   └── dataset.py
│   ├── upenn_gbm
│   │   ├── __init__.py
│   │   ├── data_classes.py
│   │   └── upenn_gbm.py
│   ├── cancer_500
│   │   ├── __init__.py
│   │   ├── typing.py
│   │   ├── dataset.py
│   │   └── nodules.py
│   ├── rsna_bc
│   │   ├── __init__.py
│   │   ├── utils.py
│   │   └── dataset.py
│   ├── totalsegmentator
│   │   ├── __init__.py
│   │   ├── utils.py
│   │   ├── const.py
│   │   └── dataset.py
│   ├── lits
│   │   ├── __init__.py
│   │   ├── transforms.py
│   │   └── dataset.py
│   ├── internals
│   │   ├── __init__.py
│   │   ├── dataset.py
│   │   ├── licenses.py
│   │   └── registry.py
│   ├── vs_seg
│   │   ├── __init__.py
│   │   └── transforms.py
│   ├── cc359
│   │   ├── __init__.py
│   │   ├── transforms.py
│   │   └── dataset.py
│   ├── lidc
│   │   ├── __init__.py
│   │   ├── transforms.py
│   │   ├── typing.py
│   │   ├── nodules.py
│   │   └── dataset.py
│   ├── transforms.py
│   ├── __init__.py
│   ├── hcp.py
│   ├── kits.py
│   ├── covid_1110.py
│   ├── cl_detection.py
│   ├── tbad.py
│   ├── liver_medseg.py
│   ├── medseg9.py
│   ├── curvas.py
│   ├── nlst.py
│   ├── utils.py
│   ├── crlm.py
│   ├── luna25.py
│   ├── brats2021.py
│   ├── egd.py
│   ├── flare2022.py
│   ├── crossmoda.py
│   ├── ct_ich.py
│   ├── verse.py
│   ├── mood.py
│   ├── deeplesion.py
│   └── msd.py
├── tests
│   ├── requirements.txt
│   └── test_consistency.py
├── .gitignore
├── MANIFEST.in
├── lint-requirements.txt
├── docs
│   ├── javascript
│   │   ├── tablesort.js
│   │   └── tablesort.filesize.js
│   ├── requirements.txt
│   ├── index.md
│   ├── datasets-api.md
│   ├── fill_readme.py
│   ├── fill_docs.py
│   ├── mkdocstrings_handlers
│   │   └── python_connectome.py
│   ├── CONTRIBUTING.md
│   └── recipes
│       └── RSNABreastCancer.ipynb
├── requirements.txt
├── .github
│   └── workflows
│       ├── lint.yml
│       ├── docs.yml
│       ├── release.yml
│       └── tests.yml
├── .flake8
├── LICENSE
├── pyproject.toml
└── mkdocs.yml
/lint.sh:
--------------------------------------------------------------------------------
1 | flake8 .; black .; isort .
2 |
--------------------------------------------------------------------------------
/amid/__version__.py:
--------------------------------------------------------------------------------
1 | __version__ = '0.15.0'
2 |
--------------------------------------------------------------------------------
/amid/amos/__init__.py:
--------------------------------------------------------------------------------
1 | from .dataset import AMOS
2 |
--------------------------------------------------------------------------------
/amid/mslub/__init__.py:
--------------------------------------------------------------------------------
1 | from .dataset import MSLUB
2 |
--------------------------------------------------------------------------------
/amid/ribfrac/__init__.py:
--------------------------------------------------------------------------------
1 | from .dataset import RibFrac
2 |
--------------------------------------------------------------------------------
/amid/upenn_gbm/__init__.py:
--------------------------------------------------------------------------------
1 | from .upenn_gbm import UPENN_GBM
2 |
--------------------------------------------------------------------------------
/amid/cancer_500/__init__.py:
--------------------------------------------------------------------------------
1 | from .dataset import MoscowCancer500
2 |
--------------------------------------------------------------------------------
/amid/rsna_bc/__init__.py:
--------------------------------------------------------------------------------
1 | from .dataset import RSNABreastCancer
2 |
--------------------------------------------------------------------------------
/tests/requirements.txt:
--------------------------------------------------------------------------------
1 | pytest
2 | pytest-cov
3 | pytest-subtests
4 |
--------------------------------------------------------------------------------
/amid/totalsegmentator/__init__.py:
--------------------------------------------------------------------------------
1 | from .dataset import Totalsegmentator
2 |
--------------------------------------------------------------------------------
/amid/lits/__init__.py:
--------------------------------------------------------------------------------
1 | from .dataset import LiTS
2 | from .transforms import CanonicalCTOrientation, Rescale
3 |
--------------------------------------------------------------------------------
/amid/internals/__init__.py:
--------------------------------------------------------------------------------
1 | from .dataset import Dataset, field
2 | from .registry import gather_datasets, register
3 |
--------------------------------------------------------------------------------
/amid/vs_seg/__init__.py:
--------------------------------------------------------------------------------
1 | from .dataset import VSSEG
2 | from .transforms import CanonicalMRIOrientation, Rescale
3 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | __pycache__/
3 | .pytest_cache/
4 | *.egg-info/
5 | .coverage
6 | docs/build/
7 | docs/source/_*/
8 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.md
2 | include requirements.txt
3 | include LICENSE
4 | recursive-include amid *.py .bev.yml *.hash
5 |
--------------------------------------------------------------------------------
/amid/cc359/__init__.py:
--------------------------------------------------------------------------------
1 | from .dataset import CC359, open_nii_gz_file
2 | from .transforms import CanonicalMRIOrientation, Rescale
3 |
--------------------------------------------------------------------------------
/lint-requirements.txt:
--------------------------------------------------------------------------------
1 | black
2 | flake8<=5
3 | flake8-tidy-imports
4 | flake8-quotes
5 | flake8-bugbear
6 | flake8-comprehensions
7 | isort
8 |
--------------------------------------------------------------------------------
/amid/lidc/__init__.py:
--------------------------------------------------------------------------------
1 | from .dataset import LIDC
2 | from .transforms import CanonicalCTOrientation, Rescale
3 |
4 |
5 | # TODO: remove pylidc dependency
6 |
--------------------------------------------------------------------------------
/docs/javascript/tablesort.js:
--------------------------------------------------------------------------------
1 | document$.subscribe(function() {
2 | var tables = document.querySelectorAll("article table:not([class])")
3 | tables.forEach(function(table) {
4 | new Tablesort(table)
5 | })
6 | })
7 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | connectome>=0.10.0,<1.0.0
2 | numpy
3 | nibabel
4 | more-itertools
5 | dicom-csv
6 | tqdm
7 | pandas
8 | pylidc
9 | joblib
10 | deli<1.0.0
11 | scipy
12 | scikit-image
13 | pydicom
14 | imops
15 | highdicom
16 | SimpleITK
17 |
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | mkdocs==1.5.3
2 | mkdocs-material==9.4.1
3 | mkdocstrings[python]==0.22.0
4 | mkdocs-jupyter==0.24.2
5 | mkdocs-exclude==1.0.2
6 | mkdocs-autorefs==0.4.1
7 | mike
8 | pandas
9 | tabulate
10 | ipython-genutils
11 | griffe==0.29.1
12 | mkdocs-material-extensions==1.2
13 | mkdocstrings-python==1.1.2
14 |
--------------------------------------------------------------------------------
/.github/workflows/lint.yml:
--------------------------------------------------------------------------------
1 | name: Linters
2 |
3 | on: [ pull_request ]
4 |
5 | jobs:
6 | test:
7 | runs-on: ubuntu-20.04
8 | steps:
9 | - uses: actions/checkout@v3
10 | - name: Set up Python
11 | uses: actions/setup-python@v4
12 | with:
13 | python-version: '3.11'
14 |
15 | - name: Check code style
16 | run: |
17 | pip install -r lint-requirements.txt
18 | flake8 .
19 | isort --check .
20 | black --check .
21 |
--------------------------------------------------------------------------------
/amid/cancer_500/typing.py:
--------------------------------------------------------------------------------
1 | from enum import Enum
2 | from typing import NamedTuple, Optional, Sequence
3 |
4 |
5 | class Texture(Enum):
6 | Solid, PartSolid, GroundGlass, Other = 0, 1, 2, 3
7 |
8 |
9 | class Review(Enum):
10 | Confirmed, ConfirmedPartially, Doubt, Rejected = 0, 1, 2, 3
11 |
12 |
13 | class Comment(Enum):
14 | Fibrosis, LymphNode, Calcium, Calcified, Bronchiectasis, Vessel = 0, 1, 2, 3, 4, 5
15 |
16 |
17 | class Cancer500Nodule(NamedTuple):
18 | center_voxel: Sequence[int]
19 | review: Review
20 | comment: Optional[Comment] = None
21 | diameter_mm: Optional[float] = None
22 | texture: Optional[Texture] = None
23 | malignancy: Optional[bool] = None
24 |
--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | # W503: line break before binary operator is actually considered best-practice
3 | # E203: spaces around complex variables in slices are pep-right
4 | # F401: unused imports in __init__.py-s
5 | # I251: allow absolute imports in upper files
6 | # B019: @lru_cache for static methods is fine
7 | # B008: calling the function as default value is just part of the typer's interface
8 | # C408: for identifier-like fields dict(x=y) is just more concise
9 | ignore = W503,E203,B019,B028,C408,Q000
10 | per-file-ignores =
11 | **/__init__.py:F401
12 | scripts/*:I251
13 | tests/*:I251
14 | docs/*:I251
15 | amid/internals/cli.py:B008
16 | max-line-length = 120
17 | banned-modules =
18 | amid.* = Use relative imports
19 |
--------------------------------------------------------------------------------
/amid/rsna_bc/utils.py:
--------------------------------------------------------------------------------
1 | import contextlib
2 | import zipfile
3 | from pathlib import Path
4 |
5 | import pandas as pd
6 |
7 | from ..internals.dataset import register_field
8 |
9 |
10 | def csv_field(name, cast):
11 | def _loader(self, i):
12 | value = self._meta[i].get(name)
13 | if pd.isnull(value):
14 | return None
15 | if cast is not None:
16 | return cast(value)
17 | return value
18 |
19 | register_field('RSNABreastCancer', name, _loader)
20 | return _loader
21 |
22 |
23 | @contextlib.contextmanager
24 | def unpack(root: str, relative: str):
25 | unpacked = Path(root) / relative
26 |
27 | if unpacked.exists():
28 | yield unpacked, True
29 | else:
30 | with zipfile.Path(root, relative).open('rb') as unpacked:
31 | yield unpacked, False
32 |
--------------------------------------------------------------------------------
/amid/internals/dataset.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from typing import Sequence
3 |
4 | from connectome import ExternalBase
5 |
6 | from ..utils import PathOrStr
7 |
8 |
9 | class Dataset(ExternalBase):
10 | _path: str
11 | _fields: Sequence[str] = None
12 |
13 | def __init__(self, root: PathOrStr):
14 | fields = None
15 | if hasattr(self, '_fields'):
16 | fields = self._fields
17 |
18 | super().__init__(fields=fields, inputs=['id'], properties=['ids'], inherit=['id'])
19 | self.root = Path(root)
20 |
21 | @classmethod
22 | def __getversion__(cls):
23 | return 0
24 |
25 |
26 | _Fields = {}
27 |
28 |
29 | def register_field(cls, name, func):
30 | _Fields.setdefault(cls, {})[name] = func
31 |
32 |
33 | def field(func):
34 | cls, name = func.__qualname__.split('.')
35 | register_field(cls, name, func)
36 | return func
37 |
--------------------------------------------------------------------------------
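The `Dataset` base class and `field` decorator above are the building blocks that every dataset module uses. A minimal sketch of how a new dataset would plug into them, modeled on `amid/kits.py` further below (the class name, metadata values and file layout are hypothetical):

```python
import nibabel as nb
import numpy as np

from amid.internals import Dataset, field, register


@register(
    body_region='Thorax',        # free-form metadata shown in the docs/README tables
    modality='CT',
    task='Example segmentation',
    link='https://example.org',  # hypothetical link
)
class MyCTDataset(Dataset):      # hypothetical dataset
    @property
    def ids(self):
        # ids are derived from the raw files under `self.root`
        return tuple(sorted(p.name for p in (self.root / 'cases').glob('*')))

    @field
    def image(self, i) -> np.ndarray:
        # `field` registers this loader, so it is exposed as `ds.image(i)`
        return np.int16(nb.load(self.root / 'cases' / i / 'image.nii.gz').get_fdata())
```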
/docs/index.md:
--------------------------------------------------------------------------------
1 | Awesome Medical Imaging Datasets (AMID) - a curated list of medical imaging datasets with unified interfaces
2 |
3 | ## Getting started
4 |
5 | Just import a dataset and start using it!
6 |
7 | Note that for some datasets you must manually download the raw files first.
8 |
9 | ```python
10 | from amid.verse import VerSe
11 |
12 | ds = VerSe()
13 | # get the available ids
14 | print(len(ds.ids))
15 | i = ds.ids[0]
16 |
17 | # use the available methods:
18 | # load the image and vertebrae masks
19 | x, y = ds.image(i), ds.masks(i)
20 | print(ds.split(i), ds.patient(i))
21 |
22 | # or get a namedtuple-like object:
23 | entry = ds(i)
24 | x, y = entry.image, entry.masks
25 | print(entry.split, entry.patient)
26 | ```
27 |
28 | ## Install
29 |
30 | Just get it from PyPI:
31 |
32 | ```shell
33 | pip install amid
34 | ```
35 |
36 | Or if you want to use version control features:
37 |
38 | ```shell
39 | git clone https://github.com/neuro-ml/amid.git
40 | cd amid && pip install -e .
41 | ```
42 |
--------------------------------------------------------------------------------
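The getting-started snippet above constructs `VerSe()` without arguments; for datasets that require manually downloaded raw files, the data folder is passed explicitly. A sketch, assuming `VerSe` follows the common `Dataset(root=...)` constructor used throughout the package (the path is a placeholder):

```python
from amid.verse import VerSe

# placeholder path to the manually downloaded VerSe files
ds = VerSe(root='/data/verse')
print(len(ds.ids))
```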
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2022-2024 NeuroML Group
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/amid/transforms.py:
--------------------------------------------------------------------------------
1 | import nibabel
2 | import numpy as np
3 | from connectome import Output, Transform
4 |
5 |
6 | class SpacingFromAffine(Transform):
7 | __inherit__ = True
8 |
9 | def spacing(affine):
10 | return nibabel.affines.voxel_sizes(affine)
11 |
12 |
13 | class ParseAffineMatrix(Transform):
14 | """Splits affine matrix into separate methods for more convenient usage.
15 |
16 | Examples
17 | --------
18 | >>> dataset = Dataset()
19 | >>> dataset.voxel_spacing(id_)
20 | # FieldError
21 | >>> dataset = dataset >> ParseAffineMatrix()
22 | >>> dataset.voxel_spacing(id_)
23 | # array([1.5, 1.5, 1.5])
24 | """
25 |
26 | __inherit__ = True
27 |
28 | def origin(affine):
29 | """Constructs an origin tensor from the given affine matrix."""
30 | return affine[:-1, -1]
31 |
32 | def spacing(affine):
33 |         """Constructs a voxel spacing tensor from the given affine matrix."""
34 | return np.linalg.norm(affine[:3, :3], axis=0)
35 |
36 | def orientation(affine, spacing: Output):
37 | """Constructs an orientation matrix from the given affine matrix."""
38 | return np.divide(affine[:3, :3], spacing)
39 |
--------------------------------------------------------------------------------
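`SpacingFromAffine` above carries no usage example of its own; a sketch following the chaining pattern from the `ParseAffineMatrix` docstring (the dataset choice and root path are assumptions):

```python
from amid import LiTS
from amid.transforms import SpacingFromAffine

# placeholder path to the raw LiTS files
ds = LiTS(root='/data/lits') >> SpacingFromAffine()
i = ds.ids[0]
print(ds.spacing(i))  # voxel sizes derived from the affine matrix
```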
/amid/__init__.py:
--------------------------------------------------------------------------------
1 | from connectome.cache import unstable_module
2 |
3 | from .__version__ import __version__
4 | from .amos import AMOS
5 | from .bimcv import BIMCVCovid19
6 | from .brats2021 import BraTS2021
7 | from .cancer_500 import MoscowCancer500
8 | from .cc359 import CC359
9 | from .cl_detection import CLDetection2023
10 | from .covid_1110 import MoscowCovid1110
11 | from .crlm import CRLM
12 | from .crossmoda import CrossMoDA
13 | from .ct_ich import CT_ICH
14 | from .curvas import CURVAS
15 | from .deeplesion import DeepLesion
16 | from .egd import EGD
17 | from .flare2022 import FLARE2022
18 | from .hcp import HCP
19 | from .lidc import LIDC
20 | from .lits import LiTS
21 | from .liver_medseg import LiverMedseg
22 | from .luna25 import LUNA25
23 | from .medseg9 import Medseg9
24 | from .midrc import MIDRC
25 | from .mood import MOOD
26 | from .mslub import MSLUB
27 | from .nlst import NLST
28 | from .nsclc import NSCLC
29 | from .ribfrac import RibFrac
30 | from .rsna_bc import RSNABreastCancer
31 | from .stanford_coca import StanfordCoCa
32 | from .totalsegmentator import Totalsegmentator
33 | from .upenn_gbm import UPENN_GBM
34 | from .verse import VerSe
35 | from .vs_seg import VSSEG
36 |
37 |
38 | unstable_module(__name__)
39 |
--------------------------------------------------------------------------------
/.github/workflows/docs.yml:
--------------------------------------------------------------------------------
1 | name: Docs
2 |
3 | on:
4 | release:
5 | types: [ released ]
6 | push:
7 | branches:
8 | - dev
9 |
10 | jobs:
11 | deploy:
12 | runs-on: ubuntu-latest
13 | steps:
14 | - uses: actions/checkout@v3
15 | - uses: fregante/setup-git-user@v1
16 | - run: git fetch origin gh-pages --depth=1
17 | - uses: actions/setup-python@v2
18 | with:
19 | python-version: '3.10'
20 | - name: Install
21 | run: |
22 | pip install -e .
23 | pip install -r docs/requirements.txt
24 |
25 | # release
26 | - id: get_version
27 | if: github.event_name == 'release'
28 | name: Get the release version
29 | uses: battila7/get-version-action@v2
30 |
31 | - name: Deploy the docs
32 | if: github.event_name == 'release'
33 | run: |
34 | PYTHONPATH=$PYTHONPATH:./docs mike deploy ${{ steps.get_version.outputs.version-without-v }} latest --push --update-aliases
35 |
36 | # dev
37 | # - name: Deploy the docs
38 | # if: ${{ github.ref == 'refs/heads/dev' }}
39 | # run: |
40 | # cd docs
41 | # VERSION=dev python fill_docs.py
42 | # cd ..
43 | # PYTHONPATH=$PYTHONPATH:./docs mike deploy dev --push
44 |
--------------------------------------------------------------------------------
/docs/datasets-api.md:
--------------------------------------------------------------------------------
1 | # Datasets API
2 |
3 | ::: amid.amos.dataset.AMOS
4 |
5 | ::: amid.bimcv.BIMCVCovid19
6 |
7 | ::: amid.brats2021.BraTS2021
8 |
9 | ::: amid.cc359.dataset.CC359
10 |
11 | ::: amid.cl_detection.CLDetection2023
12 |
13 | ::: amid.crlm.CRLM
14 |
15 | ::: amid.ct_ich.CT_ICH
16 |
17 | ::: amid.curvas.CURVAS
18 |
19 | ::: amid.crossmoda.CrossMoDA
20 |
21 | ::: amid.deeplesion.DeepLesion
22 |
23 | ::: amid.egd.EGD
24 |
25 | ::: amid.flare2022.FLARE2022
26 |
27 | ::: amid.hcp.HCP
28 |
29 | ::: amid.kits.KiTS23
30 |
31 | ::: amid.lidc.dataset.LIDC
32 |
33 | ::: amid.lits.dataset.LiTS
34 |
35 | ::: amid.liver_medseg.LiverMedseg
36 |
37 | ::: amid.midrc.MIDRC
38 |
39 | ::: amid.mood.MOOD
40 |
41 | ::: amid.msd.MSD
42 |
43 | ::: amid.mslub.dataset.MSLUB
44 |
45 | ::: amid.medseg9.Medseg9
46 |
47 | ::: amid.cancer_500.dataset.MoscowCancer500
48 |
49 | ::: amid.covid_1110.MoscowCovid1110
50 |
51 | ::: amid.nlst.NLST
52 |
53 | ::: amid.nsclc.NSCLC
54 |
55 | ::: amid.rsna_bc.dataset.RSNABreastCancer
56 |
57 | ::: amid.ribfrac.dataset.RibFrac
58 |
59 | ::: amid.stanford_coca.StanfordCoCa
60 |
61 | ::: amid.tbad.TBAD
62 |
63 | ::: amid.totalsegmentator.dataset.Totalsegmentator
64 |
65 | ::: amid.upenn_gbm.upenn_gbm.UPENN_GBM
66 |
67 | ::: amid.vs_seg.dataset.VSSEG
68 |
69 | ::: amid.verse.VerSe
70 |
71 |
--------------------------------------------------------------------------------
/amid/totalsegmentator/utils.py:
--------------------------------------------------------------------------------
1 | import nibabel
2 | import numpy as np
3 |
4 | from ..internals.dataset import register_field
5 | from ..utils import open_nii_gz_file, unpack
6 | from .const import ANATOMICAL_STRUCTURES, LABELS
7 |
8 |
9 | ARCHIVE_ROOT = 'Totalsegmentator_dataset'
10 |
11 |
12 | def label_loader(name):
13 | def loader(self, i):
14 | return self._meta[self._meta['image_id'] == i][name].item()
15 |
16 | register_field('Totalsegmentator', name, loader)
17 | return loader
18 |
19 |
20 | def mask_loader(name):
21 | def loader(self, i):
22 | file = f'{i}/segmentations/{name}.nii.gz'
23 |
24 | with unpack(self.root, file, ARCHIVE_ROOT, '.zip') as (unpacked, is_unpacked):
25 | if is_unpacked:
26 | return np.asarray(nibabel.load(unpacked).dataobj)
27 | else:
28 | with open_nii_gz_file(unpacked) as image:
29 | return np.asarray(image.dataobj)
30 |
31 | register_field('Totalsegmentator', name, loader)
32 | return loader
33 |
34 |
35 | def add_labels(scope):
36 | for label in LABELS:
37 | scope[label] = label_loader(label)
38 |
39 |
40 | def add_masks(scope):
41 | for anatomical_structure in ANATOMICAL_STRUCTURES:
42 | scope[anatomical_structure] = mask_loader(anatomical_structure)
43 |
--------------------------------------------------------------------------------
/docs/fill_readme.py:
--------------------------------------------------------------------------------
1 | import re
2 | from pathlib import Path
3 |
4 | import deli
5 | import pandas as pd
6 | from tqdm import tqdm
7 |
8 | from amid.internals.registry import gather_datasets, prepare_for_table
9 |
10 |
11 | file = Path(__file__).resolve().parent.parent / 'README.md'
12 | with open(file, 'r') as fd:
13 | content = fd.read()
14 |
15 | start = re.search(r'# Available datasets', content).end()
16 | stop = re.search(r'Check out \[our docs\]', content).start()
17 | raw_data = deli.load('/shared/amid/raw.json')
18 | cache_path = '/shared/amid/cache.json'
19 | cache = deli.load(cache_path)
20 |
21 | records = []
22 | for name, (cls, module, description) in tqdm(list(gather_datasets().items())): # noqa
23 | if name in cache:
24 | count = cache[name]
25 | else:
26 | count = len(cls(root=raw_data[name]).ids)
27 | cache[name] = count
28 | deli.save(cache, cache_path)
29 | records.append(prepare_for_table(name, count, module, description, 'latest'))
30 |
31 | table = pd.DataFrame.from_records(records).fillna('')
32 | table.columns = [x.replace('_', ' ').capitalize() for x in table.columns]
33 | table = table[['Name', 'Entries', 'Body region', 'Modality']].to_markdown(index=False)
34 | content = f'{content[:start]}\n\n{table}\n\n{content[stop:]}'
35 |
36 | with open(file, 'w') as fd:
37 | fd.write(content)
38 |
--------------------------------------------------------------------------------
/amid/lidc/transforms.py:
--------------------------------------------------------------------------------
1 | from typing import Sequence, Union
2 |
3 | import numpy as np
4 | from connectome import Transform
5 | from imops import zoom
6 |
7 | from ..utils import Numeric
8 |
9 |
10 | class CanonicalCTOrientation(Transform):
11 | __exclude__ = ('nodules', 'nodules_masks')
12 |
13 | def image(image):
14 | return image[..., ::-1]
15 |
16 | def cancer(cancer):
17 | return cancer[..., ::-1]
18 |
19 |
20 | class Rescale(Transform):
21 | __exclude__ = ('pixel_spacing', 'slice_locations', 'voxel_spacing', 'orientation_matrix')
22 |
23 | _new_spacing: Union[Sequence[Numeric], Numeric]
24 | _order: int = 1
25 |
26 | def _spacing(spacing, _new_spacing):
27 | _new_spacing = np.broadcast_to(_new_spacing, len(spacing)).copy()
28 | _new_spacing[np.isnan(_new_spacing)] = np.array(spacing)[np.isnan(_new_spacing)]
29 | return tuple(_new_spacing.tolist())
30 |
31 | def _scale_factor(spacing, _spacing):
32 | return np.float32(spacing) / np.float32(_spacing)
33 |
34 | def spacing(_spacing):
35 | return _spacing
36 |
37 | def image(image, _scale_factor, _order):
38 | return zoom(image.astype(np.float32), _scale_factor, order=_order)
39 |
40 | def cancer(cancer, _scale_factor, _order):
41 | return zoom(cancer.astype(np.float32), _scale_factor, order=_order) > 0.5
42 |
--------------------------------------------------------------------------------
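A usage sketch for the two transforms above, chained onto LIDC (the root path is a placeholder, and `new_spacing=` assumes connectome's usual mapping of constructor keywords onto the underscore-prefixed parameters):

```python
import numpy as np

from amid import LIDC
from amid.lidc import CanonicalCTOrientation, Rescale

# placeholder path to the raw LIDC files; np.nan keeps the original spacing along that axis
ds = LIDC(root='/data/lidc') >> CanonicalCTOrientation() >> Rescale(new_spacing=(1.0, 1.0, np.nan))
i = ds.ids[0]
image, cancer, spacing = ds.image(i), ds.cancer(i), ds.spacing(i)
```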
/docs/fill_docs.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import deli
4 | import pandas as pd
5 | from tqdm import tqdm
6 |
7 | from amid.__version__ import __version__ as version
8 | from amid.internals.registry import gather_datasets, prepare_for_table
9 |
10 |
11 | # version = os.environ.get('VERSION')
12 | # if not version:
13 | # raise RuntimeError('Please define the "VERSION" env variable')
14 | raw_data = deli.load('/shared/amid/raw.json')
15 | cache_path = '/shared/amid/cache.json'
16 | cache = deli.load(cache_path)
17 |
18 | records = []
19 | root = Path(__file__).resolve().parent
20 | with open(root / 'datasets-api.md', 'w') as file:
21 | file.write('# Datasets API\n\n')
22 | for name, (cls, module, description) in tqdm(list(gather_datasets().items())):
23 | file.write(f'::: {module}.{name}\n\n')
24 | if name in cache:
25 | count = cache[name]
26 | else:
27 | count = len(cls(root=raw_data[name]).ids)
28 | cache[name] = count
29 | deli.save(cache, cache_path)
30 |
31 | records.append(prepare_for_table(name, count, module, description, version))
32 |
33 | table = pd.DataFrame.from_records(records).fillna('')
34 | table.columns = [x.replace('_', ' ').capitalize() for x in table.columns]
35 | with open(root / 'datasets.md', 'w') as file:
36 | file.write('# Datasets\n\n')
37 | file.write(table.to_markdown(index=False))
38 |
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
1 | name: Release
2 |
3 | on:
4 | release:
5 | types: [ released ]
6 |
7 | env:
8 | MODULE_NAME: amid
9 |
10 | jobs:
11 | release:
12 | runs-on: ubuntu-latest
13 |
14 | steps:
15 | - uses: actions/checkout@v3
16 | - name: Set up Python
17 | uses: actions/setup-python@v4
18 | with:
19 | python-version: '3.10'
20 |
21 | - id: get_version
22 | name: Get the release version
23 | uses: battila7/get-version-action@v2
24 |
25 | - name: Check the version and build the package
26 | run: |
27 | RELEASE=${{ steps.get_version.outputs.version-without-v }}
28 | VERSION=$(python -c "from pathlib import Path; import runpy; folder, = {d.parent for d in Path().resolve().glob('*/__init__.py') if d.parent.is_dir() and (d.parent / '__version__.py').exists()}; print(runpy.run_path(folder / '__version__.py')['__version__'])")
29 | MATCH=$(pip index versions $MODULE_NAME | grep "Available versions:" | grep $VERSION) || echo
30 | echo $MATCH
31 | if [ "$GITHUB_BASE_REF" = "master" ] && [ "$MATCH" != "" ]; then echo "Version $VERSION already present" && exit 1; fi
32 | if [ "$VERSION" != "$RELEASE" ]; then echo "$VERSION vs $RELEASE" && exit 1; fi
33 | pip install build
34 | python -m build --sdist
35 |
36 | - name: Publish to PyPi
37 | uses: pypa/gh-action-pypi-publish@master
38 | with:
39 | password: ${{ secrets.PYPI_API_TOKEN }}
40 |
--------------------------------------------------------------------------------
/amid/internals/licenses.py:
--------------------------------------------------------------------------------
1 | from typing import NamedTuple
2 |
3 |
4 | class License(NamedTuple):
5 | name: str
6 | url: str
7 |
8 |
9 | CC0_10 = License(name='CC0 1.0', url='https://creativecommons.org/publicdomain/zero/1.0/')
10 | CC_BY_30 = License(
11 | name='CC BY 3.0',
12 | url='https://creativecommons.org/licenses/by/3.0/',
13 | )
14 | CC_BY_40 = License(
15 | name='CC BY 4.0',
16 | url='https://creativecommons.org/licenses/by/4.0/',
17 | )
18 | CC_BYNC_40 = License(
19 | name='CC BY-NC 4.0',
20 | url='https://creativecommons.org/licenses/by-nc/4.0/',
21 | )
22 | CC_BYND_40 = License(
23 | name='CC BY-ND 4.0',
24 | url='https://creativecommons.org/licenses/by-nd/4.0/',
25 | )
26 | CC_BYNCND_40 = License(
27 | name='CC BY-NC-ND 4.0',
28 | url='https://creativecommons.org/licenses/by-nc-nd/4.0/',
29 | )
30 | CC_BYSA_40 = License(
31 | name='CC BY-SA 4.0',
32 | url='https://creativecommons.org/licenses/by-sa/4.0/',
33 | )
34 | CC_BYNCSA_40 = License(
35 | name='CC BY-NC-SA 4.0',
36 | url='https://creativecommons.org/licenses/by-nc-sa/4.0/',
37 | )
38 |
39 | PhysioNet_RHD_150 = License(
40 | name='PhysioNet Restricted Health Data License 1.5.0',
41 | url='https://www.physionet.org/about/licenses/physionet-restricted-health-data-license-150/',
42 | )
43 |
44 | StanfordDSResearch = License(
45 | name='Stanford University Dataset Research Use Agreement',
46 | url='https://stanfordaimi.azurewebsites.net/datasets/e8ca74dc-8dd4-4340-815a-60b41f6cb2aa', # TODO: separate link
47 | )
48 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "amid"
3 | dynamic = ["version", "dependencies"]
4 | description = "A curated list of medical imaging datasets with unified interfaces"
5 | readme = "README.md"
6 | requires-python = ">=3.8"
7 | license = { file = "LICENSE" }
8 | keywords = ["medical imaging", "dataset"]
9 | authors = [
10 | { name = "NeuroML Group", email = "max@ira-labs.com" }
11 | ]
12 | classifiers = [
13 | "Development Status :: 4 - Beta",
14 | "License :: OSI Approved :: MIT License",
15 | "Programming Language :: Python :: 3",
16 | "Programming Language :: Python :: 3.8",
17 | "Programming Language :: Python :: 3.9",
18 | "Programming Language :: Python :: 3.10",
19 | "Programming Language :: Python :: 3.11",
20 | "Programming Language :: Python :: 3 :: Only",
21 | ]
22 |
23 | [project.urls]
24 | "Homepage" = "https://github.com/neuro-ml/amid"
25 | "Issues" = "https://github.com/neuro-ml/amid/issues"
26 | "Source" = "https://github.com/neuro-ml/amid"
27 | "Docs" = "https://neuro-ml.github.io/amid"
28 |
29 | [build-system]
30 | requires = ["setuptools>=43.0.0", "wheel"]
31 | build-backend = "setuptools.build_meta"
32 |
33 | [tool.setuptools.packages.find]
34 | include = ["amid"]
35 |
36 | [tool.setuptools.dynamic]
37 | version = { attr = "amid.__version__.__version__" }
38 | dependencies = { file = "requirements.txt" }
39 |
40 | [tool.pytest.ini_options]
41 | markers = [
42 | "raw: tests that require the raw files storage",
43 | ]
44 |
45 | [tool.black]
46 | line-length = 120
47 | skip-string-normalization = true
48 |
49 | [tool.isort]
50 | line_length = 120
51 | lines_after_imports = 2
52 | profile = 'black'
53 | combine_as_imports = true
54 |
--------------------------------------------------------------------------------
/amid/lits/transforms.py:
--------------------------------------------------------------------------------
1 | from typing import Sequence, Union
2 |
3 | import numpy as np
4 | from connectome import Transform
5 | from imops import zoom
6 |
7 | from ..utils import Numeric, propagate_none
8 |
9 |
10 | class CanonicalCTOrientation(Transform):
11 | __inherit__ = True
12 |
13 | def image(image):
14 | return np.transpose(image, (1, 0, 2))[::-1, :, ::-1]
15 |
16 | def mask(mask):
17 | return np.transpose(mask, (1, 0, 2))[::-1, :, ::-1]
18 |
19 | def spacing(spacing):
20 | return tuple(np.array(spacing)[[1, 0, 2]].tolist())
21 |
22 |
23 | class Rescale(Transform):
24 | __exclude__ = (
25 | 'voxel_spacing',
26 | 'affine',
27 | )
28 |
29 | _new_spacing: Union[Sequence[Numeric], Numeric]
30 | _order: int = 1
31 |
32 | def _spacing(spacing, _new_spacing):
33 | _new_spacing = np.broadcast_to(_new_spacing, len(spacing)).copy()
34 | _new_spacing[np.isnan(_new_spacing)] = np.array(spacing)[np.isnan(_new_spacing)]
35 | return tuple(_new_spacing.tolist())
36 |
37 | def _scale_factor(spacing, _spacing):
38 | return np.float32(spacing) / np.float32(_spacing)
39 |
40 | def spacing(_spacing):
41 | return _spacing
42 |
43 | def image(image, _scale_factor, _order):
44 | return zoom(image.astype(np.float32), _scale_factor, order=_order)
45 |
46 | @propagate_none
47 | def mask(mask, _scale_factor, _order):
48 | onehot = np.arange(mask.max() + 1) == mask[..., None]
49 | onehot = onehot.astype(mask.dtype).transpose(3, 0, 1, 2)
50 | out = np.array(zoom(onehot.astype(np.float32), _scale_factor, axis=(1, 2, 3)) > 0.5, dtype=mask.dtype)
51 | labels = out.argmax(axis=0)
52 | return labels
53 |
--------------------------------------------------------------------------------
/amid/lidc/typing.py:
--------------------------------------------------------------------------------
1 | from enum import Enum
2 | from typing import NamedTuple, Optional, Sequence
3 |
4 | import numpy as np
5 |
6 |
7 | class Calcification(Enum):
8 | Popcorn, Laminated, Solid, NonCentral, Central, Absent = 1, 2, 3, 4, 5, 6
9 |
10 |
11 | class InternalStructure(Enum):
12 | SoftTissue, Fluid, Fat, Air = 1, 2, 3, 4
13 |
14 |
15 | class Lobulation(Enum):
16 | NoLobulation, NearlyNoLobulation, MediumLobulation, NearMarkedLobulation, MarkedLobulation = 1, 2, 3, 4, 5
17 |
18 |
19 | class Malignancy(Enum):
20 | HighlyUnlikely, ModeratelyUnlikely, Indeterminate, ModeratelySuspicious, HighlySuspicious = 1, 2, 3, 4, 5
21 |
22 |
23 | class Sphericity(Enum):
24 | Linear, OvoidLinear, Ovoid, OvoidRound, Round = 1, 2, 3, 4, 5
25 |
26 |
27 | class Spiculation(Enum):
28 | NoSpiculation, NearlyNoSpiculation, MediumSpiculation, NearMarkedSpiculation, MarkedSpiculation = 1, 2, 3, 4, 5
29 |
30 |
31 | class Subtlety(Enum):
32 | ExtremelySubtle, ModeratelySubtle, FairlySubtle, ModeratelyObvious, Obvious = 1, 2, 3, 4, 5
33 |
34 |
35 | class Texture(Enum):
36 | NonSolidGGO, NonSolidMixed, PartSolidMixed, SolidMixed, Solid = 1, 2, 3, 4, 5
37 |
38 |
39 | class LIDCNodule(NamedTuple):
40 | center_voxel: Sequence[float]
41 | bbox: np.ndarray
42 | diameter_mm: float
43 | surface_area_mm2: float
44 | volume_mm3: float
45 | calcification: Optional[Calcification] = None
46 | internal_structure: Optional[InternalStructure] = None
47 | lobulation: Optional[Lobulation] = None
48 | malignancy: Optional[Malignancy] = None
49 | sphericity: Optional[Sphericity] = None
50 | spiculation: Optional[Spiculation] = None
51 | subtlety: Optional[Subtlety] = None
52 | texture: Optional[Texture] = None
53 |
--------------------------------------------------------------------------------
/amid/lidc/nodules.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from pylidc import Annotation
3 |
4 | from .typing import (
5 | Calcification,
6 | InternalStructure,
7 | LIDCNodule,
8 | Lobulation,
9 | Malignancy,
10 | Sphericity,
11 | Spiculation,
12 | Subtlety,
13 | Texture,
14 | )
15 |
16 |
17 | def get_nodule(ann: Annotation) -> LIDCNodule:
18 | def init_enum(enum_class, value):
19 | try:
20 | return enum_class(value)
21 | except ValueError:
22 | pass
23 |
24 | bbox = ann.bbox_matrix().T
25 | bbox[1] = bbox[1] + 1
26 |
27 | return LIDCNodule(
28 | center_voxel=ann.centroid,
29 | bbox=bbox,
30 | diameter_mm=ann.diameter,
31 | surface_area_mm2=ann.surface_area,
32 | volume_mm3=ann.volume,
33 | calcification=init_enum(Calcification, ann.calcification),
34 | internal_structure=init_enum(InternalStructure, ann.internalStructure),
35 | lobulation=init_enum(Lobulation, ann.lobulation),
36 | malignancy=init_enum(Malignancy, ann.malignancy),
37 | sphericity=init_enum(Sphericity, ann.sphericity),
38 | spiculation=init_enum(Spiculation, ann.spiculation),
39 | subtlety=init_enum(Subtlety, ann.subtlety),
40 | texture=init_enum(Texture, ann.texture),
41 | )
42 |
43 |
44 | def flip_nodule(nodule: LIDCNodule, n_slices: int) -> LIDCNodule:
45 | bbox = nodule.bbox.copy()
46 | start_slice, stop_slice = bbox[:, -1]
47 | bbox[:, -1] = np.array([n_slices - stop_slice, n_slices - start_slice])
48 |
49 | center_voxel = nodule.center_voxel
50 | center_voxel[-1] = n_slices - center_voxel[-1]
51 |
52 | return nodule._replace(
53 | center_voxel=center_voxel,
54 | bbox=bbox,
55 | )
56 |
--------------------------------------------------------------------------------
/amid/upenn_gbm/data_classes.py:
--------------------------------------------------------------------------------
1 | from typing import NamedTuple
2 |
3 |
4 | class ClinicalInfo(NamedTuple):
5 | gender: str
6 | age_at_scan_years: float
7 | survival_from_surgery_days: int
8 | idh1: str
9 | mgmt: str
10 | kps: str
11 | gtr_over90percent: str
12 | time_since_baseline_preop: int
13 | psp_tp_score: float
14 |
15 |
16 | class AcquisitionInfo(NamedTuple):
17 | manufacturer: str
18 | model: str
19 | magnetic_field_strength: float
20 | t1_imaging_frequency: float
21 | t1_repetition_time: float
22 | t1_echo_time: float
23 | t1_inversion_time: float
24 | t1_flip_angle: float
25 | t1_pixel_spacing: str
26 | t1_slice_thickness: float
27 | t1gd_imaging_frequency: float
28 | t1gd_repetition_time: float
29 | t1gd_echo_time: float
30 | t1gd_inversion_time: float
31 | t1gd_flip_angle: float
32 | t1gd_pixel_spacing: str
33 | t1gd_slice_thickness: float
34 | t2_imaging_frequency: float
35 | t2_repetition_time: float
36 | t2_echo_time: float
37 | t2_flip_angle: float
38 | t2_pixel_spacing: str
39 | t2_slice_thickness: float
40 | flair_imaging_frequency: float
41 | flair_repetition_time: float
42 | flair_echo_time: float
43 | flair_inversion_time: float
44 | flair_flip_angle: float
45 | flair_pixel_spacing: str
46 | flair_slice_thickness: float
47 | dti_imaging_frequency: float
48 | dti_repetition_time: float
49 | dti_echo_time: float
50 | dti_flip_angle: float
51 | dti_pixel_spacing: str
52 | dti_slice_thickness: float
53 | dsc_imaging_frequency: float
54 | dsc_repetition_time: float
55 | dsc_echo_time: float
56 | dsc_flip_angle: float
57 | dsc_pixel_spacing: str
58 | dsc_slice_thickness: float
59 |
--------------------------------------------------------------------------------
/tests/test_consistency.py:
--------------------------------------------------------------------------------
1 | import pickle
2 |
3 | import numpy as np
4 | import pytest
5 |
6 | from amid.internals import gather_datasets
7 |
8 |
9 | MAPPING = gather_datasets()
10 | DATASETS = [x[0] for x in MAPPING.values()]
11 | NAMES = list(MAPPING)
12 |
13 |
14 | @pytest.mark.raw
15 | @pytest.mark.parametrize('cls', DATASETS, ids=NAMES)
16 | def test_ids_availability(cls):
17 | assert len(cls().ids) > 0
18 |
19 |
20 | @pytest.mark.raw
21 | @pytest.mark.parametrize('cls', DATASETS, ids=NAMES)
22 | def test_pickleable(cls):
23 | raw = cls()[0]
24 | cached = cls()
25 | fields = dir(raw)
26 |
27 | for ds in raw, cached:
28 | loader = ds._compile(fields)
29 | pickle.dumps(loader)
30 |
31 | f = cached._compile('ids')
32 | raw = pickle.dumps(f)
33 | g = pickle.loads(raw)
34 | assert f() == g()
35 |
36 |
37 | # @pytest.mark.raw
38 | # @pytest.mark.parametrize('cls', ROOT_MAPPING, ids=[cls.__name__ for cls in ROOT_MAPPING])
39 | # def test_cache_consistency(cls):
40 | # raw = cls(root=ROOT_MAPPING[cls])
41 | # cached = raw.cached()
42 | # fields = {x.name for x in raw._container.outputs} - {'ids', 'id', 'cached'}
43 | #
44 | # ids = raw.ids
45 | # assert ids == cached.ids
46 | # for i in ids:
47 | # for field in fields:
48 | # compare(getattr(raw, field)(i), getattr(cached, field)(i))
49 |
50 |
51 | # TODO: find a package for this
52 | def compare(x, y):
53 | assert type(x) == type(y)
54 | if isinstance(x, (str, int, float, bytes)):
55 | assert x == y
56 | elif isinstance(x, (np.ndarray, np.generic)):
57 | np.testing.assert_allclose(x, y)
58 | elif isinstance(x, (list, tuple)):
59 | list(map(compare, x, y))
60 | else:
61 | raise TypeError(type(x))
62 |
--------------------------------------------------------------------------------
/docs/javascript/tablesort.filesize.js:
--------------------------------------------------------------------------------
1 | // Filesizes. e.g. '5.35 K', '10 MB', '12.45 GB', or '4.67 TiB'
2 | (function(){
3 | var compareNumber = function(a, b) {
4 | a = parseFloat(a);
5 | b = parseFloat(b);
6 |
7 | a = isNaN(a) ? 0 : a;
8 | b = isNaN(b) ? 0 : b;
9 |
10 | return a - b;
11 | },
12 |
13 | cleanNumber = function(i) {
14 | return i.replace(',', '.').replace(/[^\-?0-9.]/g, '');
15 | },
16 |
17 | // Returns suffix multiplier
18 | // Ex. suffix2num('KB') -> 1000
19 | // Ex. suffix2num('KiB') -> 1024
20 | suffix2num = function(suffix) {
21 | suffix = suffix.toLowerCase();
22 | var base = suffix[1] === 'i' ? 1024 : 1000;
23 |
24 | switch(suffix[0]) {
25 | case 'k':
26 | return Math.pow(base, 2);
27 | case 'm':
28 | return Math.pow(base, 3);
29 | case 'g':
30 | return Math.pow(base, 4);
31 | case 't':
32 | return Math.pow(base, 5);
33 | case 'p':
34 | return Math.pow(base, 6);
35 | case 'e':
36 | return Math.pow(base, 7);
37 | case 'z':
38 | return Math.pow(base, 8);
39 | case 'y':
40 | return Math.pow(base, 9);
41 | default:
42 | return base;
43 | }
44 | },
45 |
46 | // Converts filesize to bytes
47 | // Ex. filesize2num('123 KB') -> 123000
48 | // Ex. filesize2num('123 KiB') -> 125952
49 | filesize2num = function(filesize) {
50 | var matches = filesize.match(/^(\d+([.,]\d+)?) ?((K|M|G|T|P|E|Z|Y|B$)i?B?)$/i);
51 |
52 | var num = parseFloat(cleanNumber(matches[1])),
53 | suffix = matches[3];
54 |
55 | return num * suffix2num(suffix);
56 | };
57 |
58 | Tablesort.extend('filesize', function(item) {
59 | return /^\d+([.,]\d+)? ?(K|M|G|T|P|E|Z|Y|B$)i?B?$/i.test(item);
60 | }, function(a, b) {
61 | a = filesize2num(a);
62 | b = filesize2num(b);
63 |
64 | return compareNumber(b, a);
65 | });
66 | }());
67 |
--------------------------------------------------------------------------------
/amid/vs_seg/transforms.py:
--------------------------------------------------------------------------------
1 | from typing import Sequence, Union
2 |
3 | import numpy as np
4 | from connectome import Transform
5 | from imops import zoom
6 |
7 | from ..utils import Numeric, propagate_none
8 |
9 |
10 | class CanonicalMRIOrientation(Transform):
11 | __inherit__ = True
12 |
13 | def image(image):
14 | return np.transpose(image, (1, 0, 2))[..., ::-1]
15 |
16 | def spacing(spacing):
17 | return tuple(np.array(spacing)[[1, 0, 2]].tolist())
18 |
19 | @propagate_none
20 | def schwannoma(schwannoma):
21 | return np.transpose(schwannoma, (1, 0, 2))[..., ::-1]
22 |
23 | @propagate_none
24 | def cochlea(cochlea):
25 | return np.transpose(cochlea, (1, 0, 2))[..., ::-1]
26 |
27 | @propagate_none
28 | def meningioma(meningioma):
29 | return np.transpose(meningioma, (1, 0, 2))[..., ::-1]
30 |
31 |
32 | class Rescale(Transform):
33 | __inherit__ = True
34 |
35 | _new_spacing: Union[Sequence[Numeric], Numeric]
36 | _order: int = 1
37 |
38 | def _spacing(spacing, _new_spacing):
39 | _new_spacing = np.broadcast_to(_new_spacing, len(spacing)).copy()
40 | _new_spacing[np.isnan(_new_spacing)] = np.array(spacing)[np.isnan(_new_spacing)]
41 | return tuple(_new_spacing.tolist())
42 |
43 | def _scale_factor(spacing, _spacing):
44 | return np.float32(spacing) / np.float32(_spacing)
45 |
46 | def spacing(_spacing):
47 | return _spacing
48 |
49 | def image(image, _scale_factor, _order):
50 | return zoom(image.astype(np.float32), _scale_factor, order=_order)
51 |
52 | @propagate_none
53 | def schwannoma(schwannoma, _scale_factor, _order):
54 | return zoom(schwannoma.astype(np.float32), _scale_factor, order=_order) > 0.5
55 |
56 | @propagate_none
57 | def cochlea(cochlea, _scale_factor, _order):
58 | return zoom(cochlea.astype(np.float32), _scale_factor, order=_order) > 0.5
59 |
60 | @propagate_none
61 | def meningioma(meningioma, _scale_factor, _order):
62 | return zoom(meningioma.astype(np.float32), _scale_factor, order=_order) > 0.5
63 |
--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
1 | site_name: 'AMID: Awesome Medical Imaging Datasets'
2 | site_url: https://neuro-ml.github.io/amid
3 | repo_url: https://github.com/neuro-ml/amid
4 |
5 | plugins:
6 | - mkdocs-jupyter
7 | - search
8 | - autorefs
9 | - mike:
10 | canonical_version: latest
11 | - mkdocstrings:
12 | default_handler: python_connectome
13 | handlers:
14 | python_connectome:
15 | options:
16 | docstring_style: numpy
17 | merge_init_into_class: true
18 | members_order: source
19 | show_if_no_docstring: true
20 | show_bases: false
21 | show_signature_annotations: true
22 | show_root_heading: true
23 | show_source: false
24 | heading_level: 3
25 | - exclude:
26 | glob:
27 | - '**/python_connectome.py'
28 | - 'fill_docs.py'
29 | - 'fill_readme.py'
30 |
31 | theme:
32 | name: material
33 | icon:
34 | repo: fontawesome/brands/github-alt
35 | palette:
36 | - media: "(prefers-color-scheme: dark)"
37 | scheme: slate
38 | toggle:
39 | icon: material/lightbulb-outline
40 | name: Switch to light mode
41 | - media: "(prefers-color-scheme: light)"
42 | scheme: default
43 | toggle:
44 | icon: material/lightbulb
45 | name: Switch to dark mode
46 |
47 | markdown_extensions:
48 | - admonition
49 | - tables
50 | - pymdownx.highlight:
51 | anchor_linenums: true
52 | - pymdownx.inlinehilite
53 | - pymdownx.snippets
54 | - pymdownx.details
55 | - pymdownx.superfences
56 | - toc:
57 | toc_depth: 3
58 |
59 | extra:
60 | version:
61 | provider: mike
62 |
63 | extra_javascript:
64 | - https://unpkg.com/tablesort@5.3.0/dist/tablesort.min.js
65 | - https://unpkg.com/tablesort@5.3.0/dist/sorts/tablesort.number.min.js
66 | - https://unpkg.com/tablesort@5.3.0/dist/sorts/tablesort.date.min.js
67 | - https://unpkg.com/tablesort@5.3.0/dist/sorts/tablesort.dotsep.min.js
68 | - https://unpkg.com/tablesort@5.3.0/dist/sorts/tablesort.monthname.min.js
69 | - javascript/tablesort.filesize.js
70 | - javascript/tablesort.js
71 |
--------------------------------------------------------------------------------
/amid/cc359/transforms.py:
--------------------------------------------------------------------------------
1 | from typing import Sequence, Union
2 |
3 | import numpy as np
4 | from connectome import Transform
5 | from imops import zoom
6 |
7 | from ..utils import Numeric, propagate_none
8 |
9 |
10 | class CanonicalMRIOrientation(Transform):
11 | __inherit__ = True
12 |
13 | def image(image):
14 | return np.transpose(image, (1, 0, 2))[::-1, :, ::-1]
15 |
16 | def spacing(spacing):
17 | return tuple(np.array(spacing)[[1, 0, 2]].tolist())
18 |
19 | def brain(brain):
20 | return np.transpose(brain, (1, 0, 2))[::-1, :, ::-1]
21 |
22 | @propagate_none
23 | def hippocampus(hippocampus):
24 | return np.transpose(hippocampus, (1, 0, 2))[::-1, :, ::-1]
25 |
26 | @propagate_none
27 | def wm_gm_csf(wm_gm_csf):
28 | return np.transpose(wm_gm_csf, (1, 0, 2))[::-1, :, ::-1]
29 |
30 |
31 | class Rescale(Transform):
32 | __inherit__ = True
33 |
34 | _new_spacing: Union[Sequence[Numeric], Numeric]
35 | _order: int = 1
36 |
37 | def _spacing(spacing, _new_spacing):
38 | _new_spacing = np.broadcast_to(_new_spacing, len(spacing)).copy()
39 | _new_spacing[np.isnan(_new_spacing)] = np.array(spacing)[np.isnan(_new_spacing)]
40 | return tuple(_new_spacing.tolist())
41 |
42 | def _scale_factor(spacing, _spacing):
43 | return np.float32(spacing) / np.float32(_spacing)
44 |
45 | def spacing(_spacing):
46 | return _spacing
47 |
48 | def image(image, _scale_factor, _order):
49 | return zoom(image.astype(np.float32), _scale_factor, order=_order)
50 |
51 | def brain(brain, _scale_factor, _order):
52 | return zoom(brain.astype(np.float32), _scale_factor, order=_order) > 0.5
53 |
54 | @propagate_none
55 | def hippocampus(hippocampus, _scale_factor, _order):
56 | return zoom(hippocampus.astype(np.float32), _scale_factor, order=_order) > 0.5
57 |
58 | @propagate_none
59 | def wm_gm_csf(wm_gm_csf, _scale_factor, _order):
60 | onehot = np.arange(wm_gm_csf.max() + 1) == wm_gm_csf[..., None]
61 | onehot = onehot.astype(wm_gm_csf.dtype).transpose(3, 0, 1, 2)
62 | out = np.array(zoom(onehot.astype(np.float32), _scale_factor, axis=(1, 2, 3)) > 0.5, dtype=wm_gm_csf.dtype)
63 | labels = out.argmax(axis=0)
64 | return labels
65 |
--------------------------------------------------------------------------------
/amid/internals/registry.py:
--------------------------------------------------------------------------------
1 | import importlib
2 | import inspect
3 | from collections import OrderedDict
4 | from pathlib import Path
5 | from typing import NamedTuple, Type
6 |
7 | import pandas as pd
8 |
9 | from .licenses import License
10 |
11 |
12 | _REGISTRY = {}
13 |
14 |
15 | class Description(NamedTuple):
16 | body_region: str = None
17 | license: str = None
18 | link: str = None
19 | modality: str = None
20 | prep_data_size: str = None
21 | raw_data_size: str = None
22 | task: str = None
23 |
24 |
25 | def register(**kwargs):
26 | def decorator(cls: Type):
27 | _register(cls, cls.__name__, description, 2)
28 | # cls._path = path
29 | return cls
30 |
31 | # path = kwargs.pop('path')
32 | description = Description(**kwargs)
33 | return decorator
34 |
35 |
36 | def _register(cls, name, description, level):
37 | module = inspect.getmodule(inspect.stack()[level][0]).__name__
38 | assert name not in _REGISTRY, name
39 | _REGISTRY[name] = cls, module, description
40 |
41 |
42 | def gather_datasets():
43 | for f in Path(__file__).resolve().parent.parent.iterdir():
44 | module_name = f'amid.{f.stem}'
45 | importlib.import_module(module_name)
46 |
47 | return OrderedDict((k, _REGISTRY[k]) for k in sorted(_REGISTRY))
48 |
49 |
50 | def prepare_for_table(name, count, module, description, version):
51 | def stringify(x):
52 | if pd.isnull(x):
53 | return ''
54 | if isinstance(x, str):
55 | return x
56 | if isinstance(x, (list, tuple)):
57 | return ', '.join(x)
58 | return x
59 |
60 | entry = {'name': name, 'entries': count}
61 | entry.update({k: v for k, v in description._asdict().items() if not pd.isnull(v)})
62 | license_ = entry.get('license', None)
63 | if license_:
64 | if isinstance(license_, License):
65 | license_ = f'{license_.name}'
66 | entry['license'] = license_
67 |
68 | link = entry.pop('link', None)
69 | if link is not None:
70 | entry['link'] = f'Source'
71 |
72 | entry['name'] = f'{name}'
73 | return {k: stringify(v) for k, v in entry.items()}
74 |
--------------------------------------------------------------------------------
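The registry above is consumed the same way in `docs/fill_docs.py` and `tests/test_consistency.py`; a short sketch:

```python
from amid.internals import gather_datasets

# importing the `amid.*` modules fills the registry as a side effect of @register
for name, (cls, module, description) in gather_datasets().items():
    print(name, module, description.modality)
```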
/amid/hcp.py:
--------------------------------------------------------------------------------
1 | import gzip
2 | import zipfile
3 | from pathlib import Path
4 | from zipfile import ZipFile
5 |
6 | import nibabel as nb
7 | import numpy as np
8 |
9 | from .internals import Dataset, field, licenses, register
10 |
11 |
12 | @register(
13 | body_region='Head',
14 | license=licenses.CC_BYNCND_40,
15 | link='https://www.humanconnectome.org/study/hcp-young-adult/document/1200-subjects-data-release',
16 | modality='MRI',
17 | prep_data_size='125G',
18 | raw_data_size='125G',
19 | task='Segmentation',
20 | )
21 | class HCP(Dataset):
22 | @property
23 | def ids(self):
24 | result = set()
25 | for archive in self.root.glob('*.zip'):
26 | with ZipFile(archive) as zf:
27 | for zipinfo in zf.infolist():
28 | if zipinfo.is_dir():
29 | continue
30 | result.add(zipinfo.filename.split('/')[0])
31 |
32 | return tuple(sorted(result))
33 |
34 | def _file(self, i):
35 | for archive in self.root.glob('*.zip'):
36 | with ZipFile(archive) as zf:
37 | for zipinfo in zf.infolist():
38 | if zipinfo.is_dir():
39 | continue
40 | file = Path(zipinfo.filename)
41 | if (i in file.stem) and ('T1w_MPR1' in file.stem):
42 | return zipfile.Path(str(archive), str(file))
43 |
44 | @field
45 | def image(self, i) -> np.ndarray:
46 | with self._file(i).open('rb') as opened:
47 | with gzip.GzipFile(fileobj=opened) as nii:
48 | nii = nb.FileHolder(fileobj=nii)
49 | image = nb.Nifti1Image.from_file_map({'header': nii, 'image': nii})
50 | return np.int16(image.get_fdata())
51 |
52 | @field
53 | def affine(self, i) -> np.ndarray:
54 | with self._file(i).open('rb') as opened:
55 | with gzip.GzipFile(fileobj=opened) as nii:
56 | nii = nb.FileHolder(fileobj=nii)
57 | image = nb.Nifti1Image.from_file_map({'header': nii, 'image': nii})
58 | return image.affine
59 |
60 | def spacing(self, i):
61 | with self._file(i).open('rb') as opened:
62 | with gzip.GzipFile(fileobj=opened) as nii:
63 | nii = nb.FileHolder(fileobj=nii)
64 | image = nb.Nifti1Image.from_file_map({'header': nii, 'image': nii})
65 | return tuple(image.header['pixdim'][1:4])
66 |
--------------------------------------------------------------------------------
/amid/mslub/dataset.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import nibabel as nb
4 |
5 | from ..internals import Dataset, licenses, register
6 |
7 |
8 | @register(
9 | body_region='Head',
10 | license=licenses.CC_BY_30,
11 | link='https://github.com/muschellij2/open_ms_data?tab=readme-ov-file',
12 | modality='MRI',
13 | prep_data_size='18G',
14 | raw_data_size='5.9G',
15 | task='Anomaly segmentation',
16 | )
17 | class MSLUB(Dataset):
18 | @property
19 | def ids(self):
20 | result = set()
21 | for file in self.root.glob('**/*.gz'):
22 | if ('raw' not in str(file)) or ('gt' in str(file)):
23 | continue
24 | patient = file.parent.name
25 | plane = file.parent.parent.parent.name
26 | ind = f'{plane}-{patient}'
27 | if 'longitudinal' in str(file):
28 | filename = file.name
29 | study_number = filename.split('_')[0]
30 | ind = f'{ind}-{study_number}'
31 | result.add(ind)
32 | return list(result)
33 |
34 | def _file(self, i):
35 | plane = i.split('-')[0]
36 | patient = i.split('-')[1]
37 | path = self.root / plane / 'raw' / patient
38 | if 'longitudinal' in i:
39 | study_number = i.split('-')[2]
40 | return path / study_number
41 | return path
42 |
43 | def image(self, i):
44 | file = self._file(i)
45 | if 'longitudinal' in str(file):
46 | study_number = file.stem
47 | file_name = file.parent / f'{study_number}_FLAIR.nii.gz'
48 | else:
49 | file_name = file / 'FLAIR.nii.gz'
50 | image = nb.load(file_name).get_fdata()
51 | return image
52 |
53 | def mask(self, i):
54 | file = self._file(i)
55 | if 'longitudinal' in str(file):
56 | file_name = file.parent / 'gt.nii.gz'
57 | else:
58 | file_name = file / 'consensus_gt.nii.gz'
59 | image = nb.load(file_name).get_fdata()
60 | return image
61 |
62 | def patient(self, i):
63 | file = self._file(i)
64 | if 'longitudinal' in str(file):
65 | return Path(file).parent.name
66 | else:
67 | return Path(file).name
68 |
69 | def affine(self, i):
70 | file = self._file(i)
71 | if 'longitudinal' in str(file):
72 | study_number = file.stem
73 | file_name = file.parent / f'{study_number}_FLAIR.nii.gz'
74 | else:
75 | file_name = file / 'FLAIR.nii.gz'
76 | return nb.load(file_name).affine
77 |
--------------------------------------------------------------------------------
/docs/mkdocstrings_handlers/python_connectome.py:
--------------------------------------------------------------------------------
1 | import importlib
2 | from collections import OrderedDict
3 |
4 | from griffe.dataclasses import Alias, Attribute, Class, Function, Parameter, ParameterKind, Parameters
5 | from mkdocstrings_handlers.python.handler import PythonHandler
6 |
7 |
8 | class PythonConnectomeHandler(PythonHandler):
9 | def get_templates_dir(self, handler: str):
10 | return super().get_templates_dir('python')
11 |
12 | def collect(self, identifier: str, config: dict):
13 | result = super().collect(identifier, config)
14 | m, p = result.path.rsplit('.', 1)
15 | v = getattr(importlib.import_module(m), p)
16 | if hasattr(v, '__origin__'):
17 | origin = v.__origin__
18 | if origin.__qualname__ != result.name:
19 | origin = super().collect(f'{origin.__module__}.{origin.__qualname__}', config)
20 | origin.name = result.name
21 | result = origin
22 |
23 | if isinstance(result, Alias):
24 | result.target = self.patch_class(result.target)
25 | else:
26 | result = self.patch_class(result)
27 | return result
28 |
29 | @staticmethod
30 | def patch_class(x: Class):
31 | members = OrderedDict()
32 | for name, v in x.members.items():
33 | if not name.startswith('_'):
34 | if isinstance(v, Function):
35 | if name == 'ids':
36 | v.parameters = Parameters()
37 | else:
38 | v.parameters = Parameters(
39 | Parameter('id', annotation='str', kind=ParameterKind.positional_or_keyword)
40 | )
41 |
42 | elif isinstance(v, Attribute):
43 | v = Function(
44 | name,
45 | parameters=Parameters(
46 | Parameter('id', annotation='str', kind=ParameterKind.positional_or_keyword)
47 | ),
48 | parent=x,
49 | )
50 |
51 | else:
52 | raise TypeError(v)
53 |
54 | members[name] = v
55 |
56 | x.members = members
57 | return x
58 |
59 |
60 | def get_handler(theme: str, custom_templates=None, config_file_path=None, paths=None, **config):
61 | return PythonConnectomeHandler(
62 | handler='python_connectome',
63 | theme=theme,
64 | custom_templates=custom_templates,
65 | config_file_path=config_file_path,
66 | paths=paths,
67 | )
68 |
--------------------------------------------------------------------------------
/amid/kits.py:
--------------------------------------------------------------------------------
1 | import nibabel as nb
2 | import numpy as np
3 |
4 | from .internals import Dataset, field, register
5 | from .utils import PathOrStr
6 |
7 |
8 | @register(
9 |     body_region='Abdomen',
10 | license=None, # todo
11 | link='https://kits-challenge.org/kits23/',
12 | modality='CT',
13 | prep_data_size='50G',
14 | raw_data_size='12G',
15 | task='Kidney Tumor Segmentation',
16 | )
17 | class KiTS23(Dataset):
18 |     """Kidney and Kidney Tumor Segmentation Challenge.
19 | The 2023 Kidney and Kidney Tumor Segmentation challenge (abbreviated KiTS23)
20 | is a competition in which teams compete to develop the best system for
21 | automatic semantic segmentation of kidneys, renal tumors, and renal cysts.
22 |
23 | Competition page is https://kits-challenge.org/kits23/, official competition repository is
24 | https://github.com/neheller/kits23/.
25 |
26 | For usage, clone the repository https://github.com/neheller/kits23/, install and run `kits23_download_data`.
27 |
28 | Parameters
29 | ----------
30 | root: str, Path
31 | Absolute path to the root containing the downloaded archive and meta.
32 | If not provided, the cache is assumed to be already populated.
33 | """
34 |
35 | def __init__(self, root: PathOrStr):
36 | super().__init__(root)
37 |         if not (self.root / 'dataset').exists():
38 |             raise FileNotFoundError(f'Dataset not found in {self.root}')
39 |
40 | @property
41 | def ids(self):
42 | return tuple(sorted(sub.name for sub in (self.root / 'dataset').glob('*')))
43 |
44 | @field
45 | def image(self, i):
46 | # CT images are integer-valued, this will help us improve compression rates
47 | image_file = nb.load(self.root / 'dataset' / i / 'imaging.nii.gz')
48 | return np.int16(image_file.get_fdata()[...])
49 |
50 | # TODO add multiple segmentations
51 | @field
52 | def mask(self, i):
53 | """Combined annotation for kidneys, tumor and cyst (if present)."""
54 | ct_scan_nifti = nb.load(self.root / 'dataset' / i / 'segmentation.nii.gz')
55 | return np.int8(ct_scan_nifti.get_fdata())
56 |
57 | @field
58 | def affine(self, i):
59 | """The 4x4 matrix that gives the image's spatial orientation."""
60 | image_file = nb.load(self.root / 'dataset' / i / 'imaging.nii.gz')
61 | return image_file.affine
62 |
63 | @property
64 | def labels_names(self):
65 |         """Maps the integer labels in the mask to structure names; consistent across all samples."""
66 | return KITS_LABEL_NAMES
67 |
68 |
69 | KITS_LABEL_NAMES = {
70 | # https://github.com/neheller/kits23/blob/063d4c00afd383fc68145a00c0aa6a4e2a3c0f50/kits23/configuration/labels.py#L23
71 | 1: 'kidney',
72 | 2: 'tumor',
73 | 3: 'cyst',
74 | }
75 |
--------------------------------------------------------------------------------
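A minimal usage sketch for the `KiTS23` loader defined in `amid/kits.py` above. The root path is a placeholder and assumes the data was fetched with `kits23_download_data`, so that `root` contains the `dataset` folder:

```python
from amid.kits import KiTS23

ds = KiTS23(root='/path/to/kits23')  # hypothetical location of the downloaded data
print(len(ds.ids))

i = ds.ids[0]
image = ds.image(i)   # int16 CT volume
mask = ds.mask(i)     # combined kidney/tumor/cyst annotation
print(image.shape, mask.max(), ds.labels_names)
```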
/amid/covid_1110.py:
--------------------------------------------------------------------------------
1 | import gzip
2 | from typing import Union
3 |
4 | import nibabel
5 | import numpy as np
6 |
7 | from .internals import Dataset, field, register
8 |
9 |
10 | @register(
11 | body_region='Thorax',
12 | modality='CT',
13 | task='COVID-19 Segmentation',
14 | link='https://mosmed.ai/en/datasets/covid191110/',
15 | raw_data_size='21G',
16 | )
17 | class MoscowCovid1110(Dataset):
18 | """
19 | The Moscow Radiology COVID-19 dataset.
20 |
21 | Parameters
22 | ----------
23 | root : str, Path, optional
24 | path to the folder containing the raw downloaded files.
25 | If not provided, the cache is assumed to be already populated.
26 |
27 | Notes
28 | -----
29 | Download links:
30 | https://mosmed.ai/en/datasets/covid191110/
31 |
32 | Examples
33 | --------
34 | >>> # Place the downloaded files in any folder and pass the path to the constructor:
35 | >>> ds = MoscowCovid1110(root='/path/to/files/root')
36 | >>> print(len(ds.ids))
37 | # 1110
38 | >>> print(ds.image(ds.ids[0]).shape)
39 | # (512, 512, 43)
40 | """
41 |
42 | @property
43 | def ids(self):
44 | return sorted({f.name[:-7] for f in self.root.glob('CT-*/*')})
45 |
46 | def _file(self, i):
47 | return next(self.root.glob(f'CT-*/{i}.nii.gz'))
48 |
49 | @field
50 | def image(self, i) -> np.ndarray:
51 | with self._file(i).open('rb') as opened:
52 | with gzip.GzipFile(fileobj=opened) as nii:
53 | nii = nibabel.FileHolder(fileobj=nii)
54 | image = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii})
55 | # most ct scans are integer-valued, this will help us improve compression rates
56 | # (instead of using `image.get_fdata()`)
57 | return np.asarray(image.dataobj)
58 |
59 | @field
60 | def affine(self, i) -> np.ndarray:
61 | with self._file(i).open('rb') as opened:
62 | with gzip.GzipFile(fileobj=opened) as nii:
63 | nii = nibabel.FileHolder(fileobj=nii)
64 | image = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii})
65 | return image.affine
66 |
67 | @field
68 | def label(self, i) -> str:
69 | return self._file(i).parent.name[3:]
70 |
71 | @field
72 | def mask(self, i) -> Union[np.ndarray, None]:
73 | path = self.root / 'masks' / f'{i}_mask.nii.gz'
74 | if not path.exists():
75 | return
76 |
77 | with path.open('rb') as opened:
78 | with gzip.GzipFile(fileobj=opened) as nii:
79 | nii = nibabel.FileHolder(fileobj=nii)
80 | image = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii})
81 | return np.asarray(image.dataobj) > 0.5
82 |
--------------------------------------------------------------------------------
/amid/totalsegmentator/const.py:
--------------------------------------------------------------------------------
1 | ANATOMICAL_STRUCTURES = [
2 | 'adrenal_gland_left',
3 | 'adrenal_gland_right',
4 | 'aorta',
5 | 'autochthon_left',
6 | 'autochthon_right',
7 | 'brain',
8 | 'clavicula_left',
9 | 'clavicula_right',
10 | 'colon',
11 | 'duodenum',
12 | 'esophagus',
13 | 'face',
14 | 'femur_left',
15 | 'femur_right',
16 | 'gallbladder',
17 | 'gluteus_maximus_left',
18 | 'gluteus_maximus_right',
19 | 'gluteus_medius_left',
20 | 'gluteus_medius_right',
21 | 'gluteus_minimus_left',
22 | 'gluteus_minimus_right',
23 | 'heart_atrium_left',
24 | 'heart_atrium_right',
25 | 'heart_myocardium',
26 | 'heart_ventricle_left',
27 | 'heart_ventricle_right',
28 | 'hip_left',
29 | 'hip_right',
30 | 'humerus_left',
31 | 'humerus_right',
32 | 'iliac_artery_left',
33 | 'iliac_artery_right',
34 | 'iliac_vena_left',
35 | 'iliac_vena_right',
36 | 'iliopsoas_left',
37 | 'iliopsoas_right',
38 | 'inferior_vena_cava',
39 | 'kidney_left',
40 | 'kidney_right',
41 | 'liver',
42 | 'lung_lower_lobe_left',
43 | 'lung_lower_lobe_right',
44 | 'lung_middle_lobe_right',
45 | 'lung_upper_lobe_left',
46 | 'lung_upper_lobe_right',
47 | 'pancreas',
48 | 'portal_vein_and_splenic_vein',
49 | 'pulmonary_artery',
50 | 'rib_left_1',
51 | 'rib_left_10',
52 | 'rib_left_11',
53 | 'rib_left_12',
54 | 'rib_left_2',
55 | 'rib_left_3',
56 | 'rib_left_4',
57 | 'rib_left_5',
58 | 'rib_left_6',
59 | 'rib_left_7',
60 | 'rib_left_8',
61 | 'rib_left_9',
62 | 'rib_right_1',
63 | 'rib_right_10',
64 | 'rib_right_11',
65 | 'rib_right_12',
66 | 'rib_right_2',
67 | 'rib_right_3',
68 | 'rib_right_4',
69 | 'rib_right_5',
70 | 'rib_right_6',
71 | 'rib_right_7',
72 | 'rib_right_8',
73 | 'rib_right_9',
74 | 'sacrum',
75 | 'scapula_left',
76 | 'scapula_right',
77 | 'small_bowel',
78 | 'spleen',
79 | 'stomach',
80 | 'trachea',
81 | 'urinary_bladder',
82 | 'vertebrae_C1',
83 | 'vertebrae_C2',
84 | 'vertebrae_C3',
85 | 'vertebrae_C4',
86 | 'vertebrae_C5',
87 | 'vertebrae_C6',
88 | 'vertebrae_C7',
89 | 'vertebrae_L1',
90 | 'vertebrae_L2',
91 | 'vertebrae_L3',
92 | 'vertebrae_L4',
93 | 'vertebrae_L5',
94 | 'vertebrae_T1',
95 | 'vertebrae_T10',
96 | 'vertebrae_T11',
97 | 'vertebrae_T12',
98 | 'vertebrae_T2',
99 | 'vertebrae_T3',
100 | 'vertebrae_T4',
101 | 'vertebrae_T5',
102 | 'vertebrae_T6',
103 | 'vertebrae_T7',
104 | 'vertebrae_T8',
105 | 'vertebrae_T9',
106 | ]
107 |
108 | LABELS = ['age', 'gender', 'institute', 'study_type', 'split']
109 |
--------------------------------------------------------------------------------
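Every name in `ANATOMICAL_STRUCTURES` is exposed as a separate segmentation field on the `Totalsegmentator` dataset (see `add_masks(locals())` in `amid/totalsegmentator/dataset.py` further below). A small sketch with a placeholder root, assuming the usual package-level re-export:

```python
from amid.totalsegmentator import Totalsegmentator

ds = Totalsegmentator(root='/path/to/Totalsegmentator_dataset.zip')
i = ds.ids[0]
print(ds.image(i).shape)
print(ds.liver(i).shape)   # one field per entry of ANATOMICAL_STRUCTURES
print(ds.aorta(i).any())
```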
/amid/rsna_bc/dataset.py:
--------------------------------------------------------------------------------
1 | from contextlib import suppress
2 | from functools import cached_property
3 |
4 | import pandas as pd
5 | import pydicom
6 |
7 | from ..internals import Dataset, field, register
8 | from .utils import csv_field, unpack
9 |
10 |
11 | @register(
12 | body_region='Thorax',
13 | license='Non-Commercial Use',
14 | link='https://www.kaggle.com/competitions/rsna-breast-cancer-detection/data',
15 | modality='MG',
16 | raw_data_size='271G',
17 | prep_data_size='294G',
18 | task='Breast cancer classification',
19 | )
20 | class RSNABreastCancer(Dataset):
21 | @cached_property
22 | def _meta(self):
23 | dfs = []
24 | for part in 'train', 'test':
25 | with suppress(FileNotFoundError):
26 | with unpack(self.root, f'{part}.csv') as (file, _):
27 | df = pd.read_csv(file)
28 | df['part'] = part
29 | dfs.append(df)
30 |
31 | if not dfs:
32 | raise FileNotFoundError('No metadata found')
33 | dfs = pd.concat(dfs, ignore_index=True)
34 | for name in 'image_id', 'patient_id', 'site_id':
35 | dfs[name] = dfs[name].astype(str)
36 |
37 | raw = list(map(str, dfs.image_id.tolist()))
38 | ids = set(raw)
39 | if len(ids) != len(raw):
40 | raise ValueError('The image ids are not unique')
41 |
42 | return {row.image_id: row for _, row in dfs.iterrows()}
43 |
44 | # csv fields
45 | site_id = csv_field('site_id', str)
46 | patient_id = csv_field('patient_id', str)
47 | image_id = csv_field('image_id', str)
48 | laterality = csv_field('laterality', None)
49 | view = csv_field('view', None)
50 | age = csv_field('age', None)
51 | cancer = csv_field('cancer', bool)
52 | biopsy = csv_field('biopsy', bool)
53 | invasive = csv_field('invasive', bool)
54 | BIRADS = csv_field('BIRADS', int)
55 | implant = csv_field('implant', bool)
56 | density = csv_field('density', None)
57 | machine_id = csv_field('machine_id', str)
58 | prediction_id = csv_field('prediction_id', str)
59 | difficult_negative_case = csv_field('difficult_negative_case', bool)
60 |
61 | @property
62 | def ids(self):
63 | return tuple(sorted(self._meta))
64 |
65 | def _dicom(self, i):
66 | row = self._meta[i]
67 | with unpack(self.root, f'{row.part}_images/{row.patient_id}/{row.image_id}.dcm') as (file, _):
68 | return pydicom.dcmread(file)
69 |
70 | @field
71 | def image(self, i):
72 | return self._dicom(i).pixel_array
73 |
74 | @field
75 | def padding_value(self, i):
76 | return getattr(self._dicom(i), 'PixelPaddingValue', None)
77 |
78 | @field
79 | def intensity_sign(self, i):
80 | return getattr(self._dicom(i), 'PixelIntensityRelationshipSign', None)
81 |
--------------------------------------------------------------------------------
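A minimal usage sketch for `RSNABreastCancer` above. The root path is a placeholder; it should contain the Kaggle `train.csv`/`test.csv` and the matching `train_images`/`test_images` data, and the per-image metadata fields are the `csv_field` attributes from the class body:

```python
from amid.rsna_bc import RSNABreastCancer

ds = RSNABreastCancer(root='/path/to/rsna-breast-cancer-detection')
print(len(ds.ids))

i = ds.ids[0]
print(ds.image(i).shape)  # decoded DICOM pixel array
print(ds.patient_id(i), ds.laterality(i), ds.view(i), ds.cancer(i))
```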
/amid/cl_detection.py:
--------------------------------------------------------------------------------
1 | from functools import cached_property
2 | from typing import Dict, Tuple
3 |
4 | import numpy as np
5 | import SimpleITK
6 | from connectome import Transform
7 | from deli import load
8 | from imops import crop_to_box
9 |
10 | from .internals import Dataset, field, licenses, register
11 | from .utils import mask_to_box
12 |
13 |
14 | @register(
15 | body_region='Head',
16 | license=licenses.CC_BYNC_40,
17 | link='https://github.com/cwwang1979/CL-detection2023/',
18 | modality='X-ray',
19 | prep_data_size='1.8G',
20 | raw_data_size='1.5G',
21 | task='Keypoint detection',
22 | )
23 | class CLDetection2023(Dataset):
24 | """
25 | The data for the "Cephalometric Landmark Detection in Lateral X-ray Images" Challenge,
26 | held with the MICCAI-2023 conference.
27 |
28 | Notes
29 | -----
30 | The data can only be obtained by contacting the organizers by email.
31 | See the [challenge home page](https://cl-detection2023.grand-challenge.org/) for details.
32 |
33 | Parameters
34 | ----------
35 | root : str, Path, optional
36 | path to the folder containing the raw downloaded and unarchived data.
37 | If not provided, the cache is assumed to be already populated.
38 |
39 | Examples
40 | --------
41 | >>> # Place the downloaded archives in any folder and pass the path to the constructor:
42 | >>> ds = CLDetection2023(root='/path/to/data/root/folder')
43 | >>> print(len(ds.ids))
44 | # 400
45 | >>> print(ds.image(ds.ids[0]).shape)
46 | # (2400, 1935)
47 | """
48 |
49 | @cached_property
50 | def _images(self):
51 | return SimpleITK.GetArrayFromImage(SimpleITK.ReadImage(self.root / 'train_stack.mha'))
52 |
53 | @cached_property
54 | def _points(self):
55 | return load(self.root / 'train-gt.json')['points']
56 |
57 | @property
58 | def ids(self):
59 | return tuple(map(str, range(1, len(self._images) + 1)))
60 |
61 | @field
62 | def image(self, i) -> np.ndarray:
63 | i = int(i)
64 | return self._images[i - 1]
65 |
66 | @field
67 | def points(self, i) -> Dict[str, np.ndarray]:
68 | i = int(i)
69 | return {x['name']: np.array(x['point'][:2]) for x in self._points if x['point'][-1] == i}
70 |
71 | @field
72 | def spacing(self, i) -> Tuple[float, float]:
73 | i = int(i)
74 | (scale,) = {x['scale'] for x in self._points if x['point'][-1] == i}
75 | scale = float(scale)
76 | return scale, scale
77 |
78 |
79 | class CropPadding(Transform):
80 | __inherit__ = 'spacing'
81 |
82 | def _box(image):
83 | return mask_to_box(image[..., 0] != 0)
84 |
85 | def image(image, _box):
86 | return crop_to_box(image[..., 0], _box)
87 |
88 | def points(points, _box):
89 | return {k: v - _box[0] for k, v in points.items()}
90 |
91 |
92 | class FlipPoints(Transform):
93 | __inherit__ = True
94 |
95 | def points(points):
96 | return {name: pt[::-1] for name, pt in points.items()}
97 |
--------------------------------------------------------------------------------
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
1 | name: Tests
2 |
3 | on: [ pull_request ]
4 |
5 | env:
6 | MODULE_NAME: amid
7 |
8 | jobs:
9 | test:
10 | runs-on: ubuntu-20.04
11 | strategy:
12 | matrix:
13 | python-version: [ '3.8', '3.9', '3.10', '3.11', '3.12' ]
14 |
15 | steps:
16 | - uses: actions/checkout@v3
17 | - name: Set up Python ${{ matrix.python-version }}
18 | uses: actions/setup-python@v4
19 | with:
20 | python-version: ${{ matrix.python-version }}
21 |
22 | - name: Check the version
23 | if: "! github.event.pull_request.head.repo.fork "
24 | run: |
25 | VERSION=$(python -c "from pathlib import Path; import runpy; folder, = {d.parent for d in Path().resolve().glob('*/__init__.py') if d.parent.is_dir() and (d.parent / '__version__.py').exists()}; print(runpy.run_path(folder / '__version__.py')['__version__'])")
26 | MATCH=$(pip index versions $MODULE_NAME | grep "Available versions:" | grep $VERSION) || echo
27 | echo $MATCH
28 | if [ "$GITHUB_BASE_REF" = "master" ] && [ "$MATCH" != "" ]; then exit 1; fi
29 | - name: Build the package
30 | run: |
31 | pip install build
32 | python -m build --sdist
33 |
34 | - name: Install
35 | run: |
36 | pip install dist/*
37 | pip install -r tests/requirements.txt
38 |
39 | cd tests
40 | export MODULE_PARENT=$(python -c "import $MODULE_NAME, os; print(os.path.dirname($MODULE_NAME.__path__[0]))")
41 | export MODULE_PARENT=${MODULE_PARENT%"/"}
42 | cd ..
43 | echo $MODULE_PARENT
44 | echo "MODULE_PARENT=$(echo $MODULE_PARENT)" >> $GITHUB_ENV
45 |
46 | - name: Test with pytest
47 | if: "! github.event.pull_request.head.repo.fork "
48 | run: |
49 | # pytest tests -m "not raw" --junitxml=reports/junit-${{ matrix.python-version }}.xml --cov="$MODULE_PARENT/$MODULE_NAME" --cov-report=xml --cov-branch
50 | # for now we only test that everything is importable
51 | pip install setuptools # needed for pylidc to work
52 | python -c "from $MODULE_NAME import *"
53 | # - name: Generate coverage report
54 | # if: "! github.event.pull_request.head.repo.fork "
55 | # run: |
56 | # coverage xml -o reports/coverage-${{ matrix.python-version }}.xml
57 | # sed -i -e "s|$MODULE_PARENT/||g" reports/coverage-${{ matrix.python-version }}.xml
58 | # sed -i -e "s|$(echo $MODULE_PARENT/ | tr "/" .)||g" reports/coverage-${{ matrix.python-version }}.xml
59 | #
60 | # - name: Upload artifacts
61 | # if: "! github.event.pull_request.head.repo.fork "
62 | # uses: actions/upload-artifact@v3
63 | # with:
64 | # name: reports-${{ matrix.python-version }}
65 | # path: reports/*-${{ matrix.python-version }}.xml
66 |
67 | # TODO: coverage is not informative in the CI anyway
68 | # - name: Upload coverage results
69 | # if: "! github.event.pull_request.head.repo.fork "
70 | # uses: codecov/codecov-action@v3
71 | # with:
72 | # fail_ci_if_error: true
73 | # files: reports/coverage-${{ matrix.python-version }}.xml
74 | # verbose: true
75 |
--------------------------------------------------------------------------------
/amid/tbad.py:
--------------------------------------------------------------------------------
1 | import gzip
2 | from pathlib import Path
3 |
4 | import nibabel as nb
5 | import numpy as np
6 |
7 | from .internals import Dataset, field, licenses, register
8 |
9 |
10 | @register(
11 | body_region='Chest',
12 | license=licenses.CC_BYNC_40,
13 | link='https://github.com/XiaoweiXu/Dataset_Type-B-Aortic-Dissection',
14 | modality='CT',
15 | prep_data_size='14G',
16 | raw_data_size='14G',
17 | task='Aortic dissection segmentation',
18 | )
19 | class TBAD(Dataset):
20 | """
21 | A dataset of 3D Computed Tomography (CT) images for Type-B Aortic Dissection segmentation.
22 |
23 | Notes
24 | -----
25 | The data can only be obtained by contacting the authors by email.
26 | See the [dataset home page](https://github.com/XiaoweiXu/Dataset_Type-B-Aortic-Dissection) for details.
27 |
28 | Parameters
29 | ----------
30 | root : str, Path, optional
31 | path to the folder containing the raw downloaded files.
32 | If not provided, the cache is assumed to be already populated.
33 |
34 | Examples
35 | --------
36 | >>> # Place the downloaded files in any folder and pass the path to the constructor:
37 | >>> ds = TBAD(root='/path/to/files/root')
38 | >>> print(len(ds.ids))
39 | # 100
40 | >>> print(ds.image(ds.ids[0]).shape)
41 | # (512, 512, 327)
42 |
43 | References
44 | ----------
45 | .. [1] Yao, Zeyang & Xie, Wen & Zhang, Jiawei & Dong, Yuhao & Qiu, Hailong & Haiyun, Yuan & Jia,
46 | Qianjun & Tianchen, Wang & Shi, Yiyi & Zhuang, Jian & Que, Lifeng & Xu, Xiaowei & Huang, Meiping.
47 | (2021). ImageTBAD: A 3D Computed Tomography Angiography Image Dataset for Automatic Segmentation
48 | of Type-B Aortic Dissection. Frontiers in Physiology. 12. 732711. 10.3389/fphys.2021.732711.
49 | """
50 |
51 | @property
52 | def ids(self):
53 | result = set()
54 |
55 | for file in self.root.glob('*_image.nii.gz'):
56 | result.add(file.stem.split('_')[0])
57 |
58 | return tuple(sorted(result))
59 |
60 | def _fname(self, i):
61 | return self.root / f'{i}_image.nii.gz'
62 |
63 | def image(self, i) -> np.ndarray:
64 | with self._fname(i).open('rb') as opened:
65 | with gzip.GzipFile(fileobj=opened) as nii:
66 | nii = nb.FileHolder(fileobj=nii)
67 | image = nb.Nifti1Image.from_file_map({'header': nii, 'image': nii})
68 | return np.int16(image.get_fdata())
69 |
70 | def affine(self, i) -> np.ndarray:
71 | """The 4x4 matrix that gives the image's spatial orientation."""
72 | with self._fname(i).open('rb') as opened:
73 | with gzip.GzipFile(fileobj=opened) as nii:
74 | nii = nb.FileHolder(fileobj=nii)
75 | image = nb.Nifti1Image.from_file_map({'header': nii, 'image': nii})
76 | return image.affine
77 |
78 | @field
79 | def mask(self, i) -> np.ndarray:
80 | with Path(self.root / f'{i}_label.nii.gz').open('rb') as opened:
81 | with gzip.GzipFile(fileobj=opened) as nii:
82 | nii = nb.FileHolder(fileobj=nii)
83 | label = nb.Nifti1Image.from_file_map({'header': nii, 'image': nii})
84 | return np.uint8(label.get_fdata())
85 |
--------------------------------------------------------------------------------
/amid/ribfrac/dataset.py:
--------------------------------------------------------------------------------
1 | from functools import cached_property
2 |
3 | import nibabel
4 | import numpy as np
5 |
6 | from ..internals import Dataset, licenses, register
7 |
8 |
9 | @register(
10 | body_region='Chest',
11 | license=licenses.CC_BYNC_40,
12 | link='https://ribfrac.grand-challenge.org',
13 | modality='CT',
14 | raw_data_size='77.8 G',
15 | task='Segmentation',
16 | )
17 | class RibFrac(Dataset):
18 | """
19 |     The RibFrac dataset is a benchmark for developing algorithms for rib fracture detection,
20 |     segmentation and classification. We hope this large-scale dataset can facilitate
21 |     both clinical research on automatic rib fracture detection and diagnosis,
22 |     and engineering research on 3D detection, segmentation and classification.
23 |
24 |
25 | Parameters
26 | ----------
27 | root : str, Path, optional
28 | path to the folder containing the raw downloaded archives.
29 | If not provided, the cache is assumed to be already populated.
30 |
31 |
32 | Notes
33 | -----
34 | Data downloaded from here:
35 | https://doi.org/10.5281/zenodo.3893507 -- train Part1 (300 images)
36 | https://doi.org/10.5281/zenodo.3893497 -- train Part2 (120 images)
37 | https://doi.org/10.5281/zenodo.3893495 -- val (80 images)
38 | https://zenodo.org/record/3993380 -- test (160 images without annotation)
39 |
40 |
41 |
42 | References
43 | ----------
44 | Jiancheng Yang, Liang Jin, Bingbing Ni, & Ming Li. (2020).
45 | RibFrac Dataset: A Benchmark for Rib Fracture Detection,
46 | Segmentation and Classification
47 | """
48 |
49 | @property
50 | def ids(self):
51 | result = set()
52 | for folder in ['Part1', 'Part2', 'ribfrac-val-images', 'ribfrac-test-images']:
53 | result |= {v.name.split('-')[0] for v in (self.root / folder).iterdir()}
54 |
55 | return tuple(sorted(result))
56 |
57 | @cached_property
58 | def _id2folder(self):
59 | folders = [item for item in self.root.iterdir() if item.is_dir()]
60 | result_dict = {}
61 | for folder in folders:
62 | p = self.root / folder
63 | folder_ids = [v.name.split('-')[0] for v in p.iterdir()]
64 | folder_dict = {_id: p for _id in folder_ids}
65 | result_dict = {**result_dict, **folder_dict}
66 |
67 | return result_dict
68 |
69 | def image(self, i):
70 | image_path = self._id2folder[i] / f'{i}-image.nii.gz'
71 | image = nibabel.load(image_path).get_fdata()
72 | return image.astype(np.int16)
73 |
74 | def label(self, i):
75 | folder_path = self._id2folder[i]
76 | folder = folder_path.name
77 | if folder != 'ribfrac-test-images':
78 | if folder.startswith('Part'):
79 | label_path = folder_path / f'{i}-label.nii.gz'
80 | elif folder == 'ribfrac-val-images':
81 |                 labels_dir = folder_path.parent / 'ribfrac-val-labels'
82 |                 label_path = labels_dir / f'{i}-label.nii.gz'
83 |
84 | label = nibabel.load(label_path).get_fdata()
85 | return label.astype(np.int16)
86 |
87 | def affine(self, i):
88 | """The 4x4 matrix that gives the image's spatial orientation"""
89 | image_path = self._id2folder[i] / f'{i}-image.nii.gz'
90 | return nibabel.load(image_path).affine
91 |
--------------------------------------------------------------------------------
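A minimal usage sketch for `RibFrac` above, assuming `root` contains the unpacked `Part1`, `Part2`, `ribfrac-val-images` and `ribfrac-test-images` folders (with validation labels in `ribfrac-val-labels`, as the loader expects); the path is a placeholder:

```python
from amid.ribfrac import RibFrac

ds = RibFrac(root='/path/to/ribfrac')
print(len(ds.ids))

i = ds.ids[0]
print(ds.image(i).shape)    # int16 CT volume
print(ds.label(i) is None)  # labels are None for ids from the test split
print(ds.affine(i))
```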
/amid/liver_medseg.py:
--------------------------------------------------------------------------------
1 | import contextlib
2 | import gzip
3 | import re
4 | import zipfile
5 | from pathlib import Path
6 | from zipfile import ZipFile
7 |
8 | import nibabel as nb
9 | import numpy as np
10 |
11 | from .internals import Dataset, field, licenses, register
12 |
13 |
14 | @register(
15 | body_region=('Chest', 'Abdomen'),
16 | license=licenses.CC_BYSA_40,
17 | link='https://www.medseg.ai/database/liver-segments-50-cases',
18 | modality='CT',
19 |     prep_data_size='1.88G',
20 | raw_data_size='616M',
21 | task='Segmentation',
22 | )
23 | class LiverMedseg(Dataset):
24 | """
25 |     LiverMedseg is a public CT segmentation dataset with 50 annotated images:
26 |     a case collection of 50 livers with their segments.
27 |     The images were obtained from the Medical Segmentation Decathlon competition.
28 |
29 | Parameters
30 | ----------
31 | root : str, Path, optional
32 | path to the folder containing the raw downloaded archives.
33 | If not provided, the cache is assumed to be already populated.
34 |
35 | Notes
36 | -----
37 | Download links:
38 | https://www.medseg.ai/database/liver-segments-50-cases
39 |
40 | Examples
41 | --------
42 | >>> # Place the downloaded archives in any folder and pass the path to the constructor:
43 | >>> ds = LiverMedseg(root='/path/to/archives/root')
44 | >>> print(len(ds.ids))
45 | # 50
46 | >>> print(ds.image(ds.ids[0]).shape)
47 | # (512, 512, 38)
48 |
49 | References
50 | ----------
51 | """
52 |
53 | @property
54 | def ids(self):
55 | result = set()
56 | with ZipFile(self.root / 'img.zip') as zf:
57 | for zipinfo in zf.infolist():
58 | if zipinfo.is_dir():
59 | continue
60 | file_stem = Path(zipinfo.filename).stem
61 | result.add('liver_medseg_' + re.findall(r'\d+', file_stem)[0])
62 |
63 | return tuple(sorted(result))
64 |
65 | def _file(self, i):
66 | num_id = i.split('_')[-1]
67 | return zipfile.Path(self.root / 'img.zip', f'img{num_id}.nii.gz')
68 |
69 | @field
70 | def image(self, i) -> np.ndarray:
71 | with open_nii_gz_file(self._file(i)) as nii_file:
72 | return np.asarray(nii_file.dataobj)
73 |
74 | @field
75 | def affine(self, i) -> np.ndarray:
76 | """The 4x4 matrix that gives the image's spatial orientation."""
77 | with open_nii_gz_file(self._file(i)) as nii_file:
78 | return nii_file.affine
79 |
80 | def spacing(self, i) -> tuple:
81 | with open_nii_gz_file(self._file(i)) as nii_file:
82 | return tuple(nii_file.header['pixdim'][1:4])
83 |
84 | @field
85 | def mask(self, i) -> np.ndarray:
86 | path = Path(str(self._file(i)).replace('img', 'mask'))
87 | folder, image = path.parent, path.name
88 | _file = zipfile.Path(folder, image)
89 | with open_nii_gz_file(_file) as nii_file:
90 | return np.asarray(nii_file.dataobj).astype(np.uint8)
91 |
92 |
93 | # TODO: sync with amid.utils
94 | @contextlib.contextmanager
95 | def open_nii_gz_file(file):
96 | with file.open('rb') as opened:
97 | with gzip.GzipFile(fileobj=opened) as nii:
98 | nii = nb.FileHolder(fileobj=nii)
99 | yield nb.Nifti1Image.from_file_map({'header': nii, 'image': nii})
100 |
--------------------------------------------------------------------------------
/docs/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contribution Guidelines
2 |
3 | ## Preparing the environment
4 |
5 | 1\. First, set up a cache storage. Create the file `~/.config/amid/.bev.yml` with the following content:
6 |
7 | ```yaml
8 | main:
9 |   storage: /path/to/storage
10 |   cache: /path/to/cache
11 | ```
12 |
13 | where `/path/to/storage` and `/path/to/cache` are some paths in your filesystem.
14 |
15 | 2\. Run
16 |
17 | ```shell
18 | amid init
19 | ```
20 |
21 | The full command could look something like this:
22 |
23 | ```shell
24 | mkdir -p ~/.config/amid
25 | cat >~/.config/amid/.bev.yml <<EOF
26 | main:
27 |   storage: /path/to/storage
28 |   cache: /path/to/cache
29 | EOF
30 | amid init
31 | ```
54 | > The dataset should be written in such a way that making a submission to a contest would work out of the box.
55 |
56 | !!! note
57 | In case of DICOM files, make sure to transpose the first 2 image axes.
58 | This way, the image axes will be consistent with the potential contour coordinates.
59 |
60 | !!! tip
61 | If some value is missing for a given id, it is preferable to return `None` instead of raising an exception.
62 |
63 | !!! tip
64 | The dataset must have a docstring which describes it and provides a link to the original data.
65 |
66 | !!! tip
67 | If the raw data contains a table with metadata, it is preferable to split the metadata columns into separate fields.
68 |
69 | 4\. Register the dataset like so:
70 |
71 | ```python
72 | from amid.internals import register
73 |
74 | @register(
75 | ...,
76 | )
77 | class LiTS(Dataset):
78 | ...
79 | ```
80 |
81 | where `...` stands for the following arguments:
82 |
83 | - `modality` — the images' modality/modalities, e.g., CT, MRI
84 | - `body_region` — the anatomical regions present in the dataset, e.g., Head, Thorax, Abdomen
85 | - `license` — the dataset's license, if any
86 | - `link` — the link to the original data
87 | - `raw_data_size` — the total size required for the raw data, e.g., 10G, 500M
88 | - `task` — the dataset's downstream task if any.
89 | E.g., Supervised Learning, Domain Adaptation, Self-supervised Learning, Tumor Segmentation, etc.
90 |
91 | 5\. Make sure all the methods are working as expected:
92 |
93 | ```python
94 | from amid.lits import LiTS
95 |
96 | dataset = LiTS(root="/datasets/LiTS")
97 |
98 | print(len(dataset.ids))
99 |
100 | id_ = dataset.ids[0]
101 | print(dataset.image(id_).shape)
102 | ```
103 |
104 | 6\. Check the codestyle using the `lint.sh` script in the repository's root and make changes if flake8 is not happy:
105 |
106 | ```shell
107 | pip install -r lint-requirements.txt # only for the first time
108 | ./lint.sh
109 | ```
--------------------------------------------------------------------------------
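The `!!! note` about DICOM axes in CONTRIBUTING.md above can be illustrated with a short sketch; `stack_images` comes from `dicom_csv` (used the same way in `amid/nlst.py` below), and the helper name is only illustrative:

```python
import numpy as np
from dicom_csv import stack_images


def dicom_volume(series):
    # `series` is an ordered list of pydicom datasets;
    # stack them into a volume and swap the first two axes so the array
    # agrees with contour/keypoint coordinates
    return np.moveaxis(stack_images(series, -1), 0, 1)
```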
/amid/medseg9.py:
--------------------------------------------------------------------------------
1 | import contextlib
2 | import gzip
3 | import zipfile
4 | from pathlib import Path
5 | from zipfile import ZipFile
6 |
7 | import nibabel as nb
8 | import numpy as np
9 |
10 | from .internals import Dataset, field, licenses, register
11 |
12 |
13 | @register(
14 | body_region='Chest',
15 | license=licenses.CC0_10,
16 | link='http://medicalsegmentation.com/covid19/',
17 | modality='CT',
18 | prep_data_size='300M',
19 | raw_data_size='310M',
20 | task='COVID-19 segmentation',
21 | )
22 | class Medseg9(Dataset):
23 | """
24 |
25 | Medseg9 is a public COVID-19 CT segmentation dataset with 9 annotated images.
26 |
27 | Parameters
28 | ----------
29 | root : str, Path, optional
30 | path to the folder containing the raw downloaded archives.
31 | If not provided, the cache is assumed to be already populated.
32 |
33 | Notes
34 | -----
35 | Data can be downloaded here: http://medicalsegmentation.com/covid19/.
36 |
37 | Then, the folder with raw downloaded data should contain three zip archives with data and masks
38 | (`rp_im.zip`, `rp_lung_msk.zip`, `rp_msk.zip`).
39 |
40 | Examples
41 | --------
42 | >>> # Place the downloaded archives in any folder and pass the path to the constructor:
43 | >>> ds = Medseg9(root='/path/to/downloaded/data/folder/')
44 | >>> print(len(ds.ids))
45 | # 9
46 | >>> print(ds.image(ds.ids[0]).shape)
47 | # (630, 630, 45)
48 | >>> print(ds.covid(ds.ids[0]).shape)
49 | # (630, 630, 45)
50 |
51 | """
52 |
53 | @property
54 | def ids(self):
55 | result = set()
56 |
57 | with ZipFile(self.root / 'rp_msk.zip') as zf:
58 | for zipinfo in zf.infolist():
59 | if zipinfo.is_dir():
60 | continue
61 | file_stem = Path(zipinfo.filename).stem
62 | result.add('medseg9_' + file_stem.split('.nii')[0])
63 |
64 | return tuple(sorted(result))
65 |
66 | @staticmethod
67 | def _filename(i):
68 | num_id = i.split('_')[-1]
69 | return f'{num_id}.nii.gz'
70 |
71 | def _file(self, i):
72 | return zipfile.Path(self.root / 'rp_im.zip', f'rp_im/{self._filename(i)}')
73 |
74 | @field
75 | def image(self, i):
76 | with open_nii_gz_file(self._file(i)) as nii_image:
77 | # most CT/MRI scans are integer-valued, this will help us improve compression rates
78 | return np.int16(nii_image.get_fdata())
79 |
80 | @field
81 | def affine(self, i):
82 | """The 4x4 matrix that gives the image's spatial orientation."""
83 | with open_nii_gz_file(self._file(i)) as nii_image:
84 | return nii_image.affine
85 |
86 | @field
87 | def lungs(self, i):
88 | mask_file = zipfile.Path(self.root / 'rp_lung_msk.zip', f'rp_lung_msk/{self._filename(i)}')
89 | with open_nii_gz_file(mask_file) as nii_image:
90 | return np.bool_(nii_image.get_fdata())
91 |
92 | @field
93 | def covid(self, i):
94 | """
95 |         uint8 mask.
96 |         0 - normal, 1 - ground-glass opacities, 2 - consolidation.
97 | """
98 | mask_file = zipfile.Path(self.root / 'rp_msk.zip', f'rp_msk/{self._filename(i)}')
99 | with open_nii_gz_file(mask_file) as nii_image:
100 | # most CT/MRI scans are integer-valued, this will help us improve compression rates
101 | return np.uint8(nii_image.get_fdata())
102 |
103 |
104 | # TODO: sync with amid.utils
105 | @contextlib.contextmanager
106 | def open_nii_gz_file(file):
107 | with file.open('rb') as opened:
108 | with gzip.GzipFile(fileobj=opened) as nii:
109 | nii = nb.FileHolder(fileobj=nii)
110 | yield nb.Nifti1Image.from_file_map({'header': nii, 'image': nii})
111 |
--------------------------------------------------------------------------------
/amid/curvas.py:
--------------------------------------------------------------------------------
1 | import gzip
2 | import zipfile
3 | from typing import Dict
4 | from zipfile import ZipFile
5 |
6 | import nibabel
7 | import numpy as np
8 |
9 | from .internals import Dataset, field, licenses, register
10 |
11 |
12 | @register(
13 | body_region='Abdomen',
14 | license=licenses.CC_BY_40,
15 | link='https://zenodo.org/records/13767408',
16 | modality='CT',
17 | prep_data_size='30G',
18 | raw_data_size='30G',
19 | task='Abdominal organ pathologies segmentation',
20 | )
21 | class CURVAS(Dataset):
22 | """
23 | Pancreas, liver and kidney cysts segmentation from multi-rater annotated data.
24 |
25 | The dataset was used at the MICCAI 2024 CURVAS challenge.
26 |
27 | Parameters
28 | ----------
29 | root : str, Path, optional
30 | path to the folder containing the raw downloaded archives.
31 | If not provided, the cache is assumed to be already populated.
32 |
33 | Notes
34 | -----
35 | Download link: https://zenodo.org/records/13767408
36 |
37 | The `root` folder should contain the three downloaded .zip archives, namely:
38 | `training_set.zip`, `validation_set.zip` and `testing_set.zip`.
39 |
40 | Examples
41 | --------
42 | >>> # Place the downloaded folders in any folder and pass the path to the constructor:
43 | >>> ds = CURVAS(root='/path/to/downloaded/data/folder/')
44 | >>> print(len(ds.ids))
45 | # 90
46 | >>> print(ds.image(ds.ids[5]).shape)
47 | # (512, 512, 1045)
48 |     >>> print(ds.masks(ds.ids[35])['annotation_1'].shape)
49 | # (512, 512, 992)
50 |
51 | """
52 |
53 | @property
54 | def ids(self):
55 | def _extract(split):
56 | archive = self.root / f'{split}_set.zip'
57 | with ZipFile(archive) as zf:
58 | namelist = [x for x in zf.namelist() if len(x.rstrip('/').split('/')) == 2]
59 | ids = [f'{x.split("/")[1]}-{split}' for x in namelist]
60 | return ids
61 |
62 | return sorted(
63 | [
64 | *_extract('training'), # 20 Training cases
65 | *_extract('validation'), # 5 Validation cases
66 | *_extract('testing'), # 65 Testing cases
67 | ]
68 | )
69 |
70 | def _file(self, i, obj):
71 | uid, split = i.split('-')
72 |
73 | archive = self.root / f'{split}_set.zip'
74 | file = f'{split}_set/{uid}/{obj}.nii.gz'
75 |
76 | return zipfile.Path(archive, file)
77 |
78 | @field
79 | def image(self, i) -> np.ndarray:
80 | with self._file(i, 'image').open('rb') as opened:
81 | with gzip.GzipFile(fileobj=opened) as nii:
82 | nii = nibabel.FileHolder(fileobj=nii)
83 | image = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii})
84 | return np.asarray(image.dataobj).astype(np.int16)
85 |
86 | @field
87 | def affine(self, i) -> np.ndarray:
88 | """The 4x4 matrix that gives the image's spatial orientation"""
89 | with self._file(i, 'image').open('rb') as opened:
90 | with gzip.GzipFile(fileobj=opened) as nii:
91 | nii = nibabel.FileHolder(fileobj=nii)
92 | image = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii})
93 | return image.affine
94 |
95 | @field
96 | def masks(self, i) -> Dict[str, np.ndarray]:
97 | masks = {}
98 | for x in range(1, 4):
99 | with self._file(i, f'annotation_{x}').open('rb') as opened:
100 | with gzip.GzipFile(fileobj=opened) as nii:
101 | nii = nibabel.FileHolder(fileobj=nii)
102 | image = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii})
103 |
104 | masks[f'annotation_{x}'] = np.asarray(image.dataobj).astype(np.uint8)
105 |
106 | return masks
107 |
--------------------------------------------------------------------------------
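The `masks` field of `CURVAS` above returns all three expert annotations at once. A small sketch of inspecting inter-rater agreement; the path is a placeholder and the chosen id is assumed to have all three annotation files:

```python
import numpy as np
from amid.curvas import CURVAS

ds = CURVAS(root='/path/to/curvas')
m = ds.masks(ds.ids[0])                     # {'annotation_1': ..., 'annotation_2': ..., 'annotation_3': ...}
stacked = np.stack(list(m.values()))        # shape (3, ...) of uint8 label maps
agreement = (stacked == stacked[0]).all(0)  # voxels where all three raters assigned the same label
print(agreement.mean())
```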
/amid/totalsegmentator/dataset.py:
--------------------------------------------------------------------------------
1 | import gzip
2 | from contextlib import suppress
3 | from pathlib import Path
4 | from zipfile import ZipFile
5 |
6 | import nibabel
7 | import numpy as np
8 | import pandas as pd
9 |
10 | from ..internals import Dataset, field, licenses, register
11 | from ..utils import PathOrStr, open_nii_gz_file, unpack
12 | from .utils import ARCHIVE_ROOT, add_labels, add_masks
13 |
14 |
15 | @register(
16 | body_region=('Head', 'Thorax', 'Abdomen', 'Pelvis', 'Legs'),
17 | license=licenses.CC_BY_40,
18 | link='https://zenodo.org/record/6802614#.Y6M2MxXP1D8',
19 | modality='CT',
20 | raw_data_size='35G',
21 | prep_data_size='35G',
22 | task='Supervised anatomical structures segmentation',
23 | )
24 | class Totalsegmentator(Dataset):
25 | """
26 | In 1204 CT images we segmented 104 anatomical structures (27 organs, 59 bones, 10 muscles, 8 vessels)
27 | covering a majority of relevant classes for most use cases.
28 |
29 | The CT images were randomly sampled from clinical routine, thus representing a real world dataset which
30 | generalizes to clinical application.
31 |
32 | The dataset contains a wide range of different pathologies, scanners, sequences and institutions. [1]
33 |
34 | Parameters
35 | ----------
36 | root : str, Path, optional
37 | absolute path to the downloaded archive.
38 | If not provided, the cache is assumed to be already populated.
39 |
40 | Notes
41 | -----
42 | Download link: https://zenodo.org/record/6802614/files/Totalsegmentator_dataset.zip
43 |
44 | Examples
45 | --------
46 | >>> # Download the archive to any folder and pass the path to the constructor:
47 | >>> ds = Totalsegmentator(root='/path/to/the/downloaded/archive')
48 | >>> print(len(ds.ids))
49 | # 1204
50 | >>> print(ds.image(ds.ids[0]).shape)
51 | # (294, 192, 179)
52 | >>> print(ds.aorta(ds.ids[25]).shape)
53 | # (320, 320, 145)
54 |
55 | References
56 | ----------
57 | .. [1] Jakob Wasserthal (2022) Dataset with segmentations of 104 important anatomical structures in 1204 CT images.
58 | Available at: https://zenodo.org/record/6802614#.Y6M2MxXP1D8
59 | """
60 |
61 | add_masks(locals())
62 | add_labels(locals())
63 |
64 | def __init__(self, root: PathOrStr):
65 | root = Path(root)
66 | if root.is_dir():
67 | if root / ARCHIVE_ROOT in list(root.iterdir()):
68 | root = root / ARCHIVE_ROOT
69 |
70 | file = 'meta.csv'
71 | with unpack(root, file, ARCHIVE_ROOT, '.zip') as (unpacked, _):
72 | self._meta = pd.read_csv(unpacked, sep=';')
73 |
74 | super().__init__(root)
75 |
76 | @property
77 | def ids(self):
78 | if self.root.is_dir():
79 | return sorted({x.name for x in self.root.iterdir() if x.name != 'meta.csv'})
80 | else:
81 | with ZipFile(self.root) as zf:
82 | parsed_namelist = [x.strip('/').split('/') for x in zf.namelist()]
83 | return sorted({x[-1] for x in parsed_namelist if len(x) == 2 and x[-1] != 'meta.csv'})
84 |
85 | @field
86 | def image(self, i):
87 | file = f'{i}/ct.nii.gz'
88 |
89 | with suppress(gzip.BadGzipFile):
90 | with unpack(self.root, file, ARCHIVE_ROOT, '.zip') as (unpacked, is_unpacked):
91 | if is_unpacked:
92 | return np.asarray(nibabel.load(unpacked).dataobj)
93 | else:
94 | with open_nii_gz_file(unpacked) as image:
95 | return np.asarray(image.dataobj)
96 |
97 | @field
98 | def affine(self, i):
99 | """The 4x4 matrix that gives the image's spatial orientation"""
100 | file = f'{i}/ct.nii.gz'
101 |
102 | with unpack(self.root, file, ARCHIVE_ROOT, '.zip') as (unpacked, is_unpacked):
103 | if is_unpacked:
104 | return nibabel.load(unpacked).affine
105 | else:
106 | with open_nii_gz_file(unpacked) as image:
107 | return image.affine
108 |
--------------------------------------------------------------------------------
/amid/nlst.py:
--------------------------------------------------------------------------------
1 | import deli
2 | import numpy as np
3 | import pydicom
4 | from dicom_csv import (
5 | Plane,
6 | drop_duplicated_slices,
7 | expand_volumetric,
8 | get_common_tag,
9 | get_orientation_matrix,
10 | get_pixel_spacing,
11 | get_slice_locations,
12 | get_slices_plane,
13 | get_tag,
14 | order_series,
15 | stack_images,
16 | )
17 | from tqdm.auto import tqdm
18 |
19 | from .internals import Dataset, field, licenses, register
20 | from .utils import get_series_date
21 |
22 |
23 | @register(
24 | body_region='Thorax',
25 | license=licenses.CC_BY_30,
26 | link='https://wiki.cancerimagingarchive.net/display/NLST/National+Lung+Screening+Trial',
27 | modality='CT',
28 | prep_data_size=None, # TODO: should be measured...
29 | raw_data_size=None, # TODO: should be measured...
30 | task=None,
31 | )
32 | class NLST(Dataset):
33 | """
34 |
35 |     Dataset with low-dose CT scans of 26,254 patients acquired during the National Lung Screening Trial.
36 |
37 | Parameters
38 | ----------
39 | root : str, Path, optional
40 | path to the folder (usually called NLST) containing the patient subfolders (like 101426).
41 | If not provided, the cache is assumed to be already populated.
42 |
43 | Notes
44 | -----
45 | Follow the download instructions at
46 | https://wiki.cancerimagingarchive.net/display/NLST/National+Lung+Screening+Trial.
47 | The dicoms should be placed under the following folders' structure:
48 | <...>//////*.dcm
49 |
50 | Examples
51 | --------
52 | >>> ds = NLST(root='/path/to/NLST/')
53 | >>> print(len(ds.ids))
54 | ...
55 | >>> print(ds.image(ds.ids[0]).shape)
56 | ...
57 | >>> print(ds.mask(ds.ids[80]).shape)
58 | ...
59 |
60 | References
61 | ----------
62 | """
63 |
64 | @property
65 | def ids(self):
66 | ids = []
67 | for path in tqdm(list(self.root.iterdir())):
68 | series_uid2num_slices = {p.stem: int(deli.load(p)['Total'][5]) for p in path.glob('*/*/*.json')}
69 | ids.append(max(series_uid2num_slices, key=series_uid2num_slices.get))
70 |
71 | return ids
72 |
73 | def _series(self, i):
74 | (folder,) = self.root.glob(f'**/{i}')
75 | series = list(map(pydicom.dcmread, folder.iterdir()))
76 | series = expand_volumetric(series)
77 | assert get_common_tag(series, 'Modality') == 'CT'
78 | assert get_slices_plane(series) == Plane.Axial
79 | series = drop_duplicated_slices(series)
80 | series = order_series(series, decreasing=False)
81 | return series
82 |
83 | @field
84 | def image(self, i):
85 | return np.moveaxis(stack_images(self._series(i), -1).astype(np.int16), 0, 1)
86 |
87 | @field
88 | def study_uid(self, i):
89 | return get_common_tag(self._series(i), 'StudyInstanceUID')
90 |
91 | @field
92 | def series_uid(self, i):
93 | return get_common_tag(self._series(i), 'SeriesInstanceUID')
94 |
95 | @field
96 | def sop_uids(self, i):
97 | return [str(get_tag(i, 'SOPInstanceUID')) for i in self._series(i)]
98 |
99 | @field
100 | def pixel_spacing(self, i):
101 |         return get_pixel_spacing(self._series(i)).tolist()
102 |
103 | @field
104 | def slice_locations(self, i):
105 |         return get_slice_locations(self._series(i))
106 |
107 | @field
108 | def orientation_matrix(self, i):
109 |         return get_orientation_matrix(self._series(i))
110 |
111 | @field
112 | def conv_kernel(self, i):
113 | return get_common_tag(self._series(i), 'ConvolutionKernel', default=None)
114 |
115 | @field
116 | def kvp(self, i):
117 | return get_common_tag(self._series(i), 'KVP', default=None)
118 |
119 | @field
120 | def patient_id(self, i):
121 | return get_common_tag(self._series(i), 'PatientID', default=None)
122 |
123 | @field
124 | def study_date(self, i):
125 | return get_series_date(self._series(i))
126 |
127 | @field
128 | def accession_number(self, i):
129 | return get_common_tag(self._series(i), 'AccessionNumber', default=None)
130 |
--------------------------------------------------------------------------------
/amid/utils.py:
--------------------------------------------------------------------------------
1 | import contextlib
2 | import datetime
3 | import functools
4 | import itertools
5 | import zipfile
6 | from gzip import GzipFile
7 | from os import PathLike
8 | from pathlib import Path
9 | from typing import List, Union
10 |
11 | import nibabel
12 | import numpy as np
13 | from dicom_csv import get_common_tag, order_series, stack_images
14 | from dicom_csv.exceptions import ConsistencyError, TagTypeError
15 | from pydicom import Dataset, dcmread
16 |
17 |
18 | Numeric = Union[float, int]
19 | PathOrStr = Union[str, PathLike]
20 |
21 |
22 | @contextlib.contextmanager
23 | def unpack(root: PathOrStr, relative: str, archive_root_name: str = None, archive_ext: str = None):
24 | """Provides the absolute path to the file in both scenarios: inside archive or inside folder.
25 |
26 | Parameters
27 | ----------
28 | root : str, Path
29 | Absolute path to the downloaded archive or the unpacked archive root.
30 | relative : str, Path
31 |         Relative file path inside the archive. The archive's root folder should be omitted.
32 | archive_root_name : str, Path, optional
33 |         If `root` is an archive, its root folder name should be given.
34 | archive_ext: {'.zip'}, optional
35 | Compression algorithm used to create the archive
36 |
37 | Returns
38 | -------
39 | unpacked : Path
40 | Absolute file path to be opened.
41 | is_unpacked : {True, False}
42 |         `True` if the file was found already unpacked on disk, `False` if it is read from inside the archive.
43 | """
44 | unpacked = Path(root) / relative
45 |
46 | if unpacked.exists():
47 | yield unpacked, True
48 | elif archive_ext == '.zip':
49 | with zipfile.Path(root, str(Path(archive_root_name, relative))).open('rb') as unpacked:
50 | yield unpacked, False
51 | else:
52 | raise ValueError('Unexpected file path or unsupported compression algorithm.')
53 |
54 |
55 | @contextlib.contextmanager
56 | def open_nii_gz_file(unpacked):
57 | """Opens ``.nii.gz`` file if it is packed in archive
58 |
59 | Examples
60 | --------
61 | >>> with unpack('/path/to/archive.zip', 'relative/file/path', 'root', '.zip') as (unpacked, is_unpacked):
62 | >>> with open_nii_gz_file(unpacked) as image:
63 | >>> print(np.asarray(image.dataobj).shape)
64 | # (512, 512, 256)
65 | """
66 | with GzipFile(fileobj=unpacked) as nii:
67 | nii = nibabel.FileHolder(fileobj=nii)
68 | yield nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii})
69 |
70 |
71 | def get_series_date(series):
72 | try:
73 | study_date = get_common_tag(series, 'StudyDate')
74 | except (TagTypeError, ConsistencyError):
75 | return
76 |
77 | if not isinstance(study_date, str) or not study_date.isnumeric() or len(study_date) != 8:
78 | return
79 |
80 | try:
81 | year = int(study_date[:4])
82 | month = int(study_date[4:6])
83 | day = int(study_date[6:])
84 | except TypeError:
85 | return
86 |
87 | if year < 1972: # the year of creation of the first CT scanner
88 | return
89 |
90 | return datetime.date(year, month, day)
91 |
92 |
93 | def propagate_none(func):
94 | @functools.wraps(func)
95 | def wrapper(x, *args, **kwargs):
96 | return None if (x is None) else func(x, *args, **kwargs)
97 |
98 | return wrapper
99 |
100 |
101 | def deprecate(message=None):
102 | def decorator(func):
103 | return functools.wraps(func)(np.deprecate(message=message)(func))
104 |
105 | return decorator
106 |
107 |
108 | def image_from_dicom_folder(folder: Union[str, Path]) -> np.ndarray:
109 | return stack_images(series_from_dicom_folder(folder))
110 |
111 |
112 | def series_from_dicom_folder(folder: Union[str, Path]) -> List[Dataset]:
113 | return order_series([dcmread(p) for p in Path(folder).glob('*.dcm')])
114 |
115 |
116 | # TODO: stolen from dpipe for now
117 | def mask_to_box(mask: np.ndarray):
118 | """
119 | Find the smallest box that contains all true values of the ``mask``.
120 | """
121 | if not mask.any():
122 | raise ValueError('The mask is empty.')
123 |
124 | start, stop = [], []
125 | for ax in itertools.combinations(range(mask.ndim), mask.ndim - 1):
126 | nonzero = np.any(mask, axis=ax)
127 | if np.any(nonzero):
128 | left, right = np.where(nonzero)[0][[0, -1]]
129 | else:
130 | left, right = 0, 0
131 | start.insert(0, left)
132 | stop.insert(0, right + 1)
133 | return start, stop
134 |
--------------------------------------------------------------------------------
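A tiny worked example for `mask_to_box` above: it returns the start (inclusive) and stop (exclusive) corners of the smallest box covering all true voxels.

```python
import numpy as np
from amid.utils import mask_to_box

mask = np.zeros((4, 5), dtype=bool)
mask[1:3, 2:4] = True
print(mask_to_box(mask))  # ([1, 2], [3, 4]): rows 1..2 and columns 2..3 are inside the box
```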
/amid/crlm.py:
--------------------------------------------------------------------------------
1 | from functools import partial
2 | from typing import Dict
3 |
4 | import highdicom
5 | import numpy as np
6 | from dicom_csv import get_orientation_matrix, get_slice_locations, get_voxel_spacing, stack_images
7 | from imops import restore_crop
8 | from more_itertools import locate
9 |
10 | from .internals import Dataset, licenses, register
11 | from .utils import series_from_dicom_folder
12 |
13 |
14 | @register(
15 | body_region='Abdomen',
16 | license=licenses.CC_BY_40,
17 | link='https://wiki.cancerimagingarchive.net/pages/viewpage.action?'
18 | 'pageId=89096268#89096268412b832037484784bd78caf58e052641',
19 |     modality=('CT', 'SEG'),
20 | prep_data_size='11G',
21 | raw_data_size='11G',
22 | task=('Segmentation', 'Classification'),
23 | )
24 | class CRLM(Dataset):
25 | """
26 | Parameters
27 | ----------
28 | root : str, Path, optional
29 | path to the folder containing the raw downloaded archives.
30 | If not provided, the cache is assumed to be already populated.
31 |
32 |
33 | Notes
34 | -----
35 | Download links:
36 | https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=89096268#89096268b2cc35fce0664a2b875b5ec675ba9446
37 |
38 | This collection consists of DICOM images and DICOM Segmentation Objects (DSOs)
39 | for 197 patients with Colorectal Liver Metastases (CRLM).
40 |     It comprises the original DICOM CTs and segmentations for each subject.
41 | The segmentations include 'Liver', 'Liver_Remnant'
42 | (liver that will remain after surgery based on a preoperative CT plan),
43 | 'Hepatic' and 'Portal' veins,
44 | and 'Tumor_x', where 'x' denotes the various tumor occurrences in the case
45 |
46 | Examples
47 | --------
48 | >>> # Place the downloaded archives in any folder and pass the path to the constructor:
49 | >>> ds = CRLM(root='/path/to/archives/root')
50 | >>> print(len(ds.ids))
51 | # 197
52 | >>> print(ds.image(ds.ids[0]).shape)
53 | # (512, 512, 52)
54 |
55 | References
56 | ----------
57 | """
58 |
59 | @property
60 | def ids(self):
61 | return sorted(d.name for d in self.root.iterdir())
62 |
63 | def _folders(self, i):
64 | case = self.root / i
65 | folders = tuple({p.parent for p in case.glob('*/*/*/*.dcm')})
66 | return tuple(sorted(folders, key=lambda f: len(list(f.iterdir()))))
67 |
68 | def _series(self, i):
69 | return series_from_dicom_folder(self._folders(i)[1])
70 |
71 | def image(self, i):
72 | return stack_images(self._series(i))
73 |
74 | def mask(self, i) -> Dict[str, np.ndarray]:
75 | """Returns dict: {'liver': ..., 'hepatic': ..., 'tumor_x': ...}"""
76 | dicom_seg = highdicom.seg.segread(next(self._folders(i)[0].glob('*.dcm')))
77 | series = self._series(i)
78 | image_sops = [s.SOPInstanceUID for s in series]
79 | seg_sops = [sop_uid for _, _, sop_uid in dicom_seg.get_source_image_uids()]
80 |
81 | sops = [sop for sop in image_sops if sop in set(seg_sops).intersection(image_sops)]
82 | seg_box_start = list(locate(image_sops, lambda i: i == sops[0]))[0]
83 | seg_box_stop = list(locate(image_sops, lambda i: i == sops[-1]))[0]
84 |
85 | image = self.image(i)
86 | seg_box = np.asarray(((0, 0, seg_box_start), (*np.atleast_1d(image.shape[:-1]), seg_box_stop + 1)))
87 |
88 | raw_masks = np.swapaxes(
89 | dicom_seg.get_pixels_by_source_instance(
90 | sops,
91 | ignore_spatial_locations=True,
92 | segment_numbers=dicom_seg.get_segment_numbers(),
93 | ),
94 | -1,
95 | 0,
96 | )
97 | masks = list(map(partial(restore_crop, box=seg_box, shape=image.shape), raw_masks))
98 |
99 | liver_mask = {'liver': masks[0].astype(bool)}
100 | # skip liver remnant
101 | veins = {'hepatic': masks[2].astype(bool), 'portal': masks[3].astype(bool)}
102 | tumors = {f'tumor_{i}': array.astype(bool) for i, array in enumerate(masks[4:])}
103 |
104 | return {**liver_mask, **veins, **tumors}
105 |
106 | def spacing(self, i):
107 | """Returns the voxel spacing along axes (x, y, z)."""
108 | return get_voxel_spacing(self._series(i))
109 |
110 | def slice_locations(self, i):
111 | return get_slice_locations(self._series(i))
112 |
113 | def affine(self, i):
114 |         """Returns the 3x3 orientation matrix of the image."""
115 | return get_orientation_matrix(self._series(i))
116 |
--------------------------------------------------------------------------------
/amid/luna25.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 | from functools import cached_property
3 | from typing import NamedTuple, Sequence
4 |
5 | import numpy as np
6 | import pandas as pd
7 | import SimpleITK as sitk
8 |
9 | from .internals import Dataset, field, licenses, register
10 |
11 |
12 | class LUNA25Nodule(NamedTuple):
13 | coords: Sequence[float]
14 | lesion_id: int
15 | annotation_id: str
16 | nodule_id: str
17 | malignancy: bool
18 | center_voxel: Sequence[float]
19 | bbox: np.ndarray
20 |
21 |
22 | @register(
23 | body_region='Chest',
24 | license=licenses.CC_BY_40,
25 | link='https://luna25.grand-challenge.org/',
26 | modality='CT',
27 | prep_data_size='214G',
28 | raw_data_size='205G',
29 | task='Lung nodule malignancy risk estimation',
30 | )
31 | class LUNA25(Dataset):
32 | """
33 | The LUNA25 Challenge dataset is a comprehensive collection designed to support
34 | the development and validation of AI algorithms for lung nodule malignancy risk
35 | estimation using low-dose chest CT scans. In total, it contains 2120 patients
36 | and 4069 low-dose chest CT scans, with 555 annotated malignant nodules and
37 | 5608 benign nodules (3762 unique nodules, 348 of them are malignant).
38 |     The dataset was acquired from participants enrolled in the
39 |     National Lung Screening Trial (NLST) between 2002 and 2004 in
40 | one of the 33 centers in the United States.
41 |
42 | Parameters
43 | ----------
44 | root : str, Path, optional
45 | path to the folder containing `luna25_images` and `luna25_nodule_blocks` folders and
46 | `LUNA25_Public_Training_Development_Data.csv` file obtained by the instruction at
47 | https://luna25.grand-challenge.org/datasets/.
48 | If not provided, the cache is assumed to be already populated.
49 |
50 | Notes
51 | -----
52 | Join the challenge at https://luna25.grand-challenge.org/.
53 | Then follow the download and extraction instructions at https://luna25.grand-challenge.org/datasets/.
54 | """
55 |
56 | @property
57 | def ids(self):
58 | return [file.name[: -len('.mha')] for file in (self.root / 'luna25_images').iterdir()]
59 |
60 | def _sitk_image(self, i):
61 | return sitk.ReadImage(self.root / f'luna25_images/{i}.mha')
62 |
63 | @field
64 | def image(self, i):
65 | return sitk.GetArrayFromImage(self._sitk_image(i))
66 |
67 | @field
68 | def spacing(self, i):
69 | return self._sitk_image(i).GetSpacing()[::-1]
70 |
71 | @cached_property
72 | def _data(self):
73 | return pd.read_csv(self.root / 'LUNA25_Public_Training_Development_Data.csv')
74 |
75 | def _data_rows(self, i):
76 | return self._data[self._data['SeriesInstanceUID'] == i]
77 |
78 | def _data_column_value(self, i, column_name):
79 | values = self._data_rows(i).get(column_name).unique()
80 | assert len(values) == 1
81 | value = values[0]
82 | assert not pd.isnull(value)
83 | return value
84 |
85 | @field
86 | def patient_id(self, i):
87 | return str(self._data_column_value(i, 'PatientID'))
88 |
89 | @field
90 | def study_date(self, i):
91 | study_date = str(self._data_column_value(i, 'StudyDate'))
92 | return datetime.strptime(study_date, "%Y%m%d").date()
93 |
94 | @field
95 | def age(self, i):
96 | return self._data_column_value(i, 'Age_at_StudyDate')
97 |
98 | @field
99 | def gender(self, i):
100 | return self._data_column_value(i, 'Gender')
101 |
102 | @field
103 | def nodules(self, i):
104 | nodules = []
105 | sitk_image = self._sitk_image(i)
106 | shape = self.image(i).shape
107 | bbox_size = np.array([64, 128, 128]) # all nodule blocks in LUNA25 are of the same size
108 | for row in self._data_rows(i).itertuples():
109 | coords = (row.CoordX, row.CoordY, row.CoordZ)
110 | center_voxel = sitk_image.TransformPhysicalPointToIndex(map(int, coords))[::-1]
111 |
112 | nodule_block_origin = self.get_nodule_block_metadata(row.AnnotationID)['origin'][::-1]
113 | bbox_start_point = sitk_image.TransformPhysicalPointToIndex(map(int, nodule_block_origin))[::-1]
114 | bbox = np.array([bbox_start_point, np.minimum(bbox_start_point + bbox_size, shape)])
115 | nodules.append(
116 | LUNA25Nodule(
117 | coords=coords,
118 | lesion_id=row.LesionID,
119 | annotation_id=str(row.AnnotationID),
120 | nodule_id=str(row.NoduleID),
121 | malignancy=row.label,
122 | center_voxel=center_voxel,
123 | bbox=bbox,
124 | )
125 | )
126 | return nodules
127 |
128 | def get_nodule_block_image(self, annotation_id):
129 | return np.load(self.root / f'luna25_nodule_blocks/image/{annotation_id}.npy')
130 |
131 | def get_nodule_block_metadata(self, annotation_id):
132 | metadata = np.load(self.root / f'luna25_nodule_blocks/metadata/{annotation_id}.npy', allow_pickle=True)
133 | assert metadata.shape == ()
134 | return metadata.item()
135 |
--------------------------------------------------------------------------------
/amid/brats2021.py:
--------------------------------------------------------------------------------
1 | import contextlib
2 | from pathlib import Path
3 | from typing import Union
4 | from zipfile import ZipFile
5 |
6 | import nibabel
7 | import numpy as np
8 | import pandas as pd
9 |
10 | from .internals import Dataset, field, licenses, register
11 | from .utils import open_nii_gz_file, unpack
12 |
13 |
14 | @register(
15 | body_region='Head',
16 | license=licenses.CC_BYNCSA_40,
17 | link='http://www.braintumorsegmentation.org/',
18 | modality=('MRI T1', 'MRI T1Gd', 'MRI T2', 'MRI T2-FLAIR'),
19 | prep_data_size='8,96G',
20 | raw_data_size='15G',
21 | task=('Segmentation', 'Classification', 'Domain Adaptation'),
22 | )
23 | class BraTS2021(Dataset):
24 | """
25 | Parameters
26 | ----------
27 | root : str, Path, optional
28 | path to the folder containing the raw downloaded archives.
29 | If not provided, the cache is assumed to be already populated.
30 |
31 | Notes
32 | -----
33 | Download links:
34 | 2021: http://www.braintumorsegmentation.org/
35 |
36 | Examples
37 | --------
38 | >>> # Place the downloaded archives in any folder and pass the path to the constructor:
39 | >>> ds = BraTS2021(root='/path/to/archives/root')
40 | >>> print(len(ds.ids))
41 | # 5880
42 | >>> print(ds.image(ds.ids[0]).shape)
43 | # (240, 240, 155)
44 |
45 | References
46 | ----------
47 | """
48 |
49 | @property
50 | def ids(self):
51 | return sorted(_get_ids_or_file(self.root, 'TrainingData') + _get_ids_or_file(self.root, 'ValidationData'))
52 |
53 | @field
54 | def fold(self, i) -> str:
55 | return 'ValidationData' if _get_ids_or_file(self.root, 'ValidationData', check_id=i) else 'TrainingData'
56 |
57 | @property
58 | def mapping21_17(self) -> pd.DataFrame:
59 | return pd.read_csv(self.root / 'BraTS21-17_Mapping.csv')
60 |
61 | @field
62 | def subject_id(self, i) -> str:
63 | return i.rsplit('_', 1)[0]
64 |
65 | @field
66 | def modality(self, i) -> str:
67 | return i.rsplit('_', 1)[1]
68 |
69 | @field
70 | def image(self, i) -> np.ndarray:
71 | root, relative = _get_ids_or_file(self.root, self.fold(i), check_id=i, return_image=True)
72 | with _load_nibabel_probably_from_zip(root, relative, '.', '.zip') as nii_image:
73 | return np.asarray(nii_image.dataobj)
74 |
75 | def mask(self, i) -> Union[np.ndarray, None]:
76 | if self.fold(i) == 'ValidationData':
77 | return None
78 | else:
79 | root, relative = _get_ids_or_file(self.root, self.fold(i), check_id=i, return_segm=True)
80 | with _load_nibabel_probably_from_zip(root, relative, '.', '.zip') as nii_image:
81 | return np.asarray(nii_image.dataobj)
82 |
83 | def spacing(self, i):
84 | """Returns the voxel spacing along axes (x, y, z)."""
85 | root, relative = _get_ids_or_file(self.root, self.fold(i), check_id=i, return_image=True)
86 | with _load_nibabel_probably_from_zip(root, relative, '.', '.zip') as nii_image:
87 | return tuple(nii_image.header['pixdim'][1:4])
88 |
89 | @field
90 | def affine(self, i) -> np.ndarray:
91 | """Returns 4x4 matrix that gives the image's spatial orientation."""
92 | root, relative = _get_ids_or_file(self.root, self.fold(i), check_id=i, return_image=True)
93 | with _load_nibabel_probably_from_zip(root, relative, '.', '.zip') as nii_image:
94 | return nii_image.affine
95 |
96 |
97 | def _get_ids_or_file(
98 | base_path,
99 | archive_name_part: str = 'TrainingData',
100 | check_id: str = None,
101 | return_image: bool = False,
102 | return_segm: bool = False,
103 | ):
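104 |     # Behaviour depends on the arguments:
105 |     #   - check_id is None: return the list of image ids found in the matching archives;
106 |     #   - check_id given with return_image / return_segm: return (archive, member path) for
107 |     #     the image or segmentation file of that id;
108 |     #   - check_id given without the return_* flags: return True/False depending on whether
109 |     #     the id is present in the matching archives.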
104 | # TODO: implement the same functionality for folder extraction.
105 | ids = []
106 | for archive in base_path.glob('*.zip'):
107 | if archive_name_part in archive.name:
108 | with ZipFile(archive) as zf:
109 | for zipinfo in zf.infolist():
110 | if not zipinfo.is_dir():
111 | file = Path(zipinfo.filename)
112 | _id = file.stem.replace('.nii', '')
113 |
114 | if 'seg' not in _id:
115 | ids.append(_id)
116 |
117 | if (check_id is not None) and (check_id == _id):
118 | if return_segm:
119 | return str(archive), str(file)[: -len('.nii.gz')].rsplit('_', 1)[0] + '_seg.nii.gz'
120 |
121 | if return_image:
122 | return str(archive), str(file)
123 |
124 | return True # if check_id in archive
125 |
126 | return ids if (check_id is None) else False # if check_id not in archive
127 |
128 |
129 | @contextlib.contextmanager
130 | def _load_nibabel_probably_from_zip(root: str, relative: str, archive_root_name: str = None, archive_ext: str = None):
131 | with unpack(root, relative, archive_root_name, archive_ext) as (unpacked, is_unpacked):
132 | if is_unpacked:
133 | yield nibabel.load(unpacked)
134 | else:
135 | with open_nii_gz_file(unpacked) as nii_image:
136 | yield nii_image
137 |
--------------------------------------------------------------------------------
/amid/egd.py:
--------------------------------------------------------------------------------
1 | import nibabel as nb
2 | import numpy as np
3 | from deli import load
4 |
5 | from .internals import Dataset, field as _field, register
6 |
7 |
8 | @register(
9 | body_region='Head',
10 | license='EGD data license',
11 | link='https://xnat.bmia.nl/data/archive/projects/egd',
12 | modality=('FLAIR', 'MRI T1', 'MRI T1GD', 'MRI T2'),
13 | prep_data_size='107,49G',
14 | raw_data_size='40G',
15 | task='Segmentation',
16 | )
17 | class EGD(Dataset):
18 | """
19 | The Erasmus Glioma Database (EGD): Structural MRI scans, WHO 2016 subtypes,
20 | and segmentations of 774 patients with glioma [1]_.
21 |
22 | Parameters
23 | ----------
24 | root : str, Path, optional
25 | path to the folder containing the raw downloaded archives.
26 | If not provided, the cache is assumed to be already populated.
27 |
28 | Notes
29 | -----
30 |     Access to the dataset can be requested at the XNAT portal [https://xnat.bmia.nl/data/archive/projects/egd].
31 |
32 |     To download the data in a compatible structure, we recommend using the
33 |     egd-downloader script [https://zenodo.org/record/4761089#.YtZpLtJBxhF].
34 |     Please refer to its README for further information.
35 |
36 | Examples
37 | --------
38 | >>> # Place the downloaded archives in any folder and pass the path to the constructor:
39 | >>> egd = EGD(root='/path/to/downloaded/data/folder/')
40 | >>> print(len(egd.ids))
41 | # 774
42 | >>> print(egd.t1gd(egd.ids[215]).shape)
43 | # (197, 233, 189)
44 | >>> print(egd.manufacturer(egd.ids[444]))
45 | # Philips Medical Systems
46 |
47 | References
48 | ----------
49 | .. [1] van der Voort, Sebastian R., et al. "The Erasmus Glioma Database (EGD): Structural MRI scans,
50 | WHO 2016 subtypes, and segmentations of 774 patients with glioma."
51 | Data in brief 37 (2021): 107191.
52 | https://www.sciencedirect.com/science/article/pii/S2352340921004753
53 |
54 | """
55 |
56 | @property
57 | def ids(self):
58 | result = []
59 | for folder in (self.root / 'SUBJECTS').iterdir():
60 | for suffix in 'FLAIR', 'T1', 'T1GD', 'T2':
61 | result.append(f'{folder.name}-{suffix}')
62 |
63 | return tuple(sorted(result))
64 |
65 | @_field
66 | def brain_mask(self, i) -> np.ndarray:
67 | return nb.load(self.root / 'METADATA' / 'Brain_mask.nii.gz').get_fdata().astype(bool)
68 |
69 | @_field
70 | def deface_mask(self, i) -> np.ndarray:
71 | return nb.load(self.root / 'METADATA' / 'Deface_mask.nii.gz').get_fdata().astype(bool)
72 |
73 | def _image_file(self, i):
74 | i, suffix = i.rsplit('-', 1)
75 | return nb.load(self.root / 'SUBJECTS' / i / f'{suffix}.nii.gz')
76 |
77 | @_field
78 | def modality(self, i) -> str:
79 | _, suffix = i.rsplit('-', 1)
80 | return suffix
81 |
82 | @_field
83 | def subject_id(self, i) -> str:
84 | subject, _ = i.rsplit('-', 1)
85 | return subject
86 |
87 | @_field
88 | def affine(self, i) -> np.ndarray:
89 | return self._image_file(i).affine
90 |
91 | def spacing(self, i):
92 | # voxel spacing is [1, 1, 1] for all images in this dataset...
93 | return tuple(self._image_file(i).header['pixdim'][1:4])
94 |
95 | @_field
96 | def image(self, i) -> np.ndarray:
97 | # intensities are not integer-valued in this dataset...
98 | return np.asarray(self._image_file(i).dataobj)
99 |
100 | def _metadata(self, i):
101 | i, _ = i.rsplit('-', 1)
102 | return load(self.root / 'SUBJECTS' / i / 'metadata.json')
103 |
104 | @_field
105 | def genetic_and_histological_label_idh(self, i) -> str:
106 | return self._metadata(i)['Genetic_and_Histological_labels']['IDH']
107 |
108 | @_field
109 | def genetic_and_histological_label_1p19q(self, i) -> str:
110 | return self._metadata(i)['Genetic_and_Histological_labels']['1p19q']
111 |
112 | @_field
113 | def genetic_and_histological_label_grade(self, i) -> str:
114 | return self._metadata(i)['Genetic_and_Histological_labels']['Grade']
115 |
116 | @_field
117 | def age(self, i) -> float:
118 | return self._metadata(i)['Clinical_data']['Age']
119 |
120 | @_field
121 | def sex(self, i) -> str:
122 | return self._metadata(i)['Clinical_data']['Sex']
123 |
124 | @_field
125 | def observer(self, i) -> str:
126 | return self._metadata(i)['Segmentation_source']['Observer']
127 |
128 | @_field
129 | def original_scan(self, i) -> str:
130 | return self._metadata(i)['Segmentation_source']['Original scan']
131 |
132 | @_field
133 | def manufacturer(self, i) -> str:
134 | return self._metadata(i)['Scan_characteristics']['Manufacturer']
135 |
136 | @_field
137 | def system(self, i) -> str:
138 | return self._metadata(i)['Scan_characteristics']['System']
139 |
140 | @_field
141 | def field(self, i) -> str:
142 | return self._metadata(i)['Scan_characteristics']['Field']
143 |
144 | @_field
145 | def mask(self, i) -> np.ndarray:
146 | i, _ = i.rsplit('-', 1)
147 | return nb.load(self.root / 'SUBJECTS' / i / 'MASK.nii.gz').get_fdata().astype(bool)
148 |
--------------------------------------------------------------------------------
/amid/flare2022.py:
--------------------------------------------------------------------------------
1 | import gzip
2 | import zipfile
3 | from pathlib import Path
4 | from typing import Union
5 | from zipfile import ZipFile
6 |
7 | import nibabel
8 | import numpy as np
9 |
10 | from .internals import Dataset, field, register
11 |
12 |
13 | @register(
14 | body_region='Abdomen',
15 | license=None,
16 | link='https://flare22.grand-challenge.org/',
17 | modality='CT',
18 | prep_data_size='347G',
19 | raw_data_size='247G',
20 | task='Semi-supervised abdominal organ segmentation',
21 | )
22 | class FLARE2022(Dataset):
23 | """
24 | An abdominal organ segmentation dataset for semi-supervised learning [1]_.
25 |
26 | The dataset was used at the MICCAI FLARE 2022 challenge.
27 |
28 | Parameters
29 | ----------
30 | root : str, Path, optional
31 | path to the folder containing the raw downloaded archives.
32 | If not provided, the cache is assumed to be already populated.
33 |
34 | Notes
35 | -----
36 | Download link: https://flare22.grand-challenge.org/Dataset/
37 |
38 | The `root` folder should contain the two downloaded folders, namely: "Training" and "Validation".
39 |
40 | Examples
41 | --------
42 | >>> # Place the downloaded folders in any folder and pass the path to the constructor:
43 | >>> ds = FLARE2022(root='/path/to/downloaded/data/folder/')
44 | >>> print(len(ds.ids))
45 | # 2100
46 | >>> print(ds.image(ds.ids[0]).shape)
47 | # (512, 512, 110)
48 | >>> print(ds.mask(ds.ids[25]).shape)
49 | # (512, 512, 104)
50 |
51 | References
52 | ----------
53 | .. [1] Ma, Jun, et al. "Fast and Low-GPU-memory abdomen CT organ segmentation: The FLARE challenge."
54 | Medical Image Analysis 82 (2022): 102616.
55 | """
56 |
57 | @property
58 | def ids(self):
59 | result = set()
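60 |         # ids are prefixed with their split: TL - training labeled, TU - training unlabeled,
61 |         # VU - validation (unlabeled)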
60 |
61 | # 50 Training Labeled cases
62 | archive = self.root / 'Training' / 'FLARE22_LabeledCase50' / 'images.zip'
63 | with ZipFile(archive) as zf:
64 | for file in zf.namelist():
65 | result.add(f"TL{file.split('_')[-2]}")
66 |
67 | # 2000 Training Unlabeled cases
68 | for archive in (self.root / 'Training').glob('*.zip'):
69 | with ZipFile(archive) as zf:
70 | for file in zf.namelist():
71 | if not file.endswith('.nii.gz'):
72 | continue
73 |
74 | file = Path(file)
75 | result.add(f"TU{file.name.split('_')[-2]}")
76 |
77 | # 50 Validation Unlabeled cases
78 | for file in (self.root / 'Validation').glob('*'):
79 | if not file.name.endswith('.nii.gz'):
80 | continue
81 |
82 | result.add(f"VU{file.name.split('_')[-2]}")
83 |
84 | return sorted(result)
85 |
86 | def _file(self, i):
87 | # 50 Training Labeled cases
88 | if i.startswith('TL'):
89 | archive = self.root / 'Training' / 'FLARE22_LabeledCase50' / 'images.zip'
90 | with ZipFile(archive) as zf:
91 | for file in zf.namelist():
92 | if i[2:] in file:
93 | return zipfile.Path(archive, file)
94 |
95 | # 2000 Training Unlabeled cases
96 | for archive in (self.root / 'Training').glob('*.zip'):
97 | with ZipFile(archive) as zf:
98 | for file in zf.namelist():
99 | if i[2:] in file:
100 | return zipfile.Path(archive, file)
101 |
102 | # 50 Validation Unlabeled cases
103 | if i.startswith('VU'):
104 | file = self.root / 'Validation' / f'FLARETs_{i[2:]}_0000.nii.gz'
105 | return file
106 |
107 | raise ValueError(f'Id "{i}" not found')
108 |
109 | @field
110 | def image(self, i) -> np.ndarray:
111 | with self._file(i).open('rb') as opened:
112 | with gzip.GzipFile(fileobj=opened) as nii:
113 | nii = nibabel.FileHolder(fileobj=nii)
114 | image = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii})
115 | return np.asarray(image.dataobj)
116 |
117 | @field
118 | def affine(self, i) -> np.ndarray:
119 | """The 4x4 matrix that gives the image's spatial orientation"""
120 | with self._file(i).open('rb') as opened:
121 | with gzip.GzipFile(fileobj=opened) as nii:
122 | nii = nibabel.FileHolder(fileobj=nii)
123 | image = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii})
124 | return image.affine
125 |
126 | @field
127 | def mask(self, i) -> Union[np.ndarray, None]:
128 | if not i.startswith('TL'):
129 | return None
130 |
131 | archive = self.root / 'Training' / 'FLARE22_LabeledCase50' / 'labels.zip'
132 | with ZipFile(archive) as zf:
133 | for file in zf.namelist():
134 | if i[2:] in file:
135 | with zipfile.Path(archive, file).open('rb') as opened:
136 | with gzip.GzipFile(fileobj=opened) as nii:
137 | nii = nibabel.FileHolder(fileobj=nii)
138 | mask = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii})
139 | return np.asarray(mask.dataobj)
140 |
--------------------------------------------------------------------------------
/amid/crossmoda.py:
--------------------------------------------------------------------------------
1 | import contextlib
2 | import gzip
3 | import zipfile
4 | from pathlib import Path
5 | from typing import Union
6 | from zipfile import ZipFile
7 |
8 | import nibabel as nb
9 | import numpy as np
10 | import pandas as pd
11 |
12 | from .internals import Dataset, licenses, register
13 |
14 |
15 | @register(
16 | body_region='Head',
17 | license=licenses.CC_BYNCSA_40,
18 | link='https://zenodo.org/record/6504722#.YsgwnNJByV4',
19 | modality=('MRI T1c', 'MRI T2hr'),
20 | prep_data_size='8,96G',
21 | raw_data_size='17G',
22 | task=('Segmentation', 'Classification', 'Domain Adaptation'),
23 | )
24 | class CrossMoDA(Dataset):
25 | """
26 | Parameters
27 | ----------
28 | root : str, Path, optional
29 | path to the folder containing the raw downloaded archives.
30 | If not provided, the cache is assumed to be already populated.
31 |
32 | Notes
33 | -----
34 | Download links:
35 | 2021 & 2022: https://zenodo.org/record/6504722#.YsgwnNJByV4
36 |
37 | Examples
38 | --------
39 | >>> # Place the downloaded archives in any folder and pass the path to the constructor:
40 | >>> ds = CrossMoDA(root='/path/to/archives/root')
41 | >>> print(len(ds.ids))
42 | # 484
43 | >>> print(ds.image(ds.ids[0]).shape)
44 | # (512, 512, 214)
45 |
46 | References
47 | ----------
48 | """
49 |
50 | @property
51 | def ids(self):
52 | result = set()
53 | for archive in self.root.glob('*.zip'):
54 | with ZipFile(archive) as zf:
55 | for zipinfo in zf.infolist():
56 | if zipinfo.is_dir():
57 | continue
58 |
59 | file = Path(zipinfo.filename)
60 | assert file.stem not in result, file.stem
61 |
62 | if 'Label' not in file.stem and file.suffix == '.gz':
63 | result.add('_'.join(file.stem.split('_')[:-1]))
64 | else:
65 | continue
66 |
67 | return sorted(result)
68 |
69 | @property
70 | def train_source_df(self):
71 | return pd.read_csv(self.root / 'infos_source_training.csv', index_col='crossmoda_name')
72 |
73 | def _file(self, i):
74 | for archive in self.root.glob('*.zip'):
75 | with ZipFile(archive) as zf:
76 | for zipinfo in zf.infolist():
77 | if i == '_'.join(Path(zipinfo.filename).stem.split('_')[:-1]) and 'Label' not in zipinfo.filename:
78 | return zipfile.Path(archive, zipinfo.filename)
79 |
80 | raise ValueError(f'Id "{i}" not found')
81 |
82 | def image(self, i) -> Union[np.ndarray, None]:
83 | with open_nii_gz_file(self._file(i)) as nii_image:
84 | return np.asarray(nii_image.dataobj)
85 |
86 | def spacing(self, i):
87 | """Returns pixel spacing along axes (x, y, z)"""
88 | with open_nii_gz_file(self._file(i)) as nii_image:
89 | return tuple(nii_image.header['pixdim'][1:4])
90 |
91 | def affine(self, i):
92 | """The 4x4 matrix that gives the image's spatial orientation"""
93 | with open_nii_gz_file(self._file(i)) as nii_image:
94 | return nii_image.affine
95 |
96 | def split(self, i) -> str:
97 | """The split in which this entry is contained: training_source, training_target, validation"""
98 | file = self._file(i)
99 | idx = int(file.name.split('_')[2])
100 | dataset = file.name.split('_')[1]
101 |
102 | if dataset == 'ldn':
103 | if 1 <= idx < 106:
104 | return 'training_source'
105 | elif 106 <= idx < 211:
106 | return 'training_target'
107 | elif 211 <= idx < 243:
108 | return 'validation'
109 |
110 | elif dataset == 'etz':
111 | if 0 <= idx < 105:
112 | return 'training_source'
113 | elif 105 <= idx < 210:
114 | return 'training_target'
115 | elif 210 <= idx < 242:
116 | return 'validation'
117 |
118 | raise ValueError(f'Cannot find split for the file: {file}')
119 |
120 | def year(self, i) -> int:
121 | """The year in which this entry was published: 2021 or 2022"""
122 | return int(self._file(i).name[9:13])
123 |
124 | def masks(self, i):
125 | """Combined mask of schwannoma and cochlea (1 and 2 respectively)"""
126 | file = self._file(i)
127 | if 'T2' not in file.name:
128 | with open_nii_gz_file(file.parent / file.name.replace('ceT1', 'Label')) as nii_image:
129 | return nii_image.get_fdata().astype(np.uint8)
130 |
131 | def koos_grade(self, i):
132 | """VS Tumour characteristic according to Koos grading scale: [1..4] or (-1 - post operative)"""
133 | if self.split(i) == 'training_source':
134 | grade = self.train_source_df.loc[i, 'koos']
135 | return -1 if (grade == 'post-operative-london') else int(grade)
136 |
137 |
138 | # TODO: sync with amid.utils
139 | @contextlib.contextmanager
140 | def open_nii_gz_file(file):
141 | with file.open('rb') as opened:
142 | with gzip.GzipFile(fileobj=opened) as nii:
143 | nii = nb.FileHolder(fileobj=nii)
144 | yield nb.Nifti1Image.from_file_map({'header': nii, 'image': nii})
145 |
--------------------------------------------------------------------------------
/docs/recipes/RSNABreastCancer.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "source": [
6 | "## Normalization"
7 | ],
8 | "metadata": {
9 | "collapsed": false
10 | }
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": null,
15 | "outputs": [],
16 | "source": [
17 | "from connectome import Transform\n",
18 | "\n",
19 | "\n",
20 | "class Normalize(Transform):\n",
21 | " __inherit__ = True\n",
22 | "\n",
23 | " def image(image, padding_value, intensity_sign):\n",
24 | " if padding_value is not None:\n",
25 | " if padding_value > 0:\n",
26 | " return padding_value - image\n",
27 | " return image\n",
28 | "\n",
29 | " if intensity_sign == 1:\n",
30 | " return image.max() - image\n",
31 | "\n",
32 | " return image"
33 | ],
34 | "metadata": {
35 | "collapsed": false
36 | }
37 | },
38 | {
39 | "cell_type": "markdown",
40 | "source": [
41 | "## Zoom to reduce image size"
42 | ],
43 | "metadata": {
44 | "collapsed": false
45 | }
46 | },
47 | {
48 | "cell_type": "code",
49 | "execution_count": null,
50 | "outputs": [],
51 | "source": [
52 | "from connectome import Apply\n",
53 | "from scipy.ndimage import zoom\n",
54 | "\n",
55 | "# 0.25 - is the downsample factor. It should probably be tuned via cross-validation\n",
56 | "Zoom = Apply(image=lambda x: zoom(np.float32(x), 0.25, order=1))"
57 | ],
58 | "metadata": {
59 | "collapsed": false
60 | }
61 | },
62 | {
63 | "cell_type": "markdown",
64 | "source": [
65 | "## Artifacts and background removal"
66 | ],
67 | "metadata": {
68 | "collapsed": false
69 | }
70 | },
71 | {
72 | "cell_type": "code",
73 | "execution_count": null,
74 | "outputs": [],
75 | "source": [
76 | "from connectome import Transform\n",
77 | "from skimage.morphology import label\n",
78 | "\n",
79 | "\n",
80 | "class GreatestComponent(Transform):\n",
81 | " __inherit__ = True\n",
82 | "\n",
83 | " def image(image):\n",
84 | " lbl = label(image > 0)\n",
85 | " values, counts = np.unique(lbl, return_counts=True)\n",
86 | " foreground = values != 0\n",
87 | " component = values[foreground][counts[foreground].argmax()]\n",
88 | " # select all the components greater than the background\n",
89 | " # + the greatest foreground component\n",
90 | " components = set(values[counts > counts[~foreground]]) | {component}\n",
91 | " if len(components) > 1:\n",
92 | " # if there are several components - pick the one with the greatest intensity\n",
93 | " component = max(components, key=lambda c: image[lbl == c].mean())\n",
94 | "\n",
95 | " return image * (lbl == component)\n",
96 | "\n",
97 | "\n",
98 | "class CropBackground(Transform):\n",
99 | " __inherit__ = True\n",
100 | "\n",
101 | " def image(image):\n",
102 | " mask = image > 0\n",
103 | " xs, = mask.any(0).nonzero()\n",
104 | " ys, = mask.any(1).nonzero()\n",
105 | " return image[ys.min():ys.max() + 1, xs.min():xs.max() + 1]"
106 | ],
107 | "metadata": {
108 | "collapsed": false
109 | }
110 | },
111 | {
112 | "cell_type": "markdown",
113 | "source": [
114 | "## Data augmentation"
115 | ],
116 | "metadata": {
117 | "collapsed": false
118 | }
119 | },
120 | {
121 | "cell_type": "code",
122 | "execution_count": null,
123 | "outputs": [],
124 | "source": [
125 | "from connectome import Transform, impure\n",
126 | "import numpy as np\n",
127 | "\n",
128 | "\n",
129 | "class RandomFlip(Transform):\n",
130 | " __inherit__ = True\n",
131 | "\n",
132 | " @impure\n",
133 | " def _flip():\n",
134 | " return np.random.binomial(1, 0.5)\n",
135 | "\n",
136 | " def image(image, _flip):\n",
137 | " if _flip:\n",
138 | " return np.flip(image, axis=1)\n",
139 | " return image"
140 | ],
141 | "metadata": {
142 | "collapsed": false
143 | }
144 | },
145 | {
146 | "cell_type": "markdown",
147 | "source": [
148 | "## Combining it all together"
149 | ],
150 | "metadata": {
151 | "collapsed": false
152 | }
153 | },
154 | {
155 | "cell_type": "code",
156 | "execution_count": null,
157 | "outputs": [],
158 | "source": [
159 | "from amid.rsna_bc import RSNABreastCancer\n",
160 | "from connectome import Chain\n",
161 | "\n",
162 | "ds = Chain(\n",
163 | " RSNABreastCancer('/path/to/downloaded/folder'),\n",
164 | " Normalize(),\n",
165 | " Apply(image=lambda x: zoom(np.float32(x), 0.25, order=1)),\n",
166 | " GreatestComponent(),\n",
167 | " CropBackground(),\n",
168 | "\n",
169 | " # aug\n",
170 | " RandomFlip(),\n",
171 | ")"
172 | ],
173 | "metadata": {
174 | "collapsed": false
175 | }
176 | }
177 | ],
178 | "metadata": {
179 | "kernelspec": {
180 | "display_name": "Python 3",
181 | "language": "python",
182 | "name": "python3"
183 | },
184 | "language_info": {
185 | "codemirror_mode": {
186 | "name": "ipython",
187 | "version": 2
188 | },
189 | "file_extension": ".py",
190 | "mimetype": "text/x-python",
191 | "name": "python",
192 | "nbconvert_exporter": "python",
193 | "pygments_lexer": "ipython2",
194 | "version": "2.7.6"
195 | }
196 | },
197 | "nbformat": 4,
198 | "nbformat_minor": 0
199 | }
200 |
--------------------------------------------------------------------------------
/amid/ct_ich.py:
--------------------------------------------------------------------------------
1 | import nibabel as nb
2 | import numpy as np
3 | import pandas as pd
4 |
5 | from .internals import Dataset, field, licenses, register
6 |
7 |
8 | @register(
9 | body_region='Head',
10 | license=licenses.PhysioNet_RHD_150,
11 | link='https://physionet.org/content/ct-ich/1.3.1/',
12 | modality='CT',
13 | prep_data_size='661M',
14 | raw_data_size='2,8G',
15 | task='Intracranial hemorrhage segmentation',
16 | )
17 | class CT_ICH(Dataset):
18 | """
19 | (C)omputed (T)omography Images for (I)ntracranial (H)emorrhage Detection and (S)egmentation.
20 |
21 |     This dataset contains 75 head CT scans, including 36 scans of patients diagnosed with
22 |     intracranial hemorrhage of the following types:
23 | Intraventricular, Intraparenchymal, Subarachnoid, Epidural and Subdural.
24 |
25 | Parameters
26 | ----------
27 | root : str, Path, optional
28 | path to the folder containing the raw downloaded archives.
29 | If not provided, the cache is assumed to be already populated.
30 |
31 | Notes
32 | -----
33 | Data can be downloaded here: https://physionet.org/content/ct-ich/1.3.1/.
34 | Then, the folder with raw downloaded data should contain folders `ct_scans` and `masks` along with other files.
35 |
36 | Examples
37 | --------
38 | >>> # Place the downloaded archives in any folder and pass the path to the constructor:
39 | >>> ds = CT_ICH(root='/path/to/downloaded/data/folder/')
40 | >>> print(len(ds.ids))
41 | # 75
42 | >>> print(ds.image(ds.ids[0]).shape)
43 | # (512, 512, 39)
44 | >>> print(ds.mask(ds.ids[0]).shape)
45 | # (512, 512, 39)
46 | """
47 |
48 | @property
49 | def ids(self):
50 | result = [f'ct_ich_{uid:0=3d}' for uid in [*range(49, 59), *range(66, 131)]]
51 | return tuple(sorted(result))
52 |
53 | def _image_file(self, i):
54 | num_id = i.split('_')[-1]
55 | return nb.load(self.root / 'ct_scans' / f'{num_id}.nii')
56 |
57 | @field
58 | def image(self, i) -> np.ndarray:
59 | # most CT/MRI scans are integer-valued, this will help us improve compression rates
60 | return np.int16(self._image_file(i).get_fdata())
61 |
62 | @field
63 | def mask(self, i) -> np.ndarray:
64 | num_id = i.split('_')[-1]
65 | mask_path = self.root / 'masks' / f'{num_id}.nii'
66 | ct_scan_nifti = nb.load(mask_path)
67 | return ct_scan_nifti.get_fdata().astype(bool)
68 |
69 | @field
70 | def affine(self, i) -> np.ndarray:
71 | """The 4x4 matrix that gives the image's spatial orientation."""
72 | return self._image_file(i).affine
73 |
74 | def spacing(self, i):
75 | """Returns voxel spacing along axes (x, y, z)."""
76 | return tuple(self._image_file(i).header['pixdim'][1:4])
77 |
78 | @property
79 | def _patient_metadata(self):
80 | return pd.read_csv(self.root / 'Patient_demographics.csv', index_col='Patient Number')
81 |
82 | @property
83 | def _diagnosis_metadata(self):
84 | return pd.read_csv(self.root / 'hemorrhage_diagnosis_raw_ct.csv')
85 |
86 | def _row(self, i):
87 | patient_id = int(i.split('_')[-1])
88 | return self._patient_metadata.loc[patient_id]
89 |
90 | @field
91 | def age(self, i) -> float:
92 | return self._row(i)['Age\n(years)']
93 |
94 | @field
95 | def sex(self, i) -> str:
96 | return self._row(i)['Gender']
97 |
98 | @field
99 | def intraventricular_hemorrhage(self, i) -> bool:
100 | """Returns True if hemorrhage exists and its type is intraventricular."""
101 | num_id = int(i.split('_')[-1])
102 | return str(self._patient_metadata['Hemorrhage type based on the radiologists diagnosis '].loc[num_id]) != 'nan'
103 |
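104 |     # In `Patient_demographics.csv` the hemorrhage subtypes appear to occupy one named column
105 |     # followed by several unnamed ones ('Unnamed: 4'..'Unnamed: 7'), presumably due to a merged
106 |     # header row; the fields below read these columns one subtype at a time.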
104 | @field
105 | def intraparenchymal_hemorrhage(self, i) -> bool:
106 | """Returns True if hemorrhage was diagnosed and its type is intraparenchymal."""
107 | num_id = int(i.split('_')[-1])
108 | return str(self._patient_metadata['Unnamed: 4'].loc[num_id]) != 'nan'
109 |
110 | @field
111 | def subarachnoid_hemorrhage(self, i) -> bool:
112 | """Returns True if hemorrhage was diagnosed and its type is subarachnoid."""
113 | num_id = int(i.split('_')[-1])
114 | return str(self._patient_metadata['Unnamed: 5'].loc[num_id]) != 'nan'
115 |
116 | @field
117 | def epidural_hemorrhage(self, i) -> bool:
118 | """Returns True if hemorrhage was diagnosed and its type is epidural."""
119 | num_id = int(i.split('_')[-1])
120 | return str(self._patient_metadata['Unnamed: 6'].loc[num_id]) != 'nan'
121 |
122 | @field
123 | def subdural_hemorrhage(self, i) -> bool:
124 | """Returns True if hemorrhage was diagnosed and its type is subdural."""
125 | num_id = int(i.split('_')[-1])
126 | return str(self._patient_metadata['Unnamed: 7'].loc[num_id]) != 'nan'
127 |
128 | @field
129 | def fracture(self, i) -> bool:
130 | """Returns True if skull fracture was diagnosed."""
131 | num_id = int(i.split('_')[-1])
132 | return str(self._patient_metadata['Fracture (yes 1/no 0)'].loc[num_id]) != 'nan'
133 |
134 | @field
135 | def notes(self, i) -> str:
136 | """Returns special notes if they exist."""
137 | num_id = int(i.split('_')[-1])
138 | result = str(self._patient_metadata['Note1'].loc[num_id])
139 | return result if result != 'nan' else None
140 |
141 | @field
142 | def hemorrhage_diagnosis_raw_metadata(self, i):
143 | num_id = int(i.split('_')[-1])
144 | return self._diagnosis_metadata[self._diagnosis_metadata['PatientNumber'] == num_id]
145 |
--------------------------------------------------------------------------------
/amid/verse.py:
--------------------------------------------------------------------------------
1 | import gzip
2 | import json
3 | import zipfile
4 | from pathlib import Path
5 | from typing import Dict, Tuple, Union
6 | from zipfile import ZipFile
7 |
8 | import nibabel
9 | import numpy as np
10 |
11 | from .internals import Dataset, field, licenses, register
12 |
13 |
14 | @register(
15 | body_region=('Thorax', 'Abdomen'),
16 | modality='CT',
17 | task='Vertebrae Segmentation',
18 | link='https://osf.io/4skx2/',
19 | raw_data_size='97G',
20 | license=licenses.CC_BYSA_40,
21 | )
22 | class VerSe(Dataset):
23 | """
24 | A Vertebral Segmentation Dataset with Fracture Grading [1]_
25 |
26 | The dataset was used in the MICCAI-2019 and MICCAI-2020 Vertebrae Segmentation Challenges.
27 |
28 | Parameters
29 | ----------
30 | root : str, Path, optional
31 | path to the folder containing the raw downloaded archives.
32 | If not provided, the cache is assumed to be already populated.
33 |
34 | Notes
35 | -----
36 | Download links:
37 | 2019: https://osf.io/jtfa5/
38 | 2020: https://osf.io/4skx2/
39 |
40 | Examples
41 | --------
42 | >>> # Place the downloaded archives in any folder and pass the path to the constructor:
43 | >>> ds = VerSe(root='/path/to/archives/root')
44 | >>> print(len(ds.ids))
45 | # 374
46 | >>> print(ds.image(ds.ids[0]).shape)
47 | # (512, 512, 214)
48 |
49 | References
50 | ----------
51 | .. [1] Löffler MT, Sekuboyina A, Jacob A, et al. A Vertebral Segmentation Dataset with Fracture Grading.
52 | Radiol Artif Intell. 2020;2(4):e190138. Published 2020 Jul 29. doi:10.1148/ryai.2020190138
53 | """
54 |
55 | @property
56 | def ids(self):
57 | result = set()
58 | for archive in self.root.glob('*.zip'):
59 | with ZipFile(archive) as zf:
60 | for file in zf.namelist():
61 | if '/rawdata/' not in file:
62 | continue
63 |
64 | file = Path(file)
65 | patient = file.parent.name[4:]
66 | name = file.name
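67 |                     # filenames containing 'split' carry their own sub-id after the 'split' token;
68 |                     # otherwise the patient folder name is used as the id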
67 | if 'split' in name:
68 | i = name.split('split')[1][1:]
69 | i = i.split('_')[0]
70 | else:
71 | i = patient
72 |
73 | assert i not in result, i
74 | result.add(i)
75 |
76 | return sorted(result)
77 |
78 | def _file(self, i):
79 | for archive in self.root.glob('*.zip'):
80 | with ZipFile(archive) as zf:
81 | for file in zf.namelist():
82 | if '/rawdata/' in file and i in file:
83 | return zipfile.Path(archive, file)
84 |
85 | raise ValueError(f'Id "{i}" not found')
86 |
87 | @field
88 | def image(self, i) -> np.ndarray:
89 | with self._file(i).open('rb') as opened:
90 | with gzip.GzipFile(fileobj=opened) as nii:
91 | nii = nibabel.FileHolder(fileobj=nii)
92 | image = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii})
93 | # most ct scans are integer-valued, this will help us improve compression rates
94 | # (instead of using `image.get_fdata()`)
95 | return np.asarray(image.dataobj)
96 |
97 | @field
98 | def affine(self, i) -> np.ndarray:
99 | """The 4x4 matrix that gives the image's spatial orientation"""
100 | with self._file(i).open('rb') as opened:
101 | with gzip.GzipFile(fileobj=opened) as nii:
102 | nii = nibabel.FileHolder(fileobj=nii)
103 | image = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii})
104 | return image.affine
105 |
106 | @field
107 | def split(self, i) -> str:
108 | """The split in which this entry is contained: training, validate, test"""
109 | # it's ugly, but it gets the job done (;
110 | return self._file(i).parent.parent.parent.name.split('_')[-1].split('9')[-1]
111 |
112 | @field
113 | def patient(self, i) -> str:
114 | """The unique patient id"""
115 | return self._file(i).parent.name[4:]
116 |
117 | @field
118 | def year(self, i) -> int:
119 | """The year in which this entry was published: 2019, 2020"""
120 | year = self._file(i).parent.parent.parent.name
121 | if year.startswith('dataset-verse'):
122 | assert '19' in year
123 | return 2019
124 | return 2020
125 |
126 | def _derivatives(self, i):
127 | file = self._file(i)
128 | return file.parent.parent.parent / 'derivatives' / file.parent.name
129 |
130 | @field
131 | def centers(self, i) -> Dict[str, Tuple[int, int, int]]:
132 | """Vertebrae centers in format {label: [x, y, z]}"""
133 | ann = [f for f in self._derivatives(i).iterdir() if f.name.endswith('.json') and i in f.name]
134 | if not ann:
135 | return {}
136 | assert len(ann) == 1
137 | (ann,) = ann
138 |
139 | with ann.open() as file:
140 | ann = json.load(file)
141 |
142 | return {k['label']: (k['X'], k['Y'], k['Z']) for k in ann[1:]}
143 |
144 | @field
145 | def masks(self, i) -> Union[np.ndarray, None]:
146 | """Vertebrae masks"""
147 | ann = [f for f in self._derivatives(i).iterdir() if f.name.endswith('.nii.gz') and i in f.name]
148 | if not ann:
149 | return
150 | assert len(ann) == 1
151 | (ann,) = ann
152 |
153 | with ann.open('rb') as opened:
154 | with gzip.GzipFile(fileobj=opened) as nii:
155 | nii = nibabel.FileHolder(fileobj=nii)
156 | mask = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii})
157 | return mask.get_fdata().astype(np.uint8)
158 |
--------------------------------------------------------------------------------
/amid/mood.py:
--------------------------------------------------------------------------------
1 | import contextlib
2 | import gzip
3 | import zipfile
4 | from pathlib import Path
5 | from zipfile import ZipFile
6 |
7 | import nibabel as nb
8 | import numpy as np
9 |
10 | from .internals import Dataset, field, register
11 |
12 |
13 | @register(
14 | body_region=('Head', 'Abdominal'),
15 | license=None, # FIXME: inherit licenses from the original datasets...
16 | link='http://medicalood.dkfz.de/web/',
17 | modality=('MRI', 'CT'),
18 | prep_data_size='405G',
19 | raw_data_size='120G',
20 | task='Out-of-distribution detection',
21 | )
22 | class MOOD(Dataset):
23 | """
24 |     A (M)edical (O)ut-(O)f-(D)istribution analysis challenge [1]_
25 |
26 | This dataset contains raw brain MRI and abdominal CT images.
27 |
28 | Number of training samples:
29 |     - Brain: 800 scans (256 x 256 x 256)
30 |     - Abdominal: 550 scans (512 x 512 x 512)
31 |
32 | For each setup there are 4 toy test samples with OOD cases.
33 |
34 | Parameters
35 | ----------
36 | root : str, Path, optional
37 | path to the folder containing the raw downloaded archives.
38 | If not provided, the cache is assumed to be already populated.
39 |
40 | Notes
41 | -----
42 | Follow the download instructions at https://www.synapse.org/#!Synapse:syn21343101/wiki/599515.
43 |
44 | Then, the folder with raw downloaded data should contain four zip archives with data
45 | (`abdom_toy.zip`, `abdom_train.zip`, `brain_toy.zip` and `brain_train.zip`).
46 |
47 | Examples
48 | --------
49 | >>> # Place the downloaded archives in any folder and pass the path to the constructor:
50 | >>> ds = MOOD(root='/path/to/downloaded/data/folder/')
51 | >>> print(len(ds.ids))
52 | # 1358
53 | >>> print(ds.image(ds.ids[0]).shape)
54 | # (512, 512, 512)
55 | >>> print(ds.pixel_label(ds.ids[0]).shape)
56 | # (512, 512, 512)
57 |
58 | References
59 | ----------
60 | .. [1] Zimmerer, Petersen, et al. "Medical Out-of-Distribution Analysis Challenge 2022."
61 | doi: 10.5281/zenodo.6362313 (2022).
62 | """
63 |
64 | @property
65 | def ids(self):
66 | result = set()
67 | # zip archives for train images:
68 | for archive in self.root.glob('*.zip'):
69 |             if 'brain' in str(archive):  # determine whether it is brain (MRI) or abdominal (CT)
70 | task = 'brain'
71 | else:
72 | task = 'abdom'
73 |
74 | if 'toy' in str(archive): # fold - train or toy test
75 | fold = 'toy'
76 | else:
77 | fold = 'train'
78 |
79 | with ZipFile(archive) as zf:
80 | for zipinfo in zf.infolist():
81 | if zipinfo.is_dir():
82 | continue
83 |
84 | file_stem = Path(zipinfo.filename).stem
85 | if '.nii' in file_stem:
86 | if fold == 'train':
87 | result.add(f'mood_{task}_{fold}_{file_stem.split(".nii")[0]}')
88 | # fold == 'toy'
89 | else:
90 | result.add(f'mood_{task}_{file_stem.split(".nii")[0]}')
91 |
92 | return tuple(sorted(result))
93 |
94 | @field
95 | def fold(self, i):
96 | """Returns fold: train or toy (test)."""
97 | if 'train' in i:
98 | return 'train'
99 | # if 'toy' in i:
100 | return 'toy'
101 |
102 | @field
103 | def task(self, i):
104 | """Returns task: brain (MRI) or abdominal (CT)."""
105 | if 'brain' in i:
106 | return 'brain'
107 | # if 'abdom' in i:
108 | return 'abdom'
109 |
110 | def _file(self, i):
111 | task, fold, num_id = i.split('_')[-3:]
112 | if fold == 'train':
113 | return zipfile.Path(self.root / f'{task}_{fold}.zip', f'{task}_{fold}/{num_id}.nii.gz')
114 | return zipfile.Path(self.root / f'{task}_{fold}.zip', f'toy/toy_{num_id}.nii.gz')
115 |
116 | @field
117 | def image(self, i):
118 | with open_nii_gz_file(self._file(i)) as nii_image:
119 | return np.asarray(nii_image.dataobj)
120 |
121 | @field
122 | def affine(self, i):
123 | """The 4x4 matrix that gives the image's spatial orientation."""
124 | with open_nii_gz_file(self._file(i)) as nii_image:
125 | return nii_image.affine
126 |
127 | def spacing(self, i):
128 | """Returns voxel spacing along axes (x, y, z)."""
129 | with open_nii_gz_file(self._file(i)) as nii_image:
130 | return tuple(nii_image.header['pixdim'][1:4])
131 |
132 | @field
133 | def sample_label(self, i):
134 | """
135 | Returns sample-level OOD score for toy examples and None otherwise.
136 | 0 indicates no abnormality and 1 indicates abnormal input.
137 | """
138 | file = self._file(i)
139 | if 'toy' in file.name:
140 | with (file.parent.parent / 'toy_label/sample' / f'{file.name}.txt').open('r') as nii:
141 | return int(nii.read())
142 |
143 | @field
144 | def pixel_label(self, i):
145 | """
146 | Returns voxel-level OOD scores for toy examples and None otherwise.
147 | 0 indicates no abnormality and 1 indicates abnormal input.
148 | """
149 | file = self._file(i)
150 | if 'toy' in file.name:
151 | with open_nii_gz_file(file.parent.parent / 'toy_label/pixel' / file.name) as nii_image:
152 | return np.bool_(nii_image.get_fdata())
153 |
154 |
155 | # TODO: sync with amid.utils
156 | @contextlib.contextmanager
157 | def open_nii_gz_file(file):
158 | with file.open('rb') as opened:
159 | with gzip.GzipFile(fileobj=opened) as nii:
160 | nii = nb.FileHolder(fileobj=nii)
161 | yield nb.Nifti1Image.from_file_map({'header': nii, 'image': nii})
162 |
--------------------------------------------------------------------------------
/amid/cancer_500/dataset.py:
--------------------------------------------------------------------------------
1 | import codecs
2 | import json
3 | import warnings
4 | from functools import cached_property
5 | from pathlib import Path
6 |
7 | import numpy as np
8 | import pydicom
9 | from dicom_csv import (
10 | get_common_tag,
11 | get_orientation_matrix,
12 | get_pixel_spacing,
13 | get_slice_locations,
14 | get_tag,
15 | order_series,
16 | stack_images,
17 | )
18 | from dicom_csv.exceptions import TagMissingError
19 | from tqdm.auto import tqdm
20 |
21 | from ..internals import Dataset, field, register
22 | from ..utils import get_series_date
23 | from .nodules import get_nodules
24 |
25 |
26 | @register(
27 | body_region='Thorax',
28 | modality='CT',
29 | task='Lung Cancer Detection',
30 | link='https://mosmed.ai/en/datasets/mosmeddata-kt-s-priznakami-raka-legkogo-tip-viii/',
31 | prep_data_size='103G',
32 | raw_data_size='187G',
33 | )
34 | class MoscowCancer500(Dataset):
35 | """
36 | The Moscow Radiology Cancer-500 dataset.
37 |
38 | Parameters
39 | ----------
40 | root : str, Path, optional
41 | path to the folder containing the raw downloaded files.
42 | If not provided, the cache is assumed to be already populated.
43 |
44 |
45 | Notes
46 | -----
47 | Download links:
48 | https://mosmed.ai/en/datasets/mosmeddata-kt-s-priznakami-raka-legkogo-tip-viii/
49 | After pressing the `download` button you will have to provide an email address to which further instructions
50 | will be sent.
51 |
52 | Examples
53 | --------
54 | >>> # Place the downloaded files in any folder and pass the path to the constructor:
55 | >>> ds = MoscowCancer500(root='/path/to/files/root')
56 | >>> print(len(ds.ids))
57 | # 979
58 | >>> print(ds.image(ds.ids[0]).shape)
59 | # (512, 512, 67)
60 | """
61 |
62 | @cached_property
63 | def _mapping(self):
64 | path = self.root / 'series-to-files.json'
65 | if not path.exists():
66 | mapping = {}
67 | for file in tqdm(
68 | self.root.rglob('*'), total=sum(1 for _ in self.root.rglob('*')), desc='Analyzing folder structure'
69 | ):
70 | if file.is_dir():
71 | continue
72 |
73 | series = pydicom.dcmread(file, specific_tags=[(0x0020, 0x000E)]).SeriesInstanceUID
74 |                 mapping.setdefault(series, []).append(str(file.relative_to(self.root)))
75 |
76 | with open(path, 'w') as file:
77 | json.dump(mapping, file)
78 | return mapping
79 |
80 | with open(path) as file:
81 | return json.load(file)
82 |
83 | @property
84 | def ids(self):
85 | # this id has an undefined image orientation
86 | ignore = {'1.2.643.5.1.13.13.12.2.77.8252.604378326291403.583548115656123.'}
87 | return tuple(sorted(set(self._mapping) - ignore))
88 |
89 | def _series(self, i):
90 | series = [pydicom.dcmread(Path(self.root, 'dicom', f)) for f in self._mapping[i]]
91 | series = order_series(series, decreasing=False)
92 | return series
93 |
94 | @field
95 | def image(self, i):
96 | x = stack_images(self._series(i), -1).astype(np.int16)
97 | # DICOM specifies that the first 2 axes are (y, x). let's fix that
98 | return np.moveaxis(x, 0, 1)
99 |
100 | @field
101 | def study_uid(self, i):
102 | return get_common_tag(self._series(i), 'StudyInstanceUID')
103 |
104 | @field
105 | def series_uid(self, i):
106 | return get_common_tag(self._series(i), 'SeriesInstanceUID')
107 |
108 | @field
109 | def sop_uids(self, i):
110 | return [str(get_tag(i, 'SOPInstanceUID')) for i in self._series(i)]
111 |
112 | @field
113 | def pixel_spacing(self, i):
114 | return get_pixel_spacing(self._series(i)).tolist()
115 |
116 | @field
117 | def slice_locations(self, i):
118 | return get_slice_locations(self._series(i))
119 |
120 | @field
121 | def orientation_matrix(self, i):
122 | return get_orientation_matrix(self._series(i))
123 |
124 | @field
125 | def instance_numbers(self, i):
126 | try:
127 | instance_numbers = [int(get_tag(i, 'InstanceNumber')) for i in self._series(i)]
128 | if not _is_monotonic(instance_numbers):
129 | warnings.warn('Ordered series has non-monotonic instance numbers.')
130 |
131 | return instance_numbers
132 | except TagMissingError:
133 | pass
134 |
135 | @field
136 | def conv_kernel(self, i):
137 | return get_common_tag(self._series(i), 'ConvolutionKernel', default=None)
138 |
139 | @field
140 | def kvp(self, i):
141 | return get_common_tag(self._series(i), 'KVP', default=None)
142 |
143 | @field
144 | def patient_id(self, i):
145 | return get_common_tag(self._series(i), 'PatientID', default=None)
146 |
147 | @field
148 | def study_date(self, i):
149 | return get_series_date(self._series(i))
150 |
151 | @field
152 | def accession_number(self, i):
153 | return get_common_tag(self._series(i), 'AccessionNumber', default=None)
154 |
155 | @field
156 | def nodules(self, i):
157 | folders = {Path(f).parent.name for f in self._mapping[i]}
158 | if len(folders) != 1:
159 | # can't determine protocol filename
160 | return
161 |
162 | (filename,) = folders
163 | protocol = json.load(codecs.open(str(self.root / 'protocols' / f'{filename}.json'), 'r', 'utf-8-sig'))
164 |
165 | series_number = get_common_tag(self._series(i), 'SeriesNumber')
166 | try:
167 | return get_nodules(protocol, series_number, self.slice_locations(i))
168 | except ValueError:
169 | pass
170 |
171 |
172 | def _is_monotonic(sequence):
173 | sequence = list(sequence)
174 | return sequence == sorted(sequence) or sequence == sorted(sequence)[::-1]
175 |
--------------------------------------------------------------------------------
/amid/cancer_500/nodules.py:
--------------------------------------------------------------------------------
1 | import warnings
2 |
3 | import numpy as np
4 |
5 | from .typing import Cancer500Nodule, Comment, Review, Texture
6 |
7 |
8 | def get_nodules(protocol, series_number, slice_locations):
9 | if protocol['nodules'] is None:
10 | num_doctors = len(protocol['doctors'])
11 | assert num_doctors in [3, 6]
12 |
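13 |         # no nodules are listed in the protocol: accept that only if a strict majority of the
14 |         # raters explicitly reported no findings; otherwise the case is considered ambiguous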
13 |         if len([d for d in protocol['doctors'] if definitely_no_nodules(d['comment'])]) > num_doctors / 2:
14 | return []
15 | else:
16 | raise ValueError
17 |
18 | assert protocol['nodules']
19 |
20 | nodules = []
21 | for nodule in protocol['nodules']:
22 | annotations = dict(get_nodule_annotations(nodule[-1], series_number, slice_locations))
23 | if not annotations:
24 | raise ValueError
25 |
26 | nodules.append(annotations)
27 |
28 | return nodules
29 |
30 |
31 | def definitely_no_nodules(overall_comment):
32 | overall_comment = overall_comment.lower()
33 | prefixes = ['нет очагов', 'очагов нет', 'очаги не выявлены', 'достоверно очагов нет']
34 | return any(overall_comment.startswith(p) for p in prefixes)
35 |
36 |
37 | def get_nodule_annotations(nodule: dict, series_number: int, slice_locations: list):
38 | for rater, ann in nodule.items():
39 | if ann is None:
40 | continue
41 |
42 | if 'series_no' in ann and str(series_number) not in ann['series_no']:
43 | warnings.warn('Cannot check that annotation belongs to this particular series.')
44 | continue
45 |
46 | try:
47 | yield rater, parse_nodule_annotation(ann, slice_locations)
48 | except ValueError as e:
49 | warnings.warn(str(e))
50 | continue
51 |
52 |
53 | def parse_nodule_annotation(ann: dict, slice_locations: list):
54 | return Cancer500Nodule(
55 | center_voxel=parse_center_voxel(ann, slice_locations),
56 | review=parse_review(ann),
57 | comment=parse_comment(ann),
58 | diameter_mm=parse_diameter_mm(ann),
59 | texture=parse_texture(ann),
60 | malignancy=parse_malignancy(ann),
61 | )
62 |
63 |
64 | def parse_center_voxel(ann: dict, slice_locations: list):
65 | i, j = int(ann['x']), int(ann['y'])
66 | assert i == ann['x']
67 | assert j == ann['y']
68 |
69 | assert 'z type' in ann
70 | assert ann['z type'].strip() == 'mm'
71 | diff = np.abs(np.array(slice_locations) - ann['z'])
72 | if np.min(diff) >= 1:
73 | raise ValueError('Cannot determine slice.')
74 | slc = np.argmin(diff)
75 |
76 | comments = [review['comment'] for review in ann['expert decision']]
77 | if 'z = 258 = -151,6 ' in comments:
78 | slc = 258
79 | elif 'не 134 а 143 по оси Х' in comments:
80 | i = 143
81 | elif (
82 | 'неправильная координата х (должно быть 73, а не 734). сосуд, несовпадение типа (другое), неверный размер'
83 | in comments
84 | ):
85 | i = 73
86 | elif 'ошибка в координате Y - должно быть 296, тогда очаг есть' in comments:
87 | j = 296
88 | elif 'срез съехал на два ниже' in comments:
89 | slc -= 2
90 | elif set(comments) & {
91 | 'очага нет',
92 | 'промахно',
93 | 'промахнулись с координатой х',
94 | 'часть координат не совпадает с топикой очага',
95 | 'часть координат не совпадает с топикой очага, неверный размер',
96 | }:
97 |         raise ValueError('Cannot determine center voxel')
98 |
99 | return i, j, slc
100 |
101 |
102 | def parse_review(ann: dict):
103 | decisions = {review['decision'] for review in ann['expert decision']}
104 | if 'confirmed' in decisions:
105 | return Review.Confirmed
106 | elif 'confirmed_partially' in decisions:
107 | return Review.ConfirmedPartially
108 | elif 'doubt' in decisions:
109 | return Review.Doubt
110 | elif 'rejected' in decisions:
111 | return Review.Rejected
112 | else:
113 | raise ValueError(decisions)
114 |
115 |
116 | def parse_comment(ann: dict):
117 | comments = {review['comment'] for review in ann['expert decision']}
118 | if 'кальцинат, несовпадение типа (другое)' in comments:
119 | return Comment.Calcium
120 | elif 'фиброз' in comments:
121 | return Comment.Fibrosis
122 | elif 'внутрилегочный л\\у' in comments:
123 | return Comment.LymphNode
124 | elif 'очаг с кальцинацией, несовпадение типа (другое)' in comments:
125 | return Comment.Calcified
126 | elif 'бронхоэктаз с содержимым, несовпадение типа (другое)' in comments:
127 | return Comment.Bronchiectasis
128 | elif 'сосуд' in comments:
129 | return Comment.Vessel
130 |
131 |
132 | def parse_diameter_mm(ann: dict):
133 | if any('неверный размер' in review['comment'].lower() for review in ann['expert decision']):
134 | return
135 |
136 | return round(ann['diameter (mm)'], 2)
137 |
138 |
139 | def parse_texture(ann: dict):
140 | nodule_types = {review['type'] for review in ann['expert decision']} & {'#0S', '#1PS', '#2GG', 'другое'}
141 | if nodule_types:
142 | assert len(nodule_types) == 1
143 | (nodule_type,) = nodule_types
144 | elif parse_review(ann) in [Review.Confirmed, Review.ConfirmedPartially, Review.Doubt]:
145 | assert ann['type'] in ['#0S', '#1PS', '#2GG']
146 | nodule_type = ann['type']
147 | else:
148 | return
149 |
150 | if nodule_type == '#0S':
151 | return Texture.Solid
152 | elif nodule_type == '#1PS':
153 | return Texture.PartSolid
154 | elif nodule_type == '#2GG':
155 | return Texture.GroundGlass
156 | elif nodule_type == 'другое':
157 | return Texture.Other
158 |
159 |
160 | def parse_malignancy(ann: dict):
161 | malignant = [review['malignant'] for review in ann['expert decision']]
162 | if all(malignant):
163 | return True
164 | elif not any(malignant):
165 | return False
166 |
--------------------------------------------------------------------------------
/amid/lits/dataset.py:
--------------------------------------------------------------------------------
1 | import zipfile
2 | from pathlib import Path
3 | from zipfile import ZipFile
4 |
5 | import nibabel as nb
6 | import numpy as np
7 |
8 | from ..internals import Dataset, licenses, register
9 |
10 |
11 | @register(
12 | body_region='Abdominal',
13 | license=licenses.CC_BYNCND_40,
14 | link='https://competitions.codalab.org/competitions/17094',
15 | modality='CT',
16 | prep_data_size='24,7G',
17 | raw_data_size='35G',
18 | task='Segmentation',
19 | )
20 | class LiTS(Dataset):
21 | """
22 | A (Li)ver (T)umor (S)egmentation dataset [1]_ from Medical Segmentation Decathlon [2]_
23 |
24 | There are two segmentation tasks on this dataset: liver and liver tumor segmentation.
25 |
26 | Parameters
27 | ----------
28 | root : str, Path, optional
29 | path to the folder containing the raw downloaded archives.
30 | If not provided, the cache is assumed to be already populated.
31 |
32 |
33 | Notes
34 | -----
35 | Follow the download instructions at https://competitions.codalab.org/competitions/17094.
36 |
37 | Then, the folder with raw downloaded data should contain two zip archives with the train data
38 | (`Training_Batch1.zip` and `Training_Batch2.zip`)
39 | and a folder with the test data
40 | (`LITS-Challenge-Test-Data`).
41 |
42 |     The folder with test data should have the original structure:
43 | <...>/LITS-Challenge-Test-Data/test-volume-0.nii
44 | <...>/LITS-Challenge-Test-Data/test-volume-1.nii
45 | ...
46 |
47 |     P.S. Organ boxes are also provided from a separate source https://github.com/superxuang/caffe_3d_faster_rcnn.
48 |
49 | Examples
50 | --------
51 | >>> # Place the downloaded archives in any folder and pass the path to the constructor:
52 | >>> ds = LiTS(root='/path/to/downloaded/data/folder/')
53 | >>> print(len(ds.ids))
54 | # 201
55 | >>> print(ds.image(ds.ids[0]).shape)
56 | # (512, 512, 163)
57 | >>> print(ds.tumor_mask(ds.ids[80]).shape)
58 | # (512, 512, 771)
59 |
60 | References
61 | ----------
62 | .. [1] Bilic, Patrick, et al. "The liver tumor segmentation benchmark (lits)."
63 | arXiv preprint arXiv:1901.04056 (2019).
64 | .. [2] Antonelli, Michela, et al. "The medical segmentation decathlon."
65 | arXiv preprint arXiv:2106.05735 (2021).
66 | """
67 |
68 | @property
69 | def ids(self):
70 | result = set()
71 | # zip archives for train images:
72 | for archive in self.root.glob('*.zip'):
73 | with ZipFile(archive) as zf:
74 | for zipinfo in zf.infolist():
75 | if zipinfo.is_dir():
76 | continue
77 |
78 | file_stem = Path(zipinfo.filename).stem
79 | if 'volume' in file_stem:
80 | result.add('lits-train-' + file_stem.split('-')[-1])
81 |
82 | # folder for test images:
83 | for file in (self.root / 'LITS-Challenge-Test-Data').glob('*'):
84 | result.add('lits-test-' + file.stem.split('-')[-1])
85 |
86 | return tuple(sorted(result))
87 |
88 | def fold(self, i):
89 | num_id = i.split('-')[-1]
90 |
91 | if 'train' in i:
92 | for archive in self.root.glob('*.zip'):
93 | batch = '1' if ('1' in archive.stem) else '2'
94 |
95 | with ZipFile(archive) as zf:
96 | for zipinfo in zf.infolist():
97 | if zipinfo.is_dir():
98 | continue
99 |
100 | if num_id == Path(zipinfo.filename).stem.split('-')[-1]:
101 | return f'train_batch_{batch}'
102 |
103 | else: # if 'test' in i:
104 | return 'test'
105 |
106 | def _file(self, i):
107 | num_id = i.split('-')[-1]
108 |
109 | if 'train' in i:
110 | for archive in self.root.glob('*.zip'):
111 | with ZipFile(archive) as zf:
112 | for zipinfo in zf.infolist():
113 | if zipinfo.is_dir():
114 | continue
115 |
116 | file = Path(zipinfo.filename)
117 | if ('volume' in file.stem) and (num_id == file.stem.split('-')[-1]):
118 | return zipfile.Path(str(archive), str(file))
119 |
120 | else: # if 'test' in i:
121 | return self.root / 'LITS-Challenge-Test-Data' / f'test-volume-{num_id}.nii'
122 |
123 | raise KeyError(f'Id "{i}" not found')
124 |
125 | def image(self, i):
126 | with self._file(i).open('rb') as nii:
127 | nii = nb.FileHolder(fileobj=nii)
128 | image = nb.Nifti1Image.from_file_map({'header': nii, 'image': nii})
129 | # most ct scans are integer-valued, this will help us improve compression rates
130 | return np.int16(image.get_fdata())
131 |
132 | def affine(self, i):
133 | """The 4x4 matrix that gives the image's spatial orientation."""
134 | with self._file(i).open('rb') as nii:
135 | nii = nb.FileHolder(fileobj=nii)
136 | image = nb.Nifti1Image.from_file_map({'header': nii, 'image': nii})
137 | return image.affine
138 |
139 | def spacing(self, i):
140 | """Returns voxel spacing along axes (x, y, z)."""
141 | with self._file(i).open('rb') as nii:
142 | nii = nb.FileHolder(fileobj=nii)
143 | image = nb.Nifti1Image.from_file_map({'header': nii, 'image': nii})
144 | return tuple(image.header['pixdim'][1:4])
145 |
146 | def mask(self, i):
147 | file = self._file(i)
148 | if 'test' not in file.name:
149 | with (file.parent / file.name.replace('volume', 'segmentation')).open('rb') as nii:
150 | nii = nb.FileHolder(fileobj=nii)
151 | image = nb.Nifti1Image.from_file_map({'header': nii, 'image': nii})
152 | return np.uint8(image.get_fdata())
153 |
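154 |
155 | if __name__ == '__main__':
156 |     # Minimal usage sketch (illustrative only): the root below is a hypothetical path
157 |     # to the downloaded archives, organised as described in the class docstring.
158 |     ds = LiTS(root='/path/to/downloaded/data/folder/')
159 |     # train ids come from the zip archives, test ids from the LITS-Challenge-Test-Data folder;
160 |     # reference masks exist only for the train part, so mask() returns None for test ids
161 |     train_ids = [i for i in ds.ids if ds.fold(i).startswith('train')]
162 |     print(len(train_ids), len(ds.ids) - len(train_ids))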
--------------------------------------------------------------------------------
/amid/upenn_gbm/upenn_gbm.py:
--------------------------------------------------------------------------------
1 | from functools import cached_property
2 |
3 | import nibabel as nb
4 | import numpy as np
5 | import pandas as pd
6 |
7 | from ..internals import Dataset, licenses, register
8 | from .data_classes import AcquisitionInfo, ClinicalInfo
9 |
10 |
11 | @register(
12 | body_region='Head',
13 | license=licenses.CC_BY_40,
14 | link='https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=70225642',
15 | modality=('FLAIR', 'MRI T1', 'MRI T1GD', 'MRI T2', 'DSC MRI', 'DTI MRI'),
16 | prep_data_size='70G',
17 | raw_data_size='69G',
18 | task='Segmentation',
19 | )
20 | class UPENN_GBM(Dataset):
21 | """
22 | Multi-parametric magnetic resonance imaging (mpMRI) scans for de novo Glioblastoma
23 | (GBM) patients from the University of Pennsylvania Health System (UPENN-GBM).
24 | Dataset contains 630 patients.
25 |
26 | All samples are registered to a common atlas (SRI)
27 |     using a uniform preprocessing pipeline, and the segmentations are aligned with them.
28 |
29 |
30 | Parameters
31 | ----------
32 | root : str, Path, optional
33 | path to the folder containing the raw downloaded archives.
34 | If not provided, the cache is assumed to be already populated.
35 |
36 | Notes
37 | -----
38 | Follow the download instructions at https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=70225642
39 |     Download the NIfTI images and metadata to the root folder. Organise the folder as follows:
40 |
41 |
42 |     <...>/NIfTI-files/images_segm/UPENN-GBM-00054_11_segm.nii.gz
43 |     <...>/NIfTI-files/...
44 |
45 |     <...>/UPENN-GBM_clinical_info_v1.0.csv
46 |     <...>/UPENN-GBM_acquisition.csv
47 |
48 |
49 | Examples
50 | --------
51 | >>> # Place the downloaded archives in any folder and pass the path to the constructor:
52 | >>> ds = UPENN_GBM(root='/path/to/downloaded/data/folder/')
53 | >>> print(len(ds.ids))
54 | # 671
55 | >>> print(ds.image(ds.ids[215]).shape)
56 | # (4, 240, 240, 155)
57 |     >>> print(ds.acqusition_info(ds.ids[215]).manufacturer)
58 | # SIEMENS
59 |
60 | References
61 | ----------
62 | .. [1] Bakas, S., Sako, C., Akbari, H., Bilello, M., Sotiras, A., Shukla, G., Rudie,
63 | J. D., Flores Santamaria, N., Fathi Kazerooni, A., Pati, S., Rathore, S.,
64 | Mamourian, E., Ha, S. M., Parker, W., Doshi, J., Baid, U., Bergman, M., Binder, Z. A., Verma, R., … Davatzikos,
65 | C. (2021). Multi-parametric magnetic resonance imaging (mpMRI) scans for de novo
66 | Glioblastoma (GBM) patients from the University of Pennsylvania Health System (UPENN-GBM)
67 | (Version 2) [Data set]. The Cancer Imaging Archive.
68 | https://doi.org/10.7937/TCIA.709X-DN49
69 |
70 | """
71 |
72 | @property
73 | def ids(self):
74 | ids = [x.name for x in (self.root / 'NIfTI-files/images_structural').iterdir()]
75 | return tuple(sorted(ids))
76 |
77 | @property
78 | def modalities(self):
79 | return ['T1', 'T1GD', 'T2', 'FLAIR']
80 |
81 | @property
82 | def dsc_modalities(self):
83 | return ['', 'ap-rCBV', 'PH', 'PSR']
84 |
85 | @property
86 | def dti_modalities(self):
87 | return ['AD', 'FA', 'RD', 'TR']
88 |
89 | def _mask_path(self, i):
90 | p1 = self.root / 'NIfTI-files/images_segm'
91 | p2 = self.root / 'NIfTI-files/automated_segm'
92 | p1 = list(p1.glob(i + '*'))
93 | p2 = list(p2.glob(i + '*'))
94 | return p1[0] if p1 else p2[0] if p2 else None
95 |
96 | def mask(self, i):
97 | path = self._mask_path(i)
98 | if not path:
99 | return None
100 | return np.asarray(nb.load(path).get_fdata())
101 |
102 | def is_mask_automated(self, i):
103 | path = self._mask_path(i)
104 | if path is None:
105 | return None
106 | return path.parent.name == 'automated_segm'
107 |
108 | def image(self, i):
109 | path = self.root / f'NIfTI-files/images_structural/{i}'
110 |         image_paths = [path / f'{i}_{mod}.nii.gz' for mod in self.modalities]
111 |         images = [np.asarray(nb.load(p).dataobj) for p in image_paths]
112 | return np.stack(images)
113 |
114 | def image_unstripped(self, i):
115 | path = self.root / f'NIfTI-files/images_structural_unstripped/{i}'
116 |         image_paths = [path / f'{i}_{mod}_unstripped.nii.gz' for mod in self.modalities]
117 |         images = [np.asarray(nb.load(p).dataobj) for p in image_paths]
118 | return np.stack(images)
119 |
120 | def image_DTI(self, i):
121 | path = self.root / f'NIfTI-files/images_DTI/{i}'
122 | if not path.exists():
123 | return None
124 |         image_paths = [path / f'{i}_DTI_{mod}.nii.gz' for mod in self.dti_modalities]
125 |         images = [np.asarray(nb.load(p).dataobj) for p in image_paths]
126 | return np.stack(images)
127 |
128 | def image_DSC(self, i):
129 | path = self.root / f'NIfTI-files/images_DSC/{i}'
130 | if not path.exists():
131 | return None
132 |         image_paths = [path / (f'{i}_DSC_{mod}.nii.gz' if mod else f'{i}_DSC.nii.gz') for mod in self.dsc_modalities]
133 |         images = [np.asarray(nb.load(p).dataobj) for p in image_paths]
134 | return images
135 |
136 | @cached_property
137 | def _clinical_info(self):
138 | return pd.read_csv(self.root / 'UPENN-GBM_clinical_info_v1.0.csv')
139 |
140 | @cached_property
141 | def _acqusition_info(self):
142 | return pd.read_csv(self.root / 'UPENN-GBM_acquisition.csv')
143 |
144 | def clinical_info(self, i):
145 | row = self._clinical_info[self._clinical_info.ID == i]
146 | return ClinicalInfo(*row.iloc[0, 1:])
147 |
148 | def acqusition_info(self, i):
149 | row = self._acqusition_info[self._acqusition_info.ID == i]
150 | return AcquisitionInfo(*row.iloc[0, 1:])
151 |
152 | def subject_id(self, i):
153 | return i.split('_')[0]
154 |
155 | def affine(self, i):
156 | return np.array([[-1.0, 0.0, 0.0, -0.0], [0.0, -1.0, 0.0, 239.0], [0.0, 0.0, 1.0, 0.0], [0.0, 0.0, 0.0, 1.0]])
157 |
158 | def spacing(self, i):
159 | return (1, 1, 1)
160 |
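161 |
162 | if __name__ == '__main__':
163 |     # Minimal usage sketch (illustrative only): the root below is a hypothetical path
164 |     # to the unpacked NIfTI files and metadata described in the class docstring.
165 |     ds = UPENN_GBM(root='/path/to/downloaded/data/folder/')
166 |     # manual masks (images_segm) are preferred over automated ones (automated_segm);
167 |     # is_mask_automated() tells which kind was picked for a given case
168 |     automated = [i for i in ds.ids if ds.is_mask_automated(i)]
169 |     print(f'{len(automated)} of {len(ds.ids)} cases fall back to the automated segmentation')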
--------------------------------------------------------------------------------
/amid/deeplesion.py:
--------------------------------------------------------------------------------
1 | from functools import cached_property
2 |
3 | import deli
4 | import nibabel
5 | import numpy as np
6 |
7 | from .internals import Dataset, register
8 |
9 |
10 | @register(
11 | body_region=('Abdomen', 'Thorax'),
12 | link='https://nihcc.app.box.com/v/DeepLesion',
13 | modality='CT',
14 | prep_data_size='259G',
15 | raw_data_size='259G',
16 | task=('Localisation', 'Detection', 'Classification'),
17 | )
18 | class DeepLesion(Dataset):
19 | """
20 | DeepLesion is composed of 33,688 bookmarked radiology images from
21 |     10,825 studies of 4,477 unique patients. For every bookmarked image, a bounding
22 |     box is created to cover the target lesion based on its measured diameters [1].
23 |
24 | Parameters
25 | ----------
26 | root : str, Path, optional
27 | path to the folder containing `DL_info.csv` file and a subfolder `Images_nifti` with 20094 nii.gz files.
28 |
29 | Notes
30 | -----
31 | Dataset is available at https://nihcc.app.box.com/v/DeepLesion
32 |
33 | To download the data we recommend using a Python script provided by the authors `batch_download_zips.py`.
34 | Once you download the data and unarchive all 56 zip archives, you should run `DL_save_nifti.py`
35 | provided by the authors to convert 2D PNGs into 20094 nii.gz files.
36 |
37 |     Examples
38 | --------
39 | >>> ds = DeepLesion(root='/path/to/folder')
40 | >>> print(len(ds.ids))
41 | # 20094
42 |
43 | References
44 | ----------
45 | .. [1] Yan, Ke, Xiaosong Wang, Le Lu, and Ronald M. Summers. "Deeplesion: Automated deep mining,
46 | categorization and detection of significant radiology image findings using large-scale clinical
47 | lesion annotations." arXiv preprint arXiv:1710.01766 (2017).
48 |
49 | """
50 |
51 | @property
52 | def ids(self):
53 | return tuple(sorted(file.name.replace('.nii.gz', '') for file in (self.root / 'Images_nifti').glob('*.nii.gz')))
54 |
55 | def _image_file(self, i):
56 | return nibabel.load(self.root / 'Images_nifti' / f'{i}.nii.gz')
57 |
58 | @cached_property
59 | def _metadata(self):
60 | df = deli.load(self.root / 'DL_info.csv')
61 |
62 | cols_to_transform = [
63 | 'Measurement_coordinates',
64 | 'Bounding_boxes',
65 | 'Lesion_diameters_Pixel_',
66 | 'Normalized_lesion_location',
67 | ]
68 | for col in cols_to_transform:
69 | df[col] = df[col].apply(lambda x: list(map(float, x.split(','))))
70 |
71 | df['Slice_range_list'] = df['Slice_range'].apply(lambda x: list(map(int, x.split(','))))
72 |
73 | def get_ids(x):
74 | patient_study_series = '_'.join(x.File_name.split('_')[:3])
75 | slice_range_list = list(map(str, x.Slice_range_list))
76 | slice_range_list = [num.zfill(3) for num in slice_range_list]
77 | slice_range_list = '-'.join(slice_range_list)
78 | return f'{patient_study_series}_{slice_range_list}'
79 |
80 | df['ids'] = df.apply(get_ids, axis=1)
81 | return df
82 |
83 | def _row(self, i):
84 | # funny story, f-string does not work for pandas.query,
85 | # @ syntax does not work for linter, use # noqa
86 | return self._metadata.query('ids==@i')
87 |
88 | def patient_id(self, i):
89 | patient, study, series = map(int, i.split('_')[:3])
90 | return patient
91 |
92 | def study_id(self, i):
93 | patient, study, series = map(int, i.split('_')[:3])
94 | return study
95 |
96 | def series_id(self, i):
97 | patient, study, series = map(int, i.split('_')[:3])
98 | return series
99 |
100 | def sex(self, i):
101 | return self._row(i).Patient_gender.iloc[0]
102 |
103 | def age(self, i):
104 |         """Patient age may differ between studies (the dataset contains longitudinal records)."""
105 | return self._row(i).Patient_age.iloc[0]
106 |
107 | def ct_window(self, i):
108 |         """CT window extracted from DICOMs. Note that these are min-max values for windowing, not width-level."""
109 | return self._row(i).DICOM_windows.iloc[0]
110 |
111 | def affine(self, i):
112 | return self._image_file(i).affine
113 |
114 | def spacing(self, i):
115 | return tuple(self._image_file(i).header['pixdim'][1:4])
116 |
117 | def image(self, i):
118 | """Some 3D volumes are stored as separate subvolumes, e.g. ds.ids[15000] and ds.ids[15001]."""
119 | return np.asarray(self._image_file(i).dataobj)
120 |
121 | def train_val_test(self, i):
122 |         """Authors-defined, randomly generated patient-level data split: train=1, validation=2, test=3,
123 |         in a 70/15/15 ratio."""
124 | return int(self._row(i).Train_Val_Test.iloc[0])
125 |
126 | def lesion_position(self, i):
127 |         """Lesion measurements as they appear in DL_info.csv; for details see
128 | https://nihcc.app.box.com/v/DeepLesion/file/306056134060 ."""
129 | position = self._row(i)[
130 | [
131 | 'Slice_range_list',
132 | 'Key_slice_index',
133 | 'Measurement_coordinates',
134 | 'Bounding_boxes',
135 | 'Lesion_diameters_Pixel_',
136 | 'Normalized_lesion_location',
137 | ]
138 | ].to_dict('list')
139 | position['Slice_range_list'] = position['Slice_range_list'][0]
140 | return position
141 |
142 | def mask(self, i):
143 |         """Mask built from the provided bounding boxes. Note that the bounding-box annotation
144 |         is very coarse: it only covers a single 2D slice per lesion."""
145 | mask = np.zeros_like(self.image(i))
146 | lesion_position = self.lesion_position(i)
147 | min_index = lesion_position['Slice_range_list'][0]
148 |         for idx, slice_index in enumerate(lesion_position['Key_slice_index']):
149 |             image_index = slice_index - min_index
150 |             top_left_x, top_left_y, bot_right_x, bot_right_y = lesion_position['Bounding_boxes'][idx]
151 | mask[
152 | int(np.floor(top_left_y)) : int(np.ceil(bot_right_y)),
153 | int(np.floor(top_left_x)) : int(np.ceil(bot_right_x)),
154 | image_index,
155 | ] = 1
156 | return mask
157 |
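158 |
159 | if __name__ == '__main__':
160 |     # Minimal usage sketch (illustrative only): the root below is a hypothetical path to the
161 |     # folder with DL_info.csv and the Images_nifti subfolder (see the class docstring).
162 |     ds = DeepLesion(root='/path/to/folder')
163 |     i = ds.ids[0]
164 |     # the bounding boxes are 2D and live on the key slices of the subvolume,
165 |     # so the mask is non-zero only on those slices
166 |     position = ds.lesion_position(i)
167 |     key_slice = position['Key_slice_index'][0] - position['Slice_range_list'][0]
168 |     print(ds.image(i).shape, int(ds.mask(i)[..., key_slice].sum()))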
--------------------------------------------------------------------------------
/amid/cc359/dataset.py:
--------------------------------------------------------------------------------
1 | import contextlib
2 | import gzip
3 | import zipfile
4 | from pathlib import Path
5 | from zipfile import ZipFile
6 |
7 | import nibabel as nb
8 | import numpy as np
9 |
10 | from ..internals import Dataset, licenses, register
11 |
12 |
13 | @register(
14 | body_region='Head',
15 | license=licenses.CC_BYND_40,
16 | link='https://sites.google.com/view/calgary-campinas-dataset/home',
17 | modality='MRI T1',
18 | prep_data_size='14,66G',
19 | raw_data_size='4,1G',
20 | task='Segmentation',
21 | )
22 | class CC359(Dataset):
23 | """
24 | A (C)algary-(C)ampinas public brain MR dataset with (359) volumetric images [1]_.
25 |
26 | There are three segmentation tasks on this dataset: (i) brain, (ii) hippocampus, and
27 |     (iii) White-Matter (WM), Gray-Matter (GM), and Cerebrospinal Fluid (CSF) segmentation.
28 |
29 | Parameters
30 | ----------
31 | root : str, Path, optional
32 | path to the folder containing the raw downloaded archives.
33 | If not provided, the cache is assumed to be already populated.
34 |
35 |
36 | Notes
37 | -----
38 | homepage (upd): https://sites.google.com/view/calgary-campinas-dataset/home
39 | homepage (old): https://miclab.fee.unicamp.br/calgary-campinas-359-updated-05092017
40 |
41 | To obtain MR images and brain and hippocampus segmentation masks, please, follow the instructions
42 | at the download platform: https://portal.conp.ca/dataset?id=projects/calgary-campinas.
43 |
44 |     Using the `datalad` library, you need to download three zip archives:
45 | - `Original.zip` (the original MR images)
46 | - `hippocampus_staple.zip` (Silver-standard hippocampus masks generated using STAPLE)
47 | - `Silver-standard-machine-learning.zip` (Silver-standard brain masks generated using a machine learning method)
48 |
49 |     To date, the WM, GM, and CSF masks can only be downloaded from Google Drive:
50 | https://drive.google.com/drive/u/0/folders/0BxLb0NB2MjVZNm9JY1pWNFp6WTA?resourcekey=0-2sXMr8q-n2Nn6iY3PbBAdA.
51 |
52 |     There you need to manually download the folder (relative to the Google Drive root above)
53 | `CC359/Reconstructed/CC359/WM-GM-CSF/`
54 |
55 | So the root folder to pass to this dataset class should contain four objects:
56 | - three zip archives (`Original.zip`, `hippocampus_staple.zip`, and `Silver-standard-machine-learning.zip`)
57 | - one folder `WM-GM-CSF` with the original structure:
58 | <...>/WM-GM-CSF/CC0319_ge_3_45_M.nii.gz
59 | <...>/WM-GM-CSF/CC0324_ge_3_56_M.nii.gz
60 | ...
61 |
62 | Examples
63 | --------
64 | >>> # Place the downloaded archives in any folder and pass the path to the constructor:
65 | >>> cc359 = CC359(root='/path/to/downloaded/data/folder/')
66 | >>> print(len(cc359.ids))
67 | # 359
68 | >>> print(cc359.image(cc359.ids[0]).shape)
69 | # (171, 256, 256)
70 | >>> print(cc359.wm_gm_csf(cc359.ids[80]).shape)
71 | # (180, 240, 240)
72 |
73 | References
74 | ----------
75 | .. [1] Souza, Roberto, et al. "An open, multi-vendor, multi-field-strength brain MR dataset
76 | and analysis of publicly available skull stripping methods agreement."
77 | NeuroImage 170 (2018): 482-494.
78 | https://www.sciencedirect.com/science/article/pii/S1053811917306687
79 |
80 | """
81 |
82 | @property
83 | def ids(self):
84 | result = set()
85 | with ZipFile(self.root / 'Original.zip') as zf:
86 | for zipinfo in zf.infolist():
87 | if zipinfo.is_dir():
88 | continue
89 |
90 | file_name = Path(zipinfo.filename).name
91 | if file_name.startswith('CC'):
92 | result.add(file_name.split('_')[0])
93 |
94 | return tuple(sorted(result))
95 |
96 | def _image_file(self, i):
97 | return get_zipfile(i, 'Original.zip', self.root)
98 |
99 | def vendor(self, i):
100 | return zipfile2meta(self._image_file(i))['vendor']
101 |
102 | def field(self, i):
103 | return zipfile2meta(self._image_file(i))['field']
104 |
105 | def age(self, i):
106 | return zipfile2meta(self._image_file(i))['age']
107 |
108 | def sex(self, i):
109 | return zipfile2meta(self._image_file(i))['gender']
110 |
111 | def image(self, i):
112 | with open_nii_gz_file(self._image_file(i)) as nii_image:
113 | return np.asarray(nii_image.dataobj)
114 |
115 | def affine(self, i):
116 | """The 4x4 matrix that gives the image's spatial orientation."""
117 | with open_nii_gz_file(self._image_file(i)) as nii_image:
118 | return nii_image.affine
119 |
120 | def spacing(self, i):
121 | """Returns voxel spacing along axes (x, y, z)."""
122 | with open_nii_gz_file(self._image_file(i)) as nii_image:
123 | return tuple(nii_image.header['pixdim'][1:4])
124 |
125 | # masks:
126 |
127 | def brain(self, i):
128 | zf = get_zipfile(i, 'Silver-standard-machine-learning.zip', self.root)
129 | with open_nii_gz_file(zf) as nii_image:
130 | return np.uint8(nii_image.get_fdata())
131 |
132 | def hippocampus(self, i):
133 | try:
134 | zf = get_zipfile(i, 'hippocampus_staple.zip', self.root)
135 | except KeyError:
136 | return None
137 |
138 | with open_nii_gz_file(zf) as nii_image:
139 | return np.uint8(nii_image.get_fdata())
140 |
141 | def wm_gm_csf(self, i):
142 | for file in (self.root / 'WM-GM-CSF').glob('*'):
143 | if file.name.startswith(i):
144 | with open_nii_gz_file(file) as nii_image:
145 | return np.uint8(nii_image.get_fdata())
146 |
147 |
148 | # TODO: sync with amid.utils
149 | @contextlib.contextmanager
150 | def open_nii_gz_file(file):
151 | with file.open('rb') as opened:
152 | with gzip.GzipFile(fileobj=opened) as nii:
153 | nii = nb.FileHolder(fileobj=nii)
154 | yield nb.Nifti1Image.from_file_map({'header': nii, 'image': nii})
155 |
156 |
157 | def get_zipfile(_id, archive_name, root):
158 | archive = Path(root) / archive_name
159 | with ZipFile(archive) as zf:
160 | for zipinfo in zf.infolist():
161 | if Path(zipinfo.filename).name.startswith(_id) and not zipinfo.is_dir():
162 | return zipfile.Path(str(archive), zipinfo.filename)
163 |
164 | raise KeyError(f'Id "{_id}" not found')
165 |
166 |
167 | def zipfile2meta(zf):
168 | return dict(zip(['id', 'vendor', 'field', 'age', 'gender'], zf.name[: -len('.nii.gz')].split('_')))
169 |
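170 |
171 | if __name__ == '__main__':
172 |     # Minimal usage sketch (illustrative only): the root below is a hypothetical path to the
173 |     # folder with the three zip archives and the WM-GM-CSF folder (see the class docstring).
174 |     cc359 = CC359(root='/path/to/downloaded/data/folder/')
175 |     # vendor, field strength, age and sex are all encoded in the file names
176 |     # (e.g. CC0319_ge_3_45_M.nii.gz) and parsed by zipfile2meta above
177 |     i = cc359.ids[0]
178 |     print(cc359.vendor(i), cc359.field(i), cc359.age(i), cc359.sex(i))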
--------------------------------------------------------------------------------
/amid/msd.py:
--------------------------------------------------------------------------------
1 | import contextlib
2 | import gzip
3 | import json
4 | import tarfile
5 | from pathlib import Path
6 |
7 | import nibabel as nb
8 | import numpy as np
9 |
10 | from .internals import Dataset, register
11 |
12 |
13 | @register(
14 | body_region=('Chest', 'Abdominal', 'Head'),
15 | link='http://medicaldecathlon.com/',
16 | modality=('CT', 'CE CT', 'MRI', 'MRI FLAIR', 'MRI T1w', 'MRI t1gd', 'MRI T2w', 'MRI T2', 'MRI ADC'),
17 | raw_data_size='97.8G',
18 | task='Image segmentation',
19 | )
20 | class MSD(Dataset):
21 | """
22 |     MSD is the Medical Segmentation Decathlon challenge with 10 tasks.
23 | Parameters
24 | ----------
25 | root : str, Path, optional
26 | path to the folder containing the raw downloaded archives.
27 | If not provided, the cache is assumed to be already populated.
28 |
29 | Notes
30 | -----
31 |     Data can be downloaded here: http://medicaldecathlon.com/
32 | or here: https://msd-for-monai.s3-us-west-2.amazonaws.com/
33 | or here: https://drive.google.com/drive/folders/1HqEgzS8BV2c7xYNrZdEAnrHk7osJJ--2/
34 |     Then, the folder with the raw downloaded data should contain the tar archives with data and masks
35 |     (e.g. `Task03_Liver.tar`).
36 | """
37 |
38 | @property
39 | def ids(self):
40 | ids_all = []
41 | for folder in self.root.glob('*'):
42 | if folder.name.endswith('.tar'):
43 | ids_folder = ids_from_tar(folder)
44 | else:
45 | ids_folder = ids_from_folder(folder)
46 | ids_all.extend(ids_folder)
47 | return tuple(ids_all)
48 |
49 | def train_test(self, i) -> str:
50 | fold = 'train' if 'train' in i else 'test'
51 | return fold
52 |
53 | def task(self, i) -> str:
54 | return NAME_TO_TASK[i.split('_')[1]]
55 |
56 | def _relative(self, i):
57 | name = i.removeprefix('train_').removeprefix('test_')
58 | return Path(self.task(i)), Path('imagesTr' if 'train' in i else 'imagesTs') / f'{name}.nii.gz'
59 |
60 | def image(self, i):
61 | with open_nii_gz(self.root, self._relative(i)) as (file, unpacked):
62 | if unpacked:
63 | return np.int16(nb.load(file).get_fdata())
64 | else:
65 | with gzip.GzipFile(fileobj=file) as nii_gz:
66 | nii = nb.FileHolder(fileobj=nii_gz)
67 | return np.int16(nb.Nifti1Image.from_file_map({'header': nii, 'image': nii}).get_fdata())
68 |
69 | def affine(self, i):
70 | """The 4x4 matrix that gives the image's spatial orientation."""
71 | with open_nii_gz(self.root, self._relative(i)) as (file, unpacked):
72 | if unpacked:
73 | return nb.load(file).affine
74 | else:
75 | with gzip.GzipFile(fileobj=file) as nii_gz:
76 | nii = nb.FileHolder(fileobj=nii_gz)
77 | return nb.Nifti1Image.from_file_map({'header': nii, 'image': nii}).affine
78 |
79 | def image_modality(self, i):
80 | task = self.task(i)
81 | if (self.root / task).is_dir():
82 | with open(self.root / task / 'dataset.json', 'r') as file:
83 | return json.loads(file.read())['modality']
84 |
85 | with tarfile.open(self.root / f'{task}.tar') as tf:
86 | member = tf.getmember(f'{task}/dataset.json')
87 | file = tf.extractfile(member)
88 | return json.loads(file.read())['modality']
89 |
90 | def segmentation_labels(self, i):
91 | """Returns segmentation labels for the task"""
92 | task = self.task(i)
93 | if (self.root / task).is_dir():
94 | with open(self.root / task / 'dataset.json', 'r') as file:
95 | return json.loads(file.read())['labels']
96 |
97 | with tarfile.open(self.root / f'{task}.tar') as tf:
98 | member = tf.getmember(f'{task}/dataset.json')
99 | file = tf.extractfile(member)
100 | return json.loads(file.read())['labels']
101 |
102 | def mask(self, i):
103 | task, relative = self._relative(i)
104 | if 'imagesTs' not in str(relative):
105 | with open_nii_gz(self.root, (task, str(relative).replace('images', 'labels'))) as (file, unpacked):
106 | if unpacked:
107 | return np.uint8(nb.load(file).get_fdata())
108 | else:
109 | with gzip.GzipFile(fileobj=file) as nii_gz:
110 | nii = nb.FileHolder(fileobj=nii_gz)
111 | return np.uint8(nb.Nifti1Image.from_file_map({'header': nii, 'image': nii}).get_fdata())
112 |
113 |
114 | TASK_TO_NAME: dict = {
115 | 'Task01_BrainTumour': 'BRATS',
116 | 'Task02_Heart': 'la',
117 | 'Task03_Liver': 'liver',
118 | 'Task04_Hippocampus': 'hippocampus',
119 | 'Task05_Prostate': 'prostate',
120 | 'Task06_Lung': 'lung',
121 | 'Task07_Pancreas': 'pancreas',
122 | 'Task08_HepaticVessel': 'hepaticvessel',
123 | 'Task09_Spleen': 'spleen',
124 | 'Task10_Colon': 'colon',
125 | }
126 |
127 | NAME_TO_TASK = dict(zip(TASK_TO_NAME.values(), TASK_TO_NAME.keys()))
128 |
129 |
130 | @contextlib.contextmanager
131 | def open_nii_gz(path, nii_gz_path):
132 |     """Opens a .nii.gz file from an unpacked task folder or from inside a .tar archive.
133 |
134 |     Parameters:
135 |     - path: path to the folder containing the task folders and/or .tar archives
136 |     - nii_gz_path: a (task, relative path) pair locating the .nii.gz file within the task.
137 |
138 |     Yields:
139 |     - a (file, unpacked) pair: a path (if the task is unpacked) or an extracted file object, and a bool flag.
140 |     """
141 | task, relative = nii_gz_path
142 | if (path / task / relative).exists():
143 | yield path / task / relative, True
144 | else:
145 | with tarfile.open(path / f'{task}.tar', 'r') as tar:
146 | yield tar.extractfile(str(task / relative)), False
147 |
148 |
149 | def get_id(filename: Path):
150 | fold = 'test' if 'imagesTs' in str(filename) else 'train'
151 | name = filename.name.removesuffix('.nii.gz')
152 | return '_'.join([fold, name])
153 |
154 |
155 | def ids_from_tar(tar_folder):
156 | ids = []
157 | with tarfile.open(tar_folder, 'r') as tf:
158 | for file in tf.getmembers():
159 | filename = Path(file.name)
160 | if not filename.name.startswith('._') and filename.suffix == '.gz' and 'images' in filename.parent.name:
161 | ids.append(get_id(filename))
162 | return sorted(ids)
163 |
164 |
165 | def ids_from_folder(folder):
166 | ids = []
167 | for filename in folder.rglob('*.nii.gz'):
168 | if not filename.name.startswith('._') and filename.suffix == '.gz' and 'images' in filename.parent.name:
169 | ids.append(get_id(filename))
170 | return sorted(ids)
171 |
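172 |
173 | if __name__ == '__main__':
174 |     # Minimal usage sketch (illustrative only): the root below is a hypothetical folder
175 |     # holding one or more task archives, e.g. `Task03_Liver.tar` (see the class docstring).
176 |     ds = MSD(root='/path/to/downloaded/data/folder/')
177 |     i = ds.ids[0]
178 |     # each id encodes the split ('train'/'test') and the original case name;
179 |     # the task name, modalities and label map come from the task's dataset.json
180 |     print(ds.train_test(i), ds.task(i), ds.segmentation_labels(i))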
--------------------------------------------------------------------------------
/amid/amos/dataset.py:
--------------------------------------------------------------------------------
1 | from functools import cached_property
2 | from zipfile import ZipFile
3 |
4 | import nibabel
5 | import numpy as np
6 | import pandas as pd
7 | from jboc import composed
8 |
9 | from ..internals import Dataset, field, licenses, register
10 | from ..utils import open_nii_gz_file, unpack
11 |
12 |
13 | ARCHIVE_NAME_SEG = 'amos22.zip'
14 | ARCHIVE_ROOT_NAME = 'amos22'
15 | ERRORS = ['5514', '5437'] # these ids are damaged in the zip archives
16 |
17 |
18 | # TODO: add MRI
19 |
20 |
21 | @register(
22 | body_region='Abdomen',
23 | license=licenses.CC_BY_40,
24 | link='https://zenodo.org/record/7262581',
25 | modality=('CT', 'MRI'),
26 | raw_data_size='23G', # TODO: update size with unlabelled
27 | prep_data_size='89,5G',
28 | task='Supervised multi-modality abdominal multi-organ segmentation',
29 | )
30 | class AMOS(Dataset):
31 | """
32 | AMOS provides 500 CT and 100 MRI scans collected from multi-center, multi-vendor, multi-modality, multi-phase,
33 | multi-disease patients, each with voxel-level annotations of 15 abdominal organs, providing challenging examples
34 |     and a test-bed for studying robust segmentation algorithms under diverse targets and scenarios [1].
35 |
36 | Parameters
37 | ----------
38 | root : str, Path, optional
39 | Absolute path to the root containing the downloaded archive and meta.
40 | If not provided, the cache is assumed to be already populated.
41 |
42 | Notes
43 | -----
44 | Download link: https://zenodo.org/record/7262581/files/amos22.zip
45 |
46 | Examples
47 | --------
48 | >>> # Download the archive and meta to any folder and pass the path to the constructor:
49 | >>> ds = AMOS(root='/path/to/the/downloaded/files')
50 | >>> print(len(ds.ids))
51 | # 961
52 | >>> print(ds.image(ds.ids[0]).shape)
53 | # (768, 768, 90)
54 | >>> print(ds.mask(ds.ids[26]).shape)
55 | # (512, 512, 124)
56 |
57 | References
58 | ----------
59 | .. [1] JI YUANFENG. (2022). Amos: A large-scale abdominal multi-organ benchmark for
60 | versatile medical image segmentation [Data set]. Zenodo. https://doi.org/10.5281/zenodo.7262581
61 | """
62 |
63 | @property
64 | def ids(self):
65 | ids = list(self._id2split)
66 |
67 | for archive in [
68 | 'amos22_unlabeled_ct_5000_5399.zip',
69 | 'amos22_unlabeled_ct_5400_5899.zip',
70 | 'amos22_unlabeled_ct_5900_6199.zip',
71 | 'amos22_unlabeled_ct_6200_6899.zip',
72 | ]:
73 | file = self.root / archive
74 | if not file.exists():
75 | continue
76 |
77 | with ZipFile(file) as zf:
78 | for x in zf.namelist():
79 | if x.endswith('.nii.gz'):
80 | file = x.split('/')[-1]
81 |
82 | ids.append(file.split('.')[0].split('_')[-1])
83 |
84 | return sorted(ids)
85 |
86 | @field
87 | def image(self, i):
88 | """Corresponding 3D image."""
89 | if i in ERRORS:
90 | return None # this image is damaged in the archive
91 |
92 | archive_name, archive_root, file = self._archive_name(i)
93 | with unpack(self.root / archive_name, file, archive_root, '.zip') as (unpacked, is_unpacked):
94 | if is_unpacked:
95 | return np.asarray(nibabel.load(unpacked).dataobj)
96 | else:
97 | with open_nii_gz_file(unpacked) as image:
98 | return np.asarray(image.dataobj)
99 |
100 | @field
101 | def affine(self, i):
102 | """The 4x4 matrix that gives the image's spatial orientation."""
103 | if i in ERRORS:
104 | return None # this image is damaged in the archive
105 |
106 | archive_name, archive_root, file = self._archive_name(i)
107 | with unpack(self.root / archive_name, file, archive_root, '.zip') as (unpacked, is_unpacked):
108 | if is_unpacked:
109 | return nibabel.load(unpacked).affine
110 | else:
111 | with open_nii_gz_file(unpacked) as image:
112 | return image.affine
113 |
114 | @field
115 | def mask(self, i):
116 | if i not in self._id2split:
117 | return
118 |
119 | file = f'labels{self._id2split[i]}/amos_{i}.nii.gz'
120 | try:
121 | with unpack(self.root / ARCHIVE_NAME_SEG, file, ARCHIVE_ROOT_NAME, '.zip') as (unpacked, is_unpacked):
122 | if is_unpacked:
123 | return np.asarray(nibabel.load(unpacked).dataobj)
124 | else:
125 | with open_nii_gz_file(unpacked) as image:
126 | return np.asarray(image.dataobj)
127 | except FileNotFoundError:
128 | return
129 |
130 | @field
131 | def image_modality(self, i):
132 | """Returns image modality, `CT` or `MRI`."""
133 | if 500 < int(i) <= 600:
134 | return 'MRI'
135 | return 'CT'
136 |
137 | # labels
138 | @field
139 | def birth_date(self, i):
140 | return self._label(i, "Patient's Birth Date")
141 |
142 | @field
143 | def sex(self, i):
144 | return self._label(i, "Patient's Sex")
145 |
146 | @field
147 | def age(self, i):
148 | return self._label(i, "Patient's Age")
149 |
150 | @field
151 | def manufacturer_model(self, i):
152 | return self._label(i, "Manufacturer's Model Name")
153 |
154 | @field
155 | def manufacturer(self, i):
156 | return self._label(i, 'Manufacturer')
157 |
158 | @field
159 | def acquisition_date(self, i):
160 | return self._label(i, 'Acquisition Date')
161 |
162 | @field
163 | def site(self, i):
164 | return self._label(i, 'Site')
165 |
166 | @cached_property
167 | @composed(dict)
168 | def _id2split(self):
169 | with ZipFile(self.root / ARCHIVE_NAME_SEG) as zf:
170 | for x in zf.namelist():
171 | if (len(x.strip('/').split('/')) == 3) and x.endswith('.nii.gz'):
172 | file, split = x.split('/')[-1], x.split('/')[-2][-2:]
173 | id_ = file.split('.')[0].split('_')[-1]
174 |
175 | yield id_, split
176 |
177 | @cached_property
178 | def _meta(self):
179 | files = [
180 | 'labeled_data_meta_0000_0599.csv',
181 | 'unlabeled_data_meta_5400_5899.csv',
182 | 'unlabeled_data_meta_5000_5399.csv',
183 | 'unlabeled_data_meta_5900_6199.csv',
184 | ]
185 |
186 | dfs = []
187 | for file in files:
188 | with unpack(self.root, file) as (unpacked, _):
189 | dfs.append(pd.read_csv(unpacked))
190 | return pd.concat(dfs)
191 |
192 | def _archive_name(self, i):
193 | if i in self._id2split:
194 | return ARCHIVE_NAME_SEG, ARCHIVE_ROOT_NAME, f'images{self._id2split[i]}/amos_{i}.nii.gz'
195 |
196 | i = int(i)
197 | file = f'amos_{i}.nii.gz'
198 | if 5000 <= i < 5400:
199 | return 'amos22_unlabeled_ct_5000_5399.zip', 'amos_unlabeled_ct_5000_5399', file
200 | elif 5400 <= i < 5900:
201 | return 'amos22_unlabeled_ct_5400_5899.zip', 'amos_unlabeled_ct_5400_5899', file
202 | elif 5900 <= i < 6200:
203 | return 'amos22_unlabeled_ct_5900_6199.zip', 'amos22_unlabeled_ct_5900_6199', file
204 | else:
205 | return 'amos22_unlabeled_ct_6200_6899.zip', 'amos22_unlabeled_6200_6899', file
206 |
207 | def _label(self, i, column):
208 | # ambiguous data in meta
209 | if int(i) in [500, 600]:
210 | return None
211 |         elif int(i) not in self._meta['amos_id'].values:
212 | return None
213 |
214 | return self._meta[self._meta['amos_id'] == int(i)][column].item()
215 |
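216 |
217 | if __name__ == '__main__':
218 |     # Minimal usage sketch (illustrative only): the root below is a hypothetical folder
219 |     # with amos22.zip and, optionally, the unlabeled-CT archives listed in `ids`.
220 |     ds = AMOS(root='/path/to/the/downloaded/files')
221 |     # ids coming from amos22.zip have reference masks; the unlabeled-CT ids do not
222 |     labeled = [i for i in ds.ids if i in ds._id2split]
223 |     print(len(labeled), len(ds.ids) - len(labeled))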
--------------------------------------------------------------------------------
/amid/lidc/dataset.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | import os
3 | from typing import List, Tuple, Union
4 |
5 | import numpy as np
6 | import pylidc as pl
7 | from dicom_csv import (
8 | Series,
9 | expand_volumetric,
10 | get_common_tag,
11 | get_orientation_matrix,
12 | get_tag,
13 | order_series,
14 | stack_images,
15 | )
16 | from pylidc.utils import consensus
17 | from scipy import stats
18 |
19 | from ..internals import Dataset, field, licenses, register
20 | from ..utils import PathOrStr, get_series_date
21 | from .nodules import get_nodule
22 | from .typing import LIDCNodule
23 |
24 |
25 | @register(
26 | body_region='Chest',
27 | license=licenses.CC_BY_30,
28 | link='https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=1966254',
29 | modality='CT',
30 | prep_data_size='71,2G',
31 | raw_data_size='126G',
32 | task='Lung nodules segmentation',
33 | )
34 | class LIDC(Dataset):
35 | """
36 | The (L)ung (I)mage (D)atabase (C)onsortium image collection (LIDC-IDRI) [1]_
37 | consists of diagnostic and lung cancer screening thoracic computed tomography (CT) scans
38 |     with marked-up annotated lesions for a lung nodule segmentation task.
39 |     Scans contain multiple expert annotations.
40 |
41 | Number of CT scans: 1018.
42 |
43 | Parameters
44 | ----------
45 | root : str, Path, optional
46 | path to the folder containing the raw downloaded archives.
47 | If not provided, the cache is assumed to be already populated.
48 |
49 | Notes
50 | -----
51 | Follow the download instructions at https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=1966254.
52 |
53 | Then, the folder with raw downloaded data should contain folder `LIDC-IDRI`,
54 | which contains folders `LIDC-IDRI-*`.
55 |
56 | Examples
57 | --------
58 | >>> # Place the downloaded archives in any folder and pass the path to the constructor:
59 | >>> ds = LIDC(root='/path/to/downloaded/data/folder/')
60 | >>> print(len(ds.ids))
61 | # 1018
62 | >>> print(ds.image(ds.ids[0]).shape)
63 | # (512, 512, 194)
64 | >>> print(ds.cancer(ds.ids[0]).shape)
65 | # (512, 512, 194)
66 |
67 | References
68 | ----------
69 | .. [1] Armato III, McLennan, et al. "The lung image database consortium (lidc) and image database
70 | resource initiative (idri): a completed reference database of lung nodules on ct scans."
71 | Medical physics 38(2) (2011): 915–931.
72 | https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3041807/
73 | """
74 |
75 | def __init__(self, root: PathOrStr):
76 | super().__init__(root)
77 | self._check_config()
78 |
79 | def _check_config(self):
80 | pylidc_config_start = '[dicom]\npath = '
81 | if os.path.exists(os.path.expanduser('~/.pylidcrc')):
82 | with open(os.path.expanduser('~/.pylidcrc'), 'r') as config_file:
83 | content = config_file.read()
84 | if content == f'{pylidc_config_start}{self.root}':
85 | return
86 |
87 | # save _root path to ~/.pylidcrc file for pylidc
88 | with open(os.path.expanduser('~/.pylidcrc'), 'w') as config_file:
89 | config_file.write(f'{pylidc_config_start}{self.root}')
90 |
91 | @property
92 | def ids(self):
93 | result = [scan.series_instance_uid for scan in pl.query(pl.Scan).all()]
94 | return tuple(sorted(result))
95 |
96 | def _scan(self, i) -> pl.Scan:
97 | _id = i.split('_')[-1]
98 | return pl.query(pl.Scan).filter(pl.Scan.series_instance_uid == _id).first()
99 |
100 | def _series(self, i) -> Series:
101 | series = expand_volumetric(self._scan(i).load_all_dicom_images(verbose=False))
102 | series = order_series(series)
103 | return series
104 |
105 | def _shape(self, i) -> Tuple[int, int, int]:
106 | return stack_images(self._series(i), -1).shape
107 |
108 | @field
109 | def image(self, i) -> np.ndarray:
110 | return self._scan(i).to_volume(verbose=False)
111 |
112 | @field
113 | def study_uid(self, i) -> str:
114 | return self._scan(i).study_instance_uid
115 |
116 | @field
117 | def series_uid(self, i) -> str:
118 | return self._scan(i).series_instance_uid
119 |
120 | @field
121 | def patient_id(self, i) -> str:
122 | return self._scan(i).patient_id
123 |
124 | @field
125 | def sop_uids(self, i) -> List[str]:
126 |         return [str(get_tag(img, 'SOPInstanceUID')) for img in self._series(i)]
127 |
128 | @field
129 | def pixel_spacing(self, i) -> List[float]:
130 | spacing = self._scan(i).pixel_spacing
131 | return [spacing, spacing]
132 |
133 | @field
134 | def slice_locations(self, i) -> np.ndarray:
135 | return self._scan(i).slice_zvals
136 |
137 | # @field
138 | def spacing(self, i) -> Tuple[float, float, float]:
139 | """
140 | Volumetric spacing of the image.
141 | The maximum relative difference in `slice_locations` < 1e-3
142 | (except 4 images listed below),
143 | so we allow ourselves to use the common spacing for the whole 3D image.
144 |
145 | Note
146 | ----
147 |         The `slice_locations` attribute typically (but not always!) has a constant step.
148 |         In the LIDC dataset, only 4 images have a relative difference in `slice_locations` > 1e-3:
149 | 1.3.6.1.4.1.14519.5.2.1.6279.6001.526570782606728516388531252230
150 | 1.3.6.1.4.1.14519.5.2.1.6279.6001.329334252028672866365623335798
151 | 1.3.6.1.4.1.14519.5.2.1.6279.6001.245181799370098278918756923992
152 | 1.3.6.1.4.1.14519.5.2.1.6279.6001.103115201714075993579787468219
153 |         These differences appear in at most 3 slices.
154 | Therefore, we consider their impact negligible.
155 | """
156 | return (*self.pixel_spacing(i), stats.mode(np.diff(self.slice_locations(i)))[0].item())
157 |
158 | @field
159 | def contrast_used(self, i) -> bool:
160 | """If the DICOM file for the scan had any Contrast tag, this is marked as `True`."""
161 | return self._scan(i).contrast_used
162 |
163 | @field
164 | def is_from_initial(self, i) -> bool:
165 | """
166 | Indicates whether or not this PatientID was tagged as
167 | part of the initial 399 release.
168 | """
169 | return self._scan(i).is_from_initial
170 |
171 | @field
172 | def orientation_matrix(self, i) -> np.ndarray:
173 | return get_orientation_matrix(self._series(i))
174 |
175 | @field
176 | def sex(self, i) -> Union[str, None]:
177 | return get_common_tag(self._series(i), 'PatientSex', default=None)
178 |
179 | @field
180 | def age(self, i) -> Union[str, None]:
181 | return get_common_tag(self._series(i), 'PatientAge', default=None)
182 |
183 | @field
184 | def conv_kernel(self, i) -> Union[str, None]:
185 | return get_common_tag(self._series(i), 'ConvolutionKernel', default=None)
186 |
187 | @field
188 | def kvp(self, i) -> Union[str, None]:
189 | return get_common_tag(self._series(i), 'KVP', default=None)
190 |
191 | @field
192 | def tube_current(self, i) -> Union[str, None]:
193 | return get_common_tag(self._series(i), 'XRayTubeCurrent', default=None)
194 |
195 | @field
196 | def study_date(self, i) -> Union[datetime.date, None]:
197 | return get_series_date(self._series(i))
198 |
199 | @field
200 | def accession_number(self, i) -> Union[str, None]:
201 | return get_common_tag(self._series(i), 'AccessionNumber', default=None)
202 |
203 | @field
204 | def nodules(self, i) -> List[List[LIDCNodule]]:
205 | nodules = []
206 | for anns in self._scan(i).cluster_annotations():
207 | nodule_annotations = []
208 | for ann in anns:
209 | nodule_annotations.append(get_nodule(ann))
210 | nodules.append(nodule_annotations)
211 | return nodules
212 |
213 | @field
214 | def nodules_masks(self, i) -> List[List[np.ndarray]]:
215 | nodules = []
216 | for anns in self._scan(i).cluster_annotations():
217 | nodule_annotations = []
218 | for ann in anns:
219 | nodule_annotations.append(ann.boolean_mask())
220 | nodules.append(nodule_annotations)
221 | return nodules
222 |
223 | @field
224 | def cancer(self, i) -> np.ndarray:
225 | cancer = np.zeros(self._shape(i), dtype=bool)
226 | for anns in self._scan(i).cluster_annotations():
227 | cancer |= consensus(anns, pad=np.inf)[0]
228 |
229 | return cancer
230 |
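231 |
232 | if __name__ == '__main__':
233 |     # Minimal usage sketch (illustrative only): the root below is a hypothetical path to
234 |     # the folder containing the LIDC-IDRI directory (see the class docstring).
235 |     ds = LIDC(root='/path/to/downloaded/data/folder/')
236 |     i = ds.ids[0]
237 |     # each nodule carries one entry per annotating expert; `cancer` is their consensus mask
238 |     print([len(anns) for anns in ds.nodules(i)])
239 |     print(int(ds.cancer(i).sum()))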
--------------------------------------------------------------------------------