├── lint.sh ├── amid ├── __version__.py ├── amos │ ├── __init__.py │ └── dataset.py ├── mslub │ ├── __init__.py │ └── dataset.py ├── ribfrac │ ├── __init__.py │ └── dataset.py ├── upenn_gbm │ ├── __init__.py │ ├── data_classes.py │ └── upenn_gbm.py ├── cancer_500 │ ├── __init__.py │ ├── typing.py │ ├── dataset.py │ └── nodules.py ├── rsna_bc │ ├── __init__.py │ ├── utils.py │ └── dataset.py ├── totalsegmentator │ ├── __init__.py │ ├── utils.py │ ├── const.py │ └── dataset.py ├── lits │ ├── __init__.py │ ├── transforms.py │ └── dataset.py ├── internals │ ├── __init__.py │ ├── dataset.py │ ├── licenses.py │ └── registry.py ├── vs_seg │ ├── __init__.py │ └── transforms.py ├── cc359 │ ├── __init__.py │ ├── transforms.py │ └── dataset.py ├── lidc │ ├── __init__.py │ ├── transforms.py │ ├── typing.py │ ├── nodules.py │ └── dataset.py ├── transforms.py ├── __init__.py ├── hcp.py ├── kits.py ├── covid_1110.py ├── cl_detection.py ├── tbad.py ├── liver_medseg.py ├── medseg9.py ├── curvas.py ├── nlst.py ├── utils.py ├── crlm.py ├── luna25.py ├── brats2021.py ├── egd.py ├── flare2022.py ├── crossmoda.py ├── ct_ich.py ├── verse.py ├── mood.py ├── deeplesion.py └── msd.py ├── tests ├── requirements.txt └── test_consistency.py ├── .gitignore ├── MANIFEST.in ├── lint-requirements.txt ├── docs ├── javascript │ ├── tablesort.js │ └── tablesort.filesize.js ├── requirements.txt ├── index.md ├── datasets-api.md ├── fill_readme.py ├── fill_docs.py ├── mkdocstrings_handlers │ └── python_connectome.py ├── CONTRIBUTING.md └── recipes │ └── RSNABreastCancer.ipynb ├── requirements.txt ├── .github └── workflows │ ├── lint.yml │ ├── docs.yml │ ├── release.yml │ └── tests.yml ├── .flake8 ├── LICENSE ├── pyproject.toml └── mkdocs.yml /lint.sh: -------------------------------------------------------------------------------- 1 | flake8 .; black .; isort . 
2 | -------------------------------------------------------------------------------- /amid/__version__.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.15.0' 2 | -------------------------------------------------------------------------------- /amid/amos/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset import AMOS 2 | -------------------------------------------------------------------------------- /amid/mslub/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset import MSLUB 2 | -------------------------------------------------------------------------------- /amid/ribfrac/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset import RibFrac 2 | -------------------------------------------------------------------------------- /amid/upenn_gbm/__init__.py: -------------------------------------------------------------------------------- 1 | from .upenn_gbm import UPENN_GBM 2 | -------------------------------------------------------------------------------- /amid/cancer_500/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset import MoscowCancer500 2 | -------------------------------------------------------------------------------- /amid/rsna_bc/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset import RSNABreastCancer 2 | -------------------------------------------------------------------------------- /tests/requirements.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | pytest-cov 3 | pytest-subtests 4 | -------------------------------------------------------------------------------- /amid/totalsegmentator/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset import Totalsegmentator 2 | -------------------------------------------------------------------------------- /amid/lits/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset import LiTS 2 | from .transforms import CanonicalCTOrientation, Rescale 3 | -------------------------------------------------------------------------------- /amid/internals/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset import Dataset, field 2 | from .registry import gather_datasets, register 3 | -------------------------------------------------------------------------------- /amid/vs_seg/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset import VSSEG 2 | from .transforms import CanonicalMRIOrientation, Rescale 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | __pycache__/ 3 | .pytest_cache/ 4 | *.egg-info/ 5 | .coverage 6 | docs/build/ 7 | docs/source/_*/ 8 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include requirements.txt 3 | include LICENSE 4 | recursive-include amid *.py .bev.yml *.hash 5 | 
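The `__init__.py` files above re-export each dataset class (and, where present, its transforms) from its subpackage, and `amid/__init__.py` (further below) re-exports them again at the top level. A minimal sketch of the resulting import surface, assuming the raw files have already been downloaded:

```python
# Sketch only: '/path/to/LiTS' is a hypothetical location of the downloaded raw files.
from amid import LiTS                                   # top-level re-export
from amid.lits import CanonicalCTOrientation, Rescale   # per-dataset transforms

ds = LiTS(root='/path/to/LiTS')
print(len(ds.ids))
```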
-------------------------------------------------------------------------------- /amid/cc359/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset import CC359, open_nii_gz_file 2 | from .transforms import CanonicalMRIOrientation, Rescale 3 | -------------------------------------------------------------------------------- /lint-requirements.txt: -------------------------------------------------------------------------------- 1 | black 2 | flake8<=5 3 | flake8-tidy-imports 4 | flake8-quotes 5 | flake8-bugbear 6 | flake8-comprehensions 7 | isort 8 | -------------------------------------------------------------------------------- /amid/lidc/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset import LIDC 2 | from .transforms import CanonicalCTOrientation, Rescale 3 | 4 | 5 | # TODO: remove pylidc dependency 6 | -------------------------------------------------------------------------------- /docs/javascript/tablesort.js: -------------------------------------------------------------------------------- 1 | document$.subscribe(function() { 2 | var tables = document.querySelectorAll("article table:not([class])") 3 | tables.forEach(function(table) { 4 | new Tablesort(table) 5 | }) 6 | }) 7 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | connectome>=0.10.0,<1.0.0 2 | numpy 3 | nibabel 4 | more-itertools 5 | dicom-csv 6 | tqdm 7 | pandas 8 | pylidc 9 | joblib 10 | deli<1.0.0 11 | scipy 12 | scikit-image 13 | pydicom 14 | imops 15 | highdicom 16 | SimpleITK 17 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | mkdocs==1.5.3 2 | mkdocs-material==9.4.1 3 | mkdocstrings[python]==0.22.0 4 | mkdocs-jupyter==0.24.2 5 | mkdocs-exclude==1.0.2 6 | mkdocs-autorefs==0.4.1 7 | mike 8 | pandas 9 | tabulate 10 | ipython-genutils 11 | griffe==0.29.1 12 | mkdocs-material-extensions==1.2 13 | mkdocstrings-python==1.1.2 14 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: Linters 2 | 3 | on: [ pull_request ] 4 | 5 | jobs: 6 | test: 7 | runs-on: ubuntu-20.04 8 | steps: 9 | - uses: actions/checkout@v3 10 | - name: Set up Python 11 | uses: actions/setup-python@v4 12 | with: 13 | python-version: '3.11' 14 | 15 | - name: Check code style 16 | run: | 17 | pip install -r lint-requirements.txt 18 | flake8 . 19 | isort --check . 20 | black --check . 
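The `Linters` workflow above runs the same tools listed in `lint-requirements.txt`, but in check-only mode, whereas `lint.sh` (shown earlier) applies the fixes in place. A sketch of the local equivalent of the CI job:

```shell
# Local reproduction of the CI lint job (tool versions pinned by lint-requirements.txt)
pip install -r lint-requirements.txt
flake8 .           # style and import rules configured in .flake8
isort --check .    # drop --check to apply the import ordering, as lint.sh does
black --check .    # drop --check to apply the formatting, as lint.sh does
```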
21 | -------------------------------------------------------------------------------- /amid/cancer_500/typing.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from typing import NamedTuple, Optional, Sequence 3 | 4 | 5 | class Texture(Enum): 6 | Solid, PartSolid, GroundGlass, Other = 0, 1, 2, 3 7 | 8 | 9 | class Review(Enum): 10 | Confirmed, ConfirmedPartially, Doubt, Rejected = 0, 1, 2, 3 11 | 12 | 13 | class Comment(Enum): 14 | Fibrosis, LymphNode, Calcium, Calcified, Bronchiectasis, Vessel = 0, 1, 2, 3, 4, 5 15 | 16 | 17 | class Cancer500Nodule(NamedTuple): 18 | center_voxel: Sequence[int] 19 | review: Review 20 | comment: Optional[Comment] = None 21 | diameter_mm: Optional[float] = None 22 | texture: Optional[Texture] = None 23 | malignancy: Optional[bool] = None 24 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | # W503: line break before binary operator is actually considered best-practice 3 | # E203: spaces around complex variables in slices are pep-right 4 | # F401: unused imports in __init__.py-s 5 | # I251: allow absolute imports in upper files 6 | # B019: @lru_cache for static methods is fine 7 | # B008: calling the function as default value is just part of the typer's interface 8 | # C408: for identifier-like fields dict(x=y) is just more concise 9 | ignore = W503,E203,B019,B028,C408,Q000 10 | per-file-ignores = 11 | **/__init__.py:F401 12 | scripts/*:I251 13 | tests/*:I251 14 | docs/*:I251 15 | amid/internals/cli.py:B008 16 | max-line-length = 120 17 | banned-modules = 18 | amid.* = Use relative imports 19 | -------------------------------------------------------------------------------- /amid/rsna_bc/utils.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import zipfile 3 | from pathlib import Path 4 | 5 | import pandas as pd 6 | 7 | from ..internals.dataset import register_field 8 | 9 | 10 | def csv_field(name, cast): 11 | def _loader(self, i): 12 | value = self._meta[i].get(name) 13 | if pd.isnull(value): 14 | return None 15 | if cast is not None: 16 | return cast(value) 17 | return value 18 | 19 | register_field('RSNABreastCancer', name, _loader) 20 | return _loader 21 | 22 | 23 | @contextlib.contextmanager 24 | def unpack(root: str, relative: str): 25 | unpacked = Path(root) / relative 26 | 27 | if unpacked.exists(): 28 | yield unpacked, True 29 | else: 30 | with zipfile.Path(root, relative).open('rb') as unpacked: 31 | yield unpacked, False 32 | -------------------------------------------------------------------------------- /amid/internals/dataset.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import Sequence 3 | 4 | from connectome import ExternalBase 5 | 6 | from ..utils import PathOrStr 7 | 8 | 9 | class Dataset(ExternalBase): 10 | _path: str 11 | _fields: Sequence[str] = None 12 | 13 | def __init__(self, root: PathOrStr): 14 | fields = None 15 | if hasattr(self, '_fields'): 16 | fields = self._fields 17 | 18 | super().__init__(fields=fields, inputs=['id'], properties=['ids'], inherit=['id']) 19 | self.root = Path(root) 20 | 21 | @classmethod 22 | def __getversion__(cls): 23 | return 0 24 | 25 | 26 | _Fields = {} 27 | 28 | 29 | def register_field(cls, name, func): 30 | _Fields.setdefault(cls, {})[name] = func 31 | 32 | 33 
| def field(func): 34 | cls, name = func.__qualname__.split('.') 35 | register_field(cls, name, func) 36 | return func 37 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | Awesome Medical Imaging Datasets (AMID) - a curated list of medical imaging datasets with unified interfaces 2 | 3 | ## Getting started 4 | 5 | Just import a dataset and start using it! 6 | 7 | Note that for some datasets you must manually download the raw files first. 8 | 9 | ```python 10 | from amid.verse import VerSe 11 | 12 | ds = VerSe() 13 | # get the available ids 14 | print(len(ds.ids)) 15 | i = ds.ids[0] 16 | 17 | # use the available methods: 18 | # load the image and vertebrae masks 19 | x, y = ds.image(i), ds.masks(i) 20 | print(ds.split(i), ds.patient(i)) 21 | 22 | # or get a namedTuple-like object: 23 | entry = ds(i) 24 | x, y = entry.image, entry.masks 25 | print(entry.split, entry.patient) 26 | ``` 27 | 28 | ## Install 29 | 30 | Just get it from PyPi: 31 | 32 | ```shell 33 | pip install amid 34 | ``` 35 | 36 | Or if you want to use version control features: 37 | 38 | ```shell 39 | git clone https://github.com/neuro-ml/amid.git 40 | cd amid && pip install -e . 41 | ``` 42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022-2024 NeuroML Group 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /amid/transforms.py: -------------------------------------------------------------------------------- 1 | import nibabel 2 | import numpy as np 3 | from connectome import Output, Transform 4 | 5 | 6 | class SpacingFromAffine(Transform): 7 | __inherit__ = True 8 | 9 | def spacing(affine): 10 | return nibabel.affines.voxel_sizes(affine) 11 | 12 | 13 | class ParseAffineMatrix(Transform): 14 | """Splits affine matrix into separate methods for more convenient usage. 
15 | 16 | Examples 17 | -------- 18 | >>> dataset = Dataset() 19 | >>> dataset.voxel_spacing(id_) 20 | # FieldError 21 | >>> dataset = dataset >> ParseAffineMatrix() 22 | >>> dataset.voxel_spacing(id_) 23 | # array([1.5, 1.5, 1.5]) 24 | """ 25 | 26 | __inherit__ = True 27 | 28 | def origin(affine): 29 | """Constructs an origin tensor from the given affine matrix.""" 30 | return affine[:-1, -1] 31 | 32 | def spacing(affine): 33 | """Constructs a voxel spacing tensor from the given orientation matrix.""" 34 | return np.linalg.norm(affine[:3, :3], axis=0) 35 | 36 | def orientation(affine, spacing: Output): 37 | """Constructs an orientation matrix from the given affine matrix.""" 38 | return np.divide(affine[:3, :3], spacing) 39 | -------------------------------------------------------------------------------- /amid/__init__.py: -------------------------------------------------------------------------------- 1 | from connectome.cache import unstable_module 2 | 3 | from .__version__ import __version__ 4 | from .amos import AMOS 5 | from .bimcv import BIMCVCovid19 6 | from .brats2021 import BraTS2021 7 | from .cancer_500 import MoscowCancer500 8 | from .cc359 import CC359 9 | from .cl_detection import CLDetection2023 10 | from .covid_1110 import MoscowCovid1110 11 | from .crlm import CRLM 12 | from .crossmoda import CrossMoDA 13 | from .ct_ich import CT_ICH 14 | from .curvas import CURVAS 15 | from .deeplesion import DeepLesion 16 | from .egd import EGD 17 | from .flare2022 import FLARE2022 18 | from .hcp import HCP 19 | from .lidc import LIDC 20 | from .lits import LiTS 21 | from .liver_medseg import LiverMedseg 22 | from .luna25 import LUNA25 23 | from .medseg9 import Medseg9 24 | from .midrc import MIDRC 25 | from .mood import MOOD 26 | from .mslub import MSLUB 27 | from .nlst import NLST 28 | from .nsclc import NSCLC 29 | from .ribfrac import RibFrac 30 | from .rsna_bc import RSNABreastCancer 31 | from .stanford_coca import StanfordCoCa 32 | from .totalsegmentator import Totalsegmentator 33 | from .upenn_gbm import UPENN_GBM 34 | from .verse import VerSe 35 | from .vs_seg import VSSEG 36 | 37 | 38 | unstable_module(__name__) 39 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: Docs 2 | 3 | on: 4 | release: 5 | types: [ released ] 6 | push: 7 | branches: 8 | - dev 9 | 10 | jobs: 11 | deploy: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v3 15 | - uses: fregante/setup-git-user@v1 16 | - run: git fetch origin gh-pages --depth=1 17 | - uses: actions/setup-python@v2 18 | with: 19 | python-version: '3.10' 20 | - name: Install 21 | run: | 22 | pip install -e . 23 | pip install -r docs/requirements.txt 24 | 25 | # release 26 | - id: get_version 27 | if: github.event_name == 'release' 28 | name: Get the release version 29 | uses: battila7/get-version-action@v2 30 | 31 | - name: Deploy the docs 32 | if: github.event_name == 'release' 33 | run: | 34 | PYTHONPATH=$PYTHONPATH:./docs mike deploy ${{ steps.get_version.outputs.version-without-v }} latest --push --update-aliases 35 | 36 | # dev 37 | # - name: Deploy the docs 38 | # if: ${{ github.ref == 'refs/heads/dev' }} 39 | # run: | 40 | # cd docs 41 | # VERSION=dev python fill_docs.py 42 | # cd .. 
43 | # PYTHONPATH=$PYTHONPATH:./docs mike deploy dev --push 44 | -------------------------------------------------------------------------------- /docs/datasets-api.md: -------------------------------------------------------------------------------- 1 | # Datasets API 2 | 3 | ::: amid.amos.dataset.AMOS 4 | 5 | ::: amid.bimcv.BIMCVCovid19 6 | 7 | ::: amid.brats2021.BraTS2021 8 | 9 | ::: amid.cc359.dataset.CC359 10 | 11 | ::: amid.cl_detection.CLDetection2023 12 | 13 | ::: amid.crlm.CRLM 14 | 15 | ::: amid.ct_ich.CT_ICH 16 | 17 | ::: amid.curvas.CURVAS 18 | 19 | ::: amid.crossmoda.CrossMoDA 20 | 21 | ::: amid.deeplesion.DeepLesion 22 | 23 | ::: amid.egd.EGD 24 | 25 | ::: amid.flare2022.FLARE2022 26 | 27 | ::: amid.hcp.HCP 28 | 29 | ::: amid.kits.KiTS23 30 | 31 | ::: amid.lidc.dataset.LIDC 32 | 33 | ::: amid.lits.dataset.LiTS 34 | 35 | ::: amid.liver_medseg.LiverMedseg 36 | 37 | ::: amid.midrc.MIDRC 38 | 39 | ::: amid.mood.MOOD 40 | 41 | ::: amid.msd.MSD 42 | 43 | ::: amid.mslub.dataset.MSLUB 44 | 45 | ::: amid.medseg9.Medseg9 46 | 47 | ::: amid.cancer_500.dataset.MoscowCancer500 48 | 49 | ::: amid.covid_1110.MoscowCovid1110 50 | 51 | ::: amid.nlst.NLST 52 | 53 | ::: amid.nsclc.NSCLC 54 | 55 | ::: amid.rsna_bc.dataset.RSNABreastCancer 56 | 57 | ::: amid.ribfrac.dataset.RibFrac 58 | 59 | ::: amid.stanford_coca.StanfordCoCa 60 | 61 | ::: amid.tbad.TBAD 62 | 63 | ::: amid.totalsegmentator.dataset.Totalsegmentator 64 | 65 | ::: amid.upenn_gbm.upenn_gbm.UPENN_GBM 66 | 67 | ::: amid.vs_seg.dataset.VSSEG 68 | 69 | ::: amid.verse.VerSe 70 | 71 | -------------------------------------------------------------------------------- /amid/totalsegmentator/utils.py: -------------------------------------------------------------------------------- 1 | import nibabel 2 | import numpy as np 3 | 4 | from ..internals.dataset import register_field 5 | from ..utils import open_nii_gz_file, unpack 6 | from .const import ANATOMICAL_STRUCTURES, LABELS 7 | 8 | 9 | ARCHIVE_ROOT = 'Totalsegmentator_dataset' 10 | 11 | 12 | def label_loader(name): 13 | def loader(self, i): 14 | return self._meta[self._meta['image_id'] == i][name].item() 15 | 16 | register_field('Totalsegmentator', name, loader) 17 | return loader 18 | 19 | 20 | def mask_loader(name): 21 | def loader(self, i): 22 | file = f'{i}/segmentations/{name}.nii.gz' 23 | 24 | with unpack(self.root, file, ARCHIVE_ROOT, '.zip') as (unpacked, is_unpacked): 25 | if is_unpacked: 26 | return np.asarray(nibabel.load(unpacked).dataobj) 27 | else: 28 | with open_nii_gz_file(unpacked) as image: 29 | return np.asarray(image.dataobj) 30 | 31 | register_field('Totalsegmentator', name, loader) 32 | return loader 33 | 34 | 35 | def add_labels(scope): 36 | for label in LABELS: 37 | scope[label] = label_loader(label) 38 | 39 | 40 | def add_masks(scope): 41 | for anatomical_structure in ANATOMICAL_STRUCTURES: 42 | scope[anatomical_structure] = mask_loader(anatomical_structure) 43 | -------------------------------------------------------------------------------- /docs/fill_readme.py: -------------------------------------------------------------------------------- 1 | import re 2 | from pathlib import Path 3 | 4 | import deli 5 | import pandas as pd 6 | from tqdm import tqdm 7 | 8 | from amid.internals.registry import gather_datasets, prepare_for_table 9 | 10 | 11 | file = Path(__file__).resolve().parent.parent / 'README.md' 12 | with open(file, 'r') as fd: 13 | content = fd.read() 14 | 15 | start = re.search(r'# Available datasets', content).end() 16 | stop = re.search(r'Check out \[our 
docs\]', content).start() 17 | raw_data = deli.load('/shared/amid/raw.json') 18 | cache_path = '/shared/amid/cache.json' 19 | cache = deli.load(cache_path) 20 | 21 | records = [] 22 | for name, (cls, module, description) in tqdm(list(gather_datasets().items())): # noqa 23 | if name in cache: 24 | count = cache[name] 25 | else: 26 | count = len(cls(root=raw_data[name]).ids) 27 | cache[name] = count 28 | deli.save(cache, cache_path) 29 | records.append(prepare_for_table(name, count, module, description, 'latest')) 30 | 31 | table = pd.DataFrame.from_records(records).fillna('') 32 | table.columns = [x.replace('_', ' ').capitalize() for x in table.columns] 33 | table = table[['Name', 'Entries', 'Body region', 'Modality']].to_markdown(index=False) 34 | content = f'{content[:start]}\n\n{table}\n\n{content[stop:]}' 35 | 36 | with open(file, 'w') as fd: 37 | fd.write(content) 38 | -------------------------------------------------------------------------------- /amid/lidc/transforms.py: -------------------------------------------------------------------------------- 1 | from typing import Sequence, Union 2 | 3 | import numpy as np 4 | from connectome import Transform 5 | from imops import zoom 6 | 7 | from ..utils import Numeric 8 | 9 | 10 | class CanonicalCTOrientation(Transform): 11 | __exclude__ = ('nodules', 'nodules_masks') 12 | 13 | def image(image): 14 | return image[..., ::-1] 15 | 16 | def cancer(cancer): 17 | return cancer[..., ::-1] 18 | 19 | 20 | class Rescale(Transform): 21 | __exclude__ = ('pixel_spacing', 'slice_locations', 'voxel_spacing', 'orientation_matrix') 22 | 23 | _new_spacing: Union[Sequence[Numeric], Numeric] 24 | _order: int = 1 25 | 26 | def _spacing(spacing, _new_spacing): 27 | _new_spacing = np.broadcast_to(_new_spacing, len(spacing)).copy() 28 | _new_spacing[np.isnan(_new_spacing)] = np.array(spacing)[np.isnan(_new_spacing)] 29 | return tuple(_new_spacing.tolist()) 30 | 31 | def _scale_factor(spacing, _spacing): 32 | return np.float32(spacing) / np.float32(_spacing) 33 | 34 | def spacing(_spacing): 35 | return _spacing 36 | 37 | def image(image, _scale_factor, _order): 38 | return zoom(image.astype(np.float32), _scale_factor, order=_order) 39 | 40 | def cancer(cancer, _scale_factor, _order): 41 | return zoom(cancer.astype(np.float32), _scale_factor, order=_order) > 0.5 42 | -------------------------------------------------------------------------------- /docs/fill_docs.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import deli 4 | import pandas as pd 5 | from tqdm import tqdm 6 | 7 | from amid.__version__ import __version__ as version 8 | from amid.internals.registry import gather_datasets, prepare_for_table 9 | 10 | 11 | # version = os.environ.get('VERSION') 12 | # if not version: 13 | # raise RuntimeError('Please define the "VERSION" env variable') 14 | raw_data = deli.load('/shared/amid/raw.json') 15 | cache_path = '/shared/amid/cache.json' 16 | cache = deli.load(cache_path) 17 | 18 | records = [] 19 | root = Path(__file__).resolve().parent 20 | with open(root / 'datasets-api.md', 'w') as file: 21 | file.write('# Datasets API\n\n') 22 | for name, (cls, module, description) in tqdm(list(gather_datasets().items())): 23 | file.write(f'::: {module}.{name}\n\n') 24 | if name in cache: 25 | count = cache[name] 26 | else: 27 | count = len(cls(root=raw_data[name]).ids) 28 | cache[name] = count 29 | deli.save(cache, cache_path) 30 | 31 | records.append(prepare_for_table(name, count, module, 
description, version)) 32 | 33 | table = pd.DataFrame.from_records(records).fillna('') 34 | table.columns = [x.replace('_', ' ').capitalize() for x in table.columns] 35 | with open(root / 'datasets.md', 'w') as file: 36 | file.write('# Datasets\n\n') 37 | file.write(table.to_markdown(index=False)) 38 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | release: 5 | types: [ released ] 6 | 7 | env: 8 | MODULE_NAME: amid 9 | 10 | jobs: 11 | release: 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v3 16 | - name: Set up Python 17 | uses: actions/setup-python@v4 18 | with: 19 | python-version: '3.10' 20 | 21 | - id: get_version 22 | name: Get the release version 23 | uses: battila7/get-version-action@v2 24 | 25 | - name: Check the version and build the package 26 | run: | 27 | RELEASE=${{ steps.get_version.outputs.version-without-v }} 28 | VERSION=$(python -c "from pathlib import Path; import runpy; folder, = {d.parent for d in Path().resolve().glob('*/__init__.py') if d.parent.is_dir() and (d.parent / '__version__.py').exists()}; print(runpy.run_path(folder / '__version__.py')['__version__'])") 29 | MATCH=$(pip index versions $MODULE_NAME | grep "Available versions:" | grep $VERSION) || echo 30 | echo $MATCH 31 | if [ "$GITHUB_BASE_REF" = "master" ] && [ "$MATCH" != "" ]; then echo "Version $VERSION already present" && exit 1; fi 32 | if [ "$VERSION" != "$RELEASE" ]; then echo "$VERSION vs $RELEASE" && exit 1; fi 33 | pip install build 34 | python -m build --sdist 35 | 36 | - name: Publish to PyPi 37 | uses: pypa/gh-action-pypi-publish@master 38 | with: 39 | password: ${{ secrets.PYPI_API_TOKEN }} 40 | -------------------------------------------------------------------------------- /amid/internals/licenses.py: -------------------------------------------------------------------------------- 1 | from typing import NamedTuple 2 | 3 | 4 | class License(NamedTuple): 5 | name: str 6 | url: str 7 | 8 | 9 | CC0_10 = License(name='CC0 1.0', url='https://creativecommons.org/publicdomain/zero/1.0/') 10 | CC_BY_30 = License( 11 | name='CC BY 3.0', 12 | url='https://creativecommons.org/licenses/by/3.0/', 13 | ) 14 | CC_BY_40 = License( 15 | name='CC BY 4.0', 16 | url='https://creativecommons.org/licenses/by/4.0/', 17 | ) 18 | CC_BYNC_40 = License( 19 | name='CC BY-NC 4.0', 20 | url='https://creativecommons.org/licenses/by-nc/4.0/', 21 | ) 22 | CC_BYND_40 = License( 23 | name='CC BY-ND 4.0', 24 | url='https://creativecommons.org/licenses/by-nd/4.0/', 25 | ) 26 | CC_BYNCND_40 = License( 27 | name='CC BY-NC-ND 4.0', 28 | url='https://creativecommons.org/licenses/by-nc-nd/4.0/', 29 | ) 30 | CC_BYSA_40 = License( 31 | name='CC BY-SA 4.0', 32 | url='https://creativecommons.org/licenses/by-sa/4.0/', 33 | ) 34 | CC_BYNCSA_40 = License( 35 | name='CC BY-NC-SA 4.0', 36 | url='https://creativecommons.org/licenses/by-nc-sa/4.0/', 37 | ) 38 | 39 | PhysioNet_RHD_150 = License( 40 | name='PhysioNet Restricted Health Data License 1.5.0', 41 | url='https://www.physionet.org/about/licenses/physionet-restricted-health-data-license-150/', 42 | ) 43 | 44 | StanfordDSResearch = License( 45 | name='Stanford University Dataset Research Use Agreement', 46 | url='https://stanfordaimi.azurewebsites.net/datasets/e8ca74dc-8dd4-4340-815a-60b41f6cb2aa', # TODO: separate link 47 | ) 48 | 
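Each `License` above is a plain `NamedTuple`; datasets pass one of these constants to `register()` (see `amid/internals/registry.py` and `amid/hcp.py` below), and the registry renders only its `name` in the generated tables. A minimal sketch of that pattern, using a hypothetical dataset and link:

```python
from amid.internals import Dataset, licenses, register

lic = licenses.CC_BY_40
print(lic.name)  # 'CC BY 4.0'
print(lic.url)   # 'https://creativecommons.org/licenses/by/4.0/'


@register(
    body_region='Head',
    license=licenses.CC_BY_40,           # a License NamedTuple; tables show license.name
    link='https://example.org/dataset',  # hypothetical source link
    modality='MRI',
)
class ToyDataset(Dataset):  # hypothetical dataset, for illustration only
    @property
    def ids(self):
        return ('case-0', 'case-1')
```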
-------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "amid" 3 | dynamic = ["version", "dependencies"] 4 | description = "A curated list of medical imaging datasets with unified interfaces" 5 | readme = "README.md" 6 | requires-python = ">=3.8" 7 | license = { file = "LICENSE" } 8 | keywords = ["medical imaging", "dataset"] 9 | authors = [ 10 | { name = "NeuroML Group", email = "max@ira-labs.com" } 11 | ] 12 | classifiers = [ 13 | "Development Status :: 4 - Beta", 14 | "License :: OSI Approved :: MIT License", 15 | "Programming Language :: Python :: 3", 16 | "Programming Language :: Python :: 3.8", 17 | "Programming Language :: Python :: 3.9", 18 | "Programming Language :: Python :: 3.10", 19 | "Programming Language :: Python :: 3.11", 20 | "Programming Language :: Python :: 3 :: Only", 21 | ] 22 | 23 | [project.urls] 24 | "Homepage" = "https://github.com/neuro-ml/amid" 25 | "Issues" = "https://github.com/neuro-ml/amid/issues" 26 | "Source" = "https://github.com/neuro-ml/amid" 27 | "Docs" = "https://neuro-ml.github.io/amid" 28 | 29 | [build-system] 30 | requires = ["setuptools>=43.0.0", "wheel"] 31 | build-backend = "setuptools.build_meta" 32 | 33 | [tool.setuptools.packages.find] 34 | include = ["amid"] 35 | 36 | [tool.setuptools.dynamic] 37 | version = { attr = "amid.__version__.__version__" } 38 | dependencies = { file = "requirements.txt" } 39 | 40 | [tool.pytest.ini_options] 41 | markers = [ 42 | "raw: tests that require the raw files storage", 43 | ] 44 | 45 | [tool.black] 46 | line-length = 120 47 | skip-string-normalization = true 48 | 49 | [tool.isort] 50 | line_length = 120 51 | lines_after_imports = 2 52 | profile = 'black' 53 | combine_as_imports = true 54 | -------------------------------------------------------------------------------- /amid/lits/transforms.py: -------------------------------------------------------------------------------- 1 | from typing import Sequence, Union 2 | 3 | import numpy as np 4 | from connectome import Transform 5 | from imops import zoom 6 | 7 | from ..utils import Numeric, propagate_none 8 | 9 | 10 | class CanonicalCTOrientation(Transform): 11 | __inherit__ = True 12 | 13 | def image(image): 14 | return np.transpose(image, (1, 0, 2))[::-1, :, ::-1] 15 | 16 | def mask(mask): 17 | return np.transpose(mask, (1, 0, 2))[::-1, :, ::-1] 18 | 19 | def spacing(spacing): 20 | return tuple(np.array(spacing)[[1, 0, 2]].tolist()) 21 | 22 | 23 | class Rescale(Transform): 24 | __exclude__ = ( 25 | 'voxel_spacing', 26 | 'affine', 27 | ) 28 | 29 | _new_spacing: Union[Sequence[Numeric], Numeric] 30 | _order: int = 1 31 | 32 | def _spacing(spacing, _new_spacing): 33 | _new_spacing = np.broadcast_to(_new_spacing, len(spacing)).copy() 34 | _new_spacing[np.isnan(_new_spacing)] = np.array(spacing)[np.isnan(_new_spacing)] 35 | return tuple(_new_spacing.tolist()) 36 | 37 | def _scale_factor(spacing, _spacing): 38 | return np.float32(spacing) / np.float32(_spacing) 39 | 40 | def spacing(_spacing): 41 | return _spacing 42 | 43 | def image(image, _scale_factor, _order): 44 | return zoom(image.astype(np.float32), _scale_factor, order=_order) 45 | 46 | @propagate_none 47 | def mask(mask, _scale_factor, _order): 48 | onehot = np.arange(mask.max() + 1) == mask[..., None] 49 | onehot = onehot.astype(mask.dtype).transpose(3, 0, 1, 2) 50 | out = np.array(zoom(onehot.astype(np.float32), _scale_factor, axis=(1, 2, 3)) > 0.5, 
dtype=mask.dtype) 51 | labels = out.argmax(axis=0) 52 | return labels 53 | -------------------------------------------------------------------------------- /amid/lidc/typing.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from typing import NamedTuple, Optional, Sequence 3 | 4 | import numpy as np 5 | 6 | 7 | class Calcification(Enum): 8 | Popcorn, Laminated, Solid, NonCentral, Central, Absent = 1, 2, 3, 4, 5, 6 9 | 10 | 11 | class InternalStructure(Enum): 12 | SoftTissue, Fluid, Fat, Air = 1, 2, 3, 4 13 | 14 | 15 | class Lobulation(Enum): 16 | NoLobulation, NearlyNoLobulation, MediumLobulation, NearMarkedLobulation, MarkedLobulation = 1, 2, 3, 4, 5 17 | 18 | 19 | class Malignancy(Enum): 20 | HighlyUnlikely, ModeratelyUnlikely, Indeterminate, ModeratelySuspicious, HighlySuspicious = 1, 2, 3, 4, 5 21 | 22 | 23 | class Sphericity(Enum): 24 | Linear, OvoidLinear, Ovoid, OvoidRound, Round = 1, 2, 3, 4, 5 25 | 26 | 27 | class Spiculation(Enum): 28 | NoSpiculation, NearlyNoSpiculation, MediumSpiculation, NearMarkedSpiculation, MarkedSpiculation = 1, 2, 3, 4, 5 29 | 30 | 31 | class Subtlety(Enum): 32 | ExtremelySubtle, ModeratelySubtle, FairlySubtle, ModeratelyObvious, Obvious = 1, 2, 3, 4, 5 33 | 34 | 35 | class Texture(Enum): 36 | NonSolidGGO, NonSolidMixed, PartSolidMixed, SolidMixed, Solid = 1, 2, 3, 4, 5 37 | 38 | 39 | class LIDCNodule(NamedTuple): 40 | center_voxel: Sequence[float] 41 | bbox: np.ndarray 42 | diameter_mm: float 43 | surface_area_mm2: float 44 | volume_mm3: float 45 | calcification: Optional[Calcification] = None 46 | internal_structure: Optional[InternalStructure] = None 47 | lobulation: Optional[Lobulation] = None 48 | malignancy: Optional[Malignancy] = None 49 | sphericity: Optional[Sphericity] = None 50 | spiculation: Optional[Spiculation] = None 51 | subtlety: Optional[Subtlety] = None 52 | texture: Optional[Texture] = None 53 | -------------------------------------------------------------------------------- /amid/lidc/nodules.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from pylidc import Annotation 3 | 4 | from .typing import ( 5 | Calcification, 6 | InternalStructure, 7 | LIDCNodule, 8 | Lobulation, 9 | Malignancy, 10 | Sphericity, 11 | Spiculation, 12 | Subtlety, 13 | Texture, 14 | ) 15 | 16 | 17 | def get_nodule(ann: Annotation) -> LIDCNodule: 18 | def init_enum(enum_class, value): 19 | try: 20 | return enum_class(value) 21 | except ValueError: 22 | pass 23 | 24 | bbox = ann.bbox_matrix().T 25 | bbox[1] = bbox[1] + 1 26 | 27 | return LIDCNodule( 28 | center_voxel=ann.centroid, 29 | bbox=bbox, 30 | diameter_mm=ann.diameter, 31 | surface_area_mm2=ann.surface_area, 32 | volume_mm3=ann.volume, 33 | calcification=init_enum(Calcification, ann.calcification), 34 | internal_structure=init_enum(InternalStructure, ann.internalStructure), 35 | lobulation=init_enum(Lobulation, ann.lobulation), 36 | malignancy=init_enum(Malignancy, ann.malignancy), 37 | sphericity=init_enum(Sphericity, ann.sphericity), 38 | spiculation=init_enum(Spiculation, ann.spiculation), 39 | subtlety=init_enum(Subtlety, ann.subtlety), 40 | texture=init_enum(Texture, ann.texture), 41 | ) 42 | 43 | 44 | def flip_nodule(nodule: LIDCNodule, n_slices: int) -> LIDCNodule: 45 | bbox = nodule.bbox.copy() 46 | start_slice, stop_slice = bbox[:, -1] 47 | bbox[:, -1] = np.array([n_slices - stop_slice, n_slices - start_slice]) 48 | 49 | center_voxel = nodule.center_voxel 50 | 
center_voxel[-1] = n_slices - center_voxel[-1] 51 | 52 | return nodule._replace( 53 | center_voxel=center_voxel, 54 | bbox=bbox, 55 | ) 56 | -------------------------------------------------------------------------------- /amid/upenn_gbm/data_classes.py: -------------------------------------------------------------------------------- 1 | from typing import NamedTuple 2 | 3 | 4 | class ClinicalInfo(NamedTuple): 5 | gender: str 6 | age_at_scan_years: float 7 | survival_from_surgery_days: int 8 | idh1: str 9 | mgmt: str 10 | kps: str 11 | gtr_over90percent: str 12 | time_since_baseline_preop: int 13 | psp_tp_score: float 14 | 15 | 16 | class AcquisitionInfo(NamedTuple): 17 | manufacturer: str 18 | model: str 19 | magnetic_field_strength: float 20 | t1_imaging_frequency: float 21 | t1_repetition_time: float 22 | t1_echo_time: float 23 | t1_inversion_time: float 24 | t1_flip_angle: float 25 | t1_pixel_spacing: str 26 | t1_slice_thickness: float 27 | t1gd_imaging_frequency: float 28 | t1gd_repetition_time: float 29 | t1gd_echo_time: float 30 | t1gd_inversion_time: float 31 | t1gd_flip_angle: float 32 | t1gd_pixel_spacing: str 33 | t1gd_slice_thickness: float 34 | t2_imaging_frequency: float 35 | t2_repetition_time: float 36 | t2_echo_time: float 37 | t2_flip_angle: float 38 | t2_pixel_spacing: str 39 | t2_slice_thickness: float 40 | flair_imaging_frequency: float 41 | flair_repetition_time: float 42 | flair_echo_time: float 43 | flair_inversion_time: float 44 | flair_flip_angle: float 45 | flair_pixel_spacing: str 46 | flair_slice_thickness: float 47 | dti_imaging_frequency: float 48 | dti_repetition_time: float 49 | dti_echo_time: float 50 | dti_flip_angle: float 51 | dti_pixel_spacing: str 52 | dti_slice_thickness: float 53 | dsc_imaging_frequency: float 54 | dsc_repetition_time: float 55 | dsc_echo_time: float 56 | dsc_flip_angle: float 57 | dsc_pixel_spacing: str 58 | dsc_slice_thickness: float 59 | -------------------------------------------------------------------------------- /tests/test_consistency.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | import numpy as np 4 | import pytest 5 | 6 | from amid.internals import gather_datasets 7 | 8 | 9 | MAPPING = gather_datasets() 10 | DATASETS = [x[0] for x in MAPPING.values()] 11 | NAMES = list(MAPPING) 12 | 13 | 14 | @pytest.mark.raw 15 | @pytest.mark.parametrize('cls', DATASETS, ids=NAMES) 16 | def test_ids_availability(cls): 17 | assert len(cls().ids) > 0 18 | 19 | 20 | @pytest.mark.raw 21 | @pytest.mark.parametrize('cls', DATASETS, ids=NAMES) 22 | def test_pickleable(cls): 23 | raw = cls()[0] 24 | cached = cls() 25 | fields = dir(raw) 26 | 27 | for ds in raw, cached: 28 | loader = ds._compile(fields) 29 | pickle.dumps(loader) 30 | 31 | f = cached._compile('ids') 32 | raw = pickle.dumps(f) 33 | g = pickle.loads(raw) 34 | assert f() == g() 35 | 36 | 37 | # @pytest.mark.raw 38 | # @pytest.mark.parametrize('cls', ROOT_MAPPING, ids=[cls.__name__ for cls in ROOT_MAPPING]) 39 | # def test_cache_consistency(cls): 40 | # raw = cls(root=ROOT_MAPPING[cls]) 41 | # cached = raw.cached() 42 | # fields = {x.name for x in raw._container.outputs} - {'ids', 'id', 'cached'} 43 | # 44 | # ids = raw.ids 45 | # assert ids == cached.ids 46 | # for i in ids: 47 | # for field in fields: 48 | # compare(getattr(raw, field)(i), getattr(cached, field)(i)) 49 | 50 | 51 | # TODO: find a package for this 52 | def compare(x, y): 53 | assert type(x) == type(y) 54 | if isinstance(x, (str, int, float, bytes)): 55 | 
assert x == y 56 | elif isinstance(x, (np.ndarray, np.generic)): 57 | np.testing.assert_allclose(x, y) 58 | elif isinstance(x, (list, tuple)): 59 | list(map(compare, x, y)) 60 | else: 61 | raise TypeError(type(x)) 62 | -------------------------------------------------------------------------------- /docs/javascript/tablesort.filesize.js: -------------------------------------------------------------------------------- 1 | // Filesizes. e.g. '5.35 K', '10 MB', '12.45 GB', or '4.67 TiB' 2 | (function(){ 3 | var compareNumber = function(a, b) { 4 | a = parseFloat(a); 5 | b = parseFloat(b); 6 | 7 | a = isNaN(a) ? 0 : a; 8 | b = isNaN(b) ? 0 : b; 9 | 10 | return a - b; 11 | }, 12 | 13 | cleanNumber = function(i) { 14 | return i.replace(',', '.').replace(/[^\-?0-9.]/g, ''); 15 | }, 16 | 17 | // Returns suffix multiplier 18 | // Ex. suffix2num('KB') -> 1000 19 | // Ex. suffix2num('KiB') -> 1024 20 | suffix2num = function(suffix) { 21 | suffix = suffix.toLowerCase(); 22 | var base = suffix[1] === 'i' ? 1024 : 1000; 23 | 24 | switch(suffix[0]) { 25 | case 'k': 26 | return Math.pow(base, 2); 27 | case 'm': 28 | return Math.pow(base, 3); 29 | case 'g': 30 | return Math.pow(base, 4); 31 | case 't': 32 | return Math.pow(base, 5); 33 | case 'p': 34 | return Math.pow(base, 6); 35 | case 'e': 36 | return Math.pow(base, 7); 37 | case 'z': 38 | return Math.pow(base, 8); 39 | case 'y': 40 | return Math.pow(base, 9); 41 | default: 42 | return base; 43 | } 44 | }, 45 | 46 | // Converts filesize to bytes 47 | // Ex. filesize2num('123 KB') -> 123000 48 | // Ex. filesize2num('123 KiB') -> 125952 49 | filesize2num = function(filesize) { 50 | var matches = filesize.match(/^(\d+([.,]\d+)?) ?((K|M|G|T|P|E|Z|Y|B$)i?B?)$/i); 51 | 52 | var num = parseFloat(cleanNumber(matches[1])), 53 | suffix = matches[3]; 54 | 55 | return num * suffix2num(suffix); 56 | }; 57 | 58 | Tablesort.extend('filesize', function(item) { 59 | return /^\d+([.,]\d+)? 
?(K|M|G|T|P|E|Z|Y|B$)i?B?$/i.test(item); 60 | }, function(a, b) { 61 | a = filesize2num(a); 62 | b = filesize2num(b); 63 | 64 | return compareNumber(b, a); 65 | }); 66 | }()); 67 | -------------------------------------------------------------------------------- /amid/vs_seg/transforms.py: -------------------------------------------------------------------------------- 1 | from typing import Sequence, Union 2 | 3 | import numpy as np 4 | from connectome import Transform 5 | from imops import zoom 6 | 7 | from ..utils import Numeric, propagate_none 8 | 9 | 10 | class CanonicalMRIOrientation(Transform): 11 | __inherit__ = True 12 | 13 | def image(image): 14 | return np.transpose(image, (1, 0, 2))[..., ::-1] 15 | 16 | def spacing(spacing): 17 | return tuple(np.array(spacing)[[1, 0, 2]].tolist()) 18 | 19 | @propagate_none 20 | def schwannoma(schwannoma): 21 | return np.transpose(schwannoma, (1, 0, 2))[..., ::-1] 22 | 23 | @propagate_none 24 | def cochlea(cochlea): 25 | return np.transpose(cochlea, (1, 0, 2))[..., ::-1] 26 | 27 | @propagate_none 28 | def meningioma(meningioma): 29 | return np.transpose(meningioma, (1, 0, 2))[..., ::-1] 30 | 31 | 32 | class Rescale(Transform): 33 | __inherit__ = True 34 | 35 | _new_spacing: Union[Sequence[Numeric], Numeric] 36 | _order: int = 1 37 | 38 | def _spacing(spacing, _new_spacing): 39 | _new_spacing = np.broadcast_to(_new_spacing, len(spacing)).copy() 40 | _new_spacing[np.isnan(_new_spacing)] = np.array(spacing)[np.isnan(_new_spacing)] 41 | return tuple(_new_spacing.tolist()) 42 | 43 | def _scale_factor(spacing, _spacing): 44 | return np.float32(spacing) / np.float32(_spacing) 45 | 46 | def spacing(_spacing): 47 | return _spacing 48 | 49 | def image(image, _scale_factor, _order): 50 | return zoom(image.astype(np.float32), _scale_factor, order=_order) 51 | 52 | @propagate_none 53 | def schwannoma(schwannoma, _scale_factor, _order): 54 | return zoom(schwannoma.astype(np.float32), _scale_factor, order=_order) > 0.5 55 | 56 | @propagate_none 57 | def cochlea(cochlea, _scale_factor, _order): 58 | return zoom(cochlea.astype(np.float32), _scale_factor, order=_order) > 0.5 59 | 60 | @propagate_none 61 | def meningioma(meningioma, _scale_factor, _order): 62 | return zoom(meningioma.astype(np.float32), _scale_factor, order=_order) > 0.5 63 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: 'AMID: Awesome Medical Imaging Datasets' 2 | site_url: https://neuro-ml.github.io/amid 3 | repo_url: https://github.com/neuro-ml/amid 4 | 5 | plugins: 6 | - mkdocs-jupyter 7 | - search 8 | - autorefs 9 | - mike: 10 | canonical_version: latest 11 | - mkdocstrings: 12 | default_handler: python_connectome 13 | handlers: 14 | python_connectome: 15 | options: 16 | docstring_style: numpy 17 | merge_init_into_class: true 18 | members_order: source 19 | show_if_no_docstring: true 20 | show_bases: false 21 | show_signature_annotations: true 22 | show_root_heading: true 23 | show_source: false 24 | heading_level: 3 25 | - exclude: 26 | glob: 27 | - '**/python_connectome.py' 28 | - 'fill_docs.py' 29 | - 'fill_readme.py' 30 | 31 | theme: 32 | name: material 33 | icon: 34 | repo: fontawesome/brands/github-alt 35 | palette: 36 | - media: "(prefers-color-scheme: dark)" 37 | scheme: slate 38 | toggle: 39 | icon: material/lightbulb-outline 40 | name: Switch to light mode 41 | - media: "(prefers-color-scheme: light)" 42 | scheme: default 43 | toggle: 44 | 
icon: material/lightbulb 45 | name: Switch to dark mode 46 | 47 | markdown_extensions: 48 | - admonition 49 | - tables 50 | - pymdownx.highlight: 51 | anchor_linenums: true 52 | - pymdownx.inlinehilite 53 | - pymdownx.snippets 54 | - pymdownx.details 55 | - pymdownx.superfences 56 | - toc: 57 | toc_depth: 3 58 | 59 | extra: 60 | version: 61 | provider: mike 62 | 63 | extra_javascript: 64 | - https://unpkg.com/tablesort@5.3.0/dist/tablesort.min.js 65 | - https://unpkg.com/tablesort@5.3.0/dist/sorts/tablesort.number.min.js 66 | - https://unpkg.com/tablesort@5.3.0/dist/sorts/tablesort.date.min.js 67 | - https://unpkg.com/tablesort@5.3.0/dist/sorts/tablesort.dotsep.min.js 68 | - https://unpkg.com/tablesort@5.3.0/dist/sorts/tablesort.monthname.min.js 69 | - javascript/tablesort.filesize.js 70 | - javascript/tablesort.js 71 | -------------------------------------------------------------------------------- /amid/cc359/transforms.py: -------------------------------------------------------------------------------- 1 | from typing import Sequence, Union 2 | 3 | import numpy as np 4 | from connectome import Transform 5 | from imops import zoom 6 | 7 | from ..utils import Numeric, propagate_none 8 | 9 | 10 | class CanonicalMRIOrientation(Transform): 11 | __inherit__ = True 12 | 13 | def image(image): 14 | return np.transpose(image, (1, 0, 2))[::-1, :, ::-1] 15 | 16 | def spacing(spacing): 17 | return tuple(np.array(spacing)[[1, 0, 2]].tolist()) 18 | 19 | def brain(brain): 20 | return np.transpose(brain, (1, 0, 2))[::-1, :, ::-1] 21 | 22 | @propagate_none 23 | def hippocampus(hippocampus): 24 | return np.transpose(hippocampus, (1, 0, 2))[::-1, :, ::-1] 25 | 26 | @propagate_none 27 | def wm_gm_csf(wm_gm_csf): 28 | return np.transpose(wm_gm_csf, (1, 0, 2))[::-1, :, ::-1] 29 | 30 | 31 | class Rescale(Transform): 32 | __inherit__ = True 33 | 34 | _new_spacing: Union[Sequence[Numeric], Numeric] 35 | _order: int = 1 36 | 37 | def _spacing(spacing, _new_spacing): 38 | _new_spacing = np.broadcast_to(_new_spacing, len(spacing)).copy() 39 | _new_spacing[np.isnan(_new_spacing)] = np.array(spacing)[np.isnan(_new_spacing)] 40 | return tuple(_new_spacing.tolist()) 41 | 42 | def _scale_factor(spacing, _spacing): 43 | return np.float32(spacing) / np.float32(_spacing) 44 | 45 | def spacing(_spacing): 46 | return _spacing 47 | 48 | def image(image, _scale_factor, _order): 49 | return zoom(image.astype(np.float32), _scale_factor, order=_order) 50 | 51 | def brain(brain, _scale_factor, _order): 52 | return zoom(brain.astype(np.float32), _scale_factor, order=_order) > 0.5 53 | 54 | @propagate_none 55 | def hippocampus(hippocampus, _scale_factor, _order): 56 | return zoom(hippocampus.astype(np.float32), _scale_factor, order=_order) > 0.5 57 | 58 | @propagate_none 59 | def wm_gm_csf(wm_gm_csf, _scale_factor, _order): 60 | onehot = np.arange(wm_gm_csf.max() + 1) == wm_gm_csf[..., None] 61 | onehot = onehot.astype(wm_gm_csf.dtype).transpose(3, 0, 1, 2) 62 | out = np.array(zoom(onehot.astype(np.float32), _scale_factor, axis=(1, 2, 3)) > 0.5, dtype=wm_gm_csf.dtype) 63 | labels = out.argmax(axis=0) 64 | return labels 65 | -------------------------------------------------------------------------------- /amid/internals/registry.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import inspect 3 | from collections import OrderedDict 4 | from pathlib import Path 5 | from typing import NamedTuple, Type 6 | 7 | import pandas as pd 8 | 9 | from .licenses import License 10 | 11 | 
12 | _REGISTRY = {} 13 | 14 | 15 | class Description(NamedTuple): 16 | body_region: str = None 17 | license: str = None 18 | link: str = None 19 | modality: str = None 20 | prep_data_size: str = None 21 | raw_data_size: str = None 22 | task: str = None 23 | 24 | 25 | def register(**kwargs): 26 | def decorator(cls: Type): 27 | _register(cls, cls.__name__, description, 2) 28 | # cls._path = path 29 | return cls 30 | 31 | # path = kwargs.pop('path') 32 | description = Description(**kwargs) 33 | return decorator 34 | 35 | 36 | def _register(cls, name, description, level): 37 | module = inspect.getmodule(inspect.stack()[level][0]).__name__ 38 | assert name not in _REGISTRY, name 39 | _REGISTRY[name] = cls, module, description 40 | 41 | 42 | def gather_datasets(): 43 | for f in Path(__file__).resolve().parent.parent.iterdir(): 44 | module_name = f'amid.{f.stem}' 45 | importlib.import_module(module_name) 46 | 47 | return OrderedDict((k, _REGISTRY[k]) for k in sorted(_REGISTRY)) 48 | 49 | 50 | def prepare_for_table(name, count, module, description, version): 51 | def stringify(x): 52 | if pd.isnull(x): 53 | return '' 54 | if isinstance(x, str): 55 | return x 56 | if isinstance(x, (list, tuple)): 57 | return ', '.join(x) 58 | return x 59 | 60 | entry = {'name': name, 'entries': count} 61 | entry.update({k: v for k, v in description._asdict().items() if not pd.isnull(v)}) 62 | license_ = entry.get('license', None) 63 | if license_: 64 | if isinstance(license_, License): 65 | license_ = f'{license_.name}' 66 | entry['license'] = license_ 67 | 68 | link = entry.pop('link', None) 69 | if link is not None: 70 | entry['link'] = f'Source' 71 | 72 | entry['name'] = f'{name}' 73 | return {k: stringify(v) for k, v in entry.items()} 74 | -------------------------------------------------------------------------------- /amid/hcp.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | import zipfile 3 | from pathlib import Path 4 | from zipfile import ZipFile 5 | 6 | import nibabel as nb 7 | import numpy as np 8 | 9 | from .internals import Dataset, field, licenses, register 10 | 11 | 12 | @register( 13 | body_region='Head', 14 | license=licenses.CC_BYNCND_40, 15 | link='https://www.humanconnectome.org/study/hcp-young-adult/document/1200-subjects-data-release', 16 | modality='MRI', 17 | prep_data_size='125G', 18 | raw_data_size='125G', 19 | task='Segmentation', 20 | ) 21 | class HCP(Dataset): 22 | @property 23 | def ids(self): 24 | result = set() 25 | for archive in self.root.glob('*.zip'): 26 | with ZipFile(archive) as zf: 27 | for zipinfo in zf.infolist(): 28 | if zipinfo.is_dir(): 29 | continue 30 | result.add(zipinfo.filename.split('/')[0]) 31 | 32 | return tuple(sorted(result)) 33 | 34 | def _file(self, i): 35 | for archive in self.root.glob('*.zip'): 36 | with ZipFile(archive) as zf: 37 | for zipinfo in zf.infolist(): 38 | if zipinfo.is_dir(): 39 | continue 40 | file = Path(zipinfo.filename) 41 | if (i in file.stem) and ('T1w_MPR1' in file.stem): 42 | return zipfile.Path(str(archive), str(file)) 43 | 44 | @field 45 | def image(self, i) -> np.ndarray: 46 | with self._file(i).open('rb') as opened: 47 | with gzip.GzipFile(fileobj=opened) as nii: 48 | nii = nb.FileHolder(fileobj=nii) 49 | image = nb.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 50 | return np.int16(image.get_fdata()) 51 | 52 | @field 53 | def affine(self, i) -> np.ndarray: 54 | with self._file(i).open('rb') as opened: 55 | with gzip.GzipFile(fileobj=opened) as nii: 56 | nii = 
nb.FileHolder(fileobj=nii) 57 | image = nb.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 58 | return image.affine 59 | 60 | def spacing(self, i): 61 | with self._file(i).open('rb') as opened: 62 | with gzip.GzipFile(fileobj=opened) as nii: 63 | nii = nb.FileHolder(fileobj=nii) 64 | image = nb.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 65 | return tuple(image.header['pixdim'][1:4]) 66 | -------------------------------------------------------------------------------- /amid/mslub/dataset.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import nibabel as nb 4 | 5 | from ..internals import Dataset, licenses, register 6 | 7 | 8 | @register( 9 | body_region='Head', 10 | license=licenses.CC_BY_30, 11 | link='https://github.com/muschellij2/open_ms_data?tab=readme-ov-file', 12 | modality='MRI', 13 | prep_data_size='18G', 14 | raw_data_size='5.9G', 15 | task='Anomaly segmentation', 16 | ) 17 | class MSLUB(Dataset): 18 | @property 19 | def ids(self): 20 | result = set() 21 | for file in self.root.glob('**/*.gz'): 22 | if ('raw' not in str(file)) or ('gt' in str(file)): 23 | continue 24 | patient = file.parent.name 25 | plane = file.parent.parent.parent.name 26 | ind = f'{plane}-{patient}' 27 | if 'longitudinal' in str(file): 28 | filename = file.name 29 | study_number = filename.split('_')[0] 30 | ind = f'{ind}-{study_number}' 31 | result.add(ind) 32 | return list(result) 33 | 34 | def _file(self, i): 35 | plane = i.split('-')[0] 36 | patient = i.split('-')[1] 37 | path = self.root / plane / 'raw' / patient 38 | if 'longitudinal' in i: 39 | study_number = i.split('-')[2] 40 | return path / study_number 41 | return path 42 | 43 | def image(self, i): 44 | file = self._file(i) 45 | if 'longitudinal' in str(file): 46 | study_number = file.stem 47 | file_name = file.parent / f'{study_number}_FLAIR.nii.gz' 48 | else: 49 | file_name = file / 'FLAIR.nii.gz' 50 | image = nb.load(file_name).get_fdata() 51 | return image 52 | 53 | def mask(self, i): 54 | file = self._file(i) 55 | if 'longitudinal' in str(file): 56 | file_name = file.parent / 'gt.nii.gz' 57 | else: 58 | file_name = file / 'consensus_gt.nii.gz' 59 | image = nb.load(file_name).get_fdata() 60 | return image 61 | 62 | def patient(self, i): 63 | file = self._file(i) 64 | if 'longitudinal' in str(file): 65 | return Path(file).parent.name 66 | else: 67 | return Path(file).name 68 | 69 | def affine(self, i): 70 | file = self._file(i) 71 | if 'longitudinal' in str(file): 72 | study_number = file.stem 73 | file_name = file.parent / f'{study_number}_FLAIR.nii.gz' 74 | else: 75 | file_name = file / 'FLAIR.nii.gz' 76 | return nb.load(file_name).affine 77 | -------------------------------------------------------------------------------- /docs/mkdocstrings_handlers/python_connectome.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | from collections import OrderedDict 3 | 4 | from griffe.dataclasses import Alias, Attribute, Class, Function, Parameter, ParameterKind, Parameters 5 | from mkdocstrings_handlers.python.handler import PythonHandler 6 | 7 | 8 | class PythonConnectomeHandler(PythonHandler): 9 | def get_templates_dir(self, handler: str): 10 | return super().get_templates_dir('python') 11 | 12 | def collect(self, identifier: str, config: dict): 13 | result = super().collect(identifier, config) 14 | m, p = result.path.rsplit('.', 1) 15 | v = getattr(importlib.import_module(m), p) 16 | if hasattr(v, 
'__origin__'): 17 | origin = v.__origin__ 18 | if origin.__qualname__ != result.name: 19 | origin = super().collect(f'{origin.__module__}.{origin.__qualname__}', config) 20 | origin.name = result.name 21 | result = origin 22 | 23 | if isinstance(result, Alias): 24 | result.target = self.patch_class(result.target) 25 | else: 26 | result = self.patch_class(result) 27 | return result 28 | 29 | @staticmethod 30 | def patch_class(x: Class): 31 | members = OrderedDict() 32 | for name, v in x.members.items(): 33 | if not name.startswith('_'): 34 | if isinstance(v, Function): 35 | if name == 'ids': 36 | v.parameters = Parameters() 37 | else: 38 | v.parameters = Parameters( 39 | Parameter('id', annotation='str', kind=ParameterKind.positional_or_keyword) 40 | ) 41 | 42 | elif isinstance(v, Attribute): 43 | v = Function( 44 | name, 45 | parameters=Parameters( 46 | Parameter('id', annotation='str', kind=ParameterKind.positional_or_keyword) 47 | ), 48 | parent=x, 49 | ) 50 | 51 | else: 52 | raise TypeError(v) 53 | 54 | members[name] = v 55 | 56 | x.members = members 57 | return x 58 | 59 | 60 | def get_handler(theme: str, custom_templates=None, config_file_path=None, paths=None, **config): 61 | return PythonConnectomeHandler( 62 | handler='python_connectome', 63 | theme=theme, 64 | custom_templates=custom_templates, 65 | config_file_path=config_file_path, 66 | paths=paths, 67 | ) 68 | -------------------------------------------------------------------------------- /amid/kits.py: -------------------------------------------------------------------------------- 1 | import nibabel as nb 2 | import numpy as np 3 | 4 | from .internals import Dataset, field, register 5 | from .utils import PathOrStr 6 | 7 | 8 | @register( 9 | body_region='thorax', 10 | license=None, # todo 11 | link='https://kits-challenge.org/kits23/', 12 | modality='CT', 13 | prep_data_size='50G', 14 | raw_data_size='12G', 15 | task='Kidney Tumor Segmentation', 16 | ) 17 | class KiTS23(Dataset): 18 | """Kidney and Kidney Tumor Segmentation Challenge, 19 | The 2023 Kidney and Kidney Tumor Segmentation challenge (abbreviated KiTS23) 20 | is a competition in which teams compete to develop the best system for 21 | automatic semantic segmentation of kidneys, renal tumors, and renal cysts. 22 | 23 | Competition page is https://kits-challenge.org/kits23/, official competition repository is 24 | https://github.com/neheller/kits23/. 25 | 26 | For usage, clone the repository https://github.com/neheller/kits23/, install and run `kits23_download_data`. 27 | 28 | Parameters 29 | ---------- 30 | root: str, Path 31 | Absolute path to the root containing the downloaded archive and meta. 32 | If not provided, the cache is assumed to be already populated. 
33 | """ 34 | 35 | def __init__(self, root: PathOrStr): 36 | super().__init__(root) 37 | if not (self.root / "dataset").exists(): 38 | raise FileNotFoundError(f"Dataset not found in {self.root}") 39 | 40 | @property 41 | def ids(self): 42 | return tuple(sorted(sub.name for sub in (self.root / 'dataset').glob('*'))) 43 | 44 | @field 45 | def image(self, i): 46 | # CT images are integer-valued, this will help us improve compression rates 47 | image_file = nb.load(self.root / 'dataset' / i / 'imaging.nii.gz') 48 | return np.int16(image_file.get_fdata()[...]) 49 | 50 | # TODO add multiple segmentations 51 | @field 52 | def mask(self, i): 53 | """Combined annotation for kidneys, tumor and cyst (if present).""" 54 | ct_scan_nifti = nb.load(self.root / 'dataset' / i / 'segmentation.nii.gz') 55 | return np.int8(ct_scan_nifti.get_fdata()) 56 | 57 | @field 58 | def affine(self, i): 59 | """The 4x4 matrix that gives the image's spatial orientation.""" 60 | image_file = nb.load(self.root / 'dataset' / i / 'imaging.nii.gz') 61 | return image_file.affine 62 | 63 | @property 64 | def labels_names(self): 65 | """Indicates which label correspond to which mask, consistent accross all samples.""" 66 | return KITS_LABEL_NAMES 67 | 68 | 69 | KITS_LABEL_NAMES = { 70 | # https://github.com/neheller/kits23/blob/063d4c00afd383fc68145a00c0aa6a4e2a3c0f50/kits23/configuration/labels.py#L23 71 | 1: 'kidney', 72 | 2: 'tumor', 73 | 3: 'cyst', 74 | } 75 | -------------------------------------------------------------------------------- /amid/covid_1110.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | from typing import Union 3 | 4 | import nibabel 5 | import numpy as np 6 | 7 | from .internals import Dataset, field, register 8 | 9 | 10 | @register( 11 | body_region='Thorax', 12 | modality='CT', 13 | task='COVID-19 Segmentation', 14 | link='https://mosmed.ai/en/datasets/covid191110/', 15 | raw_data_size='21G', 16 | ) 17 | class MoscowCovid1110(Dataset): 18 | """ 19 | The Moscow Radiology COVID-19 dataset. 20 | 21 | Parameters 22 | ---------- 23 | root : str, Path, optional 24 | path to the folder containing the raw downloaded files. 25 | If not provided, the cache is assumed to be already populated. 
26 | 27 | Notes 28 | ----- 29 | Download links: 30 | https://mosmed.ai/en/datasets/covid191110/ 31 | 32 | Examples 33 | -------- 34 | >>> # Place the downloaded files in any folder and pass the path to the constructor: 35 | >>> ds = MoscowCovid1110(root='/path/to/files/root') 36 | >>> print(len(ds.ids)) 37 | # 1110 38 | >>> print(ds.image(ds.ids[0]).shape) 39 | # (512, 512, 43) 40 | """ 41 | 42 | @property 43 | def ids(self): 44 | return sorted({f.name[:-7] for f in self.root.glob('CT-*/*')}) 45 | 46 | def _file(self, i): 47 | return next(self.root.glob(f'CT-*/{i}.nii.gz')) 48 | 49 | @field 50 | def image(self, i) -> np.ndarray: 51 | with self._file(i).open('rb') as opened: 52 | with gzip.GzipFile(fileobj=opened) as nii: 53 | nii = nibabel.FileHolder(fileobj=nii) 54 | image = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 55 | # most ct scans are integer-valued, this will help us improve compression rates 56 | # (instead of using `image.get_fdata()`) 57 | return np.asarray(image.dataobj) 58 | 59 | @field 60 | def affine(self, i) -> np.ndarray: 61 | with self._file(i).open('rb') as opened: 62 | with gzip.GzipFile(fileobj=opened) as nii: 63 | nii = nibabel.FileHolder(fileobj=nii) 64 | image = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 65 | return image.affine 66 | 67 | @field 68 | def label(self, i) -> str: 69 | return self._file(i).parent.name[3:] 70 | 71 | @field 72 | def mask(self, i) -> Union[np.ndarray, None]: 73 | path = self.root / 'masks' / f'{i}_mask.nii.gz' 74 | if not path.exists(): 75 | return 76 | 77 | with path.open('rb') as opened: 78 | with gzip.GzipFile(fileobj=opened) as nii: 79 | nii = nibabel.FileHolder(fileobj=nii) 80 | image = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 81 | return np.asarray(image.dataobj) > 0.5 82 | -------------------------------------------------------------------------------- /amid/totalsegmentator/const.py: -------------------------------------------------------------------------------- 1 | ANATOMICAL_STRUCTURES = [ 2 | 'adrenal_gland_left', 3 | 'adrenal_gland_right', 4 | 'aorta', 5 | 'autochthon_left', 6 | 'autochthon_right', 7 | 'brain', 8 | 'clavicula_left', 9 | 'clavicula_right', 10 | 'colon', 11 | 'duodenum', 12 | 'esophagus', 13 | 'face', 14 | 'femur_left', 15 | 'femur_right', 16 | 'gallbladder', 17 | 'gluteus_maximus_left', 18 | 'gluteus_maximus_right', 19 | 'gluteus_medius_left', 20 | 'gluteus_medius_right', 21 | 'gluteus_minimus_left', 22 | 'gluteus_minimus_right', 23 | 'heart_atrium_left', 24 | 'heart_atrium_right', 25 | 'heart_myocardium', 26 | 'heart_ventricle_left', 27 | 'heart_ventricle_right', 28 | 'hip_left', 29 | 'hip_right', 30 | 'humerus_left', 31 | 'humerus_right', 32 | 'iliac_artery_left', 33 | 'iliac_artery_right', 34 | 'iliac_vena_left', 35 | 'iliac_vena_right', 36 | 'iliopsoas_left', 37 | 'iliopsoas_right', 38 | 'inferior_vena_cava', 39 | 'kidney_left', 40 | 'kidney_right', 41 | 'liver', 42 | 'lung_lower_lobe_left', 43 | 'lung_lower_lobe_right', 44 | 'lung_middle_lobe_right', 45 | 'lung_upper_lobe_left', 46 | 'lung_upper_lobe_right', 47 | 'pancreas', 48 | 'portal_vein_and_splenic_vein', 49 | 'pulmonary_artery', 50 | 'rib_left_1', 51 | 'rib_left_10', 52 | 'rib_left_11', 53 | 'rib_left_12', 54 | 'rib_left_2', 55 | 'rib_left_3', 56 | 'rib_left_4', 57 | 'rib_left_5', 58 | 'rib_left_6', 59 | 'rib_left_7', 60 | 'rib_left_8', 61 | 'rib_left_9', 62 | 'rib_right_1', 63 | 'rib_right_10', 64 | 'rib_right_11', 65 | 'rib_right_12', 66 | 'rib_right_2', 67 | 'rib_right_3', 68 | 
'rib_right_4', 69 | 'rib_right_5', 70 | 'rib_right_6', 71 | 'rib_right_7', 72 | 'rib_right_8', 73 | 'rib_right_9', 74 | 'sacrum', 75 | 'scapula_left', 76 | 'scapula_right', 77 | 'small_bowel', 78 | 'spleen', 79 | 'stomach', 80 | 'trachea', 81 | 'urinary_bladder', 82 | 'vertebrae_C1', 83 | 'vertebrae_C2', 84 | 'vertebrae_C3', 85 | 'vertebrae_C4', 86 | 'vertebrae_C5', 87 | 'vertebrae_C6', 88 | 'vertebrae_C7', 89 | 'vertebrae_L1', 90 | 'vertebrae_L2', 91 | 'vertebrae_L3', 92 | 'vertebrae_L4', 93 | 'vertebrae_L5', 94 | 'vertebrae_T1', 95 | 'vertebrae_T10', 96 | 'vertebrae_T11', 97 | 'vertebrae_T12', 98 | 'vertebrae_T2', 99 | 'vertebrae_T3', 100 | 'vertebrae_T4', 101 | 'vertebrae_T5', 102 | 'vertebrae_T6', 103 | 'vertebrae_T7', 104 | 'vertebrae_T8', 105 | 'vertebrae_T9', 106 | ] 107 | 108 | LABELS = ['age', 'gender', 'institute', 'study_type', 'split'] 109 | -------------------------------------------------------------------------------- /amid/rsna_bc/dataset.py: -------------------------------------------------------------------------------- 1 | from contextlib import suppress 2 | from functools import cached_property 3 | 4 | import pandas as pd 5 | import pydicom 6 | 7 | from ..internals import Dataset, field, register 8 | from .utils import csv_field, unpack 9 | 10 | 11 | @register( 12 | body_region='Thorax', 13 | license='Non-Commercial Use', 14 | link='https://www.kaggle.com/competitions/rsna-breast-cancer-detection/data', 15 | modality='MG', 16 | raw_data_size='271G', 17 | prep_data_size='294G', 18 | task='Breast cancer classification', 19 | ) 20 | class RSNABreastCancer(Dataset): 21 | @cached_property 22 | def _meta(self): 23 | dfs = [] 24 | for part in 'train', 'test': 25 | with suppress(FileNotFoundError): 26 | with unpack(self.root, f'{part}.csv') as (file, _): 27 | df = pd.read_csv(file) 28 | df['part'] = part 29 | dfs.append(df) 30 | 31 | if not dfs: 32 | raise FileNotFoundError('No metadata found') 33 | dfs = pd.concat(dfs, ignore_index=True) 34 | for name in 'image_id', 'patient_id', 'site_id': 35 | dfs[name] = dfs[name].astype(str) 36 | 37 | raw = list(map(str, dfs.image_id.tolist())) 38 | ids = set(raw) 39 | if len(ids) != len(raw): 40 | raise ValueError('The image ids are not unique') 41 | 42 | return {row.image_id: row for _, row in dfs.iterrows()} 43 | 44 | # csv fields 45 | site_id = csv_field('site_id', str) 46 | patient_id = csv_field('patient_id', str) 47 | image_id = csv_field('image_id', str) 48 | laterality = csv_field('laterality', None) 49 | view = csv_field('view', None) 50 | age = csv_field('age', None) 51 | cancer = csv_field('cancer', bool) 52 | biopsy = csv_field('biopsy', bool) 53 | invasive = csv_field('invasive', bool) 54 | BIRADS = csv_field('BIRADS', int) 55 | implant = csv_field('implant', bool) 56 | density = csv_field('density', None) 57 | machine_id = csv_field('machine_id', str) 58 | prediction_id = csv_field('prediction_id', str) 59 | difficult_negative_case = csv_field('difficult_negative_case', bool) 60 | 61 | @property 62 | def ids(self): 63 | return tuple(sorted(self._meta)) 64 | 65 | def _dicom(self, i): 66 | row = self._meta[i] 67 | with unpack(self.root, f'{row.part}_images/{row.patient_id}/{row.image_id}.dcm') as (file, _): 68 | return pydicom.dcmread(file) 69 | 70 | @field 71 | def image(self, i): 72 | return self._dicom(i).pixel_array 73 | 74 | @field 75 | def padding_value(self, i): 76 | return getattr(self._dicom(i), 'PixelPaddingValue', None) 77 | 78 | @field 79 | def intensity_sign(self, i): 80 | return getattr(self._dicom(i), 
'PixelIntensityRelationshipSign', None) 81 | -------------------------------------------------------------------------------- /amid/cl_detection.py: -------------------------------------------------------------------------------- 1 | from functools import cached_property 2 | from typing import Dict, Tuple 3 | 4 | import numpy as np 5 | import SimpleITK 6 | from connectome import Transform 7 | from deli import load 8 | from imops import crop_to_box 9 | 10 | from .internals import Dataset, field, licenses, register 11 | from .utils import mask_to_box 12 | 13 | 14 | @register( 15 | body_region='Head', 16 | license=licenses.CC_BYNC_40, 17 | link='https://github.com/cwwang1979/CL-detection2023/', 18 | modality='X-ray', 19 | prep_data_size='1.8G', 20 | raw_data_size='1.5G', 21 | task='Keypoint detection', 22 | ) 23 | class CLDetection2023(Dataset): 24 | """ 25 | The data for the "Cephalometric Landmark Detection in Lateral X-ray Images" Challenge, 26 | held with the MICCAI-2023 conference. 27 | 28 | Notes 29 | ----- 30 | The data can only be obtained by contacting the organizers by email. 31 | See the [challenge home page](https://cl-detection2023.grand-challenge.org/) for details. 32 | 33 | Parameters 34 | ---------- 35 | root : str, Path, optional 36 | path to the folder containing the raw downloaded and unarchived data. 37 | If not provided, the cache is assumed to be already populated. 38 | 39 | Examples 40 | -------- 41 | >>> # Place the downloaded archives in any folder and pass the path to the constructor: 42 | >>> ds = CLDetection2023(root='/path/to/data/root/folder') 43 | >>> print(len(ds.ids)) 44 | # 400 45 | >>> print(ds.image(ds.ids[0]).shape) 46 | # (2400, 1935) 47 | """ 48 | 49 | @cached_property 50 | def _images(self): 51 | return SimpleITK.GetArrayFromImage(SimpleITK.ReadImage(self.root / 'train_stack.mha')) 52 | 53 | @cached_property 54 | def _points(self): 55 | return load(self.root / 'train-gt.json')['points'] 56 | 57 | @property 58 | def ids(self): 59 | return tuple(map(str, range(1, len(self._images) + 1))) 60 | 61 | @field 62 | def image(self, i) -> np.ndarray: 63 | i = int(i) 64 | return self._images[i - 1] 65 | 66 | @field 67 | def points(self, i) -> Dict[str, np.ndarray]: 68 | i = int(i) 69 | return {x['name']: np.array(x['point'][:2]) for x in self._points if x['point'][-1] == i} 70 | 71 | @field 72 | def spacing(self, i) -> Tuple[float, float]: 73 | i = int(i) 74 | (scale,) = {x['scale'] for x in self._points if x['point'][-1] == i} 75 | scale = float(scale) 76 | return scale, scale 77 | 78 | 79 | class CropPadding(Transform): 80 | __inherit__ = 'spacing' 81 | 82 | def _box(image): 83 | return mask_to_box(image[..., 0] != 0) 84 | 85 | def image(image, _box): 86 | return crop_to_box(image[..., 0], _box) 87 | 88 | def points(points, _box): 89 | return {k: v - _box[0] for k, v in points.items()} 90 | 91 | 92 | class FlipPoints(Transform): 93 | __inherit__ = True 94 | 95 | def points(points): 96 | return {name: pt[::-1] for name, pt in points.items()} 97 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: [ pull_request ] 4 | 5 | env: 6 | MODULE_NAME: amid 7 | 8 | jobs: 9 | test: 10 | runs-on: ubuntu-20.04 11 | strategy: 12 | matrix: 13 | python-version: [ '3.8', '3.9', '3.10', '3.11', '3.12' ] 14 | 15 | steps: 16 | - uses: actions/checkout@v3 17 | - name: Set up Python ${{ matrix.python-version }} 18 | 
uses: actions/setup-python@v4 19 | with: 20 | python-version: ${{ matrix.python-version }} 21 | 22 | - name: Check the version 23 | if: "! github.event.pull_request.head.repo.fork " 24 | run: | 25 | VERSION=$(python -c "from pathlib import Path; import runpy; folder, = {d.parent for d in Path().resolve().glob('*/__init__.py') if d.parent.is_dir() and (d.parent / '__version__.py').exists()}; print(runpy.run_path(folder / '__version__.py')['__version__'])") 26 | MATCH=$(pip index versions $MODULE_NAME | grep "Available versions:" | grep $VERSION) || echo 27 | echo $MATCH 28 | if [ "$GITHUB_BASE_REF" = "master" ] && [ "$MATCH" != "" ]; then exit 1; fi 29 | - name: Build the package 30 | run: | 31 | pip install build 32 | python -m build --sdist 33 | 34 | - name: Install 35 | run: | 36 | pip install dist/* 37 | pip install -r tests/requirements.txt 38 | 39 | cd tests 40 | export MODULE_PARENT=$(python -c "import $MODULE_NAME, os; print(os.path.dirname($MODULE_NAME.__path__[0]))") 41 | export MODULE_PARENT=${MODULE_PARENT%"/"} 42 | cd .. 43 | echo $MODULE_PARENT 44 | echo "MODULE_PARENT=$(echo $MODULE_PARENT)" >> $GITHUB_ENV 45 | 46 | - name: Test with pytest 47 | if: "! github.event.pull_request.head.repo.fork " 48 | run: | 49 | # pytest tests -m "not raw" --junitxml=reports/junit-${{ matrix.python-version }}.xml --cov="$MODULE_PARENT/$MODULE_NAME" --cov-report=xml --cov-branch 50 | # for now we only test that everything is importable 51 | pip install setuptools # needed for pylidc to work 52 | python -c "from $MODULE_NAME import *" 53 | # - name: Generate coverage report 54 | # if: "! github.event.pull_request.head.repo.fork " 55 | # run: | 56 | # coverage xml -o reports/coverage-${{ matrix.python-version }}.xml 57 | # sed -i -e "s|$MODULE_PARENT/||g" reports/coverage-${{ matrix.python-version }}.xml 58 | # sed -i -e "s|$(echo $MODULE_PARENT/ | tr "/" .)||g" reports/coverage-${{ matrix.python-version }}.xml 59 | # 60 | # - name: Upload artifacts 61 | # if: "! github.event.pull_request.head.repo.fork " 62 | # uses: actions/upload-artifact@v3 63 | # with: 64 | # name: reports-${{ matrix.python-version }} 65 | # path: reports/*-${{ matrix.python-version }}.xml 66 | 67 | # TODO: coverage is not informative in the CI anyway 68 | # - name: Upload coverage results 69 | # if: "! github.event.pull_request.head.repo.fork " 70 | # uses: codecov/codecov-action@v3 71 | # with: 72 | # fail_ci_if_error: true 73 | # files: reports/coverage-${{ matrix.python-version }}.xml 74 | # verbose: true 75 | -------------------------------------------------------------------------------- /amid/tbad.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | from pathlib import Path 3 | 4 | import nibabel as nb 5 | import numpy as np 6 | 7 | from .internals import Dataset, field, licenses, register 8 | 9 | 10 | @register( 11 | body_region='Chest', 12 | license=licenses.CC_BYNC_40, 13 | link='https://github.com/XiaoweiXu/Dataset_Type-B-Aortic-Dissection', 14 | modality='CT', 15 | prep_data_size='14G', 16 | raw_data_size='14G', 17 | task='Aortic dissection segmentation', 18 | ) 19 | class TBAD(Dataset): 20 | """ 21 | A dataset of 3D Computed Tomography (CT) images for Type-B Aortic Dissection segmentation. 22 | 23 | Notes 24 | ----- 25 | The data can only be obtained by contacting the authors by email. 26 | See the [dataset home page](https://github.com/XiaoweiXu/Dataset_Type-B-Aortic-Dissection) for details. 
27 | 28 | Parameters 29 | ---------- 30 | root : str, Path, optional 31 | path to the folder containing the raw downloaded files. 32 | If not provided, the cache is assumed to be already populated. 33 | 34 | Examples 35 | -------- 36 | >>> # Place the downloaded files in any folder and pass the path to the constructor: 37 | >>> ds = TBAD(root='/path/to/files/root') 38 | >>> print(len(ds.ids)) 39 | # 100 40 | >>> print(ds.image(ds.ids[0]).shape) 41 | # (512, 512, 327) 42 | 43 | References 44 | ---------- 45 | .. [1] Yao, Zeyang & Xie, Wen & Zhang, Jiawei & Dong, Yuhao & Qiu, Hailong & Haiyun, Yuan & Jia, 46 | Qianjun & Tianchen, Wang & Shi, Yiyi & Zhuang, Jian & Que, Lifeng & Xu, Xiaowei & Huang, Meiping. 47 | (2021). ImageTBAD: A 3D Computed Tomography Angiography Image Dataset for Automatic Segmentation 48 | of Type-B Aortic Dissection. Frontiers in Physiology. 12. 732711. 10.3389/fphys.2021.732711. 49 | """ 50 | 51 | @property 52 | def ids(self): 53 | result = set() 54 | 55 | for file in self.root.glob('*_image.nii.gz'): 56 | result.add(file.stem.split('_')[0]) 57 | 58 | return tuple(sorted(result)) 59 | 60 | def _fname(self, i): 61 | return self.root / f'{i}_image.nii.gz' 62 | 63 | def image(self, i) -> np.ndarray: 64 | with self._fname(i).open('rb') as opened: 65 | with gzip.GzipFile(fileobj=opened) as nii: 66 | nii = nb.FileHolder(fileobj=nii) 67 | image = nb.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 68 | return np.int16(image.get_fdata()) 69 | 70 | def affine(self, i) -> np.ndarray: 71 | """The 4x4 matrix that gives the image's spatial orientation.""" 72 | with self._fname(i).open('rb') as opened: 73 | with gzip.GzipFile(fileobj=opened) as nii: 74 | nii = nb.FileHolder(fileobj=nii) 75 | image = nb.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 76 | return image.affine 77 | 78 | @field 79 | def mask(self, i) -> np.ndarray: 80 | with Path(self.root / f'{i}_label.nii.gz').open('rb') as opened: 81 | with gzip.GzipFile(fileobj=opened) as nii: 82 | nii = nb.FileHolder(fileobj=nii) 83 | label = nb.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 84 | return np.uint8(label.get_fdata()) 85 | -------------------------------------------------------------------------------- /amid/ribfrac/dataset.py: -------------------------------------------------------------------------------- 1 | from functools import cached_property 2 | 3 | import nibabel 4 | import numpy as np 5 | 6 | from ..internals import Dataset, licenses, register 7 | 8 | 9 | @register( 10 | body_region='Chest', 11 | license=licenses.CC_BYNC_40, 12 | link='https://ribfrac.grand-challenge.org', 13 | modality='CT', 14 | raw_data_size='77.8 G', 15 | task='Segmentation', 16 | ) 17 | class RibFrac(Dataset): 18 | """ 19 | RibFrac dataset is a benchmark for developping algorithms on rib fracture detection, 20 | segmentation and classification. We hope this large-scale dataset could facilitate 21 | both clinical research for automatic rib fracture detection and diagnoses, 22 | and engineering research for 3D detection, segmentation and classification. 23 | 24 | 25 | Parameters 26 | ---------- 27 | root : str, Path, optional 28 | path to the folder containing the raw downloaded archives. 29 | If not provided, the cache is assumed to be already populated. 
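
    Examples
    --------
    A minimal usage sketch: the root path is a placeholder, and the total number of ids
    (300 + 120 + 80 + 160 = 660) is inferred from the download notes below, not verified.

    >>> ds = RibFrac(root='/path/to/ribfrac')  # must contain Part1, Part2, ribfrac-val-images, ribfrac-test-images
    >>> print(len(ds.ids))
    # 660 (expected)
    >>> image = ds.image(ds.ids[0])  # int16 volume
    >>> label = ds.label(ds.ids[0])  # int16 mask, or None for test-split cases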
30 | 31 | 32 | Notes 33 | ----- 34 | Data downloaded from here: 35 | https://doi.org/10.5281/zenodo.3893507 -- train Part1 (300 images) 36 | https://doi.org/10.5281/zenodo.3893497 -- train Part2 (120 images) 37 | https://doi.org/10.5281/zenodo.3893495 -- val (80 images) 38 | https://zenodo.org/record/3993380 -- test (160 images without annotation) 39 | 40 | 41 | 42 | References 43 | ---------- 44 | Jiancheng Yang, Liang Jin, Bingbing Ni, & Ming Li. (2020). 45 | RibFrac Dataset: A Benchmark for Rib Fracture Detection, 46 | Segmentation and Classification 47 | """ 48 | 49 | @property 50 | def ids(self): 51 | result = set() 52 | for folder in ['Part1', 'Part2', 'ribfrac-val-images', 'ribfrac-test-images']: 53 | result |= {v.name.split('-')[0] for v in (self.root / folder).iterdir()} 54 | 55 | return tuple(sorted(result)) 56 | 57 | @cached_property 58 | def _id2folder(self): 59 | folders = [item for item in self.root.iterdir() if item.is_dir()] 60 | result_dict = {} 61 | for folder in folders: 62 | p = self.root / folder 63 | folder_ids = [v.name.split('-')[0] for v in p.iterdir()] 64 | folder_dict = {_id: p for _id in folder_ids} 65 | result_dict = {**result_dict, **folder_dict} 66 | 67 | return result_dict 68 | 69 | def image(self, i): 70 | image_path = self._id2folder[i] / f'{i}-image.nii.gz' 71 | image = nibabel.load(image_path).get_fdata() 72 | return image.astype(np.int16) 73 | 74 | def label(self, i): 75 | folder_path = self._id2folder[i] 76 | folder = folder_path.name 77 | if folder != 'ribfrac-test-images': 78 | if folder.startswith('Part'): 79 | label_path = folder_path / f'{i}-label.nii.gz' 80 | elif folder == 'ribfrac-val-images': 81 | dir = folder_path.parent / 'ribfrac-val-labels' 82 | label_path = dir / f'{i}-label.nii.gz' 83 | 84 | label = nibabel.load(label_path).get_fdata() 85 | return label.astype(np.int16) 86 | 87 | def affine(self, i): 88 | """The 4x4 matrix that gives the image's spatial orientation""" 89 | image_path = self._id2folder[i] / f'{i}-image.nii.gz' 90 | return nibabel.load(image_path).affine 91 | -------------------------------------------------------------------------------- /amid/liver_medseg.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import gzip 3 | import re 4 | import zipfile 5 | from pathlib import Path 6 | from zipfile import ZipFile 7 | 8 | import nibabel as nb 9 | import numpy as np 10 | 11 | from .internals import Dataset, field, licenses, register 12 | 13 | 14 | @register( 15 | body_region=('Chest', 'Abdomen'), 16 | license=licenses.CC_BYSA_40, 17 | link='https://www.medseg.ai/database/liver-segments-50-cases', 18 | modality='CT', 19 | prep_data_size='1,88G', 20 | raw_data_size='616M', 21 | task='Segmentation', 22 | ) 23 | class LiverMedseg(Dataset): 24 | """ 25 | LiverMedseg is a public CT segmentation dataset with 50 annotated images. 26 | Case collection of 50 livers with their segments. 27 | Images obtained from Decathlon Medical Segmentation competition 28 | 29 | Parameters 30 | ---------- 31 | root : str, Path, optional 32 | path to the folder containing the raw downloaded archives. 33 | If not provided, the cache is assumed to be already populated. 
34 | 35 | Notes 36 | ----- 37 | Download links: 38 | https://www.medseg.ai/database/liver-segments-50-cases 39 | 40 | Examples 41 | -------- 42 | >>> # Place the downloaded archives in any folder and pass the path to the constructor: 43 | >>> ds = LiverMedseg(root='/path/to/archives/root') 44 | >>> print(len(ds.ids)) 45 | # 50 46 | >>> print(ds.image(ds.ids[0]).shape) 47 | # (512, 512, 38) 48 | 49 | References 50 | ---------- 51 | """ 52 | 53 | @property 54 | def ids(self): 55 | result = set() 56 | with ZipFile(self.root / 'img.zip') as zf: 57 | for zipinfo in zf.infolist(): 58 | if zipinfo.is_dir(): 59 | continue 60 | file_stem = Path(zipinfo.filename).stem 61 | result.add('liver_medseg_' + re.findall(r'\d+', file_stem)[0]) 62 | 63 | return tuple(sorted(result)) 64 | 65 | def _file(self, i): 66 | num_id = i.split('_')[-1] 67 | return zipfile.Path(self.root / 'img.zip', f'img{num_id}.nii.gz') 68 | 69 | @field 70 | def image(self, i) -> np.ndarray: 71 | with open_nii_gz_file(self._file(i)) as nii_file: 72 | return np.asarray(nii_file.dataobj) 73 | 74 | @field 75 | def affine(self, i) -> np.ndarray: 76 | """The 4x4 matrix that gives the image's spatial orientation.""" 77 | with open_nii_gz_file(self._file(i)) as nii_file: 78 | return nii_file.affine 79 | 80 | def spacing(self, i) -> tuple: 81 | with open_nii_gz_file(self._file(i)) as nii_file: 82 | return tuple(nii_file.header['pixdim'][1:4]) 83 | 84 | @field 85 | def mask(self, i) -> np.ndarray: 86 | path = Path(str(self._file(i)).replace('img', 'mask')) 87 | folder, image = path.parent, path.name 88 | _file = zipfile.Path(folder, image) 89 | with open_nii_gz_file(_file) as nii_file: 90 | return np.asarray(nii_file.dataobj).astype(np.uint8) 91 | 92 | 93 | # TODO: sync with amid.utils 94 | @contextlib.contextmanager 95 | def open_nii_gz_file(file): 96 | with file.open('rb') as opened: 97 | with gzip.GzipFile(fileobj=opened) as nii: 98 | nii = nb.FileHolder(fileobj=nii) 99 | yield nb.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 100 | -------------------------------------------------------------------------------- /docs/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contribution Guidelines 2 | 3 | ## Preparing the environment 4 | 5 | 1\. First, set up a cache storage. Create the file `~/.config/amid/.bev.yml` with the following content: 6 | 7 | ```yaml 8 | main: 9 | storage: /path/to/storage 10 | cache: /path/to/cache 11 | ``` 12 | 13 | where `/path/to/storage` and `/path/to/cache` are some paths in your filesystem. 14 | 15 | 2\. Run 16 | 17 | ```shell 18 | amid init 19 | ``` 20 | 21 | The full command could look something like this: 22 | 23 | ```shell 24 | mkdir -p ~/.config/amid 25 | cat >~/.config/amid/.bev.yml < The dataset should be written in such a way, that making a submission to a contest would work out of the box. 55 | 56 | !!! note 57 | In case of DICOM files, make sure to transpose the first 2 image axes. 58 | This way, the image axes will be consistent with the potential contour coordinates. 59 | 60 | !!! tip 61 | If some value is missing for a given id, it is preferable to return `None` instead of raising an exception. 62 | 63 | !!! tip 64 | The dataset must have a docstring which describes it and provides a link to the original data. 65 | 66 | !!! tip 67 | If the raw data contains a table with metadata, it is preferable to split the metadata columns into separate fields. 68 | 69 | 4\. 
Register the dataset like so: 70 | 71 | ```python 72 | from amid.internals import register 73 | 74 | @register( 75 | ..., 76 | ) 77 | class LiTS(Dataset): 78 | ... 79 | ``` 80 | 81 | where `...` stands for the following arguments: 82 | 83 | - `modality` — the images' modality/modalities, e.g., CT, MRI 84 | - `body_region` — the anatomical regions present in the dataset, e.g., Head, Thorax, Abdomen 85 | - `license` — the dataset's license, if any 86 | - `link` — the link to the original data 87 | - `raw_data_size` — the total size, required for the raw data, e.g., 10G, 500M 88 | - `task` — the dataset's downstream task if any. 89 | E.g., Supervised Learning, Domain Adaptation, Self-supervised Learning, Tumor Segmentation, etc. 90 | 91 | 5\. Make sure all the methods are working as expected: 92 | 93 | ```python 94 | from amid.lits import LiTS 95 | 96 | dataset = LiTS(root="/datasets/LiTS") 97 | 98 | print(len(dataset.ids)) 99 | 100 | id_ = dataset.ids[0] 101 | print(dataset.image(id_).shape) 102 | ``` 103 | 104 | 6\. Check the codestyle using the `lint.sh` script in the repository's root and make changes if flake8 is not happy: 105 | 106 | ```shell 107 | pip install -r lint-requirements.txt # only for the first time 108 | ./lint.sh 109 | ``` -------------------------------------------------------------------------------- /amid/medseg9.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import gzip 3 | import zipfile 4 | from pathlib import Path 5 | from zipfile import ZipFile 6 | 7 | import nibabel as nb 8 | import numpy as np 9 | 10 | from .internals import Dataset, field, licenses, register 11 | 12 | 13 | @register( 14 | body_region='Chest', 15 | license=licenses.CC0_10, 16 | link='http://medicalsegmentation.com/covid19/', 17 | modality='CT', 18 | prep_data_size='300M', 19 | raw_data_size='310M', 20 | task='COVID-19 segmentation', 21 | ) 22 | class Medseg9(Dataset): 23 | """ 24 | 25 | Medseg9 is a public COVID-19 CT segmentation dataset with 9 annotated images. 26 | 27 | Parameters 28 | ---------- 29 | root : str, Path, optional 30 | path to the folder containing the raw downloaded archives. 31 | If not provided, the cache is assumed to be already populated. 32 | 33 | Notes 34 | ----- 35 | Data can be downloaded here: http://medicalsegmentation.com/covid19/. 36 | 37 | Then, the folder with raw downloaded data should contain three zip archives with data and masks 38 | (`rp_im.zip`, `rp_lung_msk.zip`, `rp_msk.zip`). 
39 | 40 | Examples 41 | -------- 42 | >>> # Place the downloaded archives in any folder and pass the path to the constructor: 43 | >>> ds = Medseg9(root='/path/to/downloaded/data/folder/') 44 | >>> print(len(ds.ids)) 45 | # 9 46 | >>> print(ds.image(ds.ids[0]).shape) 47 | # (630, 630, 45) 48 | >>> print(ds.covid(ds.ids[0]).shape) 49 | # (630, 630, 45) 50 | 51 | """ 52 | 53 | @property 54 | def ids(self): 55 | result = set() 56 | 57 | with ZipFile(self.root / 'rp_msk.zip') as zf: 58 | for zipinfo in zf.infolist(): 59 | if zipinfo.is_dir(): 60 | continue 61 | file_stem = Path(zipinfo.filename).stem 62 | result.add('medseg9_' + file_stem.split('.nii')[0]) 63 | 64 | return tuple(sorted(result)) 65 | 66 | @staticmethod 67 | def _filename(i): 68 | num_id = i.split('_')[-1] 69 | return f'{num_id}.nii.gz' 70 | 71 | def _file(self, i): 72 | return zipfile.Path(self.root / 'rp_im.zip', f'rp_im/{self._filename(i)}') 73 | 74 | @field 75 | def image(self, i): 76 | with open_nii_gz_file(self._file(i)) as nii_image: 77 | # most CT/MRI scans are integer-valued, this will help us improve compression rates 78 | return np.int16(nii_image.get_fdata()) 79 | 80 | @field 81 | def affine(self, i): 82 | """The 4x4 matrix that gives the image's spatial orientation.""" 83 | with open_nii_gz_file(self._file(i)) as nii_image: 84 | return nii_image.affine 85 | 86 | @field 87 | def lungs(self, i): 88 | mask_file = zipfile.Path(self.root / 'rp_lung_msk.zip', f'rp_lung_msk/{self._filename(i)}') 89 | with open_nii_gz_file(mask_file) as nii_image: 90 | return np.bool_(nii_image.get_fdata()) 91 | 92 | @field 93 | def covid(self, i): 94 | """ 95 | int16 mask. 96 | 0 - normal, 1 - ground-glass opacities (матовое стекло), 2 - consolidation (консолидация). 97 | """ 98 | mask_file = zipfile.Path(self.root / 'rp_msk.zip', f'rp_msk/{self._filename(i)}') 99 | with open_nii_gz_file(mask_file) as nii_image: 100 | # most CT/MRI scans are integer-valued, this will help us improve compression rates 101 | return np.uint8(nii_image.get_fdata()) 102 | 103 | 104 | # TODO: sync with amid.utils 105 | @contextlib.contextmanager 106 | def open_nii_gz_file(file): 107 | with file.open('rb') as opened: 108 | with gzip.GzipFile(fileobj=opened) as nii: 109 | nii = nb.FileHolder(fileobj=nii) 110 | yield nb.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 111 | -------------------------------------------------------------------------------- /amid/curvas.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | import zipfile 3 | from typing import Dict 4 | from zipfile import ZipFile 5 | 6 | import nibabel 7 | import numpy as np 8 | 9 | from .internals import Dataset, field, licenses, register 10 | 11 | 12 | @register( 13 | body_region='Abdomen', 14 | license=licenses.CC_BY_40, 15 | link='https://zenodo.org/records/13767408', 16 | modality='CT', 17 | prep_data_size='30G', 18 | raw_data_size='30G', 19 | task='Abdominal organ pathologies segmentation', 20 | ) 21 | class CURVAS(Dataset): 22 | """ 23 | Pancreas, liver and kidney cysts segmentation from multi-rater annotated data. 24 | 25 | The dataset was used at the MICCAI 2024 CURVAS challenge. 26 | 27 | Parameters 28 | ---------- 29 | root : str, Path, optional 30 | path to the folder containing the raw downloaded archives. 31 | If not provided, the cache is assumed to be already populated. 
32 | 33 | Notes 34 | ----- 35 | Download link: https://zenodo.org/records/13767408 36 | 37 | The `root` folder should contain the three downloaded .zip archives, namely: 38 | `training_set.zip`, `validation_set.zip` and `testing_set.zip`. 39 | 40 | Examples 41 | -------- 42 | >>> # Place the downloaded folders in any folder and pass the path to the constructor: 43 | >>> ds = CURVAS(root='/path/to/downloaded/data/folder/') 44 | >>> print(len(ds.ids)) 45 | # 90 46 | >>> print(ds.image(ds.ids[5]).shape) 47 | # (512, 512, 1045) 48 | >>> print(ds.mask(ds.ids[35]).shape) 49 | # (512, 512, 992) 50 | 51 | """ 52 | 53 | @property 54 | def ids(self): 55 | def _extract(split): 56 | archive = self.root / f'{split}_set.zip' 57 | with ZipFile(archive) as zf: 58 | namelist = [x for x in zf.namelist() if len(x.rstrip('/').split('/')) == 2] 59 | ids = [f'{x.split("/")[1]}-{split}' for x in namelist] 60 | return ids 61 | 62 | return sorted( 63 | [ 64 | *_extract('training'), # 20 Training cases 65 | *_extract('validation'), # 5 Validation cases 66 | *_extract('testing'), # 65 Testing cases 67 | ] 68 | ) 69 | 70 | def _file(self, i, obj): 71 | uid, split = i.split('-') 72 | 73 | archive = self.root / f'{split}_set.zip' 74 | file = f'{split}_set/{uid}/{obj}.nii.gz' 75 | 76 | return zipfile.Path(archive, file) 77 | 78 | @field 79 | def image(self, i) -> np.ndarray: 80 | with self._file(i, 'image').open('rb') as opened: 81 | with gzip.GzipFile(fileobj=opened) as nii: 82 | nii = nibabel.FileHolder(fileobj=nii) 83 | image = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 84 | return np.asarray(image.dataobj).astype(np.int16) 85 | 86 | @field 87 | def affine(self, i) -> np.ndarray: 88 | """The 4x4 matrix that gives the image's spatial orientation""" 89 | with self._file(i, 'image').open('rb') as opened: 90 | with gzip.GzipFile(fileobj=opened) as nii: 91 | nii = nibabel.FileHolder(fileobj=nii) 92 | image = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 93 | return image.affine 94 | 95 | @field 96 | def masks(self, i) -> Dict[str, np.ndarray]: 97 | masks = {} 98 | for x in range(1, 4): 99 | with self._file(i, f'annotation_{x}').open('rb') as opened: 100 | with gzip.GzipFile(fileobj=opened) as nii: 101 | nii = nibabel.FileHolder(fileobj=nii) 102 | image = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 103 | 104 | masks[f'annotation_{x}'] = np.asarray(image.dataobj).astype(np.uint8) 105 | 106 | return masks 107 | -------------------------------------------------------------------------------- /amid/totalsegmentator/dataset.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | from contextlib import suppress 3 | from pathlib import Path 4 | from zipfile import ZipFile 5 | 6 | import nibabel 7 | import numpy as np 8 | import pandas as pd 9 | 10 | from ..internals import Dataset, field, licenses, register 11 | from ..utils import PathOrStr, open_nii_gz_file, unpack 12 | from .utils import ARCHIVE_ROOT, add_labels, add_masks 13 | 14 | 15 | @register( 16 | body_region=('Head', 'Thorax', 'Abdomen', 'Pelvis', 'Legs'), 17 | license=licenses.CC_BY_40, 18 | link='https://zenodo.org/record/6802614#.Y6M2MxXP1D8', 19 | modality='CT', 20 | raw_data_size='35G', 21 | prep_data_size='35G', 22 | task='Supervised anatomical structures segmentation', 23 | ) 24 | class Totalsegmentator(Dataset): 25 | """ 26 | In 1204 CT images we segmented 104 anatomical structures (27 organs, 59 bones, 10 muscles, 8 vessels) 27 | covering a majority of 
relevant classes for most use cases. 28 | 29 | The CT images were randomly sampled from clinical routine, thus representing a real world dataset which 30 | generalizes to clinical application. 31 | 32 | The dataset contains a wide range of different pathologies, scanners, sequences and institutions. [1] 33 | 34 | Parameters 35 | ---------- 36 | root : str, Path, optional 37 | absolute path to the downloaded archive. 38 | If not provided, the cache is assumed to be already populated. 39 | 40 | Notes 41 | ----- 42 | Download link: https://zenodo.org/record/6802614/files/Totalsegmentator_dataset.zip 43 | 44 | Examples 45 | -------- 46 | >>> # Download the archive to any folder and pass the path to the constructor: 47 | >>> ds = Totalsegmentator(root='/path/to/the/downloaded/archive') 48 | >>> print(len(ds.ids)) 49 | # 1204 50 | >>> print(ds.image(ds.ids[0]).shape) 51 | # (294, 192, 179) 52 | >>> print(ds.aorta(ds.ids[25]).shape) 53 | # (320, 320, 145) 54 | 55 | References 56 | ---------- 57 | .. [1] Jakob Wasserthal (2022) Dataset with segmentations of 104 important anatomical structures in 1204 CT images. 58 | Available at: https://zenodo.org/record/6802614#.Y6M2MxXP1D8 59 | """ 60 | 61 | add_masks(locals()) 62 | add_labels(locals()) 63 | 64 | def __init__(self, root: PathOrStr): 65 | root = Path(root) 66 | if root.is_dir(): 67 | if root / ARCHIVE_ROOT in list(root.iterdir()): 68 | root = root / ARCHIVE_ROOT 69 | 70 | file = 'meta.csv' 71 | with unpack(root, file, ARCHIVE_ROOT, '.zip') as (unpacked, _): 72 | self._meta = pd.read_csv(unpacked, sep=';') 73 | 74 | super().__init__(root) 75 | 76 | @property 77 | def ids(self): 78 | if self.root.is_dir(): 79 | return sorted({x.name for x in self.root.iterdir() if x.name != 'meta.csv'}) 80 | else: 81 | with ZipFile(self.root) as zf: 82 | parsed_namelist = [x.strip('/').split('/') for x in zf.namelist()] 83 | return sorted({x[-1] for x in parsed_namelist if len(x) == 2 and x[-1] != 'meta.csv'}) 84 | 85 | @field 86 | def image(self, i): 87 | file = f'{i}/ct.nii.gz' 88 | 89 | with suppress(gzip.BadGzipFile): 90 | with unpack(self.root, file, ARCHIVE_ROOT, '.zip') as (unpacked, is_unpacked): 91 | if is_unpacked: 92 | return np.asarray(nibabel.load(unpacked).dataobj) 93 | else: 94 | with open_nii_gz_file(unpacked) as image: 95 | return np.asarray(image.dataobj) 96 | 97 | @field 98 | def affine(self, i): 99 | """The 4x4 matrix that gives the image's spatial orientation""" 100 | file = f'{i}/ct.nii.gz' 101 | 102 | with unpack(self.root, file, ARCHIVE_ROOT, '.zip') as (unpacked, is_unpacked): 103 | if is_unpacked: 104 | return nibabel.load(unpacked).affine 105 | else: 106 | with open_nii_gz_file(unpacked) as image: 107 | return image.affine 108 | -------------------------------------------------------------------------------- /amid/nlst.py: -------------------------------------------------------------------------------- 1 | import deli 2 | import numpy as np 3 | import pydicom 4 | from dicom_csv import ( 5 | Plane, 6 | drop_duplicated_slices, 7 | expand_volumetric, 8 | get_common_tag, 9 | get_orientation_matrix, 10 | get_pixel_spacing, 11 | get_slice_locations, 12 | get_slices_plane, 13 | get_tag, 14 | order_series, 15 | stack_images, 16 | ) 17 | from tqdm.auto import tqdm 18 | 19 | from .internals import Dataset, field, licenses, register 20 | from .utils import get_series_date 21 | 22 | 23 | @register( 24 | body_region='Thorax', 25 | license=licenses.CC_BY_30, 26 | link='https://wiki.cancerimagingarchive.net/display/NLST/National+Lung+Screening+Trial', 
27 | modality='CT', 28 | prep_data_size=None, # TODO: should be measured... 29 | raw_data_size=None, # TODO: should be measured... 30 | task=None, 31 | ) 32 | class NLST(Dataset): 33 | """ 34 | 35 | Dataset with low-dose CT scans of 26,254 patients acquired during National Lung Screening Trial. 36 | 37 | Parameters 38 | ---------- 39 | root : str, Path, optional 40 | path to the folder (usually called NLST) containing the patient subfolders (like 101426). 41 | If not provided, the cache is assumed to be already populated. 42 | 43 | Notes 44 | ----- 45 | Follow the download instructions at 46 | https://wiki.cancerimagingarchive.net/display/NLST/National+Lung+Screening+Trial. 47 | The dicoms should be placed under the following folders' structure: 48 | <...>//////*.dcm 49 | 50 | Examples 51 | -------- 52 | >>> ds = NLST(root='/path/to/NLST/') 53 | >>> print(len(ds.ids)) 54 | ... 55 | >>> print(ds.image(ds.ids[0]).shape) 56 | ... 57 | >>> print(ds.mask(ds.ids[80]).shape) 58 | ... 59 | 60 | References 61 | ---------- 62 | """ 63 | 64 | @property 65 | def ids(self): 66 | ids = [] 67 | for path in tqdm(list(self.root.iterdir())): 68 | series_uid2num_slices = {p.stem: int(deli.load(p)['Total'][5]) for p in path.glob('*/*/*.json')} 69 | ids.append(max(series_uid2num_slices, key=series_uid2num_slices.get)) 70 | 71 | return ids 72 | 73 | def _series(self, i): 74 | (folder,) = self.root.glob(f'**/{i}') 75 | series = list(map(pydicom.dcmread, folder.iterdir())) 76 | series = expand_volumetric(series) 77 | assert get_common_tag(series, 'Modality') == 'CT' 78 | assert get_slices_plane(series) == Plane.Axial 79 | series = drop_duplicated_slices(series) 80 | series = order_series(series, decreasing=False) 81 | return series 82 | 83 | @field 84 | def image(self, i): 85 | return np.moveaxis(stack_images(self._series(i), -1).astype(np.int16), 0, 1) 86 | 87 | @field 88 | def study_uid(self, i): 89 | return get_common_tag(self._series(i), 'StudyInstanceUID') 90 | 91 | @field 92 | def series_uid(self, i): 93 | return get_common_tag(self._series(i), 'SeriesInstanceUID') 94 | 95 | @field 96 | def sop_uids(self, i): 97 | return [str(get_tag(i, 'SOPInstanceUID')) for i in self._series(i)] 98 | 99 | @field 100 | def pixel_spacing(self, i): 101 | return get_pixel_spacing(self._series(i)).tolist() 102 | 103 | @field 104 | def slice_locations(self, i): 105 | return get_slice_locations(self._series(i)) 106 | 107 | @field 108 | def orientation_matrix(self, i): 109 | return get_orientation_matrix(self._series(i)) 110 | 111 | @field 112 | def conv_kernel(self, i): 113 | return get_common_tag(self._series(i), 'ConvolutionKernel', default=None) 114 | 115 | @field 116 | def kvp(self, i): 117 | return get_common_tag(self._series(i), 'KVP', default=None) 118 | 119 | @field 120 | def patient_id(self, i): 121 | return get_common_tag(self._series(i), 'PatientID', default=None) 122 | 123 | @field 124 | def study_date(self, i): 125 | return get_series_date(self._series(i)) 126 | 127 | @field 128 | def accession_number(self, i): 129 | return get_common_tag(self._series(i), 'AccessionNumber', default=None) 130 | -------------------------------------------------------------------------------- /amid/utils.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import datetime 3 | import functools 4 | import itertools 5 | import zipfile 6 | from gzip import GzipFile 7 | from os import PathLike 8 | from pathlib import Path 9 | from typing import List, Union 10 | 11 | import nibabel 12 | import numpy as np 13
| from dicom_csv import get_common_tag, order_series, stack_images 14 | from dicom_csv.exceptions import ConsistencyError, TagTypeError 15 | from pydicom import Dataset, dcmread 16 | 17 | 18 | Numeric = Union[float, int] 19 | PathOrStr = Union[str, PathLike] 20 | 21 | 22 | @contextlib.contextmanager 23 | def unpack(root: PathOrStr, relative: str, archive_root_name: str = None, archive_ext: str = None): 24 | """Provides the absolute path to the file in both scenarios: inside archive or inside folder. 25 | 26 | Parameters 27 | ---------- 28 | root : str, Path 29 | Absolute path to the downloaded archive or the unpacked archive root. 30 | relative : str, Path 31 | Relative file path inside the archive. The archive's root folder should be omitted. 32 | archive_root_name : str, Path, optional 33 | If `root` is an archive, its root folder name should be given. 34 | archive_ext: {'.zip'}, optional 35 | Compression algorithm used to create the archive. 36 | 37 | Returns 38 | ------- 39 | unpacked : Path 40 | Absolute file path to be opened. 41 | is_unpacked : {True, False} 42 | State of the reached file: `True` if it was found unpacked on disk, `False` if it is read from inside the archive. 43 | """ 44 | unpacked = Path(root) / relative 45 | 46 | if unpacked.exists(): 47 | yield unpacked, True 48 | elif archive_ext == '.zip': 49 | with zipfile.Path(root, str(Path(archive_root_name, relative))).open('rb') as unpacked: 50 | yield unpacked, False 51 | else: 52 | raise ValueError('Unexpected file path or unsupported compression algorithm.') 53 | 54 | 55 | @contextlib.contextmanager 56 | def open_nii_gz_file(unpacked): 57 | """Opens a ``.nii.gz`` file when it is packed inside an archive 58 | 59 | Examples 60 | -------- 61 | >>> with unpack('/path/to/archive.zip', 'relative/file/path', 'root', '.zip') as (unpacked, is_unpacked): 62 | >>> with open_nii_gz_file(unpacked) as image: 63 | >>> print(np.asarray(image.dataobj).shape) 64 | # (512, 512, 256) 65 | """ 66 | with GzipFile(fileobj=unpacked) as nii: 67 | nii = nibabel.FileHolder(fileobj=nii) 68 | yield nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 69 | 70 | 71 | def get_series_date(series): 72 | try: 73 | study_date = get_common_tag(series, 'StudyDate') 74 | except (TagTypeError, ConsistencyError): 75 | return 76 | 77 | if not isinstance(study_date, str) or not study_date.isnumeric() or len(study_date) != 8: 78 | return 79 | 80 | try: 81 | year = int(study_date[:4]) 82 | month = int(study_date[4:6]) 83 | day = int(study_date[6:]) 84 | except TypeError: 85 | return 86 | 87 | if year < 1972: # the year of creation of the first CT scanner 88 | return 89 | 90 | return datetime.date(year, month, day) 91 | 92 | 93 | def propagate_none(func): 94 | @functools.wraps(func) 95 | def wrapper(x, *args, **kwargs): 96 | return None if (x is None) else func(x, *args, **kwargs) 97 | 98 | return wrapper 99 | 100 | 101 | def deprecate(message=None): 102 | def decorator(func): 103 | return functools.wraps(func)(np.deprecate(message=message)(func)) 104 | 105 | return decorator 106 | 107 | 108 | def image_from_dicom_folder(folder: Union[str, Path]) -> np.ndarray: 109 | return stack_images(series_from_dicom_folder(folder)) 110 | 111 | 112 | def series_from_dicom_folder(folder: Union[str, Path]) -> List[Dataset]: 113 | return order_series([dcmread(p) for p in Path(folder).glob('*.dcm')]) 114 | 115 | 116 | # TODO: stolen from dpipe for now 117 | def mask_to_box(mask: np.ndarray): 118 | """ 119 | Find the smallest box that contains all true values of the ``mask``.
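
    A tiny illustrative sketch (the result is shown as a comment because the exact
    scalar types inside the returned lists depend on the numpy version):

    >>> mask = np.zeros((5, 5), dtype=bool)
    >>> mask[1:3, 2:4] = True
    >>> start, stop = mask_to_box(mask)
    >>> # start == [1, 2], stop == [3, 4], i.e. mask[start[0]:stop[0], start[1]:stop[1]] covers all True values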
120 | """ 121 | if not mask.any(): 122 | raise ValueError('The mask is empty.') 123 | 124 | start, stop = [], [] 125 | for ax in itertools.combinations(range(mask.ndim), mask.ndim - 1): 126 | nonzero = np.any(mask, axis=ax) 127 | if np.any(nonzero): 128 | left, right = np.where(nonzero)[0][[0, -1]] 129 | else: 130 | left, right = 0, 0 131 | start.insert(0, left) 132 | stop.insert(0, right + 1) 133 | return start, stop 134 | -------------------------------------------------------------------------------- /amid/crlm.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | from typing import Dict 3 | 4 | import highdicom 5 | import numpy as np 6 | from dicom_csv import get_orientation_matrix, get_slice_locations, get_voxel_spacing, stack_images 7 | from imops import restore_crop 8 | from more_itertools import locate 9 | 10 | from .internals import Dataset, licenses, register 11 | from .utils import series_from_dicom_folder 12 | 13 | 14 | @register( 15 | body_region='Abdomen', 16 | license=licenses.CC_BY_40, 17 | link='https://wiki.cancerimagingarchive.net/pages/viewpage.action?' 18 | 'pageId=89096268#89096268412b832037484784bd78caf58e052641', 19 | modality=('CT, SEG'), 20 | prep_data_size='11G', 21 | raw_data_size='11G', 22 | task=('Segmentation', 'Classification'), 23 | ) 24 | class CRLM(Dataset): 25 | """ 26 | Parameters 27 | ---------- 28 | root : str, Path, optional 29 | path to the folder containing the raw downloaded archives. 30 | If not provided, the cache is assumed to be already populated. 31 | 32 | 33 | Notes 34 | ----- 35 | Download links: 36 | https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=89096268#89096268b2cc35fce0664a2b875b5ec675ba9446 37 | 38 | This collection consists of DICOM images and DICOM Segmentation Objects (DSOs) 39 | for 197 patients with Colorectal Liver Metastases (CRLM). 40 | Comprised of Original DICOM CTs and Segmentations for each subject. 
41 | The segmentations include 'Liver', 'Liver_Remnant' 42 | (liver that will remain after surgery based on a preoperative CT plan), 43 | 'Hepatic' and 'Portal' veins, 44 | and 'Tumor_x', where 'x' denotes the various tumor occurrences in the case 45 | 46 | Examples 47 | -------- 48 | >>> # Place the downloaded archives in any folder and pass the path to the constructor: 49 | >>> ds = CRLM(root='/path/to/archives/root') 50 | >>> print(len(ds.ids)) 51 | # 197 52 | >>> print(ds.image(ds.ids[0]).shape) 53 | # (512, 512, 52) 54 | 55 | References 56 | ---------- 57 | """ 58 | 59 | @property 60 | def ids(self): 61 | return sorted(d.name for d in self.root.iterdir()) 62 | 63 | def _folders(self, i): 64 | case = self.root / i 65 | folders = tuple({p.parent for p in case.glob('*/*/*/*.dcm')}) 66 | return tuple(sorted(folders, key=lambda f: len(list(f.iterdir())))) 67 | 68 | def _series(self, i): 69 | return series_from_dicom_folder(self._folders(i)[1]) 70 | 71 | def image(self, i): 72 | return stack_images(self._series(i)) 73 | 74 | def mask(self, i) -> Dict[str, np.ndarray]: 75 | """Returns dict: {'liver': ..., 'hepatic': ..., 'tumor_x': ...}""" 76 | dicom_seg = highdicom.seg.segread(next(self._folders(i)[0].glob('*.dcm'))) 77 | series = self._series(i) 78 | image_sops = [s.SOPInstanceUID for s in series] 79 | seg_sops = [sop_uid for _, _, sop_uid in dicom_seg.get_source_image_uids()] 80 | 81 | sops = [sop for sop in image_sops if sop in set(seg_sops).intersection(image_sops)] 82 | seg_box_start = list(locate(image_sops, lambda i: i == sops[0]))[0] 83 | seg_box_stop = list(locate(image_sops, lambda i: i == sops[-1]))[0] 84 | 85 | image = self.image(i) 86 | seg_box = np.asarray(((0, 0, seg_box_start), (*np.atleast_1d(image.shape[:-1]), seg_box_stop + 1))) 87 | 88 | raw_masks = np.swapaxes( 89 | dicom_seg.get_pixels_by_source_instance( 90 | sops, 91 | ignore_spatial_locations=True, 92 | segment_numbers=dicom_seg.get_segment_numbers(), 93 | ), 94 | -1, 95 | 0, 96 | ) 97 | masks = list(map(partial(restore_crop, box=seg_box, shape=image.shape), raw_masks)) 98 | 99 | liver_mask = {'liver': masks[0].astype(bool)} 100 | # skip liver remnant 101 | veins = {'hepatic': masks[2].astype(bool), 'portal': masks[3].astype(bool)} 102 | tumors = {f'tumor_{i}': array.astype(bool) for i, array in enumerate(masks[4:])} 103 | 104 | return {**liver_mask, **veins, **tumors} 105 | 106 | def spacing(self, i): 107 | """Returns the voxel spacing along axes (x, y, z).""" 108 | return get_voxel_spacing(self._series(i)) 109 | 110 | def slice_locations(self, i): 111 | return get_slice_locations(self._series(i)) 112 | 113 | def affine(self, i): 114 | """Returns 4x4 matrix that gives the image's spatial orientation.""" 115 | return get_orientation_matrix(self._series(i)) 116 | -------------------------------------------------------------------------------- /amid/luna25.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from functools import cached_property 3 | from typing import NamedTuple, Sequence 4 | 5 | import numpy as np 6 | import pandas as pd 7 | import SimpleITK as sitk 8 | 9 | from .internals import Dataset, field, licenses, register 10 | 11 | 12 | class LUNA25Nodule(NamedTuple): 13 | coords: Sequence[float] 14 | lesion_id: int 15 | annotation_id: str 16 | nodule_id: str 17 | malignancy: bool 18 | center_voxel: Sequence[float] 19 | bbox: np.ndarray 20 | 21 | 22 | @register( 23 | body_region='Chest', 24 | license=licenses.CC_BY_40, 25 | 
link='https://luna25.grand-challenge.org/', 26 | modality='CT', 27 | prep_data_size='214G', 28 | raw_data_size='205G', 29 | task='Lung nodule malignancy risk estimation', 30 | ) 31 | class LUNA25(Dataset): 32 | """ 33 | The LUNA25 Challenge dataset is a comprehensive collection designed to support 34 | the development and validation of AI algorithms for lung nodule malignancy risk 35 | estimation using low-dose chest CT scans. In total, it contains 2120 patients 36 | and 4069 low-dose chest CT scans, with 555 annotated malignant nodules and 37 | 5608 benign nodules (3762 unique nodules, 348 of them are malignant). 38 | The dataset was acquired in participants who enrolled in the 39 | National Lung Cancer Screening Trial (NLST) between 2002 and 2004 in 40 | one of the 33 centers in the United States. 41 | 42 | Parameters 43 | ---------- 44 | root : str, Path, optional 45 | path to the folder containing `luna25_images` and `luna25_nodule_blocks` folders and 46 | `LUNA25_Public_Training_Development_Data.csv` file obtained by the instruction at 47 | https://luna25.grand-challenge.org/datasets/. 48 | If not provided, the cache is assumed to be already populated. 49 | 50 | Notes 51 | ----- 52 | Join the challenge at https://luna25.grand-challenge.org/. 53 | Then follow the download and extraction instructions at https://luna25.grand-challenge.org/datasets/. 54 | """ 55 | 56 | @property 57 | def ids(self): 58 | return [file.name[: -len('.mha')] for file in (self.root / 'luna25_images').iterdir()] 59 | 60 | def _sitk_image(self, i): 61 | return sitk.ReadImage(self.root / f'luna25_images/{i}.mha') 62 | 63 | @field 64 | def image(self, i): 65 | return sitk.GetArrayFromImage(self._sitk_image(i)) 66 | 67 | @field 68 | def spacing(self, i): 69 | return self._sitk_image(i).GetSpacing()[::-1] 70 | 71 | @cached_property 72 | def _data(self): 73 | return pd.read_csv(self.root / 'LUNA25_Public_Training_Development_Data.csv') 74 | 75 | def _data_rows(self, i): 76 | return self._data[self._data['SeriesInstanceUID'] == i] 77 | 78 | def _data_column_value(self, i, column_name): 79 | values = self._data_rows(i).get(column_name).unique() 80 | assert len(values) == 1 81 | value = values[0] 82 | assert not pd.isnull(value) 83 | return value 84 | 85 | @field 86 | def patient_id(self, i): 87 | return str(self._data_column_value(i, 'PatientID')) 88 | 89 | @field 90 | def study_date(self, i): 91 | study_date = str(self._data_column_value(i, 'StudyDate')) 92 | return datetime.strptime(study_date, "%Y%m%d").date() 93 | 94 | @field 95 | def age(self, i): 96 | return self._data_column_value(i, 'Age_at_StudyDate') 97 | 98 | @field 99 | def gender(self, i): 100 | return self._data_column_value(i, 'Gender') 101 | 102 | @field 103 | def nodules(self, i): 104 | nodules = [] 105 | sitk_image = self._sitk_image(i) 106 | shape = self.image(i).shape 107 | bbox_size = np.array([64, 128, 128]) # all nodule blocks in LUNA25 are of the same size 108 | for row in self._data_rows(i).itertuples(): 109 | coords = (row.CoordX, row.CoordY, row.CoordZ) 110 | center_voxel = sitk_image.TransformPhysicalPointToIndex(map(int, coords))[::-1] 111 | 112 | nodule_block_origin = self.get_nodule_block_metadata(row.AnnotationID)['origin'][::-1] 113 | bbox_start_point = sitk_image.TransformPhysicalPointToIndex(map(int, nodule_block_origin))[::-1] 114 | bbox = np.array([bbox_start_point, np.minimum(bbox_start_point + bbox_size, shape)]) 115 | nodules.append( 116 | LUNA25Nodule( 117 | coords=coords, 118 | lesion_id=row.LesionID, 119 | 
annotation_id=str(row.AnnotationID), 120 | nodule_id=str(row.NoduleID), 121 | malignancy=row.label, 122 | center_voxel=center_voxel, 123 | bbox=bbox, 124 | ) 125 | ) 126 | return nodules 127 | 128 | def get_nodule_block_image(self, annotation_id): 129 | return np.load(self.root / f'luna25_nodule_blocks/image/{annotation_id}.npy') 130 | 131 | def get_nodule_block_metadata(self, annotation_id): 132 | metadata = np.load(self.root / f'luna25_nodule_blocks/metadata/{annotation_id}.npy', allow_pickle=True) 133 | assert metadata.shape == () 134 | return metadata.item() 135 | -------------------------------------------------------------------------------- /amid/brats2021.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | from pathlib import Path 3 | from typing import Union 4 | from zipfile import ZipFile 5 | 6 | import nibabel 7 | import numpy as np 8 | import pandas as pd 9 | 10 | from .internals import Dataset, field, licenses, register 11 | from .utils import open_nii_gz_file, unpack 12 | 13 | 14 | @register( 15 | body_region='Head', 16 | license=licenses.CC_BYNCSA_40, 17 | link='http://www.braintumorsegmentation.org/', 18 | modality=('MRI T1', 'MRI T1Gd', 'MRI T2', 'MRI T2-FLAIR'), 19 | prep_data_size='8,96G', 20 | raw_data_size='15G', 21 | task=('Segmentation', 'Classification', 'Domain Adaptation'), 22 | ) 23 | class BraTS2021(Dataset): 24 | """ 25 | Parameters 26 | ---------- 27 | root : str, Path, optional 28 | path to the folder containing the raw downloaded archives. 29 | If not provided, the cache is assumed to be already populated. 30 | 31 | Notes 32 | ----- 33 | Download links: 34 | 2021: http://www.braintumorsegmentation.org/ 35 | 36 | Examples 37 | -------- 38 | >>> # Place the downloaded archives in any folder and pass the path to the constructor: 39 | >>> ds = BraTS2021(root='/path/to/archives/root') 40 | >>> print(len(ds.ids)) 41 | # 5880 42 | >>> print(ds.image(ds.ids[0]).shape) 43 | # (240, 240, 155) 44 | 45 | References 46 | ---------- 47 | """ 48 | 49 | @property 50 | def ids(self): 51 | return sorted(_get_ids_or_file(self.root, 'TrainingData') + _get_ids_or_file(self.root, 'ValidationData')) 52 | 53 | @field 54 | def fold(self, i) -> str: 55 | return 'ValidationData' if _get_ids_or_file(self.root, 'ValidationData', check_id=i) else 'TrainingData' 56 | 57 | @property 58 | def mapping21_17(self) -> pd.DataFrame: 59 | return pd.read_csv(self.root / 'BraTS21-17_Mapping.csv') 60 | 61 | @field 62 | def subject_id(self, i) -> str: 63 | return i.rsplit('_', 1)[0] 64 | 65 | @field 66 | def modality(self, i) -> str: 67 | return i.rsplit('_', 1)[1] 68 | 69 | @field 70 | def image(self, i) -> np.ndarray: 71 | root, relative = _get_ids_or_file(self.root, self.fold(i), check_id=i, return_image=True) 72 | with _load_nibabel_probably_from_zip(root, relative, '.', '.zip') as nii_image: 73 | return np.asarray(nii_image.dataobj) 74 | 75 | def mask(self, i) -> Union[np.ndarray, None]: 76 | if self.fold(i) == 'ValidationData': 77 | return None 78 | else: 79 | root, relative = _get_ids_or_file(self.root, self.fold(i), check_id=i, return_segm=True) 80 | with _load_nibabel_probably_from_zip(root, relative, '.', '.zip') as nii_image: 81 | return np.asarray(nii_image.dataobj) 82 | 83 | def spacing(self, i): 84 | """Returns the voxel spacing along axes (x, y, z).""" 85 | root, relative = _get_ids_or_file(self.root, self.fold(i), check_id=i, return_image=True) 86 | with _load_nibabel_probably_from_zip(root, relative, '.', '.zip') as 
nii_image: 87 | return tuple(nii_image.header['pixdim'][1:4]) 88 | 89 | @field 90 | def affine(self, i) -> np.ndarray: 91 | """Returns 4x4 matrix that gives the image's spatial orientation.""" 92 | root, relative = _get_ids_or_file(self.root, self.fold(i), check_id=i, return_image=True) 93 | with _load_nibabel_probably_from_zip(root, relative, '.', '.zip') as nii_image: 94 | return nii_image.affine 95 | 96 | 97 | def _get_ids_or_file( 98 | base_path, 99 | archive_name_part: str = 'TrainingData', 100 | check_id: str = None, 101 | return_image: bool = False, 102 | return_segm: bool = False, 103 | ): 104 | # TODO: implement the same functionality for folder extraction. 105 | ids = [] 106 | for archive in base_path.glob('*.zip'): 107 | if archive_name_part in archive.name: 108 | with ZipFile(archive) as zf: 109 | for zipinfo in zf.infolist(): 110 | if not zipinfo.is_dir(): 111 | file = Path(zipinfo.filename) 112 | _id = file.stem.replace('.nii', '') 113 | 114 | if 'seg' not in _id: 115 | ids.append(_id) 116 | 117 | if (check_id is not None) and (check_id == _id): 118 | if return_segm: 119 | return str(archive), str(file)[: -len('.nii.gz')].rsplit('_', 1)[0] + '_seg.nii.gz' 120 | 121 | if return_image: 122 | return str(archive), str(file) 123 | 124 | return True # if check_id in archive 125 | 126 | return ids if (check_id is None) else False # if check_id not in archive 127 | 128 | 129 | @contextlib.contextmanager 130 | def _load_nibabel_probably_from_zip(root: str, relative: str, archive_root_name: str = None, archive_ext: str = None): 131 | with unpack(root, relative, archive_root_name, archive_ext) as (unpacked, is_unpacked): 132 | if is_unpacked: 133 | yield nibabel.load(unpacked) 134 | else: 135 | with open_nii_gz_file(unpacked) as nii_image: 136 | yield nii_image 137 | -------------------------------------------------------------------------------- /amid/egd.py: -------------------------------------------------------------------------------- 1 | import nibabel as nb 2 | import numpy as np 3 | from deli import load 4 | 5 | from .internals import Dataset, field as _field, register 6 | 7 | 8 | @register( 9 | body_region='Head', 10 | license='EGD data license', 11 | link='https://xnat.bmia.nl/data/archive/projects/egd', 12 | modality=('FLAIR', 'MRI T1', 'MRI T1GD', 'MRI T2'), 13 | prep_data_size='107,49G', 14 | raw_data_size='40G', 15 | task='Segmentation', 16 | ) 17 | class EGD(Dataset): 18 | """ 19 | The Erasmus Glioma Database (EGD): Structural MRI scans, WHO 2016 subtypes, 20 | and segmentations of 774 patients with glioma [1]_. 21 | 22 | Parameters 23 | ---------- 24 | root : str, Path, optional 25 | path to the folder containing the raw downloaded archives. 26 | If not provided, the cache is assumed to be already populated. 27 | 28 | Notes 29 | ----- 30 | The access to the dataset could be requested at XNAT portal [https://xnat.bmia.nl/data/archive/projects/egd]. 31 | 32 | To download the data in the compatible structure we recommend to use 33 | egd-downloader script [https://zenodo.org/record/4761089#.YtZpLtJBxhF]. 34 | Please, refer to its README for further information. 35 | 36 | Examples 37 | -------- 38 | >>> # Place the downloaded archives in any folder and pass the path to the constructor: 39 | >>> egd = EGD(root='/path/to/downloaded/data/folder/') 40 | >>> print(len(egd.ids)) 41 | # 774 42 | >>> print(egd.t1gd(egd.ids[215]).shape) 43 | # (197, 233, 189) 44 | >>> print(egd.manufacturer(egd.ids[444])) 45 | # Philips Medical Systems 46 | 47 | References 48 | ---------- 49 | .. 
[1] van der Voort, Sebastian R., et al. "The Erasmus Glioma Database (EGD): Structural MRI scans, 50 | WHO 2016 subtypes, and segmentations of 774 patients with glioma." 51 | Data in brief 37 (2021): 107191. 52 | https://www.sciencedirect.com/science/article/pii/S2352340921004753 53 | 54 | """ 55 | 56 | @property 57 | def ids(self): 58 | result = [] 59 | for folder in (self.root / 'SUBJECTS').iterdir(): 60 | for suffix in 'FLAIR', 'T1', 'T1GD', 'T2': 61 | result.append(f'{folder.name}-{suffix}') 62 | 63 | return tuple(sorted(result)) 64 | 65 | @_field 66 | def brain_mask(self, i) -> np.ndarray: 67 | return nb.load(self.root / 'METADATA' / 'Brain_mask.nii.gz').get_fdata().astype(bool) 68 | 69 | @_field 70 | def deface_mask(self, i) -> np.ndarray: 71 | return nb.load(self.root / 'METADATA' / 'Deface_mask.nii.gz').get_fdata().astype(bool) 72 | 73 | def _image_file(self, i): 74 | i, suffix = i.rsplit('-', 1) 75 | return nb.load(self.root / 'SUBJECTS' / i / f'{suffix}.nii.gz') 76 | 77 | @_field 78 | def modality(self, i) -> str: 79 | _, suffix = i.rsplit('-', 1) 80 | return suffix 81 | 82 | @_field 83 | def subject_id(self, i) -> str: 84 | subject, _ = i.rsplit('-', 1) 85 | return subject 86 | 87 | @_field 88 | def affine(self, i) -> np.ndarray: 89 | return self._image_file(i).affine 90 | 91 | def spacing(self, i): 92 | # voxel spacing is [1, 1, 1] for all images in this dataset... 93 | return tuple(self._image_file(i).header['pixdim'][1:4]) 94 | 95 | @_field 96 | def image(self, i) -> np.ndarray: 97 | # intensities are not integer-valued in this dataset... 98 | return np.asarray(self._image_file(i).dataobj) 99 | 100 | def _metadata(self, i): 101 | i, _ = i.rsplit('-', 1) 102 | return load(self.root / 'SUBJECTS' / i / 'metadata.json') 103 | 104 | @_field 105 | def genetic_and_histological_label_idh(self, i) -> str: 106 | return self._metadata(i)['Genetic_and_Histological_labels']['IDH'] 107 | 108 | @_field 109 | def genetic_and_histological_label_1p19q(self, i) -> str: 110 | return self._metadata(i)['Genetic_and_Histological_labels']['1p19q'] 111 | 112 | @_field 113 | def genetic_and_histological_label_grade(self, i) -> str: 114 | return self._metadata(i)['Genetic_and_Histological_labels']['Grade'] 115 | 116 | @_field 117 | def age(self, i) -> float: 118 | return self._metadata(i)['Clinical_data']['Age'] 119 | 120 | @_field 121 | def sex(self, i) -> str: 122 | return self._metadata(i)['Clinical_data']['Sex'] 123 | 124 | @_field 125 | def observer(self, i) -> str: 126 | return self._metadata(i)['Segmentation_source']['Observer'] 127 | 128 | @_field 129 | def original_scan(self, i) -> str: 130 | return self._metadata(i)['Segmentation_source']['Original scan'] 131 | 132 | @_field 133 | def manufacturer(self, i) -> str: 134 | return self._metadata(i)['Scan_characteristics']['Manufacturer'] 135 | 136 | @_field 137 | def system(self, i) -> str: 138 | return self._metadata(i)['Scan_characteristics']['System'] 139 | 140 | @_field 141 | def field(self, i) -> str: 142 | return self._metadata(i)['Scan_characteristics']['Field'] 143 | 144 | @_field 145 | def mask(self, i) -> np.ndarray: 146 | i, _ = i.rsplit('-', 1) 147 | return nb.load(self.root / 'SUBJECTS' / i / 'MASK.nii.gz').get_fdata().astype(bool) 148 | -------------------------------------------------------------------------------- /amid/flare2022.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | import zipfile 3 | from pathlib import Path 4 | from typing import Union 5 | from zipfile import 
ZipFile 6 | 7 | import nibabel 8 | import numpy as np 9 | 10 | from .internals import Dataset, field, register 11 | 12 | 13 | @register( 14 | body_region='Abdomen', 15 | license=None, 16 | link='https://flare22.grand-challenge.org/', 17 | modality='CT', 18 | prep_data_size='347G', 19 | raw_data_size='247G', 20 | task='Semi-supervised abdominal organ segmentation', 21 | ) 22 | class FLARE2022(Dataset): 23 | """ 24 | An abdominal organ segmentation dataset for semi-supervised learning [1]_. 25 | 26 | The dataset was used at the MICCAI FLARE 2022 challenge. 27 | 28 | Parameters 29 | ---------- 30 | root : str, Path, optional 31 | path to the folder containing the raw downloaded archives. 32 | If not provided, the cache is assumed to be already populated. 33 | 34 | Notes 35 | ----- 36 | Download link: https://flare22.grand-challenge.org/Dataset/ 37 | 38 | The `root` folder should contain the two downloaded folders, namely: "Training" and "Validation". 39 | 40 | Examples 41 | -------- 42 | >>> # Place the downloaded folders in any folder and pass the path to the constructor: 43 | >>> ds = FLARE2022(root='/path/to/downloaded/data/folder/') 44 | >>> print(len(ds.ids)) 45 | # 2100 46 | >>> print(ds.image(ds.ids[0]).shape) 47 | # (512, 512, 110) 48 | >>> print(ds.mask(ds.ids[25]).shape) 49 | # (512, 512, 104) 50 | 51 | References 52 | ---------- 53 | .. [1] Ma, Jun, et al. "Fast and Low-GPU-memory abdomen CT organ segmentation: The FLARE challenge." 54 | Medical Image Analysis 82 (2022): 102616. 55 | """ 56 | 57 | @property 58 | def ids(self): 59 | result = set() 60 | 61 | # 50 Training Labeled cases 62 | archive = self.root / 'Training' / 'FLARE22_LabeledCase50' / 'images.zip' 63 | with ZipFile(archive) as zf: 64 | for file in zf.namelist(): 65 | result.add(f"TL{file.split('_')[-2]}") 66 | 67 | # 2000 Training Unlabeled cases 68 | for archive in (self.root / 'Training').glob('*.zip'): 69 | with ZipFile(archive) as zf: 70 | for file in zf.namelist(): 71 | if not file.endswith('.nii.gz'): 72 | continue 73 | 74 | file = Path(file) 75 | result.add(f"TU{file.name.split('_')[-2]}") 76 | 77 | # 50 Validation Unlabeled cases 78 | for file in (self.root / 'Validation').glob('*'): 79 | if not file.name.endswith('.nii.gz'): 80 | continue 81 | 82 | result.add(f"VU{file.name.split('_')[-2]}") 83 | 84 | return sorted(result) 85 | 86 | def _file(self, i): 87 | # 50 Training Labeled cases 88 | if i.startswith('TL'): 89 | archive = self.root / 'Training' / 'FLARE22_LabeledCase50' / 'images.zip' 90 | with ZipFile(archive) as zf: 91 | for file in zf.namelist(): 92 | if i[2:] in file: 93 | return zipfile.Path(archive, file) 94 | 95 | # 2000 Training Unlabeled cases 96 | for archive in (self.root / 'Training').glob('*.zip'): 97 | with ZipFile(archive) as zf: 98 | for file in zf.namelist(): 99 | if i[2:] in file: 100 | return zipfile.Path(archive, file) 101 | 102 | # 50 Validation Unlabeled cases 103 | if i.startswith('VU'): 104 | file = self.root / 'Validation' / f'FLARETs_{i[2:]}_0000.nii.gz' 105 | return file 106 | 107 | raise ValueError(f'Id "{i}" not found') 108 | 109 | @field 110 | def image(self, i) -> np.ndarray: 111 | with self._file(i).open('rb') as opened: 112 | with gzip.GzipFile(fileobj=opened) as nii: 113 | nii = nibabel.FileHolder(fileobj=nii) 114 | image = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 115 | return np.asarray(image.dataobj) 116 | 117 | @field 118 | def affine(self, i) -> np.ndarray: 119 | """The 4x4 matrix that gives the image's spatial orientation""" 120 | with 
self._file(i).open('rb') as opened: 121 | with gzip.GzipFile(fileobj=opened) as nii: 122 | nii = nibabel.FileHolder(fileobj=nii) 123 | image = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 124 | return image.affine 125 | 126 | @field 127 | def mask(self, i) -> Union[np.ndarray, None]: 128 | if not i.startswith('TL'): 129 | return None 130 | 131 | archive = self.root / 'Training' / 'FLARE22_LabeledCase50' / 'labels.zip' 132 | with ZipFile(archive) as zf: 133 | for file in zf.namelist(): 134 | if i[2:] in file: 135 | with zipfile.Path(archive, file).open('rb') as opened: 136 | with gzip.GzipFile(fileobj=opened) as nii: 137 | nii = nibabel.FileHolder(fileobj=nii) 138 | mask = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 139 | return np.asarray(mask.dataobj) 140 | -------------------------------------------------------------------------------- /amid/crossmoda.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import gzip 3 | import zipfile 4 | from pathlib import Path 5 | from typing import Union 6 | from zipfile import ZipFile 7 | 8 | import nibabel as nb 9 | import numpy as np 10 | import pandas as pd 11 | 12 | from .internals import Dataset, licenses, register 13 | 14 | 15 | @register( 16 | body_region='Head', 17 | license=licenses.CC_BYNCSA_40, 18 | link='https://zenodo.org/record/6504722#.YsgwnNJByV4', 19 | modality=('MRI T1c', 'MRI T2hr'), 20 | prep_data_size='8,96G', 21 | raw_data_size='17G', 22 | task=('Segmentation', 'Classification', 'Domain Adaptation'), 23 | ) 24 | class CrossMoDA(Dataset): 25 | """ 26 | Parameters 27 | ---------- 28 | root : str, Path, optional 29 | path to the folder containing the raw downloaded archives. 30 | If not provided, the cache is assumed to be already populated. 
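A minimal access sketch (the root path is a placeholder; `split`, `masks` and `koos_grade` are defined further below, and the latter two are only populated for source-domain training cases):
>>> ds = CrossMoDA(root='/path/to/archives/root')
>>> i = ds.ids[0]
>>> if ds.split(i) == 'training_source':
...     mask = ds.masks(i)        # combined mask: schwannoma == 1, cochlea == 2
...     grade = ds.koos_grade(i)  # Koos grade in [1..4], or -1 for post-operative cases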
31 | 32 | Notes 33 | ----- 34 | Download links: 35 | 2021 & 2022: https://zenodo.org/record/6504722#.YsgwnNJByV4 36 | 37 | Examples 38 | -------- 39 | >>> # Place the downloaded archives in any folder and pass the path to the constructor: 40 | >>> ds = CrossMoDA(root='/path/to/archives/root') 41 | >>> print(len(ds.ids)) 42 | # 484 43 | >>> print(ds.image(ds.ids[0]).shape) 44 | # (512, 512, 214) 45 | 46 | References 47 | ---------- 48 | """ 49 | 50 | @property 51 | def ids(self): 52 | result = set() 53 | for archive in self.root.glob('*.zip'): 54 | with ZipFile(archive) as zf: 55 | for zipinfo in zf.infolist(): 56 | if zipinfo.is_dir(): 57 | continue 58 | 59 | file = Path(zipinfo.filename) 60 | assert file.stem not in result, file.stem 61 | 62 | if 'Label' not in file.stem and file.suffix == '.gz': 63 | result.add('_'.join(file.stem.split('_')[:-1])) 64 | else: 65 | continue 66 | 67 | return sorted(result) 68 | 69 | @property 70 | def train_source_df(self): 71 | return pd.read_csv(self.root / 'infos_source_training.csv', index_col='crossmoda_name') 72 | 73 | def _file(self, i): 74 | for archive in self.root.glob('*.zip'): 75 | with ZipFile(archive) as zf: 76 | for zipinfo in zf.infolist(): 77 | if i == '_'.join(Path(zipinfo.filename).stem.split('_')[:-1]) and 'Label' not in zipinfo.filename: 78 | return zipfile.Path(archive, zipinfo.filename) 79 | 80 | raise ValueError(f'Id "{i}" not found') 81 | 82 | def image(self, i) -> Union[np.ndarray, None]: 83 | with open_nii_gz_file(self._file(i)) as nii_image: 84 | return np.asarray(nii_image.dataobj) 85 | 86 | def spacing(self, i): 87 | """Returns pixel spacing along axes (x, y, z)""" 88 | with open_nii_gz_file(self._file(i)) as nii_image: 89 | return tuple(nii_image.header['pixdim'][1:4]) 90 | 91 | def affine(self, i): 92 | """The 4x4 matrix that gives the image's spatial orientation""" 93 | with open_nii_gz_file(self._file(i)) as nii_image: 94 | return nii_image.affine 95 | 96 | def split(self, i) -> str: 97 | """The split in which this entry is contained: training_source, training_target, validation""" 98 | file = self._file(i) 99 | idx = int(file.name.split('_')[2]) 100 | dataset = file.name.split('_')[1] 101 | 102 | if dataset == 'ldn': 103 | if 1 <= idx < 106: 104 | return 'training_source' 105 | elif 106 <= idx < 211: 106 | return 'training_target' 107 | elif 211 <= idx < 243: 108 | return 'validation' 109 | 110 | elif dataset == 'etz': 111 | if 0 <= idx < 105: 112 | return 'training_source' 113 | elif 105 <= idx < 210: 114 | return 'training_target' 115 | elif 210 <= idx < 242: 116 | return 'validation' 117 | 118 | raise ValueError(f'Cannot find split for the file: {file}') 119 | 120 | def year(self, i) -> int: 121 | """The year in which this entry was published: 2021 or 2022""" 122 | return int(self._file(i).name[9:13]) 123 | 124 | def masks(self, i): 125 | """Combined mask of schwannoma and cochlea (1 and 2 respectively)""" 126 | file = self._file(i) 127 | if 'T2' not in file.name: 128 | with open_nii_gz_file(file.parent / file.name.replace('ceT1', 'Label')) as nii_image: 129 | return nii_image.get_fdata().astype(np.uint8) 130 | 131 | def koos_grade(self, i): 132 | """VS Tumour characteristic according to Koos grading scale: [1..4] or (-1 - post operative)""" 133 | if self.split(i) == 'training_source': 134 | grade = self.train_source_df.loc[i, 'koos'] 135 | return -1 if (grade == 'post-operative-london') else int(grade) 136 | 137 | 138 | # TODO: sync with amid.utils 139 | @contextlib.contextmanager 140 | def open_nii_gz_file(file): 141 
| with file.open('rb') as opened: 142 | with gzip.GzipFile(fileobj=opened) as nii: 143 | nii = nb.FileHolder(fileobj=nii) 144 | yield nb.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 145 | -------------------------------------------------------------------------------- /docs/recipes/RSNABreastCancer.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "source": [ 6 | "## Normalization" 7 | ], 8 | "metadata": { 9 | "collapsed": false 10 | } 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "outputs": [], 16 | "source": [ 17 | "from connectome import Transform\n", 18 | "\n", 19 | "\n", 20 | "class Normalize(Transform):\n", 21 | " __inherit__ = True\n", 22 | "\n", 23 | " def image(image, padding_value, intensity_sign):\n", 24 | " if padding_value is not None:\n", 25 | " if padding_value > 0:\n", 26 | " return padding_value - image\n", 27 | " return image\n", 28 | "\n", 29 | " if intensity_sign == 1:\n", 30 | " return image.max() - image\n", 31 | "\n", 32 | " return image" 33 | ], 34 | "metadata": { 35 | "collapsed": false 36 | } 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "source": [ 41 | "## Zoom to reduce image size" 42 | ], 43 | "metadata": { 44 | "collapsed": false 45 | } 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "outputs": [], 51 | "source": [ 52 | "from connectome import Apply\n", 53 | "from scipy.ndimage import zoom\n", 54 | "\n", 55 | "# 0.25 - is the downsample factor. It should probably be tuned via cross-validation\n", 56 | "Zoom = Apply(image=lambda x: zoom(np.float32(x), 0.25, order=1))" 57 | ], 58 | "metadata": { 59 | "collapsed": false 60 | } 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "source": [ 65 | "## Artifacts and background removal" 66 | ], 67 | "metadata": { 68 | "collapsed": false 69 | } 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "outputs": [], 75 | "source": [ 76 | "from connectome import Transform\n", 77 | "from skimage.morphology import label\n", 78 | "\n", 79 | "\n", 80 | "class GreatestComponent(Transform):\n", 81 | " __inherit__ = True\n", 82 | "\n", 83 | " def image(image):\n", 84 | " lbl = label(image > 0)\n", 85 | " values, counts = np.unique(lbl, return_counts=True)\n", 86 | " foreground = values != 0\n", 87 | " component = values[foreground][counts[foreground].argmax()]\n", 88 | " # select all the components greater than the background\n", 89 | " # + the greatest foreground component\n", 90 | " components = set(values[counts > counts[~foreground]]) | {component}\n", 91 | " if len(components) > 1:\n", 92 | " # if there are several components - pick the one with the greatest intensity\n", 93 | " component = max(components, key=lambda c: image[lbl == c].mean())\n", 94 | "\n", 95 | " return image * (lbl == component)\n", 96 | "\n", 97 | "\n", 98 | "class CropBackground(Transform):\n", 99 | " __inherit__ = True\n", 100 | "\n", 101 | " def image(image):\n", 102 | " mask = image > 0\n", 103 | " xs, = mask.any(0).nonzero()\n", 104 | " ys, = mask.any(1).nonzero()\n", 105 | " return image[ys.min():ys.max() + 1, xs.min():xs.max() + 1]" 106 | ], 107 | "metadata": { 108 | "collapsed": false 109 | } 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "source": [ 114 | "## Data augmentation" 115 | ], 116 | "metadata": { 117 | "collapsed": false 118 | } 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "outputs": [], 124 | "source": [ 
125 | "from connectome import Transform, impure\n", 126 | "import numpy as np\n", 127 | "\n", 128 | "\n", 129 | "class RandomFlip(Transform):\n", 130 | " __inherit__ = True\n", 131 | "\n", 132 | " @impure\n", 133 | " def _flip():\n", 134 | " return np.random.binomial(1, 0.5)\n", 135 | "\n", 136 | " def image(image, _flip):\n", 137 | " if _flip:\n", 138 | " return np.flip(image, axis=1)\n", 139 | " return image" 140 | ], 141 | "metadata": { 142 | "collapsed": false 143 | } 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "source": [ 148 | "## Combining it all together" 149 | ], 150 | "metadata": { 151 | "collapsed": false 152 | } 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "outputs": [], 158 | "source": [ 159 | "from amid.rsna_bc import RSNABreastCancer\n", 160 | "from connectome import Chain\n", 161 | "\n", 162 | "ds = Chain(\n", 163 | " RSNABreastCancer('/path/to/downloaded/folder'),\n", 164 | " Normalize(),\n", 165 | " Apply(image=lambda x: zoom(np.float32(x), 0.25, order=1)),\n", 166 | " GreatestComponent(),\n", 167 | " CropBackground(),\n", 168 | "\n", 169 | " # aug\n", 170 | " RandomFlip(),\n", 171 | ")" 172 | ], 173 | "metadata": { 174 | "collapsed": false 175 | } 176 | } 177 | ], 178 | "metadata": { 179 | "kernelspec": { 180 | "display_name": "Python 3", 181 | "language": "python", 182 | "name": "python3" 183 | }, 184 | "language_info": { 185 | "codemirror_mode": { 186 | "name": "ipython", 187 | "version": 2 188 | }, 189 | "file_extension": ".py", 190 | "mimetype": "text/x-python", 191 | "name": "python", 192 | "nbconvert_exporter": "python", 193 | "pygments_lexer": "ipython2", 194 | "version": "2.7.6" 195 | } 196 | }, 197 | "nbformat": 4, 198 | "nbformat_minor": 0 199 | } 200 | -------------------------------------------------------------------------------- /amid/ct_ich.py: -------------------------------------------------------------------------------- 1 | import nibabel as nb 2 | import numpy as np 3 | import pandas as pd 4 | 5 | from .internals import Dataset, field, licenses, register 6 | 7 | 8 | @register( 9 | body_region='Head', 10 | license=licenses.PhysioNet_RHD_150, 11 | link='https://physionet.org/content/ct-ich/1.3.1/', 12 | modality='CT', 13 | prep_data_size='661M', 14 | raw_data_size='2,8G', 15 | task='Intracranial hemorrhage segmentation', 16 | ) 17 | class CT_ICH(Dataset): 18 | """ 19 | (C)omputed (T)omography Images for (I)ntracranial (H)emorrhage Detection and (S)egmentation. 20 | 21 | This dataset contains 75 head CT scans including 36 scans for patients diagnosed with 22 | intracranial hemorrhage with the following types: 23 | Intraventricular, Intraparenchymal, Subarachnoid, Epidural and Subdural. 24 | 25 | Parameters 26 | ---------- 27 | root : str, Path, optional 28 | path to the folder containing the raw downloaded archives. 29 | If not provided, the cache is assumed to be already populated. 30 | 31 | Notes 32 | ----- 33 | Data can be downloaded here: https://physionet.org/content/ct-ich/1.3.1/. 34 | Then, the folder with raw downloaded data should contain folders `ct_scans` and `masks` along with other files. 
35 | 36 | Examples 37 | -------- 38 | >>> # Place the downloaded archives in any folder and pass the path to the constructor: 39 | >>> ds = CT_ICH(root='/path/to/downloaded/data/folder/') 40 | >>> print(len(ds.ids)) 41 | # 75 42 | >>> print(ds.image(ds.ids[0]).shape) 43 | # (512, 512, 39) 44 | >>> print(ds.mask(ds.ids[0]).shape) 45 | # (512, 512, 39) 46 | """ 47 | 48 | @property 49 | def ids(self): 50 | result = [f'ct_ich_{uid:0=3d}' for uid in [*range(49, 59), *range(66, 131)]] 51 | return tuple(sorted(result)) 52 | 53 | def _image_file(self, i): 54 | num_id = i.split('_')[-1] 55 | return nb.load(self.root / 'ct_scans' / f'{num_id}.nii') 56 | 57 | @field 58 | def image(self, i) -> np.ndarray: 59 | # most CT/MRI scans are integer-valued, this will help us improve compression rates 60 | return np.int16(self._image_file(i).get_fdata()) 61 | 62 | @field 63 | def mask(self, i) -> np.ndarray: 64 | num_id = i.split('_')[-1] 65 | mask_path = self.root / 'masks' / f'{num_id}.nii' 66 | ct_scan_nifti = nb.load(mask_path) 67 | return ct_scan_nifti.get_fdata().astype(bool) 68 | 69 | @field 70 | def affine(self, i) -> np.ndarray: 71 | """The 4x4 matrix that gives the image's spatial orientation.""" 72 | return self._image_file(i).affine 73 | 74 | def spacing(self, i): 75 | """Returns voxel spacing along axes (x, y, z).""" 76 | return tuple(self._image_file(i).header['pixdim'][1:4]) 77 | 78 | @property 79 | def _patient_metadata(self): 80 | return pd.read_csv(self.root / 'Patient_demographics.csv', index_col='Patient Number') 81 | 82 | @property 83 | def _diagnosis_metadata(self): 84 | return pd.read_csv(self.root / 'hemorrhage_diagnosis_raw_ct.csv') 85 | 86 | def _row(self, i): 87 | patient_id = int(i.split('_')[-1]) 88 | return self._patient_metadata.loc[patient_id] 89 | 90 | @field 91 | def age(self, i) -> float: 92 | return self._row(i)['Age\n(years)'] 93 | 94 | @field 95 | def sex(self, i) -> str: 96 | return self._row(i)['Gender'] 97 | 98 | @field 99 | def intraventricular_hemorrhage(self, i) -> bool: 100 | """Returns True if hemorrhage exists and its type is intraventricular.""" 101 | num_id = int(i.split('_')[-1]) 102 | return str(self._patient_metadata['Hemorrhage type based on the radiologists diagnosis '].loc[num_id]) != 'nan' 103 | 104 | @field 105 | def intraparenchymal_hemorrhage(self, i) -> bool: 106 | """Returns True if hemorrhage was diagnosed and its type is intraparenchymal.""" 107 | num_id = int(i.split('_')[-1]) 108 | return str(self._patient_metadata['Unnamed: 4'].loc[num_id]) != 'nan' 109 | 110 | @field 111 | def subarachnoid_hemorrhage(self, i) -> bool: 112 | """Returns True if hemorrhage was diagnosed and its type is subarachnoid.""" 113 | num_id = int(i.split('_')[-1]) 114 | return str(self._patient_metadata['Unnamed: 5'].loc[num_id]) != 'nan' 115 | 116 | @field 117 | def epidural_hemorrhage(self, i) -> bool: 118 | """Returns True if hemorrhage was diagnosed and its type is epidural.""" 119 | num_id = int(i.split('_')[-1]) 120 | return str(self._patient_metadata['Unnamed: 6'].loc[num_id]) != 'nan' 121 | 122 | @field 123 | def subdural_hemorrhage(self, i) -> bool: 124 | """Returns True if hemorrhage was diagnosed and its type is subdural.""" 125 | num_id = int(i.split('_')[-1]) 126 | return str(self._patient_metadata['Unnamed: 7'].loc[num_id]) != 'nan' 127 | 128 | @field 129 | def fracture(self, i) -> bool: 130 | """Returns True if skull fracture was diagnosed.""" 131 | num_id = int(i.split('_')[-1]) 132 | return str(self._patient_metadata['Fracture (yes 1/no 0)'].loc[num_id]) 
!= 'nan' 133 | 134 | @field 135 | def notes(self, i) -> str: 136 | """Returns special notes if they exist.""" 137 | num_id = int(i.split('_')[-1]) 138 | result = str(self._patient_metadata['Note1'].loc[num_id]) 139 | return result if result != 'nan' else None 140 | 141 | @field 142 | def hemorrhage_diagnosis_raw_metadata(self, i): 143 | num_id = int(i.split('_')[-1]) 144 | return self._diagnosis_metadata[self._diagnosis_metadata['PatientNumber'] == num_id] 145 | -------------------------------------------------------------------------------- /amid/verse.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | import json 3 | import zipfile 4 | from pathlib import Path 5 | from typing import Dict, Tuple, Union 6 | from zipfile import ZipFile 7 | 8 | import nibabel 9 | import numpy as np 10 | 11 | from .internals import Dataset, field, licenses, register 12 | 13 | 14 | @register( 15 | body_region=('Thorax', 'Abdomen'), 16 | modality='CT', 17 | task='Vertebrae Segmentation', 18 | link='https://osf.io/4skx2/', 19 | raw_data_size='97G', 20 | license=licenses.CC_BYSA_40, 21 | ) 22 | class VerSe(Dataset): 23 | """ 24 | A Vertebral Segmentation Dataset with Fracture Grading [1]_ 25 | 26 | The dataset was used in the MICCAI-2019 and MICCAI-2020 Vertebrae Segmentation Challenges. 27 | 28 | Parameters 29 | ---------- 30 | root : str, Path, optional 31 | path to the folder containing the raw downloaded archives. 32 | If not provided, the cache is assumed to be already populated. 33 | 34 | Notes 35 | ----- 36 | Download links: 37 | 2019: https://osf.io/jtfa5/ 38 | 2020: https://osf.io/4skx2/ 39 | 40 | Examples 41 | -------- 42 | >>> # Place the downloaded archives in any folder and pass the path to the constructor: 43 | >>> ds = VerSe(root='/path/to/archives/root') 44 | >>> print(len(ds.ids)) 45 | # 374 46 | >>> print(ds.image(ds.ids[0]).shape) 47 | # (512, 512, 214) 48 | 49 | References 50 | ---------- 51 | .. [1] Löffler MT, Sekuboyina A, Jacob A, et al. A Vertebral Segmentation Dataset with Fracture Grading. 52 | Radiol Artif Intell. 2020;2(4):e190138. Published 2020 Jul 29. 
doi:10.1148/ryai.2020190138 53 | """ 54 | 55 | @property 56 | def ids(self): 57 | result = set() 58 | for archive in self.root.glob('*.zip'): 59 | with ZipFile(archive) as zf: 60 | for file in zf.namelist(): 61 | if '/rawdata/' not in file: 62 | continue 63 | 64 | file = Path(file) 65 | patient = file.parent.name[4:] 66 | name = file.name 67 | if 'split' in name: 68 | i = name.split('split')[1][1:] 69 | i = i.split('_')[0] 70 | else: 71 | i = patient 72 | 73 | assert i not in result, i 74 | result.add(i) 75 | 76 | return sorted(result) 77 | 78 | def _file(self, i): 79 | for archive in self.root.glob('*.zip'): 80 | with ZipFile(archive) as zf: 81 | for file in zf.namelist(): 82 | if '/rawdata/' in file and i in file: 83 | return zipfile.Path(archive, file) 84 | 85 | raise ValueError(f'Id "{i}" not found') 86 | 87 | @field 88 | def image(self, i) -> np.ndarray: 89 | with self._file(i).open('rb') as opened: 90 | with gzip.GzipFile(fileobj=opened) as nii: 91 | nii = nibabel.FileHolder(fileobj=nii) 92 | image = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 93 | # most ct scans are integer-valued, this will help us improve compression rates 94 | # (instead of using `image.get_fdata()`) 95 | return np.asarray(image.dataobj) 96 | 97 | @field 98 | def affine(self, i) -> np.ndarray: 99 | """The 4x4 matrix that gives the image's spatial orientation""" 100 | with self._file(i).open('rb') as opened: 101 | with gzip.GzipFile(fileobj=opened) as nii: 102 | nii = nibabel.FileHolder(fileobj=nii) 103 | image = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 104 | return image.affine 105 | 106 | @field 107 | def split(self, i) -> str: 108 | """The split in which this entry is contained: training, validate, test""" 109 | # it's ugly, but it gets the job done (; 110 | return self._file(i).parent.parent.parent.name.split('_')[-1].split('9')[-1] 111 | 112 | @field 113 | def patient(self, i) -> str: 114 | """The unique patient id""" 115 | return self._file(i).parent.name[4:] 116 | 117 | @field 118 | def year(self, i) -> int: 119 | """The year in which this entry was published: 2019, 2020""" 120 | year = self._file(i).parent.parent.parent.name 121 | if year.startswith('dataset-verse'): 122 | assert '19' in year 123 | return 2019 124 | return 2020 125 | 126 | def _derivatives(self, i): 127 | file = self._file(i) 128 | return file.parent.parent.parent / 'derivatives' / file.parent.name 129 | 130 | @field 131 | def centers(self, i) -> Dict[str, Tuple[int, int, int]]: 132 | """Vertebrae centers in format {label: [x, y, z]}""" 133 | ann = [f for f in self._derivatives(i).iterdir() if f.name.endswith('.json') and i in f.name] 134 | if not ann: 135 | return {} 136 | assert len(ann) == 1 137 | (ann,) = ann 138 | 139 | with ann.open() as file: 140 | ann = json.load(file) 141 | 142 | return {k['label']: (k['X'], k['Y'], k['Z']) for k in ann[1:]} 143 | 144 | @field 145 | def masks(self, i) -> Union[np.ndarray, None]: 146 | """Vertebrae masks""" 147 | ann = [f for f in self._derivatives(i).iterdir() if f.name.endswith('.nii.gz') and i in f.name] 148 | if not ann: 149 | return 150 | assert len(ann) == 1 151 | (ann,) = ann 152 | 153 | with ann.open('rb') as opened: 154 | with gzip.GzipFile(fileobj=opened) as nii: 155 | nii = nibabel.FileHolder(fileobj=nii) 156 | mask = nibabel.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 157 | return mask.get_fdata().astype(np.uint8) 158 | -------------------------------------------------------------------------------- /amid/mood.py: 
-------------------------------------------------------------------------------- 1 | import contextlib 2 | import gzip 3 | import zipfile 4 | from pathlib import Path 5 | from zipfile import ZipFile 6 | 7 | import nibabel as nb 8 | import numpy as np 9 | 10 | from .internals import Dataset, field, register 11 | 12 | 13 | @register( 14 | body_region=('Head', 'Abdominal'), 15 | license=None, # FIXME: inherit licenses from the original datasets... 16 | link='http://medicalood.dkfz.de/web/', 17 | modality=('MRI', 'CT'), 18 | prep_data_size='405G', 19 | raw_data_size='120G', 20 | task='Out-of-distribution detection', 21 | ) 22 | class MOOD(Dataset): 23 | """ 24 | A (M)edival (O)ut-(O)f-(D)istribution analysis challenge [1]_ 25 | 26 | This dataset contains raw brain MRI and abdominal CT images. 27 | 28 | Number of training samples: 29 | - Brain: 800 scans ( 256 x 256 x 256 ) 30 | - Abdominal: 550 scans ( 512 x 512 x 512 ) 31 | 32 | For each setup there are 4 toy test samples with OOD cases. 33 | 34 | Parameters 35 | ---------- 36 | root : str, Path, optional 37 | path to the folder containing the raw downloaded archives. 38 | If not provided, the cache is assumed to be already populated. 39 | 40 | Notes 41 | ----- 42 | Follow the download instructions at https://www.synapse.org/#!Synapse:syn21343101/wiki/599515. 43 | 44 | Then, the folder with raw downloaded data should contain four zip archives with data 45 | (`abdom_toy.zip`, `abdom_train.zip`, `brain_toy.zip` and `brain_train.zip`). 46 | 47 | Examples 48 | -------- 49 | >>> # Place the downloaded archives in any folder and pass the path to the constructor: 50 | >>> ds = MOOD(root='/path/to/downloaded/data/folder/') 51 | >>> print(len(ds.ids)) 52 | # 1358 53 | >>> print(ds.image(ds.ids[0]).shape) 54 | # (512, 512, 512) 55 | >>> print(ds.pixel_label(ds.ids[0]).shape) 56 | # (512, 512, 512) 57 | 58 | References 59 | ---------- 60 | .. [1] Zimmerer, Petersen, et al. "Medical Out-of-Distribution Analysis Challenge 2022." 61 | doi: 10.5281/zenodo.6362313 (2022). 
62 | """ 63 | 64 | @property 65 | def ids(self): 66 | result = set() 67 | # zip archives for train images: 68 | for archive in self.root.glob('*.zip'): 69 | if 'brain' in str(archive): # define whether it is brain (MRI) or abdominal (CT) 70 | task = 'brain' 71 | else: 72 | task = 'abdom' 73 | 74 | if 'toy' in str(archive): # fold - train or toy test 75 | fold = 'toy' 76 | else: 77 | fold = 'train' 78 | 79 | with ZipFile(archive) as zf: 80 | for zipinfo in zf.infolist(): 81 | if zipinfo.is_dir(): 82 | continue 83 | 84 | file_stem = Path(zipinfo.filename).stem 85 | if '.nii' in file_stem: 86 | if fold == 'train': 87 | result.add(f'mood_{task}_{fold}_{file_stem.split(".nii")[0]}') 88 | # fold == 'toy' 89 | else: 90 | result.add(f'mood_{task}_{file_stem.split(".nii")[0]}') 91 | 92 | return tuple(sorted(result)) 93 | 94 | @field 95 | def fold(self, i): 96 | """Returns fold: train or toy (test).""" 97 | if 'train' in i: 98 | return 'train' 99 | # if 'toy' in i: 100 | return 'toy' 101 | 102 | @field 103 | def task(self, i): 104 | """Returns task: brain (MRI) or abdominal (CT).""" 105 | if 'brain' in i: 106 | return 'brain' 107 | # if 'abdom' in i: 108 | return 'abdom' 109 | 110 | def _file(self, i): 111 | task, fold, num_id = i.split('_')[-3:] 112 | if fold == 'train': 113 | return zipfile.Path(self.root / f'{task}_{fold}.zip', f'{task}_{fold}/{num_id}.nii.gz') 114 | return zipfile.Path(self.root / f'{task}_{fold}.zip', f'toy/toy_{num_id}.nii.gz') 115 | 116 | @field 117 | def image(self, i): 118 | with open_nii_gz_file(self._file(i)) as nii_image: 119 | return np.asarray(nii_image.dataobj) 120 | 121 | @field 122 | def affine(self, i): 123 | """The 4x4 matrix that gives the image's spatial orientation.""" 124 | with open_nii_gz_file(self._file(i)) as nii_image: 125 | return nii_image.affine 126 | 127 | def spacing(self, i): 128 | """Returns voxel spacing along axes (x, y, z).""" 129 | with open_nii_gz_file(self._file(i)) as nii_image: 130 | return tuple(nii_image.header['pixdim'][1:4]) 131 | 132 | @field 133 | def sample_label(self, i): 134 | """ 135 | Returns sample-level OOD score for toy examples and None otherwise. 136 | 0 indicates no abnormality and 1 indicates abnormal input. 137 | """ 138 | file = self._file(i) 139 | if 'toy' in file.name: 140 | with (file.parent.parent / 'toy_label/sample' / f'{file.name}.txt').open('r') as nii: 141 | return int(nii.read()) 142 | 143 | @field 144 | def pixel_label(self, i): 145 | """ 146 | Returns voxel-level OOD scores for toy examples and None otherwise. 147 | 0 indicates no abnormality and 1 indicates abnormal input. 
148 | """ 149 | file = self._file(i) 150 | if 'toy' in file.name: 151 | with open_nii_gz_file(file.parent.parent / 'toy_label/pixel' / file.name) as nii_image: 152 | return np.bool_(nii_image.get_fdata()) 153 | 154 | 155 | # TODO: sync with amid.utils 156 | @contextlib.contextmanager 157 | def open_nii_gz_file(file): 158 | with file.open('rb') as opened: 159 | with gzip.GzipFile(fileobj=opened) as nii: 160 | nii = nb.FileHolder(fileobj=nii) 161 | yield nb.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 162 | -------------------------------------------------------------------------------- /amid/cancer_500/dataset.py: -------------------------------------------------------------------------------- 1 | import codecs 2 | import json 3 | import warnings 4 | from functools import cached_property 5 | from pathlib import Path 6 | 7 | import numpy as np 8 | import pydicom 9 | from dicom_csv import ( 10 | get_common_tag, 11 | get_orientation_matrix, 12 | get_pixel_spacing, 13 | get_slice_locations, 14 | get_tag, 15 | order_series, 16 | stack_images, 17 | ) 18 | from dicom_csv.exceptions import TagMissingError 19 | from tqdm.auto import tqdm 20 | 21 | from ..internals import Dataset, field, register 22 | from ..utils import get_series_date 23 | from .nodules import get_nodules 24 | 25 | 26 | @register( 27 | body_region='Thorax', 28 | modality='CT', 29 | task='Lung Cancer Detection', 30 | link='https://mosmed.ai/en/datasets/mosmeddata-kt-s-priznakami-raka-legkogo-tip-viii/', 31 | prep_data_size='103G', 32 | raw_data_size='187G', 33 | ) 34 | class MoscowCancer500(Dataset): 35 | """ 36 | The Moscow Radiology Cancer-500 dataset. 37 | 38 | Parameters 39 | ---------- 40 | root : str, Path, optional 41 | path to the folder containing the raw downloaded files. 42 | If not provided, the cache is assumed to be already populated. 43 | 44 | 45 | Notes 46 | ----- 47 | Download links: 48 | https://mosmed.ai/en/datasets/mosmeddata-kt-s-priznakami-raka-legkogo-tip-viii/ 49 | After pressing the `download` button you will have to provide an email address to which further instructions 50 | will be sent. 
51 | 52 | Examples 53 | -------- 54 | >>> # Place the downloaded files in any folder and pass the path to the constructor: 55 | >>> ds = MoscowCancer500(root='/path/to/files/root') 56 | >>> print(len(ds.ids)) 57 | # 979 58 | >>> print(ds.image(ds.ids[0]).shape) 59 | # (512, 512, 67) 60 | """ 61 | 62 | @cached_property 63 | def _mapping(self): 64 | path = self.root / 'series-to-files.json' 65 | if not path.exists(): 66 | mapping = {} 67 | for file in tqdm( 68 | self.root.rglob('*'), total=sum(1 for _ in self.root.rglob('*')), desc='Analyzing folder structure' 69 | ): 70 | if file.is_dir(): 71 | continue 72 | 73 | series = pydicom.dcmread(file, specific_tags=[(0x0020, 0x000E)]).SeriesInstanceUID 74 | mapping[series].append(str(file.relative_to(self.root))) 75 | 76 | with open(path, 'w') as file: 77 | json.dump(mapping, file) 78 | return mapping 79 | 80 | with open(path) as file: 81 | return json.load(file) 82 | 83 | @property 84 | def ids(self): 85 | # this id has an undefined image orientation 86 | ignore = {'1.2.643.5.1.13.13.12.2.77.8252.604378326291403.583548115656123.'} 87 | return tuple(sorted(set(self._mapping) - ignore)) 88 | 89 | def _series(self, i): 90 | series = [pydicom.dcmread(Path(self.root, 'dicom', f)) for f in self._mapping[i]] 91 | series = order_series(series, decreasing=False) 92 | return series 93 | 94 | @field 95 | def image(self, i): 96 | x = stack_images(self._series(i), -1).astype(np.int16) 97 | # DICOM specifies that the first 2 axes are (y, x). let's fix that 98 | return np.moveaxis(x, 0, 1) 99 | 100 | @field 101 | def study_uid(self, i): 102 | return get_common_tag(self._series(i), 'StudyInstanceUID') 103 | 104 | @field 105 | def series_uid(self, i): 106 | return get_common_tag(self._series(i), 'SeriesInstanceUID') 107 | 108 | @field 109 | def sop_uids(self, i): 110 | return [str(get_tag(i, 'SOPInstanceUID')) for i in self._series(i)] 111 | 112 | @field 113 | def pixel_spacing(self, i): 114 | return get_pixel_spacing(self._series(i)).tolist() 115 | 116 | @field 117 | def slice_locations(self, i): 118 | return get_slice_locations(self._series(i)) 119 | 120 | @field 121 | def orientation_matrix(self, i): 122 | return get_orientation_matrix(self._series(i)) 123 | 124 | @field 125 | def instance_numbers(self, i): 126 | try: 127 | instance_numbers = [int(get_tag(i, 'InstanceNumber')) for i in self._series(i)] 128 | if not _is_monotonic(instance_numbers): 129 | warnings.warn('Ordered series has non-monotonic instance numbers.') 130 | 131 | return instance_numbers 132 | except TagMissingError: 133 | pass 134 | 135 | @field 136 | def conv_kernel(self, i): 137 | return get_common_tag(self._series(i), 'ConvolutionKernel', default=None) 138 | 139 | @field 140 | def kvp(self, i): 141 | return get_common_tag(self._series(i), 'KVP', default=None) 142 | 143 | @field 144 | def patient_id(self, i): 145 | return get_common_tag(self._series(i), 'PatientID', default=None) 146 | 147 | @field 148 | def study_date(self, i): 149 | return get_series_date(self._series(i)) 150 | 151 | @field 152 | def accession_number(self, i): 153 | return get_common_tag(self._series(i), 'AccessionNumber', default=None) 154 | 155 | @field 156 | def nodules(self, i): 157 | folders = {Path(f).parent.name for f in self._mapping[i]} 158 | if len(folders) != 1: 159 | # can't determine protocol filename 160 | return 161 | 162 | (filename,) = folders 163 | protocol = json.load(codecs.open(str(self.root / 'protocols' / f'{filename}.json'), 'r', 'utf-8-sig')) 164 | 165 | series_number = 
get_common_tag(self._series(i), 'SeriesNumber') 166 | try: 167 | return get_nodules(protocol, series_number, self.slice_locations(i)) 168 | except ValueError: 169 | pass 170 | 171 | 172 | def _is_monotonic(sequence): 173 | sequence = list(sequence) 174 | return sequence == sorted(sequence) or sequence == sorted(sequence)[::-1] 175 | -------------------------------------------------------------------------------- /amid/cancer_500/nodules.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import numpy as np 4 | 5 | from .typing import Cancer500Nodule, Comment, Review, Texture 6 | 7 | 8 | def get_nodules(protocol, series_number, slice_locations): 9 | if protocol['nodules'] is None: 10 | num_doctors = len(protocol['doctors']) 11 | assert num_doctors in [3, 6] 12 | 13 | if len([d for d in protocol['doctors'] if definetely_no_nodules(d['comment'])]) > num_doctors / 2: 14 | return [] 15 | else: 16 | raise ValueError 17 | 18 | assert protocol['nodules'] 19 | 20 | nodules = [] 21 | for nodule in protocol['nodules']: 22 | annotations = dict(get_nodule_annotations(nodule[-1], series_number, slice_locations)) 23 | if not annotations: 24 | raise ValueError 25 | 26 | nodules.append(annotations) 27 | 28 | return nodules 29 | 30 | 31 | def definetely_no_nodules(overall_comment): 32 | overall_comment = overall_comment.lower() 33 | prefixes = ['нет очагов', 'очагов нет', 'очаги не выявлены', 'достоверно очагов нет'] 34 | return any(overall_comment.startswith(p) for p in prefixes) 35 | 36 | 37 | def get_nodule_annotations(nodule: dict, series_number: int, slice_locations: list): 38 | for rater, ann in nodule.items(): 39 | if ann is None: 40 | continue 41 | 42 | if 'series_no' in ann and str(series_number) not in ann['series_no']: 43 | warnings.warn('Cannot check that annotation belongs to this particular series.') 44 | continue 45 | 46 | try: 47 | yield rater, parse_nodule_annotation(ann, slice_locations) 48 | except ValueError as e: 49 | warnings.warn(str(e)) 50 | continue 51 | 52 | 53 | def parse_nodule_annotation(ann: dict, slice_locations: list): 54 | return Cancer500Nodule( 55 | center_voxel=parse_center_voxel(ann, slice_locations), 56 | review=parse_review(ann), 57 | comment=parse_comment(ann), 58 | diameter_mm=parse_diameter_mm(ann), 59 | texture=parse_texture(ann), 60 | malignancy=parse_malignancy(ann), 61 | ) 62 | 63 | 64 | def parse_center_voxel(ann: dict, slice_locations: list): 65 | i, j = int(ann['x']), int(ann['y']) 66 | assert i == ann['x'] 67 | assert j == ann['y'] 68 | 69 | assert 'z type' in ann 70 | assert ann['z type'].strip() == 'mm' 71 | diff = np.abs(np.array(slice_locations) - ann['z']) 72 | if np.min(diff) >= 1: 73 | raise ValueError('Cannot determine slice.') 74 | slc = np.argmin(diff) 75 | 76 | comments = [review['comment'] for review in ann['expert decision']] 77 | if 'z = 258 = -151,6 ' in comments: 78 | slc = 258 79 | elif 'не 134 а 143 по оси Х' in comments: 80 | i = 143 81 | elif ( 82 | 'неправильная координата х (должно быть 73, а не 734). 
сосуд, несовпадение типа (другое), неверный размер' 83 | in comments 84 | ): 85 | i = 73 86 | elif 'ошибка в координате Y - должно быть 296, тогда очаг есть' in comments: 87 | j = 296 88 | elif 'срез съехал на два ниже' in comments: 89 | slc -= 2 90 | elif set(comments) & { 91 | 'очага нет', 92 | 'промахно', 93 | 'промахнулись с координатой х', 94 | 'часть координат не совпадает с топикой очага', 95 | 'часть координат не совпадает с топикой очага, неверный размер', 96 | }: 97 | raise ValueError('Cannot detetmine center voxel') 98 | 99 | return i, j, slc 100 | 101 | 102 | def parse_review(ann: dict): 103 | decisions = {review['decision'] for review in ann['expert decision']} 104 | if 'confirmed' in decisions: 105 | return Review.Confirmed 106 | elif 'confirmed_partially' in decisions: 107 | return Review.ConfirmedPartially 108 | elif 'doubt' in decisions: 109 | return Review.Doubt 110 | elif 'rejected' in decisions: 111 | return Review.Rejected 112 | else: 113 | raise ValueError(decisions) 114 | 115 | 116 | def parse_comment(ann: dict): 117 | comments = {review['comment'] for review in ann['expert decision']} 118 | if 'кальцинат, несовпадение типа (другое)' in comments: 119 | return Comment.Calcium 120 | elif 'фиброз' in comments: 121 | return Comment.Fibrosis 122 | elif 'внутрилегочный л\\у' in comments: 123 | return Comment.LymphNode 124 | elif 'очаг с кальцинацией, несовпадение типа (другое)' in comments: 125 | return Comment.Calcified 126 | elif 'бронхоэктаз с содержимым, несовпадение типа (другое)' in comments: 127 | return Comment.Bronchiectasis 128 | elif 'сосуд' in comments: 129 | return Comment.Vessel 130 | 131 | 132 | def parse_diameter_mm(ann: dict): 133 | if any('неверный размер' in review['comment'].lower() for review in ann['expert decision']): 134 | return 135 | 136 | return round(ann['diameter (mm)'], 2) 137 | 138 | 139 | def parse_texture(ann: dict): 140 | nodule_types = {review['type'] for review in ann['expert decision']} & {'#0S', '#1PS', '#2GG', 'другое'} 141 | if nodule_types: 142 | assert len(nodule_types) == 1 143 | (nodule_type,) = nodule_types 144 | elif parse_review(ann) in [Review.Confirmed, Review.ConfirmedPartially, Review.Doubt]: 145 | assert ann['type'] in ['#0S', '#1PS', '#2GG'] 146 | nodule_type = ann['type'] 147 | else: 148 | return 149 | 150 | if nodule_type == '#0S': 151 | return Texture.Solid 152 | elif nodule_type == '#1PS': 153 | return Texture.PartSolid 154 | elif nodule_type == '#2GG': 155 | return Texture.GroundGlass 156 | elif nodule_type == 'другое': 157 | return Texture.Other 158 | 159 | 160 | def parse_malignancy(ann: dict): 161 | malignant = [review['malignant'] for review in ann['expert decision']] 162 | if all(malignant): 163 | return True 164 | elif not any(malignant): 165 | return False 166 | -------------------------------------------------------------------------------- /amid/lits/dataset.py: -------------------------------------------------------------------------------- 1 | import zipfile 2 | from pathlib import Path 3 | from zipfile import ZipFile 4 | 5 | import nibabel as nb 6 | import numpy as np 7 | 8 | from ..internals import Dataset, licenses, register 9 | 10 | 11 | @register( 12 | body_region='Abdominal', 13 | license=licenses.CC_BYNCND_40, 14 | link='https://competitions.codalab.org/competitions/17094', 15 | modality='CT', 16 | prep_data_size='24,7G', 17 | raw_data_size='35G', 18 | task='Segmentation', 19 | ) 20 | class LiTS(Dataset): 21 | """ 22 | A (Li)ver (T)umor (S)egmentation dataset [1]_ from Medical Segmentation 
Decathlon [2]_ 23 | 24 | There are two segmentation tasks on this dataset: liver and liver tumor segmentation. 25 | 26 | Parameters 27 | ---------- 28 | root : str, Path, optional 29 | path to the folder containing the raw downloaded archives. 30 | If not provided, the cache is assumed to be already populated. 31 | 32 | 33 | Notes 34 | ----- 35 | Follow the download instructions at https://competitions.codalab.org/competitions/17094. 36 | 37 | Then, the folder with raw downloaded data should contain two zip archives with the train data 38 | (`Training_Batch1.zip` and `Training_Batch2.zip`) 39 | and a folder with the test data 40 | (`LITS-Challenge-Test-Data`). 41 | 42 | The folder with test data should have original structure: 43 | <...>/LITS-Challenge-Test-Data/test-volume-0.nii 44 | <...>/LITS-Challenge-Test-Data/test-volume-1.nii 45 | ... 46 | 47 | P.S. Organs boxes are also provided from a separate source https://github.com/superxuang/caffe_3d_faster_rcnn. 48 | 49 | Examples 50 | -------- 51 | >>> # Place the downloaded archives in any folder and pass the path to the constructor: 52 | >>> ds = LiTS(root='/path/to/downloaded/data/folder/') 53 | >>> print(len(ds.ids)) 54 | # 201 55 | >>> print(ds.image(ds.ids[0]).shape) 56 | # (512, 512, 163) 57 | >>> print(ds.tumor_mask(ds.ids[80]).shape) 58 | # (512, 512, 771) 59 | 60 | References 61 | ---------- 62 | .. [1] Bilic, Patrick, et al. "The liver tumor segmentation benchmark (lits)." 63 | arXiv preprint arXiv:1901.04056 (2019). 64 | .. [2] Antonelli, Michela, et al. "The medical segmentation decathlon." 65 | arXiv preprint arXiv:2106.05735 (2021). 66 | """ 67 | 68 | @property 69 | def ids(self): 70 | result = set() 71 | # zip archives for train images: 72 | for archive in self.root.glob('*.zip'): 73 | with ZipFile(archive) as zf: 74 | for zipinfo in zf.infolist(): 75 | if zipinfo.is_dir(): 76 | continue 77 | 78 | file_stem = Path(zipinfo.filename).stem 79 | if 'volume' in file_stem: 80 | result.add('lits-train-' + file_stem.split('-')[-1]) 81 | 82 | # folder for test images: 83 | for file in (self.root / 'LITS-Challenge-Test-Data').glob('*'): 84 | result.add('lits-test-' + file.stem.split('-')[-1]) 85 | 86 | return tuple(sorted(result)) 87 | 88 | def fold(self, i): 89 | num_id = i.split('-')[-1] 90 | 91 | if 'train' in i: 92 | for archive in self.root.glob('*.zip'): 93 | batch = '1' if ('1' in archive.stem) else '2' 94 | 95 | with ZipFile(archive) as zf: 96 | for zipinfo in zf.infolist(): 97 | if zipinfo.is_dir(): 98 | continue 99 | 100 | if num_id == Path(zipinfo.filename).stem.split('-')[-1]: 101 | return f'train_batch_{batch}' 102 | 103 | else: # if 'test' in i: 104 | return 'test' 105 | 106 | def _file(self, i): 107 | num_id = i.split('-')[-1] 108 | 109 | if 'train' in i: 110 | for archive in self.root.glob('*.zip'): 111 | with ZipFile(archive) as zf: 112 | for zipinfo in zf.infolist(): 113 | if zipinfo.is_dir(): 114 | continue 115 | 116 | file = Path(zipinfo.filename) 117 | if ('volume' in file.stem) and (num_id == file.stem.split('-')[-1]): 118 | return zipfile.Path(str(archive), str(file)) 119 | 120 | else: # if 'test' in i: 121 | return self.root / 'LITS-Challenge-Test-Data' / f'test-volume-{num_id}.nii' 122 | 123 | raise KeyError(f'Id "{i}" not found') 124 | 125 | def image(self, i): 126 | with self._file(i).open('rb') as nii: 127 | nii = nb.FileHolder(fileobj=nii) 128 | image = nb.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 129 | # most ct scans are integer-valued, this will help us improve compression rates 130 | 
return np.int16(image.get_fdata()) 131 | 132 | def affine(self, i): 133 | """The 4x4 matrix that gives the image's spatial orientation.""" 134 | with self._file(i).open('rb') as nii: 135 | nii = nb.FileHolder(fileobj=nii) 136 | image = nb.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 137 | return image.affine 138 | 139 | def spacing(self, i): 140 | """Returns voxel spacing along axes (x, y, z).""" 141 | with self._file(i).open('rb') as nii: 142 | nii = nb.FileHolder(fileobj=nii) 143 | image = nb.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 144 | return tuple(image.header['pixdim'][1:4]) 145 | 146 | def mask(self, i): 147 | file = self._file(i) 148 | if 'test' not in file.name: 149 | with (file.parent / file.name.replace('volume', 'segmentation')).open('rb') as nii: 150 | nii = nb.FileHolder(fileobj=nii) 151 | image = nb.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 152 | return np.uint8(image.get_fdata()) 153 | -------------------------------------------------------------------------------- /amid/upenn_gbm/upenn_gbm.py: -------------------------------------------------------------------------------- 1 | from functools import cached_property 2 | 3 | import nibabel as nb 4 | import numpy as np 5 | import pandas as pd 6 | 7 | from ..internals import Dataset, licenses, register 8 | from .data_classes import AcquisitionInfo, ClinicalInfo 9 | 10 | 11 | @register( 12 | body_region='Head', 13 | license=licenses.CC_BY_40, 14 | link='https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=70225642', 15 | modality=('FLAIR', 'MRI T1', 'MRI T1GD', 'MRI T2', 'DSC MRI', 'DTI MRI'), 16 | prep_data_size='70G', 17 | raw_data_size='69G', 18 | task='Segmentation', 19 | ) 20 | class UPENN_GBM(Dataset): 21 | """ 22 | Multi-parametric magnetic resonance imaging (mpMRI) scans for de novo Glioblastoma 23 | (GBM) patients from the University of Pennsylvania Health System (UPENN-GBM). 24 | Dataset contains 630 patients. 25 | 26 | All samples are registered to a common atlas (SRI) 27 | using a uniform preprocessing and the segmentation are aligned with them. 28 | 29 | 30 | Parameters 31 | ---------- 32 | root : str, Path, optional 33 | path to the folder containing the raw downloaded archives. 34 | If not provided, the cache is assumed to be already populated. 35 | 36 | Notes 37 | ----- 38 | Follow the download instructions at https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=70225642 39 | Download to the root folder nifti images and metadata. Organise folder as folows: 40 | 41 | 42 | <...>//NIfTI-files/images_segm/UPENN-GBM-00054_11_segm.nii.gz 43 | <...>//NIfTI-files/... 44 | 45 | <...>//UPENN-GBM_clinical_info_v1.0.csv 46 | <...>//UPENN-GBM_acquisition.csv 47 | 48 | 49 | Examples 50 | -------- 51 | >>> # Place the downloaded archives in any folder and pass the path to the constructor: 52 | >>> ds = UPENN_GBM(root='/path/to/downloaded/data/folder/') 53 | >>> print(len(ds.ids)) 54 | # 671 55 | >>> print(ds.image(ds.ids[215]).shape) 56 | # (4, 240, 240, 155) 57 | >>> print(d.acqusition_info(d.ids[215]).manufacturer) 58 | # SIEMENS 59 | 60 | References 61 | ---------- 62 | .. [1] Bakas, S., Sako, C., Akbari, H., Bilello, M., Sotiras, A., Shukla, G., Rudie, 63 | J. D., Flores Santamaria, N., Fathi Kazerooni, A., Pati, S., Rathore, S., 64 | Mamourian, E., Ha, S. M., Parker, W., Doshi, J., Baid, U., Bergman, M., Binder, Z. A., Verma, R., … Davatzikos, 65 | C. (2021). 
Multi-parametric magnetic resonance imaging (mpMRI) scans for de novo 66 | Glioblastoma (GBM) patients from the University of Pennsylvania Health System (UPENN-GBM) 67 | (Version 2) [Data set]. The Cancer Imaging Archive. 68 | https://doi.org/10.7937/TCIA.709X-DN49 69 | 70 | """ 71 | 72 | @property 73 | def ids(self): 74 | ids = [x.name for x in (self.root / 'NIfTI-files/images_structural').iterdir()] 75 | return tuple(sorted(ids)) 76 | 77 | @property 78 | def modalities(self): 79 | return ['T1', 'T1GD', 'T2', 'FLAIR'] 80 | 81 | @property 82 | def dsc_modalities(self): 83 | return ['', 'ap-rCBV', 'PH', 'PSR'] 84 | 85 | @property 86 | def dti_modalities(self): 87 | return ['AD', 'FA', 'RD', 'TR'] 88 | 89 | def _mask_path(self, i): 90 | p1 = self.root / 'NIfTI-files/images_segm' 91 | p2 = self.root / 'NIfTI-files/automated_segm' 92 | p1 = list(p1.glob(i + '*')) 93 | p2 = list(p2.glob(i + '*')) 94 | return p1[0] if p1 else p2[0] if p2 else None 95 | 96 | def mask(self, i): 97 | path = self._mask_path(i) 98 | if not path: 99 | return None 100 | return np.asarray(nb.load(path).get_fdata()) 101 | 102 | def is_mask_automated(self, i): 103 | path = self._mask_path(i) 104 | if path is None: 105 | return None 106 | return path.parent.name == 'automated_segm' 107 | 108 | def image(self, i): 109 | path = self.root / f'NIfTI-files/images_structural/{i}' 110 | image_pathes = [path / f'{i}_{mod}.nii.gz' for mod in self.modalities] 111 | images = [np.asarray(nb.load(p).dataobj) for p in image_pathes] 112 | return np.stack(images) 113 | 114 | def image_unstripped(self, i): 115 | path = self.root / f'NIfTI-files/images_structural_unstripped/{i}' 116 | image_pathes = [path / f'{i}_{mod}_unstripped.nii.gz' for mod in self.modalities] 117 | images = [np.asarray(nb.load(p).dataobj) for p in image_pathes] 118 | return np.stack(images) 119 | 120 | def image_DTI(self, i): 121 | path = self.root / f'NIfTI-files/images_DTI/{i}' 122 | if not path.exists(): 123 | return None 124 | image_pathes = [path / f'{i}_DTI_{mod}.nii.gz' for mod in self.dti_modalities] 125 | images = [np.asarray(nb.load(p).dataobj) for p in image_pathes] 126 | return np.stack(images) 127 | 128 | def image_DSC(self, i): 129 | path = self.root / f'NIfTI-files/images_DSC/{i}' 130 | if not path.exists(): 131 | return None 132 | image_pathes = [path / (f'{i}_DSC_{mod}.nii.gz' if mod else f'{i}_DSC.nii.gz') for mod in self.dsc_modalities] 133 | images = [np.asarray(nb.load(p).dataobj) for p in image_pathes] 134 | return images 135 | 136 | @cached_property 137 | def _clinical_info(self): 138 | return pd.read_csv(self.root / 'UPENN-GBM_clinical_info_v1.0.csv') 139 | 140 | @cached_property 141 | def _acqusition_info(self): 142 | return pd.read_csv(self.root / 'UPENN-GBM_acquisition.csv') 143 | 144 | def clinical_info(self, i): 145 | row = self._clinical_info[self._clinical_info.ID == i] 146 | return ClinicalInfo(*row.iloc[0, 1:]) 147 | 148 | def acqusition_info(self, i): 149 | row = self._acqusition_info[self._acqusition_info.ID == i] 150 | return AcquisitionInfo(*row.iloc[0, 1:]) 151 | 152 | def subject_id(self, i): 153 | return i.split('_')[0] 154 | 155 | def affine(self, i): 156 | return np.array([[-1.0, 0.0, 0.0, -0.0], [0.0, -1.0, 0.0, 239.0], [0.0, 0.0, 1.0, 0.0], [0.0, 0.0, 0.0, 1.0]]) 157 | 158 | def spacing(self, i): 159 | return (1, 1, 1) 160 | -------------------------------------------------------------------------------- /amid/deeplesion.py: -------------------------------------------------------------------------------- 1 | from functools 
import cached_property 2 | 3 | import deli 4 | import nibabel 5 | import numpy as np 6 | 7 | from .internals import Dataset, register 8 | 9 | 10 | @register( 11 | body_region=('Abdomen', 'Thorax'), 12 | link='https://nihcc.app.box.com/v/DeepLesion', 13 | modality='CT', 14 | prep_data_size='259G', 15 | raw_data_size='259G', 16 | task=('Localisation', 'Detection', 'Classification'), 17 | ) 18 | class DeepLesion(Dataset): 19 | """ 20 | DeepLesion is composed of 33,688 bookmarked radiology images from 21 | 10,825 studies of 4,477 unique patients. For every bookmarked image, a bound- 22 | ing box is created to cover the target lesion based on its measured diameters [1]. 23 | 24 | Parameters 25 | ---------- 26 | root : str, Path, optional 27 | path to the folder containing `DL_info.csv` file and a subfolder `Images_nifti` with 20094 nii.gz files. 28 | 29 | Notes 30 | ----- 31 | Dataset is available at https://nihcc.app.box.com/v/DeepLesion 32 | 33 | To download the data we recommend using a Python script provided by the authors `batch_download_zips.py`. 34 | Once you download the data and unarchive all 56 zip archives, you should run `DL_save_nifti.py` 35 | provided by the authors to convert 2D PNGs into 20094 nii.gz files. 36 | 37 | Example 38 | -------- 39 | >>> ds = DeepLesion(root='/path/to/folder') 40 | >>> print(len(ds.ids)) 41 | # 20094 42 | 43 | References 44 | ---------- 45 | .. [1] Yan, Ke, Xiaosong Wang, Le Lu, and Ronald M. Summers. "Deeplesion: Automated deep mining, 46 | categorization and detection of significant radiology image findings using large-scale clinical 47 | lesion annotations." arXiv preprint arXiv:1710.01766 (2017). 48 | 49 | """ 50 | 51 | @property 52 | def ids(self): 53 | return tuple(sorted(file.name.replace('.nii.gz', '') for file in (self.root / 'Images_nifti').glob('*.nii.gz'))) 54 | 55 | def _image_file(self, i): 56 | return nibabel.load(self.root / 'Images_nifti' / f'{i}.nii.gz') 57 | 58 | @cached_property 59 | def _metadata(self): 60 | df = deli.load(self.root / 'DL_info.csv') 61 | 62 | cols_to_transform = [ 63 | 'Measurement_coordinates', 64 | 'Bounding_boxes', 65 | 'Lesion_diameters_Pixel_', 66 | 'Normalized_lesion_location', 67 | ] 68 | for col in cols_to_transform: 69 | df[col] = df[col].apply(lambda x: list(map(float, x.split(',')))) 70 | 71 | df['Slice_range_list'] = df['Slice_range'].apply(lambda x: list(map(int, x.split(',')))) 72 | 73 | def get_ids(x): 74 | patient_study_series = '_'.join(x.File_name.split('_')[:3]) 75 | slice_range_list = list(map(str, x.Slice_range_list)) 76 | slice_range_list = [num.zfill(3) for num in slice_range_list] 77 | slice_range_list = '-'.join(slice_range_list) 78 | return f'{patient_study_series}_{slice_range_list}' 79 | 80 | df['ids'] = df.apply(get_ids, axis=1) 81 | return df 82 | 83 | def _row(self, i): 84 | # funny story, f-string does not work for pandas.query, 85 | # @ syntax does not work for linter, use # noqa 86 | return self._metadata.query('ids==@i') 87 | 88 | def patient_id(self, i): 89 | patient, study, series = map(int, i.split('_')[:3]) 90 | return patient 91 | 92 | def study_id(self, i): 93 | patient, study, series = map(int, i.split('_')[:3]) 94 | return study 95 | 96 | def series_id(self, i): 97 | patient, study, series = map(int, i.split('_')[:3]) 98 | return series 99 | 100 | def sex(self, i): 101 | return self._row(i).Patient_gender.iloc[0] 102 | 103 | def age(self, i): 104 | """Patient Age might be different for different studies (dataset contains longitudinal records).""" 105 | return 
self._row(i).Patient_age.iloc[0] 106 | 107 | def ct_window(self, i): 108 | """CT window extracted from DICOMs. Recall that these are min-max values for windowing, not width-level.""" 109 | return self._row(i).DICOM_windows.iloc[0] 110 | 111 | def affine(self, i): 112 | return self._image_file(i).affine 113 | 114 | def spacing(self, i): 115 | return tuple(self._image_file(i).header['pixdim'][1:4]) 116 | 117 | def image(self, i): 118 | """Some 3D volumes are stored as separate subvolumes, e.g. ds.ids[15000] and ds.ids[15001].""" 119 | return np.asarray(self._image_file(i).dataobj) 120 | 121 | def train_val_test(self, i): 122 | """Authors-defined, randomly generated patient-level data split: train=1, validation=2, test=3, 123 | 70/15/15 ratio.""" 124 | return int(self._row(i).Train_Val_Test.iloc[0]) 125 | 126 | def lesion_position(self, i): 127 | """Lesion measurements as they appear in DL_info.csv, for details see 128 | https://nihcc.app.box.com/v/DeepLesion/file/306056134060 .""" 129 | position = self._row(i)[ 130 | [ 131 | 'Slice_range_list', 132 | 'Key_slice_index', 133 | 'Measurement_coordinates', 134 | 'Bounding_boxes', 135 | 'Lesion_diameters_Pixel_', 136 | 'Normalized_lesion_location', 137 | ] 138 | ].to_dict('list') 139 | position['Slice_range_list'] = position['Slice_range_list'][0] 140 | return position 141 | 142 | def mask(self, i): 143 | """Mask of the provided bounding boxes. Recall that the bounding box annotation 144 | is very coarse: it only covers a single 2D slice.""" 145 | mask = np.zeros_like(self.image(i)) 146 | lesion_position = self.lesion_position(i) 147 | min_index = lesion_position['Slice_range_list'][0] 148 | for idx, slice_index in enumerate(lesion_position['Key_slice_index']): 149 | image_index = slice_index - min_index 150 | top_left_x, top_left_y, bot_right_x, bot_right_y = lesion_position['Bounding_boxes'][idx] 151 | mask[ 152 | int(np.floor(top_left_y)) : int(np.ceil(bot_right_y)), 153 | int(np.floor(top_left_x)) : int(np.ceil(bot_right_x)), 154 | image_index, 155 | ] = 1 156 | return mask 157 | -------------------------------------------------------------------------------- /amid/cc359/dataset.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import gzip 3 | import zipfile 4 | from pathlib import Path 5 | from zipfile import ZipFile 6 | 7 | import nibabel as nb 8 | import numpy as np 9 | 10 | from ..internals import Dataset, licenses, register 11 | 12 | 13 | @register( 14 | body_region='Head', 15 | license=licenses.CC_BYND_40, 16 | link='https://sites.google.com/view/calgary-campinas-dataset/home', 17 | modality='MRI T1', 18 | prep_data_size='14,66G', 19 | raw_data_size='4,1G', 20 | task='Segmentation', 21 | ) 22 | class CC359(Dataset): 23 | """ 24 | A (C)algary-(C)ampinas public brain MR dataset with (359) volumetric images [1]_. 25 | 26 | There are three segmentation tasks on this dataset: (i) brain, (ii) hippocampus, and 27 | (iii) White-Matter (WM), Gray-Matter (GM), and Cerebrospinal Fluid (CSF) segmentation. 28 | 29 | Parameters 30 | ---------- 31 | root : str, Path, optional 32 | path to the folder containing the raw downloaded archives. 33 | If not provided, the cache is assumed to be already populated.
34 | 35 | 36 | Notes 37 | ----- 38 | homepage (upd): https://sites.google.com/view/calgary-campinas-dataset/home 39 | homepage (old): https://miclab.fee.unicamp.br/calgary-campinas-359-updated-05092017 40 | 41 | To obtain the MR images and the brain and hippocampus segmentation masks, please follow the instructions 42 | at the download platform: https://portal.conp.ca/dataset?id=projects/calgary-campinas. 43 | 44 | Via the `datalad` library you need to download three zip archives: 45 | - `Original.zip` (the original MR images) 46 | - `hippocampus_staple.zip` (Silver-standard hippocampus masks generated using STAPLE) 47 | - `Silver-standard-machine-learning.zip` (Silver-standard brain masks generated using a machine learning method) 48 | 49 | To date, the WM, GM, and CSF masks can be downloaded only from Google Drive: 50 | https://drive.google.com/drive/u/0/folders/0BxLb0NB2MjVZNm9JY1pWNFp6WTA?resourcekey=0-2sXMr8q-n2Nn6iY3PbBAdA. 51 | 52 | Here you need to manually download the folder (relative to the Google Drive root above) 53 | `CC359/Reconstructed/CC359/WM-GM-CSF/` 54 | 55 | So the root folder to pass to this dataset class should contain four objects: 56 | - three zip archives (`Original.zip`, `hippocampus_staple.zip`, and `Silver-standard-machine-learning.zip`) 57 | - one folder `WM-GM-CSF` with the original structure: 58 | <...>/WM-GM-CSF/CC0319_ge_3_45_M.nii.gz 59 | <...>/WM-GM-CSF/CC0324_ge_3_56_M.nii.gz 60 | ... 61 | 62 | Examples 63 | -------- 64 | >>> # Place the downloaded archives in any folder and pass the path to the constructor: 65 | >>> cc359 = CC359(root='/path/to/downloaded/data/folder/') 66 | >>> print(len(cc359.ids)) 67 | # 359 68 | >>> print(cc359.image(cc359.ids[0]).shape) 69 | # (171, 256, 256) 70 | >>> print(cc359.wm_gm_csf(cc359.ids[80]).shape) 71 | # (180, 240, 240) 72 | 73 | References 74 | ---------- 75 | .. [1] Souza, Roberto, et al. "An open, multi-vendor, multi-field-strength brain MR dataset 76 | and analysis of publicly available skull stripping methods agreement." 77 | NeuroImage 170 (2018): 482-494.
78 | https://www.sciencedirect.com/science/article/pii/S1053811917306687 79 | 80 | """ 81 | 82 | @property 83 | def ids(self): 84 | result = set() 85 | with ZipFile(self.root / 'Original.zip') as zf: 86 | for zipinfo in zf.infolist(): 87 | if zipinfo.is_dir(): 88 | continue 89 | 90 | file_name = Path(zipinfo.filename).name 91 | if file_name.startswith('CC'): 92 | result.add(file_name.split('_')[0]) 93 | 94 | return tuple(sorted(result)) 95 | 96 | def _image_file(self, i): 97 | return get_zipfile(i, 'Original.zip', self.root) 98 | 99 | def vendor(self, i): 100 | return zipfile2meta(self._image_file(i))['vendor'] 101 | 102 | def field(self, i): 103 | return zipfile2meta(self._image_file(i))['field'] 104 | 105 | def age(self, i): 106 | return zipfile2meta(self._image_file(i))['age'] 107 | 108 | def sex(self, i): 109 | return zipfile2meta(self._image_file(i))['gender'] 110 | 111 | def image(self, i): 112 | with open_nii_gz_file(self._image_file(i)) as nii_image: 113 | return np.asarray(nii_image.dataobj) 114 | 115 | def affine(self, i): 116 | """The 4x4 matrix that gives the image's spatial orientation.""" 117 | with open_nii_gz_file(self._image_file(i)) as nii_image: 118 | return nii_image.affine 119 | 120 | def spacing(self, i): 121 | """Returns voxel spacing along axes (x, y, z).""" 122 | with open_nii_gz_file(self._image_file(i)) as nii_image: 123 | return tuple(nii_image.header['pixdim'][1:4]) 124 | 125 | # masks: 126 | 127 | def brain(self, i): 128 | zf = get_zipfile(i, 'Silver-standard-machine-learning.zip', self.root) 129 | with open_nii_gz_file(zf) as nii_image: 130 | return np.uint8(nii_image.get_fdata()) 131 | 132 | def hippocampus(self, i): 133 | try: 134 | zf = get_zipfile(i, 'hippocampus_staple.zip', self.root) 135 | except KeyError: 136 | return None 137 | 138 | with open_nii_gz_file(zf) as nii_image: 139 | return np.uint8(nii_image.get_fdata()) 140 | 141 | def wm_gm_csf(self, i): 142 | for file in (self.root / 'WM-GM-CSF').glob('*'): 143 | if file.name.startswith(i): 144 | with open_nii_gz_file(file) as nii_image: 145 | return np.uint8(nii_image.get_fdata()) 146 | 147 | 148 | # TODO: sync with amid.utils 149 | @contextlib.contextmanager 150 | def open_nii_gz_file(file): 151 | with file.open('rb') as opened: 152 | with gzip.GzipFile(fileobj=opened) as nii: 153 | nii = nb.FileHolder(fileobj=nii) 154 | yield nb.Nifti1Image.from_file_map({'header': nii, 'image': nii}) 155 | 156 | 157 | def get_zipfile(_id, archive_name, root): 158 | archive = Path(root) / archive_name 159 | with ZipFile(archive) as zf: 160 | for zipinfo in zf.infolist(): 161 | if Path(zipinfo.filename).name.startswith(_id) and not zipinfo.is_dir(): 162 | return zipfile.Path(str(archive), zipinfo.filename) 163 | 164 | raise KeyError(f'Id "{_id}" not found') 165 | 166 | 167 | def zipfile2meta(zf): 168 | return dict(zip(['id', 'vendor', 'field', 'age', 'gender'], zf.name[: -len('.nii.gz')].split('_'))) 169 | -------------------------------------------------------------------------------- /amid/msd.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import gzip 3 | import json 4 | import tarfile 5 | from pathlib import Path 6 | 7 | import nibabel as nb 8 | import numpy as np 9 | 10 | from .internals import Dataset, register 11 | 12 | 13 | @register( 14 | body_region=('Chest', 'Abdominal', 'Head'), 15 | link='http://medicaldecathlon.com/', 16 | modality=('CT', 'CE CT', 'MRI', 'MRI FLAIR', 'MRI T1w', 'MRI t1gd', 'MRI T2w', 'MRI T2', 'MRI ADC'), 17 | 
raw_data_size='97.8G', 18 | task='Image segmentation', 19 | ) 20 | class MSD(Dataset): 21 | """ 22 | MSD is the Medical Segmentation Decathlon challenge with 10 tasks. 23 | Parameters 24 | ---------- 25 | root : str, Path, optional 26 | path to the folder containing the raw downloaded archives. 27 | If not provided, the cache is assumed to be already populated. 28 | 29 | Notes 30 | ----- 31 | Data can be downloaded here: http://medicaldecathlon.com/ 32 | or here: https://msd-for-monai.s3-us-west-2.amazonaws.com/ 33 | or here: https://drive.google.com/drive/folders/1HqEgzS8BV2c7xYNrZdEAnrHk7osJJ--2/ 34 | Then, the folder with the raw downloaded data should contain the tar archives with data and masks 35 | (e.g. `Task03_Liver.tar`). 36 | """ 37 | 38 | @property 39 | def ids(self): 40 | ids_all = [] 41 | for folder in self.root.glob('*'): 42 | if folder.name.endswith('.tar'): 43 | ids_folder = ids_from_tar(folder) 44 | else: 45 | ids_folder = ids_from_folder(folder) 46 | ids_all.extend(ids_folder) 47 | return tuple(ids_all) 48 | 49 | def train_test(self, i) -> str: 50 | fold = 'train' if 'train' in i else 'test' 51 | return fold 52 | 53 | def task(self, i) -> str: 54 | return NAME_TO_TASK[i.split('_')[1]] 55 | 56 | def _relative(self, i): 57 | name = i.removeprefix('train_').removeprefix('test_') 58 | return Path(self.task(i)), Path('imagesTr' if 'train' in i else 'imagesTs') / f'{name}.nii.gz' 59 | 60 | def image(self, i): 61 | with open_nii_gz(self.root, self._relative(i)) as (file, unpacked): 62 | if unpacked: 63 | return np.int16(nb.load(file).get_fdata()) 64 | else: 65 | with gzip.GzipFile(fileobj=file) as nii_gz: 66 | nii = nb.FileHolder(fileobj=nii_gz) 67 | return np.int16(nb.Nifti1Image.from_file_map({'header': nii, 'image': nii}).get_fdata()) 68 | 69 | def affine(self, i): 70 | """The 4x4 matrix that gives the image's spatial orientation.""" 71 | with open_nii_gz(self.root, self._relative(i)) as (file, unpacked): 72 | if unpacked: 73 | return nb.load(file).affine 74 | else: 75 | with gzip.GzipFile(fileobj=file) as nii_gz: 76 | nii = nb.FileHolder(fileobj=nii_gz) 77 | return nb.Nifti1Image.from_file_map({'header': nii, 'image': nii}).affine 78 | 79 | def image_modality(self, i): 80 | task = self.task(i) 81 | if (self.root / task).is_dir(): 82 | with open(self.root / task / 'dataset.json', 'r') as file: 83 | return json.loads(file.read())['modality'] 84 | 85 | with tarfile.open(self.root / f'{task}.tar') as tf: 86 | member = tf.getmember(f'{task}/dataset.json') 87 | file = tf.extractfile(member) 88 | return json.loads(file.read())['modality'] 89 | 90 | def segmentation_labels(self, i): 91 | """Returns segmentation labels for the task.""" 92 | task = self.task(i) 93 | if (self.root / task).is_dir(): 94 | with open(self.root / task / 'dataset.json', 'r') as file: 95 | return json.loads(file.read())['labels'] 96 | 97 | with tarfile.open(self.root / f'{task}.tar') as tf: 98 | member = tf.getmember(f'{task}/dataset.json') 99 | file = tf.extractfile(member) 100 | return json.loads(file.read())['labels'] 101 | 102 | def mask(self, i): 103 | task, relative = self._relative(i) 104 | if 'imagesTs' not in str(relative): 105 | with open_nii_gz(self.root, (task, str(relative).replace('images', 'labels'))) as (file, unpacked): 106 | if unpacked: 107 | return np.uint8(nb.load(file).get_fdata()) 108 | else: 109 | with gzip.GzipFile(fileobj=file) as nii_gz: 110 | nii = nb.FileHolder(fileobj=nii_gz) 111 | return np.uint8(nb.Nifti1Image.from_file_map({'header': nii, 'image': nii}).get_fdata()) 112 | 113 | 114 |
TASK_TO_NAME: dict = { 115 | 'Task01_BrainTumour': 'BRATS', 116 | 'Task02_Heart': 'la', 117 | 'Task03_Liver': 'liver', 118 | 'Task04_Hippocampus': 'hippocampus', 119 | 'Task05_Prostate': 'prostate', 120 | 'Task06_Lung': 'lung', 121 | 'Task07_Pancreas': 'pancreas', 122 | 'Task08_HepaticVessel': 'hepaticvessel', 123 | 'Task09_Spleen': 'spleen', 124 | 'Task10_Colon': 'colon', 125 | } 126 | 127 | NAME_TO_TASK = dict(zip(TASK_TO_NAME.values(), TASK_TO_NAME.keys())) 128 | 129 | 130 | @contextlib.contextmanager 131 | def open_nii_gz(path, nii_gz_path): 132 | """Opens a .nii.gz file either from an unpacked task folder or from inside a .tar archive. 133 | 134 | Parameters: 135 | - path: path to the folder containing the unpacked task folders and/or the .tar archives 136 | - nii_gz_path: a (task, relative path) pair pointing to the .nii.gz file. 137 | 138 | Yields: 139 | - a (file, unpacked) pair: the filesystem path and True if the file is already unpacked, otherwise an extracted file object and False. 140 | """ 141 | task, relative = nii_gz_path 142 | if (path / task / relative).exists(): 143 | yield path / task / relative, True 144 | else: 145 | with tarfile.open(path / f'{task}.tar', 'r') as tar: 146 | yield tar.extractfile(str(task / relative)), False 147 | 148 | 149 | def get_id(filename: Path): 150 | fold = 'test' if 'imagesTs' in str(filename) else 'train' 151 | name = filename.name.removesuffix('.nii.gz') 152 | return '_'.join([fold, name]) 153 | 154 | 155 | def ids_from_tar(tar_folder): 156 | ids = [] 157 | with tarfile.open(tar_folder, 'r') as tf: 158 | for file in tf.getmembers(): 159 | filename = Path(file.name) 160 | if not filename.name.startswith('._') and filename.suffix == '.gz' and 'images' in filename.parent.name: 161 | ids.append(get_id(filename)) 162 | return sorted(ids) 163 | 164 | 165 | def ids_from_folder(folder): 166 | ids = [] 167 | for filename in folder.rglob('*.nii.gz'): 168 | if not filename.name.startswith('._') and filename.suffix == '.gz' and 'images' in filename.parent.name: 169 | ids.append(get_id(filename)) 170 | return sorted(ids) 171 | -------------------------------------------------------------------------------- /amid/amos/dataset.py: -------------------------------------------------------------------------------- 1 | from functools import cached_property 2 | from zipfile import ZipFile 3 | 4 | import nibabel 5 | import numpy as np 6 | import pandas as pd 7 | from jboc import composed 8 | 9 | from ..internals import Dataset, field, licenses, register 10 | from ..utils import open_nii_gz_file, unpack 11 | 12 | 13 | ARCHIVE_NAME_SEG = 'amos22.zip' 14 | ARCHIVE_ROOT_NAME = 'amos22' 15 | ERRORS = ['5514', '5437'] # these ids are damaged in the zip archives 16 | 17 | 18 | # TODO: add MRI 19 | 20 | 21 | @register( 22 | body_region='Abdomen', 23 | license=licenses.CC_BY_40, 24 | link='https://zenodo.org/record/7262581', 25 | modality=('CT', 'MRI'), 26 | raw_data_size='23G', # TODO: update size with unlabelled 27 | prep_data_size='89,5G', 28 | task='Supervised multi-modality abdominal multi-organ segmentation', 29 | ) 30 | class AMOS(Dataset): 31 | """ 32 | AMOS provides 500 CT and 100 MRI scans collected from multi-center, multi-vendor, multi-modality, multi-phase, 33 | multi-disease patients, each with voxel-level annotations of 15 abdominal organs, providing challenging examples 34 | and a test-bed for studying robust segmentation algorithms under diverse targets and scenarios. [1] 35 | 36 | Parameters 37 | ---------- 38 | root : str, Path, optional 39 | Absolute path to the root containing the downloaded archive and meta. 40 | If not provided, the cache is assumed to be already populated.
41 | 42 | Notes 43 | ----- 44 | Download link: https://zenodo.org/record/7262581/files/amos22.zip 45 | 46 | Examples 47 | -------- 48 | >>> # Download the archive and meta to any folder and pass the path to the constructor: 49 | >>> ds = AMOS(root='/path/to/the/downloaded/files') 50 | >>> print(len(ds.ids)) 51 | # 961 52 | >>> print(ds.image(ds.ids[0]).shape) 53 | # (768, 768, 90) 54 | >>> print(ds.mask(ds.ids[26]).shape) 55 | # (512, 512, 124) 56 | 57 | References 58 | ---------- 59 | .. [1] JI YUANFENG. (2022). Amos: A large-scale abdominal multi-organ benchmark for 60 | versatile medical image segmentation [Data set]. Zenodo. https://doi.org/10.5281/zenodo.7262581 61 | """ 62 | 63 | @property 64 | def ids(self): 65 | ids = list(self._id2split) 66 | 67 | for archive in [ 68 | 'amos22_unlabeled_ct_5000_5399.zip', 69 | 'amos22_unlabeled_ct_5400_5899.zip', 70 | 'amos22_unlabeled_ct_5900_6199.zip', 71 | 'amos22_unlabeled_ct_6200_6899.zip', 72 | ]: 73 | file = self.root / archive 74 | if not file.exists(): 75 | continue 76 | 77 | with ZipFile(file) as zf: 78 | for x in zf.namelist(): 79 | if x.endswith('.nii.gz'): 80 | file = x.split('/')[-1] 81 | 82 | ids.append(file.split('.')[0].split('_')[-1]) 83 | 84 | return sorted(ids) 85 | 86 | @field 87 | def image(self, i): 88 | """Corresponding 3D image.""" 89 | if i in ERRORS: 90 | return None # this image is damaged in the archive 91 | 92 | archive_name, archive_root, file = self._archive_name(i) 93 | with unpack(self.root / archive_name, file, archive_root, '.zip') as (unpacked, is_unpacked): 94 | if is_unpacked: 95 | return np.asarray(nibabel.load(unpacked).dataobj) 96 | else: 97 | with open_nii_gz_file(unpacked) as image: 98 | return np.asarray(image.dataobj) 99 | 100 | @field 101 | def affine(self, i): 102 | """The 4x4 matrix that gives the image's spatial orientation.""" 103 | if i in ERRORS: 104 | return None # this image is damaged in the archive 105 | 106 | archive_name, archive_root, file = self._archive_name(i) 107 | with unpack(self.root / archive_name, file, archive_root, '.zip') as (unpacked, is_unpacked): 108 | if is_unpacked: 109 | return nibabel.load(unpacked).affine 110 | else: 111 | with open_nii_gz_file(unpacked) as image: 112 | return image.affine 113 | 114 | @field 115 | def mask(self, i): 116 | if i not in self._id2split: 117 | return 118 | 119 | file = f'labels{self._id2split[i]}/amos_{i}.nii.gz' 120 | try: 121 | with unpack(self.root / ARCHIVE_NAME_SEG, file, ARCHIVE_ROOT_NAME, '.zip') as (unpacked, is_unpacked): 122 | if is_unpacked: 123 | return np.asarray(nibabel.load(unpacked).dataobj) 124 | else: 125 | with open_nii_gz_file(unpacked) as image: 126 | return np.asarray(image.dataobj) 127 | except FileNotFoundError: 128 | return 129 | 130 | @field 131 | def image_modality(self, i): 132 | """Returns image modality, `CT` or `MRI`.""" 133 | if 500 < int(i) <= 600: 134 | return 'MRI' 135 | return 'CT' 136 | 137 | # labels 138 | @field 139 | def birth_date(self, i): 140 | return self._label(i, "Patient's Birth Date") 141 | 142 | @field 143 | def sex(self, i): 144 | return self._label(i, "Patient's Sex") 145 | 146 | @field 147 | def age(self, i): 148 | return self._label(i, "Patient's Age") 149 | 150 | @field 151 | def manufacturer_model(self, i): 152 | return self._label(i, "Manufacturer's Model Name") 153 | 154 | @field 155 | def manufacturer(self, i): 156 | return self._label(i, 'Manufacturer') 157 | 158 | @field 159 | def acquisition_date(self, i): 160 | return self._label(i, 'Acquisition Date') 161 | 162 | @field 163 | 
def site(self, i): 164 | return self._label(i, 'Site') 165 | 166 | @cached_property 167 | @composed(dict) 168 | def _id2split(self): 169 | with ZipFile(self.root / ARCHIVE_NAME_SEG) as zf: 170 | for x in zf.namelist(): 171 | if (len(x.strip('/').split('/')) == 3) and x.endswith('.nii.gz'): 172 | file, split = x.split('/')[-1], x.split('/')[-2][-2:] 173 | id_ = file.split('.')[0].split('_')[-1] 174 | 175 | yield id_, split 176 | 177 | @cached_property 178 | def _meta(self): 179 | files = [ 180 | 'labeled_data_meta_0000_0599.csv', 181 | 'unlabeled_data_meta_5400_5899.csv', 182 | 'unlabeled_data_meta_5000_5399.csv', 183 | 'unlabeled_data_meta_5900_6199.csv', 184 | ] 185 | 186 | dfs = [] 187 | for file in files: 188 | with unpack(self.root, file) as (unpacked, _): 189 | dfs.append(pd.read_csv(unpacked)) 190 | return pd.concat(dfs) 191 | 192 | def _archive_name(self, i): 193 | if i in self._id2split: 194 | return ARCHIVE_NAME_SEG, ARCHIVE_ROOT_NAME, f'images{self._id2split[i]}/amos_{i}.nii.gz' 195 | 196 | i = int(i) 197 | file = f'amos_{i}.nii.gz' 198 | if 5000 <= i < 5400: 199 | return 'amos22_unlabeled_ct_5000_5399.zip', 'amos_unlabeled_ct_5000_5399', file 200 | elif 5400 <= i < 5900: 201 | return 'amos22_unlabeled_ct_5400_5899.zip', 'amos_unlabeled_ct_5400_5899', file 202 | elif 5900 <= i < 6200: 203 | return 'amos22_unlabeled_ct_5900_6199.zip', 'amos22_unlabeled_ct_5900_6199', file 204 | else: 205 | return 'amos22_unlabeled_ct_6200_6899.zip', 'amos22_unlabeled_6200_6899', file 206 | 207 | def _label(self, i, column): 208 | # ambiguous data in meta 209 | if int(i) in [500, 600]: 210 | return None 211 | elif int(i) not in self._meta['amos_id'].values: # `in` on a Series checks the index, so compare against the values 212 | return None 213 | 214 | return self._meta[self._meta['amos_id'] == int(i)][column].item() 215 | -------------------------------------------------------------------------------- /amid/lidc/dataset.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | from typing import List, Tuple, Union 4 | 5 | import numpy as np 6 | import pylidc as pl 7 | from dicom_csv import ( 8 | Series, 9 | expand_volumetric, 10 | get_common_tag, 11 | get_orientation_matrix, 12 | get_tag, 13 | order_series, 14 | stack_images, 15 | ) 16 | from pylidc.utils import consensus 17 | from scipy import stats 18 | 19 | from ..internals import Dataset, field, licenses, register 20 | from ..utils import PathOrStr, get_series_date 21 | from .nodules import get_nodule 22 | from .typing import LIDCNodule 23 | 24 | 25 | @register( 26 | body_region='Chest', 27 | license=licenses.CC_BY_30, 28 | link='https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=1966254', 29 | modality='CT', 30 | prep_data_size='71,2G', 31 | raw_data_size='126G', 32 | task='Lung nodules segmentation', 33 | ) 34 | class LIDC(Dataset): 35 | """ 36 | The (L)ung (I)mage (D)atabase (C)onsortium image collection (LIDC-IDRI) [1]_ 37 | consists of diagnostic and lung cancer screening thoracic computed tomography (CT) scans 38 | with marked-up annotated lesions and a lung nodule segmentation task. 39 | Scans contain multiple expert annotations. 40 | 41 | Number of CT scans: 1018. 42 | 43 | Parameters 44 | ---------- 45 | root : str, Path, optional 46 | path to the folder containing the raw downloaded archives. 47 | If not provided, the cache is assumed to be already populated. 48 | 49 | Notes 50 | ----- 51 | Follow the download instructions at https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=1966254.
52 | 53 | Then, the folder with raw downloaded data should contain folder `LIDC-IDRI`, 54 | which contains folders `LIDC-IDRI-*`. 55 | 56 | Examples 57 | -------- 58 | >>> # Place the downloaded archives in any folder and pass the path to the constructor: 59 | >>> ds = LIDC(root='/path/to/downloaded/data/folder/') 60 | >>> print(len(ds.ids)) 61 | # 1018 62 | >>> print(ds.image(ds.ids[0]).shape) 63 | # (512, 512, 194) 64 | >>> print(ds.cancer(ds.ids[0]).shape) 65 | # (512, 512, 194) 66 | 67 | References 68 | ---------- 69 | .. [1] Armato III, McLennan, et al. "The lung image database consortium (lidc) and image database 70 | resource initiative (idri): a completed reference database of lung nodules on ct scans." 71 | Medical physics 38(2) (2011): 915–931. 72 | https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3041807/ 73 | """ 74 | 75 | def __init__(self, root: PathOrStr): 76 | super().__init__(root) 77 | self._check_config() 78 | 79 | def _check_config(self): 80 | pylidc_config_start = '[dicom]\npath = ' 81 | if os.path.exists(os.path.expanduser('~/.pylidcrc')): 82 | with open(os.path.expanduser('~/.pylidcrc'), 'r') as config_file: 83 | content = config_file.read() 84 | if content == f'{pylidc_config_start}{self.root}': 85 | return 86 | 87 | # save _root path to ~/.pylidcrc file for pylidc 88 | with open(os.path.expanduser('~/.pylidcrc'), 'w') as config_file: 89 | config_file.write(f'{pylidc_config_start}{self.root}') 90 | 91 | @property 92 | def ids(self): 93 | result = [scan.series_instance_uid for scan in pl.query(pl.Scan).all()] 94 | return tuple(sorted(result)) 95 | 96 | def _scan(self, i) -> pl.Scan: 97 | _id = i.split('_')[-1] 98 | return pl.query(pl.Scan).filter(pl.Scan.series_instance_uid == _id).first() 99 | 100 | def _series(self, i) -> Series: 101 | series = expand_volumetric(self._scan(i).load_all_dicom_images(verbose=False)) 102 | series = order_series(series) 103 | return series 104 | 105 | def _shape(self, i) -> Tuple[int, int, int]: 106 | return stack_images(self._series(i), -1).shape 107 | 108 | @field 109 | def image(self, i) -> np.ndarray: 110 | return self._scan(i).to_volume(verbose=False) 111 | 112 | @field 113 | def study_uid(self, i) -> str: 114 | return self._scan(i).study_instance_uid 115 | 116 | @field 117 | def series_uid(self, i) -> str: 118 | return self._scan(i).series_instance_uid 119 | 120 | @field 121 | def patient_id(self, i) -> str: 122 | return self._scan(i).patient_id 123 | 124 | @field 125 | def sop_uids(self, i) -> List[str]: 126 | return [str(get_tag(i, 'SOPInstanceUID')) for i in self._series(i)] 127 | 128 | @field 129 | def pixel_spacing(self, i) -> List[float]: 130 | spacing = self._scan(i).pixel_spacing 131 | return [spacing, spacing] 132 | 133 | @field 134 | def slice_locations(self, i) -> np.ndarray: 135 | return self._scan(i).slice_zvals 136 | 137 | # @field 138 | def spacing(self, i) -> Tuple[float, float, float]: 139 | """ 140 | Volumetric spacing of the image. 141 | The maximum relative difference in `slice_locations` < 1e-3 142 | (except 4 images listed below), 143 | so we allow ourselves to use the common spacing for the whole 3D image. 144 | 145 | Note 146 | ---- 147 | The `slice_locations` attribute typically (but not always!) has the constant step. 
148 | In LIDC dataset, only 4 images have difference in `slice_locations` > 1e-3: 149 | 1.3.6.1.4.1.14519.5.2.1.6279.6001.526570782606728516388531252230 150 | 1.3.6.1.4.1.14519.5.2.1.6279.6001.329334252028672866365623335798 151 | 1.3.6.1.4.1.14519.5.2.1.6279.6001.245181799370098278918756923992 152 | 1.3.6.1.4.1.14519.5.2.1.6279.6001.103115201714075993579787468219 153 | And these differences appear in the maximum of 3 slices. 154 | Therefore, we consider their impact negligible. 155 | """ 156 | return (*self.pixel_spacing(i), stats.mode(np.diff(self.slice_locations(i)))[0].item()) 157 | 158 | @field 159 | def contrast_used(self, i) -> bool: 160 | """If the DICOM file for the scan had any Contrast tag, this is marked as `True`.""" 161 | return self._scan(i).contrast_used 162 | 163 | @field 164 | def is_from_initial(self, i) -> bool: 165 | """ 166 | Indicates whether or not this PatientID was tagged as 167 | part of the initial 399 release. 168 | """ 169 | return self._scan(i).is_from_initial 170 | 171 | @field 172 | def orientation_matrix(self, i) -> np.ndarray: 173 | return get_orientation_matrix(self._series(i)) 174 | 175 | @field 176 | def sex(self, i) -> Union[str, None]: 177 | return get_common_tag(self._series(i), 'PatientSex', default=None) 178 | 179 | @field 180 | def age(self, i) -> Union[str, None]: 181 | return get_common_tag(self._series(i), 'PatientAge', default=None) 182 | 183 | @field 184 | def conv_kernel(self, i) -> Union[str, None]: 185 | return get_common_tag(self._series(i), 'ConvolutionKernel', default=None) 186 | 187 | @field 188 | def kvp(self, i) -> Union[str, None]: 189 | return get_common_tag(self._series(i), 'KVP', default=None) 190 | 191 | @field 192 | def tube_current(self, i) -> Union[str, None]: 193 | return get_common_tag(self._series(i), 'XRayTubeCurrent', default=None) 194 | 195 | @field 196 | def study_date(self, i) -> Union[datetime.date, None]: 197 | return get_series_date(self._series(i)) 198 | 199 | @field 200 | def accession_number(self, i) -> Union[str, None]: 201 | return get_common_tag(self._series(i), 'AccessionNumber', default=None) 202 | 203 | @field 204 | def nodules(self, i) -> List[List[LIDCNodule]]: 205 | nodules = [] 206 | for anns in self._scan(i).cluster_annotations(): 207 | nodule_annotations = [] 208 | for ann in anns: 209 | nodule_annotations.append(get_nodule(ann)) 210 | nodules.append(nodule_annotations) 211 | return nodules 212 | 213 | @field 214 | def nodules_masks(self, i) -> List[List[np.ndarray]]: 215 | nodules = [] 216 | for anns in self._scan(i).cluster_annotations(): 217 | nodule_annotations = [] 218 | for ann in anns: 219 | nodule_annotations.append(ann.boolean_mask()) 220 | nodules.append(nodule_annotations) 221 | return nodules 222 | 223 | @field 224 | def cancer(self, i) -> np.ndarray: 225 | cancer = np.zeros(self._shape(i), dtype=bool) 226 | for anns in self._scan(i).cluster_annotations(): 227 | cancer |= consensus(anns, pad=np.inf)[0] 228 | 229 | return cancer 230 | --------------------------------------------------------------------------------
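A minimal usage sketch for the `LIDC` class above (an editor's illustration, not part of the repository). It assumes the `amid` package is installed and that the raw LIDC-IDRI DICOM folders were downloaded under `root` as described in the class Notes; the path below is a placeholder.

    from amid.lidc.dataset import LIDC  # the module shown directly above

    lidc = LIDC(root='/path/to/folder/containing/LIDC-IDRI')  # also writes the ~/.pylidcrc config used by pylidc
    i = lidc.ids[0]
    image = lidc.image(i)      # 3D CT volume, e.g. (512, 512, 194)
    cancer = lidc.cancer(i)    # boolean consensus mask of the same shape
    spacing = lidc.spacing(i)  # (x, y, z) voxel spacing
    nodules = lidc.nodules(i)  # per-nodule lists of expert annotations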