├── Dockerfile ├── requirements.txt ├── README.md ├── .gitignore ├── duke_dbt_data.py ├── dcmread_image.ipynb └── draw_box.ipynb /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8 2 | 3 | RUN pip install --upgrade pip==20.3.1 4 | 5 | COPY requirements.txt ./ 6 | 7 | RUN pip install --no-cache-dir -r requirements.txt 8 | 9 | WORKDIR /duke-dbt 10 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | jupyter==1.0.0 2 | numpy==1.19.4 3 | pandas==1.1.4 4 | scikit-learn==0.23.2 5 | imageio==2.9.0 6 | scikit-image==0.17.2 7 | pydicom==2.1.1 8 | pylibjpeg==1.1.1 9 | matplotlib==3.3.3 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # duke-dbt-data 2 | 3 | A repository with code samples and notebooks for DukeDBT Dataset. 4 | 5 | ## docker 6 | 7 | Build docker image: 8 | 9 | ``` 10 | docker build -t duke-dbt . 11 | ``` 12 | 13 | Run container bash: 14 | 15 | ``` 16 | docker run --rm -it \ 17 | -v `pwd`:/duke-dbt \ 18 | -v /path/to/data:/data \ 19 | -p 8889:8889 \ 20 | duke-dbt bash 21 | ``` 22 | 23 | Replace `/path/to/data` with a path to the downloaded data folder. 24 | 25 | ## jupyter notebook 26 | 27 | Serve jupyter notebook from the container: 28 | 29 | ``` 30 | jupyter notebook --allow-root --ip=0.0.0.0 --port=8889 31 | ``` 32 | 33 | ## read dicom image 34 | 35 | `dcmread_image.ipynb` notebook shows how to read image data from a DICOM file in the coordinate system that matches the ground truth bounding boxes. 36 | 37 | ## draw bounding box 38 | 39 | `draw_box.ipynb` notebook shows how to draw a bounding box on the image. 
40 | 41 | ## helper functions 42 | 43 | To use helper functions from the notebooks, simply copy the `duke_dbt_data.py` file to your project. 44 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by https://www.toptal.com/developers/gitignore/api/linux,macos,python,jupyternotebooks,intellij+all,vim,vscode 2 | # Edit at https://www.toptal.com/developers/gitignore?templates=linux,macos,python,jupyternotebooks,intellij+all,vim,vscode 3 | 4 | ### Intellij+all ### 5 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider 6 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 7 | 8 | # User-specific stuff 9 | .idea/**/workspace.xml 10 | .idea/**/tasks.xml 11 | .idea/**/usage.statistics.xml 12 | .idea/**/dictionaries 13 | .idea/**/shelf 14 | 15 | # Generated files 16 | .idea/**/contentModel.xml 17 | 18 | # Sensitive or high-churn files 19 | .idea/**/dataSources/ 20 | .idea/**/dataSources.ids 21 | .idea/**/dataSources.local.xml 22 | .idea/**/sqlDataSources.xml 23 | .idea/**/dynamic.xml 24 | .idea/**/uiDesigner.xml 25 | .idea/**/dbnavigator.xml 26 | 27 | # Gradle 28 | .idea/**/gradle.xml 29 | .idea/**/libraries 30 | 31 | # Gradle and Maven with auto-import 32 | # When using Gradle or Maven with auto-import, you should exclude module files, 33 | # since they will be recreated, and may cause churn. Uncomment if using 34 | # auto-import. 
35 | # .idea/artifacts 36 | # .idea/compiler.xml 37 | # .idea/jarRepositories.xml 38 | # .idea/modules.xml 39 | # .idea/*.iml 40 | # .idea/modules 41 | # *.iml 42 | # *.ipr 43 | 44 | # CMake 45 | cmake-build-*/ 46 | 47 | # Mongo Explorer plugin 48 | .idea/**/mongoSettings.xml 49 | 50 | # File-based project format 51 | *.iws 52 | 53 | # IntelliJ 54 | out/ 55 | 56 | # mpeltonen/sbt-idea plugin 57 | .idea_modules/ 58 | 59 | # JIRA plugin 60 | atlassian-ide-plugin.xml 61 | 62 | # Cursive Clojure plugin 63 | .idea/replstate.xml 64 | 65 | # Crashlytics plugin (for Android Studio and IntelliJ) 66 | com_crashlytics_export_strings.xml 67 | crashlytics.properties 68 | crashlytics-build.properties 69 | fabric.properties 70 | 71 | # Editor-based Rest Client 72 | .idea/httpRequests 73 | 74 | # Android studio 3.1+ serialized cache file 75 | .idea/caches/build_file_checksums.ser 76 | 77 | ### Intellij+all Patch ### 78 | # Ignores the whole .idea folder and all .iml files 79 | # See https://github.com/joeblau/gitignore.io/issues/186 and https://github.com/joeblau/gitignore.io/issues/360 80 | 81 | .idea/ 82 | 83 | # Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023 84 | 85 | *.iml 86 | modules.xml 87 | .idea/misc.xml 88 | *.ipr 89 | 90 | # Sonarlint plugin 91 | .idea/sonarlint 92 | 93 | ### JupyterNotebooks ### 94 | # gitignore template for Jupyter Notebooks 95 | # website: http://jupyter.org/ 96 | 97 | .ipynb_checkpoints 98 | */.ipynb_checkpoints/* 99 | 100 | # IPython 101 | profile_default/ 102 | ipython_config.py 103 | 104 | # Remove previous ipynb_checkpoints 105 | # git rm -r .ipynb_checkpoints/ 106 | 107 | ### Linux ### 108 | *~ 109 | 110 | # temporary files which can be created if a process still has a handle open of a deleted file 111 | .fuse_hidden* 112 | 113 | # KDE directory preferences 114 | .directory 115 | 116 | # Linux trash folder which might appear on any partition or disk 117 | .Trash-* 118 | 119 | # .nfs files are created when an 
open file is removed but is still being accessed 120 | .nfs* 121 | 122 | ### macOS ### 123 | # General 124 | .DS_Store 125 | .AppleDouble 126 | .LSOverride 127 | 128 | # Icon must end with two \r 129 | Icon 130 | 131 | 132 | # Thumbnails 133 | ._* 134 | 135 | # Files that might appear in the root of a volume 136 | .DocumentRevisions-V100 137 | .fseventsd 138 | .Spotlight-V100 139 | .TemporaryItems 140 | .Trashes 141 | .VolumeIcon.icns 142 | .com.apple.timemachine.donotpresent 143 | 144 | # Directories potentially created on remote AFP share 145 | .AppleDB 146 | .AppleDesktop 147 | Network Trash Folder 148 | Temporary Items 149 | .apdisk 150 | 151 | ### Python ### 152 | # Byte-compiled / optimized / DLL files 153 | __pycache__/ 154 | *.py[cod] 155 | *$py.class 156 | 157 | # C extensions 158 | *.so 159 | 160 | # Distribution / packaging 161 | .Python 162 | build/ 163 | develop-eggs/ 164 | dist/ 165 | downloads/ 166 | eggs/ 167 | .eggs/ 168 | lib/ 169 | lib64/ 170 | parts/ 171 | sdist/ 172 | var/ 173 | wheels/ 174 | pip-wheel-metadata/ 175 | share/python-wheels/ 176 | *.egg-info/ 177 | .installed.cfg 178 | *.egg 179 | MANIFEST 180 | 181 | # PyInstaller 182 | # Usually these files are written by a python script from a template 183 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
184 | *.manifest 185 | *.spec 186 | 187 | # Installer logs 188 | pip-log.txt 189 | pip-delete-this-directory.txt 190 | 191 | # Unit test / coverage reports 192 | htmlcov/ 193 | .tox/ 194 | .nox/ 195 | .coverage 196 | .coverage.* 197 | .cache 198 | nosetests.xml 199 | coverage.xml 200 | *.cover 201 | *.py,cover 202 | .hypothesis/ 203 | .pytest_cache/ 204 | pytestdebug.log 205 | 206 | # Translations 207 | *.mo 208 | *.pot 209 | 210 | # Django stuff: 211 | *.log 212 | local_settings.py 213 | db.sqlite3 214 | db.sqlite3-journal 215 | 216 | # Flask stuff: 217 | instance/ 218 | .webassets-cache 219 | 220 | # Scrapy stuff: 221 | .scrapy 222 | 223 | # Sphinx documentation 224 | docs/_build/ 225 | doc/_build/ 226 | 227 | # PyBuilder 228 | target/ 229 | 230 | # Jupyter Notebook 231 | 232 | # IPython 233 | 234 | # pyenv 235 | .python-version 236 | 237 | # pipenv 238 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 239 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 240 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 241 | # install all needed dependencies. 242 | #Pipfile.lock 243 | 244 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 245 | __pypackages__/ 246 | 247 | # Celery stuff 248 | celerybeat-schedule 249 | celerybeat.pid 250 | 251 | # SageMath parsed files 252 | *.sage.py 253 | 254 | # Environments 255 | .env 256 | .venv 257 | env/ 258 | venv/ 259 | ENV/ 260 | env.bak/ 261 | venv.bak/ 262 | pythonenv* 263 | 264 | # Spyder project settings 265 | .spyderproject 266 | .spyproject 267 | 268 | # Rope project settings 269 | .ropeproject 270 | 271 | # mkdocs documentation 272 | /site 273 | 274 | # mypy 275 | .mypy_cache/ 276 | .dmypy.json 277 | dmypy.json 278 | 279 | # Pyre type checker 280 | .pyre/ 281 | 282 | # pytype static type analyzer 283 | .pytype/ 284 | 285 | # profiling data 286 | .prof 287 | 288 | ### Vim ### 289 | # Swap 290 | [._]*.s[a-v][a-z] 291 | !*.svg # comment out if you don't need vector files 292 | [._]*.sw[a-p] 293 | [._]s[a-rt-v][a-z] 294 | [._]ss[a-gi-z] 295 | [._]sw[a-p] 296 | 297 | # Session 298 | Session.vim 299 | Sessionx.vim 300 | 301 | # Temporary 302 | .netrwhist 303 | # Auto-generated tag files 304 | tags 305 | # Persistent undo 306 | [._]*.un~ 307 | 308 | ### vscode ### 309 | .vscode/* 310 | !.vscode/settings.json 311 | !.vscode/tasks.json 312 | !.vscode/launch.json 313 | !.vscode/extensions.json 314 | *.code-workspace 315 | 316 | # End of https://www.toptal.com/developers/gitignore/api/linux,macos,python,jupyternotebooks,intellij+all,vim,vscode -------------------------------------------------------------------------------- /duke_dbt_data.py: -------------------------------------------------------------------------------- 1 | from typing import AnyStr, BinaryIO, Dict, List, NamedTuple, Optional, Union 2 | 3 | import matplotlib 4 | import numpy as np 5 | import pandas as pd 6 | import pydicom as dicom 7 | from skimage.exposure import rescale_intensity 8 | 9 | 10 | def dcmread_image( 11 | fp: Union[str, "os.PathLike[AnyStr]", BinaryIO], 12 | view: str, 13 | index: Optional[np.uint] = None, 14 | ) -> np.ndarray: 15 | """Read pixel 
array from DBT DICOM file""" 16 | ds = dicom.dcmread(fp) 17 | ds.decompress(handler_name="pylibjpeg") 18 | pixel_array = ds.pixel_array 19 | view_laterality = view[0].upper() 20 | image_laterality = _get_image_laterality(pixel_array[index or 0]) 21 | if index is not None: 22 | pixel_array = pixel_array[index] 23 | if not image_laterality == view_laterality: 24 | pixel_array = np.flip(pixel_array, axis=(-1, -2)) 25 | window_center = _get_window_center(ds) 26 | window_width = _get_window_width(ds) 27 | low = (2 * window_center - window_width) / 2 28 | high = (2 * window_center + window_width) / 2 29 | pixel_array = rescale_intensity( 30 | pixel_array, in_range=(low, high), out_range="dtype" 31 | ) 32 | return pixel_array 33 | 34 | 35 | def read_boxes( 36 | boxes_fp: pd._typing.FilePathOrBuffer, filepaths_fp: pd._typing.FilePathOrBuffer 37 | ) -> pd.DataFrame: 38 | """Read pandas DataFrame with bounding boxes joined with file paths""" 39 | df_boxes = pd.read_csv(boxes_fp) 40 | df_filepaths = pd.read_csv(filepaths_fp) 41 | primary_key = ("PatientID", "StudyUID", "View") 42 | if not all([key in df_boxes.columns for key in primary_key]): 43 | raise AssertionError( 44 | f"Not all primary key columns {primary_key} are present in bounding boxes columns {df_boxes.columns}" 45 | ) 46 | if not all([key in df_boxes.columns for key in primary_key]): 47 | raise AssertionError( 48 | f"Not all primary key columns {primary_key} are present in file paths columns {df_filepaths.columns}" 49 | ) 50 | return pd.merge(df_boxes, df_filepaths, on=primary_key) 51 | 52 | 53 | def draw_box( 54 | image: np.ndarray, 55 | x: int, 56 | y: int, 57 | width: int, 58 | height: int, 59 | color: Optional[Union[int, tuple]] = None, 60 | lw=4, 61 | ): 62 | """Draw bounding box on the image""" 63 | x = min(max(x, 0), image.shape[1] - 1) 64 | y = min(max(y, 0), image.shape[0] - 1) 65 | if color is None: 66 | color = np.max(image) 67 | if len(image.shape) > 2 and not hasattr(color, "__len__"): 68 | color = 
(color,) + (0,) * (image.shape[-1] - 1) 69 | image[y : y + lw, x : x + width] = color 70 | image[y + height - lw : y + height, x : x + width] = color 71 | image[y : y + height, x : x + lw] = color 72 | image[y : y + height, x + width - lw : x + width] = color 73 | return image 74 | 75 | 76 | def evaluate( 77 | labels_fp: pd._typing.FilePathOrBuffer, 78 | boxes_fp: pd._typing.FilePathOrBuffer, 79 | predictions_fp: pd._typing.FilePathOrBuffer, 80 | ) -> Dict[str, float]: 81 | """Evaluate predictions""" 82 | df_labels = pd.read_csv(labels_fp) 83 | df_boxes = pd.read_csv(boxes_fp, dtype={"VolumeSlices": float}) 84 | df_pred = pd.read_csv(predictions_fp, dtype={"Score": float}) 85 | 86 | df_labels = df_labels.reset_index().set_index(["StudyUID", "View"]).sort_index() 87 | df_boxes = df_boxes.reset_index().set_index(["StudyUID", "View"]).sort_index() 88 | df_pred = df_pred.reset_index().set_index(["StudyUID", "View"]).sort_index() 89 | 90 | df_pred["TP"] = 0 91 | df_pred["GTID"] = -1 92 | 93 | thresholds = [df_pred["Score"].max() + 1.0] 94 | 95 | # find true positive predictions and assign detected ground truth box ID 96 | for box_pred in df_pred.itertuples(): 97 | if box_pred.Index not in df_boxes.index: 98 | continue 99 | 100 | df_boxes_view = df_boxes.loc[[box_pred.Index]] 101 | view_slice_offset = df_boxes.loc[[box_pred.Index], "VolumeSlices"].iloc[0] / 4 102 | tp_boxes = [ 103 | b 104 | for b in df_boxes_view.itertuples() 105 | if _is_tp(box_pred, b, slice_offset=view_slice_offset) 106 | ] 107 | if len(tp_boxes) > 1: 108 | # find the nearest GT box 109 | tp_distances = [_distance(box_pred, b) for b in tp_boxes] 110 | tp_boxes = [tp_boxes[np.argmin(tp_distances)]] 111 | if len(tp_boxes) > 0: 112 | tp_i = tp_boxes[0].index 113 | df_pred.loc[df_pred["index"] == box_pred.index, ("TP", "GTID")] = (1, tp_i) 114 | thresholds.append(box_pred.Score) 115 | 116 | thresholds.append(df_pred["Score"].min() - 1.0) 117 | 118 | # compute sensitivity at 2 FPs/volume on all cases 119 
| evaluation_fps_all = (2.0,) 120 | tpr_all = _froc( 121 | df_pred=df_pred, 122 | thresholds=thresholds, 123 | n_volumes=len(df_labels), 124 | n_boxes=len(df_boxes), 125 | evaluation_fps=evaluation_fps_all, 126 | ) 127 | result = {f"sensitivity_at_2_fps_all": tpr_all[0]} 128 | 129 | # compute mean sensitivity at 1, 2, 3, 4 FPs/volume on positive cases 130 | df_pred = df_pred[df_pred.index.isin(df_boxes.index)] 131 | df_labels = df_labels[df_labels.index.isin(df_boxes.index)] 132 | evaluation_fps_positive = (1.0, 2.0, 3.0, 4.0) 133 | tpr_positive = _froc( 134 | df_pred=df_pred, 135 | thresholds=thresholds, 136 | n_volumes=len(df_labels), 137 | n_boxes=len(df_boxes), 138 | evaluation_fps=evaluation_fps_positive, 139 | ) 140 | 141 | result.update( 142 | dict( 143 | (f"sensitivity_at_{int(x)}_fps_positive", y) 144 | for x, y in zip(evaluation_fps_positive, tpr_positive) 145 | ) 146 | ) 147 | result.update({"mean_sensitivity_positive": np.mean(tpr_positive)}) 148 | 149 | return result 150 | 151 | 152 | def _froc( 153 | df_pred: pd.DataFrame, 154 | thresholds: List[float], 155 | n_volumes: int, 156 | n_boxes: int, 157 | evaluation_fps: tuple, 158 | ) -> List[float]: 159 | tpr = [] 160 | fps = [] 161 | for th in sorted(thresholds, reverse=True): 162 | df_th = df_pred.loc[df_pred["Score"] >= th] 163 | df_th_unique_tp = df_th.reset_index().drop_duplicates( 164 | subset=["StudyUID", "View", "TP", "GTID"] 165 | ) 166 | n_tps_th = float(sum(df_th_unique_tp["TP"])) 167 | tpr_th = n_tps_th / n_boxes 168 | n_fps_th = float(len(df_th[df_th["TP"] == 0])) 169 | fps_th = n_fps_th / n_volumes 170 | tpr.append(tpr_th) 171 | fps.append(fps_th) 172 | if fps_th > max(evaluation_fps): 173 | break 174 | return [np.interp(x, fps, tpr) for x in evaluation_fps] 175 | 176 | 177 | def _is_tp( 178 | box_pred: NamedTuple, box_true: NamedTuple, slice_offset: int, min_dist: int = 100 179 | ) -> bool: 180 | pred_y = box_pred.Y + box_pred.Height / 2 181 | pred_x = box_pred.X + box_pred.Width / 2 182 | 
pred_z = box_pred.Z + box_pred.Depth / 2 183 | true_y = box_true.Y + box_true.Height / 2 184 | true_x = box_true.X + box_true.Width / 2 185 | true_z = box_true.Slice 186 | # 2D distance between true and predicted center points 187 | dist = np.linalg.norm((pred_x - true_x, pred_y - true_y)) 188 | # compute radius based on true box size 189 | dist_threshold = np.sqrt(box_true.Width ** 2 + box_true.Height ** 2) / 2.0 190 | dist_threshold = max(dist_threshold, min_dist) 191 | slice_diff = np.abs(pred_z - true_z) 192 | # TP if predicted center within radius and slice within slice offset 193 | return dist <= dist_threshold and slice_diff <= slice_offset 194 | 195 | 196 | def _distance(box_pred: NamedTuple, box_true: NamedTuple) -> float: 197 | pred_y = box_pred.Y + box_pred.Height / 2 198 | pred_x = box_pred.X + box_pred.Width / 2 199 | pred_z = box_pred.Z + box_pred.Depth / 2 200 | true_y = box_true.Y + box_true.Height / 2 201 | true_x = box_true.X + box_true.Width / 2 202 | true_z = box_true.Slice 203 | return np.linalg.norm((pred_x - true_x, pred_y - true_y, pred_z - true_z)) 204 | 205 | 206 | def _get_dicom_laterality(ds: dicom.dataset.FileDataset) -> str: 207 | """Unreliable - DICOM laterality is incorrect for some cases""" 208 | return ds[0x5200, 0x9229][0][0x0020, 0x9071][0][0x0020, 0x9072].value 209 | 210 | 211 | def _get_image_laterality(pixel_array: np.ndarray) -> str: 212 | left_edge = np.sum(pixel_array[:, 0]) # sum of left edge pixels 213 | right_edge = np.sum(pixel_array[:, -1]) # sum of right edge pixels 214 | return "R" if left_edge < right_edge else "L" 215 | 216 | 217 | def _get_window_center(ds: dicom.dataset.FileDataset) -> np.float32: 218 | return np.float32(ds[0x5200, 0x9229][0][0x0028, 0x9132][0][0x0028, 0x1050].value) 219 | 220 | 221 | def _get_window_width(ds: dicom.dataset.FileDataset) -> np.float32: 222 | return np.float32(ds[0x5200, 0x9229][0][0x0028, 0x9132][0][0x0028, 0x1051].value) 223 | 
-------------------------------------------------------------------------------- /dcmread_image.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "\n", 11 | "%matplotlib inline\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "import pandas as pd\n", 14 | "\n", 15 | "from duke_dbt_data import dcmread_image" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 2, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "df = pd.read_csv(\"/data/file-paths-train.csv\")" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 3, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "view_series = df.iloc[0]\n", 34 | "view = view_series[\"View\"]\n", 35 | "# if you have image data saved in classic path folder structure, read the file path from \"classic_path\"\n", 36 | "# image_path = os.path.join(\"/data\", view_series[\"classic_path\"])\n", 37 | "image_path = os.path.join(\"/data\", view_series[\"descriptive_path\"])" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 4, 43 | "metadata": {}, 44 | "outputs": [ 45 | { 46 | "name": "stderr", 47 | "output_type": "stream", 48 | "text": [ 49 | "/usr/local/lib/python3.8/site-packages/openjpeg/utils.py:189: UserWarning: The (0028,0101) Bits Stored value '10' in the dataset does not match the component precision value '16' found in the JPEG 2000 data. 
It's recommended that you change the Bits Stored value to produce the correct output\n", 50 | " warnings.warn(\n" 51 | ] 52 | }, 53 | { 54 | "name": "stdout", 55 | "output_type": "stream", 56 | "text": [ 57 | "CPU times: user 14.4 s, sys: 894 ms, total: 15.3 s\n", 58 | "Wall time: 15.4 s\n" 59 | ] 60 | } 61 | ], 62 | "source": [ 63 | "%%time\n", 64 | "image = dcmread_image(fp=image_path, view=view)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 5, 70 | "metadata": {}, 71 | "outputs": [ 72 | { 73 | "data": { 74 | "image/png": "\n", 75 | "text/plain": [ 76 | "
" 77 | ] 78 | }, 79 | "metadata": { 80 | "needs_background": "light" 81 | }, 82 | "output_type": "display_data" 83 | } 84 | ], 85 | "source": [ 86 | "plt.imshow(image[0], cmap=plt.cm.gray);" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [] 95 | } 96 | ], 97 | "metadata": { 98 | "kernelspec": { 99 | "display_name": "Python 3", 100 | "language": "python", 101 | "name": "python3" 102 | }, 103 | "language_info": { 104 | "codemirror_mode": { 105 | "name": "ipython", 106 | "version": 3 107 | }, 108 | "file_extension": ".py", 109 | "mimetype": "text/x-python", 110 | "name": "python", 111 | "nbconvert_exporter": "python", 112 | "pygments_lexer": "ipython3", 113 | "version": "3.8.6" 114 | } 115 | }, 116 | "nbformat": 4, 117 | "nbformat_minor": 4 118 | } 119 | -------------------------------------------------------------------------------- /draw_box.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "\n", 11 | "%matplotlib inline\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "\n", 14 | "from duke_dbt_data import dcmread_image, read_boxes, draw_box" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "metadata": { 21 | "scrolled": false 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "df = read_boxes(boxes_fp=\"/data/boxes-train.csv\", filepaths_fp=\"/data/file-paths-train.csv\")" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 3, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "box_series = df.iloc[0]\n", 35 | "view = box_series[\"View\"]\n", 36 | "slice_index = box_series[\"Slice\"]\n", 37 | "# if you have image data saved in classic path folder structure, read the file path from \"classic_path\"\n", 38 | "# image_path = 
os.path.join(\"/data\", box_series[\"classic_path\"])\n", 39 | "image_path = os.path.join(\"/data\", box_series[\"descriptive_path\"])" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 4, 45 | "metadata": {}, 46 | "outputs": [ 47 | { 48 | "name": "stderr", 49 | "output_type": "stream", 50 | "text": [ 51 | "/usr/local/lib/python3.8/site-packages/openjpeg/utils.py:189: UserWarning: The (0028,0101) Bits Stored value '10' in the dataset does not match the component precision value '16' found in the JPEG 2000 data. It's recommended that you change the Bits Stored value to produce the correct output\n", 52 | " warnings.warn(\n" 53 | ] 54 | }, 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "CPU times: user 12.5 s, sys: 160 ms, total: 12.7 s\n", 60 | "Wall time: 12.7 s\n" 61 | ] 62 | } 63 | ], 64 | "source": [ 65 | "%%time\n", 66 | "image = dcmread_image(fp=image_path, view=view, index=slice_index)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 5, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "x, y, width, height = box_series[[\"X\", \"Y\", \"Width\", \"Height\"]]\n", 76 | "image = draw_box(image=image, x=x, y=y, width=width, height=height, lw=10)" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 6, 82 | "metadata": {}, 83 | "outputs": [ 84 | { 85 | "data": { 86 | "image/png": "\n", 87 | "text/plain": [ 88 | "
" 89 | ] 90 | }, 91 | "metadata": { 92 | "needs_background": "light" 93 | }, 94 | "output_type": "display_data" 95 | } 96 | ], 97 | "source": [ 98 | "plt.imshow(image, cmap=plt.cm.gray);" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [] 107 | } 108 | ], 109 | "metadata": { 110 | "kernelspec": { 111 | "display_name": "Python 3", 112 | "language": "python", 113 | "name": "python3" 114 | }, 115 | "language_info": { 116 | "codemirror_mode": { 117 | "name": "ipython", 118 | "version": 3 119 | }, 120 | "file_extension": ".py", 121 | "mimetype": "text/x-python", 122 | "name": "python", 123 | "nbconvert_exporter": "python", 124 | "pygments_lexer": "ipython3", 125 | "version": "3.8.6" 126 | } 127 | }, 128 | "nbformat": 4, 129 | "nbformat_minor": 4 130 | } 131 | --------------------------------------------------------------------------------