├── README.md ├── LICENSE ├── .gitignore └── notebooks └── generate_masks.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # data-processing-work 2 | My work in data processing 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Shashi Gharti 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[codz] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # UV 98 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | #uv.lock 102 | 103 | # poetry 104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 105 | # This is especially recommended for binary packages to ensure reproducibility, and is more 106 | # commonly ignored for libraries. 107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 108 | #poetry.lock 109 | #poetry.toml 110 | 111 | # pdm 112 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 113 | # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python. 114 | # https://pdm-project.org/en/latest/usage/project/#working-with-version-control 115 | #pdm.lock 116 | #pdm.toml 117 | .pdm-python 118 | .pdm-build/ 119 | 120 | # pixi 121 | # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control. 122 | #pixi.lock 123 | # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one 124 | # in the .venv directory. It is recommended not to include this directory in version control. 125 | .pixi 126 | 127 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 128 | __pypackages__/ 129 | 130 | # Celery stuff 131 | celerybeat-schedule 132 | celerybeat.pid 133 | 134 | # SageMath parsed files 135 | *.sage.py 136 | 137 | # Environments 138 | .env 139 | .envrc 140 | .venv 141 | env/ 142 | venv/ 143 | ENV/ 144 | env.bak/ 145 | venv.bak/ 146 | 147 | # Spyder project settings 148 | .spyderproject 149 | .spyproject 150 | 151 | # Rope project settings 152 | .ropeproject 153 | 154 | # mkdocs documentation 155 | /site 156 | 157 | # mypy 158 | .mypy_cache/ 159 | .dmypy.json 160 | dmypy.json 161 | 162 | # Pyre type checker 163 | .pyre/ 164 | 165 | # pytype static type analyzer 166 | .pytype/ 167 | 168 | # Cython debug symbols 169 | cython_debug/ 170 | 171 | # PyCharm 172 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 173 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 174 | # and can be added to the global gitignore or merged into this file. For a more nuclear 175 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 176 | #.idea/ 177 | 178 | # Abstra 179 | # Abstra is an AI-powered process automation framework. 180 | # Ignore directories containing user credentials, local state, and settings. 181 | # Learn more at https://abstra.io/docs 182 | .abstra/ 183 | 184 | # Visual Studio Code 185 | # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore 186 | # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore 187 | # and can be added to the global gitignore or merged into this file. However, if you prefer, 188 | # you could uncomment the following to ignore the entire vscode folder 189 | # .vscode/ 190 | 191 | # Ruff stuff: 192 | .ruff_cache/ 193 | 194 | # PyPI configuration file 195 | .pypirc 196 | 197 | # Cursor 198 | # Cursor is an AI-powered code editor. 
`.cursorignore` specifies files/directories to 199 | # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data 200 | # refer to https://docs.cursor.com/context/ignore-files 201 | .cursorignore 202 | .cursorindexingignore 203 | 204 | # Marimo 205 | marimo/_static/ 206 | marimo/_lsp/ 207 | __marimo__/ 208 | -------------------------------------------------------------------------------- /notebooks/generate_masks.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "30bf446d", 6 | "metadata": {}, 7 | "source": [ 8 | "This involves the image-processing steps for breast images obtained through mammography with CC and MLO views. Here I identify the mass area in each image and locate the landmarks for the nipple and the chest line (top, bottom).\n", 9 | "\n", 10 | "Using landmarks from pairs of images of the same patient at different time points, I compute the affine matrix. The matrix is then applied in reverse to project masks onto earlier mammograms. This helps to identify locations that might show early signs." 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "id": "cb1f97b9-7960-4ba0-b3df-33c3165ce087", 17 | "metadata": { 18 | "id": "e947c340-03a1-44c8-b831-b88812881f7b" 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "import csv\n", 23 | "import os\n", 24 | "import cv2\n", 25 | "\n", 26 | "import pickle\n", 27 | "import pandas as pd\n", 28 | "import numpy as np\n", 29 | "from PIL import Image\n", 30 | "from calendar import c\n", 31 | "from cmath import sqrt\n", 32 | "import matplotlib.pyplot as plt\n", 33 | "from aln_tools.gmic import common" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "id": "8a3f1d80-ecdb-4d2f-81a3-f75f5e77e995", 39 | "metadata": {}, 40 | "source": [ 41 | "#### Find the biggest contour and draw the marker around the contour to find the mass. 
def process_path(image_path, folder):
    """Insert *folder* as the parent directory of the file named by *image_path*.

    e.g. process_path("a/b/c.png", "processed") -> "a/b/processed/c.png".
    Assumes '/'-separated paths, as produced elsewhere in this notebook.
    """
    parts = image_path.split("/")
    return os.path.join("/".join(parts[:-1]), folder, parts[-1])


def find_contours(binary_image, image, polygon_image):
    """Return the largest external contour (area >= 150 px) of *binary_image*.

    *image* and *polygon_image* are kept for signature compatibility with
    existing callers; they are not used for the result (the original version
    only drew debug markers on copies of them — dead code, removed).

    Returns None when no contour reaches the minimum area.
    """
    biggest_contour = None
    max_area = 0
    # External contours only: the breast silhouette is the dominant blob.
    contours, _ = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    for contour in contours:
        area = int(cv2.contourArea(contour))
        # Ignore small specks, keep the largest remaining region.
        if area >= 150 and area >= max_area:
            biggest_contour = contour
            max_area = area
    return biggest_contour


def draw(biggest_contour, polygon_image, image, orig_path, view):
    """Annotate *polygon_image* with nipple / chest-wall landmarks and save it.

    Only "LCC"/"RCC" views are expected here (see read_dim).

    Returns [top_x, top_y, bottom_x, bottom_y, nipple_x, nipple_y,
    center_x, center_y, image_w, image_h]. Side effect: writes the annotated
    image next to *orig_path* under a "processed" sub-folder.
    """
    # Outline the breast contour.
    epsilon = 0.001 * cv2.arcLength(biggest_contour, True)
    approx_polygon = cv2.approxPolyDP(biggest_contour, epsilon, True)
    cv2.polylines(polygon_image, [approx_polygon], isClosed=True, color=(0, 255, 0), thickness=2)

    breast_volume = int(cv2.contourArea(biggest_contour))  # currently unused in the return

    y_min = float('inf')
    y_max = -1
    x_min = -1
    x_max = -1
    y_nipple = -1
    x_nipple = -1
    if view == "RCC":
        # Right-breast CC images point left, so the nipple is the smallest x.
        x_nipple = float('inf')

    # One pass over the contour: topmost point, bottommost point, and the
    # most lateral point (the nipple) for the given view.
    for point in biggest_contour:
        x, y = point[0]
        if y < y_min:
            y_min = y
            x_min = x
        if y > y_max:
            y_max = y
            x_max = x
        if view == "LCC" and x > x_nipple:
            x_nipple = x
            y_nipple = y
        if view == "RCC" and x < x_nipple:
            x_nipple = x
            y_nipple = y

    image_height = image.shape[0]
    image_width = image.shape[1]
    # For LCC the chest wall is the left edge (x == 0); for RCC the right edge.
    image_width_threshold = 10 if view == "LCC" else image_width - 10
    image_width = 0 if view == "LCC" else image_width

    # If the top point clipped the upper image border, re-pick it between the
    # top margin and the nipple, preferring the most chest-ward x.
    # NOTE(review): indentation in the archived source was ambiguous; the
    # re-pick loop is assumed to apply only in the clipped case — TODO confirm.
    if x_min != image_width:
        if y_min == 0:
            y_min = float('inf')
            x_min = -1
            for point in biggest_contour:
                x, y = point[0]
                if y <= y_nipple and y >= 10:
                    if x > x_min:
                        y_min = y
                        x_min = x

    # Same treatment when the bottom point clipped the lower border.
    if x_max != image_width:
        if y_max == image_height - 1:
            y_max = y_nipple
            x_max = x_nipple
            for point in biggest_contour:
                x, y = point[0]
                if y >= y_nipple and y <= (image_height - 100) and x <= image_width_threshold:
                    if x >= x_max:
                        y_max = y
                        x_max = x

    if view == "LCC":
        x_min = 0  # chest wall is the left image edge for left CC views

    # Nipple marker.
    nipple = (x_nipple, y_nipple)
    cv2.circle(polygon_image, nipple, 15, (255, 0, 0), -1)

    # Mid-point of the chest-wall segment (opposite side of the breast).
    y_middle_point = int(0.5 * y_min + 0.5 * y_max)
    x_middle_point = x_min
    middle_point = (x_middle_point, y_middle_point)
    cv2.circle(polygon_image, middle_point, 15, (255, 0, 0), -1)

    # Chest-wall endpoints plus the side line and the center line.
    min_point = (x_min, y_min)
    max_point = (x_min, y_max)
    cv2.circle(polygon_image, min_point, 15, (255, 0, 0), -1)
    cv2.circle(polygon_image, max_point, 15, (255, 0, 0), -1)
    cv2.line(polygon_image, min_point, max_point, (255, 0, 0), 5)
    cv2.line(polygon_image, middle_point, nipple, (255, 0, 0), 5)

    # Angle of the chest-center -> nipple line, normalized to 0..360 degrees.
    # Kept (with dist below) for the commented-out extended return row.
    delta_x = nipple[0] - middle_point[0]
    delta_y = nipple[1] - middle_point[1]
    angle_deg = int(np.degrees(np.arctan2(delta_y, delta_x)))
    if angle_deg < 0:
        angle_deg += 360

    # Chest-wall-to-nipple distance. The original used cmath.sqrt + abs();
    # plain float arithmetic is equivalent for this non-negative radicand.
    dist = int(((x_nipple - x_min) ** 2 + (y_nipple - y_middle_point) ** 2) ** 0.5)

    # Save the annotated image under <dir>/processed/<name>.
    cv2.imwrite(process_path(orig_path, "processed"), polygon_image)

    # return [x_min, y_min, x_max, y_max, x_nipple, y_nipple, x_middle_point, y_middle_point, dist, angle_deg, breast_volume]
    return [*min_point, *max_point, *nipple, *middle_point, image.shape[1], image.shape[0]]


def read_dim(view, image_path=None, image=None):
    """Threshold the image, find the breast contour and return the landmark row.

    Returns:
      - [1, 1] for MLO views (not processed by this pipeline),
      - the 10-element landmark list from draw() for CC views,
      - None when no sufficiently large contour is found (callers that do
        list(result) will fail in that case — see process_file).

    Side effects: writes the binary mask under <dir>/binary/ and the
    annotated image under <dir>/processed/.
    NOTE(review): when *image* is given, *image_path* is still used to build
    the output paths — passing image without image_path would fail.
    """
    orig_path = image_path
    gray_image = image
    if image is None:
        image = cv2.imread(image_path)
        gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Any non-black pixel counts as tissue.
    _, binary_image = cv2.threshold(gray_image, 0, 255, cv2.THRESH_BINARY)
    cv2.imwrite(process_path(orig_path, "binary"), binary_image)
    polygon_image = image.copy()

    if view in ['LMLO', 'RMLO']:
        return [1, 1]

    biggest_contour = find_contours(binary_image, image, polygon_image)
    # The original had identical LCC and RCC branches; merged into one.
    if view in ("LCC", "RCC") and biggest_contour is not None:
        return draw(biggest_contour, polygon_image, image, orig_path, view)


def process_view(image_path):
    """Extract the view token ('L-CC', 'L-MLO', 'R-CC' or 'R-MLO') from a path.

    Returns '' when no token occurs. The last matching token wins, mirroring
    the original chain of independent ifs.
    """
    view = ''
    for token in ('L-CC', 'L-MLO', 'R-CC', 'R-MLO'):
        if token in image_path:
            view = token
    return view


def process_file(folder_path, images):
    """Run read_dim over *images* inside *folder_path*.

    Returns a list of [image_name, *landmarks] rows.
    """
    processed_list = []
    for image_name in images:
        image_path = os.path.join(folder_path, image_name)
        view = process_view(image_path).replace("-", "")
        meta_data = read_dim(view, image_path=image_path)
        # NOTE(review): read_dim may return None (no contour found) and
        # list(None) would raise TypeError here — TODO confirm inputs.
        processed_list.append([image_name] + list(meta_data))
    return processed_list


def save_processed_file(processed_file_path, processed_list):
    """Write the landmark rows to CSV.

    The pandas index IS written on purpose (no index=False): the later
    parsing cell unpacks a leading 'Unnamed: 0' column from each row.
    """
    columns = ["name", "chestline_top_x", "chestline_top_y", "chestline_bottom_x",
               "chestline_bottom_y", "nipple_x", "nipple_y", "chestline_center_x",
               "chestline_center_y", "image_w", "image_h"]
    processed_data_df = pd.DataFrame(processed_list, columns=columns)
    processed_data_df.to_csv(processed_file_path)
def get_best_center(file_name, pkl_file_path, laterality='L-CC'):
    """Look up the stored 'best_center' for *file_name* in a pickled exam list.

    Each exam is a dict that maps view keys (e.g. 'L-CC') to a list whose
    first element is a file name, alongside a parallel 'best_center' dict.
    Returns the first matching center, or None when the file is not found.
    """
    with open(pkl_file_path, 'rb') as fh:
        exams = pickle.load(fh)
    for exam in exams:
        if exam[laterality][0] == file_name:
            return exam['best_center'][laterality][0]
    return None


# Folder layout used throughout the notebook (everything under base_path):
#   images/                       original crops + test.csv
#   images/processed/             annotated images + processed.csv
#   images/resized/               images padded to a common size
#   images/resized/processed/     annotated resized images + processed.csv
#   images/resized/generated/     projected masks
#   segmentation/                 raw masks (segmentation/resized/ = padded)
base_path = "/home/shashi/Desktop/aileen-health-docs/calcification/cropped"

images_path = os.path.join(base_path, "images")
data_file_path = os.path.join(images_path, "test.csv")
processed_images_path = os.path.join(images_path, "processed")
processed_file_path = os.path.join(processed_images_path, 'processed.csv')

resized_images_path = os.path.join(images_path, "resized")
resized_processed_images_path = os.path.join(resized_images_path, "processed")
resized_processed_file_path = os.path.join(resized_processed_images_path, 'processed.csv')
resized_generated_images_path = os.path.join(resized_images_path, "generated")

segmentation_path = os.path.join(base_path, "segmentation")
resized_segmentation_path = os.path.join(segmentation_path, "resized")
# segmentation/resized/generated
# The guard lets this cell's helper functions be imported standalone (for
# testing); inside the notebook resized_segmentation_path is always defined
# by the path-setup cell above.
try:
    resized_generated_segmentation_path = os.path.join(resized_segmentation_path, "generated")
except NameError:
    pass


def pad_image_and_mask_to_max_size(image, max_width, max_height, laterality, mask=None):
    """Pad *image* (and optional *mask*) with black up to max_width x max_height.

    Height padding is split evenly between top and bottom. Width padding goes
    entirely on the side away from the chest wall: to the right for 'L'
    laterality, to the left for 'R'.

    Returns (padded_image, padded_mask); padded_mask is None when no mask is
    given.

    Raises ValueError for an unknown laterality (previously this fell through
    to a NameError on undefined left/right).
    """
    h, w = image.shape[:2]
    pad_w = max_width - w
    pad_h = max_height - h

    top, bottom = pad_h // 2, pad_h - pad_h // 2
    if laterality == "L":
        left, right = 0, pad_w
    elif laterality == "R":
        left, right = pad_w, 0
    else:
        raise ValueError(f"laterality must be 'L' or 'R', got {laterality!r}")

    # Pad the image with black pixels.
    padded_image = cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=[0, 0, 0])
    # Pad the mask with zeros (mask is binary/grayscale).
    padded_mask = None
    if mask is not None:
        padded_mask = cv2.copyMakeBorder(mask, top, bottom, left, right, cv2.BORDER_CONSTANT, value=[0])
    return padded_image, padded_mask


def get_exam_years_of_patients(data):
    """Map anon_patientid -> list of distinct exam years (ints).

    *data* is a DataFrame with 'anon_patientid' and 'exam_year' columns;
    years keep their first-appearance order per patient.
    """
    exam_years_of_patients = {}
    for _, patient in data.iterrows():
        patient_id = int(patient["anon_patientid"])
        exam_year = int(patient["exam_year"])
        years = exam_years_of_patients.setdefault(patient_id, [])
        if exam_year not in years:
            years.append(exam_year)
    return exam_years_of_patients


def get_common_image_size(patient, years, lt_view):
    """Load <patient><year>_<lt_view>.png for each year.

    Returns (images, max_height, max_width) over all loaded images.
    NOTE(review): cv2.imread returns None for a missing file, which would
    fail at .shape — assumes every listed year has an image; TODO confirm.
    """
    max_width = 0
    max_height = 0
    images = []
    for year in years:
        image = cv2.imread(os.path.join(base_path, 'images', f'{patient}{year}_{lt_view}.png'))
        height, width = image.shape[:2]
        images.append(image)
        max_height = max(max_height, height)
        max_width = max(max_width, width)
    return images, max_height, max_width


def get_mask_path(image_filename, folder=None):
    """Return the existing mask path for *image_filename* inside *folder*
    (default 'segmentation').

    Preference order: <name>_malignant.png, then <name>_benign.png, then the
    plain name. Returns None when none of them exists on disk.
    """
    segmentation_file_path = folder if folder else "segmentation"
    candidates = (
        os.path.join(segmentation_file_path, image_filename.replace(".png", "_malignant.png")),
        os.path.join(segmentation_file_path, image_filename.replace(".png", "_benign.png")),
        os.path.join(segmentation_file_path, image_filename),
    )
    for candidate in candidates:
        if os.path.exists(candidate):
            return candidate
    return None
# ---- Pad every image (and its mask, when one exists) of a patient/view pair
# ---- to that pair's common size, so later exams can be registered against
# ---- earlier ones. Writes into images/resized/ and segmentation/resized/.
data_df = pd.read_csv(data_file_path)
patients = get_exam_years_of_patients(data_df)
lateralities = ['L-CC', 'L-MLO', 'R-MLO', 'R-CC']
for patient_id, years in patients.items():
    for lt_view in lateralities:
        laterality = lt_view[0]  # 'L' or 'R' drives the padding side
        images, max_height, max_width = get_common_image_size(patient_id, years, lt_view)
        for image, year in zip(images, years):
            image_file_name = f"{patient_id}{year}_{lt_view}.png"
            mask_file_name = get_mask_path(image_file_name, segmentation_path)
            mask = None
            if mask_file_name:
                mask = cv2.imread(mask_file_name, cv2.IMREAD_GRAYSCALE)
            image_padded, mask_padded = pad_image_and_mask_to_max_size(image, max_width, max_height, laterality, mask)
            image_file_name = os.path.join(base_path, 'images/resized/', image_file_name)
            cv2.imwrite(image_file_name, image_padded)
            if mask_file_name:
                # Mirror the mask into the resized segmentation tree.
                mask_file_name = mask_file_name.replace("segmentation", "segmentation/resized")
                cv2.imwrite(mask_file_name, mask_padded)
            print(f"Saving: {image_file_name} {mask_file_name}")

# ---- Generate the landmark rows for every resized image and save them.
# NOTE(review): the file list comes from images_path but the files are read
# from images/resized — this works only when both folders hold the same
# names; TODO confirm that is guaranteed.
images = [name for name in os.listdir(images_path) if '.png' in name]
processed_list = []
for image_name in images:
    image_path = os.path.join(base_path, "images/resized", image_name)
    view = process_view(image_path).replace("-", "")
    meta_data = read_dim(view, image_path=image_path)
    processed_list.append([image_name] + list(meta_data))

# Save file
save_processed_file(resized_processed_file_path, processed_list)
\n", 517 | "\n", 530 | "\n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | "
Unnamed: 0namechestline_top_xchestline_top_ychestline_bottom_xchestline_bottom_ynipple_xnipple_ychestline_center_xchestline_center_yimage_wimage_h
0053992009_L-CC.png0460.02661.01520.01276.00.01353.01664.02718.0
1150452010_L-CC.png0790.02717.01667.01427.00.01398.01718.02812.0
2252882008_R-MLO.png11NaNNaNNaNNaNNaNNaNNaNNaN
3353992011_R-CC.png1605121605.02596.048.01293.01605.01304.01606.02630.0
441812013_L-CC.png0200.02612.01215.01264.00.01316.01266.02644.0
\n", 626 | "
from typing import List
from collections import defaultdict

# Number of landmark pairs required by cv2.getAffineTransform.
reference_points = 3


def get_affine_matrix(landmarks_image1: List[List[float]], landmarks_image2: List[List[float]]) -> np.ndarray:
    """
    Calculate the affine transformation matrix from landmarks in two images.

    Parameters:
        landmarks_image1 (List[List[float]]): Coordinates of landmarks in the first image.
        landmarks_image2 (List[List[float]]): Corresponding coordinates of landmarks in the second image.

    Returns:
        np.ndarray: The affine transformation matrix (2x3).

    Raises:
        ValueError: If the input lists do not contain exactly three landmarks each.
    """
    if len(landmarks_image1) != reference_points or len(landmarks_image2) != reference_points:
        raise ValueError("Both input lists must contain exactly three landmarks.")

    # cv2.getAffineTransform expects float32 arrays of three points each.
    landmarks_image1_np = np.array(landmarks_image1, dtype=np.float32)
    landmarks_image2_np = np.array(landmarks_image2, dtype=np.float32)
    return cv2.getAffineTransform(landmarks_image1_np, landmarks_image2_np)


# ---- Group landmark rows by patient / laterality / view.
# (The original first computed rows filtered with .isin(images) and then
# immediately overwrote it with the unfiltered list; the dead first pass is
# removed.)
rows = processed_data_df.values.tolist()

views = ['CC', 'MLO']
lateralities = ['L', 'R']
patient_images = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
patient_masks = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
patient_images_metadata = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))

for row in rows:
    # Leading element is the CSV index column written by to_csv (no index=False).
    _, name, chestline_top_x, chestline_top_y, chestline_bottom_x, chestline_bottom_y, nipple_x, nipple_y, chestline_center_x, chestline_center_y, width, height = row
    prefix, suffix = name.split("_")           # e.g. "1812013", "L-CC.png"
    laterality, view = suffix[:-4].split("-")  # strip ".png", split "L-CC"
    patient_id = prefix[:-4]                   # year is the last four digits
    year = prefix[-4:]

    patient_images[patient_id][laterality][view].append(year)
    patient_images_metadata[patient_id][laterality][view].append([
        (chestline_top_x, chestline_top_y),
        (chestline_bottom_x, chestline_bottom_y),
        (nipple_x, nipple_y),
        (chestline_center_x, chestline_center_y),
        (width, height),
    ])
    if get_mask_path(name, resized_segmentation_path):
        patient_masks[patient_id][laterality][view].append(year)

# Debug prints (optional)
print("image years:", patient_images['181']['L']['CC'])
print("mask years:", patient_masks['181']['L']['CC'])
print("metadata years:", patient_images_metadata['181']['L']['CC'])
print("\n")
# print(patient_images)
"year_with_masks:['2013'] years:['2009', '2012', '2011', '2013']\n", 779 | "year_index_with_mask:3\n" 780 | ] 781 | } 782 | ], 783 | "source": [ 784 | "def get_year_with_mask(years, years_with_masks):\n", 785 | " years = list(map(int, years))\n", 786 | " years_with_masks = list(map(int, years_with_masks))\n", 787 | " print(years, years_with_masks)\n", 788 | " for year_index, year in enumerate(years):\n", 789 | " if year in years_with_masks:\n", 790 | " return year_index\n", 791 | " return -1\n", 792 | "\n", 793 | "# Generate affine matrix the pair of images\n", 794 | "for patient_id, lateralities in patient_images.items():\n", 795 | " if patient_id not in ['2911']:\n", 796 | " continue\n", 797 | " for laterality, views in lateralities.items():\n", 798 | " for view, years in views.items():\n", 799 | " if view not in [\"CC\"]:\n", 800 | " continue\n", 801 | "\n", 802 | " year_with_masks = patient_masks[patient_id][laterality][view]\n", 803 | " year_index_with_mask = get_year_with_mask(years, year_with_masks)\n", 804 | " if year_index_with_mask < 0:\n", 805 | " continue\n", 806 | "\n", 807 | " print(f\"year_with_masks:{year_with_masks} years:{years}\")\n", 808 | " print(f\"year_index_with_mask:{year_index_with_mask}\")\n", 809 | "\n", 810 | " image_name = f\"{patient_id}{years[year_index_with_mask]}_{laterality}-{view}.png\"\n", 811 | " mask_path = get_mask_path(image_name, resized_segmentation_path)\n", 812 | " for year_index, year in enumerate(years):\n", 813 | "\n", 814 | " # print(f\"\\npatient_id: {patient_id}\")\n", 815 | " # print(f\"year: {year}\")\n", 816 | " # print(f\"years_with_masks: {year_with_masks}\")\n", 817 | " # print(f\"years_with_masks: {year_with_masks}\")\n", 818 | " # print(f\"year_index_with_mask: {year_index_with_mask}\")\n", 819 | "\n", 820 | " if year not in year_with_masks:\n", 821 | " mask1 = cv2.imread(mask_path)\n", 822 | " height, width = mask1.shape[:2]\n", 823 | "\n", 824 | " # images_metadata = 
patient_images_metadata[patient_id][laterality][view][:2]\n", 825 | " # landmarks = [landmark[:3] for landmark in images_metadata]\n", 826 | " # affine_m = get_affine_matrix(*landmarks)\n", 827 | "\n", 828 | " reference_image_landmarks = patient_images_metadata[patient_id][laterality][view][year_index_with_mask][:reference_points]\n", 829 | " image_landmarks = patient_images_metadata[patient_id][laterality][view][year_index][:reference_points]\n", 830 | " landmarks = [reference_image_landmarks, image_landmarks]\n", 831 | " affine_m = get_affine_matrix(*landmarks)\n", 832 | "\n", 833 | " #Generate mask based on affine matrix\n", 834 | " mask2 = cv2.warpAffine(mask1, affine_m, (width, height))\n", 835 | " *folders, basename = mask_path.split(\"/\")\n", 836 | " prefix, *suffix = basename.split(\"_\")\n", 837 | " prefix = prefix[:-4] + str(year)\n", 838 | " mask_filename = \"_\".join([prefix] + suffix)\n", 839 | "\n", 840 | " new_mask_filename = os.path.join(resized_generated_segmentation_path, mask_filename)\n", 841 | " # print(f\"Saving mask file: {new_mask_filename}\")\n", 842 | " cv2.imwrite(new_mask_filename, mask2)" 843 | ] 844 | }, 845 | { 846 | "cell_type": "markdown", 847 | "id": "d47b7a72-1ab8-4259-b4f7-f6e8704039cc", 848 | "metadata": { 849 | "id": "d47b7a72-1ab8-4259-b4f7-f6e8704039cc" 850 | }, 851 | "source": [ 852 | "# Save visualization with generated masks" 853 | ] 854 | }, 855 | { 856 | "cell_type": "code", 857 | "execution_count": null, 858 | "id": "3ba9fca5-0d2e-466c-9609-b976fedc7f3a", 859 | "metadata": { 860 | "id": "3ba9fca5-0d2e-466c-9609-b976fedc7f3a" 861 | }, 862 | "outputs": [], 863 | "source": [ 864 | "def center_point_of_mask(mask):\n", 865 | " # Find the coordinates of non-zero pixels\n", 866 | " indices = np.where(mask == 1)\n", 867 | "\n", 868 | " # Calculate the center (centroid) of the mask\n", 869 | " if indices[0].size > 0: # Ensure there are non-zero pixels\n", 870 | " center_y = np.mean(indices[0]) # Average of row indices\n", 871 | 
" center_x = np.mean(indices[1]) # Average of column indices\n", 872 | " return (int(center_x), int(center_y)) # Return as integer coordinates\n", 873 | " return None" 874 | ] 875 | }, 876 | { 877 | "cell_type": "code", 878 | "execution_count": null, 879 | "id": "91c22716-6305-4673-888a-f62fca630d44", 880 | "metadata": { 881 | "id": "91c22716-6305-4673-888a-f62fca630d44", 882 | "outputId": "021754c9-3523-4ed2-bb74-c5eb0e38a72a" 883 | }, 884 | "outputs": [ 885 | { 886 | "name": "stdout", 887 | "output_type": "stream", 888 | "text": [ 889 | "image years: ['2009', '2012', '2011', '2013']\n", 890 | "mask years: ['2013']\n", 891 | "metadata years: [[(0, 191), (0.0, 2987.0), (1444.0, 1802.0), (0.0, 1589.0), (1806.0, 3230.0)], [(0, 42), (0.0, 3154.0), (1660.0, 1773.0), (0.0, 1598.0), (1806.0, 3230.0)], [(0, 166), (0.0, 3103.0), (1558.0, 1536.0), (0.0, 1634.0), (1806.0, 3230.0)], [(0, 31), (0.0, 3179.0), (1755.0, 1773.0), (0.0, 1605.0), (1806.0, 3230.0)]]\n", 892 | "\n", 893 | "\n", 894 | "patient_id: 2911\n", 895 | "[2009, 2012, 2011, 2013] [2013]\n", 896 | "[2011, 2013, 2009, 2012] []\n" 897 | ] 898 | } 899 | ], 900 | "source": [ 901 | "from matplotlib.patches import Circle\n", 902 | "print(\"image years:\", patient_images['2911']['L']['CC'])\n", 903 | "print(\"mask years:\", patient_masks['2911']['L']['CC'])\n", 904 | "print(\"metadata years:\", patient_images_metadata['2911']['L']['CC'])\n", 905 | "print(\"\\n\")\n", 906 | "\n", 907 | "# Loop through patients and their exam years\n", 908 | "for patient_id, _ in patient_images.items():\n", 909 | " if patient_id not in ['2911']:\n", 910 | " continue\n", 911 | " print(f\"patient_id: {patient_id}\")\n", 912 | " # Iterate over laterality for each patient\n", 913 | " for laterality in [\"L-CC\", \"R-CC\"]:\n", 914 | " lt, view = laterality.split(\"-\")\n", 915 | " years = patient_images[patient_id][lt][view]\n", 916 | " year_with_masks = patient_masks[patient_id][lt][view]\n", 917 | " year_index_with_mask = 
from matplotlib.patches import Circle

print("image years:", patient_images['2911']['L']['CC'])
print("mask years:", patient_masks['2911']['L']['CC'])
print("metadata years:", patient_images_metadata['2911']['L']['CC'])
print("\n")

# Visualize every exam year side by side, overlaying the (real or generated)
# mask plus the distances from the mask centroid to each landmark.
for patient_id, _ in patient_images.items():
    if patient_id not in ['2911']:
        continue
    print(f"patient_id: {patient_id}")
    # Iterate over laterality for each patient
    for laterality in ["L-CC", "R-CC"]:
        lt, view = laterality.split("-")
        years = patient_images[patient_id][lt][view]
        year_with_masks = patient_masks[patient_id][lt][view]
        year_index_with_mask = get_year_with_mask(years, year_with_masks)

        if year_index_with_mask < 0:
            continue

        # One subplot per exam year.
        fig, axes = plt.subplots(1, len(years), figsize=(3 * len(years), 5))
        if len(years) == 1:  # keep axes indexable when there is a single subplot
            axes = [axes]

        for year_index, year in enumerate(years):
            image_filename = f"{patient_id}{year}_{laterality}.png"

            # Prefer a ground-truth mask; fall back to a generated one.
            full_mask_path = get_mask_path(image_filename, resized_segmentation_path)
            gen = ""
            if not full_mask_path:
                full_mask_path = get_mask_path(image_filename, resized_generated_segmentation_path)
                gen = "-gen"

            # Load and show the grayscale image, normalized to [0, 1].
            full_image_path = os.path.join(resized_images_path, image_filename)
            image = Image.open(full_image_path).convert('L')
            image_np = np.array(image) / 255
            axes[year_index].imshow(image_np, cmap='gray')

            if full_mask_path:
                image_mask = Image.open(full_mask_path).convert('L')
                image_mask_np = np.array(image_mask) / 255
                color_mask = np.zeros((image_np.shape[0], image_np.shape[1], 3), dtype=np.uint8)
                color_mask[image_mask_np == 1] = [255, 0, 0]  # red where masked
                non_zero_pixel_count = np.sum(image_mask_np == 1)
                mask_center = center_point_of_mask(image_mask_np)

                # Red overlay with transparency.
                axes[year_index].imshow(color_mask, alpha=0.2)

                # FIX: the original indexed mask_center unconditionally and
                # crashed with a TypeError when the mask had no pixel equal
                # to 1 (center_point_of_mask returns None in that case).
                if mask_center is not None:
                    if non_zero_pixel_count == 1:
                        # A single-pixel mask is invisible; mark it with a dot.
                        circle = Circle(mask_center, radius=5, color='red', alpha=1.0, fill=True)
                        axes[year_index].add_patch(circle)

                    landmarks = patient_images_metadata[patient_id][lt][view][year_index][:4]
                    for landmark in landmarks:
                        axes[year_index].scatter(*landmark, color='blue', s=5)
                        axes[year_index].plot(
                            [mask_center[0], landmark[0]],  # X coordinates
                            [mask_center[1], landmark[1]],  # Y coordinates
                            color='yellow',
                            linewidth=0.2
                        )
                        distance = np.sqrt((mask_center[0] - landmark[0]) ** 2 + (mask_center[1] - landmark[1]) ** 2)
                        axes[year_index].text(
                            (mask_center[0] + landmark[0]) / 2,  # midpoint X
                            (mask_center[1] + landmark[1]) / 2,  # midpoint Y
                            f'{distance:.0f} px',
                            color='white',
                            fontsize=6,
                            bbox=dict(facecolor='black', alpha=0.5)
                        )

            axes[year_index].set_title(f'{year}-{laterality}{gen}')
            axes[year_index].axis('off')

        # NOTE(review): `year` is the last value of the loop above, so the row
        # is saved under the most recent year's name — original behavior kept.
        row_filename = os.path.join(resized_generated_images_path, "results", f"{patient_id}{year}_{laterality}.png")
        plt.subplots_adjust(left=0.05, right=0.95, top=0.9, bottom=0.1)
        plt.savefig(row_filename, bbox_inches="tight", dpi=1000)
        plt.close(fig)  # free figure memory after saving