├── README.md ├── LICENSE ├── .gitignore └── notebooks └── generate_masks.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # data-processing-work 2 | My work in data processing 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Shashi Gharti 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[codz] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # UV 98 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | #uv.lock 102 | 103 | # poetry 104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 105 | # This is especially recommended for binary packages to ensure reproducibility, and is more 106 | # commonly ignored for libraries. 107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 108 | #poetry.lock 109 | #poetry.toml 110 | 111 | # pdm 112 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 113 | # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python. 114 | # https://pdm-project.org/en/latest/usage/project/#working-with-version-control 115 | #pdm.lock 116 | #pdm.toml 117 | .pdm-python 118 | .pdm-build/ 119 | 120 | # pixi 121 | # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control. 122 | #pixi.lock 123 | # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one 124 | # in the .venv directory. It is recommended not to include this directory in version control. 125 | .pixi 126 | 127 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 128 | __pypackages__/ 129 | 130 | # Celery stuff 131 | celerybeat-schedule 132 | celerybeat.pid 133 | 134 | # SageMath parsed files 135 | *.sage.py 136 | 137 | # Environments 138 | .env 139 | .envrc 140 | .venv 141 | env/ 142 | venv/ 143 | ENV/ 144 | env.bak/ 145 | venv.bak/ 146 | 147 | # Spyder project settings 148 | .spyderproject 149 | .spyproject 150 | 151 | # Rope project settings 152 | .ropeproject 153 | 154 | # mkdocs documentation 155 | /site 156 | 157 | # mypy 158 | .mypy_cache/ 159 | .dmypy.json 160 | dmypy.json 161 | 162 | # Pyre type checker 163 | .pyre/ 164 | 165 | # pytype static type analyzer 166 | .pytype/ 167 | 168 | # Cython debug symbols 169 | cython_debug/ 170 | 171 | # PyCharm 172 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 173 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 174 | # and can be added to the global gitignore or merged into this file. For a more nuclear 175 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 176 | #.idea/ 177 | 178 | # Abstra 179 | # Abstra is an AI-powered process automation framework. 180 | # Ignore directories containing user credentials, local state, and settings. 181 | # Learn more at https://abstra.io/docs 182 | .abstra/ 183 | 184 | # Visual Studio Code 185 | # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore 186 | # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore 187 | # and can be added to the global gitignore or merged into this file. 
However, if you prefer, 188 | # you could uncomment the following to ignore the entire vscode folder 189 | # .vscode/ 190 | 191 | # Ruff stuff: 192 | .ruff_cache/ 193 | 194 | # PyPI configuration file 195 | .pypirc 196 | 197 | # Cursor 198 | # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to 199 | # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data 200 | # refer to https://docs.cursor.com/context/ignore-files 201 | .cursorignore 202 | .cursorindexingignore 203 | 204 | # Marimo 205 | marimo/_static/ 206 | marimo/_lsp/ 207 | __marimo__/ 208 | -------------------------------------------------------------------------------- /notebooks/generate_masks.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "30bf446d", 6 | "metadata": {}, 7 | "source": [ 8 | "This notebook covers the image processing steps for breast images obtained through mammography in the CC and MLO views. Here I identify the mass area in each image and locate the landmarks for the nipple and the chest line (top, bottom).\n", 9 | "\n", 10 | "Using the landmarks in pairs of images from the same patient taken at different time points, I compute the affine matrix. The matrix is then applied in reverse to project masks onto the earlier mammograms. This helps to identify locations which might show early signs." 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "id": "cb1f97b9-7960-4ba0-b3df-33c3165ce087", 17 | "metadata": { 18 | "id": "e947c340-03a1-44c8-b831-b88812881f7b" 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "import csv\n", 23 | "import os\n", 24 | "import cv2\n", 25 | "\n", 26 | "import pickle\n", 27 | "import pandas as pd\n", 28 | "import numpy as np\n", 29 | "from PIL import Image\n", 30 | "from math import sqrt\n", 31 | "\n", 32 | "import matplotlib.pyplot as plt\n", 33 | "from aln_tools.gmic import common" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "id": "8a3f1d80-ecdb-4d2f-81a3-f75f5e77e995", 39 | "metadata": {}, 40 | "source": [ 41 | "#### Find the biggest contour and draw the marker around the contour to find the mass. 
Identify landmarks for nipple and chest points" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "id": "5bc09223-dc9c-419b-ab6d-64616cd98d50", 48 | "metadata": { 49 | "id": "5bc09223-dc9c-419b-ab6d-64616cd98d50" 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "def process_path(image_path, folder):\n", 54 | " paths = image_path.split(\"/\")\n", 55 | " return os.path.join(\"/\".join(paths[:-1]), folder, paths[-1])\n", 56 | "\n", 57 | "def find_contours(binary_image, image, polygon_image):\n", 58 | " y_max = 0\n", 59 | " y_min = 0\n", 60 | " max_area = 0\n", 61 | " biggest_contour = None\n", 62 | " # Scan the breast to see how many bows there are on the image\n", 63 | " # Find contours in the thresholded image\n", 64 | " contours, _ = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n", 65 | " polygon_image = image.copy()\n", 66 | "\n", 67 | " # Iterate through the contours and draw polygons\n", 68 | " for contour_idx, contour in enumerate(contours):\n", 69 | " # Calculate the area of the polygon\n", 70 | " area = int(cv2.contourArea(contour))\n", 71 | "\n", 72 | " # Calculate the middle point of the polygon\n", 73 | " M = cv2.moments(contour)\n", 74 | " if M[\"m00\"] != 0:\n", 75 | " cx = int(M[\"m10\"] / M[\"m00\"])\n", 76 | " cy = int(M[\"m01\"] / M[\"m00\"])\n", 77 | " middle_point = (cx, cy)\n", 78 | " # Draw the middle point on the image\n", 79 | " #cv2.circle(polygon_image, middle_point, 5, (0, 0, 255), -1)\n", 80 | "\n", 81 | " if area >= 150:\n", 82 | " if area>=max_area:\n", 83 | " biggest_contour = contour\n", 84 | " max_area = area\n", 85 | "\n", 86 | " return biggest_contour\n", 87 | "\n", 88 | "def draw(biggest_contour, polygon_image, image, orig_path, view):\n", 89 | " # Draw the biggest polygon on the image\n", 90 | " epsilon = 0.001 * cv2.arcLength(biggest_contour, True)\n", 91 | " approx_polygon = cv2.approxPolyDP(biggest_contour, epsilon, True)\n", 92 | " cv2.polylines(polygon_image, [approx_polygon], isClosed=True, color=(0, 255, 0), thickness=2)\n", 93 | " # Calculate the area of the biggest polygon\n", 94 | " breast_volume = int(cv2.contourArea(biggest_contour))\n", 95 | "\n", 96 | " # Iterate through the contour points to find y_min and y_max\n", 97 | " y_min = float('inf') # Initialize to positive infinity\n", 98 | " y_max = -1 # Initialize to a negative value\n", 99 | " x_min = -1\n", 100 | " x_max = -1\n", 101 | " y_nipple = -1\n", 102 | " x_nipple = -1\n", 103 | "\n", 104 | " if view == \"RCC\":\n", 105 | " x_nipple = float('inf') # Initialize to positive infinity\n", 106 | " nipple_point = (x_nipple, y_nipple)\n", 107 | "\n", 108 | " for point in biggest_contour:\n", 109 | " x, y = point[0] # Extract x and y coordinates from the point\n", 110 | " if y < y_min:\n", 111 | " # print(y_min)\n", 112 | " y_min = y # Update y_min if a smaller y-coordinate is encountered\n", 113 | " x_min = x\n", 114 | " if y > y_max:\n", 115 | " y_max = y # Update y_max if a larger y-coordinate is encountered\n", 116 | " x_max = x\n", 117 | " if view == \"LCC\":\n", 118 | " if x > x_nipple:\n", 119 | " x_nipple = x\n", 120 | " y_nipple = y\n", 121 | " if view == \"RCC\":\n", 122 | " if x < x_nipple:\n", 123 | " x_nipple = x\n", 124 | " y_nipple = y\n", 125 | "\n", 126 | " # #Draw the nipple from the polygon\n", 127 | " # nipple = (x_nipple, y_nipple)\n", 128 | " # cv2.circle(polygon_image, nipple, 15, (255, 0, 0), -1)\n", 129 | "\n", 130 | " image_height = image.shape[0]\n", 131 | " image_width = image.shape[1]\n", 132 | " 
image_width_threshold = image_width - 10\n", 133 | " image_width_threshold = 10 if view == \"LCC\" else image_width_threshold\n", 134 | " image_width = 0 if view == \"LCC\" else image_width\n", 135 | "\n", 136 | " # If top coordinate is not on the side line but the bottom coordinate\n", 137 | " if (x_min != image_width):\n", 138 | " if (y_min == 0):\n", 139 | " y_min = float('inf') # Initialize to positive infinity\n", 140 | " x_min = -1\n", 141 | "\n", 142 | " for point in biggest_contour:\n", 143 | " x, y = point[0] # Extract x and y coordinates from the point\n", 144 | " if (y<=y_nipple and y>=10):\n", 145 | " if x> x_min:\n", 146 | " y_min = y # Update y_min if a smaller y-coordinate is encountered\n", 147 | " x_min = x\n", 148 | "\n", 149 | "\n", 150 | "\n", 151 | " # If bottom coordinate is not on the side line but the top coordinate is\n", 152 | " if (x_max != image_width):\n", 153 | " if (y_max == image_height - 1):\n", 154 | " y_max = y_nipple # Initialize to positive infinity\n", 155 | " x_max = x_nipple\n", 156 | "\n", 157 | " for point in biggest_contour:\n", 158 | " x, y = point[0] # Extract x and y coordinates from the point\n", 159 | " if (y >= y_nipple and y <= (image_height - 100) and x <= image_width_threshold):\n", 160 | " if (x >= x_max):\n", 161 | " y_max = y # Update y_max if a bigger y-coordinate is encountered\n", 162 | " x_max = x\n", 163 | " if view == \"LCC\":\n", 164 | " x_min = 0\n", 165 | "\n", 166 | " #Draw the nipple marker on the polygon\n", 167 | " nipple = (x_nipple, y_nipple)\n", 168 | " # print(f\"nipple: {nipple}\")\n", 169 | " cv2.circle(polygon_image, nipple, 15, (255, 0, 0), -1)\n", 170 | "\n", 171 | " #Draw the middle_point on side opposite of breast(chest wall)\n", 172 | " y_middle_point = int(0.5*y_min + 0.5*y_max)\n", 173 | " x_middle_point = x_min\n", 174 | " middle_point = (x_middle_point, y_middle_point)\n", 175 | " cv2.circle(polygon_image, middle_point, 15, (255, 0, 0), -1)\n", 176 | "\n", 177 | " #Draw the points on chest wall\n", 178 | " min_point = (x_min, y_min)\n", 179 | " max_point = (x_min, y_max)\n", 180 | " cv2.circle(polygon_image, min_point, 15, (255, 0, 0), -1)\n", 181 | " cv2.circle(polygon_image, max_point, 15, (255, 0, 0), -1)\n", 182 | "\n", 183 | " #Draw the side line\n", 184 | " cv2.line(polygon_image, min_point,max_point,(255, 0, 0),5)\n", 185 | " #Draw the center line\n", 186 | " cv2.line(polygon_image, middle_point,nipple,(255, 0, 0),5)\n", 187 | "\n", 188 | " # Calculate the angle in radians between the two points\n", 189 | " delta_x = nipple[0] - middle_point[0]\n", 190 | " delta_y = nipple[1] - middle_point[1]\n", 191 | " angle_rad = np.arctan2(delta_y, delta_x)\n", 192 | "\n", 193 | " # Convert the angle from radians to degrees\n", 194 | " angle_deg = int(np.degrees(angle_rad))\n", 195 | "\n", 196 | " # Ensure the angle is positive (0 to 360 degrees)\n", 197 | " if angle_deg < 0:\n", 198 | " angle_deg += 360\n", 199 | "\n", 200 | " #Calculate the dist until the nipple\n", 201 | " dist = int(abs(sqrt( (x_nipple - x_min)**2 + (y_nipple - y_middle_point)**2 )))\n", 202 | " #width = y_max-y_min\n", 203 | "\n", 204 | " # Write image to the file\n", 205 | " image_path = process_path(orig_path, \"processed\")\n", 206 | " cv2.imwrite(image_path, polygon_image)\n", 207 | "\n", 208 | " # print(f\"image shape: {image.shape}\")\n", 209 | " # return [x_min, y_min, x_max, y_max, x_nipple, y_nipple, x_middle_point, y_middle_point, dist, angle_deg, breast_volume]\n", 210 | " return [*min_point, *max_point, *nipple, *middle_point, 
image.shape[1], image.shape[0]]\n", 211 | "\n", 212 | "\n", 213 | "#50452008_L-CC\n", 214 | "def read_dim(view, image_path=None, image=None):\n", 215 | "\n", 216 | " orig_path = image_path\n", 217 | " gray_image = image\n", 218 | " if image is None:\n", 219 | " # Load the image\n", 220 | " image = cv2.imread(image_path)\n", 221 | "\n", 222 | " # Convert the cropped image to grayscale\n", 223 | " gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)\n", 224 | "\n", 225 | " # Threshold the grayscale image to create a binary image\n", 226 | " _, binary_image = cv2.threshold(gray_image, 0, 255, cv2.THRESH_BINARY)\n", 227 | "\n", 228 | " # Save the binary image\n", 229 | " image_path = process_path(orig_path, \"binary\")\n", 230 | " cv2.imwrite(image_path, binary_image)\n", 231 | " polygon_image = image.copy()\n", 232 | "\n", 233 | " if view in ['LMLO', 'RMLO']:\n", 234 | " return [1,1]\n", 235 | "\n", 236 | " biggest_contour = find_contours(binary_image, image, polygon_image)\n", 237 | " if view == \"LCC\":\n", 238 | " # print(f\"\\n view: {view}\")\n", 239 | " if biggest_contour is not None:\n", 240 | " return draw(biggest_contour, polygon_image, image, orig_path, view)\n", 241 | " if view == \"RCC\":\n", 242 | " # print(f\"\\n view: {view}\")\n", 243 | " if biggest_contour is not None:\n", 244 | " return draw(biggest_contour, polygon_image, image, orig_path, view)" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": null, 250 | "id": "cd2ed48b-8d38-402d-84a3-7dfbf8a892c2", 251 | "metadata": { 252 | "id": "cd2ed48b-8d38-402d-84a3-7dfbf8a892c2" 253 | }, 254 | "outputs": [], 255 | "source": [ 256 | "def process_view(image_path):\n", 257 | " view = ''\n", 258 | " if 'L-CC' in image_path:\n", 259 | " view = 'L-CC'\n", 260 | " if 'L-MLO' in image_path:\n", 261 | " view = 'L-MLO'\n", 262 | " if 'R-CC' in image_path:\n", 263 | " view = 'R-CC'\n", 264 | " if 'R-MLO' in image_path:\n", 265 | " view = 'R-MLO'\n", 266 | " return view\n", 267 | "\n", 268 | "def process_file(folder_path, images):\n", 269 | " processed_list = []\n", 270 | " for image_name in images:\n", 271 | " image_path = os.path.join(folder_path, image_name)\n", 272 | " view = process_view(image_path)\n", 273 | " view = view.replace(\"-\", \"\")\n", 274 | " meta_data = read_dim(view, image_path=image_path)\n", 275 | " data = [image_name] + list(meta_data)\n", 276 | " processed_list.append(data)\n", 277 | " return processed_list\n", 278 | "\n", 279 | "def save_processed_file(processed_file_path, processed_list):\n", 280 | " columns = [\"name\", \"chestline_top_x\", \"chestline_top_y\", \"chestline_bottom_x\", \"chestline_bottom_y\", \"nipple_x\", \"nipple_y\", \"chestline_center_x\", \"chestline_center_y\", \"image_w\", \"image_h\"]\n", 281 | " processed_data_df = pd.DataFrame(processed_list, columns=columns)\n", 282 | " processed_data_df.to_csv(processed_file_path)\n", 283 | "\n", 284 | "\n", 285 | "def get_best_center(file_name, pkl_file_path, laterality='L-CC'):\n", 286 | " with open(pkl_file_path, 'rb') as file:\n", 287 | " data = pickle.load(file)\n", 288 | "\n", 289 | " for exam in data:\n", 290 | " found = exam[laterality][0] == file_name\n", 291 | " if found:\n", 292 | " return exam['best_center'][laterality][0]" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": null, 298 | "id": "06894d75-411f-45f3-98ac-119ee1ccde29", 299 | "metadata": { 300 | "id": "06894d75-411f-45f3-98ac-119ee1ccde29" 301 | }, 302 | "outputs": [], 303 | "source": [ 304 | "base_path = 
\"/home/shashi/Desktop/aileen-health-docs/calcification/cropped\"\n", 305 | "\n", 306 | "\n", 307 | "# images\n", 308 | "images_path = os.path.join(base_path, \"images\")\n", 309 | "# images/test.csv\n", 310 | "data_file_path = os.path.join(images_path, \"test.csv\")\n", 311 | "# images/processed\n", 312 | "processed_images_path = os.path.join(images_path, \"processed\")\n", 313 | "# images/processed/processed.csv\n", 314 | "processed_file_path = os.path.join(processed_images_path, 'processed.csv')\n", 315 | "\n", 316 | "# images/resized\n", 317 | "resized_images_path = os.path.join(images_path, \"resized\")\n", 318 | "# images/resized/processed\n", 319 | "resized_processed_images_path = os.path.join(resized_images_path, \"processed\")\n", 320 | "# images/resized/processed/processed.csv\n", 321 | "resized_processed_file_path = os.path.join(resized_processed_images_path, 'processed.csv')\n", 322 | "# images/resized/generated\n", 323 | "resized_generated_images_path = os.path.join(resized_images_path, \"generated\")\n", 324 | "\n", 325 | "# segmentation\n", 326 | "segmentation_path = os.path.join(base_path, \"segmentation\")\n", 327 | "# segmentation/resized\n", 328 | "resized_segmentation_path = os.path.join(segmentation_path, \"resized\")\n", 329 | "# segmentation/resized/generated\n", 330 | "resized_generated_segmentation_path = os.path.join(resized_segmentation_path, \"generated\")\n" 331 | ] 332 | }, 333 | { 334 | "cell_type": "markdown", 335 | "id": "d374aa5a-22a4-460a-ac4c-335345143afc", 336 | "metadata": { 337 | "id": "d374aa5a-22a4-460a-ac4c-335345143afc" 338 | }, 339 | "source": [ 340 | "#### Utility functions to load, resize and process image." 341 | ] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": null, 346 | "id": "3a86de7b-f85f-4602-8b22-5cd20e004042", 347 | "metadata": { 348 | "id": "3a86de7b-f85f-4602-8b22-5cd20e004042" 349 | }, 350 | "outputs": [], 351 | "source": [ 352 | "def pad_image_and_mask_to_max_size(image, max_width, max_height, laterality, mask=None):\n", 353 | " \"\"\"Pads an image and its mask to the given max_width and max_height.\"\"\"\n", 354 | " h, w = image.shape[:2]\n", 355 | " # print(h, w, max_width, max_height)\n", 356 | " # Calculate padding required for width and height\n", 357 | " pad_w = max_width - w\n", 358 | " pad_h = max_height - h\n", 359 | " # print(\"pad_w, pad_h\", pad_w, pad_h)\n", 360 | "\n", 361 | " # Calculate padding for each side\n", 362 | " top, bottom = pad_h // 2, pad_h - pad_h // 2\n", 363 | " if laterality == \"L\":\n", 364 | " left, right = 0, pad_w\n", 365 | " if laterality == \"R\":\n", 366 | " left, right = pad_w, 0\n", 367 | "\n", 368 | " # print(\"top, bottom\", top, bottom)\n", 369 | " # print(\"left, right\", left, right)\n", 370 | "\n", 371 | " # Pad the image with black pixels\n", 372 | " padded_image = cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=[0, 0, 0])\n", 373 | " # Pad the mask with zeros (assuming the mask is binary or grayscale)\n", 374 | " padded_mask = None\n", 375 | " if mask is not None:\n", 376 | " padded_mask = cv2.copyMakeBorder(mask, top, bottom, left, right, cv2.BORDER_CONSTANT, value=[0])\n", 377 | " return padded_image, padded_mask\n", 378 | "\n", 379 | "def get_exam_years_of_patients(data):\n", 380 | " exam_years_of_patients = {}\n", 381 | " for _, patient in data.iterrows():\n", 382 | " patient_id = int(patient[\"anon_patientid\"])\n", 383 | " exam_year = int(patient[\"exam_year\"])\n", 384 | "\n", 385 | " if patient_id not in 
exam_years_of_patients:\n", 386 | " exam_years_of_patients[patient_id] = []\n", 387 | "\n", 388 | " if exam_year not in exam_years_of_patients[patient_id]:\n", 389 | " exam_years_of_patients[patient_id].append(exam_year)\n", 390 | " return exam_years_of_patients\n", 391 | "\n", 392 | "def get_common_image_size(patient, years, lt_view):\n", 393 | " max_width = 0\n", 394 | " max_height = 0\n", 395 | " images = []\n", 396 | " for year in years:\n", 397 | " image = cv2.imread(os.path.join(base_path, 'images', f'{patient}{year}_{lt_view}.png'))\n", 398 | " height, width = image.shape[:2]\n", 399 | " images.append(image)\n", 400 | "\n", 401 | " if max_height < height:\n", 402 | " max_height = height\n", 403 | " if max_width < width:\n", 404 | " max_width = width\n", 405 | " return images, max_height, max_width\n", 406 | "\n", 407 | "def get_mask_path(image_filename, folder=None):\n", 408 | " segmentation_file_path = \"segmentation\"\n", 409 | " if folder:\n", 410 | " segmentation_file_path = folder\n", 411 | "\n", 412 | " malignant_path = os.path.join(segmentation_file_path, image_filename.replace(\".png\", \"_malignant.png\"))\n", 413 | " benign_path = os.path.join(segmentation_file_path, image_filename.replace(\".png\", \"_benign.png\"))\n", 414 | " path = os.path.join(segmentation_file_path, image_filename)\n", 415 | "\n", 416 | " has_malignant = os.path.exists(malignant_path)\n", 417 | " has_benign = os.path.exists(benign_path)\n", 418 | " has_path = os.path.exists(path)\n", 419 | "\n", 420 | " if has_malignant:\n", 421 | " return malignant_path\n", 422 | "\n", 423 | " if has_benign:\n", 424 | " return benign_path\n", 425 | "\n", 426 | " if has_path:\n", 427 | " return path\n" 428 | ] 429 | }, 430 | { 431 | "cell_type": "code", 432 | "execution_count": null, 433 | "id": "f330043c-b661-427d-9909-318e92232e97", 434 | "metadata": {}, 435 | "outputs": [], 436 | "source": [ 437 | "# load last two images starting from the later years\n", 438 | "data_df = pd.read_csv(data_file_path)\n", 439 | "patients = get_exam_years_of_patients(data_df)\n", 440 | "lateralities = ['L-CC', 'L-MLO', 'R-MLO', 'R-CC']\n", 441 | "for patient_id, years in patients.items():\n", 442 | " for lt_view in lateralities:\n", 443 | " source_image_year = years[-1]\n", 444 | " laterality = lt_view[0]\n", 445 | " images, max_height, max_width = get_common_image_size(patient_id, years, lt_view)\n", 446 | " for image, year in zip(images, years): \n", 447 | " image_file_name = f\"{patient_id}{year}_{lt_view}.png\"\n", 448 | " mask_file_name = get_mask_path(image_file_name, segmentation_path)\n", 449 | " mask = None \n", 450 | " if mask_file_name:\n", 451 | " # print(mask_file_name)\n", 452 | " mask = cv2.imread(mask_file_name, cv2.IMREAD_GRAYSCALE)\n", 453 | " image_padded, mask_padded = pad_image_and_mask_to_max_size(image, max_width, max_height, laterality, mask)\n", 454 | " image_file_name = os.path.join(base_path, 'images/resized/', image_file_name)\n", 455 | " cv2.imwrite(image_file_name, image_padded)\n", 456 | " if mask_file_name:\n", 457 | " mask_file_name = mask_file_name.replace(\"segmentation\", \"segmentation/resized\")\n", 458 | " cv2.imwrite(mask_file_name, mask_padded)\n", 459 | " print(f\"Saving: {image_file_name} {mask_file_name}\")" 460 | ] 461 | }, 462 | { 463 | "cell_type": "markdown", 464 | "id": "67faea08-40a3-4f6f-afa0-5dbb8cad230f", 465 | "metadata": {}, 466 | "source": [ 467 | "#### Process masks to generate the marker points" 468 | ] 469 | }, 470 | { 471 | "cell_type": "code", 472 | "execution_count": null, 
473 | "id": "de423293-1fa7-473c-af2b-b152f2249d9d", 474 | "metadata": {}, 475 | "outputs": [], 476 | "source": [ 477 | "# apply to all files\n", 478 | "images = os.listdir(images_path)\n", 479 | "images = [image for image in images if '.png' in image]\n", 480 | "processed_list = []\n", 481 | "for image_name in images:\n", 482 | " image_path = os.path.join(base_path, \"images/resized\", image_name)\n", 483 | " # print(image_path)\n", 484 | " \n", 485 | " view = process_view(image_path) \n", 486 | " view = view.replace(\"-\", \"\")\n", 487 | " meta_data = read_dim(view, image_path=image_path)\n", 488 | " # print(meta_data)\n", 489 | " data = [image_name] + list(meta_data)\n", 490 | " processed_list.append(data)\n", 491 | "\n", 492 | "\n", 493 | "# Save file\n", 494 | "save_processed_file(resized_processed_file_path, processed_list)" 495 | ] 496 | }, 497 | { 498 | "cell_type": "code", 499 | "execution_count": null, 500 | "id": "48c242e1-5cb2-4f25-a2a6-aa021717d3a2", 501 | "metadata": { 502 | "id": "48c242e1-5cb2-4f25-a2a6-aa021717d3a2", 503 | "outputId": "08764233-22d5-48ce-e2f2-5e4375bfa4e9" 504 | }, 505 | "outputs": [ 506 | { 507 | "name": "stdout", 508 | "output_type": "stream", 509 | "text": [ 510 | "loading file path: /home/shashi/Desktop/aileen-health-docs/calcification/cropped/images/resized/processed/processed.csv\n" 511 | ] 512 | }, 513 | { 514 | "data": { 515 | "text/html": [ 516 | "
|   | Unnamed: 0 | name | chestline_top_x | chestline_top_y | chestline_bottom_x | chestline_bottom_y | nipple_x | nipple_y | chestline_center_x | chestline_center_y | image_w | image_h |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 53992009_L-CC.png | 0 | 46 | 0.0 | 2661.0 | 1520.0 | 1276.0 | 0.0 | 1353.0 | 1664.0 | 2718.0 |
| 1 | 1 | 50452010_L-CC.png | 0 | 79 | 0.0 | 2717.0 | 1667.0 | 1427.0 | 0.0 | 1398.0 | 1718.0 | 2812.0 |
| 2 | 2 | 52882008_R-MLO.png | 1 | 1 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 3 | 3 | 53992011_R-CC.png | 1605 | 12 | 1605.0 | 2596.0 | 48.0 | 1293.0 | 1605.0 | 1304.0 | 1606.0 | 2630.0 |
| 4 | 4 | 1812013_L-CC.png | 0 | 20 | 0.0 | 2612.0 | 1215.0 | 1264.0 | 0.0 | 1316.0 | 1266.0 | 2644.0 |
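The intro cell of this notebook describes computing an affine matrix from landmark pairs and applying it in reverse to project masks onto the earlier mammograms, but that step is not shown in this part of the notebook. Below is a minimal sketch of how such a projection could be done with OpenCV, assuming three corresponding landmarks per view (nipple, chest-line top, chest-line bottom); the landmark coordinates and file names are hypothetical placeholders, not values from this project.

```python
import cv2
import numpy as np

# Three corresponding landmarks per image (nipple, chest-line top, chest-line bottom).
# Hypothetical coordinates for illustration only.
earlier_pts = np.float32([[1520, 1276], [0, 46], [0, 2661]])
later_pts = np.float32([[1667, 1427], [0, 79], [0, 2717]])

# Affine matrix mapping earlier-exam coordinates onto the later exam.
M = cv2.getAffineTransform(earlier_pts, later_pts)

# "Applied in reverse": invert the transform and warp the later exam's mask
# into the earlier exam's coordinate frame.
M_inv = cv2.invertAffineTransform(M)

later_mask = cv2.imread("later_mask.png", cv2.IMREAD_GRAYSCALE)        # placeholder path
earlier_image = cv2.imread("earlier_image.png", cv2.IMREAD_GRAYSCALE)  # placeholder path
h, w = earlier_image.shape[:2]

# INTER_NEAREST keeps the warped mask binary instead of interpolating gray values.
projected_mask = cv2.warpAffine(later_mask, M_inv, (w, h), flags=cv2.INTER_NEAREST)
cv2.imwrite("projected_mask.png", projected_mask)
```

With exactly three landmark pairs the affine fit is exact; if more correspondences were available, cv2.estimateAffine2D would give a least-squares estimate instead.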