├── imagedephi
    ├── __init__.py
    ├── utils
    │   ├── __init__.py
    │   ├── logger.py
    │   ├── progress_log.py
    │   ├── image.py
    │   ├── network.py
    │   ├── dicom.py
    │   ├── os.py
    │   ├── cli.py
    │   └── tiff.py
    ├── gui
    │   ├── utils
    │   │   ├── __init__.py
    │   │   ├── constants.py
    │   │   ├── directory.py
    │   │   └── image.py
    │   ├── api
    │   │   ├── __init__.py
    │   │   └── api.py
    │   ├── __init__.py
    │   └── app.py
    ├── __main__.py
    ├── command_file.py
    ├── redact
    │   ├── __init__.py
    │   ├── redaction_plan.py
    │   ├── build_redaction_plan.py
    │   ├── dicom.py
    │   └── svs.py
    ├── logging.conf
    ├── demo_files.csv
    ├── minimum_rules.yaml
    └── rules.py
├── client
    ├── .yarnrc.yml
    ├── src
    │   ├── vite-env.d.ts
    │   ├── style.css
    │   ├── shims-vue.d.ts
    │   ├── App.vue
    │   ├── main.ts
    │   ├── store
    │   │   ├── redactionStore.ts
    │   │   ├── types.ts
    │   │   ├── imageStore.ts
    │   │   └── directoryStore.ts
    │   ├── components
    │   │   ├── ImageDataDisplay.vue
    │   │   ├── InfiniteScroller.vue
    │   │   ├── MenuSteps.vue
    │   │   ├── ImageDataTable.vue
    │   │   └── FileBrowser.vue
    │   ├── api
    │   │   └── rest.ts
    │   └── HomePage.vue
    ├── public
    │   ├── logo.png
    │   ├── associatedPlaceholder.svg
    │   └── thumbnailPlaceholder.svg
    ├── postcss.config.js
    ├── .env.development
    ├── vite.config.ts
    ├── tsconfig.node.json
    ├── README.md
    ├── .gitignore
    ├── index.html
    ├── tsconfig.json
    ├── eslint.config.mjs
    ├── tailwind.config.js
    └── package.json
├── .gitattributes
├── docs
    ├── images
    │   ├── initial_ui.png
    │   ├── image_grid_errors_ui.png
    │   ├── image_grid_success_ui.png
    │   ├── redaction_complete_ui.png
    │   ├── redaction_progress_ui.png
    │   ├── step_3_ruleset_select_ruleset.png
    │   ├── step_1_input_directory_open_browser.png
    │   ├── step_1_input_directory_select_directory.png
    │   └── step_2_output_directory_select_directory.png
    ├── development.md
    └── demo.md
├── .github
    ├── dependabot.yaml
    ├── zip_and_upload_package.sh
    └── workflows
    │   ├── release.yaml
    │   └── ci.yaml
├── .git-blame-ignore-revs
├── stubs
    └── tifftools
    │   ├── exceptions.pyi
    │   ├── __init__.pyi
    │   ├── tifftools.pyi
    │   └── constants.pyi
├── .editorconfig
├── tests
    ├── override_rule_sets
    │   └── example_user_rules.yaml
    ├── test_utils_network.py
    ├── test_utils_os.py
    ├── test_utils_cli.py
    ├── test_gui.py
    ├── conftest.py
    ├── test_e2e.py
    └── test_redact.py
├── pyproject.toml
├── tox.ini
├── .gitignore
├── LICENSE
└── README.md


/imagedephi/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/imagedephi/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/imagedephi/gui/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/client/.yarnrc.yml:
--------------------------------------------------------------------------------
1 | ---
2 | nodeLinker: pnp
3 | 


--------------------------------------------------------------------------------
/client/src/vite-env.d.ts:
--------------------------------------------------------------------------------
1 | /// <reference types="vite/client" />
2 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | **/tests/data/** filter=lfs diff=lfs merge=lfs -text
2 | 


--------------------------------------------------------------------------------
/imagedephi/gui/utils/constants.py:
--------------------------------------------------------------------------------
1 | MAX_ASSOCIATED_IMAGE_SIZE = 160
2 | 


--------------------------------------------------------------------------------
/imagedephi/gui/api/__init__.py:
--------------------------------------------------------------------------------
1 | from .api import router
2 | 
3 | __all__ = ["router"]
4 | 


--------------------------------------------------------------------------------
/client/src/style.css:
--------------------------------------------------------------------------------
1 | @tailwind base;
2 | @tailwind components;
3 | @tailwind utilities;
4 | 


--------------------------------------------------------------------------------
/client/public/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DigitalSlideArchive/ImageDePHI/HEAD/client/public/logo.png


--------------------------------------------------------------------------------
/imagedephi/gui/__init__.py:
--------------------------------------------------------------------------------
1 | from .app import app, shutdown_event
2 | 
3 | __all__ = ["app", "shutdown_event"]
4 | 


--------------------------------------------------------------------------------
/client/src/shims-vue.d.ts:
--------------------------------------------------------------------------------
// Ambient declaration so TypeScript can resolve imports of `*.vue` single-file components.
// Vue 3's "vue" package has no default export, so the component type is described with
// `DefineComponent` instead of the Vue 2-style `import Vue from "vue"`.
declare module "*.vue" {
  import type { DefineComponent } from "vue";
  const component: DefineComponent<{}, {}, any>;
  export default component;
}
5 | 


--------------------------------------------------------------------------------
/docs/images/initial_ui.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DigitalSlideArchive/ImageDePHI/HEAD/docs/images/initial_ui.png


--------------------------------------------------------------------------------
/imagedephi/__main__.py:
--------------------------------------------------------------------------------
1 | from imagedephi import main
2 | 
3 | if __name__ == "__main__":
4 |     main.imagedephi()
5 | 


--------------------------------------------------------------------------------
/client/postcss.config.js:
--------------------------------------------------------------------------------
1 | export default {
2 |   plugins: {
3 |     tailwindcss: {},
4 |     autoprefixer: {},
5 |   },
6 | };
7 | 


--------------------------------------------------------------------------------
/docs/images/image_grid_errors_ui.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DigitalSlideArchive/ImageDePHI/HEAD/docs/images/image_grid_errors_ui.png


--------------------------------------------------------------------------------
/docs/images/image_grid_success_ui.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DigitalSlideArchive/ImageDePHI/HEAD/docs/images/image_grid_success_ui.png


--------------------------------------------------------------------------------
/docs/images/redaction_complete_ui.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DigitalSlideArchive/ImageDePHI/HEAD/docs/images/redaction_complete_ui.png


--------------------------------------------------------------------------------
/docs/images/redaction_progress_ui.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DigitalSlideArchive/ImageDePHI/HEAD/docs/images/redaction_progress_ui.png


--------------------------------------------------------------------------------
/client/.env.development:
--------------------------------------------------------------------------------
1 | # This could leak to production. Should only be used in DEBUG mode for FastAPI
2 | VITE_APP_API_URL=http://127.0.0.1:8000
3 | 


--------------------------------------------------------------------------------
/docs/images/step_3_ruleset_select_ruleset.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DigitalSlideArchive/ImageDePHI/HEAD/docs/images/step_3_ruleset_select_ruleset.png


--------------------------------------------------------------------------------
/client/src/App.vue:
--------------------------------------------------------------------------------
1 | <script setup lang="ts">
2 | import HomePage from "./HomePage.vue";
3 | </script>
4 | 
5 | <template>
6 |   <HomePage />
7 | </template>
8 | 


--------------------------------------------------------------------------------
/client/public/associatedPlaceholder.svg:
--------------------------------------------------------------------------------
1 | <svg width="80" height="80" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 80 1" preserveAspectRatio="none">
2 |     </svg>
3 | 


--------------------------------------------------------------------------------
/docs/images/step_1_input_directory_open_browser.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DigitalSlideArchive/ImageDePHI/HEAD/docs/images/step_1_input_directory_open_browser.png


--------------------------------------------------------------------------------
/.github/dependabot.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | version: 2
3 | updates:
4 |   - package-ecosystem: "github-actions"
5 |     directory: "/"
6 |     schedule:
7 |       interval: "weekly"
8 | 


--------------------------------------------------------------------------------
/docs/images/step_1_input_directory_select_directory.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DigitalSlideArchive/ImageDePHI/HEAD/docs/images/step_1_input_directory_select_directory.png


--------------------------------------------------------------------------------
/docs/images/step_2_output_directory_select_directory.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DigitalSlideArchive/ImageDePHI/HEAD/docs/images/step_2_output_directory_select_directory.png


--------------------------------------------------------------------------------
/.git-blame-ignore-revs:
--------------------------------------------------------------------------------
1 | # Reformat code to standardize double quoted strings
2 | 92c1ec85ab680851055c16f022ab9ce80c600e3c
3 | 
4 | # Format yaml files
5 | 8846ba15625de7bfb68d40c4eb224c2c2d59dda2
6 | 


--------------------------------------------------------------------------------
/client/src/main.ts:
--------------------------------------------------------------------------------
1 | import { createApp } from "vue";
2 | import App from "./App.vue";
3 | import "./style.css";
4 | import "remixicon/fonts/remixicon.css";
5 | 
6 | createApp(App).mount("#app");
7 | 


--------------------------------------------------------------------------------
/imagedephi/command_file.py:
--------------------------------------------------------------------------------
1 | from pydantic import BaseModel
2 | 
3 | 
4 | # Only `input_paths` is declared here; it has no default, so pydantic treats it as required.
5 | class CommandFile(BaseModel):
6 |     input_paths: list[str]
7 | 


--------------------------------------------------------------------------------
/client/vite.config.ts:
--------------------------------------------------------------------------------
1 | import { defineConfig } from "vite";
2 | import vue from "@vitejs/plugin-vue";
3 | 
4 | // https://vitejs.dev/config/
5 | export default defineConfig({
6 |   plugins: [vue()],
7 | });
8 | 


--------------------------------------------------------------------------------
/stubs/tifftools/exceptions.pyi:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 | 
3 | class TifftoolsError(Exception): ...
4 | class UnknownTagError(TifftoolsError): ...
5 | class MustBeBigTiffError(TifftoolsError): ...
6 | 


--------------------------------------------------------------------------------
/imagedephi/redact/__init__.py:
--------------------------------------------------------------------------------
1 | from .redact import ProfileChoice, iter_image_dirs, redact_images, show_redaction_plan
2 | 
3 | __all__ = ["iter_image_dirs", "redact_images", "show_redaction_plan", "ProfileChoice"]
4 | 


--------------------------------------------------------------------------------
/client/src/store/redactionStore.ts:
--------------------------------------------------------------------------------
1 | import { ref, Ref } from "vue";
2 | 
3 | export const redactionStateFlags: Ref<Record<string, boolean>> = ref({
4 |   redacting: false,
5 |   redactionComplete: false,
6 |   showImageTable: false,
7 |   redactionSnackbar: false,
8 | });
9 | 


--------------------------------------------------------------------------------
/client/tsconfig.node.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "composite": true,
 4 |     "skipLibCheck": true,
 5 |     "module": "ESNext",
 6 |     "moduleResolution": "bundler",
 7 |     "allowSyntheticDefaultImports": true
 8 |   },
 9 |   "include": ["vite.config.ts"]
10 | }
11 | 


--------------------------------------------------------------------------------
/imagedephi/utils/logger.py:
--------------------------------------------------------------------------------
 1 | import importlib.resources
 2 | import logging
 3 | import logging.config
 4 | import os
 5 | 
 6 | try:
 7 |     logging.config.fileConfig(
 8 |         "logging.conf"
 9 |         if os.path.exists("logging.conf")
10 |         else str(importlib.resources.files("imagedephi") / "logging.conf")
11 |     )
12 | except (FileNotFoundError, KeyError):
13 |     pass
14 | 
15 | logger = logging.getLogger("root")
16 | 


--------------------------------------------------------------------------------
/imagedephi/logging.conf:
--------------------------------------------------------------------------------
 1 | [loggers]
 2 | keys=root
 3 | 
 4 | [handlers]
 5 | keys=consoleHandler
 6 | 
 7 | [formatters]
 8 | keys=messageFormatter
 9 | 
10 | [logger_root]
11 | level=WARNING
12 | handlers=consoleHandler
13 | qualname=root
14 | propagate=0
15 | 
16 | [handler_consoleHandler]
17 | class=StreamHandler
18 | args=(sys.stdout,)
19 | formatter=messageFormatter
20 | 
21 | [formatter_messageFormatter]
22 | format=%(asctime)s - %(levelname)s - %(message)s
23 | 


--------------------------------------------------------------------------------
/client/README.md:
--------------------------------------------------------------------------------
 1 | ## Development
 2 | For efficient front-end development, run the following in the project root:
 3 | 
 4 |    ```bash
 5 |    export DEBUG=True
 6 |    hypercorn --reload imagedephi.gui.app:app
 7 |    ```
 8 | 
 9 | In a new terminal:
10 | 
11 | ```bash
12 | cd client/
13 | yarn dev
14 | ```
15 | 
16 | **Note**
17 | `imagedephi gui` will break and tests will fail in debug mode. Remember to reset the variable when you are done with development.
18 | 
19 | ```bash
20 | export DEBUG=False
21 | ```
22 | 


--------------------------------------------------------------------------------
/client/.gitignore:
--------------------------------------------------------------------------------
 1 | # Logs
 2 | logs
 3 | *.log
 4 | npm-debug.log*
 5 | yarn-debug.log*
 6 | yarn-error.log*
 7 | pnpm-debug.log*
 8 | lerna-debug.log*
 9 | 
10 | 
11 | dist
12 | dist-ssr
13 | *.local
14 | 
15 | # Editor directories and files
16 | .vscode/*
17 | !.vscode/extensions.json
18 | .idea
19 | .DS_Store
20 | *.suo
21 | *.ntvs*
22 | *.njsproj
23 | *.sln
24 | *.sw?
25 | 
26 | # Yarn directories
27 | node_modules/*
28 | .pnp.*
29 | .yarn/*
30 | !.yarn/patches
31 | !.yarn/plugins
32 | !.yarn/releases
33 | !.yarn/versions
34 | 


--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
 1 | root = true
 2 | 
 3 | [*]
 4 | indent_style = space
 5 | end_of_line = lf
 6 | insert_final_newline = true
 7 | trim_trailing_whitespace = true
 8 | charset = utf-8
 9 | 
10 | [*.toml]
11 | indent_size = 2
12 | 
13 | [*.py]
14 | indent_size = 4
15 | max_line_length = 100
16 | 
17 | [*.js]
18 | indent_size = 2
19 | max_line_length = 100
20 | 
21 | [{*.html,*.html.j2}]
22 | indent_size = 2
23 | 
24 | [*.css]
25 | indent_size = 2
26 | 
27 | [*.json]
28 | indent_size = 2
29 | 
30 | [{*.yml,*.yaml}]
31 | indent_size = 2
32 | 


--------------------------------------------------------------------------------
/imagedephi/utils/progress_log.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | import queue
 3 | 
 4 | _progress_queue: queue.Queue[tuple] = queue.Queue(-1)
 5 | 
 6 | 
 7 | def push_progress(count: int, max: int, redact_dir: Path) -> None:
 8 |     _progress_queue.put_nowait((count, max, redact_dir))
 9 | 
10 | 
11 | def get_next_progress_message() -> tuple | None:
12 |     try:
13 |         record = _progress_queue.get_nowait()
14 |     except queue.Empty:
15 |         return None
16 |     else:
17 |         # return record.message
18 |         return record
19 | 


--------------------------------------------------------------------------------
/stubs/tifftools/__init__.pyi:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from .constants import Datatype, Tag, TiffDatatype, TiffTag
 4 | from .exceptions import MustBeBigTiffError, TifftoolsError, UnknownTagError
 5 | from .tifftools import read_tiff, write_tiff
 6 | 
 7 | __version__: str
 8 | 
 9 | __all__ = [
10 |     "Datatype",
11 |     "TiffDatatype",
12 |     "Tag",
13 |     "TiffTag",
14 |     "TifftoolsError",
15 |     "UnknownTagError",
16 |     "MustBeBigTiffError",
17 |     "read_tiff",
18 |     "write_tiff",
19 |     "__version__",
20 | ]
21 | 


--------------------------------------------------------------------------------
/.github/zip_and_upload_package.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -ex
 4 | 
 5 | runner_os=$1
 6 | tag_name=$2
 7 | 
 8 | cd dist/
 9 | 
10 | if [ -f "./imagedephi.exe" ]; then
11 |     executable="imagedephi.exe"
12 | else
13 |     executable="imagedephi"
14 | fi
15 | 
16 | chmod +x $executable
17 | zipfile="${runner_os}-imagedephi-cli.zip"
18 | 
19 | if [[ "$runner_os" = "Windows" ]]; then
20 |     powershell Compress-Archive $executable $zipfile
21 | else
22 |     zip $zipfile $executable
23 | fi
24 | 
25 | gh release upload \
26 |     $tag_name \
27 |     "${zipfile}#${runner_os} executable" \
28 |     --clobber
29 | 


--------------------------------------------------------------------------------
/imagedephi/demo_files.csv:
--------------------------------------------------------------------------------
1 | file_name,hash
2 | SEER_Mouse_1_17158543_demo.svs,sha512:0d8559ad29cf5ff3a735f1fbb76c5b1d7a575d9ba2c4c894229cf44de270fd6c94415fd71fe3dac70c86188e72a3bd559b24f5b2345cb4aa1910d575522330e2
3 | SEER_Mouse_10_17158610_demo.svs,sha512:6e458c081910a8918317a2d3fa4cfa2fc27c5f4c07f4550827dc36eb7f566dd6e2aaf551a2051668c75921f5aebf6c11e632d3380aac62588ae91b33fe399be4
4 | SEER_Mouse_13_17158639_demo.svs,sha512:1a73693d8a4c83f7f6146faf376daf9cff1b30c9b9ec48996bd1da2a6a3f81bdd9481eefae36da00ee5f684ff6f8b315dffe40ded20d2f67f9cb24e1c1d3a258
5 | README.txt,sha512:a312dae0db79701b27cd71b3fe3e13ca52415dbb51373b3edc604a575e59f8bb6e37f247ff9ba76ca3b4d5d56b38174f2bbbd9171d40582300e9589bcea14e54
6 | 


--------------------------------------------------------------------------------
/client/index.html:
--------------------------------------------------------------------------------
 1 | <!doctype html>
 2 | <html lang="en">
 3 |   <head>
 4 |     <meta charset="UTF-8" />
 5 |     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
 6 |     <link rel="preconnect" href="https://fonts.googleapis.com">
 7 | <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
 8 | <link href="https://fonts.googleapis.com/css2?family=Roboto:ital,wght@0,100;0,300;0,400;0,500;0,700;0,900;1,100;1,300;1,400;1,500;1,700;1,900&display=swap" rel="stylesheet">
 9 |     <title>ImageDePHI</title>
10 |   </head>
11 |   <body class="bg-base-300 min-h-screen">
12 |     <div id="app"></div>
13 |     <script type="module" src="/src/main.ts"></script>
14 |   </body>
15 | </html>
16 | 


--------------------------------------------------------------------------------
/client/src/store/types.ts:
--------------------------------------------------------------------------------
 1 | export type DirectoryData = {
 2 |   directory: string;
 3 |   ancestors: Path[];
 4 |   children: Path[];
 5 |   childrenImages: Path[];
 6 |   childrenYaml: Path[];
 7 | };
 8 | 
 9 | export interface ImagePlanParams {
10 |   directory: string;
11 |   rules?: string;
12 |   limit?: number;
13 |   offset?: number;
14 |   update?: boolean;
15 | }
16 | 
17 | export type imagePlanResponse = {
18 |   data: Record<string, Record<string, string>>;
19 |   total: number;
20 |   tags: string[];
21 |   missing_rules: boolean;
22 | };
23 | 
24 | export interface Path {
25 |   name: string;
26 |   path: string;
27 | }
28 | export interface SelectedDirectories {
29 |   [key: string]: string;
30 | }
31 | 


--------------------------------------------------------------------------------
/client/public/thumbnailPlaceholder.svg:
--------------------------------------------------------------------------------
 1 | <svg width="80" height="80" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 80 80" preserveAspectRatio="none">
 2 |       <defs>
 3 |         <style type="text/css">
 4 |           #holder text {
 5 |             fill: #000000;
 6 |             font-family: Times New Roman;
 7 |             font-size: 14px;
 8 |             font-weight: 100;
 9 |           }
10 |         </style>
11 |       </defs>
12 |       <g id="holder">
13 |         <rect width="100%" height="100%" fill="#cccccc"></rect>
14 |         <g>
15 |           <text text-anchor="middle" x="50%" y="40%" dy=".3em">No Preview</text>
16 |           <text text-anchor="middle" x="50%" y="60%" dy=".3em">Available</text>
17 |         </g>
18 |       </g>
19 |     </svg>
20 | 


--------------------------------------------------------------------------------
/imagedephi/utils/image.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | from imagedephi.rules import FileFormat
 4 | 
 5 | 
 6 | def get_file_format_from_path(image_path: Path) -> FileFormat | None:
 7 |     """
 8 |     Attempt to determine the file type of an image by looking at its file signature.
 9 | 
10 |     See https://en.wikipedia.org/wiki/List_of_file_signatures. In case of a "dual-flavor" DICOM
11 |     file (i.e. a file that can be read as a DICOM or a tiff), prefer to report the image as
12 |     DICOM.
13 |     """
14 |     data = open(image_path, "rb").read(132)
15 |     if data[128:] == b"DICM":
16 |         return FileFormat.DICOM
17 |     elif data[:4] in (b"II\x2a\x00", b"MM\x00\x2a", b"II\x2b\x00", b"MM\x00\x2b"):
18 |         return FileFormat.TIFF
19 |     return None
20 | 


--------------------------------------------------------------------------------
/tests/override_rule_sets/example_user_rules.yaml:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Example user rules
 3 | description: A set of reasonable rules used for testing
 4 | output_file_name: my_study_slide
 5 | tiff:
 6 |   associated_images: {}
 7 |   metadata:
 8 |     ImageDescription:
 9 |       action: replace
10 |       new_value: Redacted by ImageDePHI
11 |     YCbCrSubsampling:
12 |       action: check_type
13 |       expected_type: number
14 |       expected_count: 2
15 | svs:
16 |   associated_images: {}
17 |   metadata:
18 |     YCbCrSubsampling:
19 |       action: check_type
20 |       expected_type: number
21 |       expected_count: 2
22 |   image_description:
23 |     ICC Profile:
24 |       action: delete
25 |     Filename:
26 |       action: check_type
27 |       expected_type: number
28 | dicom:
29 |   metadata: {}
30 | 


--------------------------------------------------------------------------------
/client/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "target": "ES2020",
 4 |     "useDefineForClassFields": true,
 5 |     "module": "ESNext",
 6 |     "lib": ["ES2020", "DOM", "DOM.Iterable"],
 7 |     "skipLibCheck": true,
 8 | 
 9 |     /* Bundler mode */
10 |     "moduleResolution": "bundler",
11 |     "allowImportingTsExtensions": true,
12 |     "resolveJsonModule": true,
13 |     "isolatedModules": true,
14 |     "noEmit": true,
15 |     "jsx": "preserve",
16 | 
17 |     /* Linting */
18 |     "strict": true,
19 |     "noUnusedLocals": true,
20 |     "noUnusedParameters": true,
21 |     "noFallthroughCasesInSwitch": true
22 |   },
23 |   "include": ["src/**/*.ts", "src/**/*.d.ts", "src/**/*.tsx", "src/**/*.vue"],
24 |   "references": [
25 |     {
26 |       "path": "./tsconfig.node.json"
27 |     }
28 |   ],
29 |   "exclude": ["node_modules"]
30 | }
31 | 


--------------------------------------------------------------------------------
/imagedephi/utils/network.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | import socket
 3 | 
 4 | 
 5 | async def wait_for_port(port: int, host: str = "127.0.0.1") -> None:
 6 |     """Block until a TCP port on the specified host can be opened."""
 7 |     while True:
 8 |         try:
 9 |             _, writer = await asyncio.open_connection(host, port)
10 |         except ConnectionRefusedError:
11 |             pass
12 |         else:
13 |             writer.close()
14 |             await writer.wait_closed()
15 |             return
16 | 
17 | 
def unused_tcp_port() -> int:
    """Return a port number that the OS assigned when binding 127.0.0.1 with port 0."""
    probe = socket.socket()
    try:
        # Ensure the port can be immediately reused
        probe.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        # Binding to port 0 lets the OS select a dynamic ephemeral port
        probe.bind(("127.0.0.1", 0))
        return probe.getsockname()[1]
    finally:
        # The probe socket is closed before the port number is handed back.
        probe.close()
26 | 


--------------------------------------------------------------------------------
/client/eslint.config.mjs:
--------------------------------------------------------------------------------
 1 | import eslint from '@eslint/js';
 2 | import eslintConfigPrettier from 'eslint-config-prettier';
 3 | import eslintPluginVue from 'eslint-plugin-vue';
 4 | import globals from 'globals';
 5 | import typescriptEslint from 'typescript-eslint';
 6 | 
 7 | export default typescriptEslint.config(
 8 |     { ignores: ['*.d.ts', '.yarn/**', '.gitignore', '.pnp.*'] },
 9 |     {
10 |     extends: [
11 |         eslint.configs.recommended,
12 |       ...typescriptEslint.configs.recommended,
13 |       ...eslintPluginVue.configs['flat/recommended'],
14 |     ],
15 |     files: ['**/*.{ts,tsx,vue,js,jsx,cjs,mjs,cts,mts}'],
16 | 
17 |     languageOptions: {
18 |         globals: globals.browser,
19 |         ecmaVersion: 'latest',
20 |         sourceType: "module",
21 | 
22 |         parserOptions: {
23 |             parser: "@typescript-eslint/parser",
24 |         },
25 |     },
26 | },
27 | eslintConfigPrettier
28 | );
29 | 


--------------------------------------------------------------------------------
/imagedephi/redact/redaction_plan.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import abc
 4 | from pathlib import Path
 5 | from typing import TYPE_CHECKING
 6 | 
 7 | from imagedephi.rules import FileFormat
 8 | 
 9 | if TYPE_CHECKING:
10 |     from tifftools.tifftools import TagData
11 | 
12 |     ByteInfo = dict[str, str | int]
13 | 
14 |     TagRedactionPlan = dict[str, int | float | TagData | ByteInfo]
15 | 
16 |     RedactionPlanReport = dict[str, dict[str, int | str | list[str] | TagRedactionPlan]]
17 | 
18 | 
class RedactionPlan:
    """
    Base interface for a redaction plan.

    NOTE(review): the methods are marked with ``abc.abstractmethod``, but the class does not
    derive from ``abc.ABC``, so the decorators are not enforced: the class can be instantiated
    and each method below returns ``None`` unless a subclass overrides it.
    """

    # Annotated only; no value is assigned at class level.
    file_format: FileFormat

    @abc.abstractmethod
    def report_plan(self) -> RedactionPlanReport:
        """Report the plan; annotated to return a ``RedactionPlanReport``."""

    @abc.abstractmethod
    def execute_plan(self) -> None:
        """Execute the plan."""

    @abc.abstractmethod
    def is_comprehensive(self) -> bool:
        """Return whether the plan redacts all metadata and/or images needed."""

    @abc.abstractmethod
    def report_missing_rules(self, report=None) -> None:
        """Report missing rules; ``report`` is optional and defaults to ``None``."""

    @abc.abstractmethod
    def save(self, output_path: Path, overwrite: bool) -> None:
        """Save to ``output_path``; ``overwrite`` is a boolean flag supplied by the caller."""
38 | 


--------------------------------------------------------------------------------
/tests/test_utils_network.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | from collections.abc import AsyncGenerator
 3 | import socket
 4 | 
 5 | import pytest
 6 | import pytest_asyncio
 7 | 
 8 | from imagedephi.utils.network import unused_tcp_port, wait_for_port
 9 | 
10 | 
@pytest_asyncio.fixture
async def server(unused_tcp_port: int) -> AsyncGenerator[asyncio.Server, None]:
    """Yield an asyncio TCP server started on 127.0.0.1 at ``unused_tcp_port``."""

    def server_callback(reader: asyncio.StreamReader, writer: asyncio.StreamWriter) -> None:
        # Connections are accepted, but nothing is read from or written to them.
        pass

    server = await asyncio.start_server(server_callback, "127.0.0.1", unused_tcp_port)
    # Leaving the ``async with`` block closes the server once the test is done with it.
    async with server:
        yield server
20 | 
21 | 
22 | @pytest.mark.timeout(1)
23 | @pytest.mark.asyncio
24 | async def test_utils_network_wait_for_port(server: asyncio.Server) -> None:
25 |     server_port = server.sockets[0].getsockname()[1]
26 | 
27 |     await wait_for_port(server_port)
28 | 
29 | 
30 | def test_utils_network_unused_tcp_port() -> None:
31 |     port = unused_tcp_port()
32 | 
33 |     # This will raise an OSError if the port is already in use
34 |     with socket.create_server(("127.0.0.1", port)) as sock:
35 |         assert sock
36 | 


--------------------------------------------------------------------------------
/stubs/tifftools/tifftools.pyi:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from os import PathLike
 4 | from typing import BinaryIO, Literal, NotRequired, TypeAlias, TypedDict
 5 | 
 6 | _PathOrStream: TypeAlias = str | PathLike[str] | BinaryIO
 7 | 
 8 | class TiffInfo(TypedDict):
 9 |     ifds: list[IFD]
10 |     path_or_fobj: _PathOrStream
11 |     size: int
12 |     header: bytes
13 |     bigEndian: bool
14 |     bigtiff: bool
15 |     endianPack: Literal[">", "<"]
16 |     firstifd: int
17 | 
18 | class IFD(TypedDict):
19 |     offset: int
20 |     tags: dict[int, TagEntry]
21 |     path_or_fobj: _PathOrStream
22 |     size: int
23 |     bigEndian: bool
24 |     bigtiff: bool
25 |     tagcount: int
26 | 
27 | TagData = str | bytes | list[int | float]
28 | 
29 | class TagEntry(TypedDict):
30 |     datatype: int
31 |     count: int
32 |     datapos: int
33 |     offset: NotRequired[int]
34 |     ifds: NotRequired[list[list[IFD]]]
35 |     data: TagData
36 | 
37 | def read_tiff(path: _PathOrStream) -> TiffInfo: ...
# Accepts a whole ``TiffInfo``, a single ``IFD`` or a list of IFDs, plus the destination path
# or binary stream. The remaining options all have defaults in the real implementation
# (shown here as ``...``); their exact defaults and effects are not visible from this stub.
def write_tiff(
    ifds: TiffInfo | IFD | list[IFD],
    path: _PathOrStream,
    bigEndian: bool | None = ...,
    bigtiff: bool | None = ...,
    allowExisting: bool = ...,
    ifdsFirst: bool = ...,
) -> None: ...
46 | 


--------------------------------------------------------------------------------
/docs/development.md:
--------------------------------------------------------------------------------
 1 | # Development
 2 | 
 3 | ## Installation
 4 | To install for development:
 5 | * [Create and activate a Python virtual environment](https://docs.python.org/3/library/venv.html).
 6 | * Install for local development:
 7 |     ```bash
 8 |     pip install -e .
 9 |     ```
10 | * Install [Tox](https://tox.wiki/) to run development tasks:
11 |     ```bash
12 |     pip install tox
13 |     ```
14 | 
15 | ## Running the CLI
16 | With the virtual environment active, run the CLI:
17 | ```bash
18 | imagedephi
19 | ```
20 | 
21 | ### Development
22 | #### Requirements
23 | 
24 | ```bash
25 | python ^3.11
26 | node ^20
27 | ```
28 | 
29 | #### Initial Install
30 | This project uses yarn modern. As such you'll need to enable corepack to detect the correct yarn version:
31 | 
32 | ```bash
33 | cd client
34 | corepack enable
35 | ```
36 | 
37 | 
38 | #### Developing the Web GUI
39 | While developing the web GUI, it may be useful to launch a web server
40 | that auto-reloads code changes and shows in-browser exception tracebacks:
41 | ```bash
42 | DEBUG=1 hypercorn --reload imagedephi.gui:app
43 | ```
44 | 
45 | ## Auto-format Code Changes
46 | To format all code to comply with style rules:
47 | ```bash
48 | tox -e format
49 | ```
50 | 
51 | ## Running Tests
52 | To run all tests:
53 | ```bash
54 | tox
55 | ```
56 | 


--------------------------------------------------------------------------------
/client/tailwind.config.js:
--------------------------------------------------------------------------------
 1 | /* eslint-disable @typescript-eslint/no-require-imports */
 2 | /** @type {import('tailwindcss').Config} */
 3 | module.exports = {
 4 |   content: ["./index.html", "./src/**/*.{vue,js,ts}"],
 5 |   theme: {
 6 |     extend: {
 7 |       fontFamily: {
 8 |         sans: ["Roboto", "sans-serif"],
 9 |       },
10 |       colors: {
11 |         primary: "#5A387C",
12 | 
13 |         secondary: "#00A6BF",
14 | 
15 |         accent: "#FF6A6A",
16 | 
17 |         neutral: "#201C35",
18 | 
19 |         "base-100": "#FFFFFF",
20 | 
21 |         info: "#3ABFF8",
22 | 
23 |         success: "#36D399",
24 | 
25 |         warning: "#FBBD23",
26 | 
27 |         error: "#F87272",
28 | 
29 |         secondaryContent: "#E8F2F3",
30 |       },
31 |     },
32 |   },
33 |   plugins: [require("daisyui")],
34 |   daisyui: {
35 |     themes: [
36 |       {
37 |         light: {
38 | 
39 |           ...require("daisyui/src/theming/themes")["light"],
40 | 
41 |           primary: "#5A387C",
42 | 
43 |           secondary: "#00A6BF",
44 | 
45 |           accent: "#FF6A6A",
46 | 
47 |           neutral: "#201C35",
48 | 
49 |           "base-100": "#FFFFFF",
50 | 
51 |           info: "#3ABFF8",
52 | 
53 |           success: "#36D399",
54 | 
55 |           warning: "#FBBD23",
56 | 
57 |           error: "#F87272",
58 | 
59 |           secondaryContent: "#E8F2F3",
60 |         },
61 |       },
62 |     ],
63 |   },
64 | };
65 | 


--------------------------------------------------------------------------------
/imagedephi/gui/utils/directory.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from pathlib import Path
 3 | 
 4 | from imagedephi.redact import iter_image_dirs
 5 | 
 6 | 
 7 | class DirectoryData:
 8 |     directory: Path
 9 |     ancestors: list[dict[str, str | Path]]
10 |     child_directories: list[dict[str, str | Path]]
11 |     child_images: list[dict[str, str | Path]]
12 |     child_yaml_files: list[dict[str, str | Path]]
13 | 
14 |     def __init__(self, directory: Path):
15 |         self.directory = directory
16 | 
17 |         self.ancestors = [
18 |             {"name": ancestor.name, "path": ancestor} for ancestor in reversed(directory.parents)
19 |         ]
20 |         self.ancestors.append({"name": directory.name, "path": directory})
21 | 
22 |         self.child_directories = [
23 |             {"name": child.name, "path": child}
24 |             for child in directory.iterdir()
25 |             if child.is_dir() and os.access(child, os.R_OK)
26 |         ]
27 | 
28 |         self.child_images = [
29 |             {"name": image.name, "path": image} for image in list(iter_image_dirs([directory]))
30 |         ]
31 |         self.child_yaml_files = [
32 |             {"name": yaml_file.name, "path": yaml_file} for yaml_file in _iter_yaml_files(directory)
33 |         ]
34 | 
35 | 
def _iter_yaml_files(directory: Path):
    """Yield each regular file directly inside ``directory`` whose suffix is exactly ``.yaml``."""
    yield from (
        entry for entry in directory.iterdir() if entry.is_file() and entry.suffix == ".yaml"
    )
40 | 


--------------------------------------------------------------------------------
/.github/workflows/release.yaml:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: release
 3 | on:
 4 |   release:
 5 |     types:
 6 |       - published
 7 | jobs:
 8 |   publish:
 9 |     runs-on: ${{ matrix.os }}
10 |     strategy:
11 |       matrix:
12 |         os:
13 |           # Use an older Linux: https://pyinstaller.org/en/stable/usage.html#making-gnu-linux-apps-forward-compatible
14 |           - ubuntu-22.04
15 |           - macos-latest
16 |           - windows-latest
17 |     steps:
18 |       - uses: actions/checkout@v6
19 |         with:
20 |           # LFS data is not needed for release
21 |           lfs: false
22 |           # Tags are needed to compute the current version number
23 |           fetch-depth: 0
24 |       - name: Set up Python
25 |         uses: actions/setup-python@v6
26 |         with:
27 |           python-version: "3.11"
28 |       - name: Set up Node.js
29 |         uses: actions/setup-node@v6
30 |         with:
31 |           node-version: "20"
32 |       - name: Enable Corepack
33 |         run: |
34 |           corepack enable
35 |       - name: Install tox
36 |         run: |
37 |           pip install --upgrade pip
38 |           pip install tox
39 |       - name: Build binary
40 |         run: |
41 |           tox -e binary
42 |       - name: Zip and upload binary
43 |         run: |
44 |           .github/zip_and_upload_package.sh ${{ runner.os }} ${{ github.event.release.tag_name }}
45 |         shell: bash
46 |         env:
47 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
48 | 


--------------------------------------------------------------------------------
/client/src/components/ImageDataDisplay.vue:
--------------------------------------------------------------------------------
 1 | <script setup lang="ts">
 2 | import { computed, ref } from "vue";
 3 | import { useRedactionPlan } from "../store/imageStore";
 4 | import { getRedactionPlan } from "../api/rest";
 5 | import ImageDataTable from "./ImageDataTable.vue";
 6 | import InfiniteScroller from "./InfiniteScroller.vue";
 7 | import { selectedDirectories } from "../store/directoryStore";
 8 | 
 9 | const limit = ref(50);
10 | const offset = ref(1);
11 | 
// Fetch another page of the redaction plan and merge it into the shared store.
// Bound to the "infinite-scroll" event of the InfiniteScroller in the template below.
const loadImagePlan = async () => {
  // Do nothing when the reported total is smaller than the end of the page being requested.
  // NOTE(review): this also skips a final, partially filled page (e.g. total 75, limit 50,
  // offset 1 gives 75 < 100) -- confirm against the backend's "offset" semantics.
  if (
    useRedactionPlan.imageRedactionPlan.total <
    (offset.value + 1) * limit.value
  ) {
    return;
  }
  const newPlan = await getRedactionPlan({
    directory: useRedactionPlan.currentDirectory,
    rules: selectedDirectories.value.rulesetDirectory,
    limit: limit.value,
    offset: offset.value,
    update: true,
  });
  // Merge the new entries into the existing ones; entries with the same key are overwritten
  useRedactionPlan.imageRedactionPlan.data = {
    ...useRedactionPlan.imageRedactionPlan.data,
    ...newPlan.data,
  };
  // Start loading the images for the newly added entries only
  useRedactionPlan.getThumbnail(newPlan.data);
  // NOTE(review): the offset only advances after the request resolves, so overlapping scroll
  // events could presumably request the same page twice -- TODO confirm.
  ++offset.value;
};
33 | const usedColumns = computed(() => useRedactionPlan.imageRedactionPlan.tags);
34 | </script>
35 | 
36 | <template>
37 |   <div class="card m-4 pb-4 rounded">
38 |     <InfiniteScroller @infinite-scroll="loadImagePlan">
39 |       <ImageDataTable
40 |         :used-columns="usedColumns"
41 |         :image-redaction-plan="useRedactionPlan.imageRedactionPlan"
42 |       />
43 |     </InfiniteScroller>
44 |   </div>
45 | </template>
46 | 


--------------------------------------------------------------------------------
/client/src/components/InfiniteScroller.vue:
--------------------------------------------------------------------------------
 1 | <script setup lang="ts">
 2 | import { ref, onMounted } from "vue";
 3 | 
 4 | const emits = defineEmits(["infinite-scroll"]);
 5 | 
 6 | const infiniteScroller = ref<HTMLTableElement | null>(null);
 7 | const endOfTable = ref<HTMLDivElement | null>(null);
 8 | 
 9 | onMounted(() => {
10 |   const observer = new IntersectionObserver(
11 |     (entries) => {
12 |       if (entries[0].isIntersecting) {
13 |         emits("infinite-scroll");
14 |       }
15 |     },
16 |     {
17 |       root: infiniteScroller.value,
18 |       rootMargin: "200px",
19 |     },
20 |   );
21 |   observer.observe(endOfTable.value as Element);
22 | });
23 | </script>
24 | <template>
25 |   <div
26 |     ref="infiniteScroller"
27 |     class="card rounded max-h-[calc(100vh-50px)] max-w-[calc(100vw-425px)] overflow-auto customScroll"
28 |   >
29 |     <slot />
30 |     <div ref="endOfTable" />
31 |   </div>
32 | </template>
33 | <style scoped>
34 | @supports selector(::-webkit-scrollbar) {
35 |   .customScroll::-webkit-scrollbar {
36 |     width: 10px;
37 |     height: 10px;
38 |   }
39 | 
40 |   .customScroll::-webkit-scrollbar-button,
41 |   .customScroll::-webkit-scrollbar-corner {
42 |     display: none;
43 |   }
44 | 
45 |   .customScroll::-webkit-scrollbar-thumb,
46 |   .customScroll::-webkit-scrollbar-track {
47 |     border-radius: 20px;
48 |   }
49 | 
50 |   .customScroll::-webkit-scrollbar-thumb {
51 |     background-color: #00a6bf;
52 |   }
53 | 
54 |   .customScroll::-webkit-scrollbar-track {
55 |     background-color: #6b7280;
56 |     width: 5px;
57 |   }
58 | }
59 | </style>
60 | 


--------------------------------------------------------------------------------
/tests/test_utils_os.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | import pytest
 4 | from pytest_mock import MockerFixture
 5 | 
 6 | from imagedephi.utils.os import launched_from_frozen_binary, launched_from_windows_explorer
 7 | 
 8 | 
 9 | @pytest.mark.parametrize("frozen", [False, True])
10 | def test_utils_os_launched_from_frozen_binary(frozen: bool, mocker: MockerFixture) -> None:
11 |     mocker.patch("sys.frozen", new=frozen, create=True)
12 | 
13 |     result = launched_from_frozen_binary()
14 | 
15 |     assert result is frozen
16 | 
17 | 
@pytest.mark.skipif(sys.platform == "win32", reason="non-windows only")
def test_utils_os_launched_from_windows_explorer_nonwindows() -> None:
    """Check that the detection reports False when not running on Windows."""
    assert launched_from_windows_explorer() is False
23 | 
24 | 
@pytest.mark.skipif(sys.platform != "win32", reason="windows only")
@pytest.mark.parametrize(
    "frozen,process_count,expected",
    [
        pytest.param(False, 1, True, id="non-frozen explorer"),
        pytest.param(False, 3, False, id="non-frozen console"),
        pytest.param(True, 2, True, id="frozen explorer"),
        pytest.param(True, 3, False, id="frozen console"),
    ],
)
def test_utils_os_launched_from_windows_explorer_windows(
    frozen: bool, process_count: int, expected: bool, mocker: MockerFixture
) -> None:
    """Check the detection for each combination of frozen state and console process count."""
    # Replace both inputs of the detection: the frozen check and the Windows API call
    mocker.patch("ctypes.windll.kernel32.GetConsoleProcessList", return_value=process_count)
    mocker.patch("imagedephi.utils.os.launched_from_frozen_binary", return_value=frozen)

    from_explorer = launched_from_windows_explorer()

    assert from_explorer is expected
45 | 


--------------------------------------------------------------------------------
/imagedephi/utils/dicom.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | import re
 3 | 
 4 | import pydicom
 5 | 
 6 | extensions = {
 7 |     None: True,
 8 |     "dcm": True,
 9 |     "dic": True,
10 |     "dicom": True,
11 | }
12 | 
13 | 
14 | def file_is_same_series_as(original_path: Path, path: Path) -> bool:
15 |     """
16 |     Determine if path belongs to the same series as original_path.
17 | 
18 |     These heuristics match those defined in the large image DICOM source found at
19 |     https://github.com/girder/large_image/blob/master/sources/dicom/large_image_source_dicom/__init__.py#L226.
20 |     """
21 |     might_match = False
22 |     if original_path.suffix not in extensions:
23 |         if original_path.suffix == path.suffix or path.suffix in extensions:
24 |             might_match = True
25 |     if (
26 |         not might_match
27 |         and re.match(r"^([1-9][0-9]*|0)(\.([1-9][0-9]*|0))+$", str(path))
28 |         and len(str(path)) <= 64
29 |     ):
30 |         might_match = True
31 |     if not might_match and re.match(r"^DCM_\d+$", str(path)):
32 |         might_match = True
33 |     if might_match:
34 |         original = pydicom.dcmread(original_path, stop_before_pixels=True)
35 |         original_series_uid = original.data_element("SeriesInstanceUID")
36 |         if original_series_uid:
37 |             original_series_uid = original_series_uid.value
38 |             slide_to_test = pydicom.dcmread(path, stop_before_pixels=True)
39 |             slide_series_uid = slide_to_test.data_element("SeriesInstanceUID")
40 |             return slide_series_uid is not None and slide_series_uid.value == original_series_uid
41 |     return False
42 | 


--------------------------------------------------------------------------------
/client/src/api/rest.ts:
--------------------------------------------------------------------------------
 1 | import { ImagePlanParams } from "../store/types";
 2 | 
 3 | const basePath = import.meta.env.VITE_APP_API_URL
 4 |   ? import.meta.env.VITE_APP_API_URL
 5 |   : "";
 6 | 
 7 | export async function getDirectoryInfo(path?: string) {
 8 |   const selectedPath = path ? path : "/";
 9 |   const response = await fetch(
10 |     `${basePath}/directory/?directory=${selectedPath}`,
11 |     {
12 |       method: "GET",
13 |       mode: "cors",
14 |     },
15 |   );
16 |   return response.json().then((data) => {
17 |     return data[0].directory_data;
18 |   });
19 | }
20 | 
/**
 * Fetch one page of the redaction plan for a directory.
 *
 * @param params Directory, rules path and paging options, sent as query parameters.
 * @returns The parsed JSON body of the response.
 */
export async function getRedactionPlan(params: ImagePlanParams) {
  // URLSearchParams encodes every value, so paths containing "&", "#", "+" or spaces are safe
  const query = new URLSearchParams({
    input_directory: String(params.directory),
    limit: String(params.limit),
    offset: String(params.offset),
    update: String(params.update),
  });
  // Leave the parameter out rather than sending the literal text "undefined" or "null"
  if (params.rules !== undefined && params.rules !== null) {
    query.set("rules_path", String(params.rules));
  }
  const response = await fetch(
    `${basePath}/redaction_plan?${query.toString()}`,
    {
      method: "GET",
      mode: "cors",
    },
  );
  return response.json();
}
33 | 
/**
 * Ask the backend to redact the images of a directory.
 *
 * @param inputDirectory Directory containing the images to redact.
 * @param outputDirectory Directory passed to the backend as `output_directory`.
 * @param rules Optional path of a rules file; omitted from the request when not given.
 * @returns The raw fetch response.
 */
export async function redactImages(
  inputDirectory: string,
  outputDirectory: string,
  rules?: string,
) {
  // URLSearchParams encodes every value, so paths containing "&", "#", "+" or spaces are safe
  const query = new URLSearchParams({
    input_directory: inputDirectory,
    output_directory: outputDirectory,
  });
  // Leave the parameter out rather than sending the literal text "undefined"
  if (rules !== undefined) {
    query.set("rules_path", rules);
  }
  const response = await fetch(
    `${basePath}/redact/?${query.toString()}`,
    {
      method: "POST",
      mode: "cors",
    },
  );
  return response;
}
48 | 
/**
 * Request one image (selected by `imageKey`) belonging to an image file.
 *
 * @param path Path of the image file, sent as `file_name`.
 * @param imageKey Which image to return, sent as `image_key`.
 * @returns The raw fetch response; callers inspect its status and read its body.
 */
export async function getImages(path: string, imageKey: string) {
  // URLSearchParams encodes every value, so paths containing "&", "#", "+" or spaces are safe
  const query = new URLSearchParams({ file_name: path, image_key: imageKey });
  const response = await fetch(
    `${basePath}/image/?${query.toString()}`,
    {
      method: "GET",
      mode: "cors",
    },
  );
  return response;
}
59 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yaml:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: ci
 3 | on:
 4 |   pull_request:
 5 |   push:
 6 |     tags: "*"
 7 |     branches:
 8 |       - main
 9 | jobs:
10 |   test:
11 |     runs-on: ${{ matrix.os }}
12 |     strategy:
13 |       # Seeing whether a failure is exclusive to one platform is useful
14 |       fail-fast: false
15 |       matrix:
16 |         os:
17 |           # Use an older Linux: https://pyinstaller.org/en/stable/usage.html#making-gnu-linux-apps-forward-compatible
18 |           - ubuntu-22.04
19 |           - macos-14
20 |           - windows-latest
21 |     steps:
22 |       - uses: actions/checkout@v6
23 |         with:
24 |           lfs: true
25 |           # Tags are needed to compute the current version number
26 |           fetch-depth: 0
27 |       - name: Set up Python
28 |         uses: actions/setup-python@v6
29 |         with:
30 |           python-version: "3.11"
31 |       - name: Set up Node.js
32 |         uses: actions/setup-node@v6
33 |         with:
34 |           node-version: "20"
35 |       - name: Enable Corepack
36 |         run: |
37 |           corepack enable
38 |       - name: Install tox
39 |         run: |
40 |           pip install --upgrade pip
41 |           pip install tox
42 |       - name: Run tests
43 |         run: |
44 |           tox
45 |       - name: Build binary
46 |         run: |
47 |           tox -e binary
48 |       - name: Test binary runs
49 |         run: |
50 |           dist/imagedephi --help
51 |       - name: Upload binary artifact
52 |         uses: actions/upload-artifact@v6
53 |         with:
54 |           name: imagedephi-${{ matrix.os }}-binary
55 |           path: |
56 |             dist/imagedephi
57 |             dist/imagedephi.exe
58 |           retention-days: 5
59 |           if-no-files-found: error
60 | 


--------------------------------------------------------------------------------
/client/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "imagedephi",
 3 |   "private": true,
 4 |   "version": "0.0.0",
 5 |   "license": "Apache-2.0",
 6 |   "type": "module",
 7 |   "scripts": {
 8 |     "lint:tsc": "vue-tsc --noEmit",
 9 |     "lint:check": "eslint . --fix ",
10 |     "lint:format": "prettier src  --write",
11 |     "lint": "npm-run-all lint:tsc lint:check lint:format",
12 |     "dev": "NODE_ENV=development vite --port 8080",
13 |     "build:clean": "rimraf ../imagedephi/web_static",
14 |     "build:compile": "vite build --outDir ../imagedephi/web_static",
15 |     "build": "npm-run-all build:clean build:compile"
16 |   },
17 |   "dependencies": {
18 |     "remixicon": "^3.6.0",
19 |     "vue": "^3.3.4"
20 |   },
21 |   "devDependencies": {
22 |     "@eslint/eslintrc": "^3.2.0",
23 |     "@eslint/js": "^9.17.0",
24 |     "@typescript-eslint/eslint-plugin": "^8.19.1",
25 |     "@typescript-eslint/parser": "^8.19.1",
26 |     "@vitejs/plugin-vue": "^4.2.3",
27 |     "autoprefixer": "^10.4.16",
28 |     "daisyui": "^4.0.7",
29 |     "eslint": "^9.17.0",
30 |     "eslint-config-prettier": "^9.1.0",
31 |     "eslint-plugin-vue": "^9.32.0",
32 |     "globals": "^15.14.0",
33 |     "npm-run-all": "^4.1.5",
34 |     "postcss": "^8.4.31",
35 |     "prettier": "^3.1.1",
36 |     "rimraf": "^5.0.5",
37 |     "tailwindcss": "^3.3.5",
38 |     "typescript": "^5.3.3",
39 |     "typescript-eslint": "^8.19.1",
40 |     "vite": "^5.4.21",
41 |     "vue-eslint-parser": "^9.4.3",
42 |     "vue-tsc": "^2.2.0"
43 |   },
44 |   "packageManager": "yarn@4.10.3+sha512.c38cafb5c7bb273f3926d04e55e1d8c9dfa7d9c3ea1f36a4868fa028b9e5f72298f0b7f401ad5eb921749eb012eb1c3bb74bf7503df3ee43fd600d14a018266f",
45 |   "dependenciesMeta": {
46 |     "remixicon@3.7.0": {
47 |       "unplugged": true
48 |     }
49 |   }
50 | }
51 | 


--------------------------------------------------------------------------------
/imagedephi/utils/os.py:
--------------------------------------------------------------------------------
 1 | import ctypes
 2 | import sys
 3 | 
 4 | 
 5 | def launched_from_frozen_binary() -> bool:
 6 |     """Return whether the current program was launched within a frozen binary."""
 7 |     # https://pyinstaller.org/en/stable/runtime-information.html#run-time-information
 8 |     return getattr(sys, "frozen", False)
 9 | 
10 | 
def launched_from_windows_explorer() -> bool:
    """
    Return whether the current program was launched directly from the Windows Explorer.

    Always returns False on non-Windows platforms.

    :raises OSError: If the Windows API reports no processes attached to the console.
    """
    # Using "platform.system()" is preferred: https://stackoverflow.com/a/58071295
    # However, this is not recognised by Mypy yet: https://github.com/python/mypy/issues/8166
    if sys.platform != "win32":
        return False
    else:
        # See https://devblogs.microsoft.com/oldnewthing/20160125-00/?p=92922 for this algorithm.
        # Contradicting the blog, the API docs
        # https://learn.microsoft.com/en-us/windows/console/getconsoleprocesslist
        # indicate that the process id buffer may not be null.
        # The buffer must also hold at least one element, but its full content isn't needed.
        buffer_length = 1
        # Array elements should be DWORD, which is a uint
        pid_buffer = (ctypes.c_uint * buffer_length)()
        get_console_process_list = ctypes.windll.kernel32.GetConsoleProcessList
        attached_count: int = get_console_process_list(pid_buffer, buffer_length)
        if attached_count == 0:
            # TODO: Log this internally
            raise OSError("Could not detect Windows console.")
        # If frozen, the Pyinstaller bootloader is also running in this console:
        # https://pyinstaller.org/en/stable/advanced-topics.html#the-bootstrap-process-in-detail
        if launched_from_frozen_binary():
            solo_count = 2
        else:
            solo_count = 1
        return attached_count == solo_count
36 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["hatchling", "hatch-vcs"]
 3 | build-backend = "hatchling.build"
 4 | 
 5 | [project]
 6 | name = "ImageDePHI"
 7 | requires-python = ">=3.11"
 8 | dependencies = [
 9 |     "click",
10 |     "tifftools",
11 |     "fastapi",
12 |     "python-multipart",
13 |     "hypercorn",
14 |     "pyyaml",
15 |     "Pillow",
16 |     "pooch",
17 |     "pydicom",
18 |     "tqdm",
19 |     "wsidicom",
20 |     "websockets",
21 | ]
22 | dynamic = ["version"]
23 | 
24 | [project.scripts]
25 | imagedephi = "imagedephi.main:imagedephi"
26 | 
27 | [tool.hatch.build]
28 | only-include = [
29 |   "imagedephi",
30 | ]
31 | artifacts = [
32 |   "imagedephi/web_static",
33 | ]
34 | 
35 | [tool.hatch.version]
36 | source = "vcs"
37 | 
38 | [tool.hatch.build.hooks.jupyter-builder]
39 | dependencies = ["hatch-jupyter-builder"]
40 | build-function = "hatch_jupyter_builder.npm_builder"
41 | ensured-targets = [
42 |   "imagedephi/web_static/index.html",
43 | ]
44 | # Allow development installs even if the client build is broken
45 | optional-editable-build = true
46 | 
47 | [tool.hatch.build.hooks.jupyter-builder.build-kwargs]
48 | path = "client"
49 | build_cmd = "build"
50 | # Will only rebuild if source_dir was modified after build_dir
51 | source_dir = "client"
52 | build_dir = "imagedephi/web_static"
53 | 
54 | [tool.black]
55 | line-length = 100
56 | target-version = ["py311"]
57 | 
58 | [tool.isort]
59 | profile = "black"
60 | line_length = 100
61 | force_sort_within_sections = true
62 | combine_as_imports = true
63 | 
64 | [tool.mypy]
65 | ignore_missing_imports = true
66 | show_error_codes = true
67 | mypy_path = "$MYPY_CONFIG_FILE_DIR/stubs"
68 | exclude = [
69 |     "build/",
70 | ]
71 | 
72 | [tool.pytest.ini_options]
73 | addopts = "--strict-config --strict-markers --showlocals --verbose"
74 | testpaths = ["tests"]
75 | 
76 | [tool.pyright]
77 | stubPath = "stubs"
78 | 
79 | [tool.yamlfix]
80 | line_length = 200
81 | preserve_quotes = true
82 | sequence_style = "block_style"
83 | 


--------------------------------------------------------------------------------
/client/src/store/imageStore.ts:
--------------------------------------------------------------------------------
 1 | import { reactive } from "vue";
 2 | import { imagePlanResponse, ImagePlanParams } from "./types";
 3 | import { getRedactionPlan, getImages } from "../api/rest";
 4 | import { selectedDirectories } from "./directoryStore";
 5 | import { redactionStateFlags } from "./redactionStore";
 6 | 
 7 | export const useRedactionPlan = reactive({
 8 |   imageRedactionPlan: {} as imagePlanResponse,
 9 |   currentDirectory: selectedDirectories.value.inputDirectory,
10 |   async updateImageData(params: ImagePlanParams) {
11 |     this.currentDirectory = params.directory;
12 |     this.imageRedactionPlan = await getRedactionPlan(params);
13 |     this.getThumbnail(this.imageRedactionPlan.data);
14 |   },
15 |   async getThumbnail(imagedict: Record<string, Record<string, string>>) {
16 |     Object.keys(imagedict).forEach(async (image) => {
17 |       const keys = ["thumbnail", "label", "macro"];
18 |       for (let kidx=0; kidx < keys.length; kidx += 1) {
19 |         const key = keys[kidx];
20 |         const response = await getImages(
21 |           this.currentDirectory + "/" + image,
22 |           key,
23 |         );
24 |         if (response.status >= 400) {
25 |           this.imageRedactionPlan.data[image][key] = key === "thumbnail" ? "/thumbnailPlaceholder.svg" : "/associatedPlaceholder.svg";
26 |           return;
27 |         }
28 |         if (response.body) {
29 |           const reader = response.body.getReader();
30 |           const chunks = [];
31 | 
32 |           while (true) {
33 |             const { done, value } = await reader.read();
34 |             if (done) break;
35 |             chunks.push(value);
36 |           }
37 |           const blob = new Blob(chunks);
38 |           const url = URL.createObjectURL(blob);
39 |           this.imageRedactionPlan.data[image][key]= url;
40 |         }
41 |       };
42 |     });
43 |   },
44 | 
45 |   clearImageData() {
46 |     this.imageRedactionPlan = {} as imagePlanResponse;
47 |   },
48 | });
49 | 
// Reload the table for the given parameters: clear the "redactionSnackbar" flag, then start
// (without awaiting) a refresh of the stored redaction plan.
export const updateTableData = (params: ImagePlanParams) => {
  redactionStateFlags.value.redactionSnackbar = false;
  useRedactionPlan.updateImageData(params);
};
54 | 


--------------------------------------------------------------------------------
/tests/test_utils_cli.py:
--------------------------------------------------------------------------------
 1 | from inspect import iscoroutinefunction
 2 | 
 3 | import click
 4 | from click.testing import CliRunner
 5 | import pytest
 6 | from pytest_mock import MockerFixture
 7 | 
 8 | from imagedephi.utils.cli import FallthroughGroup, run_coroutine
 9 | 
10 | 
def test_utils_cli_run_coroutine(mocker: MockerFixture) -> None:
    """Check that ``run_coroutine`` yields a synchronous callable which awaits the coroutine."""
    coroutine_function = mocker.AsyncMock()

    sync_callable = run_coroutine(coroutine_function)

    # The wrapper itself must be a plain function, yet forward its arguments when called
    assert not iscoroutinefunction(sync_callable)
    sync_callable(5, foo="bar")
    coroutine_function.assert_awaited_once_with(5, foo="bar")
19 | 
20 | 
def test_utils_cli_fallthrough_group_baseline(mocker: MockerFixture, cli_runner: CliRunner) -> None:
    """Check that explicitly naming the subcommand runs it without consulting the fallthrough."""
    group_callback = mocker.Mock()
    subcommand_callback = mocker.Mock()
    fallthrough_check = mocker.Mock()
    # Decorators can't be used with mocks, so assemble the group and its subcommand by hand
    subcommand = click.Command(name="sub", callback=subcommand_callback)
    group = FallthroughGroup(
        subcommand_name="sub", should_fallthrough=fallthrough_check, callback=group_callback
    )
    group.add_command(subcommand)

    # Explicitly invoke a subcommand
    outcome = cli_runner.invoke(group, ["sub"])

    assert outcome.exit_code == 0
    group_callback.assert_called_once()
    subcommand_callback.assert_called_once()
    fallthrough_check.assert_not_called()
    assert "Usage" not in outcome.output
39 | 
40 | 
@pytest.mark.parametrize("should_fallthrough", [False, True])
def test_utils_cli_fallthrough_group_empty(
    should_fallthrough: bool, mocker: MockerFixture, cli_runner: CliRunner
) -> None:
    """Check that an empty command line runs the subcommand only when fallthrough is enabled."""
    group_callback = mocker.Mock()
    subcommand_callback = mocker.Mock()

    def fallthrough_check() -> bool:
        return should_fallthrough

    group = FallthroughGroup(
        subcommand_name="sub", should_fallthrough=fallthrough_check, callback=group_callback
    )
    group.add_command(click.Command(name="sub", callback=subcommand_callback))

    # No subcommand
    outcome = cli_runner.invoke(group, [])

    assert outcome.exit_code == 0
    # Either both callbacks ran (fallthrough) or the usage text was printed instead
    assert group_callback.called is should_fallthrough
    assert subcommand_callback.called is should_fallthrough
    assert ("Usage" in outcome.output) is not should_fallthrough
59 | 


--------------------------------------------------------------------------------
/stubs/tifftools/constants.pyi:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from collections.abc import Generator
 4 | from typing import Any, Generic, TypeAlias, TypeVar, overload
 5 | 
 6 | # Anything can be set as a TiffConstant attribute
 7 | _TiffConstantAttr: TypeAlias = Any
 8 | 
class TiffConstant(int):
    # An ``int`` subclass that also carries a ``name`` and further, arbitrarily typed
    # attributes, which are reachable through item access and ``get``.
    value: int
    name: str
    # ``constantDict`` presumably supplies the extra attributes -- confirm against tifftools
    def __init__(self, value: int, constantDict: dict[str, _TiffConstantAttr]) -> None: ...
    def __getitem__(self, key: str) -> _TiffConstantAttr: ...
    def get(self, key: str, default: _TiffConstantAttr = ...) -> _TiffConstantAttr: ...
15 | 
16 | _TiffConstantT = TypeVar("_TiffConstantT", bound=TiffConstant)
17 | 
class TiffConstantSet(Generic[_TiffConstantT]):
    # A collection of constants, generic over the constant class it holds. Members can be
    # retrieved by attribute, by item access or via ``get``, and the set can be iterated.
    # NOTE(review): ``setNameOrClass`` is typed as an instance of the constant type; if the
    # implementation expects the class itself, ``type[_TiffConstantT]`` would be more
    # accurate -- confirm against tifftools.
    def __init__(
        self, setNameOrClass: _TiffConstantT | str, setDict: dict[str, _TiffConstantAttr]
    ) -> None: ...
    def __contains__(self, other: str | int) -> bool: ...
    def __getattr__(self, key: str) -> _TiffConstantT: ...
    def __getitem__(self, key: str | int | _TiffConstantT) -> _TiffConstantT: ...
    def __iter__(self) -> Generator[_TiffConstantT, None, None]: ...
    # Unlike item access, ``get`` may return None
    def get(
        self, key: str | int, default: _TiffConstantT | None = ...
    ) -> _TiffConstantT | None: ...
29 | 
class TiffTag(TiffConstant):
    # A constant with two extra boolean predicates; this is the member type of the
    # ``Tag``, ``GPSTag`` and ``EXIFTag`` sets declared below.
    def isOffsetData(self) -> bool: ...
    def isIFD(self) -> bool: ...
33 | 
34 | Tag: TiffConstantSet[TiffTag]
35 | 
36 | Compression: TiffConstantSet
37 | 
38 | GPSTag: TiffConstantSet[TiffTag]
39 | 
40 | EXIFTag: TiffConstantSet[TiffTag]
41 | 
42 | NewSubfileType: TiffConstantSet
43 | 
44 | class TiffDatatype(TiffConstant): ...
45 | 
46 | Datatype: TiffConstantSet[TiffDatatype]
47 | 
# When tagSet is None or not provided, this returns a TiffTag
@overload
def get_or_create_tag(
    key: str | int,
    tagSet: None = ...,
    upperLimit: bool = ...,
    **tagOptions: _TiffConstantAttr,
) -> TiffTag: ...

# When a tagSet is provided, this returns that set's own constant type
@overload
def get_or_create_tag(
    key: str | int,
    tagSet: TiffConstantSet[_TiffConstantT],
    upperLimit: bool = ...,
    **tagOptions: _TiffConstantAttr,
) -> _TiffConstantT: ...
63 | 


--------------------------------------------------------------------------------
/tests/test_gui.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | from fastapi.testclient import TestClient
 4 | import pytest
 5 | 
 6 | from imagedephi.gui.app import app
 7 | 
 8 | 
 9 | @pytest.fixture
10 | def client() -> TestClient:
11 |     return TestClient(app)
12 | 
13 | 
def test_gui_select_directory(client: TestClient) -> None:
    """Check that the directory selection route succeeds without any parameters."""
    url = app.url_path_for("select_directory")

    response = client.get(url)

    assert response.status_code == 200
18 | 
19 | 
def test_gui_select_directory_success(
    client: TestClient,
    tmp_path: Path,
) -> None:
    """Check that the directory endpoint succeeds when both directories exist."""
    url = app.url_path_for("select_directory")
    # The same existing temporary directory serves as both input and output
    query = {"input_directory": str(tmp_path), "output_directory": str(tmp_path)}

    response = client.get(url, params=query)

    assert response.status_code == 200
30 | 
31 | 
def test_gui_select_directory_not_found(
    client: TestClient,
    tmp_path: Path,
) -> None:
    """Check that a nonexistent directory yields a 404 with the expected detail."""
    url = app.url_path_for("select_directory")
    missing_directory = tmp_path / "fake"
    # NOTE(review): the other tests in this file send "input_directory"/"output_directory";
    # confirm that "directory" is the query parameter this endpoint actually reads.
    query = {"directory": str(missing_directory)}

    response = client.get(url, params=query)

    assert response.status_code == 404
    assert response.json() == {"detail": "Input directory not found"}
42 | 
43 | 
def test_gui_redact(
    client: TestClient,
    tmp_path: Path,
) -> None:
    """Check that the redact endpoint succeeds when both directories exist."""
    url = app.url_path_for("redact")
    query = {"input_directory": str(tmp_path), "output_directory": str(tmp_path)}

    response = client.post(url, params=query)

    assert response.status_code == 200
54 | 
55 | 
def test_gui_redact_input_failure(
    client: TestClient,
    tmp_path: Path,
) -> None:
    """Check that redacting from a nonexistent input directory yields a 404."""
    url = app.url_path_for("redact")
    missing_input = tmp_path / "fake"
    query = {"input_directory": str(missing_input), "output_directory": str(tmp_path)}

    response = client.post(url, params=query)

    assert response.status_code == 404
    assert response.json() == {"detail": "Input directory not found"}
67 | 
68 | 
def test_gui_redact_output_failure(
    client: TestClient,
    tmp_path: Path,
) -> None:
    """Check that redacting into a nonexistent output directory yields a 404."""
    url = app.url_path_for("redact")
    missing_output = tmp_path / "fake"
    query = {"input_directory": str(tmp_path), "output_directory": str(missing_output)}

    response = client.post(url, params=query)

    assert response.status_code == 404
    assert response.json() == {"detail": "Output directory not found"}
80 | 


--------------------------------------------------------------------------------
/imagedephi/utils/cli.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | from collections.abc import Callable, Coroutine
 3 | from functools import wraps
 4 | from typing import Any, ParamSpec, TypeVar
 5 | 
 6 | import click
 7 | 
 8 | T = TypeVar("T")
 9 | P = ParamSpec("P")
10 | 
11 | 
12 | def run_coroutine(f: Callable[P, Coroutine[None, None, T]]) -> Callable[P, T]:
13 |     """Decorate an async function to be run in a new event loop."""
14 | 
15 |     @wraps(f)
16 |     def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
17 |         return asyncio.run(f(*args, **kwargs))
18 | 
19 |     return wrapper
20 | 
21 | 
class FallthroughGroup(click.Group):
    """A Group which may run a subcommand when no subcommand is specified."""

    def __init__(self, subcommand_name: str, should_fallthrough: Callable[[], bool], **attrs: Any):
        """Store the fallthrough configuration and initialise the underlying Group.

        ``subcommand_name`` is the subcommand to run when none is given and
        ``should_fallthrough()`` returns true; remaining keyword arguments are
        passed on to ``click.Group``.
        """
        # The subcommand is only registered after this group has been created,
        # so all that can be kept here is its future name
        self.subcommand_name = subcommand_name
        self.should_fallthrough = should_fallthrough

        # These two options are always forced, whatever the caller passed
        attrs.update(invoke_without_command=True, no_args_is_help=False)
        super().__init__(**attrs)

    def invoke(self, ctx: click.Context) -> Any:
        """Invoke the group, falling through to the default subcommand if appropriate."""
        # "ctx.invoked_subcommand" is not set yet at this point, so the requested
        # subcommand (if any) has to be read from "ctx.protected_args".
        if ctx.protected_args:
            if ctx.protected_args[0] not in self.commands:
                # The requested subcommand is not a real subcommand: show the entire
                # help text in addition to the "no such command" message.
                click.echo(f"Error: No such command: '{ctx.protected_args[0]}'.")
                click.echo(ctx.get_help(), color=ctx.color)
                ctx.exit()
        elif self.should_fallthrough():
            # No subcommand was given. Fake a call by prepending the default one;
            # calling "ctx.invoke" directly here would not allow the parent command to run.
            ctx.protected_args.insert(0, self.subcommand_name)
        elif not ctx.resilient_parsing:
            # Execute the normal Click "no_args_is_help" behavior
            click.echo(ctx.get_help(), color=ctx.color)
            ctx.exit()

        # All non-help cases reach here
        return super().invoke(ctx)
57 | 


--------------------------------------------------------------------------------
/imagedephi/redact/build_redaction_plan.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | from imagedephi.rules import FileFormat, Ruleset
 4 | from imagedephi.utils.image import get_file_format_from_path
 5 | from imagedephi.utils.tiff import get_is_svs
 6 | 
 7 | from .dicom import DicomRedactionPlan
 8 | from .redaction_plan import RedactionPlan
 9 | from .svs import SvsRedactionPlan
10 | from .tiff import TiffRedactionPlan, UnsupportedFileTypeError
11 | 
12 | 
class ImageDePHIRedactionError(Exception):
    """Raised when the current configuration cannot be applied to the given image files."""

    pass
15 | 
16 | 
def build_redaction_plan(
    image_path: Path,
    base_rules: Ruleset,
    override_rules: Ruleset | None = None,
    dcm_uid_map: dict[str, str] | None = None,
) -> RedactionPlan:
    """Build the redaction plan that matches the format of a single image file.

    The rules for the detected format are copied from ``base_rules`` and, when
    ``override_rules`` is supplied, updated with the override's entries for that
    format. TIFF files are further split into SVS and plain TIFF.

    Raises ``ImageDePHIRedactionError`` when strict redaction is requested for a
    DICOM file and ``UnsupportedFileTypeError`` when the format is neither TIFF
    nor DICOM.
    """
    file_format = get_file_format_from_path(image_path)
    # A supplied override ruleset decides strictness on its own; the base is the fallback
    strict = (override_rules if override_rules else base_rules).strict

    # NOTE(review): if ".copy()" is a shallow copy, the "update()" calls below also modify
    # the rule mappings held by "base_rules" -- confirm that this is intended.
    if file_format == FileFormat.DICOM:
        if strict:
            raise ImageDePHIRedactionError(
                "strict redaction is not currently supported for DICOM images"
            )
        dicom_rules = base_rules.dicom.copy()
        if override_rules:
            dicom_overrides = override_rules.dicom
            dicom_rules.metadata.update(dicom_overrides.metadata)
            # Unlike the mappings, this single setting is replaced outright
            dicom_rules.custom_metadata_action = dicom_overrides.custom_metadata_action
            dicom_rules.associated_images.update(dicom_overrides.associated_images)
        return DicomRedactionPlan(image_path, dicom_rules, dcm_uid_map)

    if file_format != FileFormat.TIFF:
        raise UnsupportedFileTypeError(f"File format for {image_path} not supported.")

    if get_is_svs(image_path):
        svs_rules = base_rules.svs.copy()
        if override_rules:
            svs_overrides = override_rules.svs
            svs_rules.metadata.update(svs_overrides.metadata)
            svs_rules.associated_images.update(svs_overrides.associated_images)
            svs_rules.image_description.update(svs_overrides.image_description)
        return SvsRedactionPlan(image_path, svs_rules, strict)

    tiff_rules = base_rules.tiff.copy()
    if override_rules:
        tiff_overrides = override_rules.tiff
        tiff_rules.metadata.update(tiff_overrides.metadata)
        tiff_rules.associated_images.update(tiff_overrides.associated_images)
    return TiffRedactionPlan(image_path, tiff_rules, strict)
52 | 


--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
  1 | [tox]
  2 | # Don't use "min_version", to ensure Tox 3 respects this
  3 | minversion = 4
  4 | env_list =
  5 |     lint,
  6 |     type,
  7 |     test,
  8 | 
  9 | [testenv]
 10 | base_python = python3.11
 11 | # Building and installing wheels is significantly faster
 12 | package = wheel
 13 | 
 14 | [testenv:lint]
 15 | package = skip
 16 | deps =
 17 |     flake8
 18 |     flake8-black
 19 |     flake8-bugbear
 20 |     flake8-docstrings
 21 |     flake8-isort
 22 |     pep8-naming
 23 |     yamlfix
 24 | commands =
 25 |     flake8 {posargs:.}
 26 |     yamlfix -c pyproject.toml --exclude ./client/node_modules/**/* client docs .github imagedephi stubs tests --check
 27 | 
 28 | [testenv:format]
 29 | package = skip
 30 | deps =
 31 |     black
 32 |     isort
 33 |     yamlfix
 34 | commands =
 35 |     isort {posargs:.}
 36 |     black {posargs:.}
 37 |     yamlfix -c pyproject.toml --exclude ./client/node_modules/**/* client docs .github imagedephi stubs tests
 38 | 
 39 | [testenv:type]
 40 | # Editable ensures dependencies are installed, but full packaging isn't necessary
 41 | package = editable
 42 | deps =
 43 |     mypy
 44 |     pytest
 45 |     types-PyYAML
 46 |     types-Pillow
 47 | commands =
 48 |     mypy {posargs:.}
 49 | 
 50 | [testenv:test]
 51 | deps =
 52 |     freezegun
 53 |     # httpx is needed for FastApi testing
 54 |     httpx
 55 |     pytest
 56 |     pytest-asyncio
 57 |     pytest-mock
 58 |     pytest-timeout
 59 |     pooch
 60 | commands =
 61 |     pytest tests {posargs}
 62 | 
 63 | [testenv:binary]
 64 | deps =
 65 |     pyinstaller
 66 | commands =
 67 |     pyinstaller \
 68 |         --clean \
 69 |         --noconfirm \
 70 |         --onefile \
 71 |         --name imagedephi \
 72 |         --recursive-copy-metadata imagedephi \
 73 |         --collect-data imagedephi \
 74 |         --collect-submodules pydicom.encoders \
 75 |         --specpath {env_tmp_dir} \
 76 |         --workpath {env_tmp_dir} \
 77 |         {env_site_packages_dir}/imagedephi/__main__.py
 78 | 
 79 | [flake8]
 80 | max-line-length = 100
 81 | show-source = true
 82 | extend-exclude =
 83 |     .mypy_cache
 84 |     # Expect many developers to create a virtual environment here
 85 |     .venv
 86 |     .direnv
 87 |     client
 88 | ignore =
 89 |     # closing bracket does not match indentation of opening bracket’s line
 90 |     E123,
 91 |     # whitespace before ':'
 92 |     E203,
 93 |     # line break before binary operator
 94 |     W503,
 95 |     # Missing docstring in *
 96 |     D10,
 97 |     # Multiple statements on one line (https://github.com/psf/black/issues/3887)
 98 |     E704,
 99 | 
100 | [yamlfix]
101 | line_length = 200
102 | preserve_quotes = True
103 | sequence_style = YamlNodeStyle.BLOCK_STYLE
104 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Build artifacts
  2 | /imagedephi/web_static/
  3 | 
  4 | # Test data retrieved by pooch
  5 | /tests/data/*
  6 | 
  7 | # Developer tools
  8 | .envrc
  9 | .vscode/*
 10 | */web_static/*
 11 | 
 12 | # Sample Redacted Images
 13 | */REDACTED_*
 14 | */Redacted_*
 15 | Redacted_*/
 16 | 
 17 | 
 18 | # Byte-compiled / optimized / DLL files
 19 | __pycache__/
 20 | *.py[cod]
 21 | *$py.class
 22 | 
 23 | # C extensions
 24 | *.so
 25 | 
 26 | # Distribution / packaging
 27 | .Python
 28 | build/
 29 | develop-eggs/
 30 | dist/
 31 | downloads/
 32 | eggs/
 33 | .eggs/
 34 | lib/
 35 | lib64/
 36 | parts/
 37 | sdist/
 38 | var/
 39 | wheels/
 40 | pip-wheel-metadata/
 41 | share/python-wheels/
 42 | *.egg-info/
 43 | .installed.cfg
 44 | *.egg
 45 | MANIFEST
 46 | 
 47 | # PyInstaller
 48 | #  Usually these files are written by a python script from a template
 49 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 50 | *.manifest
 51 | *.spec
 52 | 
 53 | # Installer logs
 54 | pip-log.txt
 55 | pip-delete-this-directory.txt
 56 | 
 57 | # Unit test / coverage reports
 58 | htmlcov/
 59 | .tox/
 60 | .nox/
 61 | .coverage
 62 | .coverage.*
 63 | .cache
 64 | nosetests.xml
 65 | coverage.xml
 66 | *.cover
 67 | *.py,cover
 68 | .hypothesis/
 69 | .pytest_cache/
 70 | 
 71 | # Translations
 72 | *.mo
 73 | *.pot
 74 | 
 75 | # Django stuff:
 76 | *.log
 77 | local_settings.py
 78 | db.sqlite3
 79 | db.sqlite3-journal
 80 | 
 81 | # Flask stuff:
 82 | instance/
 83 | .webassets-cache
 84 | 
 85 | # Scrapy stuff:
 86 | .scrapy
 87 | 
 88 | # Sphinx documentation
 89 | docs/_build/
 90 | 
 91 | # PyBuilder
 92 | target/
 93 | 
 94 | # Jupyter Notebook
 95 | .ipynb_checkpoints
 96 | 
 97 | # IPython
 98 | profile_default/
 99 | ipython_config.py
100 | 
101 | # pyenv
102 | .python-version
103 | 
104 | # pipenv
105 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
106 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
107 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
108 | #   install all needed dependencies.
109 | #Pipfile.lock
110 | 
111 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
112 | __pypackages__/
113 | 
114 | # Celery stuff
115 | celerybeat-schedule
116 | celerybeat.pid
117 | 
118 | # SageMath parsed files
119 | *.sage.py
120 | 
121 | # Environments
122 | .env
123 | .venv
124 | env/
125 | venv/
126 | ENV/
127 | env.bak/
128 | venv.bak/
129 | 
130 | # Spyder project settings
131 | .spyderproject
132 | .spyproject
133 | 
134 | # Rope project settings
135 | .ropeproject
136 | 
137 | # mkdocs documentation
138 | /site
139 | 
140 | # mypy
141 | .mypy_cache/
142 | .dmypy.json
143 | dmypy.json
144 | 
145 | # Pyre type checker
146 | .pyre/
147 | 


--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | from click.testing import CliRunner
 4 | import pooch
 5 | import pytest
 6 | 
# Test data files, keyed by file name. Each value has the form "<algorithm>:<hex digest>";
# retrieve_file() uses the two parts to build the download URL and hands the whole string
# to pooch as the expected hash of the downloaded file.
registry = {
    "secret_metadata.tiff": "sha512:28be4880984bbb5f8e2a2e2314e594686427aa3da54e6a804ed5682e2da2585d21d3e3f751e995ad5b133a8b183cc7eb3cdee55cfcf5e4589d69c543db10fa9e",  # noqa: E501
    "test_dcm_image.dcm": "sha512:4cb4c76799ce5d6f3f66c6d3bc055c5527b9fbef3d684450f48c870a5fbd882c87f3d59349ace778e80e78085276fecd278844af98a2324b754345aab6d6eac5",  # noqa: E501
    "test_image.tif": "sha512:269d974a373f08e3274a1074fa595f8c70c496c64590412b6233eb2e3ce691c92963e5d0a3518848f15d9353783624faab18a002a9e049691cb5b627e40c9423",  # noqa: E501
    "test_svs_image_blank.svs": "sha512:76350f39bafd86ced9d94b9a095200d3894034c36a3bc9a45a57a8f5ea820b8fd877ae483e9d9d4aef018cd8ca96c54ea815467ec94f6d692fca0ecc69afab23",  # noqa: E501
    "test_svs_no_extension": "sha512:76350f39bafd86ced9d94b9a095200d3894034c36a3bc9a45a57a8f5ea820b8fd877ae483e9d9d4aef018cd8ca96c54ea815467ec94f6d692fca0ecc69afab23",  # noqa: E501
}
14 | 
15 | 
def retrieve_file(file_name: str, output_path: Path) -> Path:
    """Fetch a registered test file into ``output_path`` via pooch and return its path.

    ``file_name`` must be a key of ``registry``; its "<algorithm>:<digest>" entry
    both selects the download URL and is passed to pooch as the expected hash.
    """
    known_hash = registry[file_name]
    algo, hash_value = known_hash.split(":")
    download_url = f"https://data.kitware.com/api/v1/file/hashsum/{algo}/{hash_value}/download"
    local_path = pooch.retrieve(
        url=download_url,
        known_hash=known_hash,
        fname=file_name,
        path=output_path,
    )
    return Path(local_path)
26 | 
27 | 
@pytest.fixture
def secret_metadata_image() -> list[Path]:
    """Retrieve ``secret_metadata.tiff`` into ``data/input`` as a one-item list."""
    input_dir = Path(__file__).with_name("data") / "input"
    return [retrieve_file("secret_metadata.tiff", input_dir)]
32 | 
33 | 
@pytest.fixture
def test_image_tiff() -> Path:
    """Retrieve ``test_image.tif`` into ``data/input/tiff`` and return its path."""
    tiff_dir = Path(__file__).with_name("data") / "input" / "tiff"
    return retrieve_file("test_image.tif", tiff_dir)
37 | 
38 | 
@pytest.fixture
def test_image_svs() -> Path:
    """Retrieve ``test_svs_image_blank.svs`` into ``data/input/svs`` and return its path."""
    svs_dir = Path(__file__).with_name("data") / "input" / "svs"
    return retrieve_file("test_svs_image_blank.svs", svs_dir)
44 | 
45 | 
@pytest.fixture
def test_image_dcm() -> list[Path]:
    """Retrieve ``test_dcm_image.dcm`` into ``data/input/dcm`` as a one-item list."""
    dcm_dir = Path(__file__).with_name("data") / "input" / "dcm"
    return [retrieve_file("test_dcm_image.dcm", dcm_dir)]
52 | 
53 | 
@pytest.fixture
def test_image_svs_no_extension() -> list[Path]:
    """Retrieve ``test_svs_no_extension`` into ``data/input`` as a one-item list."""
    input_dir = Path(__file__).with_name("data") / "input"
    return [retrieve_file("test_svs_no_extension", input_dir)]
58 | 
59 | 
@pytest.fixture
def data_dir() -> Path:
    """Return the ``data`` directory located next to this file."""
    this_file = Path(__file__)
    return this_file.with_name("data")
63 | 
64 | 
@pytest.fixture
def rules_dir() -> Path:
    """Return the ``override_rule_sets`` directory located next to this file."""
    this_file = Path(__file__)
    return this_file.with_name("override_rule_sets")
68 | 
69 | 
@pytest.fixture
def cli_runner() -> CliRunner:
    """Provide a new Click ``CliRunner``."""
    runner = CliRunner()
    return runner
73 | 


--------------------------------------------------------------------------------
/imagedephi/gui/app.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | from collections.abc import AsyncGenerator
 3 | from contextlib import asynccontextmanager
 4 | import importlib.resources
 5 | import os
 6 | 
 7 | from fastapi import FastAPI, Request
 8 | from fastapi.middleware.cors import CORSMiddleware
 9 | from fastapi.responses import PlainTextResponse
10 | from fastapi.staticfiles import StaticFiles
11 | from starlette.background import BackgroundTask
12 | 
13 | from imagedephi.gui.api import api
14 | 
# Set by the internal-error handler (as a background task) to request a server shutdown.
shutdown_event = asyncio.Event()
# Parse DEBUG as a plain boolean flag instead of eval()-ing it: eval executes whatever
# code the environment contains and raises on common spellings such as "true" or an
# empty string. Unset, "False", "None" and "0" still mean "off"; "True" and "1" still
# mean "on". Matching is case-insensitive and ignores surrounding whitespace.
debug_mode = os.environ.get("DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
17 | 
18 | 
@asynccontextmanager
async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
    """Reset server state on startup and re-raise a recorded exception on shutdown."""
    # Start from a clean state every time the app starts, to support unit testing
    app.state.last_exception = None
    shutdown_event.clear()

    yield

    recorded_exception = app.state.last_exception
    if recorded_exception is None:
        return
    # This will cause a "lifespan.shutdown.failed" event to be sent. Hypercorn will re-raise
    # this from "serve", allowing exceptions to propagate to the top level.
    raise recorded_exception  # pyright: ignore [reportGeneralTypeIssues]
31 | 
32 | 
app = FastAPI(
    # FastAPI's debug flag will render exception tracebacks
    debug=debug_mode,
    # End users don't need access to the OpenAPI spec
    openapi_url="/openapi.json" if debug_mode else None,
    lifespan=lifespan,
)

app.include_router(api.router)  # type: ignore

if not debug_mode:
    # Outside debug mode, serve the web client files shipped inside the package
    web_static = importlib.resources.files("imagedephi") / "web_static"
    app.mount("/", StaticFiles(directory=str(web_static), html=True), name="home")
    app.mount("/assets", StaticFiles(directory=str(web_static / "assets")), name="assets")
else:
    # In debug mode no static files are mounted and CORS is left fully open
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_methods=["*"],
        allow_headers=["*"],
    )
66 | 
67 | 
# This exception handler will not be used when the FastAPI debug flag is enabled,
# due to how ServerErrorMiddleware works.
@app.exception_handler(500)
def on_internal_error(request: Request, exc: Exception) -> PlainTextResponse:
    """Return an error response and schedule the server for immediate shutdown."""
    # Keep the exception so the lifespan handler can re-raise it while shutting down
    app.state.last_exception = exc

    # Unlike the default error response, this also shuts down the server.
    # A desktop application doesn't need to continue running through internal errors, and
    # continuing to run makes it harder for users and the test environment to detect fatal errors.
    request_shutdown = BackgroundTask(shutdown_event.set)
    return PlainTextResponse("Internal Server Error", status_code=500, background=request_shutdown)
80 | 


--------------------------------------------------------------------------------
/client/src/store/directoryStore.ts:
--------------------------------------------------------------------------------
 1 | import { ref, Ref, nextTick } from "vue";
 2 | import { SelectedDirectories, DirectoryData, Path } from "./types";
 3 | import { getDirectoryInfo } from "../api/rest";
 4 | 
 5 | const storedDirectories = {
 6 |   inputDirectory: localStorage.getItem("inputDirectory"),
 7 |   outputDirectory: localStorage.getItem("outputDirectory"),
 8 |   rulesetDirectory: localStorage.getItem("rulesetDirectory"),
 9 | };
10 | 
11 | export const selectedDirectories: Ref<SelectedDirectories> = ref({
12 |   inputDirectory: storedDirectories.inputDirectory
13 |     ? storedDirectories.inputDirectory
14 |     : "",
15 |   outputDirectory: storedDirectories.outputDirectory
16 |     ? storedDirectories.outputDirectory
17 |     : "",
18 |   rulesetDirectory: storedDirectories.rulesetDirectory
19 |     ? storedDirectories.rulesetDirectory
20 |     : "",
21 | });
22 | 
23 | export const directoryData: Ref<DirectoryData> = ref({
24 |   directory: "",
25 |   ancestors: [],
26 |   children: [],
27 |   childrenImages: [],
28 |   childrenYaml: [],
29 | });
30 | 
31 | export const loadingData = ref(false);
32 | 
/**
 * Fetch the listing for `currentDirectory` and publish it in `directoryData`.
 *
 * The previous child lists are cleared right away. The loading flag is only
 * raised if the request is still pending after 100 ms, and it is always
 * lowered again when the request settles, whether it succeeded or failed.
 * A failed request still rejects the returned promise.
 */
export const updateDirectories = async (currentDirectory?: string) => {
  directoryData.value.children = [];
  directoryData.value.childrenImages = [];
  directoryData.value.childrenYaml = [];

  // Delay the loading indicator so fast responses do not make it flicker
  const timeout = setTimeout(() => {
    loadingData.value = true;
  }, 100);

  try {
    const data = await getDirectoryInfo(currentDirectory);
    directoryData.value = {
      ...data,
      children: data.child_directories,
      childrenImages: data.child_images,
      childrenYaml: data.child_yaml_files,
    };
  } finally {
    // Without this, a rejected request would leave the timer armed and the
    // loading indicator switched on indefinitely
    clearTimeout(timeout);
    loadingData.value = false;
  }

  calculateVisibleItems();
};
52 | 
// Leading slice of directoryData.childrenImages; assigned by calculateVisibleItems().
export const visibleImages: Ref<Path[]> = ref([]);
// Number of images in directoryData.childrenImages that are not in visibleImages.
export const remainingImages = ref(0);
55 | 
/**
 * Size the list container and work out how many images fit inside it.
 *
 * The container height is set to the full height minus the `.menu-top`
 * element and a fixed 3.5rem. On the next tick the first list item is
 * measured, `visibleImages` receives as many leading images as fit strictly
 * within the container, and `remainingImages` receives the count of the rest.
 */
export const calculateVisibleItems = () => {
  const menuTop = document.querySelector(".menu-top");
  const listContainer = document.querySelector(".list-container");
  // Fall back to 0 so a missing ".menu-top" cannot produce "undefinedpx",
  // which would invalidate the whole declaration
  const menuTopHeight = menuTop?.clientHeight ?? 0;
  listContainer?.setAttribute(
    "style",
    `height: calc(100% - (${menuTopHeight}px + 3.5rem))`,
  );

  nextTick(() => {
    const images = directoryData.value.childrenImages;
    const containerHeight = listContainer?.clientHeight ?? 0;
    // Every list item is assumed to be as tall as the first one
    const listItemHeight =
      listContainer?.querySelector("li")?.clientHeight ?? 0;

    // Count the items whose cumulative height stays strictly below the
    // container height. Nothing is visible when the container has no height;
    // everything is visible when no item could be measured.
    let visibleCount = 0;
    while (
      visibleCount < images.length &&
      (visibleCount + 1) * listItemHeight < containerHeight
    ) {
      visibleCount += 1;
    }

    visibleImages.value = images.slice(0, visibleCount);
    remainingImages.value = images.length - visibleCount;
  });
};
89 | 


--------------------------------------------------------------------------------
/client/src/components/MenuSteps.vue:
--------------------------------------------------------------------------------
  1 | <script setup lang="ts">
  2 | import {
  3 |   selectedDirectories,
  4 |   updateDirectories,
  5 | } from "../store/directoryStore";
  6 | import { useRedactionPlan } from "../store/imageStore";
  7 | 
const props = defineProps({
  // Number rendered in the round badge at the start of the step header
  stepNumber: {
    type: Number,
    default: 0,
  },
  // Heading text. Also inspected for "Input" / "Output" / "Ruleset" to decide
  // which modal openModal() opens and which selected path is displayed.
  stepTitle: {
    type: String,
    default: "",
  },
  // Secondary text rendered below the title
  helpText: {
    type: String,
    default: "",
  },
  // The three modal props are objects on which openModal() calls
  // `.modal.showModal()`; each defaults to null when not supplied.
  inputModal: {
    type: Object,
    default: null,
  },
  outputModal: {
    type: Object,
    default: null,
  },
  // When set, the empty-state text reads "No file selected" instead of
  // "No directory selected".
  rulesetModal: {
    type: Object,
    default: null,
  },
});
 34 | 
 35 | const openModal = () => {
 36 |   if (props.stepTitle.includes("Input")) {
 37 |     props.inputModal.modal.showModal();
 38 |     updateDirectories(selectedDirectories.value.inputDirectory);
 39 |   } else if (props.stepTitle.includes("Output")) {
 40 |     props.outputModal.modal.showModal();
 41 |     updateDirectories(selectedDirectories.value.outputDirectory);
 42 |   } else {
 43 |     props.rulesetModal.modal.showModal();
 44 |     updateDirectories(selectedDirectories.value.rulesetDirectory);
 45 |   }
 46 | };
 47 | 
 48 | const clearRuleset = () => {
 49 |   selectedDirectories.value.rulesetDirectory = "";
 50 |   useRedactionPlan.updateImageData({
 51 |     directory: selectedDirectories.value.inputDirectory,
 52 |     rules: selectedDirectories.value.rulesetDirectory,
 53 |     limit: 50,
 54 |     offset: 0,
 55 |     update: false,
 56 |   });
 57 | };
 58 | </script>
 59 | 
 60 | <template>
 61 |   <div
 62 |     class="w-96 pt-2.5 bg-white flex-col justify-start items-start inline-flex"
 63 |   >
 64 |     <div
 65 |       class="self-stretch px-4 py-3 bg-white justify-start items-center gap-2.5 inline-flex"
 66 |     >
 67 |       <div
 68 |         class="w-6 h-6 bg-rose-100 rounded-[100px] justify-center items-center flex"
 69 |       >
 70 |         <div
 71 |           class="w-6 h-6 text-red-400 text-sm font-semibold flex flex-wrap justify-center content-center"
 72 |         >
 73 |           {{ stepNumber }}
 74 |         </div>
 75 |       </div>
 76 |       <div class="grow shrink basis-0">
 77 |         <div class="grow shrink basis-0 flex flex-col">
 78 |           <span
 79 |             class="text-purple-900 text-sm font-semibold uppercase tracking-widest"
 80 |           >
 81 |             {{ stepTitle }}
 82 |           </span>
 83 |           <span class="text-gray-500 text-xs font-normal tracking-wide">
 84 |             {{ helpText }}
 85 |           </span>
 86 |         </div>
 87 |       </div>
 88 |       <button class="btn btn-ghost btn-square btn-sm" @click="openModal">
 89 |         <i class="ri-folder-open-fill text-secondary text-lg" />
 90 |       </button>
 91 |     </div>
 92 |     <div
 93 |       class="self-stretch h-[74px] px-5 pb-10 bg-white border-b border-neutral-200 flex-col justify-start flex"
 94 |     >
 95 |       <div
 96 |         v-if="
 97 |           stepTitle?.includes('Input') && selectedDirectories.inputDirectory
 98 |         "
 99 |         class="text-left text-gray-500 text-[14px] font-bold break-all pl-8"
100 |       >
101 |         {{ selectedDirectories.inputDirectory }}
102 |       </div>
103 |       <div
104 |         v-else-if="
105 |           stepTitle?.includes('Output') && selectedDirectories.outputDirectory
106 |         "
107 |         class="text-left text-gray-500 text-[14px] font-bold break-all pl-8"
108 |       >
109 |         {{ selectedDirectories.outputDirectory }}
110 |       </div>
111 |       <div
112 |         v-else-if="
113 |           stepTitle?.includes('Ruleset') && selectedDirectories.rulesetDirectory
114 |         "
115 |         class="text-left text-gray-500 text-[14px] font-bold break-all pl-8"
116 |       >
117 |         {{ selectedDirectories.rulesetDirectory }}
118 |         <button
119 |           class="btn btn-ghost btn-square btn-sm tooltip tooltip-right"
120 |           data-tip="Clear selected rules"
121 |           @click="clearRuleset"
122 |         >
123 |           <i class="ri-close-circle-fill text-secondary text-lg" />
124 |         </button>
125 |       </div>
126 |       <div
127 |         v-else
128 |         class="text-left text-gray-500 text-[14px] font-bold break-all pl-8"
129 |       >
130 |         {{ rulesetModal ? "No file selected" : "No directory selected" }}
131 |       </div>
132 |     </div>
133 |   </div>
134 | </template>
135 | 


--------------------------------------------------------------------------------
/imagedephi/minimum_rules.yaml:
--------------------------------------------------------------------------------
  1 | ---
  2 | name: Minimum Rules
  3 | description: A set of rules that defines a minimum amount of metadata for images to be read. Metadata not specified by a rule is deleted (controlled by the metadata_fallback_action).
  4 | output_file_name: study_slide
  5 | strict: true
  6 | tiff:
  7 |   metadata_fallback_action: delete
  8 |   associated_images:
  9 |     default:
 10 |       action: delete
 11 |   metadata:
 12 |     ImageWidth:
 13 |       action: keep
 14 |     ImageLength:
 15 |       action: keep
 16 |     BitsPerSample:
 17 |       action: keep
 18 |     Compression:
 19 |       action: keep
 20 |     Photometric:
 21 |       action: keep
 22 |     StripOffsets:
 23 |       action: keep
 24 |     SamplesPerPixel:
 25 |       action: keep
 26 |     RowsPerStrip:
 27 |       action: keep
 28 |     StripByteCounts:
 29 |       action: keep
 30 |     XResolution:
 31 |       action: keep
 32 |     YResolution:
 33 |       action: keep
 34 |     ResolutionUnit:
 35 |       action: keep
 36 |     NewSubfileType:
 37 |       action: keep
 38 |     FillOrder:
 39 |       action: keep
 40 |     Orientation:
 41 |       action: keep
 42 |     Predictor:
 43 |       action: keep
 44 |     ColorMap:
 45 |       action: keep
 46 |     TileWidth:
 47 |       action: keep
 48 |     TileLength:
 49 |       action: keep
 50 |     TileOffsets:
 51 |       action: keep
 52 |     TileByteCounts:
 53 |       action: keep
 54 |     SubIFD:
 55 |       action: keep
 56 |     ExtraSamples:
 57 |       action: keep
 58 |     SampleFormat:
 59 |       action: keep
 60 |     Indexed:
 61 |       action: keep
 62 |     JPEGTables:
 63 |       action: keep
 64 |     StripRowCounts:
 65 |       action: keep
 66 |     ICCProfile:
 67 |       action: keep
 68 |     JPEGProc:
 69 |       action: keep
 70 |     JPEGIFOffset:
 71 |       action: keep
 72 |     JPEGIFByteCount:
 73 |       action: keep
 74 |     JPEGRestartInterval:
 75 |       action: keep
 76 |     JPEGLosslessPredictors:
 77 |       action: keep
 78 |     JPEGPointTransform:
 79 |       action: keep
 80 |     JPEGQTables:
 81 |       action: keep
 82 |     JPEGDCTables:
 83 |       action: keep
 84 |     JPEGACTables:
 85 |       action: keep
 86 |     YCbCrCoefficients:
 87 |       action: keep
 88 |     YCbCrSubsampling:
 89 |       action: keep
 90 |     YCbCrPositioning:
 91 |       action: keep
 92 |     PlanarConfig:
 93 |       action: keep
# Rules for the `svs` format. The keys of this section match fields of the SvsRules model in
# imagedephi/rules.py (metadata_fallback_action, associated_images, metadata).
svs:
  # Presumably applied to metadata tags that have no entry under `metadata` below -- confirm
  # against the redaction plan code.
  metadata_fallback_action: delete
  associated_images:
    # NOTE(review): `default` looks like the rule used for any associated image without its own
    # entry -- confirm.
    default:
      action: delete
  # Every tag listed here is explicitly kept. They appear to be the structural TIFF tags
  # (dimensions, compression, strip/tile layout, colour handling) rather than descriptive ones.
  metadata:
    ImageWidth:
      action: keep
    ImageLength:
      action: keep
    BitsPerSample:
      action: keep
    Compression:
      action: keep
    Photometric:
      action: keep
    StripOffsets:
      action: keep
    SamplesPerPixel:
      action: keep
    RowsPerStrip:
      action: keep
    StripByteCounts:
      action: keep
    XResolution:
      action: keep
    YResolution:
      action: keep
    ResolutionUnit:
      action: keep
    NewSubfileType:
      action: keep
    FillOrder:
      action: keep
    Orientation:
      action: keep
    Predictor:
      action: keep
    ColorMap:
      action: keep
    TileWidth:
      action: keep
    TileLength:
      action: keep
    TileOffsets:
      action: keep
    TileByteCounts:
      action: keep
    SubIFD:
      action: keep
    ExtraSamples:
      action: keep
    SampleFormat:
      action: keep
    Indexed:
      action: keep
    JPEGTables:
      action: keep
    StripRowCounts:
      action: keep
    ICCProfile:
      action: keep
    JPEGProc:
      action: keep
    JPEGIFOffset:
      action: keep
    JPEGIFByteCount:
      action: keep
    JPEGRestartInterval:
      action: keep
    JPEGLosslessPredictors:
      action: keep
    JPEGPointTransform:
      action: keep
    JPEGQTables:
      action: keep
    JPEGDCTables:
      action: keep
    JPEGACTables:
      action: keep
    YCbCrCoefficients:
      action: keep
    YCbCrSubsampling:
      action: keep
    YCbCrPositioning:
      action: keep
    PlanarConfig:
      action: keep
182 | 


--------------------------------------------------------------------------------
/client/src/components/ImageDataTable.vue:
--------------------------------------------------------------------------------
<script setup lang="ts">
// Props for the redaction-plan table.
defineProps({
  // The template iterates `imageRedactionPlan.data`; each entry is rendered as one table row,
  // keyed and labelled by its property name.
  imageRedactionPlan: {
    type: Object,
    required: true,
  },
  // Names of the per-image entries to show as extra columns, in display order.
  usedColumns: {
    type: Array<string>,
    required: true,
  },
});
</script>
 13 | <template>
 14 |   <div v-if="!usedColumns" class="m-auto flex justify-center">
 15 |     Loading.. <span class="loading loading-bars loading-md" />
 16 |   </div>
 17 |   <table
 18 |     v-if="usedColumns"
 19 |     class="table table-xs table-auto text-center bg-base-100"
 20 |   >
 21 |     <thead>
 22 |       <tr class="text-base bg-gray-600">
 23 |         <th class="bg-neutral text-white py-5 px-6">Image File Name</th>
 24 |         <th class="bg-gray-600 text-white py-5 px-10">Image</th>
 25 |         <th class="bg-gray-600 text-white py-5 px-10">Label</th>
 26 |         <th class="bg-gray-600 text-white py-5 px-10">Macro</th>
 27 |         <th class="bg-gray-600 text-white">Redaction Status</th>
 28 |         <th
 29 |           v-if="Object.keys(imageRedactionPlan.data).includes('missing_tags')"
 30 |           class="text-white p-4"
 31 |         >
 32 |           Missing Tags
 33 |         </th>
 34 |         <th
 35 |           v-for="tag in usedColumns"
 36 |           :key="tag"
 37 |           class="bg-gray-600 text-white py-5 px-6"
 38 |         >
 39 |           {{ tag }}
 40 |         </th>
 41 |       </tr>
 42 |     </thead>
 43 |     <tbody class="text-base bg-base-100">
 44 |       <tr v-for="(image, index) in imageRedactionPlan.data" :key="index">
 45 |         <th>{{ index }}</th>
 46 |         <td class="imagebox">
 47 |           <img :src="image.thumbnail" />
 48 |         </td>
 49 |         <td class="imagebox">
 50 |           <img :src="image.label" />
 51 |         </td>
 52 |         <td class="imagebox">
 53 |           <img :src="image.macro" />
 54 |         </td>
 55 |         <td>
 56 |           <div
 57 |             v-if="image.missing_tags"
 58 |             class="tooltip tooltip-right"
 59 |             :data-tip="`${image.missing_tags.length} tag(s) missing redaction rules.`"
 60 |           >
 61 |             <i class="ri-error-warning-fill text-red-600 text-xl" />
 62 |             <div v-for="(obj, pos) in image.missing_tags" :key="pos">
 63 |               <span v-for="(value, key) in obj" :key="key">
 64 |                 {{ key }}: {{ value }}
 65 |               </span>
 66 |             </div>
 67 |           </div>
 68 | 
 69 |           <div
 70 |             v-else
 71 |             class="tooltip tooltip-right"
 72 |             :data-tip="`No missing redaction rules.`"
 73 |           >
 74 |             <i class="ri-checkbox-circle-fill text-green-600 text-xl" />
 75 |           </div>
 76 |         </td>
 77 |         <template v-for="tag in usedColumns" :key="tag">
 78 |           <td class="text-ellipsis overflow-hidden max-w-[200px]">
 79 |             <span
 80 |               v-if="image[tag]"
 81 |               :class="
 82 |                 image[tag].action === 'delete'
 83 |                   ? 'line-through text-accent font-bold decoration-2'
 84 |                   : ''
 85 |               "
 86 |             >
 87 |               <span v-if="image[tag].binary" class="text-nowrap">
 88 |                 {{ image[tag].binary.bytes }} bytes
 89 |                 <span>
 90 |                   {{ image[tag].binary.value.slice(0, 20) }}
 91 |                 </span>
 92 |               </span>
 93 |               <span v-else class="text-nowrap">
 94 |                 {{
 95 |                   typeof image[tag].value === "object"
 96 |                     ? image[tag].value.join(", ")
 97 |                     : image[tag].value
 98 |                 }}
 99 |               </span>
100 |             </span>
101 |           </td>
102 |         </template>
103 |       </tr>
104 |     </tbody>
105 |   </table>
106 | </template>
<style scoped>
/* Top-left header cell: pinned to the left edge and stacked above the other header cells. */
thead th:first-child {
  position: sticky;
  left: 0;
  z-index: 2;
  background-color: #201c35;
}
/* All header cells stay pinned to the top while the table scrolls vertically. */
thead th {
  position: sticky;
  top: 0;
  z-index: 1;
}

tbody th {
  position: relative;
}
/* First cell of each body row stays pinned to the left; opaque background hides cells */
/* scrolling underneath it. */
tbody th:first-child {
  position: sticky;
  left: 0;
  z-index: 1;
  background-color: #ffffff;
}
/* Cells holding the thumbnail / label / macro images. */
td.imagebox {
  min-width: 160px;
  text-align: center;
  vertical-align: middle;
}

/* Scale images down to the cell width while keeping their aspect ratio. */
td.imagebox img {
  max-width: 100%;
  height: auto;
  display: inline-block;
}
</style>
141 | 


--------------------------------------------------------------------------------
/imagedephi/rules.py:
--------------------------------------------------------------------------------
  1 | from enum import Enum
  2 | from typing import Annotated, Any, Literal, Type, TypeAlias
  3 | 
  4 | from pydantic import BaseModel, Field, validator
  5 | 
  6 | 
class FileFormat(Enum):
    """File formats named by this module; each member's value is the lowercase format name."""

    TIFF = "tiff"
    SVS = "svs"
    DICOM = "dicom"
 11 | 
 12 | 
# Maps each `expected_type` name accepted by CheckTypeMetadataRule to the Python types that
# rule stores in `valid_data_types`.
# NOTE(review): "rational" maps to int only -- presumably rationals are held as integer
# numerator/denominator entries; confirm against the code that performs the check.
expected_type_map: dict[str, list[Type[Any]]] = {
    "integer": [int],
    "number": [int, float],
    "text": [str],
    "rational": [int],
}
 19 | 
# Names of the redaction operations. These are the same literals accepted by `_Rule.action`,
# except that "check_type" is not included here.
RedactionOperation: TypeAlias = Literal[
    "keep",
    "delete",
    "replace",
    "empty",
    "replace_uid",
    "replace_dummy",
    "modify_date",
]
 29 | 
 30 | 
# Base model for every rule. Subclasses narrow `action` to a single literal so that the
# rule unions below can discriminate on it.
class _Rule(BaseModel):
    # key_name is not set by users, but is available internally. The `set_tag_name`
    # validators on the rules containers fill it in from the mapping key, and
    # `exclude=True` keeps it out of the model's exported data.
    key_name: str = Field(exclude=True)
    action: Literal[
        "keep",
        "delete",
        "replace",
        "replace_uid",
        "replace_dummy",
        "empty",
        "check_type",
        "modify_date",
    ]
 44 | 
 45 | 
# Selected when a rule's `action` is "keep"; member of both the metadata and image rule unions.
class KeepRule(_Rule):
    action: Literal["keep"]
 48 | 
 49 | 
# Selected when a rule's `action` is "delete"; member of both the metadata and image rule
# unions.
class DeleteRule(_Rule):
    action: Literal["delete"]
 52 | 
 53 | 
# Selected when a rule's `action` is "empty"; only part of the metadata rule union.
class EmptyRule(_Rule):
    """Replace with a zero-length value."""

    action: Literal["empty"]
 58 | 
 59 | 
# Common base of the "replace" rules: MetadataReplaceRule and ImageReplaceRule extend it
# with the field describing the replacement.
class ReplaceRule(_Rule):
    action: Literal["replace"]
 62 | 
 63 | 
# "replace" rule for metadata entries.
class MetadataReplaceRule(ReplaceRule):
    # Presumably the value written in place of the original -- confirm against the redaction code.
    new_value: str
 66 | 
 67 | 
# Selected when a rule's `action` is "modify_date"; only part of the metadata rule union.
class ModifyDateRule(_Rule):
    action: Literal["modify_date"]
 70 | 
 71 | 
# "replace" rule for associated images.
class ImageReplaceRule(ReplaceRule):
    # "blank_image" is currently the only accepted replacement.
    replace_with: Literal["blank_image"]
 74 | 
 75 | 
 76 | class CheckTypeMetadataRule(_Rule):
 77 |     action: Literal["check_type"]
 78 |     expected_type: Literal["number", "integer", "text", "rational"]
 79 |     valid_data_types: list[Type[Any]] = []
 80 |     expected_count: int = 1
 81 | 
 82 |     @validator("valid_data_types", pre=True, always=True)
 83 |     @classmethod
 84 |     def set_valid_data_types(
 85 |         cls, valid_data_types: list[Type[Any]], values: dict[str, Any]
 86 |     ) -> list[Type[Any]]:
 87 |         valid_data_types = expected_type_map[values["expected_type"]]
 88 |         return valid_data_types
 89 | 
 90 | 
# Selected when a rule's `action` is "replace_uid"; only part of the metadata rule union.
class UidReplaceRule(_Rule):
    action: Literal["replace_uid"]
 93 | 
 94 | 
# Selected when a rule's `action` is "replace_dummy"; only part of the metadata rule union.
class DummyReplaceRule(_Rule):
    """Replace value with a system-defined value based on original type."""

    action: Literal["replace_dummy"]
 99 | 
100 | 
# Union of every rule type that may be applied to a metadata entry. The `action` field is the
# discriminator, so its value decides which model a rule mapping is validated as.
ConcreteMetadataRule = Annotated[
    MetadataReplaceRule
    | KeepRule
    | DeleteRule
    | CheckTypeMetadataRule
    | UidReplaceRule
    | EmptyRule
    | DummyReplaceRule
    | ModifyDateRule,
    Field(discriminator="action"),
]
112 | 
# Union of the rule types that may be applied to an associated image ("replace", "keep" or
# "delete"), discriminated on `action`.
ConcreteImageRule = Annotated[
    ImageReplaceRule | KeepRule | DeleteRule, Field(discriminator="action")
]
116 | 
117 | 
# NOTE(review): none of the other models in this module derive from or reference this class;
# confirm whether it is still used elsewhere.
class BaseRules(BaseModel):
    matches: list[str]
120 | 
121 | 
# Rules for one TIFF-based format: per-key rules for associated images and metadata, plus
# optional fallbacks.
class TiffRules(BaseModel):
    associated_images: dict[str, ConcreteImageRule] = {}
    metadata: dict[str, ConcreteMetadataRule] = {}
    metadata_fallback_action: Literal["delete"] | Literal["keep"] | None = None
    associated_image_fallback: ConcreteImageRule | None = None

    # `pre=True` runs this before the entries are validated into rule models, which is what
    # lets it supply the required `key_name` field on each raw rule mapping.
    @validator("metadata", "associated_images", pre=True)
    @classmethod
    def set_tag_name(cls, metadata: Any):
        """Copy each mapping key into its rule's `key_name`; non-dict input passes through."""
        if not isinstance(metadata, dict):
            return metadata
        for tag_name, raw_rule in metadata.items():
            if not isinstance(raw_rule, dict):
                continue
            raw_rule["key_name"] = tag_name
        return metadata
137 | 
138 | 
# TiffRules extended with per-key rules for the SVS image description.
class SvsRules(TiffRules):
    image_description: dict[str, ConcreteMetadataRule] = {}

    # `pre=True` runs this before the entries are validated into rule models, which is what
    # lets it supply the required `key_name` field on each raw rule mapping.
    @validator("metadata", "image_description", "associated_images", pre=True)
    @classmethod
    def set_tag_name(cls, metadata: Any):
        """Copy each mapping key into its rule's `key_name`; non-dict input passes through."""
        if not isinstance(metadata, dict):
            return metadata
        for tag_name, raw_rule in metadata.items():
            if not isinstance(raw_rule, dict):
                continue
            raw_rule["key_name"] = tag_name
        return metadata
151 | 
152 | 
# Rules for DICOM files: per-key rules for metadata and associated images, plus the action
# used for custom metadata ("delete" unless configured otherwise).
class DicomRules(BaseModel):
    metadata: dict[str, ConcreteMetadataRule] = {}
    associated_images: dict[str, ConcreteImageRule] = {}
    custom_metadata_action: Literal["keep"] | Literal["delete"] | Literal["use_rule"] = "delete"

    # `pre=True` runs this before the entries are validated into rule models, which is what
    # lets it supply the required `key_name` field on each raw rule mapping.
    @validator("metadata", "associated_images", pre=True)
    @classmethod
    def set_tag_name(cls, metadata: Any):
        """Copy each mapping key into its rule's `key_name`; non-dict input passes through."""
        if not isinstance(metadata, dict):
            return metadata
        for tag_name, raw_rule in metadata.items():
            if not isinstance(raw_rule, dict):
                continue
            raw_rule["key_name"] = tag_name
        return metadata
166 | 
167 | 
# Top-level ruleset model: descriptive fields plus one rules container per file format.
# Every field has a default, so an empty mapping validates.
class Ruleset(BaseModel):
    name: str = "My Rules"
    description: str = "My rules"
    output_file_name: str = "study_slide"
    strict: bool = False
    # Per-format rules; each defaults to an empty rules container.
    tiff: TiffRules = TiffRules()
    svs: SvsRules = SvsRules()
    dicom: DicomRules = DicomRules()
176 | 


--------------------------------------------------------------------------------
/imagedephi/utils/tiff.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | from collections.abc import Generator
  4 | from pathlib import Path
  5 | from typing import TYPE_CHECKING
  6 | 
  7 | import tifftools
  8 | 
  9 | from imagedephi.gui.utils.image import IMAGE_DEPHI_MAX_IMAGE_PIXELS
 10 | 
 11 | if TYPE_CHECKING:
 12 |     from tifftools.tifftools import IFD
 13 | 
 14 | 
# Numeric tag ids, as used for the keys of an IFD's "tags" mapping.
IMAGE_DESCRIPTION_ID = tifftools.constants.Tag["ImageDescription"].value
NEWSUBFILETYPE_ID = tifftools.constants.Tag["NewSubfileType"].value
 17 | 
 18 | 
 19 | def iter_ifds(
 20 |     ifds: list[IFD],
 21 |     tag_set=tifftools.constants.Tag,
 22 | ) -> Generator[IFD, None, None]:
 23 |     for ifd in ifds:
 24 |         for tag_id, entry in ifd["tags"].items():
 25 |             tag: tifftools.TiffTag = tifftools.constants.get_or_create_tag(
 26 |                 tag_id,
 27 |                 tagSet=tag_set,
 28 |                 datatype=tifftools.Datatype[entry["datatype"]],
 29 |             )
 30 |             if tag.isIFD():
 31 |                 # entry['ifds'] contains a list of lists
 32 |                 # see tifftools.read_tiff
 33 |                 for sub_ifds in entry.get("ifds", []):
 34 |                     yield from iter_ifds(sub_ifds, tag.get("tagset"))
 35 |         yield ifd
 36 | 
 37 | 
 38 | def is_tiled(ifd: IFD):
 39 |     """Determine if an IFD represents a tiled image."""
 40 |     return tifftools.Tag.TileWidth.value in ifd["tags"]
 41 | 
 42 | 
 43 | def get_tiff_tag(tag_name: str) -> tifftools.TiffTag:
 44 |     """Given the name of a TIFF tag, attempt to return the TIFF tag from tifftools."""
 45 |     # This function checks TagSet objects from tifftools for a given tag. If the tag is not found
 46 |     # after exhausting the tag sets, a new tag is created.
 47 |     for tag_set in [
 48 |         tifftools.constants.Tag,
 49 |         tifftools.constants.GPSTag,
 50 |         tifftools.constants.EXIFTag,
 51 |     ]:
 52 |         if tag_name in tag_set:
 53 |             return tag_set[tag_name]
 54 |     return tifftools.constants.get_or_create_tag(tag_name)
 55 | 
 56 | 
 57 | def _get_macro(ifds: list[IFD]) -> IFD | None:
 58 |     key = "macro"
 59 |     for ifd in iter_ifds(ifds):
 60 |         if IMAGE_DESCRIPTION_ID in ifd["tags"]:
 61 |             if key in str(ifd["tags"][IMAGE_DESCRIPTION_ID]["data"]):
 62 |                 return ifd
 63 |         if NEWSUBFILETYPE_ID in ifd["tags"]:
 64 |             newsubfiletype = ifd["tags"][NEWSUBFILETYPE_ID]["data"][0]
 65 |             if newsubfiletype == 9:
 66 |                 return ifd
 67 |     return None
 68 | 
 69 | 
 70 | def _get_label(ifds: list[IFD]) -> IFD | None:
 71 |     key = "label"
 72 |     for ifd in iter_ifds(ifds):
 73 |         if IMAGE_DESCRIPTION_ID in ifd["tags"]:
 74 |             if key in str(ifd["tags"][IMAGE_DESCRIPTION_ID]["data"]):
 75 |                 return ifd
 76 |         # Check NewSubfileType/tiled or non tiled
 77 |         if not is_tiled(ifd) and NEWSUBFILETYPE_ID in ifd["tags"]:
 78 |             if ifd["tags"][NEWSUBFILETYPE_ID]["data"][0] == 1:
 79 |                 return ifd
 80 |     return None
 81 | 
 82 | 
 83 | def get_associated_image_svs(image_path: Path, image_key: str) -> IFD | None:
 84 |     """Given a path to an SVS image, return the IFD for a given associated label or macro image."""
 85 |     if image_key not in ["macro", "label"]:
 86 |         raise ValueError("image_key must be one of macro, label")
 87 | 
 88 |     image_info = tifftools.read_tiff(image_path)
 89 |     ifds = image_info["ifds"]
 90 | 
 91 |     if "aperio" not in str(ifds[0]["tags"][IMAGE_DESCRIPTION_ID]["data"]).lower():
 92 |         return None
 93 | 
 94 |     if image_key == "macro":
 95 |         return _get_macro(ifds)
 96 |     elif image_key == "label":
 97 |         return _get_label(ifds)
 98 |     return None
 99 | 
100 | 
def get_ifd_for_thumbnail(image_path: Path, thumbnail_width=0, thumbnail_height=0) -> IFD | None:
    """Given a path to a TIFF image, return the tiled IFD to build a thumbnail from.

    Only tiled IFDs (those with a TileWidth tag) with non-zero dimensions and at most
    IMAGE_DEPHI_MAX_IMAGE_PIXELS pixels are considered. IFDs are visited in ``iter_ifds``
    order: the first eligible one becomes the candidate, and a later IFD replaces it either
    when it is narrower yet still at least ``thumbnail_width`` x ``thumbnail_height``, or when
    it is not narrower and the current candidate is smaller than the requested thumbnail in
    either dimension.

    Returns None when no IFD is eligible.
    """
    image_info = tifftools.read_tiff(image_path)

    candidate_width = float("inf")
    candidate_height = float("inf")
    candidate_ifd = None
    for ifd in iter_ifds(image_info["ifds"]):
        # We are interested in the lowest res tiled image.
        if tifftools.Tag.TileWidth.value not in ifd["tags"]:
            continue

        image_width = int(ifd["tags"][tifftools.Tag.ImageWidth.value]["data"][0])
        image_height = int(ifd["tags"][tifftools.Tag.ImageHeight.value]["data"][0])

        # Pass over images that are too big or lacking information
        if (
            not image_width
            or not image_height
            or image_width * image_height > IMAGE_DEPHI_MAX_IMAGE_PIXELS
        ):
            continue

        if candidate_ifd is None:
            # First eligible IFD: accept it unconditionally.
            candidate_ifd = ifd
            candidate_width = image_width
            candidate_height = image_height
        else:
            # Look at the candidate_ifd, the current ifd, and the thumbnail size
            if candidate_width > image_width:
                # This is case 1. If the current IFD is smaller than
                # the candidate AND is larger than the desired
                # thumbnail size, it is the new candidate
                if image_width >= thumbnail_width and image_height >= thumbnail_height:
                    candidate_ifd = ifd
                    candidate_width = image_width
                    candidate_height = image_height
            else:
                # candidate_width <= image_width
                # Case 2. The candidate should be replaced if it is smaller
                # than the desired thumbnail size
                if candidate_height < thumbnail_height or candidate_width < thumbnail_width:
                    candidate_ifd = ifd
                    candidate_width = image_width
                    candidate_height = image_height

    return candidate_ifd
148 | 
149 | 
def get_is_svs(image_path: Path) -> bool:
    """Return True when the first IFD's ImageDescription mentions "aperio" (case-insensitive).

    A first IFD without an ImageDescription tag yields False.
    """
    first_ifd_tags = tifftools.read_tiff(image_path)["ifds"][0]["tags"]
    description_id = tifftools.Tag.ImageDescription.value
    if description_id not in first_ifd_tags:
        return False
    description = first_ifd_tags[description_id]["data"]
    return "aperio" in str(description).lower()
156 | 


--------------------------------------------------------------------------------
/imagedephi/gui/utils/image.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | from io import BytesIO
  4 | from pathlib import Path
  5 | from typing import TYPE_CHECKING
  6 | 
  7 | from PIL import Image, UnidentifiedImageError
  8 | from fastapi.responses import StreamingResponse
  9 | import tifftools
 10 | from wsidicom import WsiDicom
 11 | from wsidicom.errors import WsiDicomNotFoundError
 12 | 
 13 | from imagedephi.gui.utils.constants import MAX_ASSOCIATED_IMAGE_SIZE
 14 | 
 15 | if TYPE_CHECKING:
 16 |     from tifftools.tifftools import IFD
 17 | 
# Largest pixel count (width * height) this module will turn into a thumbnail. It also
# temporarily replaces PIL's Image.MAX_IMAGE_PIXELS in get_image_response_from_tiff.
IMAGE_DEPHI_MAX_IMAGE_PIXELS = 1000000000
 19 | 
 20 | 
 21 | def get_scale_factor(max_dimensions: tuple[int, int], image_dimensions: tuple[int, int]) -> float:
 22 |     height_scale = int(max_dimensions[1]) / image_dimensions[1]
 23 |     width_scale = int(max_dimensions[0]) / image_dimensions[0]
 24 |     return min(height_scale, width_scale)
 25 | 
 26 | 
 27 | def extract_thumbnail_from_image_bytes(
 28 |     ifd: IFD,
 29 |     file_name: str,
 30 |     max_width=MAX_ASSOCIATED_IMAGE_SIZE,
 31 |     max_height=MAX_ASSOCIATED_IMAGE_SIZE,
 32 | ) -> Image.Image | None:
 33 |     offsets = ifd["tags"][tifftools.Tag.TileOffsets.value]["data"]
 34 |     byte_counts = ifd["tags"][tifftools.Tag.TileByteCounts.value]["data"]
 35 |     num_tiles = len(offsets)
 36 | 
 37 |     height = int(ifd["tags"][tifftools.Tag.ImageLength.value]["data"][0])
 38 |     width = int(ifd["tags"][tifftools.Tag.ImageWidth.value]["data"][0])
 39 |     top: int = 0
 40 |     left: int = 0
 41 | 
 42 |     image_canvas: Image.Image | None = None
 43 |     with open(file_name, "rb") as image_file:
 44 |         for idx in range(num_tiles):
 45 |             image_file.seek(int(offsets[idx]))
 46 |             tile_bytes = BytesIO(image_file.read(int(byte_counts[idx])))
 47 |             tile_image = Image.open(tile_bytes)
 48 | 
 49 |             if not image_canvas:
 50 |                 image_canvas = Image.new(tile_image.mode, (width, height))
 51 | 
 52 |             tile_size = tile_image.size
 53 | 
 54 |             bottom = top + tile_size[0]
 55 |             right = left + tile_size[1]
 56 |             if bottom > height:
 57 |                 bottom = height
 58 |             if right > width:
 59 |                 right = width
 60 | 
 61 |             piece_height = bottom - top
 62 |             piece_width = right - left
 63 | 
 64 |             if piece_width != tile_image.size[1] or piece_height != tile_image.size[0]:
 65 |                 tile_image = tile_image.crop((0, 0, piece_width, piece_height))
 66 | 
 67 |             image_canvas.paste(tile_image, (left, top, right, bottom))
 68 | 
 69 |             left = right
 70 |             if left >= width:
 71 |                 # go to next row
 72 |                 left = 0
 73 |                 top = top + tile_size[0]
 74 | 
 75 |     if not image_canvas:
 76 |         return None
 77 | 
 78 |     scale_factor = get_scale_factor((max_width, max_height), image_canvas.size)
 79 |     new_size = (
 80 |         int(image_canvas.size[0] * scale_factor),
 81 |         int(image_canvas.size[1] * scale_factor),
 82 |     )
 83 |     resized_image = image_canvas.resize(new_size, Image.LANCZOS)
 84 |     return resized_image
 85 | 
 86 | 
def get_image_response_from_ifd(
    ifd: "IFD",
    file_name: str,
    max_width=MAX_ASSOCIATED_IMAGE_SIZE,
    max_height=MAX_ASSOCIATED_IMAGE_SIZE,
) -> StreamingResponse:
    """Return a JPEG streaming response for the image described by ``ifd``.

    The IFD is first written out as a standalone TIFF and opened with PIL. If PIL cannot
    identify that TIFF, the image is rebuilt tile by tile from ``file_name`` instead.

    Raises an Exception when the IFD's pixel count exceeds IMAGE_DEPHI_MAX_IMAGE_PIXELS.
    """
    # Make sure the image isn't too big
    height = int(ifd["tags"][tifftools.Tag.ImageLength.value]["data"][0])
    width = int(ifd["tags"][tifftools.Tag.ImageWidth.value]["data"][0])
    if height * width > IMAGE_DEPHI_MAX_IMAGE_PIXELS:
        raise Exception(f"{file_name} too large to create thumbnail")

    # use tifftools and PIL to create a jpeg of the associated image, sized for the browser
    tiff_buffer = BytesIO()
    jpeg_buffer = BytesIO()
    tifftools.write_tiff(ifd, tiff_buffer)
    try:
        image = Image.open(tiff_buffer)

        scale_factor = get_scale_factor((max_width, max_height), image.size)

        new_size = (int(image.size[0] * scale_factor), int(image.size[1] * scale_factor))
        image.thumbnail(new_size, Image.LANCZOS)
        image.save(jpeg_buffer, "JPEG")
        # Rewind so the response streams the JPEG from its first byte.
        jpeg_buffer.seek(0)

    except UnidentifiedImageError:
        #  Extract a thumbnail from the original image if the IFD can't be opened by PIL
        composite_image = extract_thumbnail_from_image_bytes(ifd, file_name, max_width, max_height)
        if composite_image:
            composite_image.save(jpeg_buffer, "JPEG")
            jpeg_buffer.seek(0)
        # NOTE(review): when no composite could be built, the response below has an empty body.
    return StreamingResponse(jpeg_buffer, media_type="image/jpeg")
120 | 
121 | 
def get_image_response_from_tiff(
    file_name: str, max_width=MAX_ASSOCIATED_IMAGE_SIZE, max_height=MAX_ASSOCIATED_IMAGE_SIZE
):
    """
    Use as a fallback when we can't find the best IFD for a thumbnail image.

    This happens when attempting to extract a thumbnail from a non-tiled tiff.
    We expect users to be opening very large images, so we override the default
    MAX_IMAGE_PIXELS of PIL.Image with our own value. The previous limit is
    restored before returning, including when opening or converting the image fails.

    Returns a StreamingResponse carrying the JPEG thumbnail, scaled to fit within
    ``max_width`` x ``max_height``.
    """
    previous_limit = Image.MAX_IMAGE_PIXELS
    Image.MAX_IMAGE_PIXELS = IMAGE_DEPHI_MAX_IMAGE_PIXELS
    jpeg_buffer = BytesIO()
    try:
        # The context manager closes the underlying file once the JPEG has been written.
        with Image.open(file_name) as image:
            scale_factor = get_scale_factor((max_width, max_height), image.size)
            new_size = (int(image.size[0] * scale_factor), int(image.size[1] * scale_factor))
            image.thumbnail(new_size, Image.LANCZOS)
            image.save(jpeg_buffer, "JPEG")
    finally:
        # MAX_IMAGE_PIXELS is process-wide state; never leave it raised after an error.
        Image.MAX_IMAGE_PIXELS = previous_limit
    jpeg_buffer.seek(0)
    return StreamingResponse(jpeg_buffer, media_type="image/jpeg")
143 | 
144 | 
def get_image_response_dicom(
    related_files: list[Path],
    key: str,
    max_width=MAX_ASSOCIATED_IMAGE_SIZE,
    max_height=MAX_ASSOCIATED_IMAGE_SIZE,
):
    """Return a JPEG response for the thumbnail, label or overview image of a DICOM slide.

    Args:
        related_files: Files passed to ``WsiDicom.open``.
        key: "thumbnail", "label" or "macro" (the slide's overview image).
        max_width: Maximum width of the returned image.
        max_height: Maximum height of the returned image.

    Returns:
        A StreamingResponse with the JPEG; an empty 404 StreamingResponse when wsidicom
        reports that the requested image does not exist; None when ``key`` is not one of the
        supported values.
    """
    slide = WsiDicom.open(related_files)
    # Created before the try block so the 404 branch below always has a buffer to return.
    img_buffer = BytesIO()
    image = None
    try:
        if key == "thumbnail":
            image = slide.read_thumbnail()
        elif key == "label":
            image = slide.read_label()
        elif key == "macro":
            image = slide.read_overview()
        if image is not None:
            # resize the image
            scale_factor = get_scale_factor((max_width, max_height), image.size)
            new_size = (int(image.size[0] * scale_factor), int(image.size[1] * scale_factor))
            image.thumbnail(new_size, Image.LANCZOS)
            image.save(img_buffer, "JPEG")
            img_buffer.seek(0)
            return StreamingResponse(img_buffer, media_type="image/jpeg")
    except WsiDicomNotFoundError:
        return StreamingResponse(img_buffer, status_code=404)
    finally:
        # Release the slide's open files regardless of which path returned.
        slide.close()
    return None
171 | 


--------------------------------------------------------------------------------
/client/src/components/FileBrowser.vue:
--------------------------------------------------------------------------------
  1 | <script setup lang="ts">
  2 | import { ref, onMounted, onBeforeUnmount } from "vue";
  3 | import {
  4 |   selectedDirectories,
  5 |   updateDirectories,
  6 |   directoryData,
  7 |   loadingData,
  8 |   calculateVisibleItems,
  9 |   visibleImages,
 10 |   remainingImages,
 11 | } from "../store/directoryStore";
 12 | import { updateTableData } from "../store/imageStore";
 13 | 
 14 | const props = defineProps({
 15 |   modalId: {
 16 |     type: String,
 17 |     required: true,
 18 |   },
 19 |   title: {
 20 |     type: String,
 21 |     required: true,
 22 |   },
 23 | });
 24 | 
 25 | const modal = ref();
 26 | defineExpose({ modal });
 27 | defineEmits(["update-image-list"]);
 28 | 
 29 | const closeModal = () => {
 30 |   modal.value.close();
 31 | };
 32 | 
 33 | const updateSelectedDirectories = (path: string) => {
 34 |   selectedDirectories.value[props.modalId] = path;
 35 |   localStorage.setItem(
 36 |     "inputDirectory",
 37 |     selectedDirectories.value.inputDirectory,
 38 |   );
 39 |   localStorage.setItem(
 40 |     "outputDirectory",
 41 |     selectedDirectories.value.outputDirectory,
 42 |   );
 43 |   localStorage.setItem(
 44 |     "rulesetDirectory",
 45 |     selectedDirectories.value.rulesetDirectory,
 46 |   );
 47 | };
 48 | 
 49 | onMounted(() => {
 50 |   calculateVisibleItems();
 51 |   window.addEventListener("resize", calculateVisibleItems);
 52 | });
 53 | 
 54 | onBeforeUnmount(() => {
 55 |   window.removeEventListener("resize", calculateVisibleItems);
 56 | });
 57 | </script>
 58 | 
 59 | <template>
 60 |   <dialog :id="modalId" ref="modal" class="modal">
 61 |     <div class="w-full max-w-4xl h-4/5 rounded-xl overflow-hidden">
 62 |       <div class="modal-box w-full max-w-4xl h-4/5 overflow-auto pt-0">
 63 |         <div class="sticky top-0 pt-6 bg-white menu-top">
 64 |           <div class="flex justify-between">
 65 |             <h2 class="text-lg font-semibold">
 66 |               {{ title }}
 67 |             </h2>
 68 |             <button
 69 |               class="btn btn-primary float-right text-white uppercase"
 70 |               type="button"
 71 |               @click="
 72 |                 $emit('update-image-list'),
 73 |                   closeModal(),
 74 |                   title !== 'Output Directory'
 75 |                     ? updateTableData({
 76 |                         directory: selectedDirectories.inputDirectory,
 77 |                         rules: selectedDirectories.rulesetDirectory,
 78 |                         limit: 50,
 79 |                         offset: 0,
 80 |                         update: false,
 81 |                       })
 82 |                     : ''
 83 |               "
 84 |             >
 85 |               Select
 86 |             </button>
 87 |           </div>
 88 |           <div class="text-sm breadcrumbs mb-4 border-b-2">
 89 |             <ul class="flex flex-wrap">
 90 |               <li
 91 |                 v-for="(ancestor, index) in directoryData.ancestors"
 92 |                 :key="index"
 93 |                 class="mr-1 text-base"
 94 |               >
 95 |                 <span
 96 |                   v-if="index === directoryData.ancestors.length - 1"
 97 |                   class="font-black"
 98 |                   >{{ ancestor.name ? ancestor.name : "/" }}</span
 99 |                 >
100 |                 <a
101 |                   v-else
102 |                   class="text-blue-700"
103 |                   @click="
104 |                     updateDirectories(ancestor.path),
105 |                       updateSelectedDirectories(ancestor.path)
106 |                   "
107 |                 >
108 |                   {{ ancestor.name ? ancestor.name : "/" }}
109 |                 </a>
110 |               </li>
111 |             </ul>
112 |           </div>
113 |         </div>
114 |         <div v-if="loadingData" class="text-center">
115 |           <span class="loading loading-spinner text-primary"></span>
116 |           <span class="ml-2 italic font-light align-top"
117 |             >Collecting Directory Data</span
118 |           >
119 |         </div>
120 |         <div class="list-container">
121 |           <ul class="text-blue-700">
122 |             <li
123 |               v-for="child in directoryData.children.sort((a, b) => {
124 |                 const folder1 = a.name.toLowerCase();
125 |                 const folder2 = b.name.toLowerCase();
126 |                 if (folder1 < folder2) {
127 |                   return -1;
128 |                 }
129 |                 if (folder1 > folder2) {
130 |                   return 1;
131 |                 }
132 |                 return 0;
133 |               })"
134 |               :key="child.path"
135 |               class="hover:bg-base-300 cursor-default py-0.5"
136 |               @click="
137 |                 updateDirectories(child.path),
138 |                   updateSelectedDirectories(child.path)
139 |               "
140 |             >
141 |               <i class="ri-folder-3-fill text-neutral"></i>
142 |               {{ child.name }}
143 |             </li>
144 |           </ul>
145 |           <div class="list-container">
146 |             <ul class="pl-2">
147 |               <template v-if="modalId !== 'rulesetDirectory'">
148 |                 <li
149 |                   v-for="child_image in visibleImages"
150 |                   :key="child_image.path"
151 |                   class="py-0.5"
152 |                 >
153 |                   <i class="ri-image-fill text-sky-800"></i>
154 |                   {{ child_image.name }}
155 |                 </li>
156 |                 <li
157 |                   v-if="directoryData.childrenImages.length > 10"
158 |                   class="italic"
159 |                 >
160 |                   {{ remainingImages }} More Images
161 |                 </li>
162 |               </template>
163 |               <template v-if="modalId === 'rulesetDirectory'">
164 |                 <li
165 |                   v-for="ruleset in directoryData.childrenYaml"
166 |                   :key="ruleset.path"
167 |                   class="hover:bg-base-300 cursor-default py-0.5"
168 |                   @click="updateSelectedDirectories(ruleset.path)"
169 |                 >
170 |                   <i class="ri-file-text-line text-neutral"></i>
171 |                   {{ ruleset.name }}
172 |                 </li>
173 |               </template>
174 |             </ul>
175 |           </div>
176 |         </div>
177 |       </div>
178 |     </div>
179 |     <form method="dialog" class="modal-backdrop w-screen h-screen absolute">
180 |       <button
181 |         @click="
182 |           $emit('update-image-list'),
183 |             closeModal(),
184 |             title !== 'Output Directory'
185 |               ? updateTableData({
186 |                   directory: selectedDirectories.inputDirectory,
187 |                   rules: selectedDirectories.rulesetDirectory,
188 |                   limit: 50,
189 |                   offset: 0,
190 |                   update: false,
191 |                 })
192 |               : ''
193 |         "
194 |       >
195 |         close
196 |       </button>
197 |     </form>
198 |   </dialog>
199 | </template>
200 | 


--------------------------------------------------------------------------------
/tests/test_e2e.py:
--------------------------------------------------------------------------------
  1 | import asyncio
  2 | from collections.abc import Generator
  3 | from concurrent.futures import ThreadPoolExecutor
  4 | from pathlib import Path
  5 | import subprocess
  6 | import sys
  7 | 
  8 | from click.testing import CliRunner
  9 | from freezegun import freeze_time
 10 | import httpx
 11 | import pytest
 12 | 
 13 | from imagedephi import main
 14 | from imagedephi.utils.network import wait_for_port
 15 | 
 16 | 
 17 | @freeze_time("2023-05-12 12:12:53")
 18 | @pytest.mark.timeout(5)
 19 | def test_e2e_run(
 20 |     cli_runner: CliRunner, data_dir: Path, test_image_tiff: Path, rules_dir: Path, tmp_path: Path
 21 | ) -> None:
 22 |     result = cli_runner.invoke(
 23 |         main.imagedephi,
 24 |         [
 25 |             "run",
 26 |             str(data_dir / "input" / "tiff"),
 27 |             "--output-dir",
 28 |             str(tmp_path),
 29 |             "-R",
 30 |             str(rules_dir / "example_user_rules.yaml"),
 31 |         ],
 32 |     )
 33 |     assert result.exit_code == 0
 34 |     output_file = tmp_path / "Redacted_2023-05-12_12-12-53" / "my_study_slide_1.tif"
 35 |     output_file_bytes = output_file.read_bytes()
 36 |     assert b"large_image_converter" not in output_file_bytes
 37 |     assert b"Redacted by ImageDePHI" in output_file_bytes
 38 | 
 39 | 
 40 | @freeze_time("2024-05-20 11:46:00")
 41 | @pytest.mark.timeout(5)
 42 | def test_e2e_strict(
 43 |     cli_runner: CliRunner, data_dir: Path, test_image_tiff: Path, rules_dir: Path, tmp_path: Path
 44 | ) -> None:
 45 |     result = cli_runner.invoke(
 46 |         main.imagedephi,
 47 |         [
 48 |             "run",
 49 |             str(data_dir / "input" / "tiff"),
 50 |             "--profile",
 51 |             "strict",
 52 |             "--output-dir",
 53 |             str(tmp_path),
 54 |         ],
 55 |     )
 56 |     assert result.exit_code == 0
 57 |     output_file = tmp_path / "Redacted_2024-05-20_11-46-00" / "study_slide_1.tif"
 58 |     assert output_file.exists()
 59 | 
 60 | 
 61 | @pytest.mark.timeout(5)
 62 | def test_e2e_plan(
 63 |     cli_runner: CliRunner, data_dir: Path, test_image_tiff: Path, rules_dir: Path
 64 | ) -> None:
 65 |     result = cli_runner.invoke(
 66 |         main.imagedephi,
 67 |         [
 68 |             "--override-rules",
 69 |             str(rules_dir / "example_user_rules.yaml"),
 70 |             "plan",
 71 |             str(data_dir / "input" / "tiff" / "test_image.tif"),
 72 |         ],
 73 |     )
 74 | 
 75 |     assert result.exit_code == 0
 76 | 
 77 | 
 78 | def test_e2e_gui(
 79 |     unused_tcp_port: int,
 80 |     data_dir: Path,
 81 |     test_image_tiff: Path,
 82 |     tmp_path: Path,
 83 | ) -> None:
 84 | 
 85 |     port = unused_tcp_port
 86 | 
 87 |     gui = subprocess.Popen(
 88 |         [sys.executable, "-m", "imagedephi", "gui", "--port", str(port)],
 89 |     )
 90 | 
 91 |     asyncio.run(asyncio.wait_for(wait_for_port(port), timeout=2))
 92 | 
 93 |     # Check that the GUI is running
 94 |     assert gui.poll() is None
 95 | 
 96 |     check_gui = httpx.get(f"http://127.0.0.1:{port}")
 97 |     assert check_gui.status_code == 200
 98 | 
 99 |     # flake8: noqa: E501
100 |     check_redact = httpx.post(
101 |         f"http://127.0.0.1:{port}/redact/?input_directory={str(data_dir /'input' /'tiff')}&output_directory={str(tmp_path)}",
102 |     )
103 | 
104 |     assert check_redact.status_code == 200
105 | 
106 |     gui.terminate()
107 |     gui.wait()
108 |     # Check that the GUI has stopped
109 |     assert gui.poll() is not None
110 | 
111 |     redacted_dirs = [path for path in tmp_path.glob("*Redacted*") if path.is_dir()]
112 |     assert len(redacted_dirs) > 0
113 |     redacted_files = list(redacted_dirs[0].glob("*"))
114 |     assert len(redacted_files) > 0
115 |     output_file = redacted_dirs[0] / "study_slide_1.tif"
116 |     output_file_bytes = output_file.read_bytes()
117 |     assert b"large_image_converter" not in output_file_bytes
118 | 
119 | 
def test_e2e_version(cli_runner: CliRunner) -> None:
    """``--version`` exits with code 0 and prints the version banner."""
    outcome = cli_runner.invoke(main.imagedephi, ["--version"])
    exit_code, printed = outcome.exit_code, outcome.output

    assert exit_code == 0
    assert "ImageDePHI, version" in printed
125 | 
126 | 
@pytest.mark.parametrize(
    "help_flag",
    [
        "--help",
        # "/?" is only exercised on Windows; it is skipped on every other platform.
        pytest.param(
            "/?", marks=pytest.mark.skipif(sys.platform != "win32", reason="windows only")
        ),
    ],
)
def test_e2e_help(cli_runner: CliRunner, help_flag: str) -> None:
    """Each help flag exits with code 0 and prints the usage text."""
    outcome = cli_runner.invoke(main.imagedephi, [help_flag])
    exit_code, printed = outcome.exit_code, outcome.output

    assert exit_code == 0
    assert "Usage: imagedephi" in printed
141 | 
142 | 
@freeze_time("2023-05-12 12:12:53")
@pytest.mark.timeout(5)
@pytest.mark.parametrize("rename", [True, False])
def test_e2e_rename_flag(
    cli_runner, data_dir: Path, test_image_tiff: Path, tmp_path: Path, rename: bool
):
    """``--rename`` yields ``study_slide_1.tif``; ``--skip-rename`` keeps ``test_image.tif``."""
    if rename:
        flag, expected_name = "--rename", "study_slide_1.tif"
    else:
        flag, expected_name = "--skip-rename", "test_image.tif"

    cli_args = ["run", str(data_dir / "input" / "tiff"), "--output-dir", str(tmp_path), flag]
    outcome = cli_runner.invoke(main.imagedephi, cli_args)

    assert outcome.exit_code == 0

    # Directory name follows from the frozen timestamp above.
    redacted_dir = tmp_path / "Redacted_2023-05-12_12-12-53"
    assert (redacted_dir / expected_name).exists()
163 | 
164 | 
@freeze_time("2024-01-04 10:48:00")
@pytest.mark.timeout(5)
@pytest.mark.parametrize(
    "recursive,rename", [(True, True), (True, False), (False, False), (False, True)]
)
def test_e2e_recursive(
    cli_runner, data_dir: Path, tmp_path: Path, test_image_svs: Path, recursive: bool, rename: bool
):
    """The ``svs`` output subdirectory is created only when ``--recursive`` is passed.

    Every combination of recursion and renaming is exercised. ``--skip-rename``
    is passed when ``rename`` is False, matching ``test_e2e_rename_flag``.
    """
    args = ["run", str(data_dir / "input"), "--output-dir", str(tmp_path)]
    if recursive:
        args.append("--recursive")
    if not rename:
        # Previously this flag was appended when rename was True, inverting the parameter.
        args.append("--skip-rename")
    result = cli_runner.invoke(main.imagedephi, args)

    assert result.exit_code == 0
    output_subdir = tmp_path / "Redacted_2024-01-04_10-48-00" / "svs"
    assert output_subdir.exists() == recursive

    if recursive:
        # The subdirectory must actually contain redacted output, not just exist.
        assert len(list(output_subdir.iterdir()))
186 | 
187 | 
@freeze_time("2024-01-04 10:48:00")
@pytest.mark.timeout(5)
def test_e2e_manifest(cli_runner, data_dir: Path, tmp_path: Path, test_image_tiff: Path):
    """A manifest CSV is written next to the output directory and names both input and output."""
    cli_args = ["run", str(data_dir / "input" / "tiff"), "--output-dir", str(tmp_path)]
    outcome = cli_runner.invoke(main.imagedephi, cli_args)

    assert outcome.exit_code == 0

    # Both names follow from the frozen timestamp above.
    run_name = "Redacted_2024-01-04_10-48-00"
    manifest_file = tmp_path / f"{run_name}_manifest.csv"
    assert manifest_file.exists()

    redacted_file = tmp_path / run_name / "study_slide_1.tif"
    assert redacted_file.exists()

    manifest_bytes = manifest_file.read_bytes()
    assert b"study_slide_1.tif" in manifest_bytes
    assert str(test_image_tiff).encode() in manifest_bytes
203 | 
204 | 
@pytest.mark.parametrize("args", [["foo"], ["-r", "foo"]])
def test_e2e_no_such_command(cli_runner, args):
    """An unknown subcommand exits with code 0 and prints both the error and the usage text."""
    outcome = cli_runner.invoke(main.imagedephi, args)
    assert outcome.exit_code == 0

    printed = outcome.output
    # The user is told the command was invalid ...
    assert "No such command" in printed
    # ... and is shown the usage docs.
    assert "Usage: imagedephi" in printed
214 | 


--------------------------------------------------------------------------------
/imagedephi/gui/api/api.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | import asyncio
  4 | from pathlib import Path
  5 | from typing import TYPE_CHECKING, Optional
  6 | import urllib.parse
  7 | 
  8 | from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect
  9 | from fastapi.responses import FileResponse
 10 | 
 11 | from imagedephi.gui.utils.constants import MAX_ASSOCIATED_IMAGE_SIZE
 12 | from imagedephi.gui.utils.directory import DirectoryData
 13 | from imagedephi.gui.utils.image import (
 14 |     get_image_response_dicom,
 15 |     get_image_response_from_ifd,
 16 |     get_image_response_from_tiff,
 17 | )
 18 | from imagedephi.redact import redact_images, show_redaction_plan
 19 | from imagedephi.rules import FileFormat
 20 | from imagedephi.utils.dicom import file_is_same_series_as
 21 | from imagedephi.utils.image import get_file_format_from_path
 22 | from imagedephi.utils.progress_log import get_next_progress_message
 23 | from imagedephi.utils.tiff import get_associated_image_svs, get_ifd_for_thumbnail, get_is_svs
 24 | 
 25 | if TYPE_CHECKING:
 26 |     from tifftools.tifftools import IFD
 27 | 
 28 | router = APIRouter()
 29 | 
 30 | 
 31 | @router.get("/directory/")
 32 | def select_directory(
 33 |     directory: str = ("/"),
 34 | ):
 35 |     directory_path = Path(directory)
 36 |     # TODO: if input_directory is specified but an empty string, it gets instantiated as the CWD
 37 |     if not directory_path.exists():
 38 |         raise HTTPException(status_code=404, detail="Input directory not found")
 39 | 
 40 |     def image_url(path: str, key: str) -> str:
 41 |         params = {"file_name": str(directory_path / path), "image_key": key}
 42 |         return "image/?" + urllib.parse.urlencode(params, safe="")
 43 | 
 44 |     return (
 45 |         {
 46 |             "directory_data": DirectoryData(directory_path),
 47 |             "image_url": image_url,
 48 |         },
 49 |     )
 50 | 
 51 | 
 52 | @router.get("/image/", response_class=FileResponse)
 53 | def get_associated_image(
 54 |     file_name: str = "",
 55 |     image_key: str = "",
 56 |     max_height=MAX_ASSOCIATED_IMAGE_SIZE,
 57 |     max_width=MAX_ASSOCIATED_IMAGE_SIZE,
 58 | ):
 59 |     if not file_name:
 60 |         raise HTTPException(status_code=400, detail="file_name is a required parameter")
 61 | 
 62 |     if not Path(file_name).exists():
 63 |         raise HTTPException(status_code=404, detail=f"{file_name} does not exist")
 64 | 
 65 |     if image_key not in ["macro", "label", "thumbnail"]:
 66 |         raise HTTPException(
 67 |             status_code=400,
 68 |             detail=f"{image_key} is not a supported associated image key for {file_name}.",
 69 |         )
 70 | 
 71 |     image_type = get_file_format_from_path(Path(file_name))
 72 |     if image_type == FileFormat.SVS or image_type == FileFormat.TIFF:
 73 |         ifd: IFD | None = None
 74 |         if image_key == "thumbnail":
 75 |             ifd = get_ifd_for_thumbnail(Path(file_name), int(max_width), int(max_height))
 76 |             if not ifd:
 77 |                 try:
 78 |                     # If the image is not tiled, no appropriate IFD was found. In this case
 79 |                     # attempt to get a thumbnail using the entire image.
 80 |                     return get_image_response_from_tiff(file_name, max_width, max_height)
 81 |                 except Exception as e:
 82 |                     raise HTTPException(
 83 |                         status_code=422,  # unprocessable content
 84 |                         detail=f"Could not generate thumbnail image for {file_name}: {e.args[0]}",
 85 |                     )
 86 |             else:
 87 |                 try:
 88 |                     return get_image_response_from_ifd(ifd, file_name, max_width, max_height)
 89 |                 except Exception as e:
 90 |                     raise HTTPException(
 91 |                         status_code=422,  # unprocessable content
 92 |                         detail=f"Could not generate thumbnail image for {file_name}: {e.args[0]}",
 93 |                     )
 94 | 
 95 |         # image key is one of "macro", "label"
 96 |         if not get_is_svs(Path(file_name)):
 97 |             raise HTTPException(
 98 |                 status_code=404, detail=f"Image key {image_key} is not supported for {file_name}"
 99 |             )
100 | 
101 |         ifd = get_associated_image_svs(Path(file_name), image_key)
102 |         if not ifd:
103 |             raise HTTPException(
104 |                 status_code=404, detail=f"No {image_key} image found for {file_name}"
105 |             )
106 |         try:
107 |             return get_image_response_from_ifd(ifd, file_name, max_height, max_width)
108 |         except Exception as e:
109 |             raise HTTPException(
110 |                 status_code=422,  # unprocessable content
111 |                 detail=f"Could not generate thumbnail image for {file_name}: {e.args[0]}",
112 |             )
113 |     elif image_type == FileFormat.DICOM:
114 |         path = Path(file_name)
115 |         related_files = [
116 |             child
117 |             for child in path.parent.iterdir()
118 |             if child != path and file_is_same_series_as(path, child)
119 |         ]
120 |         image_response = get_image_response_dicom(related_files, image_key, max_width, max_height)
121 |         if image_response:
122 |             return image_response
123 |         raise HTTPException(
124 |             status_code=404, detail=f"Could not retrieve {image_key} image for {file_name}"
125 |         )
126 | 
127 |     return HTTPException(
128 |         status_code=404, detail=f"Could not retrieve {image_key} image for {file_name}"
129 |     )
130 | 
131 | 
@router.get("/redaction_plan")
def get_redaction_plan(
    input_directory: str = ("/"),  # noqa: B008
    rules_path: Optional[str] = None,
    limit: int = 10,
    offset: int = 0,
    update: bool = True,
):
    """Return the redaction plan for ``input_directory`` as a dict.

    Raises an ``HTTPException`` with status 404 when ``input_directory`` is
    not a directory. A ``rules_path`` that is not an existing file is ignored
    after printing a notice.
    """
    input_path = Path(input_directory)
    if not input_path.is_dir():
        raise HTTPException(status_code=404, detail="Input directory not found")

    # TODO: Add support for multiple input directories in the UI
    if rules_path and not Path(rules_path).is_file():
        rules_path = None
        print("Rules file not found")

    if not rules_path:
        plan = show_redaction_plan([input_path], limit=limit, offset=offset, update=update)
    else:
        plan = show_redaction_plan(
            [input_path], override_rules=Path(rules_path), limit=limit, offset=offset, update=update
        )
    return plan._asdict()
154 | 
155 | 
@router.post("/redact/")
def redact(
    input_directory: str,  # noqa: B008
    output_directory: str,  # noqa: B008
    rules_path: Optional[str] = None,
):
    """Redact the images in ``input_directory`` into ``output_directory``.

    Raises an ``HTTPException`` with status 404 when either path is not a
    directory. A ``rules_path`` that is not an existing file is ignored after
    printing a notice.
    """
    input_path = Path(input_directory)
    output_path = Path(output_directory)
    for candidate, label in ((input_path, "Input"), (output_path, "Output")):
        if not candidate.is_dir():
            raise HTTPException(status_code=404, detail=f"{label} directory not found")

    if rules_path is not None and not Path(rules_path).is_file():
        rules_path = None
        print("Rules file not found")

    # TODO: Add support for multiple input directories in the UI
    if not rules_path:
        redact_images([input_path], output_path)
    else:
        redact_images([input_path], output_path, override_rules=Path(rules_path))
176 | 
177 | 
@router.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket):
    """Stream redaction progress messages to a connected client as JSON.

    Each message has the keys ``count``, ``max`` and ``redact_dir``. The
    handler ends when the client disconnects. A closed socket cannot be
    accepted again, so a client that wants to resume must open a new
    connection, which invokes this endpoint afresh.
    """
    await websocket.accept()
    print("Client connected")

    try:
        while True:
            message = get_next_progress_message()
            if message is None:
                await asyncio.sleep(0.001)  # Add a small delay to avoid busy waiting
                continue
            message_dict = dict(count=message[0], max=message[1], redact_dir=message[2].name)
            await websocket.send_json(message_dict)
    except WebSocketDisconnect:
        print("Client disconnected")
203 | 


--------------------------------------------------------------------------------
/client/src/HomePage.vue:
--------------------------------------------------------------------------------
  1 | <script setup lang="ts">
  2 | import { ref, onMounted } from "vue";
  3 | 
  4 | import { redactImages } from "./api/rest";
  5 | import { selectedDirectories } from "./store/directoryStore";
  6 | import { useRedactionPlan, updateTableData } from "./store/imageStore";
  7 | import { redactionStateFlags } from "./store/redactionStore";
  8 | 
  9 | import MenuSteps from "./components/MenuSteps.vue";
 10 | import FileBrowser from "./components/FileBrowser.vue";
 11 | import ImageDataDisplay from "./components/ImageDataDisplay.vue";
 12 | 
// Template refs for the three FileBrowser components (bound via ref="..." in the template).
const inputModal = ref(null);
const outputModal = ref(null);
const rulesetModal = ref(null);
// Template refs for the <dialog> elements shown while redacting / when rules are missing.
const redactionModal = ref();
const missingRulesModal = ref();

// Redaction progress shown in the progress dialog; count and redact_dir are
// updated from websocket messages below.
const progress = ref({
  count: 0,
  max: useRedactionPlan.imageRedactionPlan.total,
  redact_dir: "",
});
 24 | 
 25 | const wsBase = import.meta.env.VITE_APP_API_URL
 26 |   ? new URL(import.meta.env.VITE_APP_API_URL)
 27 |   : new URL(import.meta.url);
 28 | 
 29 | const ws = new WebSocket("ws:" + wsBase.host + "/ws");
 30 | 
 31 | ws.onmessage = (event) => {
 32 |   const data = JSON.parse(event.data);
 33 |   progress.value = {
 34 |     count: data.count || progress.value.count, // don't update if not present
 35 |     max: useRedactionPlan.imageRedactionPlan.total,
 36 |     redact_dir: data.redact_dir || progress.value.redact_dir, // don't update if not present
 37 |   };
 38 | };
 39 | // Periodically ping the websocket
 40 | 
 41 | setInterval(() => {
 42 |   if (ws.readyState === ws.OPEN) {
 43 |     ws.send("ping");
 44 |   }
 45 | }, 5000);
 46 | 
 47 | const redact_images = async () => {
 48 |   if (
 49 |     !selectedDirectories.value.inputDirectory ||
 50 |     !selectedDirectories.value.outputDirectory
 51 |   ) {
 52 |     return;
 53 |   }
 54 |   redactionStateFlags.value.redactionSnackbar = false;
 55 |   redactionStateFlags.value.redacting = true;
 56 |   // Reset progress count
 57 |   progress.value.count = 0;
 58 |   redactionModal.value.showModal();
 59 |   const response = await redactImages(
 60 |     selectedDirectories.value.inputDirectory,
 61 |     selectedDirectories.value.outputDirectory,
 62 |     selectedDirectories.value.rulesetDirectory,
 63 |   );
 64 |   if (response.status === 200) {
 65 |     useRedactionPlan.updateImageData({
 66 |       directory: `${selectedDirectories.value.outputDirectory}/${progress.value.redact_dir}`,
 67 |       rules: selectedDirectories.value.rulesetDirectory,
 68 |       limit: 50,
 69 |       offset: 0,
 70 |       update: false,
 71 |     });
 72 |     redactionStateFlags.value.redacting = false;
 73 |     redactionModal.value.close();
 74 |     redactionStateFlags.value.showImageTable = false;
 75 |     redactionStateFlags.value.redactionComplete =
 76 |       !!useRedactionPlan.imageRedactionPlan.total;
 77 |     redactionStateFlags.value.redactionSnackbar = true;
 78 |   }
 79 | };
 80 | 
 81 | const canRedact = () => {
 82 |   if (
 83 |     !selectedDirectories.value.inputDirectory ||
 84 |     !selectedDirectories.value.outputDirectory
 85 |   ) {
 86 |     return;
 87 |   }
 88 |   if (useRedactionPlan.imageRedactionPlan.missing_rules) {
 89 |     missingRulesModal.value.showModal();
 90 |   } else {
 91 |     redact_images();
 92 |   }
 93 | };
// If the user chooses to redact with missing rules, force redaction.
// Bound to the "Continue" button of the missing-rules dialog: closes that
// dialog, then starts the redaction.
const forceRedact = () => {
  missingRulesModal.value.close();
  redact_images();
};
 99 | 
// On mount, load the image table when an input directory is already selected.
onMounted(() => {
  const { inputDirectory, rulesetDirectory } = selectedDirectories.value;
  if (!inputDirectory) {
    return;
  }
  updateTableData({
    directory: inputDirectory,
    rules: rulesetDirectory,
    limit: 50,
    offset: 0,
    update: false,
  });
  redactionStateFlags.value.showImageTable = true;
});
112 | </script>
113 | 
114 | <template>
115 |   <div class="flex">
116 |     <input id="side-drawer" type="checkbox" class="drawer-toggle" />
117 |     <div class="flex max-w-md">
118 |       <div
119 |         :class="`pl-4 py-4 ${redactionStateFlags.redacting ? 'opacity-50' : ''}`"
120 |       >
121 |         <div class="bg-base-100 drop-shadow-xl rounded flex flex-col">
122 |           <div class="flex justify-between content-center p-4 border-b">
123 |             <div class="max-h6 w-auto self-center">
124 |               <img src="/logo.png" />
125 |             </div>
126 |             <div class="flex items-center space-y-0.5">
127 |               <a class="btn btn-ghost btn-square btn-sm">
128 |                 <i class="ri-side-bar-line text-lg text-neutral" />
129 |               </a>
130 |             </div>
131 |           </div>
132 |           <MenuSteps
133 |             :step-number="1"
134 |             step-title="Input Directory"
135 |             help-text="Location of the images you’d like to process."
136 |             :input-modal="inputModal || undefined"
137 |           />
138 |           <MenuSteps
139 |             :step-number="2"
140 |             step-title="Output Directory"
141 |             help-text="Location of the images after they are processed."
142 |             :output-modal="outputModal || undefined"
143 |           />
144 |           <MenuSteps
145 |             :step-number="3"
146 |             step-title="Rulesets"
147 |             help-text="Custom ruleset to be used for redaction in addition to the baserules."
148 |             :ruleset-modal="rulesetModal || undefined"
149 |           />
150 |           <FileBrowser
151 |             ref="inputModal"
152 |             :modal-id="'inputDirectory'"
153 |             :title="'Input Directory'"
154 |             @update-image-list="
155 |               (redactionStateFlags.showImageTable = true),
156 |                 (redactionStateFlags.redactionComplete = false)
157 |             "
158 |           />
159 |           <FileBrowser
160 |             ref="outputModal"
161 |             :modal-id="'outputDirectory'"
162 |             :title="'Output Directory'"
163 |           />
164 |           <FileBrowser
165 |             ref="rulesetModal"
166 |             :modal-id="'rulesetDirectory'"
167 |             :title="'Ruleset Directory'"
168 |           />
169 |           <div class="p-4 w-full">
170 |             <button
171 |               type="submit"
172 |               :class="`${!selectedDirectories.inputDirectory || !selectedDirectories.outputDirectory ? 'btn btn-block bg-accent text-white uppercase rounded-lg tooltip' : 'btn btn-block btn-accent text-white uppercase rounded-lg'}`"
173 |               data-tip="Please select input and output directories"
174 |               @click="canRedact()"
175 |             >
176 |               De-phi Images
177 |             </button>
178 |           </div>
179 |         </div>
180 |       </div>
181 |     </div>
182 |     <dialog id="missingRulesModal" ref="missingRulesModal" class="modal">
183 |       <div class="modal-box max-w-100">
184 |         <div class="card max-w-100">
185 |           <div class="card-body">
186 |             <h2 class="font-bold text-xl text-center">
187 |               Missing Redaction Rules
188 |             </h2>
189 |             <div class="divider my-1" />
190 |             <p class="indent-8 font-medium">
191 |               One or more images are missing redaction rules. If you continue
192 |               these images will not be redacted.
193 |             </p>
194 |             <p class="indent-8 text-base font-medium">
195 |               To add rules, please select a ruleset with the missing redaction
196 |               rules.
197 |             </p>
198 |           </div>
199 |           <div class="card-actions flex-nowrap justify-between">
200 |             <button
201 |               class="btn btn-accent w-1/2 text-white uppercase"
202 |               @click="forceRedact()"
203 |             >
204 |               Continue
205 |             </button>
206 |             <button
207 |               class="btn btn-neutral text-white w-1/2 uppercase"
208 |               @click="missingRulesModal.close()"
209 |             >
210 |               Cancel
211 |             </button>
212 |           </div>
213 |         </div>
214 |       </div>
215 |     </dialog>
216 | 
217 |     <dialog id="redactionModal" ref="redactionModal" class="modal">
218 |       <div class="modal-box w-96">
219 |         <div class="card">
220 |           <div class="card-body">
221 |             <h2 class="card-title">Redaction in progress:</h2>
222 |             <p>
223 |               Redacting images
224 |               <span class="float-right"
225 |                 >{{ progress.count }}/{{ progress.max }}</span
226 |               >
227 |             </p>
228 |             <progress
229 |               v-if="redactionStateFlags.redacting"
230 |               class="progress progress-primary"
231 |               :value="progress.count"
232 |               :max="progress.max"
233 |             />
234 |           </div>
235 |         </div>
236 |       </div>
237 |     </dialog>
238 |     <ImageDataDisplay
239 |       v-if="
240 |         useRedactionPlan.imageRedactionPlan.total &&
241 |         redactionStateFlags.showImageTable
242 |       "
243 |     />
244 |     <ImageDataDisplay v-if="redactionStateFlags.redactionComplete" />
245 |     <div v-if="redactionStateFlags.redactionSnackbar" class="toast z-[100]">
246 |       <div class="alert alert-success">
247 |         <span class="font-semibold">Redaction Complete</span>
248 |         <div>
249 |           Redacted images now in {{ selectedDirectories.outputDirectory }}/{{
250 |             progress.redact_dir
251 |           }}
252 |           <button
253 |             class="btn btn-xs btn-ghost"
254 |             @click="redactionStateFlags.redactionSnackbar = false"
255 |           >
256 |             <i class="ri-close-line" />
257 |           </button>
258 |         </div>
259 |       </div>
260 |     </div>
261 |   </div>
262 | </template>
263 | 


--------------------------------------------------------------------------------
/tests/test_redact.py:
--------------------------------------------------------------------------------
  1 | import datetime
  2 | import importlib.resources
  3 | import logging
  4 | from pathlib import Path, PurePath
  5 | import struct
  6 | 
  7 | from freezegun import freeze_time
  8 | import pytest
  9 | import yaml
 10 | 
 11 | from imagedephi import redact
 12 | from imagedephi.redact.redact import ProfileChoice, create_redact_dir_and_manifest
 13 | from imagedephi.redact.svs import SvsRedactionPlan
 14 | from imagedephi.rules import KeepRule, Ruleset
 15 | from imagedephi.utils.logger import logger
 16 | 
 17 | 
 18 | @pytest.fixture
 19 | def base_rule_set():
 20 |     base_rules_path = importlib.resources.files("imagedephi") / "base_rules.yaml"
 21 |     with base_rules_path.open() as base_rules_stream:
 22 |         return Ruleset.model_validate(yaml.safe_load(base_rules_stream))
 23 | 
 24 | 
 25 | @pytest.fixture
 26 | def override_rule_set(rules_dir: Path):
 27 |     rule_file = rules_dir / "example_user_rules.yaml"
 28 |     return rule_file
 29 | 
 30 | 
 31 | @pytest.fixture
 32 | def strict_rule_set():
 33 |     strict_rules_path = importlib.resources.files("imagedephi") / "minimum_rules.yaml"
 34 |     return strict_rules_path
 35 | 
 36 | 
 37 | @pytest.fixture(
 38 |     params=[PurePath("svs"), PurePath("svs") / "test_svs_image_blank.svs"],
 39 |     ids=["input_dir", "input_file"],
 40 | )
 41 | def svs_input_paths(test_image_svs, data_dir, request) -> list[Path]:
 42 |     path_list = [data_dir / "input" / request.param]
 43 |     return path_list
 44 | 
 45 | 
 46 | @pytest.fixture(
 47 |     params=[PurePath("dcm"), PurePath("dcm") / "test_dcm_image.dcm"],
 48 |     ids=["input_dir", "input_file"],
 49 | )
 50 | def dcm_input_path(data_dir, test_image_dcm, request) -> list[Path]:
 51 |     path_list = [data_dir / "input" / request.param]
 52 |     return path_list
 53 | 
 54 | 
 55 | @pytest.fixture(
 56 |     params=[PurePath("tiff"), PurePath("tiff") / "test_image.tif"],
 57 |     ids=["input_dir", "input_file"],
 58 | )
 59 | def tiff_input_path(data_dir, test_image_tiff, request) -> list[Path]:
 60 |     path_list = [data_dir / "input" / request.param]
 61 |     return path_list
 62 | 
 63 | 
 64 | @freeze_time("2023-05-12 12:12:53")
 65 | def test_create_redact_dir_and_manifest(tmp_path):
 66 |     time_stamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
 67 |     output_dir, manifest = create_redact_dir_and_manifest(tmp_path / "fake", time_stamp)
 68 |     assert output_dir.exists()
 69 |     assert output_dir.name == "Redacted_2023-05-12_12-12-53"
 70 |     assert manifest.exists()
 71 |     assert manifest.name == "Redacted_2023-05-12_12-12-53_manifest.csv"
 72 | 
 73 | 
 74 | @freeze_time("2023-05-12 12:12:53")
 75 | def test_redact_svs(svs_input_paths, tmp_path, override_rule_set):
 76 |     redact.redact_images(svs_input_paths, tmp_path, override_rule_set)
 77 | 
 78 |     output_file = tmp_path / "Redacted_2023-05-12_12-12-53" / "my_study_slide_1.svs"
 79 |     svs_output_file_bytes = output_file.read_bytes()
 80 |     # verify our custom svs rule was applied
 81 |     assert b"ICC Profile" not in svs_output_file_bytes
 82 |     # verify the base image rule was applied to the macro
 83 |     assert b"macro" not in svs_output_file_bytes
 84 | 
 85 | 
 86 | def test_redact_svs_no_extension(mocker, test_image_svs_no_extension, tmp_path):
 87 |     # Ensure the correct redaction plan is called for an SVS file with no
 88 |     # extension
 89 |     spy = mocker.spy(SvsRedactionPlan, "__init__")  # record constructor calls without replacing the constructor
 90 |     redact.redact_images(test_image_svs_no_extension, tmp_path)
 91 |     assert spy.call_count == 1  # exactly one SvsRedactionPlan must have been constructed
 93 | 
 94 | def test_plan_svs(caplog, svs_input_paths, override_rule_set):
 95 |     logger.setLevel(logging.INFO)  # presumably needed so the plan output reaches caplog - confirm
 96 |     redact.show_redaction_plan(svs_input_paths, override_rule_set)
 97 | 
 98 |     # Behavior for directories: skip printing full plans
 99 |     # Behavior for single image file: print full plan
100 |     for svs_input_path in svs_input_paths:
101 |         if svs_input_path.is_dir() and len(list(svs_input_path.iterdir())) > 1:  # a directory with more than one entry
102 |             assert "Aperio (.svs) Metadata Redaction Plan" not in caplog.text
103 |             assert "ICC Profile: delete" not in caplog.text
104 |         else:  # a single file, or a directory with at most one entry
105 |             assert "Aperio (.svs) Metadata Redaction Plan" in caplog.text
106 |             assert "ICC Profile: delete" in caplog.text
107 | 
108 | 
109 | def test_associated_image_key_no_description(data_dir, base_rule_set):
110 |     input_image = data_dir / "input" / "svs" / "test_svs_image_blank.svs"
111 |     svs_redaction_plan = SvsRedactionPlan(input_image, base_rule_set.svs)
112 |     test_tags = {  # hand-built tag table containing a single entry and no description tag
113 |         254: {  # presumably the TIFF NewSubfileType tag - confirm
114 |             "datatype": 4,
115 |             "count": 1,
116 |             "datapos": 0,
117 |             "data": [9],
118 |         }
119 |     }
120 |     test_ifd = {  # minimal stand-in IFD dict wrapping the tag table above
121 |         "offset": 0,
122 |         "tags": test_tags,
123 |         "path_or_fobj": "",
124 |         "size": 0,
125 |         "bigEndian": False,
126 |         "bigtiff": False,
127 |         "tagcount": 1,
128 |     }
129 |     associated_image_key = svs_redaction_plan.get_associated_image_key_for_ifd(
130 |         test_ifd,  # type: ignore
131 |     )
132 |     assert associated_image_key == "macro"  # expected key for this IFD even though it has no description
133 | 
134 | 
135 | @freeze_time("2023-05-12 12:12:53")  # fixes the timestamp used in the output directory name below
136 | def test_remove_orphaned_metadata(secret_metadata_image, tmp_path, override_rule_set):
137 |     input_bytes = b""
138 |     for image in secret_metadata_image:
139 |         input_bytes = image.read_bytes()  # NOTE(review): overwritten each iteration, so only the last image's bytes are kept
140 | 
141 |     redact.redact_images(secret_metadata_image, tmp_path, override_rule_set)
142 | 
143 |     output_file = tmp_path / "Redacted_2023-05-12_12-12-53" / "my_study_slide_1.tiff"
144 |     output_bytes = output_file.read_bytes()
145 | 
146 |     assert b"Secret" in input_bytes  # sanity check: the marker is present before redaction
147 |     assert b"Secret" not in output_bytes  # and absent from the redacted file
148 | 
149 | 
150 | @freeze_time("2023-05-12 12:12:53")  # fixes the timestamp used in the output directory name below
151 | def test_redact_dcm(test_image_dcm, tmp_path, override_rule_set):
152 |     redact.redact_images(test_image_dcm, tmp_path, override_rule_set)
153 | 
154 |     output_file = tmp_path / "Redacted_2023-05-12_12-12-53" / "my_study_slide_1.dcm"
155 |     dcm_output_file_bytes = output_file.read_bytes()
156 |     # verify the base rule deleted "SeriesDescription"
157 |     assert b"Sample" not in dcm_output_file_bytes
158 | 
159 | 
160 | def test_plan_dcm(caplog, test_image_dcm):
161 |     logger.setLevel(logging.DEBUG)  # presumably needed so the plan output reaches caplog - confirm
162 |     redact.show_redaction_plan(test_image_dcm)
163 | 
164 |     assert "DICOM Metadata Redaction Plan" in caplog.text  # the plan header must be logged
165 |     assert "SeriesDescription: delete" in caplog.text  # along with the delete action for SeriesDescription
166 | 
167 | 
168 | @freeze_time("2023-05-12 12:12:53")  # fixes the timestamp used in the output directory name below
169 | @pytest.mark.timeout(5)
170 | def test_strict(svs_input_paths, tmp_path) -> None:
171 |     redact.redact_images(svs_input_paths, tmp_path, profile=ProfileChoice.Strict.value)  # strict profile chosen via the profile argument
172 |     output_file = tmp_path / "Redacted_2023-05-12_12-12-53" / "study_slide_1.svs"
173 |     output_file_bytes = output_file.read_bytes()
174 |     assert b"Aperio" not in output_file_bytes  # neither marker may survive in the redacted output
175 |     assert b"macro" not in output_file_bytes
176 | 
177 | 
178 | @freeze_time("2023-05-12 12:12:53")  # fixes the timestamp used in the output directory name below
179 | @pytest.mark.timeout(5)
180 | def test_override_with_strict_flag(svs_input_paths, tmp_path, strict_rule_set) -> None:
181 |     redact.redact_images(svs_input_paths, tmp_path, override_rules=strict_rule_set)  # strict rules supplied as override rules, not via profile
182 |     output_file = tmp_path / "Redacted_2023-05-12_12-12-53" / "study_slide_1.svs"
183 |     output_file_bytes = output_file.read_bytes()
184 |     assert b"Aperio" not in output_file_bytes  # same expectations as test_strict
185 |     assert b"macro" not in output_file_bytes
186 | 
187 | 
188 | @freeze_time("2023-05-12 12:12:53")  # fixes the timestamp used in the output directory name below
189 | @pytest.mark.timeout(5)
190 | def test_strict_skip_dcm(dcm_input_path, tmp_path) -> None:
191 |     redact.redact_images(dcm_input_path, tmp_path, profile=ProfileChoice.Strict.value)
192 |     output_dir = tmp_path / "Redacted_2023-05-12_12-12-53"
193 |     assert output_dir.is_dir()  # the output directory is still created ...
194 |     assert len(list(output_dir.iterdir())) == 0  # ... but nothing is written into it for DICOM input
195 | 
196 | 
197 | @freeze_time("2023-05-12 12:12:53")  # fixes the timestamp used in the output directory name below
198 | @pytest.mark.timeout(5)
199 | @pytest.mark.parametrize(
200 |     "action,custom_tag_exists", [("keep", True), ("delete", False), ("use_rule", True)]
201 | )
202 | def test_dcm_private_redaction(dcm_input_path, tmp_path, action, custom_tag_exists) -> None:
203 |     override_ruleset = Ruleset()
204 |     override_ruleset.dicom.custom_metadata_action = action
205 |     if action == "use_rule":  # only this action gets an explicit rule for tag (1001,1001)
206 |         override_ruleset.dicom.metadata["(1001,1001)"] = KeepRule(
207 |             key_name="TestItem", action="keep"
208 |         )
209 | 
210 |     override_rules = tmp_path / "override_rules.yaml"  # the rule set is written to disk and passed by path
211 |     with override_rules.open("w") as override_rules_stream:
212 |         yaml.safe_dump(override_ruleset.model_dump(), override_rules_stream)
213 |     redact.redact_images(
214 |         dcm_input_path,
215 |         tmp_path,
216 |         override_rules=override_rules,
217 |     )
218 |     output_file = tmp_path / "Redacted_2023-05-12_12-12-53" / "study_slide_1.dcm"
219 |     dcm_output_file_bytes = output_file.read_bytes()
220 |     tag_bytes = struct.pack("<i", 0x10011001)  # little-endian 32-bit pattern searched for in the raw output
221 |     assert custom_tag_exists == (tag_bytes in dcm_output_file_bytes)  # presence must match the parametrized expectation
222 | 
223 | 
224 | @freeze_time("2023-05-12 12:12:53")  # fixes the timestamp used in the output directory name below
225 | @pytest.mark.timeout(5)
226 | def test_dates_dcm(dcm_input_path, tmp_path) -> None:
227 |     redact.redact_images(dcm_input_path, tmp_path, profile=ProfileChoice.Dates.value)
228 |     output_file = tmp_path / "Redacted_2023-05-12_12-12-53" / "study_slide_1.dcm"
229 |     dcm_output_file_bytes = output_file.read_bytes()
230 |     assert b"20220101" in dcm_output_file_bytes  # a January 1 date string must appear in the output
231 | 
232 | 
233 | @freeze_time("2023-05-12 12:12:53")  # fixes the timestamp used in the output directory name below
234 | @pytest.mark.timeout(5)
235 | def test_dates_svs(svs_input_paths, tmp_path) -> None:
236 |     redact.redact_images(svs_input_paths, tmp_path, profile=ProfileChoice.Dates.value)
237 |     output_file = tmp_path / "Redacted_2023-05-12_12-12-53" / "study_slide_1.svs"
238 |     output_file_bytes = output_file.read_bytes()
239 |     # Date set to January 1
240 |     assert b"01/01/08" in output_file_bytes
241 |     # Time set to midnight
242 |     assert b"00:00:00" in output_file_bytes
243 | 
244 | 
245 | @freeze_time("2023-05-12 12:12:53")  # fixes the timestamp used in the output directory name below
246 | @pytest.mark.timeout(5)
247 | def test_dates_tiff(tiff_input_path, tmp_path) -> None:
248 |     redact.redact_images(tiff_input_path, tmp_path, profile=ProfileChoice.Dates.value)
249 |     output_file = tmp_path / "Redacted_2023-05-12_12-12-53" / "study_slide_1.tif"
250 |     output_file_bytes = output_file.read_bytes()
251 |     assert b"2024:01:01 00:00:00" in output_file_bytes  # a January 1, midnight timestamp must appear in the output
252 | 


--------------------------------------------------------------------------------
/docs/demo.md:
--------------------------------------------------------------------------------
  1 | # ImageDePHI Demo
  2 | 
  3 | This walkthrough will guide you through using the ImageDePHI program.
  4 | 
  5 | ## Getting the demo data
  6 | 
  7 | In order to get the demo data, you will need to have installed ImageDePHI and run the following command:
  8 | 
  9 | ```bash
 10 | imagedephi demo-data
 11 | ```
 12 | 
 13 | This will create a new directory in the location it is run called `demo_files` and download several whole slide images into that directory. These images contain fake PHI, which we will redact with ImageDePHI.
 14 | 
 15 | ## Redacting with the Graphical User Interface (GUI)
 16 | ImageDePHI allows redaction of whole slide images through either a graphical user interface, accessible through a web browser, or a command line interface. First, let's take a look at the redaction workflow using the graphical user interface.
 17 | 
 18 | #### 1. Starting the program
 19 | In order to start the program, install ImageDePHI and run:
 20 | 
 21 | ```bash
 22 | imagedephi gui
 23 | ```
 24 | 
 25 | This will start the program, which will be accessible at a random port, and open up a browser at the correct address.
 26 | 
 27 | By default, this command will select a random port to serve the application from. You can specify a port if you'd like by using the `--port` flag, e.g:
 28 | 
 29 | ```bash
 30 | imagedephi gui --port 8888
 31 | ```
 32 | 
 33 | #### 2. Looking at the UI
 34 | If your browser is not already open to ImageDePHI, open up your browser and go to `127.0.0.1:<port>` where `<port>` is either the random port picked by the command above or the number you supplied to the `--port` flag if you used that option to start the server.
 35 | 
 36 | ![Initial ImageDePHI UI](./images/initial_ui.png)
 37 | 
 38 | You should be greeted by the initial UI screen. On the left hand side there are several options for specifying which files should be redacted and how they should be redacted. We will go over each step individually.
 39 | 
 40 | #### 3. Select Files to be Redacted
 41 | 
 42 | The first thing you'll need to do is select files for redaction.
 43 | 
 44 | ![Button to open input directory browser](./images/step_1_input_directory_open_browser.png)
 45 | Click the button in Step 1 to open up a file browser.
 46 | 
 47 | ![Input directory browser](./images/step_1_input_directory_select_directory.png)
 48 | Navigate your computer's file system until you come to the directory where you downloaded your demo files, then click "Select."
 49 | 
 50 | #### 4. Select Output Destination
 51 | 
 52 | Next, select a location for redacted images. ImageDePHI does not modify your original images. Instead, it creates new, redacted images saved into the location selected here.
 53 | 
 54 | ![Output directory selector](./images/step_2_output_directory_select_directory.png)
 55 | For this demo, select the directory that is the parent of your `demo_files/` directory. A new directory will be created at this location for the redacted images.
 56 | 
 57 | #### 5. Preview Redaction Changes
 58 | 
 59 | After selecting your input directory, you will see a table previewing the redaction that is about to happen. For each file in the input directory, you'll see a row containing the file name, a thumbnail, the redaction status, and the metadata tags.
 60 | 
 61 | Looking at the metadata tags, you'll see that, for example, the "Date" tag is red with strikethrough. This indicates that this field will be removed and not present in the redacted output file. Scrolling over, you'll see tags like "AppMag" and "BitsPerSample" have no special styling, indicating that they will be included in the output file.
 62 | 
 63 | Most importantly, you'll see that there's an issue in the "Redaction Status" column for the image "SEER_Mouse_1_17158543_demo.svs". If you hover over the red icon you'll see the message "1 tag(s) missing redaction rules." Below that you'll see "55500: 55500," indicating that this image contains a metadata tag with the number "55500" that ImageDePHI doesn't know how to redact.
 64 | 
 65 | ![Image grid showing an error](./images/image_grid_errors_ui.png)
 66 | 
 67 | #### 6. Creating a Custom Rule Set
 68 | 
 69 | The base rule set provided by ImageDePHI is used every time images are redacted. User-defined rule sets can be used to supplement or modify the behavior defined by the base rules.
 70 | 
 71 | The base rule set does not contain a rule for tag `55500`, so in order to redact the demo images, the program will need to be supplied a ruleset that knows what to do with tag `55500`.
 72 | 
 73 | Let's create that ruleset now. Create a new file called `custom_rules.yaml` and add the following:
 74 | 
 75 | ```yaml
 76 | ---
 77 | name: Custom Rules
 78 | description: Custom ruleset used for the ImageDePHI demo.
 79 | svs:
 80 |     metadata:
 81 |         '55500':
 82 |             action: delete
 83 | ```
 84 | 
 85 | If you'd like to know the default behavior of ImageDePHI, take a look at the [base rules](../imagedephi/base_rules.yaml).
 86 | 
 87 | #### 7. Using Your Custom Ruleset
 88 | 
 89 | Now that you've created a rule to complete redaction of the demo images, let's use that rule set.
 90 | 
 91 | Click the folder icon in Step 3 (Rulesets) to open the file navigator.
 92 | 
 93 | ![Custom ruleset file navigator](./images/step_3_ruleset_select_ruleset.png)
 94 | 
 95 | Navigate to the custom rule set you created in step 6 and select it. The rule set you select in this step will be composed with the base rule set provided by ImageDePHI. If a tag appears in both the base rules and the custom rule set, the custom rule will be applied instead of the base rule.
 96 | 
 97 | The table should update to reflect that the program now knows how to redact tag `55500`, and each image should have a green checkmark icon in the "Redaction Status" column.
 98 | 
 99 | ![Image grid showing no errors](./images/image_grid_success_ui.png)
100 | 
101 | #### 8. Redact the Demo Images
102 | 
103 | All that's left to do is click redact! Click the button that says "De-PHI Images." You'll see a progress bar that indicates how much time is left in the redaction process.
104 | 
105 | ![Image redaction indicated by a progress bar](./images/redaction_progress_ui.png)
106 | 
107 | Once that succeeds, you'll see a toast notification at the bottom of the screen indicating that the images have been redacted successfully.
108 | 
109 | ![Redaction complete notification](./images/redaction_complete_ui.png)
110 | 
111 | You'll find a new directory in the location you selected as your output directory. This new directory will have a name starting with "Redacted_" and ending with a timestamp of when you started redacting images. It will contain redacted images. Adjacent to that directory will be a manifest file mapping input file names to output file names. If there were any issues during redaction, those would be reported in the manifest file as well.
112 | 
113 | ## Using the CLI
114 | 
115 | If you would prefer to use the CLI to redact the images, follow this section to walk through the same example using that tool instead of the UI. Make sure to follow the instructions at the top of this guide to get the demo data.
116 | 
117 | #### 1. Use the `plan` command
118 | 
119 | The `plan` command is one way to determine if the files you want to redact are able to be redacted. If not, the output of the `plan` command will help you discover what you'll need to do in order to redact your images. After obtaining the test data, run the following command:
120 | 
121 | ```bash
122 | imagedephi plan demo_files
123 | ```
124 | 
125 | You'll see in the output of that command that one of the files cannot be redacted. In order to find out why, you can run:
126 | 
127 | ```bash
128 | imagedephi plan demo_files/SEER_Mouse_1_17158543_demo.svs
129 | ```
130 | 
131 | Running the `plan` command on a single image will provide a detailed report of exactly how that particular image is redacted. To see this level of detail for all images in a directory, use the `-v` (verbose) option.
132 | 
133 | The output of the `plan` command for that particular image reveals that it contains a metadata item with tag `55500` with no corresponding rule.
134 | 
135 | #### 2. Create an override rule set
136 | 
137 | In order to redact the demo images, we'll need to give the program a rule it can use for tag `55500`. The mechanism we can use to do this is with an override, or custom, rule set.
138 | 
139 | ImageDePHI comes with a base set of rules that covers most commonly seen metadata tags for SVS and DICOM images. If your images contain metadata not covered by the base rules, you'll need a custom rule set.
140 | 
141 | For this demo, create a file called `custom_rules.yaml` and add the following:
142 | 
143 | ```yaml
144 | ---
145 | name: Custom Rules
146 | description: Custom ruleset used for the ImageDePHI demo.
147 | svs:
148 |     metadata:
149 |         '55500':
150 |             action: delete
151 | ```
152 | 
153 | We now have a ruleset to supplement the base rules and enable redaction of the demo images.
154 | 
155 | #### 3. Use the `plan` command with the override rule set
156 | 
157 | First, let's verify that our custom rule set works as intended. Run the following command:
158 | 
159 | ```bash
160 | imagedephi plan -R custom_rules.yaml demo_files
161 | ```
162 | 
163 | Note the message "3 images able to be redacted" in the output. This means all of the demo files can now be redacted.
164 | 
165 | #### 4. Use the `run` command to redact the images
166 | 
167 | The `run` command is very similar to `plan`, except it also needs to be told where to save the redacted files. This is done using the `-o` option. Run the following:
168 | 
169 | ```bash
170 | mkdir ./output_files
171 | imagedephi run -R custom_rules.yaml -o ./output_files demo_files
172 | ```
173 | 
174 | After that command finishes, you'll see a new directory in `./output_files` called `Redacted_<timestamp>` containing the redacted files.
175 | 
176 | You'll also see a file next to that directory called `Redacted_<timestamp>_manifest.csv`. This will contain a mapping of input file names to output file names, as well as any errors that may have occurred during redaction.
177 | 
178 | ### Using a command file in the CLI
179 | In some instances you may want to pass a command file to the CLI. For example, you may have a long list of input files that would be cumbersome to type in a terminal.
180 | 
181 | For this demo create a file called `command_file.yaml` and add the following:
182 | 
183 | ```yaml
184 | ---
185 | command: plan
186 | input_paths:
187 |   - "demo_files"
188 | output_dir: ~/redacted_images
189 | ```
190 | Now run the following:
191 | 
192 | ```bash
193 | imagedephi plan -c command_file.yaml
194 | ```
195 | This option is supported by both the `plan` and `run` commands. Any option that can be added to these commands can also be added to the command file.
196 | 
197 | ```yaml
198 | ---
199 | command: run
200 | input_paths:
201 |   - "demo_files"
202 | output_dir: /redacted_images
203 | recursive: True
204 | ```
205 |  **Please Note:** The command file is meant to supplement the command given in the terminal. Any option supplied in the terminal takes priority.
206 | 
207 | Additionally you can supply an unformatted yaml or text file with the `--file_list` option.
208 | 
209 | Create a file called `file_list.txt` and add the following:
210 | ```bash
211 | demo_files
212 | ```
213 | Now run the following:
214 | 
215 | ```bash
216 | imagedephi plan -f file_list.txt
217 | ```
218 | 
219 | 
220 | ## Next Steps
221 | 
222 | For more information about the ImageDePHI rules system, be sure to check out the [documentation](../README.md).
223 | 
224 | ## Demo Data Citation
225 | ‘NCI SRP Mouse Tissue Whole Slide Images with Fake PHI/PII’ data set, Version 1.0. Generated: December 29, 2021; Scanner: Leica Microsystems, Aperio AT2; Provided by: The National Cancer Institute (NCI) Surveillance Research Program (SRP).
226 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # ImageDePHI
  2 | ImageDePHI is an application to redact personal data (PHI) from whole slide images (WSIs).
  3 | 
  4 | > This project has been funded in whole or in part with Federal funds from the National Cancer Institute, National Institutes of Health, Department of Health and Human Services, under Contract No. 75N91022C00033
  5 | 
  6 | ## Installation
  7 | * Download the [latest ImageDePHI release](https://github.com/DigitalSlideArchive/ImageDePHI/releases/latest).
  8 | 
  9 | * Unzip the downloaded file, which will extract the executable named `imagedephi` (or `imagedephi.exe` on Windows).
 10 | 
 11 | * Please note that on Linux, only Ubuntu 20.04+ is supported.
 12 | 
 13 | ## Usage
 14 | For an in-depth walkthrough, check out the [demo](./docs/demo.md).
 15 | 
 16 | From a command line, execute the application to get full usage help.
 17 | 
 18 | Alternatively **on Windows only**, directly open `imagedephi.exe` in Windows Explorer to launch the ImageDePHI GUI.
 19 | 
 20 | If running on macOS, you may need to [add the executable to the list of trusted software](https://support.apple.com/guide/mac-help/apple-cant-check-app-for-malicious-software-mchleab3a043/mac) to launch ImageDePHI in the same way you would any other registered app.
 21 | 
 22 | # Rules
 23 | Image redaction is determined by a set of rules. By default, the base set of rules is used. These rules are provided by the `imagedephi` package and can be found [here](https://github.com/DigitalSlideArchive/ImageDePHI/blob/main/imagedephi/base_rules.yaml).
 24 | 
 25 | ## Rule Application
 26 | All runs of `imagedephi` use the provided base set of rules as a foundation. End users can use the ruleset framework to build custom rulesets that handle additional or custom metadata not covered by the base rules, or override the behavior of the base rule set.
 27 | 
 28 | Override rule sets can be specified by using the `-R my_ruleset.yaml` or `--override-rules my_ruleset.yaml` option. This option is available for both the `imagedephi run` and `imagedephi plan` commands. Override rule sets are not provided by `imagedephi`, and must be defined by the end user.
 29 | 
 30 | When `imagedephi` determines the steps to redact a file, it checks each piece of metadata in the file. For each piece of metadata found this way, it will first consult the override rule set, if present, for an applicable rule. If the override rule set does not contain a rule for that piece of metadata, the program will check the base ruleset.
 31 | 
 32 | If neither the override rule set nor the base rule set covers a piece of metadata, redaction will fail, and the program will list the metadata that it could not redact. There is no default behavior for unknown metadata.
 33 | 
 34 | ### Redaction Profiles
 35 | 
 36 | #### Strict Redaction
 37 | For whole slide image formats based on the tiff standard, `imagedephi` allows a strict type of redaction. Using the `--profile strict` option when calling `imagedephi` from the CLI will use this mode. In this mode, only tags strictly required by the tiff standard will remain, and all other metadata will be stripped from the images. For a full list of metadata tags that will remain after strict redaction, see the [minimum rules file](https://github.com/DigitalSlideArchive/ImageDePHI/blob/main/imagedephi/minimum_rules.yaml).
 38 | 
 39 | #### Fuzzing Dates and Times
 40 | Using the `--profile dates` option will replace dates, times, datetimes, and UTC offsets with values that semantically represent those things but with less precision than the original value. Dates will preserve the year, but the month and day will be set to January 1st. Times will be set to midnight and UTC offsets to +0000. Rules for this profile can be found in [modify_dates_rules.yaml](https://github.com/DigitalSlideArchive/ImageDePHI/blob/main/imagedephi/modify_dates_rules.yaml). For DICOM images, the [Attribute Confidentiality Profiles](https://dicom.nema.org/dicom/2013/output/chtml/part15/chapter_E.html) were used to determine which tags should be modified according to this profile.
 41 | 
 42 | 
 43 | ## Ruleset Format Overview
 44 | In order to read the base rules and build your own custom rule sets, it is important to understand the format in which rulesets are specified. Rulesets are defined by `.yaml` files (one ruleset per file), and are a dictionary with the following top-level tags: `name`, `description`, `output_file_name`, `tiff`, `svs`, and `dicom`.
 45 | 
 46 | ### Generic Properties
 47 | The following three properties belong to the rulesets themselves, and don't influence redaction behavior.
 48 | 
 49 | #### `name`
 50 | Provide a name for a ruleset. This is used by the `imagedephi plan` command to specify which ruleset is being used to redact a particular piece of metadata.
 51 | 
 52 | #### `description`
 53 | You can add a description to your custom rulesets. This is not used by the program, but can be helpful to communicate what cases your custom rulesets are designed for.
 54 | 
 55 | #### `output_file_name`
 56 | Specify how the output files should be named here. The base ruleset contains the value `study_slide`. In this case, if the input slides are named: `john_smith_lung.svs` and `john_smith_pancreas.svs`, the redacted output images will be named `study_slide_1.svs` and `study_slide_2.svs`.
 57 | 
 58 | ### Other Top-level Properties
 59 | 
 60 | #### `strict`
 61 | The `strict` property of rulesets is used to denote that ALL unspecified tags should be deleted. This is supported for `tiff` and `svs` files. An example of using the strict flag can be seen in the `minimum_rules.yaml` rule set.
 62 | 
 63 | ### File Format Rules
 64 | Redaction behavior is specified per file type. Currently pure `tiff` files, Aperio (`.svs`), and DICOM files are supported. Each image type has its own groups of data that can be redacted. For example, Aperio images have `tiff` metadata, certain associated images, and additional metadata specified in the `ImageDescription` tag. `svs` rulesets take the following shape:
 65 | 
 66 | 
 67 | ```yaml
 68 | svs:
 69 |     associated_images:
 70 |         ...
 71 |     metadata:
 72 |         ...
 73 |     image_description:
 74 |         ...
 75 | ```
 76 | 
 77 | Each group is a dictionary whose keys represent a way to identify a specific piece of metadata or specific associated image, and whose values are dictionaries that define redaction behavior. Each entry (key-value pair) in the dictionary is a "rule." Take the following `associated_image` rule from the base ruleset:
 78 | 
 79 | ```yaml
 80 | svs:
 81 |     ...
 82 |     associated_images:
 83 |         label:
 84 |             action: replace
 85 |             replace_with: blank_image
 86 |     ...
 87 | ```
 88 | 
 89 | This describes how `imagedephi` handles `label` images for Aperio files by default. Since label images frequently contain PHI, but are required by the Aperio (.svs) format, they are replaced with a black square of the same size.
 90 | 
 91 | #### Image Rules
 92 | 
 93 | Image rules take the following form:
 94 | 
 95 | ```yaml
 96 | <image_key>:
 97 |     action:
 98 | ```
 99 | 
100 | Where `image_key` identifies a particular associated image. For a catch-all rule, use the key `default`.
101 | 
102 | Image rules can have the following actions:
103 | 
104 | * `replace`: Replace an image with another. If specified, a value for `replace_with` must also be provided
105 | * `keep`: Does nothing. The associated image matching this key will be included in the output file
106 | * `delete`: The image will not be included in the output file
107 | 
108 | For image rules, the only supported value of `replace_with` is `blank_image`.
109 | 
110 | #### Metadata Rules
111 | 
112 | Metadata rules take the following form:
113 | 
114 | ```yaml
115 | <metadata_key>:
116 |     action:
117 | ```
118 | 
119 | Where `metadata_key` identifies a piece of metadata. Possible values for this key depend on the type of metadata being redacted. For example, rules listed under
120 | 
121 | ```yaml
122 | tiff:
123 |     metadata:
124 | ```
125 | have `metadata_keys` for particular tiff tags (e.g. `ImageDescription`, `ImageWidth`).
126 | 
127 | Available actions for metadata rules are:
128 | 
129 | * `delete`: the metadata will not appear in the output file
130 | * `keep`: the metadata will appear unchanged in the output file
131 | * `replace`: replace the metadata with a specified value. If this is the `action`, additional fields are required.
132 | * `check_type`: This will either keep the metadata if the type matches or delete the metadata if the type does not match. Requires additional fields
133 | * `modify_date`: This will fuzz dates, times, datetimes, and time zone offsets. See the "Profiles" section for more details.
134 | 
135 | ##### `replace` rules
136 | Require the additional property `replace_with`. The value specified by the `replace_with` key will be used to override the metadata in the output image.
137 | 
138 | ##### `check_type` rules
139 | Use the additional properties:
140 | * `expected_type`: one of `integer`, `number`, `text`, `rational`
141 | * `expected_count` (optional): if the piece of metadata can contain multiple values, specify how many are expected using this property. Defaults to `1`. If the `expected_type` is `rational`, this should be the expected number of rationals. That is, an `expected_count` of 1 would match with 2 integer values in the metadata.
142 | 
143 | ### Supported Formats
144 | Currently, `imagedephi` supports redaction of the following types of files:
145 | * TIFF
146 | * Aperio (a tiff-like format, typically uses the extension `.svs`)
147 | * DICOM
148 | 
149 | #### Tiff
150 | Tiff rules have the following shape:
151 | 
152 | ```yaml
153 | tiff:
154 |     associated_images:
155 |         ...
156 |     metadata:
157 |         ...
158 | ```
159 | 
160 | The keys for the `metadata` rules are the names of tiff tags defined by the tiff standard.
161 | 
162 | #### Aperio
163 | Aperio format rules have the following shape:
164 | 
165 | ```yaml
166 | svs:
167 |     associated_images:
168 |         ...
169 |     metadata:
170 |         ...
171 |     image_description:
172 |         ...
173 | ```
174 | 
175 | The keys for the `metadata` rules are the names of tiff tags defined by the tiff standard. Names are case insensitive and common variations are accepted, e.g. `GrayResponseUnit` and `GreyResponseUnit` are both accepted.
176 | 
177 | For Aperio files, additional metadata is stored as key-value pairs in the `ImageDescription` tag. See more information about this [here](https://openslide.org/formats/aperio/). Each key in the `image_description` section is a key found in this `ImageDescription` string.
178 | 
179 | #### DICOM
180 | DICOM format rules are much the same:
181 | 
182 | ```yaml
183 | dicom:
184 |     associated_images:
185 |         ...
186 |     custom_metadata_action: ...
187 |     metadata:
188 |         ...
189 | ```
190 | 
191 | Note that here there is an explicit format-level setting for dealing with custom metadata. Any tag with an odd group number is considered custom metadata. This can be set to `keep`, `delete` or `use_rule`.
192 | 
193 | * `keep`: Retain the custom metadata value after redaction. Rules for custom tags specified in the `metadata` section take precedence over this setting.
194 | * `delete`: Delete the custom metadata tag from the image. Rules for custom tags specified in the `metadata` section take precedence over this setting.
195 | * `use_rule`: This mode will fall back to rules specified for each piece of custom metadata in the `metadata` section of the rule set. If a custom metadata tag with no corresponding rule is encountered, the image will not be redacted, as the redaction plan would be considered incomplete.
196 | 
197 | Additionally, DICOM redaction supports additional redaction operations.
198 | 
199 | * `empty`: Replace the tag's value with `None`.
200 | * `replace_dummy`: Replace the tag's value with a dummy value, which is dependent on the original value type. For example, if the tag's value is a string, the dummy value is the empty string. If the tag's value is an integer, the dummy value is 0.
201 | * `replace_uid`: If the tag's value is a UID, it will be replaced with a randomly generated UID of the form `"2.25.<uuid>"` where `<uuid>` is a UUID generated at run time. The new custom UID is stored by ImageDePHI and used to replace other UIDs that share the same initial value. This way, if a UID is used in different tags within an image, they all get the same replacement value.
202 | 
203 | ## Related Projects
204 | 
205 | Other efforts related to anonymizing medical images include:
206 | 
207 | - [`dicom-anonymizer`](https://github.com/KitwareMedical/dicom-anonymizer): A python tool for anonymizing DICOM files
208 | - [WSI DeID](https://github.com/DigitalSlideArchive/DSA-WSI-DeID): A workflow built onto the [Digital Slide Archive](https://github.com/DigitalSlideArchive/digital_slide_archive/?tab=readme-ov-file#digital-slide-archive) for redacting medical images.
209 | 


--------------------------------------------------------------------------------
/imagedephi/redact/dicom.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | from collections.abc import Generator
  4 | from datetime import date, datetime
  5 | from pathlib import Path
  6 | from typing import TYPE_CHECKING
  7 | from uuid import uuid4
  8 | 
  9 | import pydicom
 10 | from pydicom import valuerep
 11 | from pydicom.datadict import keyword_for_tag
 12 | from pydicom.dataelem import DataElement
 13 | from pydicom.dataset import Dataset
 14 | from pydicom.tag import BaseTag
 15 | 
 16 | from imagedephi.rules import (
 17 |     ConcreteMetadataRule,
 18 |     DeleteRule,
 19 |     DicomRules,
 20 |     FileFormat,
 21 |     KeepRule,
 22 |     MetadataReplaceRule,
 23 |     RedactionOperation,
 24 | )
 25 | from imagedephi.utils.logger import logger
 26 | 
 27 | from .redaction_plan import RedactionPlan
 28 | 
 29 | if TYPE_CHECKING:
 30 |     from .redaction_plan import RedactionPlanReport
 31 | 
 32 | 
 33 | VR_TO_DUMMY_VALUE: dict[str, str | float | int | list | bytes] = {}
 34 | for vr in valuerep.STR_VR:
 35 |     VR_TO_DUMMY_VALUE[vr] = ""
 36 | for vr in valuerep.FLOAT_VR:
 37 |     VR_TO_DUMMY_VALUE[vr] = 0.0
 38 | for vr in valuerep.INT_VR:
 39 |     VR_TO_DUMMY_VALUE[vr] = 0
 40 | for vr in valuerep.LIST_VR:
 41 |     VR_TO_DUMMY_VALUE[vr] = []
 42 | for vr in valuerep.BYTES_VR:
 43 |     VR_TO_DUMMY_VALUE[vr] = b""
 44 | 
 45 | VR_TO_EXPECTED_TYPE: dict[str, type] = {}
 46 | for vr in valuerep.STR_VR:
 47 |     VR_TO_EXPECTED_TYPE[vr] = str
 48 | for vr in valuerep.FLOAT_VR:
 49 |     VR_TO_EXPECTED_TYPE[vr] = float
 50 | for vr in valuerep.INT_VR:
 51 |     VR_TO_EXPECTED_TYPE[vr] = int
 52 | for vr in valuerep.LIST_VR:
 53 |     VR_TO_EXPECTED_TYPE[vr] = list
 54 | for vr in valuerep.BYTES_VR:
 55 |     VR_TO_EXPECTED_TYPE[vr] = bytes
 56 | 
 57 | WSI_IMAGE_TYPE_INDEX = 2
 58 | 
 59 | 
 60 | class DicomRedactionPlan(RedactionPlan):
 61 |     """
 62 |     Represents a plan of action for redacting metadata from DICOM images.
 63 | 
 64 |     Each instance of this class works on a single .dcm file.
 65 |     """
 66 | 
 67 |     file_format = FileFormat.DICOM
 68 |     image_path: Path
 69 |     dicom_data: pydicom.FileDataset
 70 |     image_type: str
 71 |     metadata_redaction_steps: dict[int, ConcreteMetadataRule]
 72 |     no_match_tags: list[BaseTag]
 73 |     uid_map: dict[str, str]
 74 | 
 75 |     @staticmethod
 76 |     def _iter_dicom_elements(
 77 |         dicom_dataset: Dataset,
 78 |     ) -> Generator[tuple[DataElement, Dataset], None, None]:
 79 |         for element in dicom_dataset:
 80 |             if element.VR == valuerep.VR.SQ:
 81 |                 for dataset in element.value:
 82 |                     yield from DicomRedactionPlan._iter_dicom_elements(dataset)
 83 |                 # Treat the sequence as its own element as well.
 84 |                 # Some of the rules generated from the DICOM docs
 85 |                 # include rules for sequences.
 86 |                 # Return the sequence after to protect against deletion while looping.
 87 |                 yield element, dicom_dataset
 88 |             else:
 89 |                 yield element, dicom_dataset
 90 | 
 91 |     def __init__(self, image_path: Path, rules: DicomRules, uid_map: dict[str, str] | None) -> None:
 92 |         self.image_path = image_path
 93 |         self.dicom_data = pydicom.dcmread(image_path)
 94 |         self.image_type = str(self.dicom_data.ImageType[WSI_IMAGE_TYPE_INDEX])
 95 | 
 96 |         self.metadata_redaction_steps = {}
 97 |         self.no_match_tags = []
 98 | 
 99 |         # Determine what, if any, action to take with this file's
100 |         # image data. Currently only matters for label and overview
101 |         # images.
102 |         self.associated_image_rule = rules.associated_images.get(self.image_type.lower(), None)
103 | 
104 |         # When redacting many files at a time, keep track of all UIDs across all files,
105 |         # since the DICOM format uses separate files for different resolutions and
106 |         # associated images.
107 |         self.uid_map = uid_map if uid_map else {}
108 | 
109 |         for element, _ in DicomRedactionPlan._iter_dicom_elements(self.dicom_data):
110 |             custom_metadata_key = "CustomMetadataItem"
111 |             keyword = keyword_for_tag(element.tag)
112 |             # Check keyword and (gggg,eeee) representation
113 |             tag_in_rules = keyword in rules.metadata or str(element.tag) in rules.metadata
114 |             if not tag_in_rules:
115 |                 # For custom metadata, attempt to fall back to the custom_metadata_action (this can
116 |                 # be overriden by rules for individual tags). If the custom metadata action is to
117 |                 # use the rules, skip generating these on-the-fly rules.
118 |                 if element.tag.group % 2 == 1 and rules.custom_metadata_action != "use_rule":
119 |                     if rules.custom_metadata_action == "delete":
120 |                         self.metadata_redaction_steps[element.tag] = DeleteRule(
121 |                             key_name=custom_metadata_key, action="delete"
122 |                         )
123 |                     elif rules.custom_metadata_action == "keep":
124 |                         self.metadata_redaction_steps[element.tag] = KeepRule(
125 |                             key_name=custom_metadata_key, action="keep"
126 |                         )
127 |                 else:
128 |                     self.no_match_tags.append(element.tag)
129 |                 continue
130 | 
131 |             rule_key = keyword if keyword in rules.metadata else str(element.tag)
132 |             rule = rules.metadata[rule_key]
133 |             if rule.action in [
134 |                 "keep",
135 |                 "delete",
136 |                 "replace",
137 |                 "check_type",
138 |                 "empty",
139 |                 "replace_uid",
140 |                 "replace_dummy",
141 |                 "modify_date",
142 |             ]:
143 |                 self.metadata_redaction_steps[element.tag] = rule
144 |             else:
145 |                 self.no_match_tags.append(element.tag)
146 |                 continue
147 | 
148 |     def passes_type_check(self, element: DataElement) -> bool:
149 |         return isinstance(element.value, VR_TO_EXPECTED_TYPE[element.VR])
150 | 
151 |     def determine_redaction_operation(
152 |         self, rule: ConcreteMetadataRule, element: DataElement
153 |     ) -> RedactionOperation:
154 |         if rule.action == "check_type":
155 |             return "keep" if self.passes_type_check(element) else "delete"
156 |         if rule.action in [
157 |             "keep",
158 |             "delete",
159 |             "replace",
160 |             "replace_uid",
161 |             "replace_dummy",
162 |             "empty",
163 |             "modify_date",
164 |         ]:
165 |             return rule.action
166 |         return "delete"
167 | 
168 |     def report_plan(self) -> RedactionPlanReport:
169 |         logger.debug("DICOM Metadata Redaction Plan\n")
170 |         if self.associated_image_rule:
171 |             if self.associated_image_rule.action == "delete":
172 |                 logger.info(
173 |                     f"This image is a DICOM {self.image_type}."
174 |                     "This file will not be written to the output directory."
175 |                 )
176 |                 return {}
177 |         report: RedactionPlanReport = {}
178 |         report[self.image_path.name] = {}
179 |         for element, _ in DicomRedactionPlan._iter_dicom_elements(self.dicom_data):
180 |             rule = self.metadata_redaction_steps.get(element.tag, None)
181 |             if rule:
182 |                 operation = self.determine_redaction_operation(rule, element)
183 |                 logger.debug(f"DICOM Tag {element.tag} - {rule.key_name}: {operation}")
184 |                 report[self.image_path.name][f"{element.tag}_{rule.key_name}"] = {
185 |                     "action": operation,
186 |                     "value": element.value,
187 |                 }
188 |         self.report_missing_rules(report)
189 |         return report
190 | 
191 |     def _get_modified_date(self, element: DataElement) -> str | None:
192 |         """
193 |         Return a fuzzy date, time, or UTC offset based on the value in the given date element.
194 | 
195 |         Given a DICOM data element of type DA (date), DT (datetime), TM (time), or SH
196 |         (specifically representing a UTC offset), return a value for the element to hold
197 |         that conforms with preserving some degree of information for these fields. For
198 |         example, dates are set to January first of the same year.
199 | 
200 |         Tags that are treated specially for this mode of redaction are documented here:
201 |         https://dicom.nema.org/dicom/2013/output/chtml/part15/chapter_E.html#table_E.1-1
202 |         """
203 |         if element.VR == valuerep.VR.DA.value:
204 |             old_date = valuerep.DA(element.value)
205 |             return str(valuerep.DA(date(year=old_date.year, month=1, day=1))) if old_date else None
206 |         elif element.VR == valuerep.VR.DT.value:
207 |             old_datetime = valuerep.DT(element.value)
208 |             return (
209 |                 str(valuerep.DT(datetime(year=old_datetime.year, month=1, day=1)))
210 |                 if old_datetime
211 |                 else None
212 |             )
213 |         elif element.VR == valuerep.VR.TM.value:
214 |             # Change time to midnight, drop precision below hour
215 |             return "00"
216 |         elif element.VR == valuerep.VR.SH.value:
217 |             # element.VR == "SH"
218 |             # For UTC offset, change to +0000 (no offset)
219 |             return "+0000"
220 |         return None
221 | 
222 |     def apply(self, rule: ConcreteMetadataRule, element: DataElement, dataset: Dataset):
223 |         operation = self.determine_redaction_operation(rule, element)
224 |         if operation == "delete":
225 |             # TODO make sure this works as expected, we are modifying a dataset
226 |             # while looping through it
227 |             del dataset[element.tag]
228 |         elif operation == "replace":
229 |             assert isinstance(rule, MetadataReplaceRule)
230 |             element.value = rule.new_value
231 |         elif operation == "empty":
232 |             element.value = None
233 |         elif operation == "replace_uid":
234 |             if element.value not in self.uid_map:
235 |                 new_uid = "2.25." + str(uuid4().int)
236 |                 self.uid_map[element.value] = str(new_uid)
237 |             element.value = self.uid_map[element.value]
238 |         elif operation == "replace_dummy":
239 |             element.value = VR_TO_DUMMY_VALUE[element.VR]
240 |         elif operation == "modify_date":
241 |             element.value = self._get_modified_date(element)
242 | 
243 |     def execute_plan(self) -> None:
244 |         if self.associated_image_rule:
245 |             if self.associated_image_rule.action != "delete":
246 |                 raise NotImplementedError(
247 |                     "Only 'delete' is supported for associated DICOM images at this time."
248 |                 )
249 |         for element, dataset in DicomRedactionPlan._iter_dicom_elements(self.dicom_data):
250 |             rule = self.metadata_redaction_steps[element.tag]
251 |             if rule is not None:
252 |                 self.apply(rule, element, dataset)
253 | 
254 |     def is_comprehensive(self) -> bool:
255 |         return not self.no_match_tags
256 | 
257 |     def report_missing_rules(self, report=None) -> None:
258 |         if self.is_comprehensive():
259 |             logger.info("The redaction plan is comprehensive.")
260 |             if report:
261 |                 report[self.image_path.name]["comprehensive"] = True
262 |         else:
263 |             logger.error(
264 |                 f"{self.image_path} - The following tags could not be redacted "
265 |                 "given the current set of rules."
266 |             )
267 |             if report is not None:
268 |                 report[self.image_path.name]["missing_tags"] = []
269 |                 report[self.image_path.name]["comprehensive"] = False
270 | 
271 |             for tag in self.no_match_tags:
272 |                 logger.error(f"Missing tag (dicom): {tag} - {keyword_for_tag(tag)}")
273 |                 if report is not None:
274 |                     report[self.image_path.name]["missing_tags"].append({tag: keyword_for_tag(tag)})
275 | 
276 |     def save(self, output_path: Path, overwrite: bool) -> None:
277 |         if self.associated_image_rule and self.associated_image_rule.action == "delete":
278 |             # Don't write this file to the output directory if it is marked to be deleted
279 |             return
280 |         if output_path.exists():
281 |             if overwrite:
282 |                 logger.info(f"Found existing redaction for {self.image_path.name}. Overwriting...")
283 |             else:
284 |                 logger.warn(
285 |                     f"Could not redact {self.image_path.name}, existing redacted file in output "
286 |                     "directory. Use the --overwrite-existing-output flag to overwrite previously "
287 |                     "redacted fiels."
288 |                 )
289 |                 return
290 |         self.dicom_data.save_as(output_path)
291 | 


--------------------------------------------------------------------------------
/imagedephi/redact/svs.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | import binascii
  4 | from pathlib import Path
  5 | from typing import TYPE_CHECKING
  6 | 
  7 | import tifftools
  8 | import tifftools.constants
  9 | 
 10 | from imagedephi.rules import (
 11 |     ConcreteMetadataRule,
 12 |     FileFormat,
 13 |     MetadataReplaceRule,
 14 |     RedactionOperation,
 15 |     SvsRules,
 16 | )
 17 | from imagedephi.utils.logger import logger
 18 | 
 19 | from .tiff import TiffRedactionPlan
 20 | 
 21 | if TYPE_CHECKING:
 22 |     from tifftools.tifftools import IFD
 23 | 
 24 |     from .redaction_plan import RedactionPlanReport
 25 | 
 26 | 
 27 | class SvsDescription:
 28 |     prefix: str
 29 |     metadata: dict[str, str | int | float]
 30 | 
 31 |     def try_get_numeric_value(self, value: str) -> str | int | float:
 32 |         """Given an ImageDescription value, return a number version of it if applicable."""
 33 |         try:
 34 |             int(value)
 35 |             return int(value)
 36 |         except ValueError:
 37 |             try:
 38 |                 float(value)
 39 |                 return float(value)
 40 |             except ValueError:
 41 |                 return value
 42 | 
 43 |     def __init__(self, svs_description_string: str):
 44 |         description_components = svs_description_string.split("|")
 45 |         self.prefix = description_components[0]
 46 | 
 47 |         self.metadata = {}
 48 |         for metadata_component in description_components[1:]:
 49 |             key, value = [token.strip() for token in metadata_component.split("=")]
 50 |             self.metadata[key] = self.try_get_numeric_value(value)
 51 | 
 52 |     def __str__(self) -> str:
 53 |         components = [self.prefix]
 54 |         components = components + [
 55 |             " = ".join([key, str(self.metadata[key])]) for key in self.metadata.keys()
 56 |         ]
 57 |         return "|".join(components)
 58 | 
 59 | 
 60 | class MalformedAperioFileError(Exception):
 61 |     """Raised when the program cannot process an Aperio/SVS file as expected."""
 62 | 
 63 |     ...
 64 | 
 65 | 
 66 | class SvsRedactionPlan(TiffRedactionPlan):
 67 |     """
 68 |     Represents a plan of action for redacting files in Aperio (.svs) format.
 69 | 
 70 |     Redaction for this type of file is similar to redaction for .tiff files, as the
 71 |     formats are similar. However, Aperio images store additional information in its
 72 |     ImageDescription tags. As a result, this tag is treated specially here.
 73 |     """
 74 | 
 75 |     file_format = FileFormat.SVS
 76 |     description_redaction_steps: dict[str, ConcreteMetadataRule]
 77 |     no_match_description_keys: set[str]
 78 |     rules: SvsRules
 79 | 
 80 |     def __init__(
 81 |         self,
 82 |         image_path: Path,
 83 |         rules: SvsRules,
 84 |         strict: bool = False,
 85 |     ) -> None:
 86 |         self.rules = rules
 87 |         self.image_redaction_steps = {}
 88 |         self.description_redaction_steps = {}
 89 |         self.no_match_description_keys = set()
 90 |         super().__init__(image_path, rules, strict)
 91 | 
 92 |         # For strict mode redactions, treat Aperio (.svs) images as if they were
 93 |         # plain tiffs. Skip special handling of image description metadata.
 94 |         if not strict:
 95 |             image_description_tag = tifftools.constants.Tag["ImageDescription"]
 96 |             if image_description_tag.value not in self.metadata_redaction_steps:
 97 |                 raise MalformedAperioFileError()
 98 |             del self.metadata_redaction_steps[image_description_tag.value]
 99 | 
100 |             ifds = self.tiff_info["ifds"]
101 |             for tag, ifd in self._iter_tiff_tag_entries(ifds):
102 |                 if tag.value != image_description_tag.value:
103 |                     continue
104 | 
105 |                 svs_description = SvsDescription(str(ifd["tags"][tag.value]["data"]))
106 | 
107 |                 for key in svs_description.metadata.keys():
108 |                     key_rule = rules.image_description.get(key, None)
109 |                     if key_rule and self.is_match(key_rule, key):
110 |                         self.description_redaction_steps[key] = key_rule
111 |                     else:
112 |                         self.no_match_description_keys.add(key)
113 | 
114 |     def get_associated_image_key_for_ifd(self, ifd: IFD) -> str:
115 |         """
116 |         Given a associated image IFD, return its semantic type.
117 | 
118 |         An associated image IFD is one that contains non-tiled image data.
119 | 
120 |         This will return `"default`" if no semantics can be determined.
121 |         """
122 |         # Check image description, it may contain 'macro' or 'label'
123 |         image_description_tag = tifftools.constants.Tag["ImageDescription"]
124 |         if image_description_tag.value in ifd["tags"]:
125 |             image_description = str(ifd["tags"][image_description_tag.value]["data"])
126 |             for key in self.rules.associated_images:
127 |                 if key in image_description:
128 |                     return key
129 | 
130 |         # Check NewSubFileType bitmask. 'macro' could be encoded here
131 |         newsubfiletype_tag = tifftools.constants.Tag["NewSubfileType"]
132 |         if newsubfiletype_tag.value in ifd["tags"]:
133 |             newsubfiletype = ifd["tags"][newsubfiletype_tag.value]["data"][0]
134 |             reduced_image_bit = tifftools.constants.NewSubfileType["ReducedImage"].value
135 |             macro_bit = tifftools.constants.NewSubfileType["Macro"].value
136 |             if newsubfiletype & reduced_image_bit and newsubfiletype & macro_bit:
137 |                 return "macro"
138 |         return "default"
139 | 
140 |     def is_match(self, rule: ConcreteMetadataRule, data: tifftools.TiffTag | str) -> bool:
141 |         if rule.action in ["keep", "delete", "replace", "check_type", "modify_date"]:
142 |             if isinstance(data, tifftools.TiffTag):
143 |                 return super().is_match(rule, data)
144 |             return rule.key_name == data
145 |         return False
146 | 
147 |     def determine_redaction_operation(
148 |         self, rule: ConcreteMetadataRule, data: SvsDescription | IFD
149 |     ) -> RedactionOperation:
150 |         if isinstance(data, SvsDescription):
151 |             if rule.action == "check_type":
152 |                 value = data.metadata[rule.key_name]
153 |                 passes_check = self.passes_type_check(
154 |                     value, rule.valid_data_types, rule.expected_count
155 |                 )
156 |                 return "keep" if passes_check else "delete"
157 |             if rule.action in ["keep", "replace", "delete", "modify_date"]:
158 |                 return rule.action
159 |         else:
160 |             return super().determine_redaction_operation(rule, data)
161 |         return "delete"
162 | 
163 |     def apply(self, rule: ConcreteMetadataRule, data: SvsDescription | IFD) -> None:
164 |         if isinstance(data, SvsDescription):
165 |             redaction_operation = self.determine_redaction_operation(rule, data)
166 |             if redaction_operation == "delete":
167 |                 del data.metadata[rule.key_name]
168 |             elif redaction_operation == "replace":
169 |                 assert isinstance(rule, MetadataReplaceRule)
170 |                 data.metadata[rule.key_name] = rule.new_value
171 |             elif redaction_operation == "modify_date":
172 |                 # The "Date" field in the SVS desription appears to follow the format
173 |                 # MM/DD/YY
174 |                 if rule.key_name == "Date":
175 |                     try:
176 |                         current_value = str(data.metadata[rule.key_name])
177 |                         _, _, year = current_value.split("/")
178 |                         new_value = f"01/01/{year}"
179 |                     except Exception:
180 |                         new_value = None
181 |                 elif rule.key_name == "Time":
182 |                     new_value = "00:00:00"
183 |                 elif rule.key_name == "Time Zone":
184 |                     new_value = "GMT+0000"
185 |                 if not new_value:
186 |                     del data.metadata[rule.key_name]
187 |                 else:
188 |                     data.metadata[rule.key_name] = new_value
189 |             return
190 |         return super().apply(rule, data)
191 | 
192 |     def is_comprehensive(self) -> bool:
193 |         return super().is_comprehensive() and not self.no_match_description_keys
194 | 
195 |     def report_missing_rules(self, report=None) -> None:
196 |         if self.is_comprehensive():
197 |             logger.info("The redaction plan is comprehensive.")
198 |             if report:
199 |                 report[self.image_path.name]["comprehensive"] = True
200 |         else:
201 |             if self.no_match_tags:
202 |                 super().report_missing_rules(report)
203 |             if self.no_match_description_keys:
204 |                 logger.error(
205 |                     "The following keys were found in Aperio ImageDescription strings "
206 |                     "and could not be redacted given the current set of rules."
207 |                 )
208 |                 if report:
209 |                     report[self.image_path.name]["comprehensive"] = False
210 |                 for key in self.no_match_description_keys:
211 |                     logger.error(f"Missing key (Aperio ImageDescription): {key}")
212 |                     if report is not None:
213 |                         report[self.image_path.name]["missing_description_keys"].append(key)
214 | 
215 |     def report_plan(
216 |         self,
217 |     ) -> RedactionPlanReport:
218 |         logger.debug("Aperio (.svs) Metadata Redaction Plan\n")
219 |         offset = -1
220 |         ifd_count = 0
221 |         report: RedactionPlanReport = {}
222 |         report[self.image_path.name] = {}
223 |         for tag, ifd in self._iter_tiff_tag_entries(self.tiff_info["ifds"]):
224 |             if ifd["offset"] != offset:
225 |                 offset = ifd["offset"]
226 |                 ifd_count += 1
227 |                 logger.debug(f"IFD {ifd_count}:")
228 |             if tag.value == tifftools.constants.Tag["ImageDescription"] and not self.strict:
229 |                 image_description = SvsDescription(str(ifd["tags"][tag.value]["data"]))
230 |                 for key_name, _data in image_description.metadata.items():
231 |                     rule = self.description_redaction_steps[key_name]
232 |                     operation = self.determine_redaction_operation(rule, image_description)
233 |                     logger.debug(f"SVS Image Description - {key_name}: {operation}")
234 |                     report[self.image_path.name][key_name] = {"action": operation, "value": _data}
235 |                 continue
236 |             if tag.value not in self.no_match_tags:
237 |                 rule = self.metadata_redaction_steps[tag.value]
238 |                 operation = self.determine_redaction_operation(rule, ifd)
239 |                 logger.debug(f"Tiff Tag {tag.value} - {rule.key_name}: {operation}")
240 |                 if (
241 |                     ifd["tags"][tag.value]["datatype"]
242 |                     == tifftools.constants.Datatype.UNDEFINED.value
243 |                 ):
244 |                     encoded_value: dict[str, str | int] = {
245 |                         "value": f"0x{binascii.hexlify(ifd['tags'][tag.value]['data'] ).decode('utf-8')}",  # type: ignore # noqa: E501
246 |                         "bytes": len(ifd["tags"][tag.value]["data"]),
247 |                     }
248 |                     report[self.image_path.name][rule.key_name] = {
249 |                         "action": operation,
250 |                         "binary": encoded_value,
251 |                     }
252 |                 else:
253 |                     report[self.image_path.name][rule.key_name] = {
254 |                         "action": operation,
255 |                         "value": ifd["tags"][tag.value]["data"],
256 |                     }
257 |         self.report_missing_rules(report)
258 |         logger.debug("Aperio (.svs) Associated Image Redaction Plan\n")
259 |         # Report the number of associated images found in the image that match each associated
260 |         # image rule.
261 |         associated_image_count_by_rule = {}
262 |         for _, image_rule in self.image_redaction_steps.items():
263 |             if image_rule.key_name not in associated_image_count_by_rule:
264 |                 associated_image_count_by_rule[image_rule.key_name] = 1
265 |             else:
266 |                 associated_image_count_by_rule[image_rule.key_name] = (
267 |                     associated_image_count_by_rule[image_rule.key_name] + 1
268 |                 )
269 |         for key in associated_image_count_by_rule:
270 |             logger.debug(
271 |                 f"{associated_image_count_by_rule[key]} image(s) match rule:"
272 |                 f" {key} - {self.rules.associated_images[key].action}"
273 |             )
274 | 
275 |         return report
276 | 
277 |     def _redact_svs_image_description(self, ifd: IFD) -> None:
278 |         image_description_tag = tifftools.constants.Tag["ImageDescription"]
279 |         image_description = SvsDescription(str(ifd["tags"][image_description_tag.value]["data"]))
280 | 
281 |         # We may be modifying the dictionary as we iterate over its keys,
282 |         # hence the need for a list
283 |         for key in list(image_description.metadata.keys()):
284 |             rule = self.description_redaction_steps.get(key)
285 |             if rule is not None:
286 |                 self.apply(rule, image_description)
287 |         ifd["tags"][image_description_tag.value]["data"] = str(image_description)
288 | 
289 |     def execute_plan(self) -> None:
290 |         ifds = self.tiff_info["ifds"]
291 |         new_ifds = self._redact_associated_images(ifds)
292 |         image_description_tag = tifftools.constants.Tag["ImageDescription"]
293 |         for tag, ifd in self._iter_tiff_tag_entries(new_ifds):
294 |             rule = self.metadata_redaction_steps.get(tag.value)
295 |             if rule is not None:
296 |                 self.apply(rule, ifd)
297 |             elif tag.value == image_description_tag.value and not self.strict:
298 |                 self._redact_svs_image_description(ifd)
299 |         self.tiff_info["ifds"] = new_ifds
300 | 


--------------------------------------------------------------------------------