├── imagedephi ├── __init__.py ├── utils │ ├── __init__.py │ ├── logger.py │ ├── progress_log.py │ ├── image.py │ ├── network.py │ ├── dicom.py │ ├── os.py │ ├── cli.py │ └── tiff.py ├── gui │ ├── utils │ │ ├── __init__.py │ │ ├── constants.py │ │ ├── directory.py │ │ └── image.py │ ├── api │ │ ├── __init__.py │ │ └── api.py │ ├── __init__.py │ └── app.py ├── __main__.py ├── command_file.py ├── redact │ ├── __init__.py │ ├── redaction_plan.py │ ├── build_redaction_plan.py │ ├── dicom.py │ └── svs.py ├── logging.conf ├── demo_files.csv ├── minimum_rules.yaml └── rules.py ├── client ├── .yarnrc.yml ├── src │ ├── vite-env.d.ts │ ├── style.css │ ├── shims-vue.d.ts │ ├── App.vue │ ├── main.ts │ ├── store │ │ ├── redactionStore.ts │ │ ├── types.ts │ │ ├── imageStore.ts │ │ └── directoryStore.ts │ ├── components │ │ ├── ImageDataDisplay.vue │ │ ├── InfiniteScroller.vue │ │ ├── MenuSteps.vue │ │ ├── ImageDataTable.vue │ │ └── FileBrowser.vue │ ├── api │ │ └── rest.ts │ └── HomePage.vue ├── public │ ├── logo.png │ ├── associatedPlaceholder.svg │ └── thumbnailPlaceholder.svg ├── postcss.config.js ├── .env.development ├── vite.config.ts ├── tsconfig.node.json ├── README.md ├── .gitignore ├── index.html ├── tsconfig.json ├── eslint.config.mjs ├── tailwind.config.js └── package.json ├── .gitattributes ├── docs ├── images │ ├── initial_ui.png │ ├── image_grid_errors_ui.png │ ├── image_grid_success_ui.png │ ├── redaction_complete_ui.png │ ├── redaction_progress_ui.png │ ├── step_3_ruleset_select_ruleset.png │ ├── step_1_input_directory_open_browser.png │ ├── step_1_input_directory_select_directory.png │ └── step_2_output_directory_select_directory.png ├── development.md └── demo.md ├── .github ├── dependabot.yaml ├── zip_and_upload_package.sh └── workflows │ ├── release.yaml │ └── ci.yaml ├── .git-blame-ignore-revs ├── stubs └── tifftools │ ├── exceptions.pyi │ ├── __init__.pyi │ ├── tifftools.pyi │ └── constants.pyi ├── .editorconfig ├── tests ├── override_rule_sets │ └── 
example_user_rules.yaml ├── test_utils_network.py ├── test_utils_os.py ├── test_utils_cli.py ├── test_gui.py ├── conftest.py ├── test_e2e.py └── test_redact.py ├── pyproject.toml ├── tox.ini ├── .gitignore ├── LICENSE └── README.md /imagedephi/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /imagedephi/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /imagedephi/gui/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /client/.yarnrc.yml: -------------------------------------------------------------------------------- 1 | --- 2 | nodeLinker: pnp 3 | -------------------------------------------------------------------------------- /client/src/vite-env.d.ts: -------------------------------------------------------------------------------- 1 | /// 2 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | **/tests/data/** filter=lfs diff=lfs merge=lfs -text 2 | -------------------------------------------------------------------------------- /imagedephi/gui/utils/constants.py: -------------------------------------------------------------------------------- 1 | MAX_ASSOCIATED_IMAGE_SIZE = 160 2 | -------------------------------------------------------------------------------- /imagedephi/gui/api/__init__.py: -------------------------------------------------------------------------------- 1 | from .api import router 2 | 3 | __all__ = ["router"] 4 | -------------------------------------------------------------------------------- /client/src/style.css: 
-------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; 4 | -------------------------------------------------------------------------------- /client/public/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DigitalSlideArchive/ImageDePHI/HEAD/client/public/logo.png -------------------------------------------------------------------------------- /imagedephi/gui/__init__.py: -------------------------------------------------------------------------------- 1 | from .app import app, shutdown_event 2 | 3 | __all__ = ["app", "shutdown_event"] 4 | -------------------------------------------------------------------------------- /client/src/shims-vue.d.ts: -------------------------------------------------------------------------------- 1 | declare module "*.vue" { 2 | import Vue from "vue"; 3 | export default Vue; 4 | } 5 | -------------------------------------------------------------------------------- /docs/images/initial_ui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DigitalSlideArchive/ImageDePHI/HEAD/docs/images/initial_ui.png -------------------------------------------------------------------------------- /imagedephi/__main__.py: -------------------------------------------------------------------------------- 1 | from imagedephi import main 2 | 3 | if __name__ == "__main__": 4 | main.imagedephi() 5 | -------------------------------------------------------------------------------- /client/postcss.config.js: -------------------------------------------------------------------------------- 1 | export default { 2 | plugins: { 3 | tailwindcss: {}, 4 | autoprefixer: {}, 5 | }, 6 | }; 7 | -------------------------------------------------------------------------------- /docs/images/image_grid_errors_ui.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/DigitalSlideArchive/ImageDePHI/HEAD/docs/images/image_grid_errors_ui.png -------------------------------------------------------------------------------- /docs/images/image_grid_success_ui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DigitalSlideArchive/ImageDePHI/HEAD/docs/images/image_grid_success_ui.png -------------------------------------------------------------------------------- /docs/images/redaction_complete_ui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DigitalSlideArchive/ImageDePHI/HEAD/docs/images/redaction_complete_ui.png -------------------------------------------------------------------------------- /docs/images/redaction_progress_ui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DigitalSlideArchive/ImageDePHI/HEAD/docs/images/redaction_progress_ui.png -------------------------------------------------------------------------------- /client/.env.development: -------------------------------------------------------------------------------- 1 | # This could leak to production. 
Should only be used in DEBUG mode for FastAPI 2 | VITE_APP_API_URL=http://127.0.0.1:8000 3 | -------------------------------------------------------------------------------- /docs/images/step_3_ruleset_select_ruleset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DigitalSlideArchive/ImageDePHI/HEAD/docs/images/step_3_ruleset_select_ruleset.png -------------------------------------------------------------------------------- /client/src/App.vue: -------------------------------------------------------------------------------- 1 | 4 | 5 | 8 | -------------------------------------------------------------------------------- /client/public/associatedPlaceholder.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /docs/images/step_1_input_directory_open_browser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DigitalSlideArchive/ImageDePHI/HEAD/docs/images/step_1_input_directory_open_browser.png -------------------------------------------------------------------------------- /.github/dependabot.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | version: 2 3 | updates: 4 | - package-ecosystem: "github-actions" 5 | directory: "/" 6 | schedule: 7 | interval: "weekly" 8 | -------------------------------------------------------------------------------- /docs/images/step_1_input_directory_select_directory.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DigitalSlideArchive/ImageDePHI/HEAD/docs/images/step_1_input_directory_select_directory.png -------------------------------------------------------------------------------- /docs/images/step_2_output_directory_select_directory.png: 
# Pydantic model describing the contents of a command file.
# NOTE(review): an earlier version of this comment listed "command" as a second required
# field, but only input_paths is declared below -- confirm whether "command" is still expected.
class CommandFile(BaseModel):
    # Declared without a default value, so pydantic treats this field as required.
    input_paths: list[str]
# Configure logging from a "logging.conf" in the current working directory when one exists,
# otherwise from the copy shipped inside the "imagedephi" package.
# The whole selection stays inside the ``try`` so that the same exceptions are tolerated
# regardless of which path is chosen.
try:
    if os.path.exists("logging.conf"):
        logging.config.fileConfig("logging.conf")
    else:
        logging.config.fileConfig(
            str(importlib.resources.files("imagedephi") / "logging.conf")
        )
except (FileNotFoundError, KeyError):
    # Best effort: presumably a missing or incomplete config file should leave the
    # interpreter's default logging setup untouched rather than abort the import.
    pass

logger = logging.getLogger("root")
-------------------------------------------------------------------------------- 1 | [loggers] 2 | keys=root 3 | 4 | [handlers] 5 | keys=consoleHandler 6 | 7 | [formatters] 8 | keys=messageFormatter 9 | 10 | [logger_root] 11 | level=WARNING 12 | handlers=consoleHandler 13 | qualname=root 14 | propagate=0 15 | 16 | [handler_consoleHandler] 17 | class=StreamHandler 18 | args=(sys.stdout,) 19 | formatter=messageFormatter 20 | 21 | [formatter_messageFormatter] 22 | format=%(asctime)s - %(levelname)s - %(message)s 23 | -------------------------------------------------------------------------------- /client/README.md: -------------------------------------------------------------------------------- 1 | ## Development 2 | For efficient front-end development, run the following in the project root: 3 | 4 | ```bash 5 | export DEBUG=True 6 | hypercorn --reload imagedephi.gui.app:app 7 | ``` 8 | 9 | In a new terminal: 10 | 11 | ```bash 12 | cd client/ 13 | yarn dev 14 | ``` 15 | 16 | **Note** 17 | `imagedephi gui` will break and tests will fail in debug mode. Remember to reset the variable when done with development. 18 | 19 | ```bash 20 | export DEBUG=False 21 | ``` 22 | -------------------------------------------------------------------------------- /client/.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | pnpm-debug.log* 8 | lerna-debug.log* 9 | 10 | 11 | dist 12 | dist-ssr 13 | *.local 14 | 15 | # Editor directories and files 16 | .vscode/* 17 | !.vscode/extensions.json 18 | .idea 19 | .DS_Store 20 | *.suo 21 | *.ntvs* 22 | *.njsproj 23 | *.sln 24 | *.sw? 
25 | 26 | # Yarn directories 27 | node_modules/* 28 | .pnp.* 29 | .yarn/* 30 | !.yarn/patches 31 | !.yarn/plugins 32 | !.yarn/releases 33 | !.yarn/versions 34 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | indent_style = space 5 | end_of_line = lf 6 | insert_final_newline = true 7 | trim_trailing_whitespace = true 8 | charset = utf-8 9 | 10 | [*.toml] 11 | indent_size = 2 12 | 13 | [*.py] 14 | indent_size = 4 15 | max_line_length = 100 16 | 17 | [*.js] 18 | indent_size = 2 19 | max_line_length = 100 20 | 21 | [{*.html,*.html.j2}] 22 | indent_size = 2 23 | 24 | [*.css] 25 | indent_size = 2 26 | 27 | [*.json] 28 | indent_size = 2 29 | 30 | [{*.yml,*.yaml}] 31 | indent_size = 2 32 | -------------------------------------------------------------------------------- /imagedephi/utils/progress_log.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import queue 3 | 4 | _progress_queue: queue.Queue[tuple] = queue.Queue(-1) 5 | 6 | 7 | def push_progress(count: int, max: int, redact_dir: Path) -> None: 8 | _progress_queue.put_nowait((count, max, redact_dir)) 9 | 10 | 11 | def get_next_progress_message() -> tuple | None: 12 | try: 13 | record = _progress_queue.get_nowait() 14 | except queue.Empty: 15 | return None 16 | else: 17 | # return record.message 18 | return record 19 | -------------------------------------------------------------------------------- /stubs/tifftools/__init__.pyi: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from .constants import Datatype, Tag, TiffDatatype, TiffTag 4 | from .exceptions import MustBeBigTiffError, TifftoolsError, UnknownTagError 5 | from .tifftools import read_tiff, write_tiff 6 | 7 | __version__: str 8 | 9 | __all__ = [ 10 | 
#!/bin/bash
#
# Zip the built imagedephi executable found in dist/ and attach it to a GitHub release.
#
# Usage: zip_and_upload_package.sh <runner_os> <tag_name>
#   runner_os  Prefix of the archive name and part of the asset label; the value
#              "Windows" selects PowerShell's Compress-Archive instead of zip.
#   tag_name   Release tag handed to `gh release upload`.

set -ex

# Abort with a clear message instead of building "-imagedephi-cli.zip" or uploading to
# an empty tag when an argument is missing.
runner_os="${1:?runner_os argument is required}"
tag_name="${2:?tag_name argument is required}"

cd dist/

if [ -f "./imagedephi.exe" ]; then
    executable="imagedephi.exe"
else
    executable="imagedephi"
fi

# All expansions are quoted so values are never word-split or glob-expanded.
chmod +x "$executable"
zipfile="${runner_os}-imagedephi-cli.zip"

if [[ "$runner_os" = "Windows" ]]; then
    powershell Compress-Archive "$executable" "$zipfile"
else
    zip "$zipfile" "$executable"
fi

# "<file>#<label>" sets the display label of the uploaded asset; --clobber replaces an
# existing asset of the same name.
gh release upload \
    "$tag_name" \
    "${zipfile}#${runner_os} executable" \
    --clobber
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ImageDePHI 10 | 11 | 12 |
def get_file_format_from_path(image_path: Path) -> FileFormat | None:
    """
    Attempt to determine the file type of an image by looking at its file signature.

    See https://en.wikipedia.org/wiki/List_of_file_signatures. In case of a "dual-flavor" DICOM
    file (i.e. a file that can be read as a DICOM or a tiff), prefer to report the image as
    DICOM.

    Only the first 132 bytes of the file are read. Returns ``FileFormat.DICOM`` when bytes
    128-131 are ``DICM``, ``FileFormat.TIFF`` when the first four bytes are one of the listed
    TIFF signatures, and ``None`` otherwise (including for files shorter than the
    signature being checked). Errors from opening or reading the file propagate to the caller.
    """
    # Use a context manager so the handle is closed deterministically instead of being left
    # for the garbage collector (the original never closed it).
    with open(image_path, "rb") as image_file:
        data = image_file.read(132)

    # The DICOM check comes first so that dual-flavor files are reported as DICOM.
    if data[128:] == b"DICM":
        return FileFormat.DICOM
    if data[:4] in (b"II\x2a\x00", b"MM\x00\x2a", b"II\x2b\x00", b"MM\x00\x2b"):
        return FileFormat.TIFF
    return None
async def wait_for_port(port: int, host: str = "127.0.0.1", poll_interval: float = 0.05) -> None:
    """
    Block until a TCP port on the specified host can be opened.

    A connection is attempted repeatedly; each refused attempt is followed by a pause of
    ``poll_interval`` seconds so that waiting does not spin the event loop at full speed.
    The probe connection is closed again as soon as it succeeds.

    Only ``ConnectionRefusedError`` is treated as "not ready yet"; any other error raised by
    ``asyncio.open_connection`` propagates to the caller. There is no built-in timeout, so
    callers that need one should wrap the call (e.g. with ``asyncio.wait_for``).
    """
    while True:
        try:
            _, writer = await asyncio.open_connection(host, port)
        except ConnectionRefusedError:
            # Nothing is listening yet: back off briefly instead of retrying immediately.
            await asyncio.sleep(poll_interval)
        else:
            writer.close()
            await writer.wait_closed()
            return


def unused_tcp_port() -> int:
    """
    Return a TCP port number on 127.0.0.1 that was free when this function ran.

    The probing socket is closed before returning, so another process could claim the port
    before the caller binds it.
    """
    with socket.socket() as sock:
        # Ensure the port can be immediately reused
        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        # Specifying 0 as the port will select a dynamic ephemeral port
        sock.bind(("127.0.0.1", 0))
        _, sock_port = sock.getsockname()
    return sock_port
class RedactionPlan:
    """
    Interface shared by redaction plans.

    NOTE(review): this class does not inherit from ``abc.ABC`` (nor use ``ABCMeta`` as its
    metaclass), so the ``@abc.abstractmethod`` markers below are not enforced when a subclass
    is instantiated -- confirm whether that is intentional.
    """

    # Only annotated here; no value is assigned in this class.
    file_format: FileFormat

    @abc.abstractmethod
    def report_plan(self) -> RedactionPlanReport:
        """Return the plan as a ``RedactionPlanReport`` mapping."""
        ...

    @abc.abstractmethod
    def execute_plan(self) -> None:
        """Carry out the plan; presumably mutates state held by the subclass -- TODO confirm."""
        ...

    @abc.abstractmethod
    def is_comprehensive(self) -> bool:
        """Return whether the plan redacts all metadata and/or images needed."""
        ...

    @abc.abstractmethod
    def report_missing_rules(self, report=None) -> None:
        """
        Report rules the plan is missing.

        NOTE(review): the expected type of ``report`` is not visible here; it defaults to
        ``None`` -- confirm against the subclasses.
        """
        ...

    @abc.abstractmethod
    def save(self, output_path: Path, overwrite: bool) -> None:
        """
        Save the result to ``output_path``.

        NOTE(review): how ``overwrite`` is honored is left to subclasses -- confirm.
        """
        ...
38 | -------------------------------------------------------------------------------- /tests/test_utils_network.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from collections.abc import AsyncGenerator 3 | import socket 4 | 5 | import pytest 6 | import pytest_asyncio 7 | 8 | from imagedephi.utils.network import unused_tcp_port, wait_for_port 9 | 10 | 11 | @pytest_asyncio.fixture 12 | async def server(unused_tcp_port: int) -> AsyncGenerator[asyncio.Server, None]: 13 | def server_callback(reader: asyncio.StreamReader, writer: asyncio.StreamWriter) -> None: 14 | pass 15 | 16 | server = await asyncio.start_server(server_callback, "127.0.0.1", unused_tcp_port) 17 | async with server: 18 | yield server 19 | server.sockets[0] 20 | 21 | 22 | @pytest.mark.timeout(1) 23 | @pytest.mark.asyncio 24 | async def test_utils_network_wait_for_port(server: asyncio.Server) -> None: 25 | server_port = server.sockets[0].getsockname()[1] 26 | 27 | await wait_for_port(server_port) 28 | 29 | 30 | def test_utils_network_unused_tcp_port() -> None: 31 | port = unused_tcp_port() 32 | 33 | # This will raise an OSError if the port is already in use 34 | with socket.create_server(("127.0.0.1", port)) as sock: 35 | assert sock 36 | -------------------------------------------------------------------------------- /stubs/tifftools/tifftools.pyi: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from os import PathLike 4 | from typing import BinaryIO, Literal, NotRequired, TypeAlias, TypedDict 5 | 6 | _PathOrStream: TypeAlias = str | PathLike[str] | BinaryIO 7 | 8 | class TiffInfo(TypedDict): 9 | ifds: list[IFD] 10 | path_or_fobj: _PathOrStream 11 | size: int 12 | header: bytes 13 | bigEndian: bool 14 | bigtiff: bool 15 | endianPack: Literal[">", "<"] 16 | firstifd: int 17 | 18 | class IFD(TypedDict): 19 | offset: int 20 | tags: dict[int, TagEntry] 21 | 
path_or_fobj: _PathOrStream 22 | size: int 23 | bigEndian: bool 24 | bigtiff: bool 25 | tagcount: int 26 | 27 | TagData = str | bytes | list[int | float] 28 | 29 | class TagEntry(TypedDict): 30 | datatype: int 31 | count: int 32 | datapos: int 33 | offset: NotRequired[int] 34 | ifds: NotRequired[list[list[IFD]]] 35 | data: TagData 36 | 37 | def read_tiff(path: _PathOrStream) -> TiffInfo: ... 38 | def write_tiff( 39 | ifds: TiffInfo | IFD | list[IFD], 40 | path: _PathOrStream, 41 | bigEndian: bool | None = ..., 42 | bigtiff: bool | None = ..., 43 | allowExisting: bool = ..., 44 | ifdsFirst: bool = ..., 45 | ) -> None: ... 46 | -------------------------------------------------------------------------------- /docs/development.md: -------------------------------------------------------------------------------- 1 | # Development 2 | 3 | ## Installation 4 | To install for development: 5 | * [Create and activate a Python virtual environment](https://docs.python.org/3/library/venv.html). 6 | * Install for local development: 7 | ```bash 8 | pip install -e . 9 | ``` 10 | * Install [Tox](https://tox.wiki/) to run development tasks: 11 | ```bash 12 | pip install tox 13 | ``` 14 | 15 | ## Running the CLI 16 | With the virtual environment active, run the CLI: 17 | ```bash 18 | imagedephi 19 | ``` 20 | 21 | ### Development 22 | #### Requirements 23 | 24 | ```bash 25 | python ^3.11 26 | node ^20 27 | ``` 28 | 29 | #### Initial Install 30 | This project uses yarn modern. 
As such you'll need to enable corepack to detect the correct yarn version: 31 | 32 | ```bash 33 | cd client/ 34 | corepack enable 35 | ``` 36 | 37 | 38 | #### Developing the Web GUI 39 | While developing the web GUI, it may be useful to launch a web server 40 | that auto-reloads code changes and shows in-browser exception tracebacks: 41 | ```bash 42 | DEBUG=1 hypercorn --reload imagedephi.gui:app 43 | ``` 44 | 45 | ## Auto-format Code Changes: 46 | To format all code to comply with style rules: 47 | ```bash 48 | tox -e format 49 | ``` 50 | 51 | ## Running Tests 52 | To run all tests: 53 | ```bash 54 | tox 55 | ``` 56 | -------------------------------------------------------------------------------- /client/tailwind.config.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable @typescript-eslint/no-require-imports */ 2 | /** @type {import('tailwindcss').Config} */ 3 | module.exports = { 4 | content: ["./index.html", "./src/**/*.{vue,js,ts}"], 5 | theme: { 6 | extend: { 7 | fontFamily: { 8 | sans: ["Roboto", "sans-serif"], 9 | }, 10 | colors: { 11 | primary: "#5A387C", 12 | 13 | secondary: "#00A6BF", 14 | 15 | accent: "#FF6A6A", 16 | 17 | neutral: "#201C35", 18 | 19 | "base-100": "#FFFFFF", 20 | 21 | info: "#3ABFF8", 22 | 23 | success: "#36D399", 24 | 25 | warning: "#FBBD23", 26 | 27 | error: "#F87272", 28 | 29 | secondaryContent: "#E8F2F3", 30 | }, 31 | }, 32 | }, 33 | plugins: [require("daisyui")], 34 | daisyui: { 35 | themes: [ 36 | { 37 | light: { 38 | 39 | ...require("daisyui/src/theming/themes")["light"], 40 | 41 | primary: "#5A387C", 42 | 43 | secondary: "#00A6BF", 44 | 45 | accent: "#FF6A6A", 46 | 47 | neutral: "#201C35", 48 | 49 | "base-100": "#FFFFFF", 50 | 51 | info: "#3ABFF8", 52 | 53 | success: "#36D399", 54 | 55 | warning: "#FBBD23", 56 | 57 | error: "#F87272", 58 | 59 | secondaryContent: "#E8F2F3", 60 | }, 61 | }, 62 | ], 63 | }, 64 | }; 65 | 
-------------------------------------------------------------------------------- /imagedephi/gui/utils/directory.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | 4 | from imagedephi.redact import iter_image_dirs 5 | 6 | 7 | class DirectoryData: 8 | directory: Path 9 | ancestors: list[dict[str, str | Path]] 10 | child_directories: list[dict[str, str | Path]] 11 | child_images: list[dict[str, str | Path]] 12 | child_yaml_files: list[dict[str, str | Path]] 13 | 14 | def __init__(self, directory: Path): 15 | self.directory = directory 16 | 17 | self.ancestors = [ 18 | {"name": ancestor.name, "path": ancestor} for ancestor in reversed(directory.parents) 19 | ] 20 | self.ancestors.append({"name": directory.name, "path": directory}) 21 | 22 | self.child_directories = [ 23 | {"name": child.name, "path": child} 24 | for child in directory.iterdir() 25 | if child.is_dir() and os.access(child, os.R_OK) 26 | ] 27 | 28 | self.child_images = [ 29 | {"name": image.name, "path": image} for image in list(iter_image_dirs([directory])) 30 | ] 31 | self.child_yaml_files = [ 32 | {"name": yaml_file.name, "path": yaml_file} for yaml_file in _iter_yaml_files(directory) 33 | ] 34 | 35 | 36 | def _iter_yaml_files(directory: Path): 37 | for child in directory.iterdir(): 38 | if child.is_file() and child.suffix == ".yaml": 39 | yield child 40 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | name: release 3 | on: 4 | release: 5 | types: 6 | - published 7 | jobs: 8 | publish: 9 | runs-on: ${{ matrix.os }} 10 | strategy: 11 | matrix: 12 | os: 13 | # Use an older Linux: https://pyinstaller.org/en/stable/usage.html#making-gnu-linux-apps-forward-compatible 14 | - ubuntu-22.04 15 | - macos-latest 16 | - windows-latest 17 | steps: 18 | - uses: 
actions/checkout@v6 19 | with: 20 | # LFS data is not needed for release 21 | lfs: false 22 | # Tags are needed to compute the current version number 23 | fetch-depth: 0 24 | - name: Set up Python 25 | uses: actions/setup-python@v6 26 | with: 27 | python-version: "3.11" 28 | - name: Set up Node.js 29 | uses: actions/setup-node@v6 30 | with: 31 | node-version: "20" 32 | - name: Enable Corepack 33 | run: | 34 | corepack enable 35 | - name: Install tox 36 | run: | 37 | pip install --upgrade pip 38 | pip install tox 39 | - name: Build binary 40 | run: | 41 | tox -e binary 42 | - name: Zip and upload binary 43 | run: | 44 | .github/zip_and_upload_package.sh ${{ runner.os }} ${{ github.event.release.tag_name }} 45 | shell: bash 46 | env: 47 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 48 | -------------------------------------------------------------------------------- /client/src/components/ImageDataDisplay.vue: -------------------------------------------------------------------------------- 1 | 35 | 36 | 46 | -------------------------------------------------------------------------------- /client/src/components/InfiniteScroller.vue: -------------------------------------------------------------------------------- 1 | 24 | 33 | 60 | -------------------------------------------------------------------------------- /tests/test_utils_os.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import pytest 4 | from pytest_mock import MockerFixture 5 | 6 | from imagedephi.utils.os import launched_from_frozen_binary, launched_from_windows_explorer 7 | 8 | 9 | @pytest.mark.parametrize("frozen", [False, True]) 10 | def test_utils_os_launched_from_frozen_binary(frozen: bool, mocker: MockerFixture) -> None: 11 | mocker.patch("sys.frozen", new=frozen, create=True) 12 | 13 | result = launched_from_frozen_binary() 14 | 15 | assert result is frozen 16 | 17 | 18 | @pytest.mark.skipif(sys.platform == "win32", reason="non-windows only") 
def file_is_same_series_as(original_path: Path, path: Path) -> bool:
    """
    Determine if path belongs to the same series as original_path.

    These heuristics match those defined in the large image DICOM source found at
    https://github.com/girder/large_image/blob/master/sources/dicom/large_image_source_dicom/__init__.py#L226.

    ``path`` is only opened when its name makes it a plausible DICOM file: it has an
    extension listed in ``extensions``, it shares the (unlisted) extension of
    ``original_path``, its file name is a dotted sequence of numbers of at most 64
    characters, or its file name is ``DCM_`` followed by digits. A plausible file belongs
    to the series when its ``SeriesInstanceUID`` equals the one in ``original_path``.

    Returns False, rather than raising, when ``path`` has a plausible name but is not a
    readable DICOM file.
    """
    # Imported here so the block does not depend on "pydicom.errors" being exposed as an
    # attribute of the top-level package
    from pydicom.errors import InvalidDicomError

    # "Path.suffix" keeps its leading dot (".dcm"), but "extensions" is keyed by bare names
    original_extension = original_path.suffix[1:].lower()
    extension = path.suffix[1:].lower()
    # The naming patterns below describe a file name, so never match them on a full path
    file_name = path.name

    might_match = extension in extensions or (
        original_extension not in extensions and original_path.suffix == path.suffix
    )
    if (
        not might_match
        and len(file_name) <= 64
        and re.match(r"^([1-9][0-9]*|0)(\.([1-9][0-9]*|0))+$", file_name)
    ):
        might_match = True
    if not might_match and re.match(r"^DCM_\d+$", file_name):
        might_match = True
    if not might_match:
        return False

    original = pydicom.dcmread(original_path, stop_before_pixels=True)
    original_series_uid = original.data_element("SeriesInstanceUID")
    if original_series_uid:
        original_series_uid = original_series_uid.value
    try:
        slide_to_test = pydicom.dcmread(path, stop_before_pixels=True)
    except InvalidDicomError:
        # A plausible name is no guarantee of DICOM content; such a file is in no series
        return False
    slide_series_uid = slide_to_test.data_element("SeriesInstanceUID")
    return slide_series_uid is not None and slide_series_uid.value == original_series_uid
// Serialise query parameters with URLSearchParams so that characters which are
// legal in file-system paths but special in URLs ("&", "#", "+", "%", "?")
// reach the server intact instead of truncating or splitting the query string.
function buildQuery(params: Record<string, unknown>): string {
  const query = new URLSearchParams();
  for (const [key, value] of Object.entries(params)) {
    // String() mirrors template-literal interpolation, so an omitted optional
    // value is still sent as the text "undefined", exactly as before.
    query.append(key, String(value));
  }
  return query.toString();
}

// Fetch one page of the redaction plan for a directory and return the parsed
// JSON body.
export async function getRedactionPlan(params: ImagePlanParams) {
  const query = buildQuery({
    input_directory: params.directory,
    rules_path: params.rules,
    limit: params.limit,
    offset: params.offset,
    update: params.update,
  });
  const response = await fetch(`${basePath}/redaction_plan?${query}`, {
    method: "GET",
    mode: "cors",
  });
  return response.json();
}

// Ask the server to redact every image in inputDirectory into outputDirectory.
// The raw Response is returned so the caller can inspect its status.
export async function redactImages(
  inputDirectory: string,
  outputDirectory: string,
  rules?: string,
) {
  const query = buildQuery({
    input_directory: inputDirectory,
    output_directory: outputDirectory,
    rules_path: rules,
  });
  const response = await fetch(`${basePath}/redact/?${query}`, {
    method: "POST",
    mode: "cors",
  });
  return response;
}

// Request one image (selected by imageKey) of the file at path. The raw
// Response is returned so the caller can check the status and read the body.
export async function getImages(path: string, imageKey: string) {
  const query = buildQuery({ file_name: path, image_key: imageKey });
  const response = await fetch(`${basePath}/image/?${query}`, {
    method: "GET",
    mode: "cors",
  });
  return response;
}
actions/checkout@v6 23 | with: 24 | lfs: true 25 | # Tags are needed to compute the current version number 26 | fetch-depth: 0 27 | - name: Set up Python 28 | uses: actions/setup-python@v6 29 | with: 30 | python-version: "3.11" 31 | - name: Set up Node.js 32 | uses: actions/setup-node@v6 33 | with: 34 | node-version: "20" 35 | - name: Enable Corepack 36 | run: | 37 | corepack enable 38 | - name: Install tox 39 | run: | 40 | pip install --upgrade pip 41 | pip install tox 42 | - name: Run tests 43 | run: | 44 | tox 45 | - name: Build binary 46 | run: | 47 | tox -e binary 48 | - name: Test binary runs 49 | run: | 50 | dist/imagedephi --help 51 | - name: Upload binary artifact 52 | uses: actions/upload-artifact@v6 53 | with: 54 | name: imagedephi-${{ matrix.os }}-binary 55 | path: | 56 | dist/imagedephi 57 | dist/imagedephi.exe 58 | retention-days: 5 59 | if-no-files-found: error 60 | -------------------------------------------------------------------------------- /client/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "imagedephi", 3 | "private": true, 4 | "version": "0.0.0", 5 | "license": "Apache-2.0", 6 | "type": "module", 7 | "scripts": { 8 | "lint:tsc": "vue-tsc --noEmit", 9 | "lint:check": "eslint . 
def launched_from_windows_explorer() -> bool:
    """
    Return whether the current program was launched directly from the Windows Explorer.

    The answer is always False on platforms other than Windows. On Windows, the program is
    taken to have been launched from the Explorer when it is alone in its console.

    Raises OSError when Windows reports no process attached to the console.
    """
    # Using "platform.system()" is preferred: https://stackoverflow.com/a/58071295
    # However, this is not recognised by Mypy yet: https://github.com/python/mypy/issues/8166
    if sys.platform != "win32":
        return False
    else:
        # See https://devblogs.microsoft.com/oldnewthing/20160125-00/?p=92922 for this algorithm.
        # Contradicting the blog, the API docs
        # https://learn.microsoft.com/en-us/windows/console/getconsoleprocesslist
        # indicate that the output array may not be null.
        # The array must also have a size larger than 0, but its full content isn't needed,
        # since only the returned count is used.
        buffer_length = 1
        # Array elements should be DWORD, which is a uint
        pid_buffer = (ctypes.c_uint * buffer_length)()
        process_count: int = ctypes.windll.kernel32.GetConsoleProcessList(
            pid_buffer, buffer_length
        )
        if process_count == 0:
            # TODO: Log this internally
            raise OSError("Could not detect Windows console.")
        # If frozen, the Pyinstaller bootloader is also running in this console:
        # https://pyinstaller.org/en/stable/advanced-topics.html#the-bootstrap-process-in-detail
        if launched_from_frozen_binary():
            return process_count == 2
        return process_count == 1
// Reactive store holding the redaction plan of the directory being browsed,
// together with the object URLs of the images that belong to each file.
export const useRedactionPlan = reactive({
  imageRedactionPlan: {} as imagePlanResponse,
  currentDirectory: selectedDirectories.value.inputDirectory,
  // Fetch the plan for params.directory, then start loading its images.
  async updateImageData(params: ImagePlanParams) {
    this.currentDirectory = params.directory;
    this.imageRedactionPlan = await getRedactionPlan(params);
    this.getThumbnail(this.imageRedactionPlan.data);
  },
  // For every file in imagedict, request its "thumbnail", "label" and "macro"
  // images and store a displayable URL for each under the same key.
  async getThumbnail(imagedict: Record<string, Record<string, string>>) {
    Object.keys(imagedict).forEach(async (image) => {
      const keys = ["thumbnail", "label", "macro"];
      for (const key of keys) {
        const response = await getImages(
          this.currentDirectory + "/" + image,
          key,
        );
        if (response.status >= 400) {
          this.imageRedactionPlan.data[image][key] =
            key === "thumbnail"
              ? "/thumbnailPlaceholder.svg"
              : "/associatedPlaceholder.svg";
          // Only this image is unavailable: carry on with the remaining keys
          // so that, for example, a missing label does not hide the macro.
          continue;
        }
        if (response.body) {
          const reader = response.body.getReader();
          const chunks = [];

          while (true) {
            const { done, value } = await reader.read();
            if (done) break;
            chunks.push(value);
          }
          const blob = new Blob(chunks);
          const url = URL.createObjectURL(blob);
          this.imageRedactionPlan.data[image][key] = url;
        }
      }
    });
  },

  // Forget the current plan.
  clearImageData() {
    this.imageRedactionPlan = {} as imagePlanResponse;
  },
});
FallthroughGroup, run_coroutine 9 | 10 | 11 | def test_utils_cli_run_coroutine(mocker: MockerFixture) -> None: 12 | async_mock = mocker.AsyncMock() 13 | 14 | wrapped = run_coroutine(async_mock) 15 | 16 | assert not iscoroutinefunction(wrapped) 17 | wrapped(5, foo="bar") 18 | async_mock.assert_awaited_once_with(5, foo="bar") 19 | 20 | 21 | def test_utils_cli_fallthrough_group_baseline(mocker: MockerFixture, cli_runner: CliRunner) -> None: 22 | cmd = mocker.Mock() 23 | sub = mocker.Mock() 24 | should_fallthrough = mocker.Mock() 25 | # Decorators can't be used with mocks, so create the group and subcommands here 26 | cmd_group = FallthroughGroup( 27 | subcommand_name="sub", should_fallthrough=should_fallthrough, callback=cmd 28 | ) 29 | cmd_group.add_command(click.Command(name="sub", callback=sub)) 30 | 31 | # Explicitly invoke a subcommand 32 | result = cli_runner.invoke(cmd_group, ["sub"]) 33 | 34 | assert result.exit_code == 0 35 | cmd.assert_called_once() 36 | sub.assert_called_once() 37 | should_fallthrough.assert_not_called() 38 | assert "Usage" not in result.output 39 | 40 | 41 | @pytest.mark.parametrize("should_fallthrough", [False, True]) 42 | def test_utils_cli_fallthrough_group_empty( 43 | should_fallthrough: bool, mocker: MockerFixture, cli_runner: CliRunner 44 | ) -> None: 45 | cmd = mocker.Mock() 46 | sub = mocker.Mock() 47 | cmd_group = FallthroughGroup( 48 | subcommand_name="sub", should_fallthrough=lambda: should_fallthrough, callback=cmd 49 | ) 50 | cmd_group.add_command(click.Command(name="sub", callback=sub)) 51 | 52 | # No subcommand 53 | result = cli_runner.invoke(cmd_group, []) 54 | 55 | assert result.exit_code == 0 56 | assert cmd.called is should_fallthrough 57 | assert sub.called is should_fallthrough 58 | assert ("Usage" in result.output) is not should_fallthrough 59 | -------------------------------------------------------------------------------- /stubs/tifftools/constants.pyi: 
class TiffConstantSet(Generic[_TiffConstantT]):
    # A collection of constants of one TiffConstant type, addressable by name or by value.
    def __init__(
        self,
        # Either a name for the set, or the TiffConstant subclass used for its entries.
        # This is the class object itself, not an instance of it.
        setNameOrClass: type[_TiffConstantT] | str,
        setDict: dict[str, _TiffConstantAttr],
    ) -> None: ...
    def __contains__(self, other: str | int) -> bool: ...
    def __getattr__(self, key: str) -> _TiffConstantT: ...
    def __getitem__(self, key: str | int | _TiffConstantT) -> _TiffConstantT: ...
    def __iter__(self) -> Generator[_TiffConstantT, None, None]: ...
    def get(
        self, key: str | int, default: _TiffConstantT | None = ...
    ) -> _TiffConstantT | None: ...
56 | @overload 57 | def get_or_create_tag( 58 | key: str | int, 59 | tagSet: TiffConstantSet[_TiffConstantT], 60 | upperLimit: bool = ..., 61 | **tagOptions: _TiffConstantAttr, 62 | ) -> _TiffConstantT: ... 63 | -------------------------------------------------------------------------------- /tests/test_gui.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from fastapi.testclient import TestClient 4 | import pytest 5 | 6 | from imagedephi.gui.app import app 7 | 8 | 9 | @pytest.fixture 10 | def client() -> TestClient: 11 | return TestClient(app) 12 | 13 | 14 | def test_gui_select_directory(client: TestClient) -> None: 15 | response = client.get(app.url_path_for("select_directory")) 16 | 17 | assert response.status_code == 200 18 | 19 | 20 | def test_gui_select_directory_success( 21 | client: TestClient, 22 | tmp_path: Path, 23 | ) -> None: 24 | response = client.get( 25 | app.url_path_for("select_directory"), 26 | params={"input_directory": str(tmp_path), "output_directory": str(tmp_path)}, 27 | ) 28 | 29 | assert response.status_code == 200 30 | 31 | 32 | def test_gui_select_directory_not_found( 33 | client: TestClient, 34 | tmp_path: Path, 35 | ) -> None: 36 | response = client.get( 37 | app.url_path_for("select_directory"), params={"directory": str(tmp_path / "fake")} 38 | ) 39 | 40 | assert response.status_code == 404 41 | assert response.json() == {"detail": "Input directory not found"} 42 | 43 | 44 | def test_gui_redact( 45 | client: TestClient, 46 | tmp_path: Path, 47 | ) -> None: 48 | response = client.post( 49 | app.url_path_for("redact"), 50 | params={"input_directory": str(tmp_path), "output_directory": str(tmp_path)}, 51 | ) 52 | 53 | assert response.status_code == 200 54 | 55 | 56 | def test_gui_redact_input_failure( 57 | client: TestClient, 58 | tmp_path: Path, 59 | ) -> None: 60 | response = client.post( 61 | app.url_path_for("redact"), 62 | params={"input_directory": str(tmp_path 
def run_coroutine(f: Callable[P, Coroutine[None, None, T]]) -> Callable[P, T]:
    """
    Decorate an async function to be run in a new event loop.

    The returned callable is synchronous: each call builds the coroutine from the given
    arguments, drives it to completion with "asyncio.run" and returns its result.
    """

    @wraps(f)
    def run_to_completion(*args: P.args, **kwargs: P.kwargs) -> T:
        pending = f(*args, **kwargs)
        return asyncio.run(pending)

    return run_to_completion
def build_redaction_plan(
    image_path: Path,
    base_rules: Ruleset,
    override_rules: Ruleset | None = None,
    dcm_uid_map: dict[str, str] | None = None,
) -> RedactionPlan:
    """
    Build the redaction plan matching the file format of ``image_path``.

    The rules for the detected format are copied from ``base_rules`` and, when
    ``override_rules`` is given, updated with the corresponding override rules. The
    ``strict`` flag comes from ``override_rules`` when given, otherwise from ``base_rules``.

    Raises ImageDePHIRedactionError for a DICOM image when ``strict`` is set, and
    UnsupportedFileTypeError when the format is neither TIFF nor DICOM.
    """
    detected_format = get_file_format_from_path(image_path)
    strict = base_rules.strict if not override_rules else override_rules.strict

    # NOTE(review): if "copy()" is shallow, the "update" calls below also modify the
    # mappings held by "base_rules" -- confirm against the rule models.
    if detected_format == FileFormat.TIFF:
        # SVS files are TIFF files with their own rules and plan
        if get_is_svs(image_path):
            svs_rules = base_rules.svs.copy()
            if override_rules:
                svs_overrides = override_rules.svs
                svs_rules.metadata.update(svs_overrides.metadata)
                svs_rules.associated_images.update(svs_overrides.associated_images)
                svs_rules.image_description.update(svs_overrides.image_description)
            return SvsRedactionPlan(image_path, svs_rules, strict)

        tiff_rules = base_rules.tiff.copy()
        if override_rules:
            tiff_overrides = override_rules.tiff
            tiff_rules.metadata.update(tiff_overrides.metadata)
            tiff_rules.associated_images.update(tiff_overrides.associated_images)
        return TiffRedactionPlan(image_path, tiff_rules, strict)

    if detected_format == FileFormat.DICOM:
        if strict:
            raise ImageDePHIRedactionError(
                "strict redaction is not currently supported for DICOM images"
            )
        dicom_rules = base_rules.dicom.copy()
        if override_rules:
            dicom_overrides = override_rules.dicom
            dicom_rules.metadata.update(dicom_overrides.metadata)
            dicom_rules.custom_metadata_action = dicom_overrides.custom_metadata_action
            dicom_rules.associated_images.update(dicom_overrides.associated_images)
        return DicomRedactionPlan(image_path, dicom_rules, dcm_uid_map)

    raise UnsupportedFileTypeError(f"File format for {image_path} not supported.")
deps = 17 | flake8 18 | flake8-black 19 | flake8-bugbear 20 | flake8-docstrings 21 | flake8-isort 22 | pep8-naming 23 | yamlfix 24 | commands = 25 | flake8 {posargs:.} 26 | yamlfix -c pyproject.toml --exclude ./client/node_modules/**/* client docs .github imagedephi stubs tests --check 27 | 28 | [testenv:format] 29 | package = skip 30 | deps = 31 | black 32 | isort 33 | yamlfix 34 | commands = 35 | isort {posargs:.} 36 | black {posargs:.} 37 | yamlfix -c pyproject.toml --exclude ./client/node_modules/**/* client docs .github imagedephi stubs tests 38 | 39 | [testenv:type] 40 | # Editable ensures dependencies are installed, but full packaging isn't necessary 41 | package = editable 42 | deps = 43 | mypy 44 | pytest 45 | types-PyYAML 46 | types-Pillow 47 | commands = 48 | mypy {posargs:.} 49 | 50 | [testenv:test] 51 | deps = 52 | freezegun 53 | # httpx is needed for FastApi testing 54 | httpx 55 | pytest 56 | pytest-asyncio 57 | pytest-mock 58 | pytest-timeout 59 | pooch 60 | commands = 61 | pytest tests {posargs} 62 | 63 | [testenv:binary] 64 | deps = 65 | pyinstaller 66 | commands = 67 | pyinstaller \ 68 | --clean \ 69 | --noconfirm \ 70 | --onefile \ 71 | --name imagedephi \ 72 | --recursive-copy-metadata imagedephi \ 73 | --collect-data imagedephi \ 74 | --collect-submodules pydicom.encoders \ 75 | --specpath {env_tmp_dir} \ 76 | --workpath {env_tmp_dir} \ 77 | {env_site_packages_dir}/imagedephi/__main__.py 78 | 79 | [flake8] 80 | max-line-length = 100 81 | show-source = true 82 | extend-exclude = 83 | .mypy_cache 84 | # Expect many developers to create a virtual environment here 85 | .venv 86 | .direnv 87 | client 88 | ignore = 89 | # closing bracket does not match indentation of opening bracket’s line 90 | E123, 91 | # whitespace before ':' 92 | E203, 93 | # line break before binary operator 94 | W503, 95 | # Missing docstring in * 96 | D10, 97 | # Multiple statements on one line (https://github.com/psf/black/issues/3887) 98 | E704, 99 | 100 | [yamlfix] 101 | 
line_length = 200 102 | preserve_quotes = True 103 | sequence_style = YamlNodeStyle.BLOCK_STYLE 104 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Build artifacts 2 | /imagedephi/web_static/ 3 | 4 | # Test data retrieved by pooch 5 | /tests/data/* 6 | 7 | # Developer tools 8 | .envrc 9 | .vscode/* 10 | */web_static/* 11 | 12 | # Sample Redacted Images 13 | */REDACTED_* 14 | */Redacted_* 15 | Redacted_*/ 16 | 17 | 18 | # Byte-compiled / optimized / DLL files 19 | __pycache__/ 20 | *.py[cod] 21 | *$py.class 22 | 23 | # C extensions 24 | *.so 25 | 26 | # Distribution / packaging 27 | .Python 28 | build/ 29 | develop-eggs/ 30 | dist/ 31 | downloads/ 32 | eggs/ 33 | .eggs/ 34 | lib/ 35 | lib64/ 36 | parts/ 37 | sdist/ 38 | var/ 39 | wheels/ 40 | pip-wheel-metadata/ 41 | share/python-wheels/ 42 | *.egg-info/ 43 | .installed.cfg 44 | *.egg 45 | MANIFEST 46 | 47 | # PyInstaller 48 | # Usually these files are written by a python script from a template 49 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
50 | *.manifest 51 | *.spec 52 | 53 | # Installer logs 54 | pip-log.txt 55 | pip-delete-this-directory.txt 56 | 57 | # Unit test / coverage reports 58 | htmlcov/ 59 | .tox/ 60 | .nox/ 61 | .coverage 62 | .coverage.* 63 | .cache 64 | nosetests.xml 65 | coverage.xml 66 | *.cover 67 | *.py,cover 68 | .hypothesis/ 69 | .pytest_cache/ 70 | 71 | # Translations 72 | *.mo 73 | *.pot 74 | 75 | # Django stuff: 76 | *.log 77 | local_settings.py 78 | db.sqlite3 79 | db.sqlite3-journal 80 | 81 | # Flask stuff: 82 | instance/ 83 | .webassets-cache 84 | 85 | # Scrapy stuff: 86 | .scrapy 87 | 88 | # Sphinx documentation 89 | docs/_build/ 90 | 91 | # PyBuilder 92 | target/ 93 | 94 | # Jupyter Notebook 95 | .ipynb_checkpoints 96 | 97 | # IPython 98 | profile_default/ 99 | ipython_config.py 100 | 101 | # pyenv 102 | .python-version 103 | 104 | # pipenv 105 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 106 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 107 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 108 | # install all needed dependencies. 109 | #Pipfile.lock 110 | 111 | # PEP 582; used by e.g. 
def retrieve_file(file_name: str, output_path: Path) -> Path:
    """Fetch a registered test file with ``pooch.retrieve`` and return its local path.

    ``file_name`` must be a key of ``registry``; its ``"<algo>:<digest>"`` entry is used both to
    build the download URL and as the ``known_hash`` handed to pooch. The file is stored under
    ``output_path`` using ``file_name`` as its name.
    """
    known_hash = registry[file_name]
    algo, hash_value = known_hash.split(":")
    download_url = f"https://data.kitware.com/api/v1/file/hashsum/{algo}/{hash_value}/download"
    local_path = pooch.retrieve(
        url=download_url,
        known_hash=known_hash,
        fname=file_name,
        path=output_path,
    )
    return Path(local_path)
shutdown_event = asyncio.Event()

# Values of an environment flag that count as "enabled" (compared case-insensitively).
_TRUTHY_ENV_VALUES = frozenset({"1", "true", "yes", "on"})


def _env_flag(name: str) -> bool:
    """Return True when environment variable ``name`` holds a recognised truthy value.

    An unset variable, an empty string, and any unrecognised value all yield False.
    """
    return os.environ.get(name, "").strip().lower() in _TRUTHY_ENV_VALUES


# DEBUG used to be passed through eval(), which executes arbitrary code taken from the
# environment and fails at import time on values such as "true" or "". It is parsed as a plain
# flag instead; "True" and "1" keep enabling debug mode, and an unset variable keeps it off.
debug_mode = _env_flag("DEBUG")
@app.exception_handler(500)
def on_internal_error(request: Request, exc: Exception) -> PlainTextResponse:
    """Record the failure, reply with HTTP 500, and request a server shutdown afterwards."""
    # This is a desktop application: there is no value in limping on after an internal error,
    # and staying up would only hide fatal errors from users and from the test environment.
    # The exception is stored on the app state so that it can be inspected once serving stops.
    app.state.last_exception = exc

    # Setting the event runs as a background task, i.e. only after the response has been sent.
    request_shutdown = BackgroundTask(shutdown_event.set)
    error_response = PlainTextResponse(
        "Internal Server Error",
        status_code=500,
        background=request_shutdown,
    )
    return error_response
/**
 * Reload `directoryData` for `currentDirectory` from the backend.
 *
 * The child lists are cleared immediately. The loading indicator is only
 * switched on if the request takes longer than 100ms, so fast responses do
 * not cause a flicker. Whether the request succeeds or fails, the pending
 * timer is cancelled and `loadingData` is reset; a failure is re-thrown to
 * the caller.
 */
export const updateDirectories = async (currentDirectory?: string) => {
  directoryData.value.children = [];
  directoryData.value.childrenImages = [];
  directoryData.value.childrenYaml = [];

  const timeout = setTimeout(() => {
    loadingData.value = true;
  }, 100);

  try {
    const data = await getDirectoryInfo(currentDirectory);
    // Map the snake_case API fields onto the names used by the store.
    directoryData.value = {
      ...data,
      children: data.child_directories,
      childrenImages: data.child_images,
      childrenYaml: data.child_yaml_files,
    };
  } finally {
    // Runs on errors too, so the spinner can never get stuck in the "on" state.
    clearTimeout(timeout);
    loadingData.value = false;
  }

  calculateVisibleItems();
};
listItems[0].clientHeight : 0; 73 | 74 | directoryData.value.childrenImages.forEach(() => { 75 | listHeight.value += listItemHeight; 76 | if (containerHeight && listHeight.value < containerHeight) { 77 | visibleItems.value += 1; 78 | } 79 | }); 80 | 81 | visibleImages.value = directoryData.value.childrenImages.slice( 82 | 0, 83 | visibleItems.value, 84 | ); 85 | remainingImages.value = 86 | directoryData.value.childrenImages.length - visibleItems.value; 87 | }); 88 | }; 89 | -------------------------------------------------------------------------------- /client/src/components/MenuSteps.vue: -------------------------------------------------------------------------------- 1 | 59 | 60 | 135 | -------------------------------------------------------------------------------- /imagedephi/minimum_rules.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Minimum Rules 3 | description: A set of rules that defines a minimum amount of metadata for images to be read. Metadata not specified by a rule is deleted (controlled by the metadata_fallback_action). 
4 | output_file_name: study_slide 5 | strict: true 6 | tiff: 7 | metadata_fallback_action: delete 8 | associated_images: 9 | default: 10 | action: delete 11 | metadata: 12 | ImageWidth: 13 | action: keep 14 | ImageLength: 15 | action: keep 16 | BitsPerSample: 17 | action: keep 18 | Compression: 19 | action: keep 20 | Photometric: 21 | action: keep 22 | StripOffsets: 23 | action: keep 24 | SamplesPerPixel: 25 | action: keep 26 | RowsPerStrip: 27 | action: keep 28 | StripByteCounts: 29 | action: keep 30 | XResolution: 31 | action: keep 32 | YResolution: 33 | action: keep 34 | ResolutionUnit: 35 | action: keep 36 | NewSubfileType: 37 | action: keep 38 | FillOrder: 39 | action: keep 40 | Orientation: 41 | action: keep 42 | Predictor: 43 | action: keep 44 | ColorMap: 45 | action: keep 46 | TileWidth: 47 | action: keep 48 | TileLength: 49 | action: keep 50 | TileOffsets: 51 | action: keep 52 | TileByteCounts: 53 | action: keep 54 | SubIFD: 55 | action: keep 56 | ExtraSamples: 57 | action: keep 58 | SampleFormat: 59 | action: keep 60 | Indexed: 61 | action: keep 62 | JPEGTables: 63 | action: keep 64 | StripRowCounts: 65 | action: keep 66 | ICCProfile: 67 | action: keep 68 | JPEGProc: 69 | action: keep 70 | JPEGIFOffset: 71 | action: keep 72 | JPEGIFByteCount: 73 | action: keep 74 | JPEGRestartInterval: 75 | action: keep 76 | JPEGLosslessPredictors: 77 | action: keep 78 | JPEGPointTransform: 79 | action: keep 80 | JPEGQTables: 81 | action: keep 82 | JPEGDCTables: 83 | action: keep 84 | JPEGACTables: 85 | action: keep 86 | YCbCrCoefficients: 87 | action: keep 88 | YCbCrSubsampling: 89 | action: keep 90 | YCbCrPositioning: 91 | action: keep 92 | PlanarConfig: 93 | action: keep 94 | svs: 95 | metadata_fallback_action: delete 96 | associated_images: 97 | default: 98 | action: delete 99 | metadata: 100 | ImageWidth: 101 | action: keep 102 | ImageLength: 103 | action: keep 104 | BitsPerSample: 105 | action: keep 106 | Compression: 107 | action: keep 108 | Photometric: 109 | 
action: keep 110 | StripOffsets: 111 | action: keep 112 | SamplesPerPixel: 113 | action: keep 114 | RowsPerStrip: 115 | action: keep 116 | StripByteCounts: 117 | action: keep 118 | XResolution: 119 | action: keep 120 | YResolution: 121 | action: keep 122 | ResolutionUnit: 123 | action: keep 124 | NewSubfileType: 125 | action: keep 126 | FillOrder: 127 | action: keep 128 | Orientation: 129 | action: keep 130 | Predictor: 131 | action: keep 132 | ColorMap: 133 | action: keep 134 | TileWidth: 135 | action: keep 136 | TileLength: 137 | action: keep 138 | TileOffsets: 139 | action: keep 140 | TileByteCounts: 141 | action: keep 142 | SubIFD: 143 | action: keep 144 | ExtraSamples: 145 | action: keep 146 | SampleFormat: 147 | action: keep 148 | Indexed: 149 | action: keep 150 | JPEGTables: 151 | action: keep 152 | StripRowCounts: 153 | action: keep 154 | ICCProfile: 155 | action: keep 156 | JPEGProc: 157 | action: keep 158 | JPEGIFOffset: 159 | action: keep 160 | JPEGIFByteCount: 161 | action: keep 162 | JPEGRestartInterval: 163 | action: keep 164 | JPEGLosslessPredictors: 165 | action: keep 166 | JPEGPointTransform: 167 | action: keep 168 | JPEGQTables: 169 | action: keep 170 | JPEGDCTables: 171 | action: keep 172 | JPEGACTables: 173 | action: keep 174 | YCbCrCoefficients: 175 | action: keep 176 | YCbCrSubsampling: 177 | action: keep 178 | YCbCrPositioning: 179 | action: keep 180 | PlanarConfig: 181 | action: keep 182 | -------------------------------------------------------------------------------- /client/src/components/ImageDataTable.vue: -------------------------------------------------------------------------------- 1 | 13 | 107 | 141 | -------------------------------------------------------------------------------- /imagedephi/rules.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from typing import Annotated, Any, Literal, Type, TypeAlias 3 | 4 | from pydantic import BaseModel, Field, validator 5 | 6 
class CheckTypeMetadataRule(_Rule):
    """Rule that checks a metadata value against an expected type and element count.

    ``valid_data_types`` is derived, not user supplied: the validator below always replaces
    whatever was passed with the Python types that ``expected_type_map`` lists for
    ``expected_type``.
    """

    action: Literal["check_type"]
    expected_type: Literal["number", "integer", "text", "rational"]
    valid_data_types: list[Type[Any]] = []
    expected_count: int = 1

    @validator("valid_data_types", pre=True, always=True)
    @classmethod
    def set_valid_data_types(
        cls, valid_data_types: list[Type[Any]], values: dict[str, Any]
    ) -> list[Type[Any]]:
        """Return the Python types accepted for the rule's ``expected_type``."""
        # ``values`` only holds fields that validated successfully. If ``expected_type`` was
        # missing or invalid it is absent here; indexing it would raise a bare KeyError out of
        # the validator and hide the real validation error for ``expected_type``.
        expected_type = values.get("expected_type")
        if expected_type is None:
            return []
        # Copy so that instances never share the module-level list.
        return list(expected_type_map[expected_type])
class TiffRules(BaseModel):
    """Redaction rules for TIFF files: per-tag and per-associated-image rules plus fallbacks."""

    associated_images: dict[str, ConcreteImageRule] = {}
    metadata: dict[str, ConcreteMetadataRule] = {}
    metadata_fallback_action: Literal["delete"] | Literal["keep"] | None = None
    associated_image_fallback: ConcreteImageRule | None = None

    # TODO: is pre necessary?
    @validator("metadata", "associated_images", pre=True)
    @classmethod
    def set_tag_name(cls, metadata: Any):
        """Copy each mapping key into its rule dict as ``key_name`` before the rules are parsed."""
        if not isinstance(metadata, dict):
            return metadata
        for tag_name, rule_spec in metadata.items():
            # Entries that are not plain dicts are passed through untouched.
            if isinstance(rule_spec, dict):
                rule_spec["key_name"] = tag_name
        return metadata
class DicomRules(BaseModel):
    """Redaction rules for DICOM files, including how custom metadata is handled."""

    metadata: dict[str, ConcreteMetadataRule] = {}
    associated_images: dict[str, ConcreteImageRule] = {}
    custom_metadata_action: Literal["keep"] | Literal["delete"] | Literal["use_rule"] = "delete"

    @validator("metadata", "associated_images", pre=True)
    @classmethod
    def set_tag_name(cls, metadata: Any):
        """Record every rule's mapping key inside the rule itself under ``key_name``."""
        if isinstance(metadata, dict):
            # Only dict-valued entries can carry the extra key; everything else is left alone.
            dict_entries = (
                (name, spec) for name, spec in metadata.items() if isinstance(spec, dict)
            )
            for name, spec in dict_entries:
                spec["key_name"] = name
        return metadata
def get_associated_image_svs(image_path: Path, image_key: str) -> IFD | None:
    """Given a path to an SVS image, return the IFD for a given associated label or macro image.

    :param image_path: Path of the TIFF/SVS file to inspect.
    :param image_key: Which associated image to look up, either ``"macro"`` or ``"label"``.
    :returns: The matching IFD, or ``None`` when the file is not recognised as an Aperio image
        or no matching IFD is found.
    :raises ValueError: If ``image_key`` is neither ``"macro"`` nor ``"label"``.
    """
    if image_key not in ["macro", "label"]:
        raise ValueError("image_key must be one of macro, label")

    image_info = tifftools.read_tiff(image_path)
    ifds = image_info["ifds"]

    # The file is only treated as SVS when the first IFD's ImageDescription mentions "aperio".
    # A first IFD without an ImageDescription tag therefore means "not SVS", not a KeyError;
    # this mirrors the guard in get_is_svs.
    description_entry = ifds[0]["tags"].get(IMAGE_DESCRIPTION_ID)
    if description_entry is None or "aperio" not in str(description_entry["data"]).lower():
        return None

    if image_key == "macro":
        return _get_macro(ifds)
    # image_key was validated above, so the only remaining possibility is "label".
    return _get_label(ifds)
def get_is_svs(image_path: Path) -> bool:
    """Report whether the first IFD's ImageDescription mentions "aperio" (case-insensitive).

    Returns ``False`` when the first IFD carries no ImageDescription tag at all.
    """
    first_ifd_tags = tifftools.read_tiff(image_path)["ifds"][0]["tags"]
    description_tag_id = tifftools.Tag.ImageDescription.value
    if description_tag_id in first_ifd_tags:
        description = first_ifd_tags[description_tag_id]["data"]
        return "aperio" in str(description).lower()
    return False
def extract_thumbnail_from_image_bytes(
    ifd: IFD,
    file_name: str,
    max_width=MAX_ASSOCIATED_IMAGE_SIZE,
    max_height=MAX_ASSOCIATED_IMAGE_SIZE,
) -> Image.Image | None:
    """Assemble the tiled image described by ``ifd`` and return it scaled to a thumbnail.

    Each tile is read from ``file_name`` using the IFD's TileOffsets/TileByteCounts, decoded
    with PIL, and pasted onto a canvas of the IFD's full image size, filling rows left to right
    and top to bottom. Tiles that overhang the right or bottom edge are cropped to fit.

    :param ifd: IFD of a tiled image; TileOffsets, TileByteCounts, ImageWidth and ImageLength
        are read from it.
    :param file_name: Path of the file that the tile offsets refer to.
    :param max_width: Maximum width of the returned thumbnail.
    :param max_height: Maximum height of the returned thumbnail.
    :returns: The resized image, or ``None`` if the IFD lists no tiles.
    """
    offsets = ifd["tags"][tifftools.Tag.TileOffsets.value]["data"]
    byte_counts = ifd["tags"][tifftools.Tag.TileByteCounts.value]["data"]

    height = int(ifd["tags"][tifftools.Tag.ImageLength.value]["data"][0])
    width = int(ifd["tags"][tifftools.Tag.ImageWidth.value]["data"][0])
    top = 0
    left = 0

    image_canvas: Image.Image | None = None
    with open(file_name, "rb") as image_file:
        for offset, byte_count in zip(offsets, byte_counts):
            image_file.seek(int(offset))
            tile_image = Image.open(BytesIO(image_file.read(int(byte_count))))

            if image_canvas is None:
                # The canvas takes its mode from the first tile.
                image_canvas = Image.new(tile_image.mode, (width, height))

            # PIL sizes are (width, height). The previous code used index 0 as the height and
            # index 1 as the width, which only worked for square tiles.
            tile_width, tile_height = tile_image.size

            # Clamp the tile's box to the canvas so edge tiles do not spill over.
            right = min(left + tile_width, width)
            bottom = min(top + tile_height, height)
            piece_width = right - left
            piece_height = bottom - top

            if (piece_width, piece_height) != (tile_width, tile_height):
                tile_image = tile_image.crop((0, 0, piece_width, piece_height))

            image_canvas.paste(tile_image, (left, top, right, bottom))

            left = right
            if left >= width:
                # go to next row
                left = 0
                top += tile_height

    if image_canvas is None:
        return None

    scale_factor = get_scale_factor((max_width, max_height), image_canvas.size)
    new_size = (
        int(image_canvas.size[0] * scale_factor),
        int(image_canvas.size[1] * scale_factor),
    )
    return image_canvas.resize(new_size, Image.LANCZOS)
def get_image_response_from_ifd(
    ifd: "IFD",
    file_name: str,
    max_width=MAX_ASSOCIATED_IMAGE_SIZE,
    max_height=MAX_ASSOCIATED_IMAGE_SIZE,
) -> StreamingResponse:
    """Render the image described by ``ifd`` as a JPEG thumbnail response.

    The IFD is first written out as a standalone TIFF and opened with PIL. If PIL cannot
    identify that TIFF, the thumbnail is instead composed from the tiles in ``file_name``.
    Raises ``Exception`` when the IFD's pixel count exceeds ``IMAGE_DEPHI_MAX_IMAGE_PIXELS``.
    """
    tags = ifd["tags"]
    pixel_count = int(tags[tifftools.Tag.ImageLength.value]["data"][0]) * int(
        tags[tifftools.Tag.ImageWidth.value]["data"][0]
    )
    # Refuse oversized images up front
    if pixel_count > IMAGE_DEPHI_MAX_IMAGE_PIXELS:
        raise Exception(f"{file_name} too large to create thumbnail")

    # tifftools serialises the IFD to an in-memory TIFF; PIL turns it into a browser-sized JPEG
    tiff_bytes = BytesIO()
    jpeg_bytes = BytesIO()
    tifftools.write_tiff(ifd, tiff_bytes)
    try:
        pil_image = Image.open(tiff_bytes)
        factor = get_scale_factor((max_width, max_height), pil_image.size)
        target_size = (int(pil_image.size[0] * factor), int(pil_image.size[1] * factor))
        pil_image.thumbnail(target_size, Image.LANCZOS)
        pil_image.save(jpeg_bytes, "JPEG")
        jpeg_bytes.seek(0)
    except UnidentifiedImageError:
        # PIL could not read the standalone TIFF: stitch the thumbnail from the source file
        stitched = extract_thumbnail_from_image_bytes(ifd, file_name, max_width, max_height)
        if stitched:
            stitched.save(jpeg_bytes, "JPEG")
            jpeg_bytes.seek(0)

    return StreamingResponse(jpeg_bytes, media_type="image/jpeg")
def get_image_response_dicom(
    related_files: list[Path],
    key: str,
    max_width=MAX_ASSOCIATED_IMAGE_SIZE,
    max_height=MAX_ASSOCIATED_IMAGE_SIZE,
):
    """Return a JPEG response for the thumbnail, label, or macro image of a DICOM slide.

    :param related_files: Files that together make up the slide; passed to ``WsiDicom.open``.
    :param key: ``"thumbnail"``, ``"label"`` or ``"macro"`` (the macro is read via
        ``read_overview``).
    :param max_width: Maximum width of the returned image.
    :param max_height: Maximum height of the returned image.
    :returns: A ``StreamingResponse`` with the JPEG, an empty 404 ``StreamingResponse`` when
        wsidicom reports the requested image as not found, or ``None`` for an unrecognised
        ``key``.
    """
    slide = WsiDicom.open(related_files)
    image = None
    try:
        if key == "thumbnail":
            image = slide.read_thumbnail()
        elif key == "label":
            image = slide.read_label()
        elif key == "macro":
            image = slide.read_overview()
    except WsiDicomNotFoundError:
        # The previous handler returned a buffer that is only created on the success path,
        # which raised UnboundLocalError here. Send an explicit empty 404 body instead.
        return StreamingResponse(BytesIO(), status_code=404)

    if image is None:
        # Unrecognised key: nothing was read. Kept as the original implicit None result.
        return None

    # resize the image
    scale_factor = get_scale_factor((max_width, max_height), image.size)
    new_size = (int(image.size[0] * scale_factor), int(image.size[1] * scale_factor))
    image.thumbnail(new_size, Image.LANCZOS)
    img_buffer = BytesIO()
    image.save(img_buffer, "JPEG")
    img_buffer.seek(0)
    return StreamingResponse(img_buffer, media_type="image/jpeg")
@freeze_time("2023-05-12 12:12:53")
@pytest.mark.timeout(5)
def test_e2e_run(
    cli_runner: CliRunner, data_dir: Path, test_image_tiff: Path, rules_dir: Path, tmp_path: Path
) -> None:
    """Run ``imagedephi run`` with override rules and inspect the redacted output file."""
    cli_args = [
        "run",
        str(data_dir / "input" / "tiff"),
        "--output-dir",
        str(tmp_path),
        "-R",
        str(rules_dir / "example_user_rules.yaml"),
    ]

    result = cli_runner.invoke(main.imagedephi, cli_args)

    assert result.exit_code == 0
    # Time is frozen by the decorator, so the timestamped output directory name is fixed.
    redacted_dir = tmp_path / "Redacted_2023-05-12_12-12-53"
    output_file_bytes = (redacted_dir / "my_study_slide_1.tif").read_bytes()
    assert b"large_image_converter" not in output_file_bytes
    assert b"Redacted by ImageDePHI" in output_file_bytes
"test_image.tif"), 72 | ], 73 | ) 74 | 75 | assert result.exit_code == 0 76 | 77 | 78 | def test_e2e_gui( 79 | unused_tcp_port: int, 80 | data_dir: Path, 81 | test_image_tiff: Path, 82 | tmp_path: Path, 83 | ) -> None: 84 | 85 | port = unused_tcp_port 86 | 87 | gui = subprocess.Popen( 88 | [sys.executable, "-m", "imagedephi", "gui", "--port", str(port)], 89 | ) 90 | 91 | asyncio.run(asyncio.wait_for(wait_for_port(port), timeout=2)) 92 | 93 | # Check that the GUI is running 94 | assert gui.poll() is None 95 | 96 | check_gui = httpx.get(f"http://127.0.0.1:{port}") 97 | assert check_gui.status_code == 200 98 | 99 | # flake8: noqa: E501 100 | check_redact = httpx.post( 101 | f"http://127.0.0.1:{port}/redact/?input_directory={str(data_dir /'input' /'tiff')}&output_directory={str(tmp_path)}", 102 | ) 103 | 104 | assert check_redact.status_code == 200 105 | 106 | gui.terminate() 107 | gui.wait() 108 | # Check that the GUI has stopped 109 | assert gui.poll() is not None 110 | 111 | redacted_dirs = [path for path in tmp_path.glob("*Redacted*") if path.is_dir()] 112 | assert len(redacted_dirs) > 0 113 | redacted_files = list(redacted_dirs[0].glob("*")) 114 | assert len(redacted_files) > 0 115 | output_file = redacted_dirs[0] / "study_slide_1.tif" 116 | output_file_bytes = output_file.read_bytes() 117 | assert b"large_image_converter" not in output_file_bytes 118 | 119 | 120 | def test_e2e_version(cli_runner: CliRunner) -> None: 121 | result = cli_runner.invoke(main.imagedephi, ["--version"]) 122 | 123 | assert result.exit_code == 0 124 | assert "ImageDePHI, version" in result.output 125 | 126 | 127 | @pytest.mark.parametrize( 128 | "help_flag", 129 | [ 130 | "--help", 131 | pytest.param( 132 | "/?", marks=pytest.mark.skipif(sys.platform != "win32", reason="windows only") 133 | ), 134 | ], 135 | ) 136 | def test_e2e_help(cli_runner: CliRunner, help_flag: str) -> None: 137 | result = cli_runner.invoke(main.imagedephi, [help_flag]) 138 | 139 | assert result.exit_code == 0 140 | 
assert "Usage: imagedephi" in result.output 141 | 142 | 143 | @freeze_time("2023-05-12 12:12:53") 144 | @pytest.mark.timeout(5) 145 | @pytest.mark.parametrize("rename", [True, False]) 146 | def test_e2e_rename_flag( 147 | cli_runner, data_dir: Path, test_image_tiff: Path, tmp_path: Path, rename: bool 148 | ): 149 | rename_flag = "--rename" if rename else "--skip-rename" 150 | result = cli_runner.invoke( 151 | main.imagedephi, 152 | ["run", str(data_dir / "input" / "tiff"), "--output-dir", str(tmp_path), rename_flag], 153 | ) 154 | 155 | assert result.exit_code == 0 156 | 157 | output_file_name = ( 158 | tmp_path / "Redacted_2023-05-12_12-12-53" / "study_slide_1.tif" 159 | if rename 160 | else tmp_path / "Redacted_2023-05-12_12-12-53" / "test_image.tif" 161 | ) 162 | assert output_file_name.exists() 163 | 164 | 165 | @freeze_time("2024-01-04 10:48:00") 166 | @pytest.mark.timeout(5) 167 | @pytest.mark.parametrize( 168 | "recursive,rename", [(True, True), (True, False), (False, False), (False, True)] 169 | ) 170 | def test_e2e_recursive( 171 | cli_runner, data_dir: Path, tmp_path: Path, test_image_svs: Path, recursive: bool, rename: bool 172 | ): 173 | args = ["run", str(data_dir / "input"), "--output-dir", str(tmp_path)] 174 | if recursive: 175 | args.append("--recursive") 176 | if rename: 177 | args.append("--skip-rename") 178 | result = cli_runner.invoke(main.imagedephi, args) 179 | 180 | assert result.exit_code == 0 181 | output_subdir = tmp_path / "Redacted_2024-01-04_10-48-00" / "svs" 182 | assert output_subdir.exists() == recursive 183 | 184 | if recursive: 185 | assert len(list(output_subdir.iterdir())) 186 | 187 | 188 | @freeze_time("2024-01-04 10:48:00") 189 | @pytest.mark.timeout(5) 190 | def test_e2e_manifest(cli_runner, data_dir: Path, tmp_path: Path, test_image_tiff: Path): 191 | args = ["run", str(data_dir / "input" / "tiff"), "--output-dir", str(tmp_path)] 192 | result = cli_runner.invoke(main.imagedephi, args) 193 | 194 | assert result.exit_code == 0 
195 | manifest_path = tmp_path / "Redacted_2024-01-04_10-48-00_manifest.csv" 196 | assert manifest_path.exists() 197 | 198 | output_file_name = tmp_path / "Redacted_2024-01-04_10-48-00" / "study_slide_1.tif" 199 | assert output_file_name.exists() 200 | manifest_file_bytes = manifest_path.read_bytes() 201 | assert b"study_slide_1.tif" in manifest_file_bytes 202 | assert str(test_image_tiff).encode() in manifest_file_bytes 203 | 204 | 205 | @pytest.mark.parametrize("args", [["foo"], ["-r", "foo"]]) 206 | def test_e2e_no_such_command(cli_runner, args): 207 | result = cli_runner.invoke(main.imagedephi, args) 208 | assert result.exit_code == 0 209 | 210 | # Assert that the user has been told their command was invalid 211 | assert "No such command" in result.output 212 | # Assert the usage docs are shown to the user 213 | assert "Usage: imagedephi" in result.output 214 | -------------------------------------------------------------------------------- /imagedephi/gui/api/api.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import asyncio 4 | from pathlib import Path 5 | from typing import TYPE_CHECKING, Optional 6 | import urllib.parse 7 | 8 | from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect 9 | from fastapi.responses import FileResponse 10 | 11 | from imagedephi.gui.utils.constants import MAX_ASSOCIATED_IMAGE_SIZE 12 | from imagedephi.gui.utils.directory import DirectoryData 13 | from imagedephi.gui.utils.image import ( 14 | get_image_response_dicom, 15 | get_image_response_from_ifd, 16 | get_image_response_from_tiff, 17 | ) 18 | from imagedephi.redact import redact_images, show_redaction_plan 19 | from imagedephi.rules import FileFormat 20 | from imagedephi.utils.dicom import file_is_same_series_as 21 | from imagedephi.utils.image import get_file_format_from_path 22 | from imagedephi.utils.progress_log import get_next_progress_message 23 | from 
imagedephi.utils.tiff import get_associated_image_svs, get_ifd_for_thumbnail, get_is_svs 24 | 25 | if TYPE_CHECKING: 26 | from tifftools.tifftools import IFD 27 | 28 | router = APIRouter() 29 | 30 | 31 | @router.get("/directory/") 32 | def select_directory( 33 | directory: str = ("/"), 34 | ): 35 | directory_path = Path(directory) 36 | # TODO: if input_directory is specified but an empty string, it gets instantiated as the CWD 37 | if not directory_path.exists(): 38 | raise HTTPException(status_code=404, detail="Input directory not found") 39 | 40 | def image_url(path: str, key: str) -> str: 41 | params = {"file_name": str(directory_path / path), "image_key": key} 42 | return "image/?" + urllib.parse.urlencode(params, safe="") 43 | 44 | return ( 45 | { 46 | "directory_data": DirectoryData(directory_path), 47 | "image_url": image_url, 48 | }, 49 | ) 50 | 51 | 52 | @router.get("/image/", response_class=FileResponse) 53 | def get_associated_image( 54 | file_name: str = "", 55 | image_key: str = "", 56 | max_height=MAX_ASSOCIATED_IMAGE_SIZE, 57 | max_width=MAX_ASSOCIATED_IMAGE_SIZE, 58 | ): 59 | if not file_name: 60 | raise HTTPException(status_code=400, detail="file_name is a required parameter") 61 | 62 | if not Path(file_name).exists(): 63 | raise HTTPException(status_code=404, detail=f"{file_name} does not exist") 64 | 65 | if image_key not in ["macro", "label", "thumbnail"]: 66 | raise HTTPException( 67 | status_code=400, 68 | detail=f"{image_key} is not a supported associated image key for {file_name}.", 69 | ) 70 | 71 | image_type = get_file_format_from_path(Path(file_name)) 72 | if image_type == FileFormat.SVS or image_type == FileFormat.TIFF: 73 | ifd: IFD | None = None 74 | if image_key == "thumbnail": 75 | ifd = get_ifd_for_thumbnail(Path(file_name), int(max_width), int(max_height)) 76 | if not ifd: 77 | try: 78 | # If the image is not tiled, no appropriate IFD was found. In this case 79 | # attempt to get a thumbnail using the entire image. 
80 | return get_image_response_from_tiff(file_name, max_width, max_height) 81 | except Exception as e: 82 | raise HTTPException( 83 | status_code=422, # unprocessable content 84 | detail=f"Could not generate thumbnail image for {file_name}: {e.args[0]}", 85 | ) 86 | else: 87 | try: 88 | return get_image_response_from_ifd(ifd, file_name, max_width, max_height) 89 | except Exception as e: 90 | raise HTTPException( 91 | status_code=422, # unprocessable content 92 | detail=f"Could not generate thumbnail image for {file_name}: {e.args[0]}", 93 | ) 94 | 95 | # image key is one of "macro", "label" 96 | if not get_is_svs(Path(file_name)): 97 | raise HTTPException( 98 | status_code=404, detail=f"Image key {image_key} is not supported for {file_name}" 99 | ) 100 | 101 | ifd = get_associated_image_svs(Path(file_name), image_key) 102 | if not ifd: 103 | raise HTTPException( 104 | status_code=404, detail=f"No {image_key} image found for {file_name}" 105 | ) 106 | try: 107 | return get_image_response_from_ifd(ifd, file_name, max_height, max_width) 108 | except Exception as e: 109 | raise HTTPException( 110 | status_code=422, # unprocessable content 111 | detail=f"Could not generate thumbnail image for {file_name}: {e.args[0]}", 112 | ) 113 | elif image_type == FileFormat.DICOM: 114 | path = Path(file_name) 115 | related_files = [ 116 | child 117 | for child in path.parent.iterdir() 118 | if child != path and file_is_same_series_as(path, child) 119 | ] 120 | image_response = get_image_response_dicom(related_files, image_key, max_width, max_height) 121 | if image_response: 122 | return image_response 123 | raise HTTPException( 124 | status_code=404, detail=f"Could not retrieve {image_key} image for {file_name}" 125 | ) 126 | 127 | return HTTPException( 128 | status_code=404, detail=f"Could not retrieve {image_key} image for {file_name}" 129 | ) 130 | 131 | 132 | @router.get("/redaction_plan") 133 | def get_redaction_plan( 134 | input_directory: str = ("/"), # noqa: B008 135 | 
rules_path: Optional[str] = None, 136 | limit: int = 10, 137 | offset: int = 0, 138 | update: bool = True, 139 | ): 140 | input_path = Path(input_directory) 141 | if not input_path.is_dir(): 142 | raise HTTPException(status_code=404, detail="Input directory not found") 143 | 144 | # TODO: Add support for multiple input directories in the UI 145 | if rules_path and not Path(rules_path).is_file(): 146 | rules_path = None 147 | print("Rules file not found") 148 | if rules_path: 149 | return show_redaction_plan( 150 | [input_path], override_rules=Path(rules_path), limit=limit, offset=offset, update=update 151 | )._asdict() 152 | 153 | return show_redaction_plan([input_path], limit=limit, offset=offset, update=update)._asdict() 154 | 155 | 156 | @router.post("/redact/") 157 | def redact( 158 | input_directory: str, # noqa: B008 159 | output_directory: str, # noqa: B008 160 | rules_path: Optional[str] = None, 161 | ): 162 | input_path = Path(input_directory) 163 | output_path = Path(output_directory) 164 | if not input_path.is_dir(): 165 | raise HTTPException(status_code=404, detail="Input directory not found") 166 | if not output_path.is_dir(): 167 | raise HTTPException(status_code=404, detail="Output directory not found") 168 | if rules_path is not None and not Path(rules_path).is_file(): 169 | rules_path = None 170 | print("Rules file not found") 171 | # TODO: Add support for multiple input directories in the UI 172 | if rules_path: 173 | redact_images([input_path], output_path, override_rules=Path(rules_path)) 174 | else: 175 | redact_images([input_path], output_path) 176 | 177 | 178 | @router.websocket("/ws") 179 | async def websocket_endpoint(websocket: WebSocket): 180 | await websocket.accept() 181 | backoff = 1 182 | 183 | while True: 184 | try: 185 | print("Client connected") 186 | backoff = 1 187 | 188 | while True: 189 | message = get_next_progress_message() 190 | if message is not None: 191 | message_dict = dict( 192 | count=message[0], max=message[1], 
redact_dir=message[2].name 193 | ) 194 | await websocket.send_json(message_dict) 195 | else: 196 | await asyncio.sleep(0.001) # Add a small delay to avoid busy waiting 197 | 198 | except WebSocketDisconnect: 199 | print("Attempting to reconnect to client") 200 | await asyncio.sleep(backoff) 201 | backoff = min(backoff * 2, 60) 202 | await websocket.accept() 203 | -------------------------------------------------------------------------------- /client/src/HomePage.vue: -------------------------------------------------------------------------------- 1 | 113 | 114 | 263 | -------------------------------------------------------------------------------- /tests/test_redact.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import importlib.resources 3 | import logging 4 | from pathlib import Path, PurePath 5 | import struct 6 | 7 | from freezegun import freeze_time 8 | import pytest 9 | import yaml 10 | 11 | from imagedephi import redact 12 | from imagedephi.redact.redact import ProfileChoice, create_redact_dir_and_manifest 13 | from imagedephi.redact.svs import SvsRedactionPlan 14 | from imagedephi.rules import KeepRule, Ruleset 15 | from imagedephi.utils.logger import logger 16 | 17 | 18 | @pytest.fixture 19 | def base_rule_set(): 20 | base_rules_path = importlib.resources.files("imagedephi") / "base_rules.yaml" 21 | with base_rules_path.open() as base_rules_stream: 22 | return Ruleset.model_validate(yaml.safe_load(base_rules_stream)) 23 | 24 | 25 | @pytest.fixture 26 | def override_rule_set(rules_dir: Path): 27 | rule_file = rules_dir / "example_user_rules.yaml" 28 | return rule_file 29 | 30 | 31 | @pytest.fixture 32 | def strict_rule_set(): 33 | strict_rules_path = importlib.resources.files("imagedephi") / "minimum_rules.yaml" 34 | return strict_rules_path 35 | 36 | 37 | @pytest.fixture( 38 | params=[PurePath("svs"), PurePath("svs") / "test_svs_image_blank.svs"], 39 | ids=["input_dir", "input_file"], 40 | ) 41 
import datetime
import importlib.resources
import logging
from pathlib import Path, PurePath
import struct

from freezegun import freeze_time
import pytest
import yaml

from imagedephi import redact
from imagedephi.redact.redact import ProfileChoice, create_redact_dir_and_manifest
from imagedephi.redact.svs import SvsRedactionPlan
from imagedephi.rules import KeepRule, Ruleset
from imagedephi.utils.logger import logger


@pytest.fixture
def base_rule_set():
    """Load the base rule set that ships with the package."""
    base_rules_path = importlib.resources.files("imagedephi") / "base_rules.yaml"
    with base_rules_path.open() as base_rules_stream:
        return Ruleset.model_validate(yaml.safe_load(base_rules_stream))


@pytest.fixture
def override_rule_set(rules_dir: Path):
    """Path of the example user rule file."""
    rule_file = rules_dir / "example_user_rules.yaml"
    return rule_file


@pytest.fixture
def strict_rule_set():
    """Path of the packaged minimum rule file."""
    strict_rules_path = importlib.resources.files("imagedephi") / "minimum_rules.yaml"
    return strict_rules_path


@pytest.fixture(
    params=[PurePath("svs"), PurePath("svs") / "test_svs_image_blank.svs"],
    ids=["input_dir", "input_file"],
)
def svs_input_paths(test_image_svs, data_dir, request) -> list[Path]:
    path_list = [data_dir / "input" / request.param]
    return path_list


@pytest.fixture(
    params=[PurePath("dcm"), PurePath("dcm") / "test_dcm_image.dcm"],
    ids=["input_dir", "input_file"],
)
def dcm_input_path(data_dir, test_image_dcm, request) -> list[Path]:
    path_list = [data_dir / "input" / request.param]
    return path_list


@pytest.fixture(
    params=[PurePath("tiff"), PurePath("tiff") / "test_image.tif"],
    ids=["input_dir", "input_file"],
)
def tiff_input_path(data_dir, test_image_tiff, request) -> list[Path]:
    path_list = [data_dir / "input" / request.param]
    return path_list


@freeze_time("2023-05-12 12:12:53")
def test_create_redact_dir_and_manifest(tmp_path):
    time_stamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    output_dir, manifest = create_redact_dir_and_manifest(tmp_path / "fake", time_stamp)
    assert output_dir.exists()
    assert output_dir.name == "Redacted_2023-05-12_12-12-53"
    assert manifest.exists()
    assert manifest.name == "Redacted_2023-05-12_12-12-53_manifest.csv"


@freeze_time("2023-05-12 12:12:53")
def test_redact_svs(svs_input_paths, tmp_path, override_rule_set):
    redact.redact_images(svs_input_paths, tmp_path, override_rule_set)

    output_file = tmp_path / "Redacted_2023-05-12_12-12-53" / "my_study_slide_1.svs"
    svs_output_file_bytes = output_file.read_bytes()
    # verify our custom svs rule was applied
    assert b"ICC Profile" not in svs_output_file_bytes
    # verify the base image rule was applied to the macro
    assert b"macro" not in svs_output_file_bytes


def test_redact_svs_no_extension(mocker, test_image_svs_no_extension, tmp_path):
    # Ensure the correct redaction plan is called for an SVS file with no
    # extension
    spy = mocker.spy(SvsRedactionPlan, "__init__")
    redact.redact_images(test_image_svs_no_extension, tmp_path)
    assert spy.call_count == 1


def test_plan_svs(caplog, svs_input_paths, override_rule_set):
    logger.setLevel(logging.INFO)
    redact.show_redaction_plan(svs_input_paths, override_rule_set)

    # Behavior for directories: skip printing full plans
    # Behavior for single image file: print full plan
    for svs_input_path in svs_input_paths:
        if svs_input_path.is_dir() and len(list(svs_input_path.iterdir())) > 1:
            assert "Aperio (.svs) Metadata Redaction Plan" not in caplog.text
            assert "ICC Profile: delete" not in caplog.text
        else:
            assert "Aperio (.svs) Metadata Redaction Plan" in caplog.text
            assert "ICC Profile: delete" in caplog.text


def test_associated_image_key_no_description(data_dir, base_rule_set):
    input_image = data_dir / "input" / "svs" / "test_svs_image_blank.svs"
    svs_redaction_plan = SvsRedactionPlan(input_image, base_rule_set.svs)
    # A single IFD whose only tag is 254 with value 9 and no image description.
    test_tags = {
        254: {
            "datatype": 4,
            "count": 1,
            "datapos": 0,
            "data": [9],
        }
    }
    test_ifd = {
        "offset": 0,
        "tags": test_tags,
        "path_or_fobj": "",
        "size": 0,
        "bigEndian": False,
        "bigtiff": False,
        "tagcount": 1,
    }
    associated_image_key = svs_redaction_plan.get_associated_image_key_for_ifd(
        test_ifd,  # type: ignore
    )
    assert associated_image_key == "macro"


@freeze_time("2023-05-12 12:12:53")
def test_remove_orphaned_metadata(secret_metadata_image, tmp_path, override_rule_set):
    # Collect the bytes of every input image; the original loop kept only the
    # bytes of the last one.
    input_bytes = b"".join(image.read_bytes() for image in secret_metadata_image)

    redact.redact_images(secret_metadata_image, tmp_path, override_rule_set)

    output_file = tmp_path / "Redacted_2023-05-12_12-12-53" / "my_study_slide_1.tiff"
    output_bytes = output_file.read_bytes()

    assert b"Secret" in input_bytes
    assert b"Secret" not in output_bytes


@freeze_time("2023-05-12 12:12:53")
def test_redact_dcm(test_image_dcm, tmp_path, override_rule_set):
    redact.redact_images(test_image_dcm, tmp_path, override_rule_set)

    output_file = tmp_path / "Redacted_2023-05-12_12-12-53" / "my_study_slide_1.dcm"
    dcm_output_file_bytes = output_file.read_bytes()
    # verify the base rule deleted "SeriesDescription"
    assert b"Sample" not in dcm_output_file_bytes


def test_plan_dcm(caplog, test_image_dcm):
    logger.setLevel(logging.DEBUG)
    redact.show_redaction_plan(test_image_dcm)

    assert "DICOM Metadata Redaction Plan" in caplog.text
    assert "SeriesDescription: delete" in caplog.text


@freeze_time("2023-05-12 12:12:53")
@pytest.mark.timeout(5)
def test_strict(svs_input_paths, tmp_path) -> None:
    redact.redact_images(svs_input_paths, tmp_path, profile=ProfileChoice.Strict.value)
    output_file = tmp_path / "Redacted_2023-05-12_12-12-53" / "study_slide_1.svs"
    output_file_bytes = output_file.read_bytes()
    assert b"Aperio" not in output_file_bytes
    assert b"macro" not in output_file_bytes


@freeze_time("2023-05-12 12:12:53")
@pytest.mark.timeout(5)
def test_override_with_strict_flag(svs_input_paths, tmp_path, strict_rule_set) -> None:
    redact.redact_images(svs_input_paths, tmp_path, override_rules=strict_rule_set)
    output_file = tmp_path / "Redacted_2023-05-12_12-12-53" / "study_slide_1.svs"
    output_file_bytes = output_file.read_bytes()
    assert b"Aperio" not in output_file_bytes
    assert b"macro" not in output_file_bytes


@freeze_time("2023-05-12 12:12:53")
@pytest.mark.timeout(5)
def test_strict_skip_dcm(dcm_input_path, tmp_path) -> None:
    redact.redact_images(dcm_input_path, tmp_path, profile=ProfileChoice.Strict.value)
    output_dir = tmp_path / "Redacted_2023-05-12_12-12-53"
    assert output_dir.is_dir()
    assert len(list(output_dir.iterdir())) == 0


@freeze_time("2023-05-12 12:12:53")
@pytest.mark.timeout(5)
@pytest.mark.parametrize(
    "action,custom_tag_exists", [("keep", True), ("delete", False), ("use_rule", True)]
)
def test_dcm_private_redaction(dcm_input_path, tmp_path, action, custom_tag_exists) -> None:
    override_ruleset = Ruleset()
    override_ruleset.dicom.custom_metadata_action = action
    if action == "use_rule":
        override_ruleset.dicom.metadata["(1001,1001)"] = KeepRule(
            key_name="TestItem", action="keep"
        )

    override_rules = tmp_path / "override_rules.yaml"
    with override_rules.open("w") as override_rules_stream:
        yaml.safe_dump(override_ruleset.model_dump(), override_rules_stream)
    redact.redact_images(
        dcm_input_path,
        tmp_path,
        override_rules=override_rules,
    )
    output_file = tmp_path / "Redacted_2023-05-12_12-12-53" / "study_slide_1.dcm"
    dcm_output_file_bytes = output_file.read_bytes()
    # NOTE(review): the text from this struct.pack call up to the ``-> None:`` of
    # test_dates_dcm was lost in extraction. The format string, tag values and
    # assertion are reconstructed from the "(1001,1001)" rule key and the
    # ``custom_tag_exists`` parameter -- confirm against the repository.
    tag_bytes = struct.pack("<HH", 0x1001, 0x1001)
    assert (tag_bytes in dcm_output_file_bytes) == custom_tag_exists


# NOTE(review): decorators and signature reconstructed to match the sibling
# test_dates_* tests -- confirm against the repository.
@freeze_time("2023-05-12 12:12:53")
@pytest.mark.timeout(5)
def test_dates_dcm(dcm_input_path, tmp_path) -> None:
    redact.redact_images(dcm_input_path, tmp_path, profile=ProfileChoice.Dates.value)
    output_file = tmp_path / "Redacted_2023-05-12_12-12-53" / "study_slide_1.dcm"
    dcm_output_file_bytes = output_file.read_bytes()
    assert b"20220101" in dcm_output_file_bytes


@freeze_time("2023-05-12 12:12:53")
@pytest.mark.timeout(5)
def test_dates_svs(svs_input_paths, tmp_path) -> None:
    redact.redact_images(svs_input_paths, tmp_path, profile=ProfileChoice.Dates.value)
    output_file = tmp_path / "Redacted_2023-05-12_12-12-53" / "study_slide_1.svs"
    output_file_bytes = output_file.read_bytes()
    # Date set to January 1
    assert b"01/01/08" in output_file_bytes
    # Time set to midnight
    assert b"00:00:00" in output_file_bytes
@freeze_time("2023-05-12 12:12:53") 246 | @pytest.mark.timeout(5) 247 | def test_dates_tiff(tiff_input_path, tmp_path) -> None: 248 | redact.redact_images(tiff_input_path, tmp_path, profile=ProfileChoice.Dates.value) 249 | output_file = tmp_path / "Redacted_2023-05-12_12-12-53" / "study_slide_1.tif" 250 | output_file_bytes = output_file.read_bytes() 251 | assert b"2024:01:01 00:00:00" in output_file_bytes 252 | -------------------------------------------------------------------------------- /docs/demo.md: -------------------------------------------------------------------------------- 1 | # ImageDePHI Demo 2 | 3 | This walkthrough will guide you through using the ImageDePHI program. 4 | 5 | ## Getting the demo data 6 | 7 | In order to get the demo data, you will need to have installed ImageDePHI and run the following command: 8 | 9 | ```bash 10 | imagedephi demo-data 11 | ``` 12 | 13 | This will create a new directory in the location it is run called `demo_files` and download several whole slide images into that directory. These images contain fake PHI, which we will redact with ImageDePHI. 14 | 15 | ## Redacting with the Graphical User Interface (GUI) 16 | ImageDePHI allows redaction of whole slide images through either a graphical user interface, accessible through a web browser, or a command line interface. First, let's take a look at the redaction workflow using the graphical user interface. 17 | 18 | #### 1. Starting the program 19 | In order to start the program, install ImageDePHI and run: 20 | 21 | ```bash 22 | imagedephi gui 23 | ``` 24 | 25 | This will start the program, which will be accessible at a random port, and open up a browser at the correct address. 26 | 27 | By default, this command will select a random port to serve the application from. You can specify a port if you'd like by using the `--port` flag, e.g: 28 | 29 | ```bash 30 | imagedephi gui --port 8888 31 | ``` 32 | 33 | #### 2. 
Looking at the UI 34 | If your browser is not already open to ImageDePHI, open up your browser and go to `127.0.0.1:<port>` where `<port>` is either the random port picked by the command above or the number you supplied to the `--port` flag if you used that option to start the server. 35 | 36 | ![Initial ImageDePHI UI](./images/initial_ui.png) 37 | 38 | You should be greeted by the initial UI screen. On the left hand side there are several options for specifying which files should be redacted and how they should be redacted. We will go over each step individually. 39 | 40 | #### 3. Select Files to be Redacted 41 | 42 | The first thing you'll need to do is select files for redaction. 43 | 44 | ![Button to open input directory browser](./images/step_1_input_directory_open_browser.png) 45 | Click the button in Step 1 to open up a file browser. 46 | 47 | ![Input directory browser](./images/step_1_input_directory_select_directory.png) 48 | Navigate your computer's file system until you come to the directory where you downloaded your demo files, then click "Select." 49 | 50 | #### 4. Select Output Destination 51 | 52 | Next, select a location for redacted images. ImageDePHI does not modify your original images. Instead, it creates new, redacted images saved into the location selected here. 53 | 54 | ![Output directory selector](./images/step_2_output_directory_select_directory.png) 55 | For this demo, select the directory that is the parent of your `demo_files/` directory. A new directory will be created at this location for the redacted images. 56 | 57 | #### 5. Preview Redaction Changes 58 | 59 | After selecting your input directory, you will see a table previewing the redaction that is about to happen. For each file in the input directory, you'll see a row containing the file name, a thumbnail, the redaction status, and the metadata tags. 60 | 61 | Looking at the metadata tags, you'll see that, for example, the "Date" tag is red with strikethrough.
This indicates that this field will be removed and not present in the redacted output file. Scrolling over, you'll see tags like "AppMag" and "BitsPerSample" have no special styling, indicating that they will be included in the output file. 62 | 63 | Most importantly, you'll see that there's an issue in the "Redaction Status" column for the image "SEER_Mouse_1_17158543_demo.svs". If you hover over the red icon you'll see the message "1 tag(s) missing redaction rules." Below that you'll see "55500: 55500," indicating that this image contains a metadata tag with the number "55500" that ImageDePHI doesn't know how to redact. 64 | 65 | ![Image grid showing an error](./images/image_grid_errors_ui.png) 66 | 67 | #### 6. Creating a Custom Rule Set 68 | 69 | The base rule set provided by ImageDePHI is used every time images are redacted. User-defined rule sets can be used to supplement or modify the behavior defined by the base rules. 70 | 71 | The base rule set does not contain a rule for tag `55500`, so in order to redact the demo images, the program will need to be supplied a ruleset that knows what to do with tag `55500`. 72 | 73 | Let's create that ruleset now. Create a new file called `custom_rules.yaml` and add the following: 74 | 75 | ```yaml 76 | --- 77 | name: Custom Rules 78 | description: Custom ruleset used for the ImageDePHI demo. 79 | svs: 80 | metadata: 81 | '55500': 82 | action: delete 83 | ``` 84 | 85 | If you'd like to know the default behavior of ImageDePHI, take a look at the [base rules](../imagedephi/base_rules.yaml). 86 | 87 | #### 7. Using Your Custom Ruleset 88 | 89 | Now that you've created a rule to complete redaction of the demo images, let's use that rule set. 90 | 91 | Click the folder icon in Step 3 (Rulesets) to open the file navigator. 92 | 93 | ![Custom ruleset file navigator](./images/step_3_ruleset_select_ruleset.png) 94 | 95 | Navigate to the custom rule set you created in step 6 and select it. 
The rule set you select in this step will be composed with the base rule set provided by ImageDePHI. If a tag appears in both the base rules and the custom rule set, the custom rule will be applied instead of the base rule. 96 | 97 | The table should update to reflect that the program now knows how to redact tag `55500`, and each image should have a green checkmark icon in the "Redaction Status" column. 98 | 99 | ![Image grid showing no errors](./images/image_grid_success_ui.png) 100 | 101 | #### 8. Redact the Demo Images 102 | 103 | All that's left to do is click redact! Click the button that says "De-PHI Images." You'll see a progress bar that indicates how much time is left in the redaction process. 104 | 105 | ![Image redaction indicated by a progress bar](./images/redaction_progress_ui.png) 106 | 107 | Once that succeeds, you'll see a toast notification at the bottom of the screen indicating that the images have been redacted successfully. 108 | 109 | ![Redaction complete notification](./images/redaction_complete_ui.png) 110 | 111 | You'll find a new directory in the location you selected as your output directory. This new directory will have a name starting with "Redacted_" and ending with a timestamp of when you started redacting images. It will contain redacted images. Adjacent to that directory will be a manifest file mapping input file names to output file names. If there were any issues during redaction, those would be reported in the manifest file as well. 112 | 113 | ## Using the CLI 114 | 115 | If you would prefer to use the CLI to redact the images, follow this section to walk through the same example using that tool instead of the UI. Make sure to follow the instructions at the top of this guide to get the demo data. 116 | 117 | #### 1. Use the `plan` command 118 | 119 | The `plan` command is one way to determine if the files you want to redact are able to be redacted.
If not, the output of the `plan` command will help you discover what you'll need to do in order to redact your images. After obtaining the test data, run the following command: 120 | 121 | ```bash 122 | imagedephi plan demo_files 123 | ``` 124 | 125 | You'll see in the output of that command that one of the files cannot be redacted. In order to find out why, you can run: 126 | 127 | ```bash 128 | imagedephi plan demo_files/SEER_Mouse_1_17158543_demo.svs 129 | ``` 130 | 131 | Running the `plan` command on a single image will provide a detailed report of exactly how that particular image is redacted. To see this level of detail for all images in a directory, use the `-v` (verbose) option. 132 | 133 | The output of the `plan` command for that particular image reveals that it contains a metadata item with tag `55500` with no corresponding rule. 134 | 135 | #### 2. Create an override rule set 136 | 137 | In order to redact the demo images, we'll need to give the program a rule it can use for tag `55500`. The mechanism we can use to do this is with an override, or custom, rule set. 138 | 139 | ImageDePHI comes with a base set of rules that covers most commonly seen metadata tags for SVS and DICOM images. If your images contain metadata not covered by the base rules, you'll need a custom rule set. 140 | 141 | For this demo, create a file called `custom_rules.yaml` and add the following: 142 | 143 | ```yaml 144 | --- 145 | name: Custom Rules 146 | description: Custom ruleset used for the ImageDePHI demo. 147 | svs: 148 | metadata: 149 | '55500': 150 | action: delete 151 | ``` 152 | 153 | We now have a ruleset to supplement the base rules and enable redaction of the demo images. 154 | 155 | #### 4. Use the `plan` command with the override rule set 156 | 157 | First, let's verify that our custom rule set works as intended.
Run the following command: 158 | 159 | ```bash 160 | imagedephi plan -R custom_rules.yaml demo_files 161 | ``` 162 | 163 | Note the message "3 images able to be redacted" in the output. This means all of the demo files can now be redacted. 164 | 165 | #### 5. Use the `run` command to redact the images 166 | 167 | The `run` command is very similar to `plan`, except it also needs to be told where to save the redacted files. This is done using the `-o` option. Run the following: 168 | 169 | ```bash 170 | mkdir ./output_files 171 | imagedephi run -R custom_rules.yaml -o ./output_files demo_files 172 | ``` 173 | 174 | After that command finishes, you'll see a new directory in `./output_files` called `Redacted_<timestamp>` containing the redacted files. 175 | 176 | You'll also see a file next to that directory called `Redacted_<timestamp>_manifest.csv`. This will contain a mapping of input file names to output file names, as well as any errors that may have occurred during redaction. 177 | 178 | ### Using a command file in the CLI 179 | In some instances you may want to pass a command file to the CLI. For example, you may have a long list of input files that would be cumbersome to type in a terminal. 180 | 181 | For this demo, create a file called `command_file.yaml` and add the following: 182 | 183 | ```yaml 184 | --- 185 | command: plan 186 | input_path: 187 | - "demo_files" 188 | output_dir: ~/redacted_images 189 | ``` 190 | Now run the following: 191 | 192 | ```bash 193 | imagedephi plan -c command_file.yaml 194 | ``` 195 | This option is supported by both the `plan` and `run` commands. Any option that can be added to these commands can also be added to the command file. 196 | 197 | ```yaml 198 | --- 199 | command: run 200 | input_paths: 201 | - "demo_files" 202 | output_dir: /redacted_images 203 | recursive: True 204 | ``` 205 | **Please Note:** The command file is meant to supplement the command given in the terminal. Any option supplied in the terminal takes priority.
206 | 207 | Additionally, you can supply an unformatted yaml or text file with the `--file_list` option. 208 | 209 | Create a file called `file_list.txt` and add the following: 210 | ```bash 211 | demo_files 212 | ``` 213 | Now run the following: 214 | 215 | ```bash 216 | imagedephi plan -f file_list.txt 217 | ``` 218 | 219 | 220 | ## Next Steps 221 | 222 | For more information about the ImageDePHI rules system, be sure to check out the [documentation](../README.md). 223 | 224 | ## Demo Data Citation 225 | ‘NCI SRP Mouse Tissue Whole Slide Images with Fake PHI/PII’ data set, Version 1.0. Generated: December 29, 2021; Scanner: Leica Microsystems, Aperio AT2; Provided by: The National Cancer Institute (NCI) Surveillance Research Program (SRP). 226 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 
22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ImageDePHI 2 | ImageDePHI is an application to redact personal data (PHI) from whole slide images (WSIs). 3 | 4 | > This project has been funded in whole or in part with Federal funds from the National Cancer Institute, National Institutes of Health, Department of Health and Human Services, under Contract No. 75N91022C00033 5 | 6 | ## Installation 7 | * Download the [latest ImageDePHI release](https://github.com/DigitalSlideArchive/ImageDePHI/releases/latest). 8 | 9 | * Unzip the downloaded file, which will extract the executable named `imagedephi` (or `imagedephi.exe` on Windows). 10 | 11 | * Please note that on Linux, only Ubuntu 20.04+ is supported. 12 | 13 | ## Usage 14 | For an in-depth walkthrough, check out the [demo](./docs/demo.md). 15 | 16 | From a command line, execute the application to get full usage help. 
17 | 18 | Alternatively **on Windows only**, directly open `imagedephi.exe` in Windows Explorer to launch the ImageDePHI GUI. 19 | 20 | If running on macOS, you may need to [add the executable to the list of trusted software](https://support.apple.com/guide/mac-help/apple-cant-check-app-for-malicious-software-mchleab3a043/mac) to launch ImageDePHI in the same way you would any other registered app. 21 | 22 | # Rules 23 | Image redaction is determined by a set of rules. By default, the base set of rules is used. These rules are provided by the `imagedephi` package and can be found [here](https://github.com/DigitalSlideArchive/ImageDePHI/blob/main/imagedephi/base_rules.yaml). 24 | 25 | ## Rule Application 26 | All runs of `imagedephi` use the provided base set of rules as a foundation. End users can use the ruleset framework to build custom rulesets that handle additional or custom metadata not covered by the base rules, or override the behavior of the base rule set. 27 | 28 | Override rule sets can be specified by using the `-R my_ruleset.yaml` or `--override-rules my_ruleset.yaml` option. This option is available for both the `imagedephi run` and `imagedephi plan` commands. Override rule sets are not provided by `imagedephi`, and must be defined by the end user. 29 | 30 | When `imagedephi` determines the steps to redact a file, it checks each piece of metadata in the file. For each piece of metadata found this way, it will first consult the override rule set, if present, for an applicable rule. If the override rule set does not contain a rule for that piece of metadata, the program will check the base ruleset. 31 | 32 | If neither the override rule set nor the base rule set covers a piece of metadata, redaction will fail, and the program will list the metadata that it could not redact. There is no default behavior for unknown metadata. 
33 | 34 | ### Redaction Profiles 35 | 36 | #### Strict Redaction 37 | For whole slide image formats based on the tiff standard, `imagedephi` allows a strict type of redaction. Using the `--profile strict` option when calling `imagedephi` from the CLI will use this mode. In this mode, only tags strictly required by the tiff standard will remain, and all other metadata will be stripped from the images. For a full list of metadata tags that will remain after strict redaction, see the [minimum rules file](https://github.com/DigitalSlideArchive/ImageDePHI/blob/main/imagedephi/minimum_rules.yaml). 38 | 39 | #### Fuzzing Dates and Times 40 | Using the `--profile dates` option will replace dates, times, datetimes, and UTC offsets with values that semantically represent those things but with less precision than the original value. Dates will preserve the year, but the month and day will be set to January 1st. Times will be set to midnight and UTC offsets to +0000. Rules for this profile can be found in [modify_dates_rules.yaml](https://github.com/DigitalSlideArchive/ImageDePHI/blob/main/imagedephi/modify_dates_rules.yaml). For DICOM images, the [Attribute Confidentiality Profiles](https://dicom.nema.org/dicom/2013/output/chtml/part15/chapter_E.html) were used to determine which tags should be modified according to this profile. 41 | 42 | 43 | ## Ruleset Format Overview 44 | In order to read the base rules and build your own custom rule sets, it is important to understand the format in which rulesets are specified. Rulesets are defined by `.yaml` files (one ruleset per file), and are a dictionary with the following top-level tags: `name`, `description`, `output_file_name`, `tiff`, `svs`, and `dicom`. 45 | 46 | ### Generic Properties 47 | The following three properties belong to the rulesets themselves, and don't influence redaction behavior. 48 | 49 | #### `name` 50 | Provide a name for a ruleset. 
This is used by the `imagedephi plan` command to specify which ruleset is being used to redact a particular piece of metadata. 51 | 52 | #### `description` 53 | You can add a description to your custom rulesets. This is not used by the program, but can be helpful to communicate what cases your custom rulesets are designed for. 54 | 55 | #### `output_file_name` 56 | Specify how the output files should be named here. The base ruleset contains the value `study_slide`. In this case, if the input slides are named: `john_smith_lung.svs` and `john_smith_pancreas.svs`, the redacted output images will be named `study_slide_1.svs` and `study_slide_2.svs`. 57 | 58 | ### Other Top-level Properties 59 | 60 | #### `strict` 61 | The `strict` property of rulesets is used to denote that ALL unspecified tags should be deleted. This is supported for `tiff` and `svs` files. An example of using the strict flag can be seen in the `minimum_rules.yaml` rule set. 62 | 63 | ### File Format Rules 64 | Redaction behavior is specified per file type. Currently pure `tiff` files, Aperio (`.svs`), and DICOM files are supported. Each image type has its own groups of data that can be redacted. For example, Aperio images have `tiff` metadata, certain associated images, and additional metadata specified in the `ImageDescription` tag. `svs` rulesets take the following shape: 65 | 66 | 67 | ```yaml 68 | svs: 69 | associated_images: 70 | ... 71 | metadata: 72 | ... 73 | image_description: 74 | ... 75 | ``` 76 | 77 | Each group is a dictionary whose keys represent a way to identify a specific piece of metadata or specific associated image, and whose values are dictionaries that define redaction behavior. Each entry (key-value pair) in the dictionary is a "rule." Take the following `associated_image` rule from the base ruleset 78 | 79 | ```yaml 80 | svs: 81 | ... 82 | associated_images: 83 | label: 84 | action: replace 85 | replace_with: blank_image 86 | ... 
87 | ``` 88 | 89 | This describes how `imagedephi` handles `label` images for Aperio files by default. Since label images frequently contain PHI, but are required by the Aperio (.svs) format, they are replaced with a black square of the same size. 90 | 91 | #### Image Rules 92 | 93 | Image rules take the following form: 94 | 95 | ```yaml 96 | <image_key>: 97 | action: <action> 98 | ``` 99 | 100 | Where `image_key` identifies a particular associated image. For a catch-all rule, use the key `default`. 101 | 102 | Image rules can have the following actions: 103 | 104 | * `replace`: Replace an image with another. If specified, a value for `replace_with` must also be provided 105 | * `keep`: Does nothing. The associated image matching this key will be included in the output file 106 | * `delete`: The image will not be included in the output file 107 | 108 | For image rules, the only supported value of `replace_with` is `blank_image`. 109 | 110 | #### Metadata Rules 111 | 112 | Metadata rules take the following form: 113 | 114 | ```yaml 115 | <metadata_key>: 116 | action: <action> 117 | ``` 118 | 119 | Where `metadata_key` identifies a piece of metadata. Possible values for this key depend on the type of metadata being redacted. For example, rules listed under 120 | 121 | ```yaml 122 | tiff: 123 | metadata: 124 | ``` 125 | have `metadata_keys` for particular tiff tags (e.g. `ImageDescription`, `ImageWidth`). 126 | 127 | Available actions for metadata rules are: 128 | 129 | * `delete`: the metadata will not appear in the output file 130 | * `keep`: the metadata will appear unchanged in the output file 131 | * `replace`: replace the metadata with a specified value. If this is the `action`, additional fields are required. 132 | * `check_type`: This will either keep the metadata if the type matches or delete the metadata if the type does not match. Requires additional fields 133 | * `modify_date`: This will fuzz dates, times, datetimes, and time zone offsets. See the "Profiles" section for more details. 
134 | 135 | ##### `replace` rules 136 | Require the additional property `replace_with`. The value specified by the `replace_with` key will be used to override the metadata in the output image. 137 | 138 | ##### `check_type` rules 139 | Use the additional properties: 140 | * `expected_type`: one of `integer`, `number`, `text`, `rational` 141 | * `expected_count` (optional): if the piece of metadata can contain multiple values, specify how many are expected using this property. Defaults to `1`. If the `expected_type` is `rational`, this should be the expected number of rationals. That is, an `expected_count` of 1 would match with 2 integer values in the metadata. 142 | 143 | ### Supported Formats 144 | Currently, `imagedephi` supports redaction of the following types of files: 145 | * TIFF 146 | * Aperio (a tiff-like format, typically uses the extension `.svs`) 147 | * DICOM 148 | 149 | #### Tiff 150 | Tiff rules have the following shape: 151 | 152 | ```yaml 153 | tiff: 154 | associated_images: 155 | ... 156 | metadata: 157 | ... 158 | ``` 159 | 160 | The keys for the `metadata` rules are the names of tiff tags defined by the tiff standard. 161 | 162 | #### Aperio 163 | Aperio format rules have the following shape: 164 | 165 | ```yaml 166 | svs: 167 | associated_images: 168 | ... 169 | metadata: 170 | ... 171 | image_description: 172 | ... 173 | ``` 174 | 175 | The keys for the `metadata` rules are the names of tiff tags defined by the tiff standard. Names are case insensitive and common variations are accepted, e.g. `GrayResponseUnit` and `GreyResponseUnit` are both accepted 176 | 177 | For Aperio files, additional metadata is stored as key-value pairs in the `ImageDescription` tag. See more information about this [here](https://openslide.org/formats/aperio/). Each key in the `image_description` section is a key found in this `ImageDescription` string. 
178 | 179 | #### DICOM 180 | DICOM format rules are much the same: 181 | 182 | ```yaml 183 | dicom: 184 | associated_images: 185 | ... 186 | custom_metadata_action: ... 187 | metadata: 188 | ... 189 | ``` 190 | 191 | Note that here there is an explicit format-level setting for dealing with custom metadata. Any tag with an odd group number is considered custom metadata. This can be set to `keep`, `delete` or `use_rule`. 192 | 193 | * `keep`: Retain the custom metadata value after redaction. Rules for custom tags specified in the `metadata` section take precedence over this setting. 194 | * `delete`: Delete the custom metadata tag from the image. Rules for custom tags specified in the `metadata` section take precedence over this setting. 195 | * `use_rule`: This mode will fall back to rules specified for each piece of custom metadata in the `metadata` section of the rule set. If a custom metadata tag with no corresponding rule is encountered, the image will not be redacted, as the redaction plan would be considered incomplete. 196 | 197 | Additionally, DICOM redaction supports additional redaction operations. 198 | 199 | * `empty`: Replace the tag's value with `None`. 200 | * `replace_dummy`: Replace the tag's value with a dummy value, which is dependent on the original value type. For example, if the tag's value is a string, the dummy value is the empty string. If the tag's value is an integer, the dummy value is 0. 201 | * `replace_uid`: If the tag's value is a UID, it will be replaced with a randomly generated UID of the form `"2.25.<uuid>"` where `<uuid>` is a UUID generated at run time. The new custom UID is stored by ImageDePHI and used to replace other UIDs that share the same initial value. This way, if a UID is used in different tags within an image, they all get the same replacement value. 
202 | 203 | ## Related Projects 204 | 205 | Other efforts related to anonymizing medical images include: 206 | 207 | - [`dicom-anonymizer`](https://github.com/KitwareMedical/dicom-anonymizer): A python tool for anonymizing DICOM files 208 | - [WSI DeID](https://github.com/DigitalSlideArchive/DSA-WSI-DeID): A workflow built onto the [Digital Slide Archive](https://github.com/DigitalSlideArchive/digital_slide_archive/?tab=readme-ov-file#digital-slide-archive) for redacting medical images. 209 | -------------------------------------------------------------------------------- /imagedephi/redact/dicom.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from collections.abc import Generator 4 | from datetime import date, datetime 5 | from pathlib import Path 6 | from typing import TYPE_CHECKING 7 | from uuid import uuid4 8 | 9 | import pydicom 10 | from pydicom import valuerep 11 | from pydicom.datadict import keyword_for_tag 12 | from pydicom.dataelem import DataElement 13 | from pydicom.dataset import Dataset 14 | from pydicom.tag import BaseTag 15 | 16 | from imagedephi.rules import ( 17 | ConcreteMetadataRule, 18 | DeleteRule, 19 | DicomRules, 20 | FileFormat, 21 | KeepRule, 22 | MetadataReplaceRule, 23 | RedactionOperation, 24 | ) 25 | from imagedephi.utils.logger import logger 26 | 27 | from .redaction_plan import RedactionPlan 28 | 29 | if TYPE_CHECKING: 30 | from .redaction_plan import RedactionPlanReport 31 | 32 | 33 | VR_TO_DUMMY_VALUE: dict[str, str | float | int | list | bytes] = {} 34 | for vr in valuerep.STR_VR: 35 | VR_TO_DUMMY_VALUE[vr] = "" 36 | for vr in valuerep.FLOAT_VR: 37 | VR_TO_DUMMY_VALUE[vr] = 0.0 38 | for vr in valuerep.INT_VR: 39 | VR_TO_DUMMY_VALUE[vr] = 0 40 | for vr in valuerep.LIST_VR: 41 | VR_TO_DUMMY_VALUE[vr] = [] 42 | for vr in valuerep.BYTES_VR: 43 | VR_TO_DUMMY_VALUE[vr] = b"" 44 | 45 | VR_TO_EXPECTED_TYPE: dict[str, type] = {} 46 | for vr in 
@staticmethod
def _iter_dicom_elements(
    dicom_dataset: Dataset,
) -> Generator[tuple[DataElement, Dataset], None, None]:
    """
    Walk a dataset depth-first, yielding ``(element, owning_dataset)`` pairs.

    The elements of every dataset nested inside a sequence (VR ``SQ``) are
    yielded before the sequence element itself.
    """
    for element in dicom_dataset:
        if element.VR == valuerep.VR.SQ:
            for dataset in element.value:
                yield from DicomRedactionPlan._iter_dicom_elements(dataset)
            # Treat the sequence as its own element as well: some of the rules
            # generated from the DICOM docs include rules for sequences. The
            # sequence is yielded after its items to protect against deletion
            # while looping.
        yield element, dicom_dataset


def __init__(self, image_path: Path, rules: DicomRules, uid_map: dict[str, str] | None) -> None:
    """
    Read a DICOM file and choose the rule that applies to each of its elements.

    Elements with a usable rule are recorded in ``metadata_redaction_steps``
    (keyed by tag); elements without one are collected in ``no_match_tags``.

    ``image_path`` is the file to read. ``rules`` supplies ``associated_images``,
    ``metadata`` and ``custom_metadata_action``. ``uid_map`` maps original UIDs to
    their replacements; pass the same dict to several plans to share replacements
    between files, or ``None`` to start with a private, empty map.
    """
    self.image_path = image_path
    self.dicom_data = pydicom.dcmread(image_path)
    self.image_type = str(self.dicom_data.ImageType[WSI_IMAGE_TYPE_INDEX])

    self.metadata_redaction_steps = {}
    self.no_match_tags = []

    # Determine what, if any, action to take with this file's
    # image data. Currently only matters for label and overview
    # images.
    self.associated_image_rule = rules.associated_images.get(self.image_type.lower(), None)

    # When redacting many files at a time, keep track of all UIDs across all files,
    # since the DICOM format uses separate files for different resolutions and
    # associated images.
    # Test identity rather than truthiness: an *empty* dict supplied by the caller
    # must be kept (not swapped for a fresh one), otherwise replacements recorded
    # by this plan would never reach the caller's shared map.
    self.uid_map = uid_map if uid_map is not None else {}

    custom_metadata_key = "CustomMetadataItem"
    supported_actions = (
        "keep",
        "delete",
        "replace",
        "check_type",
        "empty",
        "replace_uid",
        "replace_dummy",
        "modify_date",
    )
    for element, _ in DicomRedactionPlan._iter_dicom_elements(self.dicom_data):
        keyword = keyword_for_tag(element.tag)
        # Rules may be keyed by keyword or by the (gggg,eeee) representation;
        # the keyword wins when both are present.
        if keyword in rules.metadata:
            rule_key = keyword
        elif str(element.tag) in rules.metadata:
            rule_key = str(element.tag)
        else:
            rule_key = None

        if rule_key is None:
            # For custom metadata (odd group number), fall back to the
            # custom_metadata_action unless it says to rely on explicit rules,
            # in which case the tag is reported as unmatched like any other.
            if element.tag.group % 2 == 1 and rules.custom_metadata_action != "use_rule":
                if rules.custom_metadata_action == "delete":
                    self.metadata_redaction_steps[element.tag] = DeleteRule(
                        key_name=custom_metadata_key, action="delete"
                    )
                elif rules.custom_metadata_action == "keep":
                    self.metadata_redaction_steps[element.tag] = KeepRule(
                        key_name=custom_metadata_key, action="keep"
                    )
            else:
                self.no_match_tags.append(element.tag)
            continue

        rule = rules.metadata[rule_key]
        if rule.action in supported_actions:
            self.metadata_redaction_steps[element.tag] = rule
        else:
            self.no_match_tags.append(element.tag)
def determine_redaction_operation(
    self, rule: ConcreteMetadataRule, element: DataElement
) -> RedactionOperation:
    """
    Translate a rule into the concrete operation to perform on ``element``.

    ``check_type`` rules resolve to ``"keep"`` when the element passes
    ``passes_type_check`` and to ``"delete"`` otherwise. Every other supported
    action is returned unchanged; an unrecognised action falls back to
    ``"delete"``.
    """
    if rule.action == "check_type":
        return "keep" if self.passes_type_check(element) else "delete"
    if rule.action in (
        "keep",
        "delete",
        "replace",
        "replace_uid",
        "replace_dummy",
        "empty",
        "modify_date",
    ):
        return rule.action
    return "delete"


def report_plan(self) -> RedactionPlanReport:
    """
    Log the planned operation for every element and return it as a report.

    Returns an empty dict when the file is an associated image whose rule is
    ``delete`` (the file will not be written at all). Otherwise the report has a
    single entry keyed by the file name, holding one ``{"action", "value"}``
    record per element that has a rule, plus the fields added by
    ``report_missing_rules``.
    """
    logger.debug("DICOM Metadata Redaction Plan\n")
    if self.associated_image_rule and self.associated_image_rule.action == "delete":
        # The two literals are concatenated, so the separating space has to be
        # part of the first one.
        logger.info(
            f"This image is a DICOM {self.image_type}. "
            "This file will not be written to the output directory."
        )
        return {}

    file_name = self.image_path.name
    report: RedactionPlanReport = {}
    report[file_name] = {}
    for element, _ in DicomRedactionPlan._iter_dicom_elements(self.dicom_data):
        rule = self.metadata_redaction_steps.get(element.tag, None)
        if rule:
            operation = self.determine_redaction_operation(rule, element)
            logger.debug(f"DICOM Tag {element.tag} - {rule.key_name}: {operation}")
            report[file_name][f"{element.tag}_{rule.key_name}"] = {
                "action": operation,
                "value": element.value,
            }
    self.report_missing_rules(report)
    return report
199 | 200 | Tags that are treated specially for this mode of redaction are documented here: 201 | https://dicom.nema.org/dicom/2013/output/chtml/part15/chapter_E.html#table_E.1-1 202 | """ 203 | if element.VR == valuerep.VR.DA.value: 204 | old_date = valuerep.DA(element.value) 205 | return str(valuerep.DA(date(year=old_date.year, month=1, day=1))) if old_date else None 206 | elif element.VR == valuerep.VR.DT.value: 207 | old_datetime = valuerep.DT(element.value) 208 | return ( 209 | str(valuerep.DT(datetime(year=old_datetime.year, month=1, day=1))) 210 | if old_datetime 211 | else None 212 | ) 213 | elif element.VR == valuerep.VR.TM.value: 214 | # Change time to midnight, drop precision below hour 215 | return "00" 216 | elif element.VR == valuerep.VR.SH.value: 217 | # element.VR == "SH" 218 | # For UTC offset, change to +0000 (no offset) 219 | return "+0000" 220 | return None 221 | 222 | def apply(self, rule: ConcreteMetadataRule, element: DataElement, dataset: Dataset): 223 | operation = self.determine_redaction_operation(rule, element) 224 | if operation == "delete": 225 | # TODO make sure this works as expected, we are modifying a dataset 226 | # while looping through it 227 | del dataset[element.tag] 228 | elif operation == "replace": 229 | assert isinstance(rule, MetadataReplaceRule) 230 | element.value = rule.new_value 231 | elif operation == "empty": 232 | element.value = None 233 | elif operation == "replace_uid": 234 | if element.value not in self.uid_map: 235 | new_uid = "2.25." 
+ str(uuid4().int) 236 | self.uid_map[element.value] = str(new_uid) 237 | element.value = self.uid_map[element.value] 238 | elif operation == "replace_dummy": 239 | element.value = VR_TO_DUMMY_VALUE[element.VR] 240 | elif operation == "modify_date": 241 | element.value = self._get_modified_date(element) 242 | 243 | def execute_plan(self) -> None: 244 | if self.associated_image_rule: 245 | if self.associated_image_rule.action != "delete": 246 | raise NotImplementedError( 247 | "Only 'delete' is supported for associated DICOM images at this time." 248 | ) 249 | for element, dataset in DicomRedactionPlan._iter_dicom_elements(self.dicom_data): 250 | rule = self.metadata_redaction_steps[element.tag] 251 | if rule is not None: 252 | self.apply(rule, element, dataset) 253 | 254 | def is_comprehensive(self) -> bool: 255 | return not self.no_match_tags 256 | 257 | def report_missing_rules(self, report=None) -> None: 258 | if self.is_comprehensive(): 259 | logger.info("The redaction plan is comprehensive.") 260 | if report: 261 | report[self.image_path.name]["comprehensive"] = True 262 | else: 263 | logger.error( 264 | f"{self.image_path} - The following tags could not be redacted " 265 | "given the current set of rules." 266 | ) 267 | if report is not None: 268 | report[self.image_path.name]["missing_tags"] = [] 269 | report[self.image_path.name]["comprehensive"] = False 270 | 271 | for tag in self.no_match_tags: 272 | logger.error(f"Missing tag (dicom): {tag} - {keyword_for_tag(tag)}") 273 | if report is not None: 274 | report[self.image_path.name]["missing_tags"].append({tag: keyword_for_tag(tag)}) 275 | 276 | def save(self, output_path: Path, overwrite: bool) -> None: 277 | if self.associated_image_rule and self.associated_image_rule.action == "delete": 278 | # Don't write this file to the output directory if it is marked to be deleted 279 | return 280 | if output_path.exists(): 281 | if overwrite: 282 | logger.info(f"Found existing redaction for {self.image_path.name}. 
Overwriting...") 283 | else: 284 | logger.warn( 285 | f"Could not redact {self.image_path.name}, existing redacted file in output " 286 | "directory. Use the --overwrite-existing-output flag to overwrite previously " 287 | "redacted fiels." 288 | ) 289 | return 290 | self.dicom_data.save_as(output_path) 291 | -------------------------------------------------------------------------------- /imagedephi/redact/svs.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import binascii 4 | from pathlib import Path 5 | from typing import TYPE_CHECKING 6 | 7 | import tifftools 8 | import tifftools.constants 9 | 10 | from imagedephi.rules import ( 11 | ConcreteMetadataRule, 12 | FileFormat, 13 | MetadataReplaceRule, 14 | RedactionOperation, 15 | SvsRules, 16 | ) 17 | from imagedephi.utils.logger import logger 18 | 19 | from .tiff import TiffRedactionPlan 20 | 21 | if TYPE_CHECKING: 22 | from tifftools.tifftools import IFD 23 | 24 | from .redaction_plan import RedactionPlanReport 25 | 26 | 27 | class SvsDescription: 28 | prefix: str 29 | metadata: dict[str, str | int | float] 30 | 31 | def try_get_numeric_value(self, value: str) -> str | int | float: 32 | """Given an ImageDescription value, return a number version of it if applicable.""" 33 | try: 34 | int(value) 35 | return int(value) 36 | except ValueError: 37 | try: 38 | float(value) 39 | return float(value) 40 | except ValueError: 41 | return value 42 | 43 | def __init__(self, svs_description_string: str): 44 | description_components = svs_description_string.split("|") 45 | self.prefix = description_components[0] 46 | 47 | self.metadata = {} 48 | for metadata_component in description_components[1:]: 49 | key, value = [token.strip() for token in metadata_component.split("=")] 50 | self.metadata[key] = self.try_get_numeric_value(value) 51 | 52 | def __str__(self) -> str: 53 | components = [self.prefix] 54 | components = components + [ 55 | " = 
".join([key, str(self.metadata[key])]) for key in self.metadata.keys() 56 | ] 57 | return "|".join(components) 58 | 59 | 60 | class MalformedAperioFileError(Exception): 61 | """Raised when the program cannot process an Aperio/SVS file as expected.""" 62 | 63 | ... 64 | 65 | 66 | class SvsRedactionPlan(TiffRedactionPlan): 67 | """ 68 | Represents a plan of action for redacting files in Aperio (.svs) format. 69 | 70 | Redaction for this type of file is similar to redaction for .tiff files, as the 71 | formats are similar. However, Aperio images store additional information in its 72 | ImageDescription tags. As a result, this tag is treated specially here. 73 | """ 74 | 75 | file_format = FileFormat.SVS 76 | description_redaction_steps: dict[str, ConcreteMetadataRule] 77 | no_match_description_keys: set[str] 78 | rules: SvsRules 79 | 80 | def __init__( 81 | self, 82 | image_path: Path, 83 | rules: SvsRules, 84 | strict: bool = False, 85 | ) -> None: 86 | self.rules = rules 87 | self.image_redaction_steps = {} 88 | self.description_redaction_steps = {} 89 | self.no_match_description_keys = set() 90 | super().__init__(image_path, rules, strict) 91 | 92 | # For strict mode redactions, treat Aperio (.svs) images as if they were 93 | # plain tiffs. Skip special handling of image description metadata. 
94 | if not strict: 95 | image_description_tag = tifftools.constants.Tag["ImageDescription"] 96 | if image_description_tag.value not in self.metadata_redaction_steps: 97 | raise MalformedAperioFileError() 98 | del self.metadata_redaction_steps[image_description_tag.value] 99 | 100 | ifds = self.tiff_info["ifds"] 101 | for tag, ifd in self._iter_tiff_tag_entries(ifds): 102 | if tag.value != image_description_tag.value: 103 | continue 104 | 105 | svs_description = SvsDescription(str(ifd["tags"][tag.value]["data"])) 106 | 107 | for key in svs_description.metadata.keys(): 108 | key_rule = rules.image_description.get(key, None) 109 | if key_rule and self.is_match(key_rule, key): 110 | self.description_redaction_steps[key] = key_rule 111 | else: 112 | self.no_match_description_keys.add(key) 113 | 114 | def get_associated_image_key_for_ifd(self, ifd: IFD) -> str: 115 | """ 116 | Given a associated image IFD, return its semantic type. 117 | 118 | An associated image IFD is one that contains non-tiled image data. 119 | 120 | This will return `"default`" if no semantics can be determined. 121 | """ 122 | # Check image description, it may contain 'macro' or 'label' 123 | image_description_tag = tifftools.constants.Tag["ImageDescription"] 124 | if image_description_tag.value in ifd["tags"]: 125 | image_description = str(ifd["tags"][image_description_tag.value]["data"]) 126 | for key in self.rules.associated_images: 127 | if key in image_description: 128 | return key 129 | 130 | # Check NewSubFileType bitmask. 
'macro' could be encoded here 131 | newsubfiletype_tag = tifftools.constants.Tag["NewSubfileType"] 132 | if newsubfiletype_tag.value in ifd["tags"]: 133 | newsubfiletype = ifd["tags"][newsubfiletype_tag.value]["data"][0] 134 | reduced_image_bit = tifftools.constants.NewSubfileType["ReducedImage"].value 135 | macro_bit = tifftools.constants.NewSubfileType["Macro"].value 136 | if newsubfiletype & reduced_image_bit and newsubfiletype & macro_bit: 137 | return "macro" 138 | return "default" 139 | 140 | def is_match(self, rule: ConcreteMetadataRule, data: tifftools.TiffTag | str) -> bool: 141 | if rule.action in ["keep", "delete", "replace", "check_type", "modify_date"]: 142 | if isinstance(data, tifftools.TiffTag): 143 | return super().is_match(rule, data) 144 | return rule.key_name == data 145 | return False 146 | 147 | def determine_redaction_operation( 148 | self, rule: ConcreteMetadataRule, data: SvsDescription | IFD 149 | ) -> RedactionOperation: 150 | if isinstance(data, SvsDescription): 151 | if rule.action == "check_type": 152 | value = data.metadata[rule.key_name] 153 | passes_check = self.passes_type_check( 154 | value, rule.valid_data_types, rule.expected_count 155 | ) 156 | return "keep" if passes_check else "delete" 157 | if rule.action in ["keep", "replace", "delete", "modify_date"]: 158 | return rule.action 159 | else: 160 | return super().determine_redaction_operation(rule, data) 161 | return "delete" 162 | 163 | def apply(self, rule: ConcreteMetadataRule, data: SvsDescription | IFD) -> None: 164 | if isinstance(data, SvsDescription): 165 | redaction_operation = self.determine_redaction_operation(rule, data) 166 | if redaction_operation == "delete": 167 | del data.metadata[rule.key_name] 168 | elif redaction_operation == "replace": 169 | assert isinstance(rule, MetadataReplaceRule) 170 | data.metadata[rule.key_name] = rule.new_value 171 | elif redaction_operation == "modify_date": 172 | # The "Date" field in the SVS desription appears to follow the 
format 173 | # MM/DD/YY 174 | if rule.key_name == "Date": 175 | try: 176 | current_value = str(data.metadata[rule.key_name]) 177 | _, _, year = current_value.split("/") 178 | new_value = f"01/01/{year}" 179 | except Exception: 180 | new_value = None 181 | elif rule.key_name == "Time": 182 | new_value = "00:00:00" 183 | elif rule.key_name == "Time Zone": 184 | new_value = "GMT+0000" 185 | if not new_value: 186 | del data.metadata[rule.key_name] 187 | else: 188 | data.metadata[rule.key_name] = new_value 189 | return 190 | return super().apply(rule, data) 191 | 192 | def is_comprehensive(self) -> bool: 193 | return super().is_comprehensive() and not self.no_match_description_keys 194 | 195 | def report_missing_rules(self, report=None) -> None: 196 | if self.is_comprehensive(): 197 | logger.info("The redaction plan is comprehensive.") 198 | if report: 199 | report[self.image_path.name]["comprehensive"] = True 200 | else: 201 | if self.no_match_tags: 202 | super().report_missing_rules(report) 203 | if self.no_match_description_keys: 204 | logger.error( 205 | "The following keys were found in Aperio ImageDescription strings " 206 | "and could not be redacted given the current set of rules." 
207 | ) 208 | if report: 209 | report[self.image_path.name]["comprehensive"] = False 210 | for key in self.no_match_description_keys: 211 | logger.error(f"Missing key (Aperio ImageDescription): {key}") 212 | if report is not None: 213 | report[self.image_path.name]["missing_description_keys"].append(key) 214 | 215 | def report_plan( 216 | self, 217 | ) -> RedactionPlanReport: 218 | logger.debug("Aperio (.svs) Metadata Redaction Plan\n") 219 | offset = -1 220 | ifd_count = 0 221 | report: RedactionPlanReport = {} 222 | report[self.image_path.name] = {} 223 | for tag, ifd in self._iter_tiff_tag_entries(self.tiff_info["ifds"]): 224 | if ifd["offset"] != offset: 225 | offset = ifd["offset"] 226 | ifd_count += 1 227 | logger.debug(f"IFD {ifd_count}:") 228 | if tag.value == tifftools.constants.Tag["ImageDescription"] and not self.strict: 229 | image_description = SvsDescription(str(ifd["tags"][tag.value]["data"])) 230 | for key_name, _data in image_description.metadata.items(): 231 | rule = self.description_redaction_steps[key_name] 232 | operation = self.determine_redaction_operation(rule, image_description) 233 | logger.debug(f"SVS Image Description - {key_name}: {operation}") 234 | report[self.image_path.name][key_name] = {"action": operation, "value": _data} 235 | continue 236 | if tag.value not in self.no_match_tags: 237 | rule = self.metadata_redaction_steps[tag.value] 238 | operation = self.determine_redaction_operation(rule, ifd) 239 | logger.debug(f"Tiff Tag {tag.value} - {rule.key_name}: {operation}") 240 | if ( 241 | ifd["tags"][tag.value]["datatype"] 242 | == tifftools.constants.Datatype.UNDEFINED.value 243 | ): 244 | encoded_value: dict[str, str | int] = { 245 | "value": f"0x{binascii.hexlify(ifd['tags'][tag.value]['data'] ).decode('utf-8')}", # type: ignore # noqa: E501 246 | "bytes": len(ifd["tags"][tag.value]["data"]), 247 | } 248 | report[self.image_path.name][rule.key_name] = { 249 | "action": operation, 250 | "binary": encoded_value, 251 | } 252 | 
else: 253 | report[self.image_path.name][rule.key_name] = { 254 | "action": operation, 255 | "value": ifd["tags"][tag.value]["data"], 256 | } 257 | self.report_missing_rules(report) 258 | logger.debug("Aperio (.svs) Associated Image Redaction Plan\n") 259 | # Report the number of associated images found in the image that match each associated 260 | # image rule. 261 | associated_image_count_by_rule = {} 262 | for _, image_rule in self.image_redaction_steps.items(): 263 | if image_rule.key_name not in associated_image_count_by_rule: 264 | associated_image_count_by_rule[image_rule.key_name] = 1 265 | else: 266 | associated_image_count_by_rule[image_rule.key_name] = ( 267 | associated_image_count_by_rule[image_rule.key_name] + 1 268 | ) 269 | for key in associated_image_count_by_rule: 270 | logger.debug( 271 | f"{associated_image_count_by_rule[key]} image(s) match rule:" 272 | f" {key} - {self.rules.associated_images[key].action}" 273 | ) 274 | 275 | return report 276 | 277 | def _redact_svs_image_description(self, ifd: IFD) -> None: 278 | image_description_tag = tifftools.constants.Tag["ImageDescription"] 279 | image_description = SvsDescription(str(ifd["tags"][image_description_tag.value]["data"])) 280 | 281 | # We may be modifying the dictionary as we iterate over its keys, 282 | # hence the need for a list 283 | for key in list(image_description.metadata.keys()): 284 | rule = self.description_redaction_steps.get(key) 285 | if rule is not None: 286 | self.apply(rule, image_description) 287 | ifd["tags"][image_description_tag.value]["data"] = str(image_description) 288 | 289 | def execute_plan(self) -> None: 290 | ifds = self.tiff_info["ifds"] 291 | new_ifds = self._redact_associated_images(ifds) 292 | image_description_tag = tifftools.constants.Tag["ImageDescription"] 293 | for tag, ifd in self._iter_tiff_tag_entries(new_ifds): 294 | rule = self.metadata_redaction_steps.get(tag.value) 295 | if rule is not None: 296 | self.apply(rule, ifd) 297 | elif tag.value == 
image_description_tag.value and not self.strict: 298 | self._redact_svs_image_description(ifd) 299 | self.tiff_info["ifds"] = new_ifds 300 | --------------------------------------------------------------------------------