├── .dockerignore ├── .github ├── FUNDING.yml ├── PULL_REQUEST_TEMPLATE.md ├── codespell_ignore_words.txt ├── dev-requirements.txt └── workflows │ ├── codespell.yaml │ ├── docs.yaml │ └── main.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── .pylintrc ├── .travis.yml ├── AUTHORS.md ├── CHANGELOG.md ├── CONTRIBUTING.md ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── README.md ├── deid ├── __init__.py ├── config │ ├── __init__.py │ ├── standards.py │ └── utils.py ├── data │ ├── __init__.py │ ├── deid.dicom │ ├── deid.dicom.ultrasound │ └── deid.dicom.xray.chest ├── dicom │ ├── __init__.py │ ├── actions │ │ ├── __init__.py │ │ ├── jitter.py │ │ └── uids.py │ ├── config.json │ ├── fields.py │ ├── filter.py │ ├── groups.py │ ├── header.py │ ├── parser.py │ ├── pixels │ │ ├── __init__.py │ │ ├── clean.py │ │ └── detect.py │ ├── tags.py │ ├── utils.py │ └── validate.py ├── logger │ ├── __init__.py │ ├── message.py │ └── progress.py ├── main │ ├── __init__.py │ ├── identifiers.py │ └── inspect.py ├── tests │ ├── Xtest_dicom_header.py │ ├── __init__.py │ ├── common.py │ ├── resources │ │ ├── filter_multiple_first_filter_match.dicom │ │ ├── filter_multiple_rule_innerop_false.dicom │ │ ├── filter_multiple_rule_innerop_true.dicom │ │ ├── filter_multiple_second_filter_match.dicom │ │ ├── filter_multiple_two_filter_match.dicom │ │ ├── filter_multiple_zero_filter_match.dicom │ │ ├── filter_single_rule_false.dicom │ │ ├── filter_single_rule_innerop_false.dicom │ │ ├── filter_single_rule_innerop_true.dicom │ │ ├── filter_single_rule_true.dicom │ │ ├── filter_tag_number.dicom │ │ ├── keepcoordinates.dicom │ │ ├── keepcoordinates_from.dicom │ │ ├── keepcoordinates_noaction.dicom │ │ ├── remove_all.dicom │ │ ├── remove_coordinates.dicom │ │ ├── remove_coordinates_groups.dicom │ │ ├── remove_coordinates_multiple.dicom │ │ ├── remove_coordinates_multiple_filters.dicom │ │ ├── remove_coordinates_us.dicom │ │ └── remove_coordinates_us_all.dicom │ ├── test_action_interaction.py │ ├── 
test_blank_action.py │ ├── test_clean.py │ ├── test_clean_pixel_dimensions.py │ ├── test_cli.py │ ├── test_config.py │ ├── test_data.py │ ├── test_deid_recipe.py │ ├── test_dicom_fields.py │ ├── test_dicom_funcs.py │ ├── test_dicom_groups.py │ ├── test_dicom_pixels.py │ ├── test_dicom_tags.py │ ├── test_dicom_utils.py │ ├── test_file_meta.py │ ├── test_filter_detect.py │ ├── test_nested_dicom_fields.py │ ├── test_replace_action.py │ ├── test_replace_identifiers.py │ ├── test_sequence_blank.py │ ├── test_sequence_jitter.py │ ├── test_sequence_remove.py │ ├── test_sequence_replace.py │ ├── test_utils.py │ └── test_utils_files.py ├── utils │ ├── __init__.py │ ├── actions.py │ └── fileio.py └── version.py ├── docs ├── .gitignore ├── Gemfile ├── README.md ├── _config.yml ├── _data │ └── links.yml ├── _docs │ ├── _defaults.md │ ├── contributing │ │ ├── code.md │ │ └── docs.md │ ├── development │ │ ├── image-format.md │ │ ├── index.md │ │ └── linting-format.md │ ├── examples │ │ ├── client.md │ │ ├── deid-dataset.md │ │ ├── func-replace.md │ │ ├── func-sequence-replace.md │ │ ├── header-expanders.md │ │ ├── index.md │ │ └── recipe.md │ ├── getting-started │ │ ├── dicom-config.md │ │ ├── dicom-get.md │ │ ├── dicom-loading.md │ │ ├── dicom-pixels.md │ │ ├── dicom-put.md │ │ └── index.md │ ├── install │ │ ├── docker.md │ │ ├── index.md │ │ └── local.md │ └── user-docs │ │ ├── client.md │ │ ├── index.md │ │ ├── recipe-filters.md │ │ ├── recipe-funcs.md │ │ ├── recipe-groups.md │ │ ├── recipe-headers.md │ │ ├── recipe-labels.md │ │ └── tags.md ├── _includes │ ├── head.html │ └── navigation.html ├── _layouts │ └── default.html ├── _posts │ ├── 2018-12-09-docs.md │ └── _defaults.md ├── api_docs │ ├── .nojekyll │ ├── Makefile │ ├── _static │ │ └── theme.css │ ├── _templates │ │ ├── class.rst │ │ └── function.rst │ ├── assets │ │ ├── favicon.ico │ │ └── logo.png │ ├── conf.py │ ├── docs-requirements.txt │ ├── index.rst │ ├── requirements.txt │ └── source │ │ ├── deid.config.rst │ 
│ ├── deid.data.rst │ │ ├── deid.dicom.actions.rst │ │ ├── deid.dicom.pixels.rst │ │ ├── deid.dicom.rst │ │ ├── deid.logger.rst │ │ ├── deid.main.rst │ │ ├── deid.rst │ │ ├── deid.tests.rst │ │ ├── deid.utils.rst │ │ └── modules.rst ├── apidoc.sh ├── assets │ ├── css │ │ └── deid.css │ ├── fonts │ │ ├── helveticaneueout-webfont.woff │ │ └── helveticaneueout-webfont.woff2 │ ├── img │ │ ├── apple-touch-icon.png │ │ ├── emblem.svg │ │ ├── favicon.ico │ │ ├── favicon.png │ │ ├── interaction-grid.png │ │ ├── logo.png │ │ ├── menu.svg │ │ ├── open-source-halloween-2021.png │ │ ├── siteicon.png │ │ └── touch-icon.png │ └── js │ │ ├── lunr.min.js │ │ └── search.js └── pages │ ├── 404.md │ ├── changelog.html │ ├── index.md │ ├── robots.txt │ └── search.html ├── examples ├── README.md ├── deid │ ├── README.md │ ├── deid.dicom │ ├── deid.dicom-groups │ └── deid.dicom-pusheen └── dicom │ ├── README.md │ ├── dicom-extract │ ├── README.md │ └── create-dicom-csv.py │ ├── header-manipulation │ ├── README.md │ ├── deid.dicom │ ├── file-meta │ │ ├── deid.dicom │ │ └── example.py │ ├── func-replacement.py │ └── func-sequence-replace │ │ ├── MR.dcm │ │ ├── cleaned.dcm │ │ ├── deid.dicom │ │ └── example.py │ ├── pixels │ ├── run-cleaner-client.py │ └── run-inspect-pixels.py │ └── recipe │ ├── deid-dicom-example.py │ └── deid.dicom ├── pyproject.toml ├── setup.cfg └── setup.py /.dockerignore: -------------------------------------------------------------------------------- 1 | deid.egg-info 2 | .build 3 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: vsoch 2 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | # Description 2 | 3 | Related issues: # (issue) 4 | 5 | Please include a summary of 
the change(s) and if relevant, any related issues above. 6 | 7 | # Checklist 8 | 9 | - [ ] I have commented my code, particularly in hard-to-understand areas 10 | - [ ] My changes generate no new warnings 11 | - [ ] My code follows the style guidelines of this project 12 | 13 | 14 | # Open questions 15 | 16 | Questions that require more discussion or to be addressed in future development: 17 | -------------------------------------------------------------------------------- /.github/codespell_ignore_words.txt: -------------------------------------------------------------------------------- 1 | nd 2 | ot 3 | searchin 4 | som 5 | -------------------------------------------------------------------------------- /.github/dev-requirements.txt: -------------------------------------------------------------------------------- 1 | pre-commit 2 | black==23.3.0 3 | isort 4 | flake8 5 | -------------------------------------------------------------------------------- /.github/workflows/codespell.yaml: -------------------------------------------------------------------------------- 1 | name: Check for Typos 2 | 3 | on: 4 | push: 5 | branches: [master] 6 | pull_request: 7 | branches: [master] 8 | 9 | jobs: 10 | codespell: 11 | name: Check for spelling errors 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v3 16 | - uses: crate-ci/typos@592b36d23c62cb378f6097a292bc902ee73f93ef # version 1.0.4 17 | with: 18 | files: ./deid ./docs/_docs ./docs/README.md ./docs/pages ./examples 19 | -------------------------------------------------------------------------------- /.github/workflows/docs.yaml: -------------------------------------------------------------------------------- 1 | name: Update Docs 2 | 3 | on: 4 | push: 5 | branches: [master] 6 | 7 | jobs: 8 | generate-docs: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v3 12 | with: 13 | ref: "gh-pages" 14 | 15 | - name: Create conda environment 16 | run: conda create --quiet -c conda-forge 
--name deid 17 | 18 | - name: Install deid 19 | run: | 20 | export PATH="/usr/share/miniconda/bin:$PATH" 21 | root=$PWD 22 | source activate deid 23 | cd /tmp 24 | git clone https://github.com/pydicom/deid 25 | cd deid 26 | pip install -e . 27 | pip install deid-data 28 | cd ./docs 29 | pip install -r api_docs/docs-requirements.txt 30 | /bin/bash ./apidoc.sh 31 | cd ../ 32 | mv $root/.git ./docs/api_docs/.git 33 | rm -rf $root 34 | mv ./docs/api_docs $root 35 | cd $root 36 | ls -a 37 | git add . 38 | git status 39 | git config --global user.name "github-actions" 40 | git config --global user.email "github-actions@users.noreply.github.com" 41 | git config --global pull.rebase true 42 | 43 | if git diff-index --quiet HEAD --; then 44 | printf "No changes\n" 45 | else 46 | printf "Changes\n" 47 | git commit -a -m "Adding changed documentation files" 48 | git push origin gh-pages || echo "up to date" 49 | fi 50 | -------------------------------------------------------------------------------- /.github/workflows/main.yaml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | branches_ignore: [] 9 | 10 | jobs: 11 | formatting: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v4 15 | 16 | - name: Setup black environment 17 | run: conda create --quiet --name black black 18 | 19 | - name: Lint and format Python code 20 | run: | 21 | export PATH="/usr/share/miniconda/bin:$PATH" 22 | source activate black 23 | pip install -r .github/dev-requirements.txt 24 | pre-commit run --all-files 25 | 26 | testing: 27 | runs-on: ubuntu-latest 28 | steps: 29 | - uses: actions/checkout@v4 30 | 31 | - name: Setup conda environment 32 | run: | 33 | conda create --quiet --name testing 34 | export PATH="/usr/share/miniconda/bin:$PATH" 35 | source activate testing 36 | pip install . 
37 | 38 | - name: Test deid 39 | run: | 40 | export PATH="/usr/share/miniconda/bin:$PATH" 41 | source activate testing 42 | pip install deid-data 43 | python -m unittest discover -s deid/tests/ -p '[t|T]est*.py' 44 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | dist 3 | deid.egg-info 4 | build 5 | pypi.sh 6 | env 7 | _build 8 | 9 | private 10 | # dev tools 11 | .idea 12 | .vscode 13 | 14 | # osx 15 | # General 16 | .DS_Store 17 | .AppleDouble 18 | .LSOverride 19 | 20 | # Icon must end with two \r 21 | Icon 22 | 23 | # Thumbnails 24 | ._* 25 | 26 | # Files that might appear in the root of a volume 27 | .DocumentRevisions-V100 28 | .fseventsd 29 | .Spotlight-V100 30 | .TemporaryItems 31 | .Trashes 32 | .VolumeIcon.icns 33 | .com.apple.timemachine.donotpresent 34 | 35 | # Directories potentially created on remote AFP share 36 | .AppleDB 37 | .AppleDesktop 38 | Network Trash Folder 39 | Temporary Items 40 | .apdisk 41 | .cache 42 | 43 | # PyCharm 44 | .idea/ 45 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.3.0 4 | hooks: 5 | - id: check-added-large-files 6 | - id: check-case-conflict 7 | - id: check-docstring-first 8 | - id: end-of-file-fixer 9 | - id: trailing-whitespace 10 | - id: mixed-line-ending 11 | 12 | - repo: local 13 | hooks: 14 | - id: black 15 | name: black 16 | language: python 17 | types: [python] 18 | entry: black 19 | 20 | - id: isort 21 | name: isort 22 | args: [--filter-files] 23 | language: python 24 | types: [python] 25 | entry: isort 26 | 27 | - id: flake8 28 | name: flake8 29 | language: python 30 | types: [python] 31 | entry: flake8 32 | 
-------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | sudo: true 4 | 5 | os: linux 6 | 7 | language: python 8 | 9 | matrix: 10 | include: 11 | - name: "Python 3.5" 12 | python: "3.5" 13 | - name: "Python 2.7" 14 | python: "2.7" 15 | 16 | install: 17 | - pip install pydicom matplotlib deid-data 18 | - cd $TRAVIS_BUILD_DIR/ 19 | - python setup.py sdist 20 | - python setup.py install 21 | 22 | script: 23 | - python -m unittest discover -s $TRAVIS_BUILD_DIR/deid/tests/ -p '[t|T]est*.py' 24 | -------------------------------------------------------------------------------- /AUTHORS.md: -------------------------------------------------------------------------------- 1 | # Project Lead: 2 | 3 | - Vanessa Sochat 4 | 5 | # Contributors: 6 | 7 | - James Wetzel <@wetzelj> 8 | - Brian Kolowitz 9 | - Howard P. Chen 10 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | When contributing to deid, it is important to properly communicate the 4 | gist of the contribution. If it is a simple code or editorial fix, simply 5 | explaining this within the GitHub Pull Request (PR) will suffice. But if this 6 | is a larger fix or enhancement, it should be first discussed with the project 7 | developers first. 8 | 9 | Please note we have a code of conduct, described below. Please follow it in 10 | all your interactions with the project members and users. 11 | 12 | This code is licensed under the MIT [LICENSE](LICENSE). 13 | 14 | ## Pull Request Process 15 | 16 | 1. All PRs should be sent to the master branch. 17 | 2. Follow the existing code style precedent. This does not need to be strictly 18 | defined as there are many thousands of lines of examples. 
Note the lack 19 | of tabs anywhere in the project, parentheses and spacing, documentation 20 | style, source code layout, variable scoping, and follow the project's 21 | standards. 22 | 3. Test your PR locally, and provide the steps necessary to test for the 23 | reviewers. 24 | 4. The project's default copyright and header have been included in any new 25 | source files. 26 | 5. All (major) changes to deid must be documented in 27 | [docs](docs). If your PR changes a core functionality, please 28 | include a clear description of the changes in your PR so that the docs 29 | can be updated, or better, submit another PR to update the docs directly. 30 | 6. If necessary, update the [README](README.md), and the [CHANGELOG](CHANGELOG.md). 31 | 7. The pull request will be reviewed by others, and the final merge must be 32 | done by the deid project lead, @vsoch (or approved by her). 33 | 34 | 35 | # Code of Conduct 36 | 37 | ## Our Pledge 38 | 39 | In the interest of fostering an open and welcoming environment, we as 40 | contributors and maintainers pledge to making participation in our project and 41 | our community a harassment-free experience for everyone, regardless of age, body 42 | size, disability, ethnicity, gender identity and expression, level of experience, 43 | nationality, personal appearance, race, religion, or sexual identity and 44 | orientation. 
45 | 46 | ## Our Standards 47 | 48 | Examples of behavior that contributes to creating a positive environment 49 | include: 50 | 51 | * Using welcoming and inclusive language 52 | * Being respectful of differing viewpoints and experiences 53 | * Gracefully accepting constructive criticism 54 | * Focusing on what is best for the community 55 | * Showing empathy towards other community members 56 | 57 | Examples of unacceptable behavior by participants include: 58 | 59 | * The use of sexualized language or imagery and unwelcome sexual attention or 60 | advances 61 | * Trolling, insulting/derogatory comments, and personal or political attacks 62 | * Public or private harassment 63 | * Publishing others' private information, such as a physical or electronic 64 | address, without explicit permission 65 | * Other conduct which could reasonably be considered inappropriate in a 66 | professional setting 67 | 68 | ### Our Responsibilities 69 | 70 | Project maintainers are responsible for clarifying the standards of acceptable 71 | behavior and are expected to take appropriate and fair corrective action in 72 | response to any instances of unacceptable behavior. 73 | 74 | Project maintainers have the right and responsibility to remove, edit, or 75 | reject comments, commits, code, wiki edits, issues, and other contributions 76 | that are not aligned to this Code of Conduct, or to ban temporarily or 77 | permanently any contributor for other behaviors that they deem inappropriate, 78 | threatening, offensive, or harmful. 79 | 80 | ## Scope 81 | 82 | This Code of Conduct applies both within project spaces and in public spaces 83 | when an individual is representing the project or its community. Examples of 84 | representing a project or community include using an official project e-mail 85 | address, posting via an official social media account, or acting as an appointed 86 | representative at an online or offline event. 
Representation of a project may be 87 | further defined and clarified by project maintainers. 88 | 89 | ## Enforcement 90 | 91 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 92 | reported by contacting the project leader (gmkurtzer@gmail.com). All 93 | complaints will be reviewed and investigated and will result in a response 94 | that is deemed necessary and appropriate to the circumstances. The project 95 | team is obligated to maintain confidentiality with regard to the reporter of 96 | an incident. Further details of specific enforcement policies may be posted 97 | separately. 98 | 99 | Project maintainers, contributors and users who do not follow or enforce the 100 | Code of Conduct in good faith may face temporary or permanent repercussions 101 | with their involvement in the project as determined by the project's leader(s). 102 | 103 | ## Attribution 104 | 105 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 106 | available at [http://contributor-covenant.org/version/1/4][version] 107 | 108 | [homepage]: http://contributor-covenant.org 109 | [version]: http://contributor-covenant.org/version/1/4/ 110 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda3 2 | 3 | RUN apt-get update && apt-get install -y wget git pkg-config libfreetype6-dev g++ 4 | RUN conda install matplotlib 5 | WORKDIR /code 6 | ADD . 
/code 7 | RUN python /code/setup.py install 8 | 9 | RUN chmod 0755 /opt/conda/bin/deid 10 | ENTRYPOINT ["/opt/conda/bin/deid"] 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016-2022 Vanessa Sochat 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 
20 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md LICENSE 2 | graft deid 3 | prune *.pyc 4 | prune deid/tests 5 | prune *OLD 6 | global-exclude __pycache__ 7 | global-exclude *.py[co] 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deidentify (deid) 2 | 3 | Best effort anonymization for medical images in Python. 4 | 5 | [![DOI](https://zenodo.org/badge/94163984.svg)](https://zenodo.org/badge/latestdoi/94163984) 6 | [![Build Status](https://travis-ci.org/pydicom/deid.svg?branch=master)](https://travis-ci.org/pydicom/deid) 7 | 8 | Please see our [Documentation](https://pydicom.github.io/deid/). 9 | 10 | These are basic Python based tools for working with medical images and text, specifically for de-identification. 11 | The cleaning method used here mirrors the one by CTP in that we can identify images based on known 12 | locations. We are looking for collaborators to develop and validate an OCR cleaning method! Please reach out if you would like to help work on this. 13 | 14 | 15 | ## Installation 16 | 17 | ### Local 18 | For the stable release, install via pip: 19 | 20 | ```bash 21 | pip install deid 22 | ``` 23 | 24 | For the development version, install from GitHub: 25 | 26 | ```bash 27 | pip install git+https://github.com/pydicom/deid 28 | ``` 29 | 30 | ### Docker 31 | 32 | ```bash 33 | docker build -t pydicom/deid . 34 | docker run pydicom/deid --help 35 | ``` 36 | 37 | ## Issues 38 | If you have an issue, or want to request a feature, please do so on our [issues board](https://www.github.com/pydicom/deid/issues). 
def get_dataset(dataset=None):
    """
    Get a dataset by name.

    The lookup is delegated to the optional deid_data package, which is
    imported inside this function rather than at module import time, so the
    rest of deid can be used without it installed.

    Parameters
    ==========
    dataset: the user-provided label, passed through unchanged to
             deid_data.data.get_dataset.

    Returns
    =======
    Whatever deid_data.data.get_dataset returns for the label.

    Raises
    ======
    ValueError: if the deid_data package cannot be imported.
    """
    try:
        from deid_data import data
    except ImportError as err:
        # Callers keep seeing ValueError with the install hint; the original
        # ImportError is attached as the explicit cause for debugging.
        raise ValueError("install deid data with `pip install deid-data`") from err

    return data.get_dataset(dataset)
REMOVE PerformingPhysicianName 58 | REMOVE PerformingPhysicianIdentificationSequence 59 | REMOVE PhysiciansReadingStudyIdentificationSequence 60 | REMOVE RequestingPhysician 61 | REMOVE ScheduledPerformingPhysicianIdentificationSequence 62 | REMOVE RequestingPhysicianIdentificationSequence 63 | REMOVE HumanPerformerName 64 | REMOVE ContactDisplayName 65 | REMOVE PerformingPhysicianName 66 | REMOVE NameOfPhysiciansReadingStudy 67 | REMOVE OperatorsName 68 | REMOVE ScheduledPerformingPhysicianName 69 | REMOVE ReviewerName 70 | REMOVE NamesOfIntendedRecipientsOfResults 71 | REMOVE SourceApplicatorName 72 | REMOVE ClinicalTrialSponsorName 73 | REMOVE ContentCreatorName 74 | REMOVE ClinicalTrialProtocolEthicsCommitteeName 75 | REMOVE contains:UID 76 | REMOVE RegionOfResidence 77 | REMOVE CurrentPatientLocation 78 | REMOVE PatientComments 79 | REMOVE PatientTransportArrangements 80 | REMOVE PatientDeathDateInAlternativeCalendar 81 | REMOVE PatientInstitutionResidence 82 | REMOVE PerformedLocation 83 | REMOVE ScheduledStudyLocation 84 | REMOVE ScheduledStudyLocationAETitle 85 | REMOVE OrderEntererLocation 86 | REMOVE AssignedLocation 87 | 88 | ADD IssuerOfPatientID STARR 89 | ADD PatientBirthDate var:entity_timestamp 90 | ADD StudyDate var:item_timestamp 91 | ADD PatientID var:entity_id 92 | ADD AccessionNumber var:item_id 93 | ADD PatientIdentityRemoved YES 94 | REMOVE jitter 95 | -------------------------------------------------------------------------------- /deid/data/deid.dicom.xray.chest: -------------------------------------------------------------------------------- 1 | FORMAT dicom 2 | 3 | %filter whitelist 4 | 5 | LABEL Matt Lungren CHEST 6 | contains Modality CR|DX|PR 7 | || contains StudyDescription DX|PR 8 | + contains StudyDescription CHEST || contains BodyPartExamined CHEST 9 | 10 | LABEL Matt Lungren CHEST Missing 11 | contains Modality CR|DX|PR 12 | || contains StudyDescription DX|PR 13 | + contains StudyDescription CHEST || contains BodyPartExamined 
def jitter_timestamp_func(item, value, field, **kwargs):
    """
    Adapt jitter_timestamp to the custom function call signature.

    The incoming item and value are not consulted. The number of days to
    jitter by is read from the "extras" keyword argument after it has been
    run through parse_keyvalue_pairs:

    days: number of days to jitter by (defaults to 1)
    years: optional, each year adds 365 days on top of "days"

    Returns the result of jitter_timestamp for the field.
    """
    extras = parse_keyvalue_pairs(kwargs.get("extras"))

    # Start from the day count, falling back to a single day
    offset_days = int(extras.get("days", 1))

    # Years are optional and are converted to days before being added
    if "years" in extras:
        offset_days += int(extras["years"]) * 365

    return jitter_timestamp(field, offset_days)
def basic_uuid(item, value, field, **kwargs):
    """
    A basic function to replace a field with a uuid.uuid4() string.

    The item, value, field and keyword arguments are accepted only so the
    function fits the custom function signature; none of them are read.
    """
    return str(uuid.uuid4())


def pydicom_uuid(item, value, field, **kwargs):
    """
    Use pydicom to generate the UID. Optional kwargs (read from the
    "extras" keyword argument via parse_keyvalue_pairs) include:

    prefix (str): provide a custom prefix (defaults to "2.25.")
    stable_remapping (bool): if true (the default), use the original value
       of the field for entropy. This ensures stability across different
       runs that use the same UID. If not true, no entropy sources are
       given to pydicom, so it generates a random UID instead.

    The prefix must match '^(0|[1-9][0-9]*)(\\.(0|[1-9][0-9]*))*\\.$'

    Returns the UID generated by pydicom.uid.generate_uid.
    """
    opts = parse_keyvalue_pairs(kwargs.get("extras"))

    # We always provide a prefix so the stable remapping is done
    prefix = opts.get("prefix", "2.25.")
    stable_remapping = opts.get("stable_remapping", True)

    # They would need to unset the default prefix
    if stable_remapping is True and not prefix:
        bot.warning("A prefix must be provided to use stable remapping.")

    if stable_remapping is True:
        # Hashing the original value maps the same input to the same UID
        entropy_srcs = [str(field.element.value)]
    else:
        # Must be None, not an empty list: pydicom only randomizes when
        # entropy_srcs is None, and an empty list would hash the empty
        # string, yielding the identical UID on every call.
        entropy_srcs = None
    return pydicom_generate_uid(prefix=prefix, entropy_srcs=entropy_srcs)


def suffix_uuid(item, value, field, **kwargs):
    """Return the same field, with a uuid suffix.

    The field name is lowercased and followed by "-" and a uuid.uuid4()
    string. The item, value and keyword arguments are not used.

    Provided in docs: https://pydicom.github.io/deid/examples/func-replace/
    """
    # a field can either be just the name string, or a DicomElement
    if hasattr(field, "name"):
        field = field.name
    # NOTE(review): this replaces a space with a space, so it is currently a
    # no-op; possibly another separator was intended - confirm before changing.
    prefix = field.lower().replace(" ", " ")
    return prefix + "-" + str(uuid.uuid4())
def extract_values_list(dicom, actions, fields=None):
    """Given a list of actions for a named group (a list) extract values from
    the dicom based on the list of actions provided. This function
    always returns a list intended to update some lookup to be used
    to further process the dicom.

    Parameters
    ==========
    dicom: the dicom dataset the fields belong to
    actions: list of dicts with "action" (FIELD or SPLIT), "field", and for
             SPLIT an optional "value" such as "by=^;minlength=3"
    fields: optional pre-parsed fields, to save re-parsing the dicom
    """
    values = set()

    # The function can be provided fields to save re-parsing
    if not fields:
        fields = get_fields(dicom)

    for action in actions:
        # Extract some subset of fields based on action
        subset = expand_field_expression(
            field=action["field"], dicom=dicom, contenders=fields
        )

        # Just grab the entire value string for a field, no parsing
        if action["action"] == "FIELD":
            for field in subset.values():
                if field.element.value not in ["", None]:
                    if isinstance(field.element.value, MultiValue):
                        values.update(field.element.value)
                    else:
                        values.add(field.element.value)

        # Split action, can optionally have a "by" and/or minlength parameter
        elif action["action"] == "SPLIT":
            # Default values for split are length 1 and character empty space
            bot.debug("Parsing action %s" % action)
            split_by = " "
            minlength = 1

            if "value" in action:
                for param in action["value"].split(";"):
                    # Tolerate a trailing ";" and stray whitespace
                    if not param.strip():
                        continue

                    # Split on the first "=" only, so by="=" is usable
                    param_name, found, param_val = param.partition("=")
                    if not found:
                        bot.warning("Skipping malformed SPLIT parameter %s" % param)
                        continue
                    param_name = param_name.strip()
                    param_val = param_val.strip()

                    # Set a custom parameter length
                    if param_name == "minlength":
                        minlength = int(param_val)
                        bot.debug("Minimum length set to %s" % minlength)
                    elif param_name == "by":
                        candidate = param_val.strip("'").strip('"')
                        # str.split rejects an empty separator, keep default
                        if candidate:
                            split_by = candidate
                            bot.debug("Splitting value set to %s" % split_by)
                        else:
                            bot.warning("Empty split separator, using default.")

            for field in subset.values():
                raw = field.element.value

                # Same rule as FIELD: an unset value contributes nothing
                # (previously None was stringified and "None" was added)
                if raw in ["", None]:
                    continue

                # Split each entry of a multi-valued element on its own
                # rather than splitting the printed list representation
                parts = raw if isinstance(raw, MultiValue) else [raw]
                for part in parts:
                    for new_value in str(part).split(split_by):
                        if len(new_value) >= minlength:
                            values.add(new_value)

        else:
            bot.warning(
                "Unrecognized action %s for values list extraction." % action["action"]
            )

    return list(values)
def get_identifiers(
    dicom_files,
    force=True,
    config=None,
    strip_sequences=False,
    remove_private=False,
    disable_skip=False,
    expand_sequences=True,
):
    """
    Extract all identifiers from a dicom image.

    This function returns a lookup by file name, where each value indexed
    includes a dictionary of nested fields (indexed by nested tag).

    Parameters
    ==========
    dicom_files: the dicom file(s) to extract from
    force: force reading the file (default True)
    config: if None, uses default in provided module folder
    strip_sequences: accepted for signature compatibility, not used here
    remove_private: accepted for signature compatibility, not used here
    disable_skip: do not skip over protected fields
    expand_sequences: if True, expand sequences. otherwise, skips
    """
    if not isinstance(dicom_files, list):
        dicom_files = [dicom_files]

    bot.debug("Extracting identifiers for %s dicom" % len(dicom_files))
    lookup = dict()

    # Parse each dicom file
    for dicom_file in dicom_files:
        # Forward the caller's disable_skip; it used to be pinned to False,
        # which silently ignored the argument.
        parser = DicomParser(
            dicom_file, force=force, config=config, disable_skip=disable_skip
        )
        lookup[parser.dicom_file] = parser.get_fields(expand_sequences=expand_sequences)

    return lookup
106 | """ 107 | if not isinstance(dicom_files, list): 108 | dicom_files = [dicom_files] 109 | 110 | # Warn the user that we use the default deid recipe 111 | if not deid: 112 | bot.warning("No deid specification provided, will use defaults.") 113 | 114 | # ids (a lookup) is not required 115 | ids = ids or {} 116 | 117 | # Parse through dicom files, update headers, and save 118 | updated_files = [] 119 | for dicom_file in dicom_files: 120 | parser = DicomParser( 121 | dicom_file, 122 | force=force, 123 | config=config, 124 | recipe=deid, 125 | disable_skip=disable_skip, 126 | ) 127 | 128 | # If a custom lookup was provided, update the parser 129 | if parser.dicom_file in ids: 130 | parser.lookup.update(ids[parser.dicom_file]) 131 | 132 | parser.parse(strip_sequences=strip_sequences, remove_private=remove_private) 133 | 134 | # Save to file, otherwise return updated objects 135 | if save is True: 136 | ds = save_dicom( 137 | dicom=parser.dicom, 138 | dicom_file=parser.dicom_file, 139 | output_folder=output_folder, 140 | overwrite=overwrite, 141 | ) 142 | updated_files.append(ds) 143 | else: 144 | updated_files.append(parser.dicom) 145 | 146 | return updated_files 147 | -------------------------------------------------------------------------------- /deid/dicom/pixels/__init__.py: -------------------------------------------------------------------------------- 1 | from .clean import DicomCleaner, clean_pixel_data 2 | from .detect import has_burned_pixels 3 | -------------------------------------------------------------------------------- /deid/dicom/utils.py: -------------------------------------------------------------------------------- 1 | __author__ = "Vanessa Sochat" 2 | __copyright__ = "Copyright 2016-2025, Vanessa Sochat" 3 | __license__ = "MIT" 4 | 5 | import os 6 | import tempfile 7 | import zipfile 8 | 9 | import pydicom 10 | from pydicom import FileDataset 11 | 12 | from deid.logger import bot 13 | from deid.utils import recursive_find 14 | 15 | from 
def get_files(contenders, check=True, pattern=None, force=False, tempdir=None):
    """
    Get a generator for files.

    get_files will take a list of single dicom files or directories,
    and return a generator that yields complete paths to all files

    Parameters
    ==========
    contenders: a list of files or directories (contenders!)
    check: boolean to indicate if we should validate dicoms (default True)
    pattern: A pattern to use with fnmatch. If None, * is used
    force: force reading of the files, if some headers invalid.
           Not recommended, as many non-dicom will come through
    tempdir: folder that .zip contenders are extracted into. If None, a
             temporary folder is created the first time a zip is found.

    """
    if not isinstance(contenders, list):
        contenders = [contenders]

    for contender in contenders:
        if os.path.isdir(contender):
            dicom_files = recursive_find(contender, pattern=pattern)
        else:
            dicom_files = [contender]

        for dicom_file in dicom_files:
            _, dextension = os.path.splitext(dicom_file)
            # The code currently only assumes a single-file per zip. This could be
            # expanded to allow for multiple test files within an archive.
            if dextension == ".zip":
                # Without a target folder the archive would unpack into the
                # working directory and joining paths on None would fail.
                if tempdir is None:
                    tempdir = tempfile.mkdtemp()

                with zipfile.ZipFile(dicom_file, "r") as archive:
                    archive.extractall(tempdir)
                    members = [
                        info.filename
                        for info in archive.infolist()
                        if not info.is_dir()
                    ]

                # Choose among this archive's own members, so a file left in
                # tempdir by an earlier archive is never returned by mistake.
                dicom_file = next(
                    (
                        os.path.join(tempdir, member)
                        for member in members
                        if os.path.isfile(os.path.join(tempdir, member))
                    ),
                    None,
                )
                if dicom_file is None:
                    continue  # ZIP file does not contain any file

            if check:
                validated_files = validate_dicoms(dicom_file, force=force)
            else:
                validated_files = [dicom_file]

            for validated_file in validated_files:
                bot.debug("Found contender file %s" % (validated_file))
                yield validated_file
% dicom_name 101 | ) 102 | dowrite = False 103 | 104 | if dowrite: 105 | dicom.save_as(output_dicom) 106 | return output_dicom 107 | 108 | 109 | def load_dicom(dcm_file, force=True): 110 | if isinstance(dcm_file, FileDataset): 111 | return dcm_file 112 | else: 113 | return pydicom.dcmread(dcm_file, force=force) 114 | 115 | 116 | def dcmread(filename, **kwargs): 117 | return pydicom.dcmread(filename, **kwargs) 118 | -------------------------------------------------------------------------------- /deid/dicom/validate.py: -------------------------------------------------------------------------------- 1 | __author__ = "Vanessa Sochat" 2 | __copyright__ = "Copyright 2016-2025, Vanessa Sochat" 3 | __license__ = "MIT" 4 | 5 | import deid.dicom.utils as utils 6 | from deid.logger import bot 7 | 8 | 9 | def validate_dicoms(dcm_files, force=False): 10 | """ 11 | Validate that dicom files can open and return valid set. 12 | 13 | validate dicoms will test opening one or more dicom files, 14 | and return a list of valid files. 15 | 16 | Parameters 17 | ========== 18 | dcm_files: one or more dicom files to test 19 | 20 | """ 21 | if not isinstance(dcm_files, list): 22 | dcm_files = [dcm_files] 23 | 24 | valids = [] 25 | 26 | bot.debug("Checking %s dicom files for validation." 
# Stream the bar is written to
STREAM = sys.stderr

# label, filled part, empty part, progress, expected size, time display
BAR_TEMPLATE = "%s[%s%s] %i/%i MB - %s\r"
BAR_FILLED_CHAR = "-"
BAR_EMPTY_CHAR = " "

# How long to wait before recalculating the ETA
ETA_INTERVAL = 1
# How many intervals (excluding the current one) to calculate the simple moving
# average
ETA_SMA_WINDOW = 9


class ProgressBar(object):
    """
    A text progress bar written to STREAM.

    Usable as a context manager: leaving the block calls done(), which
    prints the completed bar with the elapsed time.
    """

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.done()
        return False  # we're not suppressing exceptions

    def __init__(
        self,
        label="",
        width=32,
        hide=None,
        empty_char=BAR_EMPTY_CHAR,
        filled_char=BAR_FILLED_CHAR,
        expected_size=None,
        every=1,
    ):
        """
        Parameters
        ==========
        label: text printed in front of the bar
        width: number of characters between the brackets
        hide: if True nothing is written; if None, hide unless STREAM is a tty
        empty_char: character for the part not yet done
        filled_char: character for the part already done
        expected_size: the total that progress counts up to
        every: only redraw when progress is a multiple of this number
        """
        self.label = label
        self.width = width
        self.hide = hide
        # Only show bar in terminals by default (better for piping, logging etc.)
        if hide is None:
            try:
                self.hide = not STREAM.isatty()
            except AttributeError:  # output does not support isatty()
                self.hide = True
        self.empty_char = empty_char
        self.filled_char = filled_char
        self.expected_size = expected_size
        self.every = every
        self.start = time.time()
        self.ittimes = []
        self.eta = 0
        self.etadelta = time.time()
        self.etadisp = self.format_time(self.eta)
        self.last_progress = 0
        if self.expected_size:
            self.show(0)

    def show(self, progress, count=None):
        """
        Redraw the bar at the given progress.

        Parameters
        ==========
        progress: the amount completed so far
        count: if given, replaces expected_size before drawing

        Raises an Exception if no expected size has been set.
        """
        if count is not None:
            self.expected_size = count
        if self.expected_size is None:
            raise Exception("expected_size not initialized")
        self.last_progress = progress
        if (time.time() - self.etadelta) > ETA_INTERVAL:
            self.etadelta = time.time()
            # Keep a sliding window of the average time per unit of progress
            self.ittimes = self.ittimes[-ETA_SMA_WINDOW:] + [
                -(self.start - time.time()) / (progress + 1)
            ]
            self.eta = (
                sum(self.ittimes)
                / float(len(self.ittimes))
                * (self.expected_size - progress)
            )
            self.etadisp = self.format_time(self.eta)

        # An expected size of zero means there is nothing left to do, so
        # draw a full bar instead of dividing by zero.
        if self.expected_size:
            x = int(self.width * progress / self.expected_size)
        else:
            x = self.width

        if not self.hide:
            if (progress % self.every) == 0 or (  # True every "every" updates
                progress == self.expected_size
            ):  # And when we're done
                STREAM.write(
                    BAR_TEMPLATE
                    % (
                        self.label,
                        self.filled_char * x,
                        self.empty_char * (self.width - x),
                        progress,
                        self.expected_size,
                        self.etadisp,
                    )
                )
                STREAM.flush()

    def done(self):
        """Record the elapsed time and print the completed bar."""
        self.elapsed = time.time() - self.start
        elapsed_disp = self.format_time(self.elapsed)
        if not self.hide:
            # A bar that never received a size would otherwise fail to
            # format None with %i; fall back to the last progress seen.
            total = self.expected_size
            if total is None:
                total = self.last_progress

            # Print completed bar with elapsed time
            STREAM.write(
                BAR_TEMPLATE
                % (
                    self.label,
                    self.filled_char * self.width,
                    self.empty_char * 0,
                    self.last_progress,
                    total,
                    elapsed_disp,
                )
            )
            STREAM.write("\n")
            STREAM.flush()

    def format_time(self, seconds):
        """Format a number of seconds as HH:MM:SS."""
        return time.strftime("%H:%M:%S", time.gmtime(seconds))


def bar(
    it,
    label="",
    width=32,
    hide=None,
    empty_char=BAR_EMPTY_CHAR,
    filled_char=BAR_FILLED_CHAR,
    expected_size=None,
    every=1,
):
    """Progress iterator. Wrap your iterables with it.

    Yields every item of "it" unchanged and redraws the bar after each one.
    If expected_size is None the total is taken from len(it).
    """

    count = len(it) if expected_size is None else expected_size

    # Pass the caller's characters through; the module defaults used to be
    # passed here, which discarded any custom empty_char / filled_char.
    with ProgressBar(
        label=label,
        width=width,
        hide=hide,
        empty_char=empty_char,
        filled_char=filled_char,
        expected_size=count,
        every=every,
    ) as pbar:
        for i, item in enumerate(it):
            yield item
            pbar.show(i + 1)
19 | ) 20 | 21 | # Global Variables 22 | parser.add_argument( 23 | "--quiet", 24 | "-q", 25 | dest="quiet", 26 | help="Quiet the verbose output", 27 | default=False, 28 | action="store_true", 29 | ) 30 | 31 | parser.add_argument( 32 | "--debug", 33 | dest="debug", 34 | help="use verbose logging to debug.", 35 | default=False, 36 | action="store_true", 37 | ) 38 | 39 | parser.add_argument( 40 | "--version", 41 | dest="version", 42 | help="print version and exit.", 43 | default=False, 44 | action="store_true", 45 | ) 46 | 47 | parser.add_argument( 48 | "--outfolder", 49 | "-o", 50 | dest="outfolder", 51 | help="full path to save output, will use temporary folder if not specified", 52 | type=str, 53 | default=None, 54 | ) 55 | 56 | parser.add_argument( 57 | "--format", 58 | "-f", 59 | dest="format", 60 | help="format of images, default is dicom", 61 | default="dicom", 62 | choices=["dicom"], 63 | ) 64 | 65 | parser.add_argument( 66 | "--overwrite", 67 | dest="overwrite", 68 | help="overwrite pre-existing files in output directory, if they exist.", 69 | default=False, 70 | action="store_true", 71 | ) 72 | 73 | subparsers = parser.add_subparsers( 74 | help="action for deid to perform", 75 | title="actions", 76 | description="actions for deid to perform", 77 | dest="command", 78 | ) 79 | 80 | subparsers.add_parser( 81 | "version", help="print version and exit" # pylint: disable=unused-variable 82 | ) 83 | 84 | # Checks (checks / tests for various services) 85 | inspect = subparsers.add_parser( 86 | "inspect", help="various checks for PHI and quality" 87 | ) 88 | 89 | inspect.add_argument( 90 | nargs="+", 91 | dest="folder", 92 | help="input folder or single image. 
If not provided, test data will be used.", 93 | type=str, 94 | default=None, 95 | ) 96 | 97 | inspect.add_argument( 98 | "--deid", 99 | dest="deid", 100 | help="deid file with preferences, if not specified, default used.", 101 | type=str, 102 | default=None, 103 | ) 104 | 105 | inspect.add_argument( 106 | "--pattern", 107 | dest="pattern", 108 | help="A pattern to match files in input folder.", 109 | type=str, 110 | default=None, 111 | ) 112 | 113 | inspect.add_argument( 114 | "--save", 115 | "-s", 116 | dest="save", 117 | help="save result to output tab separated file.", 118 | default=False, 119 | action="store_true", 120 | ) 121 | 122 | ids = subparsers.add_parser( 123 | "identifiers", help="extract and replace identifiers from headers" 124 | ) 125 | 126 | ids.add_argument( 127 | "--deid", 128 | dest="deid", 129 | help="deid file with preferences, if not specified, default used.", 130 | type=str, 131 | default=None, 132 | ) 133 | 134 | # A path to an ids file, required if user wants to put (without get) 135 | ids.add_argument( 136 | "--ids", 137 | dest="ids", 138 | help="Path to a json file with identifiers, required for PUT if you don't do get (via all)", 139 | type=str, 140 | default=None, 141 | ) 142 | 143 | ids.add_argument( 144 | "--input", 145 | dest="input", 146 | help="Input folder or single image to perform action on.", 147 | type=str, 148 | default=None, 149 | ) 150 | 151 | # Action 152 | ids.add_argument( 153 | "--action", 154 | "-a", 155 | dest="action", 156 | help="specify to get, put (replace), or all. 
def main(args, parser):
    """
    Entry point for the "identifiers" command.

    Collects the dicom files under args.input (or the demo dataset when no
    input is given) and, depending on args.action, extracts identifiers
    (get), replaces them (put), or does both (all).

    Parameters
    ==========
    args: the parsed command line arguments
    parser: the argument parser, unused here
    """
    # Local import: json is only needed to read the --ids file
    import json

    # Global output folder
    output_folder = args.outfolder
    if output_folder is None:
        output_folder = tempfile.mkdtemp()

    # If a deid is given, check against format
    if args.deid is not None:
        params = load_deid(args.deid)
        if params["format"] != args.format:
            # NOTE(review): the message says "exiting" but nothing in this
            # function stops here - confirm whether bot.error terminates.
            bot.error(
                "Format in deid (%s) doesn't match choice here (%s) exiting."
                % (params["format"], args.format)
            )

    # Get list of dicom files
    base = args.input
    if base is None:
        bot.info("No input folder specified, will use demo dicom-cookies.")
        base = get_dataset("dicom-cookies")
    basename = os.path.basename(base)
    dicom_files = list(get_files(base))  # todo : consider using generator functionality

    do_get = False
    do_put = False
    ids = None
    if args.action == "all":
        bot.info("GET and PUT identifiers from %s" % (basename))
        do_get = True
        do_put = True

    elif args.action == "get":
        do_get = True
        # This branch only extracts; it used to announce "GET and PUT"
        bot.info("GET identifiers from %s" % (basename))

    elif args.action == "put":
        bot.info("PUT identifiers from %s" % (basename))
        do_put = True
        if args.ids is None:
            bot.exit("To PUT without GET you must provide a json file with ids.")
        else:
            # --ids is a path to a json file; replace_identifiers indexes
            # ids by dicom file, so load the lookup rather than handing
            # over the path string.
            with open(args.ids, "r") as fd:
                ids = json.load(fd)

    # GET identifiers
    if do_get is True:
        ids = get_identifiers(dicom_files)

    if do_put is True:
        cleaned_files = replace_identifiers(
            dicom_files=dicom_files,
            ids=ids,
            deid=args.deid,
            overwrite=args.overwrite,
            output_folder=output_folder,
            save=True,
        )

        # cleaned_files only exists on the PUT path, so report it here
        bot.info("%s %s files at %s" % (len(cleaned_files), args.format, output_folder))
Inspect the header fields of dicom files. 19 | 20 | inspect currently serves to inspect the header fields of a set 21 | of dicom files against a standard, and flag images that don't 22 | pass the different levels of criteria 23 | """ 24 | 25 | # If a deid is given, check against format 26 | deid = args.deid 27 | if deid is not None: 28 | params = load_deid(deid) 29 | if params["format"] != args.format: 30 | bot.error( 31 | "Format in deid (%s) doesn't match choice here (%s) exiting." 32 | % (params["format"], args.format) 33 | ) 34 | # Get list of dicom files 35 | base = args.folder 36 | if base is None: 37 | bot.info("No input folder specified, will use demo dicom-cookies.") 38 | base = get_dataset("dicom-cookies") 39 | 40 | dicom_files = list( 41 | get_files(base, pattern=args.pattern) 42 | ) # todo : consider using generator functionality 43 | result = has_burned_pixels(dicom_files, deid=deid) 44 | 45 | print("\nSUMMARY ================================\n") 46 | if result["clean"]: 47 | bot.custom( 48 | prefix="CLEAN", message="%s files" % len(result["clean"]), color="CYAN" 49 | ) 50 | 51 | if result["flagged"]: 52 | for group, files in result["flagged"].items(): 53 | bot.flag("%s %s files" % (group, len(files))) 54 | 55 | if args.save: 56 | folders = "-".join([os.path.basename(folder) for folder in base]) 57 | outfile = "pixel-flag-results-%s-%s.tsv" % ( 58 | folders, 59 | datetime.datetime.now().strftime("%y-%m-%d"), 60 | ) 61 | 62 | with open(outfile, "w") as filey: 63 | filey.writelines("dicom_file\tpixels_flagged\tflag_list\treason\n") 64 | 65 | for clean in result["clean"]: 66 | filey.writelines("%s\tCLEAN\t\t\n" % clean) 67 | 68 | for flagged, details in result["flagged"].items(): 69 | if details["flagged"] is True: 70 | for result in details["results"]: 71 | group = result["group"] 72 | reason = result["reason"] 73 | filey.writelines( 74 | "%s\tFLAGGED\t%s\t%s\n" % (flagged, group, reason) 75 | ) 76 | 77 | print("Result written to %s" % outfile) 78 | 
-------------------------------------------------------------------------------- /deid/tests/Xtest_dicom_header.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import shutil 5 | import tempfile 6 | import unittest 7 | 8 | from deid.data import get_dataset 9 | from deid.dicom import utils 10 | from deid.utils import get_installdir 11 | 12 | 13 | class TestDicomHeader(unittest.TestCase): 14 | def setUp(self): 15 | self.pwd = get_installdir() 16 | self.deid = os.path.abspath("%s/../examples/deid/deid.dicom" % self.pwd) 17 | self.dataset = get_dataset("dicom-cookies") 18 | self.tmpdir = tempfile.mkdtemp() 19 | print("\n######################START######################") 20 | 21 | def tearDown(self): 22 | shutil.rmtree(self.tmpdir) 23 | print("\n######################END########################") 24 | 25 | def test_get_fields(self): 26 | print("Case 1: Test get fields from dataset") 27 | from deid.dicom.header import get_fields 28 | 29 | dicom = get_dicom(self.dataset) 30 | fields = get_fields(dicom) 31 | self.assertEqual(len(fields), 28) 32 | self.assertTrue("PatientID" in fields) 33 | 34 | def test_get_identifiers(self): 35 | print("Testing deid.dicom get_identifiers") 36 | from deid.dicom import get_identifiers 37 | 38 | dicom_files = get_dicom(self.dataset, return_dir=True) 39 | ids = get_identifiers(dicom_files) 40 | self.assertTrue(len(ids) == 1) 41 | self.assertTrue(isinstance(ids, dict)) 42 | self.assertEqual(len(ids["cookie-47"]), 7) 43 | 44 | def test_replace_identifiers(self): 45 | print("Testing deid.dicom replace_identifiers") 46 | 47 | from deid.dicom import get_identifiers, replace_identifiers 48 | 49 | dicom_files = get_dicom(self.dataset, return_dir=True) 50 | ids = get_identifiers(dicom_files) 51 | 52 | # Before blanking, 28 fields don't have blanks 53 | notblanked = utils.dcmread(dicom_files[0]) 54 | notblanked_fields = [ 55 | x for x in notblanked.dir() if 
notblanked.get(x) != "" 56 | ] # 28 57 | self.assertTrue(len(notblanked_fields) == 28) 58 | 59 | updated_files = replace_identifiers(dicom_files, ids, output_folder=self.tmpdir) 60 | 61 | # After replacing only 9 don't have blanks 62 | blanked = utils.dcmread(updated_files[0]) 63 | blanked_fields = [x for x in blanked.dir() if blanked.get(x) != ""] 64 | self.assertTrue(len(blanked_fields) == 9) 65 | 66 | 67 | def get_dicom(dataset, return_dir=False): 68 | """helper function to load a dicom""" 69 | 70 | from deid.dicom import get_files 71 | 72 | dicom_files = get_files(dataset) 73 | if return_dir: 74 | return list(dicom_files) 75 | return utils.dcmread(next(dicom_files)) 76 | 77 | 78 | if __name__ == "__main__": 79 | unittest.main() 80 | -------------------------------------------------------------------------------- /deid/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pydicom/deid/fa4731f79d7002d51e31cbec6f2586d87afb479d/deid/tests/__init__.py -------------------------------------------------------------------------------- /deid/tests/common.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from deid.config import DeidRecipe 4 | from deid.dicom import get_files, utils 5 | 6 | 7 | def create_recipe(actions, fields=None, values=None): 8 | """ 9 | Helper method to create a recipe file 10 | """ 11 | recipe = DeidRecipe() 12 | 13 | # .clear() only supported Python 3.3 and after 14 | del recipe.deid["header"][:] 15 | recipe.deid["header"] = actions 16 | 17 | if fields is not None: 18 | recipe.deid["fields"] = fields 19 | 20 | if values is not None: 21 | recipe.deid["values"] = values 22 | 23 | return recipe 24 | 25 | 26 | def get_dicom(dataset): 27 | """ 28 | helper function to load a dicom 29 | """ 30 | dicom_files = get_files(dataset) 31 | return utils.dcmread(next(dicom_files)) 32 | 33 | 34 | def 
def get_file(dataset):
    """
    return the first entry produced by get_files for the dataset
    """
    return next(get_files(dataset))
6 | contains Modality DX + contains PatientSex M 7 | + contains Manufacturer Agfa 8 | 9 | %header 10 | ADD PatientIdentityRemoved No 11 | -------------------------------------------------------------------------------- /deid/tests/resources/filter_multiple_second_filter_match.dicom: -------------------------------------------------------------------------------- 1 | FORMAT dicom 2 | 3 | %filter ShouldNotMatch 4 | 5 | LABEL - To be tested with Cat.dcm. Intended to flag the image. 6 | contains Modality MR 7 | + contains Manufacturer Agfa 8 | 9 | %filter ShouldMatch 10 | 11 | LABEL - To be tested with Cat.dcm. Intended to NOT flag the image. 12 | contains PatientSex M 13 | + contains Manufacturer Agfa 14 | 15 | %header 16 | ADD PatientIdentityRemoved No 17 | -------------------------------------------------------------------------------- /deid/tests/resources/filter_multiple_two_filter_match.dicom: -------------------------------------------------------------------------------- 1 | FORMAT dicom 2 | 3 | %filter ShouldMatch1 4 | LABEL - To be tested with Cat.dcm. Intended to flag the image. 5 | contains Modality DX 6 | + contains Manufacturer Agfa 7 | 8 | %filter ShouldMatch2 9 | LABEL - To be tested with Cat.dcm. Intended to flag the image. 10 | contains PatientSex M 11 | + contains Manufacturer Agfa 12 | 13 | %header 14 | ADD PatientIdentityRemoved No 15 | -------------------------------------------------------------------------------- /deid/tests/resources/filter_multiple_zero_filter_match.dicom: -------------------------------------------------------------------------------- 1 | FORMAT dicom 2 | 3 | %filter ShouldNotMatch1 4 | 5 | LABEL - To be tested with Cat.dcm. Intended to flag the image. 6 | contains Modality MR 7 | + contains Manufacturer Agfa 8 | 9 | %filter ShouldNotMatch2 10 | 11 | LABEL - To be tested with Cat.dcm. Intended to flag the image. 
12 | contains PatientSex F 13 | + contains Manufacturer Agfa 14 | 15 | %header 16 | ADD PatientIdentityRemoved No 17 | -------------------------------------------------------------------------------- /deid/tests/resources/filter_single_rule_false.dicom: -------------------------------------------------------------------------------- 1 | FORMAT dicom 2 | 3 | %filter blacklist 4 | 5 | LABEL - To be tested with Cat.dcm. Intended to NOT flag the image. 6 | contains Modality CT 7 | 8 | %header 9 | ADD PatientIdentityRemoved No 10 | -------------------------------------------------------------------------------- /deid/tests/resources/filter_single_rule_innerop_false.dicom: -------------------------------------------------------------------------------- 1 | FORMAT dicom 2 | 3 | %filter blacklist 4 | 5 | LABEL - To be tested with Cat.dcm. Intended to NOT flag the image. 6 | contains Modality CT + contains PatientSex F 7 | 8 | %header 9 | ADD PatientIdentityRemoved No 10 | -------------------------------------------------------------------------------- /deid/tests/resources/filter_single_rule_innerop_true.dicom: -------------------------------------------------------------------------------- 1 | FORMAT dicom 2 | 3 | %filter blacklist 4 | 5 | LABEL - To be tested with Cat.dcm. Intended to flag the image. 6 | contains Modality DX + contains PatientSex M 7 | 8 | %header 9 | ADD PatientIdentityRemoved No 10 | -------------------------------------------------------------------------------- /deid/tests/resources/filter_single_rule_true.dicom: -------------------------------------------------------------------------------- 1 | FORMAT dicom 2 | 3 | %filter blacklist 4 | 5 | LABEL - To be tested with Cat.dcm. Intended to flag the image. 
6 | contains Modality DX 7 | 8 | %header 9 | ADD PatientIdentityRemoved No 10 | -------------------------------------------------------------------------------- /deid/tests/resources/filter_tag_number.dicom: -------------------------------------------------------------------------------- 1 | FORMAT dicom 2 | 3 | %filter blacklist 4 | 5 | LABEL - To be tested with Cat.dcm. Intended to NOT flag the image. 6 | contains 0x00110003 Agfa 7 | 8 | %header 9 | ADD PatientIdentityRemoved No 10 | -------------------------------------------------------------------------------- /deid/tests/resources/keepcoordinates.dicom: -------------------------------------------------------------------------------- 1 | FORMAT dicom 2 | 3 | %filter blacklist 4 | 5 | LABEL by SOPClassUID 6 | contains SOPClassUID 1.2.840.10008.5.1.4.1.1.1.1 7 | coordinates all 8 | keepcoordinates 0,0,1024,1024 9 | 10 | %header 11 | ADD PatientIdentityRemoved No 12 | -------------------------------------------------------------------------------- /deid/tests/resources/keepcoordinates_from.dicom: -------------------------------------------------------------------------------- 1 | FORMAT dicom 2 | 3 | %filter graylist 4 | LABEL Clean Ultrasound Regions 5 | present SequenceOfUltrasoundRegions 6 | coordinates all 7 | keepcoordinates from:SequenceOfUltrasoundRegions 8 | 9 | 10 | %header 11 | ADD PatientIdentityRemoved No 12 | -------------------------------------------------------------------------------- /deid/tests/resources/keepcoordinates_noaction.dicom: -------------------------------------------------------------------------------- 1 | FORMAT dicom 2 | 3 | %filter blacklist 4 | 5 | LABEL by SOPClassUID 6 | contains SOPClassUID 1.2.840.10008.5.1.4.1.1.1.1 7 | keepcoordinates 0,0,1024,1024 8 | 9 | %header 10 | ADD PatientIdentityRemoved No 11 | -------------------------------------------------------------------------------- /deid/tests/resources/remove_all.dicom: 
-------------------------------------------------------------------------------- 1 | FORMAT dicom 2 | 3 | %filter blacklist 4 | 5 | LABEL by SOPClassUID 6 | contains SOPClassUID 1.2.840.10008.5.1.4.1.1.1.1 7 | coordinates all 8 | 9 | %header 10 | ADD PatientIdentityRemoved No 11 | -------------------------------------------------------------------------------- /deid/tests/resources/remove_coordinates.dicom: -------------------------------------------------------------------------------- 1 | FORMAT dicom 2 | 3 | %filter blacklist 4 | 5 | LABEL by SOPClassUID 6 | contains SOPClassUID 1.2.840.10008.5.1.4.1.1.1.1 7 | coordinates 0,0,1024,1024 8 | 9 | %header 10 | ADD PatientIdentityRemoved No 11 | -------------------------------------------------------------------------------- /deid/tests/resources/remove_coordinates_groups.dicom: -------------------------------------------------------------------------------- 1 | FORMAT dicom 2 | 3 | %filter graylist 4 | 5 | LABEL Groups With OR 6 | contains Manufacturer foo 7 | + contains ManufacturerModelName bar || contains SeriesDescription flag me 8 | coordinates 0,0,1024,1024 9 | 10 | LABEL Value With OR 11 | contains Manufacturer foo|bar|baz 12 | + contains SeriesDescription bam 13 | coordinates 0,0,10,10 14 | 15 | LABEL Value With AND 16 | contains Manufacturer foo 17 | + contains ImageType DERIVED+SECONDARY 18 | coordinates 0,0,20,10 19 | -------------------------------------------------------------------------------- /deid/tests/resources/remove_coordinates_multiple.dicom: -------------------------------------------------------------------------------- 1 | FORMAT dicom 2 | 3 | %filter blacklist 4 | 5 | LABEL by SOPClassUID 6 | contains SOPClassUID 1.2.840.10008.5.1.4.1.1.1.1 7 | coordinates 0,0,10,10 8 | coordinates 10,10,20,20 9 | 10 | %header 11 | ADD PatientIdentityRemoved No 12 | -------------------------------------------------------------------------------- 
/deid/tests/resources/remove_coordinates_multiple_filters.dicom: -------------------------------------------------------------------------------- 1 | FORMAT dicom 2 | 3 | %filter blacklist1 4 | 5 | LABEL by SOPClassUID blacklist1 6 | contains SOPClassUID 1.2.840.10008.5.1.4.1.1.1.1 7 | coordinates 0,0,10,10 8 | 9 | %filter blacklist2 10 | 11 | LABEL by SOPClassUID blacklist2 12 | contains SOPClassUID 1.2.840.10008.5.1.4.1.1.1.1 13 | coordinates 10,10,20,20 14 | 15 | %header 16 | ADD PatientIdentityRemoved No 17 | -------------------------------------------------------------------------------- /deid/tests/resources/remove_coordinates_us.dicom: -------------------------------------------------------------------------------- 1 | FORMAT dicom 2 | 3 | %filter greylist 4 | LABEL by Modality 5 | contains Modality US 6 | coordinates 0,0,500,500 7 | 8 | %header 9 | ADD PatientIdentityRemoved No 10 | -------------------------------------------------------------------------------- /deid/tests/resources/remove_coordinates_us_all.dicom: -------------------------------------------------------------------------------- 1 | FORMAT dicom 2 | 3 | %filter greylist 4 | LABEL by Modality 5 | contains Modality US 6 | coordinates all 7 | 8 | %header 9 | ADD PatientIdentityRemoved No 10 | -------------------------------------------------------------------------------- /deid/tests/test_blank_action.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import shutil 5 | import tempfile 6 | import unittest 7 | 8 | from deid.data import get_dataset 9 | from deid.dicom import replace_identifiers, utils 10 | from deid.tests.common import create_recipe, get_file 11 | from deid.utils import get_installdir 12 | 13 | global generate_uid 14 | 15 | 16 | class TestBlankAction(unittest.TestCase): 17 | def setUp(self): 18 | self.pwd = get_installdir() 19 | self.deid = os.path.abspath("%s/../examples/deid/deid.dicom" % 
def run_blank_test(self, VR, Field, Expected):
    """Apply a single BLANK action to ``Field`` and check the saved value.

    Parameters:
        VR: value representation the field is expected to have before the
            action is applied (compared against the element's ``VR``).
        Field: field keyword or tag string handed to the BLANK action and
            used to index the dataset.
        Expected: value the field must hold in the written output file.
    """
    print(f"Test BLANK {VR}")
    dicom_file = get_file(self.dataset)

    actions = [
        {"action": "BLANK", "field": Field},
    ]
    recipe = create_recipe(actions)

    # Precondition: the field exists, is non-empty and has the requested VR,
    # otherwise blanking it would prove nothing.
    element = utils.dcmread(dicom_file)[Field]
    self.assertIsNotNone(element.value)
    self.assertNotEqual("", element.value)
    self.assertEqual(VR, element.VR)

    result = replace_identifiers(
        dicom_files=dicom_file,
        deid=recipe,
        save=True,
        remove_private=False,
        strip_sequences=False,
    )

    # Check the count before indexing so an empty result is reported as a
    # test failure rather than an IndexError.
    self.assertEqual(1, len(result))
    outputfile = utils.dcmread(result[0])
    self.assertEqual(Expected, outputfile[Field].value)
test_blank_LO(self): 88 | self.run_blank_test("LO", "PatientID", "") 89 | 90 | def test_blank_LT(self): 91 | self.run_blank_test("LT", "AdditionalPatientHistory", "") 92 | 93 | def test_blank_OB(self): 94 | self.run_blank_test("OB", "00110011", None) 95 | 96 | def test_blank_OD(self): 97 | self.run_blank_test("OD", "00110012", None) 98 | 99 | def test_blank_OF(self): 100 | self.run_blank_test("OF", "00110013", None) 101 | 102 | def test_blank_OL(self): 103 | self.run_blank_test("OL", "00110014", None) 104 | 105 | def test_blank_OV(self): 106 | self.run_blank_test("OV", "00110016", None) 107 | 108 | def test_blank_OW(self): 109 | self.run_blank_test("OW", "00110015", None) 110 | 111 | def test_blank_PN(self): 112 | self.run_blank_test("PN", "ReferringPhysicianName", "") 113 | 114 | def test_blank_SH(self): 115 | self.run_blank_test("SH", "AccessionNumber", "") 116 | 117 | def test_blank_SL(self): 118 | self.run_blank_test("SL", "00110001", None) 119 | 120 | def test_blank_SQ(self): 121 | self.run_blank_test("SQ", "ProcedureCodeSequence", []) 122 | 123 | def test_blank_SS(self): 124 | self.run_blank_test("SS", "00110002", None) 125 | 126 | def test_blank_ST(self): 127 | self.run_blank_test("ST", "InstitutionAddress", "") 128 | 129 | def test_blank_SV(self): 130 | self.run_blank_test("SV", "00110007", None) 131 | 132 | def test_blank_TM(self): 133 | self.run_blank_test("TM", "StudyTime", "") 134 | 135 | def test_blank_UC(self): 136 | self.run_blank_test("UC", "00110009", "") 137 | 138 | def test_blank_UI(self): 139 | self.run_blank_test("UI", "FrameOfReferenceUID", "") 140 | 141 | def test_blank_UL(self): 142 | self.run_blank_test("UL", "00311101", None) 143 | 144 | def test_blank_UN(self): 145 | self.run_blank_test("UN", "00110003", None) 146 | 147 | def test_blank_UR(self): 148 | self.run_blank_test("UR", "00110008", "") 149 | 150 | def test_blank_US(self): 151 | self.run_blank_test("US", "PregnancyStatus", None) 152 | 153 | def test_blank_UT(self): 154 | 
class TestMainAction(unittest.TestCase):
    """Run the ``deid`` command line entry point against one example file."""

    def setUp(self):
        """Locate an example file and create a scratch folder.

        The current working directory is remembered because the test changes
        into the scratch folder and must leave the process where it found it.
        """
        self.pwd = get_installdir()
        self.deid = os.path.abspath("%s/../examples/deid/deid.dicom" % self.pwd)
        self.dataset = get_dataset("humans")
        self.example = list(get_files(self.dataset))[0]
        self.tmpdir = tempfile.mkdtemp()
        self.cwd = os.getcwd()
        print("\n######################START######################")

    def tearDown(self):
        """Restore the working directory, then remove the scratch folder."""
        # Leave the scratch folder before deleting it: removing the current
        # working directory fails on some platforms and would leave later
        # tests running from a directory that no longer exists.
        os.chdir(self.cwd)
        shutil.rmtree(self.tmpdir)
        print("\n######################END########################")

    @patch(
        "sys.argv",
        "deid --outfolder out/ --overwrite identifiers --action all --input ./".split(
            " "
        ),
    )
    def test_deidmain_write_identifiers(self):
        """
        Run example command line call. Expect saved output.
        """
        os.chdir(self.tmpdir)
        # Confirm input data has value that will be scrubbed.
        self.assertIsNotNone(utils.dcmread(self.example).get("StudyTime"))

        shutil.copyfile(self.example, os.path.join(self.tmpdir, "example.dicom"))
        os.makedirs("out/")
        deid.main.main()

        # Confirm new file was created
        outfile = utils.dcmread("out/example.dicom")

        # Confirm new file was scrubbed
        self.assertIsNone(outfile.get("StudyTime"))
def test_standards(self):
    """Check that the known actions, sections and formats match the defaults.

    Each collection imported from ``deid.config`` must contain every expected
    entry and nothing else.
    """
    from deid.config import actions, formats, sections

    def check_exact(found, expected):
        # Every expected entry is present ...
        for item in expected:
            self.assertIn(item, found)
        # ... and there should not be any we don't know about. Comparing the
        # leftovers to [] makes a failure list the unexpected entries.
        self.assertEqual([x for x in found if x not in expected], [])

    print("Testing standards: default actions")
    default_actions = [
        "ADD",
        "BLANK",
        "KEEP",
        "REPLACE",
        "REMOVE",
        "JITTER",
        "LABEL",
    ]
    check_exact(actions, default_actions)

    print("Testing standards: allowed sections")
    default_sections = [
        "header",
        "labels",
        "filter",
        "fields",
        "values",
    ]
    check_exact(sections, default_sections)

    print("Testing default formats")
    default_formats = ["dicom"]
    check_exact(formats, default_formats)
class TestDeidRecipe(unittest.TestCase):
    """Exercise ``DeidRecipe`` loading and its accessor methods."""

    def setUp(self):
        """Resolve the example recipe path and create a scratch folder."""
        self.pwd = get_installdir()
        self.deid = os.path.abspath("%s/../examples/deid/deid.dicom" % self.pwd)
        self.tmpdir = tempfile.mkdtemp()
        print("\n######################START######################")

    def tearDown(self):
        """Remove the scratch folder created in ``setUp``."""
        shutil.rmtree(self.tmpdir)
        print("\n######################END########################")

    def test_load_recipe(self):
        """A default recipe is a dict with the basic sections; a file also loads."""
        print("Case 1: Test loading default DeidRecipe")

        recipe = DeidRecipe()

        self.assertIsInstance(recipe.deid, dict)

        print("Checking basic sections are loaded")
        print(recipe.deid.keys())
        for section in ["header", "format", "filter"]:
            self.assertIn(section, recipe.deid)

        print("Case 2: Loading from file")
        recipe = DeidRecipe(self.deid)

    def test_get_functions(self):
        """Check format, actions, filters and the fields/values list accessors."""
        recipe = DeidRecipe(self.deid)

        # Format
        self.assertEqual(recipe.get_format(), "dicom")

        # Actions for header
        print("Testing get_actions")
        actions = recipe.get_actions()
        self.assertIsInstance(actions, list)
        for key in ["action", "field", "value"]:
            self.assertIn(key, actions[0])
        self.assertTrue(recipe.has_actions())

        # Filters
        print("Testing get_filters")
        filters = recipe.get_filters()
        self.assertIsInstance(filters, dict)

        # whitelist, blacklist, graylist
        for key in recipe.ls_filters():
            self.assertIn(key, filters)

        recipe = DeidRecipe()
        filters = recipe.get_filters()
        self.assertIsInstance(filters["whitelist"], list)

        first = filters["whitelist"][0]

        # Test that each filter has a set of filters, coords, name
        for key in ["filters", "coordinates", "name"]:
            self.assertIn(key, first)

        # Each filter is a list of actions, name is string, coords are list
        self.assertIsInstance(first["filters"], list)
        self.assertIsInstance(first["name"], str)
        self.assertIsInstance(first["coordinates"], list)

        # Check content of the first filter
        for key in ["action", "field", "operator", "InnerOperators", "value"]:
            self.assertIn(key, first["filters"][0])

        # Fields and Values
        print("Testing get_fields_lists and get_values_lists")
        self.assertIsNone(recipe.get_fields_lists())
        self.assertIsNone(recipe.get_values_lists())
        self.assertEqual(recipe.ls_fieldlists(), [])
        self.assertEqual(recipe.ls_valuelists(), [])
        self.assertFalse(recipe.has_fields_lists())
        self.assertFalse(recipe.has_values_lists())

        # Load in recipe with values and fields
        deid = os.path.abspath("%s/../examples/deid/deid.dicom-groups" % self.pwd)
        recipe = DeidRecipe(deid)

        # unittest assertions rather than bare ``assert``, which is stripped
        # when Python runs with -O.
        self.assertIn("values", recipe.deid)
        self.assertIn("fields", recipe.deid)
        self.assertIsInstance(recipe.deid["values"], dict)
        self.assertIsInstance(recipe.deid["fields"], dict)

        self.assertIsNotNone(recipe.get_fields_lists())
        self.assertIsNotNone(recipe.get_values_lists())
        self.assertEqual(recipe.ls_fieldlists(), ["instance_fields"])
        self.assertEqual(recipe.ls_valuelists(), ["cookie_names", "operator_names"])
        self.assertTrue(recipe.has_fields_lists())
        self.assertTrue(recipe.has_values_lists())
class TestDicomGroups(unittest.TestCase):
    """Checks for value/field group extraction and their use in a recipe."""

    def setUp(self):
        """Resolve the groups example recipe, the dataset and a scratch folder."""
        self.pwd = get_installdir()
        self.deid = os.path.abspath("%s/../examples/deid/deid.dicom-groups" % self.pwd)
        self.dataset = get_dataset("dicom-cookies")
        self.tmpdir = tempfile.mkdtemp()
        print("\n######################START######################")

    def tearDown(self):
        """Remove the scratch folder created in ``setUp``."""
        shutil.rmtree(self.tmpdir)
        print("\n######################END########################")

    def test_extract_groups(self):
        """Extract value and field lists, then apply them through the recipe."""
        print("Test deid.dicom.groups extract_values_list")
        from deid.dicom.groups import extract_fields_list, extract_values_list

        dicom = get_dicom(self.dataset)
        fields = get_fields(dicom)

        # Test split action
        actions = [
            {"action": "SPLIT", "field": "PatientID", "value": 'by="^";minlength=4'}
        ]
        expected_names = dicom.get("PatientID").split("^")
        actual = extract_values_list(dicom, actions)
        self.assertEqual(actual, expected_names)

        # Test field action
        actions = [{"action": "FIELD", "field": "startswith:Operator"}]
        expected_operator = [
            x.element.value
            for x in fields.values()
            if x.element.keyword.startswith("Operator")
        ]
        actual = extract_values_list(dicom, actions)
        self.assertEqual(actual, expected_operator)

        print("Test deid.dicom.groups extract_fields_list")
        actions = [{"action": "FIELD", "field": "contains:Instance"}]
        expected = {
            uid: x for uid, x in fields.items() if "Instance" in x.element.keyword
        }
        actual = extract_fields_list(dicom, actions)
        # unittest assertions rather than bare ``assert``, which is stripped
        # when Python runs with -O.
        for uid in expected:
            self.assertIn(uid, actual)

        # Get identifiers for file
        ids = get_identifiers(dicom)
        self.assertIsInstance(ids, dict)

        # Add keys to be used for replace to ids - these first are for values
        parser = DicomParser(dicom, recipe=self.deid)
        parser.define("cookie_names", expected_names)
        parser.define("operator_names", expected_operator)

        # This is for fields
        parser.define("instance_fields", expected)
        parser.define("id", "new-cookie-id")
        parser.define("source_id", "new-operator-id")
        parser.parse()

        # Were the changes made?
        self.assertEqual(parser.dicom.get("PatientID"), "new-cookie-id")
        self.assertEqual(parser.dicom.get("OperatorsName"), "new-operator-id")

        # Instance fields should be removed based on recipe
        for field in parser.lookup["instance_fields"].values():
            self.assertNotIn(field.element.keyword, parser.dicom)

        # Start over
        dicom = get_dicom(self.dataset)

        # We need to provide ids with variables "id" and "source_id"
        ids = {dicom.filename: {"id": "new-cookie-id", "source_id": "new-operator-id"}}

        # Returns list of updated dicom, since save is False
        replaced = replace_identifiers(dicom, save=False, deid=self.deid, ids=ids)
        cleaned = replaced.pop()

        self.assertEqual(cleaned.get("PatientID"), "new-cookie-id")
        self.assertEqual(cleaned.get("OperatorsName"), "new-operator-id")
self.assertEqual(tag["VM"], "1") 31 | self.assertEqual(tag["VR"], "CS") 32 | self.assertEqual(tag["keyword"], "Modality") 33 | self.assertEqual(tag["name"], "Modality") 34 | self.assertTrue(isinstance(tag["tag"], BaseTag)) 35 | 36 | print("Case 2: Ask for unknown tag") 37 | tag = get_tag("KleenexTissue") 38 | self.assertTrue(not tag) 39 | 40 | 41 | if __name__ == "__main__": 42 | unittest.main() 43 | -------------------------------------------------------------------------------- /deid/tests/test_dicom_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import shutil 5 | import tempfile 6 | import unittest 7 | 8 | from deid.data import get_dataset 9 | from deid.tests.common import get_dicom 10 | from deid.utils import get_installdir 11 | 12 | global generate_uid 13 | 14 | 15 | class TestDicomUtils(unittest.TestCase): 16 | def setUp(self): 17 | self.pwd = get_installdir() 18 | self.deid = os.path.abspath("%s/../examples/deid/deid.dicom" % self.pwd) 19 | self.dataset = get_dataset("dicom-cookies") 20 | self.tmpdir = tempfile.mkdtemp() 21 | print("\n######################START######################") 22 | 23 | def tearDown(self): 24 | shutil.rmtree(self.tmpdir) 25 | print("\n######################END########################") 26 | 27 | def test_get_files(self): 28 | print("Test test_get_files") 29 | print("Case 1: Test get files from dataset") 30 | from deid.dicom import get_files 31 | 32 | found = 0 33 | for dicom_file in get_files(self.dataset): 34 | found += 1 35 | expected = 7 36 | self.assertEqual(found, expected) 37 | 38 | print("Case 2: Ask for files from empty folder") 39 | found = 0 40 | for dicom_file in get_files(self.tmpdir): 41 | found += 1 42 | expected = 0 43 | self.assertEqual(found, expected) 44 | 45 | def test_get_files_as_list(self): 46 | print("Test test_get_files_as_list") 47 | print("Case 1: Test get files from dataset") 48 | from deid.dicom import get_files 49 
| 50 | dicom_files = list(get_files(self.dataset)) 51 | found = len(dicom_files) 52 | expected = 7 53 | self.assertEqual(found, expected) 54 | 55 | print("Case 2: Ask for files from empty folder") 56 | dicom_files = list(get_files(self.tmpdir)) 57 | found = len(dicom_files) 58 | expected = 0 59 | self.assertEqual(found, expected) 60 | 61 | def test_jitter_timestamp(self): 62 | from deid.dicom.actions import jitter_timestamp 63 | from deid.dicom.fields import DicomField 64 | from deid.dicom.tags import get_tag 65 | 66 | dicom = get_dicom(self.dataset) 67 | 68 | print("Testing test_jitter_timestamp") 69 | 70 | print("Case 1: Testing jitter_timestamp with DICOM Date (DA)") 71 | name = "StudyDate" 72 | tag = get_tag(name) 73 | dicom.StudyDate = "20131210" 74 | dicom.data_element(name).VR = "DA" 75 | field = DicomField(dicom.data_element(name), name, str(tag["tag"])) 76 | actual = jitter_timestamp(field, 10) 77 | expected = "20131220" 78 | self.assertEqual(actual, expected) 79 | 80 | print("Case 2: Testing with DICOM timestamp (DT)") 81 | name = "AcquisitionDateTime" 82 | tag = get_tag(name) 83 | dicom.AcquisitionDateTime = "20131210081530" 84 | dicom.data_element(name).VR = "DT" 85 | field = DicomField(dicom.data_element(name), name, str(tag["tag"])) 86 | actual = jitter_timestamp(field, 10) 87 | expected = "20131220081530.000000" 88 | self.assertEqual(actual, expected) 89 | 90 | print("Case 3: Testing with non-standard DICOM date (DA)") 91 | name = "StudyDate" 92 | tag = get_tag(name) 93 | dicom.StudyDate = "20131210" 94 | dicom.data_element(name).VR = "DA" 95 | field = DicomField(dicom.data_element(name), name, str(tag["tag"])) 96 | actual = jitter_timestamp(field, 10) 97 | expected = "20131220" 98 | self.assertEqual(actual, expected) 99 | 100 | print("Case 4: Testing negative jitter value") 101 | name = "StudyDate" 102 | tag = get_tag(name) 103 | dicom.StudyDate = "20131210" 104 | field = DicomField(dicom.data_element(name), name, str(tag["tag"])) 105 | actual = 
jitter_timestamp(field, -5) 106 | expected = "20131205" 107 | self.assertEqual(actual, expected) 108 | 109 | print("Case 5: Testing with empty field") 110 | name = "StudyDate" 111 | tag = get_tag(name) 112 | dicom.StudyDate = "" 113 | field = DicomField(dicom.data_element(name), name, str(tag["tag"])) 114 | actual = jitter_timestamp(field, 10) 115 | expected = None 116 | self.assertEqual(actual, expected) 117 | 118 | 119 | if __name__ == "__main__": 120 | unittest.main() 121 | -------------------------------------------------------------------------------- /deid/tests/test_file_meta.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import unittest 4 | 5 | from deid.data import get_dataset 6 | from deid.dicom import replace_identifiers 7 | from deid.tests.common import create_recipe, get_file 8 | from deid.utils import get_installdir 9 | 10 | 11 | class TestDicom(unittest.TestCase): 12 | def setUp(self): 13 | self.pwd = get_installdir() 14 | self.dataset = get_dataset("animals") 15 | 16 | def test_replace_filemeta(self): 17 | """RECIPE RULE 18 | REPLACE MediaStorageSOPInstanceUID 1.2.3.4.5.4.3.2.1 19 | """ 20 | print("Test replace filemeta") 21 | dicom_file = get_file(self.dataset) 22 | 23 | actions = [ 24 | { 25 | "action": "REPLACE", 26 | "field": "MediaStorageSOPInstanceUID", 27 | "value": "1.2.3.4.5.4.3.2.1", 28 | } 29 | ] 30 | recipe = create_recipe(actions) 31 | 32 | result = replace_identifiers( 33 | dicom_files=dicom_file, 34 | deid=recipe, 35 | save=False, 36 | remove_private=False, 37 | strip_sequences=False, 38 | ) 39 | self.assertEqual(1, len(result)) 40 | self.assertEqual( 41 | "1.2.3.4.5.4.3.2.1", result[0].file_meta["MediaStorageSOPInstanceUID"].value 42 | ) 43 | 44 | def test_replace_protected_field(self): 45 | """RECIPE RULE 46 | REPLACE TransferSyntaxUID 1.2.3.4.5.4.3.2.1 47 | """ 48 | print("Test replace filemeta") 49 | dicom_file = get_file(self.dataset) 50 | 51 | actions = [ 52 
| { 53 | "action": "REPLACE", 54 | "field": "TransferSyntaxUID", 55 | "value": "1.2.3.4.5.4.3.2.1", 56 | } 57 | ] 58 | recipe = create_recipe(actions) 59 | 60 | result = replace_identifiers( 61 | dicom_files=dicom_file, 62 | deid=recipe, 63 | save=False, 64 | remove_private=False, 65 | strip_sequences=False, 66 | ) 67 | 68 | # Here the field is protected by default, so the value must not be replaced 69 | self.assertEqual(1, len(result)) 70 | self.assertNotEqual("1.2.3.4.5.4.3.2.1", result[0].file_meta.TransferSyntaxUID) 71 | 72 | # Now we will unprotect it! 73 | result = replace_identifiers( 74 | dicom_files=dicom_file, 75 | deid=recipe, 76 | save=False, 77 | remove_private=False, 78 | strip_sequences=False, 79 | disable_skip=True, 80 | ) 81 | 82 | # With disable_skip=True the field is no longer protected, so the value is replaced 83 | self.assertEqual(1, len(result)) 84 | self.assertEqual("1.2.3.4.5.4.3.2.1", result[0].file_meta.TransferSyntaxUID) 85 | 86 | 87 | if __name__ == "__main__": 88 | unittest.main() 89 | -------------------------------------------------------------------------------- /deid/tests/test_nested_dicom_fields.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import os 3 | import tempfile 4 | import unittest 5 | 6 | from pydicom import dcmread 7 | from pydicom.dataset import Dataset 8 | from pydicom.sequence import Sequence 9 | from pydicom.uid import generate_uid 10 | 11 | from deid.config import DeidRecipe 12 | from deid.dicom.header import get_identifiers, replace_identifiers 13 | 14 | 15 | @contextlib.contextmanager 16 | def temporary_recipe(recipe_text: str): 17 | """ 18 | Create a temporary recipe file for testing and yield the DeidRecipe loaded from it. The file is created with delete=False and is not removed afterwards. 
19 | """ 20 | with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as recipe_file: 21 | recipe_file.write(recipe_text.encode()) 22 | recipe_file.flush() 23 | recipe = DeidRecipe(deid=recipe_file.name, base=False) 24 | yield recipe 25 | 26 | 27 | def hashuid(item, value, field, dicom, element_name=None): 28 | """ 29 | Generate a new UID based on the previous UID 30 | """ 31 | if hasattr(field, "element"): 32 | hash_src = str(field.element.value) 33 | else: 34 | hash_src = field 35 | new_uid = generate_uid(entropy_srcs=[hash_src]) 36 | return new_uid 37 | 38 | 39 | class TestNestedDicomFields(unittest.TestCase): 40 | def setUp(self): 41 | print("\n######################START######################") 42 | 43 | def tearDown(self): 44 | print("\n######################END########################") 45 | 46 | def test_nested_dicom_fields(self): 47 | """ 48 | Tests that header deidentification does not overwrite existing top-level tags 49 | when iterating over deeply nested tags. 50 | """ 51 | # Create a mock DICOM dataset with a top-level and nested SeriesInstanceUID 52 | original_dicom = Dataset() 53 | original_dicom.SeriesInstanceUID = generate_uid() 54 | 55 | referenced_series = Dataset() 56 | referenced_series.SeriesInstanceUID = generate_uid() 57 | 58 | original_dicom.ReferencedSeriesSequence = Sequence([referenced_series]) 59 | 60 | # Enforce precondition that the two SeriesInstanceUID attributes are different 61 | self.assertNotEqual( 62 | original_dicom.ReferencedSeriesSequence[0].SeriesInstanceUID, 63 | original_dicom.SeriesInstanceUID, 64 | ) 65 | 66 | recipe_text = """ 67 | FORMAT dicom 68 | %header 69 | REPLACE SeriesInstanceUID func:hashuid 70 | """ 71 | 72 | with ( 73 | tempfile.TemporaryDirectory() as temp_dir, 74 | temporary_recipe(recipe_text) as recipe, 75 | ): 76 | temp_file_name = os.path.join(temp_dir, "input.dcm") 77 | original_dicom.save_as(temp_file_name, implicit_vr=True, little_endian=True) 78 | dicom_paths = [temp_file_name] 79 | 80 | # 
Add hash function to deid context 81 | ids = get_identifiers(dicom_paths) 82 | for dicom_id in ids: 83 | ids[dicom_id]["hashuid"] = hashuid 84 | 85 | os.makedirs(os.path.join(temp_dir, "out")) 86 | output_paths = replace_identifiers( 87 | dicom_paths, 88 | ids=ids, 89 | deid=recipe, 90 | save=True, 91 | overwrite=True, 92 | output_folder=os.path.join(temp_dir, "out"), 93 | ) 94 | 95 | output_dataset = dcmread(output_paths[0], force=True) 96 | # Assert that the SeriesInstanceUID has been replaced 97 | self.assertNotEqual( 98 | output_dataset.SeriesInstanceUID, original_dicom.SeriesInstanceUID 99 | ) 100 | # Assert that the two unique UIDs were deidentified to different values 101 | self.assertNotEqual( 102 | output_dataset.ReferencedSeriesSequence[0].SeriesInstanceUID, 103 | output_dataset.SeriesInstanceUID, 104 | ) 105 | -------------------------------------------------------------------------------- /deid/tests/test_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import json 4 | import os 5 | import shutil 6 | import tempfile 7 | import unittest 8 | 9 | from deid.utils import get_installdir 10 | 11 | 12 | class TestUtils(unittest.TestCase): 13 | def setUp(self): 14 | self.pwd = get_installdir() 15 | self.tmpdir = tempfile.mkdtemp() 16 | print("\n######################START######################") 17 | 18 | def tearDown(self): 19 | shutil.rmtree(self.tmpdir) 20 | print("\n######################END########################") 21 | 22 | def test_get_temporary_name(self): 23 | """test_get_temporary_name will test the generation of a temporary 24 | file name. 
25 | """ 26 | from deid.utils import get_temporary_name 27 | 28 | print("Testing utils.get_temporary_name...") 29 | tmpname = get_temporary_name() 30 | self.assertTrue(not os.path.exists(tmpname)) 31 | self.assertTrue("deid" in tmpname) 32 | tmpname = get_temporary_name(prefix="clean") 33 | self.assertTrue("deid-clean" in tmpname) 34 | tmpname = get_temporary_name(ext=".dcm") 35 | self.assertTrue(tmpname.endswith(".dcm")) 36 | 37 | def test_write_read_files(self): 38 | """test_write_read_files will test the functions 39 | write_file, read_file, and write_json 40 | """ 41 | print("Testing utils.write_file...") 42 | from deid.utils import write_file 43 | 44 | tmpfile = tempfile.mkstemp()[1] 45 | os.remove(tmpfile) 46 | write_file(tmpfile, "blaaahumbug") 47 | self.assertTrue(os.path.exists(tmpfile)) 48 | 49 | print("Testing utils.read_file...") 50 | from deid.utils import read_file 51 | 52 | content = read_file(tmpfile)[0] 53 | self.assertEqual("blaaahumbug", content) 54 | 55 | from deid.utils import write_json 56 | 57 | print("Testing utils.write_json...") 58 | print("Case 1: Providing bad json") 59 | bad_json = {"Wakkawakkawakka'}": [{True}, "2", 3]} 60 | tmpfile = tempfile.mkstemp()[1] 61 | os.remove(tmpfile) 62 | with self.assertRaises(TypeError): 63 | write_json(bad_json, tmpfile) 64 | 65 | print("Case 2: Providing good json") 66 | good_json = {"Wakkawakkawakka": [True, "2", 3]} 67 | tmpfile = tempfile.mkstemp()[1] 68 | os.remove(tmpfile) 69 | write_json(good_json, tmpfile) 70 | with open(tmpfile, "r") as fd: 71 | content = json.loads(fd.read()) 72 | self.assertTrue(isinstance(content, dict)) 73 | self.assertTrue("Wakkawakkawakka" in content) 74 | 75 | def test_get_installdir(self): 76 | """get install directory should return the base of where deid 77 | is installed (a path ending in "deid") 78 | """ 79 | print("Testing finding the installation directory.") 80 | from deid.utils import get_installdir 81 | 82 | whereami = get_installdir() 83 | self.assertTrue(whereami.endswith("deid")) 84 | 85 | 
def test_recursive_find(self): 86 | """test_recursive_find should detect 3 files matching deid* under the install directory""" 87 | print("Testing recursive find.") 88 | from deid.utils import recursive_find 89 | 90 | expected = 3 91 | found = len(list(recursive_find(self.pwd, pattern="deid*"))) 92 | print("Found %s deid files" % (found)) 93 | self.assertTrue(found == expected) 94 | 95 | 96 | if __name__ == "__main__": 97 | unittest.main() 98 | -------------------------------------------------------------------------------- /deid/tests/test_utils_files.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import shutil 5 | import tempfile 6 | import unittest 7 | 8 | from deid.data import get_dataset 9 | from deid.utils import get_installdir 10 | 11 | 12 | class TestDicom(unittest.TestCase): 13 | def setUp(self): 14 | self.pwd = get_installdir() 15 | self.deid = os.path.abspath("%s/../examples/deid/deid.dicom" % self.pwd) 16 | self.dataset = get_dataset("humans") 17 | self.tmpdir = tempfile.mkdtemp() 18 | print("\n######################START######################") 19 | 20 | def tearDown(self): 21 | shutil.rmtree(self.tmpdir) 22 | print("\n######################END########################") 23 | 24 | def test_get_files(self): 25 | print("Test test_get_files") 26 | print("Case 1: Test get files from dataset") 27 | from deid.dicom import get_files 28 | 29 | found = 0 30 | for dicom_file in get_files(self.dataset): 31 | found += 1 32 | expected = 2 33 | self.assertEqual(found, expected) 34 | 35 | print("Case 2: Ask for files from empty folder") 36 | found = 0 37 | for dicom_file in get_files(self.tmpdir): 38 | found += 1 39 | expected = 0 40 | self.assertEqual(found, expected) 41 | 42 | def test_get_files_as_list(self): 43 | print("Test test_get_files_as_list") 44 | print("Case 1: Test get files from dataset") 45 | from deid.dicom import get_files 46 | 47 | dicom_files = list(get_files(self.dataset)) 48 | found = len(dicom_files) 49 
| expected = 2 50 | self.assertEqual(found, expected) 51 | 52 | print("Case 2: Ask for files from empty folder") 53 | dicom_files = list(get_files(self.tmpdir)) 54 | found = len(dicom_files) 55 | expected = 0 56 | self.assertEqual(found, expected) 57 | 58 | 59 | if __name__ == "__main__": 60 | unittest.main() 61 | -------------------------------------------------------------------------------- /deid/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .actions import get_func, get_timestamp, parse_keyvalue_pairs, parse_value 2 | from .fileio import ( 3 | get_installdir, 4 | get_temporary_name, 5 | read_file, 6 | read_json, 7 | recursive_find, 8 | to_int, 9 | write_file, 10 | write_json, 11 | ) 12 | -------------------------------------------------------------------------------- /deid/utils/fileio.py: -------------------------------------------------------------------------------- 1 | __author__ = "Vanessa Sochat" 2 | __copyright__ = "Copyright 2016-2025, Vanessa Sochat" 3 | __license__ = "MIT" 4 | 5 | import fnmatch 6 | import json 7 | import os 8 | import tempfile 9 | from collections import OrderedDict 10 | 11 | ################################################################################ 12 | # Local commands and requests 13 | ################################################################################ 14 | 15 | 16 | def get_installdir(): 17 | """ 18 | Get installation directory of the application 19 | """ 20 | return os.path.abspath(os.path.dirname(os.path.dirname(__file__))) 21 | 22 | 23 | def get_temporary_name(prefix=None, ext=None): 24 | """ 25 | Get a temporary name. 26 | 27 | Get a temporary name, can be used for a directory or file. This does so 28 | without creating the file, and adds an optional prefix 29 | 30 | Parameters 31 | ========== 32 | prefix: if defined, add the prefix after deid 33 | ext: if defined, append it as the file extension. A "." separator is added automatically, so do not include one 
34 | """ 35 | deid_prefix = "deid-" 36 | if prefix: 37 | deid_prefix = "deid-%s-" % prefix 38 | 39 | tmpname = os.path.join( 40 | tempfile.gettempdir(), 41 | "%s%s" % (deid_prefix, next(tempfile._get_candidate_names())), 42 | ) 43 | if ext: 44 | tmpname = "%s.%s" % (tmpname, ext) 45 | return tmpname 46 | 47 | 48 | ################################################################################ 49 | ## FILE OPERATIONS ############################################################# 50 | ################################################################################ 51 | 52 | 53 | def write_file(filename, content, mode="w"): 54 | """ 55 | Write to file. 56 | 57 | write_file will open a file, "filename" and write content, "content" 58 | and properly close the file. Returns the filename. 59 | 60 | Parameters 61 | ========== 62 | filename: the name of the file to write to 63 | content: the content to write to file 64 | mode: the mode to open the file, defaults to write (w) 65 | """ 66 | with open(filename, mode) as filey: 67 | filey.writelines(content) 68 | return filename 69 | 70 | 71 | def write_json(json_obj, filename, mode="w", print_pretty=True): 72 | """ 73 | Write a json object to file and return the filename 74 | 75 | Parameters 76 | ========== 77 | json_obj: the dict to print to json 78 | filename: the output file to write to 79 | print_pretty: if True, will use nicer formatting (4-space indent); mode is the mode used to open the file, defaults to write (w) 80 | """ 81 | with open(filename, mode) as filey: 82 | if print_pretty: 83 | filey.writelines(json.dumps(json_obj, indent=4, separators=(",", ": "))) 84 | else: 85 | filey.writelines(json.dumps(json_obj)) 86 | return filename 87 | 88 | 89 | def read_file(filename, mode="r"): 90 | """ 91 | Read a file. 
92 | 93 | Parameters 94 | ========== 95 | filename: the name of the file to read from 96 | mode: the mode to open the file, defaults to read (r) 97 | 98 | """ 99 | with open(filename, mode) as filey: 100 | content = filey.readlines() 101 | return content 102 | 103 | 104 | def read_json(filename, mode="r", ordered_dict=False): 105 | """ 106 | Open a file, "filename" and read the string as json 107 | 108 | Parameters 109 | ========== 110 | filename: the name of the file to read from 111 | mode: the mode to open the file, defaults to read (r) 112 | ordered_dict: If true, return an OrderedDict (default is False) 113 | 114 | """ 115 | with open(filename, mode) as filey: 116 | if ordered_dict is False: 117 | content = json.loads(filey.read()) 118 | else: 119 | content = json.loads(filey.read(), object_pairs_hook=OrderedDict) 120 | return content 121 | 122 | 123 | def recursive_find(base, pattern=None): 124 | """ 125 | Recursively find files that match a pattern. 126 | 127 | recursive find will yield the paths of matching files in all directory levels 128 | below a base path. It walks the tree with os.walk and filters file names with fnmatch. 129 | 130 | Parameters 131 | ========== 132 | base: the base directory to search 133 | pattern: a pattern to match. 
If None, defaults to "*" 134 | 135 | """ 136 | if pattern is None: 137 | pattern = "*" 138 | 139 | for root, _, filenames in os.walk(base): 140 | for filename in fnmatch.filter(filenames, pattern): 141 | yield os.path.join(root, filename) 142 | 143 | 144 | ################################################################################ 145 | ## DATA FORMATS ################################################################ 146 | ################################################################################ 147 | 148 | 149 | def to_int(value): 150 | """ 151 | Convert a value to int (going through float) if it is not already an int 152 | """ 153 | if not isinstance(value, int): 154 | value = int(float(value)) 155 | return value 156 | 157 | 158 | def is_number(value): 159 | """ 160 | Determine if the value for a field is numeric (an int or float instance) 161 | """ 162 | if isinstance(value, int): 163 | return True 164 | if isinstance(value, float): 165 | return True 166 | return False 167 | -------------------------------------------------------------------------------- /deid/version.py: -------------------------------------------------------------------------------- 1 | __author__ = "Vanessa Sochat" 2 | __copyright__ = "Copyright 2016-2025, Vanessa Sochat" 3 | __license__ = "MIT" 4 | 5 | __version__ = "0.4.3" 6 | AUTHOR = "Vanessa Sochat" 7 | AUTHOR_EMAIL = "vsoch@users.noreply.github.com" 8 | NAME = "deid" 9 | PACKAGE_URL = "https://github.com/pydicom/deid" 10 | KEYWORDS = "open source, python, anonymize, dicom" 11 | DESCRIPTION = "best effort deidentify dicom with python and pydicom" 12 | LICENSE = "LICENSE" 13 | 14 | INSTALL_REQUIRES = ( 15 | "matplotlib", 16 | "numpy>=1.20", 17 | "pydicom>=3.0.0,<4.0.0", 18 | "python-dateutil", 19 | ) 20 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | _site/ 2 | 
-------------------------------------------------------------------------------- /docs/Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | gem 'jekyll', '3.8.4' 4 | 5 | group :jekyll_plugins do 6 | gem 'jekyll-feed', '0.11.0' 7 | gem 'jekyll-seo-tag', '2.5.0' 8 | gem 'jekyll-sitemap', '1.2.0' 9 | end 10 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Deid Documentation 2 | 3 | ![assets/img/logo.png](assets/img/logo.png) 4 | 5 | This is a documentation site for [deid](https://www.github.com/pydicom/deid). 6 | It is part of the [pydicom](https://www.github.com/pydicom) family of tools. 7 | 8 | ## Setup 9 | 10 | 1. Install [Jekyll](https://jekyllrb.com/docs/installation/) locally. For Ruby, I recommend [rbenv](https://github.com/rbenv/rbenv). 11 | 2. Install Jekyll dependencies with `bundle install` 12 | 3. To start the development server, run `bundle exec jekyll serve` 13 | 14 | ## Folders Included 15 | If you aren't familiar with the structure of a Jekyll site, here is a quick overview: 16 | 17 | - [_config.yml](_config.yml) is the primary configuration file for the site. Variables in this file render as `{{ site.var }}` in the various html includes and templates. 18 | - [_layouts](_layouts) are base html templates for pages 19 | - [_includes](_includes) are snippets of html added to layouts 20 | - [pages](pages) are generic pages (e.g., changelog) that aren't considered docs 21 | - [_docs](_docs) is a collection of folders that get rendered into the docs sidebar and pages 22 | - [assets](assets) includes all static assets 23 | - [_data](_data) has different data files (they can be in `.yml` or `.csv`) to render into the site. 
24 | -------------------------------------------------------------------------------- /docs/_config.yml: -------------------------------------------------------------------------------- 1 | #---- 2 | # Site 3 | 4 | title: Deid 5 | url: "https://pydicom.github.io/deid" 6 | baseurl: "/deid" 7 | google_analytics_key: "UA-100220676-1" 8 | show_full_navigation: false 9 | 10 | # Values for the jekyll-seo-tag gem (https://github.com/jekyll/jekyll-seo-tag) 11 | logo: /siteicon.png 12 | description: Documentation for Deid 13 | author: 14 | name: Vanessa Sochat 15 | email: vsochat@users.noreply.github.com 16 | twitter: vsoch 17 | social: 18 | name: vsoch 19 | links: 20 | - https://github.com/pydicom 21 | 22 | # The current hosting location of the docs 23 | repo: pydicom/deid 24 | reponame: deid 25 | 26 | # The current Docker container 27 | docker: pydicom/deid 28 | 29 | # ----- 30 | # Build 31 | 32 | timezone: Etc/UTC 33 | 34 | permalink: pretty 35 | 36 | plugins: 37 | - jekyll-sitemap 38 | - jekyll-seo-tag 39 | - jekyll-feed 40 | 41 | exclude: 42 | - Gemfile 43 | - Gemfile.lock 44 | - README.md 45 | - LICENSE 46 | 47 | collections: 48 | docs: 49 | title: Documentation 50 | permalink: /:path/ 51 | output: true 52 | 53 | defaults: 54 | - 55 | scope: 56 | path: "" 57 | values: 58 | layout: default 59 | - 60 | scope: 61 | path: "" 62 | type: "docs" 63 | values: 64 | seo: 65 | type: Article 66 | _comments: 67 | category: Group navigation links with this field 68 | order: Used to sort links in the navigation 69 | _options: 70 | content: 71 | width: 800 72 | height: 2000 73 | - 74 | scope: 75 | path: "" 76 | type: "posts" 77 | values: 78 | _comments: 79 | type: Marks the impact of this release 80 | 81 | # --------- 82 | # ChangeLog 83 | 84 | types: 85 | - minor 86 | - major 87 | -------------------------------------------------------------------------------- /docs/_data/links.yml: -------------------------------------------------------------------------------- 1 | footer: 2 | - 
name: ChangeLog 3 | url: changelog 4 | navigation: 5 | - name: Installation 6 | url: install/ 7 | - name: Getting Started 8 | url: getting-started/ 9 | - name: Examples 10 | url: examples/ 11 | - name: User Documentation 12 | url: user-docs/ 13 | - name: Development 14 | url: development/ 15 | - name: Contributing 16 | url: contributing/ 17 | 18 | external_navigation: 19 | - name: API Documentation 20 | url: https://deid.readthedocs.io/en/latest/ 21 | -------------------------------------------------------------------------------- /docs/_docs/_defaults.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 3 | category: 4 | order: 1 5 | --- 6 | -------------------------------------------------------------------------------- /docs/_docs/contributing/code.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Github Contribution 3 | category: Contributing 4 | order: 2 5 | --- 6 | 7 | Generally, for code contribution you should: 8 | 9 | 1. Open an issue to discuss what you would like to work on 10 | 2. Follow the existing docstring and coding styles 11 | 3. Make sure to write tests and format your code with black 12 | 4. Open a pull request against the master branch. 13 | 14 | See the repository `CONTRIBUTING.md` for these same details. 15 | 16 | ## Contributing a Custom Function 17 | 18 | Deid ships (as of version 0.2.3) with deid-provided functions that can be used in 19 | header parsing. To contribute a custom function you should do the following: 20 | 21 | 22 | 1. Add a function to deid/dicom/actions, ideally in the appropriate file (e.g., uid functions in uids.py, etc.) 23 | 2. Ensure your function is added to the lookup in `deid/dicom/actions/__init__.py` so it can be found. 24 | 3. Add a test to `deid/tests/test_dicom_funcs.py` that ensures your function works, with or without custom variables. 
25 | 26 | 27 | Generally, a custom function should accept the following variables: 28 | 29 | - dicom: the dicom file 30 | - item: expected to be the dictionary lookup of user provided values 31 | - field: the dicom field 32 | - value: the value to replace 33 | 34 | You can generally define a catch all `**kwargs` if you don't need a field. Finally, 35 | if you do provide a custom variable, you'll need to also provide a default (or exit on error 36 | if it's absolutely essential). As an example, if your custom function in the lookup is named 37 | `generate_sesame_street_character` the user might provide a custom argument as follows: 38 | 39 | ``` 40 | %header 41 | 42 | REPLACE fields:PatientID deid_func:generate_sesame_street_character name=elmo 43 | ``` 44 | 45 | Within the function, you can expect the extra (unparsed) key value pairs to be provided as "extras" and you 46 | can use the deid utils helper to parse these into a dictionary: 47 | 48 | ```python 49 | from deid.utils import parse_keyvalue_pairs 50 | import random 51 | 52 | def generate_sesame_street_character(item, value, field, dicom, **kwargs): 53 | """ 54 | Add a sesame street character by name, or randomly chosen. 55 | """ 56 | opts = parse_keyvalue_pairs(kwargs.get("extras")) 57 | 58 | default_names = ["grover", "elmo", "big-bird", "oscar-the-grouch"] 59 | name = opts.get("name") or random.choice(default_names) 60 | 61 | # The thing we return is the final value to replace the field with. 62 | return name 63 | ``` 64 | 65 | And that should be it! You are free to use (or not use) the item, value, field, and dicom. 66 | Please open an issue if you have any questions. 
67 | -------------------------------------------------------------------------------- /docs/_docs/contributing/docs.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Contributing to Documentation 3 | category: Contributing 4 | order: 1 5 | --- 6 | 7 | It's so great that you want to contribute! The documentation here includes information 8 | about using and developing {{ site.title }}, and it is hosted on GitHub, meaning that you 9 | can easily contribute via a [pull request](https://help.github.com/articles/about-pull-requests/). 10 | 11 | 12 | ## Getting Started 13 | 14 | 15 | ### Installing Dependencies 16 | Initially (on OS X), you will need to set up [Brew](http://brew.sh/), which is a 17 | package manager for OS X, and [Git](https://git-scm.com/). To install Brew and Git, 18 | run the following commands: 19 | 20 | ```bash 21 | /usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" 22 | brew install git 23 | ``` 24 | 25 | If you are on Debian/Ubuntu, then you can easily install git with `apt-get` 26 | 27 | ```bash 28 | apt-get update && apt-get install -y git 29 | ``` 30 | 31 | 32 | ### Fork the repo 33 | To contribute to the web-based documentation, you should obtain a GitHub account and *fork* the {{ site.title }} Documentation repository by clicking the *fork* button on the top right of the page. Once forked, you will want to clone the fork of the repo to your computer. Let's say my GitHub username is *meatball*: 34 | 35 | ```bash 36 | git clone https://github.com/meatball/{{ site.reponame }} 37 | cd {{ site.reponame }}/ 38 | ``` 39 | 40 | 41 | ### Install a local Jekyll server 42 | This step is required if you want to render your work locally before committing the changes. This is highly recommended to ensure that your changes will render properly and will be accepted. 
43 | 44 | ```bash 45 | brew install ruby 46 | gem install jekyll 47 | gem install bundler 48 | bundle install 49 | ``` 50 | 51 | Now you can see the site locally by running the server with jekyll: 52 | 53 | ```bash 54 | bundle exec jekyll serve 55 | ``` 56 | 57 | This will make the site viewable at http://localhost:4000{{ site.baseurl }}/. 58 | -------------------------------------------------------------------------------- /docs/_docs/development/image-format.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Add an Image Format 3 | category: Development 4 | order: 2 5 | --- 6 | 7 | 8 | When you add a new image format, it should go under "deid," akin to "dicom". 9 | 10 | ``` 11 | deid 12 | ├── data 13 | ├── ... 14 | └── dicom 15 | 16 | ``` 17 | 18 | This folder, and others like it, should contain the following files: 19 | 20 | - **config.json**: this is the default specification for how a dicom header is parsed, which primarily means additions, and a set of custom actions. 21 | - **__init__.py**: has the purpose of exposing module functions to the higher up folder for import. For example, the function `get_identifiers` in [header.py](header.py) is programmatically accessible via `from deid.dicom import get_identifiers` thanks to this file. If you create a new module with the equivalent functions, you should be fine to just copy this file, or import the functions directly from tasks.py in the module folder. 22 | - **header.py**: should contain functions for `get_identifiers`, which should return a dictionary with top level indexes by entity, and the value of each entity another dictionary indexed by the item ids. This data structure, if provided by the client, must be understood by the function `replace_identifiers`. 
23 | 24 | Note that, since we are working in Python, we will be using dicom headers 25 | that are mapped from the standard to pydicom, the entire mapping which is 26 | provided [here](https://github.com/pydicom/pydicom/blob/master/pydicom/_dicom_dict.py), 27 | and programmatically accessible via: 28 | 29 | ```python 30 | from pydicom._dicom_dict import DicomDictionary 31 | 32 | field_names = [] 33 | 34 | for key,entry in DicomDictionary.items(): 35 | if entry[3] != "Retired": 36 | field_names.append(entry[4]) 37 | ``` 38 | 39 | Since there are so many, we enforce (at least for dicom) the most conservative 40 | approach of removing header fields that the client has not asked anything special 41 | to be done for. Let's now talk about the [config.json](config.json). 42 | 43 | 44 | ## Config.json 45 | The base of the json has two classes, and they correspond with the actions of 46 | `get` and `put`, where a "get" is broadly the step of getting identifiers from 47 | the data, and the "put" is putting things back (and realistically, removing a lot). 48 | Here they are, completely empty: 49 | 50 | ```python 51 | { 52 | "get": {}, 53 | "put": {} 54 | } 55 | ``` 56 | 57 | The entire data structure isn't very large, and can be shown to you: 58 | 59 | ```python 60 | { 61 | "get": { 62 | 63 | 64 | "skip": ["PixelData"], 65 | "ids":{ 66 | "entity":"PatientID", 67 | "item":"SOPInstanceUID" 68 | } 69 | 70 | }, 71 | 72 | "put":{ 73 | 74 | "actions":[ 75 | 76 | {"action":"ADD","field":"PatientIdentityRemoved","value": "Yes"}, 77 | 78 | ] 79 | } 80 | } 81 | ``` 82 | 83 | Note that we don't need to specify the datatypes like "PixelData" or "Columns", 84 | or other fields related to the data. These fields are by default kept, as they 85 | are specific to the pixel data. For details see [this issue](https://github.com/pydicom/pydicom/issues/372). 
86 | 87 | 88 | ### Get 89 | 90 | If you read the details about get (usage for the client) see [get]({{ site.baseurl }}/getting-started/dicom-get/), 91 | you probably see some commonality. We have identified default fields in the header 92 | for entity and item under `['get']['ids']` (both which can be altered by the user via 93 | a function call) and then we skip over PixelData, because we don't want to return that 94 | for inspection, or have it in the list to include. If there are others you don't 95 | want returned, then add them to the skip list. Have caution that the user won't see 96 | the field returned, and likely won't ask for any action to be taken, meaning it will 97 | by default be blanked. 98 | 99 | 100 | ### Put 101 | Put is primarily concerned with actions, which as they are for the user, can be 102 | `ADD`, `KEEP`, `REMOVE`, or `BLANK`. For the default, we keep the useful pixel data, 103 | and specify that we have removed the patient identity. 104 | -------------------------------------------------------------------------------- /docs/_docs/development/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Development Notes 3 | permalink: /development/index.html 4 | category: Development 5 | order: 1 6 | --- 7 | 8 | This readme is intended to explain how the functions work (on the back end) for those 9 | wishing to create a module for a new image type. The basic idea is that each folder 10 | (module, eg `dicom`) contains a base processing template that tells the functions to 11 | `get_identifiers` how to process different header values for the datatype 12 | (e.g, DICOM). 
13 | 14 | - [Add an Image Format]({{ site.baseurl }}/development/add-format/) 15 | - [Linting and Formatting]({{ site.baseurl }}/development/linting-format/) 16 | -------------------------------------------------------------------------------- /docs/_docs/development/linting-format.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Linting and Formatting 3 | category: Development 4 | order: 3 5 | --- 6 | 7 | After installing deid to a local environment, you can use [pre-commit](https://pre-commit.com/) to help 8 | with linting and formatting. To do that: 9 | 10 | 11 | ```bash 12 | $ pip install -r .github/dev-requirements.txt 13 | ``` 14 | 15 | Then to run: 16 | 17 | ```bash 18 | $ pre-commit run --all-files 19 | ``` 20 | 21 | You can also install as a hook: 22 | 23 | ```bash 24 | $ pre-commit install 25 | ``` 26 | 27 | And it will run always before you commit. This is the same linting 28 | we use in our testing as well. 29 | -------------------------------------------------------------------------------- /docs/_docs/examples/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Introduction 3 | category: Examples 4 | permalink: /examples/index.html 5 | order: 1 6 | --- 7 | 8 | Here we have some examples to get you started! These examples correspond with 9 | the subfolders of the repository [examples](https://github.com/pydicom/deid/tree/master/examples) 10 | folder. 11 | 12 | 13 | ## The Deid Recipe 14 | 15 | In this small tutorial, we will walk through reading and interacting with a deid recipe, 16 | replacing header values, and saving new images. This is the recommended tutorial if you 17 | want a quick start overview of deid. 
18 | 19 | - [Code](https://github.com/pydicom/deid/tree/master/examples/recipe) on Github 20 | - [Tutorial]({{ site.baseurl }}/examples/recipe) 21 | - Recipes Files provided as [examples](https://github.com/pydicom/deid/tree/master/examples/deid) or [installed with deid](https://github.com/pydicom/deid/tree/master/deid/data). 22 | 23 | 24 | ## Header Manipulation 25 | 26 | - [Replace with Function]({{ site.baseurl }}/examples/func-replace/) shows how to dynamically replace or update header values from a function. 27 | - [Replace Sequences]({{ site.baseurl }}/examples/func-sequence-replace/) similar, but replacement of nested sequences. 28 | - [Header Expanders]({{ site.baseurl }}/examples/header-expanders/) can be used to select one or more header fields to apply an action to 29 | 30 | 31 | ## Cleaning Pixels 32 | 33 | See an example of just "inspection" (flagging images based on criteria) or "clean" 34 | (replacing values and writing black boxes after inspect) in these examples. 35 | 36 | - [Code](https://github.com/pydicom/deid/tree/master/examples/dicom/pixels) on Github 37 | 38 | 39 | ## Dicom Extract 40 | These examples generally refer to the action of "getting data out of dicom files and putting 41 | them somewhere else." 42 | 43 | - [Top Level Examples](https://github.com/pydicom/deid/tree/master/examples/dicom/dicom-extract) folder to see all scripts. 44 | - [Create Dicom CSV](https://github.com/pydicom/deid/blob/master/examples/dicom/dicom-extract/create-dicom-csv.py) meaning extraction of header values into comma separated values file. 45 | -------------------------------------------------------------------------------- /docs/_docs/getting-started/dicom-config.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 2. Configuration 3 | category: Getting Started 4 | order: 4 5 | --- 6 | 7 | Deid does two things, generally cleaning pixels and headers of dicom images. 
8 | We do this by way of a file called a deid recipe. Here is a quick example 9 | that is intended for dicom images: 10 | 11 | ``` 12 | FORMAT dicom 13 | 14 | %filter dangerouscookie 15 | 16 | LABEL Criteria for Dangerous Cookie 17 | contains PatientSex M 18 | + notequals OperatorsName bold bread 19 | coordinates 0,0,512,110 20 | 21 | %header 22 | 23 | ADD PatientIdentityRemoved YES 24 | REPLACE PatientID var:id 25 | REPLACE SOPInstanceUID var:source_id 26 | ``` 27 | 28 | Don't worry that we haven't talked about this format yet! Generally, you could 29 | probably guess that we are going to create a filter called "dangerouscookie" 30 | based on some set of criteria, and perform some actions on image headers. 31 | Let's first discuss each of the sections. 32 | 33 | 34 | ## Sections 35 | 36 | A section is a part of the recipe that starts with a "%". You can think of 37 | a section as a chunk of text that is parsed for some purpose. For example, 38 | `%filter` is a section where it's expected that you've defined filters, and 39 | `%header` is expected to have actions to update and change headers. 40 | 41 | 42 | | Section | Description | Example | 43 | |-------------|-----------------------------------------------------------|-----------------------------------------| 44 | | %filter | a named set of filter criteria used when running the DicomCleaner | %filter filterName | 45 | | %header | actions to be taken to update, or otherwise change an image header | %header | 46 | | %labels | extra metadata (key value pairs) to add to a recipe | Maintainer @vsoch | 47 | 48 | > What functions do the recipe sections correspond to? 49 | 50 | Good question! Let's talk about the two primary functions of deid, and how 51 | to write recipes to do those things. 
52 | 53 | 54 | ## Clean Pixels 55 | 56 | The general application flow of the clean function is the following: 57 | 58 | ``` 59 | [define criteria] -> [filter] -> [clean images] -> [save] 60 | ``` 61 | 62 | The "filter" tag broadly encompasses an inspection of the header data. The "clean" 63 | action corresponds with either: 64 | 65 | - an action applied to a header field, like "REPLACE FieldA with value B" or 66 | - replacing pixels in the image with a black box to hide text and other identifiers 67 | 68 | For reading more about how the Deid software does this by way of a file called 69 | a deid recipe, read about deid [recipe filters]({{ site.baseurl }}/user-docs/recipe-filters/). 70 | 71 | 72 | ## Clean Headers 73 | 74 | The general application flow to clean headers looks like this: 75 | 76 | ``` 77 | [define actions] -> [get identifiers] --> [update identifiers] --> [replace identifiers] 78 | ``` 79 | 80 | And then optionally save the updated files! 81 | 82 | More detail is provided about cleaning headers in the [recipe headers]({{ site.baseurl }}/user-docs/recipe-headers/) 83 | pages. 84 | 85 | 86 | > Where do I go from here? 87 | 88 | - [Recipe Filters]({{ site.baseurl }}/user-docs/recipe-filters/) 89 | - [Recipe Headers]({{ site.baseurl }}/user-docs/recipe-headers/) 90 | - [Recipe Labels]({{ site.baseurl }}/user-docs/recipe-labels/) 91 | -------------------------------------------------------------------------------- /docs/_docs/getting-started/dicom-loading.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 1. Loading Data 3 | category: Getting Started 4 | order: 3 5 | --- 6 | 7 | 8 | ## Data 9 | 10 | To run these examples, you'll need to install external deid-data. 
11 | 12 | ```bash 13 | $ pip install deid-data 14 | ``` 15 | 16 | 17 | 18 | ## Loading 19 | 20 | 21 | While there are different file organizations for dicom, we are going to take a simple 22 | approach of assuming some top level directory with some number of files within 23 | (yes, including subdirectories). For example, if you retrieved your data using a 24 | tool like [dcmqr](https://dcm4che.atlassian.net/wiki/display/d2/dcmqr) with a 25 | `C-MOVE`, then you might have a flat directory structure. Sometimes the 26 | files won't have an extension (for example, being named by a `SOPInstanceUID`). 27 | 28 | ```bash 29 | tree deid/data/dicom-cookies/ 30 | deid/data/dicom-cookies/ 31 | ├── image1.dcm 32 | ├── image2.dcm 33 | ├── image3.dcm 34 | ├── image4.dcm 35 | ├── image5.dcm 36 | ├── image6.dcm 37 | └── image7.dcm 38 | ``` 39 | 40 | It doesn't actually matter so much how your data is structured, 41 | you can use any method that you like to. You could technically 42 | just use `os.listdir` or `glob`: 43 | 44 | 45 | ``` 46 | from glob import glob 47 | import os 48 | 49 | base = "deid/data/dicom-cookies" 50 | 51 | dicom_files = glob("%s/*" %base) 52 | ['deid/data/dicom-cookies/image4.dcm', 53 | 'deid/data/dicom-cookies/image2.dcm', 54 | 'deid/data/dicom-cookies/image7.dcm', 55 | 'deid/data/dicom-cookies/image6.dcm', 56 | 'deid/data/dicom-cookies/image3.dcm', 57 | 'deid/data/dicom-cookies/image1.dcm', 58 | 'deid/data/dicom-cookies/image5.dcm'] 59 | 60 | os.listdir(base) 61 | ['image4.dcm', 62 | 'image2.dcm', 63 | 'image7.dcm', 64 | 'image6.dcm', 65 | 'image3.dcm', 66 | 'image1.dcm', 67 | 'image5.dcm'] 68 | ``` 69 | 70 | Notice anything that might trigger a bug with the above? You probably 71 | should ask for an absolute path. 
72 | 73 | ```python 74 | # For glob 75 | dicom_files = glob("%s/*" %base) 76 | dicom_files = [os.path.abspath(x) for x in dicom_files] 77 | 78 | # For os module 79 | dicom_files = [] 80 | for root, folders, files in os.walk(base): 81 | for file in files: 82 | fullpath = os.path.abspath(os.path.join(root,file)) 83 | dicom_files.append(fullpath) 84 | ``` 85 | 86 | We provide a few more robust functions to find datasets, because it's usually the case that you want 87 | to match a pattern of file, have subfolders, or want a validation 88 | done to be sure that each file is dicom. 89 | 90 | 91 | 92 | ## Find Datasets 93 | The function that we have provided will find all datasets matching some pattern 94 | (or all files recursively in a folder). You simply need to provide a list of top folders, 95 | a list of files and folders, or just files to start. For the purposes of this 96 | walkthrough, we will load data folders that are provided with the application. 97 | 98 | ```python 99 | from deid.data import get_dataset 100 | 101 | base = get_dataset("dicom-cookies") 102 | base 103 | '/home/vanessa/anaconda3/lib/python3.5/site-packages/som-0.1.1-py3.5.egg/som/data/dicom-cookies' 104 | ``` 105 | 106 | In the above, all we've done is retrieve the full path for a 107 | folder of dicom files. Let's try to read in the data: 108 | 109 | 110 | ```python 111 | from deid.dicom import get_files 112 | 113 | dicom_files = list(get_files(base)) 114 | DEBUG Found 7 contender files in dicom-cookies 115 | DEBUG Checking 7 dicom files for validation. 116 | Found 7 valid dicom files 117 | ``` 118 | 119 | We can also specify to not do the check, if we are absolutely sure. 120 | For larger datasets this might speed up processing a little bit. 121 | 122 | ```python 123 | dicom_files = list(get_files(base,check=False)) 124 | DEBUG Found 7 contender files in dicom-cookies 125 | ``` 126 | 127 | We can also give it a particular pattern to match. 
Since these files all end with 128 | `.dcm`, that's not so useful. Let's give a pattern to just match `image1.dcm`: 129 | 130 | 131 | ```python 132 | dicom_files = list(get_files(base,pattern="image1*")) 133 | DEBUG Found 1 contender files in dicom-cookies 134 | DEBUG Checking 1 dicom files for validation. 135 | Found 1 valid dicom files 136 | ``` 137 | 138 | At this point, you should have a list of dicom files. You might now want 139 | to [configure]({{ site.baseurl }}/getting-started/dicom-config) your deidentification. 140 | -------------------------------------------------------------------------------- /docs/_docs/getting-started/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Introduction 3 | category: Getting Started 4 | permalink: /getting-started/index.html 5 | order: 1 6 | --- 7 | 8 | Deid does two things: clean header and image data, and filter based on headers. 9 | These algorithms are not sophisticated - they perform their duties based on 10 | parsing header metadata. Here we will provide a simple walkthrough to get started 11 | with deid. In the following pages, we will show you how to load data, 12 | configure a custom recipe to deidentify and filter, and then clean pixels. 13 | 14 | ## Dicom Pipeline 15 | 16 | A complete deid pipeline typically means some level of cleaning and filtering, and then saving final images. 17 | 18 | - [Loading Data]({{ site.baseurl }}/getting-started/dicom-loading): The starting point for any de-identification process is to read in your files. 19 | - [Configuration]({{ site.baseurl }}/getting-started/dicom-config): You next want to tell the software how to handle various fields. 20 | - [Get Identifiers]({{ site.baseurl }}/getting-started/dicom-get): A request for identifiers is a get, or extraction of data that can be modified. 
21 | - [Clean Pixels]({{ site.baseurl }}/getting-started/dicom-pixels): Before you scrape headers, you might need to use them to flag images. 22 | - [Put Identifiers]({{ site.baseurl }}/getting-started/dicom-put): A "put" corresponds to putting cleaned headers back into the images. 23 | 24 | If you are interested in other examples (with snippets of code) see our [examples]({{ site.baseurl }}/examples/) pages. 25 | For more detailed user documentation on writing recipes, see the [user documentation]({{ site.baseurl }}/user-docs/) base. 26 | -------------------------------------------------------------------------------- /docs/_docs/install/docker.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Deid via Docker 3 | category: Installation 4 | order: 2 5 | --- 6 | 7 | To use the Docker container, you should first ensure that you have 8 | [installed Docker](https://www.docker.com/get-started) on your computer. 9 | 10 | For the container we will use, we currently provide a container hosted 11 | at [pydicom/deid](http://hub.docker.com/r/pydicom/deid) that you can use to 12 | quickly run deid without any installation of other dependencies 13 | or compiling on your host. 14 | 15 | When you are ready, try running {{ site.title }} using it. This first command will 16 | access the deid executable: 17 | 18 | ```bash 19 | $ docker run {{ site.docker }} --help 20 | usage: deid [-h] [--quiet] [--debug] [--version] [--outfolder OUTFOLDER] 21 | [--format {dicom}] [--overwrite] 22 | {version,inspect,identifiers} ... 23 | ... 
24 | ``` 25 | 26 | It might also be desired to shell into the container and interact with deid 27 | via python: 28 | 29 | ```bash 30 | $ docker run -it --entrypoint bash {{ site.docker }} 31 | (base) root@488f5e7f53a1:/code# 32 | ``` 33 | -------------------------------------------------------------------------------- /docs/_docs/install/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Installation 3 | category: Installation 4 | permalink: /install/index.html 5 | order: 1 6 | --- 7 | 8 | There are several ways for you to install {{ site.title }}, and your preference might 9 | depend on your operating system, or intended purpose. 10 | 11 | - [Docker]({{ site.baseurl }}/install/docker/): is a good solution for a reproducible install. You can run or develop deid without needing the dependencies on your system. 12 | - [Local]({{ site.baseurl }}/install/local/): is local installation of {{ site.title }}. You will need dependencies on your host. 13 | -------------------------------------------------------------------------------- /docs/_docs/install/local.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Local Installation 3 | category: Installation 4 | order: 3 5 | --- 6 | 7 | 8 | Let's walk through how to install {{ site.title }} locally. 9 | 10 | 11 | ## Install from Github 12 | 13 | First, clone the Github repository to your present working directory. If you 14 | are a developer, you might want to fork it first, and then clone your fork. 15 | 16 | ```bash 17 | git clone https://www.github.com/{{ site.repo }} 18 | cd {{ site.reponame }} 19 | ``` 20 | 21 | If you are updating, it's helpful to issue this command until you see it's no 22 | longer installed: 23 | 24 | ```bash 25 | pip uninstall deid 26 | ``` 27 | 28 | Then install with python! 
29 | 30 | ```bash 31 | python setup.py install 32 | ``` 33 | 34 | 35 | ## Install from Pypi 36 | 37 | If you want to install a particular version, the package is [available on pypi](https://pypi.org/project/deid/). 38 | 39 | ```bash 40 | pip install deid 41 | ``` 42 | 43 | Install a particular version 44 | 45 | ```bash 46 | pip install deid==0.1.19 47 | ``` 48 | -------------------------------------------------------------------------------- /docs/_docs/user-docs/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Documentation 3 | category: User Documentation 4 | permalink: /user-docs/index.html 5 | order: 1 6 | --- 7 | 8 | Along with the [getting started]({{ site.baseurl }}/getting-started/) guides, 9 | these pages will help you to use the deid software. 10 | 11 | ## Recipes 12 | 13 | - [Filters]({{ site.baseurl }}/user-docs/recipe-filters/): How to write sections to filter and flag images. 14 | - [Headers]({{ site.baseurl }}/user-docs/recipe-headers/): How to write header actions to update image headers. 15 | - [Groups]({{ site.baseurl }}/user-docs/recipe-groups/): for tags (including fields and values) that can be referenced in headers. 16 | - [Labels]({{ site.baseurl }}/user-docs/recipe-labels/): can be used to add metadata to your recipes. 17 | 18 | ## Client 19 | 20 | - [Client]({{ site.baseurl }}/user-docs/client): A command line client for basic cleaning. 21 | 22 | 23 | ## Tools 24 | 25 | - [Tags]({{ site.baseurl }}/user-docs/tags): A few helpful functions for searching and filtering tags. 
26 | -------------------------------------------------------------------------------- /docs/_docs/user-docs/recipe-funcs.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Recipe Functions 3 | category: User Documentation 4 | order: 5 5 | --- 6 | 7 | The [recipe headers]({{ site.baseurl }}/user-docs/recipe-headers/) page taught you 8 | how to write a recipe that has one or more commands to parse a dicom image header. 9 | For example, we might have defined a custom function [per the example here](https://pydicom.github.io/deid/examples/func-replace/) 10 | to replace patient info with a result from our custom function: 11 | 12 | ``` 13 | %header 14 | 15 | REPLACE fields:patient_info func:generate_uid 16 | ``` 17 | 18 | As of version 0.2.3 of deid, we have packaged functions along with deid that you can use without needing 19 | to write your own! Current functions are provided for: 20 | 21 | - generating unique identifiers 22 | - jittering 23 | - *let us know if you want to contribute or request a new function!* 24 | 25 | The current offerings include the following: 26 | 27 | | Name | Description | Extra Params | 28 | |---------------|-------------|--------------| 29 | | `simple_uuid` | Modify with a simple `uuid.uuid4()` string | None | 30 | | `dicom_uuid` | A more formal dicom uid that requires an org root | org_root | 31 | | `suffix_uuid` | Make the value the field name with a `uuid.uuid4()` suffix. | None | 32 | | `jitter` | The same as JITTER (grandfathered in) | days | 33 | 34 | 35 | ## A Simple UUID 36 | 37 | For a simple example, let's replace the recipe above with the deid provided "simple_uuid" function, 38 | which is simply going to replace the field of our choice with a `uuid.uuid4()` string in Python. 39 | That would look like this: 40 | 41 | ``` 42 | %header 43 | 44 | REPLACE fields:patient_info deid_func:simple_uuid 45 | ``` 46 | 47 | The only change is that we replaced `func` with `deid_func`. 
Deid will see this function 48 | is provided in its library, and grab it for use. 49 | 50 | 51 | ## A Pydicom UUID 52 | 53 | Pydicom provides [a function to generate a UUID](https://pydicom.github.io/pydicom/dev/reference/generated/pydicom.uid.generate_uid.html) 54 | and for most this is likely a good approach to take. The most basic usage (for one run) is to generate a random valid 55 | unique identifier: 56 | 57 | ``` 58 | %header 59 | 60 | REPLACE ReferringPhysicianName deid_func:pydicom_uuid 61 | ``` 62 | 63 | The default uses `stable_remapping=true`, which means we use the original UUID as entropy 64 | to be able to consistently return the same value between runs. You can disable it, however 65 | we do not recommend it (but it could be appropriate for your use case). 66 | 67 | You can also optionally define a custom prefix. Note that it needs to match the 68 | regular expression `^(0|[1-9][0-9]*)(\\.(0|[1-9][0-9]*))*\\.$` which (in spoken terms) 69 | is a number followed by a period, another number, and ending also in a period (e.g., `1.55.`). 70 | 71 | 72 | ``` 73 | %header 74 | 75 | REPLACE ReferringPhysicianName deid_func:pydicom_uuid prefix=1.55. 76 | ``` 77 | 78 | ## A Dicom UUID 79 | 80 | A more "formal" uuid function was added that requires an organization root. Your 81 | organization should have its own - for example the `PYMEDPHYS_ROOT_UID` is 82 | "1.2.826.0.1.3680043.10.188" so we might do: 83 | 84 | ``` 85 | %header 86 | 87 | REPLACE fields:patient_info deid_func:dicom_uuid org_root=1.2.826.0.1.3680043.10.188 88 | ``` 89 | Notice how we've provided an extra argument, `org_root` to be parsed. If you don't 90 | provide one an `anonymous-organization` will be used, which isn't technically an organization root. 
91 | 92 | 93 | ## A UUID Suffix 94 | 95 | If you simply want to take the current field and add a suffix to it as the value: 96 | 97 | ``` 98 | %header 99 | 100 | REPLACE fields:patient_info deid_func:suffix_uuid 101 | ``` 102 | This would make a final value that looks something like `patient_into-5897bd32-b4f3-4bda-9dc5-2d29e5688ea1` 103 | 104 | 105 | ## Jitter 106 | 107 | Jitter is intended for datetime fields, and technically you can just use the `JITTER` function provided 108 | natively in the recipe. We decided to include it here to add further customization. For example, you can provide 109 | variables for both days and years for a more fine-tuned jitter. We also wanted to add it here because 110 | technically it is a custom action. A jitter (as a custom deid function) might look like this: 111 | 112 | ``` 113 | %header 114 | 115 | REPLACE fields:AcquisitionDate deid_func:jitter days=1 116 | ``` 117 | 118 | or some number of years and days: 119 | 120 | ``` 121 | %header 122 | 123 | REPLACE fields:AcquisitionDate deid_func:jitter days=1 years=1 124 | ``` 125 | 126 | And that's it! If you want to request or contribute a custom (deid provided) function, please 127 | [open an issue](https://github.com/pydicom/deid/issues). 128 | -------------------------------------------------------------------------------- /docs/_docs/user-docs/recipe-groups.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Recipe Groups 3 | category: User Documentation 4 | order: 4 5 | --- 6 | 7 | The [recipe headers]({{ site.baseurl }}/user-docs/recipe-headers/) page taught you 8 | how to write a recipe that has one or more commands to parse a dicom image header. 
9 | For example, we might have: 10 | 11 | ``` 12 | FORMAT dicom 13 | 14 | %header 15 | 16 | ADD PatientIdentityRemoved YES 17 | BLANK OrdValue 18 | KEEP Modality 19 | REPLACE id var:entity_id 20 | JITTER StudyDate var:entity_timestamp 21 | REMOVE ReferringPhysicianName 22 | ``` 23 | 24 | But what if we want to optimize our parsing by creating custom groups of tags 25 | that are based on the field names, or the values? This is the intended use 26 | case for groups - a group is a group of tags, either identified by 27 | fields or values, for which an action can be applied. For the examples 28 | below, we will use this sample header provided by [@wetzelj](https://github.com/wetzelj). Thank you! 29 | 30 | ``` 31 | (0008,0050) : SH Len: 10 AccessionNumber Value: [999999999 ] 32 | (0008,0070) : LO Len: 8 Manufacturer Value: [SIEMENS ] 33 | (0008,1090) : LO Len: 22 ManufacturersModelName Value: [SOMATOM Definition AS+] 34 | (0009,0010) : LO Len: 20 PrivateCreator10xx Value: [SIEMENS CT VA1 DUMMY] 35 | (0010,0010) : PN Len: 14 PatientsName Value: [SIMPSON^HOMER^J^] 36 | (0010,0020) : LO Len: 12 PatientID Value: [000991991991 ] 37 | (0010,1000) : LO Len: 8 OtherPatientIDs Value: [E123456] 38 | (0010,1001) : PN Len: 8 OtherPatientNames Value: [E123456] 39 | (0010,21B0) : LT Len: 90 AdditionalPatientHistory Value: [MR SIMPSON LIKES DUFF BEER] 40 | (0019,1091) : DS Len: 6 Value: [E123456] 41 | (0019,1092) : DS Len: 6 Value: [M123456] 42 | ``` 43 | 44 | 45 | ## Fields 46 | 47 | A fields section looks like the following: 48 | 49 | ``` 50 | FORMAT dicom 51 | 52 | %fields patient_info 53 | FIELD PatientID 54 | FIELD startswith:OtherPatient 55 | FIELD endswith:Name 56 | ``` 57 | 58 | There would be multiple ways to do this (for example you could have used `startswith:Patient` to target both `PatientsName` 59 | and `PatientID`) but generally this will produce a list of fields that are named "patient_info." 
Here is the list 60 | rendered out pretty: 61 | 62 | ``` 63 | patient_info 64 | ------------ 65 | PatientID 66 | OtherPatientIDs 67 | OtherPatientNames 68 | PatientsName 69 | ``` 70 | 71 | We can then use this in recipe header sections where we want to apply an action to one or more fields 72 | as follows: 73 | 74 | ``` 75 | %header 76 | 77 | REPLACE fields:patient_info func:generate_uid 78 | ``` 79 | 80 | And this reads nicely as "Replace fields defined in patient_info to be the variable 81 | I'm defining with the function generate_uid (which should be added to each item 82 | after lookup)." 83 | 84 | This of course means that the actions supported for the `%fields` section include: 85 | 86 | - **FIELD** reference to a full name of a field, or any parsing of any [expander]({{ site.baseurl }}/examples/header-expanders/). 87 | 88 | 89 | ## Values 90 | 91 | It could be that you want to generate a list of _values_ extracted from the dicom 92 | to use as flags for checking other fields. For example, if I know that the Patient's ID 93 | is in PatientID, I would want to extract the patient's name from that field, 94 | and then search across fields looking for any instance of a first or last name. 95 | This is the purpose of the `%values` group. Instead of defining rules to create 96 | a list of fields, we write rules to extract values. Let's take a look at an 97 | example: 98 | 99 | ``` 100 | %values patient_info 101 | SPLIT PatientsName splitval='^';minlength='4' 102 | FIELD PatientID 103 | FIELD OtherPatientIDs 104 | ``` 105 | 106 | You'll notice that we have `FIELD` again, but since this is in a `%values` 107 | section, this is saying "Find the fields Patient ID and Other Patient IDs, and whatever 108 | _values_ you find there, add to the list `patient_info`." 
You'll also 109 | notice that the first line uses a new action `SPLIT`: 110 | 111 | ``` 112 | SPLIT PatientsName splitval='^';minlength='4' 113 | ``` 114 | 115 | This action says to start with the field `PatientsName`, split based on the `^` 116 | character, and keep results that have a length greater than or equal to 4. 117 | Let's talk about these actions in detail. Field is the same, but we also have split: 118 | 119 | - **FIELD** refers to the full name of a field, or any parsing of any [expander]({{ site.baseurl }}/examples/header-expanders/). Instead of including these field names, we grab the values from them, and add to our list. 120 | - **SPLIT** indicates that we want to apply a split operation to a field (or expansion of fields) and for all, to split by a character (defaults to a space) and take a minimum length (defaults to 1). 121 | 122 | The result of the above operation might look like this - and remember that this is a list of values. 123 | 124 | ``` 125 | patient_info 126 | ------------ 127 | HOMER 128 | SIMPSON 129 | ``` 130 | 131 | You could then reference these values for some header action. For example, let's say 132 | we want to remove any field that contains these identifiers: 133 | 134 | ``` 135 | %header 136 | REMOVE values:patient_info 137 | ``` 138 | 139 | The implication of the above is that we are checking all fields for these values. 140 | This would be functionally equivalent: 141 | 142 | ``` 143 | %header 144 | REMOVE ALL values:patient_info 145 | ``` 146 | 147 | Or you could choose some other field name, or field expander, if you want to limit 148 | the removal to some subset. 149 | 150 | If you haven't yet, take a look at how to generate a basic [get]({{ site.baseurl }}/getting-started/dicom-get/), 151 | which will get a set of fields and values from your dicom files. 
152 | -------------------------------------------------------------------------------- /docs/_docs/user-docs/recipe-labels.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Recipe Labels 3 | category: User Documentation 4 | order: 3 5 | --- 6 | 7 | The `%labels` section is a way for the user to supply custom commands to an 8 | application that aren't relevant to the header or pixels. For example, if I 9 | wanted to carry around a version or a maintainer address, I could do that as follows: 10 | 11 | ``` 12 | FORMAT dicom 13 | 14 | %header 15 | 16 | ADD PatientIdentityRemoved YES 17 | REPLACE PatientID cookie-monster 18 | 19 | %labels 20 | ADD MAINTAINER vsochat@stanford.edu 21 | ADD VERSION 1.0 22 | ``` 23 | 24 | As you can see, the labels follow the same action commands as before, in the case 25 | that the application needs them. In case you are interested in what the 26 | application sees when it reads the file above (if you are a developer) it looks like this: 27 | 28 | ``` 29 | { 30 | "labels":[ 31 | { 32 | "field":"MAINTAINER", 33 | "value":"vsochat@stanford.edu", 34 | "action":"ADD" 35 | }, 36 | { 37 | "field":"VERSION", 38 | "value":"1.0", 39 | "action":"ADD" 40 | } 41 | ], 42 | 43 | "format":"dicom", 44 | "header":[ 45 | { 46 | "field":"PatientIdentityRemoved", 47 | "value":"YES", 48 | "action":"ADD" 49 | }, 50 | { 51 | "field":"PatientID", 52 | "value":"cookie-monster", 53 | "action":"REPLACE" 54 | } 55 | ] 56 | } 57 | ``` 58 | 59 | And you are free to map the actions (eg, `ADD`, `REMOVE`) onto whatever functionality 60 | is relevant to your application, or just skip the action entirely and use the 61 | fields and values.
62 | -------------------------------------------------------------------------------- /docs/_docs/user-docs/tags.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Tags 3 | category: User Documentation 4 | order: 7 5 | --- 6 | 7 | It is sometimes helpful to be able to find a particular tag. [Pydicom](https://www.github.com/pydicom/pydicom) 8 | has done a great job of providing a dictionary of tags: 9 | 10 | ```python 11 | from pydicom._dicom_dict import DicomDictionary 12 | ``` 13 | 14 | 15 | ## Search By Name 16 | We extend that dictionary here to make it easy to find tags. For example, 17 | we can use a function to search based on name: 18 | 19 | ```python 20 | from deid.dicom.tags import find_tag 21 | find_tag('Modality') 22 | 23 | [('CS', '1', 'Modality', '', 'Modality'), 24 | ('SQ', '1', 'Modality LUT Sequence', '', 'ModalityLUTSequence'), 25 | ('LO', '1', 'Modality LUT Type', '', 'ModalityLUTType'), 26 | ('CS', '1', 'Equipment Modality', '', 'EquipmentModality')] 27 | ``` 28 | 29 | We can also limit to a particular VR, or VM: 30 | 31 | ```python 32 | find_tag('Modality', VR='CS') 33 | [('CS', '1', 'Modality', '', 'Modality'), 34 | ('CS', '1', 'Equipment Modality', '', 'EquipmentModality')] 35 | ``` 36 | 37 | 38 | ## Search Repeaters (Retired) 39 | If you want to search the set of Repeaters (tags that I think pydicom doesn't use / calls retired), 40 | then set `retired=True`. For example, when I search for `Overlay Description` the normal 41 | way I get nothing, but setting this flag returns the (old) value. This would be useful 42 | if you need to look up a tag for an older dataset.
43 | 44 | ```python 45 | find_tag('Overlay Description') 46 | 47 | find_tag('Overlay Description',retired=True) 48 | [('LO', '1', 'Overlay Description', '', 'OverlayDescription')] 49 | ``` 50 | -------------------------------------------------------------------------------- /docs/_includes/head.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {% seo %} 6 | {% feed_meta %} 7 | 8 | 9 | 10 | 11 | 12 | {% if jekyll.environment == 'production' and site.google_analytics_key %} 13 | 14 | 15 | 22 | {% endif %} 23 | 24 | -------------------------------------------------------------------------------- /docs/_includes/navigation.html: -------------------------------------------------------------------------------- 1 | 32 | -------------------------------------------------------------------------------- /docs/_layouts/default.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | {% include head.html %} 5 | 6 | 7 |
8 |

9 | {{ site.title | downcase }} 10 | 11 | 12 | 13 |

14 | 15 |
16 | 17 | 18 |
19 | 20 | {% include navigation.html %} 21 |
22 | 23 |
24 | 28 |
29 | {{ content }} 30 |
31 |
32 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /docs/_posts/2018-12-09-docs.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Documentation Development 3 | type: major 4 | --- 5 | 6 | This changelog represents all changes on and before December 9, 2018. 7 | 8 | **Major Updates (0.1.21)** 9 | 10 | - the general "identifiers" module has been removed to clean up the library 11 | - perform_action should only be imported from deid.dicom.actions 12 | - "utils" module was added to have get_timestamp, parse_value 13 | - option to define a function is added, "func:func_name" 14 | 15 | **Changes:** 16 | 17 | - fixing client bug, redoing docs to be better organized (0.1.20) 18 | - Removing MediaStorageSOPInstanceUID from file_meta, issue #72 (0.1.19) 19 | - need to clean up temporary directory (mkdtemp), issue #68 (0.1.18) 20 | - fixing issue #65, save for compressed data (0.1.17) 21 | - matplotlib must be less than or equal to 2.1.2 for install (0.1.16) 22 | - fixing bug with clean coordinate flipping rectangle 23 | - Fixing bug with saving self.cleaned (0.1.15) 24 | - Allowing for datasets to be passed in functions (not necessary for files) (0.1.14) 25 | - index should be full path in header.py (0.1.13) 26 | - pydicom bumped to install latest (1.0.2) (0.1.12) 27 | - ensuring that ids for images are full paths (0.1.11) 28 | - addition of the DeidRecipe class to better interact with and combine deid recipe files. 29 | - the get_files function now returns a generator instead of a list. 30 | 31 | **0.1.1** 32 | 33 | **additions** 34 | - addition of this CHANGELOG and an AUTHORS and CONTRIBUTING file to properly open source the project. 35 | **bug fix** 36 | - when the user specifies a deid recipe, instead of adding it to a base template we honor the choice and don't append a base. 
37 | **creation** 38 | - this is the initial creation of deid, including recipes for cleaning of image headers and flagging of potential phi in pixels. 39 | -------------------------------------------------------------------------------- /docs/_posts/_defaults.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 3 | type: major 4 | --- 5 | 6 | This release introduces 7 | 8 | **Features:** 9 | 10 | * 11 | 12 | **Fixes:** 13 | 14 | * 15 | -------------------------------------------------------------------------------- /docs/api_docs/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pydicom/deid/fa4731f79d7002d51e31cbec6f2586d87afb479d/docs/api_docs/.nojekyll -------------------------------------------------------------------------------- /docs/api_docs/_static/theme.css: -------------------------------------------------------------------------------- 1 | @import url("theme.css"); 2 | 3 | .highlight a { 4 | text-decoration: underline; 5 | } 6 | 7 | .deprecated p { 8 | padding: 10px 7px 10px 10px; 9 | color: #b94a48; 10 | background-color: #F3E5E5; 11 | border: 1px solid #eed3d7; 12 | } 13 | 14 | 15 | /* Sidebar */ 16 | 17 | .wy-menu-vertical a { 18 | color: #333 !important; 19 | } 20 | 21 | .wy-menu-vertical a:hover, 22 | .wy-menu-vertical a:active, 23 | .wy-menu-vertical a:focus { 24 | background-color: #ffdf5a; 25 | } 26 | 27 | .wy-nav-side { 28 | background: #fcfcfc !important; 29 | } 30 | 31 | .wy-side-nav-search>a, .wy-side-nav-search .wy-dropdown>a { 32 | color: #3f7cad !important 33 | } 34 | 35 | .version { 36 | color: #3f7cad !important; 37 | } 38 | 39 | .wy-side-nav-search { 40 | background-color: white !important; 41 | } 42 | 43 | .deprecated p span.versionmodified { 44 | font-weight: bold; 45 | } 46 | 47 | .wy-nav-content { 48 | max-width: 1200px !important; 49 | } 50 | 51 | 52 | /* Spinx Gallery */ 53 | 54 | div.sphx-glr-download a 
{ 55 | background-image: none !important; 56 | background-color: #ffdf5a !important; 57 | border: 1px solid #cea455 !important; 58 | min-width: 20em; 59 | } 60 | 61 | div.sphx-glr-download a:hover { 62 | box-shadow: none !important; 63 | background-color: #3f7cad !important; 64 | color: white !important; 65 | border: 1px solid #3f7cad !important; 66 | } 67 | 68 | .section { 69 | padding-bottom:20px !important; 70 | } 71 | 72 | 73 | /* pydicom custom */ 74 | 75 | a:hover { 76 | text-decoration: underline; 77 | } 78 | 79 | div.rst-content a:hover { 80 | color: #00b0e4 !important; 81 | } 82 | 83 | a.reference code.xref { 84 | color: #2980B9 !important; 85 | background: none; 86 | font-size: 95%; 87 | } 88 | 89 | a.reference code.xref:hover { 90 | color: #00b0e4 !important; 91 | text-decoration: underline; 92 | } 93 | 94 | code.literal { 95 | color: #000000 !important; 96 | border: none !important; 97 | background: #f2f2f2; 98 | font-size: 90%; 99 | font-weight: 500 !important; 100 | padding: 2px 3px 2px 3px; 101 | } 102 | 103 | ul p { 104 | margin-bottom: 0px !important; 105 | } 106 | -------------------------------------------------------------------------------- /docs/api_docs/_templates/class.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}============== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | 8 | {% block methods %} 9 | .. automethod:: __init__ 10 | {% endblock %} 11 | 12 | .. include:: {{module}}.{{objname}}.examples 13 | 14 | .. raw:: html 15 | 16 |
17 | -------------------------------------------------------------------------------- /docs/api_docs/_templates/function.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}==================== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autofunction:: {{ objname }} 7 | 8 | .. include:: {{module}}.{{objname}}.examples 9 | 10 | .. raw:: html 11 | 12 |
13 | -------------------------------------------------------------------------------- /docs/api_docs/assets/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pydicom/deid/fa4731f79d7002d51e31cbec6f2586d87afb479d/docs/api_docs/assets/favicon.ico -------------------------------------------------------------------------------- /docs/api_docs/assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pydicom/deid/fa4731f79d7002d51e31cbec6f2586d87afb479d/docs/api_docs/assets/logo.png -------------------------------------------------------------------------------- /docs/api_docs/docs-requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | sphinxcontrib-napoleon 3 | sphinx-argparse 4 | sphinx_rtd_theme 5 | docutils 6 | recommonmark 7 | configargparse 8 | appdirs 9 | Jinja2<3.1 10 | -------------------------------------------------------------------------------- /docs/api_docs/index.rst: -------------------------------------------------------------------------------- 1 | .. _manual-main: 2 | 3 | ==== 4 | Deid 5 | ==== 6 | 7 | .. image:: https://img.shields.io/github/stars/pydicom/deid?style=social 8 | :alt: GitHub stars 9 | :target: https://github.com/pydicom/deid/stargazers 10 | 11 | 12 | This is the developer documentation (meaning docstrings) for deid. 13 | For user guides and tutorials see `the main documentation `_. 14 | To see the code, head over to the `repository `_. 15 | 16 | ------- 17 | Support 18 | ------- 19 | 20 | * For **bugs and feature requests**, please use the `issue tracker `_. 21 | * For **contributions**, visit deid on `Github `_. 22 | 23 | --------- 24 | Resources 25 | --------- 26 | 27 | `GitHub Repository `_ 28 | The code on GitHub. 29 | 30 | `Documentation `_ 31 | The main user guide.
32 | 33 | `Pydicom `_ 34 | The core pydicom to read dicom in Python. 35 | 36 | 37 | .. toctree:: 38 | :caption: API Reference 39 | :name: api-reference 40 | :maxdepth: 3 41 | 42 | source/deid.config.rst 43 | source/deid.data.rst 44 | source/deid.dicom.actions.rst 45 | source/deid.dicom.pixels.rst 46 | source/deid.dicom.rst 47 | source/deid.logger.rst 48 | source/deid.main.rst 49 | source/deid.utils.rst 50 | -------------------------------------------------------------------------------- /docs/api_docs/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/pydicom/deid@master#egg=deid 2 | sphinx 3 | sphinxcontrib-napoleon 4 | sphinx-argparse 5 | sphinx_rtd_theme 6 | docutils==0.12 7 | recommonmark 8 | configargparse 9 | appdirs 10 | Jinja2<3.1 11 | -------------------------------------------------------------------------------- /docs/api_docs/source/deid.config.rst: -------------------------------------------------------------------------------- 1 | deid.config package 2 | =================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | deid.config.standards module 8 | ---------------------------- 9 | 10 | .. automodule:: deid.config.standards 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | deid.config.utils module 16 | ------------------------ 17 | 18 | .. automodule:: deid.config.utils 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: deid.config 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /docs/api_docs/source/deid.data.rst: -------------------------------------------------------------------------------- 1 | deid.data package 2 | ================= 3 | 4 | Module contents 5 | --------------- 6 | 7 | .. 
automodule:: deid.data 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | -------------------------------------------------------------------------------- /docs/api_docs/source/deid.dicom.actions.rst: -------------------------------------------------------------------------------- 1 | deid.dicom.actions package 2 | ========================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | deid.dicom.actions.jitter module 8 | -------------------------------- 9 | 10 | .. automodule:: deid.dicom.actions.jitter 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | deid.dicom.actions.uids module 16 | ------------------------------ 17 | 18 | .. automodule:: deid.dicom.actions.uids 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: deid.dicom.actions 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /docs/api_docs/source/deid.dicom.pixels.rst: -------------------------------------------------------------------------------- 1 | deid.dicom.pixels package 2 | ========================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | deid.dicom.pixels.clean module 8 | ------------------------------ 9 | 10 | .. automodule:: deid.dicom.pixels.clean 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | deid.dicom.pixels.detect module 16 | ------------------------------- 17 | 18 | .. automodule:: deid.dicom.pixels.detect 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. 
automodule:: deid.dicom.pixels 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /docs/api_docs/source/deid.dicom.rst: -------------------------------------------------------------------------------- 1 | deid.dicom package 2 | ================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | deid.dicom.actions 11 | deid.dicom.pixels 12 | 13 | Submodules 14 | ---------- 15 | 16 | deid.dicom.fields module 17 | ------------------------ 18 | 19 | .. automodule:: deid.dicom.fields 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | 24 | deid.dicom.filter module 25 | ------------------------ 26 | 27 | .. automodule:: deid.dicom.filter 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | 32 | deid.dicom.groups module 33 | ------------------------ 34 | 35 | .. automodule:: deid.dicom.groups 36 | :members: 37 | :undoc-members: 38 | :show-inheritance: 39 | 40 | deid.dicom.header module 41 | ------------------------ 42 | 43 | .. automodule:: deid.dicom.header 44 | :members: 45 | :undoc-members: 46 | :show-inheritance: 47 | 48 | deid.dicom.parser module 49 | ------------------------ 50 | 51 | .. automodule:: deid.dicom.parser 52 | :members: 53 | :undoc-members: 54 | :show-inheritance: 55 | 56 | deid.dicom.tags module 57 | ---------------------- 58 | 59 | .. automodule:: deid.dicom.tags 60 | :members: 61 | :undoc-members: 62 | :show-inheritance: 63 | 64 | deid.dicom.utils module 65 | ----------------------- 66 | 67 | .. automodule:: deid.dicom.utils 68 | :members: 69 | :undoc-members: 70 | :show-inheritance: 71 | 72 | deid.dicom.validate module 73 | -------------------------- 74 | 75 | .. automodule:: deid.dicom.validate 76 | :members: 77 | :undoc-members: 78 | :show-inheritance: 79 | 80 | Module contents 81 | --------------- 82 | 83 | .. 
automodule:: deid.dicom 84 | :members: 85 | :undoc-members: 86 | :show-inheritance: 87 | -------------------------------------------------------------------------------- /docs/api_docs/source/deid.logger.rst: -------------------------------------------------------------------------------- 1 | deid.logger package 2 | =================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | deid.logger.message module 8 | -------------------------- 9 | 10 | .. automodule:: deid.logger.message 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | deid.logger.progress module 16 | --------------------------- 17 | 18 | .. automodule:: deid.logger.progress 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: deid.logger 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /docs/api_docs/source/deid.main.rst: -------------------------------------------------------------------------------- 1 | deid.main package 2 | ================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | deid.main.identifiers module 8 | ---------------------------- 9 | 10 | .. automodule:: deid.main.identifiers 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | deid.main.inspect module 16 | ------------------------ 17 | 18 | .. automodule:: deid.main.inspect 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: deid.main 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /docs/api_docs/source/deid.rst: -------------------------------------------------------------------------------- 1 | deid package 2 | ============ 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. 
toctree:: 8 | :maxdepth: 4 9 | 10 | deid.config 11 | deid.data 12 | deid.dicom 13 | deid.logger 14 | deid.main 15 | deid.utils 16 | 17 | Submodules 18 | ---------- 19 | 20 | deid.version module 21 | ------------------- 22 | 23 | .. automodule:: deid.version 24 | :members: 25 | :undoc-members: 26 | :show-inheritance: 27 | 28 | Module contents 29 | --------------- 30 | 31 | .. automodule:: deid 32 | :members: 33 | :undoc-members: 34 | :show-inheritance: 35 | -------------------------------------------------------------------------------- /docs/api_docs/source/deid.tests.rst: -------------------------------------------------------------------------------- 1 | deid.tests package 2 | ================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | deid.tests.Xtest\_dicom\_header module 8 | -------------------------------------- 9 | 10 | .. automodule:: deid.tests.Xtest_dicom_header 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | deid.tests.common module 16 | ------------------------ 17 | 18 | .. automodule:: deid.tests.common 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | deid.tests.test\_clean module 24 | ----------------------------- 25 | 26 | .. automodule:: deid.tests.test_clean 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | deid.tests.test\_clean\_pixel\_dimensions module 32 | ------------------------------------------------ 33 | 34 | .. automodule:: deid.tests.test_clean_pixel_dimensions 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | deid.tests.test\_config module 40 | ------------------------------ 41 | 42 | .. automodule:: deid.tests.test_config 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | deid.tests.test\_data module 48 | ---------------------------- 49 | 50 | .. 
automodule:: deid.tests.test_data 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | deid.tests.test\_deid\_recipe module 56 | ------------------------------------ 57 | 58 | .. automodule:: deid.tests.test_deid_recipe 59 | :members: 60 | :undoc-members: 61 | :show-inheritance: 62 | 63 | deid.tests.test\_dicom\_fields module 64 | ------------------------------------- 65 | 66 | .. automodule:: deid.tests.test_dicom_fields 67 | :members: 68 | :undoc-members: 69 | :show-inheritance: 70 | 71 | deid.tests.test\_dicom\_funcs module 72 | ------------------------------------ 73 | 74 | .. automodule:: deid.tests.test_dicom_funcs 75 | :members: 76 | :undoc-members: 77 | :show-inheritance: 78 | 79 | deid.tests.test\_dicom\_groups module 80 | ------------------------------------- 81 | 82 | .. automodule:: deid.tests.test_dicom_groups 83 | :members: 84 | :undoc-members: 85 | :show-inheritance: 86 | 87 | deid.tests.test\_dicom\_tags module 88 | ----------------------------------- 89 | 90 | .. automodule:: deid.tests.test_dicom_tags 91 | :members: 92 | :undoc-members: 93 | :show-inheritance: 94 | 95 | deid.tests.test\_dicom\_utils module 96 | ------------------------------------ 97 | 98 | .. automodule:: deid.tests.test_dicom_utils 99 | :members: 100 | :undoc-members: 101 | :show-inheritance: 102 | 103 | deid.tests.test\_file\_meta module 104 | ---------------------------------- 105 | 106 | .. automodule:: deid.tests.test_file_meta 107 | :members: 108 | :undoc-members: 109 | :show-inheritance: 110 | 111 | deid.tests.test\_filter\_detect module 112 | -------------------------------------- 113 | 114 | .. automodule:: deid.tests.test_filter_detect 115 | :members: 116 | :undoc-members: 117 | :show-inheritance: 118 | 119 | deid.tests.test\_replace\_identifiers module 120 | -------------------------------------------- 121 | 122 | .. 
automodule:: deid.tests.test_replace_identifiers 123 | :members: 124 | :undoc-members: 125 | :show-inheritance: 126 | 127 | deid.tests.test\_utils module 128 | ----------------------------- 129 | 130 | .. automodule:: deid.tests.test_utils 131 | :members: 132 | :undoc-members: 133 | :show-inheritance: 134 | 135 | deid.tests.test\_utils\_files module 136 | ------------------------------------ 137 | 138 | .. automodule:: deid.tests.test_utils_files 139 | :members: 140 | :undoc-members: 141 | :show-inheritance: 142 | 143 | Module contents 144 | --------------- 145 | 146 | .. automodule:: deid.tests 147 | :members: 148 | :undoc-members: 149 | :show-inheritance: 150 | -------------------------------------------------------------------------------- /docs/api_docs/source/deid.utils.rst: -------------------------------------------------------------------------------- 1 | deid.utils package 2 | ================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | deid.utils.actions module 8 | ------------------------- 9 | 10 | .. automodule:: deid.utils.actions 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | deid.utils.fileio module 16 | ------------------------ 17 | 18 | .. automodule:: deid.utils.fileio 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: deid.utils 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /docs/api_docs/source/modules.rst: -------------------------------------------------------------------------------- 1 | deid 2 | ==== 3 | 4 | .. 
toctree:: 5 | :maxdepth: 4 6 | 7 | deid 8 | -------------------------------------------------------------------------------- /docs/apidoc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # If the modules changed, the content of "source" should be backed up and 3 | # new files generated (to update) by doing: 4 | # 5 | rm api_doc/source/deid*.rst 6 | sphinx-apidoc -o api_docs/source/ ../deid 7 | -------------------------------------------------------------------------------- /docs/assets/fonts/helveticaneueout-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pydicom/deid/fa4731f79d7002d51e31cbec6f2586d87afb479d/docs/assets/fonts/helveticaneueout-webfont.woff -------------------------------------------------------------------------------- /docs/assets/fonts/helveticaneueout-webfont.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pydicom/deid/fa4731f79d7002d51e31cbec6f2586d87afb479d/docs/assets/fonts/helveticaneueout-webfont.woff2 -------------------------------------------------------------------------------- /docs/assets/img/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pydicom/deid/fa4731f79d7002d51e31cbec6f2586d87afb479d/docs/assets/img/apple-touch-icon.png -------------------------------------------------------------------------------- /docs/assets/img/emblem.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /docs/assets/img/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pydicom/deid/fa4731f79d7002d51e31cbec6f2586d87afb479d/docs/assets/img/favicon.ico 
-------------------------------------------------------------------------------- /docs/assets/img/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pydicom/deid/fa4731f79d7002d51e31cbec6f2586d87afb479d/docs/assets/img/favicon.png -------------------------------------------------------------------------------- /docs/assets/img/interaction-grid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pydicom/deid/fa4731f79d7002d51e31cbec6f2586d87afb479d/docs/assets/img/interaction-grid.png -------------------------------------------------------------------------------- /docs/assets/img/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pydicom/deid/fa4731f79d7002d51e31cbec6f2586d87afb479d/docs/assets/img/logo.png -------------------------------------------------------------------------------- /docs/assets/img/menu.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /docs/assets/img/open-source-halloween-2021.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pydicom/deid/fa4731f79d7002d51e31cbec6f2586d87afb479d/docs/assets/img/open-source-halloween-2021.png -------------------------------------------------------------------------------- /docs/assets/img/siteicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pydicom/deid/fa4731f79d7002d51e31cbec6f2586d87afb479d/docs/assets/img/siteicon.png -------------------------------------------------------------------------------- /docs/assets/img/touch-icon.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pydicom/deid/fa4731f79d7002d51e31cbec6f2586d87afb479d/docs/assets/img/touch-icon.png -------------------------------------------------------------------------------- /docs/assets/js/search.js: -------------------------------------------------------------------------------- 1 | --- 2 | layout: null 3 | --- 4 | (function () { 5 | function getQueryVariable(variable) { 6 | var query = window.location.search.substring(1), 7 | vars = query.split("&"); 8 | 9 | for (var i = 0; i < vars.length; i++) { 10 | var pair = vars[i].split("="); 11 | 12 | if (pair[0] === variable) { 13 | return decodeURIComponent(pair[1].replace(/\+/g, '%20')).trim(); 14 | } 15 | } 16 | } 17 | 18 | function getPreview(query, content, previewLength) { 19 | previewLength = previewLength || (content.length * 2); 20 | 21 | var parts = query.split(" "), 22 | match = content.toLowerCase().indexOf(query.toLowerCase()), 23 | matchLength = query.length, 24 | preview; 25 | 26 | // Find a relevant location in content 27 | for (var i = 0; i < parts.length; i++) { 28 | if (match >= 0) { 29 | break; 30 | } 31 | 32 | match = content.toLowerCase().indexOf(parts[i].toLowerCase()); 33 | matchLength = parts[i].length; 34 | } 35 | 36 | // Create preview 37 | if (match >= 0) { 38 | var start = match - (previewLength / 2), 39 | end = start > 0 ? match + matchLength + (previewLength / 2) : previewLength; 40 | 41 | preview = content.substring(start, end).trim(); 42 | 43 | if (start > 0) { 44 | preview = "..." + preview; 45 | } 46 | 47 | if (end < content.length) { 48 | preview = preview + "..."; 49 | } 50 | 51 | // Highlight query parts 52 | preview = preview.replace(new RegExp("(" + parts.join("|") + ")", "gi"), "$1"); 53 | } else { 54 | // Use start of content if no match found 55 | preview = content.substring(0, previewLength).trim() + (content.length > previewLength ? "..." 
: ""); 56 | } 57 | 58 | return preview; 59 | } 60 | 61 | function displaySearchResults(results, query) { 62 | var searchResultsEl = document.getElementById("search-results"), 63 | searchProcessEl = document.getElementById("search-process"); 64 | 65 | if (results.length) { 66 | var resultsHTML = ""; 67 | results.forEach(function (result) { 68 | var item = window.data[result.ref], 69 | contentPreview = getPreview(query, item.content, 170), 70 | titlePreview = getPreview(query, item.title); 71 | 72 | resultsHTML += "
  • " + titlePreview + "

    " + contentPreview + "

  • "; 73 | }); 74 | 75 | searchResultsEl.innerHTML = resultsHTML; 76 | searchProcessEl.innerText = "Showing"; 77 | } else { 78 | searchResultsEl.style.display = "none"; 79 | searchProcessEl.innerText = "No"; 80 | } 81 | } 82 | 83 | window.index = lunr(function () { 84 | this.field("id"); 85 | this.field("title", {boost: 10}); 86 | this.field("category"); 87 | this.field("url"); 88 | this.field("content"); 89 | }); 90 | 91 | var query = decodeURIComponent((getQueryVariable("q") || "").replace(/\+/g, "%20")), 92 | searchQueryContainerEl = document.getElementById("search-query-container"), 93 | searchQueryEl = document.getElementById("search-query"), 94 | searchInputEl = document.getElementById("search-input"); 95 | 96 | searchInputEl.value = query; 97 | searchQueryEl.innerText = query; 98 | searchQueryContainerEl.style.display = "inline"; 99 | 100 | for (var key in window.data) { 101 | window.index.add(window.data[key]); 102 | } 103 | 104 | displaySearchResults(window.index.search(query), query); // Hand the results off to be displayed 105 | })(); 106 | -------------------------------------------------------------------------------- /docs/pages/404.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Not Found 3 | permalink: /404.html 4 | sitemap: false 5 | --- 6 | 7 | This page doesn't exist! 8 | -------------------------------------------------------------------------------- /docs/pages/changelog.html: -------------------------------------------------------------------------------- 1 | --- 2 | title: Change Log 3 | permalink: /changelog/ 4 | --- 5 | 6 |

    Subscribe with RSS to keep up with the latest changes. 7 | The most detailed and up to date changes are kept with the CHANGELOG 8 | with the code base.

    9 | 10 | 11 | 12 |
    13 | {% for change in site.posts %} 14 |
    15 |

    {{ change.title }}

    16 |

    {{ change.date | date: "%B %d, %Y" }} {{ change.type }}

    17 | 18 | {{ change.content }} 19 | 20 | 21 |
    22 | {% endfor %} 23 |
    24 | -------------------------------------------------------------------------------- /docs/pages/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Welcome 3 | permalink: / 4 | --- 5 | 6 | **Anonymization toward De-identification (deid)** 7 | 8 | This Python module is intended for basic coding of medical images, which means 9 | "cleaning" image headers and pixel data, and integrating with your own functions 10 | to replace with anonymous identifiers. Per HIPAA, this process is technically 11 | called "anonymization," meaning we did our _best effort_. 12 | 13 | > What does this module do? 14 | 15 | - Anonymize header data based on a specific logic of replacing, blanking, removing, or some custom function (e.g., "replace field X with item Y,") 16 | - Pass images through a filter for quarantine based on header logic, and if pixel coordinates are available, can black them out. 17 | - For each of the above, you can use defaults (blacklist, whitelist, graylist), or create your own customized logic. 18 | - provides functions for developers, and executables and containers for users. 19 | 20 | > What does this module *not* do? 21 | 22 | - does *not* provide a workflow manager to perform these actions. 23 | - does *not* implement custom API calls to retrieve identifiers from some specific database. 24 | - does *not* guarantee IRB validated outputs and is not liable for however you might use it. 25 | 26 | For dicom data, we use [pydicom](https://www.github.com/pydicom/pydicom) and for nifti we (will) use [nibabel](http://nipy.org/nibabel/). 27 | 28 | > Where do I go from here? 29 | 30 | If you are new to deid or pydicom, we recommend you start with 31 | the [getting started]({{ site.baseurl }}/getting-started/) pages. 
32 | -------------------------------------------------------------------------------- /docs/pages/robots.txt: -------------------------------------------------------------------------------- 1 | --- 2 | layout: null 3 | sitemap: false 4 | permalink: /robots.txt 5 | --- 6 | User-agent: * 7 | Sitemap: {{ site.url }}/sitemap.xml 8 | Disallow: /search/ 9 | -------------------------------------------------------------------------------- /docs/pages/search.html: -------------------------------------------------------------------------------- 1 | --- 2 | title: Search 3 | sitemap: false 4 | permalink: /search/ 5 | --- 6 | 7 |

    Loading results

    8 |
      9 | 10 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | - [Example deid spec files](deid): are found in the folder [deid](deid) 4 | - [Example dicom scripts](dicom): are found in the folder [dicom](dicom) 5 | 6 | For detailed walk throughs, please reference our [docs](https://pydicom.github.io/deid). 7 | For questions, issues, suggestions, or if you want to help out, 8 | please [open an issue](https://www.github.com/pydicom/deid). 9 | -------------------------------------------------------------------------------- /examples/deid/README.md: -------------------------------------------------------------------------------- 1 | # Deid Recipes 2 | 3 | This is a folder of deid recipe examples. If you have a recipe that you think 4 | would be useful for others, please contribute it here! You can also 5 | add it to be included with the library (under deid/data) if you think this 6 | level of contribution is more relevant. 
7 | -------------------------------------------------------------------------------- /examples/deid/deid.dicom: -------------------------------------------------------------------------------- 1 | FORMAT dicom 2 | 3 | %filter dangerouscookie 4 | 5 | LABEL Criteria for Dangerous Cookie 6 | contains PatientSex M 7 | + notequals OperatorsName bold bread 8 | coordinates 0,0,512,110 9 | 10 | 11 | %filter bigimage 12 | 13 | LABEL Image Size Good for Machine Learning 14 | equals Rows 2048 15 | + equals Columns 1536 16 | coordinates 0,0,512,200 17 | 18 | %header 19 | 20 | ADD PatientIdentityRemoved YES 21 | REPLACE PatientID var:id 22 | REPLACE SOPInstanceUID var:source_id 23 | -------------------------------------------------------------------------------- /examples/deid/deid.dicom-groups: -------------------------------------------------------------------------------- 1 | FORMAT dicom 2 | 3 | %values cookie_names 4 | SPLIT PatientID by="^";minlength=4 5 | 6 | %values operator_names 7 | FIELD startswith:Operator 8 | 9 | %fields instance_fields 10 | FIELD contains:Instance 11 | 12 | %header 13 | 14 | ADD PatientIdentityRemoved YES 15 | REPLACE values:cookie_names var:id 16 | REPLACE values:operator_names var:source_id 17 | REMOVE fields:instance_fields 18 | -------------------------------------------------------------------------------- /examples/deid/deid.dicom-pusheen: -------------------------------------------------------------------------------- 1 | FORMAT dicom 2 | 3 | %header 4 | 5 | # Requires the function "pusheenize" to accept an item, value, field 6 | # in the python working environment when you load the recipe and run 7 | # replace_identifiers 8 | REPLACE all func:pusheenize 9 | -------------------------------------------------------------------------------- /examples/dicom/README.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | This folder contains examples for interacting with deid! 
For the basic 4 | dicom example, see the [recipe](recipe) folder. For tutorials with 5 | other examples, see [https://pydicom.github.io/deid/examples](https://pydicom.github.io/deid/examples). 6 | -------------------------------------------------------------------------------- /examples/dicom/dicom-extract/README.md: -------------------------------------------------------------------------------- 1 | # Extraction from Dicom Headers 2 | 3 | This is a user contribution that shows how to generate a csv file with 4 | dicom metadata. The example is provided in [create-dicom-csv.py](create-dicom-csv.py). 5 | -------------------------------------------------------------------------------- /examples/dicom/header-manipulation/deid.dicom: -------------------------------------------------------------------------------- 1 | FORMAT dicom 2 | 3 | %header 4 | 5 | REPLACE StudyInstanceUID func:generate_uid 6 | REPLACE SeriesInstanceUID func:generate_uid 7 | ADD FrameOfReferenceUID func:generate_uid 8 | -------------------------------------------------------------------------------- /examples/dicom/header-manipulation/file-meta/deid.dicom: -------------------------------------------------------------------------------- 1 | FORMAT dicom 2 | 3 | %header 4 | 5 | REPLACE MediaStorageSOPInstanceUID new-id 6 | -------------------------------------------------------------------------------- /examples/dicom/header-manipulation/file-meta/example.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from deid.config import DeidRecipe 4 | from deid.data import get_dataset 5 | from deid.dicom import get_files, get_identifiers, replace_identifiers 6 | 7 | # This is supported for deid.dicom version 0.1.34 8 | 9 | dicom_files = list(get_files(get_dataset("animals"))) 10 | print(dicom_files) 11 | 12 | items = get_identifiers(dicom_files) 13 | 14 | # Load in the recipe, we want to REPLACE MediaStorageSOPInstanceUID with a new value 15 | 16
| recipe = DeidRecipe("deid.dicom") 17 | 18 | # Parse the files 19 | parsed_files = replace_identifiers( 20 | dicom_files=dicom_files, deid=recipe, strip_sequences=False, ids=items 21 | ) 22 | 23 | ## Print two instances (one in sequence) 24 | print(parsed_files[0].file_meta) 25 | -------------------------------------------------------------------------------- /examples/dicom/header-manipulation/func-replacement.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Create the DeidRecipe Instance from deid.dicom 4 | from deid.config import DeidRecipe 5 | from deid.data import get_dataset 6 | from deid.dicom import get_files, get_identifiers, replace_identifiers 7 | 8 | # This is an example of replacing fields in dicom headers, 9 | # but via a function instead of a preset identifier. 10 | 11 | # This will get a set of example cookie dicoms 12 | base = get_dataset("dicom-cookies") 13 | dicom_files = list(get_files(base)) # todo : consider using generator functionality 14 | 15 | 16 | items = get_identifiers(dicom_files) 17 | 18 | # ** 19 | # The function performs an action to generate a uid, but you can also use 20 | # it to communicate with databases, APIs, or do something like 21 | # save the original (and newly generated one) in some (IRB approvied) place 22 | # ** 23 | 24 | ################################################################################ 25 | # The Deid Recipe 26 | # 27 | # The process of updating header values means writing a series of actions 28 | # in the deid recipe, in this folder the file "deid.dicom" that has the 29 | # following content: 30 | # 31 | # FORMAT dicom 32 | 33 | # %header 34 | 35 | # REPLACE StudyInstanceUID func:generate_uid 36 | # REPLACE SeriesInstanceUID func:generate_uid 37 | # ADD FrameOfReferenceUID func:generate_uid 38 | # 39 | # In the above we are saying we want to replace the fields above with the 40 | # output from the generate_uid function, which is 
def generate_uid(item, value, field, dicom):
    """Build a prefixed, UID-like replacement value for a dicom field.

    This is the callback referenced by ``func:generate_uid`` in the recipe.
    You can expect it to be passed the dictionary of items extracted from
    the dicom (and your function) and variables, the original value
    (func:generate_uid), the field you are applying it to, and the dicom.

    Arguments:
     - item: the dictionary of extracted items for this file (unused here)
     - value: the raw recipe value, e.g. "func:generate_uid" (unused here)
     - field: either the field name as a string, or an object with a
              ``name`` attribute holding that string
     - dicom: the dicom being processed (unused here)

    Returns a string "<lowercased field name>-<root>.<random integer>",
    where the "<root>.<random integer>" part is cut to 64 characters.
    """
    import uuid

    # a field can either be just the name string, or a DicomElement
    field_name = field.name if hasattr(field, "name") else field

    # Your organization should have its own DICOM ORG ROOT.
    # For the purpose of an example, borrowing PYMEDPHYS_ROOT_UID.
    #
    # When using a UUID to dynamically create a UID (e.g. SOPInstanceUID),
    # the root '2.25' can be used instead of an organization's root.
    # For more information see DICOM PS3.5 2020b B.2
    ORG_ROOT = "1.2.826.0.1.3680043.10.188"  # e.g. PYMEDPHYS_ROOT_UID

    # The previous ``.replace(" ", " ")`` swapped a space for a space and
    # therefore did nothing; the prefix is simply the lowercased name.
    prefix = field_name.lower()

    # A DICOM UID is limited to 64 characters, so cut root + "." + integer.
    sliced_uid = (ORG_ROOT + "." + str(uuid.uuid4().int))[:64]

    # NOTE: the prefix is added after the truncation, so the returned value
    # can exceed 64 characters and contains more than digits and dots. It is
    # a readable demo value; drop the prefix if you need a conformant UID.
    return prefix + "-" + sliced_uid
def generate_date(item, value, field, dicom):
    """Return the replacement date used for every matched field.

    This is the callback referenced by ``func:generate_date`` in the recipe.
    It receives the dictionary of items extracted from the dicom, the
    original recipe value (func:generate_date), the field being replaced
    and the dicom itself, but ignores all of them: the same fixed date
    string is returned on every call.
    """
    # Constant on purpose, so the example output is reproducible.
    fixed_date = "20200608"
    return fixed_date
Detect 41 | ######################################### 42 | 43 | # Detect means using the deid recipe to parse headers 44 | 45 | # If we try to clean before we detect, we can't 46 | # client.clean() 47 | # WARNING Use .detect() to find coordinates first. 48 | 49 | client.detect(dicom_file) 50 | 51 | # {'flagged': True, 52 | # 'results': [{'coordinates': [], 53 | # 'group': 'blacklist', 54 | # 'reason': ' ImageType missing or ImageType empty '}]} 55 | 56 | 57 | ######################################### 58 | # 4. Clean and save 59 | ######################################### 60 | 61 | client.clean() 62 | 63 | # If there are coordinates, they are blanked. Otherwise, no change. 64 | # Blanking 0 coordinate results 65 | 66 | # Default output folder is temporary, unless specified at client onset 67 | # or directly to saving functions 68 | client.save_png() 69 | client.save_dicom() 70 | -------------------------------------------------------------------------------- /examples/dicom/pixels/run-inspect-pixels.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # This is a complete example of inspecting pixels for PHI 4 | # based on a deid.dicom specification 5 | # https://pydicom.github.io/deid 6 | 7 | 8 | from deid.data import get_dataset 9 | 10 | # This will get a set of example cookie dicoms 11 | from deid.dicom import get_files, has_burned_pixels 12 | from deid.logger import bot 13 | 14 | bot.level = 3 15 | 16 | base = get_dataset("dicom-cookies") 17 | dicom_files = list(get_files(base)) # todo : consider using generator functionality 18 | 19 | results = has_burned_pixels(dicom_files=dicom_files, deid="examples/deid") 20 | 21 | # The dictionary has a "clean" list, and a "flagged" list, 22 | # Eg: 23 | 24 | # {'clean': [], 25 | # 'flagged': {'/home/vanessa/Documents/Dropbox/Code/dicom/deid/deid/data/dicom-cookies/image1.dcm': {'flagged': True, 26 | # 'results': [{'coordinates': [], 27 | # 'group': 
'blacklist', 28 | # 'reason': ' ImageType missing or ImageType empty '}]}, 29 | -------------------------------------------------------------------------------- /examples/dicom/recipe/deid.dicom: -------------------------------------------------------------------------------- 1 | FORMAT dicom 2 | 3 | %filter dangerouscookie 4 | 5 | LABEL Criteria for Dangerous Cookie 6 | contains PatientSex M 7 | + notequals OperatorsName bold bread 8 | coordinates 0,0,512,110 9 | 10 | 11 | %filter bigimage 12 | 13 | LABEL Image Size Good for Machine Learning 14 | equals Rows 2048 15 | + equals Columns 1536 16 | coordinates 0,0,512,200 17 | 18 | %header 19 | 20 | ADD PatientIdentityRemoved YES 21 | REPLACE PatientID var:id 22 | REPLACE SOPInstanceUID var:source_id 23 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | profile = "black" 3 | exclude = ["^env/"] 4 | 5 | [tool.isort] 6 | profile = "black" # needed for black/isort compatibility 7 | skip = [] 8 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | exclude = benchmarks docs 3 | max-line-length = 100 4 | ignore = E1 E2 E5 W5 5 | per-file-ignores = 6 | deid/dicom/__init__.py:F401 7 | deid/utils/__init__.py:F401 8 | deid/config/__init__.py:F401 9 | deid/main/__init__.py:F401 10 | deid/logger/__init__.py:F401 11 | deid/dicom/actions/__init__.py:F401 12 | deid/dicom/pixels/__init__.py:F401 13 | 14 | [metadata] 15 | description-file = README.md 16 | 17 | [codespell] 18 | skip = lunr.min.js 19 | ignore-words = .github/codespell_ignore_words.txt 20 | 21 | [coverage:run] 22 | branch = true 23 | source = 24 | deid/config 25 | deid/data 26 | deid/dicom 27 | deid/logger 28 | deid/main 29 | deid/utils 30 | 31 | [coverage:report] 32 | show_missing = 
import os

from setuptools import find_packages, setup


def get_lookup():
    """
    Get version lookup

    Execute deid/version.py in a fresh namespace and return that namespace
    as a lookup dictionary, so the package metadata (version, name,
    requirements, ...) can be read without needing to import deid.
    """
    lookup = {}
    version_file = os.path.join("deid", "version.py")
    # Explicit encoding: do not depend on the locale's default when reading.
    with open(version_file, encoding="utf-8") as filey:
        # version.py is this repository's own file, not external input.
        exec(filey.read(), lookup)
    return lookup


# Read in requirements
def get_requirements(lookup=None):
    """
    Get install requirements.

    get_requirements returns the INSTALL_REQUIRES entry of the lookup
    obtained with get_lookup (loading the lookup if none is given).
    """

    if lookup is None:
        lookup = get_lookup()

    return lookup["INSTALL_REQUIRES"]


# Make sure everything is relative to setup.py
install_path = os.path.dirname(os.path.abspath(__file__))
os.chdir(install_path)

# Get version information from the lookup
lookup = get_lookup()
VERSION = lookup["__version__"]
NAME = lookup["NAME"]
AUTHOR = lookup["AUTHOR"]
AUTHOR_EMAIL = lookup["AUTHOR_EMAIL"]
PACKAGE_URL = lookup["PACKAGE_URL"]
KEYWORDS = lookup["KEYWORDS"]
DESCRIPTION = lookup["DESCRIPTION"]
LICENSE = lookup["LICENSE"]
# Explicit encoding so a non-ASCII README does not break installs that run
# under a C/POSIX or other non-UTF-8 locale.
with open("README.md", encoding="utf-8") as filey:
    LONG_DESCRIPTION = filey.read()

################################################################################
# MAIN #########################################################################
################################################################################


INSTALL_REQUIRES = get_requirements(lookup)

setup(
    name=NAME,
    version=VERSION,
    license=LICENSE,
    description=DESCRIPTION,
    author=AUTHOR,
    author_email=AUTHOR_EMAIL,
    url=PACKAGE_URL,
    packages=find_packages(),
    include_package_data=True,
    long_description=LONG_DESCRIPTION,
    long_description_content_type="text/markdown",
    keywords=KEYWORDS,
    install_requires=INSTALL_REQUIRES,
    python_requires=">=3.7",
    classifiers=[
        "Development Status :: 5 - Production/Stable",
        "Environment :: Console",
        "Intended Audience :: Developers",
        "Intended Audience :: System Administrators",
        "Intended Audience :: Science/Research",
        "License :: OSI Approved :: MIT License",
        "Natural Language :: English",
        "Operating System :: Unix",
        "Programming Language :: Python",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.8",
        "Topic :: Software Development",
        "Topic :: Scientific/Engineering",
        "Topic :: System :: Shells",
        "Topic :: Terminals",
        "Topic :: Utilities",
    ],
    entry_points={"console_scripts": ["deid=deid.main:main"]},
)