├── .editorconfig ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── build-docs.yml │ ├── build-master.yml │ ├── publish.yml │ └── test-and-lint.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── actk ├── __init__.py ├── bin │ ├── __init__.py │ ├── all.py │ └── cli.py ├── constants.py ├── exceptions.py ├── steps │ ├── __init__.py │ ├── diagnostic_sheets │ │ ├── __init__.py │ │ └── diagnostic_sheets.py │ ├── raw │ │ ├── __init__.py │ │ └── raw.py │ ├── single_cell_features │ │ ├── __init__.py │ │ └── single_cell_features.py │ ├── single_cell_images │ │ ├── __init__.py │ │ └── single_cell_images.py │ └── standardize_fov_array │ │ ├── __init__.py │ │ └── standardize_fov_array.py ├── tests │ ├── __init__.py │ ├── conftest.py │ ├── steps │ │ ├── __init__.py │ │ ├── test_diagnostic_sheets.py │ │ ├── test_single_cell_features.py │ │ ├── test_single_cell_images.py │ │ └── test_standardize_fov_array.py │ └── utils │ │ ├── __init__.py │ │ ├── test_dataset_utils.py │ │ └── test_image_utils.py └── utils │ ├── __init__.py │ ├── dataset_utils.py │ └── image_utils.py ├── codecov.yml ├── docs ├── Makefile ├── conf.py ├── contributing.rst ├── dataset_fields.md ├── index.rst ├── installation.rst ├── make.bat └── modules.rst ├── images └── header.png ├── scripts ├── download_aics_dataset.py ├── download_test_data.py └── upload_test_data.py ├── setup.cfg ├── setup.py └── tox.ini /.editorconfig: -------------------------------------------------------------------------------- 1 | # http://editorconfig.org 2 | 3 | root = true 4 | 5 | [*] 6 | indent_style = space 7 | indent_size = 4 8 | trim_trailing_whitespace = true 9 | insert_final_newline = true 10 | charset = utf-8 11 | end_of_line = lf 12 | 13 | [*.bat] 14 | indent_style = tab 15 | end_of_line = crlf 16 | 17 | [LICENSE] 18 | insert_final_newline = false 19 | 20 | [Makefile] 21 | indent_style = tab 22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug Report 3 | about: '"Something''s wrong..."' 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## Description 11 | *A clear description of the bug* 12 | 13 | 14 | 15 | 16 | ## Expected Behavior 17 | *What did you expect to happen instead?* 18 | 19 | 20 | 21 | 22 | ## Reproduction 23 | *A minimal example that exhibits the behavior.* 24 | 25 | 26 | 27 | 28 | ## Environment 29 | *Any additional information about your environment* 30 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature Request 3 | about: '"It would be really cool if x did y..."' 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## Use Case 11 | *Please provide a use case to help us understand your request in context* 12 | 13 | 14 | 15 | 16 | ## Solution 17 | *Please describe your ideal solution* 18 | 19 | 20 | 21 | 22 | ## Alternatives 23 | *Please describe any alternatives you've considered, even if you've dismissed them* 24 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 
**Pull request recommendations:** 2 | - [ ] Name your pull request _your-development-type/short-description_. Ex: _feature/read-tiff-files_ 3 | - [ ] Link to any relevant issue in the PR description. Ex: _Resolves [gh-12], adds tiff file format support_ 4 | - [ ] Provide context of changes. 5 | - [ ] Provide relevant tests for your feature or bug fix. 6 | - [ ] Provide or update documentation for any feature added by your pull request. 7 | 8 | Thanks for contributing! 9 | -------------------------------------------------------------------------------- /.github/workflows/build-docs.yml: -------------------------------------------------------------------------------- 1 | name: Documentation 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | 8 | jobs: 9 | docs: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v1 13 | - name: Set up Python 14 | uses: actions/setup-python@v1 15 | with: 16 | python-version: 3.8 17 | - name: Install Dependencies 18 | run: | 19 | pip install --upgrade pip 20 | pip install Cython 21 | pip install numpy 22 | pip install .[dev] 23 | - name: Generate Docs 24 | run: | 25 | make gen-docs 26 | touch docs/_build/html/.nojekyll 27 | - name: Publish Docs 28 | uses: JamesIves/github-pages-deploy-action@releases/v3 29 | with: 30 | ACCESS_TOKEN: ${{ secrets.ACCESS_TOKEN }} 31 | BASE_BRANCH: master # The branch the action should deploy from. 32 | BRANCH: gh-pages # The branch the action should deploy to. 33 | FOLDER: docs/_build/html/ # The folder the action should deploy. 34 | -------------------------------------------------------------------------------- /.github/workflows/build-master.yml: -------------------------------------------------------------------------------- 1 | name: Build Master 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | schedule: 8 | # 9 | # https://pubs.opengroup.org/onlinepubs/9699919799/utilities/crontab.html#tag_20_25_07 10 | # Run every Monday at 18:00:00 UTC (Monday at 10:00:00 PST) 11 | - cron: '0 18 * * 1' 12 | 13 | jobs: 14 | test: 15 | runs-on: ${{ matrix.os }} 16 | strategy: 17 | matrix: 18 | python-version: [3.7, 3.8] 19 | os: [ubuntu-latest, windows-latest, macOS-latest] 20 | 21 | steps: 22 | - uses: actions/checkout@v1 23 | - name: Set up Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v1 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install Dependencies 28 | run: | 29 | python -m pip install --upgrade pip 30 | pip install Cython 31 | pip install numpy 32 | pip install .[test] 33 | - name: Download Test Data 34 | run: | 35 | python scripts/download_test_data.py --debug 36 | - name: Test with pytest 37 | run: | 38 | pytest --cov-report xml --cov=actk actk/tests/ 39 | codecov -t ${{ secrets.CODECOV_TOKEN }} 40 | 41 | lint: 42 | runs-on: ubuntu-latest 43 | 44 | steps: 45 | - uses: actions/checkout@v1 46 | - name: Set up Python 47 | uses: actions/setup-python@v1 48 | with: 49 | python-version: 3.8 50 | - name: Install Dependencies 51 | run: | 52 | python -m pip install --upgrade pip 53 | pip install Cython 54 | pip install numpy 55 | pip install .[test] 56 | - name: Lint with flake8 57 | run: | 58 | flake8 actk --count --verbose --show-source --statistics 59 | - name: Check with black 60 | run: | 61 | black --check actk 62 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish 2 | 3 | on: 4 | push: 5 | branches: 6 
| - stable 7 | 8 | jobs: 9 | publish: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v1 13 | - name: Set up Python 14 | uses: actions/setup-python@v1 15 | with: 16 | python-version: 3.8 17 | - name: Install Dependencies 18 | run: | 19 | python -m pip install --upgrade pip 20 | pip install Cython 21 | pip install numpy 22 | pip install setuptools wheel 23 | - name: Build Package 24 | run: | 25 | python setup.py sdist bdist_wheel 26 | - name: Publish to PyPI 27 | uses: pypa/gh-action-pypi-publish@master 28 | with: 29 | user: aicspypi 30 | password: ${{ secrets.PYPI_TOKEN }} 31 | -------------------------------------------------------------------------------- /.github/workflows/test-and-lint.yml: -------------------------------------------------------------------------------- 1 | name: Test and Lint 2 | 3 | on: pull_request 4 | 5 | jobs: 6 | test: 7 | runs-on: ${{ matrix.os }} 8 | strategy: 9 | matrix: 10 | python-version: [3.8] 11 | os: [ubuntu-latest] 12 | 13 | steps: 14 | - uses: actions/checkout@v1 15 | - name: Set up Python ${{ matrix.python-version }} 16 | uses: actions/setup-python@v1 17 | with: 18 | python-version: ${{ matrix.python-version }} 19 | - name: Install Dependencies 20 | run: | 21 | python -m pip install --upgrade pip 22 | pip install Cython 23 | pip install numpy 24 | pip install .[test] 25 | - name: Download Test Data 26 | run: | 27 | python scripts/download_test_data.py --debug 28 | - name: Test with pytest 29 | run: | 30 | pytest --cov-report xml --cov=actk actk/tests/ 31 | - name: Upload codecov 32 | uses: codecov/codecov-action@v1 33 | 34 | lint: 35 | runs-on: ubuntu-latest 36 | 37 | steps: 38 | - uses: actions/checkout@v1 39 | - name: Set up Python 40 | uses: actions/setup-python@v1 41 | with: 42 | python-version: 3.8 43 | - name: Install Dependencies 44 | run: | 45 | python -m pip install --upgrade pip 46 | pip install Cython 47 | pip install numpy 48 | pip install .[test] 49 | - name: Lint with flake8 50 | run: | 51 | flake8 actk --count --verbose --show-source --statistics 52 | - name: Check with black 53 | run: | 54 | black --check actk 55 | 56 | docs: 57 | runs-on: ubuntu-latest 58 | 59 | steps: 60 | - uses: actions/checkout@v1 61 | - name: Set up Python 3.7 62 | uses: actions/setup-python@v1 63 | with: 64 | python-version: 3.8 65 | - name: Install Dependencies 66 | run: | 67 | python -m pip install --upgrade pip 68 | pip install Cython 69 | pip install numpy 70 | pip install .[dev] 71 | - name: Generate Docs 72 | run: | 73 | make gen-docs 74 | touch docs/_build/html/.nojekyll 75 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # default local data staging directory 2 | /local_staging 3 | *.csv 4 | 5 | # notebooks bcz eww (force add them if you must?) 6 | *.ipynb 7 | 8 | # Byte-compiled / optimized / DLL files 9 | __pycache__/ 10 | *.py[cod] 11 | *$py.class 12 | 13 | # C extensions 14 | *.so 15 | 16 | # Distribution / packaging 17 | .Python 18 | env/ 19 | build/ 20 | develop-eggs/ 21 | dist/ 22 | downloads/ 23 | eggs/ 24 | .eggs/ 25 | lib/ 26 | lib64/ 27 | parts/ 28 | sdist/ 29 | var/ 30 | wheels/ 31 | *.egg-info/ 32 | .installed.cfg 33 | *.egg 34 | 35 | # OS generated files 36 | .DS_Store 37 | 38 | # PyInstaller 39 | # Usually these files are written by a python script from a template 40 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
41 | *.manifest 42 | *.spec 43 | 44 | # Installer logs 45 | pip-log.txt 46 | pip-delete-this-directory.txt 47 | 48 | # Unit test / coverage reports 49 | htmlcov/ 50 | .tox/ 51 | .coverage 52 | .coverage.* 53 | .cache 54 | nosetests.xml 55 | coverage.xml 56 | *.cover 57 | .hypothesis/ 58 | .pytest_cache/ 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | local_settings.py 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | docs/actk.*rst 78 | 79 | # PyBuilder 80 | target/ 81 | 82 | # Jupyter Notebook 83 | .ipynb_checkpoints 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # celery beat schedule file 89 | celerybeat-schedule 90 | 91 | # Dask 92 | dask-worker-space/ 93 | 94 | # SageMath parsed files 95 | *.sage.py 96 | 97 | # dotenv 98 | .env 99 | 100 | # virtualenv 101 | .venv 102 | venv/ 103 | ENV/ 104 | 105 | # Spyder project settings 106 | .spyderproject 107 | .spyproject 108 | 109 | # Rope project settings 110 | .ropeproject 111 | 112 | # mkdocs documentation 113 | /site 114 | 115 | # VSCode 116 | .vscode/ 117 | 118 | # mypy 119 | .mypy_cache/ 120 | 121 | # Project specific 122 | data/ 123 | .dask_logs/ 124 | workflow_config.json 125 | aics_ic_data.csv 126 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 
39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting any of the maintainers of this project and 59 | we will attempt to resolve the issues with respect and dignity. 60 | 61 | Project maintainers who do not follow or enforce the Code of Conduct in good 62 | faith may face temporary or permanent repercussions as determined by other 63 | members of the project's leadership. 64 | 65 | ## Attribution 66 | 67 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 68 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 69 | 70 | [homepage]: https://www.contributor-covenant.org 71 | 72 | For answers to common questions about this code of conduct, see 73 | https://www.contributor-covenant.org/faq 74 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | Contributions are welcome, and they are greatly appreciated! Every little bit 4 | helps, and credit will always be given. 5 | 6 | ## Get Started! 7 | Ready to contribute? Here's how to set up `actk` for local development. 8 | 9 | 1. Fork the `actk` repo on GitHub. 10 | 11 | 2. Clone your fork locally: 12 | 13 | ```bash 14 | git clone git@github.com:{your_name_here}/actk.git 15 | ``` 16 | 17 | 3. Install the project in editable mode. (It is also recommended to work in a 18 | virtualenv or anaconda environment): 19 | 20 | ```bash 21 | cd actk/ 22 | pip install -e .[dev] 23 | ``` 24 | 25 | 4. Create a branch for local development: 26 | 27 | ```bash 28 | git checkout -b {your_development_type}/short-description 29 | ``` 30 | 31 | Ex: feature/read-tiff-files or bugfix/handle-file-not-found
32 | Now you can make your changes locally. 33 | 34 | 5. When you're done making changes, check that your changes pass linting and 35 | tests, including testing other Python versions with make: 36 | 37 | ```bash 38 | make build 39 | ``` 40 | 41 | 6. Commit your changes and push your branch to GitHub: 42 | 43 | ```bash 44 | git add . 45 | git commit -m "Resolves gh-###. Your detailed description of your changes." 46 | git push origin {your_development_type}/short-description 47 | ``` 48 | 49 | 7. Submit a pull request through the GitHub website. 50 | 51 | ## Deploying 52 | 53 | A reminder for the maintainers on how to deploy. 54 | Make sure all your changes are committed. 55 | Then run: 56 | 57 | ```bash 58 | bumpversion patch # possible: major / minor / patch 59 | ``` 60 | 61 | Now *check to see if bumpversions primitive string matching set any dependencies to the wrong version*. 62 | Then: 63 | ```bash 64 | git push 65 | git push --tags 66 | git branch -D stable 67 | git checkout -b stable 68 | git push --set-upstream origin stable -f 69 | ``` 70 | 71 | This will release a new package version on Git + GitHub and publish to PyPI. 72 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Allen Institute Software License – This software license is the 2-clause BSD 2 | license plus a third clause that prohibits redistribution and use for 3 | commercial purposes without further permission. 4 | 5 | Copyright © 2020 6 | Jackson Maxfield Brown, Allen Institute. All rights reserved. 7 | 8 | Redistribution and use in source and binary forms, with or without 9 | modification, are permitted provided that the following conditions are met: 10 | 11 | 1. Redistributions of source code must retain the above copyright notice, this 12 | list of conditions and the following disclaimer. 13 | 14 | 2. Redistributions in binary form must reproduce the above copyright notice, 15 | this list of conditions and the following disclaimer in the documentation 16 | and/or other materials provided with the distribution. 17 | 18 | 3. Redistributions and use for commercial purposes are not permitted without 19 | the Allen Institute’s written permission. For purposes of this license, 20 | commercial purposes are the incorporation of the Allen Institute's software 21 | into anything for which you will charge fees or other compensation or use of 22 | the software to perform a commercial service for a third party. Contact 23 | terms@alleninstitute.org for commercial licensing opportunities. 24 | 25 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 26 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 27 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 28 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 29 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 31 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 32 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 33 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 34 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
35 | 36 | 37 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include CONTRIBUTING.md 2 | include LICENSE 3 | include README.md 4 | 5 | recursive-include tests * 6 | recursive-exclude * __pycache__ 7 | recursive-exclude * *.py[co] 8 | 9 | recursive-include docs *.rst conf.py Makefile make.bat *.jpg *.png *.gif 10 | graft actk/data 11 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean clean-test clean-pyc clean-build docs help 2 | .DEFAULT_GOAL := help 3 | 4 | define BROWSER_PYSCRIPT 5 | import os, webbrowser, sys 6 | 7 | try: 8 | from urllib import pathname2url 9 | except: 10 | from urllib.request import pathname2url 11 | 12 | webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1]))) 13 | endef 14 | export BROWSER_PYSCRIPT 15 | 16 | define PRINT_HELP_PYSCRIPT 17 | import re, sys 18 | 19 | for line in sys.stdin: 20 | match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line) 21 | if match: 22 | target, help = match.groups() 23 | print("%-20s %s" % (target, help)) 24 | endef 25 | export PRINT_HELP_PYSCRIPT 26 | 27 | BROWSER := python -c "$$BROWSER_PYSCRIPT" 28 | 29 | help: 30 | @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) 31 | 32 | clean: ## clean all build, python, and testing files 33 | rm -fr build/ 34 | rm -fr dist/ 35 | rm -fr .eggs/ 36 | find . -name '*.egg-info' -exec rm -fr {} + 37 | find . -name '*.egg' -exec rm -f {} + 38 | find . -name '*.pyc' -exec rm -f {} + 39 | find . -name '*.pyo' -exec rm -f {} + 40 | find . -name '*~' -exec rm -f {} + 41 | find . -name '__pycache__' -exec rm -fr {} + 42 | rm -fr .tox/ 43 | rm -fr .coverage 44 | rm -fr coverage.xml 45 | rm -fr htmlcov/ 46 | rm -fr .pytest_cache 47 | 48 | build: ## run tox / run tests and lint 49 | tox 50 | 51 | gen-docs: ## generate Sphinx HTML documentation, including API docs 52 | rm -f docs/actk*.rst 53 | rm -f docs/modules.rst 54 | sphinx-apidoc -o docs/ actk **/tests/ 55 | $(MAKE) -C docs html 56 | cp -r ./images ./docs/_build/html/images 57 | 58 | docs: ## generate Sphinx HTML documentation, including API docs, and serve to browser 59 | make gen-docs 60 | $(BROWSER) docs/_build/html/index.html 61 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # actk 2 | 3 | [![Build Status](https://github.com/AllenCellModeling/actk/workflows/Build%20Master/badge.svg)](https://github.com/AllenCellModeling/actk/actions) 4 | [![Documentation](https://github.com/AllenCellModeling/actk/workflows/Documentation/badge.svg)](https://AllenCellModeling.github.io/actk) 5 | [![Code Coverage](https://codecov.io/gh/AllenCellModeling/actk/branch/master/graph/badge.svg)](https://codecov.io/gh/AllenCellModeling/actk) 6 | [![Published Data](https://img.shields.io/badge/Data-Published-Success)](https://open.quiltdata.com/b/allencell/tree/aics/actk/) 7 | 8 | Automated Cell Toolkit 9 | 10 | A pipeline to process field-of-view (FOV) microscopy images and generate data and 11 | render-ready products for the cells in each field. Of note, the data produced by this 12 | pipeline is used for the [Cell Feature Explorer](https://cfe.allencell.org/). 
13 | 14 | ![workflow as an image](./images/header.png) 15 | 16 | --- 17 | 18 | ## Features 19 | All steps and functionality in this package can be run as single steps or all together 20 | by using the command line. 21 | 22 | In general, all commands for this package will follow the format: 23 | `actk {step} {command}` 24 | 25 | * `step` is the name of the step, such as "StandardizeFOVArray" or "SingleCellFeatures" 26 | * `command` is what you want that step to do, such as "run" or "push" 27 | 28 | Each step will check that the dataset provided contains the required fields prior to 29 | processing. For details and definitions on each field, see our 30 | [dataset fields documentation](https://AllenCellModeling.github.io/actk/dataset_fields.html). 31 | 32 | An example dataset can be seen [here](https://open.quiltdata.com/b/aics-modeling-packages-test-resources/tree/actk/test_data/data/example_dataset.csv). 33 | 34 | ### Pipeline 35 | To run the entire pipeline from start to finish you can simply run: 36 | 37 | ```bash 38 | actk all run --dataset {path to dataset} 39 | ``` 40 | 41 | Step specific parameters can additionally be passed by simply appending them. 42 | For example: the step `SingleCellFeatures` has a parameter for 43 | `cell_ceiling_adjustment` and this can be set on both the individual step run level and 44 | also for the entire pipeline with: 45 | 46 | ```bash 47 | actk all run --dataset {path to dataset} --cell_ceiling_adjustment {integer} 48 | ``` 49 | 50 | See the [steps module in our documentation](https://AllenCellModeling.github.io/actk/actk.steps.html) 51 | for a full list of parameters for each step 52 | 53 | #### Pipeline Config 54 | 55 | A configuration file can be provided to the underlying `datastep` library that manages 56 | the data storage and upload of the steps in this workflow. 57 | 58 | The config file should simply be called `workflow_config.json` and be available from 59 | whichever directory you run `actk` from. If this config is not found in the current 60 | working directory, defaults are selected by the `datastep` package. 61 | 62 | Here is an example of our production config: 63 | 64 | ```json 65 | { 66 | "quilt_storage_bucket": "s3://allencell", 67 | "project_local_staging_dir": "/allen/aics/modeling/jacksonb/results/actk" 68 | } 69 | ``` 70 | 71 | You can even additionally attach step-specific configuration in this file by using the 72 | name of the step like so: 73 | 74 | ```json 75 | { 76 | "quilt_storage_bucket": "s3://example_config_7", 77 | "project_local_staging_dir": "example/config/7", 78 | "example": { 79 | "step_local_staging_dir": "example/step/local/staging/" 80 | } 81 | } 82 | ``` 83 | 84 | #### AICS Distributed Computing 85 | 86 | For members of the AICS team, to run in distributed mode across the SLURM cluster add 87 | the `--distributed` flag to the pipeline call. 88 | 89 | To set distributed cluster and worker parameters you can additionally add the flags: 90 | * `--n_workers {int}` (i.e. `--n_workers 100`) 91 | * `--worker_cpu {int}` (i.e. `--worker_cpu 2`) 92 | * `--worker_mem {str}` (i.e. `--worker_mem 100GB`) 93 | 94 | ### Individual Steps 95 | * `actk standardizefovarray run --dataset {path to dataset}`, Generate standardized, 96 | ordered, and normalized FOV images as OME-Tiffs. 97 | * `actk singlecellfeatures run --dataset {path to dataset}`, Generate a features JSON 98 | file for each cell in the dataset. 
99 | * `actk singlecellimages run --dataset {path to dataset}`, Generate bounded 3D images 100 | and 2D projections for each cell in the dataset. 101 | * `actk diagnosticsheets run --dataset {path to dataset}`, Generate diagnostic sheets 102 | for single cell images. Useful for quality control. 103 | 104 | ## Installation 105 | **Install Requires:** The python package, `numpy`, must be installed prior to the 106 | installation of this package: `pip install numpy` 107 | 108 | **Stable Release:** `pip install actk`
109 | **Development Head:** `pip install git+https://github.com/AllenCellModeling/actk.git` 110 | 111 | ## Documentation 112 | For full package documentation please visit 113 | [allencellmodeling.github.io/actk](https://allencellmodeling.github.io/actk/index.html). 114 | 115 | ## Published Data 116 | 117 | For a large-scale example of what this library is capable of, please see the data 118 | produced by this pipeline after running our largest cell dataset through it. The data 119 | from the Allen Institute for Cell Science created from this pipeline can be found 120 | [here](https://open.quiltdata.com/b/allencell/tree/aics/actk/). 121 | 122 | This package contains the source microscopy images, segmentation files, pre-processed 123 | single cell images and features, and diagnostic sheets. 124 | 125 | Our source images are of endogenously-tagged hiPSC, grown for 4 days on Matrigel-coated 126 | 96-well, glass bottom imaging plates. Each field of view (FOV) includes 4 channels (BF, 127 | EGFP, DNA, Cell membrane) collected either interwoven with one camera (workflow 128 | Pipeline 4.0 - 4.2) or simultaneously with two cameras (Workflow Pipeline 4.4). You can 129 | use the file metadata of each image to target the specific channel you are interested 130 | in. FOVs were either selected randomly (mode A), enriched for mitotic events (mode B) 131 | or sampling 3 different areas of a colony (edge, ridge, center) using a photo 132 | protective cocktail (mode C). The images cataloged in this dataset come in several 133 | flavors: 134 | 135 | * Field of view (FOV) images with channels* : 136 | * Brightfield 137 | * EGFP 138 | * DNA 139 | * Cell Membrane 140 | * Segmentation files with channels: 141 | * Nucleus Segmentation 142 | * Nucleus Contour 143 | * Membrane Segmentation 144 | * Membrane Contour 145 | 146 | _* Some FOV images contain seven channels rather than four. The extra three channels 147 | are "dummy" channels added during acquisition that can be ignored._ 148 | 149 | The full details of the Allen Institute cell workflow are available on our website 150 | [here](https://www.allencell.org/methods-for-cells-in-the-lab.html).
151 | The full details of the Allen Institute microscopy workflow are available on our 152 | website [here](https://www.allencell.org/methods-for-microscopy.html). 153 | 154 | The following is provided for each cell: 155 | * Cell Id 156 | * Cell Index (from within the FOV's segmentation) 157 | * Metadata (Cell line, Labeled protein name, segmented region index, gene, etc.) 158 | * 3D cell and nuclear segmentation, and, DNA, membrane, and structure channels 159 | * 2D max projects for dimension pairs (XY, ZX, and ZY) of the above 3D images 160 | * A whole bunch of features for each cell 161 | 162 | For the 3D single cell images the channel ordering is: 163 | * Segmented DNA 164 | * Segmented Membrane 165 | * DNA (Hoechst) 166 | * Membrane (CellMask) 167 | * Labeled Structure (GFP) 168 | * Transmitted Light 169 | 170 | To interact with this dataset please see the 171 | [Quilt Documentation](https://docs.quiltdata.com/). 172 | 173 | ## Development 174 | See 175 | [CONTRIBUTING.md](https://github.com/AllenCellModeling/actk/blob/master/CONTRIBUTING.md) 176 | for information related to developing the code. 177 | 178 | For more details on how this pipeline is constructed please see 179 | [cookiecutter-stepworkflow](https://github.com/AllenCellModeling/cookiecutter-stepworkflow) 180 | and [datastep](https://github.com/AllenCellModeling/datastep). 181 | 182 | To add new steps to this pipeline, run `make_new_step` and follow the instructions in 183 | [CONTRIBUTING.md](https://github.com/AllenCellModeling/actk/blob/master/CONTRIBUTING.md) 184 | 185 | ### Developer Installation 186 | The following two commands will install the package with dev dependencies in editable 187 | mode and download all resources required for testing. 188 | 189 | ```bash 190 | pip install -e .[dev] 191 | python scripts/download_test_data.py 192 | ``` 193 | 194 | ### AICS Developer Instructions 195 | If you want to run this pipeline with the Pipeline Integrated Cell dataset 196 | (`pipeline 4.*`) run the following commands: 197 | 198 | ```bash 199 | pip install -e .[all] 200 | python scripts/download_aics_dataset.py 201 | ``` 202 | 203 | Options for this script are available and can be viewed with: 204 | `python scripts/download_aics_dataset.py --help` 205 | 206 | ## Acknowledgments 207 | 208 | A previous iteration of this pipeline was created and managed by 209 | [Gregory Johnson](https://github.com/gregjohnso) for work with 210 | [PyTorch Integrated Cell](https://github.com/AllenCellModeling/pytorch_integrated_cell). 211 | 212 | This version of this pipeline is more generalized and while still used for the 213 | Integrated Cell model, can be used to pre-process a variety of microscopy image 214 | datasets. 215 | 216 | The previous version of this pipeline produced the 217 | [pipeline_integrated_single_cell dataset](https://open.quiltdata.com/b/allencell/tree/aics/pipeline_integrated_single_cell/). 
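That earlier package, like the current `aics/actk` package described in the *Published Data* section above, is published through Quilt. The sketch below is illustrative only: the package name and registry are taken from the catalog links in this README and should be confirmed in the Quilt catalog before downloading, since the full package is large.

```python
import quilt3

# Browse the published package without downloading it.
# "aics/actk" and "s3://allencell" come from the catalog links in this README;
# confirm them in the Quilt catalog before fetching anything large.
pkg = quilt3.Package.browse("aics/actk", registry="s3://allencell")

# Inspect the top-level entries, then fetch only the entries you need.
print(list(pkg.keys()))
# pkg["some/entry.csv"].fetch("entry.csv")  # "some/entry.csv" is a placeholder
```

See the [Quilt Documentation](https://docs.quiltdata.com/) linked above for the full client API.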
218 | 219 | ***Free software: Allen Institute Software License*** 220 | -------------------------------------------------------------------------------- /actk/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """Top-level package for actk.""" 4 | 5 | __author__ = "Jackson Maxfield Brown" 6 | __email__ = "jacksonb@alleninstitute.org" 7 | # Do not edit this string manually, always use bumpversion 8 | # Details in CONTRIBUTING.md 9 | __version__ = "0.2.2" 10 | 11 | 12 | def get_module_version(): 13 | return __version__ 14 | -------------------------------------------------------------------------------- /actk/bin/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """Bin scripts package for actk.""" 4 | -------------------------------------------------------------------------------- /actk/bin/all.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | This script will run all tasks in a prefect Flow. 6 | 7 | When you add steps to you step workflow be sure to add them to the step list 8 | and configure their IO in the `run` function. 9 | """ 10 | 11 | import logging 12 | from datetime import datetime 13 | from pathlib import Path 14 | from typing import Optional 15 | 16 | from dask_jobqueue import SLURMCluster 17 | from distributed import LocalCluster 18 | from prefect import Flow 19 | from prefect.engine.executors import DaskExecutor, LocalExecutor 20 | 21 | from actk import steps 22 | 23 | ############################################################################### 24 | 25 | log = logging.getLogger(__name__) 26 | 27 | ############################################################################### 28 | 29 | 30 | class All: 31 | def __init__(self): 32 | """ 33 | Set all of your available steps here. 34 | This is only used for data logging operations, not computation purposes. 35 | """ 36 | self.step_list = [ 37 | steps.StandardizeFOVArray(), 38 | steps.SingleCellFeatures(), 39 | steps.SingleCellImages(), 40 | steps.DiagnosticSheets(), 41 | ] 42 | 43 | def run( 44 | self, 45 | dataset: str, 46 | include_raw: bool = False, 47 | batch_size: Optional[int] = None, 48 | distributed: bool = False, 49 | n_workers: int = 10, 50 | worker_cpu: int = 8, 51 | worker_mem: str = "120GB", 52 | overwrite: bool = False, 53 | debug: bool = False, 54 | **kwargs, 55 | ): 56 | """ 57 | Run a flow with your steps. 58 | 59 | Parameters 60 | ---------- 61 | dataset: str 62 | The dataset to use for the pipeline. 63 | 64 | include_raw: bool 65 | A boolean option to determine if the raw data should be included in the 66 | Quilt package. 67 | Default: False (Do not include the raw data) 68 | 69 | batch_size: Optional[int] 70 | An optional batch size to provide to each step for processing their items. 71 | Default: None (auto batch size depending on CPU / threads available) 72 | 73 | distributed: bool 74 | A boolean option to determine if the jobs should be distributed to a SLURM 75 | cluster when possible. 76 | Default: False (Do not distribute) 77 | 78 | n_workers: int 79 | Number of workers to request (when distributed is enabled). 80 | Default: 10 81 | 82 | worker_cpu: int 83 | Number of cores to provide per worker (when distributed is enabled). 84 | Default: 8 85 | 86 | worker_mem: str 87 | Amount of memory to provide per worker (when distributed is enabled). 
88 | Default: 120GB 89 | 90 | overwrite: bool 91 | If this pipeline has already partially or completely run, should it 92 | overwrite the previous files or not. 93 | Default: False (Do not overwrite or regenerate files) 94 | 95 | debug: bool 96 | A debug flag for the developer to use to manipulate how much data runs, 97 | how it is processed, etc. Additionally, if debug is True, any mapped 98 | operation will run on threads instead of processes. 99 | Default: False (Do not debug) 100 | """ 101 | # Initalize steps 102 | raw = steps.Raw() 103 | standardize_fov_array = steps.StandardizeFOVArray() 104 | single_cell_features = steps.SingleCellFeatures() 105 | single_cell_images = steps.SingleCellImages() 106 | diagnostic_sheets = steps.DiagnosticSheets() 107 | 108 | # Cluster / distributed defaults 109 | distributed_executor_address = None 110 | 111 | # Choose executor 112 | if debug: 113 | exe = LocalExecutor() 114 | log.info("Debug flagged. Will use threads instead of Dask.") 115 | else: 116 | if distributed: 117 | # Create or get log dir 118 | # Do not include ms 119 | log_dir_name = datetime.now().isoformat().split(".")[0] 120 | log_dir = Path(f".dask_logs/{log_dir_name}").expanduser() 121 | # Log dir settings 122 | log_dir.mkdir(parents=True, exist_ok=True) 123 | 124 | # Create cluster 125 | log.info("Creating SLURMCluster") 126 | cluster = SLURMCluster( 127 | cores=worker_cpu, 128 | memory=worker_mem, 129 | queue="aics_cpu_general", 130 | walltime="9-23:00:00", 131 | local_directory=str(log_dir), 132 | log_directory=str(log_dir), 133 | ) 134 | 135 | # Spawn workers 136 | cluster.scale(jobs=n_workers) 137 | log.info("Created SLURMCluster") 138 | 139 | # Use the port from the created connector to set executor address 140 | distributed_executor_address = cluster.scheduler_address 141 | 142 | # Only auto batch size if it is not None 143 | if batch_size is None: 144 | # Batch size is n_workers * worker_cpu * 0.75 145 | # We could just do n_workers * worker_cpu but 3/4 of that is safer 146 | batch_size = int(n_workers * worker_cpu * 0.75) 147 | 148 | # Log dashboard URI 149 | log.info(f"Dask dashboard available at: {cluster.dashboard_link}") 150 | else: 151 | # Create local cluster 152 | log.info("Creating LocalCluster") 153 | cluster = LocalCluster() 154 | log.info("Created LocalCluster") 155 | 156 | # Set distributed_executor_address 157 | distributed_executor_address = cluster.scheduler_address 158 | 159 | # Log dashboard URI 160 | log.info(f"Dask dashboard available at: {cluster.dashboard_link}") 161 | 162 | # Use dask cluster 163 | exe = DaskExecutor(distributed_executor_address) 164 | 165 | # Configure your flow 166 | with Flow("actk") as flow: 167 | if include_raw: 168 | dataset = raw(dataset, **kwargs) 169 | 170 | standardized_fov_paths_dataset = standardize_fov_array( 171 | dataset=dataset, 172 | distributed_executor_address=distributed_executor_address, 173 | batch_size=batch_size, 174 | overwrite=overwrite, 175 | debug=debug, 176 | # Allows us to pass `--desired_pixel_sizes [{float},{float},{float}]` 177 | **kwargs, 178 | ) 179 | 180 | single_cell_features_dataset = single_cell_features( 181 | dataset=standardized_fov_paths_dataset, 182 | distributed_executor_address=distributed_executor_address, 183 | batch_size=batch_size, 184 | overwrite=overwrite, 185 | debug=debug, 186 | # Allows us to pass `--cell_ceiling_adjustment {int}` 187 | **kwargs, 188 | ) 189 | 190 | single_cell_images_dataset = single_cell_images( 191 | dataset=single_cell_features_dataset, 192 | 
distributed_executor_address=distributed_executor_address, 193 | batch_size=batch_size, 194 | overwrite=overwrite, 195 | debug=debug, 196 | # Allows us to pass `--cell_ceiling_adjustment {int}` 197 | **kwargs, 198 | ) 199 | 200 | diagnostic_sheets( 201 | dataset=single_cell_images_dataset, 202 | distributed_executor_address=distributed_executor_address, 203 | overwrite=overwrite, 204 | # Allows us to pass `--metadata {str}`, 205 | # `--feature {str}'` 206 | **kwargs, 207 | ) 208 | 209 | # Run flow and get ending state, log duration 210 | start = datetime.now() 211 | state = flow.run(executor=exe) 212 | duration = datetime.now() - start 213 | log.info( 214 | f"Total duration of pipeline: " 215 | f"{duration.seconds // 60 // 60}:" 216 | f"{duration.seconds // 60}:" 217 | f"{duration.seconds % 60}" 218 | ) 219 | 220 | # Get and display any outputs you want to see on your local terminal 221 | log.info(single_cell_images_dataset.get_result(state, flow)) 222 | 223 | def pull(self): 224 | """ 225 | Pull all steps. 226 | """ 227 | for step in self.step_list: 228 | step.pull() 229 | 230 | def checkout(self): 231 | """ 232 | Checkout all steps. 233 | """ 234 | for step in self.step_list: 235 | step.checkout() 236 | 237 | def push(self): 238 | """ 239 | Push all steps. 240 | """ 241 | for step in self.step_list: 242 | step.push() 243 | 244 | def clean(self): 245 | """ 246 | Clean all steps. 247 | """ 248 | for step in self.step_list: 249 | step.clean() 250 | -------------------------------------------------------------------------------- /actk/bin/cli.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | This script will convert all the steps into CLI callables. 6 | 7 | You should not edit this script. 
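Example invocations of the resulting CLI (taken from the README; the dataset
path is a placeholder):

    actk all run --dataset /path/to/dataset.csv
    actk standardizefovarray run --dataset /path/to/dataset.csv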
8 | """ 9 | 10 | import inspect 11 | import logging 12 | from unittest import mock 13 | 14 | import fire 15 | 16 | from actk import steps 17 | from actk.bin.all import All 18 | 19 | ############################################################################### 20 | 21 | log = logging.getLogger() 22 | logging.basicConfig( 23 | level=logging.INFO, format="[%(levelname)4s:%(lineno)4s %(asctime)s] %(message)s" 24 | ) 25 | 26 | ############################################################################### 27 | 28 | 29 | def cli(): 30 | step_map = { 31 | name.lower(): step 32 | for name, step in inspect.getmembers(steps) 33 | if inspect.isclass(step) 34 | } 35 | 36 | # Interrupt fire print return 37 | with mock.patch("fire.core._PrintResult"): 38 | fire.Fire({**step_map, "all": All}) 39 | 40 | 41 | if __name__ == "__main__": 42 | cli() 43 | -------------------------------------------------------------------------------- /actk/constants.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | class Channels: 6 | NucleusSegmentation = "nucleus_segmentation" 7 | MembraneSegmentation = "membrane_segmentation" 8 | DNA = "dna" 9 | Membrane = "membrane" 10 | Structure = "structure" 11 | Brightfield = "brightfield" 12 | DefaultOrderList = [ 13 | NucleusSegmentation, 14 | MembraneSegmentation, 15 | DNA, 16 | Membrane, 17 | Structure, 18 | Brightfield, 19 | ] 20 | 21 | 22 | class DatasetFields: 23 | CellId = "CellId" 24 | CellIndex = "CellIndex" 25 | FOVId = "FOVId" 26 | SourceReadPath = "SourceReadPath" 27 | NucleusSegmentationReadPath = "NucleusSegmentationReadPath" 28 | MembraneSegmentationReadPath = "MembraneSegmentationReadPath" 29 | ChannelIndexDNA = "ChannelIndexDNA" 30 | ChannelIndexMembrane = "ChannelIndexMembrane" 31 | ChannelIndexStructure = "ChannelIndexStructure" 32 | ChannelIndexBrightfield = "ChannelIndexBrightfield" 33 | ChannelIndexNucleusSegmentation = "ChannelIndexNucleusSegmentation" 34 | ChannelIndexMembraneSegmentation = "ChannelIndexMembraneSegmentation" 35 | StandardizedFOVPath = "StandardizedFOVPath" 36 | CellFeaturesPath = "CellFeaturesPath" 37 | CellImage3DPath = "CellImage3DPath" 38 | CellImage2DAllProjectionsPath = "CellImage2DAllProjectionsPath" 39 | CellImage2DYXProjectionPath = "CellImage2DYXProjectionPath" 40 | DiagnosticSheetPath = "DiagnosticSheetPath" 41 | AllExpectedInputs = [ 42 | CellId, 43 | CellIndex, 44 | FOVId, 45 | SourceReadPath, 46 | NucleusSegmentationReadPath, 47 | MembraneSegmentationReadPath, 48 | ChannelIndexDNA, 49 | ChannelIndexMembrane, 50 | ChannelIndexStructure, 51 | ChannelIndexBrightfield, 52 | ChannelIndexNucleusSegmentation, 53 | ChannelIndexMembraneSegmentation, 54 | ] 55 | -------------------------------------------------------------------------------- /actk/exceptions.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from typing import List, Union 5 | 6 | import dask.dataframe as dd 7 | import pandas as pd 8 | 9 | ############################################################################### 10 | 11 | 12 | class MissingDataError(Exception): 13 | def __init__( 14 | self, dataset: Union[pd.DataFrame, dd.DataFrame], missing_fields: List[str] 15 | ): 16 | # Run base exception init 17 | super().__init__() 18 | 19 | # Store params for display 20 | self.dataset = dataset 21 | self.missing_fields = missing_fields 22 | 23 | def __str__(self): 24 | return ( 25 | 
f"Dataset provided does not have the required columns for this operation. " 26 | f"Missing fields: {self.missing_fields} " 27 | ) 28 | -------------------------------------------------------------------------------- /actk/steps/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .diagnostic_sheets import DiagnosticSheets 4 | from .raw import Raw 5 | from .single_cell_features import SingleCellFeatures 6 | from .single_cell_images import SingleCellImages 7 | from .standardize_fov_array import StandardizeFOVArray 8 | 9 | __all__ = [ 10 | "Raw", 11 | "SingleCellFeatures", 12 | "StandardizeFOVArray", 13 | "SingleCellImages", 14 | "DiagnosticSheets", 15 | ] 16 | -------------------------------------------------------------------------------- /actk/steps/diagnostic_sheets/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .diagnostic_sheets import DiagnosticSheets # noqa: F401 4 | 5 | __all__ = ["DiagnosticSheets"] 6 | -------------------------------------------------------------------------------- /actk/steps/diagnostic_sheets/diagnostic_sheets.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import json 5 | import logging 6 | from pathlib import Path 7 | from typing import List, NamedTuple, Optional, Union 8 | 9 | import aicsimageio 10 | import dask.dataframe as dd 11 | import matplotlib.image as mpimg 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import pandas as pd 15 | from aics_dask_utils import DistributedHandler 16 | from datastep import Step, log_run_params 17 | 18 | from ...constants import DatasetFields 19 | from ...utils import dataset_utils 20 | from ..single_cell_images import SingleCellImages 21 | 22 | plt.style.use("dark_background") 23 | 24 | ############################################################################### 25 | 26 | log = logging.getLogger(__name__) 27 | 28 | ############################################################################### 29 | 30 | REQUIRED_DATASET_FIELDS = [ 31 | DatasetFields.CellId, 32 | DatasetFields.CellImage2DAllProjectionsPath, 33 | ] 34 | 35 | 36 | class DiagnosticSheetResult(NamedTuple): 37 | cell_id: Union[int, str] 38 | save_path: Optional[Path] = None 39 | 40 | 41 | class DiagnosticSheetError(NamedTuple): 42 | cell_id: Union[int, str] 43 | error: str 44 | 45 | 46 | ############################################################################### 47 | 48 | 49 | class DiagnosticSheets(Step): 50 | def __init__( 51 | self, 52 | direct_upstream_tasks: List["Step"] = [SingleCellImages], 53 | filepath_columns=[DatasetFields.DiagnosticSheetPath], 54 | **kwargs, 55 | ): 56 | super().__init__( 57 | direct_upstream_tasks=direct_upstream_tasks, 58 | filepath_columns=filepath_columns, 59 | **kwargs, 60 | ) 61 | 62 | @staticmethod 63 | def _save_plot( 64 | dataset: pd.DataFrame, 65 | metadata: str, 66 | metadata_value: str, 67 | number_of_subplots: int, 68 | feature: Optional[str] = None, 69 | fig_width: Optional[int] = None, 70 | fig_height: Optional[int] = None, 71 | ): 72 | 73 | log.info(f"Beginning diagnostic sheet generation for {metadata_value}") 74 | 75 | # Choose columns and rows 76 | columns = int(np.sqrt(number_of_subplots) + 0.5) 77 | rows = columns + 1 78 | 79 | # Set figure size 80 | if not fig_width: 81 | fig_width = columns * 7 82 | if not fig_height: 83 | 
fig_height = rows * 5 84 | 85 | # Set subplots 86 | fig, ax_array = plt.subplots( 87 | rows, 88 | columns, 89 | squeeze=False, 90 | figsize=(fig_height, fig_width), 91 | ) 92 | 93 | for row_index, row in dataset.iterrows(): 94 | this_axes = ax_array.flatten()[row_index] 95 | 96 | # Load feature to plot if feature 97 | if feature: 98 | with open(row[DatasetFields.CellFeaturesPath]) as f: 99 | cell_features = json.load(f) 100 | title = "CellId: {0}, {1} {2}: {3}".format( 101 | row[DatasetFields.CellId], 102 | "\n", 103 | feature, 104 | cell_features[feature], 105 | ) 106 | this_axes.set_title(title) 107 | else: 108 | this_axes.set_title(f"CellID: {row[DatasetFields.CellId]}") 109 | 110 | # Read AllProjections Image 111 | img = mpimg.imread(row[DatasetFields.CellImage2DAllProjectionsPath]) 112 | this_axes.imshow(img) 113 | this_axes.set_aspect(1) 114 | 115 | # Need to do this outside the loop because sometimes number 116 | # of rows < number of axes subplots 117 | [ax.axis("off") for ax in ax_array.flatten()] 118 | 119 | # Save figure 120 | ax_array.flatten()[0].get_figure().savefig( 121 | dataset[DatasetFields.DiagnosticSheetPath + str(metadata)][0] 122 | ) 123 | 124 | # Close figure, otherwise clogs memory 125 | plt.close(fig) 126 | log.info(f"Completed diagnostic sheet generation for" f"{metadata_value}") 127 | 128 | @staticmethod 129 | def _collect_group( 130 | row_index: int, 131 | row: pd.Series, 132 | diagnostic_sheet_dir: Path, 133 | overwrite: bool, 134 | metadata: str, 135 | max_cells: int, 136 | ) -> Union[DiagnosticSheetResult, DiagnosticSheetError]: 137 | # Don't use dask for image reading 138 | aicsimageio.use_dask(False) 139 | 140 | try: 141 | # Get the ultimate end save paths for grouped plot 142 | if row[str(metadata)] or row[str(metadata)] == 0: 143 | assert DatasetFields.CellImage2DAllProjectionsPath in row.index 144 | save_path_index = int( 145 | np.ceil((row["SubplotNumber" + str(metadata)] + 1) / max_cells) 146 | ) 147 | # np ceil for 0 = 0 148 | if save_path_index == 0: 149 | save_path_index = 1 150 | 151 | # Clean metadata name of spaces 152 | cleaned_metadata_name = str(row[str(metadata)]).replace(" ", "-") 153 | save_path = ( 154 | diagnostic_sheet_dir / f"{metadata}" 155 | f"_{cleaned_metadata_name}" 156 | f"_{save_path_index}.png" 157 | ) 158 | 159 | log.info( 160 | f"Collecting diagnostic sheet path for cell ID: {row.CellId}, " 161 | f"{metadata}: {row[str(metadata)]}" 162 | ) 163 | else: 164 | # else no path to save 165 | save_path = None 166 | 167 | # Check skip 168 | if not overwrite and save_path.is_file(): 169 | log.info( 170 | f"Skipping diagnostic sheet path for cell ID: {row.CellId}, " 171 | f"{metadata}: {row[str(metadata)]}" 172 | ) 173 | return DiagnosticSheetResult(row.CellId, None) 174 | 175 | # Return ready to save image 176 | return DiagnosticSheetResult(row.CellId, str(save_path)) 177 | # Catch and return error 178 | except Exception as e: 179 | log.info( 180 | f"Failed to retrieve the CellImage2DAllProjectionsPath" 181 | f"for cell ID: {row.CellId}," 182 | f"{metadata} {row[str(metadata)]}" 183 | f"Error: {e}" 184 | ) 185 | return DiagnosticSheetError(row.CellId, str(e)) 186 | 187 | @log_run_params 188 | def run( 189 | self, 190 | dataset: Union[str, Path, pd.DataFrame, dd.DataFrame], 191 | max_cells: int = 200, 192 | metadata: Optional[Union[list, str]] = DatasetFields.FOVId, 193 | feature: Optional[str] = None, 194 | fig_width: Optional[int] = None, 195 | fig_height: Optional[int] = None, 196 | distributed_executor_address: Optional[str] = None, 
197 | batch_size: Optional[int] = None, 198 | overwrite: bool = False, 199 | **kwargs, 200 | ): 201 | """ 202 | Provided a dataset of single cell all projection images, generate a diagnostic 203 | sheet grouped by desired metadata and feature 204 | 205 | Parameters 206 | ---------- 207 | dataset: Union[str, Path, pd.DataFrame, dd.DataFrame] 208 | The primary cell dataset to use for generating 209 | diagnistic sheet for a group of cells. 210 | 211 | **Required dataset columns:** *["CellId", "CellImage2DAllProjectionsPath"]* 212 | 213 | max_cells: int 214 | The maximum number of cells to display on a single diagnostic sheet. 215 | Deafult: 200 216 | 217 | metadata: Optional[Union[list, str]] 218 | The metadata to group cells and generate a diagnostic sheet. 219 | For example, "FOVId" or "["FOVId", "ProteinDisplayName"]" 220 | Default: "FOVId" 221 | 222 | feature: Optional[str] 223 | The name of the single cell feature to display. For example, "imsize_orig". 224 | 225 | fig_width: Optional[int] 226 | Width of the diagnostic sheet figure. 227 | 228 | fig_height: Optional[int] 229 | Height of the diagnostic sheet figure. 230 | 231 | distributed_executor_address: Optional[str] 232 | An optional executor address to pass to some computation engine. 233 | Default: None 234 | 235 | batch_size: Optional[int] 236 | An optional batch size to process n features at a time. 237 | Default: None (Process all at once) 238 | 239 | overwrite: bool 240 | If this step has already partially or completely run, should it overwrite 241 | the previous files or not. 242 | Default: False (Do not overwrite or regenerate files) 243 | 244 | Returns 245 | ------- 246 | manifest_save_path: Path 247 | Path to the produced manifest with the DiagnosticSheetPath column added. 248 | """ 249 | if isinstance(dataset, (str, Path)): 250 | dataset = Path(dataset).expanduser().resolve(strict=True) 251 | 252 | # Read dataset 253 | dataset = pd.read_csv(dataset) 254 | 255 | # Check dataset and manifest have required fields 256 | dataset_utils.check_required_fields( 257 | dataset=dataset, 258 | required_fields=REQUIRED_DATASET_FIELDS, 259 | ) 260 | 261 | # Create save directories 262 | diagnostic_sheet_dir = self.step_local_staging_dir / "diagnostic_sheets" 263 | diagnostic_sheet_dir.mkdir(exist_ok=True) 264 | 265 | # Create empty manifest 266 | manifest = { 267 | DatasetFields.DiagnosticSheetPath: [], 268 | } 269 | 270 | # Check for metadata 271 | if metadata: 272 | # Make metadata a list 273 | metadata = metadata if isinstance(metadata, list) else [metadata] 274 | 275 | # Make an empty list of grouped_datasets to collect and 276 | # then distribute via Dask for plotting 277 | all_grouped_datasets = [] 278 | all_metadata = [] 279 | all_metadata_values = [] 280 | all_subplot_numbers = [] 281 | 282 | # Process each row 283 | for j, this_metadata in enumerate(metadata): 284 | 285 | # Add some helper columns for subsequent analysis 286 | helper_dataset = pd.DataFrame() 287 | 288 | for unique_metadata_value in dataset[this_metadata].unique(): 289 | dataset_subgroup = dataset.loc[ 290 | dataset[this_metadata] == unique_metadata_value 291 | ] 292 | # "SubplotNumber" + str(this_metadata) + "/MaxCells" is a new column 293 | # which will help iterate through subplots to add to a figure 294 | dataset_subgroup.insert( 295 | 2, 296 | "SubplotNumber" + str(this_metadata) + "/MaxCells", 297 | dataset_subgroup.groupby(this_metadata)["CellId"].transform( 298 | lambda x: ((~x.duplicated()).cumsum() - 1) % max_cells 299 | ), 300 | True, 301 | ) 302 | 303 
| # "SubplotNumber" + str(this_metadata) is a new column 304 | # which will help in the _collect group method to identify 305 | # diagnostic sheet save paths per CellId 306 | dataset_subgroup.insert( 307 | 2, 308 | "SubplotNumber" + str(this_metadata), 309 | dataset_subgroup.groupby(this_metadata)["CellId"].transform( 310 | lambda x: ((~x.duplicated()).cumsum() - 1) 311 | ), 312 | True, 313 | ) 314 | 315 | helper_dataset = helper_dataset.append(dataset_subgroup) 316 | 317 | dataset = helper_dataset 318 | # Done creating helper columns 319 | 320 | # Create empty diagnostic sheet result dataset and errors 321 | diagnostic_sheet_result_dataset = [] 322 | errors = [] 323 | 324 | with DistributedHandler(distributed_executor_address) as handler: 325 | # First, lets collect all the diagnostic sheet save paths 326 | # per CellId. These are collected based on this_metadata 327 | # and max_cells 328 | diagnostic_sheet_result = handler.batched_map( 329 | self._collect_group, 330 | # Convert dataframe iterrows into two lists of items to iterate 331 | # One list will be row index 332 | # One list will be the pandas series of every row 333 | *zip(*list(dataset.iterrows())), 334 | [diagnostic_sheet_dir for i in range(len(dataset))], 335 | [overwrite for i in range(len(dataset))], 336 | [this_metadata for i in range(len(dataset))], 337 | [max_cells for i in range(len(dataset))], 338 | ) 339 | # Generate diagnostic sheet dataset rows 340 | for r in diagnostic_sheet_result: 341 | if isinstance(r, DiagnosticSheetResult): 342 | diagnostic_sheet_result_dataset.append( 343 | { 344 | DatasetFields.CellId: r.cell_id, 345 | DatasetFields.DiagnosticSheetPath 346 | + str(this_metadata): r.save_path, 347 | } 348 | ) 349 | else: 350 | errors.append( 351 | {DatasetFields.CellId: r.cell_id, "Error": r.error} 352 | ) 353 | 354 | # Convert diagnostic sheet paths rows to dataframe 355 | diagnostic_sheet_result_dataset = pd.DataFrame( 356 | diagnostic_sheet_result_dataset 357 | ) 358 | 359 | # Drop the various diagnostic sheet columns if they already exist 360 | # Check at j = 0 because the path will exist at j > 1 if 361 | # multiple metadata 362 | drop_columns = [] 363 | if ( 364 | DatasetFields.DiagnosticSheetPath + str(this_metadata) 365 | in dataset.columns 366 | ): 367 | drop_columns.append( 368 | DatasetFields.DiagnosticSheetPath + str(this_metadata) 369 | ) 370 | 371 | dataset = dataset.drop(columns=drop_columns) 372 | 373 | # Update manifest with these paths if there is data 374 | if len(diagnostic_sheet_result_dataset) > 0: 375 | 376 | # Join original dataset to the fov paths 377 | dataset = dataset.merge( 378 | diagnostic_sheet_result_dataset, 379 | on=DatasetFields.CellId, 380 | ) 381 | 382 | # Reset index in dataset 383 | if j == 0: 384 | dataset.dropna().reset_index(inplace=True) 385 | 386 | # Update manifest with these saved paths 387 | this_metadata_paths = dataset[ 388 | DatasetFields.DiagnosticSheetPath + str(this_metadata) 389 | ].unique() 390 | 391 | for this_path in this_metadata_paths: 392 | if this_path not in manifest[DatasetFields.DiagnosticSheetPath]: 393 | manifest[DatasetFields.DiagnosticSheetPath].append( 394 | this_path 395 | ) 396 | 397 | # Save errored cells to JSON 398 | with open( 399 | self.step_local_staging_dir / "errors.json", "w" 400 | ) as write_out: 401 | json.dump(errors, write_out) 402 | 403 | # Group the dataset by this metadata and the saved 404 | # diagnostic sheet paths (there can be many different save paths) 405 | # per metadata value (if max_cells < number of items of 406 | 
# this_metadata) 407 | grouped_dataset = dataset.groupby( 408 | [ 409 | str(this_metadata), 410 | DatasetFields.DiagnosticSheetPath + str(this_metadata), 411 | ] 412 | )["SubplotNumber" + str(this_metadata) + "/MaxCells"] 413 | 414 | # Get maximum values of the subplot numbers in this 415 | # grouped dataset. This will tell us the shape of the figure 416 | # to make 417 | grouped_max = grouped_dataset.max() 418 | 419 | # Loop through metadata value and max number of subplots 420 | for metadata_value, number_of_subplots in grouped_max.items(): 421 | 422 | # Total num of subplots = subplots + 1 423 | number_of_subplots = number_of_subplots + 1 424 | 425 | # Get this metadata group from the original dataset 426 | this_metadata_value_dataset = grouped_dataset.get_group( 427 | metadata_value, dataset 428 | ) 429 | 430 | # reset index 431 | this_metadata_value_dataset.reset_index(inplace=True) 432 | 433 | # Append to related lists for Dask distributed plotting 434 | # of all groups 435 | all_grouped_datasets.append(this_metadata_value_dataset) 436 | all_metadata.append(this_metadata) 437 | all_metadata_values.append(metadata_value) 438 | all_subplot_numbers.append(number_of_subplots) 439 | 440 | # Plot each diagnostic sheet 441 | with DistributedHandler(distributed_executor_address) as handler: 442 | # Start processing. This will add subplots to the current fig 443 | # axes via dask 444 | handler.batched_map( 445 | self._save_plot, 446 | # Convert dataframe iterrows into two lists of items to 447 | # iterate. One list will be row index 448 | # One list will be the pandas series of every row 449 | [dataset for dataset in all_grouped_datasets], 450 | [metadata for metadata in all_metadata], 451 | [metadata_value for metadata_value in all_metadata_values], 452 | [number_of_subplots for number_of_subplots in all_subplot_numbers], 453 | [feature for i in range(len(all_grouped_datasets))], 454 | [fig_width for i in range(len(all_grouped_datasets))], 455 | [fig_height for i in range(len(all_grouped_datasets))], 456 | ) 457 | 458 | self.manifest = pd.DataFrame(manifest) 459 | 460 | else: 461 | # If no metadata, just return input manifest 462 | self.manifest = dataset 463 | 464 | # Save manifest to CSV 465 | manifest_save_path = self.step_local_staging_dir / "manifest.csv" 466 | self.manifest.to_csv(manifest_save_path, index=False) 467 | 468 | return manifest_save_path 469 | -------------------------------------------------------------------------------- /actk/steps/raw/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .raw import Raw # noqa: F401 4 | 5 | __all__ = ["Raw"] 6 | -------------------------------------------------------------------------------- /actk/steps/raw/raw.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import logging 5 | from pathlib import Path 6 | from typing import Union 7 | 8 | import dask.dataframe as dd 9 | import pandas as pd 10 | from datastep import Step, log_run_params 11 | 12 | from ...constants import DatasetFields 13 | from ...utils import dataset_utils 14 | 15 | ############################################################################### 16 | 17 | log = logging.getLogger(__name__) 18 | 19 | ############################################################################### 20 | 21 | # This is the merge of all other steps required fields. 
22 | # The reasoning here is that the user will only want to upload the raw data 23 | # if they are doing a full pipeline run 24 | REQUIRED_DATASET_FIELDS = DatasetFields.AllExpectedInputs 25 | 26 | ############################################################################### 27 | 28 | 29 | class Raw(Step): 30 | def __init__( 31 | self, 32 | filepath_columns=[ 33 | DatasetFields.SourceReadPath, 34 | DatasetFields.NucleusSegmentationReadPath, 35 | DatasetFields.MembraneSegmentationReadPath, 36 | ], 37 | metadata_columns=[DatasetFields.FOVId], 38 | **kwargs, 39 | ): 40 | super().__init__( 41 | filepath_columns=filepath_columns, 42 | metadata_columns=metadata_columns, 43 | **kwargs, 44 | ) 45 | 46 | @log_run_params 47 | def run(self, dataset: Union[str, Path, pd.DataFrame, dd.DataFrame], **kwargs): 48 | """ 49 | Simple passthrough to store the dataset in local_staging/raw. 50 | This does not copy any of the image files to local_staging/raw, only the manifest. 51 | This is an optional step that will only run if you want to upload the raw data. 52 | 53 | Parameters 54 | ---------- 55 | dataset: Union[str, Path, pd.DataFrame, dd.DataFrame] 56 | The dataset to use for the rest of the pipeline run. 57 | 58 | **Required dataset columns:** *["CellId", "CellIndex", "FOVId", 59 | "SourceReadPath", "NucleusSegmentationReadPath", 60 | "MembraneSegmentationReadPath", "ChannelIndexDNA", "ChannelIndexMembrane", 61 | "ChannelIndexStructure", "ChannelIndexBrightfield"]* 62 | 63 | Returns 64 | ------- 65 | manifest_save_path: Path 66 | The path to the manifest in local_staging with the raw data. 67 | """ 68 | if isinstance(dataset, (str, Path)): 69 | dataset = Path(dataset).expanduser().resolve(strict=True) 70 | 71 | # Read dataset 72 | dataset = pd.read_csv(dataset) 73 | 74 | # Check dataset and manifest have required fields 75 | dataset_utils.check_required_fields( 76 | dataset=dataset, 77 | required_fields=REQUIRED_DATASET_FIELDS, 78 | ) 79 | 80 | # Save manifest to CSV 81 | self.manifest = dataset 82 | manifest_save_path = self.step_local_staging_dir / "manifest.csv" 83 | self.manifest.to_csv(manifest_save_path, index=False) 84 | 85 | return manifest_save_path 86 | -------------------------------------------------------------------------------- /actk/steps/single_cell_features/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .single_cell_features import SingleCellFeatures # noqa: F401 4 | 5 | __all__ = ["SingleCellFeatures"] 6 | -------------------------------------------------------------------------------- /actk/steps/single_cell_features/single_cell_features.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import json 5 | import logging 6 | from pathlib import Path 7 | from typing import NamedTuple, Optional, Union 8 | 9 | import aicsimageio 10 | import dask.dataframe as dd 11 | import pandas as pd 12 | from aics_dask_utils import DistributedHandler 13 | from aicsimageio import AICSImage 14 | from datastep import Step, log_run_params 15 | 16 | from ...constants import DatasetFields 17 | from ...utils import dataset_utils, image_utils 18 | from ..standardize_fov_array import StandardizeFOVArray 19 | 20 | ############################################################################### 21 | 22 | log = logging.getLogger(__name__) 23 | 24 | ############################################################################### 25 |
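# -----------------------------------------------------------------------------
# Editor's note: the sketch below is illustrative and is not part of the
# original module. Every actk step follows the same validation pattern --
# declare the manifest columns it needs in REQUIRED_DATASET_FIELDS (as defined
# just below) and call dataset_utils.check_required_fields() near the top of
# run(); a missing column raises actk.exceptions.MissingDataError. A minimal,
# hypothetical check outside of any step might look like this:
if __name__ == "__main__":  # guard so the sketch never runs on import
    import pandas as pd

    from actk.utils import dataset_utils

    toy_manifest = pd.DataFrame({"CellId": [1], "CellIndex": [1], "FOVId": [10]})

    # "StandardizedFOVPath" is required by this step but absent from the toy
    # manifest, so this call raises actk.exceptions.MissingDataError.
    dataset_utils.check_required_fields(
        dataset=toy_manifest,
        required_fields=["CellId", "CellIndex", "FOVId", "StandardizedFOVPath"],
    )
# -----------------------------------------------------------------------------
26 |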
REQUIRED_DATASET_FIELDS = [ 27 | DatasetFields.CellId, 28 | DatasetFields.CellIndex, 29 | DatasetFields.FOVId, 30 | DatasetFields.StandardizedFOVPath, 31 | ] 32 | 33 | 34 | class SingleCellFeaturesResult(NamedTuple): 35 | cell_id: Union[int, str] 36 | path: Path 37 | 38 | 39 | class SingleCellFeaturesError(NamedTuple): 40 | cell_id: int 41 | error: str 42 | 43 | 44 | ############################################################################### 45 | 46 | 47 | class SingleCellFeatures(Step): 48 | def __init__( 49 | self, 50 | direct_upstream_tasks=[StandardizeFOVArray], 51 | filepath_columns=[DatasetFields.CellFeaturesPath], 52 | **kwargs, 53 | ): 54 | super().__init__( 55 | direct_upstream_tasks=direct_upstream_tasks, 56 | filepath_columns=filepath_columns, 57 | **kwargs, 58 | ) 59 | 60 | @staticmethod 61 | def _generate_single_cell_features( 62 | row_index: int, 63 | row: pd.Series, 64 | cell_ceiling_adjustment: int, 65 | save_dir: Path, 66 | overwrite: bool, 67 | ) -> Union[SingleCellFeaturesResult, SingleCellFeaturesError]: 68 | # Don't use dask for image reading 69 | aicsimageio.use_dask(False) 70 | 71 | # Get the ultimate end save path for this cell 72 | save_path = save_dir / f"{row.CellId}.json" 73 | 74 | # Check skip 75 | if not overwrite and save_path.is_file(): 76 | log.info(f"Skipping cell feature generation for Cell Id: {row.CellId}") 77 | return SingleCellFeaturesResult(row.CellId, save_path) 78 | 79 | # Overwrite or didn't exist 80 | log.info(f"Beginning cell feature generation for CellId: {row.CellId}") 81 | 82 | # Wrap errors for debugging later 83 | try: 84 | # Read the standardized FOV 85 | image = AICSImage(row.StandardizedFOVPath) 86 | 87 | # Preload image data 88 | image.data 89 | 90 | # Select and adjust cell shape ceiling for this cell 91 | adjusted = image_utils.select_and_adjust_segmentation_ceiling( 92 | image=image.get_image_data("CYXZ", S=0, T=0), 93 | cell_index=row.CellIndex, 94 | cell_ceiling_adjustment=cell_ceiling_adjustment, 95 | ) 96 | 97 | # Crop the FOV to the segmentation portions 98 | cropped = image_utils.crop_raw_channels_with_segmentation( 99 | image=adjusted, 100 | channels=image.get_channel_names(), 101 | ) 102 | 103 | # Generate features 104 | features = image_utils.get_features_from_image(cropped) 105 | 106 | # Save to JSON 107 | with open(save_path, "w") as write_out: 108 | json.dump(features, write_out) 109 | 110 | log.info(f"Completed cell feature generation for CellId: {row.CellId}") 111 | return SingleCellFeaturesResult(row.CellId, save_path) 112 | 113 | # Catch and return error 114 | except Exception as e: 115 | log.info( 116 | f"Failed cell feature generation for CellId: {row.CellId}. Error: {e}" 117 | ) 118 | return SingleCellFeaturesError(row.CellId, str(e)) 119 | 120 | @log_run_params 121 | def run( 122 | self, 123 | dataset: Union[str, Path, pd.DataFrame, dd.DataFrame], 124 | cell_ceiling_adjustment: int = 0, 125 | distributed_executor_address: Optional[str] = None, 126 | batch_size: Optional[int] = None, 127 | overwrite: bool = False, 128 | **kwargs, 129 | ): 130 | """ 131 | Provided a dataset generate a features JSON file for each cell. 132 | 133 | Parameters 134 | ---------- 135 | dataset: Union[str, Path, pd.DataFrame, dd.DataFrame] 136 | The primary cell dataset to use for generating features JSON for each cell. 137 | 138 | **Required dataset columns:** *["CellId", "CellIndex", "FOVId", 139 | "StandardizedFOVPath"]* 140 | 141 | cell_ceiling_adjustment: int 142 | The adjust to use for raising the cell shape ceiling. 
If <= 0, this will be 143 | ignored and cell data will be selected but not adjusted. 144 | Default: 0 145 | 146 | distributed_executor_address: Optional[str] 147 | An optional executor address to pass to some computation engine. 148 | Default: None 149 | 150 | batch_size: Optional[int] 151 | An optional batch size to process n features at a time. 152 | Default: None (Process all at once) 153 | 154 | overwrite: bool 155 | If this step has already partially or completely run, should it overwrite 156 | the previous files or not. 157 | Default: False (Do not overwrite or regenerate files) 158 | 159 | Returns 160 | ------- 161 | manifest_save_path: Path 162 | Path to the produced manifest with the CellFeaturesPath column added. 163 | """ 164 | # Handle dataset provided as string or path 165 | if isinstance(dataset, (str, Path)): 166 | dataset = Path(dataset).expanduser().resolve(strict=True) 167 | 168 | # Read dataset 169 | dataset = pd.read_csv(dataset) 170 | 171 | # Check dataset and manifest have required fields 172 | dataset_utils.check_required_fields( 173 | dataset=dataset, 174 | required_fields=REQUIRED_DATASET_FIELDS, 175 | ) 176 | 177 | # Create features directory 178 | features_dir = self.step_local_staging_dir / "cell_features" 179 | features_dir.mkdir(exist_ok=True) 180 | 181 | # Process each row 182 | with DistributedHandler(distributed_executor_address) as handler: 183 | # Start processing 184 | results = handler.batched_map( 185 | self._generate_single_cell_features, 186 | # Convert dataframe iterrows into two lists of items to iterate over 187 | # One list will be row index 188 | # One list will be the pandas series of every row 189 | *zip(*list(dataset.iterrows())), 190 | # Pass the other parameters as list of the same thing for each 191 | # mapped function call 192 | [cell_ceiling_adjustment for i in range(len(dataset))], 193 | [features_dir for i in range(len(dataset))], 194 | [overwrite for i in range(len(dataset))], 195 | batch_size=batch_size, 196 | ) 197 | 198 | # Generate features paths rows 199 | cell_features_dataset = [] 200 | errors = [] 201 | for result in results: 202 | if isinstance(result, SingleCellFeaturesResult): 203 | cell_features_dataset.append( 204 | { 205 | DatasetFields.CellId: result.cell_id, 206 | DatasetFields.CellFeaturesPath: result.path, 207 | } 208 | ) 209 | else: 210 | errors.append( 211 | {DatasetFields.CellId: result.cell_id, "Error": result.error} 212 | ) 213 | 214 | # Convert features paths rows to dataframe 215 | cell_features_dataset = pd.DataFrame(cell_features_dataset) 216 | 217 | # Drop CellFeaturesPath column if it already exists 218 | if DatasetFields.CellFeaturesPath in dataset.columns: 219 | dataset = dataset.drop(columns=[DatasetFields.CellFeaturesPath]) 220 | 221 | # Join original dataset to the fov paths 222 | self.manifest = dataset.merge(cell_features_dataset, on=DatasetFields.CellId) 223 | 224 | # Save manifest to CSV 225 | manifest_save_path = self.step_local_staging_dir / "manifest.csv" 226 | self.manifest.to_csv(manifest_save_path, index=False) 227 | 228 | # Save errored cells to JSON 229 | with open(self.step_local_staging_dir / "errors.json", "w") as write_out: 230 | json.dump(errors, write_out) 231 | 232 | return manifest_save_path 233 | -------------------------------------------------------------------------------- /actk/steps/single_cell_images/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .single_cell_images import 
SingleCellImages # noqa: F401 4 | 5 | __all__ = ["SingleCellImages"] 6 | -------------------------------------------------------------------------------- /actk/steps/single_cell_images/single_cell_images.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import json 5 | import logging 6 | from pathlib import Path 7 | from typing import List, NamedTuple, Optional, Union 8 | 9 | import aicsimageio 10 | import aicsimageprocessing as proc 11 | import dask.dataframe as dd 12 | import numpy as np 13 | import pandas as pd 14 | from aics_dask_utils import DistributedHandler 15 | from aicsimageio import AICSImage, transforms 16 | from aicsimageio.writers import OmeTiffWriter 17 | from datastep import Step, log_run_params 18 | from imageio import imwrite 19 | 20 | from ...constants import Channels, DatasetFields 21 | from ...utils import dataset_utils, image_utils 22 | from ..single_cell_features import SingleCellFeatures 23 | 24 | ############################################################################### 25 | 26 | log = logging.getLogger(__name__) 27 | 28 | ############################################################################### 29 | 30 | REQUIRED_DATASET_FIELDS = [ 31 | DatasetFields.CellId, 32 | DatasetFields.StandardizedFOVPath, 33 | DatasetFields.CellFeaturesPath, 34 | ] 35 | 36 | 37 | class CellImagesResult(NamedTuple): 38 | cell_id: Union[int, str] 39 | path_3d: Path 40 | path_2d_all_proj: Path 41 | path_2d_yx_proj: Path 42 | 43 | 44 | class CellImagesError(NamedTuple): 45 | cell_id: Union[int, str] 46 | error: str 47 | 48 | 49 | ############################################################################### 50 | 51 | 52 | class SingleCellImages(Step): 53 | def __init__( 54 | self, 55 | direct_upstream_tasks=[SingleCellFeatures], 56 | filepath_columns=[ 57 | DatasetFields.CellImage3DPath, 58 | DatasetFields.CellImage2DAllProjectionsPath, 59 | DatasetFields.CellImage2DYXProjectionPath, 60 | ], 61 | **kwargs, 62 | ): 63 | super().__init__( 64 | direct_upstream_tasks=direct_upstream_tasks, 65 | filepath_columns=filepath_columns, 66 | **kwargs, 67 | ) 68 | 69 | @staticmethod 70 | def _get_registered_image_size(row_index: int, row: pd.Series) -> List[int]: 71 | # Open cell features JSON 72 | with open(row.CellFeaturesPath, "r") as read_in: 73 | cell_features = json.load(read_in) 74 | 75 | # Return registered image size 76 | return cell_features["imsize_registered"] 77 | 78 | @staticmethod 79 | def _generate_single_cell_images( 80 | row_index: int, 81 | row: pd.Series, 82 | cell_ceiling_adjustment: int, 83 | bounding_box: np.ndarray, 84 | projection_method: str, 85 | cell_images_3d_dir: Path, 86 | cell_images_2d_all_proj_dir: Path, 87 | cell_images_2d_yx_proj_dir: Path, 88 | overwrite: bool, 89 | ) -> Union[CellImagesResult, CellImagesError]: 90 | # Don't use dask for image reading 91 | aicsimageio.use_dask(False) 92 | 93 | # Get the ultimate end save paths for this cell 94 | cell_image_3d_save_path = cell_images_3d_dir / f"{row.CellId}.ome.tiff" 95 | cell_image_2d_all_proj_save_path = ( 96 | cell_images_2d_all_proj_dir / f"{row.CellId}.png" 97 | ) 98 | cell_image_2d_yx_proj_save_path = ( 99 | cell_images_2d_yx_proj_dir / f"{row.CellId}.png" 100 | ) 101 | 102 | # Check skip 103 | if ( 104 | not overwrite 105 | # Only skip if all images exist for this cell 106 | and all( 107 | p.is_file() 108 | for p in [ 109 | cell_image_3d_save_path, 110 | cell_image_2d_all_proj_save_path, 111 | 
cell_image_2d_yx_proj_save_path, 112 | ] 113 | ) 114 | ): 115 | log.info(f"Skipping single cell image generation for CellId: {row.CellId}") 116 | return CellImagesResult( 117 | row.CellId, 118 | cell_image_3d_save_path, 119 | cell_image_2d_all_proj_save_path, 120 | cell_image_2d_yx_proj_save_path, 121 | ) 122 | 123 | # Overwrite or didn't exist 124 | log.info(f"Beginning single cell image generation for CellId: {row.CellId}") 125 | 126 | # Wrap errors for debugging later 127 | try: 128 | # Initialize image object with standardized FOV 129 | standardized_image = AICSImage(row.StandardizedFOVPath) 130 | channels = standardized_image.get_channel_names() 131 | 132 | # Preload image data 133 | standardized_image.data 134 | 135 | # Select and adjust cell shape ceiling for this cell 136 | image = image_utils.select_and_adjust_segmentation_ceiling( 137 | # Unlike most other operations, we can read in normal "CZYX" dimension 138 | # order here as all future operations are expecting it 139 | image=standardized_image.get_image_data("CYXZ", S=0, T=0), 140 | cell_index=row.CellIndex, 141 | cell_ceiling_adjustment=cell_ceiling_adjustment, 142 | ) 143 | 144 | # Perform a rigid registration on the image 145 | image, _, _ = proc.cell_rigid_registration( 146 | image, 147 | # Reorder bounding box as image is currently CYXZ 148 | bbox_size=bounding_box[[0, 2, 3, 1]], 149 | ) 150 | 151 | # Reduce size 152 | crop_3d = image * 255 153 | crop_3d = crop_3d.astype(np.uint8) 154 | 155 | # Transpose to CZYX for saving 156 | crop_3d = transforms.transpose_to_dims(crop_3d, "CYXZ", "CZYX") 157 | 158 | # Save to OME-TIFF 159 | with OmeTiffWriter(cell_image_3d_save_path, overwrite_file=True) as writer: 160 | writer.save( 161 | crop_3d, 162 | dimension_order="CZYX", 163 | channel_names=standardized_image.get_channel_names(), 164 | pixels_physical_size=standardized_image.get_physical_pixel_size(), 165 | ) 166 | 167 | # Generate 2d image projections 168 | # Crop raw channels using segmentations 169 | image = image_utils.crop_raw_channels_with_segmentation(image, channels) 170 | 171 | # Transpose to CZYX for projections 172 | image = transforms.transpose_to_dims(image, "CYXZ", "CZYX") 173 | 174 | # Select the DNA, Membrane, and Structure channels 175 | image = image[ 176 | [ 177 | channels.index(target) 178 | for target in [Channels.DNA, Channels.Membrane, Channels.Structure] 179 | ] 180 | ] 181 | 182 | # Set RGB colors 183 | # This will set: 184 | # DNA to Blue 185 | # Membrane to Red 186 | # Structure to Green 187 | colors = [[0, 0, 1], [1, 0, 0], [0, 1, 0]] 188 | 189 | # Get all axes projection image 190 | all_proj = proc.imgtoprojection( 191 | image, 192 | proj_all=True, 193 | proj_method=projection_method, 194 | local_adjust=False, 195 | global_adjust=True, 196 | colors=colors, 197 | ) 198 | 199 | # Convert to YXC for PNG writing 200 | all_proj = transforms.transpose_to_dims(all_proj, "CYX", "YXC") 201 | 202 | # Drop size to uint8 203 | all_proj = all_proj.astype(np.uint8) 204 | 205 | # Save to PNG 206 | 207 | imwrite(cell_image_2d_all_proj_save_path, all_proj) 208 | 209 | # Get YX axes projection image 210 | yx_proj = proc.imgtoprojection( 211 | image, 212 | proj_all=False, 213 | proj_method=projection_method, 214 | local_adjust=False, 215 | global_adjust=True, 216 | colors=colors, 217 | ) 218 | 219 | # Convert to YXC for PNG writing 220 | yx_proj = transforms.transpose_to_dims(yx_proj, "CYX", "YXC") 221 | 222 | # Drop size to uint8 223 | yx_proj = yx_proj.astype(np.uint8) 224 | 225 | # Save to PNG 226 | 
imwrite(cell_image_2d_yx_proj_save_path, yx_proj) 227 | 228 | log.info(f"Completed single cell image generation for CellId: {row.CellId}") 229 | 230 | # Return ready to save image 231 | return CellImagesResult( 232 | row.CellId, 233 | cell_image_3d_save_path, 234 | cell_image_2d_all_proj_save_path, 235 | cell_image_2d_yx_proj_save_path, 236 | ) 237 | 238 | # Catch and return error 239 | except Exception as e: 240 | log.info( 241 | f"Failed single cell image generation for CellId: {row.CellId}. " 242 | f"Error: {e}" 243 | ) 244 | return CellImagesError(row.CellId, str(e)) 245 | 246 | @log_run_params 247 | def run( 248 | self, 249 | dataset: Union[str, Path, pd.DataFrame, dd.DataFrame], 250 | cell_ceiling_adjustment: int = 0, 251 | bounding_box_percentile: float = 95.0, 252 | projection_method: str = "max", 253 | distributed_executor_address: Optional[str] = None, 254 | batch_size: Optional[int] = None, 255 | overwrite: bool = False, 256 | bbox: Optional[Union[tuple, list, dict]] = None, 257 | **kwargs, 258 | ): 259 | """ 260 | Provided a dataset of cell features and standardized FOV images, generate 3D 261 | single cell crops and 2D projections. 262 | 263 | Parameters 264 | ---------- 265 | dataset: Union[str, Path, pd.DataFrame, dd.DataFrame] 266 | The primary cell dataset to generate 3D single cell images for. 267 | 268 | **Required dataset columns:** *["CellId", "StandardizedFOVPath", 269 | "CellFeaturesPath"]* 270 | 271 | cell_ceiling_adjustment: int 272 | The adjustment to use for raising the cell shape ceiling. If <= 0, this will be 273 | ignored and cell data will be selected but not adjusted. 274 | Default: 0 275 | 276 | bounding_box_percentile: float 277 | A float used to generate the actual bounding box for all cells by finding the 278 | provided percentile of all cell image sizes. 279 | Default: 95.0 280 | 281 | bbox: tuple, list, dict 282 | Hard coded ZYX dimensions to set the bounding box. 283 | Note: This overrides the `bounding_box_percentile` parameter. 284 | Example: (64, 168, 104) 285 | 286 | projection_method: str 287 | The method to use for generating the flat projection. 288 | Default: max 289 | 290 | More details: 291 | https://allencellmodeling.github.io/aicsimageprocessing/aicsimageprocessing.html#aicsimageprocessing.imgToProjection.imgtoprojection 292 | 293 | distributed_executor_address: Optional[str] 294 | An optional executor address to pass to some computation engine. 295 | Default: None 296 | 297 | batch_size: Optional[int] 298 | An optional batch size to process n cells at a time. 299 | Default: None (Process all at once) 300 | 301 | overwrite: bool 302 | If this step has already partially or completely run, should it overwrite 303 | the previous files or not. 304 | Default: False (Do not overwrite or regenerate files) 305 | 306 | Returns 307 | ------- 308 | manifest_save_path: Path 309 | Path to the produced manifest with the various cell image path fields added.
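Example
-------
An illustrative call (editor's sketch); the manifest path below is
hypothetical and depends on your own local staging layout::

    from actk.steps import SingleCellImages

    step = SingleCellImages()
    manifest_path = step.run(
        dataset="single_cell_features_manifest.csv",
        bbox=(64, 168, 104),
    )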
310 | """ 311 | # Handle dataset provided as string or path 312 | if isinstance(dataset, (str, Path)): 313 | dataset = Path(dataset).expanduser().resolve(strict=True) 314 | 315 | # Read dataset 316 | dataset = pd.read_csv(dataset) 317 | 318 | # Check dataset and manifest have required fields 319 | dataset_utils.check_required_fields( 320 | dataset=dataset, 321 | required_fields=REQUIRED_DATASET_FIELDS, 322 | ) 323 | 324 | # Create save directories 325 | cell_images_3d_dir = self.step_local_staging_dir / "cell_images_3d" 326 | cell_images_2d_all_proj_dir = ( 327 | self.step_local_staging_dir / "cell_images_2d_all_proj" 328 | ) 329 | cell_images_2d_yx_proj_dir = ( 330 | self.step_local_staging_dir / "cell_images_2d_yx_proj" 331 | ) 332 | cell_images_3d_dir.mkdir(exist_ok=True) 333 | cell_images_2d_all_proj_dir.mkdir(exist_ok=True) 334 | cell_images_2d_yx_proj_dir.mkdir(exist_ok=True) 335 | 336 | # Process each row 337 | with DistributedHandler(distributed_executor_address) as handler: 338 | # Start processing 339 | bbox_results = handler.batched_map( 340 | self._get_registered_image_size, 341 | # Convert dataframe iterrows into two lists of items to iterate over 342 | # One list will be row index 343 | # One list will be the pandas series of every row 344 | *zip(*list(dataset.iterrows())), 345 | batch_size=batch_size, 346 | ) 347 | 348 | if bbox: 349 | num_of_channels = bbox_results[0][0] 350 | bbox = [num_of_channels] + list(bbox) 351 | bbox_results = [bbox for i in range(len(bbox_results))] 352 | log.info(f"Using hard coded bounding box with ZYX dimensions: {bbox}.") 353 | 354 | # Compute bounding box with percentile 355 | bbox_results = np.array(bbox_results) 356 | bounding_box = np.percentile(bbox_results, bounding_box_percentile, axis=0) 357 | bounding_box = np.ceil(bounding_box) 358 | 359 | # Generate bounded arrays 360 | results = handler.batched_map( 361 | self._generate_single_cell_images, 362 | # Convert dataframe iterrows into two lists of items to iterate over 363 | # One list will be row index 364 | # One list will be the pandas series of every row 365 | *zip(*list(dataset.iterrows())), 366 | # Pass the other parameters as list of the same thing for each 367 | # mapped function call 368 | [cell_ceiling_adjustment for i in range(len(dataset))], 369 | [bounding_box for i in range(len(dataset))], 370 | [projection_method for i in range(len(dataset))], 371 | [cell_images_3d_dir for i in range(len(dataset))], 372 | [cell_images_2d_all_proj_dir for i in range(len(dataset))], 373 | [cell_images_2d_yx_proj_dir for i in range(len(dataset))], 374 | [overwrite for i in range(len(dataset))], 375 | batch_size=batch_size, 376 | ) 377 | 378 | # Generate single cell images dataset rows 379 | single_cell_images_dataset = [] 380 | errors = [] 381 | for r in results: 382 | if isinstance(r, CellImagesResult): 383 | single_cell_images_dataset.append( 384 | { 385 | DatasetFields.CellId: r.cell_id, 386 | DatasetFields.CellImage3DPath: r.path_3d, 387 | DatasetFields.CellImage2DAllProjectionsPath: r.path_2d_all_proj, 388 | DatasetFields.CellImage2DYXProjectionPath: r.path_2d_yx_proj, 389 | } 390 | ) 391 | else: 392 | errors.append({DatasetFields.CellId: r.cell_id, "Error": r.error}) 393 | 394 | # Convert features paths rows to dataframe 395 | single_cell_images_dataset = pd.DataFrame(single_cell_images_dataset) 396 | 397 | # Drop the various single cell image columns if they already exist 398 | drop_columns = [] 399 | if DatasetFields.CellImage3DPath in dataset.columns: 400 | 
drop_columns.append(DatasetFields.CellImage3DPath) 401 | if DatasetFields.CellImage2DAllProjectionsPath in dataset.columns: 402 | drop_columns.append(DatasetFields.CellImage2DAllProjectionsPath) 403 | if DatasetFields.CellImage2DYXProjectionPath in dataset.columns: 404 | drop_columns.append(DatasetFields.CellImage2DYXProjectionPath) 405 | 406 | dataset = dataset.drop(columns=drop_columns) 407 | 408 | # Join original dataset to the fov paths 409 | self.manifest = dataset.merge( 410 | single_cell_images_dataset, on=DatasetFields.CellId 411 | ) 412 | 413 | # Save manifest to CSV 414 | manifest_save_path = self.step_local_staging_dir / "manifest.csv" 415 | self.manifest.to_csv(manifest_save_path, index=False) 416 | 417 | # Save errored cells to JSON 418 | with open(self.step_local_staging_dir / "errors.json", "w") as write_out: 419 | json.dump(errors, write_out) 420 | 421 | return manifest_save_path 422 | -------------------------------------------------------------------------------- /actk/steps/standardize_fov_array/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .standardize_fov_array import StandardizeFOVArray # noqa: F401 4 | 5 | __all__ = ["StandardizeFOVArray"] 6 | -------------------------------------------------------------------------------- /actk/steps/standardize_fov_array/standardize_fov_array.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import json 5 | import logging 6 | from pathlib import Path 7 | from typing import NamedTuple, Optional, Tuple, Union 8 | 9 | import aicsimageio 10 | import dask.dataframe as dd 11 | import pandas as pd 12 | from aics_dask_utils import DistributedHandler 13 | from aicsimageio import transforms 14 | from aicsimageio.writers import OmeTiffWriter 15 | from datastep import Step, log_run_params 16 | 17 | from ...constants import DatasetFields 18 | from ...utils import dataset_utils, image_utils 19 | 20 | ############################################################################### 21 | 22 | log = logging.getLogger(__name__) 23 | 24 | ############################################################################### 25 | 26 | REQUIRED_DATASET_FIELDS = [ 27 | DatasetFields.FOVId, 28 | DatasetFields.SourceReadPath, 29 | DatasetFields.NucleusSegmentationReadPath, 30 | DatasetFields.MembraneSegmentationReadPath, 31 | DatasetFields.ChannelIndexDNA, 32 | DatasetFields.ChannelIndexMembrane, 33 | DatasetFields.ChannelIndexStructure, 34 | DatasetFields.ChannelIndexBrightfield, 35 | DatasetFields.ChannelIndexNucleusSegmentation, 36 | DatasetFields.ChannelIndexMembraneSegmentation, 37 | ] 38 | 39 | 40 | class StandardizeFOVArrayResult(NamedTuple): 41 | fov_id: Union[int, str] 42 | path: Path 43 | 44 | 45 | class StandardizeFOVArrayError(NamedTuple): 46 | fov_id: int 47 | error: str 48 | 49 | 50 | ############################################################################### 51 | 52 | 53 | class StandardizeFOVArray(Step): 54 | def __init__(self, filepath_columns=[DatasetFields.StandardizedFOVPath], **kwargs): 55 | super().__init__(filepath_columns=filepath_columns, **kwargs) 56 | 57 | @staticmethod 58 | def _generate_standardized_fov_array( 59 | row_index: int, 60 | row: pd.Series, 61 | current_pixel_sizes: Optional[Tuple[float]], 62 | desired_pixel_sizes: Optional[Tuple[float]], 63 | save_dir: Path, 64 | overwrite: bool, 65 | ) -> Union[StandardizeFOVArrayResult, 
StandardizeFOVArrayError]: 66 | # Don't use dask for image reading 67 | aicsimageio.use_dask(False) 68 | 69 | # Get the ultimate end save path for this cell 70 | save_path = save_dir / f"{row.FOVId}.ome.tiff" 71 | 72 | # Check skip 73 | if not overwrite and save_path.is_file(): 74 | log.info(f"Skipping standardized FOV generation for FOVId: {row.FOVId}") 75 | return StandardizeFOVArrayResult(row.FOVId, save_path) 76 | 77 | # Overwrite or didn't exist 78 | log.info(f"Beginning standardized FOV generation for FOVId: {row.FOVId}") 79 | 80 | # Wrap errors for debugging later 81 | try: 82 | # Get normalized image array 83 | normalized_img, channels, pixel_sizes = image_utils.get_normed_image_array( 84 | raw_image=row.SourceReadPath, 85 | nucleus_seg_image=row.NucleusSegmentationReadPath, 86 | membrane_seg_image=row.MembraneSegmentationReadPath, 87 | dna_channel_index=row.ChannelIndexDNA, 88 | membrane_channel_index=row.ChannelIndexMembrane, 89 | structure_channel_index=row.ChannelIndexStructure, 90 | brightfield_channel_index=row.ChannelIndexBrightfield, 91 | nucleus_seg_channel_index=row.ChannelIndexNucleusSegmentation, 92 | membrane_seg_channel_index=row.ChannelIndexMembraneSegmentation, 93 | current_pixel_sizes=current_pixel_sizes, 94 | desired_pixel_sizes=desired_pixel_sizes, 95 | ) 96 | 97 | # Reshape data for serialization 98 | reshaped = transforms.transpose_to_dims(normalized_img, "CYXZ", "CZYX") 99 | 100 | # Save array as OME Tiff 101 | with OmeTiffWriter(save_path, overwrite_file=True) as writer: 102 | writer.save( 103 | data=reshaped, 104 | dimension_order="CZYX", 105 | channel_names=channels, 106 | pixels_physical_size=pixel_sizes, 107 | ) 108 | 109 | log.info(f"Completed standardized FOV generation for FOVId: {row.FOVId}") 110 | return StandardizeFOVArrayResult(row.FOVId, save_path) 111 | 112 | # Catch and return error 113 | except Exception as e: 114 | log.info( 115 | f"Failed standardized FOV generation for FOVId: {row.FOVId}. Error: {e}" 116 | ) 117 | return StandardizeFOVArrayError(row.FOVId, str(e)) 118 | 119 | @log_run_params 120 | def run( 121 | self, 122 | dataset: Union[str, Path, pd.DataFrame, dd.DataFrame], 123 | current_pixel_sizes: Optional[Tuple[float]] = ( 124 | 0.10833333333333332, 125 | 0.10833333333333332, 126 | 0.29, 127 | ), 128 | desired_pixel_sizes: Tuple[float] = (0.29, 0.29, 0.29), 129 | distributed_executor_address: Optional[str] = None, 130 | batch_size: Optional[int] = None, 131 | overwrite: bool = False, 132 | **kwargs, 133 | ) -> Path: 134 | """ 135 | Convert a dataset of raw FOV images and their nucleus and membrane 136 | segmentations, into a single, standard order and shape, and normalized image. 137 | 138 | Parameters 139 | ---------- 140 | dataset: Union[str, Path, pd.DataFrame, dd.DataFrame] 141 | The dataset to use for generating standard order, normalized, image arrays. 142 | 143 | **Required dataset columns:** *["FOVId", "SourceReadPath", 144 | "NucleusSegmentationReadPath", "MembraneSegmentationReadPath", 145 | "ChannelIndexDNA", "ChannelIndexMembrane", "ChannelIndexStructure", 146 | "ChannelIndexBrightfield"]* 147 | 148 | 149 | current_pixel_sizes: Optional[Tuple[float]] 150 | The current physical pixel sizes as a tuple of the raw image. 151 | Default: (0.10833333333333332, 0.10833333333333332, 0.29), though if None, 152 | uses (`aicsimageio.AICSImage.get_physical_pixel_size` on the raw image) 153 | 154 | 155 | desired_pixel_sizes: Tuple[float] 156 | The desired pixel size for to resize each image to in XYZ order. 
157 | Default: (0.29, 0.29, 0.29) 158 | 159 | distributed_executor_address: Optional[str] 160 | An optional executor address to pass to some computation engine. 161 | Default: None 162 | 163 | batch_size: Optional[int] 164 | An optional batch size to process n features at a time. 165 | Default: None (Process all at once) 166 | 167 | overwrite: bool 168 | If this step has already partially or completely run, should it overwrite 169 | the previous files or not. 170 | Default: False (Do not overwrite or regenerate files) 171 | 172 | Returns 173 | ------- 174 | manifest_save_path: Path 175 | Path to the produced manifest with the StandardizedFOVPath column added. 176 | """ 177 | # Handle dataset provided as string or path 178 | if isinstance(dataset, (str, Path)): 179 | dataset = Path(dataset).expanduser().resolve(strict=True) 180 | 181 | # Read dataset 182 | dataset = pd.read_csv(dataset) 183 | 184 | # Check the dataset for the required columns 185 | dataset_utils.check_required_fields( 186 | dataset=dataset, 187 | required_fields=REQUIRED_DATASET_FIELDS, 188 | ) 189 | 190 | # Log original length of cell dataset 191 | log.info(f"Original dataset length: {len(dataset)}") 192 | 193 | # Check assumption: all fields per FOV are constant 194 | # except CellID and CellIndex 195 | const_cols_per_fov = [ 196 | c for c in dataset.columns if c not in ["CellId", "CellIndex"] 197 | ] 198 | df_const_cols = ( 199 | dataset.groupby("FOVId")[const_cols_per_fov].nunique(dropna=False).eq(1) 200 | ) 201 | 202 | for col_name, is_const in df_const_cols.all().iteritems(): 203 | try: 204 | assert is_const 205 | except AssertionError: 206 | example = df_const_cols[~df_const_cols[col_name]].sample() 207 | raise ValueError( 208 | f"{col_name} has multiple values per FOV. " 209 | f"Example: FOV {example.index.item()}" 210 | ) 211 | 212 | # As there is an assumption that this dataset is for cells, 213 | # generate the FOV dataset by selecting unique FOV Ids 214 | fov_dataset = dataset.drop_duplicates(DatasetFields.FOVId) 215 | 216 | # Log produced FOV dataset length 217 | log.info(f"Unique FOV's found in dataset: {len(fov_dataset)}") 218 | 219 | # Create standardized fovs directory 220 | fovs_dir = self.step_local_staging_dir / "standardized_fovs" 221 | fovs_dir.mkdir(exist_ok=True) 222 | 223 | # Process each row 224 | with DistributedHandler(distributed_executor_address) as handler: 225 | # Start processing 226 | results = handler.batched_map( 227 | self._generate_standardized_fov_array, 228 | # Convert dataframe iterrows into two lists of items to iterate over 229 | # One list will be row index 230 | # One list will be the pandas series of every row 231 | *zip(*list(fov_dataset.iterrows())), 232 | # Pass the other parameters as list of the same thing for each 233 | # mapped function call 234 | [current_pixel_sizes for i in range(len(fov_dataset))], 235 | [desired_pixel_sizes for i in range(len(fov_dataset))], 236 | [fovs_dir for i in range(len(fov_dataset))], 237 | [overwrite for i in range(len(dataset))], 238 | batch_size=batch_size, 239 | ) 240 | 241 | # Generate fov paths rows 242 | standardized_fov_paths_dataset = [] 243 | errors = [] 244 | for result in results: 245 | if isinstance(result, StandardizeFOVArrayResult): 246 | standardized_fov_paths_dataset.append( 247 | { 248 | DatasetFields.FOVId: result.fov_id, 249 | DatasetFields.StandardizedFOVPath: result.path, 250 | } 251 | ) 252 | else: 253 | errors.append( 254 | {DatasetFields.FOVId: result.fov_id, "Error": result.error} 255 | ) 256 | 257 | # Convert fov 
paths to dataframe 258 | standardized_fov_paths_dataset = pd.DataFrame(standardized_fov_paths_dataset) 259 | 260 | # Drop StandardizedFOVPath column if it already exists 261 | if DatasetFields.StandardizedFOVPath in dataset.columns: 262 | dataset = dataset.drop(columns=[DatasetFields.StandardizedFOVPath]) 263 | 264 | # Join original dataset to the fov paths 265 | self.manifest = dataset.merge( 266 | standardized_fov_paths_dataset, on=DatasetFields.FOVId 267 | ) 268 | 269 | # Save manifest to CSV 270 | manifest_save_path = self.step_local_staging_dir / "manifest.csv" 271 | self.manifest.to_csv(manifest_save_path, index=False) 272 | 273 | # Save errored FOVs to JSON 274 | with open(self.step_local_staging_dir / "errors.json", "w") as write_out: 275 | json.dump(errors, write_out) 276 | 277 | return manifest_save_path 278 | -------------------------------------------------------------------------------- /actk/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """Unit test package for actk.""" 4 | -------------------------------------------------------------------------------- /actk/tests/conftest.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from pathlib import Path 5 | 6 | import pytest 7 | 8 | ############################################################################### 9 | 10 | 11 | @pytest.fixture 12 | def data_dir() -> Path: 13 | return Path(__file__).parent / "data" 14 | -------------------------------------------------------------------------------- /actk/tests/steps/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """Unit test package for actk.steps.""" 4 | -------------------------------------------------------------------------------- /actk/tests/steps/test_diagnostic_sheets.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from pathlib import Path 5 | 6 | import dask.dataframe as dd 7 | import pytest 8 | 9 | from actk.constants import DatasetFields 10 | from actk.steps import DiagnosticSheets 11 | 12 | ####################################################################################### 13 | 14 | 15 | def test_run(data_dir): 16 | # Initialize step 17 | step = DiagnosticSheets() 18 | 19 | # Ensure that it still runs 20 | output_manifest = step.run( 21 | data_dir / "example_single_cell_images_dataset.csv", 22 | metadata="FOVId", 23 | feature="imsize_orig", 24 | overwrite=True, 25 | ) 26 | output_manifest = dd.read_csv(output_manifest) 27 | 28 | # Run asserts 29 | # Check expected columns 30 | assert all( 31 | expected_col in output_manifest.columns 32 | for expected_col in [DatasetFields.DiagnosticSheetPath] 33 | ) 34 | 35 | # Check all expected files exist 36 | assert all( 37 | Path(f).resolve(strict=True) 38 | for f in output_manifest[DatasetFields.DiagnosticSheetPath] 39 | ) 40 | 41 | 42 | def test_catch_no_all_proj_image_path(data_dir): 43 | 44 | # Initialize step 45 | step = DiagnosticSheets() 46 | 47 | with pytest.raises(Exception): 48 | assert step.run( 49 | data_dir / "example_single_cell_features_dataset.csv", overwrite=True 50 | ) 51 | 52 | 53 | def test_max_num_cells_per_sheet(data_dir): 54 | 55 | # Initialize step 56 | step = DiagnosticSheets() 57 | 58 | # Ensure that it still runs 59 | output_manifest = 
step.run( 60 | data_dir / "example_single_cell_images_dataset.csv", 61 | max_cells=2, 62 | metadata="FOVId", 63 | feature="imsize_orig", 64 | overwrite=True, 65 | ) 66 | 67 | output_manifest = dd.read_csv(output_manifest) 68 | 69 | # Run asserts 70 | # Check expected columns 71 | assert all( 72 | expected_col in output_manifest.columns 73 | for expected_col in [DatasetFields.DiagnosticSheetPath] 74 | ) 75 | 76 | # Check all expected files exist 77 | assert all( 78 | Path(f).resolve(strict=True) 79 | for f in output_manifest[DatasetFields.DiagnosticSheetPath] 80 | ) 81 | 82 | 83 | def test_multiple_metadata_and_fig_size(data_dir): 84 | 85 | # Initialize step 86 | step = DiagnosticSheets() 87 | 88 | # Ensure that it still runs 89 | output_manifest = step.run( 90 | data_dir / "example_single_cell_images_dataset.csv", 91 | max_cells=2, 92 | metadata=["FOVId", "ChannelIndexDNA"], 93 | feature="imsize_orig", 94 | overwrite=True, 95 | fig_width=27, 96 | fig_height=27, 97 | ) 98 | 99 | output_manifest = dd.read_csv(output_manifest) 100 | 101 | # Run asserts 102 | # Check expected columns 103 | assert all( 104 | expected_col in output_manifest.columns 105 | for expected_col in [DatasetFields.DiagnosticSheetPath] 106 | ) 107 | 108 | # Check all expected files exist 109 | assert all( 110 | Path(f).resolve(strict=True) 111 | for f in output_manifest[DatasetFields.DiagnosticSheetPath] 112 | ) 113 | -------------------------------------------------------------------------------- /actk/tests/steps/test_single_cell_features.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from pathlib import Path 5 | 6 | import dask.dataframe as dd 7 | 8 | from actk.constants import DatasetFields 9 | from actk.steps import SingleCellFeatures 10 | 11 | ####################################################################################### 12 | 13 | 14 | def test_run(data_dir): 15 | # Initialize step 16 | step = SingleCellFeatures() 17 | 18 | # Ensure that it still runs 19 | output_manifest = step.run(data_dir / "example_standardized_fov_paths_dataset.csv") 20 | output_manifest = dd.read_csv(output_manifest) 21 | 22 | # Read input dataset 23 | input_dataset = dd.read_csv(data_dir / "example_standardized_fov_paths_dataset.csv") 24 | 25 | # Run asserts 26 | # Check expected columns 27 | assert all( 28 | expected_col in output_manifest.columns 29 | for expected_col in [*input_dataset.columns, DatasetFields.CellFeaturesPath] 30 | ) 31 | # Check output length 32 | assert len(output_manifest) == len(input_dataset) 33 | # Check all expected files exist 34 | assert all( 35 | Path(f).resolve(strict=True) 36 | for f in output_manifest[DatasetFields.CellFeaturesPath] 37 | ) 38 | -------------------------------------------------------------------------------- /actk/tests/steps/test_single_cell_images.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from pathlib import Path 5 | 6 | import dask.dataframe as dd 7 | 8 | from actk.constants import DatasetFields 9 | from actk.steps import SingleCellImages 10 | 11 | ####################################################################################### 12 | 13 | 14 | def test_run(data_dir): 15 | # Initialize step 16 | step = SingleCellImages() 17 | 18 | # Ensure that it still runs 19 | output_manifest = step.run(data_dir / "example_single_cell_features_dataset.csv") 20 | output_manifest = 
dd.read_csv(output_manifest) 21 | 22 | # Read input dataset 23 | input_dataset = dd.read_csv(data_dir / "example_single_cell_features_dataset.csv") 24 | 25 | # Run asserts 26 | # Check expected columns 27 | assert all( 28 | expected_col in output_manifest.columns 29 | for expected_col in [ 30 | *input_dataset.columns, 31 | DatasetFields.CellImage3DPath, 32 | DatasetFields.CellImage2DAllProjectionsPath, 33 | DatasetFields.CellImage2DYXProjectionPath, 34 | ] 35 | ) 36 | # Check output length 37 | assert len(output_manifest) == len(input_dataset) 38 | # Check all expected files exist 39 | for field in [ 40 | DatasetFields.CellImage3DPath, 41 | DatasetFields.CellImage2DAllProjectionsPath, 42 | DatasetFields.CellImage2DYXProjectionPath, 43 | ]: 44 | assert all(Path(f).resolve(strict=True) for f in output_manifest[field]) 45 | 46 | 47 | def test_run_bbox(data_dir): 48 | # Initialize step 49 | step = SingleCellImages() 50 | 51 | # Ensure that it still runs 52 | output_manifest = step.run( 53 | data_dir / "example_single_cell_features_dataset.csv", 54 | bbox=(64, 168, 104), 55 | ) 56 | output_manifest = dd.read_csv(output_manifest) 57 | 58 | # Read input dataset 59 | input_dataset = dd.read_csv(data_dir / "example_single_cell_features_dataset.csv") 60 | 61 | # Run asserts 62 | # Check expected columns 63 | assert all( 64 | expected_col in output_manifest.columns 65 | for expected_col in [ 66 | *input_dataset.columns, 67 | DatasetFields.CellImage3DPath, 68 | DatasetFields.CellImage2DAllProjectionsPath, 69 | DatasetFields.CellImage2DYXProjectionPath, 70 | ] 71 | ) 72 | # Check output length 73 | assert len(output_manifest) == len(input_dataset) 74 | # Check all expected files exist 75 | for field in [ 76 | DatasetFields.CellImage3DPath, 77 | DatasetFields.CellImage2DAllProjectionsPath, 78 | DatasetFields.CellImage2DYXProjectionPath, 79 | ]: 80 | assert all(Path(f).resolve(strict=True) for f in output_manifest[field]) 81 | -------------------------------------------------------------------------------- /actk/tests/steps/test_standardize_fov_array.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from pathlib import Path 5 | 6 | import dask.dataframe as dd 7 | import pytest 8 | 9 | from actk.constants import DatasetFields 10 | from actk.steps import StandardizeFOVArray 11 | 12 | ####################################################################################### 13 | 14 | 15 | def test_run(data_dir): 16 | # Initialize step 17 | step = StandardizeFOVArray() 18 | 19 | # Ensure that it still runs 20 | output_manifest = step.run(data_dir / "example_dataset.csv") 21 | output_manifest = dd.read_csv(output_manifest) 22 | 23 | # Read input dataset 24 | input_dataset = dd.read_csv(data_dir / "example_dataset.csv") 25 | 26 | # Run asserts 27 | # Check expected columns 28 | assert all( 29 | expected_col in output_manifest.columns 30 | for expected_col in [*input_dataset.columns, DatasetFields.StandardizedFOVPath] 31 | ) 32 | # Check output length 33 | assert len(output_manifest) == len(input_dataset) 34 | # Check all expected files exist 35 | assert all( 36 | Path(f).resolve(strict=True) 37 | for f in output_manifest[DatasetFields.StandardizedFOVPath] 38 | ) 39 | 40 | 41 | def test_catch_nonconstant_segs_per_fov(data_dir): 42 | # Initialize step 43 | step = StandardizeFOVArray() 44 | 45 | with pytest.raises(Exception): 46 | assert step.run(data_dir / "example_BAD_dataset_seg_paths_vary_per_fov.csv") 47 | 
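# -----------------------------------------------------------------------------
# Editor's note: the sketch below is illustrative and is not part of the
# original test module. test_catch_nonconstant_segs_per_fov exercises the
# per-FOV constancy check in StandardizeFOVArray.run(), which requires every
# column other than CellId and CellIndex to hold exactly one value per FOVId.
# A toy version of that check, with hypothetical data, looks like this:
if __name__ == "__main__":  # guard so the sketch never runs on import
    import pandas as pd

    toy = pd.DataFrame(
        {
            "FOVId": [1, 1, 2],
            "CellId": [10, 11, 20],
            # FOV 1 points at two different segmentation files -> invalid
            "MembraneSegmentationReadPath": ["a.tiff", "b.tiff", "c.tiff"],
        }
    )
    const_cols = [c for c in toy.columns if c not in ["CellId", "CellIndex"]]
    is_const = toy.groupby("FOVId")[const_cols].nunique(dropna=False).eq(1)

    # MembraneSegmentationReadPath is not constant within FOV 1, which is
    # exactly the condition that makes the step raise a ValueError naming the
    # offending column and an example FOVId.
    assert not is_const["MembraneSegmentationReadPath"].loc[1]
# -----------------------------------------------------------------------------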
-------------------------------------------------------------------------------- /actk/tests/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """Unit test package for actk.utils.""" 4 | -------------------------------------------------------------------------------- /actk/tests/utils/test_dataset_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import dask.dataframe as dd 5 | import pandas as pd 6 | import pytest 7 | 8 | from actk import exceptions 9 | from actk.utils import dataset_utils 10 | 11 | ####################################################################################### 12 | 13 | EXAMPLE_PD_DATAFRAME = pd.DataFrame( 14 | [ 15 | {"name": "jackson", "job": "engineer"}, 16 | {"name": "rory", "job": "scientist"}, 17 | {"name": "julie", "job": "scientist"}, 18 | ] 19 | ) 20 | 21 | EXAMPLE_DD_DATAFRAME = dd.from_pandas(EXAMPLE_PD_DATAFRAME, npartitions=1) 22 | 23 | ####################################################################################### 24 | 25 | 26 | @pytest.mark.parametrize( 27 | "dataset, required_fields", 28 | [ 29 | (EXAMPLE_PD_DATAFRAME, ["name", "job"]), 30 | (EXAMPLE_DD_DATAFRAME, ["name", "job"]), 31 | pytest.param( 32 | EXAMPLE_PD_DATAFRAME, 33 | ["hello"], 34 | marks=pytest.mark.raises(exception=exceptions.MissingDataError), 35 | ), 36 | pytest.param( 37 | EXAMPLE_DD_DATAFRAME, 38 | ["hello"], 39 | marks=pytest.mark.raises(exception=exceptions.MissingDataError), 40 | ), 41 | pytest.param( 42 | EXAMPLE_PD_DATAFRAME, 43 | ["name", "job", "hello"], 44 | marks=pytest.mark.raises(exception=exceptions.MissingDataError), 45 | ), 46 | pytest.param( 47 | EXAMPLE_DD_DATAFRAME, 48 | ["name", "job", "hello"], 49 | marks=pytest.mark.raises(exception=exceptions.MissingDataError), 50 | ), 51 | ], 52 | ) 53 | def test_check_required_fields(dataset, required_fields): 54 | # Run check 55 | dataset_utils.check_required_fields(dataset, required_fields) 56 | -------------------------------------------------------------------------------- /actk/tests/utils/test_image_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import json 5 | 6 | import numpy as np 7 | import pytest 8 | from aicsimageio import AICSImage 9 | from numpy import testing as nptest 10 | 11 | from actk.utils import image_utils 12 | 13 | ####################################################################################### 14 | 15 | 16 | @pytest.mark.parametrize( 17 | "raw_image, " 18 | "nuc_seg_image, " 19 | "memb_seg_image, " 20 | "dna_channel_index, " 21 | "membrane_channel_index, " 22 | "structure_channel_index, " 23 | "brightfield_channel_index, " 24 | "nucleus_seg_channel_index, " 25 | "membrane_seg_channel_index, " 26 | "current_pixel_sizes, " 27 | "desired_pixel_sizes, " 28 | "expected_image", 29 | [ 30 | ( 31 | "example_raw_0.ome.tiff", 32 | "example_nuc_seg_0.tiff", 33 | "example_memb_seg_0.tiff", 34 | 3, 35 | 2, 36 | 1, 37 | 0, 38 | 0, 39 | 0, 40 | None, 41 | # The most recently used desired pixel size from original repo 42 | (0.29, 0.29, 0.29), 43 | "example_normed_image_array_0.ome.tiff", 44 | ), 45 | ( 46 | "example_raw_1.ome.tiff", 47 | "example_nuc_seg_1.tiff", 48 | "example_memb_seg_1.tiff", 49 | 2, 50 | 0, 51 | 1, 52 | 3, 53 | 0, 54 | 0, 55 | None, 56 | # The most recently used desired pixel size from 
original repo 57 | (0.29, 0.29, 0.29), 58 | "example_normed_image_array_1.ome.tiff", 59 | ), 60 | ], 61 | ) 62 | def test_get_normed_image_array( 63 | data_dir, 64 | raw_image, 65 | nuc_seg_image, 66 | memb_seg_image, 67 | dna_channel_index, 68 | membrane_channel_index, 69 | structure_channel_index, 70 | brightfield_channel_index, 71 | nucleus_seg_channel_index, 72 | membrane_seg_channel_index, 73 | current_pixel_sizes, 74 | desired_pixel_sizes, 75 | expected_image, 76 | ): 77 | """ 78 | The example data used to test this function was generated with the original function 79 | and then stored with `aicsimageio.writers.OmeTiffWriter` after doing an 80 | `aicsimageio.transforms.transpose_to_dims` to transpose to "CZYX" as `OmeTiffWriter` 81 | requires data have the "YX" dimensions last. Additionally, metadata has been updated 82 | to the Channel name standards in the constants.py file. 83 | """ 84 | # Get actual 85 | actual_image, actual_channels, actual_px_sizes = image_utils.get_normed_image_array( 86 | data_dir / raw_image, 87 | data_dir / nuc_seg_image, 88 | data_dir / memb_seg_image, 89 | dna_channel_index, 90 | membrane_channel_index, 91 | structure_channel_index, 92 | brightfield_channel_index, 93 | nucleus_seg_channel_index, 94 | membrane_seg_channel_index, 95 | current_pixel_sizes, 96 | desired_pixel_sizes, 97 | ) 98 | 99 | # Read expected 100 | expected_image = AICSImage(data_dir / expected_image) 101 | 102 | # Assert actual equals expected 103 | assert np.array_equiv(actual_image, expected_image.get_image_data("CYXZ", S=0, T=0)) 104 | assert actual_channels == expected_image.get_channel_names() 105 | assert tuple(actual_px_sizes) == expected_image.get_physical_pixel_size() 106 | 107 | 108 | @pytest.mark.parametrize( 109 | "image, cell_index, cell_ceiling_adjustment, expected_image", 110 | [ 111 | ( 112 | "example_normed_image_array_0.ome.tiff", 113 | 1, 114 | 7, 115 | "example_selected_and_adjusted_array_0_1.ome.tiff", 116 | ), 117 | ( 118 | "example_normed_image_array_0.ome.tiff", 119 | 2, 120 | 7, 121 | "example_selected_and_adjusted_array_0_2.ome.tiff", 122 | ), 123 | ( 124 | "example_normed_image_array_0.ome.tiff", 125 | 3, 126 | 7, 127 | "example_selected_and_adjusted_array_0_3.ome.tiff", 128 | ), 129 | ], 130 | ) 131 | def test_select_and_adjust_segmentation_ceiling( 132 | data_dir, 133 | image, 134 | cell_index, 135 | cell_ceiling_adjustment, 136 | expected_image, 137 | ): 138 | """ 139 | The example data used to test this function was generated with the original function 140 | and then stored with `aicsimageio.writers.OmeTiffWriter` after doing an 141 | `aicsimageio.transforms.transpose_to_dims` to transpose to "CZYX" as `OmeTiffWriter` 142 | requires data have the "YX" dimensions last. Additionally, metadata has been updated 143 | to the Channel name standards in the constants.py file. 
144 | """ 145 | # Get actual 146 | image = AICSImage(data_dir / image).get_image_data("CYXZ", S=0, T=0) 147 | actual_image = image_utils.select_and_adjust_segmentation_ceiling( 148 | image, cell_index, cell_ceiling_adjustment=cell_ceiling_adjustment 149 | ) 150 | 151 | # Read expected 152 | expected_image = AICSImage(data_dir / expected_image) 153 | 154 | # Assert actual equals expected 155 | assert np.array_equiv(actual_image, expected_image.get_image_data("CYXZ", S=0, T=0)) 156 | 157 | 158 | @pytest.mark.parametrize( 159 | "image, expected_image", 160 | [ 161 | ( 162 | "example_selected_and_adjusted_array_0_1.ome.tiff", 163 | "example_cropped_with_segs_array_0_1.ome.tiff", 164 | ), 165 | ( 166 | "example_selected_and_adjusted_array_0_2.ome.tiff", 167 | "example_cropped_with_segs_array_0_2.ome.tiff", 168 | ), 169 | ( 170 | "example_selected_and_adjusted_array_0_3.ome.tiff", 171 | "example_cropped_with_segs_array_0_3.ome.tiff", 172 | ), 173 | ], 174 | ) 175 | def test_crop_raw_channels_with_segmentation(data_dir, image, expected_image): 176 | """ 177 | The example data used to test this function was generated with the original function 178 | and then stored with `aicsimageio.writers.OmeTiffWriter` after doing an 179 | `aicsimageio.transforms.transpose_to_dims` to transpose to "CZYX" as `OmeTiffWriter` 180 | requires data have the "YX" dimensions last. Additionally, metadata has been updated 181 | to the Channel name standards in the constants.py file. 182 | """ 183 | # Get actual 184 | image = AICSImage(data_dir / image) 185 | data = image.get_image_data("CYXZ", S=0, T=0) 186 | channels = image.get_channel_names() 187 | actual_image = image_utils.crop_raw_channels_with_segmentation(data, channels) 188 | 189 | # Read expected 190 | expected_image = AICSImage(data_dir / expected_image) 191 | 192 | # Assert actual equals expected 193 | assert np.array_equiv(actual_image, expected_image.get_image_data("CYXZ", S=0, T=0)) 194 | 195 | 196 | @pytest.mark.parametrize( 197 | "image, expected_image, expected_params", 198 | [ 199 | ( 200 | "example_cropped_with_segs_array_0_1.ome.tiff", 201 | "example_prepared_image_for_feature_extraction_0_1.ome.tiff", 202 | "example_prepared_params_for_feature_extraction_0_1.json", 203 | ), 204 | ( 205 | "example_cropped_with_segs_array_0_2.ome.tiff", 206 | "example_prepared_image_for_feature_extraction_0_2.ome.tiff", 207 | "example_prepared_params_for_feature_extraction_0_2.json", 208 | ), 209 | ( 210 | "example_cropped_with_segs_array_0_3.ome.tiff", 211 | "example_prepared_image_for_feature_extraction_0_3.ome.tiff", 212 | "example_prepared_params_for_feature_extraction_0_3.json", 213 | ), 214 | ], 215 | ) 216 | def test_prepare_image_for_feature_extraction( 217 | data_dir, image, expected_image, expected_params 218 | ): 219 | """ 220 | The example image data used to test this function was generated with the original 221 | function and then stored with `aicsimageio.writers.OmeTiffWriter` after doing an 222 | `aicsimageio.transforms.transpose_to_dims` to transpose to "CZYX" as `OmeTiffWriter` 223 | requires data have the "YX" dimensions last. Additionally, metadata has been updated 224 | to the Channel name standards in the constants.py file. Example parameter data was 225 | stored in JSON after converting numpy arrays to lists. 
226 | """ 227 | # Get actual 228 | image = AICSImage(data_dir / image).get_image_data("CYXZ", S=0, T=0) 229 | ( 230 | actual_image, 231 | actual_memb_com, 232 | actual_angle, 233 | actual_flipdim, 234 | ) = image_utils.prepare_image_for_feature_extraction(image) 235 | 236 | # Read expected image 237 | expected_image = AICSImage(data_dir / expected_image).get_image_data( 238 | "CYXZ", S=0, T=0 239 | ) 240 | 241 | # Read expected params 242 | with open(data_dir / expected_params, "r") as read_params: 243 | expected_params = json.load(read_params) 244 | 245 | # Unpack expected params and reformat 246 | expected_memb_com = np.array(expected_params["memb_com"]) 247 | expected_angle = expected_params["angle"] 248 | expected_flipdim = np.array(expected_params["flipdim"]) 249 | 250 | # Assert actual equals expected 251 | nptest.assert_almost_equal(actual_image, expected_image) 252 | nptest.assert_almost_equal(actual_memb_com, expected_memb_com) 253 | nptest.assert_almost_equal(actual_angle, expected_angle) 254 | nptest.assert_almost_equal(actual_flipdim, expected_flipdim) 255 | 256 | 257 | @pytest.mark.parametrize( 258 | "image, expected_features", 259 | [ 260 | ( 261 | "example_cropped_with_segs_array_0_1.ome.tiff", 262 | "example_generated_features_0_1.json", 263 | ), 264 | ( 265 | "example_cropped_with_segs_array_0_2.ome.tiff", 266 | "example_generated_features_0_2.json", 267 | ), 268 | ( 269 | "example_cropped_with_segs_array_0_3.ome.tiff", 270 | "example_generated_features_0_3.json", 271 | ), 272 | ], 273 | ) 274 | def test_get_features_from_image( 275 | data_dir, 276 | image, 277 | expected_features, 278 | ): 279 | """ 280 | The example data used to test this function was generated with the original function 281 | and then stored with JSON. 282 | """ 283 | # Get actual 284 | image = AICSImage(data_dir / image).get_image_data("CYXZ", S=0, T=0) 285 | actual_features = image_utils.get_features_from_image(image) 286 | 287 | # Serialize and deserialize the actual features 288 | # Things like tuples become lists during serialization 289 | # which technically assert False, even when the contents are equal 290 | actual_features = json.dumps(actual_features) 291 | actual_features = json.loads(actual_features) 292 | 293 | # Read expected 294 | with open(data_dir / expected_features, "r") as read_feats: 295 | expected_features = json.load(read_feats) 296 | 297 | # Assert each key value pair 298 | assert all(feat in actual_features for feat in expected_features) 299 | # These values may be a tiny bit different depending on 300 | # machine, environment, randomness, who knows. 
:shrug: 301 | # We will test a few specific representative features instead 302 | for feat in [ 303 | "imsize_orig", 304 | "com", 305 | "angle", 306 | "flipdim", 307 | "imsize_registered", 308 | "dna_intensity_mean", 309 | "cell_intensity_mean", 310 | "dna_cell_struct_cyto_distal_ratio", 311 | "dna_shape_sphericity", 312 | "cell_shape_sphericity", 313 | ]: 314 | if isinstance(actual_features[feat], int): 315 | assert np.isclose(actual_features[feat], expected_features[feat]) 316 | else: 317 | nptest.assert_almost_equal( 318 | actual_features[feat], expected_features[feat], decimal=2 319 | ) 320 | -------------------------------------------------------------------------------- /actk/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """Utilities package for actk.""" 4 | -------------------------------------------------------------------------------- /actk/utils/dataset_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from pathlib import Path 5 | from typing import List, Optional, Union 6 | 7 | import dask.dataframe as dd 8 | import pandas as pd 9 | 10 | from .. import exceptions 11 | 12 | ####################################################################################### 13 | 14 | 15 | def check_required_fields( 16 | dataset: Union[str, Path, pd.DataFrame, dd.DataFrame], 17 | required_fields: List[str], 18 | ) -> Optional[exceptions.MissingDataError]: 19 | # Handle dataset provided as string or path 20 | if isinstance(dataset, (str, Path)): 21 | dataset = Path(dataset).expanduser().resolve(strict=True) 22 | 23 | # Read dataset 24 | dataset = dd.read_csv(dataset) 25 | 26 | # Check that all columns provided as required are in the dataset 27 | missing_fields = set(required_fields) - set(dataset.columns) 28 | if len(missing_fields) > 0: 29 | raise exceptions.MissingDataError(dataset, missing_fields) 30 | -------------------------------------------------------------------------------- /actk/utils/image_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from typing import Dict, List, Optional, Tuple 5 | 6 | import aicsimageprocessing as proc 7 | import dask.array as da 8 | import numpy as np 9 | from aicsfeature.extractor import cell, cell_nuc, dna 10 | from aicsimageio import AICSImage, transforms, types 11 | from scipy.ndimage import gaussian_filter as ndf 12 | from scipy.signal import fftconvolve as convolve 13 | 14 | from ..constants import Channels 15 | 16 | ####################################################################################### 17 | 18 | 19 | def get_normed_image_array( 20 | raw_image: types.ImageLike, 21 | nucleus_seg_image: types.ImageLike, 22 | membrane_seg_image: types.ImageLike, 23 | dna_channel_index: int, 24 | membrane_channel_index: int, 25 | structure_channel_index: int, 26 | brightfield_channel_index: int, 27 | nucleus_seg_channel_index: int, 28 | membrane_seg_channel_index: int, 29 | current_pixel_sizes: Optional[Tuple[float]] = None, 30 | desired_pixel_sizes: Optional[Tuple[float]] = None, 31 | ) -> Tuple[np.ndarray, List[str], Tuple[float]]: 32 | """ 33 | Provided the original raw image, and a nucleus and membrane segmentation, construct 34 | a standardized, ordered, and normalized array of the images. 
35 |
36 | Parameters
37 | ----------
38 | raw_image: types.ImageLike
39 | A filepath to the raw imaging data. The image should be 4D and include
40 | channels for DNA, Membrane, Structure, and Transmitted Light.
41 |
42 | nucleus_seg_image: types.ImageLike
43 | A filepath to the nucleus segmentation for the provided raw image.
44 |
45 | membrane_seg_image: types.ImageLike
46 | A filepath to the membrane segmentation for the provided raw image.
47 |
48 | dna_channel_index: int
49 | The index in the channel dimension in the raw image that stores DNA data.
50 |
51 | membrane_channel_index: int
52 | The index in the channel dimension in the raw image that stores membrane data.
53 |
54 | structure_channel_index: int
55 | The index in the channel dimension in the raw image that stores structure data.
56 |
57 | brightfield_channel_index: int
58 | The index in the channel dimension in the raw image that stores the brightfield
59 | data.
60 |
61 | nucleus_seg_channel_index: int
62 | The index in the channel dimension in the nucleus segmentation image that stores
63 | the segmentation.
64 |
65 | membrane_seg_channel_index: int
66 | The index in the channel dimension in the membrane segmentation image that
67 | stores the segmentation.
68 |
69 | current_pixel_sizes: Optional[Tuple[float]]
70 | The current physical pixel sizes of the raw image as a tuple.
71 | Default: None (`aicsimageio.AICSImage.get_physical_pixel_size` on the raw image)
72 |
73 | desired_pixel_sizes: Optional[Tuple[float]]
74 | The desired physical pixel sizes as a tuple to scale all images to.
75 | Default: None (scale all images to current_pixel_sizes if different)
76 |
77 | Returns
78 | -------
79 | normed: np.ndarray
80 | The normalized images stacked into a single CYXZ numpy ndarray.
81 |
82 | channels: List[str]
83 | The standardized channel names for the returned array.
84 |
85 | pixel_sizes: Tuple[float]
86 | The physical pixel sizes of the returned image in XYZ order.
87 | 88 | Notes 89 | ----- 90 | The original version of this function can be found at: 91 | https://aicsbitbucket.corp.alleninstitute.org/projects/MODEL/repos/image_processing_pipeline/browse/aics_single_cell_pipeline/utils.py#9 92 | """ 93 | # Construct image objects 94 | raw = AICSImage(raw_image) 95 | nuc_seg = AICSImage(nucleus_seg_image) 96 | memb_seg = AICSImage(membrane_seg_image) 97 | 98 | # Preload image data 99 | raw.data 100 | nuc_seg.data 101 | memb_seg.data 102 | 103 | # Get default current and desired pixel sizes 104 | if current_pixel_sizes is None: 105 | current_pixel_sizes = raw.get_physical_pixel_size() 106 | 107 | # Default desired to be the same pixel size 108 | if desired_pixel_sizes is None: 109 | desired_pixel_sizes = current_pixel_sizes 110 | 111 | # Select the channels 112 | channel_indices = [ 113 | dna_channel_index, 114 | membrane_channel_index, 115 | structure_channel_index, 116 | brightfield_channel_index, 117 | ] 118 | selected_channels = [ 119 | raw.get_image_dask_data("YXZ", S=0, T=0, C=index) for index in channel_indices 120 | ] 121 | 122 | # Combine selections and get numpy array 123 | raw = da.stack(selected_channels).compute() 124 | 125 | # Convert pixel sizes to numpy arrays 126 | current_pixel_sizes = np.array(current_pixel_sizes) 127 | desired_pixel_sizes = np.array(desired_pixel_sizes) 128 | 129 | # Only resize raw image if desired pixel sizes is different from current 130 | if not np.array_equal(current_pixel_sizes, desired_pixel_sizes): 131 | scale_raw = current_pixel_sizes / desired_pixel_sizes 132 | raw = np.stack([proc.resize(channel, scale_raw, "bilinear") for channel in raw]) 133 | 134 | # Prep segmentations 135 | nuc_seg = nuc_seg.get_image_data("YXZ", S=0, T=0, C=nucleus_seg_channel_index) 136 | memb_seg = memb_seg.get_image_data("YXZ", S=0, T=0, C=membrane_seg_channel_index) 137 | 138 | # We do not assume that the segmentations are the same size as the raw 139 | # Resize the segmentations to match the raw 140 | # We drop the channel dimension from the raw size retrieval 141 | raw_size = np.array(raw.shape[1:]).astype(float) 142 | nuc_size = np.array(nuc_seg.shape).astype(float) 143 | memb_size = np.array(memb_seg.shape).astype(float) 144 | scale_nuc = raw_size / nuc_size 145 | scale_memb = raw_size / memb_size 146 | 147 | # Actual resize 148 | nuc_seg = proc.resize(nuc_seg, scale_nuc, method="nearest") 149 | memb_seg = proc.resize(memb_seg, scale_memb, method="nearest") 150 | 151 | # Normalize images 152 | normalized_images = [] 153 | for i, index in enumerate(channel_indices): 154 | if index == brightfield_channel_index: 155 | norm_method = "trans" 156 | else: 157 | norm_method = "img_bg_sub" 158 | 159 | # Normalize and append 160 | normalized_images.append(proc.normalize_img(raw[i], method=norm_method)) 161 | 162 | # Stack all together 163 | img = np.stack([nuc_seg, memb_seg, *normalized_images]) 164 | channel_names = Channels.DefaultOrderList 165 | 166 | return img, channel_names, tuple(desired_pixel_sizes) 167 | 168 | 169 | def select_and_adjust_segmentation_ceiling( 170 | image: np.ndarray, cell_index: int, cell_ceiling_adjustment: int = 0 171 | ) -> np.ndarray: 172 | """ 173 | Select and adjust the cell shape "ceiling" for a specific cell in the provided 174 | image. 175 | 176 | Parameters 177 | ---------- 178 | image: np.ndarray 179 | The 4D, CYXZ, image numpy ndarray output from `get_normed_image_array`. 180 | 181 | cell_index: int 182 | The integer index for the target cell. 
183 |
184 | cell_ceiling_adjustment: int
185 | The adjustment to use for raising the cell shape ceiling. If <= 0, this will be
186 | ignored and cell data will be selected but not adjusted.
187 | Default: 0
188 |
189 | Returns
190 | -------
191 | adjusted: np.ndarray
192 | The image with the membrane segmentation adjusted for ceiling shape correction.
193 |
194 | Notes
195 | -----
196 | The original version of this function can be found at:
197 | https://aicsbitbucket.corp.alleninstitute.org/projects/MODEL/repos/image_processing_pipeline/browse/aics_single_cell_pipeline/utils.py#83
198 | """
199 | # Select only the data in the first two channels (the segmentation channels)
200 | # where the data matches the provided cell index
201 | image[0:2] = image[0:2] == cell_index
202 |
203 | # Because they are conservatively segmented,
204 | # we raise the "ceiling" of the cell shape
205 |
206 | # This is the so-called "roof-augmentation" that Greg (@gregjohnso) invented to
207 | # handle the bad "roof" in old membrane segmentations.
208 | #
209 | # Specifically, because of photobleaching, the signal near the top is very weak.
210 | # As a result, the membrane segmentation stops earlier (in terms of Z position) than
211 | # it should. For some structures living near the top of the cell, like mitochondria,
212 | # the structure segmentation may extend outside the membrane segmentation. Because
213 | # the membrane segmentation is "shorter" than it should be, the structure
214 | # segmentation would be mostly chopped off and the integrated cell model would learn nothing.
215 | #
216 | # So, "roof-augmentation" is the method used to fix these "shorter" membrane
217 | # segmentation issues.
218 |
219 | # Adjust image ceiling if adjustment is greater than zero
220 | if cell_ceiling_adjustment > 0:
221 | # Get the center of mass of the nucleus
222 | nuc_com = proc.get_center_of_mass(image[0])[-1]
223 |
224 | # Get the top of the membrane
225 | memb_top = np.where(np.sum(np.sum(image[1], axis=0), axis=0))[0][-1]
226 |
227 | # Get the halfway point between the two
228 | start = int(np.floor((nuc_com + memb_top) / 2))
229 |
230 | # Get the shape of the cell from the membrane segmentation
231 | cell_shape = image[1, :, :, start:]
232 |
233 | # Adjust cell shape "ceiling" using the adjustment integer provided
234 | start_ind = int(np.floor(cell_ceiling_adjustment)) - 1
235 | imf = np.zeros([1, 1, cell_ceiling_adjustment * 2 - 1])
236 | imf[:, :, start_ind:] = 1
237 | cell_shape = convolve(cell_shape, imf, mode="same") > 1e-8
238 |
239 | # Set the image data with the new cell shape data
240 | image[1, :, :, start:] = cell_shape
241 |
242 | return image
243 |
244 |
245 | def crop_raw_channels_with_segmentation(
246 | image: np.ndarray, channels: List[str]
247 | ) -> np.ndarray:
248 | """
249 | Crop the imaging data in the raw channels using the segmentation channels, which
250 | contain a single target cell selected from the full field of view.
251 |
252 | Parameters
253 | ----------
254 | image: np.ndarray
255 | The 4D, CYXZ, image numpy ndarray output from
256 | `select_and_adjust_segmentation_ceiling`.
257 |
258 | channels: List[str]
259 | The channel names for the provided image.
260 | The channels output from `get_normed_image_array`.
261 |
262 | Returns
263 | -------
264 | cropped: np.ndarray
265 | A 4D numpy ndarray with CYXZ dimensions in the same order as provided.
266 | The raw DNA channel has been cropped using the nucleus segmentation.
267 | All other raw channels have been cropped using the membrane segmentation.
268 |
269 | Notes
270 | -----
271 | The original version of this function can be found at:
272 | https://aicsbitbucket.corp.alleninstitute.org/projects/MODEL/repos/image_processing_pipeline/browse/aics_single_cell_pipeline/utils.py#114
273 | """
274 | # Select segmentation indices
275 | nuc_ind = np.array(channels) == Channels.NucleusSegmentation
276 | memb_ind = np.array(channels) == Channels.MembraneSegmentation
277 |
278 | # Select DNA and all other indices
279 | dna_ind = np.array(channels) == Channels.DNA
280 | other_channel_inds = np.ones(len(channels))
281 | other_channel_inds[nuc_ind | memb_ind | dna_ind] = 0
282 |
283 | # Crop DNA channel with the nucleus segmentation
284 | image[dna_ind] = image[dna_ind] * image[nuc_ind]
285 |
286 | # All other channels are cropped using membrane segmentation
287 | for i in np.where(other_channel_inds)[0]:
288 | image[i] = image[i] * image[memb_ind]
289 |
290 | return image
291 |
292 |
293 | def prepare_image_for_feature_extraction(
294 | image: np.ndarray,
295 | ) -> Tuple[np.ndarray, np.ndarray, List[List[float]], np.ndarray]:
296 | """
297 | Prep an image and return any parameters required for feature extraction.
298 |
299 | Parameters
300 | ----------
301 | image: np.ndarray
302 | The 4D, CYXZ, image numpy ndarray output from
303 | `crop_raw_channels_with_segmentation`.
304 |
305 | Returns
306 | -------
307 | prepped_image: np.ndarray
308 | The prepared image after cell rigid registration and binarizing the
309 | segmentations.
310 | center_of_mass: np.ndarray
311 | The index of the center of mass of the membrane segmentation for the provided
312 | image.
313 | angle: List[List[float]]
314 | The major angle of the membrane segmentation for the provided image.
315 | flipdim: np.ndarray
316 | Boolean array indicating whether the dimensions of the image should be flipped.
317 |
318 | Notes
319 | -----
320 | The original version of this function can be found at:
321 | https://aicsbitbucket.corp.alleninstitute.org/projects/MODEL/repos/image_processing_pipeline/browse/aics_single_cell_pipeline/alignment_tools.py#5
322 |
323 | The docstring for the original version of this function was incorrect.
324 | It stated that it took in a CXYZ image but it took in a CYXZ.
325 | See `get_features_from_image` for reasoning.
326 | """
327 | # Get center of mass for the membrane
328 | memb_com = proc.get_center_of_mass(proc.get_channel(image, 1))
329 |
330 | # Perform a rigid registration on the image
331 | image, angle, flipdim = proc.cell_rigid_registration(image)
332 |
333 | # Make sure the nuc and cell channels are binary
334 | image[0:2] = image[0:2] > 0.5
335 |
336 | return image, memb_com, angle, flipdim
337 |
338 |
339 | def get_features_from_image(image: np.ndarray) -> Dict:
340 | """
341 | Generate all segmentation, DNA, membrane, and structure features from the provided
342 | image.
343 |
344 | Parameters
345 | ----------
346 | image: np.ndarray
347 | The 4D, CYXZ, image numpy ndarray output from
348 | `crop_raw_channels_with_segmentation`.
349 |
350 | Returns
351 | -------
352 | features: Dict
353 | A single dictionary filled with features.
354 |
355 | Notes
356 | -----
357 | The original version of this function can be found at:
358 | https://aicsbitbucket.corp.alleninstitute.org/projects/MODEL/repos/image_processing_pipeline/browse/aics_single_cell_pipeline/features.py#8
359 |
360 | The docstring for the original version of this function was incorrect.
361 | It stated that it took in a CXYZ image but it took in a CYXZ.
This can be seen from 362 | line #17 where a transpose to CZYX is done with `img.transpose(0, 3, 1, 2)`. 363 | A transpose of (0, 3, 1, 2) on a CXYZ image would result in a CZXY not CZYX. 364 | Additionally, simply following the original processing chain shows that the original 365 | function is simply handed the output from the original version of the function 366 | `crop_raw_channels_with_segmentation` (crop_cell_nuc) which results in a `CYXZ`. 367 | """ 368 | # Store original shape 369 | imsize_orig = image.shape 370 | 371 | # Get prepared image and feature parameters 372 | image, memb_com, angle, flipdim = prepare_image_for_feature_extraction(image) 373 | 374 | # Transpose to CZYX 375 | image = transforms.transpose_to_dims(image, "CYXZ", "CZYX") 376 | 377 | # Construct dictionary of basic features 378 | regularization_params = { 379 | "imsize_orig": imsize_orig, 380 | "com": memb_com.tolist(), 381 | "angle": angle, 382 | "flipdim": flipdim.tolist(), 383 | "imsize_registered": image.shape, 384 | } 385 | 386 | # Unpack channels 387 | nuc_seg = image[0] 388 | memb_seg = image[1] 389 | dna_image = image[2] 390 | memb_image = image[3] 391 | struct_image = image[4] 392 | 393 | # Adjust the DNA and membrane images 394 | adjusted_dna_image = ((nuc_seg * dna_image) * 2 ** 8).astype("uint16") 395 | adjusted_memb_image = ((memb_seg * memb_image) * 2 ** 8).astype("uint16") 396 | 397 | # Simple deblur for better structure localization detection 398 | imf1 = ndf(struct_image, 5, mode="constant") 399 | imf2 = ndf(struct_image, 1, mode="constant") 400 | 401 | # Adjust structure image 402 | adjusted_struct_image = imf2 - imf1 403 | adjusted_struct_image[adjusted_struct_image < 0] = 0 404 | 405 | # Get features for the image using the adjusted images 406 | memb_nuc_struct_feats = cell_nuc.get_features( 407 | nuc_seg, memb_seg, adjusted_struct_image 408 | ).to_dict("records")[0] 409 | 410 | # Get DNA and membrane image features 411 | dna_feats = dna.get_features(adjusted_dna_image, seg=nuc_seg).to_dict("records")[0] 412 | memb_feats = cell.get_features(adjusted_memb_image, seg=memb_seg).to_dict( 413 | "records" 414 | )[0] 415 | 416 | # Combine all features 417 | features = { 418 | **regularization_params, 419 | **memb_nuc_struct_feats, 420 | **dna_feats, 421 | **memb_feats, 422 | } 423 | 424 | return features 425 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | ignore: 2 | - "**/__init__.py" 3 | - "actk/bin/.*" 4 | - "actk/vendor/.*" 5 | 6 | coverage: 7 | status: 8 | patch: off 9 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = python -msphinx 7 | SPHINXPROJ = actk 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # actk documentation build configuration file, created by 5 | # sphinx-quickstart on Fri Jun 9 13:47:02 2017. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | # If extensions (or modules to document with autodoc) are in another 17 | # directory, add these directories to sys.path here. If the directory is 18 | # relative to the documentation root, use os.path.abspath to make it 19 | # absolute, like shown here. 20 | # 21 | import os 22 | import sys 23 | 24 | import sphinx_rtd_theme 25 | 26 | import actk 27 | 28 | sys.path.insert(0, os.path.abspath("..")) 29 | 30 | 31 | # -- General configuration --------------------------------------------- 32 | 33 | # If your documentation needs a minimal Sphinx version, state it here. 34 | # 35 | # needs_sphinx = "1.0" 36 | 37 | # Add any Sphinx extension module names here, as strings. They can be 38 | # extensions coming with Sphinx (named "sphinx.ext.*") or your custom ones. 39 | extensions = [ 40 | "sphinx.ext.autodoc", 41 | "sphinx.ext.viewcode", 42 | "sphinx.ext.napoleon", 43 | "sphinx.ext.mathjax", 44 | "m2r", 45 | ] 46 | 47 | # Control napoleon 48 | napoleon_google_docstring = False 49 | napolean_include_init_with_doc = True 50 | napoleon_use_ivar = True 51 | napoleon_use_param = False 52 | 53 | # Control autodoc 54 | autoclass_content = "both" # include init doc with class 55 | 56 | # Add any paths that contain templates here, relative to this directory. 57 | templates_path = ["_templates"] 58 | 59 | # The suffix(es) of source filenames. 60 | # You can specify multiple suffix as a list of string: 61 | # 62 | source_suffix = { 63 | ".rst": "restructuredtext", 64 | ".txt": "markdown", 65 | ".md": "markdown", 66 | } 67 | 68 | # The master toctree document. 69 | master_doc = "index" 70 | 71 | # General information about the project. 72 | project = u"actk" 73 | copyright = u"2020, Jackson Maxfield Brown" 74 | author = u"Jackson Maxfield Brown" 75 | 76 | # The version info for the project you"re documenting, acts as replacement 77 | # for |version| and |release|, also used in various other places throughout 78 | # the built documents. 79 | # 80 | # The short X.Y version. 81 | version = actk.__version__ 82 | # The full version, including alpha/beta/rc tags. 83 | release = actk.__version__ 84 | 85 | # The language for content autogenerated by Sphinx. Refer to documentation 86 | # for a list of supported languages. 87 | # 88 | # This is also used if you do content translation via gettext catalogs. 89 | # Usually you set "language" from the command line for these cases. 90 | language = None 91 | 92 | # List of patterns, relative to source directory, that match files and 93 | # directories to ignore when looking for source files. 94 | # This patterns also effect to html_static_path and html_extra_path 95 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 96 | 97 | # The name of the Pygments (syntax highlighting) style to use. 
98 | pygments_style = "sphinx" 99 | 100 | # If true, `todo` and `todoList` produce output, else they produce nothing. 101 | todo_include_todos = False 102 | 103 | 104 | # -- Options for HTML output ------------------------------------------- 105 | 106 | # The theme to use for HTML and HTML Help pages. See the documentation for 107 | # a list of builtin themes. 108 | # 109 | html_theme = "sphinx_rtd_theme" 110 | 111 | # Theme options are theme-specific and customize the look and feel of a 112 | # theme further. For a list of options available for each theme, see the 113 | # documentation. 114 | # 115 | html_theme_options = { 116 | "collapse_navigation": False, 117 | "prev_next_buttons_location": "top", 118 | } 119 | 120 | # Add any paths that contain custom static files (such as style sheets) here, 121 | # relative to this directory. They are copied after the builtin static files, 122 | # so a file named "default.css" will overwrite the builtin "default.css". 123 | html_static_path = ["_static"] 124 | 125 | 126 | # -- Options for HTMLHelp output --------------------------------------- 127 | 128 | # Output file base name for HTML help builder. 129 | htmlhelp_basename = "actkdoc" 130 | 131 | 132 | # -- Options for LaTeX output ------------------------------------------ 133 | 134 | latex_elements = { 135 | # The paper size ("letterpaper" or "a4paper"). 136 | # 137 | # "papersize": "letterpaper", 138 | # The font size ("10pt", "11pt" or "12pt"). 139 | # 140 | # "pointsize": "10pt", 141 | # Additional stuff for the LaTeX preamble. 142 | # 143 | # "preamble": "", 144 | # Latex figure (float) alignment 145 | # 146 | # "figure_align": "htbp", 147 | } 148 | 149 | # Grouping the document tree into LaTeX files. List of tuples 150 | # (source start file, target name, title, author, documentclass 151 | # [howto, manual, or own class]). 152 | latex_documents = [ 153 | ( 154 | master_doc, 155 | "actk.tex", 156 | u"actk Documentation", 157 | u"Jackson Maxfield Brown", 158 | "manual", 159 | ), 160 | ] 161 | 162 | 163 | # -- Options for manual page output ------------------------------------ 164 | 165 | # One entry per manual page. List of tuples 166 | # (source start file, name, description, authors, manual section). 167 | man_pages = [(master_doc, "actk", u"actk Documentation", [author], 1)] 168 | 169 | 170 | # -- Options for Texinfo output ---------------------------------------- 171 | 172 | # Grouping the document tree into Texinfo files. List of tuples 173 | # (source start file, target name, title, author, 174 | # dir menu entry, description, category) 175 | texinfo_documents = [ 176 | ( 177 | master_doc, 178 | "actk", 179 | u"actk Documentation", 180 | author, 181 | "actk", 182 | "One line description of project.", 183 | "Miscellaneous", 184 | ), 185 | ] 186 | -------------------------------------------------------------------------------- /docs/contributing.rst: -------------------------------------------------------------------------------- 1 | .. mdinclude:: ../CONTRIBUTING.md 2 | -------------------------------------------------------------------------------- /docs/dataset_fields.md: -------------------------------------------------------------------------------- 1 | # Dataset Fields 2 | 3 | Definitions, examples, and units for each field required for processing or produced. 4 | 5 | ### CellId 6 | * **Description:** A unique identifier for a cell. Can be an integer, string, or other, but must be serializable and unique. 
7 | * **Example(s):** 1, 2, 3, f0e3ac9a-5f20-4c40-bc6b-9c7c0a7e026d
8 | * **Units:** None
9 |
10 | ### CellIndex
11 | * **Description:** The integer value used in a segmentation image to indicate that a voxel belongs to a specific cell.
12 | * **Example(s):** 1, 2, 3
13 | * **Units:** None
14 |
15 | ### FOVId
16 | * **Description:** A unique identifier for a Field-of-View image. Can be an integer, string, or other, but must be serializable and unique.
17 | * **Example(s):** 1, 2, 3, f0e3ac9a-5f20-4c40-bc6b-9c7c0a7e026d
18 | * **Units:** None
19 |
20 | ### SourceReadPath
21 | * **Description:** The path to read a raw microscopy image file. This file should contain at least four channels (DNA, membrane, tagged structure, and brightfield).
22 | * **Example(s):** /allen/aics/modeling/jacksonb/data/example_raw.czi
23 | * **Units:** None
24 |
25 | ### NucleusSegmentationReadPath
26 | * **Description:** The path to read a file that contains a channel with a nucleus segmentation. This file is usually generated by segmenting the DNA channel from a raw microscopy image.
27 | * **Example(s):** /allen/aics/modeling/jacksonb/data/example_nuc_seg.ome.tiff
28 | * **Units:** None
29 |
30 | ### MembraneSegmentationReadPath
31 | * **Description:** The path to read a file that contains a channel with a membrane segmentation. This file is usually generated by segmenting the membrane channel from a raw microscopy image.
32 | * **Example(s):** /allen/aics/modeling/jacksonb/data/example_memb_seg.ome.tiff
33 | * **Units:** None
34 |
35 | ### ChannelIndexDNA
36 | * **Description:** The integer index of the DNA channel in a raw microscopy image after it has been read into memory.
37 | * **Example(s):** 0, 1, 2, 3
38 | * **Units:** None
39 |
40 | ### ChannelIndexMembrane
41 | * **Description:** The integer index of the membrane channel in a raw microscopy image after it has been read into memory.
42 | * **Example(s):** 0, 1, 2, 3
43 | * **Units:** None
44 |
50 | ### ChannelIndexStructure
51 | * **Description:** The integer index of the structure channel in a raw microscopy image after it has been read into memory.
52 | * **Example(s):** 0, 1, 2, 3
53 | * **Units:** None
54 |
55 | ### ChannelIndexBrightfield
56 | * **Description:** The integer index of the brightfield channel in a raw microscopy image after it has been read into memory.
57 | * **Example(s):** 0, 1, 2, 3
58 | * **Units:** None
59 |
60 | ### ChannelIndexNucleusSegmentation
61 | * **Description:** The integer index of the nucleus segmentation channel in a segmentation image file after it has been read into memory.
62 | * **Example(s):** 0, 1, 2, 3
63 | * **Units:** None
64 |
65 | ### ChannelIndexMembraneSegmentation
66 | * **Description:** The integer index of the membrane segmentation channel in a segmentation image file after it has been read into memory.
67 | * **Example(s):** 0, 1, 2, 3
68 | * **Units:** None
69 |
70 | ### StandardizedFOVPath
71 | * **Description:** The path to read a standardized FOV image file. This file is generated from the "StandardizeFOVArray" step.
72 | * **Example(s):** /allen/aics/modeling/jacksonb/data/example_fov_array.ome.tiff
73 | * **Units:** None
74 |
75 | ### CellFeaturesPath
76 | * **Description:** The path to read a cell features JSON file.
This file is generated from the "SingleCellFeatures" step. 77 | * **Example(s):** /allen/aics/modeling/jacksonb/data/example_cell_feats.json 78 | * **Units:** None 79 | 80 | ### CellImage3DPath 81 | * **Description:** The path to read a normalized and bounded 3D single cell image. This file is generated from the "SingleCellImages" step. 82 | * **Example(s):** /allen/aics/modeling/jacksonb/data/example_single_cell.ome.tiff 83 | * **Units:** None 84 | 85 | ### CellImage2DAllProjectionsPath 86 | * **Description:** The path to view a normalized and bounded 2D single cell image where all axis projections can be seen in a single image. This file is generated from the "SingleCellImages" step. 87 | * **Example(s):** /allen/aics/modeling/jacksonb/data/example_single_cell_all_proj.png 88 | * **Units:** None 89 | 90 | ### CellImage2DYXProjectionPath 91 | * **Description:** The path to view a normalized and bounded 2D single cell image where only the YX axis projection can be seen in a single image. This file is generated from the "SingleCellImages" step. 92 | * **Example(s):** /allen/aics/modeling/jacksonb/data/example_single_cell_yx_proj.png 93 | * **Units:** None 94 | 95 | ### DiagnosticSheetPath 96 | * **Description:** The path to view a diagnostic or "contact" sheet of the cells produced by the pipeline grouped by metadata. Useful for quality control. This file is generated from the "DiagnosticSheets" step. 97 | * **Example(s):** /allen/aics/modeling/jacksonb/data/example_fov_dianostics.png 98 | * **Units:** None 99 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to actk's documentation! 2 | ====================================== 3 | 4 | .. toctree:: 5 | :hidden: 6 | :maxdepth: 1 7 | :caption: Contents: 8 | 9 | Overview 10 | installation 11 | Package modules 12 | Dataset fields 13 | contributing 14 | 15 | .. mdinclude:: ../README.md 16 | 17 | Indices and tables 18 | ================== 19 | * :ref:`genindex` 20 | * :ref:`modindex` 21 | * :ref:`search` 22 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | .. highlight:: shell 2 | 3 | ============ 4 | Installation 5 | ============ 6 | 7 | 8 | Stable release 9 | -------------- 10 | 11 | To install actk, run this command in your terminal: 12 | 13 | .. code-block:: console 14 | 15 | $ pip install numpy 16 | $ pip install actk 17 | 18 | This is the preferred method to install actk, as it will always install the most recent stable release. 19 | 20 | If you don't have `pip`_ installed, this `Python installation guide`_ can guide 21 | you through the process. 22 | 23 | .. _pip: https://pip.pypa.io 24 | .. _Python installation guide: http://docs.python-guide.org/en/latest/starting/installation/ 25 | 26 | 27 | From sources 28 | ------------ 29 | 30 | The sources for actk can be downloaded from the `Github repo`_. 31 | 32 | You can either clone the public repository: 33 | 34 | .. code-block:: console 35 | 36 | $ git clone git://github.com/AllenCellModeling/actk 37 | 38 | Or download the `tarball`_: 39 | 40 | .. code-block:: console 41 | 42 | $ curl -OL https://github.com/AllenCellModeling/actk/tarball/master 43 | 44 | Once you have a copy of the source, you can install it with: 45 | 46 | .. code-block:: console 47 | 48 | $ python setup.py install 49 | 50 | 51 | .. 
_Github repo: https://github.com/AllenCellModeling/actk 52 | .. _tarball: https://github.com/AllenCellModeling/actk/tarball/master 53 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=python -msphinx 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | set SPHINXPROJ=actk 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The Sphinx module was not found. Make sure you have Sphinx installed, 20 | echo.then set the SPHINXBUILD environment variable to point to the full 21 | echo.path of the 'sphinx-build' executable. Alternatively you may add the 22 | echo.Sphinx directory to PATH. 23 | echo. 24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /docs/modules.rst: -------------------------------------------------------------------------------- 1 | actk 2 | ==== 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | actk 8 | -------------------------------------------------------------------------------- /images/header.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenCellModeling/actk/20d0601083d4b6eced03997473add608f83b3c75/images/header.png -------------------------------------------------------------------------------- /scripts/download_aics_dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import argparse 5 | import logging 6 | import sys 7 | import traceback 8 | from pathlib import Path 9 | 10 | import pandas as pd 11 | 12 | from actk.constants import DatasetFields 13 | from lkaccess import LabKey, contexts 14 | 15 | ############################################################################### 16 | 17 | logging.basicConfig( 18 | level=logging.INFO, 19 | format="[%(levelname)4s: %(module)s:%(lineno)4s %(asctime)s] %(message)s", 20 | ) 21 | log = logging.getLogger(__name__) 22 | 23 | ############################################################################### 24 | # Args 25 | 26 | 27 | class Args(argparse.Namespace): 28 | def __init__(self): 29 | self.__parse() 30 | 31 | def __parse(self): 32 | # Setup parser 33 | p = argparse.ArgumentParser( 34 | prog="download_aics_dataset", 35 | description=( 36 | "Retrieve a dataset ready for processing from the internal " 37 | "AICS database." 38 | ), 39 | ) 40 | 41 | # Arguments 42 | p.add_argument( 43 | "--sample", 44 | type=float, 45 | default=1.0, 46 | help=( 47 | "Percent how much data to download. Will be split across cell lines. " 48 | "Ex: 1.0 = 100 percent of each cell line, " 49 | "0.05 = 5 percent of each cell line." 50 | ), 51 | ) 52 | p.add_argument( 53 | "--instance", 54 | default="PROD", 55 | help="Which database instance to use for data retrieval. 
(PROD or STAGING)", 56 | ) 57 | p.add_argument( 58 | "--save_path", 59 | type=Path, 60 | default=Path("aics_ic_data.csv"), 61 | help="Path to save the dataset to.", 62 | ) 63 | p.add_argument( 64 | "--debug", 65 | action="store_true", 66 | help="Show traceback if the script were to fail.", 67 | ) 68 | 69 | # Parse 70 | p.parse_args(namespace=self) 71 | 72 | 73 | ############################################################################### 74 | # Retrieve and prepare AICS dataset 75 | 76 | 77 | def download_aics_dataset(args: Args): 78 | # Try running the download pipeline 79 | try: 80 | # Get instance context 81 | instance_context = getattr(contexts, args.instance.upper()) 82 | 83 | # Create connection to instance 84 | lk = LabKey(instance_context) 85 | log.info(f"Using LabKey instance: {lk}") 86 | 87 | # Get integrated cell data 88 | log.info("Retrieving pipeline integrated cell data...") 89 | data = pd.DataFrame(lk.dataset.get_pipeline_4_production_data()) 90 | 91 | # Get cell line data 92 | log.info("Retrieving cell line data...") 93 | cell_line_data = pd.DataFrame( 94 | lk.select_rows_as_list( 95 | schema_name="celllines", 96 | query_name="CellLineDefinition", 97 | columns=[ 98 | "CellLineId", 99 | "CellLineId/Name", 100 | "StructureId/Name", 101 | "ProteinId/Name", 102 | ], 103 | ) 104 | ) 105 | 106 | # Merge the data 107 | data = data.merge(cell_line_data, how="left", on="CellLineId") 108 | 109 | # Prepare the raw data for sampling 110 | data = data.drop_duplicates(subset=["CellId"], keep="first") 111 | data = data.reset_index(drop=True) 112 | data["CellLineId"] = data["CellLineId"].astype(int) 113 | 114 | # Sample the data 115 | if args.sample != 1.0: 116 | log.info(f"Sampling dataset with frac={args.sample}...") 117 | data = data.groupby("CellLineId", group_keys=False) 118 | data = data.apply(pd.DataFrame.sample, frac=args.sample) 119 | data = data.reset_index(drop=True) 120 | 121 | # Rename columns to match DatasetFields 122 | data = data.rename( 123 | columns={ 124 | "ChannelNumber405": DatasetFields.ChannelIndexDNA, 125 | "ChannelNumber638": DatasetFields.ChannelIndexMembrane, 126 | "ChannelNumberStruct": DatasetFields.ChannelIndexStructure, 127 | "ChannelNumberBrightfield": DatasetFields.ChannelIndexBrightfield, 128 | "NucleusSegmentationChannelIndex": ( 129 | DatasetFields.ChannelIndexNucleusSegmentation 130 | ), 131 | "MembraneSegmentationChannelIndex": ( 132 | DatasetFields.ChannelIndexMembraneSegmentation 133 | ), 134 | } 135 | ) 136 | 137 | # Merge Aligned and Source read path columns 138 | data[DatasetFields.SourceReadPath] = data["AlignedImageReadPath"].combine_first( 139 | data[DatasetFields.SourceReadPath] 140 | ) 141 | 142 | # Temporary drop because differing values 143 | data = data.drop( 144 | columns=[ 145 | "StructureSegmentationAlgorithm", 146 | "StructureSegmentationAlgorithmVersion", 147 | "StructureSegmentationFileId", 148 | "StructureSegmentationFilename", 149 | "StructureSegmentationReadPath", 150 | "StructureContourFileId", 151 | "StructureContourFilename", 152 | "StructureContourReadPath", 153 | "MembraneContourFileId", 154 | "MembraneContourFilename", 155 | "MembraneContourReadPath", 156 | "NucleusContourFileId", 157 | "NucleusContourFilename", 158 | "NucleusContourReadPath", 159 | ] 160 | ) 161 | 162 | # Save to CSV 163 | data.to_csv(args.save_path, index=False) 164 | 165 | log.info(f"Saved dataset manifest to: {args.save_path}") 166 | 167 | # Catch any exception 168 | except Exception as e: 169 | 
log.error("=============================================") 170 | if args.debug: 171 | log.error("\n\n" + traceback.format_exc()) 172 | log.error("=============================================") 173 | log.error("\n\n" + str(e) + "\n") 174 | log.error("=============================================") 175 | sys.exit(1) 176 | 177 | 178 | ############################################################################### 179 | # Runner 180 | 181 | 182 | def main(): 183 | args = Args() 184 | download_aics_dataset(args) 185 | 186 | 187 | ############################################################################### 188 | # Allow caller to directly run this module (usually in development scenarios) 189 | 190 | if __name__ == "__main__": 191 | main() 192 | -------------------------------------------------------------------------------- /scripts/download_test_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import argparse 5 | import logging 6 | import sys 7 | import traceback 8 | from pathlib import Path 9 | 10 | from quilt3 import Package 11 | 12 | ############################################################################### 13 | 14 | logging.basicConfig( 15 | level=logging.INFO, 16 | format="[%(levelname)4s: %(module)s:%(lineno)4s %(asctime)s] %(message)s", 17 | ) 18 | log = logging.getLogger(__name__) 19 | 20 | ############################################################################### 21 | # Args 22 | 23 | 24 | class Args(argparse.Namespace): 25 | def __init__(self): 26 | self.__parse() 27 | 28 | def __parse(self): 29 | # Setup parser 30 | p = argparse.ArgumentParser( 31 | prog="download_test_data", 32 | description=( 33 | "Download files used for testing this project. This will download " 34 | "all the required test resources and place them in the `tests/data` " 35 | "directory." 36 | ), 37 | ) 38 | 39 | # Arguments 40 | p.add_argument( 41 | "--top-hash", 42 | default="0ca3a651f3d048be4b8b68068a03eaaca6307299b4c5d91fa5add91c125b6265", 43 | help=("A specific version of the package to retrieve. 
Default: latest"), 44 | ) 45 | p.add_argument( 46 | "--debug", 47 | action="store_true", 48 | help="Show traceback if the script were to fail.", 49 | ) 50 | 51 | # Parse 52 | p.parse_args(namespace=self) 53 | 54 | 55 | ############################################################################### 56 | # Build package 57 | 58 | 59 | def download_test_data(args: Args): 60 | # Try running the download pipeline 61 | try: 62 | # Get test data dir 63 | data_dir = (Path(__file__).parent.parent / "actk" / "tests" / "data").resolve() 64 | data_dir.mkdir(exist_ok=True) 65 | 66 | # Get quilt package 67 | package = Package.browse( 68 | "actk/test_data", 69 | "s3://aics-modeling-packages-test-resources", 70 | top_hash=args.top_hash, 71 | ) 72 | 73 | # Download 74 | package["data"].fetch(data_dir) 75 | 76 | log.info(f"Completed package download.") 77 | 78 | # Catch any exception 79 | except Exception as e: 80 | log.error("=============================================") 81 | if args.debug: 82 | log.error("\n\n" + traceback.format_exc()) 83 | log.error("=============================================") 84 | log.error("\n\n" + str(e) + "\n") 85 | log.error("=============================================") 86 | sys.exit(1) 87 | 88 | 89 | ############################################################################### 90 | # Runner 91 | 92 | 93 | def main(): 94 | args = Args() 95 | download_test_data(args) 96 | 97 | 98 | ############################################################################### 99 | # Allow caller to directly run this module (usually in development scenarios) 100 | 101 | if __name__ == "__main__": 102 | main() 103 | -------------------------------------------------------------------------------- /scripts/upload_test_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import argparse 5 | import logging 6 | import sys 7 | import traceback 8 | from pathlib import Path 9 | 10 | from quilt3 import Package 11 | 12 | from actk import get_module_version 13 | 14 | ############################################################################### 15 | 16 | logging.basicConfig( 17 | level=logging.INFO, 18 | format="[%(levelname)4s: %(module)s:%(lineno)4s %(asctime)s] %(message)s", 19 | ) 20 | log = logging.getLogger(__name__) 21 | 22 | ############################################################################### 23 | # Args 24 | 25 | 26 | class Args(argparse.Namespace): 27 | def __init__(self): 28 | self.__parse() 29 | 30 | def __parse(self): 31 | # Setup parser 32 | p = argparse.ArgumentParser( 33 | prog="upload_test_data", 34 | description=( 35 | "Upload files used for testing this project. This will upload " 36 | "whatever files are currently found in the `tests/data` directory. To " 37 | "add more test files, simply add them to the `tests/data` directory " 38 | "and rerun this script." 39 | ), 40 | ) 41 | 42 | # Arguments 43 | p.add_argument( 44 | "--dry-run", 45 | action="store_true", 46 | help=( 47 | "Conduct dry run of the package generation. Will create a JSON " 48 | "manifest file of that package instead of uploading." 
49 | ), 50 | ) 51 | p.add_argument( 52 | "--debug", 53 | action="store_true", 54 | help="Show traceback if the script were to fail.", 55 | ) 56 | 57 | # Parse 58 | p.parse_args(namespace=self) 59 | 60 | 61 | ############################################################################### 62 | # Build package 63 | 64 | 65 | def upload_test_data(args: Args): 66 | # Try running the download pipeline 67 | try: 68 | # Get test data dir 69 | data_dir = (Path(__file__).parent.parent / "actk" / "tests" / "data").resolve( 70 | strict=True 71 | ) 72 | 73 | # Report with directory will be used for upload 74 | log.info(f"Using contents of directory: {data_dir}") 75 | 76 | # Create quilt package 77 | package = Package() 78 | package.set_dir("data", data_dir) 79 | 80 | # Report package contents 81 | log.info(f"Package contents: {package}") 82 | 83 | # Check for dry run 84 | if args.dry_run: 85 | # Attempt to build the package 86 | built = package.build("actk/test_data") 87 | 88 | # Get resolved save path 89 | manifest_save_path = Path("upload_manifest.jsonl").resolve() 90 | with open(manifest_save_path, "w") as manifest_write: 91 | package.dump(manifest_write) 92 | 93 | # Report where manifest was saved 94 | log.info(f"Dry run generated manifest stored to: {manifest_save_path}") 95 | log.info(f"Completed package dry run. Result hash: {built.top_hash}") 96 | 97 | # Upload 98 | else: 99 | # Get upload confirmation 100 | confirmation = None 101 | while confirmation not in ["y", "n"]: 102 | # Get user input 103 | confirmation = input("Upload y/n? ") 104 | 105 | # Get first character and lowercase 106 | confirmation = confirmation[0].lower() 107 | 108 | # Check confirmation 109 | if confirmation == "y": 110 | pushed = package.push( 111 | "actk/test_data", 112 | "s3://aics-modeling-packages-test-resources", 113 | message=( 114 | f"Test resources for `actk` version: {get_module_version()}." 115 | ), 116 | ) 117 | 118 | log.info(f"Completed package push. 
Result hash: {pushed.top_hash}") 119 | else: 120 | log.info(f"Upload canceled.") 121 | 122 | # Catch any exception 123 | except Exception as e: 124 | log.error("=============================================") 125 | if args.debug: 126 | log.error("\n\n" + traceback.format_exc()) 127 | log.error("=============================================") 128 | log.error("\n\n" + str(e) + "\n") 129 | log.error("=============================================") 130 | sys.exit(1) 131 | 132 | 133 | ############################################################################### 134 | # Runner 135 | 136 | 137 | def main(): 138 | args = Args() 139 | upload_test_data(args) 140 | 141 | 142 | ############################################################################### 143 | # Allow caller to directly run this module (usually in development scenarios) 144 | 145 | if __name__ == "__main__": 146 | main() 147 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.2.2 3 | commit = True 4 | tag = True 5 | 6 | [bumpversion:file:setup.py] 7 | search = {current_version} 8 | replace = {new_version} 9 | 10 | [bumpversion:file:actk/__init__.py] 11 | search = {current_version} 12 | replace = {new_version} 13 | 14 | [bdist_wheel] 15 | universal = 1 16 | 17 | [aliases] 18 | test = pytest 19 | 20 | [tool:pytest] 21 | collect_ignore = ['setup.py'] 22 | 23 | [flake8] 24 | exclude = 25 | docs/ 26 | ignore = 27 | E203 28 | E402 29 | W291 30 | W503 31 | max-line-length = 88 32 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """The setup script.""" 5 | 6 | from setuptools import find_packages, setup 7 | 8 | with open("README.md") as readme_file: 9 | readme = readme_file.read() 10 | 11 | setup_requirements = [ 12 | "pytest-runner>=5.2", 13 | ] 14 | 15 | test_requirements = [ 16 | "black>=19.10b0", 17 | "codecov>=2.1.4", 18 | "flake8>=3.8.3", 19 | "flake8-debugger>=3.2.1", 20 | "pytest>=5.4.3", 21 | "pytest-cov>=2.9.0", 22 | "pytest-raises>=0.11", 23 | "quilt3>=3.1.10", 24 | ] 25 | 26 | dev_requirements = [ 27 | *setup_requirements, 28 | *test_requirements, 29 | "bumpversion>=0.6.0", 30 | "coverage>=5.1", 31 | "ipython>=7.15.0", 32 | "m2r>=0.2.1", 33 | "Sphinx>=2.0.0b1,<3", 34 | "sphinx_rtd_theme>=0.4.3", 35 | "tox>=3.15.2", 36 | "twine>=3.1.1", 37 | "wheel>=0.34.2", 38 | ] 39 | 40 | step_workflow_requirements = [ 41 | "aics_dask_utils>=0.2.0", 42 | "bokeh>=2.1.0", 43 | "dask[bag]>=2.19.0", 44 | "dask_jobqueue>=0.7.0", 45 | "datastep>=0.1.8", 46 | "distributed>=2.19.0", 47 | "fire", 48 | "psutil", 49 | ] 50 | 51 | requirements = [ 52 | *step_workflow_requirements, 53 | # project requires 54 | "aicsfeature>=0.2.1", 55 | "aicsimageio>=3.2.3", 56 | "aicsimageprocessing>=0.7.4", 57 | "matplotlib>=3.2.0", 58 | "numpy>=1.18.2", 59 | "pandas>=1.0.3", 60 | ] 61 | 62 | aics_data_requirements = [ 63 | "lkaccess>=1.4.25", 64 | ] 65 | 66 | extra_requirements = { 67 | "setup": setup_requirements, 68 | "test": test_requirements, 69 | "dev": dev_requirements, 70 | "aics": aics_data_requirements, 71 | "all": [ 72 | *requirements, 73 | *dev_requirements, 74 | ], 75 | } 76 | 77 | setup( 78 | author="Jackson Maxfield Brown", 79 | author_email="jacksonb@alleninstitute.org", 80 | classifiers=[ 81 | 
"Development Status :: 2 - Pre-Alpha", 82 | "Intended Audience :: Developers", 83 | "License :: Free for non-commercial use", 84 | "Natural Language :: English", 85 | "Programming Language :: Python :: 3.6", 86 | "Programming Language :: Python :: 3.7", 87 | "Programming Language :: Python :: 3.8", 88 | ], 89 | description="Automated Cell Toolkit", 90 | entry_points={"console_scripts": ["actk=actk.bin.cli:cli"]}, 91 | install_requires=requirements, 92 | license="Allen Institute Software License", 93 | long_description=readme, 94 | long_description_content_type="text/markdown", 95 | include_package_data=True, 96 | keywords="actk, computational biology, workflow, cell, microscopy", 97 | name="actk", 98 | packages=find_packages(exclude=["tests", "*.tests", "*.tests.*"]), 99 | python_requires=">=3.6", 100 | setup_requires=setup_requirements, 101 | test_suite="actk/tests", 102 | tests_require=test_requirements, 103 | extras_require=extra_requirements, 104 | url="https://github.com/AllenCellModeling/actk", 105 | # Do not edit this string manually, always use bumpversion 106 | # Details in CONTRIBUTING.rst 107 | version="0.2.2", 108 | zip_safe=False, 109 | ) 110 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | skipsdist = True 3 | envlist = py36, py37, py38, lint 4 | 5 | [testenv:lint] 6 | deps = 7 | .[test] 8 | commands = 9 | flake8 actk --count --verbose --show-source --statistics 10 | black --check actk 11 | 12 | [testenv] 13 | setenv = 14 | PYTHONPATH = {toxinidir} 15 | deps = 16 | .[test] 17 | commands = 18 | pytest --basetemp={envtmpdir} --cov-report html --cov=actk actk/tests/ 19 | --------------------------------------------------------------------------------