├── .editorconfig ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── build-docs.yml │ ├── build-master.yml │ ├── publish.yml │ └── test-and-lint.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── actk ├── __init__.py ├── bin │ ├── __init__.py │ ├── all.py │ └── cli.py ├── constants.py ├── exceptions.py ├── steps │ ├── __init__.py │ ├── diagnostic_sheets │ │ ├── __init__.py │ │ └── diagnostic_sheets.py │ ├── raw │ │ ├── __init__.py │ │ └── raw.py │ ├── single_cell_features │ │ ├── __init__.py │ │ └── single_cell_features.py │ ├── single_cell_images │ │ ├── __init__.py │ │ └── single_cell_images.py │ └── standardize_fov_array │ │ ├── __init__.py │ │ └── standardize_fov_array.py ├── tests │ ├── __init__.py │ ├── conftest.py │ ├── steps │ │ ├── __init__.py │ │ ├── test_diagnostic_sheets.py │ │ ├── test_single_cell_features.py │ │ ├── test_single_cell_images.py │ │ └── test_standardize_fov_array.py │ └── utils │ │ ├── __init__.py │ │ ├── test_dataset_utils.py │ │ └── test_image_utils.py └── utils │ ├── __init__.py │ ├── dataset_utils.py │ └── image_utils.py ├── codecov.yml ├── docs ├── Makefile ├── conf.py ├── contributing.rst ├── dataset_fields.md ├── index.rst ├── installation.rst ├── make.bat └── modules.rst ├── images └── header.png ├── scripts ├── download_aics_dataset.py ├── download_test_data.py └── upload_test_data.py ├── setup.cfg ├── setup.py └── tox.ini /.editorconfig: -------------------------------------------------------------------------------- 1 | # http://editorconfig.org 2 | 3 | root = true 4 | 5 | [*] 6 | indent_style = space 7 | indent_size = 4 8 | trim_trailing_whitespace = true 9 | insert_final_newline = true 10 | charset = utf-8 11 | end_of_line = lf 12 | 13 | [*.bat] 14 | indent_style = tab 15 | end_of_line = crlf 16 | 17 | [LICENSE] 18 | insert_final_newline = false 19 | 20 | [Makefile] 21 | indent_style = tab 22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug Report 3 | about: '"Something''s wrong..."' 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## Description 11 | *A clear description of the bug* 12 | 13 | 14 | 15 | 16 | ## Expected Behavior 17 | *What did you expect to happen instead?* 18 | 19 | 20 | 21 | 22 | ## Reproduction 23 | *A minimal example that exhibits the behavior.* 24 | 25 | 26 | 27 | 28 | ## Environment 29 | *Any additional information about your environment* 30 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature Request 3 | about: '"It would be really cool if x did y..."' 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## Use Case 11 | *Please provide a use case to help us understand your request in context* 12 | 13 | 14 | 15 | 16 | ## Solution 17 | *Please describe your ideal solution* 18 | 19 | 20 | 21 | 22 | ## Alternatives 23 | *Please describe any alternatives you've considered, even if you've dismissed them* 24 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 
**Pull request recommendations:** 2 | - [ ] Name your pull request _your-development-type/short-description_. Ex: _feature/read-tiff-files_ 3 | - [ ] Link to any relevant issue in the PR description. Ex: _Resolves [gh-12], adds tiff file format support_ 4 | - [ ] Provide context of changes. 5 | - [ ] Provide relevant tests for your feature or bug fix. 6 | - [ ] Provide or update documentation for any feature added by your pull request. 7 | 8 | Thanks for contributing! 9 | -------------------------------------------------------------------------------- /.github/workflows/build-docs.yml: -------------------------------------------------------------------------------- 1 | name: Documentation 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | 8 | jobs: 9 | docs: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v1 13 | - name: Set up Python 14 | uses: actions/setup-python@v1 15 | with: 16 | python-version: 3.8 17 | - name: Install Dependencies 18 | run: | 19 | pip install --upgrade pip 20 | pip install Cython 21 | pip install numpy 22 | pip install .[dev] 23 | - name: Generate Docs 24 | run: | 25 | make gen-docs 26 | touch docs/_build/html/.nojekyll 27 | - name: Publish Docs 28 | uses: JamesIves/github-pages-deploy-action@releases/v3 29 | with: 30 | ACCESS_TOKEN: ${{ secrets.ACCESS_TOKEN }} 31 | BASE_BRANCH: master # The branch the action should deploy from. 32 | BRANCH: gh-pages # The branch the action should deploy to. 33 | FOLDER: docs/_build/html/ # The folder the action should deploy. 34 | -------------------------------------------------------------------------------- /.github/workflows/build-master.yml: -------------------------------------------------------------------------------- 1 | name: Build Master 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | schedule: 8 | # 9 | # https://pubs.opengroup.org/onlinepubs/9699919799/utilities/crontab.html#tag_20_25_07 10 | # Run every Monday at 18:00:00 UTC (Monday at 10:00:00 PST) 11 | - cron: '0 18 * * 1' 12 | 13 | jobs: 14 | test: 15 | runs-on: ${{ matrix.os }} 16 | strategy: 17 | matrix: 18 | python-version: [3.7, 3.8] 19 | os: [ubuntu-latest, windows-latest, macOS-latest] 20 | 21 | steps: 22 | - uses: actions/checkout@v1 23 | - name: Set up Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v1 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install Dependencies 28 | run: | 29 | python -m pip install --upgrade pip 30 | pip install Cython 31 | pip install numpy 32 | pip install .[test] 33 | - name: Download Test Data 34 | run: | 35 | python scripts/download_test_data.py --debug 36 | - name: Test with pytest 37 | run: | 38 | pytest --cov-report xml --cov=actk actk/tests/ 39 | codecov -t ${{ secrets.CODECOV_TOKEN }} 40 | 41 | lint: 42 | runs-on: ubuntu-latest 43 | 44 | steps: 45 | - uses: actions/checkout@v1 46 | - name: Set up Python 47 | uses: actions/setup-python@v1 48 | with: 49 | python-version: 3.8 50 | - name: Install Dependencies 51 | run: | 52 | python -m pip install --upgrade pip 53 | pip install Cython 54 | pip install numpy 55 | pip install .[test] 56 | - name: Lint with flake8 57 | run: | 58 | flake8 actk --count --verbose --show-source --statistics 59 | - name: Check with black 60 | run: | 61 | black --check actk 62 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish 2 | 3 | on: 4 | push: 5 | branches: 6 
| - stable 7 | 8 | jobs: 9 | publish: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v1 13 | - name: Set up Python 14 | uses: actions/setup-python@v1 15 | with: 16 | python-version: 3.8 17 | - name: Install Dependencies 18 | run: | 19 | python -m pip install --upgrade pip 20 | pip install Cython 21 | pip install numpy 22 | pip install setuptools wheel 23 | - name: Build Package 24 | run: | 25 | python setup.py sdist bdist_wheel 26 | - name: Publish to PyPI 27 | uses: pypa/gh-action-pypi-publish@master 28 | with: 29 | user: aicspypi 30 | password: ${{ secrets.PYPI_TOKEN }} 31 | -------------------------------------------------------------------------------- /.github/workflows/test-and-lint.yml: -------------------------------------------------------------------------------- 1 | name: Test and Lint 2 | 3 | on: pull_request 4 | 5 | jobs: 6 | test: 7 | runs-on: ${{ matrix.os }} 8 | strategy: 9 | matrix: 10 | python-version: [3.8] 11 | os: [ubuntu-latest] 12 | 13 | steps: 14 | - uses: actions/checkout@v1 15 | - name: Set up Python ${{ matrix.python-version }} 16 | uses: actions/setup-python@v1 17 | with: 18 | python-version: ${{ matrix.python-version }} 19 | - name: Install Dependencies 20 | run: | 21 | python -m pip install --upgrade pip 22 | pip install Cython 23 | pip install numpy 24 | pip install .[test] 25 | - name: Download Test Data 26 | run: | 27 | python scripts/download_test_data.py --debug 28 | - name: Test with pytest 29 | run: | 30 | pytest --cov-report xml --cov=actk actk/tests/ 31 | - name: Upload codecov 32 | uses: codecov/codecov-action@v1 33 | 34 | lint: 35 | runs-on: ubuntu-latest 36 | 37 | steps: 38 | - uses: actions/checkout@v1 39 | - name: Set up Python 40 | uses: actions/setup-python@v1 41 | with: 42 | python-version: 3.8 43 | - name: Install Dependencies 44 | run: | 45 | python -m pip install --upgrade pip 46 | pip install Cython 47 | pip install numpy 48 | pip install .[test] 49 | - name: Lint with flake8 50 | run: | 51 | flake8 actk --count --verbose --show-source --statistics 52 | - name: Check with black 53 | run: | 54 | black --check actk 55 | 56 | docs: 57 | runs-on: ubuntu-latest 58 | 59 | steps: 60 | - uses: actions/checkout@v1 61 | - name: Set up Python 3.7 62 | uses: actions/setup-python@v1 63 | with: 64 | python-version: 3.8 65 | - name: Install Dependencies 66 | run: | 67 | python -m pip install --upgrade pip 68 | pip install Cython 69 | pip install numpy 70 | pip install .[dev] 71 | - name: Generate Docs 72 | run: | 73 | make gen-docs 74 | touch docs/_build/html/.nojekyll 75 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # default local data staging directory 2 | /local_staging 3 | *.csv 4 | 5 | # notebooks bcz eww (force add them if you must?) 6 | *.ipynb 7 | 8 | # Byte-compiled / optimized / DLL files 9 | __pycache__/ 10 | *.py[cod] 11 | *$py.class 12 | 13 | # C extensions 14 | *.so 15 | 16 | # Distribution / packaging 17 | .Python 18 | env/ 19 | build/ 20 | develop-eggs/ 21 | dist/ 22 | downloads/ 23 | eggs/ 24 | .eggs/ 25 | lib/ 26 | lib64/ 27 | parts/ 28 | sdist/ 29 | var/ 30 | wheels/ 31 | *.egg-info/ 32 | .installed.cfg 33 | *.egg 34 | 35 | # OS generated files 36 | .DS_Store 37 | 38 | # PyInstaller 39 | # Usually these files are written by a python script from a template 40 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
41 | *.manifest 42 | *.spec 43 | 44 | # Installer logs 45 | pip-log.txt 46 | pip-delete-this-directory.txt 47 | 48 | # Unit test / coverage reports 49 | htmlcov/ 50 | .tox/ 51 | .coverage 52 | .coverage.* 53 | .cache 54 | nosetests.xml 55 | coverage.xml 56 | *.cover 57 | .hypothesis/ 58 | .pytest_cache/ 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | local_settings.py 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | docs/actk.*rst 78 | 79 | # PyBuilder 80 | target/ 81 | 82 | # Jupyter Notebook 83 | .ipynb_checkpoints 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # celery beat schedule file 89 | celerybeat-schedule 90 | 91 | # Dask 92 | dask-worker-space/ 93 | 94 | # SageMath parsed files 95 | *.sage.py 96 | 97 | # dotenv 98 | .env 99 | 100 | # virtualenv 101 | .venv 102 | venv/ 103 | ENV/ 104 | 105 | # Spyder project settings 106 | .spyderproject 107 | .spyproject 108 | 109 | # Rope project settings 110 | .ropeproject 111 | 112 | # mkdocs documentation 113 | /site 114 | 115 | # VSCode 116 | .vscode/ 117 | 118 | # mypy 119 | .mypy_cache/ 120 | 121 | # Project specific 122 | data/ 123 | .dask_logs/ 124 | workflow_config.json 125 | aics_ic_data.csv 126 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 
39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting any of the maintainers of this project and 59 | we will attempt to resolve the issues with respect and dignity. 60 | 61 | Project maintainers who do not follow or enforce the Code of Conduct in good 62 | faith may face temporary or permanent repercussions as determined by other 63 | members of the project's leadership. 64 | 65 | ## Attribution 66 | 67 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 68 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 69 | 70 | [homepage]: https://www.contributor-covenant.org 71 | 72 | For answers to common questions about this code of conduct, see 73 | https://www.contributor-covenant.org/faq 74 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | Contributions are welcome, and they are greatly appreciated! Every little bit 4 | helps, and credit will always be given. 5 | 6 | ## Get Started! 7 | Ready to contribute? Here's how to set up `actk` for local development. 8 | 9 | 1. Fork the `actk` repo on GitHub. 10 | 11 | 2. Clone your fork locally: 12 | 13 | ```bash 14 | git clone git@github.com:{your_name_here}/actk.git 15 | ``` 16 | 17 | 3. Install the project in editable mode. (It is also recommended to work in a 18 | virtualenv or anaconda environment): 19 | 20 | ```bash 21 | cd actk/ 22 | pip install -e .[dev] 23 | ``` 24 | 25 | 4. Create a branch for local development: 26 | 27 | ```bash 28 | git checkout -b {your_development_type}/short-description 29 | ``` 30 | 31 | Ex: feature/read-tiff-files or bugfix/handle-file-not-found
32 | Now you can make your changes locally. 33 | 34 | 5. When you're done making changes, check that your changes pass linting and 35 | tests, including testing other Python versions with make: 36 | 37 | ```bash 38 | make build 39 | ``` 40 | 41 | 6. Commit your changes and push your branch to GitHub: 42 | 43 | ```bash 44 | git add . 45 | git commit -m "Resolves gh-###. Your detailed description of your changes." 46 | git push origin {your_development_type}/short-description 47 | ``` 48 | 49 | 7. Submit a pull request through the GitHub website. 50 | 51 | ## Deploying 52 | 53 | A reminder for the maintainers on how to deploy. 54 | Make sure all your changes are committed. 55 | Then run: 56 | 57 | ```bash 58 | bumpversion patch # possible: major / minor / patch 59 | ``` 60 | 61 | Now *check to see if bumpversions primitive string matching set any dependencies to the wrong version*. 62 | Then: 63 | ```bash 64 | git push 65 | git push --tags 66 | git branch -D stable 67 | git checkout -b stable 68 | git push --set-upstream origin stable -f 69 | ``` 70 | 71 | This will release a new package version on Git + GitHub and publish to PyPI. 72 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Allen Institute Software License – This software license is the 2-clause BSD 2 | license plus a third clause that prohibits redistribution and use for 3 | commercial purposes without further permission. 4 | 5 | Copyright © 2020 6 | Jackson Maxfield Brown, Allen Institute. All rights reserved. 7 | 8 | Redistribution and use in source and binary forms, with or without 9 | modification, are permitted provided that the following conditions are met: 10 | 11 | 1. Redistributions of source code must retain the above copyright notice, this 12 | list of conditions and the following disclaimer. 13 | 14 | 2. Redistributions in binary form must reproduce the above copyright notice, 15 | this list of conditions and the following disclaimer in the documentation 16 | and/or other materials provided with the distribution. 17 | 18 | 3. Redistributions and use for commercial purposes are not permitted without 19 | the Allen Institute’s written permission. For purposes of this license, 20 | commercial purposes are the incorporation of the Allen Institute's software 21 | into anything for which you will charge fees or other compensation or use of 22 | the software to perform a commercial service for a third party. Contact 23 | terms@alleninstitute.org for commercial licensing opportunities. 24 | 25 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 26 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 27 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 28 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 29 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 31 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 32 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 33 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 34 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
35 | 36 | 37 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include CONTRIBUTING.md 2 | include LICENSE 3 | include README.md 4 | 5 | recursive-include tests * 6 | recursive-exclude * __pycache__ 7 | recursive-exclude * *.py[co] 8 | 9 | recursive-include docs *.rst conf.py Makefile make.bat *.jpg *.png *.gif 10 | graft actk/data 11 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean clean-test clean-pyc clean-build docs help 2 | .DEFAULT_GOAL := help 3 | 4 | define BROWSER_PYSCRIPT 5 | import os, webbrowser, sys 6 | 7 | try: 8 | from urllib import pathname2url 9 | except: 10 | from urllib.request import pathname2url 11 | 12 | webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1]))) 13 | endef 14 | export BROWSER_PYSCRIPT 15 | 16 | define PRINT_HELP_PYSCRIPT 17 | import re, sys 18 | 19 | for line in sys.stdin: 20 | match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line) 21 | if match: 22 | target, help = match.groups() 23 | print("%-20s %s" % (target, help)) 24 | endef 25 | export PRINT_HELP_PYSCRIPT 26 | 27 | BROWSER := python -c "$$BROWSER_PYSCRIPT" 28 | 29 | help: 30 | @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) 31 | 32 | clean: ## clean all build, python, and testing files 33 | rm -fr build/ 34 | rm -fr dist/ 35 | rm -fr .eggs/ 36 | find . -name '*.egg-info' -exec rm -fr {} + 37 | find . -name '*.egg' -exec rm -f {} + 38 | find . -name '*.pyc' -exec rm -f {} + 39 | find . -name '*.pyo' -exec rm -f {} + 40 | find . -name '*~' -exec rm -f {} + 41 | find . -name '__pycache__' -exec rm -fr {} + 42 | rm -fr .tox/ 43 | rm -fr .coverage 44 | rm -fr coverage.xml 45 | rm -fr htmlcov/ 46 | rm -fr .pytest_cache 47 | 48 | build: ## run tox / run tests and lint 49 | tox 50 | 51 | gen-docs: ## generate Sphinx HTML documentation, including API docs 52 | rm -f docs/actk*.rst 53 | rm -f docs/modules.rst 54 | sphinx-apidoc -o docs/ actk **/tests/ 55 | $(MAKE) -C docs html 56 | cp -r ./images ./docs/_build/html/images 57 | 58 | docs: ## generate Sphinx HTML documentation, including API docs, and serve to browser 59 | make gen-docs 60 | $(BROWSER) docs/_build/html/index.html 61 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # actk 2 | 3 | [![Build Status](https://github.com/AllenCellModeling/actk/workflows/Build%20Master/badge.svg)](https://github.com/AllenCellModeling/actk/actions) 4 | [![Documentation](https://github.com/AllenCellModeling/actk/workflows/Documentation/badge.svg)](https://AllenCellModeling.github.io/actk) 5 | [![Code Coverage](https://codecov.io/gh/AllenCellModeling/actk/branch/master/graph/badge.svg)](https://codecov.io/gh/AllenCellModeling/actk) 6 | [![Published Data](https://img.shields.io/badge/Data-Published-Success)](https://open.quiltdata.com/b/allencell/tree/aics/actk/) 7 | 8 | Automated Cell Toolkit 9 | 10 | A pipeline to process field-of-view (FOV) microscopy images and generate data and 11 | render-ready products for the cells in each field. Of note, the data produced by this 12 | pipeline is used for the [Cell Feature Explorer](https://cfe.allencell.org/). 
13 | 14 | ![workflow as an image](./images/header.png) 15 | 16 | --- 17 | 18 | ## Features 19 | All steps and functionality in this package can be run as single steps or all together 20 | by using the command line. 21 | 22 | In general, all commands for this package will follow the format: 23 | `actk {step} {command}` 24 | 25 | * `step` is the name of the step, such as "StandardizeFOVArray" or "SingleCellFeatures" 26 | * `command` is what you want that step to do, such as "run" or "push" 27 | 28 | Each step will check that the dataset provided contains the required fields prior to 29 | processing. For details and definitions on each field, see our 30 | [dataset fields documentation](https://AllenCellModeling.github.io/actk/dataset_fields.html). 31 | 32 | An example dataset can be seen [here](https://open.quiltdata.com/b/aics-modeling-packages-test-resources/tree/actk/test_data/data/example_dataset.csv). 33 | 34 | ### Pipeline 35 | To run the entire pipeline from start to finish you can simply run: 36 | 37 | ```bash 38 | actk all run --dataset {path to dataset} 39 | ``` 40 | 41 | Step specific parameters can additionally be passed by simply appending them. 42 | For example: the step `SingleCellFeatures` has a parameter for 43 | `cell_ceiling_adjustment` and this can be set on both the individual step run level and 44 | also for the entire pipeline with: 45 | 46 | ```bash 47 | actk all run --dataset {path to dataset} --cell_ceiling_adjustment {integer} 48 | ``` 49 | 50 | See the [steps module in our documentation](https://AllenCellModeling.github.io/actk/actk.steps.html) 51 | for a full list of parameters for each step 52 | 53 | #### Pipeline Config 54 | 55 | A configuration file can be provided to the underlying `datastep` library that manages 56 | the data storage and upload of the steps in this workflow. 57 | 58 | The config file should simply be called `workflow_config.json` and be available from 59 | whichever directory you run `actk` from. If this config is not found in the current 60 | working directory, defaults are selected by the `datastep` package. 61 | 62 | Here is an example of our production config: 63 | 64 | ```json 65 | { 66 | "quilt_storage_bucket": "s3://allencell", 67 | "project_local_staging_dir": "/allen/aics/modeling/jacksonb/results/actk" 68 | } 69 | ``` 70 | 71 | You can even additionally attach step-specific configuration in this file by using the 72 | name of the step like so: 73 | 74 | ```json 75 | { 76 | "quilt_storage_bucket": "s3://example_config_7", 77 | "project_local_staging_dir": "example/config/7", 78 | "example": { 79 | "step_local_staging_dir": "example/step/local/staging/" 80 | } 81 | } 82 | ``` 83 | 84 | #### AICS Distributed Computing 85 | 86 | For members of the AICS team, to run in distributed mode across the SLURM cluster add 87 | the `--distributed` flag to the pipeline call. 88 | 89 | To set distributed cluster and worker parameters you can additionally add the flags: 90 | * `--n_workers {int}` (i.e. `--n_workers 100`) 91 | * `--worker_cpu {int}` (i.e. `--worker_cpu 2`) 92 | * `--worker_mem {str}` (i.e. `--worker_mem 100GB`) 93 | 94 | ### Individual Steps 95 | * `actk standardizefovarray run --dataset {path to dataset}`, Generate standardized, 96 | ordered, and normalized FOV images as OME-Tiffs. 97 | * `actk singlecellfeatures run --dataset {path to dataset}`, Generate a features JSON 98 | file for each cell in the dataset. 
99 | * `actk singlecellimages run --dataset {path to dataset}`, Generate bounded 3D images 100 | and 2D projections for each cell in the dataset. 101 | * `actk diagnosticsheets run --dataset {path to dataset}`, Generate diagnostic sheets 102 | for single cell images. Useful for quality control. 103 | 104 | ## Installation 105 | **Install Requires:** The python package, `numpy`, must be installed prior to the 106 | installation of this package: `pip install numpy` 107 | 108 | **Stable Release:** `pip install actk`
109 | **Development Head:** `pip install git+https://github.com/AllenCellModeling/actk.git` 110 | 111 | ## Documentation 112 | For full package documentation please visit 113 | [allencellmodeling.github.io/actk](https://allencellmodeling.github.io/actk/index.html). 114 | 115 | ## Published Data 116 | 117 | For a large-scale example of what this library is capable of, please see the data 118 | produced by this pipeline after running our largest cell dataset through it. The data 119 | from the Allen Institute for Cell Science created from this pipeline can be found 120 | [here](https://open.quiltdata.com/b/allencell/tree/aics/actk/). 121 | 122 | This package contains the source microscopy images, segmentation files, pre-processed 123 | single cell images and features, and diagnostic sheets. 124 | 125 | Our source images are of endogenously-tagged hiPSC, grown for 4 days on Matrigel-coated 126 | 96-well, glass bottom imaging plates. Each field of view (FOV) includes 4 channels (BF, 127 | EGFP, DNA, Cell membrane) collected either interwoven with one camera (workflow 128 | Pipeline 4.0 - 4.2) or simultaneously with two cameras (Workflow Pipeline 4.4). You can 129 | use the file metadata of each image to target the specific channel you are interested 130 | in. FOVs were either selected randomly (mode A), enriched for mitotic events (mode B) 131 | or sampling 3 different areas of a colony (edge, ridge, center) using a photo 132 | protective cocktail (mode C). The images cataloged in this dataset come in several 133 | flavors: 134 | 135 | * Field of view (FOV) images with channels* : 136 | * Brightfield 137 | * EGFP 138 | * DNA 139 | * Cell Membrane 140 | * Segmentation files with channels: 141 | * Nucleus Segmentation 142 | * Nucleus Contour 143 | * Membrane Segmentation 144 | * Membrane Contour 145 | 146 | _* Some FOV images contain seven channels rather than four. The extra three channels 147 | are "dummy" channels added during acquisition that can be ignored._ 148 | 149 | The full details of the Allen Institute cell workflow are available on our website 150 | [here](https://www.allencell.org/methods-for-cells-in-the-lab.html).
151 | The full details of the Allen Institute microscopy workflow are available on our 152 | website [here](https://www.allencell.org/methods-for-microscopy.html). 153 | 154 | The following is provided for each cell: 155 | * Cell Id 156 | * Cell Index (from within the FOV's segmentation) 157 | * Metadata (Cell line, Labeled protein name, segmented region index, gene, etc.) 158 | * 3D cell and nuclear segmentation, and, DNA, membrane, and structure channels 159 | * 2D max projects for dimension pairs (XY, ZX, and ZY) of the above 3D images 160 | * A whole bunch of features for each cell 161 | 162 | For the 3D single cell images the channel ordering is: 163 | * Segmented DNA 164 | * Segmented Membrane 165 | * DNA (Hoechst) 166 | * Membrane (CellMask) 167 | * Labeled Structure (GFP) 168 | * Transmitted Light 169 | 170 | To interact with this dataset please see the 171 | [Quilt Documentation](https://docs.quiltdata.com/). 172 | 173 | ## Development 174 | See 175 | [CONTRIBUTING.md](https://github.com/AllenCellModeling/actk/blob/master/CONTRIBUTING.md) 176 | for information related to developing the code. 177 | 178 | For more details on how this pipeline is constructed please see 179 | [cookiecutter-stepworkflow](https://github.com/AllenCellModeling/cookiecutter-stepworkflow) 180 | and [datastep](https://github.com/AllenCellModeling/datastep). 181 | 182 | To add new steps to this pipeline, run `make_new_step` and follow the instructions in 183 | [CONTRIBUTING.md](https://github.com/AllenCellModeling/actk/blob/master/CONTRIBUTING.md) 184 | 185 | ### Developer Installation 186 | The following two commands will install the package with dev dependencies in editable 187 | mode and download all resources required for testing. 188 | 189 | ```bash 190 | pip install -e .[dev] 191 | python scripts/download_test_data.py 192 | ``` 193 | 194 | ### AICS Developer Instructions 195 | If you want to run this pipeline with the Pipeline Integrated Cell dataset 196 | (`pipeline 4.*`) run the following commands: 197 | 198 | ```bash 199 | pip install -e .[all] 200 | python scripts/download_aics_dataset.py 201 | ``` 202 | 203 | Options for this script are available and can be viewed with: 204 | `python scripts/download_aics_dataset.py --help` 205 | 206 | ## Acknowledgments 207 | 208 | A previous iteration of this pipeline was created and managed by 209 | [Gregory Johnson](https://github.com/gregjohnso) for work with 210 | [PyTorch Integrated Cell](https://github.com/AllenCellModeling/pytorch_integrated_cell). 211 | 212 | This version of this pipeline is more generalized and while still used for the 213 | Integrated Cell model, can be used to pre-process a variety of microscopy image 214 | datasets. 215 | 216 | The previous version of this pipeline produced the 217 | [pipeline_integrated_single_cell dataset](https://open.quiltdata.com/b/allencell/tree/aics/pipeline_integrated_single_cell/). 
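That earlier package, like the current `aics/actk` package described in the *Published Data* section above, is published through Quilt. The sketch below is illustrative only: the package name and registry are taken from the catalog links in this README and should be confirmed in the Quilt catalog before downloading, since the full package is large.

```python
import quilt3

# Browse the published package without downloading it.
# "aics/actk" and "s3://allencell" come from the catalog links in this README;
# confirm them in the Quilt catalog before fetching anything large.
pkg = quilt3.Package.browse("aics/actk", registry="s3://allencell")

# Inspect the top-level entries, then fetch only the entries you need.
print(list(pkg.keys()))
# pkg["some/entry.csv"].fetch("entry.csv")  # "some/entry.csv" is a placeholder
```

See the [Quilt Documentation](https://docs.quiltdata.com/) linked above for the full client API.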
218 | 219 | ***Free software: Allen Institute Software License*** 220 | -------------------------------------------------------------------------------- /actk/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """Top-level package for actk.""" 4 | 5 | __author__ = "Jackson Maxfield Brown" 6 | __email__ = "jacksonb@alleninstitute.org" 7 | # Do not edit this string manually, always use bumpversion 8 | # Details in CONTRIBUTING.md 9 | __version__ = "0.2.2" 10 | 11 | 12 | def get_module_version(): 13 | return __version__ 14 | -------------------------------------------------------------------------------- /actk/bin/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """Bin scripts package for actk.""" 4 | -------------------------------------------------------------------------------- /actk/bin/all.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | This script will run all tasks in a prefect Flow. 6 | 7 | When you add steps to you step workflow be sure to add them to the step list 8 | and configure their IO in the `run` function. 9 | """ 10 | 11 | import logging 12 | from datetime import datetime 13 | from pathlib import Path 14 | from typing import Optional 15 | 16 | from dask_jobqueue import SLURMCluster 17 | from distributed import LocalCluster 18 | from prefect import Flow 19 | from prefect.engine.executors import DaskExecutor, LocalExecutor 20 | 21 | from actk import steps 22 | 23 | ############################################################################### 24 | 25 | log = logging.getLogger(__name__) 26 | 27 | ############################################################################### 28 | 29 | 30 | class All: 31 | def __init__(self): 32 | """ 33 | Set all of your available steps here. 34 | This is only used for data logging operations, not computation purposes. 35 | """ 36 | self.step_list = [ 37 | steps.StandardizeFOVArray(), 38 | steps.SingleCellFeatures(), 39 | steps.SingleCellImages(), 40 | steps.DiagnosticSheets(), 41 | ] 42 | 43 | def run( 44 | self, 45 | dataset: str, 46 | include_raw: bool = False, 47 | batch_size: Optional[int] = None, 48 | distributed: bool = False, 49 | n_workers: int = 10, 50 | worker_cpu: int = 8, 51 | worker_mem: str = "120GB", 52 | overwrite: bool = False, 53 | debug: bool = False, 54 | **kwargs, 55 | ): 56 | """ 57 | Run a flow with your steps. 58 | 59 | Parameters 60 | ---------- 61 | dataset: str 62 | The dataset to use for the pipeline. 63 | 64 | include_raw: bool 65 | A boolean option to determine if the raw data should be included in the 66 | Quilt package. 67 | Default: False (Do not include the raw data) 68 | 69 | batch_size: Optional[int] 70 | An optional batch size to provide to each step for processing their items. 71 | Default: None (auto batch size depending on CPU / threads available) 72 | 73 | distributed: bool 74 | A boolean option to determine if the jobs should be distributed to a SLURM 75 | cluster when possible. 76 | Default: False (Do not distribute) 77 | 78 | n_workers: int 79 | Number of workers to request (when distributed is enabled). 80 | Default: 10 81 | 82 | worker_cpu: int 83 | Number of cores to provide per worker (when distributed is enabled). 84 | Default: 8 85 | 86 | worker_mem: str 87 | Amount of memory to provide per worker (when distributed is enabled). 
88 | Default: 120GB 89 | 90 | overwrite: bool 91 | If this pipeline has already partially or completely run, should it 92 | overwrite the previous files or not. 93 | Default: False (Do not overwrite or regenerate files) 94 | 95 | debug: bool 96 | A debug flag for the developer to use to manipulate how much data runs, 97 | how it is processed, etc. Additionally, if debug is True, any mapped 98 | operation will run on threads instead of processes. 99 | Default: False (Do not debug) 100 | """ 101 | # Initalize steps 102 | raw = steps.Raw() 103 | standardize_fov_array = steps.StandardizeFOVArray() 104 | single_cell_features = steps.SingleCellFeatures() 105 | single_cell_images = steps.SingleCellImages() 106 | diagnostic_sheets = steps.DiagnosticSheets() 107 | 108 | # Cluster / distributed defaults 109 | distributed_executor_address = None 110 | 111 | # Choose executor 112 | if debug: 113 | exe = LocalExecutor() 114 | log.info("Debug flagged. Will use threads instead of Dask.") 115 | else: 116 | if distributed: 117 | # Create or get log dir 118 | # Do not include ms 119 | log_dir_name = datetime.now().isoformat().split(".")[0] 120 | log_dir = Path(f".dask_logs/{log_dir_name}").expanduser() 121 | # Log dir settings 122 | log_dir.mkdir(parents=True, exist_ok=True) 123 | 124 | # Create cluster 125 | log.info("Creating SLURMCluster") 126 | cluster = SLURMCluster( 127 | cores=worker_cpu, 128 | memory=worker_mem, 129 | queue="aics_cpu_general", 130 | walltime="9-23:00:00", 131 | local_directory=str(log_dir), 132 | log_directory=str(log_dir), 133 | ) 134 | 135 | # Spawn workers 136 | cluster.scale(jobs=n_workers) 137 | log.info("Created SLURMCluster") 138 | 139 | # Use the port from the created connector to set executor address 140 | distributed_executor_address = cluster.scheduler_address 141 | 142 | # Only auto batch size if it is not None 143 | if batch_size is None: 144 | # Batch size is n_workers * worker_cpu * 0.75 145 | # We could just do n_workers * worker_cpu but 3/4 of that is safer 146 | batch_size = int(n_workers * worker_cpu * 0.75) 147 | 148 | # Log dashboard URI 149 | log.info(f"Dask dashboard available at: {cluster.dashboard_link}") 150 | else: 151 | # Create local cluster 152 | log.info("Creating LocalCluster") 153 | cluster = LocalCluster() 154 | log.info("Created LocalCluster") 155 | 156 | # Set distributed_executor_address 157 | distributed_executor_address = cluster.scheduler_address 158 | 159 | # Log dashboard URI 160 | log.info(f"Dask dashboard available at: {cluster.dashboard_link}") 161 | 162 | # Use dask cluster 163 | exe = DaskExecutor(distributed_executor_address) 164 | 165 | # Configure your flow 166 | with Flow("actk") as flow: 167 | if include_raw: 168 | dataset = raw(dataset, **kwargs) 169 | 170 | standardized_fov_paths_dataset = standardize_fov_array( 171 | dataset=dataset, 172 | distributed_executor_address=distributed_executor_address, 173 | batch_size=batch_size, 174 | overwrite=overwrite, 175 | debug=debug, 176 | # Allows us to pass `--desired_pixel_sizes [{float},{float},{float}]` 177 | **kwargs, 178 | ) 179 | 180 | single_cell_features_dataset = single_cell_features( 181 | dataset=standardized_fov_paths_dataset, 182 | distributed_executor_address=distributed_executor_address, 183 | batch_size=batch_size, 184 | overwrite=overwrite, 185 | debug=debug, 186 | # Allows us to pass `--cell_ceiling_adjustment {int}` 187 | **kwargs, 188 | ) 189 | 190 | single_cell_images_dataset = single_cell_images( 191 | dataset=single_cell_features_dataset, 192 | 
distributed_executor_address=distributed_executor_address, 193 | batch_size=batch_size, 194 | overwrite=overwrite, 195 | debug=debug, 196 | # Allows us to pass `--cell_ceiling_adjustment {int}` 197 | **kwargs, 198 | ) 199 | 200 | diagnostic_sheets( 201 | dataset=single_cell_images_dataset, 202 | distributed_executor_address=distributed_executor_address, 203 | overwrite=overwrite, 204 | # Allows us to pass `--metadata {str}`, 205 | # `--feature {str}'` 206 | **kwargs, 207 | ) 208 | 209 | # Run flow and get ending state, log duration 210 | start = datetime.now() 211 | state = flow.run(executor=exe) 212 | duration = datetime.now() - start 213 | log.info( 214 | f"Total duration of pipeline: " 215 | f"{duration.seconds // 60 // 60}:" 216 | f"{duration.seconds // 60}:" 217 | f"{duration.seconds % 60}" 218 | ) 219 | 220 | # Get and display any outputs you want to see on your local terminal 221 | log.info(single_cell_images_dataset.get_result(state, flow)) 222 | 223 | def pull(self): 224 | """ 225 | Pull all steps. 226 | """ 227 | for step in self.step_list: 228 | step.pull() 229 | 230 | def checkout(self): 231 | """ 232 | Checkout all steps. 233 | """ 234 | for step in self.step_list: 235 | step.checkout() 236 | 237 | def push(self): 238 | """ 239 | Push all steps. 240 | """ 241 | for step in self.step_list: 242 | step.push() 243 | 244 | def clean(self): 245 | """ 246 | Clean all steps. 247 | """ 248 | for step in self.step_list: 249 | step.clean() 250 | -------------------------------------------------------------------------------- /actk/bin/cli.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | This script will convert all the steps into CLI callables. 6 | 7 | You should not edit this script. 
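Example invocations of the resulting CLI (taken from the README; the dataset
path is a placeholder):

    actk all run --dataset /path/to/dataset.csv
    actk standardizefovarray run --dataset /path/to/dataset.csv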
8 | """ 9 | 10 | import inspect 11 | import logging 12 | from unittest import mock 13 | 14 | import fire 15 | 16 | from actk import steps 17 | from actk.bin.all import All 18 | 19 | ############################################################################### 20 | 21 | log = logging.getLogger() 22 | logging.basicConfig( 23 | level=logging.INFO, format="[%(levelname)4s:%(lineno)4s %(asctime)s] %(message)s" 24 | ) 25 | 26 | ############################################################################### 27 | 28 | 29 | def cli(): 30 | step_map = { 31 | name.lower(): step 32 | for name, step in inspect.getmembers(steps) 33 | if inspect.isclass(step) 34 | } 35 | 36 | # Interrupt fire print return 37 | with mock.patch("fire.core._PrintResult"): 38 | fire.Fire({**step_map, "all": All}) 39 | 40 | 41 | if __name__ == "__main__": 42 | cli() 43 | -------------------------------------------------------------------------------- /actk/constants.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | class Channels: 6 | NucleusSegmentation = "nucleus_segmentation" 7 | MembraneSegmentation = "membrane_segmentation" 8 | DNA = "dna" 9 | Membrane = "membrane" 10 | Structure = "structure" 11 | Brightfield = "brightfield" 12 | DefaultOrderList = [ 13 | NucleusSegmentation, 14 | MembraneSegmentation, 15 | DNA, 16 | Membrane, 17 | Structure, 18 | Brightfield, 19 | ] 20 | 21 | 22 | class DatasetFields: 23 | CellId = "CellId" 24 | CellIndex = "CellIndex" 25 | FOVId = "FOVId" 26 | SourceReadPath = "SourceReadPath" 27 | NucleusSegmentationReadPath = "NucleusSegmentationReadPath" 28 | MembraneSegmentationReadPath = "MembraneSegmentationReadPath" 29 | ChannelIndexDNA = "ChannelIndexDNA" 30 | ChannelIndexMembrane = "ChannelIndexMembrane" 31 | ChannelIndexStructure = "ChannelIndexStructure" 32 | ChannelIndexBrightfield = "ChannelIndexBrightfield" 33 | ChannelIndexNucleusSegmentation = "ChannelIndexNucleusSegmentation" 34 | ChannelIndexMembraneSegmentation = "ChannelIndexMembraneSegmentation" 35 | StandardizedFOVPath = "StandardizedFOVPath" 36 | CellFeaturesPath = "CellFeaturesPath" 37 | CellImage3DPath = "CellImage3DPath" 38 | CellImage2DAllProjectionsPath = "CellImage2DAllProjectionsPath" 39 | CellImage2DYXProjectionPath = "CellImage2DYXProjectionPath" 40 | DiagnosticSheetPath = "DiagnosticSheetPath" 41 | AllExpectedInputs = [ 42 | CellId, 43 | CellIndex, 44 | FOVId, 45 | SourceReadPath, 46 | NucleusSegmentationReadPath, 47 | MembraneSegmentationReadPath, 48 | ChannelIndexDNA, 49 | ChannelIndexMembrane, 50 | ChannelIndexStructure, 51 | ChannelIndexBrightfield, 52 | ChannelIndexNucleusSegmentation, 53 | ChannelIndexMembraneSegmentation, 54 | ] 55 | -------------------------------------------------------------------------------- /actk/exceptions.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from typing import List, Union 5 | 6 | import dask.dataframe as dd 7 | import pandas as pd 8 | 9 | ############################################################################### 10 | 11 | 12 | class MissingDataError(Exception): 13 | def __init__( 14 | self, dataset: Union[pd.DataFrame, dd.DataFrame], missing_fields: List[str] 15 | ): 16 | # Run base exception init 17 | super().__init__() 18 | 19 | # Store params for display 20 | self.dataset = dataset 21 | self.missing_fields = missing_fields 22 | 23 | def __str__(self): 24 | return ( 25 | 
f"Dataset provided does not have the required columns for this operation. " 26 | f"Missing fields: {self.missing_fields} " 27 | ) 28 | -------------------------------------------------------------------------------- /actk/steps/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .diagnostic_sheets import DiagnosticSheets 4 | from .raw import Raw 5 | from .single_cell_features import SingleCellFeatures 6 | from .single_cell_images import SingleCellImages 7 | from .standardize_fov_array import StandardizeFOVArray 8 | 9 | __all__ = [ 10 | "Raw", 11 | "SingleCellFeatures", 12 | "StandardizeFOVArray", 13 | "SingleCellImages", 14 | "DiagnosticSheets", 15 | ] 16 | -------------------------------------------------------------------------------- /actk/steps/diagnostic_sheets/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .diagnostic_sheets import DiagnosticSheets # noqa: F401 4 | 5 | __all__ = ["DiagnosticSheets"] 6 | -------------------------------------------------------------------------------- /actk/steps/diagnostic_sheets/diagnostic_sheets.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import json 5 | import logging 6 | from pathlib import Path 7 | from typing import List, NamedTuple, Optional, Union 8 | 9 | import aicsimageio 10 | import dask.dataframe as dd 11 | import matplotlib.image as mpimg 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import pandas as pd 15 | from aics_dask_utils import DistributedHandler 16 | from datastep import Step, log_run_params 17 | 18 | from ...constants import DatasetFields 19 | from ...utils import dataset_utils 20 | from ..single_cell_images import SingleCellImages 21 | 22 | plt.style.use("dark_background") 23 | 24 | ############################################################################### 25 | 26 | log = logging.getLogger(__name__) 27 | 28 | ############################################################################### 29 | 30 | REQUIRED_DATASET_FIELDS = [ 31 | DatasetFields.CellId, 32 | DatasetFields.CellImage2DAllProjectionsPath, 33 | ] 34 | 35 | 36 | class DiagnosticSheetResult(NamedTuple): 37 | cell_id: Union[int, str] 38 | save_path: Optional[Path] = None 39 | 40 | 41 | class DiagnosticSheetError(NamedTuple): 42 | cell_id: Union[int, str] 43 | error: str 44 | 45 | 46 | ############################################################################### 47 | 48 | 49 | class DiagnosticSheets(Step): 50 | def __init__( 51 | self, 52 | direct_upstream_tasks: List["Step"] = [SingleCellImages], 53 | filepath_columns=[DatasetFields.DiagnosticSheetPath], 54 | **kwargs, 55 | ): 56 | super().__init__( 57 | direct_upstream_tasks=direct_upstream_tasks, 58 | filepath_columns=filepath_columns, 59 | **kwargs, 60 | ) 61 | 62 | @staticmethod 63 | def _save_plot( 64 | dataset: pd.DataFrame, 65 | metadata: str, 66 | metadata_value: str, 67 | number_of_subplots: int, 68 | feature: Optional[str] = None, 69 | fig_width: Optional[int] = None, 70 | fig_height: Optional[int] = None, 71 | ): 72 | 73 | log.info(f"Beginning diagnostic sheet generation for {metadata_value}") 74 | 75 | # Choose columns and rows 76 | columns = int(np.sqrt(number_of_subplots) + 0.5) 77 | rows = columns + 1 78 | 79 | # Set figure size 80 | if not fig_width: 81 | fig_width = columns * 7 82 | if not fig_height: 83 | 
fig_height = rows * 5 84 | 85 | # Set subplots 86 | fig, ax_array = plt.subplots( 87 | rows, 88 | columns, 89 | squeeze=False, 90 | figsize=(fig_height, fig_width), 91 | ) 92 | 93 | for row_index, row in dataset.iterrows(): 94 | this_axes = ax_array.flatten()[row_index] 95 | 96 | # Load feature to plot if feature 97 | if feature: 98 | with open(row[DatasetFields.CellFeaturesPath]) as f: 99 | cell_features = json.load(f) 100 | title = "CellId: {0}, {1} {2}: {3}".format( 101 | row[DatasetFields.CellId], 102 | "\n", 103 | feature, 104 | cell_features[feature], 105 | ) 106 | this_axes.set_title(title) 107 | else: 108 | this_axes.set_title(f"CellID: {row[DatasetFields.CellId]}") 109 | 110 | # Read AllProjections Image 111 | img = mpimg.imread(row[DatasetFields.CellImage2DAllProjectionsPath]) 112 | this_axes.imshow(img) 113 | this_axes.set_aspect(1) 114 | 115 | # Need to do this outside the loop because sometimes number 116 | # of rows < number of axes subplots 117 | [ax.axis("off") for ax in ax_array.flatten()] 118 | 119 | # Save figure 120 | ax_array.flatten()[0].get_figure().savefig( 121 | dataset[DatasetFields.DiagnosticSheetPath + str(metadata)][0] 122 | ) 123 | 124 | # Close figure, otherwise clogs memory 125 | plt.close(fig) 126 | log.info(f"Completed diagnostic sheet generation for" f"{metadata_value}") 127 | 128 | @staticmethod 129 | def _collect_group( 130 | row_index: int, 131 | row: pd.Series, 132 | diagnostic_sheet_dir: Path, 133 | overwrite: bool, 134 | metadata: str, 135 | max_cells: int, 136 | ) -> Union[DiagnosticSheetResult, DiagnosticSheetError]: 137 | # Don't use dask for image reading 138 | aicsimageio.use_dask(False) 139 | 140 | try: 141 | # Get the ultimate end save paths for grouped plot 142 | if row[str(metadata)] or row[str(metadata)] == 0: 143 | assert DatasetFields.CellImage2DAllProjectionsPath in row.index 144 | save_path_index = int( 145 | np.ceil((row["SubplotNumber" + str(metadata)] + 1) / max_cells) 146 | ) 147 | # np ceil for 0 = 0 148 | if save_path_index == 0: 149 | save_path_index = 1 150 | 151 | # Clean metadata name of spaces 152 | cleaned_metadata_name = str(row[str(metadata)]).replace(" ", "-") 153 | save_path = ( 154 | diagnostic_sheet_dir / f"{metadata}" 155 | f"_{cleaned_metadata_name}" 156 | f"_{save_path_index}.png" 157 | ) 158 | 159 | log.info( 160 | f"Collecting diagnostic sheet path for cell ID: {row.CellId}, " 161 | f"{metadata}: {row[str(metadata)]}" 162 | ) 163 | else: 164 | # else no path to save 165 | save_path = None 166 | 167 | # Check skip 168 | if not overwrite and save_path.is_file(): 169 | log.info( 170 | f"Skipping diagnostic sheet path for cell ID: {row.CellId}, " 171 | f"{metadata}: {row[str(metadata)]}" 172 | ) 173 | return DiagnosticSheetResult(row.CellId, None) 174 | 175 | # Return ready to save image 176 | return DiagnosticSheetResult(row.CellId, str(save_path)) 177 | # Catch and return error 178 | except Exception as e: 179 | log.info( 180 | f"Failed to retrieve the CellImage2DAllProjectionsPath" 181 | f"for cell ID: {row.CellId}," 182 | f"{metadata} {row[str(metadata)]}" 183 | f"Error: {e}" 184 | ) 185 | return DiagnosticSheetError(row.CellId, str(e)) 186 | 187 | @log_run_params 188 | def run( 189 | self, 190 | dataset: Union[str, Path, pd.DataFrame, dd.DataFrame], 191 | max_cells: int = 200, 192 | metadata: Optional[Union[list, str]] = DatasetFields.FOVId, 193 | feature: Optional[str] = None, 194 | fig_width: Optional[int] = None, 195 | fig_height: Optional[int] = None, 196 | distributed_executor_address: Optional[str] = None, 
197 | batch_size: Optional[int] = None, 198 | overwrite: bool = False, 199 | **kwargs, 200 | ): 201 | """ 202 | Provided a dataset of single cell all projection images, generate a diagnostic 203 | sheet grouped by desired metadata and feature 204 | 205 | Parameters 206 | ---------- 207 | dataset: Union[str, Path, pd.DataFrame, dd.DataFrame] 208 | The primary cell dataset to use for generating 209 | diagnistic sheet for a group of cells. 210 | 211 | **Required dataset columns:** *["CellId", "CellImage2DAllProjectionsPath"]* 212 | 213 | max_cells: int 214 | The maximum number of cells to display on a single diagnostic sheet. 215 | Deafult: 200 216 | 217 | metadata: Optional[Union[list, str]] 218 | The metadata to group cells and generate a diagnostic sheet. 219 | For example, "FOVId" or "["FOVId", "ProteinDisplayName"]" 220 | Default: "FOVId" 221 | 222 | feature: Optional[str] 223 | The name of the single cell feature to display. For example, "imsize_orig". 224 | 225 | fig_width: Optional[int] 226 | Width of the diagnostic sheet figure. 227 | 228 | fig_height: Optional[int] 229 | Height of the diagnostic sheet figure. 230 | 231 | distributed_executor_address: Optional[str] 232 | An optional executor address to pass to some computation engine. 233 | Default: None 234 | 235 | batch_size: Optional[int] 236 | An optional batch size to process n features at a time. 237 | Default: None (Process all at once) 238 | 239 | overwrite: bool 240 | If this step has already partially or completely run, should it overwrite 241 | the previous files or not. 242 | Default: False (Do not overwrite or regenerate files) 243 | 244 | Returns 245 | ------- 246 | manifest_save_path: Path 247 | Path to the produced manifest with the DiagnosticSheetPath column added. 248 | """ 249 | if isinstance(dataset, (str, Path)): 250 | dataset = Path(dataset).expanduser().resolve(strict=True) 251 | 252 | # Read dataset 253 | dataset = pd.read_csv(dataset) 254 | 255 | # Check dataset and manifest have required fields 256 | dataset_utils.check_required_fields( 257 | dataset=dataset, 258 | required_fields=REQUIRED_DATASET_FIELDS, 259 | ) 260 | 261 | # Create save directories 262 | diagnostic_sheet_dir = self.step_local_staging_dir / "diagnostic_sheets" 263 | diagnostic_sheet_dir.mkdir(exist_ok=True) 264 | 265 | # Create empty manifest 266 | manifest = { 267 | DatasetFields.DiagnosticSheetPath: [], 268 | } 269 | 270 | # Check for metadata 271 | if metadata: 272 | # Make metadata a list 273 | metadata = metadata if isinstance(metadata, list) else [metadata] 274 | 275 | # Make an empty list of grouped_datasets to collect and 276 | # then distribute via Dask for plotting 277 | all_grouped_datasets = [] 278 | all_metadata = [] 279 | all_metadata_values = [] 280 | all_subplot_numbers = [] 281 | 282 | # Process each row 283 | for j, this_metadata in enumerate(metadata): 284 | 285 | # Add some helper columns for subsequent analysis 286 | helper_dataset = pd.DataFrame() 287 | 288 | for unique_metadata_value in dataset[this_metadata].unique(): 289 | dataset_subgroup = dataset.loc[ 290 | dataset[this_metadata] == unique_metadata_value 291 | ] 292 | # "SubplotNumber" + str(this_metadata) + "/MaxCells" is a new column 293 | # which will help iterate through subplots to add to a figure 294 | dataset_subgroup.insert( 295 | 2, 296 | "SubplotNumber" + str(this_metadata) + "/MaxCells", 297 | dataset_subgroup.groupby(this_metadata)["CellId"].transform( 298 | lambda x: ((~x.duplicated()).cumsum() - 1) % max_cells 299 | ), 300 | True, 301 | ) 302 | 303 
| # "SubplotNumber" + str(this_metadata) is a new column 304 | # which will help in the _collect group method to identify 305 | # diagnostic sheet save paths per CellId 306 | dataset_subgroup.insert( 307 | 2, 308 | "SubplotNumber" + str(this_metadata), 309 | dataset_subgroup.groupby(this_metadata)["CellId"].transform( 310 | lambda x: ((~x.duplicated()).cumsum() - 1) 311 | ), 312 | True, 313 | ) 314 | 315 | helper_dataset = helper_dataset.append(dataset_subgroup) 316 | 317 | dataset = helper_dataset 318 | # Done creating helper columns 319 | 320 | # Create empty diagnostic sheet result dataset and errors 321 | diagnostic_sheet_result_dataset = [] 322 | errors = [] 323 | 324 | with DistributedHandler(distributed_executor_address) as handler: 325 | # First, lets collect all the diagnostic sheet save paths 326 | # per CellId. These are collected based on this_metadata 327 | # and max_cells 328 | diagnostic_sheet_result = handler.batched_map( 329 | self._collect_group, 330 | # Convert dataframe iterrows into two lists of items to iterate 331 | # One list will be row index 332 | # One list will be the pandas series of every row 333 | *zip(*list(dataset.iterrows())), 334 | [diagnostic_sheet_dir for i in range(len(dataset))], 335 | [overwrite for i in range(len(dataset))], 336 | [this_metadata for i in range(len(dataset))], 337 | [max_cells for i in range(len(dataset))], 338 | ) 339 | # Generate diagnostic sheet dataset rows 340 | for r in diagnostic_sheet_result: 341 | if isinstance(r, DiagnosticSheetResult): 342 | diagnostic_sheet_result_dataset.append( 343 | { 344 | DatasetFields.CellId: r.cell_id, 345 | DatasetFields.DiagnosticSheetPath 346 | + str(this_metadata): r.save_path, 347 | } 348 | ) 349 | else: 350 | errors.append( 351 | {DatasetFields.CellId: r.cell_id, "Error": r.error} 352 | ) 353 | 354 | # Convert diagnostic sheet paths rows to dataframe 355 | diagnostic_sheet_result_dataset = pd.DataFrame( 356 | diagnostic_sheet_result_dataset 357 | ) 358 | 359 | # Drop the various diagnostic sheet columns if they already exist 360 | # Check at j = 0 because the path will exist at j > 1 if 361 | # multiple metadata 362 | drop_columns = [] 363 | if ( 364 | DatasetFields.DiagnosticSheetPath + str(this_metadata) 365 | in dataset.columns 366 | ): 367 | drop_columns.append( 368 | DatasetFields.DiagnosticSheetPath + str(this_metadata) 369 | ) 370 | 371 | dataset = dataset.drop(columns=drop_columns) 372 | 373 | # Update manifest with these paths if there is data 374 | if len(diagnostic_sheet_result_dataset) > 0: 375 | 376 | # Join original dataset to the fov paths 377 | dataset = dataset.merge( 378 | diagnostic_sheet_result_dataset, 379 | on=DatasetFields.CellId, 380 | ) 381 | 382 | # Reset index in dataset 383 | if j == 0: 384 | dataset.dropna().reset_index(inplace=True) 385 | 386 | # Update manifest with these saved paths 387 | this_metadata_paths = dataset[ 388 | DatasetFields.DiagnosticSheetPath + str(this_metadata) 389 | ].unique() 390 | 391 | for this_path in this_metadata_paths: 392 | if this_path not in manifest[DatasetFields.DiagnosticSheetPath]: 393 | manifest[DatasetFields.DiagnosticSheetPath].append( 394 | this_path 395 | ) 396 | 397 | # Save errored cells to JSON 398 | with open( 399 | self.step_local_staging_dir / "errors.json", "w" 400 | ) as write_out: 401 | json.dump(errors, write_out) 402 | 403 | # Group the dataset by this metadata and the saved 404 | # diagnostic sheet paths (there can be many different save paths) 405 | # per metadata value (if max_cells < number of items of 406 | 
# this_metadata) 407 | grouped_dataset = dataset.groupby( 408 | [ 409 | str(this_metadata), 410 | DatasetFields.DiagnosticSheetPath + str(this_metadata), 411 | ] 412 | )["SubplotNumber" + str(this_metadata) + "/MaxCells"] 413 | 414 | # Get maximum values of the subplot numbers in this 415 | # grouped dataset. This will tell us the shape of the figure 416 | # to make 417 | grouped_max = grouped_dataset.max() 418 | 419 | # Loop through metadata value and max number of subplots 420 | for metadata_value, number_of_subplots in grouped_max.items(): 421 | 422 | # Total num of subplots = subplots + 1 423 | number_of_subplots = number_of_subplots + 1 424 | 425 | # Get this metadata group from the original dataset 426 | this_metadata_value_dataset = grouped_dataset.get_group( 427 | metadata_value, dataset 428 | ) 429 | 430 | # reset index 431 | this_metadata_value_dataset.reset_index(inplace=True) 432 | 433 | # Append to related lists for Dask distributed plotting 434 | # of all groups 435 | all_grouped_datasets.append(this_metadata_value_dataset) 436 | all_metadata.append(this_metadata) 437 | all_metadata_values.append(metadata_value) 438 | all_subplot_numbers.append(number_of_subplots) 439 | 440 | # Plot each diagnostic sheet 441 | with DistributedHandler(distributed_executor_address) as handler: 442 | # Start processing. This will add subplots to the current fig 443 | # axes via dask 444 | handler.batched_map( 445 | self._save_plot, 446 | # Convert dataframe iterrows into two lists of items to 447 | # iterate. One list will be row index 448 | # One list will be the pandas series of every row 449 | [dataset for dataset in all_grouped_datasets], 450 | [metadata for metadata in all_metadata], 451 | [metadata_value for metadata_value in all_metadata_values], 452 | [number_of_subplots for number_of_subplots in all_subplot_numbers], 453 | [feature for i in range(len(all_grouped_datasets))], 454 | [fig_width for i in range(len(all_grouped_datasets))], 455 | [fig_height for i in range(len(all_grouped_datasets))], 456 | ) 457 | 458 | self.manifest = pd.DataFrame(manifest) 459 | 460 | else: 461 | # If no metadata, just return input manifest 462 | self.manifest = dataset 463 | 464 | # Save manifest to CSV 465 | manifest_save_path = self.step_local_staging_dir / "manifest.csv" 466 | self.manifest.to_csv(manifest_save_path, index=False) 467 | 468 | return manifest_save_path 469 | -------------------------------------------------------------------------------- /actk/steps/raw/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .raw import Raw # noqa: F401 4 | 5 | __all__ = ["Raw"] 6 | -------------------------------------------------------------------------------- /actk/steps/raw/raw.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import logging 5 | from pathlib import Path 6 | from typing import Union 7 | 8 | import dask.dataframe as dd 9 | import pandas as pd 10 | from datastep import Step, log_run_params 11 | 12 | from ...constants import DatasetFields 13 | from ...utils import dataset_utils 14 | 15 | ############################################################################### 16 | 17 | log = logging.getLogger(__name__) 18 | 19 | ############################################################################### 20 | 21 | # This is the merge of all other steps required fields. 
22 | # The reasoning here is that the user will only want to upload the raw data 23 | # if they are doing a full pipeline run 24 | REQUIRED_DATASET_FIELDS = DatasetFields.AllExpectedInputs 25 | 26 | ############################################################################### 27 | 28 | 29 | class Raw(Step): 30 | def __init__( 31 | self, 32 | filepath_columns=[ 33 | DatasetFields.SourceReadPath, 34 | DatasetFields.NucleusSegmentationReadPath, 35 | DatasetFields.MembraneSegmentationReadPath, 36 | ], 37 | metadata_columns=[DatasetFields.FOVId], 38 | **kwargs, 39 | ): 40 | super().__init__( 41 | filepath_columns=filepath_columns, 42 | metadata_columns=metadata_columns, 43 | **kwargs, 44 | ) 45 | 46 | @log_run_params 47 | def run(self, dataset: Union[str, Path, pd.DataFrame, dd.DataFrame], **kwargs): 48 | """ 49 | Simple passthrough to store the dataset in local_staging/raw. 50 | This does not copy any of the image files to local_staging/raw, only the manifest. 51 | This is an optional step that will only run if you want to upload the raw data. 52 | 53 | Parameters 54 | ---------- 55 | dataset: Union[str, Path, pd.DataFrame, dd.DataFrame] 56 | The dataset to use for the rest of the pipeline run. 57 | 58 | **Required dataset columns:** *["CellId", "CellIndex", "FOVId", 59 | "SourceReadPath", "NucleusSegmentationReadPath", 60 | "MembraneSegmentationReadPath", "ChannelIndexDNA", "ChannelIndexMembrane", 61 | "ChannelIndexStructure", "ChannelIndexBrightfield"]* 62 | 63 | Returns 64 | ------- 65 | manifest_save_path: Path 66 | The path to the manifest in local_staging with the raw data. 67 | """ 68 | if isinstance(dataset, (str, Path)): 69 | dataset = Path(dataset).expanduser().resolve(strict=True) 70 | 71 | # Read dataset 72 | dataset = pd.read_csv(dataset) 73 | 74 | # Check dataset and manifest have required fields 75 | dataset_utils.check_required_fields( 76 | dataset=dataset, 77 | required_fields=REQUIRED_DATASET_FIELDS, 78 | ) 79 | 80 | # Save manifest to CSV 81 | self.manifest = dataset 82 | manifest_save_path = self.step_local_staging_dir / "manifest.csv" 83 | self.manifest.to_csv(manifest_save_path, index=False) 84 | 85 | return manifest_save_path 86 | -------------------------------------------------------------------------------- /actk/steps/single_cell_features/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .single_cell_features import SingleCellFeatures # noqa: F401 4 | 5 | __all__ = ["SingleCellFeatures"] 6 | -------------------------------------------------------------------------------- /actk/steps/single_cell_features/single_cell_features.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import json 5 | import logging 6 | from pathlib import Path 7 | from typing import NamedTuple, Optional, Union 8 | 9 | import aicsimageio 10 | import dask.dataframe as dd 11 | import pandas as pd 12 | from aics_dask_utils import DistributedHandler 13 | from aicsimageio import AICSImage 14 | from datastep import Step, log_run_params 15 | 16 | from ...constants import DatasetFields 17 | from ...utils import dataset_utils, image_utils 18 | from ..standardize_fov_array import StandardizeFOVArray 19 | 20 | ############################################################################### 21 | 22 | log = logging.getLogger(__name__) 23 | 24 | ############################################################################### 25 |
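# -----------------------------------------------------------------------------
# Editor's note: the sketch below is illustrative and is not part of the
# original module. Every actk step follows the same validation pattern --
# declare the manifest columns it needs in REQUIRED_DATASET_FIELDS (as defined
# just below) and call dataset_utils.check_required_fields() near the top of
# run(); a missing column raises actk.exceptions.MissingDataError. A minimal,
# hypothetical check outside of any step might look like this:
if __name__ == "__main__":  # guard so the sketch never runs on import
    import pandas as pd

    from actk.utils import dataset_utils

    toy_manifest = pd.DataFrame({"CellId": [1], "CellIndex": [1], "FOVId": [10]})

    # "StandardizedFOVPath" is required by this step but absent from the toy
    # manifest, so this call raises actk.exceptions.MissingDataError.
    dataset_utils.check_required_fields(
        dataset=toy_manifest,
        required_fields=["CellId", "CellIndex", "FOVId", "StandardizedFOVPath"],
    )
# -----------------------------------------------------------------------------
26 |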
REQUIRED_DATASET_FIELDS = [ 27 | DatasetFields.CellId, 28 | DatasetFields.CellIndex, 29 | DatasetFields.FOVId, 30 | DatasetFields.StandardizedFOVPath, 31 | ] 32 | 33 | 34 | class SingleCellFeaturesResult(NamedTuple): 35 | cell_id: Union[int, str] 36 | path: Path 37 | 38 | 39 | class SingleCellFeaturesError(NamedTuple): 40 | cell_id: int 41 | error: str 42 | 43 | 44 | ############################################################################### 45 | 46 | 47 | class SingleCellFeatures(Step): 48 | def __init__( 49 | self, 50 | direct_upstream_tasks=[StandardizeFOVArray], 51 | filepath_columns=[DatasetFields.CellFeaturesPath], 52 | **kwargs, 53 | ): 54 | super().__init__( 55 | direct_upstream_tasks=direct_upstream_tasks, 56 | filepath_columns=filepath_columns, 57 | **kwargs, 58 | ) 59 | 60 | @staticmethod 61 | def _generate_single_cell_features( 62 | row_index: int, 63 | row: pd.Series, 64 | cell_ceiling_adjustment: int, 65 | save_dir: Path, 66 | overwrite: bool, 67 | ) -> Union[SingleCellFeaturesResult, SingleCellFeaturesError]: 68 | # Don't use dask for image reading 69 | aicsimageio.use_dask(False) 70 | 71 | # Get the ultimate end save path for this cell 72 | save_path = save_dir / f"{row.CellId}.json" 73 | 74 | # Check skip 75 | if not overwrite and save_path.is_file(): 76 | log.info(f"Skipping cell feature generation for Cell Id: {row.CellId}") 77 | return SingleCellFeaturesResult(row.CellId, save_path) 78 | 79 | # Overwrite or didn't exist 80 | log.info(f"Beginning cell feature generation for CellId: {row.CellId}") 81 | 82 | # Wrap errors for debugging later 83 | try: 84 | # Read the standardized FOV 85 | image = AICSImage(row.StandardizedFOVPath) 86 | 87 | # Preload image data 88 | image.data 89 | 90 | # Select and adjust cell shape ceiling for this cell 91 | adjusted = image_utils.select_and_adjust_segmentation_ceiling( 92 | image=image.get_image_data("CYXZ", S=0, T=0), 93 | cell_index=row.CellIndex, 94 | cell_ceiling_adjustment=cell_ceiling_adjustment, 95 | ) 96 | 97 | # Crop the FOV to the segmentation portions 98 | cropped = image_utils.crop_raw_channels_with_segmentation( 99 | image=adjusted, 100 | channels=image.get_channel_names(), 101 | ) 102 | 103 | # Generate features 104 | features = image_utils.get_features_from_image(cropped) 105 | 106 | # Save to JSON 107 | with open(save_path, "w") as write_out: 108 | json.dump(features, write_out) 109 | 110 | log.info(f"Completed cell feature generation for CellId: {row.CellId}") 111 | return SingleCellFeaturesResult(row.CellId, save_path) 112 | 113 | # Catch and return error 114 | except Exception as e: 115 | log.info( 116 | f"Failed cell feature generation for CellId: {row.CellId}. Error: {e}" 117 | ) 118 | return SingleCellFeaturesError(row.CellId, str(e)) 119 | 120 | @log_run_params 121 | def run( 122 | self, 123 | dataset: Union[str, Path, pd.DataFrame, dd.DataFrame], 124 | cell_ceiling_adjustment: int = 0, 125 | distributed_executor_address: Optional[str] = None, 126 | batch_size: Optional[int] = None, 127 | overwrite: bool = False, 128 | **kwargs, 129 | ): 130 | """ 131 | Provided a dataset generate a features JSON file for each cell. 132 | 133 | Parameters 134 | ---------- 135 | dataset: Union[str, Path, pd.DataFrame, dd.DataFrame] 136 | The primary cell dataset to use for generating features JSON for each cell. 137 | 138 | **Required dataset columns:** *["CellId", "CellIndex", "FOVId", 139 | "StandardizedFOVPath"]* 140 | 141 | cell_ceiling_adjustment: int 142 | The adjust to use for raising the cell shape ceiling. 
If <= 0, this will be 143 | ignored and cell data will be selected but not adjusted. 144 | Default: 0 145 | 146 | distributed_executor_address: Optional[str] 147 | An optional executor address to pass to some computation engine. 148 | Default: None 149 | 150 | batch_size: Optional[int] 151 | An optional batch size to process n features at a time. 152 | Default: None (Process all at once) 153 | 154 | overwrite: bool 155 | If this step has already partially or completely run, should it overwrite 156 | the previous files or not. 157 | Default: False (Do not overwrite or regenerate files) 158 | 159 | Returns 160 | ------- 161 | manifest_save_path: Path 162 | Path to the produced manifest with the CellFeaturesPath column added. 163 | """ 164 | # Handle dataset provided as string or path 165 | if isinstance(dataset, (str, Path)): 166 | dataset = Path(dataset).expanduser().resolve(strict=True) 167 | 168 | # Read dataset 169 | dataset = pd.read_csv(dataset) 170 | 171 | # Check dataset and manifest have required fields 172 | dataset_utils.check_required_fields( 173 | dataset=dataset, 174 | required_fields=REQUIRED_DATASET_FIELDS, 175 | ) 176 | 177 | # Create features directory 178 | features_dir = self.step_local_staging_dir / "cell_features" 179 | features_dir.mkdir(exist_ok=True) 180 | 181 | # Process each row 182 | with DistributedHandler(distributed_executor_address) as handler: 183 | # Start processing 184 | results = handler.batched_map( 185 | self._generate_single_cell_features, 186 | # Convert dataframe iterrows into two lists of items to iterate over 187 | # One list will be row index 188 | # One list will be the pandas series of every row 189 | *zip(*list(dataset.iterrows())), 190 | # Pass the other parameters as list of the same thing for each 191 | # mapped function call 192 | [cell_ceiling_adjustment for i in range(len(dataset))], 193 | [features_dir for i in range(len(dataset))], 194 | [overwrite for i in range(len(dataset))], 195 | batch_size=batch_size, 196 | ) 197 | 198 | # Generate features paths rows 199 | cell_features_dataset = [] 200 | errors = [] 201 | for result in results: 202 | if isinstance(result, SingleCellFeaturesResult): 203 | cell_features_dataset.append( 204 | { 205 | DatasetFields.CellId: result.cell_id, 206 | DatasetFields.CellFeaturesPath: result.path, 207 | } 208 | ) 209 | else: 210 | errors.append( 211 | {DatasetFields.CellId: result.cell_id, "Error": result.error} 212 | ) 213 | 214 | # Convert features paths rows to dataframe 215 | cell_features_dataset = pd.DataFrame(cell_features_dataset) 216 | 217 | # Drop CellFeaturesPath column if it already exists 218 | if DatasetFields.CellFeaturesPath in dataset.columns: 219 | dataset = dataset.drop(columns=[DatasetFields.CellFeaturesPath]) 220 | 221 | # Join original dataset to the fov paths 222 | self.manifest = dataset.merge(cell_features_dataset, on=DatasetFields.CellId) 223 | 224 | # Save manifest to CSV 225 | manifest_save_path = self.step_local_staging_dir / "manifest.csv" 226 | self.manifest.to_csv(manifest_save_path, index=False) 227 | 228 | # Save errored cells to JSON 229 | with open(self.step_local_staging_dir / "errors.json", "w") as write_out: 230 | json.dump(errors, write_out) 231 | 232 | return manifest_save_path 233 | -------------------------------------------------------------------------------- /actk/steps/single_cell_images/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .single_cell_images import 
SingleCellImages # noqa: F401 4 | 5 | __all__ = ["SingleCellImages"] 6 | -------------------------------------------------------------------------------- /actk/steps/single_cell_images/single_cell_images.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import json 5 | import logging 6 | from pathlib import Path 7 | from typing import List, NamedTuple, Optional, Union 8 | 9 | import aicsimageio 10 | import aicsimageprocessing as proc 11 | import dask.dataframe as dd 12 | import numpy as np 13 | import pandas as pd 14 | from aics_dask_utils import DistributedHandler 15 | from aicsimageio import AICSImage, transforms 16 | from aicsimageio.writers import OmeTiffWriter 17 | from datastep import Step, log_run_params 18 | from imageio import imwrite 19 | 20 | from ...constants import Channels, DatasetFields 21 | from ...utils import dataset_utils, image_utils 22 | from ..single_cell_features import SingleCellFeatures 23 | 24 | ############################################################################### 25 | 26 | log = logging.getLogger(__name__) 27 | 28 | ############################################################################### 29 | 30 | REQUIRED_DATASET_FIELDS = [ 31 | DatasetFields.CellId, 32 | DatasetFields.StandardizedFOVPath, 33 | DatasetFields.CellFeaturesPath, 34 | ] 35 | 36 | 37 | class CellImagesResult(NamedTuple): 38 | cell_id: Union[int, str] 39 | path_3d: Path 40 | path_2d_all_proj: Path 41 | path_2d_yx_proj: Path 42 | 43 | 44 | class CellImagesError(NamedTuple): 45 | cell_id: Union[int, str] 46 | error: str 47 | 48 | 49 | ############################################################################### 50 | 51 | 52 | class SingleCellImages(Step): 53 | def __init__( 54 | self, 55 | direct_upstream_tasks=[SingleCellFeatures], 56 | filepath_columns=[ 57 | DatasetFields.CellImage3DPath, 58 | DatasetFields.CellImage2DAllProjectionsPath, 59 | DatasetFields.CellImage2DYXProjectionPath, 60 | ], 61 | **kwargs, 62 | ): 63 | super().__init__( 64 | direct_upstream_tasks=direct_upstream_tasks, 65 | filepath_columns=filepath_columns, 66 | **kwargs, 67 | ) 68 | 69 | @staticmethod 70 | def _get_registered_image_size(row_index: int, row: pd.Series) -> List[int]: 71 | # Open cell features JSON 72 | with open(row.CellFeaturesPath, "r") as read_in: 73 | cell_features = json.load(read_in) 74 | 75 | # Return registered image size 76 | return cell_features["imsize_registered"] 77 | 78 | @staticmethod 79 | def _generate_single_cell_images( 80 | row_index: int, 81 | row: pd.Series, 82 | cell_ceiling_adjustment: int, 83 | bounding_box: np.ndarray, 84 | projection_method: str, 85 | cell_images_3d_dir: Path, 86 | cell_images_2d_all_proj_dir: Path, 87 | cell_images_2d_yx_proj_dir: Path, 88 | overwrite: bool, 89 | ) -> Union[CellImagesResult, CellImagesError]: 90 | # Don't use dask for image reading 91 | aicsimageio.use_dask(False) 92 | 93 | # Get the ultimate end save paths for this cell 94 | cell_image_3d_save_path = cell_images_3d_dir / f"{row.CellId}.ome.tiff" 95 | cell_image_2d_all_proj_save_path = ( 96 | cell_images_2d_all_proj_dir / f"{row.CellId}.png" 97 | ) 98 | cell_image_2d_yx_proj_save_path = ( 99 | cell_images_2d_yx_proj_dir / f"{row.CellId}.png" 100 | ) 101 | 102 | # Check skip 103 | if ( 104 | not overwrite 105 | # Only skip if all images exist for this cell 106 | and all( 107 | p.is_file() 108 | for p in [ 109 | cell_image_3d_save_path, 110 | cell_image_2d_all_proj_save_path, 111 | 
cell_image_2d_yx_proj_save_path, 112 | ] 113 | ) 114 | ): 115 | log.info(f"Skipping single cell image generation for CellId: {row.CellId}") 116 | return CellImagesResult( 117 | row.CellId, 118 | cell_image_3d_save_path, 119 | cell_image_2d_all_proj_save_path, 120 | cell_image_2d_yx_proj_save_path, 121 | ) 122 | 123 | # Overwrite or didn't exist 124 | log.info(f"Beginning single cell image generation for CellId: {row.CellId}") 125 | 126 | # Wrap errors for debugging later 127 | try: 128 | # Initialize image object with standardized FOV 129 | standardized_image = AICSImage(row.StandardizedFOVPath) 130 | channels = standardized_image.get_channel_names() 131 | 132 | # Preload image data 133 | standardized_image.data 134 | 135 | # Select and adjust cell shape ceiling for this cell 136 | image = image_utils.select_and_adjust_segmentation_ceiling( 137 | # Unlike most other operations, we can read in normal "CZYX" dimension 138 | # order here as all future operations are expecting it 139 | image=standardized_image.get_image_data("CYXZ", S=0, T=0), 140 | cell_index=row.CellIndex, 141 | cell_ceiling_adjustment=cell_ceiling_adjustment, 142 | ) 143 | 144 | # Perform a rigid registration on the image 145 | image, _, _ = proc.cell_rigid_registration( 146 | image, 147 | # Reorder bounding box as image is currently CYXZ 148 | bbox_size=bounding_box[[0, 2, 3, 1]], 149 | ) 150 | 151 | # Reduce size 152 | crop_3d = image * 255 153 | crop_3d = crop_3d.astype(np.uint8) 154 | 155 | # Transpose to CZYX for saving 156 | crop_3d = transforms.transpose_to_dims(crop_3d, "CYXZ", "CZYX") 157 | 158 | # Save to OME-TIFF 159 | with OmeTiffWriter(cell_image_3d_save_path, overwrite_file=True) as writer: 160 | writer.save( 161 | crop_3d, 162 | dimension_order="CZYX", 163 | channel_names=standardized_image.get_channel_names(), 164 | pixels_physical_size=standardized_image.get_physical_pixel_size(), 165 | ) 166 | 167 | # Generate 2d image projections 168 | # Crop raw channels using segmentations 169 | image = image_utils.crop_raw_channels_with_segmentation(image, channels) 170 | 171 | # Transpose to CZYX for projections 172 | image = transforms.transpose_to_dims(image, "CYXZ", "CZYX") 173 | 174 | # Select the DNA, Membrane, and Structure channels 175 | image = image[ 176 | [ 177 | channels.index(target) 178 | for target in [Channels.DNA, Channels.Membrane, Channels.Structure] 179 | ] 180 | ] 181 | 182 | # Set RGB colors 183 | # This will set: 184 | # DNA to Blue 185 | # Membrane to Red 186 | # Structure to Green 187 | colors = [[0, 0, 1], [1, 0, 0], [0, 1, 0]] 188 | 189 | # Get all axes projection image 190 | all_proj = proc.imgtoprojection( 191 | image, 192 | proj_all=True, 193 | proj_method=projection_method, 194 | local_adjust=False, 195 | global_adjust=True, 196 | colors=colors, 197 | ) 198 | 199 | # Convert to YXC for PNG writing 200 | all_proj = transforms.transpose_to_dims(all_proj, "CYX", "YXC") 201 | 202 | # Drop size to uint8 203 | all_proj = all_proj.astype(np.uint8) 204 | 205 | # Save to PNG 206 | 207 | imwrite(cell_image_2d_all_proj_save_path, all_proj) 208 | 209 | # Get YX axes projection image 210 | yx_proj = proc.imgtoprojection( 211 | image, 212 | proj_all=False, 213 | proj_method=projection_method, 214 | local_adjust=False, 215 | global_adjust=True, 216 | colors=colors, 217 | ) 218 | 219 | # Convert to YXC for PNG writing 220 | yx_proj = transforms.transpose_to_dims(yx_proj, "CYX", "YXC") 221 | 222 | # Drop size to uint8 223 | yx_proj = yx_proj.astype(np.uint8) 224 | 225 | # Save to PNG 226 | 
imwrite(cell_image_2d_yx_proj_save_path, yx_proj) 227 | 228 | log.info(f"Completed single cell image generation for CellId: {row.CellId}") 229 | 230 | # Return ready to save image 231 | return CellImagesResult( 232 | row.CellId, 233 | cell_image_3d_save_path, 234 | cell_image_2d_all_proj_save_path, 235 | cell_image_2d_yx_proj_save_path, 236 | ) 237 | 238 | # Catch and return error 239 | except Exception as e: 240 | log.info( 241 | f"Failed single cell image generation for CellId: {row.CellId}. " 242 | f"Error: {e}" 243 | ) 244 | return CellImagesError(row.CellId, str(e)) 245 | 246 | @log_run_params 247 | def run( 248 | self, 249 | dataset: Union[str, Path, pd.DataFrame, dd.DataFrame], 250 | cell_ceiling_adjustment: int = 0, 251 | bounding_box_percentile: float = 95.0, 252 | projection_method: str = "max", 253 | distributed_executor_address: Optional[str] = None, 254 | batch_size: Optional[int] = None, 255 | overwrite: bool = False, 256 | bbox: Optional[Union[tuple, list, dict]] = None, 257 | **kwargs, 258 | ): 259 | """ 260 | Provided a dataset of cell features and standardized FOV images, generate 3D 261 | single cell crops and 2D projections. 262 | 263 | Parameters 264 | ---------- 265 | dataset: Union[str, Path, pd.DataFrame, dd.DataFrame] 266 | The primary cell dataset to generate 3D single cell images for. 267 | 268 | **Required dataset columns:** *["CellId", "StandardizedFOVPath", 269 | "CellFeaturesPath"]* 270 | 271 | cell_ceiling_adjustment: int 272 | The adjustment to use for raising the cell shape ceiling. If <= 0, this will be 273 | ignored and cell data will be selected but not adjusted. 274 | Default: 0 275 | 276 | bounding_box_percentile: float 277 | A float used to generate the actual bounding box for all cells by finding the 278 | provided percentile of all cell image sizes. 279 | Default: 95.0 280 | 281 | bbox: tuple, list, dict 282 | Hard coded ZYX dimensions to set the bounding box. 283 | Note: This overrides the `bounding_box_percentile` parameter. 284 | Example: (64, 168, 104) 285 | 286 | projection_method: str 287 | The method to use for generating the flat projection. 288 | Default: max 289 | 290 | More details: 291 | https://allencellmodeling.github.io/aicsimageprocessing/aicsimageprocessing.html#aicsimageprocessing.imgToProjection.imgtoprojection 292 | 293 | distributed_executor_address: Optional[str] 294 | An optional executor address to pass to some computation engine. 295 | Default: None 296 | 297 | batch_size: Optional[int] 298 | An optional batch size to process n cells at a time. 299 | Default: None (Process all at once) 300 | 301 | overwrite: bool 302 | If this step has already partially or completely run, should it overwrite 303 | the previous files or not. 304 | Default: False (Do not overwrite or regenerate files) 305 | 306 | Returns 307 | ------- 308 | manifest_save_path: Path 309 | Path to the produced manifest with the various cell image path fields added.
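Example
-------
An illustrative call (editor's sketch); the manifest path below is
hypothetical and depends on your own local staging layout::

    from actk.steps import SingleCellImages

    step = SingleCellImages()
    manifest_path = step.run(
        dataset="single_cell_features_manifest.csv",
        bbox=(64, 168, 104),
    )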
310 | """ 311 | # Handle dataset provided as string or path 312 | if isinstance(dataset, (str, Path)): 313 | dataset = Path(dataset).expanduser().resolve(strict=True) 314 | 315 | # Read dataset 316 | dataset = pd.read_csv(dataset) 317 | 318 | # Check dataset and manifest have required fields 319 | dataset_utils.check_required_fields( 320 | dataset=dataset, 321 | required_fields=REQUIRED_DATASET_FIELDS, 322 | ) 323 | 324 | # Create save directories 325 | cell_images_3d_dir = self.step_local_staging_dir / "cell_images_3d" 326 | cell_images_2d_all_proj_dir = ( 327 | self.step_local_staging_dir / "cell_images_2d_all_proj" 328 | ) 329 | cell_images_2d_yx_proj_dir = ( 330 | self.step_local_staging_dir / "cell_images_2d_yx_proj" 331 | ) 332 | cell_images_3d_dir.mkdir(exist_ok=True) 333 | cell_images_2d_all_proj_dir.mkdir(exist_ok=True) 334 | cell_images_2d_yx_proj_dir.mkdir(exist_ok=True) 335 | 336 | # Process each row 337 | with DistributedHandler(distributed_executor_address) as handler: 338 | # Start processing 339 | bbox_results = handler.batched_map( 340 | self._get_registered_image_size, 341 | # Convert dataframe iterrows into two lists of items to iterate over 342 | # One list will be row index 343 | # One list will be the pandas series of every row 344 | *zip(*list(dataset.iterrows())), 345 | batch_size=batch_size, 346 | ) 347 | 348 | if bbox: 349 | num_of_channels = bbox_results[0][0] 350 | bbox = [num_of_channels] + list(bbox) 351 | bbox_results = [bbox for i in range(len(bbox_results))] 352 | log.info(f"Using hard coded bounding box with ZYX dimensions: {bbox}.") 353 | 354 | # Compute bounding box with percentile 355 | bbox_results = np.array(bbox_results) 356 | bounding_box = np.percentile(bbox_results, bounding_box_percentile, axis=0) 357 | bounding_box = np.ceil(bounding_box) 358 | 359 | # Generate bounded arrays 360 | results = handler.batched_map( 361 | self._generate_single_cell_images, 362 | # Convert dataframe iterrows into two lists of items to iterate over 363 | # One list will be row index 364 | # One list will be the pandas series of every row 365 | *zip(*list(dataset.iterrows())), 366 | # Pass the other parameters as list of the same thing for each 367 | # mapped function call 368 | [cell_ceiling_adjustment for i in range(len(dataset))], 369 | [bounding_box for i in range(len(dataset))], 370 | [projection_method for i in range(len(dataset))], 371 | [cell_images_3d_dir for i in range(len(dataset))], 372 | [cell_images_2d_all_proj_dir for i in range(len(dataset))], 373 | [cell_images_2d_yx_proj_dir for i in range(len(dataset))], 374 | [overwrite for i in range(len(dataset))], 375 | batch_size=batch_size, 376 | ) 377 | 378 | # Generate single cell images dataset rows 379 | single_cell_images_dataset = [] 380 | errors = [] 381 | for r in results: 382 | if isinstance(r, CellImagesResult): 383 | single_cell_images_dataset.append( 384 | { 385 | DatasetFields.CellId: r.cell_id, 386 | DatasetFields.CellImage3DPath: r.path_3d, 387 | DatasetFields.CellImage2DAllProjectionsPath: r.path_2d_all_proj, 388 | DatasetFields.CellImage2DYXProjectionPath: r.path_2d_yx_proj, 389 | } 390 | ) 391 | else: 392 | errors.append({DatasetFields.CellId: r.cell_id, "Error": r.error}) 393 | 394 | # Convert features paths rows to dataframe 395 | single_cell_images_dataset = pd.DataFrame(single_cell_images_dataset) 396 | 397 | # Drop the various single cell image columns if they already exist 398 | drop_columns = [] 399 | if DatasetFields.CellImage3DPath in dataset.columns: 400 | 
drop_columns.append(DatasetFields.CellImage3DPath) 401 | if DatasetFields.CellImage2DAllProjectionsPath in dataset.columns: 402 | drop_columns.append(DatasetFields.CellImage2DAllProjectionsPath) 403 | if DatasetFields.CellImage2DYXProjectionPath in dataset.columns: 404 | drop_columns.append(DatasetFields.CellImage2DYXProjectionPath) 405 | 406 | dataset = dataset.drop(columns=drop_columns) 407 | 408 | # Join original dataset to the fov paths 409 | self.manifest = dataset.merge( 410 | single_cell_images_dataset, on=DatasetFields.CellId 411 | ) 412 | 413 | # Save manifest to CSV 414 | manifest_save_path = self.step_local_staging_dir / "manifest.csv" 415 | self.manifest.to_csv(manifest_save_path, index=False) 416 | 417 | # Save errored cells to JSON 418 | with open(self.step_local_staging_dir / "errors.json", "w") as write_out: 419 | json.dump(errors, write_out) 420 | 421 | return manifest_save_path 422 | -------------------------------------------------------------------------------- /actk/steps/standardize_fov_array/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .standardize_fov_array import StandardizeFOVArray # noqa: F401 4 | 5 | __all__ = ["StandardizeFOVArray"] 6 | -------------------------------------------------------------------------------- /actk/steps/standardize_fov_array/standardize_fov_array.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import json 5 | import logging 6 | from pathlib import Path 7 | from typing import NamedTuple, Optional, Tuple, Union 8 | 9 | import aicsimageio 10 | import dask.dataframe as dd 11 | import pandas as pd 12 | from aics_dask_utils import DistributedHandler 13 | from aicsimageio import transforms 14 | from aicsimageio.writers import OmeTiffWriter 15 | from datastep import Step, log_run_params 16 | 17 | from ...constants import DatasetFields 18 | from ...utils import dataset_utils, image_utils 19 | 20 | ############################################################################### 21 | 22 | log = logging.getLogger(__name__) 23 | 24 | ############################################################################### 25 | 26 | REQUIRED_DATASET_FIELDS = [ 27 | DatasetFields.FOVId, 28 | DatasetFields.SourceReadPath, 29 | DatasetFields.NucleusSegmentationReadPath, 30 | DatasetFields.MembraneSegmentationReadPath, 31 | DatasetFields.ChannelIndexDNA, 32 | DatasetFields.ChannelIndexMembrane, 33 | DatasetFields.ChannelIndexStructure, 34 | DatasetFields.ChannelIndexBrightfield, 35 | DatasetFields.ChannelIndexNucleusSegmentation, 36 | DatasetFields.ChannelIndexMembraneSegmentation, 37 | ] 38 | 39 | 40 | class StandardizeFOVArrayResult(NamedTuple): 41 | fov_id: Union[int, str] 42 | path: Path 43 | 44 | 45 | class StandardizeFOVArrayError(NamedTuple): 46 | fov_id: int 47 | error: str 48 | 49 | 50 | ############################################################################### 51 | 52 | 53 | class StandardizeFOVArray(Step): 54 | def __init__(self, filepath_columns=[DatasetFields.StandardizedFOVPath], **kwargs): 55 | super().__init__(filepath_columns=filepath_columns, **kwargs) 56 | 57 | @staticmethod 58 | def _generate_standardized_fov_array( 59 | row_index: int, 60 | row: pd.Series, 61 | current_pixel_sizes: Optional[Tuple[float]], 62 | desired_pixel_sizes: Optional[Tuple[float]], 63 | save_dir: Path, 64 | overwrite: bool, 65 | ) -> Union[StandardizeFOVArrayResult, 
StandardizeFOVArrayError]: 66 | # Don't use dask for image reading 67 | aicsimageio.use_dask(False) 68 | 69 | # Get the ultimate end save path for this cell 70 | save_path = save_dir / f"{row.FOVId}.ome.tiff" 71 | 72 | # Check skip 73 | if not overwrite and save_path.is_file(): 74 | log.info(f"Skipping standardized FOV generation for FOVId: {row.FOVId}") 75 | return StandardizeFOVArrayResult(row.FOVId, save_path) 76 | 77 | # Overwrite or didn't exist 78 | log.info(f"Beginning standardized FOV generation for FOVId: {row.FOVId}") 79 | 80 | # Wrap errors for debugging later 81 | try: 82 | # Get normalized image array 83 | normalized_img, channels, pixel_sizes = image_utils.get_normed_image_array( 84 | raw_image=row.SourceReadPath, 85 | nucleus_seg_image=row.NucleusSegmentationReadPath, 86 | membrane_seg_image=row.MembraneSegmentationReadPath, 87 | dna_channel_index=row.ChannelIndexDNA, 88 | membrane_channel_index=row.ChannelIndexMembrane, 89 | structure_channel_index=row.ChannelIndexStructure, 90 | brightfield_channel_index=row.ChannelIndexBrightfield, 91 | nucleus_seg_channel_index=row.ChannelIndexNucleusSegmentation, 92 | membrane_seg_channel_index=row.ChannelIndexMembraneSegmentation, 93 | current_pixel_sizes=current_pixel_sizes, 94 | desired_pixel_sizes=desired_pixel_sizes, 95 | ) 96 | 97 | # Reshape data for serialization 98 | reshaped = transforms.transpose_to_dims(normalized_img, "CYXZ", "CZYX") 99 | 100 | # Save array as OME Tiff 101 | with OmeTiffWriter(save_path, overwrite_file=True) as writer: 102 | writer.save( 103 | data=reshaped, 104 | dimension_order="CZYX", 105 | channel_names=channels, 106 | pixels_physical_size=pixel_sizes, 107 | ) 108 | 109 | log.info(f"Completed standardized FOV generation for FOVId: {row.FOVId}") 110 | return StandardizeFOVArrayResult(row.FOVId, save_path) 111 | 112 | # Catch and return error 113 | except Exception as e: 114 | log.info( 115 | f"Failed standardized FOV generation for FOVId: {row.FOVId}. Error: {e}" 116 | ) 117 | return StandardizeFOVArrayError(row.FOVId, str(e)) 118 | 119 | @log_run_params 120 | def run( 121 | self, 122 | dataset: Union[str, Path, pd.DataFrame, dd.DataFrame], 123 | current_pixel_sizes: Optional[Tuple[float]] = ( 124 | 0.10833333333333332, 125 | 0.10833333333333332, 126 | 0.29, 127 | ), 128 | desired_pixel_sizes: Tuple[float] = (0.29, 0.29, 0.29), 129 | distributed_executor_address: Optional[str] = None, 130 | batch_size: Optional[int] = None, 131 | overwrite: bool = False, 132 | **kwargs, 133 | ) -> Path: 134 | """ 135 | Convert a dataset of raw FOV images and their nucleus and membrane 136 | segmentations, into a single, standard order and shape, and normalized image. 137 | 138 | Parameters 139 | ---------- 140 | dataset: Union[str, Path, pd.DataFrame, dd.DataFrame] 141 | The dataset to use for generating standard order, normalized, image arrays. 142 | 143 | **Required dataset columns:** *["FOVId", "SourceReadPath", 144 | "NucleusSegmentationReadPath", "MembraneSegmentationReadPath", 145 | "ChannelIndexDNA", "ChannelIndexMembrane", "ChannelIndexStructure", 146 | "ChannelIndexBrightfield"]* 147 | 148 | 149 | current_pixel_sizes: Optional[Tuple[float]] 150 | The current physical pixel sizes as a tuple of the raw image. 151 | Default: (0.10833333333333332, 0.10833333333333332, 0.29), though if None, 152 | uses (`aicsimageio.AICSImage.get_physical_pixel_size` on the raw image) 153 | 154 | 155 | desired_pixel_sizes: Tuple[float] 156 | The desired pixel size for to resize each image to in XYZ order. 
157 | Default: (0.29, 0.29, 0.29) 158 | 159 | distributed_executor_address: Optional[str] 160 | An optional executor address to pass to some computation engine. 161 | Default: None 162 | 163 | batch_size: Optional[int] 164 | An optional batch size to process n features at a time. 165 | Default: None (Process all at once) 166 | 167 | overwrite: bool 168 | If this step has already partially or completely run, should it overwrite 169 | the previous files or not. 170 | Default: False (Do not overwrite or regenerate files) 171 | 172 | Returns 173 | ------- 174 | manifest_save_path: Path 175 | Path to the produced manifest with the StandardizedFOVPath column added. 176 | """ 177 | # Handle dataset provided as string or path 178 | if isinstance(dataset, (str, Path)): 179 | dataset = Path(dataset).expanduser().resolve(strict=True) 180 | 181 | # Read dataset 182 | dataset = pd.read_csv(dataset) 183 | 184 | # Check the dataset for the required columns 185 | dataset_utils.check_required_fields( 186 | dataset=dataset, 187 | required_fields=REQUIRED_DATASET_FIELDS, 188 | ) 189 | 190 | # Log original length of cell dataset 191 | log.info(f"Original dataset length: {len(dataset)}") 192 | 193 | # Check assumption: all fields per FOV are constant 194 | # except CellID and CellIndex 195 | const_cols_per_fov = [ 196 | c for c in dataset.columns if c not in ["CellId", "CellIndex"] 197 | ] 198 | df_const_cols = ( 199 | dataset.groupby("FOVId")[const_cols_per_fov].nunique(dropna=False).eq(1) 200 | ) 201 | 202 | for col_name, is_const in df_const_cols.all().iteritems(): 203 | try: 204 | assert is_const 205 | except AssertionError: 206 | example = df_const_cols[~df_const_cols[col_name]].sample() 207 | raise ValueError( 208 | f"{col_name} has multiple values per FOV. " 209 | f"Example: FOV {example.index.item()}" 210 | ) 211 | 212 | # As there is an assumption that this dataset is for cells, 213 | # generate the FOV dataset by selecting unique FOV Ids 214 | fov_dataset = dataset.drop_duplicates(DatasetFields.FOVId) 215 | 216 | # Log produced FOV dataset length 217 | log.info(f"Unique FOV's found in dataset: {len(fov_dataset)}") 218 | 219 | # Create standardized fovs directory 220 | fovs_dir = self.step_local_staging_dir / "standardized_fovs" 221 | fovs_dir.mkdir(exist_ok=True) 222 | 223 | # Process each row 224 | with DistributedHandler(distributed_executor_address) as handler: 225 | # Start processing 226 | results = handler.batched_map( 227 | self._generate_standardized_fov_array, 228 | # Convert dataframe iterrows into two lists of items to iterate over 229 | # One list will be row index 230 | # One list will be the pandas series of every row 231 | *zip(*list(fov_dataset.iterrows())), 232 | # Pass the other parameters as list of the same thing for each 233 | # mapped function call 234 | [current_pixel_sizes for i in range(len(fov_dataset))], 235 | [desired_pixel_sizes for i in range(len(fov_dataset))], 236 | [fovs_dir for i in range(len(fov_dataset))], 237 | [overwrite for i in range(len(dataset))], 238 | batch_size=batch_size, 239 | ) 240 | 241 | # Generate fov paths rows 242 | standardized_fov_paths_dataset = [] 243 | errors = [] 244 | for result in results: 245 | if isinstance(result, StandardizeFOVArrayResult): 246 | standardized_fov_paths_dataset.append( 247 | { 248 | DatasetFields.FOVId: result.fov_id, 249 | DatasetFields.StandardizedFOVPath: result.path, 250 | } 251 | ) 252 | else: 253 | errors.append( 254 | {DatasetFields.FOVId: result.fov_id, "Error": result.error} 255 | ) 256 | 257 | # Convert fov 
paths to dataframe 258 | standardized_fov_paths_dataset = pd.DataFrame(standardized_fov_paths_dataset) 259 | 260 | # Drop StandardizedFOVPath column if it already exists 261 | if DatasetFields.StandardizedFOVPath in dataset.columns: 262 | dataset = dataset.drop(columns=[DatasetFields.StandardizedFOVPath]) 263 | 264 | # Join original dataset to the fov paths 265 | self.manifest = dataset.merge( 266 | standardized_fov_paths_dataset, on=DatasetFields.FOVId 267 | ) 268 | 269 | # Save manifest to CSV 270 | manifest_save_path = self.step_local_staging_dir / "manifest.csv" 271 | self.manifest.to_csv(manifest_save_path, index=False) 272 | 273 | # Save errored FOVs to JSON 274 | with open(self.step_local_staging_dir / "errors.json", "w") as write_out: 275 | json.dump(errors, write_out) 276 | 277 | return manifest_save_path 278 | -------------------------------------------------------------------------------- /actk/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """Unit test package for actk.""" 4 | -------------------------------------------------------------------------------- /actk/tests/conftest.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from pathlib import Path 5 | 6 | import pytest 7 | 8 | ############################################################################### 9 | 10 | 11 | @pytest.fixture 12 | def data_dir() -> Path: 13 | return Path(__file__).parent / "data" 14 | -------------------------------------------------------------------------------- /actk/tests/steps/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """Unit test package for actk.steps.""" 4 | -------------------------------------------------------------------------------- /actk/tests/steps/test_diagnostic_sheets.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from pathlib import Path 5 | 6 | import dask.dataframe as dd 7 | import pytest 8 | 9 | from actk.constants import DatasetFields 10 | from actk.steps import DiagnosticSheets 11 | 12 | ####################################################################################### 13 | 14 | 15 | def test_run(data_dir): 16 | # Initialize step 17 | step = DiagnosticSheets() 18 | 19 | # Ensure that it still runs 20 | output_manifest = step.run( 21 | data_dir / "example_single_cell_images_dataset.csv", 22 | metadata="FOVId", 23 | feature="imsize_orig", 24 | overwrite=True, 25 | ) 26 | output_manifest = dd.read_csv(output_manifest) 27 | 28 | # Run asserts 29 | # Check expected columns 30 | assert all( 31 | expected_col in output_manifest.columns 32 | for expected_col in [DatasetFields.DiagnosticSheetPath] 33 | ) 34 | 35 | # Check all expected files exist 36 | assert all( 37 | Path(f).resolve(strict=True) 38 | for f in output_manifest[DatasetFields.DiagnosticSheetPath] 39 | ) 40 | 41 | 42 | def test_catch_no_all_proj_image_path(data_dir): 43 | 44 | # Initialize step 45 | step = DiagnosticSheets() 46 | 47 | with pytest.raises(Exception): 48 | assert step.run( 49 | data_dir / "example_single_cell_features_dataset.csv", overwrite=True 50 | ) 51 | 52 | 53 | def test_max_num_cells_per_sheet(data_dir): 54 | 55 | # Initialize step 56 | step = DiagnosticSheets() 57 | 58 | # Ensure that it still runs 59 | output_manifest = 
step.run( 60 | data_dir / "example_single_cell_images_dataset.csv", 61 | max_cells=2, 62 | metadata="FOVId", 63 | feature="imsize_orig", 64 | overwrite=True, 65 | ) 66 | 67 | output_manifest = dd.read_csv(output_manifest) 68 | 69 | # Run asserts 70 | # Check expected columns 71 | assert all( 72 | expected_col in output_manifest.columns 73 | for expected_col in [DatasetFields.DiagnosticSheetPath] 74 | ) 75 | 76 | # Check all expected files exist 77 | assert all( 78 | Path(f).resolve(strict=True) 79 | for f in output_manifest[DatasetFields.DiagnosticSheetPath] 80 | ) 81 | 82 | 83 | def test_multiple_metadata_and_fig_size(data_dir): 84 | 85 | # Initialize step 86 | step = DiagnosticSheets() 87 | 88 | # Ensure that it still runs 89 | output_manifest = step.run( 90 | data_dir / "example_single_cell_images_dataset.csv", 91 | max_cells=2, 92 | metadata=["FOVId", "ChannelIndexDNA"], 93 | feature="imsize_orig", 94 | overwrite=True, 95 | fig_width=27, 96 | fig_height=27, 97 | ) 98 | 99 | output_manifest = dd.read_csv(output_manifest) 100 | 101 | # Run asserts 102 | # Check expected columns 103 | assert all( 104 | expected_col in output_manifest.columns 105 | for expected_col in [DatasetFields.DiagnosticSheetPath] 106 | ) 107 | 108 | # Check all expected files exist 109 | assert all( 110 | Path(f).resolve(strict=True) 111 | for f in output_manifest[DatasetFields.DiagnosticSheetPath] 112 | ) 113 | -------------------------------------------------------------------------------- /actk/tests/steps/test_single_cell_features.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from pathlib import Path 5 | 6 | import dask.dataframe as dd 7 | 8 | from actk.constants import DatasetFields 9 | from actk.steps import SingleCellFeatures 10 | 11 | ####################################################################################### 12 | 13 | 14 | def test_run(data_dir): 15 | # Initialize step 16 | step = SingleCellFeatures() 17 | 18 | # Ensure that it still runs 19 | output_manifest = step.run(data_dir / "example_standardized_fov_paths_dataset.csv") 20 | output_manifest = dd.read_csv(output_manifest) 21 | 22 | # Read input dataset 23 | input_dataset = dd.read_csv(data_dir / "example_standardized_fov_paths_dataset.csv") 24 | 25 | # Run asserts 26 | # Check expected columns 27 | assert all( 28 | expected_col in output_manifest.columns 29 | for expected_col in [*input_dataset.columns, DatasetFields.CellFeaturesPath] 30 | ) 31 | # Check output length 32 | assert len(output_manifest) == len(input_dataset) 33 | # Check all expected files exist 34 | assert all( 35 | Path(f).resolve(strict=True) 36 | for f in output_manifest[DatasetFields.CellFeaturesPath] 37 | ) 38 | -------------------------------------------------------------------------------- /actk/tests/steps/test_single_cell_images.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from pathlib import Path 5 | 6 | import dask.dataframe as dd 7 | 8 | from actk.constants import DatasetFields 9 | from actk.steps import SingleCellImages 10 | 11 | ####################################################################################### 12 | 13 | 14 | def test_run(data_dir): 15 | # Initialize step 16 | step = SingleCellImages() 17 | 18 | # Ensure that it still runs 19 | output_manifest = step.run(data_dir / "example_single_cell_features_dataset.csv") 20 | output_manifest = 
dd.read_csv(output_manifest) 21 | 22 | # Read input dataset 23 | input_dataset = dd.read_csv(data_dir / "example_single_cell_features_dataset.csv") 24 | 25 | # Run asserts 26 | # Check expected columns 27 | assert all( 28 | expected_col in output_manifest.columns 29 | for expected_col in [ 30 | *input_dataset.columns, 31 | DatasetFields.CellImage3DPath, 32 | DatasetFields.CellImage2DAllProjectionsPath, 33 | DatasetFields.CellImage2DYXProjectionPath, 34 | ] 35 | ) 36 | # Check output length 37 | assert len(output_manifest) == len(input_dataset) 38 | # Check all expected files exist 39 | for field in [ 40 | DatasetFields.CellImage3DPath, 41 | DatasetFields.CellImage2DAllProjectionsPath, 42 | DatasetFields.CellImage2DYXProjectionPath, 43 | ]: 44 | assert all(Path(f).resolve(strict=True) for f in output_manifest[field]) 45 | 46 | 47 | def test_run_bbox(data_dir): 48 | # Initialize step 49 | step = SingleCellImages() 50 | 51 | # Ensure that it still runs 52 | output_manifest = step.run( 53 | data_dir / "example_single_cell_features_dataset.csv", 54 | bbox=(64, 168, 104), 55 | ) 56 | output_manifest = dd.read_csv(output_manifest) 57 | 58 | # Read input dataset 59 | input_dataset = dd.read_csv(data_dir / "example_single_cell_features_dataset.csv") 60 | 61 | # Run asserts 62 | # Check expected columns 63 | assert all( 64 | expected_col in output_manifest.columns 65 | for expected_col in [ 66 | *input_dataset.columns, 67 | DatasetFields.CellImage3DPath, 68 | DatasetFields.CellImage2DAllProjectionsPath, 69 | DatasetFields.CellImage2DYXProjectionPath, 70 | ] 71 | ) 72 | # Check output length 73 | assert len(output_manifest) == len(input_dataset) 74 | # Check all expected files exist 75 | for field in [ 76 | DatasetFields.CellImage3DPath, 77 | DatasetFields.CellImage2DAllProjectionsPath, 78 | DatasetFields.CellImage2DYXProjectionPath, 79 | ]: 80 | assert all(Path(f).resolve(strict=True) for f in output_manifest[field]) 81 | -------------------------------------------------------------------------------- /actk/tests/steps/test_standardize_fov_array.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from pathlib import Path 5 | 6 | import dask.dataframe as dd 7 | import pytest 8 | 9 | from actk.constants import DatasetFields 10 | from actk.steps import StandardizeFOVArray 11 | 12 | ####################################################################################### 13 | 14 | 15 | def test_run(data_dir): 16 | # Initialize step 17 | step = StandardizeFOVArray() 18 | 19 | # Ensure that it still runs 20 | output_manifest = step.run(data_dir / "example_dataset.csv") 21 | output_manifest = dd.read_csv(output_manifest) 22 | 23 | # Read input dataset 24 | input_dataset = dd.read_csv(data_dir / "example_dataset.csv") 25 | 26 | # Run asserts 27 | # Check expected columns 28 | assert all( 29 | expected_col in output_manifest.columns 30 | for expected_col in [*input_dataset.columns, DatasetFields.StandardizedFOVPath] 31 | ) 32 | # Check output length 33 | assert len(output_manifest) == len(input_dataset) 34 | # Check all expected files exist 35 | assert all( 36 | Path(f).resolve(strict=True) 37 | for f in output_manifest[DatasetFields.StandardizedFOVPath] 38 | ) 39 | 40 | 41 | def test_catch_nonconstant_segs_per_fov(data_dir): 42 | # Initialize step 43 | step = StandardizeFOVArray() 44 | 45 | with pytest.raises(Exception): 46 | assert step.run(data_dir / "example_BAD_dataset_seg_paths_vary_per_fov.csv") 47 | 
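# -----------------------------------------------------------------------------
# Editor's note: the sketch below is illustrative and is not part of the
# original test module. test_catch_nonconstant_segs_per_fov exercises the
# per-FOV constancy check in StandardizeFOVArray.run(), which requires every
# column other than CellId and CellIndex to hold exactly one value per FOVId.
# A toy version of that check, with hypothetical data, looks like this:
if __name__ == "__main__":  # guard so the sketch never runs on import
    import pandas as pd

    toy = pd.DataFrame(
        {
            "FOVId": [1, 1, 2],
            "CellId": [10, 11, 20],
            # FOV 1 points at two different segmentation files -> invalid
            "MembraneSegmentationReadPath": ["a.tiff", "b.tiff", "c.tiff"],
        }
    )
    const_cols = [c for c in toy.columns if c not in ["CellId", "CellIndex"]]
    is_const = toy.groupby("FOVId")[const_cols].nunique(dropna=False).eq(1)

    # MembraneSegmentationReadPath is not constant within FOV 1, which is
    # exactly the condition that makes the step raise a ValueError naming the
    # offending column and an example FOVId.
    assert not is_const["MembraneSegmentationReadPath"].loc[1]
# -----------------------------------------------------------------------------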
-------------------------------------------------------------------------------- /actk/tests/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """Unit test package for actk.utils.""" 4 | -------------------------------------------------------------------------------- /actk/tests/utils/test_dataset_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import dask.dataframe as dd 5 | import pandas as pd 6 | import pytest 7 | 8 | from actk import exceptions 9 | from actk.utils import dataset_utils 10 | 11 | ####################################################################################### 12 | 13 | EXAMPLE_PD_DATAFRAME = pd.DataFrame( 14 | [ 15 | {"name": "jackson", "job": "engineer"}, 16 | {"name": "rory", "job": "scientist"}, 17 | {"name": "julie", "job": "scientist"}, 18 | ] 19 | ) 20 | 21 | EXAMPLE_DD_DATAFRAME = dd.from_pandas(EXAMPLE_PD_DATAFRAME, npartitions=1) 22 | 23 | ####################################################################################### 24 | 25 | 26 | @pytest.mark.parametrize( 27 | "dataset, required_fields", 28 | [ 29 | (EXAMPLE_PD_DATAFRAME, ["name", "job"]), 30 | (EXAMPLE_DD_DATAFRAME, ["name", "job"]), 31 | pytest.param( 32 | EXAMPLE_PD_DATAFRAME, 33 | ["hello"], 34 | marks=pytest.mark.raises(exception=exceptions.MissingDataError), 35 | ), 36 | pytest.param( 37 | EXAMPLE_DD_DATAFRAME, 38 | ["hello"], 39 | marks=pytest.mark.raises(exception=exceptions.MissingDataError), 40 | ), 41 | pytest.param( 42 | EXAMPLE_PD_DATAFRAME, 43 | ["name", "job", "hello"], 44 | marks=pytest.mark.raises(exception=exceptions.MissingDataError), 45 | ), 46 | pytest.param( 47 | EXAMPLE_DD_DATAFRAME, 48 | ["name", "job", "hello"], 49 | marks=pytest.mark.raises(exception=exceptions.MissingDataError), 50 | ), 51 | ], 52 | ) 53 | def test_check_required_fields(dataset, required_fields): 54 | # Run check 55 | dataset_utils.check_required_fields(dataset, required_fields) 56 | -------------------------------------------------------------------------------- /actk/tests/utils/test_image_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import json 5 | 6 | import numpy as np 7 | import pytest 8 | from aicsimageio import AICSImage 9 | from numpy import testing as nptest 10 | 11 | from actk.utils import image_utils 12 | 13 | ####################################################################################### 14 | 15 | 16 | @pytest.mark.parametrize( 17 | "raw_image, " 18 | "nuc_seg_image, " 19 | "memb_seg_image, " 20 | "dna_channel_index, " 21 | "membrane_channel_index, " 22 | "structure_channel_index, " 23 | "brightfield_channel_index, " 24 | "nucleus_seg_channel_index, " 25 | "membrane_seg_channel_index, " 26 | "current_pixel_sizes, " 27 | "desired_pixel_sizes, " 28 | "expected_image", 29 | [ 30 | ( 31 | "example_raw_0.ome.tiff", 32 | "example_nuc_seg_0.tiff", 33 | "example_memb_seg_0.tiff", 34 | 3, 35 | 2, 36 | 1, 37 | 0, 38 | 0, 39 | 0, 40 | None, 41 | # The most recently used desired pixel size from original repo 42 | (0.29, 0.29, 0.29), 43 | "example_normed_image_array_0.ome.tiff", 44 | ), 45 | ( 46 | "example_raw_1.ome.tiff", 47 | "example_nuc_seg_1.tiff", 48 | "example_memb_seg_1.tiff", 49 | 2, 50 | 0, 51 | 1, 52 | 3, 53 | 0, 54 | 0, 55 | None, 56 | # The most recently used desired pixel size from 
original repo 57 | (0.29, 0.29, 0.29), 58 | "example_normed_image_array_1.ome.tiff", 59 | ), 60 | ], 61 | ) 62 | def test_get_normed_image_array( 63 | data_dir, 64 | raw_image, 65 | nuc_seg_image, 66 | memb_seg_image, 67 | dna_channel_index, 68 | membrane_channel_index, 69 | structure_channel_index, 70 | brightfield_channel_index, 71 | nucleus_seg_channel_index, 72 | membrane_seg_channel_index, 73 | current_pixel_sizes, 74 | desired_pixel_sizes, 75 | expected_image, 76 | ): 77 | """ 78 | The example data used to test this function was generated with the original function 79 | and then stored with `aicsimageio.writers.OmeTiffWriter` after doing an 80 | `aicsimageio.transforms.transpose_to_dims` to transpose to "CZYX" as `OmeTiffWriter` 81 | requires data have the "YX" dimensions last. Additionally, metadata has been updated 82 | to the Channel name standards in the constants.py file. 83 | """ 84 | # Get actual 85 | actual_image, actual_channels, actual_px_sizes = image_utils.get_normed_image_array( 86 | data_dir / raw_image, 87 | data_dir / nuc_seg_image, 88 | data_dir / memb_seg_image, 89 | dna_channel_index, 90 | membrane_channel_index, 91 | structure_channel_index, 92 | brightfield_channel_index, 93 | nucleus_seg_channel_index, 94 | membrane_seg_channel_index, 95 | current_pixel_sizes, 96 | desired_pixel_sizes, 97 | ) 98 | 99 | # Read expected 100 | expected_image = AICSImage(data_dir / expected_image) 101 | 102 | # Assert actual equals expected 103 | assert np.array_equiv(actual_image, expected_image.get_image_data("CYXZ", S=0, T=0)) 104 | assert actual_channels == expected_image.get_channel_names() 105 | assert tuple(actual_px_sizes) == expected_image.get_physical_pixel_size() 106 | 107 | 108 | @pytest.mark.parametrize( 109 | "image, cell_index, cell_ceiling_adjustment, expected_image", 110 | [ 111 | ( 112 | "example_normed_image_array_0.ome.tiff", 113 | 1, 114 | 7, 115 | "example_selected_and_adjusted_array_0_1.ome.tiff", 116 | ), 117 | ( 118 | "example_normed_image_array_0.ome.tiff", 119 | 2, 120 | 7, 121 | "example_selected_and_adjusted_array_0_2.ome.tiff", 122 | ), 123 | ( 124 | "example_normed_image_array_0.ome.tiff", 125 | 3, 126 | 7, 127 | "example_selected_and_adjusted_array_0_3.ome.tiff", 128 | ), 129 | ], 130 | ) 131 | def test_select_and_adjust_segmentation_ceiling( 132 | data_dir, 133 | image, 134 | cell_index, 135 | cell_ceiling_adjustment, 136 | expected_image, 137 | ): 138 | """ 139 | The example data used to test this function was generated with the original function 140 | and then stored with `aicsimageio.writers.OmeTiffWriter` after doing an 141 | `aicsimageio.transforms.transpose_to_dims` to transpose to "CZYX" as `OmeTiffWriter` 142 | requires data have the "YX" dimensions last. Additionally, metadata has been updated 143 | to the Channel name standards in the constants.py file. 
144 | """ 145 | # Get actual 146 | image = AICSImage(data_dir / image).get_image_data("CYXZ", S=0, T=0) 147 | actual_image = image_utils.select_and_adjust_segmentation_ceiling( 148 | image, cell_index, cell_ceiling_adjustment=cell_ceiling_adjustment 149 | ) 150 | 151 | # Read expected 152 | expected_image = AICSImage(data_dir / expected_image) 153 | 154 | # Assert actual equals expected 155 | assert np.array_equiv(actual_image, expected_image.get_image_data("CYXZ", S=0, T=0)) 156 | 157 | 158 | @pytest.mark.parametrize( 159 | "image, expected_image", 160 | [ 161 | ( 162 | "example_selected_and_adjusted_array_0_1.ome.tiff", 163 | "example_cropped_with_segs_array_0_1.ome.tiff", 164 | ), 165 | ( 166 | "example_selected_and_adjusted_array_0_2.ome.tiff", 167 | "example_cropped_with_segs_array_0_2.ome.tiff", 168 | ), 169 | ( 170 | "example_selected_and_adjusted_array_0_3.ome.tiff", 171 | "example_cropped_with_segs_array_0_3.ome.tiff", 172 | ), 173 | ], 174 | ) 175 | def test_crop_raw_channels_with_segmentation(data_dir, image, expected_image): 176 | """ 177 | The example data used to test this function was generated with the original function 178 | and then stored with `aicsimageio.writers.OmeTiffWriter` after doing an 179 | `aicsimageio.transforms.transpose_to_dims` to transpose to "CZYX" as `OmeTiffWriter` 180 | requires data have the "YX" dimensions last. Additionally, metadata has been updated 181 | to the Channel name standards in the constants.py file. 182 | """ 183 | # Get actual 184 | image = AICSImage(data_dir / image) 185 | data = image.get_image_data("CYXZ", S=0, T=0) 186 | channels = image.get_channel_names() 187 | actual_image = image_utils.crop_raw_channels_with_segmentation(data, channels) 188 | 189 | # Read expected 190 | expected_image = AICSImage(data_dir / expected_image) 191 | 192 | # Assert actual equals expected 193 | assert np.array_equiv(actual_image, expected_image.get_image_data("CYXZ", S=0, T=0)) 194 | 195 | 196 | @pytest.mark.parametrize( 197 | "image, expected_image, expected_params", 198 | [ 199 | ( 200 | "example_cropped_with_segs_array_0_1.ome.tiff", 201 | "example_prepared_image_for_feature_extraction_0_1.ome.tiff", 202 | "example_prepared_params_for_feature_extraction_0_1.json", 203 | ), 204 | ( 205 | "example_cropped_with_segs_array_0_2.ome.tiff", 206 | "example_prepared_image_for_feature_extraction_0_2.ome.tiff", 207 | "example_prepared_params_for_feature_extraction_0_2.json", 208 | ), 209 | ( 210 | "example_cropped_with_segs_array_0_3.ome.tiff", 211 | "example_prepared_image_for_feature_extraction_0_3.ome.tiff", 212 | "example_prepared_params_for_feature_extraction_0_3.json", 213 | ), 214 | ], 215 | ) 216 | def test_prepare_image_for_feature_extraction( 217 | data_dir, image, expected_image, expected_params 218 | ): 219 | """ 220 | The example image data used to test this function was generated with the original 221 | function and then stored with `aicsimageio.writers.OmeTiffWriter` after doing an 222 | `aicsimageio.transforms.transpose_to_dims` to transpose to "CZYX" as `OmeTiffWriter` 223 | requires data have the "YX" dimensions last. Additionally, metadata has been updated 224 | to the Channel name standards in the constants.py file. Example parameter data was 225 | stored in JSON after converting numpy arrays to lists. 
226 | """ 227 | # Get actual 228 | image = AICSImage(data_dir / image).get_image_data("CYXZ", S=0, T=0) 229 | ( 230 | actual_image, 231 | actual_memb_com, 232 | actual_angle, 233 | actual_flipdim, 234 | ) = image_utils.prepare_image_for_feature_extraction(image) 235 | 236 | # Read expected image 237 | expected_image = AICSImage(data_dir / expected_image).get_image_data( 238 | "CYXZ", S=0, T=0 239 | ) 240 | 241 | # Read expected params 242 | with open(data_dir / expected_params, "r") as read_params: 243 | expected_params = json.load(read_params) 244 | 245 | # Unpack expected params and reformat 246 | expected_memb_com = np.array(expected_params["memb_com"]) 247 | expected_angle = expected_params["angle"] 248 | expected_flipdim = np.array(expected_params["flipdim"]) 249 | 250 | # Assert actual equals expected 251 | nptest.assert_almost_equal(actual_image, expected_image) 252 | nptest.assert_almost_equal(actual_memb_com, expected_memb_com) 253 | nptest.assert_almost_equal(actual_angle, expected_angle) 254 | nptest.assert_almost_equal(actual_flipdim, expected_flipdim) 255 | 256 | 257 | @pytest.mark.parametrize( 258 | "image, expected_features", 259 | [ 260 | ( 261 | "example_cropped_with_segs_array_0_1.ome.tiff", 262 | "example_generated_features_0_1.json", 263 | ), 264 | ( 265 | "example_cropped_with_segs_array_0_2.ome.tiff", 266 | "example_generated_features_0_2.json", 267 | ), 268 | ( 269 | "example_cropped_with_segs_array_0_3.ome.tiff", 270 | "example_generated_features_0_3.json", 271 | ), 272 | ], 273 | ) 274 | def test_get_features_from_image( 275 | data_dir, 276 | image, 277 | expected_features, 278 | ): 279 | """ 280 | The example data used to test this function was generated with the original function 281 | and then stored with JSON. 282 | """ 283 | # Get actual 284 | image = AICSImage(data_dir / image).get_image_data("CYXZ", S=0, T=0) 285 | actual_features = image_utils.get_features_from_image(image) 286 | 287 | # Serialize and deserialize the actual features 288 | # Things like tuples become lists during serialization 289 | # which technically assert False, even when the contents are equal 290 | actual_features = json.dumps(actual_features) 291 | actual_features = json.loads(actual_features) 292 | 293 | # Read expected 294 | with open(data_dir / expected_features, "r") as read_feats: 295 | expected_features = json.load(read_feats) 296 | 297 | # Assert each key value pair 298 | assert all(feat in actual_features for feat in expected_features) 299 | # These values may be a tiny bit different depending on 300 | # machine, environment, randomness, who knows. 
:shrug: 301 | # We will test a few specific representative features instead 302 | for feat in [ 303 | "imsize_orig", 304 | "com", 305 | "angle", 306 | "flipdim", 307 | "imsize_registered", 308 | "dna_intensity_mean", 309 | "cell_intensity_mean", 310 | "dna_cell_struct_cyto_distal_ratio", 311 | "dna_shape_sphericity", 312 | "cell_shape_sphericity", 313 | ]: 314 | if isinstance(actual_features[feat], int): 315 | assert np.isclose(actual_features[feat], expected_features[feat]) 316 | else: 317 | nptest.assert_almost_equal( 318 | actual_features[feat], expected_features[feat], decimal=2 319 | ) 320 | -------------------------------------------------------------------------------- /actk/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """Utilities package for actk.""" 4 | -------------------------------------------------------------------------------- /actk/utils/dataset_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from pathlib import Path 5 | from typing import List, Optional, Union 6 | 7 | import dask.dataframe as dd 8 | import pandas as pd 9 | 10 | from .. import exceptions 11 | 12 | ####################################################################################### 13 | 14 | 15 | def check_required_fields( 16 | dataset: Union[str, Path, pd.DataFrame, dd.DataFrame], 17 | required_fields: List[str], 18 | ) -> Optional[exceptions.MissingDataError]: 19 | # Handle dataset provided as string or path 20 | if isinstance(dataset, (str, Path)): 21 | dataset = Path(dataset).expanduser().resolve(strict=True) 22 | 23 | # Read dataset 24 | dataset = dd.read_csv(dataset) 25 | 26 | # Check that all columns provided as required are in the dataset 27 | missing_fields = set(required_fields) - set(dataset.columns) 28 | if len(missing_fields) > 0: 29 | raise exceptions.MissingDataError(dataset, missing_fields) 30 | -------------------------------------------------------------------------------- /actk/utils/image_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from typing import Dict, List, Optional, Tuple 5 | 6 | import aicsimageprocessing as proc 7 | import dask.array as da 8 | import numpy as np 9 | from aicsfeature.extractor import cell, cell_nuc, dna 10 | from aicsimageio import AICSImage, transforms, types 11 | from scipy.ndimage import gaussian_filter as ndf 12 | from scipy.signal import fftconvolve as convolve 13 | 14 | from ..constants import Channels 15 | 16 | ####################################################################################### 17 | 18 | 19 | def get_normed_image_array( 20 | raw_image: types.ImageLike, 21 | nucleus_seg_image: types.ImageLike, 22 | membrane_seg_image: types.ImageLike, 23 | dna_channel_index: int, 24 | membrane_channel_index: int, 25 | structure_channel_index: int, 26 | brightfield_channel_index: int, 27 | nucleus_seg_channel_index: int, 28 | membrane_seg_channel_index: int, 29 | current_pixel_sizes: Optional[Tuple[float]] = None, 30 | desired_pixel_sizes: Optional[Tuple[float]] = None, 31 | ) -> Tuple[np.ndarray, List[str], Tuple[float]]: 32 | """ 33 | Provided the original raw image, and a nucleus and membrane segmentation, construct 34 | a standardized, ordered, and normalized array of the images. 
35 |
36 | Parameters
37 | ----------
38 | raw_image: types.ImageLike
39 | A filepath to the raw imaging data. The image should be 4D and include
40 | channels for DNA, Membrane, Structure, and Transmitted Light.
41 |
42 | nucleus_seg_image: types.ImageLike
43 | A filepath to the nucleus segmentation for the provided raw image.
44 |
45 | membrane_seg_image: types.ImageLike
46 | A filepath to the membrane segmentation for the provided raw image.
47 |
48 | dna_channel_index: int
49 | The index in the channel dimension in the raw image that stores DNA data.
50 |
51 | membrane_channel_index: int
52 | The index in the channel dimension in the raw image that stores membrane data.
53 |
54 | structure_channel_index: int
55 | The index in the channel dimension in the raw image that stores structure data.
56 |
57 | brightfield_channel_index: int
58 | The index in the channel dimension in the raw image that stores the brightfield
59 | data.
60 |
61 | nucleus_seg_channel_index: int
62 | The index in the channel dimension in the nucleus segmentation image that stores
63 | the segmentation.
64 |
65 | membrane_seg_channel_index: int
66 | The index in the channel dimension in the membrane segmentation image that
67 | stores the segmentation.
68 |
69 | current_pixel_sizes: Optional[Tuple[float]]
70 | The current physical pixel sizes of the raw image as a tuple.
71 | Default: None (`aicsimageio.AICSImage.get_physical_pixel_size` on the raw image)
72 |
73 | desired_pixel_sizes: Optional[Tuple[float]]
74 | The desired physical pixel sizes as a tuple to scale all images to.
75 | Default: None (scale all images to current_pixel_sizes if different)
76 |
77 | Returns
78 | -------
79 | normed: np.ndarray
80 | The normalized images stacked into a single CYXZ numpy ndarray.
81 |
82 | channels: List[str]
83 | The standardized channel names for the returned array.
84 |
85 | pixel_sizes: Tuple[float]
86 | The physical pixel sizes of the returned image in XYZ order.
87 | 88 | Notes 89 | ----- 90 | The original version of this function can be found at: 91 | https://aicsbitbucket.corp.alleninstitute.org/projects/MODEL/repos/image_processing_pipeline/browse/aics_single_cell_pipeline/utils.py#9 92 | """ 93 | # Construct image objects 94 | raw = AICSImage(raw_image) 95 | nuc_seg = AICSImage(nucleus_seg_image) 96 | memb_seg = AICSImage(membrane_seg_image) 97 | 98 | # Preload image data 99 | raw.data 100 | nuc_seg.data 101 | memb_seg.data 102 | 103 | # Get default current and desired pixel sizes 104 | if current_pixel_sizes is None: 105 | current_pixel_sizes = raw.get_physical_pixel_size() 106 | 107 | # Default desired to be the same pixel size 108 | if desired_pixel_sizes is None: 109 | desired_pixel_sizes = current_pixel_sizes 110 | 111 | # Select the channels 112 | channel_indices = [ 113 | dna_channel_index, 114 | membrane_channel_index, 115 | structure_channel_index, 116 | brightfield_channel_index, 117 | ] 118 | selected_channels = [ 119 | raw.get_image_dask_data("YXZ", S=0, T=0, C=index) for index in channel_indices 120 | ] 121 | 122 | # Combine selections and get numpy array 123 | raw = da.stack(selected_channels).compute() 124 | 125 | # Convert pixel sizes to numpy arrays 126 | current_pixel_sizes = np.array(current_pixel_sizes) 127 | desired_pixel_sizes = np.array(desired_pixel_sizes) 128 | 129 | # Only resize raw image if desired pixel sizes is different from current 130 | if not np.array_equal(current_pixel_sizes, desired_pixel_sizes): 131 | scale_raw = current_pixel_sizes / desired_pixel_sizes 132 | raw = np.stack([proc.resize(channel, scale_raw, "bilinear") for channel in raw]) 133 | 134 | # Prep segmentations 135 | nuc_seg = nuc_seg.get_image_data("YXZ", S=0, T=0, C=nucleus_seg_channel_index) 136 | memb_seg = memb_seg.get_image_data("YXZ", S=0, T=0, C=membrane_seg_channel_index) 137 | 138 | # We do not assume that the segmentations are the same size as the raw 139 | # Resize the segmentations to match the raw 140 | # We drop the channel dimension from the raw size retrieval 141 | raw_size = np.array(raw.shape[1:]).astype(float) 142 | nuc_size = np.array(nuc_seg.shape).astype(float) 143 | memb_size = np.array(memb_seg.shape).astype(float) 144 | scale_nuc = raw_size / nuc_size 145 | scale_memb = raw_size / memb_size 146 | 147 | # Actual resize 148 | nuc_seg = proc.resize(nuc_seg, scale_nuc, method="nearest") 149 | memb_seg = proc.resize(memb_seg, scale_memb, method="nearest") 150 | 151 | # Normalize images 152 | normalized_images = [] 153 | for i, index in enumerate(channel_indices): 154 | if index == brightfield_channel_index: 155 | norm_method = "trans" 156 | else: 157 | norm_method = "img_bg_sub" 158 | 159 | # Normalize and append 160 | normalized_images.append(proc.normalize_img(raw[i], method=norm_method)) 161 | 162 | # Stack all together 163 | img = np.stack([nuc_seg, memb_seg, *normalized_images]) 164 | channel_names = Channels.DefaultOrderList 165 | 166 | return img, channel_names, tuple(desired_pixel_sizes) 167 | 168 | 169 | def select_and_adjust_segmentation_ceiling( 170 | image: np.ndarray, cell_index: int, cell_ceiling_adjustment: int = 0 171 | ) -> np.ndarray: 172 | """ 173 | Select and adjust the cell shape "ceiling" for a specific cell in the provided 174 | image. 175 | 176 | Parameters 177 | ---------- 178 | image: np.ndarray 179 | The 4D, CYXZ, image numpy ndarray output from `get_normed_image_array`. 180 | 181 | cell_index: int 182 | The integer index for the target cell. 
183 |
184 | cell_ceiling_adjustment: int
185 | The adjustment to use for raising the cell shape ceiling. If <= 0, this will be
186 | ignored and cell data will be selected but not adjusted.
187 | Default: 0
188 |
189 | Returns
190 | -------
191 | adjusted: np.ndarray
192 | The image with the membrane segmentation adjusted for ceiling shape correction.
193 |
194 | Notes
195 | -----
196 | The original version of this function can be found at:
197 | https://aicsbitbucket.corp.alleninstitute.org/projects/MODEL/repos/image_processing_pipeline/browse/aics_single_cell_pipeline/utils.py#83
198 | """
199 | # Select only the data in the first two channels (the segmentation channels)
200 | # where the data matches the provided cell index
201 | image[0:2] = image[0:2] == cell_index
202 |
203 | # Because they are conservatively segmented,
204 | # we raise the "ceiling" of the cell shape
205 |
206 | # This is the so-called "roof-augmentation" that Greg (@gregjohnso) invented to
207 | # handle the bad "roof" in old membrane segmentations.
208 | #
209 | # Specifically, because of photobleaching, the signal near the top is very weak.
210 | # As a result, the membrane segmentation stops earlier (in terms of Z position) than
211 | # it should. For some structures living near the top of the cell, like mitochondria,
212 | # the structure segmentation may extend outside the membrane segmentation. Because
213 | # the membrane segmentation is "shorter" than it should be, the structure
214 | # segmentation would be mostly chopped off and the integrated cell model would learn nothing.
215 | #
216 | # So, "roof-augmentation" is the method used to fix these "shorter" membrane
217 | # segmentation issues.
218 |
219 | # Adjust image ceiling if adjustment is greater than zero
220 | if cell_ceiling_adjustment > 0:
221 | # Get the center of mass of the nucleus
222 | nuc_com = proc.get_center_of_mass(image[0])[-1]
223 |
224 | # Get the top of the membrane
225 | memb_top = np.where(np.sum(np.sum(image[1], axis=0), axis=0))[0][-1]
226 |
227 | # Get the halfway point between the two
228 | start = int(np.floor((nuc_com + memb_top) / 2))
229 |
230 | # Get the shape of the cell from the membrane segmentation
231 | cell_shape = image[1, :, :, start:]
232 |
233 | # Adjust cell shape "ceiling" using the adjustment integer provided
234 | start_ind = int(np.floor(cell_ceiling_adjustment)) - 1
235 | imf = np.zeros([1, 1, cell_ceiling_adjustment * 2 - 1])
236 | imf[:, :, start_ind:] = 1
237 | cell_shape = convolve(cell_shape, imf, mode="same") > 1e-8
238 |
239 | # Set the image data with the new cell shape data
240 | image[1, :, :, start:] = cell_shape
241 |
242 | return image
243 |
244 |
245 | def crop_raw_channels_with_segmentation(
246 | image: np.ndarray, channels: List[str]
247 | ) -> np.ndarray:
248 | """
249 | Crop the imaging data in the raw channels using the segmentation channels, which
250 | contain a single target cell selected from the full field of view.
251 |
252 | Parameters
253 | ----------
254 | image: np.ndarray
255 | The 4D, CYXZ, image numpy ndarray output from
256 | `select_and_adjust_segmentation_ceiling`.
257 |
258 | channels: List[str]
259 | The channel names for the provided image.
260 | The channels output from `get_normed_image_array`.
261 |
262 | Returns
263 | -------
264 | cropped: np.ndarray
265 | A 4D numpy ndarray with CYXZ dimensions in the same order as provided.
266 | The raw DNA channel has been cropped using the nucleus segmentation.
267 | All other raw channels have been cropped using the membrane segmentation.
268 |
269 | Notes
270 | -----
271 | The original version of this function can be found at:
272 | https://aicsbitbucket.corp.alleninstitute.org/projects/MODEL/repos/image_processing_pipeline/browse/aics_single_cell_pipeline/utils.py#114
273 | """
274 | # Select segmentation indices
275 | nuc_ind = np.array(channels) == Channels.NucleusSegmentation
276 | memb_ind = np.array(channels) == Channels.MembraneSegmentation
277 |
278 | # Select DNA and all other indices
279 | dna_ind = np.array(channels) == Channels.DNA
280 | other_channel_inds = np.ones(len(channels))
281 | other_channel_inds[nuc_ind | memb_ind | dna_ind] = 0
282 |
283 | # Crop DNA channel with the nucleus segmentation
284 | image[dna_ind] = image[dna_ind] * image[nuc_ind]
285 |
286 | # All other channels are cropped using membrane segmentation
287 | for i in np.where(other_channel_inds)[0]:
288 | image[i] = image[i] * image[memb_ind]
289 |
290 | return image
291 |
292 |
293 | def prepare_image_for_feature_extraction(
294 | image: np.ndarray,
295 | ) -> Tuple[np.ndarray, np.ndarray, List[List[float]], np.ndarray]:
296 | """
297 | Prep an image and return any parameters required for feature extraction.
298 |
299 | Parameters
300 | ----------
301 | image: np.ndarray
302 | The 4D, CYXZ, image numpy ndarray output from
303 | `crop_raw_channels_with_segmentation`.
304 |
305 | Returns
306 | -------
307 | prepped_image: np.ndarray
308 | The prepared image after cell rigid registration and binarizing the
309 | segmentations.
310 | center_of_mass: np.ndarray
311 | The index of the center of mass of the membrane segmentation for the provided
312 | image.
313 | angle: List[List[float]]
314 | The major angle of the membrane segmentation for the provided image.
315 | flipdim: np.ndarray
316 | Boolean array indicating whether the dimensions of the image should be flipped.
317 |
318 | Notes
319 | -----
320 | The original version of this function can be found at:
321 | https://aicsbitbucket.corp.alleninstitute.org/projects/MODEL/repos/image_processing_pipeline/browse/aics_single_cell_pipeline/alignment_tools.py#5
322 |
323 | The docstring for the original version of this function was incorrect.
324 | It stated that it took in a CXYZ image but it took in a CYXZ.
325 | See `get_features_from_image` for reasoning.
326 | """
327 | # Get center of mass for the membrane
328 | memb_com = proc.get_center_of_mass(proc.get_channel(image, 1))
329 |
330 | # Perform a rigid registration on the image
331 | image, angle, flipdim = proc.cell_rigid_registration(image)
332 |
333 | # Make sure the nuc and cell channels are binary
334 | image[0:2] = image[0:2] > 0.5
335 |
336 | return image, memb_com, angle, flipdim
337 |
338 |
339 | def get_features_from_image(image: np.ndarray) -> Dict:
340 | """
341 | Generate all segmentation, DNA, membrane, and structure features from the provided
342 | image.
343 |
344 | Parameters
345 | ----------
346 | image: np.ndarray
347 | The 4D, CYXZ, image numpy ndarray output from
348 | `crop_raw_channels_with_segmentation`.
349 |
350 | Returns
351 | -------
352 | features: Dict
353 | A single dictionary filled with features.
354 |
355 | Notes
356 | -----
357 | The original version of this function can be found at:
358 | https://aicsbitbucket.corp.alleninstitute.org/projects/MODEL/repos/image_processing_pipeline/browse/aics_single_cell_pipeline/features.py#8
359 |
360 | The docstring for the original version of this function was incorrect.
361 | It stated that it took in a CXYZ image but it took in a CYXZ.
This can be seen from 362 | line #17 where a transpose to CZYX is done with `img.transpose(0, 3, 1, 2)`. 363 | A transpose of (0, 3, 1, 2) on a CXYZ image would result in a CZXY not CZYX. 364 | Additionally, simply following the original processing chain shows that the original 365 | function is simply handed the output from the original version of the function 366 | `crop_raw_channels_with_segmentation` (crop_cell_nuc) which results in a `CYXZ`. 367 | """ 368 | # Store original shape 369 | imsize_orig = image.shape 370 | 371 | # Get prepared image and feature parameters 372 | image, memb_com, angle, flipdim = prepare_image_for_feature_extraction(image) 373 | 374 | # Transpose to CZYX 375 | image = transforms.transpose_to_dims(image, "CYXZ", "CZYX") 376 | 377 | # Construct dictionary of basic features 378 | regularization_params = { 379 | "imsize_orig": imsize_orig, 380 | "com": memb_com.tolist(), 381 | "angle": angle, 382 | "flipdim": flipdim.tolist(), 383 | "imsize_registered": image.shape, 384 | } 385 | 386 | # Unpack channels 387 | nuc_seg = image[0] 388 | memb_seg = image[1] 389 | dna_image = image[2] 390 | memb_image = image[3] 391 | struct_image = image[4] 392 | 393 | # Adjust the DNA and membrane images 394 | adjusted_dna_image = ((nuc_seg * dna_image) * 2 ** 8).astype("uint16") 395 | adjusted_memb_image = ((memb_seg * memb_image) * 2 ** 8).astype("uint16") 396 | 397 | # Simple deblur for better structure localization detection 398 | imf1 = ndf(struct_image, 5, mode="constant") 399 | imf2 = ndf(struct_image, 1, mode="constant") 400 | 401 | # Adjust structure image 402 | adjusted_struct_image = imf2 - imf1 403 | adjusted_struct_image[adjusted_struct_image < 0] = 0 404 | 405 | # Get features for the image using the adjusted images 406 | memb_nuc_struct_feats = cell_nuc.get_features( 407 | nuc_seg, memb_seg, adjusted_struct_image 408 | ).to_dict("records")[0] 409 | 410 | # Get DNA and membrane image features 411 | dna_feats = dna.get_features(adjusted_dna_image, seg=nuc_seg).to_dict("records")[0] 412 | memb_feats = cell.get_features(adjusted_memb_image, seg=memb_seg).to_dict( 413 | "records" 414 | )[0] 415 | 416 | # Combine all features 417 | features = { 418 | **regularization_params, 419 | **memb_nuc_struct_feats, 420 | **dna_feats, 421 | **memb_feats, 422 | } 423 | 424 | return features 425 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | ignore: 2 | - "**/__init__.py" 3 | - "actk/bin/.*" 4 | - "actk/vendor/.*" 5 | 6 | coverage: 7 | status: 8 | patch: off 9 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = python -msphinx 7 | SPHINXPROJ = actk 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # actk documentation build configuration file, created by 5 | # sphinx-quickstart on Fri Jun 9 13:47:02 2017. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | # If extensions (or modules to document with autodoc) are in another 17 | # directory, add these directories to sys.path here. If the directory is 18 | # relative to the documentation root, use os.path.abspath to make it 19 | # absolute, like shown here. 20 | # 21 | import os 22 | import sys 23 | 24 | import sphinx_rtd_theme 25 | 26 | import actk 27 | 28 | sys.path.insert(0, os.path.abspath("..")) 29 | 30 | 31 | # -- General configuration --------------------------------------------- 32 | 33 | # If your documentation needs a minimal Sphinx version, state it here. 34 | # 35 | # needs_sphinx = "1.0" 36 | 37 | # Add any Sphinx extension module names here, as strings. They can be 38 | # extensions coming with Sphinx (named "sphinx.ext.*") or your custom ones. 39 | extensions = [ 40 | "sphinx.ext.autodoc", 41 | "sphinx.ext.viewcode", 42 | "sphinx.ext.napoleon", 43 | "sphinx.ext.mathjax", 44 | "m2r", 45 | ] 46 | 47 | # Control napoleon 48 | napoleon_google_docstring = False 49 | napolean_include_init_with_doc = True 50 | napoleon_use_ivar = True 51 | napoleon_use_param = False 52 | 53 | # Control autodoc 54 | autoclass_content = "both" # include init doc with class 55 | 56 | # Add any paths that contain templates here, relative to this directory. 57 | templates_path = ["_templates"] 58 | 59 | # The suffix(es) of source filenames. 60 | # You can specify multiple suffix as a list of string: 61 | # 62 | source_suffix = { 63 | ".rst": "restructuredtext", 64 | ".txt": "markdown", 65 | ".md": "markdown", 66 | } 67 | 68 | # The master toctree document. 69 | master_doc = "index" 70 | 71 | # General information about the project. 72 | project = u"actk" 73 | copyright = u"2020, Jackson Maxfield Brown" 74 | author = u"Jackson Maxfield Brown" 75 | 76 | # The version info for the project you"re documenting, acts as replacement 77 | # for |version| and |release|, also used in various other places throughout 78 | # the built documents. 79 | # 80 | # The short X.Y version. 81 | version = actk.__version__ 82 | # The full version, including alpha/beta/rc tags. 83 | release = actk.__version__ 84 | 85 | # The language for content autogenerated by Sphinx. Refer to documentation 86 | # for a list of supported languages. 87 | # 88 | # This is also used if you do content translation via gettext catalogs. 89 | # Usually you set "language" from the command line for these cases. 90 | language = None 91 | 92 | # List of patterns, relative to source directory, that match files and 93 | # directories to ignore when looking for source files. 94 | # This patterns also effect to html_static_path and html_extra_path 95 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 96 | 97 | # The name of the Pygments (syntax highlighting) style to use. 
98 | pygments_style = "sphinx" 99 | 100 | # If true, `todo` and `todoList` produce output, else they produce nothing. 101 | todo_include_todos = False 102 | 103 | 104 | # -- Options for HTML output ------------------------------------------- 105 | 106 | # The theme to use for HTML and HTML Help pages. See the documentation for 107 | # a list of builtin themes. 108 | # 109 | html_theme = "sphinx_rtd_theme" 110 | 111 | # Theme options are theme-specific and customize the look and feel of a 112 | # theme further. For a list of options available for each theme, see the 113 | # documentation. 114 | # 115 | html_theme_options = { 116 | "collapse_navigation": False, 117 | "prev_next_buttons_location": "top", 118 | } 119 | 120 | # Add any paths that contain custom static files (such as style sheets) here, 121 | # relative to this directory. They are copied after the builtin static files, 122 | # so a file named "default.css" will overwrite the builtin "default.css". 123 | html_static_path = ["_static"] 124 | 125 | 126 | # -- Options for HTMLHelp output --------------------------------------- 127 | 128 | # Output file base name for HTML help builder. 129 | htmlhelp_basename = "actkdoc" 130 | 131 | 132 | # -- Options for LaTeX output ------------------------------------------ 133 | 134 | latex_elements = { 135 | # The paper size ("letterpaper" or "a4paper"). 136 | # 137 | # "papersize": "letterpaper", 138 | # The font size ("10pt", "11pt" or "12pt"). 139 | # 140 | # "pointsize": "10pt", 141 | # Additional stuff for the LaTeX preamble. 142 | # 143 | # "preamble": "", 144 | # Latex figure (float) alignment 145 | # 146 | # "figure_align": "htbp", 147 | } 148 | 149 | # Grouping the document tree into LaTeX files. List of tuples 150 | # (source start file, target name, title, author, documentclass 151 | # [howto, manual, or own class]). 152 | latex_documents = [ 153 | ( 154 | master_doc, 155 | "actk.tex", 156 | u"actk Documentation", 157 | u"Jackson Maxfield Brown", 158 | "manual", 159 | ), 160 | ] 161 | 162 | 163 | # -- Options for manual page output ------------------------------------ 164 | 165 | # One entry per manual page. List of tuples 166 | # (source start file, name, description, authors, manual section). 167 | man_pages = [(master_doc, "actk", u"actk Documentation", [author], 1)] 168 | 169 | 170 | # -- Options for Texinfo output ---------------------------------------- 171 | 172 | # Grouping the document tree into Texinfo files. List of tuples 173 | # (source start file, target name, title, author, 174 | # dir menu entry, description, category) 175 | texinfo_documents = [ 176 | ( 177 | master_doc, 178 | "actk", 179 | u"actk Documentation", 180 | author, 181 | "actk", 182 | "One line description of project.", 183 | "Miscellaneous", 184 | ), 185 | ] 186 | -------------------------------------------------------------------------------- /docs/contributing.rst: -------------------------------------------------------------------------------- 1 | .. mdinclude:: ../CONTRIBUTING.md 2 | -------------------------------------------------------------------------------- /docs/dataset_fields.md: -------------------------------------------------------------------------------- 1 | # Dataset Fields 2 | 3 | Definitions, examples, and units for each field required for processing or produced. 4 | 5 | ### CellId 6 | * **Description:** A unique identifier for a cell. Can be an integer, string, or other, but must be serializable and unique. 
7 | * **Example(s):** 1, 2, 3, f0e3ac9a-5f20-4c40-bc6b-9c7c0a7e026d
8 | * **Units:** None
9 |
10 | ### CellIndex
11 | * **Description:** The integer value used in a segmentation image to indicate that a voxel belongs to a specific cell.
12 | * **Example(s):** 1, 2, 3
13 | * **Units:** None
14 |
15 | ### FOVId
16 | * **Description:** A unique identifier for a Field-of-View image. Can be an integer, string, or other, but must be serializable and unique.
17 | * **Example(s):** 1, 2, 3, f0e3ac9a-5f20-4c40-bc6b-9c7c0a7e026d
18 | * **Units:** None
19 |
20 | ### SourceReadPath
21 | * **Description:** The path to read a raw microscopy image file. This file should contain at least four channels (DNA, membrane, tagged structure, and brightfield).
22 | * **Example(s):** /allen/aics/modeling/jacksonb/data/example_raw.czi
23 | * **Units:** None
24 |
25 | ### NucleusSegmentationReadPath
26 | * **Description:** The path to read a file that contains a channel with a nucleus segmentation. This file is usually generated by segmenting the DNA channel from a raw microscopy image.
27 | * **Example(s):** /allen/aics/modeling/jacksonb/data/example_nuc_seg.ome.tiff
28 | * **Units:** None
29 |
30 | ### MembraneSegmentationReadPath
31 | * **Description:** The path to read a file that contains a channel with a membrane segmentation. This file is usually generated by segmenting the membrane channel from a raw microscopy image.
32 | * **Example(s):** /allen/aics/modeling/jacksonb/data/example_memb_seg.ome.tiff
33 | * **Units:** None
34 |
35 | ### ChannelIndexDNA
36 | * **Description:** The integer index of the DNA channel in a raw microscopy image after it has been read into memory.
37 | * **Example(s):** 0, 1, 2, 3
38 | * **Units:** None
39 |
40 | ### ChannelIndexMembrane
41 | * **Description:** The integer index of the membrane channel in a raw microscopy image after it has been read into memory.
42 | * **Example(s):** 0, 1, 2, 3
43 | * **Units:** None
44 |
50 | ### ChannelIndexStructure
51 | * **Description:** The integer index of the structure channel in a raw microscopy image after it has been read into memory.
52 | * **Example(s):** 0, 1, 2, 3
53 | * **Units:** None
54 |
55 | ### ChannelIndexBrightfield
56 | * **Description:** The integer index of the brightfield channel in a raw microscopy image after it has been read into memory.
57 | * **Example(s):** 0, 1, 2, 3
58 | * **Units:** None
59 |
60 | ### ChannelIndexNucleusSegmentation
61 | * **Description:** The integer index of the nucleus segmentation channel in a segmentation image file after it has been read into memory.
62 | * **Example(s):** 0, 1, 2, 3
63 | * **Units:** None
64 |
65 | ### ChannelIndexMembraneSegmentation
66 | * **Description:** The integer index of the membrane segmentation channel in a segmentation image file after it has been read into memory.
67 | * **Example(s):** 0, 1, 2, 3
68 | * **Units:** None
69 |
70 | ### StandardizedFOVPath
71 | * **Description:** The path to read a standardized FOV image file. This file is generated from the "StandardizeFOVArray" step.
72 | * **Example(s):** /allen/aics/modeling/jacksonb/data/example_fov_array.ome.tiff
73 | * **Units:** None
74 |
75 | ### CellFeaturesPath
76 | * **Description:** The path to read a cell features JSON file.
This file is generated from the "SingleCellFeatures" step. 77 | * **Example(s):** /allen/aics/modeling/jacksonb/data/example_cell_feats.json 78 | * **Units:** None 79 | 80 | ### CellImage3DPath 81 | * **Description:** The path to read a normalized and bounded 3D single cell image. This file is generated from the "SingleCellImages" step. 82 | * **Example(s):** /allen/aics/modeling/jacksonb/data/example_single_cell.ome.tiff 83 | * **Units:** None 84 | 85 | ### CellImage2DAllProjectionsPath 86 | * **Description:** The path to view a normalized and bounded 2D single cell image where all axis projections can be seen in a single image. This file is generated from the "SingleCellImages" step. 87 | * **Example(s):** /allen/aics/modeling/jacksonb/data/example_single_cell_all_proj.png 88 | * **Units:** None 89 | 90 | ### CellImage2DYXProjectionPath 91 | * **Description:** The path to view a normalized and bounded 2D single cell image where only the YX axis projection can be seen in a single image. This file is generated from the "SingleCellImages" step. 92 | * **Example(s):** /allen/aics/modeling/jacksonb/data/example_single_cell_yx_proj.png 93 | * **Units:** None 94 | 95 | ### DiagnosticSheetPath 96 | * **Description:** The path to view a diagnostic or "contact" sheet of the cells produced by the pipeline grouped by metadata. Useful for quality control. This file is generated from the "DiagnosticSheets" step. 97 | * **Example(s):** /allen/aics/modeling/jacksonb/data/example_fov_dianostics.png 98 | * **Units:** None 99 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to actk's documentation! 2 | ====================================== 3 | 4 | .. toctree:: 5 | :hidden: 6 | :maxdepth: 1 7 | :caption: Contents: 8 | 9 | Overview 10 | installation 11 | Package modules 12 | Dataset fields 13 | contributing 14 | 15 | .. mdinclude:: ../README.md 16 | 17 | Indices and tables 18 | ================== 19 | * :ref:`genindex` 20 | * :ref:`modindex` 21 | * :ref:`search` 22 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | .. highlight:: shell 2 | 3 | ============ 4 | Installation 5 | ============ 6 | 7 | 8 | Stable release 9 | -------------- 10 | 11 | To install actk, run this command in your terminal: 12 | 13 | .. code-block:: console 14 | 15 | $ pip install numpy 16 | $ pip install actk 17 | 18 | This is the preferred method to install actk, as it will always install the most recent stable release. 19 | 20 | If you don't have `pip`_ installed, this `Python installation guide`_ can guide 21 | you through the process. 22 | 23 | .. _pip: https://pip.pypa.io 24 | .. _Python installation guide: http://docs.python-guide.org/en/latest/starting/installation/ 25 | 26 | 27 | From sources 28 | ------------ 29 | 30 | The sources for actk can be downloaded from the `Github repo`_. 31 | 32 | You can either clone the public repository: 33 | 34 | .. code-block:: console 35 | 36 | $ git clone git://github.com/AllenCellModeling/actk 37 | 38 | Or download the `tarball`_: 39 | 40 | .. code-block:: console 41 | 42 | $ curl -OL https://github.com/AllenCellModeling/actk/tarball/master 43 | 44 | Once you have a copy of the source, you can install it with: 45 | 46 | .. code-block:: console 47 | 48 | $ python setup.py install 49 | 50 | 51 | .. 
_Github repo: https://github.com/AllenCellModeling/actk 52 | .. _tarball: https://github.com/AllenCellModeling/actk/tarball/master 53 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=python -msphinx 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | set SPHINXPROJ=actk 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The Sphinx module was not found. Make sure you have Sphinx installed, 20 | echo.then set the SPHINXBUILD environment variable to point to the full 21 | echo.path of the 'sphinx-build' executable. Alternatively you may add the 22 | echo.Sphinx directory to PATH. 23 | echo. 24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /docs/modules.rst: -------------------------------------------------------------------------------- 1 | actk 2 | ==== 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | actk 8 | -------------------------------------------------------------------------------- /images/header.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenCellModeling/actk/20d0601083d4b6eced03997473add608f83b3c75/images/header.png -------------------------------------------------------------------------------- /scripts/download_aics_dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import argparse 5 | import logging 6 | import sys 7 | import traceback 8 | from pathlib import Path 9 | 10 | import pandas as pd 11 | 12 | from actk.constants import DatasetFields 13 | from lkaccess import LabKey, contexts 14 | 15 | ############################################################################### 16 | 17 | logging.basicConfig( 18 | level=logging.INFO, 19 | format="[%(levelname)4s: %(module)s:%(lineno)4s %(asctime)s] %(message)s", 20 | ) 21 | log = logging.getLogger(__name__) 22 | 23 | ############################################################################### 24 | # Args 25 | 26 | 27 | class Args(argparse.Namespace): 28 | def __init__(self): 29 | self.__parse() 30 | 31 | def __parse(self): 32 | # Setup parser 33 | p = argparse.ArgumentParser( 34 | prog="download_aics_dataset", 35 | description=( 36 | "Retrieve a dataset ready for processing from the internal " 37 | "AICS database." 38 | ), 39 | ) 40 | 41 | # Arguments 42 | p.add_argument( 43 | "--sample", 44 | type=float, 45 | default=1.0, 46 | help=( 47 | "Percent how much data to download. Will be split across cell lines. " 48 | "Ex: 1.0 = 100 percent of each cell line, " 49 | "0.05 = 5 percent of each cell line." 50 | ), 51 | ) 52 | p.add_argument( 53 | "--instance", 54 | default="PROD", 55 | help="Which database instance to use for data retrieval. 
(PROD or STAGING)", 56 | ) 57 | p.add_argument( 58 | "--save_path", 59 | type=Path, 60 | default=Path("aics_ic_data.csv"), 61 | help="Path to save the dataset to.", 62 | ) 63 | p.add_argument( 64 | "--debug", 65 | action="store_true", 66 | help="Show traceback if the script were to fail.", 67 | ) 68 | 69 | # Parse 70 | p.parse_args(namespace=self) 71 | 72 | 73 | ############################################################################### 74 | # Retrieve and prepare AICS dataset 75 | 76 | 77 | def download_aics_dataset(args: Args): 78 | # Try running the download pipeline 79 | try: 80 | # Get instance context 81 | instance_context = getattr(contexts, args.instance.upper()) 82 | 83 | # Create connection to instance 84 | lk = LabKey(instance_context) 85 | log.info(f"Using LabKey instance: {lk}") 86 | 87 | # Get integrated cell data 88 | log.info("Retrieving pipeline integrated cell data...") 89 | data = pd.DataFrame(lk.dataset.get_pipeline_4_production_data()) 90 | 91 | # Get cell line data 92 | log.info("Retrieving cell line data...") 93 | cell_line_data = pd.DataFrame( 94 | lk.select_rows_as_list( 95 | schema_name="celllines", 96 | query_name="CellLineDefinition", 97 | columns=[ 98 | "CellLineId", 99 | "CellLineId/Name", 100 | "StructureId/Name", 101 | "ProteinId/Name", 102 | ], 103 | ) 104 | ) 105 | 106 | # Merge the data 107 | data = data.merge(cell_line_data, how="left", on="CellLineId") 108 | 109 | # Prepare the raw data for sampling 110 | data = data.drop_duplicates(subset=["CellId"], keep="first") 111 | data = data.reset_index(drop=True) 112 | data["CellLineId"] = data["CellLineId"].astype(int) 113 | 114 | # Sample the data 115 | if args.sample != 1.0: 116 | log.info(f"Sampling dataset with frac={args.sample}...") 117 | data = data.groupby("CellLineId", group_keys=False) 118 | data = data.apply(pd.DataFrame.sample, frac=args.sample) 119 | data = data.reset_index(drop=True) 120 | 121 | # Rename columns to match DatasetFields 122 | data = data.rename( 123 | columns={ 124 | "ChannelNumber405": DatasetFields.ChannelIndexDNA, 125 | "ChannelNumber638": DatasetFields.ChannelIndexMembrane, 126 | "ChannelNumberStruct": DatasetFields.ChannelIndexStructure, 127 | "ChannelNumberBrightfield": DatasetFields.ChannelIndexBrightfield, 128 | "NucleusSegmentationChannelIndex": ( 129 | DatasetFields.ChannelIndexNucleusSegmentation 130 | ), 131 | "MembraneSegmentationChannelIndex": ( 132 | DatasetFields.ChannelIndexMembraneSegmentation 133 | ), 134 | } 135 | ) 136 | 137 | # Merge Aligned and Source read path columns 138 | data[DatasetFields.SourceReadPath] = data["AlignedImageReadPath"].combine_first( 139 | data[DatasetFields.SourceReadPath] 140 | ) 141 | 142 | # Temporary drop because differing values 143 | data = data.drop( 144 | columns=[ 145 | "StructureSegmentationAlgorithm", 146 | "StructureSegmentationAlgorithmVersion", 147 | "StructureSegmentationFileId", 148 | "StructureSegmentationFilename", 149 | "StructureSegmentationReadPath", 150 | "StructureContourFileId", 151 | "StructureContourFilename", 152 | "StructureContourReadPath", 153 | "MembraneContourFileId", 154 | "MembraneContourFilename", 155 | "MembraneContourReadPath", 156 | "NucleusContourFileId", 157 | "NucleusContourFilename", 158 | "NucleusContourReadPath", 159 | ] 160 | ) 161 | 162 | # Save to CSV 163 | data.to_csv(args.save_path, index=False) 164 | 165 | log.info(f"Saved dataset manifest to: {args.save_path}") 166 | 167 | # Catch any exception 168 | except Exception as e: 169 | 
log.error("=============================================") 170 | if args.debug: 171 | log.error("\n\n" + traceback.format_exc()) 172 | log.error("=============================================") 173 | log.error("\n\n" + str(e) + "\n") 174 | log.error("=============================================") 175 | sys.exit(1) 176 | 177 | 178 | ############################################################################### 179 | # Runner 180 | 181 | 182 | def main(): 183 | args = Args() 184 | download_aics_dataset(args) 185 | 186 | 187 | ############################################################################### 188 | # Allow caller to directly run this module (usually in development scenarios) 189 | 190 | if __name__ == "__main__": 191 | main() 192 | -------------------------------------------------------------------------------- /scripts/download_test_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import argparse 5 | import logging 6 | import sys 7 | import traceback 8 | from pathlib import Path 9 | 10 | from quilt3 import Package 11 | 12 | ############################################################################### 13 | 14 | logging.basicConfig( 15 | level=logging.INFO, 16 | format="[%(levelname)4s: %(module)s:%(lineno)4s %(asctime)s] %(message)s", 17 | ) 18 | log = logging.getLogger(__name__) 19 | 20 | ############################################################################### 21 | # Args 22 | 23 | 24 | class Args(argparse.Namespace): 25 | def __init__(self): 26 | self.__parse() 27 | 28 | def __parse(self): 29 | # Setup parser 30 | p = argparse.ArgumentParser( 31 | prog="download_test_data", 32 | description=( 33 | "Download files used for testing this project. This will download " 34 | "all the required test resources and place them in the `tests/data` " 35 | "directory." 36 | ), 37 | ) 38 | 39 | # Arguments 40 | p.add_argument( 41 | "--top-hash", 42 | default="0ca3a651f3d048be4b8b68068a03eaaca6307299b4c5d91fa5add91c125b6265", 43 | help=("A specific version of the package to retrieve. 
Default: latest"), 44 | ) 45 | p.add_argument( 46 | "--debug", 47 | action="store_true", 48 | help="Show traceback if the script were to fail.", 49 | ) 50 | 51 | # Parse 52 | p.parse_args(namespace=self) 53 | 54 | 55 | ############################################################################### 56 | # Build package 57 | 58 | 59 | def download_test_data(args: Args): 60 | # Try running the download pipeline 61 | try: 62 | # Get test data dir 63 | data_dir = (Path(__file__).parent.parent / "actk" / "tests" / "data").resolve() 64 | data_dir.mkdir(exist_ok=True) 65 | 66 | # Get quilt package 67 | package = Package.browse( 68 | "actk/test_data", 69 | "s3://aics-modeling-packages-test-resources", 70 | top_hash=args.top_hash, 71 | ) 72 | 73 | # Download 74 | package["data"].fetch(data_dir) 75 | 76 | log.info(f"Completed package download.") 77 | 78 | # Catch any exception 79 | except Exception as e: 80 | log.error("=============================================") 81 | if args.debug: 82 | log.error("\n\n" + traceback.format_exc()) 83 | log.error("=============================================") 84 | log.error("\n\n" + str(e) + "\n") 85 | log.error("=============================================") 86 | sys.exit(1) 87 | 88 | 89 | ############################################################################### 90 | # Runner 91 | 92 | 93 | def main(): 94 | args = Args() 95 | download_test_data(args) 96 | 97 | 98 | ############################################################################### 99 | # Allow caller to directly run this module (usually in development scenarios) 100 | 101 | if __name__ == "__main__": 102 | main() 103 | -------------------------------------------------------------------------------- /scripts/upload_test_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import argparse 5 | import logging 6 | import sys 7 | import traceback 8 | from pathlib import Path 9 | 10 | from quilt3 import Package 11 | 12 | from actk import get_module_version 13 | 14 | ############################################################################### 15 | 16 | logging.basicConfig( 17 | level=logging.INFO, 18 | format="[%(levelname)4s: %(module)s:%(lineno)4s %(asctime)s] %(message)s", 19 | ) 20 | log = logging.getLogger(__name__) 21 | 22 | ############################################################################### 23 | # Args 24 | 25 | 26 | class Args(argparse.Namespace): 27 | def __init__(self): 28 | self.__parse() 29 | 30 | def __parse(self): 31 | # Setup parser 32 | p = argparse.ArgumentParser( 33 | prog="upload_test_data", 34 | description=( 35 | "Upload files used for testing this project. This will upload " 36 | "whatever files are currently found in the `tests/data` directory. To " 37 | "add more test files, simply add them to the `tests/data` directory " 38 | "and rerun this script." 39 | ), 40 | ) 41 | 42 | # Arguments 43 | p.add_argument( 44 | "--dry-run", 45 | action="store_true", 46 | help=( 47 | "Conduct dry run of the package generation. Will create a JSON " 48 | "manifest file of that package instead of uploading." 
49 | ), 50 | ) 51 | p.add_argument( 52 | "--debug", 53 | action="store_true", 54 | help="Show traceback if the script were to fail.", 55 | ) 56 | 57 | # Parse 58 | p.parse_args(namespace=self) 59 | 60 | 61 | ############################################################################### 62 | # Build package 63 | 64 | 65 | def upload_test_data(args: Args): 66 | # Try running the download pipeline 67 | try: 68 | # Get test data dir 69 | data_dir = (Path(__file__).parent.parent / "actk" / "tests" / "data").resolve( 70 | strict=True 71 | ) 72 | 73 | # Report with directory will be used for upload 74 | log.info(f"Using contents of directory: {data_dir}") 75 | 76 | # Create quilt package 77 | package = Package() 78 | package.set_dir("data", data_dir) 79 | 80 | # Report package contents 81 | log.info(f"Package contents: {package}") 82 | 83 | # Check for dry run 84 | if args.dry_run: 85 | # Attempt to build the package 86 | built = package.build("actk/test_data") 87 | 88 | # Get resolved save path 89 | manifest_save_path = Path("upload_manifest.jsonl").resolve() 90 | with open(manifest_save_path, "w") as manifest_write: 91 | package.dump(manifest_write) 92 | 93 | # Report where manifest was saved 94 | log.info(f"Dry run generated manifest stored to: {manifest_save_path}") 95 | log.info(f"Completed package dry run. Result hash: {built.top_hash}") 96 | 97 | # Upload 98 | else: 99 | # Get upload confirmation 100 | confirmation = None 101 | while confirmation not in ["y", "n"]: 102 | # Get user input 103 | confirmation = input("Upload y/n? ") 104 | 105 | # Get first character and lowercase 106 | confirmation = confirmation[0].lower() 107 | 108 | # Check confirmation 109 | if confirmation == "y": 110 | pushed = package.push( 111 | "actk/test_data", 112 | "s3://aics-modeling-packages-test-resources", 113 | message=( 114 | f"Test resources for `actk` version: {get_module_version()}." 115 | ), 116 | ) 117 | 118 | log.info(f"Completed package push. 
Result hash: {pushed.top_hash}") 119 | else: 120 | log.info(f"Upload canceled.") 121 | 122 | # Catch any exception 123 | except Exception as e: 124 | log.error("=============================================") 125 | if args.debug: 126 | log.error("\n\n" + traceback.format_exc()) 127 | log.error("=============================================") 128 | log.error("\n\n" + str(e) + "\n") 129 | log.error("=============================================") 130 | sys.exit(1) 131 | 132 | 133 | ############################################################################### 134 | # Runner 135 | 136 | 137 | def main(): 138 | args = Args() 139 | upload_test_data(args) 140 | 141 | 142 | ############################################################################### 143 | # Allow caller to directly run this module (usually in development scenarios) 144 | 145 | if __name__ == "__main__": 146 | main() 147 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.2.2 3 | commit = True 4 | tag = True 5 | 6 | [bumpversion:file:setup.py] 7 | search = {current_version} 8 | replace = {new_version} 9 | 10 | [bumpversion:file:actk/__init__.py] 11 | search = {current_version} 12 | replace = {new_version} 13 | 14 | [bdist_wheel] 15 | universal = 1 16 | 17 | [aliases] 18 | test = pytest 19 | 20 | [tool:pytest] 21 | collect_ignore = ['setup.py'] 22 | 23 | [flake8] 24 | exclude = 25 | docs/ 26 | ignore = 27 | E203 28 | E402 29 | W291 30 | W503 31 | max-line-length = 88 32 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """The setup script.""" 5 | 6 | from setuptools import find_packages, setup 7 | 8 | with open("README.md") as readme_file: 9 | readme = readme_file.read() 10 | 11 | setup_requirements = [ 12 | "pytest-runner>=5.2", 13 | ] 14 | 15 | test_requirements = [ 16 | "black>=19.10b0", 17 | "codecov>=2.1.4", 18 | "flake8>=3.8.3", 19 | "flake8-debugger>=3.2.1", 20 | "pytest>=5.4.3", 21 | "pytest-cov>=2.9.0", 22 | "pytest-raises>=0.11", 23 | "quilt3>=3.1.10", 24 | ] 25 | 26 | dev_requirements = [ 27 | *setup_requirements, 28 | *test_requirements, 29 | "bumpversion>=0.6.0", 30 | "coverage>=5.1", 31 | "ipython>=7.15.0", 32 | "m2r>=0.2.1", 33 | "Sphinx>=2.0.0b1,<3", 34 | "sphinx_rtd_theme>=0.4.3", 35 | "tox>=3.15.2", 36 | "twine>=3.1.1", 37 | "wheel>=0.34.2", 38 | ] 39 | 40 | step_workflow_requirements = [ 41 | "aics_dask_utils>=0.2.0", 42 | "bokeh>=2.1.0", 43 | "dask[bag]>=2.19.0", 44 | "dask_jobqueue>=0.7.0", 45 | "datastep>=0.1.8", 46 | "distributed>=2.19.0", 47 | "fire", 48 | "psutil", 49 | ] 50 | 51 | requirements = [ 52 | *step_workflow_requirements, 53 | # project requires 54 | "aicsfeature>=0.2.1", 55 | "aicsimageio>=3.2.3", 56 | "aicsimageprocessing>=0.7.4", 57 | "matplotlib>=3.2.0", 58 | "numpy>=1.18.2", 59 | "pandas>=1.0.3", 60 | ] 61 | 62 | aics_data_requirements = [ 63 | "lkaccess>=1.4.25", 64 | ] 65 | 66 | extra_requirements = { 67 | "setup": setup_requirements, 68 | "test": test_requirements, 69 | "dev": dev_requirements, 70 | "aics": aics_data_requirements, 71 | "all": [ 72 | *requirements, 73 | *dev_requirements, 74 | ], 75 | } 76 | 77 | setup( 78 | author="Jackson Maxfield Brown", 79 | author_email="jacksonb@alleninstitute.org", 80 | classifiers=[ 81 | 
"Development Status :: 2 - Pre-Alpha", 82 | "Intended Audience :: Developers", 83 | "License :: Free for non-commercial use", 84 | "Natural Language :: English", 85 | "Programming Language :: Python :: 3.6", 86 | "Programming Language :: Python :: 3.7", 87 | "Programming Language :: Python :: 3.8", 88 | ], 89 | description="Automated Cell Toolkit", 90 | entry_points={"console_scripts": ["actk=actk.bin.cli:cli"]}, 91 | install_requires=requirements, 92 | license="Allen Institute Software License", 93 | long_description=readme, 94 | long_description_content_type="text/markdown", 95 | include_package_data=True, 96 | keywords="actk, computational biology, workflow, cell, microscopy", 97 | name="actk", 98 | packages=find_packages(exclude=["tests", "*.tests", "*.tests.*"]), 99 | python_requires=">=3.6", 100 | setup_requires=setup_requirements, 101 | test_suite="actk/tests", 102 | tests_require=test_requirements, 103 | extras_require=extra_requirements, 104 | url="https://github.com/AllenCellModeling/actk", 105 | # Do not edit this string manually, always use bumpversion 106 | # Details in CONTRIBUTING.rst 107 | version="0.2.2", 108 | zip_safe=False, 109 | ) 110 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | skipsdist = True 3 | envlist = py36, py37, py38, lint 4 | 5 | [testenv:lint] 6 | deps = 7 | .[test] 8 | commands = 9 | flake8 actk --count --verbose --show-source --statistics 10 | black --check actk 11 | 12 | [testenv] 13 | setenv = 14 | PYTHONPATH = {toxinidir} 15 | deps = 16 | .[test] 17 | commands = 18 | pytest --basetemp={envtmpdir} --cov-report html --cov=actk actk/tests/ 19 | --------------------------------------------------------------------------------