├── .circleci
│   └── config.yml
├── .gitattributes
├── .gitignore
├── .pep8speaks.yml
├── CHANGELOG.md
├── README.md
├── codecov.yml
├── docs
│   ├── Makefile
│   ├── _autosummary
│   │   ├── merlin.analysis.rst
│   │   ├── merlin.core.rst
│   │   ├── merlin.util.rst
│   │   └── merlin.view.rst
│   ├── _modules
│   │   ├── bokeh.rst
│   │   ├── merlin.analysis.rst
│   │   ├── merlin.core.rst
│   │   ├── merlin.rst
│   │   ├── merlin.util.rst
│   │   ├── merlin.view.rst
│   │   ├── merlin.view.widgets.rst
│   │   ├── modules.rst
│   │   └── setup.rst
│   ├── _static
│   │   └── merlin_headline.png
│   ├── api.rst
│   ├── conf.py
│   ├── contributing.rst
│   ├── index.rst
│   ├── installation.rst
│   ├── make.bat
│   ├── modules.rst
│   ├── tasks.rst
│   └── usage.rst
├── license.md
├── merlin
│   ├── __init__.py
│   ├── __main__.py
│   ├── analysis
│   │   ├── __init__.py
│   │   ├── decode.py
│   │   ├── exportbarcodes.py
│   │   ├── filterbarcodes.py
│   │   ├── generatemosaic.py
│   │   ├── globalalign.py
│   │   ├── optimize.py
│   │   ├── partition.py
│   │   ├── plotperformance.py
│   │   ├── preprocess.py
│   │   ├── segment.py
│   │   ├── sequential.py
│   │   ├── slurmreport.py
│   │   ├── testtask.py
│   │   └── warp.py
│   ├── core
│   │   ├── __init__.py
│   │   ├── analysistask.py
│   │   ├── dataset.py
│   │   └── executor.py
│   ├── data
│   │   ├── __init__.py
│   │   ├── codebook.py
│   │   └── dataorganization.py
│   ├── ext
│   │   └── default.mplstyle
│   ├── merlin.py
│   ├── plots
│   │   ├── __init__.py
│   │   ├── _base.py
│   │   ├── decodeplots.py
│   │   ├── filterplots.py
│   │   ├── optimizationplots.py
│   │   ├── segmentationplots.py
│   │   └── testplots.py
│   ├── util
│   │   ├── __init__.py
│   │   ├── aberration.py
│   │   ├── barcodedb.py
│   │   ├── barcodefilters.py
│   │   ├── binary.py
│   │   ├── dataportal.py
│   │   ├── decoding.py
│   │   ├── deconvolve.py
│   │   ├── imagefilters.py
│   │   ├── imagereader.py
│   │   ├── legacy.py
│   │   ├── matlab.py
│   │   ├── registration.py
│   │   ├── simulator.py
│   │   ├── snakewriter.py
│   │   ├── spatialfeature.py
│   │   └── watershed.py
│   └── view
│       ├── __init__.py
│       ├── __main__.py
│       ├── merlinview.py
│       └── widgets
│           ├── __init__.py
│           └── regionview.py
├── requirements.txt
├── setup.py
└── test
    ├── auxiliary_files
    │   ├── test.dax
    │   ├── test.inf
    │   ├── test_0_0.tif
    │   ├── test_0_1.tif
    │   ├── test_0_2.tif
    │   ├── test_0_3.tif
    │   ├── test_0_4.tif
    │   ├── test_0_5.tif
    │   ├── test_0_6.tif
    │   ├── test_0_7.tif
    │   ├── test_1_0.tif
    │   ├── test_1_1.tif
    │   ├── test_1_2.tif
    │   ├── test_1_3.tif
    │   ├── test_1_4.tif
    │   ├── test_1_5.tif
    │   ├── test_1_6.tif
    │   ├── test_1_7.tif
    │   ├── test_analysis_parameters.json
    │   ├── test_codebook.csv
    │   ├── test_codebook2.csv
    │   ├── test_data_organization.csv
    │   ├── test_microscope_parameters.json
    │   └── test_positions.csv
    ├── conftest.py
    ├── pytest.ini
    ├── test_barcode_database.py
    ├── test_binary_utils.py
    ├── test_codebook.py
    ├── test_core.py
    ├── test_dataorganization.py
    ├── test_dataportal.py
    ├── test_dataset.py
    ├── test_decon.py
    ├── test_image_reader.py
    ├── test_merfish.py
    ├── test_plotting.py
    ├── test_snakemake.py
    ├── test_spatialfeature.py
    └── test_zplane_duplicate_removal.py
/.circleci/config.yml:
--------------------------------------------------------------------------------
1 | # Python CircleCI 2.1 configuration file
2 | version: 2.1
3 | jobs:
4 |   build:
5 |     docker:
6 |       - image: circleci/python:3.6.9
7 | 
8 |     working_directory: ~/MERlin
9 | 
10 |     steps:
11 |       # Step 1: obtain repo from GitHub
12 |       - checkout
13 |       # Step 2: create virtual env and install dependencies
14 |       - run:
15 |           name: Install Dependencies
16 |           command: |
17 |             wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
18 |             bash Miniconda3-latest-Linux-x86_64.sh -b -p $HOME/miniconda
19 |             source ~/miniconda/bin/activate root
20 |             conda activate base
21 |             conda config --set always_yes true
22 |             conda config --set quiet true
23 |             conda create -n merlin_env python=3.6
24 |             source activate merlin_env
25 |             conda install rtree
26 |             conda install pytables
27 |             cd ~
28 |             printf 'DATA_HOME=~\nANALYSIS_HOME=~\nPARAMETERS_HOME=~\n' >.merlinenv
29 |             pip install -e MERlin
30 |             cd ~/MERlin
31 |       # Step 3: run linter and tests
32 |       - run:
33 |           name: Run Tests
34 |           command: |
35 |             source ~/miniconda/bin/activate root
36 |             conda activate base
37 |             source activate merlin_env
38 |             cd ~/MERlin
39 |             mkdir ~/test-reports
40 |             pytest --cov --cov-report=xml
41 | 
42 |       - run:
43 |           name: Upload Coverage to Codecov
44 |           command: |
45 |             source ~/miniconda/bin/activate root
46 |             cd ~/MERlin
47 |             pip install codecov && codecov
48 |       - store_artifacts:
49 |           path: ~/MERlin/coverage.xml
50 | 
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | * text=auto
2 |
3 | *.py text
4 | *.rst text
5 | *.bat text
6 | *.json text
7 | *.csv text
8 |
9 | *.tif binary
10 |
11 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .env
2 | .idea
3 | *.swo
4 | *.swp
5 | *.py[cod]
6 | *.egg-info/
7 | __pycache__/
8 | .ipynb_checkpoints/
9 |
10 | pip-log.txt
11 | pip-delete-this-directory.txt
12 |
--------------------------------------------------------------------------------
/.pep8speaks.yml:
--------------------------------------------------------------------------------
1 | scanner:
2 |     diff_only: True  # If False, the entire file touched by the Pull Request is scanned for errors. If True, only the diff is scanned.
3 |     linter: pycodestyle  # Alternative option - flake8
4 | 
5 | pycodestyle:  # Valid if scanner.linter is pycodestyle
6 |     max-line-length: 80
7 |     ignore: []  # Errors and warnings to ignore
8 |     exclude: []  # File path patterns to exclude
9 |     count: False
10 |     first: False
11 |     show-pep8: True
12 |     show-source: False
13 |     statistics: False
14 |     hang-closing: False
15 |     filename: []
16 |     select: []
17 | 
18 | flake8:  # Valid if scanner.linter is flake8
19 |     max-line-length: 79
20 |     ignore: []
21 |     exclude: []
22 |     count: False
23 |     show-source: False
24 |     statistics: False
25 |     hang-closing: False
26 |     filename: []
27 |     select: []
28 | 
29 | no_blank_comment: True  # If True, no comment is made on PR without any errors.
30 | descending_issues_order: False  # If True, PEP 8 issues in message will be displayed in descending order of line numbers in the file
31 | only_mention_files_with_errors: True  # If False, a separate status section for each file is made in the comment.
32 | 
33 | message:  # Customize the comment made by the bot
34 |     opened:  # Messages when a new PR is submitted
35 |         header: "Hello @{name}! Thanks for opening this PR."
36 |         footer: ""
37 |     updated:  # Messages when a PR is updated
38 |         header: "Hello @{name}! Thanks for updating this PR."
39 |         footer: ""
40 |     no_errors: "There are currently no PEP 8 issues detected in this Pull Request. Cheers! :beers: "
41 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 | All notable changes to this project will be documented in this file.
3 |
4 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
5 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
6 |
7 | ## [0.1.0] - 2019-09-30
8 | ### Added
9 | - Initialization of this CHANGELOG file to track changes as the version increments
10 |
11 | ## [0.1.1] - 2019-10-03
12 | ### Fixed
13 | - Fixed bug in sum signal
14 |
15 | ## [0.1.2] - 2019-10-16
16 | ### Added
17 | - Exposed tolerance parameter in the adaptive filter barcodes method
18 | - Added plot for scale factor magnitude vs bit index
19 | - Fixed barcode partitioning to include cells from adjacent fields of view when a cell falls across fov boundaries
20 |
21 | ## [0.1.3] - 2019-12-04
22 | ### Fixed
23 | - Addressed bugs present in cleaning overlapping cells and assigning them to a fov
24 | ### Added
25 | - Added option to draw field of view labels overlaid on the mosaic
26 |
27 | ## [0.1.4] - 2019-12-05
28 | ### Added
29 | - Added task to evaluate whether a parallel analysis task has completed
30 | ### Changed
31 | - Changed the clean overlapping cells to run in parallel
32 | - Snakemake job inputs were simplified using the ParallelCompleteTask to improve DAG construction speed and overall snakemake runtime performance
33 |
34 | ## [0.1.5] - 2020-01-22
35 | ### Changed
36 | - Updated the filemap to only store the file name so that it can easily be pointed to new data home directories. This change maintains backward compatibility.
37 | - Improved decoding speed
38 | ### Added
39 | - Parameters to filter tasks that enable removing barcodes that were putatively duplicated across adjacent z planes.
40 |
41 | ## [0.1.6] -
42 | ### Fixed
43 | - Fixed bug and edge cases in removal of barcodes duplicated across z planes. Moved to the decode step to prevent unintended conflict with misidentification rate determination.
44 |
45 | ### Added
46 | - An alternative Lucy-Richardson deconvolution approach that requires ~10x fewer iterations.
47 |
48 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [](https://circleci.com/gh/emanuega/MERlin/tree/master)
2 | [](https://codecov.io/gh/emanuega/MERlin)
3 | [](https://zenodo.org/badge/latestdoi/202668055)
4 |
5 | # MERlin - Extensible pipeline for scalable data analysis
6 |
7 | MERlin is an extensible data analysis pipeline for reproducible and scalable analysis of large
8 | datasets. Each MERlin workflow consists of a set of analysis tasks, each of which can be run as
9 | single task or split among many subtasks that can be executed in parallel. MERlin is able to
10 | execute workflows on a single computer, on a high performance cluster, or on the cloud
11 | (AWS and Google Cloud).
12 |
13 | If MERlin is useful for your research, consider citing:
14 | Emanuel, G., Eichhorn, S. W., Zhuang, X. 2020, MERlin - scalable and extensible MERFISH analysis software, v0.1.6, Zenodo, doi:10.5281/zenodo.3758540
15 |
16 | Please find the most recent version of MERlin [here](https://github.com/emanuega/merlin).
17 |
18 | ## MERFISH data analysis
19 |
20 | 
21 |
22 | MERlin was originally created for decoding MERFISH datasets.
23 | [MERFISH](https://science.sciencemag.org/lookup/doi/10.1126/science.aaa6090) is a technology for
24 | spatially resolved RNA profiling of 10s to 10,000s of RNA species in individual cells
25 | with high accuracy and high detection efficiency. The standard MERlin MERFISH analysis
26 | workflow decodes and segments MERFISH datasets to determine RNA molecules and the
27 | cell boundaries represented in the raw images.
28 |
29 | ## Documentation
30 |
31 | For more information on installation and usage, please see the [documentation](https://emanuega.github.io/MERlin/).
32 |
33 | ## Authors
34 |
35 | * [**George Emanuel**](mailto:emanuega0@gmail.com) - *Initial work*
36 | * **Stephen Eichhorn**
37 | * **Leonardo Sepulveda**
38 |
39 | Contributions are welcome! Please see the
40 | [documentation](https://emanuega.github.io/MERlin/contributing.html) for contribution guidelines.
41 |
42 |
--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
1 | ignore:
2 | - "**/test/*.py"
3 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | SOURCEDIR = .
8 | BUILDDIR = ../../MERlin-docs
9 |
10 | # Put it first so that "make" without argument is like "make help".
11 | help:
12 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
13 |
14 | .PHONY: help Makefile
15 |
16 | # Catch-all target: route all unknown targets to Sphinx using the new
17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
18 | %: Makefile
19 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
20 |
21 |
--------------------------------------------------------------------------------
/docs/_autosummary/merlin.analysis.rst:
--------------------------------------------------------------------------------
1 | merlin.analysis
2 | ===============
3 |
4 | .. automodule:: merlin.analysis
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/docs/_autosummary/merlin.core.rst:
--------------------------------------------------------------------------------
1 | merlin.core
2 | ===========
3 |
4 | .. automodule:: merlin.core
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/docs/_autosummary/merlin.util.rst:
--------------------------------------------------------------------------------
1 | merlin.util
2 | ===========
3 |
4 | .. automodule:: merlin.util
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/docs/_autosummary/merlin.view.rst:
--------------------------------------------------------------------------------
1 | merlin.view
2 | ===========
3 |
4 | .. automodule:: merlin.view
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/docs/_modules/bokeh.rst:
--------------------------------------------------------------------------------
1 | bokeh module
2 | ============
3 |
4 | .. automodule:: bokeh
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 |
--------------------------------------------------------------------------------
/docs/_modules/merlin.analysis.rst:
--------------------------------------------------------------------------------
1 | merlin.analysis package
2 | =======================
3 |
4 | Submodules
5 | ----------
6 |
7 | merlin.analysis.decode module
8 | -----------------------------
9 |
10 | .. automodule:: merlin.analysis.decode
11 |    :members:
12 |    :undoc-members:
13 |    :show-inheritance:
14 | 
15 | merlin.analysis.exportbarcodes module
16 | -------------------------------------
17 | 
18 | .. automodule:: merlin.analysis.exportbarcodes
19 |    :members:
20 |    :undoc-members:
21 |    :show-inheritance:
22 | 
23 | merlin.analysis.filterbarcodes module
24 | -------------------------------------
25 | 
26 | .. automodule:: merlin.analysis.filterbarcodes
27 |    :members:
28 |    :undoc-members:
29 |    :show-inheritance:
30 | 
31 | merlin.analysis.generatemosaic module
32 | -------------------------------------
33 | 
34 | .. automodule:: merlin.analysis.generatemosaic
35 |    :members:
36 |    :undoc-members:
37 |    :show-inheritance:
38 | 
39 | merlin.analysis.globalalign module
40 | ----------------------------------
41 | 
42 | .. automodule:: merlin.analysis.globalalign
43 |    :members:
44 |    :undoc-members:
45 |    :show-inheritance:
46 | 
47 | merlin.analysis.optimize module
48 | -------------------------------
49 | 
50 | .. automodule:: merlin.analysis.optimize
51 |    :members:
52 |    :undoc-members:
53 |    :show-inheritance:
54 | 
55 | merlin.analysis.plotperformance module
56 | --------------------------------------
57 | 
58 | .. automodule:: merlin.analysis.plotperformance
59 |    :members:
60 |    :undoc-members:
61 |    :show-inheritance:
62 | 
63 | merlin.analysis.preprocess module
64 | ---------------------------------
65 | 
66 | .. automodule:: merlin.analysis.preprocess
67 |    :members:
68 |    :undoc-members:
69 |    :show-inheritance:
70 | 
71 | merlin.analysis.segment module
72 | ------------------------------
73 | 
74 | .. automodule:: merlin.analysis.segment
75 |    :members:
76 |    :undoc-members:
77 |    :show-inheritance:
78 | 
79 | merlin.analysis.warp module
80 | ---------------------------
81 | 
82 | .. automodule:: merlin.analysis.warp
83 |    :members:
84 |    :undoc-members:
85 |    :show-inheritance:
86 | 
87 | 
88 | Module contents
89 | ---------------
90 | 
91 | .. automodule:: merlin.analysis
92 |    :members:
93 |    :undoc-members:
94 |    :show-inheritance:
95 |
--------------------------------------------------------------------------------
/docs/_modules/merlin.core.rst:
--------------------------------------------------------------------------------
1 | merlin.core package
2 | ===================
3 |
4 | Submodules
5 | ----------
6 |
7 | merlin.core.analysistask module
8 | -------------------------------
9 |
10 | .. automodule:: merlin.core.analysistask
11 |    :members:
12 |    :undoc-members:
13 |    :show-inheritance:
14 | 
15 | merlin.core.dataset module
16 | --------------------------
17 | 
18 | .. automodule:: merlin.core.dataset
19 |    :members:
20 |    :undoc-members:
21 |    :show-inheritance:
22 | 
23 | merlin.core.executor module
24 | ---------------------------
25 | 
26 | .. automodule:: merlin.core.executor
27 |    :members:
28 |    :undoc-members:
29 |    :show-inheritance:
30 | 
31 | merlin.core.scheduler module
32 | ----------------------------
33 | 
34 | .. automodule:: merlin.core.scheduler
35 |    :members:
36 |    :undoc-members:
37 |    :show-inheritance:
38 | 
39 | 
40 | Module contents
41 | ---------------
42 | 
43 | .. automodule:: merlin.core
44 |    :members:
45 |    :undoc-members:
46 |    :show-inheritance:
47 |
--------------------------------------------------------------------------------
/docs/_modules/merlin.rst:
--------------------------------------------------------------------------------
1 | merlin package
2 | ==============
3 |
4 | Subpackages
5 | -----------
6 |
7 | .. toctree::
8 | 
9 |    merlin.analysis
10 |    merlin.core
11 |    merlin.util
12 |    merlin.view
13 | 
14 | Submodules
15 | ----------
16 | 
17 | merlin.merlin module
18 | --------------------
19 | 
20 | .. automodule:: merlin.merlin
21 |    :members:
22 |    :undoc-members:
23 |    :show-inheritance:
24 | 
25 | 
26 | Module contents
27 | ---------------
28 | 
29 | .. automodule:: merlin
30 |    :members:
31 |    :undoc-members:
32 |    :show-inheritance:
33 |
--------------------------------------------------------------------------------
/docs/_modules/merlin.util.rst:
--------------------------------------------------------------------------------
1 | merlin.util package
2 | ===================
3 |
4 | Submodules
5 | ----------
6 |
7 | merlin.util.barcodedb module
8 | ----------------------------
9 |
10 | .. automodule:: merlin.util.barcodedb
11 |    :members:
12 |    :undoc-members:
13 |    :show-inheritance:
14 | 
15 | merlin.util.binary module
16 | -------------------------
17 | 
18 | .. automodule:: merlin.util.binary
19 |    :members:
20 |    :undoc-members:
21 |    :show-inheritance:
22 | 
23 | merlin.util.decoding module
24 | ---------------------------
25 | 
26 | .. automodule:: merlin.util.decoding
27 |    :members:
28 |    :undoc-members:
29 |    :show-inheritance:
30 | 
31 | 
32 | Module contents
33 | ---------------
34 | 
35 | .. automodule:: merlin.util
36 |    :members:
37 |    :undoc-members:
38 |    :show-inheritance:
39 |
--------------------------------------------------------------------------------
/docs/_modules/merlin.view.rst:
--------------------------------------------------------------------------------
1 | merlin.view package
2 | ===================
3 |
4 | Subpackages
5 | -----------
6 |
7 | .. toctree::
8 | 
9 |    merlin.view.widgets
10 | 
11 | Submodules
12 | ----------
13 | 
14 | merlin.view.merlinview module
15 | -----------------------------
16 | 
17 | .. automodule:: merlin.view.merlinview
18 |    :members:
19 |    :undoc-members:
20 |    :show-inheritance:
21 | 
22 | 
23 | Module contents
24 | ---------------
25 | 
26 | .. automodule:: merlin.view
27 |    :members:
28 |    :undoc-members:
29 |    :show-inheritance:
30 |
--------------------------------------------------------------------------------
/docs/_modules/merlin.view.widgets.rst:
--------------------------------------------------------------------------------
1 | merlin.view.widgets package
2 | ===========================
3 |
4 | Submodules
5 | ----------
6 |
7 | merlin.view.widgets.regionview module
8 | -------------------------------------
9 |
10 | .. automodule:: merlin.view.widgets.regionview
11 |    :members:
12 |    :undoc-members:
13 |    :show-inheritance:
14 | 
15 | 
16 | Module contents
17 | ---------------
18 | 
19 | .. automodule:: merlin.view.widgets
20 |    :members:
21 |    :undoc-members:
22 |    :show-inheritance:
23 |
--------------------------------------------------------------------------------
/docs/_modules/modules.rst:
--------------------------------------------------------------------------------
1 | MERlin
2 | ======
3 |
4 | .. toctree::
5 |    :maxdepth: 4
6 | 
7 |    bokeh
8 |    merlin
9 |    setup
10 |
--------------------------------------------------------------------------------
/docs/_modules/setup.rst:
--------------------------------------------------------------------------------
1 | setup module
2 | ============
3 |
4 | .. automodule:: setup
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 |
--------------------------------------------------------------------------------
/docs/_static/merlin_headline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/docs/_static/merlin_headline.png
--------------------------------------------------------------------------------
/docs/api.rst:
--------------------------------------------------------------------------------
1 | API
2 | ****
3 |
4 | .. autosummary::
5 |    :toctree: _autosummary
6 | 
7 |    merlin.core
8 |    merlin.analysis
9 |    merlin.util
10 |    merlin.view
11 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # Configuration file for the Sphinx documentation builder.
4 | #
5 | # This file does only contain a selection of the most common options. For a
6 | # full list see the documentation:
7 | # http://www.sphinx-doc.org/en/master/config
8 |
9 | # -- Path setup --------------------------------------------------------------
10 |
11 | # If extensions (or modules to document with autodoc) are in another directory,
12 | # add these directories to sys.path here. If the directory is relative to the
13 | # documentation root, use os.path.abspath to make it absolute, like shown here.
14 |
15 | import os
16 | import sys
17 | dir_, _ = os.path.split(__file__)
18 | root_dir = os.path.abspath(os.path.join(dir_, '..', '..'))
19 | sys.path.insert(0, root_dir)
20 |
21 |
22 | # -- Project information -----------------------------------------------------
23 |
24 | project = 'MERlin'
25 | copyright = '2018, George Emanuel'
26 | author = 'George Emanuel'
27 |
28 | # The short X.Y version
29 | version = ''
30 | # The full version, including alpha/beta/rc tags
31 | release = ''
32 |
33 |
34 | # -- General configuration ---------------------------------------------------
35 |
36 | # If your documentation needs a minimal Sphinx version, state it here.
37 | #
38 | # needs_sphinx = '1.0'
39 |
40 | # Add any Sphinx extension module names here, as strings. They can be
41 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
42 | # ones.
43 | extensions = [
44 |     'sphinx.ext.autodoc',
45 |     'sphinx.ext.coverage',
46 |     'sphinx.ext.viewcode',
47 |     'sphinx.ext.githubpages',
48 |     'sphinx.ext.autosummary',
49 |     'sphinx.ext.napoleon',
50 | ]
51 |
52 |
53 | # Include Python objects as they appear in source files
54 | autodoc_member_order = 'bysource'
55 | # Default flags used by autodoc directives
56 | autodoc_default_flags = ['members', 'show-inheritance']
57 | # Generate autodoc stubs with summaries from code
58 | autosummary_generate = True
59 |
60 | # Add any paths that contain templates here, relative to this directory.
61 | templates_path = ['_templates']
62 |
63 | # The suffix(es) of source filenames.
64 | # You can specify multiple suffix as a list of string:
65 | #
66 | # source_suffix = ['.rst', '.md']
67 | source_suffix = '.rst'
68 |
69 | # The master toctree document.
70 | master_doc = 'index'
71 |
72 | # The language for content autogenerated by Sphinx. Refer to documentation
73 | # for a list of supported languages.
74 | #
75 | # This is also used if you do content translation via gettext catalogs.
76 | # Usually you set "language" from the command line for these cases.
77 | language = None
78 |
79 | # List of patterns, relative to source directory, that match files and
80 | # directories to ignore when looking for source files.
81 | # This pattern also affects html_static_path and html_extra_path.
82 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
83 |
84 | # The name of the Pygments (syntax highlighting) style to use.
85 | pygments_style = 'sphinx'
86 |
87 |
88 | # -- Options for HTML output -------------------------------------------------
89 |
90 | # The theme to use for HTML and HTML Help pages. See the documentation for
91 | # a list of builtin themes.
92 | #
93 | html_theme = 'sphinx_rtd_theme'
94 |
95 | # Theme options are theme-specific and customize the look and feel of a theme
96 | # further. For a list of options available for each theme, see the
97 | # documentation.
98 | #
99 | # html_theme_options = {}
100 |
101 | # Add any paths that contain custom static files (such as style sheets) here,
102 | # relative to this directory. They are copied after the builtin static files,
103 | # so a file named "default.css" will overwrite the builtin "default.css".
104 | html_static_path = ['_static']
105 |
106 | # Custom sidebar templates, must be a dictionary that maps document names
107 | # to template names.
108 | #
109 | # The default sidebars (for documents that don't match any pattern) are
110 | # defined by theme itself. Builtin themes are using these templates by
111 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
112 | # 'searchbox.html']``.
113 | #
114 | # html_sidebars = {}
115 |
116 |
117 | # -- Options for HTMLHelp output ---------------------------------------------
118 |
119 | # Output file base name for HTML help builder.
120 | htmlhelp_basename = 'MERlindoc'
121 |
122 |
123 | # -- Options for LaTeX output ------------------------------------------------
124 |
125 | latex_elements = {
126 |     # The paper size ('letterpaper' or 'a4paper').
127 |     #
128 |     # 'papersize': 'letterpaper',
129 | 
130 |     # The font size ('10pt', '11pt' or '12pt').
131 |     #
132 |     # 'pointsize': '10pt',
133 | 
134 |     # Additional stuff for the LaTeX preamble.
135 |     #
136 |     # 'preamble': '',
137 | 
138 |     # Latex figure (float) alignment
139 |     #
140 |     # 'figure_align': 'htbp',
141 | }
142 |
143 | # Grouping the document tree into LaTeX files. List of tuples
144 | # (source start file, target name, title,
145 | # author, documentclass [howto, manual, or own class]).
146 | latex_documents = [
147 |     (master_doc, 'MERlin.tex', 'MERlin Documentation',
148 |      'George Emanuel', 'manual'),
149 | ]
150 |
151 |
152 | # -- Options for manual page output ------------------------------------------
153 |
154 | # One entry per manual page. List of tuples
155 | # (source start file, name, description, authors, manual section).
156 | man_pages = [
157 |     (master_doc, 'merlin', 'MERlin Documentation',
158 |      [author], 1)
159 | ]
160 |
161 |
162 | # -- Options for Texinfo output ----------------------------------------------
163 |
164 | # Grouping the document tree into Texinfo files. List of tuples
165 | # (source start file, target name, title, author,
166 | # dir menu entry, description, category)
167 | texinfo_documents = [
168 |     (master_doc, 'MERlin', 'MERlin Documentation',
169 |      author, 'MERlin', 'One line description of project.',
170 |      'Miscellaneous'),
171 | ]
172 |
173 |
174 | # -- Options for Epub output -------------------------------------------------
175 |
176 | # Bibliographic Dublin Core info.
177 | epub_title = project
178 |
179 | # The unique identifier of the text. This can be a ISBN number
180 | # or the project homepage.
181 | #
182 | # epub_identifier = ''
183 |
184 | # A unique identification for the text.
185 | #
186 | # epub_uid = ''
187 |
188 | # A list of files that should not be packed into the epub file.
189 | epub_exclude_files = ['search.html']
190 |
191 |
192 | # -- Extension configuration -------------------------------------------------
193 |
194 | # -- Options for intersphinx extension ---------------------------------------
195 |
196 | # Example configuration for intersphinx: refer to the Python standard library.
197 | intersphinx_mapping = {'https://docs.python.org/': None}
198 |
199 | # Napoleon settings
200 | napoleon_google_docstring = True
201 | napoleon_numpy_docstring = True
202 | napoleon_include_init_with_doc = False
203 | napoleon_include_private_with_doc = False
204 | napoleon_include_special_with_doc = True
205 | napoleon_use_admonition_for_examples = False
206 | napoleon_use_admonition_for_notes = False
207 | napoleon_use_admonition_for_references = False
208 | napoleon_use_ivar = False
209 | napoleon_use_param = True
210 | napoleon_use_rtype = True
211 |
--------------------------------------------------------------------------------
/docs/contributing.rst:
--------------------------------------------------------------------------------
1 | Contributing to MERlin
2 | ************************
3 |
4 | Contributions to MERlin can either be submitted by opening an issue to raise concerns or offer suggestions or by opening a pull request to offer improvements to the code base.
5 |
6 | Opening a pull request
7 | ========================
8 |
9 | A pull request allows code to be proposed to be incorporated into MERlin. To receive feedback on work in progress, mark the pull request with WIP in the subject line. To open a pull request:
10 |
11 | #. Fork the repository to your GitHub account and clone it locally.
12 | #. Create a new branch for your edits.
13 | #. Make your desired edits to the code.
14 | #. Run the tests to ensure MERlin is still functional. Write new tests to cover your new contribution as necessary.
15 | #. Submit a pull request from your edited branch to the latest vx.y.z (for example v0.1.4) branch of the MERlin repository, representing the version of the next release.
16 |    Be sure to reference any relevant issues and request at least one reviewer. Periodically the vx.y.z branch will be merged into the master branch.
17 |
18 | Code formatting
19 | ===============
20 |
21 | Code contributions should follow the `PEP 8 <https://www.python.org/dev/peps/pep-0008/>`_ style guide with the
22 | exception that variable names should be mixedCase instead of words separated by underscores. Comments should follow
23 | the `Google docstring style <https://google.github.io/styleguide/pyguide.html>`_.
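24 | 
25 | For example, a function following these conventions might look like the snippet below (an illustrative sketch, not code taken from MERlin):
26 | 
27 | .. code-block:: python
28 | 
29 |     def filter_barcodes(barcodeData: list, minimumArea: int) -> list:
30 |         """Remove barcodes smaller than the specified area.
31 | 
32 |         Args:
33 |             barcodeData: a list of (barcodeId, area) tuples.
34 |             minimumArea: the smallest barcode area to retain.
35 |         Returns:
36 |             The entries of barcodeData with area of at least minimumArea.
37 |         """
38 |         return [b for b in barcodeData if b[1] >= minimumArea]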
24 |
25 | Running the tests
26 | =================
27 |
28 | All contributions to MERlin must maintain the integrity of the tests. Before submitting a pull request, please ensure
29 | that all tests pass. Tests are implemented using the pytest_ framework. The tests are in the test directory and they
30 | can be run by executing pytest in the root MERlin directory. To facilitate efficient debugging, tests that take more
31 | than a few seconds are marked with ``slowtest`` and can be excluded from the run using the command:
32 | 
33 | .. _pytest: https://docs.pytest.org/
34 | 
35 | .. code-block:: none
36 | 
37 |     pytest -v -m "not slowtest" test
38 |
39 | Generating documentation
40 | =============================
41 |
42 | Documentation for MERlin is generated using Sphinx. The API documentation can be generated from the root MERlin directory with the command:
43 | 
44 | .. code-block:: none
45 | 
46 |     sphinx-apidoc -f -o ./docs/_modules .
47 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. MERlin documentation master file, created by
2 |    sphinx-quickstart on Mon Dec 3 17:31:12 2018.
3 |    You can adapt this file completely to your liking, but it should at least
4 |    contain the root `toctree` directive.
5 |
6 | MERlin
7 | ******
8 |
9 | .. toctree::
10 |    :maxdepth: 2
11 | 
12 |    installation
13 |    usage
14 |    tasks
15 |    contributing
16 |    api
17 |
18 | Indices and tables
19 | ==================
20 |
21 | * :ref:`genindex`
22 | * :ref:`modindex`
23 | * :ref:`search`
24 |
--------------------------------------------------------------------------------
/docs/installation.rst:
--------------------------------------------------------------------------------
1 | Installation
2 | **************
3 |
4 | Set up a virtual environment
5 | =============================
6 |
7 | To ensure that MERlin and its dependencies don't interfere with other packages that are installed, we recommend that you install MERlin in a new virtual environment. MERlin requires python 3.6 or above.
8 |
9 | An anaconda virtual environment can be created using the command:
10 |
11 | .. code-block:: none
12 |
13 |     conda create -n merlin_env python=3.6
14 |
15 | and the new environment can be activated using the command:
16 |
17 | .. code-block:: none
18 |
19 |     conda activate merlin_env
20 |
21 | or
22 |
23 | .. code-block:: none
24 |
25 |     source activate merlin_env
26 |
27 | Installing prerequisites
28 | ==========================
29 |
30 | The packages rtree and pytables are not properly installed by pip and should be installed independently. For example, using Anaconda:
31 |
32 | .. code-block:: none
33 |
34 |     conda install rtree pytables
35 |
36 | On Harvard research computing, matplotlib raises an error saying that 'CXXABI_1.3.9' is not found. This can be corrected by loading the gcc module:
37 |
38 | .. code-block:: none
39 |
40 |     module load gcc/8.2.0-fasrc01
41 |
42 | Installing MERlin
43 | ==================
44 |
45 | MERlin can be installed by cloning the repository and installing with pip:
46 |
47 | .. code-block:: none
48 |
49 |     git clone https://github.com/emanuega/MERlin
50 |
51 | .. code-block:: none
52 |
53 |     pip install -e MERlin
54 |
55 |
56 | .. _specifying-paths:
57 |
58 | Specifying paths with a .merlinenv file
59 | ========================================
60 | 
61 | A .merlinenv file is required to specify the search locations for the various input and output files. The following variables should be defined in a file named .merlinenv in the user home directory (~/.merlinenv on Linux or C:\\users\\UserName\\.merlinenv on Windows):
62 |
63 | * DATA\_HOME - The path of the root directory to the raw data.
64 | * ANALYSIS\_HOME - The path of the root directory where analysis results should be stored.
65 | * PARAMETERS\_HOME - The path to the directory where the merfish-parameters directory resides.
66 |
67 | The PARAMETERS_HOME directory should contain the following folders:
68 |
69 | * analysis - Contains the analysis parameters json files.
70 | * codebooks - Contains the codebook csv files.
71 | * dataorganization - Contains the data organization csv files.
72 | * positions - Contains the position csv files.
73 | * microscope - Contains the microscope parameters json files.
74 | * fpkm - Contains the fpkm csv files.
75 | * snakemake - Contains the snakemake arguments json files.
76 |
77 | An example PARAMETERS_HOME directory with typical files can be found in the
78 | `merlin-parameters-example <https://github.com/emanuega/merlin-parameters-example>`_ repository.
79 |
80 | The contents of an example .merlinenv file are below:
81 |
82 | .. code-block:: none
83 | 
84 |     DATA_HOME=D:/data
85 |     ANALYSIS_HOME=D:/analysis
86 |     PARAMETERS_HOME=D:/merfish-parameters
87 | 
88 | MERlin can create a .merlinenv file for you using the command:
89 | 
90 | .. code-block:: none
91 | 
92 |     merlin --configure .
93 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=..\..\MERlin-docs
12 |
13 | if "%1" == "" goto help
14 |
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | echo.
18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | echo.installed, then set the SPHINXBUILD environment variable to point
20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | echo.may add the Sphinx directory to PATH.
22 | echo.
23 | echo.If you don't have Sphinx installed, grab it from
24 | echo.http://sphinx-doc.org/
25 | exit /b 1
26 | )
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
33 |
34 | :end
35 |
36 | cd ..\..\MERlin-docs\html
37 | git add .
38 | git commit -m "rebuilt docs"
39 | git push origin gh-pages
40 |
41 | popd
42 |
43 |
44 |
45 |
--------------------------------------------------------------------------------
/docs/modules.rst:
--------------------------------------------------------------------------------
1 | Project Modules
2 | ===============
3 |
4 | This page contains the list of the project's modules.
5 | 
6 | .. autosummary::
7 |    :toctree: _autosummary
8 | 
9 |    merlin.core
10 |
--------------------------------------------------------------------------------
/docs/usage.rst:
--------------------------------------------------------------------------------
1 | Usage
2 | ******
3 |
4 | MERlin organizes files into three folders, specified in the .merlinenv file (see :ref:`specifying-paths`). The three folders are:
5 | 
6 | - DATA_HOME – base directory for raw data
7 | - ANALYSIS_HOME – base directory for the analysis results
8 | - PARAMETERS_HOME – base directory for parameters
9 | 
10 | MERlin reads raw data, such as images, from DATA_HOME and configuration parameters from PARAMETERS_HOME and writes
11 | analysis results into ANALYSIS_HOME. Each experiment should be a separate folder within DATA_HOME, and MERlin
12 | will create a corresponding folder in ANALYSIS_HOME. For example, the images for “experiment1” should be stored in the
13 | folder %DATA_HOME%/experiment1. When MERlin runs, it will save the output files in %ANALYSIS_HOME%/experiment1. With
14 | this file system, %DATA_HOME% and %ANALYSIS_HOME% are constant for all the experiments you analyze and only the
15 | experiment name needs to be specified. The typical file structure for MERFISH experiments
16 | "experiment1" and "experiment2" could be:
17 |
18 | .. code-block:: none
19 | 
20 |     %DATA_HOME%/
21 |     ├── experiment1/
22 |     │   ├── image_000_00.tif
23 |     │   ├── image_000_01.tif
24 |     │   ├── ...
25 |     │   └── image_150_10.tif
26 |     └── experiment2/
27 |         ├── image_000_00.tif
28 |         ├── image_000_01.tif
29 |         ├── ...
30 |         └── image_150_10.tif
31 |     %PARAMETERS_HOME%/
32 |     ├── analysis/
33 |     │   └── analysis_parameters.json
34 |     ├── codebooks/
35 |     │   └── codebook.csv
36 |     ├── dataorganization/
37 |     │   └── dataorganization.csv
38 |     ├── microscope/
39 |     │   └── microscope_parameters.json
40 |     ├── positions/
41 |     │   └── positions.csv
42 |     └── snakemake/
43 |         └── snakeconfig.json
44 |     %ANALYSIS_HOME%/
45 |     ├── experiment1/
46 |     │   ├── FiducialCorrelationWarp
47 |     │   ├── DeconvolutionPreprocess
48 |     │   ├── ...
49 |     │   └── PlotPerformance
50 |     └── experiment2/
51 |         ├── FiducialCorrelationWarp
52 |         ├── DeconvolutionPreprocess
53 |         ├── ...
54 |         └── PlotPerformance
55 |
56 |
57 | Input specifications
58 | =====================
59 |
60 | Raw images
61 | -----------
62 |
63 | All raw images should be located in the same folder, as discussed above, and there should be a separate image
64 | file for each imaging round and each field of view. MERlin is able to read both tiff stacks and dax image files. The
65 | exact file name is specified by a regular expression in the data organization file (imageRegExp and fiducialRegExp).
66 | For example, you can specify the regular expression ``(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+)``
67 | for the image filenames. This indicates that the first part of the file name is the imageType (the value in
68 | the imageType column of the data organization file), followed by the fov index, followed by the imagingRound index, all
69 | separated by underscores.
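70 | 
71 | As a quick check (a minimal sketch; the file name below is hypothetical), the expression can be tested with Python's ``re`` module:
72 | 
73 | .. code-block:: python
74 | 
75 |     import re
76 | 
77 |     imageRegExp = r'(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+)'
78 |     # Parse a hypothetical file named Conventional_750_650_561_488_405_12_3.tif
79 |     match = re.match(imageRegExp, 'Conventional_750_650_561_488_405_12_3')
80 |     print(match.group('imageType'))     # Conventional_750_650_561_488_405
81 |     print(match.group('fov'))           # 12
82 |     print(match.group('imagingRound'))  # 3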
70 |
71 | Data organization
72 | ------------------
73 |
74 | The data organization file specifies which images correspond to each readout. The data organization file is a csv file. The first row is a header with column names and each following row designates one readout. The information provided for each readout indicates where to find the corresponding images in the raw image data and how to find the corresponding fiducial image to align the images between rounds.
75 |
76 | The columns in the data organization file are listed below; an illustrative example follows the list:
77 | 
78 | - channelName - The name of the data channel. For genes measured sequentially, this can be set as the gene name.
79 | - readoutName - The name of the readout sequence used to measure this channel.
80 | - imageType - The base name for the image file that contains the images for this readout, for example, ``Conventional_750_650_561_488_405``
81 | - imageRegExp - A regular expression specifying how image file names are constructed for each field of view and
82 |   each imaging round. The parameters used in the regular expression are ``imageType``, ``fov``, and ``imagingRound``,
83 |   for example: ``(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+)``. Here, ``imageType`` specifies the
84 |   string indicated in the ``imageType`` column for the corresponding row, ``imagingRound`` specifies the designated
85 |   ``imagingRound`` for the corresponding row, and ``fov`` is filled with all field of view indexes in the data set. The
86 |   imageRegExp should not include the file extension, which will be determined automatically.
87 | - bitNumber - The bit number corresponding to this readout.
88 | - imagingRound - The round of imaging where this readout is measured, starting from zero.
89 | - color - The illumination color that is used to measure this readout.
90 | - frame - The zero indexed frame or frames in the image file where images corresponding to this readout can be found. For a single frame, a single integer can be provided. For multiple frames, the frames can be provided as a list as ``[0, 1, 2, 3, 4, 5, 6]``
91 | - zPos - The z position for each of the frames specified in the previous column. For only a single frame, the z position should be provided as a decimal number while for multiple frames a list should be provided as for frame.
92 | - fiducialImageType - The base name for the image file that contains the fiducial images for aligning images for this readout, for example, ``Conventional_750_650_561_488_405``
93 | - fiducialRegExp - A regular expression specifying how file names are constructed for the fiducial image files. This regex follows the same format as ``imageRegExp``.
94 | - fiducialImagingRound - The imaging round (zero indexed) corresponding to the fiducial images for aligning images for this readout.
95 | - fiducialFrame - The frame index in the fiducial image file where the fiducial frame can be found.
96 | - fiducialColor - The illumination color that is used to measure the fiducials.
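97 | 
98 | For concreteness, the first rows of a minimal data organization file might look like the following (illustrative values only; see ``test/auxiliary_files/test_data_organization.csv`` in this repository for a working example):
99 | 
100 | .. code-block:: none
101 | 
102 |     channelName,readoutName,imageType,imageRegExp,bitNumber,imagingRound,color,frame,zPos,fiducialImageType,fiducialRegExp,fiducialImagingRound,fiducialFrame,fiducialColor
103 |     bit1,readout1,Conventional_750_650_561_488_405,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),1,0,750,0,0.0,Conventional_750_650_561_488_405,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),0,2,488
104 |     bit2,readout2,Conventional_750_650_561_488_405,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),2,0,650,1,0.0,Conventional_750_650_561_488_405,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),0,2,488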
97 |
98 | Codebook
99 | ----------
100 |
101 | The codebook specifies the barcode assigned to each gene and to each blank control in the experiment. Barcodes corresponding to blanks must have "blank" in their name.
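102 | 
103 | An illustrative codebook snippet is shown below (hypothetical gene names, ids, and barcodes; see ``test/auxiliary_files/test_codebook.csv`` in this repository for a working example):
104 | 
105 | .. code-block:: none
106 | 
107 |     name, id, barcode
108 |     GENE1, ID0001, 0101010001001000
109 |     GENE2, ID0002, 1010001000100100
110 |     blank-01, , 0010010010010001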
102 |
103 | Position list
104 | --------------
105 |
106 | The position list is a csv file containing a list of positions for each imaged region. The i'th row in the file should be
107 | the coordinates of the i'th field of view. Each position is specified by the x position and the y position, separated by a comma.
108 |
109 | The name of a position csv file within the POSITION\_HOME directory can be provided; otherwise, the positions are
110 | extracted from the image metadata xml.
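111 | 
112 | For example, a positions file for three fields of view might contain (hypothetical coordinates):
113 | 
114 | .. code-block:: none
115 | 
116 |     245.3,1225.1
117 |     467.9,1225.1
118 |     690.5,1225.1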
111 |
112 | Microscope parameters
113 | -----------------------
114 |
115 | Microscope parameters specify properties specific to the image acquisition. The microscope parameters file should be placed in the MICROSCOPE_PARAMETERS_HOME directory. The parameters that can be set are listed below; an illustrative example follows the list:
116 |
117 | - microns_per_pixel - the number of microns corresponding to one pixel in the image.
118 | - flip_horizontal - flag indicating whether the images should be flipped horizontally in order to align with neighboring images.
119 | - flip_vertical - flag indicating whether the images should be flipped vertically in order to align with neighboring images.
120 | - transpose - flag indicating whether the images should be transposed in order to align with neighboring images.
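121 | 
122 | An illustrative microscope parameters file is shown below (the values are hypothetical and depend on the instrument; see ``test/auxiliary_files/test_microscope_parameters.json`` in this repository for a working example):
123 | 
124 | .. code-block:: none
125 | 
126 |     {
127 |         "microns_per_pixel": 0.106,
128 |         "flip_horizontal": true,
129 |         "flip_vertical": false,
130 |         "transpose": true
131 |     }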
121 |
122 |
123 | Executing locally
124 | ===================
125 |
126 | After installation, MERlin can be run from the command line with the input parameters specified, such as:
127 |
128 | .. code-block:: none
129 |
130 |     merlin -a test_decode_and_segment.json -m microscope.json -o 7z_16bits.csv -c L26E1_codebook.csv -n 5 testdata
131 |
132 | Here the MERFISH images contained in the directory `%DATA\_HOME%/testdata/` are processed using the analysis tasks listed in `test\_decode\_and\_segment.json` with microscope parameters `microscope.json`, data organization `7z\_16bits.csv`, and codebook `L26E1_codebook.csv`, using 5 cores for each process.
133 |
134 | Executing on a high performance cluster
135 | =====================================================
136 |
137 | MERlin executes tasks through Snakemake_, a workflow management system. Each task can be distributed over a high performance
138 | cluster that is run by a scheduler, such as SLURM or SGE, by providing the appropriate job submission command to snakemake.
139 | See the `merlin-parameters-example <https://github.com/emanuega/merlin-parameters-example>`_ repository for an example snakemake
140 | configuration file. Additional arguments can be specified as indicated in the
141 | `snakemake api documentation <https://snakemake.readthedocs.io/en/stable/api_reference/snakemake.html>`_.
142 | 
143 | .. code-block:: none
144 | 
145 |     merlin -a test_decode_and_segment.json -m microscope.json -o 7z_16bits.csv -c L26E1_codebook.csv -k snake.json testdata
146 |
147 | .. _Snakemake: https://snakemake.readthedocs.io/en/stable/
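148 | 
149 | As a sketch, a minimal snakemake arguments json for a SLURM cluster might contain the following (hypothetical values; the accepted keys correspond to keyword arguments of the snakemake api linked above):
150 | 
151 | .. code-block:: none
152 | 
153 |     {
154 |         "cores": 32,
155 |         "cluster": "sbatch"
156 |     }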
148 |
149 |
--------------------------------------------------------------------------------
/license.md:
--------------------------------------------------------------------------------
1 | The MIT License
2 |
3 | Copyright (c) 2019 Harvard University
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/merlin/__init__.py:
--------------------------------------------------------------------------------
1 | import dotenv
2 | import os
3 | import glob
4 | import json
5 | import importlib
6 | from typing import List
7 | 
8 | from merlin.core import dataset
9 | 
10 | envPath = os.path.join(os.path.expanduser('~'), '.merlinenv')
11 | 
12 | if os.path.exists(envPath):
13 |     dotenv.load_dotenv(envPath)
14 | 
15 |     try:
16 |         DATA_HOME = os.path.expanduser(os.environ.get('DATA_HOME'))
17 |         ANALYSIS_HOME = os.path.expanduser(os.environ.get('ANALYSIS_HOME'))
18 |         PARAMETERS_HOME = os.path.expanduser(os.environ.get('PARAMETERS_HOME'))
19 |         ANALYSIS_PARAMETERS_HOME = os.sep.join(
20 |             [PARAMETERS_HOME, 'analysis'])
21 |         CODEBOOK_HOME = os.sep.join(
22 |             [PARAMETERS_HOME, 'codebooks'])
23 |         DATA_ORGANIZATION_HOME = os.sep.join(
24 |             [PARAMETERS_HOME, 'dataorganization'])
25 |         POSITION_HOME = os.sep.join(
26 |             [PARAMETERS_HOME, 'positions'])
27 |         MICROSCOPE_PARAMETERS_HOME = os.sep.join(
28 |             [PARAMETERS_HOME, 'microscope'])
29 |         FPKM_HOME = os.sep.join([PARAMETERS_HOME, 'fpkm'])
30 |         SNAKEMAKE_PARAMETERS_HOME = os.sep.join(
31 |             [PARAMETERS_HOME, 'snakemake'])
32 | 
33 |     except TypeError:
34 |         print('MERlin environment appears corrupt. Please run ' +
35 |               '\'merlin --configure .\' in order to configure the environment.')
36 | else:
37 |     print(('Unable to find MERlin environment file at %s. Please run ' +
38 |            '\'merlin --configure .\' in order to configure the environment.')
39 |           % envPath)
40 | 
41 | 
42 | def store_env(dataHome, analysisHome, parametersHome):
43 |     with open(envPath, 'w') as f:
44 |         f.write('DATA_HOME=%s\n' % dataHome)
45 |         f.write('ANALYSIS_HOME=%s\n' % analysisHome)
46 |         f.write('PARAMETERS_HOME=%s\n' % parametersHome)
47 | 
48 | 
49 | class IncompatibleVersionException(Exception):
50 |     pass
51 | 
52 | 
53 | def version():
54 |     import pkg_resources
55 |     return pkg_resources.get_distribution('merlin').version
56 | 
57 | 
58 | def is_compatible(testVersion: str, baseVersion: str = None) -> bool:
59 |     """ Determine if testVersion is compatible with baseVersion
60 | 
61 |     Args:
62 |         testVersion: the version identifier to test, as the string 'x.y.z'
63 |             where x is the major version, y is the minor version,
64 |             and z is the patch.
65 |         baseVersion: the version to check testVersion's compatibility against.
66 |             If not specified, the current MERlin version is used as baseVersion.
67 |     Returns: True if testVersion is compatible with baseVersion,
68 |         otherwise False.
69 |     """
70 |     if baseVersion is None:
71 |         baseVersion = version()
72 |     return testVersion.split('.')[0] == baseVersion.split('.')[0]
73 | 
74 | 
75 | def get_analysis_datasets(maxDepth=2) -> List[dataset.DataSet]:
76 |     """ Get a list of all datasets currently stored in analysis home.
77 | 
78 |     Args:
79 |         maxDepth: the directory depth to search for datasets.
80 |     Returns: A list of the dataset objects currently within analysis home.
81 |     """
82 |     metadataFiles = []
83 |     for d in range(1, maxDepth+1):
84 |         metadataFiles += glob.glob(os.path.join(
85 |             ANALYSIS_HOME, *['*']*d, 'dataset.json'))
86 | 
87 |     def load_dataset(jsonPath) -> dataset.DataSet:
88 |         with open(jsonPath, 'r') as f:
89 |             metadata = json.load(f)
90 |         datasetModule = importlib.import_module(metadata['module'])
91 |         datasetClass = getattr(datasetModule, metadata['class'])
92 |         return datasetClass(metadata['dataset_name'])
93 | 
94 |     return [load_dataset(m) for m in metadataFiles]
94 |
--------------------------------------------------------------------------------
/merlin/__main__.py:
--------------------------------------------------------------------------------
1 | from .merlin import merlin
2 |
3 | merlin()
4 |
--------------------------------------------------------------------------------
/merlin/analysis/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/merlin/analysis/__init__.py
--------------------------------------------------------------------------------
/merlin/analysis/exportbarcodes.py:
--------------------------------------------------------------------------------
1 | from merlin.core import analysistask
2 | 
3 | 
4 | class ExportBarcodes(analysistask.AnalysisTask):
5 | 
6 |     """
7 |     An analysis task that exports the barcodes generated by a filter
8 |     task into a csv file.
9 |     """
10 | 
11 |     def __init__(self, dataSet, parameters=None, analysisName=None):
12 |         super().__init__(dataSet, parameters, analysisName)
13 | 
14 |         if 'columns' not in self.parameters:
15 |             self.parameters['columns'] = ['barcode_id', 'global_x',
16 |                                           'global_y', 'cell_index']
17 |         if 'exclude_blanks' not in self.parameters:
18 |             self.parameters['exclude_blanks'] = True
19 | 
20 |         self.columns = self.parameters['columns']
21 |         self.excludeBlanks = self.parameters['exclude_blanks']
22 | 
23 |     def get_estimated_memory(self):
24 |         return 5000
25 | 
26 |     def get_estimated_time(self):
27 |         return 30
28 | 
29 |     def get_dependencies(self):
30 |         return [self.parameters['filter_task']]
31 | 
32 |     def _run_analysis(self):
33 |         filterTask = self.dataSet.load_analysis_task(
34 |             self.parameters['filter_task'])
35 | 
36 |         barcodeData = filterTask.get_barcode_database().get_barcodes(
37 |             columnList=self.columns)
38 | 
39 |         if self.excludeBlanks:
40 |             codebook = filterTask.get_codebook()
41 |             barcodeData = barcodeData[
42 |                 barcodeData['barcode_id'].isin(
43 |                     codebook.get_coding_indexes())]
44 | 
45 |         self.dataSet.save_dataframe_to_csv(barcodeData, 'barcodes', self,
46 |                                            index=False)
47 |
--------------------------------------------------------------------------------
/merlin/analysis/generatemosaic.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | from typing import Tuple
4 |
5 | from merlin.core import analysistask
6 |
7 |
8 | ExtentTuple = Tuple[float, float, float, float]
9 |
10 |
11 | class GenerateMosaic(analysistask.AnalysisTask):
12 |
13 | """
14 | An analysis task that generates mosaic images by compiling different
15 | field of views.
16 | """
17 |
18 | def __init__(self, dataSet, parameters=None, analysisName=None):
19 | super().__init__(dataSet, parameters, analysisName)
20 |
21 | if 'microns_per_pixel' not in self.parameters:
22 | self.parameters['microns_per_pixel'] = 3
23 | if 'fov_crop_width' not in self.parameters:
24 | self.parameters['fov_crop_width'] = 0
25 | if 'separate_files' not in self.parameters:
26 | self.parameters['separate_files'] = False
27 | if 'draw_fov_labels' not in self.parameters:
28 | self.parameters['draw_fov_labels'] = False
29 |
30 | if self.parameters['microns_per_pixel'] == 'full_resolution':
31 | self.mosaicMicronsPerPixel = self.dataSet.get_microns_per_pixel()
32 | else:
33 | self.mosaicMicronsPerPixel = self.parameters['microns_per_pixel']
34 |
35 | def get_estimated_memory(self):
36 | return 10000
37 |
38 | def get_estimated_time(self):
39 | return 30
40 |
41 | def get_dependencies(self):
42 | return [self.parameters['global_align_task'],
43 | self.parameters['warp_task']]
44 |
45 | def get_mosaic(self) -> np.ndarray:
46 | """Get the mosaic generated by this analysis task.
47 |
48 | Returns:
49 | a 5-dimensional array containing the mosaic. The images are arranged
50 | as [channel, zIndex, 1, x, y]. The order of the channels is as
51 | specified in the provided parameters file or in the data
52 | organization if no data channels are specified.
53 | """
54 | return self.dataSet.get_analysis_image_set(self, 'mosaic')
55 |
56 | def _micron_to_mosaic_pixel(self, micronCoordinates,
57 | micronExtents) -> np.ndarray:
58 | """Calculates the mosaic coordinates in pixels from the specified
59 | global coordinates.
60 | """
61 | return np.matmul(self._micron_to_mosaic_transform(micronExtents),
62 | np.append(micronCoordinates, 1)).astype(np.int32)[:2]
63 |
64 | def _micron_to_mosaic_transform(self, micronExtents: ExtentTuple) \
65 | -> np.ndarray:
66 | s = 1/self.mosaicMicronsPerPixel
67 | return np.float32(
68 | [[s*1, 0, -s*micronExtents[0]],
69 | [0, s*1, -s*micronExtents[1]],
70 | [0, 0, 1]])
71 |
72 | def _transform_image_to_mosaic(
73 | self, inputImage: np.ndarray, fov: int, alignTask,
74 | micronExtents: ExtentTuple, mosaicDimensions: Tuple[int, int])\
75 | -> np.ndarray:
76 | transform = \
77 | np.matmul(self._micron_to_mosaic_transform(micronExtents),
78 | alignTask.fov_to_global_transform(fov))
79 | return cv2.warpAffine(
80 | inputImage, transform[:2, :], mosaicDimensions)
81 |
82 | def _run_analysis(self):
83 | alignTask = self.dataSet.load_analysis_task(
84 | self.parameters['global_align_task'])
85 | micronExtents = alignTask.get_global_extent()
86 | self.dataSet.save_numpy_txt_analysis_result(
87 | self._micron_to_mosaic_transform(micronExtents),
88 | 'micron_to_mosaic_pixel_transform', self)
89 |
90 | dataOrganization = self.dataSet.get_data_organization()
91 | if 'data_channels' in self.parameters:
92 | if isinstance(self.parameters['data_channels'], str):
93 | dataChannels = [dataOrganization.get_data_channel_index(
94 | self.parameters['data_channels'])]
95 | elif isinstance(self.parameters['data_channels'], int):
96 | dataChannels = [self.parameters['data_channels']]
97 | else:
98 | dataChannels = [dataOrganization.get_data_channel_index(x)
99 | if isinstance(x, str) else x
100 | for x in self.parameters['data_channels']]
101 | else:
102 | dataChannels = dataOrganization.get_data_channels()
103 |
104 | maximumProjection = False
105 | if 'z_index' in self.parameters:
106 | if self.parameters['z_index'] != 'maximum_projection':
107 | zIndexes = [self.parameters['z_index']]
108 | else:
109 | maximumProjection = True
110 | zIndexes = [0]
111 | else:
112 | zIndexes = range(len(self.dataSet.get_z_positions()))
113 |
114 | if not self.parameters['separate_files']:
115 | imageDescription = self.dataSet.analysis_tiff_description(
116 | len(zIndexes), len(dataChannels))
117 | with self.dataSet.writer_for_analysis_images(
118 | self, 'mosaic') as outputTif:
119 | for d in dataChannels:
120 | for z in zIndexes:
121 | mosaic = self._prepare_mosaic_slice(
122 | z, d, micronExtents, alignTask, maximumProjection)
123 | outputTif.save(mosaic, photometric='MINISBLACK',
124 | metadata=imageDescription)
125 | else:
126 | imageDescription = self.dataSet.analysis_tiff_description(1, 1)
127 | for d in dataChannels:
128 | for z in zIndexes:
129 | with self.dataSet.writer_for_analysis_images(
130 | self, 'mosaic_%s_%i'
131 | % (dataOrganization.get_data_channel_name(d), z))\
132 | as outputTif:
133 | mosaic = self._prepare_mosaic_slice(
134 | z, d, micronExtents, alignTask, maximumProjection)
135 | outputTif.save(mosaic, photometric='MINISBLACK',
136 | metadata=imageDescription)
137 |
138 | def _prepare_mosaic_slice(self, zIndex, dataChannel, micronExtents,
139 | alignTask, maximumProjection):
140 | warpTask = self.dataSet.load_analysis_task(
141 | self.parameters['warp_task'])
142 |
143 | chromaticCorrector = None
144 | if 'optimize_task' in self.parameters:
145 | chromaticCorrector = self.dataSet.load_analysis_task(
146 | self.parameters['optimize_task']).get_chromatic_corrector()
147 |
148 | cropWidth = self.parameters['fov_crop_width']
149 | mosaicDimensions = tuple(self._micron_to_mosaic_pixel(
150 | micronExtents[-2:], micronExtents))
151 |
152 | mosaic = np.zeros(np.flip(mosaicDimensions, axis=0), dtype=np.uint16)
153 |
154 | for f in self.dataSet.get_fovs():
155 | if maximumProjection:
156 | inputImage = np.max([warpTask.get_aligned_image(
157 | f, dataChannel, z, chromaticCorrector)
158 | for z in range(len(self.dataSet.get_z_positions()))],
159 | axis=0)
160 | else:
161 | inputImage = warpTask.get_aligned_image(
162 | f, dataChannel, zIndex, chromaticCorrector)
163 |
164 | if cropWidth > 0:
165 | inputImage[:cropWidth, :] = 0
166 | inputImage[inputImage.shape[0] - cropWidth:, :] = 0
167 | inputImage[:, :cropWidth] = 0
168 |                 inputImage[:, inputImage.shape[1] - cropWidth:] = 0
169 |
170 | if self.parameters['draw_fov_labels']:
171 | inputImage = cv2.putText(inputImage, str(f),
172 |                                          (int(0.2*inputImage.shape[1]),
173 |                                           int(0.2*inputImage.shape[0])),
174 | 0, 10, (65000, 65000, 65000), 20)
175 |
176 | transformedImage = self._transform_image_to_mosaic(
177 | inputImage, f, alignTask, micronExtents,
178 | mosaicDimensions)
179 |
180 | divisionMask = np.bitwise_and(
181 | transformedImage > 0, mosaic > 0)
182 | cv2.add(mosaic, transformedImage, dst=mosaic,
183 | mask=np.array(
184 | transformedImage > 0).astype(np.uint8))
185 | dividedMosaic = cv2.divide(mosaic, 2)
186 | mosaic[divisionMask] = dividedMosaic[divisionMask]
187 |
188 | return mosaic
189 |
--------------------------------------------------------------------------------
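
The _micron_to_mosaic_transform method above builds a 3x3 homogeneous matrix
that scales global micron coordinates by 1/micronsPerPixel and translates the
minimum extent to the mosaic origin. A minimal standalone sketch of the same
math (NumPy only; the extents and pixel size below are hypothetical values,
not taken from a real dataset):

import numpy as np

def micron_to_mosaic_transform(micronExtents, micronsPerPixel):
    # micronExtents is (minX, minY, maxX, maxY) in global micron coordinates.
    s = 1 / micronsPerPixel
    return np.float32([[s, 0, -s * micronExtents[0]],
                       [0, s, -s * micronExtents[1]],
                       [0, 0, 1]])

def micron_to_mosaic_pixel(micronCoordinates, micronExtents, micronsPerPixel):
    # Append 1 for homogeneous coordinates, apply the transform, drop the 1.
    t = micron_to_mosaic_transform(micronExtents, micronsPerPixel)
    return np.matmul(t, np.append(micronCoordinates, 1)).astype(np.int32)[:2]

# With extents (-100, -50, 300, 250) microns at 3 microns per pixel, the
# global origin (0, 0) maps to mosaic pixel (33, 16).
print(micron_to_mosaic_pixel((0, 0), (-100, -50, 300, 250), 3))
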
/merlin/analysis/partition.py:
--------------------------------------------------------------------------------
1 | import pandas
2 | import numpy as np
3 |
4 | from merlin.core import analysistask
5 | from merlin.util import spatialfeature
6 |
7 | class PartitionBarcodes(analysistask.ParallelAnalysisTask):
8 |
9 | """
10 | An analysis task that assigns RNAs and sequential signals to cells
11 | based on the boundaries determined during the segment task.
12 | """
13 |
14 | def __init__(self, dataSet, parameters=None, analysisName=None):
15 | super().__init__(dataSet, parameters, analysisName)
16 |
17 | def fragment_count(self):
18 | return len(self.dataSet.get_fovs())
19 |
20 | def get_estimated_memory(self):
21 | return 2048
22 |
23 | def get_estimated_time(self):
24 | return 1
25 |
26 | def get_dependencies(self):
27 | return [self.parameters['filter_task'],
28 | self.parameters['assignment_task'],
29 | self.parameters['alignment_task']]
30 |
31 | def get_partitioned_barcodes(self, fov: int = None) -> pandas.DataFrame:
32 |         """Retrieve the cell-by-barcode count matrices calculated from this
33 |         analysis task.
34 |
35 | Args:
36 |             fov: the fov to get the barcode table for. If not specified, the
37 |                 combined table for all fovs is returned.
38 |
39 | Returns:
40 | A pandas data frame containing the parsed barcode information.
41 | """
42 | if fov is None:
43 | return pandas.concat(
44 | [self.get_partitioned_barcodes(fov)
45 | for fov in self.dataSet.get_fovs()]
46 | )
47 |
48 | return self.dataSet.load_dataframe_from_csv(
49 | 'counts_per_cell', self.get_analysis_name(), fov, index_col=0)
50 |
51 | def _run_analysis(self, fragmentIndex):
52 | filterTask = self.dataSet.load_analysis_task(
53 | self.parameters['filter_task'])
54 | assignmentTask = self.dataSet.load_analysis_task(
55 | self.parameters['assignment_task'])
56 | alignTask = self.dataSet.load_analysis_task(
57 | self.parameters['alignment_task'])
58 |
59 | fovBoxes = alignTask.get_fov_boxes()
60 | fovIntersections = sorted([i for i, x in enumerate(fovBoxes) if
61 | fovBoxes[fragmentIndex].intersects(x)])
62 |
63 | codebook = filterTask.get_codebook()
64 | barcodeCount = codebook.get_barcode_count()
65 |
66 | bcDB = filterTask.get_barcode_database()
67 | for fi in fovIntersections:
68 | partialBC = bcDB.get_barcodes(fi)
69 | if fi == fovIntersections[0]:
70 | currentFOVBarcodes = partialBC.copy(deep=True)
71 | else:
72 |                 currentFOVBarcodes = pandas.concat(
73 |                     [currentFOVBarcodes, partialBC], axis=0)
74 |
75 | currentFOVBarcodes = currentFOVBarcodes.reset_index().copy(deep=True)
76 |
77 | sDB = assignmentTask.get_feature_database()
78 | currentCells = sDB.read_features(fragmentIndex)
79 |
80 | countsDF = pandas.DataFrame(
81 | data=np.zeros((len(currentCells), barcodeCount)),
82 | columns=range(barcodeCount),
83 | index=[x.get_feature_id() for x in currentCells])
84 |
85 | for cell in currentCells:
86 | contained = cell.contains_positions(currentFOVBarcodes.loc[:,
87 | ['global_x', 'global_y',
88 | 'z']].values)
89 | count = currentFOVBarcodes[contained].groupby('barcode_id').size()
90 | count = count.reindex(range(barcodeCount), fill_value=0)
91 | countsDF.loc[cell.get_feature_id(), :] = count.values.tolist()
92 |
93 | barcodeNames = [codebook.get_name_for_barcode_index(x)
94 | for x in countsDF.columns.values.tolist()]
95 | countsDF.columns = barcodeNames
96 |
97 | self.dataSet.save_dataframe_to_csv(
98 | countsDF, 'counts_per_cell', self.get_analysis_name(),
99 | fragmentIndex)
100 |
101 |
102 | class ExportPartitionedBarcodes(analysistask.AnalysisTask):
103 |
104 | """
105 |     An analysis task that combines the counts-per-cell data from each
106 |     field of view into a single output file.
107 | """
108 |
109 | def __init__(self, dataSet, parameters=None, analysisName=None):
110 | super().__init__(dataSet, parameters, analysisName)
111 |
112 | def get_estimated_memory(self):
113 | return 2048
114 |
115 | def get_estimated_time(self):
116 | return 5
117 |
118 | def get_dependencies(self):
119 | return [self.parameters['partition_task']]
120 |
121 | def _run_analysis(self):
122 | pTask = self.dataSet.load_analysis_task(
123 | self.parameters['partition_task'])
124 | parsedBarcodes = pTask.get_partitioned_barcodes()
125 |
126 | self.dataSet.save_dataframe_to_csv(
127 | parsedBarcodes, 'barcodes_per_feature',
128 | self.get_analysis_name())
129 |
--------------------------------------------------------------------------------
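
The core counting step in PartitionBarcodes._run_analysis above is the
groupby/reindex pattern: select the barcodes contained in a cell, count each
barcode id, and reindex against the full codebook so absent barcodes appear
as zeros. A toy sketch of that pattern, with an axis-aligned box standing in
for the spatial feature's contains_positions test:

import pandas as pd

# Toy barcode table; the columns mirror the ones used above.
barcodes = pd.DataFrame({
    'barcode_id': [0, 0, 1, 2, 2, 2],
    'global_x':   [1., 5., 2., 9., 1., 2.],
    'global_y':   [1., 5., 2., 9., 2., 1.],
    'z':          [0., 0., 0., 0., 0., 0.]})

barcodeCount = 4  # size of the hypothetical codebook

# Stand-in for cell.contains_positions: an axis-aligned box around one cell.
contained = ((barcodes['global_x'] < 4) & (barcodes['global_y'] < 4)).values

counts = barcodes[contained].groupby('barcode_id').size()
counts = counts.reindex(range(barcodeCount), fill_value=0)
print(counts.values.tolist())  # [1, 1, 2, 0]
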
/merlin/analysis/plotperformance.py:
--------------------------------------------------------------------------------
1 | import os
2 | from matplotlib import pyplot as plt
3 | import pandas
4 | import merlin
5 | import seaborn
6 | import numpy as np
7 | from typing import List
8 | from merlin.core import analysistask
9 | from merlin.analysis import filterbarcodes
10 | from random import sample
11 | import time
12 |
13 | from merlin import plots
14 | plt.style.use(
15 | os.sep.join([os.path.dirname(merlin.__file__),
16 | 'ext', 'default.mplstyle']))
17 |
18 |
19 | class PlotPerformance(analysistask.AnalysisTask):
20 |
21 | """
22 | An analysis task that generates plots depicting metrics of the MERFISH
23 | decoding.
24 | """
25 |
26 | def __init__(self, dataSet, parameters=None, analysisName=None):
27 | super().__init__(dataSet, parameters, analysisName)
28 |
29 |         if 'exclude_plots' not in self.parameters:
30 |             self.parameters['exclude_plots'] = []
31 |
32 | self.taskTypes = ['decode_task', 'filter_task', 'optimize_task',
33 | 'segment_task', 'sum_task', 'partition_task',
34 | 'global_align_task']
35 |
36 | def get_estimated_memory(self):
37 | return 30000
38 |
39 | def get_estimated_time(self):
40 | return 180
41 |
42 | def get_dependencies(self):
43 | return []
44 |
45 | def _run_analysis(self):
46 | taskDict = {t: self.dataSet.load_analysis_task(self.parameters[t])
47 | for t in self.taskTypes if t in self.parameters}
48 | plotEngine = plots.PlotEngine(self, taskDict)
49 | while not plotEngine.take_step():
50 | pass
51 |
--------------------------------------------------------------------------------
/merlin/analysis/preprocess.py:
--------------------------------------------------------------------------------
1 | import os
2 | import cv2
3 | import numpy as np
4 |
5 | from merlin.core import analysistask
6 | from merlin.util import deconvolve
7 | from merlin.util import aberration
8 | from merlin.util import imagefilters
9 | from merlin.data import codebook
10 |
11 |
12 | class Preprocess(analysistask.ParallelAnalysisTask):
13 |
14 | """
15 | An abstract class for preparing data for barcode calling.
16 | """
17 |
18 | def _image_name(self, fov):
19 | destPath = self.dataSet.get_analysis_subdirectory(
20 | self.analysisName, subdirectory='preprocessed_images')
21 | return os.sep.join([destPath, 'fov_' + str(fov) + '.tif'])
22 |
23 | def get_pixel_histogram(self, fov=None):
24 | if fov is not None:
25 | return self.dataSet.load_numpy_analysis_result(
26 | 'pixel_histogram', self.analysisName, fov, 'histograms')
27 |
28 | pixelHistogram = np.zeros(self.get_pixel_histogram(
29 | self.dataSet.get_fovs()[0]).shape)
30 | for f in self.dataSet.get_fovs():
31 | pixelHistogram += self.get_pixel_histogram(f)
32 |
33 | return pixelHistogram
34 |
35 | def _save_pixel_histogram(self, histogram, fov):
36 | self.dataSet.save_numpy_analysis_result(
37 | histogram, 'pixel_histogram', self.analysisName, fov, 'histograms')
38 |
39 |
40 | class DeconvolutionPreprocess(Preprocess):
41 |
42 | def __init__(self, dataSet, parameters=None, analysisName=None):
43 | super().__init__(dataSet, parameters, analysisName)
44 |
45 | if 'highpass_sigma' not in self.parameters:
46 | self.parameters['highpass_sigma'] = 3
47 | if 'decon_sigma' not in self.parameters:
48 | self.parameters['decon_sigma'] = 2
49 | if 'decon_filter_size' not in self.parameters:
50 | self.parameters['decon_filter_size'] = \
51 | int(2 * np.ceil(2 * self.parameters['decon_sigma']) + 1)
52 | if 'decon_iterations' not in self.parameters:
53 | self.parameters['decon_iterations'] = 20
54 | if 'codebook_index' not in self.parameters:
55 | self.parameters['codebook_index'] = 0
56 |
57 | self._highPassSigma = self.parameters['highpass_sigma']
58 | self._deconSigma = self.parameters['decon_sigma']
59 | self._deconIterations = self.parameters['decon_iterations']
60 |
61 | self.warpTask = self.dataSet.load_analysis_task(
62 | self.parameters['warp_task'])
63 |
64 | def fragment_count(self):
65 | return len(self.dataSet.get_fovs())
66 |
67 | def get_estimated_memory(self):
68 | return 2048
69 |
70 | def get_estimated_time(self):
71 | return 5
72 |
73 | def get_dependencies(self):
74 | return [self.parameters['warp_task']]
75 |
76 | def get_codebook(self) -> codebook.Codebook:
77 | return self.dataSet.get_codebook(self.parameters['codebook_index'])
78 |
79 | def get_processed_image_set(
80 | self, fov, zIndex: int = None,
81 | chromaticCorrector: aberration.ChromaticCorrector = None
82 | ) -> np.ndarray:
83 | if zIndex is None:
84 | return np.array([[self.get_processed_image(
85 | fov, self.dataSet.get_data_organization()
86 | .get_data_channel_for_bit(b), zIndex, chromaticCorrector)
87 | for zIndex in range(len(self.dataSet.get_z_positions()))]
88 | for b in self.get_codebook().get_bit_names()])
89 | else:
90 | return np.array([self.get_processed_image(
91 | fov, self.dataSet.get_data_organization()
92 | .get_data_channel_for_bit(b), zIndex, chromaticCorrector)
93 | for b in self.get_codebook().get_bit_names()])
94 |
95 | def get_processed_image(
96 | self, fov: int, dataChannel: int, zIndex: int,
97 | chromaticCorrector: aberration.ChromaticCorrector = None
98 | ) -> np.ndarray:
99 | inputImage = self.warpTask.get_aligned_image(fov, dataChannel, zIndex,
100 | chromaticCorrector)
101 | return self._preprocess_image(inputImage)
102 |
103 | def _high_pass_filter(self, inputImage: np.ndarray) -> np.ndarray:
104 | highPassFilterSize = int(2 * np.ceil(2 * self._highPassSigma) + 1)
105 | hpImage = imagefilters.high_pass_filter(inputImage,
106 | highPassFilterSize,
107 | self._highPassSigma)
108 |         return hpImage.astype(np.float64)
109 |
110 | def _run_analysis(self, fragmentIndex):
111 | warpTask = self.dataSet.load_analysis_task(
112 | self.parameters['warp_task'])
113 |
114 | histogramBins = np.arange(0, np.iinfo(np.uint16).max, 1)
115 | pixelHistogram = np.zeros(
116 | (self.get_codebook().get_bit_count(), len(histogramBins)-1))
117 |
118 |         # This currently only calculates the pixel histograms used to
119 |         # estimate the initial scale factors. This step is likely unnecessary.
120 | for bi, b in enumerate(self.get_codebook().get_bit_names()):
121 | dataChannel = self.dataSet.get_data_organization()\
122 | .get_data_channel_for_bit(b)
123 | for i in range(len(self.dataSet.get_z_positions())):
124 | inputImage = warpTask.get_aligned_image(
125 | fragmentIndex, dataChannel, i)
126 | deconvolvedImage = self._preprocess_image(inputImage)
127 |
128 | pixelHistogram[bi, :] += np.histogram(
129 | deconvolvedImage, bins=histogramBins)[0]
130 |
131 | self._save_pixel_histogram(pixelHistogram, fragmentIndex)
132 |
133 | def _preprocess_image(self, inputImage: np.ndarray) -> np.ndarray:
134 | deconFilterSize = self.parameters['decon_filter_size']
135 |
136 | filteredImage = self._high_pass_filter(inputImage)
137 | deconvolvedImage = deconvolve.deconvolve_lucyrichardson(
138 | filteredImage, deconFilterSize, self._deconSigma,
139 | self._deconIterations).astype(np.uint16)
140 | return deconvolvedImage
141 |
142 |
143 | class DeconvolutionPreprocessGuo(DeconvolutionPreprocess):
144 |
145 | def __init__(self, dataSet, parameters=None, analysisName=None):
146 | super().__init__(dataSet, parameters, analysisName)
147 |
148 | # Check for 'decon_iterations' in parameters instead of
149 | # self.parameters as 'decon_iterations' is added to
150 | # self.parameters by the super-class with a default value
151 | # of 20, but we want the default value to be 2.
152 | if 'decon_iterations' not in parameters:
153 | self.parameters['decon_iterations'] = 2
154 |
155 | self._deconIterations = self.parameters['decon_iterations']
156 |
157 | def _preprocess_image(self, inputImage: np.ndarray) -> np.ndarray:
158 | deconFilterSize = self.parameters['decon_filter_size']
159 |
160 | filteredImage = self._high_pass_filter(inputImage)
161 | deconvolvedImage = deconvolve.deconvolve_lucyrichardson_guo(
162 | filteredImage, deconFilterSize, self._deconSigma,
163 | self._deconIterations).astype(np.uint16)
164 | return deconvolvedImage
165 |
--------------------------------------------------------------------------------
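
DeconvolutionPreprocess first high-pass filters each image and then sharpens
it with Lucy-Richardson deconvolution. Below is a sketch of the high-pass
step under the assumption that merlin.util.imagefilters.high_pass_filter
performs Gaussian-blur subtraction (the actual helper may differ); note how
the filter window size is derived from sigma exactly as in the class above:

import numpy as np
import cv2

def gaussian_high_pass(image: np.ndarray, windowSize: int,
                       sigma: float) -> np.ndarray:
    # Subtract a Gaussian-blurred background and clip negatives to zero.
    lowpass = cv2.GaussianBlur(image.astype(np.float64),
                               (windowSize, windowSize), sigma)
    return np.clip(image.astype(np.float64) - lowpass, 0, None)

# Window size derived from sigma exactly as in DeconvolutionPreprocess.
sigma = 3
windowSize = int(2 * np.ceil(2 * sigma) + 1)
image = np.random.randint(0, 1000, (64, 64)).astype(np.uint16)
filtered = gaussian_high_pass(image, windowSize, sigma)
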
/merlin/analysis/sequential.py:
--------------------------------------------------------------------------------
1 | import pandas
2 | import rtree
3 | import networkx
4 | import numpy as np
5 | import cv2
6 | from skimage.measure import regionprops
7 |
8 | from merlin.core import analysistask
9 | from merlin.util import imagefilters
10 |
11 |
12 | class SumSignal(analysistask.ParallelAnalysisTask):
13 |
14 | """
15 | An analysis task that calculates the signal intensity within the boundaries
16 | of a cell for all rounds not used in the codebook, useful for measuring
17 | RNA species that were stained individually.
18 | """
19 |
20 | def __init__(self, dataSet, parameters=None, analysisName=None):
21 | super().__init__(dataSet, parameters, analysisName)
22 |
23 | if 'apply_highpass' not in self.parameters:
24 | self.parameters['apply_highpass'] = False
25 | if 'highpass_sigma' not in self.parameters:
26 | self.parameters['highpass_sigma'] = 5
27 | if 'z_index' not in self.parameters:
28 | self.parameters['z_index'] = 0
29 |
30 | if self.parameters['z_index'] >= len(self.dataSet.get_z_positions()):
31 | raise analysistask.InvalidParameterException(
32 |                 'Invalid z_index specified for %s. (%i >= %i)'
33 | % (self.analysisName, self.parameters['z_index'],
34 | len(self.dataSet.get_z_positions())))
35 |
36 | self.highpass = str(self.parameters['apply_highpass']).upper() == 'TRUE'
37 | self.alignTask = self.dataSet.load_analysis_task(
38 | self.parameters['global_align_task'])
39 |
40 | def fragment_count(self):
41 | return len(self.dataSet.get_fovs())
42 |
43 | def get_estimated_memory(self):
44 | return 2048
45 |
46 | def get_estimated_time(self):
47 | return 1
48 |
49 | def get_dependencies(self):
50 | return [self.parameters['warp_task'],
51 | self.parameters['segment_task'],
52 | self.parameters['global_align_task']]
53 |
54 | def _extract_signal(self, cells, inputImage, zIndex) -> pandas.DataFrame:
55 | cellCoords = []
56 | for cell in cells:
57 | regions = cell.get_boundaries()[zIndex]
58 | if len(regions) == 0:
59 | cellCoords.append([])
60 | else:
61 | pixels = []
62 | for region in regions:
63 | coords = region.exterior.coords.xy
64 | xyZip = list(zip(coords[0].tolist(), coords[1].tolist()))
65 | pixels.append(np.array(
66 | self.alignTask.global_coordinates_to_fov(
67 | cell.get_fov(), xyZip)))
68 | cellCoords.append(pixels)
69 |
70 | cellIDs = [str(cells[x].get_feature_id()) for x in range(len(cells))]
71 | mask = np.zeros(inputImage.shape, np.uint8)
72 | for i, cell in enumerate(cellCoords):
73 | cv2.drawContours(mask, cell, -1, i+1, -1)
74 | propsDict = {x.label: x for x in regionprops(mask, inputImage)}
75 | propsOut = pandas.DataFrame(
76 | data=[(propsDict[k].intensity_image.sum(),
77 | propsDict[k].filled_area)
78 | if k in propsDict else (0, 0)
79 | for k in range(1, len(cellCoords) + 1)],
80 | index=cellIDs,
81 | columns=['Intensity', 'Pixels'])
82 | return propsOut
83 |
84 | def _get_sum_signal(self, fov, channels, zIndex):
85 |
86 | fTask = self.dataSet.load_analysis_task(self.parameters['warp_task'])
87 | sTask = self.dataSet.load_analysis_task(self.parameters['segment_task'])
88 |
89 | cells = sTask.get_feature_database().read_features(fov)
90 |
91 | signals = []
92 | for ch in channels:
93 | img = fTask.get_aligned_image(fov, ch, zIndex)
94 | if self.highpass:
95 | highPassSigma = self.parameters['highpass_sigma']
96 | highPassFilterSize = int(2 * np.ceil(3 * highPassSigma) + 1)
97 | img = imagefilters.high_pass_filter(img,
98 | highPassFilterSize,
99 | highPassSigma)
100 | signals.append(self._extract_signal(cells, img,
101 | zIndex).iloc[:, [0]])
102 |
103 | # adding num of pixels
104 | signals.append(self._extract_signal(cells, img, zIndex).iloc[:, [1]])
105 |
106 |         compiledSignal = pandas.concat(signals, axis=1)
107 | compiledSignal.columns = channels+['Pixels']
108 |
109 | return compiledSignal
110 |
111 | def get_sum_signals(self, fov: int = None) -> pandas.DataFrame:
112 | """Retrieve the sum signals calculated from this analysis task.
113 |
114 | Args:
115 | fov: the fov to get the sum signals for. If not specified, the
116 | sum signals for all fovs are returned.
117 |
118 | Returns:
119 | A pandas data frame containing the sum signal information.
120 | """
121 | if fov is None:
122 | return pandas.concat(
123 | [self.get_sum_signals(fov) for fov in self.dataSet.get_fovs()]
124 | )
125 |
126 | return self.dataSet.load_dataframe_from_csv(
127 | 'sequential_signal', self.get_analysis_name(),
128 | fov, 'signals', index_col=0)
129 |
130 | def _run_analysis(self, fragmentIndex):
131 | zIndex = int(self.parameters['z_index'])
132 | channels, geneNames = self.dataSet.get_data_organization()\
133 | .get_sequential_rounds()
134 |
135 | fovSignal = self._get_sum_signal(fragmentIndex, channels, zIndex)
136 |         normSignal = fovSignal.iloc[:, :-1].div(fovSignal['Pixels'], axis=0)
137 | normSignal.columns = geneNames
138 |
139 | self.dataSet.save_dataframe_to_csv(
140 | normSignal, 'sequential_signal', self.get_analysis_name(),
141 | fragmentIndex, 'signals')
142 |
143 |
144 | class ExportSumSignals(analysistask.AnalysisTask):
145 | def __init__(self, dataSet, parameters=None, analysisName=None):
146 | super().__init__(dataSet, parameters, analysisName)
147 |
148 | def get_estimated_memory(self):
149 | return 2048
150 |
151 | def get_estimated_time(self):
152 | return 5
153 |
154 | def get_dependencies(self):
155 | return [self.parameters['sequential_task']]
156 |
157 | def _run_analysis(self):
158 | sTask = self.dataSet.load_analysis_task(
159 | self.parameters['sequential_task'])
160 | signals = sTask.get_sum_signals()
161 |
162 | self.dataSet.save_dataframe_to_csv(
163 | signals, 'sequential_sum_signals',
164 | self.get_analysis_name())
165 |
--------------------------------------------------------------------------------
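
SumSignal._extract_signal above rasterizes each cell boundary into a label
mask with cv2.drawContours and then uses skimage.measure.regionprops to sum
the intensity inside each label. A self-contained sketch of that
mask-then-measure pattern, with two hypothetical cell outlines:

import numpy as np
import cv2
from skimage.measure import regionprops

# Toy image and two hypothetical cell outlines in pixel coordinates.
image = np.random.randint(0, 500, (100, 100)).astype(np.uint16)
cells = [np.array([[10, 10], [40, 10], [40, 40], [10, 40]], dtype=np.int32),
         np.array([[60, 60], [90, 60], [90, 90]], dtype=np.int32)]

# Label mask: cell i is drawn filled with label i + 1 (0 stays background).
mask = np.zeros(image.shape, np.uint8)
for i, contour in enumerate(cells):
    cv2.drawContours(mask, [contour.reshape(-1, 1, 2)], -1, i + 1, -1)

# regionprops pairs each label with the intensity image restricted to it.
for p in regionprops(mask, image):
    print(p.label, p.intensity_image.sum(), p.filled_area)
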
/merlin/analysis/testtask.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from merlin.core import analysistask
4 |
5 | """This module contains dummy analysis tasks for running tests."""
6 |
7 |
8 | class SimpleAnalysisTask(analysistask.AnalysisTask):
9 |
10 | def __init__(self, dataSet, parameters=None, analysisName=None):
11 | super().__init__(dataSet, parameters, analysisName)
12 |
13 | def _run_analysis(self):
14 | pass
15 |
16 | def get_estimated_memory(self):
17 | return 100
18 |
19 | def get_estimated_time(self):
20 | return 1
21 |
22 | def get_dependencies(self):
23 | if 'dependencies' in self.parameters:
24 | return self.parameters['dependencies']
25 | else:
26 | return []
27 |
28 |
29 | class SimpleParallelAnalysisTask(analysistask.ParallelAnalysisTask):
30 |
31 | def __init__(self, dataSet, parameters=None, analysisName=None):
32 | super().__init__(dataSet, parameters, analysisName)
33 |
34 | def _run_analysis(self, fragmentIndex):
35 | pass
36 |
37 | def get_estimated_memory(self):
38 | return 100
39 |
40 | def get_estimated_time(self):
41 | return 1
42 |
43 | def get_dependencies(self):
44 | if 'dependencies' in self.parameters:
45 | return self.parameters['dependencies']
46 | else:
47 | return []
48 |
49 | def fragment_count(self):
50 | return 5
51 |
52 |
53 | class RandomNumberParallelAnalysisTask(analysistask.ParallelAnalysisTask):
54 |
55 | """A test analysis task that generates random numbers."""
56 |
57 | def __init__(self, dataSet, parameters=None, analysisName=None):
58 | super().__init__(dataSet, parameters, analysisName)
59 |
60 | def get_random_result(self, fragmentIndex):
61 | return self.dataSet.load_numpy_analysis_result('random_numbers',
62 | self, fragmentIndex)
63 |
64 | def _run_analysis(self, fragmentIndex):
65 | self.dataSet.save_numpy_analysis_result(
66 | fragmentIndex*np.random.rand(100), 'random_numbers', self,
67 | fragmentIndex)
68 |
69 | def get_estimated_memory(self):
70 | return 100
71 |
72 | def get_estimated_time(self):
73 | return 1
74 |
75 | def get_dependencies(self):
76 | if 'dependencies' in self.parameters:
77 | return self.parameters['dependencies']
78 | else:
79 | return []
80 |
81 | def fragment_count(self):
82 | return 10
83 |
84 |
85 | class SimpleInternallyParallelAnalysisTask(
86 | analysistask.InternallyParallelAnalysisTask):
87 |
88 | def __init__(self, dataSet, parameters=None, analysisName=None):
89 | super().__init__(dataSet, parameters, analysisName)
90 |
91 | def _run_analysis(self):
92 | pass
93 |
94 | def get_estimated_memory(self):
95 | return 100
96 |
97 | def get_estimated_time(self):
98 | return 1
99 |
100 | def get_dependencies(self):
101 | if 'dependencies' in self.parameters:
102 | return self.parameters['dependencies']
103 | else:
104 | return []
105 |
--------------------------------------------------------------------------------
/merlin/core/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/merlin/core/__init__.py
--------------------------------------------------------------------------------
/merlin/core/executor.py:
--------------------------------------------------------------------------------
1 | from abc import abstractmethod
2 | import multiprocessing
3 | import threading
4 | from typing import Callable
5 |
6 | from merlin.core import analysistask
7 |
8 |
9 | class Executor(object):
10 |
11 | def __init__(self):
12 | super().__init__()
13 |
14 | @abstractmethod
15 |     def run(self, task: analysistask.AnalysisTask, index: int = None,
16 |             rerunCompleted: bool = False) -> None:
17 | """Run an analysis task.
18 |
19 |         This method will not run analysis tasks that are already running.
20 |         Analysis that terminated early due to an error or otherwise will
21 |         not be restarted.
22 |
23 | Args:
24 | task: the analysis task to run.
25 | index: index of the analysis to run for a parallel analysis task.
26 |             rerunCompleted: flag indicating if previously completed
27 |                 analysis should be run again. If rerunCompleted is True,
28 |                 analysis will be run on the task regardless of its status.
29 |                 If rerunCompleted is False, analysis will only be run on
30 |                 the task or fragments of the task that have either not been
31 |                 started or have previously completed in error.
32 | """
33 | pass
34 |
35 |
36 | class LocalExecutor(Executor):
37 |
38 | def __init__(self, coreCount=None):
39 | super().__init__()
40 |
41 | if coreCount is None:
42 | self.coreCount = int(multiprocessing.cpu_count()*0.7)
43 | else:
44 | self.coreCount = coreCount
45 |
46 |     def run(self, task: analysistask.AnalysisTask, index: int = None,
47 |             rerunCompleted: bool = False) -> None:
48 | if task.is_complete() and not rerunCompleted:
49 | return
50 |
51 | if index is not None:
52 | task.run(index)
53 | else:
54 | task.run()
55 |
56 |
--------------------------------------------------------------------------------
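
LocalExecutor stores a coreCount but, as written above, runs each task
inline. The sketch below shows how a fragment fan-out could use that core
count with multiprocessing.Pool; this is only an illustration of the idea,
not how MERlin's executor actually behaves:

import multiprocessing

def _run_fragment(index):
    # Stand-in for task.run(index); a real fragment would do actual work.
    print('running fragment', index)

def run_parallel(runFragment, fragmentCount, coreCount):
    # Fan the fragment indexes out over a worker pool.
    with multiprocessing.Pool(processes=coreCount) as pool:
        pool.map(runFragment, range(fragmentCount))

if __name__ == '__main__':
    run_parallel(_run_fragment, fragmentCount=5, coreCount=2)
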
/merlin/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/merlin/data/__init__.py
--------------------------------------------------------------------------------
/merlin/data/codebook.py:
--------------------------------------------------------------------------------
1 | import os
2 | import csv
3 | import numpy as np
4 | import pandas
5 | from typing import List
6 | from typing import Union
7 |
8 | import merlin
9 |
10 |
11 | def _parse_barcode_from_string(inputString):
12 |     return np.array([int(x) for x in inputString if x != ' '])
13 |
14 |
15 | class Codebook(object):
16 |
17 | """
18 | A Codebook stores the association of barcodes to genes.
19 | """
20 |
21 | def __init__(self, dataSet, filePath, codebookIndex: int = 0,
22 | codebookName: str = None):
23 | """
24 | Create a new Codebook for the data in the specified data set.
25 |
26 |         If filePath does not point to an existing file, it is resolved
27 |         relative to the codebook home directory. The Codebook at the
28 |         resolved path is loaded in either the current or the legacy
29 |         format and stored in the dataSet, overwriting any previously
30 |         stored Codebook.
31 | """
32 | self._dataSet = dataSet
33 | if not os.path.exists(filePath):
34 | filePath = os.sep.join([merlin.CODEBOOK_HOME, filePath])
35 |
36 | newVersion = True
37 | with open(filePath, 'r') as f:
38 | if 'version' in f.readline():
39 | newVersion = False
40 |
41 | if newVersion:
42 | self._data = pandas.read_csv(filePath)
43 | else:
44 | headerLength = 3
45 | barcodeData = pandas.read_csv(
46 | filePath, header=headerLength, skipinitialspace=True,
47 | usecols=['name', 'id', 'barcode'],
48 | converters={'barcode': _parse_barcode_from_string})
49 | with open(filePath, 'r') as inFile:
50 | csvReader = csv.reader(inFile, delimiter=',')
51 | header = [row for i, row in enumerate(csvReader)
52 | if i < headerLength]
53 |
54 | bitNames = [x.strip() for x in header[2][1:]]
55 |
56 | self._data = self._generate_codebook_dataframe(
57 | barcodeData, bitNames)
58 |
59 | if not codebookName:
60 | codebookName = os.path.splitext(os.path.basename(filePath))[0]
61 | self._codebookName = codebookName
62 | self._codebookIndex = codebookIndex
63 | self._dataSet.save_codebook(self)
64 |
65 | @staticmethod
66 | def _generate_codebook_dataframe(barcodeData, bitNames):
67 | dfData = np.array([[currentRow['name'], currentRow['id']]
68 | + currentRow['barcode'].tolist()
69 | for i, currentRow in barcodeData.iterrows()])
70 | df = pandas.DataFrame(dfData, columns=['name', 'id'] + bitNames)
71 | df[bitNames] = df[bitNames].astype('uint8')
72 | return df
73 |
74 | def get_data(self) -> pandas.DataFrame:
75 | """ Get the dataframe that contains the information for this codebook
76 |
77 | Returns: The pandas dataframe
78 | """
79 | return self._data
80 |
81 | def get_barcode(self, index: int) -> List[bool]:
82 | """ Get the barcode with the specified index.
83 |
84 | Args:
85 | index: the index of the barcode in the barcode list
86 | Returns:
87 | A list of 0's and 1's denoting the barcode
88 | """
89 | return [self._data.loc[index][n] for n in self.get_bit_names()]
90 |
91 | def get_barcode_count(self) -> int:
92 | """
93 | Get the number of barcodes in this codebook.
94 |
95 | Returns:
96 | The number of barcodes, counting barcodes for blanks and genes
97 | """
98 | return len(self._data)
99 |
100 | def get_bit_count(self) -> int:
101 | """
102 | Get the number of bits used for MERFISH barcodes in this codebook.
103 | """
104 | return len(self.get_bit_names())
105 |
106 | def get_bit_names(self) -> List[str]:
107 | """ Get the names of the bits for this MERFISH data set.
108 |
109 | Returns:
110 | A list of the names of the bits in order from the lowest to highest
111 | """
112 | return [s for s in self._data.columns if s not in ['name', 'id']]
113 |
114 |     def get_barcodes(self, ignoreBlanks: bool = False) -> np.ndarray:
115 | """ Get the barcodes present in this codebook.
116 |
117 | Args:
118 | ignoreBlanks: flag indicating whether barcodes corresponding
119 | to blanks should be included.
120 | Returns:
121 | A list of the barcodes represented as lists of bits.
122 | """
123 | bitNames = self.get_bit_names()
124 | if ignoreBlanks:
125 | return np.array([[x[n] for n in bitNames] for i, x
126 | in self._data.iterrows()
127 | if 'BLANK' not in x['name'].upper()])
128 | else:
129 | return np.array([[x[n] for n in bitNames]
130 | for i, x in self._data.iterrows()])
131 |
132 | def get_coding_indexes(self) -> List[int]:
133 | """ Get the barcode indexes that correspond with genes.
134 |
135 | Returns:
136 | A list of barcode indexes that correspond with genes and not
137 | blanks
138 | """
139 | return self._data[
140 | ~self._data['name'].str.contains('Blank', case=False)].index
141 |
142 | def get_blank_indexes(self) -> List[int]:
143 | """ Get the barcode indexes that do not correspond with genes.
144 |
145 | Returns:
146 | A list of barcode indexes that correspond with blanks
147 | """
148 | return self._data[
149 | self._data['name'].str.contains('Blank', case=False)].index
150 |
151 | def get_gene_names(self) -> List[str]:
152 |     """ Get the names of the genes represented in this codebook.
153 |
154 | Returns:
155 | A list of the gene names. The list does not contain the names of
156 | the blanks.
157 | """
158 | return self._data.loc[self.get_coding_indexes()]['name'].tolist()
159 |
160 | def get_name_for_barcode_index(self, index: int) -> str:
161 | """ Get the gene name for the barcode with the specified index.
162 |
163 | Returns:
164 | The gene name
165 | """
166 | return self._data.loc[index]['name']
167 |
168 | def get_barcode_index_for_name(self, name: str) -> Union[int, None]:
169 | """ Get the barcode index for the barcode with the specified name.
170 |
171 | Returns:
172 | The barcode index. If name appears more than once, the index of
173 | the first appearance is returned. If name is not in this codebook
174 | then None is returned.
175 | """
176 | matches = self._data[self._data['name'].str.match('^' + name + '$')]
177 | if len(matches) == 0:
178 | return None
179 | return matches.index[0]
180 |
181 | def get_codebook_name(self) -> str:
182 | """ Gets the name of this codebook
183 |
184 | Returns:
185 | The codebook name. This is the original file name of codebook.
186 | """
187 | return self._codebookName
188 |
189 | def get_codebook_index(self) -> int:
190 | """ Get the index of this codebook
191 |
192 | Returns:
193 | The codebook index. All codebooks associated with the same dataset
194 | will have unique indexes starting from 0.
195 | """
196 | return self._codebookIndex
197 |
--------------------------------------------------------------------------------
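
get_coding_indexes and get_blank_indexes above split the codebook on a
case-insensitive 'Blank' match in the name column. A toy sketch of that
selection on a hypothetical four-row codebook frame:

import pandas as pd

# Toy codebook frame using the same 'name' column convention as above.
data = pd.DataFrame({'name': ['GeneA', 'Blank-1', 'GeneB', 'blank-2'],
                     'id': ['a1', '', 'b1', '']})

codingIndexes = data[~data['name'].str.contains('Blank', case=False)].index
blankIndexes = data[data['name'].str.contains('Blank', case=False)].index
print(list(codingIndexes), list(blankIndexes))  # [0, 2] [1, 3]
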
/merlin/ext/default.mplstyle:
--------------------------------------------------------------------------------
1 | font.family : arial
2 |
3 | xtick.major.size : 3
4 | xtick.minor.size : 1.5
5 | xtick.major.pad : 2
6 | xtick.labelsize : 8
7 | xtick.direction : in
8 |
9 | ytick.major.size : 3
10 | ytick.minor.size : 1.5
11 | ytick.major.pad : 2
12 | ytick.labelsize : 8
13 | ytick.direction : in
14 |
15 |
16 | axes.facecolor : w
17 | axes.labelweight : bold
18 | axes.labelsize : 12
19 |
20 | axes.titlesize : 14
21 | axes.titleweight: bold
22 |
--------------------------------------------------------------------------------
/merlin/plots/__init__.py:
--------------------------------------------------------------------------------
1 | import inspect
2 | import pkgutil
3 | import importlib
4 | from typing import Set, List
5 |
6 | import merlin
7 | from merlin.plots._base import AbstractPlot
8 | from merlin.plots._base import PlotMetadata
9 |
10 |
11 | def get_available_plots() -> Set:
12 | """ Get all plots defined within any submodule of merlin.plots
13 |
14 | Returns: a set of references to the plots
15 | """
16 | plotSet = set()
17 | for importer, modname, ispkg in pkgutil.iter_modules(merlin.plots.__path__):
18 | currentModule = importlib.import_module(
19 | merlin.plots.__name__ + '.' + modname)
20 | for name, obj in inspect.getmembers(currentModule):
21 | if inspect.isclass(obj)\
22 | and issubclass(obj, AbstractPlot)\
23 | and obj != AbstractPlot:
24 | plotSet.add(obj)
25 | return plotSet
26 |
27 |
28 | class PlotEngine:
29 |
30 | def __init__(self, plotTask, taskDict):
31 | """ Create a new plot engine.
32 |
33 | Args:
34 | plotTask: the analysis task to save the plots and plot
35 | metadata into
36 | taskDict: a dictionary containing references to the analysis
37 | tasks to use for plotting results.
38 | """
39 | self.taskDict = taskDict
40 | availablePlots = [x(plotTask) for x in get_available_plots()]
41 | self.plotList = [x for x in availablePlots if x.is_relevant(taskDict)]
42 |
43 | requiredMetadata = \
44 | {m for p in self.plotList for m in p.get_required_metadata()}
45 | self.metadataDict = {x.metadata_name(): x(plotTask, taskDict)
46 | for x in requiredMetadata}
47 |
48 | def get_plots(self) -> List[AbstractPlot]:
49 | """ Get a list of the plots that this plot engine will generate.
50 |
51 | Returns: A list of the plot objects that will be generated by this
52 | plot engine.
53 | """
54 | return self.plotList
55 |
56 | def take_step(self) -> bool:
57 | """ Generate metadata and plots from newly available analysis results.
58 |
59 | Returns: True if all plots have been generated and otherwise false.
60 | """
61 |
62 | incompletePlots = [p for p in self.plotList if not p.is_complete()]
63 | if len(incompletePlots) == 0:
64 | return True
65 |
66 | for m in self.metadataDict.values():
67 | m.update()
68 |
69 | completeTasks = [k for k, v in self.taskDict.items() if v.is_complete()]
70 | completeMetadata = [k for k, v in self.metadataDict.items()
71 | if v.is_complete()]
72 | readyPlots = [p for p in incompletePlots
73 | if p.is_ready(completeTasks, completeMetadata)]
74 | for p in readyPlots:
75 | p.plot(self.taskDict, self.metadataDict)
76 |
77 | return len([p for p in self.plotList if not p.is_complete()]) == 0
78 |
--------------------------------------------------------------------------------
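
get_available_plots above is an instance of a general plugin-discovery idiom:
walk every submodule of a package with pkgutil.iter_modules, import it, and
collect the strict subclasses of a base class. A generic sketch of the same
idiom:

import importlib
import inspect
import pkgutil

def discover_subclasses(package, baseClass):
    # Walk every submodule of `package`, import it, and collect strict
    # subclasses of `baseClass`, mirroring get_available_plots above.
    found = set()
    for _, modname, _ in pkgutil.iter_modules(package.__path__):
        module = importlib.import_module(package.__name__ + '.' + modname)
        for _, obj in inspect.getmembers(module, inspect.isclass):
            if issubclass(obj, baseClass) and obj is not baseClass:
                found.add(obj)
    return found

# Usage, assuming the merlin package is importable:
#   import merlin.plots
#   from merlin.plots._base import AbstractPlot
#   print(discover_subclasses(merlin.plots, AbstractPlot))
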
/merlin/plots/_base.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from abc import ABC, abstractmethod
3 | from typing import List, Dict, Tuple
4 | from matplotlib import pyplot as plt
5 |
6 | from merlin.core import analysistask
7 |
8 |
9 | class AbstractPlot(ABC):
10 |
11 | """
12 | A base class for generating a plot of the analysis results. Each plot
13 | should inherit from this class.
14 | """
15 |
16 | def __init__(self, analysisTask: analysistask.AnalysisTask):
17 | """ Create a new AbstractPlot
18 |
19 | Args:
20 | analysisTask: the analysisTask where the plot should be saved.
21 | """
22 | self._analysisTask = analysisTask
23 |
24 | def figure_name(self) -> str:
25 | """ Get the name for identifying this figure.
26 |
27 | Returns: the name of this figure
28 | """
29 | return type(self).__name__
30 |
31 | @abstractmethod
32 | def get_required_tasks(self) -> Dict[str, Tuple[type]]:
33 | """ Get the tasks that are required to be complete prior to
34 | generating this plot.
35 |
36 | Returns: A dictionary of the types of tasks as keys and a tuple
37 | of the accepted classes as values. The keys can include
38 | decode_task, filter_task, optimize_task, segment_task,
39 | sum_task, partition_task, and/or global_align_task. If all classes
40 | of the specified type are allowed, the value should be 'all'. If
41 | no tasks are required then an empty dictionary should be returned.
42 | """
43 | pass
44 |
45 | @abstractmethod
46 | def get_required_metadata(self) -> List[object]:
47 | """ Get the plot metadata that is required to generate this plot.
48 |
49 | Returns: A list of class references for the metadata
50 | objects that are required for this task.
51 | """
52 | pass
53 |
54 | @abstractmethod
55 | def _generate_plot(self, inputTasks: Dict[str, analysistask.AnalysisTask],
56 | inputMetadata: Dict[str, 'PlotMetadata']) -> plt.Figure:
57 | """ Generate the plot.
58 |
59 | This function should be implemented in all subclasses and the generated
60 | figure handle should be returned.
61 |
62 | Args:
63 | inputTasks: A dictionary of the input tasks to use to generate the
64 | plot. Each analysis task is indexed by a string indicating
65 | the task type as in get_required_tasks.
66 | inputMetadata: A dictionary of the input metadata for generating
67 | this plot. Each metadata object is indexed by the name of the
68 | metadata.
69 | Returns: the figure handle to the newly generated figure
70 | """
71 | pass
72 |
73 | def is_relevant(self, inputTasks: Dict[str, analysistask.AnalysisTask]
74 | ) -> bool:
75 | """ Determine if this plot is relevant given the analysis tasks
76 | provided.
77 |
78 | Args:
79 | inputTasks: A dictionary of the analysis tasks indexed with
80 | strings indicating the task type as in get_required_tasks
81 | Returns: True if this plot can be generated using the provided
82 | analysis tasks and false otherwise.
83 | """
84 | for rTask, rTypes in self.get_required_tasks().items():
85 | if rTask not in inputTasks:
86 | return False
87 | if rTypes != 'all' \
88 | and not isinstance(inputTasks[rTask], rTypes):
89 | return False
90 | return True
91 |
92 | def is_ready(self, completeTasks: List[str],
93 | completeMetadata: List[str]) -> bool:
94 | """ Determine if all requirements for generating this plot are
95 | satisfied.
96 |
97 | Args:
98 | completeTasks: A list of the types of tasks that are complete.
99 | The list can contain the same strings as in get_required_tasks
100 | completeMetadata: A list of the metadata that has been generated.
101 | Returns: True if all required tasks and all required metadata
102 | is complete
103 | """
104 | return all([t in completeTasks for t in self.get_required_tasks()])\
105 | and all([m.metadata_name() in completeMetadata
106 | for m in self.get_required_metadata()])
107 |
108 | def is_complete(self) -> bool:
109 | """ Determine if this plot has been generated.
110 |
111 | Returns: True if this plot has been generated and otherwise false.
112 | """
113 | return self._analysisTask.dataSet.figure_exists(
114 | self._analysisTask, self.figure_name(),
115 | type(self).__module__.split('.')[-1])
116 |
117 | def plot(self, inputTasks: Dict[str, analysistask.AnalysisTask],
118 | inputMetadata: Dict[str, 'PlotMetadata']) -> None:
119 | """ Generate this plot and save it within the analysis task.
120 |
121 | If the plot is not relevant for the types of analysis tasks passed,
122 | then the function will return without generating any plot.
123 |
124 | Args:
125 | inputTasks: A dictionary of the input tasks to use to generate the
126 | plot. Each analysis task is indexed by a string indicating
127 | the task type as in get_required_tasks.
128 | inputMetadata: A dictionary of the input metadata for generating
129 | this plot. Each metadata object is indexed by the name of the
130 | metadata.
131 | """
132 | if not self.is_relevant(inputTasks):
133 | return
134 | f = self._generate_plot(inputTasks, inputMetadata)
135 | f.tight_layout(pad=1)
136 | self._analysisTask.dataSet.save_figure(
137 | self._analysisTask, f, self.figure_name(),
138 | type(self).__module__.split('.')[-1])
139 | plt.close(f)
140 |
141 |
142 | class PlotMetadata(ABC):
143 |
144 | def __init__(self, analysisTask: analysistask.AnalysisTask,
145 | taskDict: Dict[str, analysistask.AnalysisTask]):
146 | """ Create a new metadata object.
147 |
148 | Args:
149 | analysisTask: the analysisTask where the metadata should be saved.
150 | taskDict: a dictionary containing the analysis tasks to use
151 | to generate the metadata indexed by the type of task as a
152 | string as in get_required_tasks
153 | """
154 | self._analysisTask = analysisTask
155 | self._taskDict = taskDict
156 |
157 | @classmethod
158 | def metadata_name(cls) -> str:
159 | return cls.__module__.split('.')[-1] + '/' + cls.__name__
160 |
161 | def _load_numpy_metadata(self, resultName: str,
162 | defaultValue: np.ndarray = None) -> np.ndarray:
163 | """ Convenience method for reading a result created by this metadata
164 | from the dataset.
165 |
166 | Args:
167 | resultName: the name of the metadata result
168 | defaultValue: the value to return if the metadata is not found
169 | Returns: a numpy array with the result or defaultValue if an IOError is
170 | raised while reading the metadata
171 | """
172 | return self._analysisTask.dataSet\
173 | .load_numpy_analysis_result_if_available(
174 | resultName, self._analysisTask, defaultValue,
175 | subdirectory=self.metadata_name())
176 |
177 | def _save_numpy_metadata(self, result: np.ndarray, resultName: str) -> None:
178 | """ Convenience method for saving a result created by this metadata
179 | from the dataset.
180 |
181 | Args:
182 | result: the numpy array to save
183 | resultName: the name of the metadata result
184 | """
185 | self._analysisTask.dataSet.save_numpy_analysis_result(
186 | result, resultName, self._analysisTask,
187 | subdirectory=self.metadata_name())
188 |
189 | @abstractmethod
190 | def update(self) -> None:
191 | """ Update this metadata with the latest analysis results.
192 |
193 |         This method should be implemented in all subclasses.
194 |         Implementations should update the metadata as far as the data
195 |         that is ready when the function is called allows, and should
196 |         return promptly rather than block waiting for additional
197 |         analysis to complete.
198 | """
199 | pass
200 |
201 | @abstractmethod
202 | def is_complete(self) -> bool:
203 | """ Determine if this metadata is complete.
204 |
205 | Returns: True if the metadata is complete or False if additional
206 | computation is necessary
207 | """
208 | pass
209 |
--------------------------------------------------------------------------------
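
A concrete plot only needs to fill in the three abstract methods of
AbstractPlot. A hypothetical minimal subclass (not one of MERlin's shipped
plots) that requires no upstream tasks and draws random data:

import numpy as np
from matplotlib import pyplot as plt

from merlin.plots._base import AbstractPlot

class ExampleScatterPlot(AbstractPlot):
    """A hypothetical minimal plot; not part of MERlin."""

    def __init__(self, analysisTask):
        super().__init__(analysisTask)

    def get_required_tasks(self):
        return {}  # no upstream tasks are needed for this example

    def get_required_metadata(self):
        return []

    def _generate_plot(self, inputTasks, inputMetadata):
        fig = plt.figure(figsize=(5, 5))
        plt.plot(np.random.rand(100), 'x')
        plt.xlabel('Index')
        plt.ylabel('Value')
        return fig
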
/merlin/plots/optimizationplots.py:
--------------------------------------------------------------------------------
1 | import seaborn
2 | from matplotlib import pyplot as plt
3 |
4 | from merlin.plots._base import AbstractPlot
5 |
6 |
7 | class OptimizationScaleFactorsPlot(AbstractPlot):
8 |
9 | def __init__(self, analysisTask):
10 | super().__init__(analysisTask)
11 |
12 | def get_required_tasks(self):
13 | return {'optimize_task': 'all'}
14 |
15 | def get_required_metadata(self):
16 | return []
17 |
18 | def _generate_plot(self, inputTasks, inputMetadata):
19 | fig = plt.figure(figsize=(5, 5))
20 | seaborn.heatmap(
21 | inputTasks['optimize_task'].get_scale_factor_history())
22 | plt.xlabel('Bit index')
23 | plt.ylabel('Iteration number')
24 | plt.title('Scale factor optimization history')
25 | return fig
26 |
27 |
28 | class ScaleFactorVsBitNumberPlot(AbstractPlot):
29 |
30 | def __init__(self, analysisTask):
31 | super().__init__(analysisTask)
32 |
33 | def get_required_tasks(self):
34 | return {'optimize_task': 'all'}
35 |
36 | def get_required_metadata(self):
37 | return []
38 |
39 | def _generate_plot(self, inputTasks, inputMetadata):
40 | optimizeTask = inputTasks['optimize_task']
41 | codebook = optimizeTask.get_codebook()
42 | dataOrganization = optimizeTask.dataSet.get_data_organization()
43 | colors = [dataOrganization.get_data_channel_color(
44 | dataOrganization.get_data_channel_for_bit(x))
45 | for x in codebook.get_bit_names()]
46 |
47 | scaleFactors = optimizeTask.get_scale_factors()
48 | scaleFactorsByColor = {c: [] for c in set(colors)}
49 | for i, s in enumerate(scaleFactors):
50 | scaleFactorsByColor[colors[i]].append((i, s))
51 |
52 | fig = plt.figure(figsize=(5, 5))
53 | for c, d in scaleFactorsByColor.items():
54 | plt.plot([x[0] for x in d], [x[1] for x in d], 'o')
55 |
56 | plt.legend(scaleFactorsByColor.keys())
57 | plt.ylim(bottom=0)
58 | plt.xlabel('Bit index')
59 | plt.ylabel('Scale factor magnitude')
60 | plt.title('Scale factor magnitude vs bit index')
61 | return fig
62 |
63 |
64 | class OptimizationBarcodeCountsPlot(AbstractPlot):
65 |
66 | def __init__(self, analysisTask):
67 | super().__init__(analysisTask)
68 |
69 | def get_required_tasks(self):
70 | return {'optimize_task': 'all'}
71 |
72 | def get_required_metadata(self):
73 | return []
74 |
75 | def _generate_plot(self, inputTasks, inputMetadata):
76 | fig = plt.figure(figsize=(5, 5))
77 | seaborn.heatmap(
78 | inputTasks['optimize_task'].get_barcode_count_history())
79 | plt.xlabel('Barcode index')
80 | plt.ylabel('Iteration number')
81 | plt.title('Barcode counts optimization history')
82 | return fig
83 |
--------------------------------------------------------------------------------
/merlin/plots/segmentationplots.py:
--------------------------------------------------------------------------------
1 | from matplotlib import pyplot as plt
2 | import numpy as np
3 |
4 | from merlin.plots._base import AbstractPlot
5 |
6 |
7 | class SegmentationBoundaryPlot(AbstractPlot):
8 |
9 | def __init__(self, analysisTask):
10 | super().__init__(analysisTask)
11 |
12 | def get_required_tasks(self):
13 | return {'segment_task': 'all'}
14 |
15 | def get_required_metadata(self):
16 | return []
17 |
18 | def _generate_plot(self, inputTasks, inputMetadata):
19 | featureDB = inputTasks['segment_task'].get_feature_database()
20 | features = featureDB.read_features()
21 |
22 | fig = plt.figure(figsize=(15, 15))
23 | ax = fig.add_subplot(111)
24 | ax.set_aspect('equal', 'datalim')
25 |
26 | if len(features) == 0:
27 | return fig
28 |
29 | zPosition = 0
30 | if len(features[0].get_boundaries()) > 1:
31 | zPosition = int(len(features[0].get_boundaries())/2)
32 |
33 | featuresSingleZ = [feature.get_boundaries()[int(zPosition)]
34 | for feature in features]
35 | featuresSingleZ = [x for y in featuresSingleZ for x in y]
36 | allCoords = [[feature.exterior.coords.xy[0].tolist(),
37 | feature.exterior.coords.xy[1].tolist()]
38 | for feature in featuresSingleZ]
39 | allCoords = [x for y in allCoords for x in y]
40 | plt.plot(*allCoords)
41 |
42 | plt.xlabel('X position (microns)')
43 | plt.ylabel('Y position (microns)')
44 | plt.title('Segmentation boundaries')
45 | return fig
46 |
--------------------------------------------------------------------------------
/merlin/plots/testplots.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from matplotlib import pyplot as plt
3 |
4 | from merlin.plots._base import AbstractPlot
5 | from merlin.plots._base import PlotMetadata
6 |
7 |
8 | class TestPlot(AbstractPlot):
9 |
10 | def __init__(self, analysisTask):
11 | super().__init__(analysisTask)
12 |
13 | def get_required_tasks(self):
14 | return {'test_task': 'all'}
15 |
16 | def get_required_metadata(self):
17 | return [TestPlotMetadata]
18 |
19 | def _generate_plot(self, inputTasks, inputMetadata):
20 | fig = plt.figure(figsize=(10, 10))
21 | plt.plot(inputMetadata['testplots/TestPlotMetadata'].get_mean_values(),
22 | 'x')
23 | return fig
24 |
25 |
26 | class TestPlotMetadata(PlotMetadata):
27 |
28 | def __init__(self, analysisTask, taskDict):
29 | super().__init__(analysisTask, taskDict)
30 | self.testTask = self._taskDict['test_task']
31 | self.completeFragments = [False]*self.testTask.fragment_count()
32 | self.meanValues = np.zeros(self.testTask.fragment_count())
33 |
34 | def get_mean_values(self) -> np.ndarray:
35 | return self.meanValues
36 |
37 | def update(self) -> None:
38 | testTask = self._taskDict['test_task']
39 |
40 | for i in range(testTask.fragment_count()):
41 | if not self.completeFragments[i] and testTask.is_complete(i):
42 | self.meanValues[i] = np.mean(self.testTask.get_random_result(i))
43 | self.completeFragments[i] = True
44 |
45 | def is_complete(self) -> bool:
46 | return all(self.completeFragments)
47 |
--------------------------------------------------------------------------------
/merlin/util/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/merlin/util/__init__.py
--------------------------------------------------------------------------------
/merlin/util/aberration.py:
--------------------------------------------------------------------------------
1 | from typing import Dict
2 | from skimage import transform
3 | import numpy as np
4 | from abc import ABC
5 | from abc import abstractmethod
6 |
7 | """
8 | This module contains tools for measuring and correcting chromatic aberrations.
9 | """
10 |
11 |
12 | class ChromaticCorrector(ABC):
13 |
14 | """
15 | An abstract class for color-specific image transformation.
16 | """
17 |
18 | @abstractmethod
19 | def transform_image(self, inputImage: np.ndarray, imageColor: str
20 | ) -> np.ndarray:
21 | """Transform inputImage to the reference color.
22 |
23 | Args:
24 | inputImage: The image to transform. If inputImage has two
25 | dimensions, it is transformed as a single image. If inputImage
26 | has three dimensions, each element in the first dimension is
27 | transformed as an image as (z, x, y).
28 | imageColor: The color of the input image as a string. If the color
29 | of the input image is not in the set of transformations for
30 | this corrector, no transformation is applied.
31 | """
32 | pass
33 |
34 |
35 | class IdentityChromaticCorrector(ChromaticCorrector):
36 |
37 | """
38 | A class for correcting chromatic aberration that performs no transformation.
39 | """
40 |
41 | def __init__(self):
42 | pass
43 |
44 | def transform_image(self, inputImage: np.ndarray, imageColor: str
45 | ) -> np.ndarray:
46 | return inputImage
47 |
48 |
49 | class RigidChromaticCorrector(ChromaticCorrector):
50 |
51 | """
52 | A class for correcting chromatic aberration using rigid transformation
53 | matrices.
54 | """
55 |
56 | def __init__(self, transformations: Dict[str, Dict[
57 |             str, transform.EuclideanTransform]], referenceColor: str = None):
58 | """Creates a new RigidChromaticCorrector that transforms images
59 | using the specified transformations.
60 |
61 | Args:
62 | transformations: A dictionary of transformations
63 | referenceColor: the name of the color to transform the images to
64 | """
65 |
66 | self.transformations = transformations
67 | if referenceColor is None:
68 | self.referenceColor = min(transformations.keys())
69 | else:
70 | self.referenceColor = referenceColor
71 |
72 | def transform_image(self, inputImage: np.ndarray, imageColor: str
73 | ) -> np.ndarray:
74 | if imageColor not in self.transformations[self.referenceColor]:
75 | return inputImage
76 |
77 | if imageColor == self.referenceColor:
78 | return inputImage
79 |
80 | if len(inputImage.shape) == 3:
81 | return np.array([self.transform_image(x, imageColor)
82 | for x in inputImage])
83 |
84 | return transform.warp(
85 | inputImage,
86 | self.transformations[self.referenceColor][imageColor],
87 | preserve_range=True)
88 |
--------------------------------------------------------------------------------
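
The transformations dictionary consumed by RigidChromaticCorrector is keyed
as [referenceColor][imageColor]. A usage sketch with a hypothetical rigid
shift between two channel names (assuming the merlin package is importable;
the shift values are made up):

import numpy as np
from skimage import transform

from merlin.util.aberration import RigidChromaticCorrector

# Hypothetical rigid shift between the '750' and '650' channels. The nested
# dictionary is keyed [referenceColor][imageColor], as transform_image expects.
shift = transform.EuclideanTransform(translation=(1.5, -0.5))
corrector = RigidChromaticCorrector({'650': {'750': shift}},
                                    referenceColor='650')

image = np.random.rand(32, 32)
corrected = corrector.transform_image(image, '750')  # warped toward '650'
unchanged = corrector.transform_image(image, '650')  # reference passes through
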
/merlin/util/barcodefilters.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from scipy.spatial import cKDTree
3 | import networkx as nx
4 | import pandas as pd
5 | from typing import List
6 |
7 |
8 | def remove_zplane_duplicates_all_barcodeids(barcodes: pd.DataFrame,
9 | zPlanes: int,
10 | maxDist: float,
11 | allZPos: List) -> pd.DataFrame:
12 | """ Depending on the separation between z planes, spots from a single
13 | molecule may be observed in more than one z plane. These putative
14 | duplicates are removed based on supplied distance and z plane
15 | constraints. In evaluating this method, when z planes are separated
16 | by 1.5 µm the likelihood of finding a putative duplicate above or below
17 | the selected plane is ~5-10%, whereas the false-positive rate is closer
18 | to 1%, as determined by checking two planes above or below, or comparing
19 | barcodes of different identities but similar abundance between
20 | adjacent z planes.
21 |
22 | Args:
23 | barcodes: a pandas dataframe containing all the entries for a given
24 | barcode identity
25 | zPlanes: number of planes above and below to consider when evaluating
26 | potential duplicates
27 | maxDist: maximum euclidean distance allowed to separate centroids of
28 |             putative barcode duplicates, in pixels
29 | Returns:
30 | keptBarcodes: pandas dataframe where barcodes of the same identity that
31 | fall within parameters of z plane duplicates have
32 | been removed.
33 | """
34 | if len(barcodes) == 0:
35 | return barcodes
36 | else:
37 | barcodeGroups = barcodes.groupby('barcode_id')
38 | bcToKeep = []
39 | for bcGroup, bcData in barcodeGroups:
40 | bcToKeep.append(
41 | remove_zplane_duplicates_single_barcodeid(bcData, zPlanes,
42 | maxDist, allZPos))
43 |         mergedBC = pd.concat(bcToKeep, axis=0).reset_index(drop=True)
44 | mergedBC = mergedBC.sort_values(by=['barcode_id', 'z'])
45 | return mergedBC
46 |
47 |
48 | def remove_zplane_duplicates_single_barcodeid(barcodes: pd.DataFrame,
49 | zPlanes: int,
50 | maxDist: float,
51 | allZPos: List) -> pd.DataFrame:
52 | """ Remove barcodes with a given barcode id that are putative z plane
53 | duplicates.
54 |
55 | Args:
56 | barcodes: a pandas dataframe containing all the entries for a given
57 | barcode identity
58 | zPlanes: number of planes above and below to consider when evaluating
59 | potential duplicates
60 | maxDist: maximum euclidean distance allowed to separate centroids of
61 |             putative barcode duplicates, in pixels
62 | Returns:
63 | keptBarcodes: pandas dataframe where barcodes of the same identity that
64 | fall within parameters of z plane duplicates have
65 | been removed.
66 | """
67 | barcodes.reset_index(drop=True, inplace=True)
68 | if not len(barcodes['barcode_id'].unique()) == 1:
69 | errorString = 'The method remove_zplane_duplicates_single_barcodeid ' +\
70 | 'should be given a dataframe containing molecules ' +\
71 | 'that all have the same barcode id. Please use ' +\
72 | 'remove_zplane_duplicates_all_barcodeids to handle ' +\
73 | 'dataframes containing multiple barcode ids'
74 | raise ValueError(errorString)
75 | graph = nx.Graph()
76 | zPos = sorted(allZPos)
77 | graph.add_nodes_from(barcodes.index.values.tolist())
78 | for z in range(0, len(zPos)):
79 | zToCompare = [pos for pos, otherZ in enumerate(zPos) if
80 | (pos >= z - zPlanes) & (pos <= z + zPlanes) & ~(pos == z)]
81 | treeBC = barcodes[barcodes['z'] == z]
82 | if len(treeBC) == 0:
83 | pass
84 | else:
85 | tree = cKDTree(treeBC.loc[:, ['x', 'y']].values)
86 | for compZ in zToCompare:
87 | queryBC = barcodes[barcodes['z'] == compZ]
88 | if len(queryBC) == 0:
89 | pass
90 | else:
91 | dist, idx = tree.query(queryBC.loc[:, ['x', 'y']].values,
92 | k=1, distance_upper_bound=maxDist)
93 | currentHits = treeBC.index.values[idx[np.isfinite(dist)]]
94 | comparisonHits = queryBC.index.values[np.isfinite(dist)]
95 | graph.add_edges_from(list(zip(currentHits, comparisonHits)))
96 | connectedComponents = [list(x) for x in
97 | list(nx.connected_components(graph))]
98 |
99 | def choose_brighter_barcode(barcodes, indexes):
100 | sortedBC = barcodes.loc[indexes, :].sort_values(by='mean_intensity',
101 | ascending=False)
102 | return sortedBC.index.values.tolist()[0]
103 |
104 | keptBarcodes = barcodes.loc[sorted([x[0] if len(x) == 1 else
105 | choose_brighter_barcode(barcodes, x)
106 | for x in connectedComponents]), :]
107 | return keptBarcodes
108 |
--------------------------------------------------------------------------------
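A minimal usage sketch for the duplicate removal above. The toy dataframe is
illustrative; the 'z' column holds integer z plane indices, matching how the
functions above compare them, and allZPos lists the physical z positions:

    import pandas as pd
    from merlin.util import barcodefilters

    # Three spots of one barcode id; the first two sit on adjacent z planes
    # within maxDist of each other and should collapse to the brighter one.
    toyBarcodes = pd.DataFrame({
        'barcode_id': [7, 7, 7],
        'x': [100.0, 100.4, 400.0],
        'y': [200.0, 200.3, 50.0],
        'z': [0, 1, 1],
        'mean_intensity': [900.0, 1200.0, 800.0]})

    kept = barcodefilters.remove_zplane_duplicates_all_barcodeids(
        toyBarcodes, zPlanes=1, maxDist=2.0, allZPos=[0.0, 1.5, 3.0])
    print(kept)  # two rows remain; the dimmer duplicate at z=0 is dropped

--------------------------------------------------------------------------------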
/merlin/util/binary.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from typing import List
3 |
4 |
5 | def bit_list_to_int(bitList: List[bool]) -> int:
6 | """Converts a binary list to an integer
7 |
8 | Args:
9 | bitList: the binary list to convert
10 | Returns:
11 | The integer corresponding to the input bit list
12 | """
13 | out = 0
14 | for b in reversed(bitList):
15 | out = (out << 1) | b
16 | return out
17 |
18 |
19 | def int_to_bit_list(intIn: int, bitCount: int) -> List[bool]:
20 | """Converts an integer to a binary list with the specified number of bits.
21 |
22 | Args:
23 | intIn: the integer to convert
24 | bitCount: the number of bits to include in the output bit list
25 | Returns:
26 |         A list of bits that specifies the input integer. The least significant
27 | bit is first in the list.
28 | """
29 | return [k_bit_set(intIn, k) for k in range(bitCount)]
30 |
31 |
32 | def k_bit_set(n: int, k: int) -> bool:
33 | """Determine if the k'th bit of integer n is set to 1.
34 |
35 | Args:
36 | n: the integer to check
37 | k: the index of the bit to check where 0 corresponds with the least
38 | significant bit
39 | Returns:
40 | true if the k'th bit of the integer n is 1, otherwise false. If
41 | k is None, this function returns None.
42 | """
43 | if k is None:
44 | return None
45 |
46 | if n & (1 << k):
47 | return True
48 | else:
49 | return False
50 |
51 |
52 | def flip_bit(barcode: List[bool], bitIndex: int) -> List[bool]:
53 | """Generates a version of the provided barcode where the bit at the
54 | specified index is inverted.
55 |
56 | The provided barcode is left unchanged. It is copied before flipping the
57 | bit.
58 |
59 | Args:
60 | barcode: A binary array where the i'th entry corresponds with the
61 | value of the i'th bit
62 | bitIndex: The index of the bit to reverse
63 | Returns:
64 | A copy of barcode with bitIndex inverted
65 | """
66 | bcCopy = np.copy(barcode)
67 | bcCopy[bitIndex] = not bcCopy[bitIndex]
68 | return bcCopy
69 |
--------------------------------------------------------------------------------
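A quick round trip through the helpers above; the values are arbitrary, and
note that flip_bit returns a numpy array copy rather than a list:

    from merlin.util import binary

    bits = binary.int_to_bit_list(11, 8)  # least significant bit first
    print(bits)                           # [True, True, False, True, ...]
    print(binary.bit_list_to_int(bits))   # 11
    print(binary.flip_bit(bits, 2))       # copy with bit 2 inverted

--------------------------------------------------------------------------------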
/merlin/util/deconvolve.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | from scipy import ndimage
4 |
5 | from merlin.util import matlab
6 |
7 | """
8 | This module contains utility functions for performing deconvolution on
9 | images.
10 | """
11 |
12 |
13 | def calculate_projectors(windowSize: int, sigmaG: float) -> list:
14 | """Calculate forward and backward projectors as described in:
15 |
16 | 'Accelerating iterative deconvolution and multiview fusion by orders
17 | of magnitude', Guo et al, bioRxiv 2019.
18 |
19 | Args:
20 | windowSize: the size of the window over which to perform the gaussian.
21 | This must be an odd number.
22 | sigmaG: the standard deviation of the Gaussian point spread function
23 |
24 | Returns:
25 | A list containing the forward and backward projectors to use for
26 | Lucy-Richardson deconvolution.
27 | """
28 | pf = matlab.matlab_gauss2D(shape=(windowSize, windowSize),
29 | sigma=sigmaG)
30 | pfFFT = np.fft.fft2(pf)
31 |
32 | # Wiener-Butterworth back projector.
33 | #
34 | # These values are from Guo et al.
35 | alpha = 0.001
36 | beta = 0.001
37 | n = 8
38 |
39 | # This is the cut-off frequency.
40 | kc = 1.0/(0.5 * 2.355 * sigmaG)
41 |
42 | # FFT frequencies
43 | kv = np.fft.fftfreq(pfFFT.shape[0])
44 |
45 | kx = np.zeros((kv.size, kv.size))
46 | for i in range(kv.size):
47 | kx[i, :] = np.copy(kv)
48 |
49 | ky = np.transpose(kx)
50 | kk = np.sqrt(kx*kx + ky*ky)
51 |
52 | # Wiener filter
53 | bWiener = pfFFT/(np.abs(pfFFT) * np.abs(pfFFT) + alpha)
54 |
55 |     # Butterworth filter
56 | eps = np.sqrt(1.0/(beta*beta) - 1)
57 |
58 | kkSqr = kk*kk/(kc*kc)
59 | bBWorth = 1.0/np.sqrt(1.0 + eps * eps * np.power(kkSqr, n))
60 |
61 |     # Wiener-Butterworth back projector
62 | pbFFT = bWiener * bBWorth
63 |
64 |     # Transform the back projector back to real space.
65 | pb = np.real(np.fft.ifft2(pbFFT))
66 |
67 | return [pf, pb]
68 |
69 |
70 | def deconvolve_lucyrichardson(image: np.ndarray,
71 | windowSize: int,
72 | sigmaG: float,
73 | iterationCount: int) -> np.ndarray:
74 | """Performs Lucy-Richardson deconvolution on the provided image using a
75 | Gaussian point spread function.
76 |
77 | Ported from Matlab deconvlucy.
78 |
79 | Args:
80 | image: the input image to be deconvolved
81 | windowSize: the size of the window over which to perform the gaussian.
82 | This must be an odd number.
83 | sigmaG: the standard deviation of the Gaussian point spread function
84 | iterationCount: the number of iterations to perform
85 |
86 | Returns:
87 | the deconvolved image
88 | """
89 | eps = np.finfo(float).eps
90 | Y = np.copy(image)
91 | J1 = np.copy(image)
92 | J2 = np.copy(image)
93 | wI = np.copy(image)
94 | imR = np.copy(image)
95 | reblurred = np.copy(image)
96 | tmpMat1 = np.zeros(image.shape, dtype=float)
97 | tmpMat2 = np.zeros(image.shape, dtype=float)
98 | T1 = np.zeros(image.shape, dtype=float)
99 | T2 = np.zeros(image.shape, dtype=float)
100 |     lam = 0
101 |
102 | if windowSize % 2 != 1:
103 | gaussianFilter = matlab.matlab_gauss2D(shape=(windowSize, windowSize),
104 | sigma=sigmaG)
105 |
106 | for i in range(iterationCount):
107 | if i > 1:
108 | cv2.multiply(T1, T2, tmpMat1)
109 | cv2.multiply(T2, T2, tmpMat2)
110 |             lam = np.sum(tmpMat1) / (np.sum(tmpMat2) + eps)
111 |             lam = max(min(lam, 1), 0)
112 |             cv2.subtract(J1, J2, Y)
113 |             cv2.addWeighted(J1, 1, Y, lam, 0, Y)
114 | np.clip(Y, 0, None, Y)
115 | if windowSize % 2 == 1:
116 | cv2.GaussianBlur(Y, (windowSize, windowSize), sigmaG, reblurred,
117 | borderType=cv2.BORDER_REPLICATE)
118 | else:
119 | reblurred = ndimage.convolve(Y, gaussianFilter, mode='constant')
120 | np.clip(reblurred, eps, None, reblurred)
121 | cv2.divide(wI, reblurred, imR)
122 | imR += eps
123 | if windowSize % 2 == 1:
124 | cv2.GaussianBlur(imR, (windowSize, windowSize), sigmaG, imR,
125 | borderType=cv2.BORDER_REPLICATE)
126 | else:
127 | imR = ndimage.convolve(imR, gaussianFilter, mode='constant')
128 | imR[imR > 2 ** 16] = 0
129 | np.copyto(J2, J1)
130 | np.multiply(Y, imR, out=J1)
131 | np.copyto(T2, T1)
132 | np.subtract(J1, Y, out=T1)
133 | return J1
134 |
135 |
136 | def deconvolve_lucyrichardson_guo(image: np.ndarray,
137 | windowSize: int,
138 | sigmaG: float,
139 | iterationCount: int) -> np.ndarray:
140 | """Performs Lucy-Richardson deconvolution on the provided image using a
141 |     Gaussian point spread function. This version uses the optimized
142 | deconvolution approach described in:
143 |
144 | 'Accelerating iterative deconvolution and multiview fusion by orders
145 | of magnitude', Guo et al, bioRxiv 2019.
146 |
147 | Args:
148 | image: the input image to be deconvolved
149 | windowSize: the size of the window over which to perform the gaussian.
150 | This must be an odd number.
151 | sigmaG: the standard deviation of the Gaussian point spread function
152 | iterationCount: the number of iterations to perform
153 |
154 | Returns:
155 | the deconvolved image
156 | """
157 | [pf, pb] = calculate_projectors(windowSize, sigmaG)
158 |
159 | eps = 1.0e-6
160 | i_max = 2**16-1
161 |
162 | ek = np.copy(image)
163 | np.clip(ek, eps, None, ek)
164 |
165 | for i in range(iterationCount):
166 | ekf = cv2.filter2D(ek, -1, pf,
167 | borderType=cv2.BORDER_REPLICATE)
168 | np.clip(ekf, eps, i_max, ekf)
169 |
170 | ek = ek*cv2.filter2D(image/ekf, -1, pb,
171 | borderType=cv2.BORDER_REPLICATE)
172 | np.clip(ek, eps, i_max, ek)
173 |
174 | return ek
175 |
--------------------------------------------------------------------------------
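A minimal sketch exercising the accelerated deconvolution above on a synthetic
blurred spot; the window, sigma, and iteration values are illustrative, not
recommended settings:

    import cv2
    import numpy as np
    from merlin.util import deconvolve

    # A single bright point blurred by a Gaussian point spread function.
    spot = np.zeros((64, 64), dtype=float)
    spot[32, 32] = 1000.0
    blurred = cv2.GaussianBlur(spot, (9, 9), 1.2,
                               borderType=cv2.BORDER_REPLICATE)

    sharpened = deconvolve.deconvolve_lucyrichardson_guo(
        blurred, windowSize=9, sigmaG=1.2, iterationCount=2)
    # Deconvolution should re-concentrate intensity at the original spot.
    print(blurred.max(), sharpened.max())

--------------------------------------------------------------------------------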
/merlin/util/imagefilters.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 |
4 | """
5 | This module contains code for performing filtering operations on images
6 | """
7 |
8 |
9 | def high_pass_filter(image: np.ndarray,
10 | windowSize: int,
11 | sigma: float) -> np.ndarray:
12 |     """High-pass filter an image by subtracting a Gaussian low-pass blur.
13 | Args:
14 | image: the input image to be filtered
15 | windowSize: the size of the Gaussian kernel to use.
16 | sigma: the sigma of the Gaussian.
17 |
18 | Returns:
19 | the high pass filtered image. The returned image is the same type
20 | as the input image.
21 | """
22 | lowpass = cv2.GaussianBlur(image,
23 | (windowSize, windowSize),
24 | sigma,
25 | borderType=cv2.BORDER_REPLICATE)
26 | gauss_highpass = image - lowpass
27 | gauss_highpass[lowpass > image] = 0
28 | return gauss_highpass
29 |
--------------------------------------------------------------------------------
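An illustrative call to the high-pass filter above: a flat background is
suppressed while a point source survives (sizes and intensities are
arbitrary):

    import numpy as np
    from merlin.util import imagefilters

    image = np.full((32, 32), 100, dtype=np.uint16)
    image[16, 16] = 1000
    filtered = imagefilters.high_pass_filter(image, windowSize=9, sigma=2.0)
    # The flat background maps to 0; the spot keeps most of its intensity.
    print(filtered[0, 0], filtered[16, 16])

--------------------------------------------------------------------------------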
/merlin/util/legacy.py:
--------------------------------------------------------------------------------
1 | import struct
2 | import pandas
3 | import numpy as np
4 | from typing import BinaryIO
5 | from typing import Tuple
6 | from typing import List
7 | from typing import Dict
8 | from typing import Iterator
9 |
10 |
11 | """
12 | This module contains convenience functions for reading and writing MERFISH
13 | analysis results created from the deprecated Matlab pipeline.
14 | """
15 |
16 |
17 | def read_blist(bFile: BinaryIO) -> pandas.DataFrame:
18 | entryCount, _, entryFormat = _read_binary_header(bFile)
19 | bytesPerEntry = int(np.sum(
20 | [struct.calcsize(typeNames[x['type']]) * np.prod(x['size']) for x in
21 | entryFormat]))
22 | return pandas.DataFrame(
23 | [_parse_entry_bytes(bFile.read(bytesPerEntry), entryFormat) for i in
24 | range(entryCount)])
25 |
26 |
27 | typeNames = {'int8': 'b',
28 | 'uint8': 'B',
29 | 'int16': 'h',
30 | 'uint16': 'H',
31 | 'int32': 'i',
32 | 'uint32': 'I',
33 | 'int64': 'q',
34 | 'uint64': 'Q',
35 | 'float': 'f',
36 | 'single': 'f',
37 | 'double': 'd',
38 | 'char': 's'}
39 |
40 |
41 | def _chunker(seq, size: int) -> Iterator:
42 | return (seq[pos:pos + size] for pos in range(0, len(seq), size))
43 |
44 |
45 | def _read_binary_header(bFile: BinaryIO) -> Tuple[int, int, List[Dict]]:
46 | version = struct.unpack(typeNames['uint8'], bFile.read(1))[0]
47 | bFile.read(1)
48 | entryCount = struct.unpack(typeNames['uint32'], bFile.read(4))[0]
49 | headerLength = struct.unpack(typeNames['uint32'], bFile.read(4))[0]
50 | layout = bFile.read(headerLength).decode('utf-8').split(',')
51 | entryList = [
52 | {'name': x, 'size': np.array(y.split(' ')).astype(int), 'type': z}
53 | for x, y, z in _chunker(layout, 3)]
54 | return entryCount, headerLength, entryList
55 |
56 |
57 | def _parse_entry_bytes(byteList, entryFormat: List[Dict]):
58 | entryData = {}
59 | byteIndex = 0
60 | for currentEntry in entryFormat:
61 | itemCount = int(np.prod(currentEntry['size']))
62 | itemType = typeNames[currentEntry['type']]
63 | itemSize = struct.calcsize(itemType)
64 | items = np.array([struct.unpack(
65 | itemType, byteList[byteIndex
66 | + i * itemSize:byteIndex
67 | + (i + 1) * itemSize])[0]
68 | for i in range(itemCount)])
69 | byteIndex += itemSize * itemCount
70 |
71 | if currentEntry['size'][0] == 1 and currentEntry['size'][1] == 1:
72 | items = items[0]
73 | if currentEntry['size'][0] != 1 and currentEntry['size'][1] != 1:
74 | items = items.reshape(currentEntry['size'])
75 |
76 | entryData[currentEntry['name']] = items
77 |
78 | return entryData
79 |
--------------------------------------------------------------------------------
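A sketch that builds a one-record barcode list in memory and reads it back
with read_blist. The field names are hypothetical, and the packing assumes
the native byte order that the struct formats above also use:

    import io
    import struct
    from merlin.util import legacy

    layout = ','.join(['barcode_id', '1 1', 'uint16',
                       'mean_intensity', '1 1', 'single'])
    stream = io.BytesIO(
        struct.pack('B', 1)              # version
        + struct.pack('B', 0)            # skipped byte
        + struct.pack('I', 1)            # entry count
        + struct.pack('I', len(layout))  # header length
        + layout.encode('utf-8')
        + struct.pack('H', 42)           # barcode_id
        + struct.pack('f', 512.0))       # mean_intensity
    print(legacy.read_blist(stream))     # one row: 42, 512.0

--------------------------------------------------------------------------------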
/merlin/util/matlab.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from typing import Tuple
3 |
4 |
5 | """
6 | This module contains Matlab functions that do not have equivalents in
7 | python libraries.
8 | """
9 |
10 |
11 | def matlab_gauss2D(shape: Tuple[int, int] = (3, 3), sigma: float = 0.5
12 |                    ) -> np.ndarray:
13 | """
14 | 2D gaussian mask - should give the same result as MATLAB's
15 | fspecial('gaussian',[shape],[sigma])
16 | """
17 | m, n = [(ss-1.)/2. for ss in shape]
18 | y, x = np.ogrid[-m:m+1, -n:n+1]
19 | h = np.exp(-(x*x + y*y) / (2.*sigma*sigma))
20 | h[h < np.finfo(h.dtype).eps*h.max()] = 0
21 | sumh = h.sum()
22 | if sumh != 0:
23 | h /= sumh
24 | return h
25 |
--------------------------------------------------------------------------------
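A quick check of the fspecial-style kernel above; the shape and sigma are
arbitrary:

    from merlin.util import matlab

    kernel = matlab.matlab_gauss2D(shape=(5, 5), sigma=1.0)
    print(kernel.shape)                   # (5, 5)
    print(round(float(kernel.sum()), 6))  # 1.0, i.e. normalized
    print(kernel[0, 0] == kernel[4, 4])   # symmetric about the center

--------------------------------------------------------------------------------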
/merlin/util/registration.py:
--------------------------------------------------------------------------------
1 | from typing import Tuple
2 | from sklearn.neighbors import NearestNeighbors
3 | from skimage import transform
4 | import numpy as np
5 | from scipy import signal
6 |
7 |
8 | def extract_control_points(
9 | referencePoints: np.ndarray, movingPoints: np.ndarray,
10 | gridSpacing: float=0.5) -> Tuple[np.ndarray, np.ndarray]:
11 |     """Match points between the reference and moving lists as control points.
12 | If fewer than 10 points are provided for either the reference or the moving
13 | list, this returns no points.
14 |
15 | Args:
16 | referencePoints: a n x 2 numpy array containing the reference points.
17 | movingPoints: a m x 2 numpy array containing the moving points.
18 | gridSpacing: the spacing of the grid for the 2d histogram for
19 |             estimating the coarse transformation
20 | Returns: two np arrays (select reference points, select moving points)
21 | both of which are p x 2. The i'th point in the reference list
22 | has been matched to the i'th point in the moving list.
23 | """
24 | if len(referencePoints) < 10 or len(movingPoints) < 10:
25 | return np.zeros((0, 2)), np.zeros((0, 2))
26 |
27 | edges = np.arange(-200, 200, gridSpacing)
28 |
29 | neighbors = NearestNeighbors(n_neighbors=10)
30 | neighbors.fit(referencePoints)
31 | distances, indexes = neighbors.kneighbors(
32 | movingPoints, return_distance=True)
33 | differences = [[movingPoints[i] - referencePoints[x]
34 | for x in indexes[i]]
35 | for i in range(len(movingPoints))]
36 | counts, xedges, yedges = np.histogram2d(
37 | [x[0] for y in differences for x in y],
38 | [x[1] for y in differences for x in y],
39 | bins=edges)
40 | maxIndex = np.unravel_index(counts.argmax(), counts.shape)
41 | offset = (xedges[maxIndex[0]], yedges[maxIndex[1]])
42 |
43 | distancesShifted, indexesShifted = neighbors.kneighbors(
44 | movingPoints - np.tile(offset, (movingPoints.shape[0], 1)),
45 | return_distance=True)
46 |
47 | controlIndexes = [x[0] < gridSpacing for x in distancesShifted]
48 | referenceControls = np.array([referencePoints[x[0]]
49 | for x in indexesShifted[controlIndexes]])
50 | movingControls = movingPoints[controlIndexes, :]
51 |
52 | return referenceControls, movingControls
53 |
54 |
55 | def estimate_transform_from_points(
56 | referencePoints: np.ndarray, movingPoints: np.ndarray) \
57 |         -> transform.SimilarityTransform:
58 |     """Estimate a similarity transform from paired control points.
59 | 
60 | If fewer than two points are provided, this will return the identity
61 | transform.
62 |
63 | Args:
64 | referencePoints: a n x 2 numpy array containing the reference points
65 | movingPoints: a n x 2 numpy array containing the moving points, where
66 | the i'th point of moving points corresponds with the i'th point
67 | of reference points.
68 | Returns: a similarity transform estimated from the paired points.
69 |
70 | """
71 | tform = transform.SimilarityTransform()
72 | if len(referencePoints) < 2 or len(movingPoints) < 2:
73 | return tform
74 | tform.estimate(referencePoints, movingPoints)
75 | return tform
76 |
77 |
78 | def lsradialcenterfit(m, b, w):
79 | wm2p1 = w / (m * m + 1)
80 | sw = np.sum(wm2p1)
81 | smmw = np.sum(m * m * wm2p1)
82 | smw = np.sum(m * wm2p1)
83 | smbw = np.sum(m * b * wm2p1)
84 | sbw = np.sum(b * wm2p1)
85 | det = smw * smw - smmw * sw
86 | xc = (smbw * sw - smw * sbw) / det
87 | yc = (smbw * smw - smmw * sbw) / det
88 |
89 | return xc, yc
90 |
91 |
92 | def radial_center(imageIn) -> Tuple[float, float]:
93 | """Determine the center of the object in imageIn using radial-symmetry-based
94 | particle localization.
95 |
96 |     Adapted from Raghuveer Parthasarathy, Nature Methods, 2012.
97 | """
98 | Ny, Nx = imageIn.shape
99 | xm_onerow = np.arange(-(Nx - 1) / 2.0 + 0.5, (Nx) / 2.0 - 0.5)
100 | xm = np.tile(xm_onerow, (Ny - 1, 1))
101 |     ym_onecol = [np.arange(-(Ny - 1) / 2.0 + 0.5, (Ny) / 2.0 - 0.5)]
102 | ym = np.tile(ym_onecol, (Nx - 1, 1)).transpose()
103 |
104 | imageIn = imageIn.astype(float)
105 |
106 |     dIdu = imageIn[0:Ny - 1, 1:Nx] - imageIn[1:Ny, 0:Nx - 1]
107 |     dIdv = imageIn[0:Ny - 1, 0:Nx - 1] - imageIn[1:Ny, 1:Nx]
108 |
109 | h = np.ones((3, 3)) / 9
110 | fdu = signal.convolve2d(dIdu, h, 'same')
111 | fdv = signal.convolve2d(dIdv, h, 'same')
112 | dImag2 = np.multiply(fdu, fdu) + np.multiply(fdv, fdv)
113 |
114 | m = np.divide(-(fdv + fdu), (fdu - fdv))
115 |
116 | if np.any(np.isnan(m)):
117 | unsmoothm = np.divide(dIdv + dIdu, dIdu - dIdv)
118 | m[np.isnan(m)] = unsmoothm[np.isnan(m)]
119 |
120 | if np.any(np.isnan(m)):
121 | m[np.isnan(m)] = 0
122 |
123 | if np.any(np.isinf(m)):
124 | if ~np.all(np.isinf(m)):
125 | m[np.isinf(m)] = 10 * np.max(m[~np.isinf(m)])
126 | else:
127 | m = np.divide((dIdv + dIdu), (dIdu - dIdv))
128 |
129 | b = ym - np.multiply(m, xm)
130 |
131 | sdI2 = np.sum(dImag2)
132 | xcentroid = np.sum(np.sum(np.multiply(dImag2, xm))) / sdI2
133 | ycentroid = np.sum(np.multiply(dImag2, ym)) / sdI2
134 | w = np.divide(dImag2, np.sqrt(
135 | (xm - xcentroid) * (xm - xcentroid) + (ym - ycentroid) * (
136 | ym - ycentroid)))
137 |
138 | xc, yc = lsradialcenterfit(m, b, w)
139 |
140 | xc = xc + (Nx + 1) / 2.0
141 | yc = yc + (Ny + 1) / 2.0
142 |
143 | return xc, yc
144 |
145 |
146 | def refine_position(image, x, y, cropSize=4) -> Tuple[float, float]:
147 |     # TODO this would be more intuitive if it retransformed the output
148 | # coordinates to the original image coordinates
149 | subImage = image[int(y + 2 - cropSize):int(y + cropSize),
150 | int(x - cropSize + 2):int(x + cropSize)]
151 | return radial_center(subImage)
152 |
--------------------------------------------------------------------------------
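A sketch of the control point flow above: shift a synthetic point set by a
known offset and recover it. The seed and offset are arbitrary; integer
coordinates keep the difference histogram binning exact so that every point
survives the control point selection:

    import numpy as np
    from merlin.util import registration

    rng = np.random.RandomState(0)
    reference = rng.randint(0, 100, size=(50, 2)).astype(float)
    moving = reference + np.array([3.0, -2.0])

    refControls, movControls = registration.extract_control_points(
        reference, moving, gridSpacing=0.5)
    tform = registration.estimate_transform_from_points(
        refControls, movControls)
    print(tform.translation)  # approximately [3.0, -2.0]

--------------------------------------------------------------------------------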
/merlin/util/simulator.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import PIL
4 | import cv2
5 | import tifffile
6 | from scipy.signal import convolve2d
7 |
8 | import merlin
9 | from merlin.core import dataset
10 | from merlin.data import codebook as cb
11 |
12 | class MERFISHDataFactory(object):
13 |
14 | """
15 | A class for simulating MERFISH data sets.
16 | """
17 |
18 | def __init__(self):
19 | self.codebookPath = 'L26E1.csv'
20 | self.psfSigma = 1.2
21 | self.imageSize = np.array([1024, 1024])
22 | self.upsampleFactor = 10
23 | self.fluorophoreBrightness = 1000
24 | self.fiducialBrightness = 10000
25 | self.background = 100
26 | self.bitOrganization = [[0, 1], [0, 0], [1, 0], [1, 1],
27 | [2, 1], [2, 0], [3, 1], [3, 0], [4, 0], [4, 1],
28 | [5, 1], [5, 0], [6, 1], [6, 0], [7, 0], [7, 1]]
29 |
30 | def simulate_image(self, spotPositions: np.ndarray=None,
31 | addNoise: bool=False) -> np.ndarray:
32 | """Simulate a single image consisting of point sources with a Gaussian
33 | point spread function
34 |
35 | Args:
36 | spotPositions: a n x 2 numpy array containing the positions to
37 | simulate the point sources. If not specified, 1000 random
38 | positions are selected.
39 | addNoise: flag indicating whether poisson noise should be added
40 | to the simulated image.
41 | Returns:
42 | the simulated image
43 | """
44 | if spotPositions is None:
45 | spotPositions = np.random.uniform(size=(1000, 2))
46 | spotPositions[:, 0] *= self.imageSize[0]
47 | spotPositions[:, 1] *= self.imageSize[1]
48 |
49 | upsampledImage = np.zeros(self.upsampleFactor*self.imageSize)
50 | for p in spotPositions:
51 | upsampledImage[int(np.floor(p[0]*self.upsampleFactor)),
52 | int(np.floor(p[1]*self.upsampleFactor))] += 1000
53 |
54 | return self._downsample_image_stack([upsampledImage],
55 | addNoise=addNoise)[0]
56 |
57 | def simulate_dataset(self, datasetName, abundanceScale=1,
58 | fluorophoreCount=5, fovCount=10):
59 | """Simulate a full MERFISH dataset"""
60 | dataDir = os.sep.join([merlin.DATA_HOME, datasetName])
61 | if not os.path.exists(dataDir):
62 | os.mkdir(dataDir)
63 |
64 | simDataset = dataset.DataSet(datasetName)
65 | codebook = cb.Codebook(simDataset, self.codebookPath)
66 |
67 | barcodeNumber = codebook.get_barcode_count()
68 | barcodeAbundances = abundanceScale*np.array(
69 | [10**np.random.uniform(3) for i in range(barcodeNumber)])
70 | barcodeAbundances[:10] = 0
71 |
72 | for i in range(fovCount):
73 | merfishImages, rnaPositions = self._simulate_single_fov(
74 | codebook, barcodeAbundances, fluorophoreCount)
75 | fiducialImage = self._simulate_fiducial_image()
76 | tifffile.imsave(
77 | os.sep.join([dataDir, 'full_stack_' + str(i) + '.tiff']),
78 | merfishImages.astype(np.uint16))
79 |
80 | imageCount = np.max([x[0] for x in self.bitOrganization]) + 1
81 | for j in range(imageCount):
82 | fileName = 'Conventional_750_650_561_488_405_' + str(i) + \
83 | '_' + str(j) + '.tiff'
84 | filePath = os.sep.join([dataDir, fileName])
85 |
86 | imageData = np.zeros(
87 | shape=(5, *self.imageSize), dtype=np.uint16)
88 |                 firstBitIndex = [k for k, x in enumerate(self.bitOrganization)
89 |                                  if x[0] == j and x[1] == 0][0]
90 |                 secondBitIndex = [k for k, x in enumerate(self.bitOrganization)
91 |                                  if x[0] == j and x[1] == 1][0]
92 |
93 | imageData[0,:,:] = merfishImages[firstBitIndex]
94 | imageData[1,:,:] = merfishImages[secondBitIndex]
95 | imageData[2,:,:] = fiducialImage
96 |
97 | tifffile.imsave(filePath, imageData)
98 |
99 | np.save(os.sep.join(
100 | [dataDir, 'true_positions_' + str(i) + '.npy']), rnaPositions)
101 |
102 | def _simulate_fiducial_image(self):
103 | fiducialPositions = np.random.uniform(size=(1000,2))
104 | upsampledFiducials = self.fiducialBrightness*np.histogram2d(
105 | fiducialPositions[:,0]*self.imageSize[0],
106 | fiducialPositions[:,1]*self.imageSize[1],
107 | bins=self.upsampleFactor*self.imageSize)[0]
108 |
109 | return self._downsample_image_stack([upsampledFiducials])[0]
110 |
111 | def _simulate_single_fov(self, codebook, barcodeAbundances,
112 | fluorophoreCount):
113 | barcodeCount = len(barcodeAbundances)
114 | bitNumber = codebook.get_bit_count()
115 | imageSize = self.imageSize
116 |
117 | rnaCounts = np.random.poisson(barcodeAbundances)
118 | rnaPositions = [np.random.uniform(size=(c, 2)) for c in rnaCounts]
119 | for b in range(barcodeCount):
120 | rnaPositions[b][:, 0] *= imageSize[0]
121 | rnaPositions[b][:, 1] *= imageSize[1]
122 |
123 | upsampledStack = np.zeros((bitNumber, *self.upsampleFactor*imageSize))
124 |
125 | for b in range(barcodeCount):
126 | self._add_spots_for_barcode(
127 | codebook.get_barcode(b), rnaPositions[b], fluorophoreCount,
128 | upsampledStack)
129 |
130 | imageStack = self._downsample_image_stack(upsampledStack)
131 |
132 | return imageStack, rnaPositions
133 |
134 | def _add_spots_for_barcode(self, barcode, positions, fluorophoreCount,
135 | upsampledStack):
136 | upsampledImage = np.zeros(self.upsampleFactor*self.imageSize)
137 | for p in positions:
138 | upsampledImage[int(np.floor(p[0]*self.upsampleFactor)), \
139 | int(np.floor(p[1]*self.upsampleFactor))] += 1
140 | upsampledImage = self.fluorophoreBrightness*np.random.poisson(
141 | upsampledImage*fluorophoreCount)
142 |
143 | for i in np.where(barcode)[0]:
144 | np.add(upsampledStack[i], upsampledImage, out=upsampledStack[i])
145 |
146 | def _downsample_image_stack(self, upsampledStack, addNoise=True):
147 | imageStack = np.zeros((len(upsampledStack), *self.imageSize))
148 |
149 | for i in range(len(imageStack)):
150 | blurredImage = cv2.GaussianBlur(upsampledStack[i].astype(float),
151 | ksize=(51, 51), sigmaX=self.upsampleFactor*self.psfSigma)
152 | downsampledImage = np.array(PIL.Image.fromarray(
153 | convolve2d(blurredImage,
154 | np.ones((self.upsampleFactor, self.upsampleFactor))))\
155 | .resize(self.imageSize, PIL.Image.BILINEAR))
156 | if addNoise:
157 | imageStack[i] = np.random.poisson(
158 | downsampledImage + self.background)
159 | else:
160 | imageStack[i] = downsampledImage + self.background
161 |
162 | return imageStack
163 |
164 |
--------------------------------------------------------------------------------
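An illustrative use of the factory above, shrunk to a small field of view so
the 10x upsampled intermediate image stays light; the positions and sizes are
arbitrary:

    import numpy as np
    from merlin.util.simulator import MERFISHDataFactory

    factory = MERFISHDataFactory()
    factory.imageSize = np.array([128, 128])  # keep the sketch small

    positions = np.random.uniform(0, 128, size=(20, 2))
    image = factory.simulate_image(spotPositions=positions, addNoise=False)
    print(image.shape)  # (128, 128)

--------------------------------------------------------------------------------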
/merlin/util/snakewriter.py:
--------------------------------------------------------------------------------
1 | import importlib
2 | import networkx
3 | from merlin.core import analysistask
4 | from merlin.core import dataset
5 |
6 |
7 | class SnakemakeRule(object):
8 |
9 | def __init__(self, analysisTask: analysistask.AnalysisTask,
10 | pythonPath=None):
11 | self._analysisTask = analysisTask
12 | self._pythonPath = pythonPath
13 |
14 | @staticmethod
15 | def _add_quotes(stringIn):
16 | return '\'%s\'' % stringIn
17 |
18 | @staticmethod
19 | def _clean_string(stringIn):
20 | return stringIn.replace('\\', '/')
21 |
22 | def _expand_as_string(self, taskName, indexCount) -> str:
23 | return 'expand(%s, g=list(range(%i)))' % (self._add_quotes(
24 | self._analysisTask.dataSet.analysis_done_filename(taskName, '{g}')),
25 | indexCount)
26 |
27 | def _generate_output(self) -> str:
28 | if isinstance(self._analysisTask, analysistask.ParallelAnalysisTask):
29 | return self._clean_string(
30 | self._add_quotes(
31 | self._analysisTask.dataSet.analysis_done_filename(
32 | self._analysisTask, '{i}')))
33 | else:
34 | return self._clean_string(
35 | self._add_quotes(
36 | self._analysisTask.dataSet.analysis_done_filename(
37 | self._analysisTask)))
38 |
39 | def _generate_current_task_inputs(self):
40 | inputTasks = [self._analysisTask.dataSet.load_analysis_task(x)
41 | for x in self._analysisTask.get_dependencies()]
42 | if len(inputTasks) > 0:
43 | inputString = ','.join(['ancient(' + self._add_quotes(
44 | x.dataSet.analysis_done_filename(x)) + ')'
45 | for x in inputTasks])
46 | else:
47 | inputString = ''
48 |
49 | return self._clean_string(inputString)
50 |
51 | def _generate_message(self) -> str:
52 | messageString = \
53 | ''.join(['Running ', self._analysisTask.get_analysis_name()])
54 | if isinstance(self._analysisTask, analysistask.ParallelAnalysisTask):
55 | messageString += ' {wildcards.i}'
56 | return self._add_quotes(messageString)
57 |
58 | def _base_shell_command(self) -> str:
59 | if self._pythonPath is None:
60 | shellString = 'python '
61 | else:
62 | shellString = self._clean_string(self._pythonPath) + ' '
63 | shellString += ''.join(
64 | ['-m merlin -t ',
65 | self._clean_string(self._analysisTask.analysisName),
66 | ' -e \"',
67 | self._clean_string(self._analysisTask.dataSet.dataHome), '\"',
68 | ' -s \"',
69 | self._clean_string(self._analysisTask.dataSet.analysisHome),
70 | '\"'])
71 | return shellString
72 |
73 | def _generate_shell(self) -> str:
74 | shellString = self._base_shell_command()
75 | if isinstance(self._analysisTask, analysistask.ParallelAnalysisTask):
76 | shellString += ' -i {wildcards.i}'
77 | shellString += ' ' + self._clean_string(
78 | self._analysisTask.dataSet.dataSetName)
79 | return self._add_quotes(shellString)
80 |
81 | def _generate_done_shell(self) -> str:
82 |         """ Generate the shell command that checks whether a parallel
83 |         analysis task has completed. """
84 | shellString = self._base_shell_command()
85 | shellString += ' --check-done'
86 | shellString += ' ' + self._clean_string(
87 | self._analysisTask.dataSet.dataSetName)
88 | return self._add_quotes(shellString)
89 |
90 | def as_string(self) -> str:
91 | fullString = ('rule %s:\n\tinput: %s\n\toutput: %s\n\tmessage: %s\n\t'
92 | + 'shell: %s\n\n') \
93 | % (self._analysisTask.get_analysis_name(),
94 | self._generate_current_task_inputs(),
95 | self._generate_output(),
96 | self._generate_message(), self._generate_shell())
97 | # for parallel tasks, add a second snakemake task to reduce the time
98 | # it takes to generate DAGs
99 | if isinstance(self._analysisTask, analysistask.ParallelAnalysisTask):
100 | fullString += \
101 | ('rule %s:\n\tinput: %s\n\toutput: %s\n\tmessage: %s\n\t'
102 | + 'shell: %s\n\n')\
103 | % (self._analysisTask.get_analysis_name() + 'Done',
104 | self._clean_string(self._expand_as_string(
105 | self._analysisTask,
106 | self._analysisTask.fragment_count())),
107 | self._add_quotes(self._clean_string(
108 | self._analysisTask.dataSet.analysis_done_filename(
109 | self._analysisTask))),
110 | self._add_quotes(
111 | 'Checking %s done' % self._analysisTask.analysisName),
112 | self._generate_done_shell())
113 | return fullString
114 |
115 | def full_output(self) -> str:
116 | if isinstance(self._analysisTask, analysistask.ParallelAnalysisTask):
117 | return self._clean_string(self._expand_as_string(
118 | self._analysisTask.get_analysis_name(),
119 | self._analysisTask.fragment_count()))
120 | else:
121 | return self._clean_string(
122 | self._add_quotes(
123 | self._analysisTask.dataSet.analysis_done_filename(
124 | self._analysisTask)))
125 |
126 |
127 | class SnakefileGenerator(object):
128 |
129 | def __init__(self, analysisParameters, dataSet: dataset.DataSet,
130 | pythonPath: str = None):
131 | self._analysisParameters = analysisParameters
132 | self._dataSet = dataSet
133 | self._pythonPath = pythonPath
134 |
135 | def _parse_parameters(self):
136 | analysisTasks = {}
137 | for tDict in self._analysisParameters['analysis_tasks']:
138 | analysisModule = importlib.import_module(tDict['module'])
139 | analysisClass = getattr(analysisModule, tDict['task'])
140 | analysisParameters = tDict.get('parameters')
141 | analysisName = tDict.get('analysis_name')
142 | newTask = analysisClass(
143 | self._dataSet, analysisParameters, analysisName)
144 | if newTask.get_analysis_name() in analysisTasks:
145 | raise Exception('Analysis tasks must have unique names. ' +
146 | newTask.get_analysis_name() + ' is redundant.')
147 | # TODO This should be more careful to not overwrite an existing
148 | # analysis task that has already been run.
149 | newTask.save()
150 | analysisTasks[newTask.get_analysis_name()] = newTask
151 | return analysisTasks
152 |
153 | def _identify_terminal_tasks(self, analysisTasks):
154 | taskGraph = networkx.DiGraph()
155 | for x in analysisTasks.keys():
156 | taskGraph.add_node(x)
157 |
158 | for x, a in analysisTasks.items():
159 | for d in a.get_dependencies():
160 | taskGraph.add_edge(d, x)
161 |
162 | return [k for k, v in taskGraph.out_degree if v == 0]
163 |
164 | def generate_workflow(self) -> str:
165 | """Generate a snakemake workflow for the analysis parameters
166 |         of this SnakefileGenerator and save the workflow into the dataset.
167 |
168 | Returns:
169 | the path to the generated snakemake workflow
170 | """
171 | analysisTasks = self._parse_parameters()
172 | terminalTasks = self._identify_terminal_tasks(analysisTasks)
173 |
174 | ruleList = {k: SnakemakeRule(v, self._pythonPath)
175 | for k, v in analysisTasks.items()}
176 |
177 | workflowString = 'rule all: \n\tinput: ' + \
178 | ','.join([ruleList[x].full_output()
179 | for x in terminalTasks]) + '\n\n'
180 | workflowString += '\n'.join([x.as_string() for x in ruleList.values()])
181 |
182 | return self._dataSet.save_workflow(workflowString)
183 |
--------------------------------------------------------------------------------
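For orientation, as_string above renders a non-parallel task into a rule of
roughly this shape; the task, dataset, and done-file paths here are
hypothetical placeholders (the generator indents with tabs):

    rule ExampleTask:
        input: ancient('/analysis/MyData/UpstreamTask.done')
        output: '/analysis/MyData/ExampleTask.done'
        message: 'Running ExampleTask'
        shell: 'python -m merlin -t ExampleTask -e "/data" -s "/analysis" MyData'

--------------------------------------------------------------------------------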
/merlin/util/watershed.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | from scipy import ndimage
4 | from skimage import morphology
5 | from skimage import filters
6 | from skimage import measure
7 | from pyclustering.cluster import kmedoids
8 | from typing import Tuple
9 |
10 | from merlin.util import matlab
11 |
12 | """
13 | This module contains utility functions for preparing images for
14 | watershed segmentation.
15 | """
16 |
17 | # To match Matlab's strel('disk', 20)
18 | diskStruct = morphology.diamond(28)[9:48, 9:48]
19 |
20 |
21 | def extract_seeds(seedImageStackIn: np.ndarray) -> np.ndarray:
22 | """Determine seed positions from the input images.
23 |
24 |     The initial seeds are determined by finding the regional intensity maxima
25 | after erosion and filtering with an adaptive threshold. These initial
26 | seeds are then expanded by dilation.
27 |
28 | Args:
29 | seedImageStackIn: a 3 dimensional numpy array arranged as (z,x,y)
30 | Returns: a boolean numpy array with the same dimensions as seedImageStackIn
31 | where a given (z,x,y) coordinate is True if it corresponds to a seed
32 | position and false otherwise.
33 | """
34 | seedImages = seedImageStackIn.copy()
35 |
36 | seedImages = ndimage.grey_erosion(
37 | seedImages,
38 | footprint=ndimage.morphology.generate_binary_structure(3, 1))
39 | seedImages = np.array([cv2.erode(x, diskStruct,
40 | borderType=cv2.BORDER_REFLECT)
41 | for x in seedImages])
42 |
43 | thresholdFilterSize = int(2 * np.floor(seedImages.shape[1] / 16) + 1)
44 | seedMask = np.array([x < 1.1 * filters.threshold_local(
45 | x, thresholdFilterSize, method='mean', mode='nearest')
46 | for x in seedImages])
47 |
48 | seedImages[seedMask] = 0
49 |
50 | seeds = morphology.local_maxima(seedImages, allow_borders=True)
51 |
52 | seeds = ndimage.morphology.binary_dilation(
53 | seeds, structure=ndimage.morphology.generate_binary_structure(3, 1))
54 | seeds = np.array([ndimage.morphology.binary_dilation(
55 |         x, structure=diskStruct) for x in seeds])
56 |
57 | return seeds
58 |
59 |
60 | def separate_merged_seeds(seedsIn: np.ndarray) -> np.ndarray:
61 | """Separate seeds that are merged in 3 dimensions but are separated
62 | in some 2 dimensional slices.
63 |
64 | Args:
65 | seedsIn: a 3 dimensional binary numpy array arranged as (z,x,y) where
66 | True indicates the pixel corresponds with a seed.
67 | Returns: a 3 dimensional binary numpy array of the same size as seedsIn
68 | indicating the positions of seeds after processing.
69 | """
70 |
71 | def create_region_image(shape, c):
72 | region = np.zeros(shape)
73 | for x in c.coords:
74 | region[x[0], x[1], x[2]] = 1
75 | return region
76 |
77 | components = measure.regionprops(measure.label(seedsIn))
78 | seeds = np.zeros(seedsIn.shape)
79 | for c in components:
80 | seedImage = create_region_image(seeds.shape, c)
81 | localProps = [measure.regionprops(measure.label(x)) for x in seedImage]
82 | seedCounts = [len(x) for x in localProps]
83 |
84 | if all([x < 2 for x in seedCounts]):
85 | goodFrames = [i for i, x in enumerate(seedCounts) if x == 1]
86 | goodProperties = [y for x in goodFrames for y in localProps[x]]
87 | seedPositions = np.round([np.median(
88 | [x.centroid for x in goodProperties], axis=0)]).astype(int)
89 | else:
90 | goodFrames = [i for i, x in enumerate(seedCounts) if x > 1]
91 | goodProperties = [y for x in goodFrames for y in localProps[x]]
92 | goodCentroids = [x.centroid for x in goodProperties]
93 | km = kmedoids.kmedoids(
94 | goodCentroids,
95 | np.random.choice(np.arange(len(goodCentroids)),
96 | size=np.max(seedCounts)))
97 | km.process()
98 | seedPositions = np.round(
99 | [goodCentroids[x] for x in km.get_medoids()]).astype(int)
100 |
101 | for s in seedPositions:
102 | for f in goodFrames:
103 | seeds[f, s[0], s[1]] = 1
104 |
105 | seeds = ndimage.morphology.binary_dilation(
106 | seeds, structure=ndimage.morphology.generate_binary_structure(3, 1))
107 | seeds = np.array([ndimage.morphology.binary_dilation(
108 | x, structure=diskStruct) for x in seeds])
109 |
110 | return seeds
111 |
112 |
113 | def prepare_watershed_images(watershedImageStack: np.ndarray
114 | ) -> Tuple[np.ndarray, np.ndarray]:
115 | """Prepare the given images as the input image for watershedding.
116 |
117 |     A watershed mask is determined using an adaptive threshold. The watershed
118 |     images are then inverted so that their largest values become minima,
119 |     and the image stack is normalized to have values between 0
120 |     and 1.
121 |
122 | Args:
123 | watershedImageStack: a 3 dimensional numpy array containing the images
124 | arranged as (z, x, y).
125 | Returns: a tuple containing the normalized watershed images and the
126 | calculated watershed mask
127 | """
128 | filterSize = int(2 * np.floor(watershedImageStack.shape[1] / 16) + 1)
129 |
130 | watershedMask = np.array([ndimage.morphology.binary_fill_holes(
131 | x > 1.1 * filters.threshold_local(x, filterSize, method='mean',
132 | mode='nearest'))
133 | for x in watershedImageStack])
134 |
135 | normalizedWatershed = 1 - (watershedImageStack
136 | - np.min(watershedImageStack)) / \
137 | (np.max(watershedImageStack)
138 | - np.min(watershedImageStack))
139 | normalizedWatershed[np.invert(watershedMask)] = 1
140 |
141 | return normalizedWatershed, watershedMask
142 |
--------------------------------------------------------------------------------
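A minimal sketch of the seed extraction and image preparation flow above on a
small synthetic stack; the sizes and intensities are arbitrary:

    import numpy as np
    from merlin.util import watershed

    stack = np.random.RandomState(0).uniform(size=(3, 128, 128))
    stack[:, 60:70, 60:70] += 5.0  # one bright, cell-like region

    seeds = watershed.extract_seeds(stack)
    normalized, mask = watershed.prepare_watershed_images(stack)
    print(seeds.shape, float(normalized.min()), float(normalized.max()))

--------------------------------------------------------------------------------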
/merlin/view/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/merlin/view/__init__.py
--------------------------------------------------------------------------------
/merlin/view/__main__.py:
--------------------------------------------------------------------------------
1 | from .merlinview import merlin_view
2 |
3 | merlin_view()
4 |
--------------------------------------------------------------------------------
/merlin/view/merlinview.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import argparse
3 |
4 | from PyQt5 import QtWidgets
5 |
6 | from merlin.core import dataset
7 | from merlin.util import binary
8 | from merlin.view.widgets import regionview
9 |
10 | temp = r'180710_HAECs_NoFlow_HAEC2\Sample1'
11 |
12 | def build_parser():
13 | parser = argparse.ArgumentParser()
14 |
15 | parser.add_argument('-d', '--data-set', required=True)
16 |
17 | return parser
18 |
19 | def merlin_view():
20 | print('MERlinView - MERFISH data exploration software')
21 | parser = build_parser()
22 | args, argv = parser.parse_known_args()
23 |
24 | data = dataset.MERFISHDataSet(args.data_set)
25 | wTask = data.load_analysis_task('FiducialCorrelationWarp')
26 | dTask = data.load_analysis_task('DeconvolutionPreprocess')
27 | fTask = data.load_analysis_task('StrictFilterBarcodes')
28 |
29 | app = QtWidgets.QApplication([])
30 |
31 | frame = QtWidgets.QFrame()
32 | window = QtWidgets.QMainWindow()
33 | window.setCentralWidget(frame)
34 | window.resize(1000,1000)
35 | layout = QtWidgets.QGridLayout(frame)
36 | layout.addWidget(regionview.RegionViewWidget(
37 | wTask, fTask.get_barcode_database(), data))
38 |
39 |
40 | window.show()
41 | sys.exit(app.exec_())
42 |
--------------------------------------------------------------------------------
/merlin/view/widgets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/merlin/view/widgets/__init__.py
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | setuptools>=36.0.0
2 | urllib3
3 | python-dotenv>=0.8.2
4 | pandas>=0.23.4
5 | tifffile>=0.14.0
6 | opencv-python>=3
7 | scikit-image>=0.15.0
8 | scikit-learn>=0.19.0
9 | numpy>1.16.0
10 | scipy>=1.2
11 | matplotlib
12 | networkx
13 | rtree
14 | shapely<1.7a2
15 | seaborn>=0.9.0
16 | pyqt5
17 | Sphinx
18 | sphinx-rtd-theme
19 | pyclustering
20 | pytest
21 | pytest-cov
22 | h5py>=1.8.15
23 | numexpr>=2.6.2
24 | cython>=0.21
25 | snakemake
26 | requests>=2.18.0
27 | tables
28 | boto3
29 | xmltodict
30 | google-cloud-storage
31 | docutils<0.16,>=0.10
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 | import setuptools
3 |
4 | CLASSIFIERS = [
5 | "Development Status :: 4 - Beta",
6 | "Natural Language :: English",
7 | "Operating System :: POSIX",
8 | "Operating System :: Unix",
9 | "Operating System :: MacOS :: MacOS X",
10 | "License :: Restricted use",
11 | "Programming Language :: Python :: 3.6",
12 | "Topic :: Scientific/Engineering :: Bio-Informatics",
13 | ]
14 |
15 | install_requires = [line.rstrip() for line in open(
16 | os.path.join(os.path.dirname(__file__), "requirements.txt"))]
17 |
18 | setuptools.setup(
19 | name="merlin",
20 | version="0.1.6",
21 | description="MERFISH decoding software",
22 | author="George Emanuel",
23 | author_email="emanuega0@gmail.com",
24 | license="Restricted use",
25 | packages=setuptools.find_packages(),
26 | install_requires=install_requires,
27 | entry_points={
28 | 'console_scripts': ["merlin=merlin.merlin:merlin"]
29 | },
30 | classifiers=CLASSIFIERS
31 | )
32 |
--------------------------------------------------------------------------------
/test/auxiliary_files/test.dax:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/test/auxiliary_files/test.dax
--------------------------------------------------------------------------------
/test/auxiliary_files/test.inf:
--------------------------------------------------------------------------------
1 | binning = 1 x 1
2 | data type = 16 bit integers (binary, big endian)
3 | frame dimensions = 256 x 256
4 | number of frames = 10
5 | Lock Target = 0.0
6 | x_start = 1
7 | x_end = 256
8 | y_start = 1
9 | y_end = 256
10 |
--------------------------------------------------------------------------------
/test/auxiliary_files/test_0_0.tif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/test/auxiliary_files/test_0_0.tif
--------------------------------------------------------------------------------
/test/auxiliary_files/test_0_1.tif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/test/auxiliary_files/test_0_1.tif
--------------------------------------------------------------------------------
/test/auxiliary_files/test_0_2.tif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/test/auxiliary_files/test_0_2.tif
--------------------------------------------------------------------------------
/test/auxiliary_files/test_0_3.tif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/test/auxiliary_files/test_0_3.tif
--------------------------------------------------------------------------------
/test/auxiliary_files/test_0_4.tif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/test/auxiliary_files/test_0_4.tif
--------------------------------------------------------------------------------
/test/auxiliary_files/test_0_5.tif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/test/auxiliary_files/test_0_5.tif
--------------------------------------------------------------------------------
/test/auxiliary_files/test_0_6.tif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/test/auxiliary_files/test_0_6.tif
--------------------------------------------------------------------------------
/test/auxiliary_files/test_0_7.tif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/test/auxiliary_files/test_0_7.tif
--------------------------------------------------------------------------------
/test/auxiliary_files/test_1_0.tif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/test/auxiliary_files/test_1_0.tif
--------------------------------------------------------------------------------
/test/auxiliary_files/test_1_1.tif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/test/auxiliary_files/test_1_1.tif
--------------------------------------------------------------------------------
/test/auxiliary_files/test_1_2.tif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/test/auxiliary_files/test_1_2.tif
--------------------------------------------------------------------------------
/test/auxiliary_files/test_1_3.tif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/test/auxiliary_files/test_1_3.tif
--------------------------------------------------------------------------------
/test/auxiliary_files/test_1_4.tif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/test/auxiliary_files/test_1_4.tif
--------------------------------------------------------------------------------
/test/auxiliary_files/test_1_5.tif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/test/auxiliary_files/test_1_5.tif
--------------------------------------------------------------------------------
/test/auxiliary_files/test_1_6.tif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/test/auxiliary_files/test_1_6.tif
--------------------------------------------------------------------------------
/test/auxiliary_files/test_1_7.tif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/test/auxiliary_files/test_1_7.tif
--------------------------------------------------------------------------------
/test/auxiliary_files/test_analysis_parameters.json:
--------------------------------------------------------------------------------
1 | {
2 | "analysis_tasks": [
3 | {
4 | "task": "FiducialCorrelationWarp",
5 | "module": "merlin.analysis.warp",
6 | "parameters": {
7 | "write_aligned_images": true
8 | }
9 | },
10 | {
11 | "task": "DeconvolutionPreprocess",
12 | "module": "merlin.analysis.preprocess",
13 | "parameters": {
14 | "warp_task": "FiducialCorrelationWarp"
15 | }
16 | },
17 | {
18 | "task": "OptimizeIteration",
19 | "module": "merlin.analysis.optimize",
20 | "analysis_name": "Optimize1",
21 | "parameters": {
22 | "preprocess_task": "DeconvolutionPreprocess",
23 | "warp_task": "FiducialCorrelationWarp",
24 | "fov_per_iteration": 2,
25 | "iteration_count": 2,
26 | "optimize_chromatic_correction": false
27 | }
28 | },
29 | {
30 | "task": "OptimizeIteration",
31 | "module": "merlin.analysis.optimize",
32 | "analysis_name": "Optimize2",
33 | "parameters": {
34 | "preprocess_task": "DeconvolutionPreprocess",
35 | "warp_task": "FiducialCorrelationWarp",
36 | "fov_per_iteration": 2,
37 | "iteration_count": 2,
38 | "optimize_chromatic_correction": false,
39 | "previous_iteration": "Optimize1"
40 | }
41 | },
42 | {
43 | "task": "Decode",
44 | "module": "merlin.analysis.decode",
45 | "parameters": {
46 | "preprocess_task": "DeconvolutionPreprocess",
47 | "optimize_task": "Optimize2",
48 | "global_align_task": "SimpleGlobalAlignment",
49 | "crop_width": 10,
50 | "remove_z_duplicated_barcodes": true,
51 | "z_duplicate_zPlane_threshold": 1,
52 | "z_duplicate_xy_pixel_threshold": 1.414
53 | }
54 | },
55 | {
56 | "task": "SimpleGlobalAlignment",
57 | "module": "merlin.analysis.globalalign"
58 | },
59 | {
60 | "task": "GenerateMosaic",
61 | "module": "merlin.analysis.generatemosaic",
62 | "parameters": {
63 | "global_align_task": "SimpleGlobalAlignment",
64 | "warp_task": "FiducialCorrelationWarp"
65 | }
66 | },
67 | {
68 | "task": "FilterBarcodes",
69 | "module": "merlin.analysis.filterbarcodes",
70 | "parameters": {
71 | "decode_task": "Decode",
72 | "area_threshold": 5,
73 | "intensity_threshold": 1
74 | }
75 | },
76 | {
77 | "task": "GenerateAdaptiveThreshold",
78 | "module": "merlin.analysis.filterbarcodes",
79 | "parameters": {
80 | "decode_task": "Decode",
81 | "run_after_task": "Decode"
82 | }
83 | },
84 | {
85 | "task": "AdaptiveFilterBarcodes",
86 | "module": "merlin.analysis.filterbarcodes",
87 | "parameters": {
88 | "decode_task": "Decode",
89 | "adaptive_task": "GenerateAdaptiveThreshold"
90 | }
91 | },
92 | {
93 | "task": "ExportBarcodes",
94 | "module": "merlin.analysis.exportbarcodes",
95 | "parameters": {
96 | "filter_task": "FilterBarcodes"
97 | }
98 | },
99 | {
100 | "task": "PlotPerformance",
101 | "module": "merlin.analysis.plotperformance",
102 | "parameters": {
103 | "preprocess_task": "DeconvolutionPreprocess",
104 | "optimize_task": "Optimize2",
105 | "decode_task": "Decode",
106 | "filter_task": "AdaptiveFilterBarcodes",
107 | "global_align_task": "SimpleGlobalAlignment"
108 | }
109 | },
110 | {
111 | "task": "WatershedSegment",
112 | "module": "merlin.analysis.segment",
113 | "parameters": {
114 | "warp_task": "FiducialCorrelationWarp",
115 | "global_align_task": "SimpleGlobalAlignment"
116 | }
117 | },
118 | {
119 | "task": "CleanCellBoundaries",
120 | "module": "merlin.analysis.segment",
121 | "parameters": {
122 | "segment_task": "WatershedSegment",
123 | "global_align_task": "SimpleGlobalAlignment"
124 | }
125 | },
126 | {
127 | "task": "CombineCleanedBoundaries",
128 | "module": "merlin.analysis.segment",
129 | "parameters": {
130 | "cleaning_task": "CleanCellBoundaries"
131 | }
132 | },
133 | {
134 | "task": "RefineCellDatabases",
135 | "module": "merlin.analysis.segment",
136 | "parameters": {
137 | "segment_task": "WatershedSegment",
138 | "combine_cleaning_task": "CombineCleanedBoundaries"
139 | }
140 | },
141 | {
142 | "task": "PartitionBarcodes",
143 | "module": "merlin.analysis.partition",
144 | "parameters": {
145 | "filter_task": "AdaptiveFilterBarcodes",
146 | "assignment_task": "RefineCellDatabases",
147 | "alignment_task": "SimpleGlobalAlignment"
148 | }
149 | },
150 | {
151 | "task": "ExportPartitionedBarcodes",
152 | "module": "merlin.analysis.partition",
153 | "parameters": {
154 | "partition_task": "PartitionBarcodes"
155 | }
156 | },
157 | {
158 | "task": "ExportCellMetadata",
159 | "module": "merlin.analysis.segment",
160 | "parameters": {
161 | "segment_task": "RefineCellDatabases"
162 | }
163 | },
164 | {
165 | "task": "SumSignal",
166 | "module": "merlin.analysis.sequential",
167 | "parameters": {
168 | "z_index": 0,
169 | "apply_highpass": true,
170 | "warp_task": "FiducialCorrelationWarp",
171 | "highpass_sigma": 5,
172 | "segment_task": "RefineCellDatabases",
173 | "global_align_task": "SimpleGlobalAlignment"
174 | }
175 | },
176 | {
177 | "task": "ExportSumSignals",
178 | "module": "merlin.analysis.sequential",
179 | "parameters": {
180 | "sequential_task": "SumSignal"
181 | }
182 | }
183 |
184 | ]
185 |
186 | }
187 |
--------------------------------------------------------------------------------
/test/auxiliary_files/test_codebook.csv:
--------------------------------------------------------------------------------
1 | name,id,bit1,bit2,bit3,bit4,bit5,bit6,bit7,bit8,bit9,bit10,bit11,bit12,bit13,bit14,bit15,bit16
2 | STMN1,ENST00000465604.1,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,0
3 | DHCR24,ENST00000535035.5,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0
4 | VCAM1,ENST00000370115.1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0
5 | SELL,ENST00000236147.4,0,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0
6 | PTGS2,ENST00000367468.9,0,0,1,0,0,1,0,0,0,0,0,0,1,1,0,0
7 | PALD1,ENST00000263563.6,0,0,1,0,0,0,1,0,0,0,1,0,1,0,0,0
8 | PLAU,ENST00000446342.5,1,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0
9 | PPP1R3C,ENST00000238994.5,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0
10 | SERPINH1,ENST00000524558.5,0,0,0,1,0,0,0,0,0,1,0,0,1,1,0,0
11 | HYOU1,ENST00000617285.4,0,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0
12 | A2M,ENST00000318602.11,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0
13 | LMO7,ENST00000377499.9,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0
14 | MMP14,ENST00000311852.10,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0
15 | LTB4R,ENST00000396789.4,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,1
16 | VASH1,ENST00000167106.8,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0
17 | FBN1,ENST00000316623.9,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0
18 | SMAD6,ENST00000612349.1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,1
19 | ITGA11,ENST00000423218.6,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1
20 | CX3CL1,ENST00000006053.6,0,0,0,0,1,0,0,0,1,0,0,0,1,0,1,0
21 | MYH10,ENST00000269243.8,0,0,0,1,0,0,0,0,0,0,0,1,1,0,1,0
22 | ITGB4,ENST00000580542.5,0,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0
23 | BCL2,ENST00000333681.4,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1
24 | ICAM1,ENST00000264832.7,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1
25 | ICAM3,ENST00000587992.1,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0
26 | FOSB,ENST00000591858.5,1,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0
27 | PLA2G4C,ENST00000599111.5,0,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0
28 | RPS7,ENST00000481006.1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,1
29 | ADAM17,ENST00000310823.7,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0
30 | LIMS1,ENST00000544547.5,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0
31 | ITGAV,ENST00000433736.6,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0
32 | COL5A2,ENST00000374866.7,0,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0
33 | CASP10,ENST00000360132.7,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,0
34 | HSPA12B,ENST00000399701.1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,0,0
35 | BCL2L1,ENST00000307677.4,0,0,0,1,0,0,1,1,0,0,0,0,1,0,0,0
36 | PPP1R16B,ENST00000373331.2,1,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0
37 | PTGIS,ENST00000244043.4,1,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0
38 | TCN2,ENST00000215838.7,0,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0
39 | STAB1,ENST00000461325.1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1
40 | FAM107A,ENST00000360997.6,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,1
41 | MRAS,ENST00000621127.4,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,1
42 | BCL6,ENST00000406870.6,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1
43 | ATP13A3,ENST00000256031.8,0,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0
44 | MFI2,ENST00000296350.9,0,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0
45 | LIMCH1,ENST00000511496.5,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,0
46 | PDGFRA,ENST00000257290.9,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0
47 | ENPEP,ENST00000265162.9,0,0,0,1,0,1,0,1,1,0,0,0,0,0,0,0
48 | FGF2,ENST00000608478.1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1
49 | IL15,ENST00000477265.5,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0
50 | FAM198B,ENST00000585682.5,1,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0
51 | CASP3,ENST00000308394.8,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0
52 | OCLN,ENST00000355237.6,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0
53 | LOX,ENST00000231004.4,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0
54 | FLT4,ENST00000393347.7,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0
55 | RIPK1,ENST00000259808.8,0,0,0,0,0,0,0,0,1,0,1,0,1,1,0,0
56 | SOX4,ENST00000244745.2,0,0,1,0,0,0,1,0,1,0,0,0,0,1,0,0
57 | ABCF1,ENST00000376545.7,0,0,1,0,0,0,0,0,0,0,1,1,0,1,0,0
58 | TNXB,ENST00000375244.7,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0
59 | ARHGAP18,ENST00000368149.2,1,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0
60 | TNFAIP3,ENST00000620204.3,0,0,0,0,0,0,0,1,0,1,0,1,1,0,0,0
61 | PDIA4,ENST00000286091.8,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0
62 | NOS3,ENST00000297494.7,0,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0
63 | ANGPT2,ENST00000325203.9,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0
64 | LOXL2,ENST00000389131.7,0,1,0,0,0,0,0,0,0,1,1,0,1,0,0,0
65 | STC1,ENST00000290271.6,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1
66 | PLAT,ENST00000352041.7,0,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0
67 | ZNF704,ENST00000327835.7,0,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0
68 | NOV,ENST00000259526.3,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0
69 | C9orf3,ENST00000297979.9,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1
70 | RGS3,ENST00000317613.10,0,0,0,1,0,0,0,1,0,1,0,0,0,0,1,0
71 | COL5A1,ENST00000371817.7,0,0,0,0,1,0,1,0,1,1,0,0,0,0,0,0
72 | Blank-01,nan,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1
73 | Blank-02,nan,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0
74 | Blank-03,nan,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,0
75 | Blank-04,nan,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0
76 | Blank-05,nan,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1
77 | Blank-06,nan,0,0,0,1,0,0,0,1,0,0,0,1,0,1,0,0
78 | Blank-07,nan,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,0
79 | Blank-08,nan,0,0,0,0,1,0,0,0,0,0,1,0,0,1,1,0
80 | Blank-09,nan,0,0,1,0,0,1,1,0,0,0,0,1,0,0,0,0
81 | Blank-10,nan,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1
82 | Blank-11,nan,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1
83 | Blank-12,nan,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1
84 | Blank-13,nan,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1
85 | Blank-14,nan,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0
86 | Blank-15,nan,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,0
87 | Blank-16,nan,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0
88 | Blank-17,nan,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0
89 | Blank-18,nan,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0
90 | Blank-19,nan,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1
91 | Blank-20,nan,0,0,0,0,0,0,0,1,0,0,0,0,1,1,1,0
92 | Blank-21,nan,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1
93 | Blank-22,nan,1,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0
94 | Blank-23,nan,0,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0
95 | Blank-24,nan,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0
96 | Blank-25,nan,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,0
97 | Blank-26,nan,1,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0
98 | Blank-27,nan,0,1,0,0,1,0,1,0,0,0,0,0,0,1,0,0
99 | Blank-28,nan,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0
100 | Blank-29,nan,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,1
101 | Blank-30,nan,1,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0
102 | Blank-31,nan,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0
103 | Blank-32,nan,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1
104 | Blank-33,nan,0,1,0,0,0,0,0,0,1,0,0,1,0,0,1,0
105 | Blank-34,nan,0,1,0,0,0,1,0,0,0,1,0,1,0,0,0,0
106 | Blank-35,nan,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0
107 | Blank-36,nan,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1
108 | Blank-37,nan,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0
109 | Blank-38,nan,0,0,0,0,0,0,0,1,1,0,1,0,0,0,1,0
110 | Blank-39,nan,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,1
111 | Blank-40,nan,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,1
112 | Blank-41,nan,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0
113 | Blank-42,nan,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1
114 | Blank-43,nan,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,1
115 | Blank-44,nan,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0
116 | Blank-45,nan,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,1
117 | Blank-46,nan,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0
118 | Blank-47,nan,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,1
119 | Blank-48,nan,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0
120 | Blank-49,nan,1,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0
121 | Blank-50,nan,0,0,0,0,0,0,0,0,0,1,0,1,0,1,1,0
122 | Blank-51,nan,0,0,0,0,0,1,0,1,0,1,1,0,0,0,0,0
123 | Blank-52,nan,0,0,0,0,0,0,1,0,1,0,1,1,0,0,0,0
124 | Blank-53,nan,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0
125 | Blank-54,nan,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0
126 | Blank-55,nan,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0
127 | Blank-56,nan,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,0
128 | Blank-57,nan,1,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0
129 | Blank-58,nan,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,1
130 | Blank-59,nan,0,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0
131 | Blank-60,nan,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0
132 | Blank-61,nan,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1
133 | Blank-62,nan,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0
134 | Blank-63,nan,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0
135 | Blank-64,nan,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1
136 | Blank-65,nan,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,1
137 | Blank-66,nan,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1
138 | Blank-67,nan,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0
139 | Blank-68,nan,0,0,0,0,0,0,1,1,0,1,0,0,0,1,0,0
140 | Blank-69,nan,0,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0
141 | Blank-70,nan,1,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0
142 |
--------------------------------------------------------------------------------
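Note: test_codebook.csv defines 140 sixteen-bit barcodes (70 coding genes plus
70 Blank controls), each carrying exactly four on-bits, consistent with an
MHD4-style code. A minimal sketch that checks the constant Hamming weight and
computes the minimum pairwise Hamming distance (the CSV path is an assumption,
taken relative to the repository root):

    import itertools
    import numpy as np
    import pandas as pd

    codebook = pd.read_csv('test/auxiliary_files/test_codebook.csv')
    bits = codebook.filter(like='bit').to_numpy(dtype=int)

    # Every barcode should carry exactly four on-bits.
    assert (bits.sum(axis=1) == 4).all()

    # Minimum Hamming distance over all barcode pairs.
    minDistance = min(np.sum(a != b)
                      for a, b in itertools.combinations(bits, 2))
    print(len(bits), 'barcodes, min pairwise Hamming distance:', minDistance)

--------------------------------------------------------------------------------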
/test/auxiliary_files/test_codebook2.csv:
--------------------------------------------------------------------------------
1 | name,id,bit1,bit2,bit3,bit4,bit5,bit6,bit7,bit8,bit9,bit10,bit11,bit12,bit13,bit14,bit15,bit16
2 | STMN1,ENST00000465604.1,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,0
3 | DHCR24,ENST00000535035.5,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0
4 | VCAM1,ENST00000370115.1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0
5 | SELL,ENST00000236147.4,0,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0
6 | PTGS2,ENST00000367468.9,0,0,1,0,0,1,0,0,0,0,0,0,1,1,0,0
7 | PALD1,ENST00000263563.6,0,0,1,0,0,0,1,0,0,0,1,0,1,0,0,0
8 | PLAU,ENST00000446342.5,1,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0
9 | PPP1R3C,ENST00000238994.5,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0
10 | SERPINH1,ENST00000524558.5,0,0,0,1,0,0,0,0,0,1,0,0,1,1,0,0
11 | HYOU1,ENST00000617285.4,0,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0
12 |
--------------------------------------------------------------------------------
/test/auxiliary_files/test_data_organization.csv:
--------------------------------------------------------------------------------
1 | channelName,readoutName,imageType,imageRegExp,bitNumber,imagingRound,color,frame,zPos,fiducialImageType,fiducialRegExp,fiducialImagingRound,fiducialFrame,fiducialColor
2 | bit1,bit1,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),1,0,650,1,0,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),0,2,561
3 | bit2,bit2,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),2,0,750,0,0,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),0,2,561
4 | bit3,bit3,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),3,1,750,0,0,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),1,2,561
5 | bit4,bit4,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),4,1,650,1,0,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),1,2,561
6 | bit5,bit5,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),5,2,650,1,0,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),2,2,561
7 | bit6,bit6,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),6,2,750,0,0,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),2,2,561
8 | bit7,bit7,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),7,3,650,1,0,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),3,2,561
9 | bit8,bit8,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),8,3,750,0,0,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),3,2,561
10 | bit9,bit9,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),9,4,750,0,0,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),4,2,561
11 | bit10,bit10,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),10,4,650,1,0,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),4,2,561
12 | bit11,bit11,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),11,5,650,1,0,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),5,2,561
13 | bit12,bit12,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),12,5,750,0,0,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),5,2,561
14 | bit13,bit13,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),13,6,650,1,0,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),6,2,561
15 | bit14,bit14,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),14,6,750,0,0,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),6,2,561
16 | bit15,bit15,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),15,7,750,0,0,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),7,2,561
17 | bit16,bit16,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),16,7,650,1,0,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),7,2,561
18 | DAPI,cellstain,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),48,0,488,3,0,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),0,2,561
19 | polyT,nuclearstain,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),47,0,405,4,0,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),0,2,561
20 |
--------------------------------------------------------------------------------
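Note: the imageRegExp and fiducialRegExp columns hold the named-group regular
expression that MERlin uses to parse an image file name into its image type,
field of view, and imaging round. A quick sketch of how the pattern above
resolves a stem such as test_0_1 (extension already stripped):

    import re

    pattern = r'(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+)'
    print(re.fullmatch(pattern, 'test_0_1').groupdict())
    # {'imageType': 'test', 'fov': '0', 'imagingRound': '1'}

--------------------------------------------------------------------------------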
/test/auxiliary_files/test_microscope_parameters.json:
--------------------------------------------------------------------------------
1 | {
2 | "flip_horizontal": false,
3 | "flip_vertical": true,
4 | "transpose": true,
5 | "microns_per_pixel": 0.108,
6 | "image_dimensions": [128, 128]
7 | }
8 |
--------------------------------------------------------------------------------
/test/auxiliary_files/test_positions.csv:
--------------------------------------------------------------------------------
1 | -2000,-2000
2 | -2000,-1805
3 |
--------------------------------------------------------------------------------
/test/conftest.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pytest
3 | import shutil
4 | import glob
5 | from merlin.core import dataset
6 | from merlin.analysis import testtask
7 | import merlin
8 |
9 |
10 | root = os.path.join(os.path.dirname(merlin.__file__), '..', 'test')
11 | merlin.DATA_HOME = os.path.abspath('test_data')
12 | merlin.ANALYSIS_HOME = os.path.abspath('test_analysis')
13 | merlin.ANALYSIS_PARAMETERS_HOME = os.path.abspath('test_analysis_parameters')
14 | merlin.CODEBOOK_HOME = os.path.abspath('test_codebooks')
15 | merlin.DATA_ORGANIZATION_HOME = os.path.abspath('test_dataorganization')
16 | merlin.POSITION_HOME = os.path.abspath('test_positions')
17 | merlin.MICROSCOPE_PARAMETERS_HOME = os.path.abspath('test_microscope_parameters')
18 |
19 |
20 | dataDirectory = os.sep.join([merlin.DATA_HOME, 'test'])
21 | merfishDataDirectory = os.sep.join([merlin.DATA_HOME, 'merfish_test'])
22 |
23 |
24 | @pytest.fixture(scope='session')
25 | def base_files():
26 | folderList = [merlin.DATA_HOME, merlin.ANALYSIS_HOME,
27 | merlin.ANALYSIS_PARAMETERS_HOME, merlin.CODEBOOK_HOME,
28 | merlin.DATA_ORGANIZATION_HOME, merlin.POSITION_HOME,
29 | merlin.MICROSCOPE_PARAMETERS_HOME]
30 | for folder in folderList:
31 | if os.path.exists(folder):
32 | shutil.rmtree(folder)
33 | os.makedirs(folder)
34 |
35 | shutil.copyfile(
36 | os.sep.join(
37 | [root, 'auxiliary_files', 'test_data_organization.csv']),
38 | os.sep.join(
39 | [merlin.DATA_ORGANIZATION_HOME, 'test_data_organization.csv']))
40 | shutil.copyfile(
41 | os.sep.join(
42 | [root, 'auxiliary_files', 'test_codebook.csv']),
43 | os.sep.join(
44 | [merlin.CODEBOOK_HOME, 'test_codebook.csv']))
45 | shutil.copyfile(
46 | os.sep.join(
47 | [root, 'auxiliary_files', 'test_codebook2.csv']),
48 | os.sep.join(
49 | [merlin.CODEBOOK_HOME, 'test_codebook2.csv']))
50 | shutil.copyfile(
51 | os.sep.join(
52 | [root, 'auxiliary_files', 'test_positions.csv']),
53 | os.sep.join(
54 | [merlin.POSITION_HOME, 'test_positions.csv']))
55 | shutil.copyfile(
56 | os.sep.join(
57 | [root, 'auxiliary_files', 'test_analysis_parameters.json']),
58 | os.sep.join(
59 | [merlin.ANALYSIS_PARAMETERS_HOME, 'test_analysis_parameters.json']))
60 | shutil.copyfile(
61 | os.sep.join(
62 | [root, 'auxiliary_files', 'test_microscope_parameters.json']),
63 | os.sep.join(
64 | [merlin.MICROSCOPE_PARAMETERS_HOME,
65 | 'test_microscope_parameters.json']))
66 |
67 | yield
68 |
69 | for folder in folderList:
70 | shutil.rmtree(folder)
71 |
72 |
73 | @pytest.fixture(scope='session')
74 | def merfish_files(base_files):
75 | os.mkdir(merfishDataDirectory)
76 |
77 | for imageFile in glob.iglob(
78 | os.sep.join([root, 'auxiliary_files', '*.tif'])):
79 | if os.path.isfile(imageFile):
80 | shutil.copy(imageFile, merfishDataDirectory)
81 |
82 | yield
83 |
84 | shutil.rmtree(merfishDataDirectory)
85 |
86 |
87 | @pytest.fixture(scope='session')
88 | def simple_data(base_files):
89 | os.mkdir(dataDirectory)
90 | testData = dataset.DataSet('test')
91 |
92 | yield testData
93 |
94 | shutil.rmtree(dataDirectory)
95 |
96 |
97 | @pytest.fixture(scope='session')
98 | def simple_merfish_data(merfish_files):
99 | testMERFISHData = dataset.MERFISHDataSet(
100 | 'merfish_test',
101 | dataOrganizationName='test_data_organization.csv',
102 | codebookNames=['test_codebook.csv'],
103 | positionFileName='test_positions.csv',
104 | microscopeParametersName='test_microscope_parameters.json')
105 | yield testMERFISHData
106 |
107 |
108 | @pytest.fixture(scope='session')
109 | def two_codebook_merfish_data(merfish_files):
110 | testMERFISHData = dataset.MERFISHDataSet(
111 | 'merfish_test',
112 | dataOrganizationName='test_data_organization.csv',
113 | codebookNames=['test_codebook2.csv', 'test_codebook.csv'],
114 | positionFileName='test_positions.csv',
115 | analysisHome=os.path.join(merlin.ANALYSIS_HOME, '..',
116 | 'test_analysis_two_codebook'),
117 | microscopeParametersName='test_microscope_parameters.json')
118 | yield testMERFISHData
119 |
120 | shutil.rmtree('test_analysis_two_codebook')
121 |
122 |
123 | @pytest.fixture(scope='function')
124 | def single_task(simple_data):
125 | task = testtask.SimpleAnalysisTask(
126 | simple_data, parameters={'a': 5, 'b': 'b_string'})
127 | yield task
128 | simple_data.delete_analysis(task)
129 |
130 |
131 | @pytest.fixture(scope='function', params=[
132 | testtask.SimpleAnalysisTask, testtask.SimpleParallelAnalysisTask,
133 | testtask.SimpleInternallyParallelAnalysisTask])
134 | def simple_task(simple_data, request):
135 | task = request.param(
136 | simple_data, parameters={'a': 5, 'b': 'b_string'})
137 | yield task
138 | simple_data.delete_analysis(task)
139 |
140 |
141 | @pytest.fixture(scope='function', params=[
142 | testtask.SimpleAnalysisTask, testtask.SimpleParallelAnalysisTask,
143 | testtask.SimpleInternallyParallelAnalysisTask])
144 | def simple_merfish_task(simple_merfish_data, request):
145 | task = request.param(
146 | simple_merfish_data, parameters={'a': 5, 'b': 'b_string'})
147 | yield task
148 | simple_merfish_data.delete_analysis(task)
149 |
--------------------------------------------------------------------------------
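Note: the fixtures above are session-scoped, so the test data tree is built
once, shared by every test in the run, and torn down afterwards. A hypothetical
test consuming one of them only needs to declare the fixture as an argument (a
sketch, not a test from this repository):

    def test_codebook_loaded(simple_merfish_data):
        # pytest injects the session-scoped MERFISHDataSet from conftest.py
        assert simple_merfish_data.get_codebook().get_bit_count() == 16

--------------------------------------------------------------------------------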
/test/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | markers =
3 |     slowtest: a test that takes longer than a couple of seconds
4 |     fullrun: a test that runs the full MERFISH decoding pipeline
5 |
--------------------------------------------------------------------------------
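Note: registering the markers here silences pytest's unknown-marker warnings
and lets the expensive tests be deselected during quick iterations, e.g.:

    pytest -m "not slowtest and not fullrun"

--------------------------------------------------------------------------------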
/test/test_binary_utils.py:
--------------------------------------------------------------------------------
1 | import random
2 | import numpy as np
3 |
4 | from merlin.util import binary
5 |
6 | def test_bit_array_to_int_conversion():
7 | for i in range(50):
8 | intIn = random.getrandbits(64)
9 | listOut = binary.int_to_bit_list(intIn, 64)
10 | intOut = binary.bit_list_to_int(listOut)
11 | assert intIn == intOut
12 |
13 | def test_flip_bit():
14 | barcode = [random.getrandbits(1) for i in range(128)]
15 | barcodeCopy = np.copy(barcode)
16 | for i in range(len(barcode)):
17 | flippedBarcode = binary.flip_bit(barcode, i)
18 | assert np.array_equal(barcode, barcodeCopy)
19 |         assert all([barcode[j] == flippedBarcode[j]
20 |                     for j in range(len(barcode)) if j != i])
21 | assert barcode[i] == (not flippedBarcode[i])
22 |
--------------------------------------------------------------------------------
/test/test_codebook.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pytest
3 |
4 | from merlin.core import dataset
5 |
6 |
7 | def test_codebook_get_barcode_count(simple_merfish_data):
8 | assert simple_merfish_data.get_codebook().get_barcode_count() == 140
9 |
10 |
11 | def test_codebook_get_bit_count(simple_merfish_data):
12 | assert simple_merfish_data.get_codebook().get_bit_count() == 16
13 |
14 |
15 | def test_codebook_get_bit_names(simple_merfish_data):
16 | for i, n in enumerate(simple_merfish_data.get_codebook().get_bit_names()):
17 | assert n == 'bit' + str(i+1)
18 |
19 |
20 | def test_codebook_get_barcode(simple_merfish_data):
21 | codebook = simple_merfish_data.get_codebook()
22 | for i in range(codebook.get_barcode_count()):
23 | assert np.sum(codebook.get_barcode(i)) == 4
24 | assert np.array_equal(
25 | codebook.get_barcode(0),
26 | [0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0])
27 |
28 |
29 | def test_codebook_get_coding_indexes(simple_merfish_data):
30 | assert np.array_equal(
31 | simple_merfish_data.get_codebook().get_coding_indexes(),
32 | np.arange(70))
33 |
34 |
35 | def test_codebook_get_blank_indexes(simple_merfish_data):
36 | assert np.array_equal(
37 | simple_merfish_data.get_codebook().get_blank_indexes(),
38 | np.arange(70, 140))
39 |
40 |
41 | def test_codebook_get_barcodes(simple_merfish_data):
42 | bcSetWithBlanks = simple_merfish_data.get_codebook().get_barcodes()
43 | assert len(bcSetWithBlanks) == 140
44 | assert all([len(x) == 16 for x in bcSetWithBlanks])
45 | assert all([np.sum(x) == 4 for x in bcSetWithBlanks])
46 | bcSetNoBlanks = simple_merfish_data.get_codebook().get_barcodes(
47 | ignoreBlanks=True)
48 | assert len(bcSetNoBlanks) == 70
49 | assert all([len(x) == 16 for x in bcSetNoBlanks])
50 | assert all([np.sum(x) == 4 for x in bcSetNoBlanks])
51 |
52 |
53 | def test_codebook_get_name(simple_merfish_data):
54 | assert simple_merfish_data.get_codebook().get_codebook_name() \
55 | == 'test_codebook'
56 |
57 |
58 | def test_codebook_get_index(simple_merfish_data):
59 | assert simple_merfish_data.get_codebook().get_codebook_index() == 0
60 |
61 |
62 | def test_codebook_get_gene_names(simple_merfish_data):
63 | names = simple_merfish_data.get_codebook().get_gene_names()
64 | codebook = simple_merfish_data.get_codebook()
65 | for n in names:
66 | assert n == codebook.get_name_for_barcode_index(
67 | codebook.get_barcode_index_for_name(n))
68 |
69 |
70 | def test_two_codebook_save_load(two_codebook_merfish_data):
71 | codebook1 = two_codebook_merfish_data.get_codebook(0)
72 | codebook2 = two_codebook_merfish_data.get_codebook(1)
73 | assert len(two_codebook_merfish_data.get_codebooks()) == 2
74 | assert codebook1.get_codebook_name() == 'test_codebook2'
75 | assert codebook1.get_codebook_index() == 0
76 | assert len(codebook1.get_barcodes()) == 10
77 | assert codebook2.get_codebook_name() == 'test_codebook'
78 | assert codebook2.get_codebook_index() == 1
79 | assert len(codebook2.get_barcodes()) == 140
80 |
81 | reloadedDataset = dataset.MERFISHDataSet(
82 | 'merfish_test', analysisHome='test_analysis_two_codebook')
83 | reloaded1 = reloadedDataset.get_codebook(0)
84 | reloaded2 = reloadedDataset.get_codebook(1)
85 | assert len(reloadedDataset.get_codebooks()) == 2
86 | assert reloaded1.get_codebook_name() == 'test_codebook2'
87 | assert reloaded1.get_codebook_index() == 0
88 | assert len(reloaded1.get_barcodes()) == 10
89 | assert reloaded2.get_codebook_name() == 'test_codebook'
90 | assert reloaded2.get_codebook_index() == 1
91 | assert len(reloaded2.get_barcodes()) == 140
92 |
93 | with pytest.raises(FileExistsError):
94 | dataset.MERFISHDataSet(
95 | 'merfish_test',
96 | codebookNames=['test_codebook.csv', 'test_codebook2.csv'],
97 | analysisHome='test_analysis_two_codebook')
98 |
--------------------------------------------------------------------------------
/test/test_core.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import os
3 |
4 | from merlin.core import executor
5 | from merlin.core import analysistask
6 |
7 |
8 | def test_task_delete(simple_data, simple_task):
9 | simple_data.save_analysis_task(simple_task)
10 | assert simple_data.analysis_exists(simple_task)
11 | simple_data.delete_analysis(simple_task)
12 | assert not simple_data.analysis_exists(simple_task)
13 |
14 |
15 | def test_task_save(simple_data, simple_task):
16 | task1 = simple_task
17 | simple_data.save_analysis_task(task1)
18 | loadedTask = simple_data.load_analysis_task(task1.analysisName)
19 | unsharedKeys1 = [k for k in task1.parameters
20 | if k not in loadedTask.parameters
21 | or task1.parameters[k] != loadedTask.parameters[k]]
22 | assert len(unsharedKeys1) == 0
23 | unsharedKeys2 = [k for k in loadedTask.parameters
24 | if k not in task1.parameters
25 | or loadedTask.parameters[k] != task1.parameters[k]]
26 | assert len(unsharedKeys2) == 0
27 | assert loadedTask.analysisName == task1.analysisName
28 |
29 |
30 | def test_task_run(simple_task):
31 | task1 = simple_task
32 | assert not task1.is_complete()
33 | assert not task1.is_started()
34 | assert not task1.is_running()
35 | assert not task1.is_error()
36 | task1.run()
37 | assert task1.is_started()
38 | assert not task1.is_running()
39 | assert not task1.is_error()
40 | assert task1.is_complete()
41 |
42 |
43 | def test_save_environment(simple_task):
44 | task1 = simple_task
45 | task1.run()
46 | environment = dict(os.environ)
47 | if isinstance(simple_task, analysistask.ParallelAnalysisTask):
48 | taskEnvironment = simple_task.dataSet.get_analysis_environment(
49 | simple_task, 0)
50 | else:
51 | taskEnvironment = simple_task.dataSet.get_analysis_environment(
52 | simple_task)
53 |
54 | assert environment == taskEnvironment
55 |
56 |
57 | @pytest.mark.slowtest
58 | def test_task_run_with_executor(simple_task):
59 | task1 = simple_task
60 | assert not task1.is_complete()
61 | assert not task1.is_started()
62 | assert not task1.is_running()
63 | assert not task1.is_error()
64 | e = executor.LocalExecutor()
65 | e.run(task1)
66 | assert task1.is_started()
67 | assert not task1.is_running()
68 | assert not task1.is_error()
69 | assert task1.is_complete()
70 |
71 |
72 | def test_task_reset(simple_task):
73 | simple_task.run(overwrite=False)
74 | assert simple_task.is_complete()
75 | with pytest.raises(analysistask.AnalysisAlreadyStartedException):
76 | simple_task.run(overwrite=False)
77 | simple_task.run(overwrite=True)
78 | assert simple_task.is_complete()
79 |
80 |
81 | def test_task_overwrite(simple_task):
82 | simple_task.save()
83 | simple_task.parameters['new_parameter'] = 0
84 | with pytest.raises(analysistask.AnalysisAlreadyExistsException):
85 | simple_task.save()
86 |
--------------------------------------------------------------------------------
/test/test_dataorganization.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 |
4 | from merlin.data import dataorganization
5 |
6 |
7 | def test_dataorganization_get_channels(simple_merfish_data):
8 | assert np.array_equal(
9 | simple_merfish_data.get_data_organization().get_data_channels(),
10 | np.arange(18))
11 |
12 |
13 | def test_dataorganization_get_channel_name(simple_merfish_data):
14 | for i in range(16):
15 | assert simple_merfish_data.get_data_organization()\
16 | .get_data_channel_name(i) == 'bit' + str(i+1)
17 |
18 | assert simple_merfish_data.get_data_organization()\
19 | .get_data_channel_name(16) == 'DAPI'
20 | assert simple_merfish_data.get_data_organization()\
21 | .get_data_channel_name(17) == 'polyT'
22 |
23 |
24 | def test_dataorganization_get_channel_index(simple_merfish_data):
25 | for i in range(16):
26 | assert simple_merfish_data.get_data_organization() \
27 | .get_data_channel_index('bit' + str(i+1)) == i
28 |
29 | assert simple_merfish_data.get_data_organization() \
30 | .get_data_channel_index('DAPI') == 16
31 | assert simple_merfish_data.get_data_organization() \
32 | .get_data_channel_index('polyT') == 17
33 |
34 |
35 | def test_dataorganization_get_fovs(simple_merfish_data):
36 | assert np.array_equal(
37 | simple_merfish_data.get_data_organization().get_fovs(),
38 | np.arange(2))
39 |
40 |
41 | def test_dataorganization_get_z_positions(simple_merfish_data):
42 | assert np.array_equal(
43 | simple_merfish_data.get_data_organization().get_z_positions(),
44 | np.array([0]))
45 |
46 |
47 | def test_dataorganization_get_fiducial_information(simple_merfish_data):
48 | data = simple_merfish_data.get_data_organization()
49 | for d in data.get_data_channels():
50 | assert data.get_fiducial_frame_index(d) == 2
51 | assert os.path.normpath(data.get_fiducial_filename(0, 0)) \
52 | == os.path.normpath(
53 | os.path.abspath('test_data/merfish_test/test_0_0.tif'))
54 | assert os.path.normpath(data.get_fiducial_filename(0, 1)) \
55 | == os.path.normpath(
56 | os.path.abspath('test_data/merfish_test/test_1_0.tif'))
57 | assert os.path.normpath(data.get_fiducial_filename(1, 1)) \
58 | == os.path.normpath(
59 | os.path.abspath('test_data/merfish_test/test_1_0.tif'))
60 | assert os.path.normpath(data.get_fiducial_filename(2, 1)) \
61 | == os.path.normpath(
62 | os.path.abspath('test_data/merfish_test/test_1_1.tif'))
63 |
64 |
65 | def test_dataorganization_get_image_information(simple_merfish_data):
66 | data = simple_merfish_data.get_data_organization()
67 | assert data.get_image_frame_index(0, 0) == 1
68 | assert data.get_image_frame_index(1, 0) == 0
69 | assert data.get_image_frame_index(16, 0) == 3
70 | assert os.path.normpath(data.get_image_filename(0, 0)) \
71 | == os.path.normpath(
72 | os.path.abspath('test_data/merfish_test/test_0_0.tif'))
73 | assert os.path.normpath(data.get_image_filename(0, 1)) \
74 | == os.path.normpath(
75 | os.path.abspath('test_data/merfish_test/test_1_0.tif'))
76 | assert os.path.normpath(data.get_image_filename(1, 1)) \
77 | == os.path.normpath(
78 | os.path.abspath('test_data/merfish_test/test_1_0.tif'))
79 | assert os.path.normpath(data.get_image_filename(2, 1)) \
80 | == os.path.normpath(
81 | os.path.abspath('test_data/merfish_test/test_1_1.tif'))
82 |
83 |
84 | def test_dataorganization_load_from_dataset(simple_merfish_data):
85 | originalOrganization = simple_merfish_data.get_data_organization()
86 | loadedOrganization = dataorganization.DataOrganization(simple_merfish_data)
87 |
88 | assert np.array_equal(
89 | originalOrganization.get_data_channels(),
90 | loadedOrganization.get_data_channels())
91 | assert np.array_equal(
92 | originalOrganization.get_fovs(), loadedOrganization.get_fovs())
93 | assert np.array_equal(
94 | originalOrganization.get_z_positions(),
95 | loadedOrganization.get_z_positions())
96 |
97 | for channel in originalOrganization.get_data_channels():
98 | assert originalOrganization.get_data_channel_name(channel) \
99 | == loadedOrganization.get_data_channel_name(channel)
100 | assert originalOrganization.get_fiducial_frame_index(channel) \
101 | == loadedOrganization.get_fiducial_frame_index(channel)
102 |
103 | for fov in originalOrganization.get_fovs():
104 | assert originalOrganization.get_fiducial_filename(channel, fov) \
105 | == loadedOrganization.get_fiducial_filename(channel, fov)
106 | assert originalOrganization.get_image_filename(channel, fov) \
107 | == loadedOrganization.get_image_filename(channel, fov)
108 |
109 | for z in originalOrganization.get_z_positions():
110 | assert originalOrganization.get_image_frame_index(channel, z) \
111 | == loadedOrganization.get_image_frame_index(channel, z)
112 |
113 |
114 | def test_dataorganization_get_sequential_rounds(simple_merfish_data):
115 | dataOrganization = simple_merfish_data.get_data_organization()
116 | sequentialRounds, sequentialChannels = \
117 | dataOrganization.get_sequential_rounds()
118 |
119 | assert sequentialRounds == [16, 17]
120 | assert sequentialChannels == ['DAPI', 'polyT']
121 |
122 |
123 | def test_dataorganization_get_sequential_rounds_two_codebooks(
124 | two_codebook_merfish_data):
125 | dataOrganization = two_codebook_merfish_data.get_data_organization()
126 | sequentialRounds, sequentialChannels = \
127 | dataOrganization.get_sequential_rounds()
128 |
129 | assert sequentialRounds == [16, 17]
130 |
--------------------------------------------------------------------------------
/test/test_dataportal.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import shutil
3 | import tempfile
4 | import os
5 | import numpy as np
6 | from botocore import UNSIGNED
7 | from botocore.client import Config
8 | from google.auth.credentials import AnonymousCredentials
9 |
10 | from merlin.util import dataportal
11 |
12 |
13 | def local_data_portal():
14 | tempPath = tempfile.mkdtemp()
15 | with open(os.path.join(tempPath, 'test.txt'), 'w') as f:
16 | f.write('MERlin test file')
17 | with open(os.path.join(tempPath, 'test.bin'), 'wb') as f:
18 | f.write(np.array([0, 1, 2], dtype='uint16').tobytes())
19 |
20 | yield dataportal.LocalDataPortal(tempPath)
21 |
22 | shutil.rmtree(tempPath)
23 |
24 |
25 | def s3_data_portal():
26 | yield dataportal.S3DataPortal('s3://merlin-test-bucket-vg/test-files',
27 | region_name='us-east-2',
28 | config=Config(signature_version=UNSIGNED))
29 |
30 |
31 | def gcloud_data_portal():
32 | yield dataportal.GCloudDataPortal('gc://merlin-test-bucket/test-files',
33 | project='merlin-253419',
34 | credentials=AnonymousCredentials())
35 |
36 |
37 | @pytest.fixture(scope='function', params=[
38 | local_data_portal, s3_data_portal, gcloud_data_portal])
39 | def data_portal(request):
40 |     yield from request.param()
41 |
42 |
43 | def test_portal_list_files(data_portal):
44 | # filter out directory blob for google cloud
45 | fileList = [x for x in data_portal.list_files() if not x.endswith('/')]
46 | filteredList = data_portal.list_files(extensionList='.txt')
47 | assert len(fileList) == 2
48 | assert any([x.endswith('test.txt') for x in fileList])
49 | assert any([x.endswith('test.bin') for x in fileList])
50 | assert len(filteredList) == 1
51 | assert filteredList[0].endswith('test.txt')
52 |
53 |
54 | def test_portal_available(data_portal):
55 | assert data_portal.is_available()
56 |
57 |
58 | def test_portal_read(data_portal):
59 | textFile = data_portal.open_file('test.txt')
60 | binFile = data_portal.open_file('test.bin')
61 | assert textFile.exists()
62 | assert binFile.exists()
63 | assert textFile.read_as_text() == 'MERlin test file'
64 | assert np.array_equal(
65 | np.frombuffer(binFile.read_file_bytes(0, 6), dtype='uint16'),
66 | np.array([0, 1, 2], dtype='uint16'))
67 | assert np.array_equal(
68 | np.frombuffer(binFile.read_file_bytes(2, 4), dtype='uint16'),
69 | np.array([1], dtype='uint16'))
70 |
71 |
72 | def test_exchange_extension(data_portal):
73 | textFile = data_portal.open_file('test.txt')
74 | assert textFile.get_file_extension() == '.txt'
75 | assert textFile.read_as_text() == 'MERlin test file'
76 | binFile = textFile.get_sibling_with_extension('.bin')
77 | assert binFile.get_file_extension() == '.bin'
78 | assert np.array_equal(
79 | np.frombuffer(binFile.read_file_bytes(0, 6), dtype='uint16'),
80 | np.array([0, 1, 2], dtype='uint16'))
81 |
--------------------------------------------------------------------------------
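Note: the tests above pin down read_file_bytes(start, end) as a half-open byte
range: bytes [0, 6) recover all three uint16 values written by the fixture,
while bytes [2, 4) recover only the middle one. The same arithmetic with plain
NumPy, independent of any portal:

    import numpy as np

    raw = np.array([0, 1, 2], dtype='uint16').tobytes()  # 6 bytes, 2 per value
    assert np.array_equal(np.frombuffer(raw[0:6], dtype='uint16'), [0, 1, 2])
    assert np.array_equal(np.frombuffer(raw[2:4], dtype='uint16'), [1])

--------------------------------------------------------------------------------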
/test/test_dataset.py:
--------------------------------------------------------------------------------
1 | def test_get_analysis_tasks(simple_data, simple_task):
2 | assert len(simple_data.get_analysis_tasks()) == 0
3 | simple_task.save()
4 | assert len(simple_data.get_analysis_tasks()) == 1
5 | assert simple_data.get_analysis_tasks()[0]\
6 | == simple_task.get_analysis_name()
7 |
--------------------------------------------------------------------------------
/test/test_decon.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | import random
4 |
5 | import merlin.util.deconvolve as deconvolve
6 | import merlin.util.matlab as matlab
7 |
8 |
9 | decon_sigma = 2
10 | decon_filter_size = 9
11 |
12 |
13 | def decon_diff(image, gt_image):
14 | on_gt = np.sum(image[(gt_image > 0)])
15 | off_gt = np.sum(image[gt_image == 0])
16 |
17 | return (on_gt/(on_gt + off_gt))
18 |
19 |
20 | def make_image():
21 | # Always make the same image.
22 | random.seed(42)
23 |
24 | # Ground truth.
25 | gt_image = np.zeros((100, 150))
26 | for i in range(40):
27 | x = random.randint(5, 95)
28 | y = random.randint(5, 145)
29 | gt_image[x, y] = random.randint(10, 50)
30 |
31 | [pf, pb] = deconvolve.calculate_projectors(64, decon_sigma)
32 | image = cv2.filter2D(gt_image, -1, pf, borderType=cv2.BORDER_REPLICATE)
33 |
34 | return [image, gt_image]
35 |
36 |
37 | def test_deconvolve_lucyrichardson():
38 | [image, gt_image] = make_image()
39 |
40 | d1 = decon_diff(image, gt_image)
41 | d_image = deconvolve.deconvolve_lucyrichardson(image,
42 | decon_filter_size,
43 | decon_sigma,
44 | 20)
45 | d2 = decon_diff(d_image, gt_image)
46 | print(d1, d2)
47 |
48 | assert (d2 > d1)
49 |
50 |
51 | def test_deconvolve_lucyrichardson_guo():
52 | [image, gt_image] = make_image()
53 |
54 | d1 = decon_diff(image, gt_image)
55 | d_image = deconvolve.deconvolve_lucyrichardson_guo(image,
56 | decon_filter_size,
57 | decon_sigma,
58 | 2)
59 | d2 = decon_diff(d_image, gt_image)
60 | print(d1, d2)
61 |
62 | assert (d2 > d1)
63 |
--------------------------------------------------------------------------------
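Note: decon_diff scores an image by the fraction of its total intensity that
falls on ground-truth spot pixels, so values closer to 1 mean the deconvolution
concentrated signal back onto the true point sources; both tests assert that
this fraction increases. A toy numeric check of the metric itself:

    import numpy as np

    gt = np.zeros((3, 3))
    gt[1, 1] = 10.0                 # a single true spot
    blurred = np.full((3, 3), 1.0)  # signal spread across the background
    blurred[1, 1] = 4.0             # some signal still on the spot
    on = blurred[gt > 0].sum()      # 4.0 on the spot
    off = blurred[gt == 0].sum()    # 8.0 off the spot
    print(on / (on + off))          # 0.333..., should rise after deconvolution

--------------------------------------------------------------------------------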
/test/test_image_reader.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import os
3 |
4 | import merlin
5 | from merlin.util import imagereader
6 | from merlin.util import dataportal
7 |
8 | root = os.path.join(os.path.dirname(merlin.__file__), '..', 'test')
9 |
10 |
11 | def test_read_dax():
12 | print(root)
13 | dataPortal = dataportal.LocalDataPortal(
14 | os.path.join(root, 'auxiliary_files'))
15 | daxPortal = dataPortal.open_file('test.dax')
16 | daxReader = imagereader.infer_reader(daxPortal)
17 | frame0 = daxReader.load_frame(0)
18 | frame5 = daxReader.load_frame(5)
19 |     frame9 = daxReader.load_frame(9)  # last frame; loading it should not raise
20 |
21 | assert daxReader.number_frames == 10
22 | assert daxReader.image_height == 256
23 | assert daxReader.image_width == 256
24 | assert frame0.shape == (256, 256)
25 | assert frame5.shape == (256, 256)
26 | assert frame0[0, 0] == 144
27 | assert frame5[0, 0] == 156
28 | assert np.sum(frame0) == 10459722
29 | assert np.sum(frame5) == 10460240
30 |
--------------------------------------------------------------------------------
/test/test_merfish.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pytest
3 |
4 | import merlin
5 | from merlin import merlin as m
6 |
7 |
8 | @pytest.mark.fullrun
9 | @pytest.mark.slowtest
10 | def test_merfish_2d_full_local(simple_merfish_data):
11 | with open(os.sep.join([merlin.ANALYSIS_PARAMETERS_HOME,
12 | 'test_analysis_parameters.json']), 'r') as f:
13 | snakefilePath = m.generate_analysis_tasks_and_snakefile(
14 | simple_merfish_data, f)
15 | m.run_with_snakemake(simple_merfish_data, snakefilePath, 5)
16 |
--------------------------------------------------------------------------------
/test/test_plotting.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from merlin.analysis import testtask
4 | from merlin.plots import testplots
5 | from merlin import plots
6 |
7 |
8 | def test_metadata(simple_merfish_data):
9 | randomTask = testtask.RandomNumberParallelAnalysisTask(simple_merfish_data)
10 | randomMetadata = testplots.TestPlotMetadata(randomTask,
11 | {'test_task': randomTask})
12 | assert not randomTask.is_complete()
13 | assert not randomMetadata.is_complete()
14 | assert randomMetadata.metadata_name() == 'testplots/TestPlotMetadata'
15 |
16 | for i in range(randomTask.fragment_count()-1):
17 | randomTask.run(i)
18 | randomMetadata.update()
19 | assert not randomTask.is_complete()
20 | assert not randomMetadata.is_complete()
21 |
22 | randomTask.run(randomTask.fragment_count()-1)
23 | randomMetadata.update()
24 | assert np.isclose(
25 | randomMetadata.get_mean_values(),
26 | np.array([np.mean(randomTask.get_random_result(i))
27 | for i in range(randomTask.fragment_count())])).all()
28 | assert randomTask.is_complete()
29 | assert randomMetadata.is_complete()
30 | simple_merfish_data.delete_analysis(randomTask)
31 |
32 |
33 | def test_plotengine(simple_merfish_data):
34 | randomTask = testtask.RandomNumberParallelAnalysisTask(simple_merfish_data)
35 | assert not randomTask.is_complete()
36 |
37 | plotEngine = plots.PlotEngine(randomTask, {'test_task': randomTask})
38 | assert len(plotEngine.get_plots()) == 1
39 | assert not plotEngine.take_step()
40 | randomTask.run(0)
41 | assert not plotEngine.take_step()
42 |
43 | for i in range(1, randomTask.fragment_count()):
44 | randomTask.run(i)
45 | assert plotEngine.take_step()
46 | assert plotEngine.get_plots()[0].is_complete()
47 |
48 | simple_merfish_data.delete_analysis(randomTask)
49 |
--------------------------------------------------------------------------------
/test/test_snakemake.py:
--------------------------------------------------------------------------------
1 | import snakemake
2 | import os
3 | import shutil
4 |
5 | from merlin.util import snakewriter
6 |
7 |
8 | def test_run_single_task(simple_merfish_task):
9 | simple_merfish_task.save()
10 | assert not simple_merfish_task.is_complete()
11 | snakeRule = snakewriter.SnakemakeRule(simple_merfish_task)
12 | with open('temp.Snakefile', 'w') as outFile:
13 | outFile.write('rule all: \n\tinput: '
14 | + snakeRule.full_output() + '\n\n')
15 | outFile.write(snakeRule.as_string())
16 |
17 | snakemake.snakemake('temp.Snakefile')
18 | os.remove('temp.Snakefile')
19 | shutil.rmtree('.snakemake')
20 |
21 | assert simple_merfish_task.is_complete()
22 |
23 |
24 | def test_snakemake_generator_one_task(simple_merfish_data):
25 | taskDict = {'analysis_tasks': [
26 | {'task': 'SimpleAnalysisTask',
27 | 'module': 'merlin.analysis.testtask',
28 | 'parameters': {}}
29 | ]}
30 |
31 | generator = snakewriter.SnakefileGenerator(taskDict, simple_merfish_data)
32 | workflow = generator.generate_workflow()
33 | outputTask = simple_merfish_data.load_analysis_task('SimpleAnalysisTask')
34 | assert not outputTask.is_complete()
35 | snakemake.snakemake(workflow)
36 | assert outputTask.is_complete()
37 |
38 | shutil.rmtree('.snakemake')
39 |
40 |
41 | def test_snakemake_generator_task_chain(simple_merfish_data):
42 | taskDict = {'analysis_tasks': [
43 | {'task': 'SimpleAnalysisTask',
44 | 'module': 'merlin.analysis.testtask',
45 | 'analysis_name': 'Task1',
46 | 'parameters': {}},
47 | {'task': 'SimpleParallelAnalysisTask',
48 | 'module': 'merlin.analysis.testtask',
49 | 'analysis_name': 'Task2',
50 | 'parameters': {'dependencies': ['Task1']}},
51 | {'task': 'SimpleParallelAnalysisTask',
52 | 'module': 'merlin.analysis.testtask',
53 | 'analysis_name': 'Task3',
54 | 'parameters': {'dependencies': ['Task2']}}
55 | ]}
56 |
57 | generator = snakewriter.SnakefileGenerator(taskDict, simple_merfish_data)
58 | workflow = generator.generate_workflow()
59 | outputTask1 = simple_merfish_data.load_analysis_task('Task1')
60 | outputTask2 = simple_merfish_data.load_analysis_task('Task2')
61 | outputTask3 = simple_merfish_data.load_analysis_task('Task3')
62 | assert not outputTask1.is_complete()
63 | assert not outputTask2.is_complete()
64 | assert not outputTask3.is_complete()
65 | snakemake.snakemake(workflow)
66 | assert outputTask1.is_complete()
67 | assert outputTask2.is_complete()
68 | assert outputTask3.is_complete()
69 |
70 | shutil.rmtree('.snakemake')
71 |
--------------------------------------------------------------------------------
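Note: test_snakemake_generator_task_chain exercises dependency chaining: each
task's parameters name its upstream dependencies, and the generated workflow
must finish Task1 before Task2, and Task2 before Task3. Schematically, the
generated Snakefile behaves like the following hand-written one (an
illustration of the rule chaining only, not the text MERlin actually emits):

    rule all:
        input: 'Task3.done'

    rule Task1:
        output: 'Task1.done'
        shell: 'touch Task1.done'

    rule Task2:
        input: 'Task1.done'
        output: 'Task2.done'
        shell: 'touch Task2.done'

    rule Task3:
        input: 'Task2.done'
        output: 'Task3.done'
        shell: 'touch Task3.done'

--------------------------------------------------------------------------------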
/test/test_zplane_duplicate_removal.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import random
3 | import numpy as np
4 | from merlin.util import barcodefilters
5 |
6 |
7 | def generate_barcode(fov, barcode_id, x, y, z, mean_intensity):
8 | bc = {'barcode': random.getrandbits(32),
9 | 'barcode_id': barcode_id,
10 | 'fov': fov,
11 | 'mean_intensity': mean_intensity,
12 | 'max_intensity': random.uniform(5, 15),
13 | 'area': random.randint(0, 10),
14 | 'mean_distance': random.random(),
15 | 'min_distance': random.random(),
16 | 'x': x,
17 | 'y': y,
18 | 'z': z,
19 | 'global_x': random.uniform(0, 200000),
20 | 'global_y': random.uniform(0, 200000),
21 | 'global_z': random.uniform(0, 5),
22 | 'cell_index': random.randint(0, 5000)}
23 |
24 | for i in range(16):
25 | bc['intensity_' + str(i)] = random.uniform(5, 15)
26 |
27 | return bc
28 |
29 |
30 | b1 = generate_barcode(100, 5, 402.21, 787.11, 2, 14.23)
31 | b2 = generate_barcode(100, 5, 502.21, 687.11, 3, 12.23)
32 | b3 = generate_barcode(100, 17, 402.21, 787.11, 2, 10.23)
33 |
34 | b1_above_dimmer = generate_barcode(100, 5, 402.21, 787.11, 3, 11.23)
35 | b1_closeby_above_brighter = generate_barcode(100, 5, 403.21, 787.11, 3, 15.23)
36 | b2_above_brighter = generate_barcode(100, 5, 502.31, 687.11, 4, 14.23)
37 | b1_closeby_below_brighter = generate_barcode(100, 5, 403.21, 787.11, 1, 15.0)
38 | b1_closeby_toofar_brighter = generate_barcode(100, 5, 403.21, 787.11, 0, 15.0)
39 |
40 |
41 | def test_multiple_comparisons_barcodes():
42 | zplane_cutoff = 1
43 | xy_cutoff = np.sqrt(2)
44 | zpositions = [0, 1.5, 3, 4.5, 6, 7.5, 9]
45 |
46 | bcSet = [b1, b2, b3, b1_above_dimmer, b1_closeby_above_brighter,
47 | b2_above_brighter, b1_closeby_below_brighter,
48 | b1_closeby_toofar_brighter]
49 | bcDF = pd.DataFrame(bcSet)
50 | expected = [x['barcode'] for x in
51 | [b1_closeby_above_brighter, b2_above_brighter, b3]]
52 | notExpected = [x['barcode'] for x in [b1, b2, b1_above_dimmer,
53 | b1_closeby_below_brighter,
54 | b1_closeby_toofar_brighter]]
55 |
56 | keptBC = barcodefilters.remove_zplane_duplicates_all_barcodeids(
57 | bcDF, zplane_cutoff, xy_cutoff, zpositions)
58 | for ex in expected:
59 | assert ex in keptBC['barcode'].values
60 | for notEx in notExpected:
61 | assert notEx not in keptBC['barcode'].values
62 |
63 |
64 | def test_all_compatible_barcodes():
65 | zplane_cutoff = 1
66 | xy_cutoff = np.sqrt(2)
67 | zpositions = [0, 1.5, 3, 4.5, 6, 7.5, 9]
68 |
69 | bcSet = [b1, b2, b3, b1_closeby_toofar_brighter]
70 | bcDF = pd.DataFrame(bcSet)
71 | expected = [x['barcode'] for x in bcSet]
72 | keptBC = barcodefilters.remove_zplane_duplicates_all_barcodeids(
73 | bcDF, zplane_cutoff, xy_cutoff, zpositions)
74 | for ex in expected:
75 | assert ex in keptBC['barcode'].values
76 | assert len(keptBC) == len(bcSet)
77 |
78 |
79 | def test_farther_zrange():
80 | zplane_cutoff = 2
81 | xy_cutoff = np.sqrt(2)
82 | zpositions = [0, 1.5, 3, 4.5, 6, 7.5, 9]
83 |
84 | bcSet = [b1, b2, b3, b1_closeby_toofar_brighter]
85 | bcDF = pd.DataFrame(bcSet)
86 | expected = [x['barcode'] for x in [b2, b3, b1_closeby_toofar_brighter]]
87 | notExpected = [x['barcode'] for x in [b1]]
88 | keptBC = barcodefilters.remove_zplane_duplicates_all_barcodeids(
89 | bcDF, zplane_cutoff, xy_cutoff, zpositions)
90 | for ex in expected:
91 | assert ex in keptBC['barcode'].values
92 | for notEx in notExpected:
93 | assert notEx not in keptBC['barcode'].values
94 |
95 |
96 | def test_farther_xyrange():
97 | zplane_cutoff = 1
98 | xy_cutoff = np.sqrt(20001)
99 | zpositions = [0, 1.5, 3, 4.5, 6, 7.5, 9]
100 |
101 | bcSet = [b1, b2, b3]
102 | bcDF = pd.DataFrame(bcSet)
103 | expected = [x['barcode'] for x in [b1, b3]]
104 | notExpected = [x['barcode'] for x in [b2]]
105 | keptBC = barcodefilters.remove_zplane_duplicates_all_barcodeids(
106 | bcDF, zplane_cutoff, xy_cutoff, zpositions)
107 | for ex in expected:
108 | assert ex in keptBC['barcode'].values
109 | for notEx in notExpected:
110 | assert notEx not in keptBC['barcode'].values
111 |
112 |
113 | def test_empty_barcodes():
114 | zplane_cutoff = 1
115 | xy_cutoff = np.sqrt(2)
116 | zpositions = [0, 1.5, 3, 4.5, 6, 7.5, 9]
117 |
118 | bcDF = pd.DataFrame([b1])
119 | bcDF.drop(0, inplace=True)
120 |
121 | keptBC = barcodefilters.remove_zplane_duplicates_all_barcodeids(
122 | bcDF, zplane_cutoff, xy_cutoff, zpositions)
123 |     assert isinstance(keptBC, pd.DataFrame)
124 |
--------------------------------------------------------------------------------
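Note: taken together, these cases pin down the intended behavior of
remove_zplane_duplicates_all_barcodeids: barcodes that share a barcode_id and
sit within xy_cutoff laterally and within zplane_cutoff z-planes of one another
are treated as duplicates of the same molecule, and each connected group
collapses to its single brightest member (by mean_intensity); different
barcode_ids never suppress each other. A minimal sketch of that rule (an
illustration of the expected behavior, not MERlin's implementation; it treats
the z column directly as a plane index and ignores the zpositions argument of
the real function):

    import numpy as np
    import pandas as pd

    def drop_zplane_duplicates(barcodes: pd.DataFrame, zplane_cutoff: int,
                               xy_cutoff: float) -> pd.DataFrame:
        kept = []
        for _, group in barcodes.groupby('barcode_id'):
            g = group.reset_index(drop=True)
            x, y, z = (g[c].to_numpy(dtype=float) for c in ('x', 'y', 'z'))
            # Pairwise adjacency: close in xy and within the z-plane cutoff.
            dxy = np.hypot(x[:, None] - x[None, :], y[:, None] - y[None, :])
            dz = np.abs(z[:, None] - z[None, :])
            adjacent = (dxy <= xy_cutoff) & (dz <= zplane_cutoff)
            # Flood-fill connected components of mutually adjacent barcodes.
            labels = -np.ones(len(g), dtype=int)
            for seed in range(len(g)):
                if labels[seed] >= 0:
                    continue
                stack, labels[seed] = [seed], seed
                while stack:
                    node = stack.pop()
                    for nbr in np.flatnonzero(adjacent[node]):
                        if labels[nbr] < 0:
                            labels[nbr] = seed
                            stack.append(nbr)
            # Keep only the brightest barcode of each component.
            for label in np.unique(labels):
                members = g[labels == label]
                kept.append(members.loc[members['mean_intensity'].idxmax()])
        return pd.DataFrame(kept)

--------------------------------------------------------------------------------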