├── .circleci └── config.yml ├── .gitattributes ├── .gitignore ├── .pep8speaks.yml ├── CHANGELOG.md ├── README.md ├── codecov.yml ├── docs ├── Makefile ├── _autosummary │ ├── merlin.analysis.rst │ ├── merlin.core.rst │ ├── merlin.util.rst │ └── merlin.view.rst ├── _modules │ ├── bokeh.rst │ ├── merlin.analysis.rst │ ├── merlin.core.rst │ ├── merlin.rst │ ├── merlin.util.rst │ ├── merlin.view.rst │ ├── merlin.view.widgets.rst │ ├── modules.rst │ └── setup.rst ├── _static │ └── merlin_headline.png ├── api.rst ├── conf.py ├── contributing.rst ├── index.rst ├── installation.rst ├── make.bat ├── modules.rst ├── tasks.rst └── usage.rst ├── license.md ├── merlin ├── __init__.py ├── __main__.py ├── analysis │ ├── __init__.py │ ├── decode.py │ ├── exportbarcodes.py │ ├── filterbarcodes.py │ ├── generatemosaic.py │ ├── globalalign.py │ ├── optimize.py │ ├── partition.py │ ├── plotperformance.py │ ├── preprocess.py │ ├── segment.py │ ├── sequential.py │ ├── slurmreport.py │ ├── testtask.py │ └── warp.py ├── core │ ├── __init__.py │ ├── analysistask.py │ ├── dataset.py │ └── executor.py ├── data │ ├── __init__.py │ ├── codebook.py │ └── dataorganization.py ├── ext │ └── default.mplstyle ├── merlin.py ├── plots │ ├── __init__.py │ ├── _base.py │ ├── decodeplots.py │ ├── filterplots.py │ ├── optimizationplots.py │ ├── segmentationplots.py │ └── testplots.py ├── util │ ├── __init__.py │ ├── aberration.py │ ├── barcodedb.py │ ├── barcodefilters.py │ ├── binary.py │ ├── dataportal.py │ ├── decoding.py │ ├── deconvolve.py │ ├── imagefilters.py │ ├── imagereader.py │ ├── legacy.py │ ├── matlab.py │ ├── registration.py │ ├── simulator.py │ ├── snakewriter.py │ ├── spatialfeature.py │ └── watershed.py └── view │ ├── __init__.py │ ├── __main__.py │ ├── merlinview.py │ └── widgets │ ├── __init__.py │ └── regionview.py ├── requirements.txt ├── setup.py └── test ├── auxiliary_files ├── test.dax ├── test.inf ├── test_0_0.tif ├── test_0_1.tif ├── test_0_2.tif ├── test_0_3.tif ├── test_0_4.tif ├── test_0_5.tif ├── test_0_6.tif ├── test_0_7.tif ├── test_1_0.tif ├── test_1_1.tif ├── test_1_2.tif ├── test_1_3.tif ├── test_1_4.tif ├── test_1_5.tif ├── test_1_6.tif ├── test_1_7.tif ├── test_analysis_parameters.json ├── test_codebook.csv ├── test_codebook2.csv ├── test_data_organization.csv ├── test_microscope_parameters.json └── test_positions.csv ├── conftest.py ├── pytest.ini ├── test_barcode_database.py ├── test_binary_utils.py ├── test_codebook.py ├── test_core.py ├── test_dataorganization.py ├── test_dataportal.py ├── test_dataset.py ├── test_decon.py ├── test_image_reader.py ├── test_merfish.py ├── test_plotting.py ├── test_snakemake.py ├── test_spatialfeature.py └── test_zplane_duplicate_removal.py /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | # Python CircleCI 2.1 configuration file 2 | version: 2.1 3 | jobs: 4 | build: 5 | docker: 6 | - image: circleci/python:3.6.9 7 | 8 | working_directory: ~/MERlin 9 | 10 | steps: 11 | # Step 1: obtain repo from GitHub 12 | - checkout 13 | # Step 2: create virtual env and install dependencies 14 | - run: 15 | name: Install Dependencies 16 | command: | 17 | wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh 18 | bash Miniconda3-latest-Linux-x86_64.sh -b -p $HOME/miniconda 19 | source ~/miniconda/bin/activate root 20 | conda activate base 21 | conda config --set always_yes true 22 | conda config --set quiet true 23 | conda create -n merlin_env python=3.6 24 | source activate merlin_env 25 | 
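            # rtree and pytables are not installed correctly by pip (see docs/installation.rst), so they are installed with conda first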
conda install rtree 26 | conda install pytables 27 | cd ~ 28 | printf 'DATA_HOME=~\nANALYSIS_HOME=~\nPARAMETERS_HOME=~\n' >.merlinenv 29 | pip install -e MERlin 30 | cd ~/MERlin 31 | # Step 3: run linter and tests 32 | - run: 33 | name: Run Tests 34 | command: | 35 | source ~/miniconda/bin/activate root 36 | conda activate base 37 | source activate merlin_env 38 | cd ~/MERlin 39 | mkdir ~/test-reports 40 | pytest --cov --cov-report=xml 41 | 42 | - run: 43 | name: Upload Coverage to Codecov 44 | command: | 45 | source ~/miniconda/bin/activate root 46 | cd ~/MERlin 47 | pip install codecov && codecov 48 | - store_artifacts: 49 | path: ~/MERlin/coverage.xml 50 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto 2 | 3 | *.py text 4 | *.rst text 5 | *.bat text 6 | *.json text 7 | *.csv text 8 | 9 | *.tif binary 10 | 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | .idea 3 | *.swo 4 | *.swp 5 | *.py[cod] 6 | *.egg-info/ 7 | __pycache__/ 8 | .ipynb_checkpoints/ 9 | 10 | pip-log.txt 11 | pip-delete-this-directory.txt 12 | -------------------------------------------------------------------------------- /.pep8speaks.yml: -------------------------------------------------------------------------------- 1 | scanner: 2 | diff_only: True # If False, the entire file touched by the Pull Request is scanned for errors. If True, only the diff is scanned. 3 | linter: pycodestyle # Alternative option - flake8 4 | 5 | pycodestyle: # Valid if scanner.linter is pycodestyle 6 | max-line-length: 80 7 | ignore: [] # Errors and warnings to ignore 8 | exclude: [] # File path patterns to exclude 9 | count: False 10 | first: False 11 | show-pep8: True 12 | show-source: False 13 | statistics: False 14 | hang-closing: False 15 | filename: [] 16 | select: [] 17 | 18 | flake8: # Valid if scanner.linter is flake8 19 | max-line-length: 79 20 | ignore: [] 21 | exclude: [] 22 | count: False 23 | show-source: False 24 | statistics: False 25 | hang-closing: False 26 | filename: [] 27 | select: [] 28 | 29 | no_blank_comment: True # If True, no comment is made on PR without any errors. 30 | descending_issues_order: False # If True, PEP 8 issues in message will be displayed in descending order of line numbers in the file 31 | only_mention_files_with_errors: True # If False, a separate status section for each file is made in the comment. 32 | 33 | message: # Customize the comment made by the bot 34 | opened: # Messages when a new PR is submitted 35 | header: "Hello @{name}! Thanks for opening this PR." 36 | footer: "" 37 | updated: # Messages when a PR is updated 38 | header: "Hello @{name}! Thanks for updating this PR." 39 | footer: "" 40 | no_errors: "There are currently no PEP 8 issues detected in this Pull Request. Cheers! :beers: " 41 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | All notable changes to this project will be documented in this file. 3 | 4 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 5 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 

## [0.1.0] - 2019-09-30
### Added
- Initialization of this CHANGELOG file to track changes as the version increments

## [0.1.1] - 2019-10-03
### Fixed
- Fixed bug in sum signal

## [0.1.2] - 2019-10-16
### Added
- Exposed tolerance parameter in the adaptive filter barcodes method
- Added plot for scale factor magnitude vs bit index
- Fixed barcode partitioning to include cells from adjacent fields of view when a cell falls across fov boundaries

## [0.1.3] - 2019-12-04
### Fixed
- Addressed bugs present in cleaning overlapping cells and assigning them to a fov
### Added
- Added option to draw field of view labels overlaid on the mosaic

## [0.1.4] - 2019-12-05
### Added
- Added task to evaluate whether a parallel analysis task has completed
### Changed
- Changed the clean overlapping cells task to run in parallel
- Snakemake job inputs were simplified using the ParallelCompleteTask to improve DAG construction speed and overall snakemake runtime performance

## [0.1.5] - 2020-01-22
### Changed
- Updated the filemap to only store the file name so that it can easily be pointed to new data home directories. This change maintains backward compatibility.
- Improved decoding speed
### Added
- Parameters to filter tasks that enable removing barcodes that were putatively duplicated across adjacent z planes.

## [0.1.6] -
### Fixed
- Fixed bug and edge cases in removal of barcodes duplicated across z planes. Moved to the decode step to prevent unintended conflict with misidentification rate determination.

### Added
- An alternative Lucy-Richardson deconvolution approach that requires ~10x fewer iterations.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
[![CircleCI](https://circleci.com/gh/emanuega/MERlin/tree/master.svg?style=svg)](https://circleci.com/gh/emanuega/MERlin/tree/master)
[![codecov](https://codecov.io/gh/emanuega/MERlin/branch/master/graph/badge.svg)](https://codecov.io/gh/emanuega/MERlin)
[![DOI](https://zenodo.org/badge/202668055.svg)](https://zenodo.org/badge/latestdoi/202668055)

# MERlin - Extensible pipeline for scalable data analysis

MERlin is an extensible data analysis pipeline for reproducible and scalable analysis of large
datasets. Each MERlin workflow consists of a set of analysis tasks, each of which can be run as
a single task or split among many subtasks that can be executed in parallel. MERlin is able to
execute workflows on a single computer, on a high performance cluster, or on the cloud
(AWS and Google Cloud).

If MERlin is useful for your research, consider citing:
Emanuel, G., Eichhorn, S. W., Zhuang, X. 2020, MERlin - scalable and extensible MERFISH analysis software, v0.1.6, Zenodo, doi:10.5281/zenodo.3758540

Please find the most recent version of MERlin [here](https://github.com/emanuega/merlin).

## MERFISH data analysis

![Image of MERlin](docs/_static/merlin_headline.png "MERlin - the MERFISH decoding software")

MERlin was originally created for decoding MERFISH datasets.
23 | [MERFISH](https://science.sciencemag.org/lookup/doi/10.1126/science.aaa6090) is a technology for 24 | spatially resolved RNA profiling of 10s to 10,000s of RNA species in individual cells 25 | with high accuracy and high detection efficiency. The standard MERlin MERFISH analysis 26 | workflow decodes and segments MERFISH datasets to determine RNA molecules and the 27 | cell boundaries represented in the raw images. 28 | 29 | ## Documentation 30 | 31 | For more information on installation and usage, please see the [documentation](https://emanuega.github.io/MERlin/). 32 | 33 | ## Authors 34 | 35 | * [**George Emanuel**](mailto:emanuega0@gmail.com) - *Initial work* 36 | * **Stephen Eichhorn** 37 | * **Leonardo Sepulveda** 38 | 39 | Contributions are welcome! Please see the 40 | [documentation](https://emanuega.github.io/MERlin/contributing.html) for contribution guidelines. 41 | 42 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | ignore: 2 | - "**/test/*.py" 3 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = . 8 | BUILDDIR = ../../MERlin-docs 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | # Catch-all target: route all unknown targets to Sphinx using the new 17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 18 | %: Makefile 19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 20 | 21 | -------------------------------------------------------------------------------- /docs/_autosummary/merlin.analysis.rst: -------------------------------------------------------------------------------- 1 | merlin.analysis 2 | =============== 3 | 4 | .. automodule:: merlin.analysis 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /docs/_autosummary/merlin.core.rst: -------------------------------------------------------------------------------- 1 | merlin.core 2 | =========== 3 | 4 | .. automodule:: merlin.core 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /docs/_autosummary/merlin.util.rst: -------------------------------------------------------------------------------- 1 | merlin.util 2 | =========== 3 | 4 | .. automodule:: merlin.util 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /docs/_autosummary/merlin.view.rst: -------------------------------------------------------------------------------- 1 | merlin.view 2 | =========== 3 | 4 | .. automodule:: merlin.view 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /docs/_modules/bokeh.rst: -------------------------------------------------------------------------------- 1 | bokeh module 2 | ============ 3 | 4 | .. 
automodule:: bokeh 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/_modules/merlin.analysis.rst: -------------------------------------------------------------------------------- 1 | merlin.analysis package 2 | ======================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | merlin.analysis.decode module 8 | ----------------------------- 9 | 10 | .. automodule:: merlin.analysis.decode 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | merlin.analysis.exportbarcodes module 16 | ------------------------------------- 17 | 18 | .. automodule:: merlin.analysis.exportbarcodes 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | merlin.analysis.filterbarcodes module 24 | ------------------------------------- 25 | 26 | .. automodule:: merlin.analysis.filterbarcodes 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | merlin.analysis.generatemosaic module 32 | ------------------------------------- 33 | 34 | .. automodule:: merlin.analysis.generatemosaic 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | merlin.analysis.globalalign module 40 | ---------------------------------- 41 | 42 | .. automodule:: merlin.analysis.globalalign 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | merlin.analysis.optimize module 48 | ------------------------------- 49 | 50 | .. automodule:: merlin.analysis.optimize 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | merlin.analysis.plotperformance module 56 | -------------------------------------- 57 | 58 | .. automodule:: merlin.analysis.plotperformance 59 | :members: 60 | :undoc-members: 61 | :show-inheritance: 62 | 63 | merlin.analysis.preprocess module 64 | --------------------------------- 65 | 66 | .. automodule:: merlin.analysis.preprocess 67 | :members: 68 | :undoc-members: 69 | :show-inheritance: 70 | 71 | merlin.analysis.segment module 72 | ------------------------------ 73 | 74 | .. automodule:: merlin.analysis.segment 75 | :members: 76 | :undoc-members: 77 | :show-inheritance: 78 | 79 | merlin.analysis.warp module 80 | --------------------------- 81 | 82 | .. automodule:: merlin.analysis.warp 83 | :members: 84 | :undoc-members: 85 | :show-inheritance: 86 | 87 | 88 | Module contents 89 | --------------- 90 | 91 | .. automodule:: merlin.analysis 92 | :members: 93 | :undoc-members: 94 | :show-inheritance: 95 | -------------------------------------------------------------------------------- /docs/_modules/merlin.core.rst: -------------------------------------------------------------------------------- 1 | merlin.core package 2 | =================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | merlin.core.analysistask module 8 | ------------------------------- 9 | 10 | .. automodule:: merlin.core.analysistask 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | merlin.core.dataset module 16 | -------------------------- 17 | 18 | .. automodule:: merlin.core.dataset 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | merlin.core.executor module 24 | --------------------------- 25 | 26 | .. automodule:: merlin.core.executor 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | merlin.core.scheduler module 32 | ---------------------------- 33 | 34 | .. automodule:: merlin.core.scheduler 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | 40 | Module contents 41 | --------------- 42 | 43 | .. 
automodule:: merlin.core 44 | :members: 45 | :undoc-members: 46 | :show-inheritance: 47 | -------------------------------------------------------------------------------- /docs/_modules/merlin.rst: -------------------------------------------------------------------------------- 1 | merlin package 2 | ============== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | merlin.analysis 10 | merlin.core 11 | merlin.util 12 | merlin.view 13 | 14 | Submodules 15 | ---------- 16 | 17 | merlin.merlin module 18 | -------------------- 19 | 20 | .. automodule:: merlin.merlin 21 | :members: 22 | :undoc-members: 23 | :show-inheritance: 24 | 25 | 26 | Module contents 27 | --------------- 28 | 29 | .. automodule:: merlin 30 | :members: 31 | :undoc-members: 32 | :show-inheritance: 33 | -------------------------------------------------------------------------------- /docs/_modules/merlin.util.rst: -------------------------------------------------------------------------------- 1 | merlin.util package 2 | =================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | merlin.util.barcodedb module 8 | ---------------------------- 9 | 10 | .. automodule:: merlin.util.barcodedb 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | merlin.util.binary module 16 | ------------------------- 17 | 18 | .. automodule:: merlin.util.binary 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | merlin.util.decoding module 24 | --------------------------- 25 | 26 | .. automodule:: merlin.util.decoding 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | 32 | Module contents 33 | --------------- 34 | 35 | .. automodule:: merlin.util 36 | :members: 37 | :undoc-members: 38 | :show-inheritance: 39 | -------------------------------------------------------------------------------- /docs/_modules/merlin.view.rst: -------------------------------------------------------------------------------- 1 | merlin.view package 2 | =================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | merlin.view.widgets 10 | 11 | Submodules 12 | ---------- 13 | 14 | merlin.view.merlinview module 15 | ----------------------------- 16 | 17 | .. automodule:: merlin.view.merlinview 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: merlin.view 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /docs/_modules/merlin.view.widgets.rst: -------------------------------------------------------------------------------- 1 | merlin.view.widgets package 2 | =========================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | merlin.view.widgets.regionview module 8 | ------------------------------------- 9 | 10 | .. automodule:: merlin.view.widgets.regionview 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. automodule:: merlin.view.widgets 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /docs/_modules/modules.rst: -------------------------------------------------------------------------------- 1 | MERlin 2 | ====== 3 | 4 | .. 
toctree:: 5 | :maxdepth: 4 6 | 7 | bokeh 8 | merlin 9 | setup 10 | -------------------------------------------------------------------------------- /docs/_modules/setup.rst: -------------------------------------------------------------------------------- 1 | setup module 2 | ============ 3 | 4 | .. automodule:: setup 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/_static/merlin_headline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/docs/_static/merlin_headline.png -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | API 2 | **** 3 | 4 | .. autosummary:: 5 | :toctree: _autosummary 6 | 7 | merlin.core 8 | merlin.analysis 9 | merlin.util 10 | merlin.view 11 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Configuration file for the Sphinx documentation builder. 4 | # 5 | # This file does only contain a selection of the most common options. For a 6 | # full list see the documentation: 7 | # http://www.sphinx-doc.org/en/master/config 8 | 9 | # -- Path setup -------------------------------------------------------------- 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 14 | 15 | import os 16 | import sys 17 | dir_, _ = os.path.split(__file__) 18 | root_dir = os.path.abspath(os.path.join(dir_, '..', '..')) 19 | sys.path.insert(0, root_dir) 20 | 21 | 22 | # -- Project information ----------------------------------------------------- 23 | 24 | project = 'MERlin' 25 | copyright = '2018, George Emanuel' 26 | author = 'George Emanuel' 27 | 28 | # The short X.Y version 29 | version = '' 30 | # The full version, including alpha/beta/rc tags 31 | release = '' 32 | 33 | 34 | # -- General configuration --------------------------------------------------- 35 | 36 | # If your documentation needs a minimal Sphinx version, state it here. 37 | # 38 | # needs_sphinx = '1.0' 39 | 40 | # Add any Sphinx extension module names here, as strings. They can be 41 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 42 | # ones. 43 | extensions = [ 44 | 'sphinx.ext.autodoc', 45 | 'sphinx.ext.coverage', 46 | 'sphinx.ext.viewcode', 47 | 'sphinx.ext.githubpages', 48 | 'sphinx.ext.autosummary', 49 | 'sphinx.ext.napoleon', 50 | ] 51 | 52 | 53 | # Include Python objects as they appear in source files 54 | autodoc_member_order = 'bysource' 55 | # Default flags used by autodoc directives 56 | autodoc_default_flags = ['members', 'show-inheritance'] 57 | # Generate autodoc stubs with summaries from code 58 | autosummary_generate = True 59 | 60 | # Add any paths that contain templates here, relative to this directory. 61 | templates_path = ['_templates'] 62 | 63 | # The suffix(es) of source filenames. 64 | # You can specify multiple suffix as a list of string: 65 | # 66 | # source_suffix = ['.rst', '.md'] 67 | source_suffix = '.rst' 68 | 69 | # The master toctree document. 
70 | master_doc = 'index' 71 | 72 | # The language for content autogenerated by Sphinx. Refer to documentation 73 | # for a list of supported languages. 74 | # 75 | # This is also used if you do content translation via gettext catalogs. 76 | # Usually you set "language" from the command line for these cases. 77 | language = None 78 | 79 | # List of patterns, relative to source directory, that match files and 80 | # directories to ignore when looking for source files. 81 | # This pattern also affects html_static_path and html_extra_path. 82 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 83 | 84 | # The name of the Pygments (syntax highlighting) style to use. 85 | pygments_style = 'sphinx' 86 | 87 | 88 | # -- Options for HTML output ------------------------------------------------- 89 | 90 | # The theme to use for HTML and HTML Help pages. See the documentation for 91 | # a list of builtin themes. 92 | # 93 | html_theme = 'sphinx_rtd_theme' 94 | 95 | # Theme options are theme-specific and customize the look and feel of a theme 96 | # further. For a list of options available for each theme, see the 97 | # documentation. 98 | # 99 | # html_theme_options = {} 100 | 101 | # Add any paths that contain custom static files (such as style sheets) here, 102 | # relative to this directory. They are copied after the builtin static files, 103 | # so a file named "default.css" will overwrite the builtin "default.css". 104 | html_static_path = ['_static'] 105 | 106 | # Custom sidebar templates, must be a dictionary that maps document names 107 | # to template names. 108 | # 109 | # The default sidebars (for documents that don't match any pattern) are 110 | # defined by theme itself. Builtin themes are using these templates by 111 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', 112 | # 'searchbox.html']``. 113 | # 114 | # html_sidebars = {} 115 | 116 | 117 | # -- Options for HTMLHelp output --------------------------------------------- 118 | 119 | # Output file base name for HTML help builder. 120 | htmlhelp_basename = 'MERlindoc' 121 | 122 | 123 | # -- Options for LaTeX output ------------------------------------------------ 124 | 125 | latex_elements = { 126 | # The paper size ('letterpaper' or 'a4paper'). 127 | # 128 | # 'papersize': 'letterpaper', 129 | 130 | # The font size ('10pt', '11pt' or '12pt'). 131 | # 132 | # 'pointsize': '10pt', 133 | 134 | # Additional stuff for the LaTeX preamble. 135 | # 136 | # 'preamble': '', 137 | 138 | # Latex figure (float) alignment 139 | # 140 | # 'figure_align': 'htbp', 141 | } 142 | 143 | # Grouping the document tree into LaTeX files. List of tuples 144 | # (source start file, target name, title, 145 | # author, documentclass [howto, manual, or own class]). 146 | latex_documents = [ 147 | (master_doc, 'MERlin.tex', 'MERlin Documentation', 148 | 'George Emanuel', 'manual'), 149 | ] 150 | 151 | 152 | # -- Options for manual page output ------------------------------------------ 153 | 154 | # One entry per manual page. List of tuples 155 | # (source start file, name, description, authors, manual section). 156 | man_pages = [ 157 | (master_doc, 'merlin', 'MERlin Documentation', 158 | [author], 1) 159 | ] 160 | 161 | 162 | # -- Options for Texinfo output ---------------------------------------------- 163 | 164 | # Grouping the document tree into Texinfo files. 
List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    (master_doc, 'MERlin', 'MERlin Documentation',
     author, 'MERlin', 'One line description of project.',
     'Miscellaneous'),
]


# -- Options for Epub output -------------------------------------------------

# Bibliographic Dublin Core info.
epub_title = project

# The unique identifier of the text. This can be an ISBN number
# or the project homepage.
#
# epub_identifier = ''

# A unique identification for the text.
#
# epub_uid = ''

# A list of files that should not be packed into the epub file.
epub_exclude_files = ['search.html']


# -- Extension configuration -------------------------------------------------

# -- Options for intersphinx extension ---------------------------------------

# Example configuration for intersphinx: refer to the Python standard library.
intersphinx_mapping = {'https://docs.python.org/': None}

# Napoleon settings
napoleon_google_docstring = True
napoleon_numpy_docstring = True
napoleon_include_init_with_doc = False
napoleon_include_private_with_doc = False
napoleon_include_special_with_doc = True
napoleon_use_admonition_for_examples = False
napoleon_use_admonition_for_notes = False
napoleon_use_admonition_for_references = False
napoleon_use_ivar = False
napoleon_use_param = True
napoleon_use_rtype = True

--------------------------------------------------------------------------------
/docs/contributing.rst:
--------------------------------------------------------------------------------
Contributing to MERlin
************************

Contributions to MERlin can be submitted either by opening an issue to raise concerns or offer suggestions, or by opening a pull request to propose improvements to the code base.

Opening a pull request
========================

A pull request allows code to be proposed for incorporation into MERlin. To receive feedback on work in progress, mark the pull request with WIP in the subject line. To open a pull request:

#. Fork the repository to your GitHub account and clone it locally.
#. Create a new branch for your edits.
#. Make your desired edits to the code.
#. Run the tests to ensure MERlin is still functional. Write new tests to cover your new contribution as necessary.
#. Submit a pull request from your edited branch to the latest vx.y.z (for example v0.1.4) branch of the MERlin repository representing the version of the next release.
   Be sure to reference any relevant issues and request at least one reviewer. Periodically the vx.y.z branch will be merged with the master branch.

Code formatting
===============

Code contributions should follow the `PEP 8 <https://www.python.org/dev/peps/pep-0008/>`_ style guide with the
exception that variable names should be mixedCase instead of words separated by underscores. Comments should follow
the `Google docstring style <https://google.github.io/styleguide/pyguide.html>`_.

Running the tests
=================

All contributions to MERlin must maintain the integrity of the tests. Before submitting a pull request, please ensure
that all tests pass. Tests are implemented using the pytest_ framework.
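Tests that take more than a few seconds to run are marked with the ``slowtest`` marker, as described below. A
minimal sketch of marking such a test (the test name here is hypothetical):

.. code-block:: python

    import pytest

    @pytest.mark.slowtest
    def test_full_decoding_workflow():
        # a long-running integration test that exercises the complete pipeline
        ...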
The tests are in the test directory and they
can be run by executing pytest in the root MERlin directory. To facilitate efficient debugging, tests that take more
than a few seconds are marked with ``slowtest`` and can be excluded from the run using the command:

.. _pytest: https://docs.pytest.org/

.. code-block:: none

    pytest -v -m "not slowtest" test

Generating documentation
=============================

Documentation for MERlin is generated using Sphinx. The API documentation can be generated with the following command from the root MERlin directory:

.. code-block:: none

    sphinx-apidoc -f -o ./docs/_modules .

--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
.. MERlin documentation master file, created by
   sphinx-quickstart on Mon Dec 3 17:31:12 2018.
   You can adapt this file completely to your liking, but it should at least
   contain the root `toctree` directive.

MERlin
******

.. toctree::
   :maxdepth: 2

   installation
   usage
   tasks
   contributing
   api

Indices and tables
==================

* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

--------------------------------------------------------------------------------
/docs/installation.rst:
--------------------------------------------------------------------------------
Installation
**************

Set up a virtual environment
=============================

To ensure that MERlin and its dependencies don't interfere with other packages that are installed, we recommend that you install MERlin in a new virtual environment. MERlin requires Python 3.6 or above.

An anaconda virtual environment can be created using the command:

.. code-block:: none

    conda create -n merlin_env python=3.6

and the new environment can be activated using the command:

.. code-block:: none

    conda activate merlin_env

or

.. code-block:: none

    source activate merlin_env

Installing prerequisites
==========================

The packages rtree and pytables are not properly installed by pip and should be installed independently. For example, using Anaconda:

.. code-block:: none

    conda install rtree pytables

On Harvard research computing, matplotlib raises an error saying that 'CXXABI_1.3.9' is not found. This can be corrected by loading the gcc module:

.. code-block:: none

    module load gcc/8.2.0-fasrc01

Installing MERlin
==================

MERlin can be installed by cloning the repository and installing with pip:

.. code-block:: none

    git clone https://github.com/emanuega/MERlin

.. code-block:: none

    pip install -e MERlin


.. _specifying-paths:

Specifying paths with a .env file
==================================

A .merlinenv file is required to specify the search locations for the various input and output files. The following variables should be defined in a file named .merlinenv in the user home directory (~\\.merlinenv on linux or C:\\users\\UserName\\.merlinenv on Windows):

* DATA\_HOME - The path of the root directory to the raw data.
* ANALYSIS\_HOME - The path of the root directory where analysis results should be stored.
* PARAMETERS\_HOME - The path to the directory where the merfish-parameters directory resides.

The PARAMETERS_HOME directory should contain the following folders:

* analysis - Contains the analysis parameters json files.
* codebooks - Contains the codebook csv files.
* dataorganization - Contains the data organization csv files.
* positions - Contains the position csv files.
* microscope - Contains the microscope parameters json files.
* fpkm - Contains the fpkm csv files.
* snakemake - Contains the snakemake arguments json files.

An example PARAMETERS_HOME directory with typical files can be found in the
`merlin-parameters-example <https://github.com/emanuega/merlin-parameters-example>`_ repository.

The contents of an example .merlinenv file are below:

.. code-block:: none

    DATA_HOME=D:/data
    ANALYSIS_HOME=D:/analysis
    PARAMETERS_HOME=D:/merfish-parameters

MERlin can create a .merlinenv file for you using the command:

.. code-block:: none

    merlin --configure .

--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
@ECHO OFF

pushd %~dp0

REM Command file for Sphinx documentation

if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=.
set BUILDDIR=..\..\MERlin-docs

if "%1" == "" goto help

%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.http://sphinx-doc.org/
	exit /b 1
)

%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%

:end

cd ..\..\MERlin-docs\html
git add .
git commit -m "rebuilt docs"
git push origin gh-pages

popd

--------------------------------------------------------------------------------
/docs/modules.rst:
--------------------------------------------------------------------------------
Project Modules
===============

This page contains the list of the project's modules.

.. autosummary::
   :toctree: _autosummary

   merlin.core

--------------------------------------------------------------------------------
/docs/usage.rst:
--------------------------------------------------------------------------------
Usage
******

MERlin organizes files into three folders, specified in the .merlinenv file (see :ref:`specifying-paths`). The three folders are:

DATA_HOME – base directory for raw data
ANALYSIS_HOME – base directory for the analysis results
PARAMETERS_HOME – base directory for parameters

MERlin reads raw data, such as images, from DATA_HOME and configuration parameters from PARAMETERS_HOME and writes
analysis results into ANALYSIS_HOME. Each separate experiment should be in a separate folder within DATA_HOME and MERlin
will create a corresponding folder in ANALYSIS_HOME.
For example, the images for “experiment1” should be stored in the
folder %DATA_HOME%/experiment1. When MERlin runs, it will save the output files in %ANALYSIS_HOME%/experiment1. With
this file system, %DATA_HOME% and %ANALYSIS_HOME% are constant for all the experiments you analyze and only the
experiment name needs to be specified. The typical file structure for MERFISH experiments
"experiment1" and "experiment2" could be:

.. code-block:: none

    %DATA_HOME%/
    ├── experiment1/
    │   ├── image_000_00.tif
    │   ├── image_000_01.tif
    │   ├── ...
    │   └── image_150_10.tif
    └── experiment2/
        ├── image_000_00.tif
        ├── image_000_01.tif
        ├── ...
        └── image_150_10.tif
    %PARAMETERS_HOME%/
    ├── analysis/
    │   └── analysis_parameters.json
    ├── codebooks/
    │   └── codebook.csv
    ├── dataorganization/
    │   └── dataorganization.csv
    ├── microscope/
    │   └── microscope_parameters.json
    ├── positions/
    │   └── positions.csv
    └── snakemake/
        └── snakeconfig.json
    %ANALYSIS_HOME%/
    ├── experiment1/
    │   ├── FiducialCorrelationWarp
    │   ├── DeconvolutionPreprocess
    │   ├── ...
    │   └── PlotPerformance
    └── experiment2/
        ├── FiducialCorrelationWarp
        ├── DeconvolutionPreprocess
        ├── ...
        └── PlotPerformance


Input specifications
=====================

Raw images
-----------

All raw images should be located in the same folder, as discussed above, and there should be a separate image
file for each imaging round and each field of view. MERlin is able to read both tiff stacks and dax image files. The
exact file name is specified by a regular expression in the data organization file (imageRegExp and fiducialRegExp).
For example, you can specify the regular expression ``(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+)``
for the image filenames specified below. This indicates that the first part of the file name is the imageType (the value in
the imageType column of the data organization file), followed by the fov index, followed by the imagingRound index, all
separated by an underscore.

Data organization
------------------

The data organization file specifies which images correspond to each readout. The data organization file is a csv file. The first row is a header with column names and each following row designates one readout. The information provided for each readout indicates where to find the corresponding images in the raw image data and how to find the corresponding fiducial image to align the images between rounds.

The columns in the data organization file are:

- channelName - The name of the data channel. For genes measured sequentially, this can be set as the gene name.
- readoutName - The name of the readout sequence used to measure this channel.
- imageType - The base name for the image file that contains the images for this readout, for example, ``Conventional_750_650_561_488_405``
- imageRegExp - A regular expression specifying how image file names are constructed for each field of view and
  each imaging round. The parameters used in the regular expression are ``imageType``, ``fov``, and ``imagingRound``,
  for example: ``(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+)`` (see the worked example after this list). Here, ``imageType`` specifies the
  string indicated in the ``imageType`` column for the corresponding row, ``imagingRound`` specifies the designated
  ``imagingRound`` for the corresponding row, and ``fov`` is filled with all field of view indexes in the data set. The
  imageRegExp should not include the file extension, which will be determined automatically.
- bitNumber - The bit number corresponding to this readout.
- imagingRound - The round of imaging where this readout is measured, starting from zero.
- color - The illumination color that is used to measure this readout.
- frame - The zero indexed frame or frames in the image file where images corresponding to this readout can be found. For a single frame, a single integer can be provided. For multiple frames, the frames can be provided as a list such as ``[0, 1, 2, 3, 4, 5, 6]``
- zPos - The z position for each of the frames specified in the previous column. For only a single frame, the z position should be provided as a decimal number while for multiple frames a list should be provided, as for frame.
- fiducialImageType - The base name for the image file that contains the fiducial images for aligning images for this readout, for example, ``Conventional_750_650_561_488_405``
- fiducialRegExp - A regular expression specifying how file names are constructed for the fiducial image files. This regex follows the same format as ``imageRegExp``.
- fiducialImagingRound - The imaging round (zero indexed) corresponding to the fiducial images for aligning images for this readout.
- fiducialFrame - The frame index in the fiducial image file where the fiducial frame can be found.
- fiducialColor - The illumination color that is used to measure the fiducials.
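To illustrate how an ``imageRegExp`` is applied, the following sketch parses an example image file name using
Python's built-in ``re`` module (the file name here is illustrative):

.. code-block:: python

    import re

    imageRegExp = r'(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+)'
    match = re.match(imageRegExp, 'Conventional_750_650_561_488_405_021_04')

    match.group('imageType')     # 'Conventional_750_650_561_488_405'
    match.group('fov')           # '021'
    match.group('imagingRound')  # '04'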
Codebook
----------

The codebook specifies the barcode assigned to each gene and to each blank control measured in the experiment. Barcodes corresponding to blanks must have "blank" in their name.

Position list
--------------

The position list is a csv file containing a list of positions for each imaged region. The i'th row in the file should be
the coordinates of the i'th field of view. Each position is specified by the x position and the y position, separated by a comma.

The name of a position csv file within the POSITION\_HOME directory can be provided; otherwise the positions are
extracted from the image metadata xml.

Microscope parameters
-----------------------

Microscope parameters specify properties specific to the image acquisition. The microscope parameter file should be placed in the MICROSCOPE_PARAMETERS_HOME directory. The parameters that can be set are:

- microns_per_pixel - the number of microns corresponding to one pixel in the image.
- flip_horizontal - flag indicating whether the images should be flipped horizontally in order to align with neighboring images.
- flip_vertical - flag indicating whether the images should be flipped vertically in order to align with neighboring images.
- transpose - flag indicating whether the images should be transposed in order to align with neighboring images.
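A minimal microscope parameters json file setting these values might look like the following; the values shown
are illustrative examples rather than defaults:

.. code-block:: json

    {
        "microns_per_pixel": 0.108,
        "flip_horizontal": true,
        "flip_vertical": false,
        "transpose": true
    }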
Executing locally
===================

After installation, MERlin can be run from the command line with the input parameters specified, such as:

.. code-block:: none

    merlin -a test_decode_and_segment.json -m microscope.json -o 7z_16bits.csv -c L26E1_codebook.csv -n 5 testdata

Here the MERFISH images contained in the directory `%DATA\_HOME%/testdata/` are processed using the analysis tasks listed in `test\_decode\_and\_segment.json` with microscope parameters `microscope.json`, data organization `7z\_16bits.csv`, and codebook `L26E1_codebook.csv`, using 5 cores for each process.

Executing on a high performance cluster
=====================================================

MERlin executes tasks through Snakemake_, a workflow management system. Each task can be distributed over a high performance
cluster that is run by a scheduler, such as SLURM or SGE, by providing the appropriate job submission command to Snakemake.
See the `merlin-parameters-example <https://github.com/emanuega/merlin-parameters-example>`_ repository for an example snakemake
configuration file. Additional arguments can be specified as indicated in the
`snakemake api documentation <https://snakemake.readthedocs.io/en/stable/api_reference/snakemake.html>`_.

.. code-block:: none

    merlin -a test_decode_and_segment.json -m microscope.json -o 7z_16bits.csv -c L26E1_codebook.csv -k snake.json testdata

.. _Snakemake: https://snakemake.readthedocs.io/en/stable/

--------------------------------------------------------------------------------
/license.md:
--------------------------------------------------------------------------------
The MIT License

Copyright (c) 2019 Harvard University

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/merlin/__init__.py:
--------------------------------------------------------------------------------
import dotenv
import os
import glob
import json
import importlib
from typing import List

from merlin.core import dataset

envPath = os.path.join(os.path.expanduser('~'), '.merlinenv')

if os.path.exists(envPath):
    dotenv.load_dotenv(envPath)

    try:
        DATA_HOME = os.path.expanduser(os.environ.get('DATA_HOME'))
        ANALYSIS_HOME = os.path.expanduser(os.environ.get('ANALYSIS_HOME'))
        PARAMETERS_HOME = os.path.expanduser(os.environ.get('PARAMETERS_HOME'))
        ANALYSIS_PARAMETERS_HOME = os.sep.join(
            [PARAMETERS_HOME, 'analysis'])
        CODEBOOK_HOME = os.sep.join(
            [PARAMETERS_HOME, 'codebooks'])
        DATA_ORGANIZATION_HOME = os.sep.join(
            [PARAMETERS_HOME, 'dataorganization'])
        POSITION_HOME = os.sep.join(
            [PARAMETERS_HOME, 'positions'])
        MICROSCOPE_PARAMETERS_HOME = os.sep.join(
            [PARAMETERS_HOME, 'microscope'])
        FPKM_HOME = os.sep.join([PARAMETERS_HOME, 'fpkm'])
        SNAKEMAKE_PARAMETERS_HOME = os.sep.join(
            [PARAMETERS_HOME, 'snakemake'])

    except TypeError:
        print('MERlin environment appears corrupt. Please run ' +
              '\'merlin --configure .\' in order to configure the environment.')
else:
    print(('Unable to find MERlin environment file at %s. Please run ' +
           '\'merlin --configure .\' in order to configure the environment.')
          % envPath)


def store_env(dataHome, analysisHome, parametersHome):
    with open(envPath, 'w') as f:
        f.write('DATA_HOME=%s\n' % dataHome)
        f.write('ANALYSIS_HOME=%s\n' % analysisHome)
        f.write('PARAMETERS_HOME=%s\n' % parametersHome)


class IncompatibleVersionException(Exception):
    pass


def version():
    import pkg_resources
    return pkg_resources.get_distribution('merlin').version


def is_compatible(testVersion: str, baseVersion: str = None) -> bool:
    """ Determine if testVersion is compatible with baseVersion

    Args:
        testVersion: the version identifier to test, as the string 'x.y.z'
            where x is the major version, y is the minor version,
            and z is the patch.
        baseVersion: the version against which to check testVersion's
            compatibility. If not specified then the current MERlin version
            is used as baseVersion.
    Returns: True if testVersion is compatible with baseVersion,
        otherwise False.
    """
    if baseVersion is None:
        baseVersion = version()
    return testVersion.split('.')[0] == baseVersion.split('.')[0]


def get_analysis_datasets(maxDepth=2) -> List[dataset.DataSet]:
    """ Get a list of all datasets currently stored in analysis home.

    Args:
        maxDepth: the directory depth to search for datasets.
    Returns: A list of the dataset objects currently within analysis home.
80 | """ 81 | metadataFiles = [] 82 | for d in range(1, maxDepth+1): 83 | metadataFiles += glob.glob(os.path.join( 84 | ANALYSIS_HOME, *['*']*d, 'dataset.json')) 85 | 86 | def load_dataset(jsonPath) -> dataset.DataSet: 87 | with open(jsonPath, 'r') as f: 88 | metadata = json.load(f) 89 | analysisModule = importlib.import_module(metadata['module']) 90 | analysisTask = getattr(analysisModule, metadata['class']) 91 | return analysisTask(metadata['dataset_name']) 92 | 93 | return [load_dataset(m) for m in metadataFiles] 94 | -------------------------------------------------------------------------------- /merlin/__main__.py: -------------------------------------------------------------------------------- 1 | from .merlin import merlin 2 | 3 | merlin() 4 | -------------------------------------------------------------------------------- /merlin/analysis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/merlin/analysis/__init__.py -------------------------------------------------------------------------------- /merlin/analysis/exportbarcodes.py: -------------------------------------------------------------------------------- 1 | from merlin.core import analysistask 2 | 3 | 4 | class ExportBarcodes(analysistask.AnalysisTask): 5 | 6 | """ 7 | An analysis task that filters barcodes based on area and mean 8 | intensity. 9 | """ 10 | 11 | def __init__(self, dataSet, parameters=None, analysisName=None): 12 | super().__init__(dataSet, parameters, analysisName) 13 | 14 | if 'columns' not in self.parameters: 15 | self.parameters['columns'] = ['barcode_id', 'global_x', 16 | 'global_y', 'cell_index'] 17 | if 'exclude_blanks' not in self.parameters: 18 | self.parameters['exclude_blanks'] = True 19 | 20 | self.columns = self.parameters['columns'] 21 | self.excludeBlanks = self.parameters['exclude_blanks'] 22 | 23 | def get_estimated_memory(self): 24 | return 5000 25 | 26 | def get_estimated_time(self): 27 | return 30 28 | 29 | def get_dependencies(self): 30 | return [self.parameters['filter_task']] 31 | 32 | def _run_analysis(self): 33 | filterTask = self.dataSet.load_analysis_task( 34 | self.parameters['filter_task']) 35 | 36 | barcodeData = filterTask.get_barcode_database().get_barcodes( 37 | columnList=self.columns) 38 | 39 | if self.excludeBlanks: 40 | codebook = filterTask.get_codebook() 41 | barcodeData = barcodeData[ 42 | barcodeData['barcode_id'].isin( 43 | codebook.get_coding_indexes())] 44 | 45 | self.dataSet.save_dataframe_to_csv(barcodeData, 'barcodes', self, 46 | index=False) 47 | -------------------------------------------------------------------------------- /merlin/analysis/generatemosaic.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | from typing import Tuple 4 | 5 | from merlin.core import analysistask 6 | 7 | 8 | ExtentTuple = Tuple[float, float, float, float] 9 | 10 | 11 | class GenerateMosaic(analysistask.AnalysisTask): 12 | 13 | """ 14 | An analysis task that generates mosaic images by compiling different 15 | field of views. 
16 | """ 17 | 18 | def __init__(self, dataSet, parameters=None, analysisName=None): 19 | super().__init__(dataSet, parameters, analysisName) 20 | 21 | if 'microns_per_pixel' not in self.parameters: 22 | self.parameters['microns_per_pixel'] = 3 23 | if 'fov_crop_width' not in self.parameters: 24 | self.parameters['fov_crop_width'] = 0 25 | if 'separate_files' not in self.parameters: 26 | self.parameters['separate_files'] = False 27 | if 'draw_fov_labels' not in self.parameters: 28 | self.parameters['draw_fov_labels'] = False 29 | 30 | if self.parameters['microns_per_pixel'] == 'full_resolution': 31 | self.mosaicMicronsPerPixel = self.dataSet.get_microns_per_pixel() 32 | else: 33 | self.mosaicMicronsPerPixel = self.parameters['microns_per_pixel'] 34 | 35 | def get_estimated_memory(self): 36 | return 10000 37 | 38 | def get_estimated_time(self): 39 | return 30 40 | 41 | def get_dependencies(self): 42 | return [self.parameters['global_align_task'], 43 | self.parameters['warp_task']] 44 | 45 | def get_mosaic(self) -> np.ndarray: 46 | """Get the mosaic generated by this analysis task. 47 | 48 | Returns: 49 | a 5-dimensional array containing the mosaic. The images are arranged 50 | as [channel, zIndex, 1, x, y]. The order of the channels is as 51 | specified in the provided parameters file or in the data 52 | organization if no data channels are specified. 53 | """ 54 | return self.dataSet.get_analysis_image_set(self, 'mosaic') 55 | 56 | def _micron_to_mosaic_pixel(self, micronCoordinates, 57 | micronExtents) -> np.ndarray: 58 | """Calculates the mosaic coordinates in pixels from the specified 59 | global coordinates. 60 | """ 61 | return np.matmul(self._micron_to_mosaic_transform(micronExtents), 62 | np.append(micronCoordinates, 1)).astype(np.int32)[:2] 63 | 64 | def _micron_to_mosaic_transform(self, micronExtents: ExtentTuple) \ 65 | -> np.ndarray: 66 | s = 1/self.mosaicMicronsPerPixel 67 | return np.float32( 68 | [[s*1, 0, -s*micronExtents[0]], 69 | [0, s*1, -s*micronExtents[1]], 70 | [0, 0, 1]]) 71 | 72 | def _transform_image_to_mosaic( 73 | self, inputImage: np.ndarray, fov: int, alignTask, 74 | micronExtents: ExtentTuple, mosaicDimensions: Tuple[int, int])\ 75 | -> np.ndarray: 76 | transform = \ 77 | np.matmul(self._micron_to_mosaic_transform(micronExtents), 78 | alignTask.fov_to_global_transform(fov)) 79 | return cv2.warpAffine( 80 | inputImage, transform[:2, :], mosaicDimensions) 81 | 82 | def _run_analysis(self): 83 | alignTask = self.dataSet.load_analysis_task( 84 | self.parameters['global_align_task']) 85 | micronExtents = alignTask.get_global_extent() 86 | self.dataSet.save_numpy_txt_analysis_result( 87 | self._micron_to_mosaic_transform(micronExtents), 88 | 'micron_to_mosaic_pixel_transform', self) 89 | 90 | dataOrganization = self.dataSet.get_data_organization() 91 | if 'data_channels' in self.parameters: 92 | if isinstance(self.parameters['data_channels'], str): 93 | dataChannels = [dataOrganization.get_data_channel_index( 94 | self.parameters['data_channels'])] 95 | elif isinstance(self.parameters['data_channels'], int): 96 | dataChannels = [self.parameters['data_channels']] 97 | else: 98 | dataChannels = [dataOrganization.get_data_channel_index(x) 99 | if isinstance(x, str) else x 100 | for x in self.parameters['data_channels']] 101 | else: 102 | dataChannels = dataOrganization.get_data_channels() 103 | 104 | maximumProjection = False 105 | if 'z_index' in self.parameters: 106 | if self.parameters['z_index'] != 'maximum_projection': 107 | zIndexes = [self.parameters['z_index']] 
108 |             else:
109 |                 maximumProjection = True
110 |                 zIndexes = [0]
111 |         else:
112 |             zIndexes = range(len(self.dataSet.get_z_positions()))
113 | 
114 |         if not self.parameters['separate_files']:
115 |             imageDescription = self.dataSet.analysis_tiff_description(
116 |                 len(zIndexes), len(dataChannels))
117 |             with self.dataSet.writer_for_analysis_images(
118 |                     self, 'mosaic') as outputTif:
119 |                 for d in dataChannels:
120 |                     for z in zIndexes:
121 |                         mosaic = self._prepare_mosaic_slice(
122 |                             z, d, micronExtents, alignTask, maximumProjection)
123 |                         outputTif.save(mosaic, photometric='MINISBLACK',
124 |                                        metadata=imageDescription)
125 |         else:
126 |             imageDescription = self.dataSet.analysis_tiff_description(1, 1)
127 |             for d in dataChannels:
128 |                 for z in zIndexes:
129 |                     with self.dataSet.writer_for_analysis_images(
130 |                             self, 'mosaic_%s_%i'
131 |                             % (dataOrganization.get_data_channel_name(d), z))\
132 |                             as outputTif:
133 |                         mosaic = self._prepare_mosaic_slice(
134 |                             z, d, micronExtents, alignTask, maximumProjection)
135 |                         outputTif.save(mosaic, photometric='MINISBLACK',
136 |                                        metadata=imageDescription)
137 | 
138 |     def _prepare_mosaic_slice(self, zIndex, dataChannel, micronExtents,
139 |                               alignTask, maximumProjection):
140 |         warpTask = self.dataSet.load_analysis_task(
141 |             self.parameters['warp_task'])
142 | 
143 |         chromaticCorrector = None
144 |         if 'optimize_task' in self.parameters:
145 |             chromaticCorrector = self.dataSet.load_analysis_task(
146 |                 self.parameters['optimize_task']).get_chromatic_corrector()
147 | 
148 |         cropWidth = self.parameters['fov_crop_width']
149 |         mosaicDimensions = tuple(self._micron_to_mosaic_pixel(
150 |             micronExtents[-2:], micronExtents))
151 | 
152 |         mosaic = np.zeros(np.flip(mosaicDimensions, axis=0), dtype=np.uint16)
153 | 
154 |         for f in self.dataSet.get_fovs():
155 |             if maximumProjection:
156 |                 inputImage = np.max([warpTask.get_aligned_image(
157 |                     f, dataChannel, z, chromaticCorrector)
158 |                     for z in range(len(self.dataSet.get_z_positions()))],
159 |                     axis=0)
160 |             else:
161 |                 inputImage = warpTask.get_aligned_image(
162 |                     f, dataChannel, zIndex, chromaticCorrector)
163 | 
164 |             if cropWidth > 0:
165 |                 inputImage[:cropWidth, :] = 0
166 |                 inputImage[inputImage.shape[0] - cropWidth:, :] = 0
167 |                 inputImage[:, :cropWidth] = 0
168 |                 inputImage[:, inputImage.shape[1] - cropWidth:] = 0
169 | 
170 |             if self.parameters['draw_fov_labels']:
171 |                 inputImage = cv2.putText(inputImage, str(f),
172 |                                          (int(0.2*inputImage.shape[1]),
173 |                                           int(0.2*inputImage.shape[0])),
174 |                                          0, 10, (65000, 65000, 65000), 20)
175 | 
176 |             transformedImage = self._transform_image_to_mosaic(
177 |                 inputImage, f, alignTask, micronExtents,
178 |                 mosaicDimensions)
179 | 
180 |             divisionMask = np.bitwise_and(
181 |                 transformedImage > 0, mosaic > 0)
182 |             cv2.add(mosaic, transformedImage, dst=mosaic,
183 |                     mask=np.array(
184 |                         transformedImage > 0).astype(np.uint8))
185 |             dividedMosaic = cv2.divide(mosaic, 2)
186 |             mosaic[divisionMask] = dividedMosaic[divisionMask]
187 | 
188 |         return mosaic
189 | 
-------------------------------------------------------------------------------- /merlin/analysis/partition.py: --------------------------------------------------------------------------------
1 | import pandas
2 | import numpy as np
3 | 
4 | from merlin.core import analysistask
5 | from merlin.util import spatialfeature
6 | 
7 | class PartitionBarcodes(analysistask.ParallelAnalysisTask):
8 | 
9 |     """
10 |     An analysis task that assigns RNAs and sequential signals to cells
11 |     based on the boundaries determined during the segment
task.
12 |     """
13 | 
14 |     def __init__(self, dataSet, parameters=None, analysisName=None):
15 |         super().__init__(dataSet, parameters, analysisName)
16 | 
17 |     def fragment_count(self):
18 |         return len(self.dataSet.get_fovs())
19 | 
20 |     def get_estimated_memory(self):
21 |         return 2048
22 | 
23 |     def get_estimated_time(self):
24 |         return 1
25 | 
26 |     def get_dependencies(self):
27 |         return [self.parameters['filter_task'],
28 |                 self.parameters['assignment_task'],
29 |                 self.parameters['alignment_task']]
30 | 
31 |     def get_partitioned_barcodes(self, fov: int = None) -> pandas.DataFrame:
32 |         """Retrieve the cell by barcode matrices calculated from this
33 |         analysis task.
34 | 
35 |         Args:
36 |             fov: the fov to get the barcode table for. If not specified, the
37 |                 combined table for all fovs is returned.
38 | 
39 |         Returns:
40 |             A pandas data frame containing the parsed barcode information.
41 |         """
42 |         if fov is None:
43 |             return pandas.concat(
44 |                 [self.get_partitioned_barcodes(fov)
45 |                  for fov in self.dataSet.get_fovs()]
46 |             )
47 | 
48 |         return self.dataSet.load_dataframe_from_csv(
49 |             'counts_per_cell', self.get_analysis_name(), fov, index_col=0)
50 | 
51 |     def _run_analysis(self, fragmentIndex):
52 |         filterTask = self.dataSet.load_analysis_task(
53 |             self.parameters['filter_task'])
54 |         assignmentTask = self.dataSet.load_analysis_task(
55 |             self.parameters['assignment_task'])
56 |         alignTask = self.dataSet.load_analysis_task(
57 |             self.parameters['alignment_task'])
58 | 
59 |         fovBoxes = alignTask.get_fov_boxes()
60 |         fovIntersections = sorted([i for i, x in enumerate(fovBoxes) if
61 |                                    fovBoxes[fragmentIndex].intersects(x)])
62 | 
63 |         codebook = filterTask.get_codebook()
64 |         barcodeCount = codebook.get_barcode_count()
65 | 
66 |         bcDB = filterTask.get_barcode_database()
67 |         for fi in fovIntersections:
68 |             partialBC = bcDB.get_barcodes(fi)
69 |             if fi == fovIntersections[0]:
70 |                 currentFOVBarcodes = partialBC.copy(deep=True)
71 |             else:
72 |                 currentFOVBarcodes = pandas.concat(
73 |                     [currentFOVBarcodes, partialBC], axis=0)
74 | 
75 |         currentFOVBarcodes = currentFOVBarcodes.reset_index().copy(deep=True)
76 | 
77 |         sDB = assignmentTask.get_feature_database()
78 |         currentCells = sDB.read_features(fragmentIndex)
79 | 
80 |         countsDF = pandas.DataFrame(
81 |             data=np.zeros((len(currentCells), barcodeCount)),
82 |             columns=range(barcodeCount),
83 |             index=[x.get_feature_id() for x in currentCells])
84 | 
85 |         for cell in currentCells:
86 |             contained = cell.contains_positions(currentFOVBarcodes.loc[:,
87 |                                                 ['global_x', 'global_y',
88 |                                                  'z']].values)
89 |             count = currentFOVBarcodes[contained].groupby('barcode_id').size()
90 |             count = count.reindex(range(barcodeCount), fill_value=0)
91 |             countsDF.loc[cell.get_feature_id(), :] = count.values.tolist()
92 | 
93 |         barcodeNames = [codebook.get_name_for_barcode_index(x)
94 |                         for x in countsDF.columns.values.tolist()]
95 |         countsDF.columns = barcodeNames
96 | 
97 |         self.dataSet.save_dataframe_to_csv(
98 |             countsDF, 'counts_per_cell', self.get_analysis_name(),
99 |             fragmentIndex)
100 | 
101 | 
102 | class ExportPartitionedBarcodes(analysistask.AnalysisTask):
103 | 
104 |     """
105 |     An analysis task that combines the counts-per-cell data from each
106 |     field of view into a single output file.
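# The per-cell tallying in PartitionBarcodes._run_analysis above reduces to
# a groupby/reindex idiom: reindex pads barcode ids never observed inside a
# cell with a zero count. A self-contained sketch with made-up inputs (three
# possible barcode ids, five detected molecules):
import pandas

detected = pandas.DataFrame({'barcode_id': [0, 0, 2, 1, 0]})
counts = detected.groupby('barcode_id').size().reindex(range(3), fill_value=0)
assert counts.values.tolist() == [3, 1, 1]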
107 |     """
108 | 
109 |     def __init__(self, dataSet, parameters=None, analysisName=None):
110 |         super().__init__(dataSet, parameters, analysisName)
111 | 
112 |     def get_estimated_memory(self):
113 |         return 2048
114 | 
115 |     def get_estimated_time(self):
116 |         return 5
117 | 
118 |     def get_dependencies(self):
119 |         return [self.parameters['partition_task']]
120 | 
121 |     def _run_analysis(self):
122 |         pTask = self.dataSet.load_analysis_task(
123 |             self.parameters['partition_task'])
124 |         parsedBarcodes = pTask.get_partitioned_barcodes()
125 | 
126 |         self.dataSet.save_dataframe_to_csv(
127 |             parsedBarcodes, 'barcodes_per_feature',
128 |             self.get_analysis_name())
129 | 
-------------------------------------------------------------------------------- /merlin/analysis/plotperformance.py: --------------------------------------------------------------------------------
1 | import os
2 | from matplotlib import pyplot as plt
3 | import pandas
4 | import merlin
5 | import seaborn
6 | import numpy as np
7 | from typing import List
8 | from merlin.core import analysistask
9 | from merlin.analysis import filterbarcodes
10 | from random import sample
11 | import time
12 | 
13 | from merlin import plots
14 | plt.style.use(
15 |     os.sep.join([os.path.dirname(merlin.__file__),
16 |                  'ext', 'default.mplstyle']))
17 | 
18 | 
19 | class PlotPerformance(analysistask.AnalysisTask):
20 | 
21 |     """
22 |     An analysis task that generates plots depicting metrics of the MERFISH
23 |     decoding.
24 |     """
25 | 
26 |     def __init__(self, dataSet, parameters=None, analysisName=None):
27 |         super().__init__(dataSet, parameters, analysisName)
28 | 
29 |         if 'exclude_plots' not in self.parameters:
30 |             self.parameters['exclude_plots'] = []
31 | 
32 |         self.taskTypes = ['decode_task', 'filter_task', 'optimize_task',
33 |                           'segment_task', 'sum_task', 'partition_task',
34 |                           'global_align_task']
35 | 
36 |     def get_estimated_memory(self):
37 |         return 30000
38 | 
39 |     def get_estimated_time(self):
40 |         return 180
41 | 
42 |     def get_dependencies(self):
43 |         return []
44 | 
45 |     def _run_analysis(self):
46 |         taskDict = {t: self.dataSet.load_analysis_task(self.parameters[t])
47 |                     for t in self.taskTypes if t in self.parameters}
48 |         plotEngine = plots.PlotEngine(self, taskDict)
49 |         while not plotEngine.take_step():
50 |             pass
51 | 
-------------------------------------------------------------------------------- /merlin/analysis/preprocess.py: --------------------------------------------------------------------------------
1 | import os
2 | import cv2
3 | import numpy as np
4 | 
5 | from merlin.core import analysistask
6 | from merlin.util import deconvolve
7 | from merlin.util import aberration
8 | from merlin.util import imagefilters
9 | from merlin.data import codebook
10 | 
11 | 
12 | class Preprocess(analysistask.ParallelAnalysisTask):
13 | 
14 |     """
15 |     An abstract class for preparing data for barcode calling.
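# DeconvolutionPreprocess below derives its kernel window from sigma as
# int(2 * np.ceil(2 * sigma) + 1), which always yields an odd width spanning
# roughly four standard deviations, as cv2.GaussianBlur expects. A quick
# check of the rule (sigma values chosen for illustration):
import numpy as np

for exampleSigma, expectedWindow in [(1, 5), (2, 9), (3, 13)]:
    window = int(2 * np.ceil(2 * exampleSigma) + 1)
    assert window == expectedWindow and window % 2 == 1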
16 | """ 17 | 18 | def _image_name(self, fov): 19 | destPath = self.dataSet.get_analysis_subdirectory( 20 | self.analysisName, subdirectory='preprocessed_images') 21 | return os.sep.join([destPath, 'fov_' + str(fov) + '.tif']) 22 | 23 | def get_pixel_histogram(self, fov=None): 24 | if fov is not None: 25 | return self.dataSet.load_numpy_analysis_result( 26 | 'pixel_histogram', self.analysisName, fov, 'histograms') 27 | 28 | pixelHistogram = np.zeros(self.get_pixel_histogram( 29 | self.dataSet.get_fovs()[0]).shape) 30 | for f in self.dataSet.get_fovs(): 31 | pixelHistogram += self.get_pixel_histogram(f) 32 | 33 | return pixelHistogram 34 | 35 | def _save_pixel_histogram(self, histogram, fov): 36 | self.dataSet.save_numpy_analysis_result( 37 | histogram, 'pixel_histogram', self.analysisName, fov, 'histograms') 38 | 39 | 40 | class DeconvolutionPreprocess(Preprocess): 41 | 42 | def __init__(self, dataSet, parameters=None, analysisName=None): 43 | super().__init__(dataSet, parameters, analysisName) 44 | 45 | if 'highpass_sigma' not in self.parameters: 46 | self.parameters['highpass_sigma'] = 3 47 | if 'decon_sigma' not in self.parameters: 48 | self.parameters['decon_sigma'] = 2 49 | if 'decon_filter_size' not in self.parameters: 50 | self.parameters['decon_filter_size'] = \ 51 | int(2 * np.ceil(2 * self.parameters['decon_sigma']) + 1) 52 | if 'decon_iterations' not in self.parameters: 53 | self.parameters['decon_iterations'] = 20 54 | if 'codebook_index' not in self.parameters: 55 | self.parameters['codebook_index'] = 0 56 | 57 | self._highPassSigma = self.parameters['highpass_sigma'] 58 | self._deconSigma = self.parameters['decon_sigma'] 59 | self._deconIterations = self.parameters['decon_iterations'] 60 | 61 | self.warpTask = self.dataSet.load_analysis_task( 62 | self.parameters['warp_task']) 63 | 64 | def fragment_count(self): 65 | return len(self.dataSet.get_fovs()) 66 | 67 | def get_estimated_memory(self): 68 | return 2048 69 | 70 | def get_estimated_time(self): 71 | return 5 72 | 73 | def get_dependencies(self): 74 | return [self.parameters['warp_task']] 75 | 76 | def get_codebook(self) -> codebook.Codebook: 77 | return self.dataSet.get_codebook(self.parameters['codebook_index']) 78 | 79 | def get_processed_image_set( 80 | self, fov, zIndex: int = None, 81 | chromaticCorrector: aberration.ChromaticCorrector = None 82 | ) -> np.ndarray: 83 | if zIndex is None: 84 | return np.array([[self.get_processed_image( 85 | fov, self.dataSet.get_data_organization() 86 | .get_data_channel_for_bit(b), zIndex, chromaticCorrector) 87 | for zIndex in range(len(self.dataSet.get_z_positions()))] 88 | for b in self.get_codebook().get_bit_names()]) 89 | else: 90 | return np.array([self.get_processed_image( 91 | fov, self.dataSet.get_data_organization() 92 | .get_data_channel_for_bit(b), zIndex, chromaticCorrector) 93 | for b in self.get_codebook().get_bit_names()]) 94 | 95 | def get_processed_image( 96 | self, fov: int, dataChannel: int, zIndex: int, 97 | chromaticCorrector: aberration.ChromaticCorrector = None 98 | ) -> np.ndarray: 99 | inputImage = self.warpTask.get_aligned_image(fov, dataChannel, zIndex, 100 | chromaticCorrector) 101 | return self._preprocess_image(inputImage) 102 | 103 | def _high_pass_filter(self, inputImage: np.ndarray) -> np.ndarray: 104 | highPassFilterSize = int(2 * np.ceil(2 * self._highPassSigma) + 1) 105 | hpImage = imagefilters.high_pass_filter(inputImage, 106 | highPassFilterSize, 107 | self._highPassSigma) 108 | return hpImage.astype(np.float) 109 | 110 | def 
_run_analysis(self, fragmentIndex): 111 | warpTask = self.dataSet.load_analysis_task( 112 | self.parameters['warp_task']) 113 | 114 | histogramBins = np.arange(0, np.iinfo(np.uint16).max, 1) 115 | pixelHistogram = np.zeros( 116 | (self.get_codebook().get_bit_count(), len(histogramBins)-1)) 117 | 118 | # this currently only is to calculate the pixel histograms in order 119 | # to estimate the initial scale factors. This is likely unnecessary 120 | for bi, b in enumerate(self.get_codebook().get_bit_names()): 121 | dataChannel = self.dataSet.get_data_organization()\ 122 | .get_data_channel_for_bit(b) 123 | for i in range(len(self.dataSet.get_z_positions())): 124 | inputImage = warpTask.get_aligned_image( 125 | fragmentIndex, dataChannel, i) 126 | deconvolvedImage = self._preprocess_image(inputImage) 127 | 128 | pixelHistogram[bi, :] += np.histogram( 129 | deconvolvedImage, bins=histogramBins)[0] 130 | 131 | self._save_pixel_histogram(pixelHistogram, fragmentIndex) 132 | 133 | def _preprocess_image(self, inputImage: np.ndarray) -> np.ndarray: 134 | deconFilterSize = self.parameters['decon_filter_size'] 135 | 136 | filteredImage = self._high_pass_filter(inputImage) 137 | deconvolvedImage = deconvolve.deconvolve_lucyrichardson( 138 | filteredImage, deconFilterSize, self._deconSigma, 139 | self._deconIterations).astype(np.uint16) 140 | return deconvolvedImage 141 | 142 | 143 | class DeconvolutionPreprocessGuo(DeconvolutionPreprocess): 144 | 145 | def __init__(self, dataSet, parameters=None, analysisName=None): 146 | super().__init__(dataSet, parameters, analysisName) 147 | 148 | # Check for 'decon_iterations' in parameters instead of 149 | # self.parameters as 'decon_iterations' is added to 150 | # self.parameters by the super-class with a default value 151 | # of 20, but we want the default value to be 2. 152 | if 'decon_iterations' not in parameters: 153 | self.parameters['decon_iterations'] = 2 154 | 155 | self._deconIterations = self.parameters['decon_iterations'] 156 | 157 | def _preprocess_image(self, inputImage: np.ndarray) -> np.ndarray: 158 | deconFilterSize = self.parameters['decon_filter_size'] 159 | 160 | filteredImage = self._high_pass_filter(inputImage) 161 | deconvolvedImage = deconvolve.deconvolve_lucyrichardson_guo( 162 | filteredImage, deconFilterSize, self._deconSigma, 163 | self._deconIterations).astype(np.uint16) 164 | return deconvolvedImage 165 | -------------------------------------------------------------------------------- /merlin/analysis/sequential.py: -------------------------------------------------------------------------------- 1 | import pandas 2 | import rtree 3 | import networkx 4 | import numpy as np 5 | import cv2 6 | from skimage.measure import regionprops 7 | 8 | from merlin.core import analysistask 9 | from merlin.util import imagefilters 10 | 11 | 12 | class SumSignal(analysistask.ParallelAnalysisTask): 13 | 14 | """ 15 | An analysis task that calculates the signal intensity within the boundaries 16 | of a cell for all rounds not used in the codebook, useful for measuring 17 | RNA species that were stained individually. 
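# DeconvolutionPreprocessGuo above inspects the raw `parameters` argument
# rather than self.parameters because the base class has already back-filled
# its default of 20 by the time the subclass runs. A stripped-down sketch of
# that pitfall, with invented class names:
class _SketchBase:
    def __init__(self, parameters=None):
        self.parameters = dict(parameters or {})
        self.parameters.setdefault('decon_iterations', 20)

class _SketchGuo(_SketchBase):
    def __init__(self, parameters=None):
        super().__init__(parameters)
        # Checking self.parameters here would always find the key, so the
        # subclass default of 2 would never be applied.
        if 'decon_iterations' not in (parameters or {}):
            self.parameters['decon_iterations'] = 2

assert _SketchGuo().parameters['decon_iterations'] == 2
assert _SketchGuo({'decon_iterations': 7}).parameters['decon_iterations'] == 7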
18 | """ 19 | 20 | def __init__(self, dataSet, parameters=None, analysisName=None): 21 | super().__init__(dataSet, parameters, analysisName) 22 | 23 | if 'apply_highpass' not in self.parameters: 24 | self.parameters['apply_highpass'] = False 25 | if 'highpass_sigma' not in self.parameters: 26 | self.parameters['highpass_sigma'] = 5 27 | if 'z_index' not in self.parameters: 28 | self.parameters['z_index'] = 0 29 | 30 | if self.parameters['z_index'] >= len(self.dataSet.get_z_positions()): 31 | raise analysistask.InvalidParameterException( 32 | 'Invalid z_index specified for %s. (%i > %i)' 33 | % (self.analysisName, self.parameters['z_index'], 34 | len(self.dataSet.get_z_positions()))) 35 | 36 | self.highpass = str(self.parameters['apply_highpass']).upper() == 'TRUE' 37 | self.alignTask = self.dataSet.load_analysis_task( 38 | self.parameters['global_align_task']) 39 | 40 | def fragment_count(self): 41 | return len(self.dataSet.get_fovs()) 42 | 43 | def get_estimated_memory(self): 44 | return 2048 45 | 46 | def get_estimated_time(self): 47 | return 1 48 | 49 | def get_dependencies(self): 50 | return [self.parameters['warp_task'], 51 | self.parameters['segment_task'], 52 | self.parameters['global_align_task']] 53 | 54 | def _extract_signal(self, cells, inputImage, zIndex) -> pandas.DataFrame: 55 | cellCoords = [] 56 | for cell in cells: 57 | regions = cell.get_boundaries()[zIndex] 58 | if len(regions) == 0: 59 | cellCoords.append([]) 60 | else: 61 | pixels = [] 62 | for region in regions: 63 | coords = region.exterior.coords.xy 64 | xyZip = list(zip(coords[0].tolist(), coords[1].tolist())) 65 | pixels.append(np.array( 66 | self.alignTask.global_coordinates_to_fov( 67 | cell.get_fov(), xyZip))) 68 | cellCoords.append(pixels) 69 | 70 | cellIDs = [str(cells[x].get_feature_id()) for x in range(len(cells))] 71 | mask = np.zeros(inputImage.shape, np.uint8) 72 | for i, cell in enumerate(cellCoords): 73 | cv2.drawContours(mask, cell, -1, i+1, -1) 74 | propsDict = {x.label: x for x in regionprops(mask, inputImage)} 75 | propsOut = pandas.DataFrame( 76 | data=[(propsDict[k].intensity_image.sum(), 77 | propsDict[k].filled_area) 78 | if k in propsDict else (0, 0) 79 | for k in range(1, len(cellCoords) + 1)], 80 | index=cellIDs, 81 | columns=['Intensity', 'Pixels']) 82 | return propsOut 83 | 84 | def _get_sum_signal(self, fov, channels, zIndex): 85 | 86 | fTask = self.dataSet.load_analysis_task(self.parameters['warp_task']) 87 | sTask = self.dataSet.load_analysis_task(self.parameters['segment_task']) 88 | 89 | cells = sTask.get_feature_database().read_features(fov) 90 | 91 | signals = [] 92 | for ch in channels: 93 | img = fTask.get_aligned_image(fov, ch, zIndex) 94 | if self.highpass: 95 | highPassSigma = self.parameters['highpass_sigma'] 96 | highPassFilterSize = int(2 * np.ceil(3 * highPassSigma) + 1) 97 | img = imagefilters.high_pass_filter(img, 98 | highPassFilterSize, 99 | highPassSigma) 100 | signals.append(self._extract_signal(cells, img, 101 | zIndex).iloc[:, [0]]) 102 | 103 | # adding num of pixels 104 | signals.append(self._extract_signal(cells, img, zIndex).iloc[:, [1]]) 105 | 106 | compiledSignal = pandas.concat(signals, 1) 107 | compiledSignal.columns = channels+['Pixels'] 108 | 109 | return compiledSignal 110 | 111 | def get_sum_signals(self, fov: int = None) -> pandas.DataFrame: 112 | """Retrieve the sum signals calculated from this analysis task. 113 | 114 | Args: 115 | fov: the fov to get the sum signals for. If not specified, the 116 | sum signals for all fovs are returned. 
117 | 118 | Returns: 119 | A pandas data frame containing the sum signal information. 120 | """ 121 | if fov is None: 122 | return pandas.concat( 123 | [self.get_sum_signals(fov) for fov in self.dataSet.get_fovs()] 124 | ) 125 | 126 | return self.dataSet.load_dataframe_from_csv( 127 | 'sequential_signal', self.get_analysis_name(), 128 | fov, 'signals', index_col=0) 129 | 130 | def _run_analysis(self, fragmentIndex): 131 | zIndex = int(self.parameters['z_index']) 132 | channels, geneNames = self.dataSet.get_data_organization()\ 133 | .get_sequential_rounds() 134 | 135 | fovSignal = self._get_sum_signal(fragmentIndex, channels, zIndex) 136 | normSignal = fovSignal.iloc[:, :-1].div(fovSignal.loc[:, 'Pixels'], 0) 137 | normSignal.columns = geneNames 138 | 139 | self.dataSet.save_dataframe_to_csv( 140 | normSignal, 'sequential_signal', self.get_analysis_name(), 141 | fragmentIndex, 'signals') 142 | 143 | 144 | class ExportSumSignals(analysistask.AnalysisTask): 145 | def __init__(self, dataSet, parameters=None, analysisName=None): 146 | super().__init__(dataSet, parameters, analysisName) 147 | 148 | def get_estimated_memory(self): 149 | return 2048 150 | 151 | def get_estimated_time(self): 152 | return 5 153 | 154 | def get_dependencies(self): 155 | return [self.parameters['sequential_task']] 156 | 157 | def _run_analysis(self): 158 | sTask = self.dataSet.load_analysis_task( 159 | self.parameters['sequential_task']) 160 | signals = sTask.get_sum_signals() 161 | 162 | self.dataSet.save_dataframe_to_csv( 163 | signals, 'sequential_sum_signals', 164 | self.get_analysis_name()) 165 | -------------------------------------------------------------------------------- /merlin/analysis/testtask.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from merlin.core import analysistask 4 | 5 | '''This module contains dummy analysis tasks for running tests''' 6 | 7 | 8 | class SimpleAnalysisTask(analysistask.AnalysisTask): 9 | 10 | def __init__(self, dataSet, parameters=None, analysisName=None): 11 | super().__init__(dataSet, parameters, analysisName) 12 | 13 | def _run_analysis(self): 14 | pass 15 | 16 | def get_estimated_memory(self): 17 | return 100 18 | 19 | def get_estimated_time(self): 20 | return 1 21 | 22 | def get_dependencies(self): 23 | if 'dependencies' in self.parameters: 24 | return self.parameters['dependencies'] 25 | else: 26 | return [] 27 | 28 | 29 | class SimpleParallelAnalysisTask(analysistask.ParallelAnalysisTask): 30 | 31 | def __init__(self, dataSet, parameters=None, analysisName=None): 32 | super().__init__(dataSet, parameters, analysisName) 33 | 34 | def _run_analysis(self, fragmentIndex): 35 | pass 36 | 37 | def get_estimated_memory(self): 38 | return 100 39 | 40 | def get_estimated_time(self): 41 | return 1 42 | 43 | def get_dependencies(self): 44 | if 'dependencies' in self.parameters: 45 | return self.parameters['dependencies'] 46 | else: 47 | return [] 48 | 49 | def fragment_count(self): 50 | return 5 51 | 52 | 53 | class RandomNumberParallelAnalysisTask(analysistask.ParallelAnalysisTask): 54 | 55 | """A test analysis task that generates random numbers.""" 56 | 57 | def __init__(self, dataSet, parameters=None, analysisName=None): 58 | super().__init__(dataSet, parameters, analysisName) 59 | 60 | def get_random_result(self, fragmentIndex): 61 | return self.dataSet.load_numpy_analysis_result('random_numbers', 62 | self, fragmentIndex) 63 | 64 | def _run_analysis(self, fragmentIndex): 65 | 
self.dataSet.save_numpy_analysis_result(
66 |             fragmentIndex*np.random.rand(100), 'random_numbers', self,
67 |             fragmentIndex)
68 | 
69 |     def get_estimated_memory(self):
70 |         return 100
71 | 
72 |     def get_estimated_time(self):
73 |         return 1
74 | 
75 |     def get_dependencies(self):
76 |         if 'dependencies' in self.parameters:
77 |             return self.parameters['dependencies']
78 |         else:
79 |             return []
80 | 
81 |     def fragment_count(self):
82 |         return 10
83 | 
84 | 
85 | class SimpleInternallyParallelAnalysisTask(
86 |         analysistask.InternallyParallelAnalysisTask):
87 | 
88 |     def __init__(self, dataSet, parameters=None, analysisName=None):
89 |         super().__init__(dataSet, parameters, analysisName)
90 | 
91 |     def _run_analysis(self):
92 |         pass
93 | 
94 |     def get_estimated_memory(self):
95 |         return 100
96 | 
97 |     def get_estimated_time(self):
98 |         return 1
99 | 
100 |     def get_dependencies(self):
101 |         if 'dependencies' in self.parameters:
102 |             return self.parameters['dependencies']
103 |         else:
104 |             return []
105 | 
-------------------------------------------------------------------------------- /merlin/core/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/merlin/core/__init__.py
-------------------------------------------------------------------------------- /merlin/core/executor.py: --------------------------------------------------------------------------------
1 | from abc import abstractmethod
2 | import multiprocessing
3 | import threading
4 | from typing import Callable
5 | 
6 | from merlin.core import analysistask
7 | 
8 | 
9 | class Executor(object):
10 | 
11 |     def __init__(self):
12 |         super().__init__()
13 | 
14 |     @abstractmethod
15 |     def run(self, task: analysistask.AnalysisTask, index: int = None,
16 |             rerunCompleted: bool = False) -> None:
17 |         """Run an analysis task.
18 | 
19 |         This method will not run analysis tasks that are already currently
20 |         running, and analysis that terminated early due to error or otherwise
21 |         will not be restarted.
22 | 
23 |         Args:
24 |             task: the analysis task to run.
25 |             index: index of the analysis to run for a parallel analysis task.
26 |             rerunCompleted: flag indicating if previous analysis should be
27 |                 run again even if it has previously completed. If rerunCompleted
28 |                 is True, analysis will be run on the task regardless of its
29 |                 status. If rerunCompleted is False, analysis will only be run on
30 |                 the task or fragments of the task that have either not been
31 |                 started or have previously completed in error.
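# A minimal sketch of driving analysis through this interface with the
# LocalExecutor defined below, assuming someTask is an already constructed
# AnalysisTask (merlin/analysis/testtask.py has minimal concrete tasks):
#
#     localExecutor = LocalExecutor(coreCount=4)
#     localExecutor.run(someTask)           # run the whole task
#     localExecutor.run(someTask, index=0)  # one fragment of a parallel task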
32 |         """
33 |         pass
34 | 
35 | 
36 | class LocalExecutor(Executor):
37 | 
38 |     def __init__(self, coreCount=None):
39 |         super().__init__()
40 | 
41 |         if coreCount is None:
42 |             self.coreCount = int(multiprocessing.cpu_count()*0.7)
43 |         else:
44 |             self.coreCount = coreCount
45 | 
46 |     def run(self, task: analysistask.AnalysisTask, index: int = None,
47 |             rerunCompleted: bool = False) -> None:
48 |         if task.is_complete() and not rerunCompleted:
49 |             return
50 | 
51 |         if index is not None:
52 |             task.run(index)
53 |         else:
54 |             task.run()
55 | 
56 | 
-------------------------------------------------------------------------------- /merlin/data/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/merlin/data/__init__.py
-------------------------------------------------------------------------------- /merlin/data/codebook.py: --------------------------------------------------------------------------------
1 | import os
2 | import csv
3 | import numpy as np
4 | import pandas
5 | from typing import List
6 | from typing import Union
7 | 
8 | import merlin
9 | 
10 | 
11 | def _parse_barcode_from_string(inputString):
12 |     return np.array([int(x) for x in inputString if x != ' '])
13 | 
14 | 
15 | class Codebook(object):
16 | 
17 |     """
18 |     A Codebook stores the association of barcodes to genes.
19 |     """
20 | 
21 |     def __init__(self, dataSet, filePath, codebookIndex: int = 0,
22 |                  codebookName: str = None):
23 |         """
24 |         Create a new Codebook for the data in the specified data set.
25 | 
26 |         If filePath does not point to an existing file, it is resolved
27 |         relative to merlin.CODEBOOK_HOME. The Codebook at the resolved
28 |         path is then loaded and stored in the dataSet, overwriting
29 |         any previously stored
30 |         Codebook.
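# A quick check of _parse_barcode_from_string above: the legacy codebook
# format's space-separated bit strings become numpy vectors, e.g.
#
#     _parse_barcode_from_string('0 1 0 1')  # -> array([0, 1, 0, 1])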
31 | """ 32 | self._dataSet = dataSet 33 | if not os.path.exists(filePath): 34 | filePath = os.sep.join([merlin.CODEBOOK_HOME, filePath]) 35 | 36 | newVersion = True 37 | with open(filePath, 'r') as f: 38 | if 'version' in f.readline(): 39 | newVersion = False 40 | 41 | if newVersion: 42 | self._data = pandas.read_csv(filePath) 43 | else: 44 | headerLength = 3 45 | barcodeData = pandas.read_csv( 46 | filePath, header=headerLength, skipinitialspace=True, 47 | usecols=['name', 'id', 'barcode'], 48 | converters={'barcode': _parse_barcode_from_string}) 49 | with open(filePath, 'r') as inFile: 50 | csvReader = csv.reader(inFile, delimiter=',') 51 | header = [row for i, row in enumerate(csvReader) 52 | if i < headerLength] 53 | 54 | bitNames = [x.strip() for x in header[2][1:]] 55 | 56 | self._data = self._generate_codebook_dataframe( 57 | barcodeData, bitNames) 58 | 59 | if not codebookName: 60 | codebookName = os.path.splitext(os.path.basename(filePath))[0] 61 | self._codebookName = codebookName 62 | self._codebookIndex = codebookIndex 63 | self._dataSet.save_codebook(self) 64 | 65 | @staticmethod 66 | def _generate_codebook_dataframe(barcodeData, bitNames): 67 | dfData = np.array([[currentRow['name'], currentRow['id']] 68 | + currentRow['barcode'].tolist() 69 | for i, currentRow in barcodeData.iterrows()]) 70 | df = pandas.DataFrame(dfData, columns=['name', 'id'] + bitNames) 71 | df[bitNames] = df[bitNames].astype('uint8') 72 | return df 73 | 74 | def get_data(self) -> pandas.DataFrame: 75 | """ Get the dataframe that contains the information for this codebook 76 | 77 | Returns: The pandas dataframe 78 | """ 79 | return self._data 80 | 81 | def get_barcode(self, index: int) -> List[bool]: 82 | """ Get the barcode with the specified index. 83 | 84 | Args: 85 | index: the index of the barcode in the barcode list 86 | Returns: 87 | A list of 0's and 1's denoting the barcode 88 | """ 89 | return [self._data.loc[index][n] for n in self.get_bit_names()] 90 | 91 | def get_barcode_count(self) -> int: 92 | """ 93 | Get the number of barcodes in this codebook. 94 | 95 | Returns: 96 | The number of barcodes, counting barcodes for blanks and genes 97 | """ 98 | return len(self._data) 99 | 100 | def get_bit_count(self) -> int: 101 | """ 102 | Get the number of bits used for MERFISH barcodes in this codebook. 103 | """ 104 | return len(self.get_bit_names()) 105 | 106 | def get_bit_names(self) -> List[str]: 107 | """ Get the names of the bits for this MERFISH data set. 108 | 109 | Returns: 110 | A list of the names of the bits in order from the lowest to highest 111 | """ 112 | return [s for s in self._data.columns if s not in ['name', 'id']] 113 | 114 | def get_barcodes(self, ignoreBlanks: bool = False) -> np.array: 115 | """ Get the barcodes present in this codebook. 116 | 117 | Args: 118 | ignoreBlanks: flag indicating whether barcodes corresponding 119 | to blanks should be included. 120 | Returns: 121 | A list of the barcodes represented as lists of bits. 122 | """ 123 | bitNames = self.get_bit_names() 124 | if ignoreBlanks: 125 | return np.array([[x[n] for n in bitNames] for i, x 126 | in self._data.iterrows() 127 | if 'BLANK' not in x['name'].upper()]) 128 | else: 129 | return np.array([[x[n] for n in bitNames] 130 | for i, x in self._data.iterrows()]) 131 | 132 | def get_coding_indexes(self) -> List[int]: 133 | """ Get the barcode indexes that correspond with genes. 
134 | 
135 |         Returns:
136 |             A list of barcode indexes that correspond with genes and not
137 |             blanks
138 |         """
139 |         return self._data[
140 |             ~self._data['name'].str.contains('Blank', case=False)].index
141 | 
142 |     def get_blank_indexes(self) -> List[int]:
143 |         """ Get the barcode indexes that do not correspond with genes.
144 | 
145 |         Returns:
146 |             A list of barcode indexes that correspond with blanks
147 |         """
148 |         return self._data[
149 |             self._data['name'].str.contains('Blank', case=False)].index
150 | 
151 |     def get_gene_names(self) -> List[str]:
152 |         """ Get the names of the genes represented in this codebook.
153 | 
154 |         Returns:
155 |             A list of the gene names. The list does not contain the names of
156 |             the blanks.
157 |         """
158 |         return self._data.loc[self.get_coding_indexes()]['name'].tolist()
159 | 
160 |     def get_name_for_barcode_index(self, index: int) -> str:
161 |         """ Get the gene name for the barcode with the specified index.
162 | 
163 |         Returns:
164 |             The gene name
165 |         """
166 |         return self._data.loc[index]['name']
167 | 
168 |     def get_barcode_index_for_name(self, name: str) -> Union[int, None]:
169 |         """ Get the barcode index for the barcode with the specified name.
170 | 
171 |         Returns:
172 |             The barcode index. If name appears more than once, the index of
173 |             the first appearance is returned. If name is not in this codebook
174 |             then None is returned.
175 |         """
176 |         matches = self._data[self._data['name'].str.match('^' + name + '$')]
177 |         if len(matches) == 0:
178 |             return None
179 |         return matches.index[0]
180 | 
181 |     def get_codebook_name(self) -> str:
182 |         """ Gets the name of this codebook
183 | 
184 |         Returns:
185 |             The codebook name. This is the original file name of the codebook.
186 |         """
187 |         return self._codebookName
188 | 
189 |     def get_codebook_index(self) -> int:
190 |         """ Get the index of this codebook
191 | 
192 |         Returns:
193 |             The codebook index. All codebooks associated with the same dataset
194 |             will have unique indexes starting from 0.
195 |         """
196 |         return self._codebookIndex
197 | 
-------------------------------------------------------------------------------- /merlin/ext/default.mplstyle: --------------------------------------------------------------------------------
1 | font.family : arial
2 | 
3 | xtick.major.size : 3
4 | xtick.minor.size : 1.5
5 | xtick.major.pad : 2
6 | xtick.labelsize : 8
7 | xtick.direction : in
8 | 
9 | ytick.major.size : 3
10 | ytick.minor.size : 1.5
11 | ytick.major.pad : 2
12 | ytick.labelsize : 8
13 | ytick.direction : in
14 | 
15 | 
16 | axes.facecolor : w
17 | axes.labelweight : bold
18 | axes.labelsize : 12
19 | 
20 | axes.titlesize : 14
21 | axes.titleweight: bold
-------------------------------------------------------------------------------- /merlin/plots/__init__.py: --------------------------------------------------------------------------------
1 | import inspect
2 | import pkgutil
3 | import importlib
4 | from typing import Set, List
5 | 
6 | import merlin
7 | from merlin.plots._base import AbstractPlot
8 | from merlin.plots._base import PlotMetadata
9 | 
10 | 
11 | def get_available_plots() -> Set:
12 |     """ Get all plots defined within any submodule of merlin.plots
13 | 
14 |     Returns: a set of references to the plots
15 |     """
16 |     plotSet = set()
17 |     for importer, modname, ispkg in pkgutil.iter_modules(merlin.plots.__path__):
18 |         currentModule = importlib.import_module(
19 |             merlin.plots.__name__ + '.'
+ modname) 20 | for name, obj in inspect.getmembers(currentModule): 21 | if inspect.isclass(obj)\ 22 | and issubclass(obj, AbstractPlot)\ 23 | and obj != AbstractPlot: 24 | plotSet.add(obj) 25 | return plotSet 26 | 27 | 28 | class PlotEngine: 29 | 30 | def __init__(self, plotTask, taskDict): 31 | """ Create a new plot engine. 32 | 33 | Args: 34 | plotTask: the analysis task to save the plots and plot 35 | metadata into 36 | taskDict: a dictionary containing references to the analysis 37 | tasks to use for plotting results. 38 | """ 39 | self.taskDict = taskDict 40 | availablePlots = [x(plotTask) for x in get_available_plots()] 41 | self.plotList = [x for x in availablePlots if x.is_relevant(taskDict)] 42 | 43 | requiredMetadata = \ 44 | {m for p in self.plotList for m in p.get_required_metadata()} 45 | self.metadataDict = {x.metadata_name(): x(plotTask, taskDict) 46 | for x in requiredMetadata} 47 | 48 | def get_plots(self) -> List[AbstractPlot]: 49 | """ Get a list of the plots that this plot engine will generate. 50 | 51 | Returns: A list of the plot objects that will be generated by this 52 | plot engine. 53 | """ 54 | return self.plotList 55 | 56 | def take_step(self) -> bool: 57 | """ Generate metadata and plots from newly available analysis results. 58 | 59 | Returns: True if all plots have been generated and otherwise false. 60 | """ 61 | 62 | incompletePlots = [p for p in self.plotList if not p.is_complete()] 63 | if len(incompletePlots) == 0: 64 | return True 65 | 66 | for m in self.metadataDict.values(): 67 | m.update() 68 | 69 | completeTasks = [k for k, v in self.taskDict.items() if v.is_complete()] 70 | completeMetadata = [k for k, v in self.metadataDict.items() 71 | if v.is_complete()] 72 | readyPlots = [p for p in incompletePlots 73 | if p.is_ready(completeTasks, completeMetadata)] 74 | for p in readyPlots: 75 | p.plot(self.taskDict, self.metadataDict) 76 | 77 | return len([p for p in self.plotList if not p.is_complete()]) == 0 78 | -------------------------------------------------------------------------------- /merlin/plots/_base.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from abc import ABC, abstractmethod 3 | from typing import List, Dict, Tuple 4 | from matplotlib import pyplot as plt 5 | 6 | from merlin.core import analysistask 7 | 8 | 9 | class AbstractPlot(ABC): 10 | 11 | """ 12 | A base class for generating a plot of the analysis results. Each plot 13 | should inherit from this class. 14 | """ 15 | 16 | def __init__(self, analysisTask: analysistask.AnalysisTask): 17 | """ Create a new AbstractPlot 18 | 19 | Args: 20 | analysisTask: the analysisTask where the plot should be saved. 21 | """ 22 | self._analysisTask = analysisTask 23 | 24 | def figure_name(self) -> str: 25 | """ Get the name for identifying this figure. 26 | 27 | Returns: the name of this figure 28 | """ 29 | return type(self).__name__ 30 | 31 | @abstractmethod 32 | def get_required_tasks(self) -> Dict[str, Tuple[type]]: 33 | """ Get the tasks that are required to be complete prior to 34 | generating this plot. 35 | 36 | Returns: A dictionary of the types of tasks as keys and a tuple 37 | of the accepted classes as values. The keys can include 38 | decode_task, filter_task, optimize_task, segment_task, 39 | sum_task, partition_task, and/or global_align_task. If all classes 40 | of the specified type are allowed, the value should be 'all'. If 41 | no tasks are required then an empty dictionary should be returned. 
42 | """ 43 | pass 44 | 45 | @abstractmethod 46 | def get_required_metadata(self) -> List[object]: 47 | """ Get the plot metadata that is required to generate this plot. 48 | 49 | Returns: A list of class references for the metadata 50 | objects that are required for this task. 51 | """ 52 | pass 53 | 54 | @abstractmethod 55 | def _generate_plot(self, inputTasks: Dict[str, analysistask.AnalysisTask], 56 | inputMetadata: Dict[str, 'PlotMetadata']) -> plt.Figure: 57 | """ Generate the plot. 58 | 59 | This function should be implemented in all subclasses and the generated 60 | figure handle should be returned. 61 | 62 | Args: 63 | inputTasks: A dictionary of the input tasks to use to generate the 64 | plot. Each analysis task is indexed by a string indicating 65 | the task type as in get_required_tasks. 66 | inputMetadata: A dictionary of the input metadata for generating 67 | this plot. Each metadata object is indexed by the name of the 68 | metadata. 69 | Returns: the figure handle to the newly generated figure 70 | """ 71 | pass 72 | 73 | def is_relevant(self, inputTasks: Dict[str, analysistask.AnalysisTask] 74 | ) -> bool: 75 | """ Determine if this plot is relevant given the analysis tasks 76 | provided. 77 | 78 | Args: 79 | inputTasks: A dictionary of the analysis tasks indexed with 80 | strings indicating the task type as in get_required_tasks 81 | Returns: True if this plot can be generated using the provided 82 | analysis tasks and false otherwise. 83 | """ 84 | for rTask, rTypes in self.get_required_tasks().items(): 85 | if rTask not in inputTasks: 86 | return False 87 | if rTypes != 'all' \ 88 | and not isinstance(inputTasks[rTask], rTypes): 89 | return False 90 | return True 91 | 92 | def is_ready(self, completeTasks: List[str], 93 | completeMetadata: List[str]) -> bool: 94 | """ Determine if all requirements for generating this plot are 95 | satisfied. 96 | 97 | Args: 98 | completeTasks: A list of the types of tasks that are complete. 99 | The list can contain the same strings as in get_required_tasks 100 | completeMetadata: A list of the metadata that has been generated. 101 | Returns: True if all required tasks and all required metadata 102 | is complete 103 | """ 104 | return all([t in completeTasks for t in self.get_required_tasks()])\ 105 | and all([m.metadata_name() in completeMetadata 106 | for m in self.get_required_metadata()]) 107 | 108 | def is_complete(self) -> bool: 109 | """ Determine if this plot has been generated. 110 | 111 | Returns: True if this plot has been generated and otherwise false. 112 | """ 113 | return self._analysisTask.dataSet.figure_exists( 114 | self._analysisTask, self.figure_name(), 115 | type(self).__module__.split('.')[-1]) 116 | 117 | def plot(self, inputTasks: Dict[str, analysistask.AnalysisTask], 118 | inputMetadata: Dict[str, 'PlotMetadata']) -> None: 119 | """ Generate this plot and save it within the analysis task. 120 | 121 | If the plot is not relevant for the types of analysis tasks passed, 122 | then the function will return without generating any plot. 123 | 124 | Args: 125 | inputTasks: A dictionary of the input tasks to use to generate the 126 | plot. Each analysis task is indexed by a string indicating 127 | the task type as in get_required_tasks. 128 | inputMetadata: A dictionary of the input metadata for generating 129 | this plot. Each metadata object is indexed by the name of the 130 | metadata. 
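# A minimal sketch of a concrete plot wired into the hooks above; the class
# name and the required task type are invented for illustration:
#
#     class SketchCountPlot(AbstractPlot):
#         def get_required_tasks(self):
#             return {'decode_task': 'all'}  # accept any decode task class
#
#         def get_required_metadata(self):
#             return []  # no incremental metadata needed
#
#         def _generate_plot(self, inputTasks, inputMetadata):
#             fig = plt.figure(figsize=(5, 5))
#             # a real plot would query inputTasks['decode_task'] here;
#             # PlotEngine saves and closes the returned figure
#             return fig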
131 | """ 132 | if not self.is_relevant(inputTasks): 133 | return 134 | f = self._generate_plot(inputTasks, inputMetadata) 135 | f.tight_layout(pad=1) 136 | self._analysisTask.dataSet.save_figure( 137 | self._analysisTask, f, self.figure_name(), 138 | type(self).__module__.split('.')[-1]) 139 | plt.close(f) 140 | 141 | 142 | class PlotMetadata(ABC): 143 | 144 | def __init__(self, analysisTask: analysistask.AnalysisTask, 145 | taskDict: Dict[str, analysistask.AnalysisTask]): 146 | """ Create a new metadata object. 147 | 148 | Args: 149 | analysisTask: the analysisTask where the metadata should be saved. 150 | taskDict: a dictionary containing the analysis tasks to use 151 | to generate the metadata indexed by the type of task as a 152 | string as in get_required_tasks 153 | """ 154 | self._analysisTask = analysisTask 155 | self._taskDict = taskDict 156 | 157 | @classmethod 158 | def metadata_name(cls) -> str: 159 | return cls.__module__.split('.')[-1] + '/' + cls.__name__ 160 | 161 | def _load_numpy_metadata(self, resultName: str, 162 | defaultValue: np.ndarray = None) -> np.ndarray: 163 | """ Convenience method for reading a result created by this metadata 164 | from the dataset. 165 | 166 | Args: 167 | resultName: the name of the metadata result 168 | defaultValue: the value to return if the metadata is not found 169 | Returns: a numpy array with the result or defaultValue if an IOError is 170 | raised while reading the metadata 171 | """ 172 | return self._analysisTask.dataSet\ 173 | .load_numpy_analysis_result_if_available( 174 | resultName, self._analysisTask, defaultValue, 175 | subdirectory=self.metadata_name()) 176 | 177 | def _save_numpy_metadata(self, result: np.ndarray, resultName: str) -> None: 178 | """ Convenience method for saving a result created by this metadata 179 | from the dataset. 180 | 181 | Args: 182 | result: the numpy array to save 183 | resultName: the name of the metadata result 184 | """ 185 | self._analysisTask.dataSet.save_numpy_analysis_result( 186 | result, resultName, self._analysisTask, 187 | subdirectory=self.metadata_name()) 188 | 189 | @abstractmethod 190 | def update(self) -> None: 191 | """ Update this metadata with the latest analysis results. 192 | 193 | This method should be implemented in all subclasses and implementations 194 | should not wait for additional data to become available. They should 195 | only update the metadata as much as possible with the data that is ready 196 | when the function is called and should not wait for additional 197 | analysis to complete. 198 | """ 199 | pass 200 | 201 | @abstractmethod 202 | def is_complete(self) -> bool: 203 | """ Determine if this metadata is complete. 
204 | 205 | Returns: True if the metadata is complete or False if additional 206 | computation is necessary 207 | """ 208 | pass 209 | -------------------------------------------------------------------------------- /merlin/plots/optimizationplots.py: -------------------------------------------------------------------------------- 1 | import seaborn 2 | from matplotlib import pyplot as plt 3 | 4 | from merlin.plots._base import AbstractPlot 5 | 6 | 7 | class OptimizationScaleFactorsPlot(AbstractPlot): 8 | 9 | def __init__(self, analysisTask): 10 | super().__init__(analysisTask) 11 | 12 | def get_required_tasks(self): 13 | return {'optimize_task': 'all'} 14 | 15 | def get_required_metadata(self): 16 | return [] 17 | 18 | def _generate_plot(self, inputTasks, inputMetadata): 19 | fig = plt.figure(figsize=(5, 5)) 20 | seaborn.heatmap( 21 | inputTasks['optimize_task'].get_scale_factor_history()) 22 | plt.xlabel('Bit index') 23 | plt.ylabel('Iteration number') 24 | plt.title('Scale factor optimization history') 25 | return fig 26 | 27 | 28 | class ScaleFactorVsBitNumberPlot(AbstractPlot): 29 | 30 | def __init__(self, analysisTask): 31 | super().__init__(analysisTask) 32 | 33 | def get_required_tasks(self): 34 | return {'optimize_task': 'all'} 35 | 36 | def get_required_metadata(self): 37 | return [] 38 | 39 | def _generate_plot(self, inputTasks, inputMetadata): 40 | optimizeTask = inputTasks['optimize_task'] 41 | codebook = optimizeTask.get_codebook() 42 | dataOrganization = optimizeTask.dataSet.get_data_organization() 43 | colors = [dataOrganization.get_data_channel_color( 44 | dataOrganization.get_data_channel_for_bit(x)) 45 | for x in codebook.get_bit_names()] 46 | 47 | scaleFactors = optimizeTask.get_scale_factors() 48 | scaleFactorsByColor = {c: [] for c in set(colors)} 49 | for i, s in enumerate(scaleFactors): 50 | scaleFactorsByColor[colors[i]].append((i, s)) 51 | 52 | fig = plt.figure(figsize=(5, 5)) 53 | for c, d in scaleFactorsByColor.items(): 54 | plt.plot([x[0] for x in d], [x[1] for x in d], 'o') 55 | 56 | plt.legend(scaleFactorsByColor.keys()) 57 | plt.ylim(bottom=0) 58 | plt.xlabel('Bit index') 59 | plt.ylabel('Scale factor magnitude') 60 | plt.title('Scale factor magnitude vs bit index') 61 | return fig 62 | 63 | 64 | class OptimizationBarcodeCountsPlot(AbstractPlot): 65 | 66 | def __init__(self, analysisTask): 67 | super().__init__(analysisTask) 68 | 69 | def get_required_tasks(self): 70 | return {'optimize_task': 'all'} 71 | 72 | def get_required_metadata(self): 73 | return [] 74 | 75 | def _generate_plot(self, inputTasks, inputMetadata): 76 | fig = plt.figure(figsize=(5, 5)) 77 | seaborn.heatmap( 78 | inputTasks['optimize_task'].get_barcode_count_history()) 79 | plt.xlabel('Barcode index') 80 | plt.ylabel('Iteration number') 81 | plt.title('Barcode counts optimization history') 82 | return fig 83 | -------------------------------------------------------------------------------- /merlin/plots/segmentationplots.py: -------------------------------------------------------------------------------- 1 | from matplotlib import pyplot as plt 2 | import numpy as np 3 | 4 | from merlin.plots._base import AbstractPlot 5 | 6 | 7 | class SegmentationBoundaryPlot(AbstractPlot): 8 | 9 | def __init__(self, analysisTask): 10 | super().__init__(analysisTask) 11 | 12 | def get_required_tasks(self): 13 | return {'segment_task': 'all'} 14 | 15 | def get_required_metadata(self): 16 | return [] 17 | 18 | def _generate_plot(self, inputTasks, inputMetadata): 19 | featureDB = 
inputTasks['segment_task'].get_feature_database() 20 | features = featureDB.read_features() 21 | 22 | fig = plt.figure(figsize=(15, 15)) 23 | ax = fig.add_subplot(111) 24 | ax.set_aspect('equal', 'datalim') 25 | 26 | if len(features) == 0: 27 | return fig 28 | 29 | zPosition = 0 30 | if len(features[0].get_boundaries()) > 1: 31 | zPosition = int(len(features[0].get_boundaries())/2) 32 | 33 | featuresSingleZ = [feature.get_boundaries()[int(zPosition)] 34 | for feature in features] 35 | featuresSingleZ = [x for y in featuresSingleZ for x in y] 36 | allCoords = [[feature.exterior.coords.xy[0].tolist(), 37 | feature.exterior.coords.xy[1].tolist()] 38 | for feature in featuresSingleZ] 39 | allCoords = [x for y in allCoords for x in y] 40 | plt.plot(*allCoords) 41 | 42 | plt.xlabel('X position (microns)') 43 | plt.ylabel('Y position (microns)') 44 | plt.title('Segmentation boundaries') 45 | return fig 46 | -------------------------------------------------------------------------------- /merlin/plots/testplots.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from matplotlib import pyplot as plt 3 | 4 | from merlin.plots._base import AbstractPlot 5 | from merlin.plots._base import PlotMetadata 6 | 7 | 8 | class TestPlot(AbstractPlot): 9 | 10 | def __init__(self, analysisTask): 11 | super().__init__(analysisTask) 12 | 13 | def get_required_tasks(self): 14 | return {'test_task': 'all'} 15 | 16 | def get_required_metadata(self): 17 | return [TestPlotMetadata] 18 | 19 | def _generate_plot(self, inputTasks, inputMetadata): 20 | fig = plt.figure(figsize=(10, 10)) 21 | plt.plot(inputMetadata['testplots/TestPlotMetadata'].get_mean_values(), 22 | 'x') 23 | return fig 24 | 25 | 26 | class TestPlotMetadata(PlotMetadata): 27 | 28 | def __init__(self, analysisTask, taskDict): 29 | super().__init__(analysisTask, taskDict) 30 | self.testTask = self._taskDict['test_task'] 31 | self.completeFragments = [False]*self.testTask.fragment_count() 32 | self.meanValues = np.zeros(self.testTask.fragment_count()) 33 | 34 | def get_mean_values(self) -> np.ndarray: 35 | return self.meanValues 36 | 37 | def update(self) -> None: 38 | testTask = self._taskDict['test_task'] 39 | 40 | for i in range(testTask.fragment_count()): 41 | if not self.completeFragments[i] and testTask.is_complete(i): 42 | self.meanValues[i] = np.mean(self.testTask.get_random_result(i)) 43 | self.completeFragments[i] = True 44 | 45 | def is_complete(self) -> bool: 46 | return all(self.completeFragments) 47 | -------------------------------------------------------------------------------- /merlin/util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/merlin/util/__init__.py -------------------------------------------------------------------------------- /merlin/util/aberration.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | from skimage import transform 3 | import numpy as np 4 | from abc import ABC 5 | from abc import abstractmethod 6 | 7 | """ 8 | This module contains tools for measuring and correcting chromatic aberrations. 9 | """ 10 | 11 | 12 | class ChromaticCorrector(ABC): 13 | 14 | """ 15 | An abstract class for color-specific image transformation. 
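# A minimal sketch of the nested mapping the RigidChromaticCorrector below
# expects: reference color -> measured color -> transform. The wavelength
# names and the offset are invented for illustration:
import numpy as np
from skimage import transform

exampleTransformations = {'561': {'647': transform.EuclideanTransform(
    translation=(0.4, -0.2))}}
# corrector = RigidChromaticCorrector(exampleTransformations,
#                                     referenceColor='561')
# corrected = corrector.transform_image(np.zeros((32, 32)), '647')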
16 | """ 17 | 18 | @abstractmethod 19 | def transform_image(self, inputImage: np.ndarray, imageColor: str 20 | ) -> np.ndarray: 21 | """Transform inputImage to the reference color. 22 | 23 | Args: 24 | inputImage: The image to transform. If inputImage has two 25 | dimensions, it is transformed as a single image. If inputImage 26 | has three dimensions, each element in the first dimension is 27 | transformed as an image as (z, x, y). 28 | imageColor: The color of the input image as a string. If the color 29 | of the input image is not in the set of transformations for 30 | this corrector, no transformation is applied. 31 | """ 32 | pass 33 | 34 | 35 | class IdentityChromaticCorrector(ChromaticCorrector): 36 | 37 | """ 38 | A class for correcting chromatic aberration that performs no transformation. 39 | """ 40 | 41 | def __init__(self): 42 | pass 43 | 44 | def transform_image(self, inputImage: np.ndarray, imageColor: str 45 | ) -> np.ndarray: 46 | return inputImage 47 | 48 | 49 | class RigidChromaticCorrector(ChromaticCorrector): 50 | 51 | """ 52 | A class for correcting chromatic aberration using rigid transformation 53 | matrices. 54 | """ 55 | 56 | def __init__(self, transformations: Dict[str, Dict[ 57 | str, transform.EuclideanTransform]], referenceColor: str=None): 58 | """Creates a new RigidChromaticCorrector that transforms images 59 | using the specified transformations. 60 | 61 | Args: 62 | transformations: A dictionary of transformations 63 | referenceColor: the name of the color to transform the images to 64 | """ 65 | 66 | self.transformations = transformations 67 | if referenceColor is None: 68 | self.referenceColor = min(transformations.keys()) 69 | else: 70 | self.referenceColor = referenceColor 71 | 72 | def transform_image(self, inputImage: np.ndarray, imageColor: str 73 | ) -> np.ndarray: 74 | if imageColor not in self.transformations[self.referenceColor]: 75 | return inputImage 76 | 77 | if imageColor == self.referenceColor: 78 | return inputImage 79 | 80 | if len(inputImage.shape) == 3: 81 | return np.array([self.transform_image(x, imageColor) 82 | for x in inputImage]) 83 | 84 | return transform.warp( 85 | inputImage, 86 | self.transformations[self.referenceColor][imageColor], 87 | preserve_range=True) 88 | -------------------------------------------------------------------------------- /merlin/util/barcodefilters.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.spatial import cKDTree 3 | import networkx as nx 4 | import pandas as pd 5 | from typing import List 6 | 7 | 8 | def remove_zplane_duplicates_all_barcodeids(barcodes: pd.DataFrame, 9 | zPlanes: int, 10 | maxDist: float, 11 | allZPos: List) -> pd.DataFrame: 12 | """ Depending on the separation between z planes, spots from a single 13 | molecule may be observed in more than one z plane. These putative 14 | duplicates are removed based on supplied distance and z plane 15 | constraints. In evaluating this method, when z planes are separated 16 | by 1.5 µm the likelihood of finding a putative duplicate above or below 17 | the selected plane is ~5-10%, whereas the false-positive rate is closer 18 | to 1%, as determined by checking two planes above or below, or comparing 19 | barcodes of different identities but similar abundance between 20 | adjacent z planes. 
21 | 22 | Args: 23 | barcodes: a pandas dataframe containing all the entries for a given 24 | barcode identity 25 | zPlanes: number of planes above and below to consider when evaluating 26 | potential duplicates 27 | maxDist: maximum euclidean distance allowed to separate centroids of 28 | putative barcode duplicate, in pixels 29 | Returns: 30 | keptBarcodes: pandas dataframe where barcodes of the same identity that 31 | fall within parameters of z plane duplicates have 32 | been removed. 33 | """ 34 | if len(barcodes) == 0: 35 | return barcodes 36 | else: 37 | barcodeGroups = barcodes.groupby('barcode_id') 38 | bcToKeep = [] 39 | for bcGroup, bcData in barcodeGroups: 40 | bcToKeep.append( 41 | remove_zplane_duplicates_single_barcodeid(bcData, zPlanes, 42 | maxDist, allZPos)) 43 | mergedBC = pd.concat(bcToKeep, 0).reset_index(drop=True) 44 | mergedBC = mergedBC.sort_values(by=['barcode_id', 'z']) 45 | return mergedBC 46 | 47 | 48 | def remove_zplane_duplicates_single_barcodeid(barcodes: pd.DataFrame, 49 | zPlanes: int, 50 | maxDist: float, 51 | allZPos: List) -> pd.DataFrame: 52 | """ Remove barcodes with a given barcode id that are putative z plane 53 | duplicates. 54 | 55 | Args: 56 | barcodes: a pandas dataframe containing all the entries for a given 57 | barcode identity 58 | zPlanes: number of planes above and below to consider when evaluating 59 | potential duplicates 60 | maxDist: maximum euclidean distance allowed to separate centroids of 61 | putative barcode duplicate, in pixels 62 | Returns: 63 | keptBarcodes: pandas dataframe where barcodes of the same identity that 64 | fall within parameters of z plane duplicates have 65 | been removed. 66 | """ 67 | barcodes.reset_index(drop=True, inplace=True) 68 | if not len(barcodes['barcode_id'].unique()) == 1: 69 | errorString = 'The method remove_zplane_duplicates_single_barcodeid ' +\ 70 | 'should be given a dataframe containing molecules ' +\ 71 | 'that all have the same barcode id. 
Please use ' +\ 72 | 'remove_zplane_duplicates_all_barcodeids to handle ' +\ 73 | 'dataframes containing multiple barcode ids' 74 | raise ValueError(errorString) 75 | graph = nx.Graph() 76 | zPos = sorted(allZPos) 77 | graph.add_nodes_from(barcodes.index.values.tolist()) 78 | for z in range(0, len(zPos)): 79 | zToCompare = [pos for pos, otherZ in enumerate(zPos) if 80 | (pos >= z - zPlanes) & (pos <= z + zPlanes) & ~(pos == z)] 81 | treeBC = barcodes[barcodes['z'] == z] 82 | if len(treeBC) == 0: 83 | pass 84 | else: 85 | tree = cKDTree(treeBC.loc[:, ['x', 'y']].values) 86 | for compZ in zToCompare: 87 | queryBC = barcodes[barcodes['z'] == compZ] 88 | if len(queryBC) == 0: 89 | pass 90 | else: 91 | dist, idx = tree.query(queryBC.loc[:, ['x', 'y']].values, 92 | k=1, distance_upper_bound=maxDist) 93 | currentHits = treeBC.index.values[idx[np.isfinite(dist)]] 94 | comparisonHits = queryBC.index.values[np.isfinite(dist)] 95 | graph.add_edges_from(list(zip(currentHits, comparisonHits))) 96 | connectedComponents = [list(x) for x in 97 | list(nx.connected_components(graph))] 98 | 99 | def choose_brighter_barcode(barcodes, indexes): 100 | sortedBC = barcodes.loc[indexes, :].sort_values(by='mean_intensity', 101 | ascending=False) 102 | return sortedBC.index.values.tolist()[0] 103 | 104 | keptBarcodes = barcodes.loc[sorted([x[0] if len(x) == 1 else 105 | choose_brighter_barcode(barcodes, x) 106 | for x in connectedComponents]), :] 107 | return keptBarcodes 108 | -------------------------------------------------------------------------------- /merlin/util/binary.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from typing import List 3 | 4 | 5 | def bit_list_to_int(bitList: List[bool]) -> int: 6 | """Converts a binary list to an integer 7 | 8 | Args: 9 | bitList: the binary list to convert 10 | Returns: 11 | The integer corresponding to the input bit list 12 | """ 13 | out = 0 14 | for b in reversed(bitList): 15 | out = (out << 1) | b 16 | return out 17 | 18 | 19 | def int_to_bit_list(intIn: int, bitCount: int) -> List[bool]: 20 | """Converts an integer to a binary list with the specified number of bits. 21 | 22 | Args: 23 | intIn: the integer to convert 24 | bitCount: the number of bits to include in the output bit list 25 | Returns: 26 | A list of bit that specifies the input integer. The least significant 27 | bit is first in the list. 28 | """ 29 | return [k_bit_set(intIn, k) for k in range(bitCount)] 30 | 31 | 32 | def k_bit_set(n: int, k: int) -> bool: 33 | """Determine if the k'th bit of integer n is set to 1. 34 | 35 | Args: 36 | n: the integer to check 37 | k: the index of the bit to check where 0 corresponds with the least 38 | significant bit 39 | Returns: 40 | true if the k'th bit of the integer n is 1, otherwise false. If 41 | k is None, this function returns None. 42 | """ 43 | if k is None: 44 | return None 45 | 46 | if n & (1 << k): 47 | return True 48 | else: 49 | return False 50 | 51 | 52 | def flip_bit(barcode: List[bool], bitIndex: int) -> List[bool]: 53 | """Generates a version of the provided barcode where the bit at the 54 | specified index is inverted. 55 | 56 | The provided barcode is left unchanged. It is copied before flipping the 57 | bit. 
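# A quick round-trip check of the helpers defined above (least significant
# bit first):
exampleBits = int_to_bit_list(6, 4)
assert exampleBits == [False, True, True, False]
assert bit_list_to_int(exampleBits) == 6
assert k_bit_set(6, 1) is True  # bit 1 of 0b0110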
-------------------------------------------------------------------------------- /merlin/util/deconvolve.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | from scipy import ndimage 4 | 5 | from merlin.util import matlab 6 | 7 | """ 8 | This module contains utility functions for performing deconvolution on 9 | images. 10 | """ 11 | 12 | 13 | def calculate_projectors(windowSize: int, sigmaG: float) -> list: 14 | """Calculate forward and backward projectors as described in: 15 | 16 | 'Accelerating iterative deconvolution and multiview fusion by orders 17 | of magnitude', Guo et al, bioRxiv 2019. 18 | 19 | Args: 20 | windowSize: the size of the window over which to perform the gaussian. 21 | This must be an odd number. 22 | sigmaG: the standard deviation of the Gaussian point spread function 23 | 24 | Returns: 25 | A list containing the forward and backward projectors to use for 26 | Lucy-Richardson deconvolution. 27 | """ 28 | pf = matlab.matlab_gauss2D(shape=(windowSize, windowSize), 29 | sigma=sigmaG) 30 | pfFFT = np.fft.fft2(pf) 31 | 32 | # Wiener-Butterworth back projector. 33 | # 34 | # These values are from Guo et al. 35 | alpha = 0.001 36 | beta = 0.001 37 | n = 8 38 | 39 | # This is the cut-off frequency. 40 | kc = 1.0/(0.5 * 2.355 * sigmaG) 41 | 42 | # FFT frequencies 43 | kv = np.fft.fftfreq(pfFFT.shape[0]) 44 | 45 | kx = np.zeros((kv.size, kv.size)) 46 | for i in range(kv.size): 47 | kx[i, :] = np.copy(kv) 48 | 49 | ky = np.transpose(kx) 50 | kk = np.sqrt(kx*kx + ky*ky) 51 | 52 | # Wiener filter 53 | bWiener = pfFFT/(np.abs(pfFFT) * np.abs(pfFFT) + alpha) 54 | 55 | # Butterworth filter 56 | eps = np.sqrt(1.0/(beta*beta) - 1) 57 | 58 | kkSqr = kk*kk/(kc*kc) 59 | bBWorth = 1.0/np.sqrt(1.0 + eps * eps * np.power(kkSqr, n)) 60 | 61 | # Wiener-Butterworth back projector 62 | pbFFT = bWiener * bBWorth 63 | 64 | # back projector. 65 | pb = np.real(np.fft.ifft2(pbFFT)) 66 | 67 | return [pf, pb] 68 | 69 | 70 | def deconvolve_lucyrichardson(image: np.ndarray, 71 | windowSize: int, 72 | sigmaG: float, 73 | iterationCount: int) -> np.ndarray: 74 | """Performs Lucy-Richardson deconvolution on the provided image using a 75 | Gaussian point spread function. 76 | 77 | Ported from Matlab deconvlucy. 78 | 79 | Args: 80 | image: the input image to be deconvolved 81 | windowSize: the size of the window over which to perform the gaussian. 82 | This must be an odd number. 83 | sigmaG: the standard deviation of the Gaussian point spread function 84 | iterationCount: the number of iterations to perform 85 | 86 | Returns: 87 | the deconvolved image 88 | """ 89 | eps = np.finfo(float).eps 90 | Y = np.copy(image) 91 | J1 = np.copy(image) 92 | J2 = np.copy(image) 93 | wI = np.copy(image) 94 | imR = np.copy(image) 95 | reblurred = np.copy(image) 96 | tmpMat1 = np.zeros(image.shape, dtype=float) 97 | tmpMat2 = np.zeros(image.shape, dtype=float) 98 | T1 = np.zeros(image.shape, dtype=float) 99 | T2 = np.zeros(image.shape, dtype=float) 100 | lam = 0 101 | 102 | if windowSize % 2 != 1: 103 | gaussianFilter = matlab.matlab_gauss2D(shape=(windowSize, windowSize), 104 | sigma=sigmaG) 105 | 106 | for i in range(iterationCount): 107 | if i > 1: 108 | cv2.multiply(T1, T2, tmpMat1) 109 | cv2.multiply(T2, T2, tmpMat2) 110 | lam = np.sum(tmpMat1) / (np.sum(tmpMat2) + eps) 111 | lam = max(min(lam, 1), 0) 112 | cv2.subtract(J1, J2, Y) 113 | cv2.addWeighted(J1, 1, Y, lam, 0, Y) 114 | np.clip(Y, 0, None, Y) 115 | if windowSize % 2 == 1: 116 | cv2.GaussianBlur(Y, (windowSize, windowSize), sigmaG, reblurred, 117 | borderType=cv2.BORDER_REPLICATE) 118 | else: 119 | reblurred = ndimage.convolve(Y, gaussianFilter, mode='constant') 120 | np.clip(reblurred, eps, None, reblurred) 121 | cv2.divide(wI, reblurred, imR) 122 | imR += eps 123 | if windowSize % 2 == 1: 124 | cv2.GaussianBlur(imR, (windowSize, windowSize), sigmaG, imR, 125 | borderType=cv2.BORDER_REPLICATE) 126 | else: 127 | imR = ndimage.convolve(imR, gaussianFilter, mode='constant') 128 | imR[imR > 2 ** 16] = 0 129 | np.copyto(J2, J1) 130 | np.multiply(Y, imR, out=J1) 131 | np.copyto(T2, T1) 132 | np.subtract(J1, Y, out=T1) 133 | return J1 134 | 135 |
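# Editor's sketch (illustrative): a typical call with arbitrary example
# parameters. The image is converted to float, and windowSize is odd so
# the cv2.GaussianBlur code path above is used.
#
#     deconvolved = deconvolve_lucyrichardson(
#         rawImage.astype(float), windowSize=9, sigmaG=1.2,
#         iterationCount=20)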
136 | def deconvolve_lucyrichardson_guo(image: np.ndarray, 137 | windowSize: int, 138 | sigmaG: float, 139 | iterationCount: int) -> np.ndarray: 140 | """Performs Lucy-Richardson deconvolution on the provided image using a 141 | Gaussian point spread function. This version uses the optimized 142 | deconvolution approach described in: 143 | 144 | 'Accelerating iterative deconvolution and multiview fusion by orders 145 | of magnitude', Guo et al, bioRxiv 2019. 146 | 147 | Args: 148 | image: the input image to be deconvolved 149 | windowSize: the size of the window over which to perform the gaussian. 150 | This must be an odd number. 151 | sigmaG: the standard deviation of the Gaussian point spread function 152 | iterationCount: the number of iterations to perform 153 | 154 | Returns: 155 | the deconvolved image 156 | """ 157 | [pf, pb] = calculate_projectors(windowSize, sigmaG) 158 | 159 | eps = 1.0e-6 160 | i_max = 2**16-1 161 | 162 | ek = np.copy(image) 163 | np.clip(ek, eps, None, ek) 164 | 165 | for i in range(iterationCount): 166 | ekf = cv2.filter2D(ek, -1, pf, 167 | borderType=cv2.BORDER_REPLICATE) 168 | np.clip(ekf, eps, i_max, ekf) 169 | 170 | ek = ek*cv2.filter2D(image/ekf, -1, pb, 171 | borderType=cv2.BORDER_REPLICATE) 172 | np.clip(ek, eps, i_max, ek) 173 | 174 | return ek 175 | -------------------------------------------------------------------------------- /merlin/util/imagefilters.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | """ 5 | This module contains code for performing filtering operations on images. 6 | """ 7 | 8 | 9 | def high_pass_filter(image: np.ndarray, 10 | windowSize: int, 11 | sigma: float) -> np.ndarray: 12 | """High pass filter the input image by subtracting a Gaussian low pass. 13 | Args: 14 | image: the input image to be filtered 15 | windowSize: the size of the Gaussian kernel to use. 16 | sigma: the sigma of the Gaussian. 17 | 18 | Returns: 19 | the high pass filtered image. The returned image is the same type 20 | as the input image. 21 | """ 22 | lowpass = cv2.GaussianBlur(image, 23 | (windowSize, windowSize), 24 | sigma, 25 | borderType=cv2.BORDER_REPLICATE) 26 | gauss_highpass = image - lowpass 27 | gauss_highpass[lowpass > image] = 0 28 | return gauss_highpass 29 |
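# Editor's sketch (illustrative): windowSize must be odd, as required by
# cv2.GaussianBlur; the values below are arbitrary example parameters.
# For unsigned integer images, pixels where the low pass exceeds the
# image are explicitly zeroed above to avoid wrap-around.
#
#     highpassed = high_pass_filter(rawImage, windowSize=11, sigma=3)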
14 | """ 15 | 16 | 17 | def read_blist(bFile: BinaryIO) -> pandas.DataFrame: 18 | entryCount, _, entryFormat = _read_binary_header(bFile) 19 | bytesPerEntry = int(np.sum( 20 | [struct.calcsize(typeNames[x['type']]) * np.prod(x['size']) for x in 21 | entryFormat])) 22 | return pandas.DataFrame( 23 | [_parse_entry_bytes(bFile.read(bytesPerEntry), entryFormat) for i in 24 | range(entryCount)]) 25 | 26 | 27 | typeNames = {'int8': 'b', 28 | 'uint8': 'B', 29 | 'int16': 'h', 30 | 'uint16': 'H', 31 | 'int32': 'i', 32 | 'uint32': 'I', 33 | 'int64': 'q', 34 | 'uint64': 'Q', 35 | 'float': 'f', 36 | 'single': 'f', 37 | 'double': 'd', 38 | 'char': 's'} 39 | 40 | 41 | def _chunker(seq, size: int) -> Iterator: 42 | return (seq[pos:pos + size] for pos in range(0, len(seq), size)) 43 | 44 | 45 | def _read_binary_header(bFile: BinaryIO) -> Tuple[int, int, List[Dict]]: 46 | version = struct.unpack(typeNames['uint8'], bFile.read(1))[0] 47 | bFile.read(1) 48 | entryCount = struct.unpack(typeNames['uint32'], bFile.read(4))[0] 49 | headerLength = struct.unpack(typeNames['uint32'], bFile.read(4))[0] 50 | layout = bFile.read(headerLength).decode('utf-8').split(',') 51 | entryList = [ 52 | {'name': x, 'size': np.array(y.split(' ')).astype(int), 'type': z} 53 | for x, y, z in _chunker(layout, 3)] 54 | return entryCount, headerLength, entryList 55 | 56 | 57 | def _parse_entry_bytes(byteList, entryFormat: List[Dict]): 58 | entryData = {} 59 | byteIndex = 0 60 | for currentEntry in entryFormat: 61 | itemCount = int(np.prod(currentEntry['size'])) 62 | itemType = typeNames[currentEntry['type']] 63 | itemSize = struct.calcsize(itemType) 64 | items = np.array([struct.unpack( 65 | itemType, byteList[byteIndex 66 | + i * itemSize:byteIndex 67 | + (i + 1) * itemSize])[0] 68 | for i in range(itemCount)]) 69 | byteIndex += itemSize * itemCount 70 | 71 | if currentEntry['size'][0] == 1 and currentEntry['size'][1] == 1: 72 | items = items[0] 73 | if currentEntry['size'][0] != 1 and currentEntry['size'][1] != 1: 74 | items = items.reshape(currentEntry['size']) 75 | 76 | entryData[currentEntry['name']] = items 77 | 78 | return entryData 79 | -------------------------------------------------------------------------------- /merlin/util/matlab.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from typing import Tuple 3 | 4 | 5 | """ 6 | This module contains Matlab functions that do not have equivalents in 7 | python libraries. 8 | """ 9 | 10 | 11 | def matlab_gauss2D(shape: Tuple[int, int]=(3, 3), sigma: float=0.5 12 | ) -> np.array: 13 | """ 14 | 2D gaussian mask - should give the same result as MATLAB's 15 | fspecial('gaussian',[shape],[sigma]) 16 | """ 17 | m, n = [(ss-1.)/2. 
-------------------------------------------------------------------------------- /merlin/util/matlab.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from typing import Tuple 3 | 4 | 5 | """ 6 | This module contains Matlab functions that do not have equivalents in 7 | python libraries. 8 | """ 9 | 10 | 11 | def matlab_gauss2D(shape: Tuple[int, int] = (3, 3), sigma: float = 0.5 12 | ) -> np.array: 13 | """ 14 | 2D gaussian mask - should give the same result as MATLAB's 15 | fspecial('gaussian',[shape],[sigma]) 16 | """ 17 | m, n = [(ss-1.)/2. for ss in shape] 18 | y, x = np.ogrid[-m:m+1, -n:n+1] 19 | h = np.exp(-(x*x + y*y) / (2.*sigma*sigma)) 20 | h[h < np.finfo(h.dtype).eps*h.max()] = 0 21 | sumh = h.sum() 22 | if sumh != 0: 23 | h /= sumh 24 | return h 25 | -------------------------------------------------------------------------------- /merlin/util/registration.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | from sklearn.neighbors import NearestNeighbors 3 | from skimage import transform 4 | import numpy as np 5 | from scipy import signal 6 | 7 | 8 | def extract_control_points( 9 | referencePoints: np.ndarray, movingPoints: np.ndarray, 10 | gridSpacing: float = 0.5) -> Tuple[np.ndarray, np.ndarray]: 11 | """Match putative control points between reference and moving point sets. 12 | If fewer than 10 points are provided for either the reference or the moving 13 | list, this returns no points. 14 | 15 | Args: 16 | referencePoints: a n x 2 numpy array containing the reference points. 17 | movingPoints: a m x 2 numpy array containing the moving points. 18 | gridSpacing: the spacing of the grid for the 2d histogram for 19 | estimating the coarse transformation 20 | Returns: two np arrays (select reference points, select moving points) 21 | both of which are p x 2. The i'th point in the reference list 22 | has been matched to the i'th point in the moving list. 23 | """ 24 | if len(referencePoints) < 10 or len(movingPoints) < 10: 25 | return np.zeros((0, 2)), np.zeros((0, 2)) 26 | 27 | edges = np.arange(-200, 200, gridSpacing) 28 | 29 | neighbors = NearestNeighbors(n_neighbors=10) 30 | neighbors.fit(referencePoints) 31 | distances, indexes = neighbors.kneighbors( 32 | movingPoints, return_distance=True) 33 | differences = [[movingPoints[i] - referencePoints[x] 34 | for x in indexes[i]] 35 | for i in range(len(movingPoints))] 36 | counts, xedges, yedges = np.histogram2d( 37 | [x[0] for y in differences for x in y], 38 | [x[1] for y in differences for x in y], 39 | bins=edges) 40 | maxIndex = np.unravel_index(counts.argmax(), counts.shape) 41 | offset = (xedges[maxIndex[0]], yedges[maxIndex[1]]) 42 | 43 | distancesShifted, indexesShifted = neighbors.kneighbors( 44 | movingPoints - np.tile(offset, (movingPoints.shape[0], 1)), 45 | return_distance=True) 46 | 47 | controlIndexes = [x[0] < gridSpacing for x in distancesShifted] 48 | referenceControls = np.array([referencePoints[x[0]] 49 | for x in indexesShifted[controlIndexes]]) 50 | movingControls = movingPoints[controlIndexes, :] 51 | 52 | return referenceControls, movingControls 53 | 54 | 55 | def estimate_transform_from_points( 56 | referencePoints: np.ndarray, movingPoints: np.ndarray) \ 57 | -> transform.EuclideanTransform: 58 | """Estimate a similarity transform from paired control points. 59 | 60 | If fewer than two points are provided, this will return the identity 61 | transform. 62 | 63 | Args: 64 | referencePoints: a n x 2 numpy array containing the reference points 65 | movingPoints: a n x 2 numpy array containing the moving points, where 66 | the i'th point of moving points corresponds with the i'th point 67 | of reference points. 68 | Returns: a similarity transform estimated from the paired points. 69 | 70 | """ 71 | tform = transform.SimilarityTransform() 72 | if len(referencePoints) < 2 or len(movingPoints) < 2: 73 | return tform 74 | tform.estimate(referencePoints, movingPoints) 75 | return tform 76 |
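# Editor's sketch (illustrative): the two functions above are typically
# chained, with referencePoints and movingPoints assumed to be n x 2
# arrays of fiducial spot centroids.
#
#     refControls, movControls = extract_control_points(
#         referencePoints, movingPoints)
#     tform = estimate_transform_from_points(refControls, movControls)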
69 | 70 | """ 71 | tform = transform.SimilarityTransform() 72 | if len(referencePoints) < 2 or len(movingPoints) < 2: 73 | return tform 74 | tform.estimate(referencePoints, movingPoints) 75 | return tform 76 | 77 | 78 | def lsradialcenterfit(m, b, w): 79 | wm2p1 = w / (m * m + 1) 80 | sw = np.sum(wm2p1) 81 | smmw = np.sum(m * m * wm2p1) 82 | smw = np.sum(m * wm2p1) 83 | smbw = np.sum(m * b * wm2p1) 84 | sbw = np.sum(b * wm2p1) 85 | det = smw * smw - smmw * sw 86 | xc = (smbw * sw - smw * sbw) / det 87 | yc = (smbw * smw - smmw * sbw) / det 88 | 89 | return xc, yc 90 | 91 | 92 | def radial_center(imageIn) -> Tuple[float, float]: 93 | """Determine the center of the object in imageIn using radial-symmetry-based 94 | particle localization. 95 | 96 | Adapted from Raghuveer, Nature Methods, 2012 97 | """ 98 | Ny, Nx = imageIn.shape 99 | xm_onerow = np.arange(-(Nx - 1) / 2.0 + 0.5, (Nx) / 2.0 - 0.5) 100 | xm = np.tile(xm_onerow, (Ny - 1, 1)) 101 | ym_onecol = [np.arange(-(Nx - 1) / 2.0 + 0.5, (Nx) / 2.0 - 0.5)] 102 | ym = np.tile(ym_onecol, (Nx - 1, 1)).transpose() 103 | 104 | imageIn = imageIn.astype(float) 105 | 106 | dIdu = imageIn[0:Ny - 1, 1:Nx] - imageIn[1:Ny, 0:Nx - 1]; 107 | dIdv = imageIn[0:Ny - 1, 0:Nx - 1] - imageIn[1:Ny, 1:Nx]; 108 | 109 | h = np.ones((3, 3)) / 9 110 | fdu = signal.convolve2d(dIdu, h, 'same') 111 | fdv = signal.convolve2d(dIdv, h, 'same') 112 | dImag2 = np.multiply(fdu, fdu) + np.multiply(fdv, fdv) 113 | 114 | m = np.divide(-(fdv + fdu), (fdu - fdv)) 115 | 116 | if np.any(np.isnan(m)): 117 | unsmoothm = np.divide(dIdv + dIdu, dIdu - dIdv) 118 | m[np.isnan(m)] = unsmoothm[np.isnan(m)] 119 | 120 | if np.any(np.isnan(m)): 121 | m[np.isnan(m)] = 0 122 | 123 | if np.any(np.isinf(m)): 124 | if ~np.all(np.isinf(m)): 125 | m[np.isinf(m)] = 10 * np.max(m[~np.isinf(m)]) 126 | else: 127 | m = np.divide((dIdv + dIdu), (dIdu - dIdv)) 128 | 129 | b = ym - np.multiply(m, xm) 130 | 131 | sdI2 = np.sum(dImag2) 132 | xcentroid = np.sum(np.sum(np.multiply(dImag2, xm))) / sdI2 133 | ycentroid = np.sum(np.multiply(dImag2, ym)) / sdI2 134 | w = np.divide(dImag2, np.sqrt( 135 | (xm - xcentroid) * (xm - xcentroid) + (ym - ycentroid) * ( 136 | ym - ycentroid))) 137 | 138 | xc, yc = lsradialcenterfit(m, b, w) 139 | 140 | xc = xc + (Nx + 1) / 2.0 141 | yc = yc + (Ny + 1) / 2.0 142 | 143 | return xc, yc 144 | 145 | 146 | def refine_position(image, x, y, cropSize=4) -> Tuple[float, float]: 147 | # TODO this would be more intuitive it it retransformed the output 148 | # coordinates to the original image coordinates 149 | subImage = image[int(y + 2 - cropSize):int(y + cropSize), 150 | int(x - cropSize + 2):int(x + cropSize)] 151 | return radial_center(subImage) 152 | -------------------------------------------------------------------------------- /merlin/util/simulator.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import PIL 4 | import cv2 5 | import tifffile 6 | from scipy.signal import convolve2d 7 | 8 | import merlin 9 | from merlin.core import dataset 10 | from merlin.data import codebook as cb 11 | 12 | class MERFISHDataFactory(object): 13 | 14 | """ 15 | A class for simulating MERFISH data sets. 
16 | """ 17 | 18 | def __init__(self): 19 | self.codebookPath = 'L26E1.csv' 20 | self.psfSigma = 1.2 21 | self.imageSize = np.array([1024, 1024]) 22 | self.upsampleFactor = 10 23 | self.fluorophoreBrightness = 1000 24 | self.fiducialBrightness = 10000 25 | self.background = 100 26 | self.bitOrganization = [[0, 1], [0, 0], [1, 0], [1, 1], 27 | [2, 1], [2, 0], [3, 1], [3, 0], [4, 0], [4, 1], 28 | [5, 1], [5, 0], [6, 1], [6, 0], [7, 0], [7, 1]] 29 | 30 | def simulate_image(self, spotPositions: np.ndarray=None, 31 | addNoise: bool=False) -> np.ndarray: 32 | """Simulate a single image consisting of point sources with a Gaussian 33 | point spread function 34 | 35 | Args: 36 | spotPositions: a n x 2 numpy array containing the positions to 37 | simulate the point sources. If not specified, 1000 random 38 | positions are selected. 39 | addNoise: flag indicating whether poisson noise should be added 40 | to the simulated image. 41 | Returns: 42 | the simulated image 43 | """ 44 | if spotPositions is None: 45 | spotPositions = np.random.uniform(size=(1000, 2)) 46 | spotPositions[:, 0] *= self.imageSize[0] 47 | spotPositions[:, 1] *= self.imageSize[1] 48 | 49 | upsampledImage = np.zeros(self.upsampleFactor*self.imageSize) 50 | for p in spotPositions: 51 | upsampledImage[int(np.floor(p[0]*self.upsampleFactor)), 52 | int(np.floor(p[1]*self.upsampleFactor))] += 1000 53 | 54 | return self._downsample_image_stack([upsampledImage], 55 | addNoise=addNoise)[0] 56 | 57 | def simulate_dataset(self, datasetName, abundanceScale=1, 58 | fluorophoreCount=5, fovCount=10): 59 | """Simulate a full MERFISH dataset""" 60 | dataDir = os.sep.join([merlin.DATA_HOME, datasetName]) 61 | if not os.path.exists(dataDir): 62 | os.mkdir(dataDir) 63 | 64 | simDataset = dataset.DataSet(datasetName) 65 | codebook = cb.Codebook(simDataset, self.codebookPath) 66 | 67 | barcodeNumber = codebook.get_barcode_count() 68 | barcodeAbundances = abundanceScale*np.array( 69 | [10**np.random.uniform(3) for i in range(barcodeNumber)]) 70 | barcodeAbundances[:10] = 0 71 | 72 | for i in range(fovCount): 73 | merfishImages, rnaPositions = self._simulate_single_fov( 74 | codebook, barcodeAbundances, fluorophoreCount) 75 | fiducialImage = self._simulate_fiducial_image() 76 | tifffile.imsave( 77 | os.sep.join([dataDir, 'full_stack_' + str(i) + '.tiff']), 78 | merfishImages.astype(np.uint16)) 79 | 80 | imageCount = np.max([x[0] for x in self.bitOrganization]) + 1 81 | for j in range(imageCount): 82 | fileName = 'Conventional_750_650_561_488_405_' + str(i) + \ 83 | '_' + str(j) + '.tiff' 84 | filePath = os.sep.join([dataDir, fileName]) 85 | 86 | imageData = np.zeros( 87 | shape=(5, *self.imageSize), dtype=np.uint16) 88 | firstBitIndex = [i for i,x in enumerate(self.bitOrganization) \ 89 | if x[0] == j and x[1] == 0][0] 90 | secondBitIndex = [i for i,x in enumerate(self.bitOrganization) \ 91 | if x[0] == j and x[1] == 1][0] 92 | 93 | imageData[0,:,:] = merfishImages[firstBitIndex] 94 | imageData[1,:,:] = merfishImages[secondBitIndex] 95 | imageData[2,:,:] = fiducialImage 96 | 97 | tifffile.imsave(filePath, imageData) 98 | 99 | np.save(os.sep.join( 100 | [dataDir, 'true_positions_' + str(i) + '.npy']), rnaPositions) 101 | 102 | def _simulate_fiducial_image(self): 103 | fiducialPositions = np.random.uniform(size=(1000,2)) 104 | upsampledFiducials = self.fiducialBrightness*np.histogram2d( 105 | fiducialPositions[:,0]*self.imageSize[0], 106 | fiducialPositions[:,1]*self.imageSize[1], 107 | bins=self.upsampleFactor*self.imageSize)[0] 108 | 109 | return 
self._downsample_image_stack([upsampledFiducials])[0] 110 | 111 | def _simulate_single_fov(self, codebook, barcodeAbundances, 112 | fluorophoreCount): 113 | barcodeCount = len(barcodeAbundances) 114 | bitNumber = codebook.get_bit_count() 115 | imageSize = self.imageSize 116 | 117 | rnaCounts = np.random.poisson(barcodeAbundances) 118 | rnaPositions = [np.random.uniform(size=(c, 2)) for c in rnaCounts] 119 | for b in range(barcodeCount): 120 | rnaPositions[b][:, 0] *= imageSize[0] 121 | rnaPositions[b][:, 1] *= imageSize[1] 122 | 123 | upsampledStack = np.zeros((bitNumber, *self.upsampleFactor*imageSize)) 124 | 125 | for b in range(barcodeCount): 126 | self._add_spots_for_barcode( 127 | codebook.get_barcode(b), rnaPositions[b], fluorophoreCount, 128 | upsampledStack) 129 | 130 | imageStack = self._downsample_image_stack(upsampledStack) 131 | 132 | return imageStack, rnaPositions 133 | 134 | def _add_spots_for_barcode(self, barcode, positions, fluorophoreCount, 135 | upsampledStack): 136 | upsampledImage = np.zeros(self.upsampleFactor*self.imageSize) 137 | for p in positions: 138 | upsampledImage[int(np.floor(p[0]*self.upsampleFactor)), \ 139 | int(np.floor(p[1]*self.upsampleFactor))] += 1 140 | upsampledImage = self.fluorophoreBrightness*np.random.poisson( 141 | upsampledImage*fluorophoreCount) 142 | 143 | for i in np.where(barcode)[0]: 144 | np.add(upsampledStack[i], upsampledImage, out=upsampledStack[i]) 145 | 146 | def _downsample_image_stack(self, upsampledStack, addNoise=True): 147 | imageStack = np.zeros((len(upsampledStack), *self.imageSize)) 148 | 149 | for i in range(len(imageStack)): 150 | blurredImage = cv2.GaussianBlur(upsampledStack[i].astype(float), 151 | ksize=(51, 51), sigmaX=self.upsampleFactor*self.psfSigma) 152 | downsampledImage = np.array(PIL.Image.fromarray( 153 | convolve2d(blurredImage, 154 | np.ones((self.upsampleFactor, self.upsampleFactor))))\ 155 | .resize(self.imageSize, PIL.Image.BILINEAR)) 156 | if addNoise: 157 | imageStack[i] = np.random.poisson( 158 | downsampledImage + self.background) 159 | else: 160 | imageStack[i] = downsampledImage + self.background 161 | 162 | return imageStack 163 | 164 | -------------------------------------------------------------------------------- /merlin/util/snakewriter.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import networkx 3 | from merlin.core import analysistask 4 | from merlin.core import dataset 5 | 6 | 7 | class SnakemakeRule(object): 8 | 9 | def __init__(self, analysisTask: analysistask.AnalysisTask, 10 | pythonPath=None): 11 | self._analysisTask = analysisTask 12 | self._pythonPath = pythonPath 13 | 14 | @staticmethod 15 | def _add_quotes(stringIn): 16 | return '\'%s\'' % stringIn 17 | 18 | @staticmethod 19 | def _clean_string(stringIn): 20 | return stringIn.replace('\\', '/') 21 | 22 | def _expand_as_string(self, taskName, indexCount) -> str: 23 | return 'expand(%s, g=list(range(%i)))' % (self._add_quotes( 24 | self._analysisTask.dataSet.analysis_done_filename(taskName, '{g}')), 25 | indexCount) 26 | 27 | def _generate_output(self) -> str: 28 | if isinstance(self._analysisTask, analysistask.ParallelAnalysisTask): 29 | return self._clean_string( 30 | self._add_quotes( 31 | self._analysisTask.dataSet.analysis_done_filename( 32 | self._analysisTask, '{i}'))) 33 | else: 34 | return self._clean_string( 35 | self._add_quotes( 36 | self._analysisTask.dataSet.analysis_done_filename( 37 | self._analysisTask))) 38 | 39 | def 
_generate_current_task_inputs(self): 40 | inputTasks = [self._analysisTask.dataSet.load_analysis_task(x) 41 | for x in self._analysisTask.get_dependencies()] 42 | if len(inputTasks) > 0: 43 | inputString = ','.join(['ancient(' + self._add_quotes( 44 | x.dataSet.analysis_done_filename(x)) + ')' 45 | for x in inputTasks]) 46 | else: 47 | inputString = '' 48 | 49 | return self._clean_string(inputString) 50 | 51 | def _generate_message(self) -> str: 52 | messageString = \ 53 | ''.join(['Running ', self._analysisTask.get_analysis_name()]) 54 | if isinstance(self._analysisTask, analysistask.ParallelAnalysisTask): 55 | messageString += ' {wildcards.i}' 56 | return self._add_quotes(messageString) 57 | 58 | def _base_shell_command(self) -> str: 59 | if self._pythonPath is None: 60 | shellString = 'python ' 61 | else: 62 | shellString = self._clean_string(self._pythonPath) + ' ' 63 | shellString += ''.join( 64 | ['-m merlin -t ', 65 | self._clean_string(self._analysisTask.analysisName), 66 | ' -e \"', 67 | self._clean_string(self._analysisTask.dataSet.dataHome), '\"', 68 | ' -s \"', 69 | self._clean_string(self._analysisTask.dataSet.analysisHome), 70 | '\"']) 71 | return shellString 72 | 73 | def _generate_shell(self) -> str: 74 | shellString = self._base_shell_command() 75 | if isinstance(self._analysisTask, analysistask.ParallelAnalysisTask): 76 | shellString += ' -i {wildcards.i}' 77 | shellString += ' ' + self._clean_string( 78 | self._analysisTask.dataSet.dataSetName) 79 | return self._add_quotes(shellString) 80 | 81 | def _generate_done_shell(self) -> str: 82 | """ Check done shell command for parallel analysis tasks 83 | """ 84 | shellString = self._base_shell_command() 85 | shellString += ' --check-done' 86 | shellString += ' ' + self._clean_string( 87 | self._analysisTask.dataSet.dataSetName) 88 | return self._add_quotes(shellString) 89 | 90 | def as_string(self) -> str: 91 | fullString = ('rule %s:\n\tinput: %s\n\toutput: %s\n\tmessage: %s\n\t' 92 | + 'shell: %s\n\n') \ 93 | % (self._analysisTask.get_analysis_name(), 94 | self._generate_current_task_inputs(), 95 | self._generate_output(), 96 | self._generate_message(), self._generate_shell()) 97 | # for parallel tasks, add a second snakemake task to reduce the time 98 | # it takes to generate DAGs 99 | if isinstance(self._analysisTask, analysistask.ParallelAnalysisTask): 100 | fullString += \ 101 | ('rule %s:\n\tinput: %s\n\toutput: %s\n\tmessage: %s\n\t' 102 | + 'shell: %s\n\n')\ 103 | % (self._analysisTask.get_analysis_name() + 'Done', 104 | self._clean_string(self._expand_as_string( 105 | self._analysisTask, 106 | self._analysisTask.fragment_count())), 107 | self._add_quotes(self._clean_string( 108 | self._analysisTask.dataSet.analysis_done_filename( 109 | self._analysisTask))), 110 | self._add_quotes( 111 | 'Checking %s done' % self._analysisTask.analysisName), 112 | self._generate_done_shell()) 113 | return fullString 114 | 115 | def full_output(self) -> str: 116 | if isinstance(self._analysisTask, analysistask.ParallelAnalysisTask): 117 | return self._clean_string(self._expand_as_string( 118 | self._analysisTask.get_analysis_name(), 119 | self._analysisTask.fragment_count())) 120 | else: 121 | return self._clean_string( 122 | self._add_quotes( 123 | self._analysisTask.dataSet.analysis_done_filename( 124 | self._analysisTask))) 125 | 126 | 127 | class SnakefileGenerator(object): 128 | 129 | def __init__(self, analysisParameters, dataSet: dataset.DataSet, 130 | pythonPath: str = None): 131 | self._analysisParameters = 
analysisParameters 132 | self._dataSet = dataSet 133 | self._pythonPath = pythonPath 134 | 135 | def _parse_parameters(self): 136 | analysisTasks = {} 137 | for tDict in self._analysisParameters['analysis_tasks']: 138 | analysisModule = importlib.import_module(tDict['module']) 139 | analysisClass = getattr(analysisModule, tDict['task']) 140 | analysisParameters = tDict.get('parameters') 141 | analysisName = tDict.get('analysis_name') 142 | newTask = analysisClass( 143 | self._dataSet, analysisParameters, analysisName) 144 | if newTask.get_analysis_name() in analysisTasks: 145 | raise Exception('Analysis tasks must have unique names. ' + 146 | newTask.get_analysis_name() + ' is redundant.') 147 | # TODO This should be more careful to not overwrite an existing 148 | # analysis task that has already been run. 149 | newTask.save() 150 | analysisTasks[newTask.get_analysis_name()] = newTask 151 | return analysisTasks 152 | 153 | def _identify_terminal_tasks(self, analysisTasks): 154 | taskGraph = networkx.DiGraph() 155 | for x in analysisTasks.keys(): 156 | taskGraph.add_node(x) 157 | 158 | for x, a in analysisTasks.items(): 159 | for d in a.get_dependencies(): 160 | taskGraph.add_edge(d, x) 161 | 162 | return [k for k, v in taskGraph.out_degree if v == 0] 163 | 164 | def generate_workflow(self) -> str: 165 | """Generate a snakemake workflow for the analysis parameters 166 | of this SnakefileGenerator and save the workflow into the dataset. 167 | 168 | Returns: 169 | the path to the generated snakemake workflow 170 | """ 171 | analysisTasks = self._parse_parameters() 172 | terminalTasks = self._identify_terminal_tasks(analysisTasks) 173 | 174 | ruleList = {k: SnakemakeRule(v, self._pythonPath) 175 | for k, v in analysisTasks.items()} 176 | 177 | workflowString = 'rule all: \n\tinput: ' + \ 178 | ','.join([ruleList[x].full_output() 179 | for x in terminalTasks]) + '\n\n' 180 | workflowString += '\n'.join([x.as_string() for x in ruleList.values()]) 181 | 182 | return self._dataSet.save_workflow(workflowString) 183 | -------------------------------------------------------------------------------- /merlin/util/watershed.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | from scipy import ndimage 4 | from skimage import morphology 5 | from skimage import filters 6 | from skimage import measure 7 | from pyclustering.cluster import kmedoids 8 | from typing import Tuple 9 | 10 | from merlin.util import matlab 11 | 12 | """ 13 | This module contains utility functions for preparing images for 14 | watershed segmentation. 15 | """ 16 | 17 | # To match Matlab's strel('disk', 20) 18 | diskStruct = morphology.diamond(28)[9:48, 9:48] 19 | 20 | 21 | def extract_seeds(seedImageStackIn: np.ndarray) -> np.ndarray: 22 | """Determine seed positions from the input images. 23 | 24 | The initial seeds are determined by finding the regional intensity maximums 25 | after erosion and filtering with an adaptive threshold. These initial 26 | seeds are then expanded by dilation. 27 | 28 | Args: 29 | seedImageStackIn: a 3 dimensional numpy array arranged as (z,x,y) 30 | Returns: a boolean numpy array with the same dimensions as seedImageStackIn 31 | where a given (z,x,y) coordinate is True if it corresponds to a seed 32 | position and False otherwise.
33 | """ 34 | seedImages = seedImageStackIn.copy() 35 | 36 | seedImages = ndimage.grey_erosion( 37 | seedImages, 38 | footprint=ndimage.morphology.generate_binary_structure(3, 1)) 39 | seedImages = np.array([cv2.erode(x, diskStruct, 40 | borderType=cv2.BORDER_REFLECT) 41 | for x in seedImages]) 42 | 43 | thresholdFilterSize = int(2 * np.floor(seedImages.shape[1] / 16) + 1) 44 | seedMask = np.array([x < 1.1 * filters.threshold_local( 45 | x, thresholdFilterSize, method='mean', mode='nearest') 46 | for x in seedImages]) 47 | 48 | seedImages[seedMask] = 0 49 | 50 | seeds = morphology.local_maxima(seedImages, allow_borders=True) 51 | 52 | seeds = ndimage.morphology.binary_dilation( 53 | seeds, structure=ndimage.morphology.generate_binary_structure(3, 1)) 54 | seeds = np.array([ndimage.morphology.binary_dilation( 55 | x, structure=morphology.diamond(28)[9:48, 9:48]) for x in seeds]) 56 | 57 | return seeds 58 | 59 | 60 | def separate_merged_seeds(seedsIn: np.ndarray) -> np.ndarray: 61 | """Separate seeds that are merged in 3 dimensions but are separated 62 | in some 2 dimensional slices. 63 | 64 | Args: 65 | seedsIn: a 3 dimensional binary numpy array arranged as (z,x,y) where 66 | True indicates the pixel corresponds with a seed. 67 | Returns: a 3 dimensional binary numpy array of the same size as seedsIn 68 | indicating the positions of seeds after processing. 69 | """ 70 | 71 | def create_region_image(shape, c): 72 | region = np.zeros(shape) 73 | for x in c.coords: 74 | region[x[0], x[1], x[2]] = 1 75 | return region 76 | 77 | components = measure.regionprops(measure.label(seedsIn)) 78 | seeds = np.zeros(seedsIn.shape) 79 | for c in components: 80 | seedImage = create_region_image(seeds.shape, c) 81 | localProps = [measure.regionprops(measure.label(x)) for x in seedImage] 82 | seedCounts = [len(x) for x in localProps] 83 | 84 | if all([x < 2 for x in seedCounts]): 85 | goodFrames = [i for i, x in enumerate(seedCounts) if x == 1] 86 | goodProperties = [y for x in goodFrames for y in localProps[x]] 87 | seedPositions = np.round([np.median( 88 | [x.centroid for x in goodProperties], axis=0)]).astype(int) 89 | else: 90 | goodFrames = [i for i, x in enumerate(seedCounts) if x > 1] 91 | goodProperties = [y for x in goodFrames for y in localProps[x]] 92 | goodCentroids = [x.centroid for x in goodProperties] 93 | km = kmedoids.kmedoids( 94 | goodCentroids, 95 | np.random.choice(np.arange(len(goodCentroids)), 96 | size=np.max(seedCounts))) 97 | km.process() 98 | seedPositions = np.round( 99 | [goodCentroids[x] for x in km.get_medoids()]).astype(int) 100 | 101 | for s in seedPositions: 102 | for f in goodFrames: 103 | seeds[f, s[0], s[1]] = 1 104 | 105 | seeds = ndimage.morphology.binary_dilation( 106 | seeds, structure=ndimage.morphology.generate_binary_structure(3, 1)) 107 | seeds = np.array([ndimage.morphology.binary_dilation( 108 | x, structure=diskStruct) for x in seeds]) 109 | 110 | return seeds 111 | 112 | 113 | def prepare_watershed_images(watershedImageStack: np.ndarray 114 | ) -> Tuple[np.ndarray, np.ndarray]: 115 | """Prepare the given images as the input image for watershedding. 116 | 117 | A watershed mask is determined using an adaptive threshold and the watershed 118 | images are inverted so the largest values in the watershed images become 119 | minima and then the image stack is normalized to have values between 0 120 | and 1. 121 | 122 | Args: 123 | watershedImageStack: a 3 dimensional numpy array containing the images 124 | arranged as (z, x, y). 
125 | Returns: a tuple containing the normalized watershed images and the 126 | calculated watershed mask 127 | """ 128 | filterSize = int(2 * np.floor(watershedImageStack.shape[1] / 16) + 1) 129 | 130 | watershedMask = np.array([ndimage.morphology.binary_fill_holes( 131 | x > 1.1 * filters.threshold_local(x, filterSize, method='mean', 132 | mode='nearest')) 133 | for x in watershedImageStack]) 134 | 135 | normalizedWatershed = 1 - (watershedImageStack 136 | - np.min(watershedImageStack)) / \ 137 | (np.max(watershedImageStack) 138 | - np.min(watershedImageStack)) 139 | normalizedWatershed[np.invert(watershedMask)] = 1 140 | 141 | return normalizedWatershed, watershedMask 142 | -------------------------------------------------------------------------------- /merlin/view/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/merlin/view/__init__.py -------------------------------------------------------------------------------- /merlin/view/__main__.py: -------------------------------------------------------------------------------- 1 | from .merlinview import merlin_view 2 | 3 | merlin_view() 4 | -------------------------------------------------------------------------------- /merlin/view/merlinview.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import argparse 3 | 4 | from PyQt5 import QtWidgets 5 | 6 | from merlin.core import dataset 7 | from merlin.util import binary 8 | from merlin.view.widgets import regionview 9 | 10 | temp = '180710_HAECs_NoFlow_HAEC2\Sample1' 11 | 12 | def build_parser(): 13 | parser = argparse.ArgumentParser() 14 | 15 | parser.add_argument('-d', '--data-set', required=True) 16 | 17 | return parser 18 | 19 | def merlin_view(): 20 | print('MERlinView - MERFISH data exploration software') 21 | parser = build_parser() 22 | args, argv = parser.parse_known_args() 23 | 24 | data = dataset.MERFISHDataSet(args.data_set) 25 | wTask = data.load_analysis_task('FiducialCorrelationWarp') 26 | dTask = data.load_analysis_task('DeconvolutionPreprocess') 27 | fTask = data.load_analysis_task('StrictFilterBarcodes') 28 | 29 | app = QtWidgets.QApplication([]) 30 | 31 | frame = QtWidgets.QFrame() 32 | window = QtWidgets.QMainWindow() 33 | window.setCentralWidget(frame) 34 | window.resize(1000,1000) 35 | layout = QtWidgets.QGridLayout(frame) 36 | layout.addWidget(regionview.RegionViewWidget( 37 | wTask, fTask.get_barcode_database(), data)) 38 | 39 | 40 | window.show() 41 | sys.exit(app.exec_()) 42 | -------------------------------------------------------------------------------- /merlin/view/widgets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/merlin/view/widgets/__init__.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | setuptools>=36.0.0 2 | urllib3 3 | python-dotenv>=0.8.2 4 | pandas>=0.23.4 5 | tifffile>=0.14.0 6 | opencv-python>=3 7 | scikit-image>=0.15.0 8 | scikit-learn>=0.19.0 9 | numpy>1.16.0 10 | scipy>=1.2 11 | matplotlib 12 | networkx 13 | rtree 14 | shapely<1.7a2 15 | seaborn>=0.9.0 16 | pyqt5 17 | Sphinx 18 | sphinx-rtd-theme 19 | pyclustering 20 | pytest 21 | pytest-cov 22 | h5py>=1.8.15 23 | numexpr>=2.6.2 24 | 
cython>=0.21 25 | snakemake 26 | requests>=2.18.0 27 | tables 28 | boto3 29 | xmltodict 30 | google-cloud-storage 31 | docutils<0.16,>=0.10 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import setuptools 3 | 4 | CLASSIFIERS = [ 5 | "Development Status :: 4 - Beta", 6 | "Natural Language :: English", 7 | "Operating System :: POSIX", 8 | "Operating System :: Unix", 9 | "Operating System :: MacOS :: MacOS X", 10 | "License :: Restricted use", 11 | "Programming Language :: Python :: 3.6", 12 | "Topic :: Scientific/Engineering :: Bio-Informatics", 13 | ] 14 | 15 | install_requires = [line.rstrip() for line in open( 16 | os.path.join(os.path.dirname(__file__), "requirements.txt"))] 17 | 18 | setuptools.setup( 19 | name="merlin", 20 | version="0.1.6", 21 | description="MERFISH decoding software", 22 | author="George Emanuel", 23 | author_email="emanuega0@gmail.com", 24 | license="Restricted use", 25 | packages=setuptools.find_packages(), 26 | install_requires=install_requires, 27 | entry_points={ 28 | 'console_scripts': ["merlin=merlin.merlin:merlin"] 29 | }, 30 | classifiers=CLASSIFIERS 31 | ) 32 | -------------------------------------------------------------------------------- /test/auxiliary_files/test.dax: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/test/auxiliary_files/test.dax -------------------------------------------------------------------------------- /test/auxiliary_files/test.inf: -------------------------------------------------------------------------------- 1 | binning = 1 x 1 2 | data type = 16 bit integers (binary, big endian) 3 | frame dimensions = 256 x 256 4 | number of frames = 10 5 | Lock Target = 0.0 6 | x_start = 1 7 | x_end = 256 8 | y_start = 1 9 | y_end = 256 10 | -------------------------------------------------------------------------------- /test/auxiliary_files/test_0_0.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/test/auxiliary_files/test_0_0.tif -------------------------------------------------------------------------------- /test/auxiliary_files/test_0_1.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/test/auxiliary_files/test_0_1.tif -------------------------------------------------------------------------------- /test/auxiliary_files/test_0_2.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/test/auxiliary_files/test_0_2.tif -------------------------------------------------------------------------------- /test/auxiliary_files/test_0_3.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/test/auxiliary_files/test_0_3.tif -------------------------------------------------------------------------------- /test/auxiliary_files/test_0_4.tif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/test/auxiliary_files/test_0_4.tif -------------------------------------------------------------------------------- /test/auxiliary_files/test_0_5.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/test/auxiliary_files/test_0_5.tif -------------------------------------------------------------------------------- /test/auxiliary_files/test_0_6.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/test/auxiliary_files/test_0_6.tif -------------------------------------------------------------------------------- /test/auxiliary_files/test_0_7.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/test/auxiliary_files/test_0_7.tif -------------------------------------------------------------------------------- /test/auxiliary_files/test_1_0.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/test/auxiliary_files/test_1_0.tif -------------------------------------------------------------------------------- /test/auxiliary_files/test_1_1.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/test/auxiliary_files/test_1_1.tif -------------------------------------------------------------------------------- /test/auxiliary_files/test_1_2.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/test/auxiliary_files/test_1_2.tif -------------------------------------------------------------------------------- /test/auxiliary_files/test_1_3.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/test/auxiliary_files/test_1_3.tif -------------------------------------------------------------------------------- /test/auxiliary_files/test_1_4.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/test/auxiliary_files/test_1_4.tif -------------------------------------------------------------------------------- /test/auxiliary_files/test_1_5.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/test/auxiliary_files/test_1_5.tif -------------------------------------------------------------------------------- /test/auxiliary_files/test_1_6.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/test/auxiliary_files/test_1_6.tif -------------------------------------------------------------------------------- /test/auxiliary_files/test_1_7.tif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/emanuega/MERlin/be3c994ef8fa97fbd3afb85705d8ddbed12118cf/test/auxiliary_files/test_1_7.tif -------------------------------------------------------------------------------- /test/auxiliary_files/test_analysis_parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "analysis_tasks": [ 3 | { 4 | "task": "FiducialCorrelationWarp", 5 | "module": "merlin.analysis.warp", 6 | "parameters": { 7 | "write_aligned_images": true 8 | } 9 | }, 10 | { 11 | "task": "DeconvolutionPreprocess", 12 | "module": "merlin.analysis.preprocess", 13 | "parameters": { 14 | "warp_task": "FiducialCorrelationWarp" 15 | } 16 | }, 17 | { 18 | "task": "OptimizeIteration", 19 | "module": "merlin.analysis.optimize", 20 | "analysis_name": "Optimize1", 21 | "parameters": { 22 | "preprocess_task": "DeconvolutionPreprocess", 23 | "warp_task": "FiducialCorrelationWarp", 24 | "fov_per_iteration": 2, 25 | "iteration_count": 2, 26 | "optimize_chromatic_correction": false 27 | } 28 | }, 29 | { 30 | "task": "OptimizeIteration", 31 | "module": "merlin.analysis.optimize", 32 | "analysis_name": "Optimize2", 33 | "parameters": { 34 | "preprocess_task": "DeconvolutionPreprocess", 35 | "warp_task": "FiducialCorrelationWarp", 36 | "fov_per_iteration": 2, 37 | "iteration_count": 2, 38 | "optimize_chromatic_correction": false, 39 | "previous_iteration": "Optimize1" 40 | } 41 | }, 42 | { 43 | "task": "Decode", 44 | "module": "merlin.analysis.decode", 45 | "parameters": { 46 | "preprocess_task": "DeconvolutionPreprocess", 47 | "optimize_task": "Optimize2", 48 | "global_align_task": "SimpleGlobalAlignment", 49 | "crop_width": 10, 50 | "remove_z_duplicated_barcodes": true, 51 | "z_duplicate_zPlane_threshold": 1, 52 | "z_duplicate_xy_pixel_threshold": 1.414 53 | } 54 | }, 55 | { 56 | "task": "SimpleGlobalAlignment", 57 | "module": "merlin.analysis.globalalign" 58 | }, 59 | { 60 | "task": "GenerateMosaic", 61 | "module": "merlin.analysis.generatemosaic", 62 | "parameters": { 63 | "global_align_task": "SimpleGlobalAlignment", 64 | "warp_task": "FiducialCorrelationWarp" 65 | } 66 | }, 67 | { 68 | "task": "FilterBarcodes", 69 | "module": "merlin.analysis.filterbarcodes", 70 | "parameters": { 71 | "decode_task": "Decode", 72 | "area_threshold": 5, 73 | "intensity_threshold": 1 74 | } 75 | }, 76 | { 77 | "task": "GenerateAdaptiveThreshold", 78 | "module": "merlin.analysis.filterbarcodes", 79 | "parameters": { 80 | "decode_task": "Decode", 81 | "run_after_task": "Decode" 82 | } 83 | }, 84 | { 85 | "task": "AdaptiveFilterBarcodes", 86 | "module": "merlin.analysis.filterbarcodes", 87 | "parameters": { 88 | "decode_task": "Decode", 89 | "adaptive_task": "GenerateAdaptiveThreshold" 90 | } 91 | }, 92 | { 93 | "task": "ExportBarcodes", 94 | "module": "merlin.analysis.exportbarcodes", 95 | "parameters": { 96 | "filter_task": "FilterBarcodes" 97 | } 98 | }, 99 | { 100 | "task": "PlotPerformance", 101 | "module": "merlin.analysis.plotperformance", 102 | "parameters": { 103 | "preprocess_task": "DeconvolutionPreprocess", 104 | "optimize_task": "Optimize2", 105 | "decode_task": "Decode", 106 | "filter_task": "AdaptiveFilterBarcodes", 107 | "global_align_task": "SimpleGlobalAlignment" 108 | } 109 | }, 110 | { 111 | "task": "WatershedSegment", 112 | "module": "merlin.analysis.segment", 113 | "parameters": { 114 | "warp_task": "FiducialCorrelationWarp", 115 | "global_align_task": "SimpleGlobalAlignment" 116 | } 117 | }, 118 | { 119 | "task": "CleanCellBoundaries", 120 | "module": 
"merlin.analysis.segment", 121 | "parameters": { 122 | "segment_task": "WatershedSegment", 123 | "global_align_task": "SimpleGlobalAlignment" 124 | } 125 | }, 126 | { 127 | "task": "CombineCleanedBoundaries", 128 | "module": "merlin.analysis.segment", 129 | "parameters": { 130 | "cleaning_task": "CleanCellBoundaries" 131 | } 132 | }, 133 | { 134 | "task": "RefineCellDatabases", 135 | "module": "merlin.analysis.segment", 136 | "parameters": { 137 | "segment_task": "WatershedSegment", 138 | "combine_cleaning_task": "CombineCleanedBoundaries" 139 | } 140 | }, 141 | { 142 | "task": "PartitionBarcodes", 143 | "module": "merlin.analysis.partition", 144 | "parameters": { 145 | "filter_task": "AdaptiveFilterBarcodes", 146 | "assignment_task": "RefineCellDatabases", 147 | "alignment_task": "SimpleGlobalAlignment" 148 | } 149 | }, 150 | { 151 | "task": "ExportPartitionedBarcodes", 152 | "module": "merlin.analysis.partition", 153 | "parameters": { 154 | "partition_task": "PartitionBarcodes" 155 | } 156 | }, 157 | { 158 | "task": "ExportCellMetadata", 159 | "module": "merlin.analysis.segment", 160 | "parameters": { 161 | "segment_task": "RefineCellDatabases" 162 | } 163 | }, 164 | { 165 | "task": "SumSignal", 166 | "module": "merlin.analysis.sequential", 167 | "parameters": { 168 | "z_index": 0, 169 | "apply_highpass": true, 170 | "warp_task": "FiducialCorrelationWarp", 171 | "highpass_sigma": 5, 172 | "segment_task": "RefineCellDatabases", 173 | "global_align_task": "SimpleGlobalAlignment" 174 | } 175 | }, 176 | { 177 | "task": "ExportSumSignals", 178 | "module": "merlin.analysis.sequential", 179 | "parameters": { 180 | "sequential_task": "SumSignal" 181 | } 182 | } 183 | 184 | ] 185 | 186 | } 187 | -------------------------------------------------------------------------------- /test/auxiliary_files/test_codebook.csv: -------------------------------------------------------------------------------- 1 | name,id,bit1,bit2,bit3,bit4,bit5,bit6,bit7,bit8,bit9,bit10,bit11,bit12,bit13,bit14,bit15,bit16 2 | STMN1,ENST00000465604.1,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,0 3 | DHCR24,ENST00000535035.5,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0 4 | VCAM1,ENST00000370115.1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0 5 | SELL,ENST00000236147.4,0,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0 6 | PTGS2,ENST00000367468.9,0,0,1,0,0,1,0,0,0,0,0,0,1,1,0,0 7 | PALD1,ENST00000263563.6,0,0,1,0,0,0,1,0,0,0,1,0,1,0,0,0 8 | PLAU,ENST00000446342.5,1,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0 9 | PPP1R3C,ENST00000238994.5,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0 10 | SERPINH1,ENST00000524558.5,0,0,0,1,0,0,0,0,0,1,0,0,1,1,0,0 11 | HYOU1,ENST00000617285.4,0,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0 12 | A2M,ENST00000318602.11,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0 13 | LMO7,ENST00000377499.9,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0 14 | MMP14,ENST00000311852.10,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0 15 | LTB4R,ENST00000396789.4,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,1 16 | VASH1,ENST00000167106.8,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0 17 | FBN1,ENST00000316623.9,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0 18 | SMAD6,ENST00000612349.1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,1 19 | ITGA11,ENST00000423218.6,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1 20 | CX3CL1,ENST00000006053.6,0,0,0,0,1,0,0,0,1,0,0,0,1,0,1,0 21 | MYH10,ENST00000269243.8,0,0,0,1,0,0,0,0,0,0,0,1,1,0,1,0 22 | ITGB4,ENST00000580542.5,0,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0 23 | BCL2,ENST00000333681.4,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1 24 | ICAM1,ENST00000264832.7,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1 25 | ICAM3,ENST00000587992.1,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0 26 | FOSB,ENST00000591858.5,1,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0 27 | 
PLA2G4C,ENST00000599111.5,0,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0 28 | RPS7,ENST00000481006.1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,1 29 | ADAM17,ENST00000310823.7,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0 30 | LIMS1,ENST00000544547.5,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0 31 | ITGAV,ENST00000433736.6,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0 32 | COL5A2,ENST00000374866.7,0,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0 33 | CASP10,ENST00000360132.7,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,0 34 | HSPA12B,ENST00000399701.1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,0,0 35 | BCL2L1,ENST00000307677.4,0,0,0,1,0,0,1,1,0,0,0,0,1,0,0,0 36 | PPP1R16B,ENST00000373331.2,1,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0 37 | PTGIS,ENST00000244043.4,1,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0 38 | TCN2,ENST00000215838.7,0,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0 39 | STAB1,ENST00000461325.1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1 40 | FAM107A,ENST00000360997.6,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,1 41 | MRAS,ENST00000621127.4,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,1 42 | BCL6,ENST00000406870.6,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1 43 | ATP13A3,ENST00000256031.8,0,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0 44 | MFI2,ENST00000296350.9,0,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0 45 | LIMCH1,ENST00000511496.5,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,0 46 | PDGFRA,ENST00000257290.9,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0 47 | ENPEP,ENST00000265162.9,0,0,0,1,0,1,0,1,1,0,0,0,0,0,0,0 48 | FGF2,ENST00000608478.1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1 49 | IL15,ENST00000477265.5,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0 50 | FAM198B,ENST00000585682.5,1,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0 51 | CASP3,ENST00000308394.8,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0 52 | OCLN,ENST00000355237.6,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0 53 | LOX,ENST00000231004.4,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0 54 | FLT4,ENST00000393347.7,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0 55 | RIPK1,ENST00000259808.8,0,0,0,0,0,0,0,0,1,0,1,0,1,1,0,0 56 | SOX4,ENST00000244745.2,0,0,1,0,0,0,1,0,1,0,0,0,0,1,0,0 57 | ABCF1,ENST00000376545.7,0,0,1,0,0,0,0,0,0,0,1,1,0,1,0,0 58 | TNXB,ENST00000375244.7,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0 59 | ARHGAP18,ENST00000368149.2,1,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0 60 | TNFAIP3,ENST00000620204.3,0,0,0,0,0,0,0,1,0,1,0,1,1,0,0,0 61 | PDIA4,ENST00000286091.8,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0 62 | NOS3,ENST00000297494.7,0,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0 63 | ANGPT2,ENST00000325203.9,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0 64 | LOXL2,ENST00000389131.7,0,1,0,0,0,0,0,0,0,1,1,0,1,0,0,0 65 | STC1,ENST00000290271.6,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1 66 | PLAT,ENST00000352041.7,0,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0 67 | ZNF704,ENST00000327835.7,0,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0 68 | NOV,ENST00000259526.3,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0 69 | C9orf3,ENST00000297979.9,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1 70 | RGS3,ENST00000317613.10,0,0,0,1,0,0,0,1,0,1,0,0,0,0,1,0 71 | COL5A1,ENST00000371817.7,0,0,0,0,1,0,1,0,1,1,0,0,0,0,0,0 72 | Blank-01,nan,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1 73 | Blank-02,nan,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0 74 | Blank-03,nan,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,0 75 | Blank-04,nan,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0 76 | Blank-05,nan,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1 77 | Blank-06,nan,0,0,0,1,0,0,0,1,0,0,0,1,0,1,0,0 78 | Blank-07,nan,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,0 79 | Blank-08,nan,0,0,0,0,1,0,0,0,0,0,1,0,0,1,1,0 80 | Blank-09,nan,0,0,1,0,0,1,1,0,0,0,0,1,0,0,0,0 81 | Blank-10,nan,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1 82 | Blank-11,nan,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1 83 | Blank-12,nan,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1 84 | Blank-13,nan,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1 85 | Blank-14,nan,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0 86 | Blank-15,nan,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,0 87 | Blank-16,nan,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0 88 | 
Blank-17,nan,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0 89 | Blank-18,nan,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0 90 | Blank-19,nan,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1 91 | Blank-20,nan,0,0,0,0,0,0,0,1,0,0,0,0,1,1,1,0 92 | Blank-21,nan,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1 93 | Blank-22,nan,1,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0 94 | Blank-23,nan,0,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0 95 | Blank-24,nan,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0 96 | Blank-25,nan,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,0 97 | Blank-26,nan,1,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0 98 | Blank-27,nan,0,1,0,0,1,0,1,0,0,0,0,0,0,1,0,0 99 | Blank-28,nan,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0 100 | Blank-29,nan,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,1 101 | Blank-30,nan,1,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0 102 | Blank-31,nan,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0 103 | Blank-32,nan,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1 104 | Blank-33,nan,0,1,0,0,0,0,0,0,1,0,0,1,0,0,1,0 105 | Blank-34,nan,0,1,0,0,0,1,0,0,0,1,0,1,0,0,0,0 106 | Blank-35,nan,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0 107 | Blank-36,nan,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1 108 | Blank-37,nan,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0 109 | Blank-38,nan,0,0,0,0,0,0,0,1,1,0,1,0,0,0,1,0 110 | Blank-39,nan,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,1 111 | Blank-40,nan,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,1 112 | Blank-41,nan,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0 113 | Blank-42,nan,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1 114 | Blank-43,nan,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,1 115 | Blank-44,nan,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0 116 | Blank-45,nan,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,1 117 | Blank-46,nan,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0 118 | Blank-47,nan,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,1 119 | Blank-48,nan,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0 120 | Blank-49,nan,1,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0 121 | Blank-50,nan,0,0,0,0,0,0,0,0,0,1,0,1,0,1,1,0 122 | Blank-51,nan,0,0,0,0,0,1,0,1,0,1,1,0,0,0,0,0 123 | Blank-52,nan,0,0,0,0,0,0,1,0,1,0,1,1,0,0,0,0 124 | Blank-53,nan,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0 125 | Blank-54,nan,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0 126 | Blank-55,nan,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0 127 | Blank-56,nan,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,0 128 | Blank-57,nan,1,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0 129 | Blank-58,nan,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,1 130 | Blank-59,nan,0,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0 131 | Blank-60,nan,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0 132 | Blank-61,nan,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1 133 | Blank-62,nan,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0 134 | Blank-63,nan,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0 135 | Blank-64,nan,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1 136 | Blank-65,nan,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,1 137 | Blank-66,nan,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1 138 | Blank-67,nan,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0 139 | Blank-68,nan,0,0,0,0,0,0,1,1,0,1,0,0,0,1,0,0 140 | Blank-69,nan,0,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0 141 | Blank-70,nan,1,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0 142 | -------------------------------------------------------------------------------- /test/auxiliary_files/test_codebook2.csv: -------------------------------------------------------------------------------- 1 | name,id,bit1,bit2,bit3,bit4,bit5,bit6,bit7,bit8,bit9,bit10,bit11,bit12,bit13,bit14,bit15,bit16 2 | STMN1,ENST00000465604.1,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,0 3 | DHCR24,ENST00000535035.5,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0 4 | VCAM1,ENST00000370115.1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0 5 | SELL,ENST00000236147.4,0,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0 6 | PTGS2,ENST00000367468.9,0,0,1,0,0,1,0,0,0,0,0,0,1,1,0,0 7 | PALD1,ENST00000263563.6,0,0,1,0,0,0,1,0,0,0,1,0,1,0,0,0 8 | PLAU,ENST00000446342.5,1,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0 9 | PPP1R3C,ENST00000238994.5,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0 10 | 
SERPINH1,ENST00000524558.5,0,0,0,1,0,0,0,0,0,1,0,0,1,1,0,0
11 | HYOU1,ENST00000617285.4,0,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0
12 | 
-------------------------------------------------------------------------------- /test/auxiliary_files/test_data_organization.csv: --------------------------------------------------------------------------------
1 | channelName,readoutName,imageType,imageRegExp,bitNumber,imagingRound,color,frame,zPos,fiducialImageType,fiducialRegExp,fiducialImagingRound,fiducialFrame,fiducialColor
2 | bit1,bit1,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),1,0,650,1,0,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),0,2,561
3 | bit2,bit2,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),2,0,750,0,0,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),0,2,561
4 | bit3,bit3,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),3,1,750,0,0,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),1,2,561
5 | bit4,bit4,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),4,1,650,1,0,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),1,2,561
6 | bit5,bit5,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),5,2,650,1,0,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),2,2,561
7 | bit6,bit6,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),6,2,750,0,0,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),2,2,561
8 | bit7,bit7,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),7,3,650,1,0,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),3,2,561
9 | bit8,bit8,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),8,3,750,0,0,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),3,2,561
10 | bit9,bit9,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),9,4,750,0,0,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),4,2,561
11 | bit10,bit10,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),10,4,650,1,0,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),4,2,561
12 | bit11,bit11,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),11,5,650,1,0,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),5,2,561
13 | bit12,bit12,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),12,5,750,0,0,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),5,2,561
14 | bit13,bit13,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),13,6,650,1,0,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),6,2,561
15 | bit14,bit14,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),14,6,750,0,0,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),6,2,561
16 | bit15,bit15,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),15,7,750,0,0,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),7,2,561
17 | bit16,bit16,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),16,7,650,1,0,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),7,2,561
18 | DAPI,cellstain,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),48,0,488,3,0,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),0,2,561
19 | polyT,nuclearstain,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),47,0,405,4,0,test,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<imagingRound>[0-9]+),0,2,561
20 | 
-------------------------------------------------------------------------------- /test/auxiliary_files/test_microscope_parameters.json: --------------------------------------------------------------------------------
1 | {
2 |     "flip_horizontal": false,
3 |     "flip_vertical": true,
4 |     "transpose": true,
5 |     "microns_per_pixel": 0.108,
6 |     "image_dimensions": [128, 128]
7 | }
8 | 
-------------------------------------------------------------------------------- /test/auxiliary_files/test_positions.csv: --------------------------------------------------------------------------------
1 | -2000,-2000
2 | -2000,-1805
3 | 
-------------------------------------------------------------------------------- /test/conftest.py: --------------------------------------------------------------------------------
1 | import os
2 | import pytest
3 | import shutil
4 | import glob
5 | from merlin.core import dataset
6 | from merlin.analysis import testtask
7 | import merlin
8 | 
9 | 
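# NOTE: the module-level assignments below point every MERlin home directory
# at a throwaway folder under the current working directory, so a test run
# never touches a real MERlin installation; the session-scoped base_files
# fixture recreates these folders and copies the auxiliary files into them.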
10 | root = os.path.join(os.path.dirname(merlin.__file__), '..', 'test')
11 | merlin.DATA_HOME = os.path.abspath('test_data')
12 | merlin.ANALYSIS_HOME = os.path.abspath('test_analysis')
13 | merlin.ANALYSIS_PARAMETERS_HOME = os.path.abspath('test_analysis_parameters')
14 | merlin.CODEBOOK_HOME = os.path.abspath('test_codebooks')
15 | merlin.DATA_ORGANIZATION_HOME = os.path.abspath('test_dataorganization')
16 | merlin.POSITION_HOME = os.path.abspath('test_positions')
17 | merlin.MICROSCOPE_PARAMETERS_HOME = os.path.abspath('test_microscope_parameters')
18 | 
19 | 
20 | dataDirectory = os.sep.join([merlin.DATA_HOME, 'test'])
21 | merfishDataDirectory = os.sep.join([merlin.DATA_HOME, 'merfish_test'])
22 | 
23 | 
24 | @pytest.fixture(scope='session')
25 | def base_files():
26 |     folderList = [merlin.DATA_HOME, merlin.ANALYSIS_HOME,
27 |                   merlin.ANALYSIS_PARAMETERS_HOME, merlin.CODEBOOK_HOME,
28 |                   merlin.DATA_ORGANIZATION_HOME, merlin.POSITION_HOME,
29 |                   merlin.MICROSCOPE_PARAMETERS_HOME]
30 |     for folder in folderList:
31 |         if os.path.exists(folder):
32 |             shutil.rmtree(folder)
33 |         os.makedirs(folder)
34 | 
35 |     shutil.copyfile(
36 |         os.sep.join(
37 |             [root, 'auxiliary_files', 'test_data_organization.csv']),
38 |         os.sep.join(
39 |             [merlin.DATA_ORGANIZATION_HOME, 'test_data_organization.csv']))
40 |     shutil.copyfile(
41 |         os.sep.join(
42 |             [root, 'auxiliary_files', 'test_codebook.csv']),
43 |         os.sep.join(
44 |             [merlin.CODEBOOK_HOME, 'test_codebook.csv']))
45 |     shutil.copyfile(
46 |         os.sep.join(
47 |             [root, 'auxiliary_files', 'test_codebook2.csv']),
48 |         os.sep.join(
49 |             [merlin.CODEBOOK_HOME, 'test_codebook2.csv']))
50 |     shutil.copyfile(
51 |         os.sep.join(
52 |             [root, 'auxiliary_files', 'test_positions.csv']),
53 |         os.sep.join(
54 |             [merlin.POSITION_HOME, 'test_positions.csv']))
55 |     shutil.copyfile(
56 |         os.sep.join(
57 |             [root, 'auxiliary_files', 'test_analysis_parameters.json']),
58 |         os.sep.join(
59 |             [merlin.ANALYSIS_PARAMETERS_HOME, 'test_analysis_parameters.json']))
60 |     shutil.copyfile(
61 |         os.sep.join(
62 |             [root, 'auxiliary_files', 'test_microscope_parameters.json']),
63 |         os.sep.join(
64 |             [merlin.MICROSCOPE_PARAMETERS_HOME,
65 |              'test_microscope_parameters.json']))
66 | 
67 |     yield
68 | 
69 |     for folder in folderList:
70 |         shutil.rmtree(folder)
71 | 
72 | 
73 | @pytest.fixture(scope='session')
74 | def merfish_files(base_files):
75 |     os.mkdir(merfishDataDirectory)
76 | 
77 |     for imageFile in glob.iglob(
78 |             os.sep.join([root, 'auxiliary_files', '*.tif'])):
79 |         if os.path.isfile(imageFile):
80 |             shutil.copy(imageFile, merfishDataDirectory)
81 | 
82 |     yield
83 | 
84 |     shutil.rmtree(merfishDataDirectory)
85 | 
86 | 
87 | @pytest.fixture(scope='session')
88 | def simple_data(base_files):
89 |     os.mkdir(dataDirectory)
90 |     testData = dataset.DataSet('test')
91 | 
92 |     yield testData
93 | 
94 |     shutil.rmtree(dataDirectory)
95 | 
96 | 
97 | @pytest.fixture(scope='session')
98 | def simple_merfish_data(merfish_files):
99 |     testMERFISHData = dataset.MERFISHDataSet(
100 |         'merfish_test',
101 |         dataOrganizationName='test_data_organization.csv',
102 |         codebookNames=['test_codebook.csv'],
103 |         positionFileName='test_positions.csv',
104 |         microscopeParametersName='test_microscope_parameters.json')
105 |     yield testMERFISHData
106 | 
107 | 
108 | @pytest.fixture(scope='session')
109 | def two_codebook_merfish_data(merfish_files):
110 |     testMERFISHData = dataset.MERFISHDataSet(
111 |         'merfish_test',
112 |         dataOrganizationName='test_data_organization.csv',
113 |         codebookNames=['test_codebook2.csv', 'test_codebook.csv'],
114 | 
positionFileName='test_positions.csv', 115 | analysisHome=os.path.join(merlin.ANALYSIS_HOME, '..', 116 | 'test_analysis_two_codebook'), 117 | microscopeParametersName='test_microscope_parameters.json') 118 | yield testMERFISHData 119 | 120 | shutil.rmtree('test_analysis_two_codebook') 121 | 122 | 123 | @pytest.fixture(scope='function') 124 | def single_task(simple_data): 125 | task = testtask.SimpleAnalysisTask( 126 | simple_data, parameters={'a': 5, 'b': 'b_string'}) 127 | yield task 128 | simple_data.delete_analysis(task) 129 | 130 | 131 | @pytest.fixture(scope='function', params=[ 132 | testtask.SimpleAnalysisTask, testtask.SimpleParallelAnalysisTask, 133 | testtask.SimpleInternallyParallelAnalysisTask]) 134 | def simple_task(simple_data, request): 135 | task = request.param( 136 | simple_data, parameters={'a': 5, 'b': 'b_string'}) 137 | yield task 138 | simple_data.delete_analysis(task) 139 | 140 | 141 | @pytest.fixture(scope='function', params=[ 142 | testtask.SimpleAnalysisTask, testtask.SimpleParallelAnalysisTask, 143 | testtask.SimpleInternallyParallelAnalysisTask]) 144 | def simple_merfish_task(simple_merfish_data, request): 145 | task = request.param( 146 | simple_merfish_data, parameters={'a': 5, 'b': 'b_string'}) 147 | yield task 148 | simple_merfish_data.delete_analysis(task) 149 | -------------------------------------------------------------------------------- /test/pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | markers = 3 | slowtest: a test that takes longer than a couple seconds 4 | fullrun: a test that runs the full MERFISH decoding pipeline 5 | -------------------------------------------------------------------------------- /test/test_binary_utils.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | 4 | from merlin.util import binary 5 | 6 | def test_bit_array_to_int_conversion(): 7 | for i in range(50): 8 | intIn = random.getrandbits(64) 9 | listOut = binary.int_to_bit_list(intIn, 64) 10 | intOut = binary.bit_list_to_int(listOut) 11 | assert intIn == intOut 12 | 13 | def test_flip_bit(): 14 | barcode = [random.getrandbits(1) for i in range(128)] 15 | barcodeCopy = np.copy(barcode) 16 | for i in range(len(barcode)): 17 | flippedBarcode = binary.flip_bit(barcode, i) 18 | assert np.array_equal(barcode, barcodeCopy) 19 | assert all([barcode[j] == flippedBarcode[j] \ 20 | for j in range(len(barcode)) if j != i]) 21 | assert barcode[i] == (not flippedBarcode[i]) 22 | -------------------------------------------------------------------------------- /test/test_codebook.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from merlin.core import dataset 5 | 6 | 7 | def test_codebook_get_barcode_count(simple_merfish_data): 8 | assert simple_merfish_data.get_codebook().get_barcode_count() == 140 9 | 10 | 11 | def test_codebook_get_bit_count(simple_merfish_data): 12 | assert simple_merfish_data.get_codebook().get_bit_count() == 16 13 | 14 | 15 | def test_codebook_get_bit_names(simple_merfish_data): 16 | for i, n in enumerate(simple_merfish_data.get_codebook().get_bit_names()): 17 | assert n == 'bit' + str(i+1) 18 | 19 | 20 | def test_codebook_get_barcode(simple_merfish_data): 21 | codebook = simple_merfish_data.get_codebook() 22 | for i in range(codebook.get_barcode_count()): 23 | assert np.sum(codebook.get_barcode(i)) == 4 24 | assert np.array_equal( 25 | 
codebook.get_barcode(0), 26 | [0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]) 27 | 28 | 29 | def test_codebook_get_coding_indexes(simple_merfish_data): 30 | assert np.array_equal( 31 | simple_merfish_data.get_codebook().get_coding_indexes(), 32 | np.arange(70)) 33 | 34 | 35 | def test_codebook_get_blank_indexes(simple_merfish_data): 36 | assert np.array_equal( 37 | simple_merfish_data.get_codebook().get_blank_indexes(), 38 | np.arange(70, 140)) 39 | 40 | 41 | def test_codebook_get_barcodes(simple_merfish_data): 42 | bcSetWithBlanks = simple_merfish_data.get_codebook().get_barcodes() 43 | assert len(bcSetWithBlanks) == 140 44 | assert all([len(x) == 16 for x in bcSetWithBlanks]) 45 | assert all([np.sum(x) == 4 for x in bcSetWithBlanks]) 46 | bcSetNoBlanks = simple_merfish_data.get_codebook().get_barcodes( 47 | ignoreBlanks=True) 48 | assert len(bcSetNoBlanks) == 70 49 | assert all([len(x) == 16 for x in bcSetNoBlanks]) 50 | assert all([np.sum(x) == 4 for x in bcSetNoBlanks]) 51 | 52 | 53 | def test_codebook_get_name(simple_merfish_data): 54 | assert simple_merfish_data.get_codebook().get_codebook_name() \ 55 | == 'test_codebook' 56 | 57 | 58 | def test_codebook_get_index(simple_merfish_data): 59 | assert simple_merfish_data.get_codebook().get_codebook_index() == 0 60 | 61 | 62 | def test_codebook_get_gene_names(simple_merfish_data): 63 | names = simple_merfish_data.get_codebook().get_gene_names() 64 | codebook = simple_merfish_data.get_codebook() 65 | for n in names: 66 | assert n == codebook.get_name_for_barcode_index( 67 | codebook.get_barcode_index_for_name(n)) 68 | 69 | 70 | def test_two_codebook_save_load(two_codebook_merfish_data): 71 | codebook1 = two_codebook_merfish_data.get_codebook(0) 72 | codebook2 = two_codebook_merfish_data.get_codebook(1) 73 | assert len(two_codebook_merfish_data.get_codebooks()) == 2 74 | assert codebook1.get_codebook_name() == 'test_codebook2' 75 | assert codebook1.get_codebook_index() == 0 76 | assert len(codebook1.get_barcodes()) == 10 77 | assert codebook2.get_codebook_name() == 'test_codebook' 78 | assert codebook2.get_codebook_index() == 1 79 | assert len(codebook2.get_barcodes()) == 140 80 | 81 | reloadedDataset = dataset.MERFISHDataSet( 82 | 'merfish_test', analysisHome='test_analysis_two_codebook') 83 | reloaded1 = reloadedDataset.get_codebook(0) 84 | reloaded2 = reloadedDataset.get_codebook(1) 85 | assert len(reloadedDataset.get_codebooks()) == 2 86 | assert reloaded1.get_codebook_name() == 'test_codebook2' 87 | assert reloaded1.get_codebook_index() == 0 88 | assert len(reloaded1.get_barcodes()) == 10 89 | assert reloaded2.get_codebook_name() == 'test_codebook' 90 | assert reloaded2.get_codebook_index() == 1 91 | assert len(reloaded2.get_barcodes()) == 140 92 | 93 | with pytest.raises(FileExistsError): 94 | dataset.MERFISHDataSet( 95 | 'merfish_test', 96 | codebookNames=['test_codebook.csv', 'test_codebook2.csv'], 97 | analysisHome='test_analysis_two_codebook') 98 | -------------------------------------------------------------------------------- /test/test_core.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import os 3 | 4 | from merlin.core import executor 5 | from merlin.core import analysistask 6 | 7 | 8 | def test_task_delete(simple_data, simple_task): 9 | simple_data.save_analysis_task(simple_task) 10 | assert simple_data.analysis_exists(simple_task) 11 | simple_data.delete_analysis(simple_task) 12 | assert not simple_data.analysis_exists(simple_task) 13 | 14 | 15 | def 
test_task_save(simple_data, simple_task): 16 | task1 = simple_task 17 | simple_data.save_analysis_task(task1) 18 | loadedTask = simple_data.load_analysis_task(task1.analysisName) 19 | unsharedKeys1 = [k for k in task1.parameters 20 | if k not in loadedTask.parameters 21 | or task1.parameters[k] != loadedTask.parameters[k]] 22 | assert len(unsharedKeys1) == 0 23 | unsharedKeys2 = [k for k in loadedTask.parameters 24 | if k not in task1.parameters 25 | or loadedTask.parameters[k] != task1.parameters[k]] 26 | assert len(unsharedKeys2) == 0 27 | assert loadedTask.analysisName == task1.analysisName 28 | 29 | 30 | def test_task_run(simple_task): 31 | task1 = simple_task 32 | assert not task1.is_complete() 33 | assert not task1.is_started() 34 | assert not task1.is_running() 35 | assert not task1.is_error() 36 | task1.run() 37 | assert task1.is_started() 38 | assert not task1.is_running() 39 | assert not task1.is_error() 40 | assert task1.is_complete() 41 | 42 | 43 | def test_save_environment(simple_task): 44 | task1 = simple_task 45 | task1.run() 46 | environment = dict(os.environ) 47 | if isinstance(simple_task, analysistask.ParallelAnalysisTask): 48 | taskEnvironment = simple_task.dataSet.get_analysis_environment( 49 | simple_task, 0) 50 | else: 51 | taskEnvironment = simple_task.dataSet.get_analysis_environment( 52 | simple_task) 53 | 54 | assert environment == taskEnvironment 55 | 56 | 57 | @pytest.mark.slowtest 58 | def test_task_run_with_executor(simple_task): 59 | task1 = simple_task 60 | assert not task1.is_complete() 61 | assert not task1.is_started() 62 | assert not task1.is_running() 63 | assert not task1.is_error() 64 | e = executor.LocalExecutor() 65 | e.run(task1) 66 | assert task1.is_started() 67 | assert not task1.is_running() 68 | assert not task1.is_error() 69 | assert task1.is_complete() 70 | 71 | 72 | def test_task_reset(simple_task): 73 | simple_task.run(overwrite=False) 74 | assert simple_task.is_complete() 75 | with pytest.raises(analysistask.AnalysisAlreadyStartedException): 76 | simple_task.run(overwrite=False) 77 | simple_task.run(overwrite=True) 78 | assert simple_task.is_complete() 79 | 80 | 81 | def test_task_overwrite(simple_task): 82 | simple_task.save() 83 | simple_task.parameters['new_parameter'] = 0 84 | with pytest.raises(analysistask.AnalysisAlreadyExistsException): 85 | simple_task.save() 86 | -------------------------------------------------------------------------------- /test/test_dataorganization.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | from merlin.data import dataorganization 5 | 6 | 7 | def test_dataorganization_get_channels(simple_merfish_data): 8 | assert np.array_equal( 9 | simple_merfish_data.get_data_organization().get_data_channels(), 10 | np.arange(18)) 11 | 12 | 13 | def test_dataorganization_get_channel_name(simple_merfish_data): 14 | for i in range(16): 15 | assert simple_merfish_data.get_data_organization()\ 16 | .get_data_channel_name(i) == 'bit' + str(i+1) 17 | 18 | assert simple_merfish_data.get_data_organization()\ 19 | .get_data_channel_name(16) == 'DAPI' 20 | assert simple_merfish_data.get_data_organization()\ 21 | .get_data_channel_name(17) == 'polyT' 22 | 23 | 24 | def test_dataorganization_get_channel_index(simple_merfish_data): 25 | for i in range(16): 26 | assert simple_merfish_data.get_data_organization() \ 27 | .get_data_channel_index('bit' + str(i+1)) == i 28 | 29 | assert simple_merfish_data.get_data_organization() \ 30 | 
.get_data_channel_index('DAPI') == 16 31 | assert simple_merfish_data.get_data_organization() \ 32 | .get_data_channel_index('polyT') == 17 33 | 34 | 35 | def test_dataorganization_get_fovs(simple_merfish_data): 36 | assert np.array_equal( 37 | simple_merfish_data.get_data_organization().get_fovs(), 38 | np.arange(2)) 39 | 40 | 41 | def test_dataorganization_get_z_positions(simple_merfish_data): 42 | assert np.array_equal( 43 | simple_merfish_data.get_data_organization().get_z_positions(), 44 | np.array([0])) 45 | 46 | 47 | def test_dataorganization_get_fiducial_information(simple_merfish_data): 48 | data = simple_merfish_data.get_data_organization() 49 | for d in data.get_data_channels(): 50 | assert data.get_fiducial_frame_index(d) == 2 51 | assert os.path.normpath(data.get_fiducial_filename(0, 0)) \ 52 | == os.path.normpath( 53 | os.path.abspath('test_data/merfish_test/test_0_0.tif')) 54 | assert os.path.normpath(data.get_fiducial_filename(0, 1)) \ 55 | == os.path.normpath( 56 | os.path.abspath('test_data/merfish_test/test_1_0.tif')) 57 | assert os.path.normpath(data.get_fiducial_filename(1, 1)) \ 58 | == os.path.normpath( 59 | os.path.abspath('test_data/merfish_test/test_1_0.tif')) 60 | assert os.path.normpath(data.get_fiducial_filename(2, 1)) \ 61 | == os.path.normpath( 62 | os.path.abspath('test_data/merfish_test/test_1_1.tif')) 63 | 64 | 65 | def test_dataorganization_get_image_information(simple_merfish_data): 66 | data = simple_merfish_data.get_data_organization() 67 | assert data.get_image_frame_index(0, 0) == 1 68 | assert data.get_image_frame_index(1, 0) == 0 69 | assert data.get_image_frame_index(16, 0) == 3 70 | assert os.path.normpath(data.get_image_filename(0, 0)) \ 71 | == os.path.normpath( 72 | os.path.abspath('test_data/merfish_test/test_0_0.tif')) 73 | assert os.path.normpath(data.get_image_filename(0, 1)) \ 74 | == os.path.normpath( 75 | os.path.abspath('test_data/merfish_test/test_1_0.tif')) 76 | assert os.path.normpath(data.get_image_filename(1, 1)) \ 77 | == os.path.normpath( 78 | os.path.abspath('test_data/merfish_test/test_1_0.tif')) 79 | assert os.path.normpath(data.get_image_filename(2, 1)) \ 80 | == os.path.normpath( 81 | os.path.abspath('test_data/merfish_test/test_1_1.tif')) 82 | 83 | 84 | def test_dataorganization_load_from_dataset(simple_merfish_data): 85 | originalOrganization = simple_merfish_data.get_data_organization() 86 | loadedOrganization = dataorganization.DataOrganization(simple_merfish_data) 87 | 88 | assert np.array_equal( 89 | originalOrganization.get_data_channels(), 90 | loadedOrganization.get_data_channels()) 91 | assert np.array_equal( 92 | originalOrganization.get_fovs(), loadedOrganization.get_fovs()) 93 | assert np.array_equal( 94 | originalOrganization.get_z_positions(), 95 | loadedOrganization.get_z_positions()) 96 | 97 | for channel in originalOrganization.get_data_channels(): 98 | assert originalOrganization.get_data_channel_name(channel) \ 99 | == loadedOrganization.get_data_channel_name(channel) 100 | assert originalOrganization.get_fiducial_frame_index(channel) \ 101 | == loadedOrganization.get_fiducial_frame_index(channel) 102 | 103 | for fov in originalOrganization.get_fovs(): 104 | assert originalOrganization.get_fiducial_filename(channel, fov) \ 105 | == loadedOrganization.get_fiducial_filename(channel, fov) 106 | assert originalOrganization.get_image_filename(channel, fov) \ 107 | == loadedOrganization.get_image_filename(channel, fov) 108 | 109 | for z in originalOrganization.get_z_positions(): 110 | assert 
originalOrganization.get_image_frame_index(channel, z) \ 111 | == loadedOrganization.get_image_frame_index(channel, z) 112 | 113 | 114 | def test_dataorganization_get_sequential_rounds(simple_merfish_data): 115 | dataOrganization = simple_merfish_data.get_data_organization() 116 | sequentialRounds, sequentialChannels = \ 117 | dataOrganization.get_sequential_rounds() 118 | 119 | assert sequentialRounds == [16, 17] 120 | assert sequentialChannels == ['DAPI', 'polyT'] 121 | 122 | 123 | def test_dataorganization_get_sequential_rounds_two_codebooks( 124 | two_codebook_merfish_data): 125 | dataOrganization = two_codebook_merfish_data.get_data_organization() 126 | sequentialRounds, sequentialChannels = \ 127 | dataOrganization.get_sequential_rounds() 128 | 129 | assert sequentialRounds == [16, 17] 130 | -------------------------------------------------------------------------------- /test/test_dataportal.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import shutil 3 | import tempfile 4 | import os 5 | import numpy as np 6 | from botocore import UNSIGNED 7 | from botocore.client import Config 8 | from google.auth.credentials import AnonymousCredentials 9 | 10 | from merlin.util import dataportal 11 | 12 | 13 | def local_data_portal(): 14 | tempPath = tempfile.mkdtemp() 15 | with open(os.path.join(tempPath, 'test.txt'), 'w') as f: 16 | f.write('MERlin test file') 17 | with open(os.path.join(tempPath, 'test.bin'), 'wb') as f: 18 | f.write(np.array([0, 1, 2], dtype='uint16').tobytes()) 19 | 20 | yield dataportal.LocalDataPortal(tempPath) 21 | 22 | shutil.rmtree(tempPath) 23 | 24 | 25 | def s3_data_portal(): 26 | yield dataportal.S3DataPortal('s3://merlin-test-bucket-vg/test-files', 27 | region_name='us-east-2', 28 | config=Config(signature_version=UNSIGNED)) 29 | 30 | 31 | def gcloud_data_portal(): 32 | yield dataportal.GCloudDataPortal('gc://merlin-test-bucket/test-files', 33 | project='merlin-253419', 34 | credentials=AnonymousCredentials()) 35 | 36 | 37 | @pytest.fixture(scope='function', params=[ 38 | local_data_portal, s3_data_portal, gcloud_data_portal]) 39 | def data_portal(request): 40 | yield next(request.param()) 41 | 42 | 43 | def test_portal_list_files(data_portal): 44 | # filter out directory blob for google cloud 45 | fileList = [x for x in data_portal.list_files() if not x.endswith('/')] 46 | filteredList = data_portal.list_files(extensionList='.txt') 47 | assert len(fileList) == 2 48 | assert any([x.endswith('test.txt') for x in fileList]) 49 | assert any([x.endswith('test.bin') for x in fileList]) 50 | assert len(filteredList) == 1 51 | assert filteredList[0].endswith('test.txt') 52 | 53 | 54 | def test_portal_available(data_portal): 55 | assert data_portal.is_available() 56 | 57 | 58 | def test_portal_read(data_portal): 59 | textFile = data_portal.open_file('test.txt') 60 | binFile = data_portal.open_file('test.bin') 61 | assert textFile.exists() 62 | assert binFile.exists() 63 | assert textFile.read_as_text() == 'MERlin test file' 64 | assert np.array_equal( 65 | np.frombuffer(binFile.read_file_bytes(0, 6), dtype='uint16'), 66 | np.array([0, 1, 2], dtype='uint16')) 67 | assert np.array_equal( 68 | np.frombuffer(binFile.read_file_bytes(2, 4), dtype='uint16'), 69 | np.array([1], dtype='uint16')) 70 | 71 | 72 | def test_exchange_extension(data_portal): 73 | textFile = data_portal.open_file('test.txt') 74 | assert textFile.get_file_extension() == '.txt' 75 | assert textFile.read_as_text() == 'MERlin test file' 76 | binFile = 
textFile.get_sibling_with_extension('.bin') 77 | assert binFile.get_file_extension() == '.bin' 78 | assert np.array_equal( 79 | np.frombuffer(binFile.read_file_bytes(0, 6), dtype='uint16'), 80 | np.array([0, 1, 2], dtype='uint16')) 81 | -------------------------------------------------------------------------------- /test/test_dataset.py: -------------------------------------------------------------------------------- 1 | def test_get_analysis_tasks(simple_data, simple_task): 2 | assert len(simple_data.get_analysis_tasks()) == 0 3 | simple_task.save() 4 | assert len(simple_data.get_analysis_tasks()) == 1 5 | assert simple_data.get_analysis_tasks()[0]\ 6 | == simple_task.get_analysis_name() 7 | -------------------------------------------------------------------------------- /test/test_decon.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import random 4 | 5 | import merlin.util.deconvolve as deconvolve 6 | import merlin.util.matlab as matlab 7 | 8 | 9 | decon_sigma = 2 10 | decon_filter_size = 9 11 | 12 | 13 | def decon_diff(image, gt_image): 14 | on_gt = np.sum(image[(gt_image > 0)]) 15 | off_gt = np.sum(image[gt_image == 0]) 16 | 17 | return (on_gt/(on_gt + off_gt)) 18 | 19 | 20 | def make_image(): 21 | # Always make the same image. 22 | random.seed(42) 23 | 24 | # Ground truth. 25 | gt_image = np.zeros((100, 150)) 26 | for i in range(40): 27 | x = random.randint(5, 95) 28 | y = random.randint(5, 145) 29 | gt_image[x, y] = random.randint(10, 50) 30 | 31 | [pf, pb] = deconvolve.calculate_projectors(64, decon_sigma) 32 | image = cv2.filter2D(gt_image, -1, pf, borderType=cv2.BORDER_REPLICATE) 33 | 34 | return [image, gt_image] 35 | 36 | 37 | def test_deconvolve_lucyrichardson(): 38 | [image, gt_image] = make_image() 39 | 40 | d1 = decon_diff(image, gt_image) 41 | d_image = deconvolve.deconvolve_lucyrichardson(image, 42 | decon_filter_size, 43 | decon_sigma, 44 | 20) 45 | d2 = decon_diff(d_image, gt_image) 46 | print(d1, d2) 47 | 48 | assert (d2 > d1) 49 | 50 | 51 | def test_deconvolve_lucyrichardson_guo(): 52 | [image, gt_image] = make_image() 53 | 54 | d1 = decon_diff(image, gt_image) 55 | d_image = deconvolve.deconvolve_lucyrichardson_guo(image, 56 | decon_filter_size, 57 | decon_sigma, 58 | 2) 59 | d2 = decon_diff(d_image, gt_image) 60 | print(d1, d2) 61 | 62 | assert (d2 > d1) 63 | -------------------------------------------------------------------------------- /test/test_image_reader.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | 4 | import merlin 5 | from merlin.util import imagereader 6 | from merlin.util import dataportal 7 | 8 | root = os.path.join(os.path.dirname(merlin.__file__), '..', 'test') 9 | 10 | 11 | def test_read_dax(): 12 | print(root) 13 | dataPortal = dataportal.LocalDataPortal( 14 | os.path.join(root, 'auxiliary_files')) 15 | daxPortal = dataPortal.open_file('test.dax') 16 | daxReader = imagereader.infer_reader(daxPortal) 17 | frame0 = daxReader.load_frame(0) 18 | frame5 = daxReader.load_frame(5) 19 | frame9 = daxReader.load_frame(9) 20 | 21 | assert daxReader.number_frames == 10 22 | assert daxReader.image_height == 256 23 | assert daxReader.image_width == 256 24 | assert frame0.shape == (256, 256) 25 | assert frame5.shape == (256, 256) 26 | assert frame0[0, 0] == 144 27 | assert frame5[0, 0] == 156 28 | assert np.sum(frame0) == 10459722 29 | assert np.sum(frame5) == 10460240 30 | 
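For orientation, the reader interface exercised by test_read_dax above generalizes to whole movies. A minimal sketch, using only the calls that appear in the test itself (LocalDataPortal, open_file, infer_reader, load_frame, and number_frames), might look like:

import os

import merlin
from merlin.util import dataportal
from merlin.util import imagereader

# Open the bundled test movie through a local data portal and load each frame.
root = os.path.join(os.path.dirname(merlin.__file__), '..', 'test')
portal = dataportal.LocalDataPortal(os.path.join(root, 'auxiliary_files'))
reader = imagereader.infer_reader(portal.open_file('test.dax'))
frames = [reader.load_frame(i) for i in range(reader.number_frames)]
assert len(frames) == 10 and frames[0].shape == (256, 256)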
-------------------------------------------------------------------------------- /test/test_merfish.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | 4 | import merlin 5 | from merlin import merlin as m 6 | 7 | 8 | @pytest.mark.fullrun 9 | @pytest.mark.slowtest 10 | def test_merfish_2d_full_local(simple_merfish_data): 11 | with open(os.sep.join([merlin.ANALYSIS_PARAMETERS_HOME, 12 | 'test_analysis_parameters.json']), 'r') as f: 13 | snakefilePath = m.generate_analysis_tasks_and_snakefile( 14 | simple_merfish_data, f) 15 | m.run_with_snakemake(simple_merfish_data, snakefilePath, 5) 16 | -------------------------------------------------------------------------------- /test/test_plotting.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from merlin.analysis import testtask 4 | from merlin.plots import testplots 5 | from merlin import plots 6 | 7 | 8 | def test_metadata(simple_merfish_data): 9 | randomTask = testtask.RandomNumberParallelAnalysisTask(simple_merfish_data) 10 | randomMetadata = testplots.TestPlotMetadata(randomTask, 11 | {'test_task': randomTask}) 12 | assert not randomTask.is_complete() 13 | assert not randomMetadata.is_complete() 14 | assert randomMetadata.metadata_name() == 'testplots/TestPlotMetadata' 15 | 16 | for i in range(randomTask.fragment_count()-1): 17 | randomTask.run(i) 18 | randomMetadata.update() 19 | assert not randomTask.is_complete() 20 | assert not randomMetadata.is_complete() 21 | 22 | randomTask.run(randomTask.fragment_count()-1) 23 | randomMetadata.update() 24 | assert np.isclose( 25 | randomMetadata.get_mean_values(), 26 | np.array([np.mean(randomTask.get_random_result(i)) 27 | for i in range(randomTask.fragment_count())])).all() 28 | assert randomTask.is_complete() 29 | assert randomMetadata.is_complete() 30 | simple_merfish_data.delete_analysis(randomTask) 31 | 32 | 33 | def test_plotengine(simple_merfish_data): 34 | randomTask = testtask.RandomNumberParallelAnalysisTask(simple_merfish_data) 35 | assert not randomTask.is_complete() 36 | 37 | plotEngine = plots.PlotEngine(randomTask, {'test_task': randomTask}) 38 | assert len(plotEngine.get_plots()) == 1 39 | assert not plotEngine.take_step() 40 | randomTask.run(0) 41 | assert not plotEngine.take_step() 42 | 43 | for i in range(1, randomTask.fragment_count()): 44 | randomTask.run(i) 45 | assert plotEngine.take_step() 46 | assert plotEngine.get_plots()[0].is_complete() 47 | 48 | simple_merfish_data.delete_analysis(randomTask) 49 | -------------------------------------------------------------------------------- /test/test_snakemake.py: -------------------------------------------------------------------------------- 1 | import snakemake 2 | import os 3 | import shutil 4 | 5 | from merlin.util import snakewriter 6 | 7 | 8 | def test_run_single_task(simple_merfish_task): 9 | simple_merfish_task.save() 10 | assert not simple_merfish_task.is_complete() 11 | snakeRule = snakewriter.SnakemakeRule(simple_merfish_task) 12 | with open('temp.Snakefile', 'w') as outFile: 13 | outFile.write('rule all: \n\tinput: ' 14 | + snakeRule.full_output() + '\n\n') 15 | outFile.write(snakeRule.as_string()) 16 | 17 | snakemake.snakemake('temp.Snakefile') 18 | os.remove('temp.Snakefile') 19 | shutil.rmtree('.snakemake') 20 | 21 | assert simple_merfish_task.is_complete() 22 | 23 | 24 | def test_snakemake_generator_one_task(simple_merfish_data): 25 | taskDict = {'analysis_tasks': [ 26 | {'task': 
'SimpleAnalysisTask', 27 | 'module': 'merlin.analysis.testtask', 28 | 'parameters': {}} 29 | ]} 30 | 31 | generator = snakewriter.SnakefileGenerator(taskDict, simple_merfish_data) 32 | workflow = generator.generate_workflow() 33 | outputTask = simple_merfish_data.load_analysis_task('SimpleAnalysisTask') 34 | assert not outputTask.is_complete() 35 | snakemake.snakemake(workflow) 36 | assert outputTask.is_complete() 37 | 38 | shutil.rmtree('.snakemake') 39 | 40 | 41 | def test_snakemake_generator_task_chain(simple_merfish_data): 42 | taskDict = {'analysis_tasks': [ 43 | {'task': 'SimpleAnalysisTask', 44 | 'module': 'merlin.analysis.testtask', 45 | 'analysis_name': 'Task1', 46 | 'parameters': {}}, 47 | {'task': 'SimpleParallelAnalysisTask', 48 | 'module': 'merlin.analysis.testtask', 49 | 'analysis_name': 'Task2', 50 | 'parameters': {'dependencies': ['Task1']}}, 51 | {'task': 'SimpleParallelAnalysisTask', 52 | 'module': 'merlin.analysis.testtask', 53 | 'analysis_name': 'Task3', 54 | 'parameters': {'dependencies': ['Task2']}} 55 | ]} 56 | 57 | generator = snakewriter.SnakefileGenerator(taskDict, simple_merfish_data) 58 | workflow = generator.generate_workflow() 59 | outputTask1 = simple_merfish_data.load_analysis_task('Task1') 60 | outputTask2 = simple_merfish_data.load_analysis_task('Task2') 61 | outputTask3 = simple_merfish_data.load_analysis_task('Task3') 62 | assert not outputTask1.is_complete() 63 | assert not outputTask2.is_complete() 64 | assert not outputTask3.is_complete() 65 | snakemake.snakemake(workflow) 66 | assert outputTask1.is_complete() 67 | assert outputTask2.is_complete() 68 | assert outputTask3.is_complete() 69 | 70 | shutil.rmtree('.snakemake') 71 | -------------------------------------------------------------------------------- /test/test_zplane_duplicate_removal.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import random 3 | import numpy as np 4 | from merlin.util import barcodefilters 5 | 6 | 7 | def generate_barcode(fov, barcode_id, x, y, z, mean_intensity): 8 | bc = {'barcode': random.getrandbits(32), 9 | 'barcode_id': barcode_id, 10 | 'fov': fov, 11 | 'mean_intensity': mean_intensity, 12 | 'max_intensity': random.uniform(5, 15), 13 | 'area': random.randint(0, 10), 14 | 'mean_distance': random.random(), 15 | 'min_distance': random.random(), 16 | 'x': x, 17 | 'y': y, 18 | 'z': z, 19 | 'global_x': random.uniform(0, 200000), 20 | 'global_y': random.uniform(0, 200000), 21 | 'global_z': random.uniform(0, 5), 22 | 'cell_index': random.randint(0, 5000)} 23 | 24 | for i in range(16): 25 | bc['intensity_' + str(i)] = random.uniform(5, 15) 26 | 27 | return bc 28 | 29 | 30 | b1 = generate_barcode(100, 5, 402.21, 787.11, 2, 14.23) 31 | b2 = generate_barcode(100, 5, 502.21, 687.11, 3, 12.23) 32 | b3 = generate_barcode(100, 17, 402.21, 787.11, 2, 10.23) 33 | 34 | b1_above_dimmer = generate_barcode(100, 5, 402.21, 787.11, 3, 11.23) 35 | b1_closeby_above_brighter = generate_barcode(100, 5, 403.21, 787.11, 3, 15.23) 36 | b2_above_brighter = generate_barcode(100, 5, 502.31, 687.11, 4, 14.23) 37 | b1_closeby_below_brighter = generate_barcode(100, 5, 403.21, 787.11, 1, 15.0) 38 | b1_closeby_toofar_brighter = generate_barcode(100, 5, 403.21, 787.11, 0, 15.0) 39 | 40 | 41 | def test_multiple_comparisons_barcodes(): 42 | zplane_cutoff = 1 43 | xy_cutoff = np.sqrt(2) 44 | zpositions = [0, 1.5, 3, 4.5, 6, 7.5, 9] 45 | 46 | bcSet = [b1, b2, b3, b1_above_dimmer, b1_closeby_above_brighter, 47 | b2_above_brighter, 
b1_closeby_below_brighter, 48 | b1_closeby_toofar_brighter] 49 | bcDF = pd.DataFrame(bcSet) 50 | expected = [x['barcode'] for x in 51 | [b1_closeby_above_brighter, b2_above_brighter, b3]] 52 | notExpected = [x['barcode'] for x in [b1, b2, b1_above_dimmer, 53 | b1_closeby_below_brighter, 54 | b1_closeby_toofar_brighter]] 55 | 56 | keptBC = barcodefilters.remove_zplane_duplicates_all_barcodeids( 57 | bcDF, zplane_cutoff, xy_cutoff, zpositions) 58 | for ex in expected: 59 | assert ex in keptBC['barcode'].values 60 | for notEx in notExpected: 61 | assert notEx not in keptBC['barcode'].values 62 | 63 | 64 | def test_all_compatible_barcodes(): 65 | zplane_cutoff = 1 66 | xy_cutoff = np.sqrt(2) 67 | zpositions = [0, 1.5, 3, 4.5, 6, 7.5, 9] 68 | 69 | bcSet = [b1, b2, b3, b1_closeby_toofar_brighter] 70 | bcDF = pd.DataFrame(bcSet) 71 | expected = [x['barcode'] for x in bcSet] 72 | keptBC = barcodefilters.remove_zplane_duplicates_all_barcodeids( 73 | bcDF, zplane_cutoff, xy_cutoff, zpositions) 74 | for ex in expected: 75 | assert ex in keptBC['barcode'].values 76 | assert len(keptBC) == len(bcSet) 77 | 78 | 79 | def test_farther_zrange(): 80 | zplane_cutoff = 2 81 | xy_cutoff = np.sqrt(2) 82 | zpositions = [0, 1.5, 3, 4.5, 6, 7.5, 9] 83 | 84 | bcSet = [b1, b2, b3, b1_closeby_toofar_brighter] 85 | bcDF = pd.DataFrame(bcSet) 86 | expected = [x['barcode'] for x in [b2, b3, b1_closeby_toofar_brighter]] 87 | notExpected = [x['barcode'] for x in [b1]] 88 | keptBC = barcodefilters.remove_zplane_duplicates_all_barcodeids( 89 | bcDF, zplane_cutoff, xy_cutoff, zpositions) 90 | for ex in expected: 91 | assert ex in keptBC['barcode'].values 92 | for notEx in notExpected: 93 | assert notEx not in keptBC['barcode'].values 94 | 95 | 96 | def test_farther_xyrange(): 97 | zplane_cutoff = 1 98 | xy_cutoff = np.sqrt(20001) 99 | zpositions = [0, 1.5, 3, 4.5, 6, 7.5, 9] 100 | 101 | bcSet = [b1, b2, b3] 102 | bcDF = pd.DataFrame(bcSet) 103 | expected = [x['barcode'] for x in [b1, b3]] 104 | notExpected = [x['barcode'] for x in [b2]] 105 | keptBC = barcodefilters.remove_zplane_duplicates_all_barcodeids( 106 | bcDF, zplane_cutoff, xy_cutoff, zpositions) 107 | for ex in expected: 108 | assert ex in keptBC['barcode'].values 109 | for notEx in notExpected: 110 | assert notEx not in keptBC['barcode'].values 111 | 112 | 113 | def test_empty_barcodes(): 114 | zplane_cutoff = 1 115 | xy_cutoff = np.sqrt(2) 116 | zpositions = [0, 1.5, 3, 4.5, 6, 7.5, 9] 117 | 118 | bcDF = pd.DataFrame([b1]) 119 | bcDF.drop(0, inplace=True) 120 | 121 | keptBC = barcodefilters.remove_zplane_duplicates_all_barcodeids( 122 | bcDF, zplane_cutoff, xy_cutoff, zpositions) 123 | assert type(keptBC) == pd.DataFrame 124 | --------------------------------------------------------------------------------
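The z-plane duplicate tests above pin down the filtering rule: a barcode survives only when it is strictly brighter than every barcode with the same barcode_id that lies within zplane_cutoff z planes and xy_cutoff pixels of it. The following is an illustrative, simplified reimplementation of that rule, not MERlin's actual merlin.util.barcodefilters code (which additionally receives the list of physical z positions and is built for large barcode tables):

import numpy as np
import pandas as pd


def drop_zplane_duplicates(barcodes: pd.DataFrame, zplane_cutoff: int,
                           xy_cutoff: float) -> pd.DataFrame:
    # Hypothetical helper for illustration only. Keep a barcode iff no
    # equally bright or brighter barcode with the same barcode_id sits
    # within zplane_cutoff z planes and xy_cutoff pixels (O(n^2) scan).
    kept = []
    for _, group in barcodes.groupby('barcode_id'):
        for i, row in group.iterrows():
            neighbors = group[
                (group.index != i)
                & ((group['z'] - row['z']).abs() <= zplane_cutoff)
                & (np.hypot(group['x'] - row['x'],
                            group['y'] - row['y']) <= xy_cutoff)]
            if (neighbors['mean_intensity'] < row['mean_intensity']).all():
                kept.append(i)
    return barcodes.loc[kept]

Note how this reproduces the tie behavior exercised in test_multiple_comparisons_barcodes: two equally bright duplicates (b1_closeby_below_brighter and b1_closeby_toofar_brighter) eliminate each other, since neither is strictly brighter than the other.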