├── .gitignore
├── .pre-commit-config.yaml
├── .travis.yml
├── Dockerfile
├── Dockerfile_fiji
├── LICENSE
├── README.md
├── bin
├── background_subtraction
│ └── run_background_subtraction.py
├── best_focus
│ ├── best_z_identification.py
│ ├── best_z_paths.py
│ ├── file_manipulation.py
│ └── run_best_focus_selection.py
├── codex_stitching
│ ├── bigstitcher_dataset_meta.py
│ ├── directory_management.py
│ ├── generate_bigstitcher_macro.py
│ ├── image_stitching.py
│ ├── run_stitching.py
│ └── secondary_stitcher
│ │ ├── mask_stitching.py
│ │ ├── match_masks.py
│ │ ├── secondary_stitcher.py
│ │ └── secondary_stitcher_runner.py
├── convert_to_ometiff.py
├── create_cytokit_config.py
├── dataset_info
│ ├── collect_dataset_info.py
│ ├── collect_dataset_info_old.py
│ └── run_collection.py
├── illumination_correction
│ ├── generate_basic_macro.py
│ └── run_illumination_correction.py
├── pipeline_utils
│ ├── dataset_listing.py
│ └── pipeline_config_reader.py
├── slicing
│ ├── modify_pipeline_config.py
│ ├── run_slicing.py
│ └── slicer.py
└── utils.py
├── cytokit-docker
├── Dockerfile
├── cytokit_wrapper.py
└── setup_data_directory.py
├── docker_images.txt
├── environment.yml
├── metadata_examples
├── channelnames.txt
├── channelnames_report.csv
├── experiment.json
├── exposure_times.txt
└── segmentation.json
├── pipeline-manifest.json
├── pipeline.cwl
├── pipeline_release_mgmt.yaml
├── pyproject.toml
├── requirements-test.txt
├── steps
├── illumination_first_stitching.cwl
├── illumination_first_stitching
│ ├── best_focus.cwl
│ ├── collect_dataset_info.cwl
│ ├── create_yaml_config.cwl
│ ├── first_stitching.cwl
│ ├── illumination_correction.cwl
│ └── slicing.cwl
├── ometiff_second_stitching-manifest.json
├── ometiff_second_stitching.cwl
├── ometiff_second_stitching
│ ├── background_subtraction.cwl
│ ├── ome_tiff_creation.cwl
│ └── second_stitching.cwl
├── run_cytokit-manifest.json
└── run_cytokit.cwl
├── subm.yaml
└── test.sh
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/psf/black
3 | rev: 23.9.1
4 | hooks:
5 | - id: black
6 | language_version: python3
7 | - repo: https://github.com/pycqa/isort
8 | rev: 5.12.0
9 | hooks:
10 | - id: isort
11 | args: ["--profile", "black"]
12 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | dist: focal
2 | language: python
3 | python: 3.8
4 | install:
5 | - pip install -r requirements-test.txt
6 | script:
7 | - ./test.sh
8 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM ubuntu:focal
2 |
3 | RUN apt-get -qq update \
4 | && apt-get -qq install --no-install-recommends --yes \
5 | wget \
6 | bzip2 \
7 | ca-certificates \
8 | curl \
9 | unzip \
10 | git \
11 | && apt-get clean \
12 | && rm -rf /var/lib/apt/lists/*
13 |
14 | RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py39_4.12.0-Linux-x86_64.sh -O /tmp/miniconda.sh \
15 | && /bin/bash /tmp/miniconda.sh -b -p /opt/conda \
16 | && rm /tmp/miniconda.sh
17 | ENV PATH /opt/conda/bin:$PATH
18 |
19 | # update base environment from yaml file
20 | COPY environment.yml /tmp/
21 | RUN conda env update -f /tmp/environment.yml \
22 | && echo "source activate base" > ~/.bashrc \
23 | && conda clean --index-cache --tarballs --yes \
24 | && rm /tmp/environment.yml
25 |
26 | ENV PATH /opt/conda/envs/hubmap/bin:$PATH
27 |
28 | #Copy fiji from container
29 | COPY --from=hubmap/fiji_bigstitcher:latest /opt/Fiji.app /opt/Fiji.app
30 | ENV PATH /opt/Fiji.app:$PATH
31 |
32 | RUN mkdir /output && chmod -R a+rwx /output
33 |
34 | WORKDIR /opt
35 | COPY bin /opt
36 |
37 | CMD ["/bin/bash"]
38 |
--------------------------------------------------------------------------------
/Dockerfile_fiji:
--------------------------------------------------------------------------------
# Fiji + BigStitcher + BaSiC_Mod image.
# NOTE: a previous revision contained this entire stage twice (two standalone
# `FROM ubuntu:focal` stages with no COPY --from between them); the first copy
# was fully built but unused, doubling build time. Deduplicated to the single
# effective stage (the one using the hubmapconsortium BaSiC_Mod release).
FROM ubuntu:focal

RUN apt-get -qq update \
    && apt-get -qq install --no-install-recommends --yes \
    wget \
    bzip2 \
    ca-certificates \
    curl \
    unzip \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*


# Get ImageJ/Fiji
RUN wget --quiet https://downloads.imagej.net/fiji/latest/fiji-linux64.zip -P /tmp/ \
    && unzip /tmp/fiji-linux64.zip -d /opt/ \
    && rm /tmp/fiji-linux64.zip

ENV PATH /opt/Fiji.app:$PATH

# Install BigStitcher
RUN ImageJ-linux64 --headless --update add-update-site BigStitcher https://sites.imagej.net/BigStitcher/ \
    && ImageJ-linux64 --headless --update update

# Install BaSiC_Mod (illumination-correction plugin); the jtransforms and
# netlib jars shipped with Fiji conflict with its dependencies, so remove them
RUN wget --quiet https://github.com/hubmapconsortium/BaSiC_Mod/releases/download/v1.0/BaSiC_Mod_v10.zip -P /tmp/ \
    && unzip /tmp/BaSiC_Mod_v10.zip -d /tmp/ \
    && mv /tmp/BaSiC_Mod_v10/BaSiC_Mod.jar /opt/Fiji.app/plugins/ \
    && mv /tmp/BaSiC_Mod_v10/dependencies/* /opt/Fiji.app/jars/. \
    && rm -r /tmp/BaSiC_Mod_v10 \
    && rm /tmp/BaSiC_Mod_v10.zip \
    && rm /opt/Fiji.app/jars/jtransforms-2.4.jar \
    && rm /opt/Fiji.app/jars/netlib-java-0.9.3-renjin-patched-2.jar \
    && ImageJ-linux64 --headless --update update
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [](https://travis-ci.com/hubmapconsortium/codex-pipeline)
2 | [](https://github.com/psf/black)
3 |
4 | # codex-pipeline
5 | A [CWL](https://www.commonwl.org/) pipeline for processing [CODEX](https://www.akoyabio.com/codextm/technology) image data, using [Cytokit](https://github.com/hammerlab/cytokit).
6 |
7 | ## Pipeline steps
8 | * Collect required parameters from metadata files.
9 | * Perform illumination correction with Fiji plugin [BaSiC](https://github.com/VasylVaskivskyi/BaSiC_Mod)
10 | * Find sharpest z-plane for each channel, using variation of Laplacian
11 | * Perform stitching of tiles using Fiji plugin [BigStitcher](https://imagej.net/plugins/bigstitcher/)
12 | * Create Cytokit YAML config file containing parameters from input metadata
13 | * Run Cytokit's `processor` command to perform tile pre-processing, and nucleus and cell segmentation.
14 | * Run Cytokit's `operator` command to extract all antigen fluorescence images (discarding blanks and empty channels).
15 | * Generate [OME-TIFF](https://docs.openmicroscopy.org/ome-model/6.0.1/ome-tiff/specification.html) versions of TIFFs created by Cytokit.
16 | * Stitch tiles with segmentation masks
17 | * Perform downstream analysis using [SPRM](https://github.com/hubmapconsortium/sprm).
18 |
19 |
20 | ## Requirements
21 |
22 | Please use [HuBMAP Consortium fork of cwltool](https://github.com/hubmapconsortium/cwltool)
23 | to be able to run pipeline with GPU in Docker and Singularity containers.\
24 | For the list of python packages check `environment.yml`.
25 |
26 |
27 | ## How to run
28 |
29 | `cwltool pipeline.cwl subm.yaml`
30 |
31 | If you use Singularity containers add `--singularity`. Example of submission file `subm.yaml` is provided in the repo.
32 |
33 |
34 | ## Expected input directory and file structure
35 |
36 | ```
37 | codex_dataset/
38 | src_data OR raw
39 | ├── channelnames.txt
40 | ├── channelnames_report.csv
41 | ├── experiment.json
42 | ├── exposure_times.txt
43 | ├── segmentation.json
44 | ├── Cyc1_reg1 OR Cyc001_reg001
45 | │ ├── 1_00001_Z001_CH1.tif
46 | │ ├── 1_00001_Z001_CH2.tif
47 | │ │ ...
48 | │ └── 1_0000N_Z00N_CHN.tif
49 | └── Cyc1_reg2 OR Cyc001_reg002
50 | ├── 2_00001_Z001_CH1.tif
51 | ├── 2_00001_Z001_CH2.tif
52 | │ ...
53 |         └── 2_0000N_Z00N_CHN.tif
54 |
55 | ```
56 |
57 | Images should be separated into directories by cycles and regions using the following pattern `Cyc{cycle:d}_reg{region:d}`.
58 | The file names must contain region, tile, z-plane and channel ids starting from 1, and follow this pattern
59 | `{region:d}_{tile:05d}_Z{zplane:03d}_CH{channel:d}.tif`.
60 |
61 | Necessary metadata files that must be present in the input directory:
62 |
63 | * `experiment.json` - acquisition parameters and data structure;
64 | * `segmentation.json` - which channel from which cycle to use for segmentation;
65 | * `channelnames.txt` - list of channel names, one per row;
66 | * `channelnames_report.csv` - which channels to use, and which to exclude;
67 | * `exposure_times.txt` - not used at the moment, but will be useful for background subtraction.
68 |
69 | Examples of these files are present in the directory `metadata_examples`.
70 | Note: all fields related to regions, cycles, channels, z-planes and tiles start from 1,
71 | and xyResolution, zPitch are measured in `nm`.
72 |
73 | ## Output file structure
74 |
75 | ```
76 | pipeline_output/
77 | ├── expr
78 | │ ├── reg001_expr.ome.tiff
79 | │ └── reg002_expr.ome.tiff
80 | └── mask
81 | ├── reg001_mask.ome.tiff
82 |     └── reg002_mask.ome.tiff
83 | ```
84 |
85 | Where `expr` directory contains processed images and `mask` contains segmentation masks.
86 | The output of SPRM will be different, see https://github.com/hubmapconsortium/sprm .
87 |
88 |
89 | ## Development
90 | Code in this repository is formatted with [black](https://github.com/psf/black) and
91 | [isort](https://pypi.org/project/isort/), and this is checked via Travis CI.
92 |
93 | A [pre-commit](https://pre-commit.com/) hook configuration is provided, which runs `black` and `isort` before committing.
94 | Run `pre-commit install` in each clone of this repository which you will use for development (after `pip install pre-commit`
95 | into an appropriate Python environment, if necessary).
96 |
97 | ## Building containers
98 | Two `Dockerfile`s are included in this repository. A `docker_images.txt` manifest is included, which is intended
99 | for use in the `build_docker_containers` script provided by the
100 | [`multi-docker-build`](https://github.com/mruffalo/multi-docker-build) Python package. This package can be installed
101 | with
102 | ```shell script
103 | python -m pip install multi-docker-build
104 | ```
105 |
106 | ## Release process
107 |
108 | The `master` branch is intended to be production-ready at all times, and should always reference Docker containers
109 | with the `latest` tag.
110 |
111 | Publication of tagged "release" versions of the pipeline is handled with the
112 | [HuBMAP pipeline release management](https://github.com/hubmapconsortium/pipeline-release-mgmt) Python package. To
113 | release a new pipeline version, *ensure that the `master` branch contains all commits that you want to include in the release,*
114 | then run
115 | ```shell
116 | tag_release_pipeline v0.whatever
117 | ```
118 | See the pipeline release management script usage notes for additional options, such as GPG signing.
119 |
--------------------------------------------------------------------------------
/bin/best_focus/best_z_identification.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from typing import Dict, List
3 |
4 | import cv2 as cv
5 | import dask
6 | import numpy as np
7 | import tifffile as tif
8 | from scipy.ndimage import gaussian_filter
9 |
10 | Image = np.ndarray
11 |
12 |
def _laplacian_variance(img: Image) -> float:
    """
    Focus measure: variance of the image Laplacian (ksize=21).
    DOI:10.1016/j.patcog.2012.11.011
    Analysis of focus measure operators for shape-from-focus
    """
    laplacian = cv.Laplacian(img, cv.CV_64F, ksize=21)
    return np.var(laplacian)
19 |
20 |
def _find_best_z_plane_id(img_list: List[Image]) -> int:
    """Return the 0-based index of the sharpest image in img_list.

    Sharpness is measured by variance of the Laplacian; replaces the
    manual accumulate-then-search loop with a comprehension + index(max).
    """
    lap_vars_per_z_plane = [_laplacian_variance(img) for img in img_list]
    return lap_vars_per_z_plane.index(max(lap_vars_per_z_plane))
28 |
29 |
def _load_images(path_list: List[Path]) -> List[Image]:
    """Read every TIFF in path_list into memory, preserving order."""
    return [tif.imread(str(path)) for path in path_list]
35 |
36 |
def get_best_z_plane_id(path_list: List[Path]) -> int:
    """Load the z-stack images and return the 1-based id of the sharpest plane."""
    images = _load_images(path_list)
    return 1 + _find_best_z_plane_id(images)
40 |
41 |
def get_best_z_plane_id_parallelized(plane_paths_per_tile: dict) -> List[int]:
    """Find the best (sharpest) z-plane id for every tile, in parallel via dask.

    plane_paths_per_tile maps tile id -> {zplane id: path}; result order
    follows the dict's iteration order.
    """
    tasks = [
        dask.delayed(get_best_z_plane_id)(list(plane_paths.values()))
        for plane_paths in plane_paths_per_tile.values()
    ]
    return list(dask.compute(*tasks))
50 |
51 |
def smoothing_z_ids(arr: np.ndarray):
    """Gaussian-smooth (sigma=1, reflect borders) an array of z ids and round back to uint32."""
    blurred = gaussian_filter(arr.astype(np.float32), 1, mode="reflect")
    return np.round(blurred, 0).astype(np.uint32)
56 |
57 |
def best_z_correction(
    best_z_plane_id_list: List[int], x_ntiles: int, y_ntiles: int
) -> List[int]:
    """Smooth per-tile best-z ids across the tile grid to suppress outliers.

    The flat list (row-major, y_ntiles x x_ntiles) is reshaped into the tile
    grid, Gaussian-smoothed, then flattened back.

    Fix: the return annotation previously claimed np.ndarray, but the function
    returns a plain Python list (via .tolist()).
    """
    best_z_per_tile_arr = np.array(best_z_plane_id_list, dtype=np.int32).reshape(
        y_ntiles, x_ntiles
    )
    print("Best z-plane per tile")
    print("Original values\n", best_z_per_tile_arr)
    smoothed_best_z_per_tile_arr = smoothing_z_ids(best_z_per_tile_arr)
    print("Corrected values\n", smoothed_best_z_per_tile_arr)
    result = smoothed_best_z_per_tile_arr.ravel().tolist()

    return result
69 |
70 |
def pick_z_planes_below_and_above(best_z: int, max_z: int, above: int, below: int) -> List[int]:
    """Return best_z plus up to `below` ids before it and `above` ids after it,
    clipped to the valid range [1, max_z]. For a single-plane stack, just [best_z].
    """
    if max_z == 1:
        return [best_z]

    first = max(best_z - below, 1)
    last = min(best_z + above, max_z)

    # Clipping makes the explicit best_z == 1 / best_z == max_z branches
    # of the original unnecessary: the ranges come out empty on their own.
    planes_before = list(range(first, best_z))
    planes_after = list(range(best_z + 1, last + 1))
    return planes_before + [best_z] + planes_after
92 |
93 |
def get_best_z_plane_ids_per_tile(
    plane_paths_per_tile: dict, x_ntiles: int, y_ntiles: int, max_z: int
) -> Dict[int, List[int]]:
    """For each tile, pick the sharpest z-plane (smoothed across the grid)
    plus one plane below and one above it.
    """
    raw_ids = get_best_z_plane_id_parallelized(plane_paths_per_tile)
    corrected_ids = best_z_correction(raw_ids, x_ntiles, y_ntiles)
    return {
        tile: pick_z_planes_below_and_above(corrected_ids[i], max_z, 1, 1)
        for i, tile in enumerate(plane_paths_per_tile.keys())
    }
106 |
--------------------------------------------------------------------------------
/bin/best_focus/best_z_paths.py:
--------------------------------------------------------------------------------
1 | import sys
2 | from math import ceil
3 | from pathlib import Path
4 | from typing import Any, Dict, List, Tuple
5 |
6 | sys.path.append("/opt/")
7 | from best_z_identification import get_best_z_plane_ids_per_tile
8 |
9 | from pipeline_utils.dataset_listing import (
10 | create_listing_for_each_cycle_region,
11 | extract_digits_from_string,
12 | )
13 |
14 |
def _change_image_file_name(original_name: str) -> str:
    """Output tiles will have names 1_00001_Z001_CH1.tif, 1_00002_Z001_CH1.tif ..."""
    digits = extract_digits_from_string(original_name)
    # region / tile / channel come from the source name; z is always collapsed to 1
    return "{reg:d}_{tile:05d}_Z{z:03d}_CH{ch:d}.tif".format(
        reg=digits[0], tile=digits[1], z=1, ch=digits[3]
    )
25 |
26 |
27 | def _get_reference_channel_paths(
28 | listing_per_cycle: dict, num_channels_per_cycle: int, reference_channel_id: int
29 | ) -> Dict[int, Path]:
30 | ref_cycle_id = ceil(reference_channel_id / num_channels_per_cycle) - 1
31 | ref_cycle = sorted(listing_per_cycle.keys())[ref_cycle_id]
32 | ref_cycle_ref_channel_id = reference_channel_id - ref_cycle_id * num_channels_per_cycle
33 |
34 | reference_channel_tile_paths = dict()
35 | for region in listing_per_cycle[ref_cycle]:
36 | reference_channel_tile_paths.update({region: {}})
37 | this_channel_tile_paths = listing_per_cycle[ref_cycle][region][ref_cycle_ref_channel_id]
38 | reference_channel_tile_paths[region] = this_channel_tile_paths
39 | return reference_channel_tile_paths
40 |
41 |
42 | def _create_dirs_for_each_cycle_region(
43 | listing_per_cycle: dict, out_dir: Path
44 | ) -> Dict[int, Dict[int, Path]]:
45 | naming_template = "Cyc{cyc:03d}_reg{reg:03d}"
46 | cyc_reg_dirs = dict()
47 | for cycle in listing_per_cycle:
48 | cyc_reg_dirs[cycle] = dict()
49 | for region in listing_per_cycle[cycle]:
50 | dir_name = naming_template.format(cyc=cycle, reg=region)
51 | cyc_reg_dirs[cycle][region] = out_dir / dir_name
52 | return cyc_reg_dirs
53 |
54 |
def _find_best_z_planes_per_region_tile(
    reference_channel_tile_paths: dict, max_z: int, x_ntiles: int, y_ntiles: int
) -> Dict[int, Dict[int, List[int]]]:
    """Run best-z selection for every region; result shape is {region: {tile: [z ids]}}."""
    return {
        region: get_best_z_plane_ids_per_tile(tile_paths, x_ntiles, y_ntiles, max_z)
        for region, tile_paths in reference_channel_tile_paths.items()
    }
65 |
66 |
def _map_best_z_planes_in_channel_to_output_plane(
    channel_paths: dict, out_dir: Path, best_z_plane_per_tile: dict
) -> List[Tuple[List[Path], Path]]:
    """Pair, per tile, the selected source z-plane paths with the single output path
    they will be combined into.
    """
    mapping = list()
    for tile, zplane_paths in channel_paths.items():
        selected_ids = best_z_plane_per_tile[tile]  # z ids chosen for this tile
        src_paths = [zplane_paths[plane_id] for plane_id in selected_ids]

        # Output name is derived from the first selected plane's file name
        dst_name = _change_image_file_name(src_paths[0].name)
        dst_path = Path(out_dir).joinpath(dst_name)

        mapping.append((src_paths, dst_path))

    return mapping
86 |
87 |
def _select_best_z_plane_paths(
    listing: Dict[int, Dict[int, Dict[int, Dict[int, Dict[int, Path]]]]],
    out_dirs: Dict[int, Dict[int, Path]],
    best_z_plane_per_region: Dict[int, Dict[int, List[int]]],
) -> Dict[int, Dict[int, Dict[int, Dict[int, List[Tuple[List[Path], Path]]]]]]:
    """Creates a map of several raw planes that will be processed into one image.

    listing is {cycle: {region: {channel: {tile: {zplane: path}}}}}. The result
    maps every cycle/region/channel/tile to a list of
    (source z-plane paths, combined output path) pairs, where the source planes
    are those chosen by best-z selection for that tile.
    """
    best_z_plane_paths = dict()
    for cycle in listing:
        best_z_plane_paths[cycle] = dict()
        for region in listing[cycle]:
            best_z_plane_paths[cycle][region] = dict()
            this_cyc_reg_out_dir = out_dirs[cycle][region]
            this_region_best_z_planes = best_z_plane_per_region[region]
            for channel in listing[cycle][region]:
                best_z_plane_paths[cycle][region][channel] = dict()
                for tile, zplane_dict in listing[cycle][region][channel].items():
                    best_z_ids = this_region_best_z_planes[tile]
                    src_paths = [zplane_dict[_id] for _id in best_z_ids]

                    # Output name derives from the first selected plane's file name
                    dst_name = _change_image_file_name(src_paths[0].name)
                    dst_path = this_cyc_reg_out_dir / dst_name

                    # setdefault replaces the manual append-or-create branching
                    best_z_plane_paths[cycle][region][channel].setdefault(tile, []).append(
                        (src_paths, dst_path)
                    )
    return best_z_plane_paths
124 |
125 |
def get_best_z_dirs_and_paths(
    img_dirs: List[Path],
    out_dir: Path,
    num_channels_per_cycle: int,
    max_z: int,
    x_ntiles: int,
    y_ntiles: int,
    reference_channel_id: int,
) -> Tuple[
    Dict[int, Dict[int, Path]],
    Dict[int, Dict[int, Dict[int, Dict[int, List[Tuple[List[Path], Path]]]]]],
]:
    """Compute per-cycle/region output dirs and the src->dst plane mapping
    used for best-focus selection.
    """
    listing_per_cycle = create_listing_for_each_cycle_region(img_dirs)
    best_z_dirs = _create_dirs_for_each_cycle_region(listing_per_cycle, out_dir)

    ref_channel_paths = _get_reference_channel_paths(
        listing_per_cycle, num_channels_per_cycle, reference_channel_id
    )
    best_z_per_region = _find_best_z_planes_per_region_tile(
        ref_channel_paths, max_z, x_ntiles, y_ntiles
    )
    best_z_plane_paths = _select_best_z_plane_paths(
        listing_per_cycle, best_z_dirs, best_z_per_region
    )
    return best_z_dirs, best_z_plane_paths
150 |
151 |
def find_best_z_paths_and_dirs(
    dataset_info: Dict[str, Any], img_dirs: List[Path], out_dir: Path
) -> Tuple[
    Dict[int, Dict[int, Path]],
    Dict[int, Dict[int, Dict[int, Dict[int, List[Tuple[List[Path], Path]]]]]],
]:
    """Unpack the needed dataset parameters and delegate to get_best_z_dirs_and_paths."""
    return get_best_z_dirs_and_paths(
        img_dirs,
        out_dir,
        dataset_info["num_channels"],
        dataset_info["num_z_planes"],
        dataset_info["num_tiles_x"],
        dataset_info["num_tiles_y"],
        dataset_info["reference_channel"],
    )
174 |
--------------------------------------------------------------------------------
/bin/best_focus/file_manipulation.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from typing import Dict, List, Tuple
3 |
4 | import dask
5 | import numpy as np
6 | import tifffile as tif
7 |
8 |
def make_dir_if_not_exists(dir_path: Path):
    """Create dir_path (and any missing parents) if it does not exist.

    Uses exist_ok=True instead of a check-then-create pair, which had a
    race window between exists() and mkdir().
    """
    dir_path.mkdir(parents=True, exist_ok=True)
12 |
13 |
def project_stack(path_list: List[Path]):
    """Read the given z-plane TIFFs, stack them, and return their per-pixel
    mean rounded back to the stack's original dtype.
    """
    planes = [tif.imread(str(path)) for path in path_list]
    stack = np.stack(planes, axis=0)
    return np.round(stack.mean(axis=0)).astype(stack.dtype)
20 |
21 |
def process_images(src, dst):
    """Read, take average of several z-planes, write"""
    averaged = project_stack(src)
    tif.imwrite(str(dst), averaged)
26 |
27 |
def process_images_parallelized(best_z_plane_paths: List[tuple]):
    """Average and write every (src plane list, dst path) pair in parallel
    using dask's process scheduler.
    """
    tasks = [dask.delayed(process_images)(src, dst) for src, dst in best_z_plane_paths]
    dask.compute(*tasks, scheduler="processes")
34 |
35 |
def process_z_planes_and_save_to_out_dirs(
    best_z_out_dirs: Dict[int, Dict[int, Path]],
    best_z_plane_paths: Dict[int, Dict[int, Dict[int, Dict[int, List[Tuple[List[Path], Path]]]]]],
):
    """Create every output directory, then average-and-write the selected
    z-planes for each cycle/region/channel/tile.
    """
    for region_dirs in best_z_out_dirs.values():
        for dir_path in region_dirs.values():
            make_dir_if_not_exists(dir_path)

    for region_map in best_z_plane_paths.values():
        for channel_map in region_map.values():
            for tile_map in channel_map.values():
                for paths in tile_map.values():
                    process_images_parallelized(paths)
49 |
--------------------------------------------------------------------------------
/bin/best_focus/run_best_focus_selection.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 | import sys
4 | from pathlib import Path
5 | from typing import List
6 |
7 | sys.path.append("/opt/")
8 | from best_z_paths import find_best_z_paths_and_dirs
9 | from file_manipulation import process_z_planes_and_save_to_out_dirs
10 |
11 | from pipeline_utils.pipeline_config_reader import load_dataset_info
12 |
13 |
def make_dir_if_not_exists(dir_path: Path):
    """Create dir_path (and any missing parents) if it does not exist.

    Uses exist_ok=True instead of a check-then-create pair, which had a
    race window between exists() and mkdir().
    """
    dir_path.mkdir(parents=True, exist_ok=True)
17 |
18 |
def get_img_dirs(dataset_dir: Path) -> List[Path]:
    """Return absolute paths of the immediate subdirectories of dataset_dir."""
    dataset_dir = dataset_dir.absolute()
    subdir_names = next(os.walk(dataset_dir))[1]
    return [(dataset_dir / name).absolute() for name in subdir_names]
24 |
25 |
def main(data_dir: Path, pipeline_config_path: Path):
    """Select best-focus z-planes for the dataset and write the averaged
    planes to /output/best_focus.
    """
    best_focus_dir = Path("/output/best_focus")
    make_dir_if_not_exists(best_focus_dir)

    dataset_info = load_dataset_info(pipeline_config_path)
    img_dirs = get_img_dirs(data_dir)
    dirs_and_paths = find_best_z_paths_and_dirs(dataset_info, img_dirs, best_focus_dir)
    process_z_planes_and_save_to_out_dirs(*dirs_and_paths)
35 |
36 |
37 | if __name__ == "__main__":
38 | parser = argparse.ArgumentParser()
39 | parser.add_argument("--data_dir", type=Path, help="path to directory with dataset directory")
40 | parser.add_argument(
41 | "--pipeline_config_path", type=Path, help="path to pipelineConfig.json file"
42 | )
43 | args = parser.parse_args()
44 | main(args.data_dir, args.pipeline_config_path)
45 |
--------------------------------------------------------------------------------
/bin/codex_stitching/bigstitcher_dataset_meta.py:
--------------------------------------------------------------------------------
1 | import xml.dom.minidom
2 | import xml.etree.ElementTree as ET
3 | from copy import deepcopy
4 | from pathlib import Path
5 | from typing import Tuple
6 |
7 | import numpy as np
8 |
9 |
def convert_location(x, y):
    """Render a 3x4 affine matrix string (row-major) whose translation column is (x, y, 0)."""
    return "1.0 0.0 0.0 {x} 0.0 1.0 0.0 {y} 0.0 0.0 1.0 0.0".format(x=x, y=y)
13 |
14 |
def create_meta(file_pattern_str, num_tiles, tile_shape, tile_locations):
    """Build a BigStitcher/BigDataViewer dataset XML (SpimData v0.2) describing a
    single-channel, single-illumination, single-timepoint tiled acquisition.

    :param file_pattern_str: image-loader file pattern (tile index substituted by BigStitcher)
    :param num_tiles: total number of tiles
    :param tile_shape: (height, width) of one tile in pixels
    :param tile_locations: per-tile (x, y) positions; one pair per tile
    :return: the dataset XML as a string

    Fix: removed the dead ``declaration = ''`` prefix variable —
    ET.tostring(..., encoding="utf-8") already emits the XML declaration —
    and dropped unused local bindings for side-effect-only SubElement calls.
    """
    root = ET.Element("SpimData", {"version": "0.2"})
    ET.SubElement(root, "BasePath", {"type": "relative"}).text = "."
    sequence_description = ET.SubElement(root, "SequenceDescription")

    # Image loader: where and how BigStitcher reads the tile images
    image_loader = ET.SubElement(
        sequence_description, "ImageLoader", {"format": "spimreconstruction.stack.loci"}
    )
    ET.SubElement(image_loader, "imagedirectory", {"type": "relative"}).text = "."
    ET.SubElement(image_loader, "filePattern").text = file_pattern_str
    ET.SubElement(image_loader, "layoutTimepoints").text = "0"
    ET.SubElement(image_loader, "layoutChannels").text = "0"
    ET.SubElement(image_loader, "layoutIlluminations").text = "0"
    ET.SubElement(image_loader, "layoutAngles").text = "0"
    ET.SubElement(image_loader, "layoutTiles").text = "1"
    ET.SubElement(image_loader, "imglib2container").text = "CellImgFactory"

    # One ViewSetup per tile, cloned from a template element
    view_setups = ET.SubElement(sequence_description, "ViewSetups")

    view_setup_template = ET.Element("ViewSetup")
    ET.SubElement(view_setup_template, "id").text = "0"
    ET.SubElement(view_setup_template, "name").text = "0"
    ET.SubElement(view_setup_template, "size").text = "2048 2048 1"
    voxel_size = ET.SubElement(view_setup_template, "voxelSize")
    ET.SubElement(voxel_size, "unit").text = "um"
    ET.SubElement(voxel_size, "size").text = "1.0 1.0 1.0"
    view_attributes = ET.SubElement(view_setup_template, "attributes")
    ET.SubElement(view_attributes, "illumination").text = "0"
    ET.SubElement(view_attributes, "channel").text = "0"
    ET.SubElement(view_attributes, "tile").text = "0"
    ET.SubElement(view_attributes, "angle").text = "0"
    # size string is "x y z"; tile_shape is (height, width)
    tile_shape_str = str(tile_shape[1]) + " " + str(tile_shape[0]) + " 1"
    for i in range(0, num_tiles):
        vs = deepcopy(view_setup_template)
        vs.find("id").text = str(i)
        vs.find("name").text = str(i)
        vs.find("size").text = tile_shape_str
        vs.find("attributes").find("tile").text = str(i)
        view_setups.append(vs)

    # Attribute tables: single illumination/channel/angle, one Tile entry per tile
    attrib_illumination = ET.SubElement(view_setups, "Attributes", {"name": "illumination"})
    attrib_illumination_illumination = ET.SubElement(attrib_illumination, "Illumination")
    ET.SubElement(attrib_illumination_illumination, "id").text = "0"
    ET.SubElement(attrib_illumination_illumination, "name").text = "0"

    attrib_channel = ET.SubElement(view_setups, "Attributes", {"name": "channel"})
    attrib_channel_channel = ET.SubElement(attrib_channel, "Channel")
    ET.SubElement(attrib_channel_channel, "id").text = "0"
    ET.SubElement(attrib_channel_channel, "name").text = "0"

    attrib_tile = ET.SubElement(view_setups, "Attributes", {"name": "tile"})

    attrib_tile_tile = ET.Element("Tile")
    ET.SubElement(attrib_tile_tile, "id").text = "0"
    ET.SubElement(attrib_tile_tile, "name").text = "0"
    ET.SubElement(attrib_tile_tile, "location").text = "0.0 0.0 0.0"
    for i in range(0, num_tiles):
        att = deepcopy(attrib_tile_tile)
        att.find("id").text = str(i)
        att.find("name").text = str(i + 1)  # tile names are 1-based, ids 0-based
        attrib_tile.append(att)

    attrib_angle = ET.SubElement(view_setups, "Attributes", {"name": "angle"})
    attrib_angle_angle = ET.SubElement(attrib_angle, "Angle")
    ET.SubElement(attrib_angle_angle, "id").text = "0"
    ET.SubElement(attrib_angle_angle, "name").text = "0"

    timepoints = ET.SubElement(sequence_description, "Timepoints", {"type": "pattern"})
    ET.SubElement(timepoints, "integerpattern")

    # One ViewRegistration per tile: grid translation + identity calibration
    view_registrations = ET.SubElement(root, "ViewRegistrations")

    view_registration_template = ET.Element("ViewRegistration", {"timepoint": "0", "setup": "0"})
    view_transform_translation = ET.SubElement(
        view_registration_template, "ViewTransform", {"type": "affine"}
    )
    ET.SubElement(view_transform_translation, "Name").text = "Translation to Regular Grid"
    ET.SubElement(view_transform_translation, "affine").text = (
        "1.0 0.0 0.0 -2867.2 0.0 1.0 0.0 -1024.0 0.0 0.0 1.0 0.0"
    )
    view_transform_calibration = ET.SubElement(
        view_registration_template, "ViewTransform", {"type": "affine"}
    )
    ET.SubElement(view_transform_calibration, "Name").text = "calibration"
    ET.SubElement(view_transform_calibration, "affine").text = (
        "1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0"
    )

    for i in range(0, num_tiles):
        vr = deepcopy(view_registration_template)
        vr.set("timepoint", "0")
        vr.set("setup", str(i))
        # the first ViewTransform is the translation; overwrite with this tile's location
        vr.find("ViewTransform").find("affine").text = convert_location(*tile_locations[i])
        view_registrations.append(vr)

    # Empty sections BigStitcher expects to be present
    ET.SubElement(root, "ViewInterestPoints")
    ET.SubElement(root, "BoundingBoxes")
    ET.SubElement(root, "PointSpreadFunctions")
    ET.SubElement(root, "StitchingResults")
    ET.SubElement(root, "IntensityAdjustments")

    # encoding="utf-8" makes tostring() include the XML declaration already
    xml_str = ET.tostring(root, encoding="utf-8").decode()

    return xml_str
127 |
128 |
def grid_to_snake(arr):
    """Convert a row-major grid to snake (boustrophedon) order.

    Every odd-numbered row is reversed; the input array is not modified.
    """
    snake = arr.copy()
    # Read reversed rows from the untouched input to avoid overlapping views.
    snake[1::2] = arr[1::2, ::-1]
    return snake
136 |
137 |
def generate_dataset_xml(
    x_ntiles: int,
    y_ntiles: int,
    tile_shape: Tuple[int, int],
    x_overlap: int,
    y_overlap: int,
    pattern_str: str,
    out_path: Path,
    is_snake=True,
):
    """Build a BigStitcher dataset XML for a tile grid and write it to out_path.

    Tile positions are computed on a regular grid from the tile shape minus
    the overlap; when is_snake is True the grid is reordered so positions
    follow the snake acquisition order.
    """
    num_tiles = x_ntiles * y_ntiles

    # Effective step between neighbouring tiles on each axis.
    img_sizes_x = np.zeros((y_ntiles, x_ntiles), dtype=int)
    img_sizes_y = np.zeros((y_ntiles, x_ntiles), dtype=int)
    for row in range(0, y_ntiles):
        for col in range(0, x_ntiles):
            img_sizes_x[row, col] = tile_shape[1] - x_overlap
            img_sizes_y[row, col] = tile_shape[0] - y_overlap

    # First column/row starts at 0; positions are cumulative step sizes.
    # NOTE(review): the cumsum uses sizes[:, 1:] rather than sizes[:, :-1];
    # this is equivalent only because all tiles share the same size — confirm
    # if variable tile sizes are ever introduced.
    img_positions_x = np.cumsum(
        np.concatenate((np.zeros((y_ntiles, 1)), img_sizes_x[:, 1:]), axis=1), axis=1
    )
    img_positions_y = np.cumsum(
        np.concatenate((np.zeros((1, x_ntiles)), img_sizes_y[1:, :]), axis=0), axis=0
    )

    if is_snake:
        img_positions_x = grid_to_snake(img_positions_x)
        img_positions_y = grid_to_snake(img_positions_y)

    tile_locations = list(zip(list(np.ravel(img_positions_x)), list(np.ravel(img_positions_y))))

    bs_xml = create_meta(pattern_str, num_tiles, tile_shape, tile_locations)

    pretty_xml_as_string = xml.dom.minidom.parseString(bs_xml).toprettyxml()
    with open(out_path, "w") as stream:
        stream.write(pretty_xml_as_string)
181 |
--------------------------------------------------------------------------------
/bin/codex_stitching/directory_management.py:
--------------------------------------------------------------------------------
1 | import os
2 | import shutil
3 | from math import ceil
4 | from pathlib import Path
5 | from typing import List
6 |
7 |
def make_dir_if_not_exists(dir_path: Path):
    """Create dir_path (and any missing parents) if it does not exist.

    exist_ok=True removes the check-then-create race (TOCTOU): a concurrent
    worker creating the same directory no longer causes FileExistsError.
    """
    dir_path.mkdir(parents=True, exist_ok=True)
11 |
12 |
def get_img_dirs(dataset_dir: Path) -> List[Path]:
    """Return absolute paths of the immediate subdirectories of dataset_dir."""
    dataset_dir = dataset_dir.absolute()
    # os.walk's first tuple lists the top-level directory names only.
    subdir_names = next(os.walk(dataset_dir))[1]
    return [(dataset_dir / name).absolute() for name in subdir_names]
18 |
19 |
def create_dirs_for_stitched_channels(channel_dirs: dict, out_dir: Path):
    """Mirror the {cycle: {region: {channel: dir}}} tree under out_dir.

    For every channel directory a directory with the same basename is created
    inside out_dir; the new paths are returned in an identically nested dict.
    """
    stitched_channel_dirs = dict()
    for cycle, regions in channel_dirs.items():
        stitched_channel_dirs[cycle] = {}
        for region, channels in regions.items():
            stitched_channel_dirs[cycle][region] = {}
            for channel, dir_path in channels.items():
                stitched_path = out_dir.joinpath(Path(dir_path).name)
                make_dir_if_not_exists(stitched_path)
                stitched_channel_dirs[cycle][region][channel] = stitched_path

    return stitched_channel_dirs
33 |
34 |
def get_ref_channel_dir_per_region(
    channel_dirs: dict,
    stitched_channel_dirs: dict,
    num_channels_per_cycle: int,
    reference_channel_id: int,
):
    """Locate the reference channel's directory in every region.

    The global 1-based reference_channel_id is split into a cycle index and
    an in-cycle channel id, then both the raw and the stitched directory
    trees are indexed per region.

    Returns a pair of dicts: ({region: raw_dir}, {region: stitched_dir}).
    """
    ref_cycle_id = ceil(reference_channel_id / num_channels_per_cycle) - 1
    ref_cycle = sorted(channel_dirs.keys())[ref_cycle_id]
    in_cycle_ref_channel_id = reference_channel_id - ref_cycle_id * num_channels_per_cycle

    reference_channel_dir = {
        region: dirs[in_cycle_ref_channel_id]
        for region, dirs in channel_dirs[ref_cycle].items()
    }
    stitched_ref_channel_dir = {
        region: dirs[in_cycle_ref_channel_id]
        for region, dirs in stitched_channel_dirs[ref_cycle].items()
    }
    return reference_channel_dir, stitched_ref_channel_dir
56 |
57 |
def create_output_dirs_for_tiles(
    stitched_channel_dirs: dict, out_dir: Path, dir_naming_template: str
):
    """Create one tile-output directory per (cycle, region) under out_dir.

    dir_naming_template must accept "cycle" and "region" format fields.
    Returns {cycle: {region: dir_path}}.
    """
    new_tiles_dirs = dict()
    for cycle, regions in stitched_channel_dirs.items():
        new_tiles_dirs[cycle] = {}
        for region in regions:
            tiles_dir = out_dir.joinpath(dir_naming_template.format(cycle=cycle, region=region))
            make_dir_if_not_exists(tiles_dir)
            new_tiles_dirs[cycle][region] = tiles_dir

    return new_tiles_dirs
71 |
72 |
def remove_temp_dirs(stitched_channel_dirs: dict):
    """Recursively delete every channel directory recorded in the nested dict."""
    for regions in stitched_channel_dirs.values():
        for channels in regions.values():
            for dir_path in channels.values():
                shutil.rmtree(str(dir_path))
78 |
79 |
def check_if_images_in_dir(dir_path: Path) -> bool:
    """Return True if dir_path contains at least one TIFF file.

    The extension check is case-insensitive, so files such as IMG.TIF are
    also detected. any() short-circuits on the first match instead of
    materializing the full listing.
    """
    allowed_extensions = (".tif", ".tiff")
    return any(f.suffix.lower() in allowed_extensions for f in dir_path.iterdir())
88 |
89 |
def check_stitched_dirs(stitched_channel_dirs: dict):
    """Verify that every stitched channel directory contains an image.

    Prints a per-directory report and raises ValueError when any directory
    ended up without a TIFF image (i.e. BigStitcher most likely failed).
    """
    print("\nChecking if BigStitcher produced image:")
    report_lines = []
    all_ok = True
    for cycle in stitched_channel_dirs:
        for region in stitched_channel_dirs[cycle]:
            for channel, dir_path in stitched_channel_dirs[cycle][region].items():
                if check_if_images_in_dir(dir_path):
                    report_lines.append(str(dir_path) + " passed")
                else:
                    report_lines.append(str(dir_path) + " no image in dir")
                    all_ok = False

    print("\n".join(report_lines))

    if not all_ok:
        raise ValueError(
            "Probably there was an error while running BigStitcher. "
            + "There is no image in one or more directories."
        )
111 |
--------------------------------------------------------------------------------
/bin/codex_stitching/generate_bigstitcher_macro.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | # from datetime import datetime
4 | from bigstitcher_dataset_meta import generate_dataset_xml
5 |
6 |
class BigStitcherMacro:
    """Builds the ImageJ macro that estimates BigStitcher stitching parameters.

    Configure the public attributes, then call generate() to write both the
    macro file and the accompanying BigStitcher dataset XML into img_dir.
    """

    def __init__(self):
        self.img_dir = Path(".")
        self.out_dir = Path(".")
        self.xml_file_name = "dataset.xml"
        self.pattern = "{xxxxx}.tif"

        # range: 1-5 or list: 1,2,3,4,5
        self.num_tiles = 1

        self.num_tiles_x = 1
        self.num_tiles_y = 1

        # (height, width), including the overlap region
        self.tile_shape = (1440, 1920)

        # overlap in pixels
        self.overlap_x = 10
        self.overlap_y = 10
        self.overlap_z = 1

        # distance in um
        self.pixel_distance_x = 1
        self.pixel_distance_y = 1
        self.pixel_distance_z = 1

        self.tiling_mode = "snake"
        self.is_snake = True
        self.region = 1

        self.path_to_xml_file = Path(".")

        self.__location = Path(__file__).parent.resolve()

    def generate(self):
        """Write the stitching macro and dataset XML; return the macro path."""
        self.make_dir_if_not_exists(self.out_dir)
        self.create_path_to_xml_file()
        self.check_if_tiling_mode_is_snake()

        formatted_macro = self.replace_values_in_macro()
        print("fiji macro script for estimation of stitching parameters")
        print(formatted_macro)
        macro_file_path = self.write_to_temp_macro_file(formatted_macro)

        generate_dataset_xml(
            self.num_tiles_x,
            self.num_tiles_y,
            self.tile_shape,
            self.overlap_x,
            self.overlap_y,
            self.pattern,
            self.path_to_xml_file,
            self.is_snake,
        )

        return macro_file_path

    def make_dir_if_not_exists(self, dir_path: Path):
        """Create dir_path with parents if it is missing."""
        if not dir_path.exists():
            dir_path.mkdir(parents=True)

    def create_path_to_xml_file(self):
        """Resolve the dataset XML location inside img_dir."""
        self.path_to_xml_file = self.img_dir.joinpath(self.xml_file_name)

    def check_if_tiling_mode_is_snake(self):
        """Sync the is_snake flag with the tiling_mode string."""
        if self.tiling_mode == "snake":
            self.is_snake = True
        else:
            self.is_snake = False

    def convert_tiling_mode(self, tiling_mode):
        """Map a pipeline tiling-mode name to BigStitcher's grid-move string.

        Raises ValueError for unknown modes. (Previously an unknown mode fell
        through both branches and crashed with UnboundLocalError.)
        """
        if tiling_mode == "snake":
            bigstitcher_tiling_mode = "[Snake: Right & Down ]"
        elif tiling_mode == "grid":
            bigstitcher_tiling_mode = "[Grid: Right & Down ]"
        else:
            raise ValueError(f"Unsupported tiling mode: {tiling_mode!r}")
        return bigstitcher_tiling_mode

    def replace_values_in_macro(self):
        """Fill the macro template with this instance's configuration."""
        macro_template = self.estimate_stitch_param_macro_template
        formatted_macro = macro_template.format(
            img_dir=self.path_to_str(self.img_dir),
            out_dir=self.path_to_str(self.out_dir),
            path_to_xml_file=self.path_to_str(self.path_to_xml_file),
            pattern=self.path_to_str(self.img_dir.joinpath(self.pattern)),
            num_tiles=self.make_range(self.num_tiles),
            num_tiles_x=self.num_tiles_x,
            num_tiles_y=self.num_tiles_y,
            overlap_x=self.overlap_x,
            overlap_y=self.overlap_y,
            overlap_z=self.overlap_z,
            pixel_distance_x=self.pixel_distance_x,
            pixel_distance_y=self.pixel_distance_y,
            pixel_distance_z=self.pixel_distance_z,
            tiling_mode=self.convert_tiling_mode(self.tiling_mode),
        )
        return formatted_macro

    def write_to_temp_macro_file(self, formatted_macro):
        """Write the macro into img_dir as regN_stitch_macro.ijm; return its path."""
        file_name = "reg" + str(self.region) + "_stitch_macro.ijm"
        macro_file_path = self.img_dir.joinpath(file_name)
        with open(macro_file_path, "w") as f:
            f.write(formatted_macro)
        return macro_file_path

    def make_range(self, number):
        """Render 1..number as the comma-separated list BigStitcher expects."""
        return ",".join([str(n) for n in range(1, number + 1)])

    def path_to_str(self, path: Path):
        """Absolute POSIX-style string form of a path (as ImageJ expects)."""
        return str(path.absolute().as_posix())

    estimate_stitch_param_macro_template = """
    // calculate pairwise shifts
    run("Calculate pairwise shifts ...",
    "select={path_to_xml_file}" +
    " process_angle=[All angles]" +
    " process_channel=[All channels]" +
    " process_illumination=[All illuminations]" +
    " process_tile=[All tiles]" +
    " process_timepoint=[All Timepoints]" +
    " method=[Phase Correlation]" +
    " show_expert_algorithm_parameters" +
    " downsample_in_x=1" +
    " downsample_in_y=1" +
    " number=5" +
    " minimal=10" +
    " subpixel");

    // filter shifts with 0.7 corr. threshold
    run("Filter pairwise shifts ...",
    "select={path_to_xml_file}" +
    " filter_by_link_quality" +
    " min_r=0.7" +
    " max_r=1" +
    " max_shift_in_x=0" +
    " max_shift_in_y=0" +
    " max_shift_in_z=0" +
    " max_displacement=0");

    // do global optimization
    run("Optimize globally and apply shifts ...",
    "select={path_to_xml_file}" +
    " process_angle=[All angles]" +
    " process_channel=[All channels]" +
    " process_illumination=[All illuminations]" +
    " process_tile=[All tiles]" +
    " process_timepoint=[All Timepoints]" +
    " relative=2.500" +
    " absolute=3.500" +
    " global_optimization_strategy=[Two-Round using Metadata to align unconnected Tiles]" +
    " fix_group_0-0,");

    run("Quit");
    eval("script", "System.exit(0);");

    """
161 |
162 |
class FuseMacro:
    """Builds and writes the ImageJ macro that fuses a stitched dataset.

    Set img_dir (the directory containing dataset.xml) and out_dir, then call
    generate().
    """

    def __init__(self):
        self.img_dir = Path(".")
        self.xml_file_name = "dataset.xml"
        self.out_dir = Path(".")
        self.__location = Path(__file__).parent.absolute()

    def generate(self):
        """Write fuse_macro.ijm into img_dir and return its path.

        (Fix: the path was previously computed but never returned, leaving the
        API inconsistent with BigStitcherMacro.generate().)
        """
        formatted_macro = self.replace_values_in_macro()
        macro_file_path = self.write_to_macro_file_in_channel_dir(self.img_dir, formatted_macro)
        return macro_file_path

    def replace_values_in_macro(self):
        """Fill the fuse-macro template with this instance's paths."""
        macro_template = self.fuse_macro_template
        formatted_macro = macro_template.format(
            img_dir=self.path_to_str(self.img_dir),
            path_to_xml_file=self.path_to_str(self.img_dir.joinpath(self.xml_file_name)),
            out_dir=self.path_to_str(self.out_dir),
        )
        return formatted_macro

    def write_to_macro_file_in_channel_dir(self, img_dir: Path, formatted_macro: str):
        """Write the formatted macro into img_dir/fuse_macro.ijm; return its path."""
        macro_file_path = img_dir.joinpath("fuse_macro.ijm")
        with open(macro_file_path, "w") as f:
            f.write(formatted_macro)
        return macro_file_path

    def path_to_str(self, path: Path):
        """Absolute POSIX-style string form of a path (as ImageJ expects)."""
        return str(path.absolute().as_posix())

    fuse_macro_template = """
    // fuse dataset, save as TIFF
    run("Fuse dataset ...",
    "select={path_to_xml_file}" +
    " process_angle=[All angles]" +
    " process_channel=[All channels]" +
    " process_illumination=[All illuminations]" +
    " process_tile=[All tiles]" +
    " process_timepoint=[All Timepoints]" +
    " bounding_box=[All Views]" +
    " downsampling=1" +
    " pixel_type=[16-bit unsigned integer]" +
    " interpolation=[Linear Interpolation]" +
    " image=[Precompute Image]" +
    " interest_points_for_non_rigid=[-= Disable Non-Rigid =-]" +
    " blend produce=[Each timepoint & channel]" +
    " fused_image=[Save as (compressed) TIFF stacks]" +
    " output_file_directory={out_dir}");

    run("Quit");
    eval("script", "System.exit(0);");

    """
215 |
--------------------------------------------------------------------------------
/bin/codex_stitching/image_stitching.py:
--------------------------------------------------------------------------------
1 | import platform
2 | import shutil
3 | import subprocess
4 | from pathlib import Path
5 | from typing import List
6 |
7 | import dask
8 | import tifffile as tif
9 | from directory_management import (
10 | check_stitched_dirs,
11 | create_dirs_for_stitched_channels,
12 | get_ref_channel_dir_per_region,
13 | )
14 | from generate_bigstitcher_macro import BigStitcherMacro, FuseMacro
15 |
16 |
def get_image_path_in_dir(dir_path: Path) -> Path:
    """Return the first TIFF file in dir_path (in sorted name order).

    Sorting makes the selection deterministic (iterdir order is filesystem
    dependent).

    Raises:
        FileNotFoundError: if the directory contains no .tif/.tiff file
            (previously this crashed with a bare IndexError).
    """
    allowed_extensions = (".tif", ".tiff")
    for f in sorted(dir_path.iterdir()):
        if f.suffix in allowed_extensions:
            return f
    raise FileNotFoundError(f"No TIFF image found in {dir_path}")
22 |
23 |
def generate_bigstitcher_macro_for_reference_channel(
    reference_channel_dir: Path, out_dir: Path, dataset_info: dict, region: int
) -> Path:
    """Configure a BigStitcherMacro from dataset_info and write it to disk.

    Returns the path of the generated macro file.
    """
    macro = BigStitcherMacro()
    macro.img_dir = reference_channel_dir
    macro.out_dir = out_dir
    macro.pattern = "{xxxxx}.tif"
    # The stored tile shape includes the overlap region on each axis.
    macro.tile_shape = (
        dataset_info["tile_height"] + dataset_info["overlap_y"],
        dataset_info["tile_width"] + dataset_info["overlap_x"],
    )
    # These attributes map 1:1 onto keys of dataset_info.
    for attr in (
        "num_tiles",
        "num_tiles_x",
        "num_tiles_y",
        "overlap_x",
        "overlap_y",
        "overlap_z",
        "pixel_distance_x",
        "pixel_distance_y",
        "pixel_distance_z",
        "tiling_mode",
    ):
        setattr(macro, attr, dataset_info[attr])
    macro.region = region
    return macro.generate()
51 |
52 |
def run_bigstitcher(bigstitcher_macro_path: Path):
    """Run an ImageJ/BigStitcher macro headlessly.

    It is expected that ImageJ is added to the system PATH.

    Raises:
        ValueError: on an unsupported platform.
        Exception: if ImageJ exits with a non-zero status; its stderr is
            included in the message.
    """
    if platform.system() == "Windows":
        imagej_name = "ImageJ-win64"
    elif platform.system() == "Linux":
        imagej_name = "ImageJ-linux64"
    elif platform.system() == "Darwin":
        imagej_name = "ImageJ-macosx"
    else:
        raise ValueError(f"unsupported platform: {platform.system()}")

    command = imagej_name + " --headless --console -macro " + str(bigstitcher_macro_path)
    print("Started running BigStitcher for", str(bigstitcher_macro_path))
    # check=False: with check=True subprocess raised CalledProcessError on
    # failure, which made the explicit error branch below (with the helpful
    # stderr message) unreachable dead code.
    res = subprocess.run(
        command, shell=True, check=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )
    if res.returncode == 0:
        print("Finished", str(bigstitcher_macro_path))
    else:
        raise Exception(
            "There was an error while running the BigStitcher for "
            + str(bigstitcher_macro_path)
            + "\n"
            + res.stderr.decode("utf-8")
        )
79 |
80 |
def run_bigstitcher_for_ref_channel_per_region(
    ref_channel_dir_per_region: dict,
    ref_channel_stitched_dir_per_region: dict,
    info_for_bigstitcher: dict,
):
    """Generate and run a stitching-parameter macro for every region."""
    for region, ref_channel_dir in ref_channel_dir_per_region.items():
        stitched_dir = ref_channel_stitched_dir_per_region[region]
        macro_path = generate_bigstitcher_macro_for_reference_channel(
            ref_channel_dir, stitched_dir, info_for_bigstitcher, region
        )
        run_bigstitcher(macro_path)
93 |
94 |
def copy_dataset_xml_to_channel_dirs(ref_channel_dir: Path, other_channel_dirs: List[Path]):
    """Copy the reference channel's dataset.xml into each listed channel dir.

    Copying a directory's dataset.xml onto itself is silently skipped.
    """
    src = ref_channel_dir.joinpath("dataset.xml")
    for channel_dir in other_channel_dirs:
        try:
            shutil.copy(src, channel_dir.joinpath("dataset.xml"))
        except shutil.SameFileError:
            continue
103 |
104 |
def copy_fuse_macro_to_channel_dirs(channel_dirs: List[Path], channel_stitched_dirs: List[Path]):
    """Write a fuse_macro.ijm into each channel dir, targeting its stitched dir.

    channel_stitched_dirs is indexed positionally, so it must be at least as
    long as channel_dirs.
    """
    macro = FuseMacro()
    for idx, src_dir in enumerate(channel_dirs):
        macro.img_dir = src_dir
        macro.xml_file_name = "dataset.xml"
        macro.out_dir = channel_stitched_dirs[idx]
        macro.generate()
112 |
113 |
def copy_bigsticher_files_to_dirs(
    channel_dirs: dict, stitched_channel_dirs: dict, ref_channel_dir_per_region: dict
):
    """Distribute the estimated dataset.xml and a fuse macro to every channel dir."""
    for cycle, regions in channel_dirs.items():
        for region, channels in regions.items():
            ref_dir = ref_channel_dir_per_region[region]
            src_dirs = list(channels.values())
            dst_dirs = list(stitched_channel_dirs[cycle][region].values())

            copy_dataset_xml_to_channel_dirs(ref_dir, src_dirs)
            copy_fuse_macro_to_channel_dirs(src_dirs, dst_dirs)
125 |
126 |
def run_stitching_for_all_channels(channel_dirs: dict):
    """Run every channel directory's fuse macro in parallel via dask processes."""
    tasks = []
    for regions in channel_dirs.values():
        for channels in regions.values():
            for dir_path in channels.values():
                fuse_macro_path = dir_path.joinpath("fuse_macro.ijm")
                tasks.append(dask.delayed(run_bigstitcher)(fuse_macro_path))

    dask.compute(*tasks, scheduler="processes")
136 |
137 |
def get_stitched_image_shape(ref_channel_stitched_dir_per_region):
    """Read the shape of the first region's stitched reference image.

    Only the first region encountered is inspected; all regions are assumed
    to produce equally sized stitched images (not verified here).
    """
    for dir_path in ref_channel_stitched_dir_per_region.values():
        stitched_image_path = get_image_path_in_dir(dir_path)
        break
    with tif.TiffFile(stitched_image_path) as TF:
        stitched_image_shape = TF.series[0].shape

    return stitched_image_shape
146 |
147 |
def stitch_images(channel_dirs, dataset_meta, out_dir):
    """Stitch every channel of every cycle/region with BigStitcher.

    Stitching parameters are estimated once per region on the reference
    channel and then reused to fuse all remaining channels.

    Returns the nested dict of stitched channel directories and the shape of
    the stitched reference image.
    """
    ref_channel_id = int(dataset_meta["reference_channel"])
    num_channels_per_cycle = dataset_meta["num_channels"]

    stitched_channel_dirs = create_dirs_for_stitched_channels(channel_dirs, out_dir)

    ref_dirs, stitched_ref_dirs = get_ref_channel_dir_per_region(
        channel_dirs, stitched_channel_dirs, num_channels_per_cycle, ref_channel_id
    )

    print("\nEstimating stitching parameters")
    run_bigstitcher_for_ref_channel_per_region(ref_dirs, stitched_ref_dirs, dataset_meta)

    print("\nStitching channels")
    copy_bigsticher_files_to_dirs(channel_dirs, stitched_channel_dirs, ref_dirs)
    run_stitching_for_all_channels(channel_dirs)
    check_stitched_dirs(stitched_channel_dirs)
    stitched_img_shape = get_stitched_image_shape(stitched_ref_dirs)

    return stitched_channel_dirs, stitched_img_shape
171 |
--------------------------------------------------------------------------------
/bin/codex_stitching/run_stitching.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 | import shutil
4 | import sys
5 | from datetime import datetime
6 | from pathlib import Path
7 | from typing import Dict, List
8 |
9 | import dask
10 |
11 | sys.path.append("/opt/")
12 | from directory_management import (
13 | create_output_dirs_for_tiles,
14 | get_img_dirs,
15 | make_dir_if_not_exists,
16 | remove_temp_dirs,
17 | )
18 | from image_stitching import stitch_images
19 |
20 | from pipeline_utils.dataset_listing import (
21 | create_listing_for_each_cycle_region,
22 | get_img_dirs,
23 | )
24 | from pipeline_utils.pipeline_config_reader import load_dataset_info
25 |
26 |
def print_img_dirs(img_dirs: List[Path]):
    """Print each image directory path on its own line, under a header."""
    print("Image directories:")
    for path in img_dirs:
        print(str(path))
31 |
32 |
def load_pipeline_config(pipeline_config_path: Path) -> dict:
    """Parse the pipeline JSON config file and return its contents."""
    with open(pipeline_config_path, "r") as stream:
        return json.load(stream)
38 |
39 |
def get_file_listing(data_dir: Path):
    """Build the nested cycle/region/channel file listing for data_dir."""
    return create_listing_for_each_cycle_region(get_img_dirs(data_dir))
44 |
45 |
def copy_to_channel_dirs(listing, base_channel_dir: Path) -> Dict[int, Dict[int, Dict[int, Path]]]:
    """Regroup tile images into one directory per (cycle, region, channel).

    listing is nested as {cycle: {region: {channel: {tile: {zplane: src_path}}}}}
    -- presumably produced by create_listing_for_each_cycle_region; verify
    against pipeline_utils.dataset_listing.

    Returns {cycle: {region: {channel: dir_path}}} for the created directories.
    """
    new_dir_name_template = "Cyc{cyc:03d}_Reg{reg:03d}_Ch{ch:03d}"
    dst_name_template = "{tile:05d}.tif"
    channel_dirs = dict()
    for cycle in listing:
        channel_dirs[cycle] = dict()
        for region in listing[cycle]:
            channel_dirs[cycle][region] = dict()
            for channel in listing[cycle][region]:
                dir_name = new_dir_name_template.format(cyc=cycle, reg=region, ch=channel)
                dir_path = base_channel_dir / dir_name
                make_dir_if_not_exists(dir_path)
                channel_dirs[cycle][region][channel] = dir_path
                for tile in listing[cycle][region][channel]:
                    for zplane, src in listing[cycle][region][channel][tile].items():
                        # NOTE(review): dst depends only on the tile id, so when a
                        # tile has several z-planes each copy overwrites the
                        # previous one -- presumably a single best-focus plane is
                        # expected here; confirm upstream.
                        dst_name = dst_name_template.format(tile=tile)
                        dst = dir_path / dst_name
                        shutil.copy(src, dst)
    return channel_dirs
65 |
66 |
def main(data_dir: Path, pipeline_config_path: Path):
    """Entry point: regroup tiles into per-channel dirs, then stitch them."""
    start = datetime.now()
    print("\nStarted", start)

    dataset_info = load_dataset_info(pipeline_config_path)

    # Hard-coded container output locations.
    out_dir = Path("/output/stitched_images")
    base_channel_dir = Path("/output/channel_dirs")
    make_dir_if_not_exists(out_dir)
    make_dir_if_not_exists(base_channel_dir)

    dask.config.set(
        {"num_workers": dataset_info["num_concurrent_tasks"], "scheduler": "processes"}
    )

    listing = get_file_listing(data_dir)
    channel_dirs = copy_to_channel_dirs(listing, base_channel_dir)
    stitched_channel_dirs, stitched_img_shape = stitch_images(channel_dirs, dataset_info, out_dir)

    print("\nTime elapsed", datetime.now() - start)
87 |
88 |
if __name__ == "__main__":
    # CLI wrapper: parse the two path arguments and run the stitching pipeline.
    parser = argparse.ArgumentParser()
    parser.add_argument("--data_dir", type=Path, help="path to directory with image directories")
    parser.add_argument(
        "--pipeline_config_path", type=Path, help="path to pipelineConfig.json file"
    )

    args = parser.parse_args()

    main(args.data_dir, args.pipeline_config_path)
99 |
--------------------------------------------------------------------------------
/bin/codex_stitching/secondary_stitcher/mask_stitching.py:
--------------------------------------------------------------------------------
1 | import gc
2 | from copy import deepcopy
3 | from typing import Dict, List, Tuple
4 |
5 | import dask
6 | import numpy as np
7 | import pandas as pd
8 | from match_masks import get_matched_masks
9 | from skimage.measure import regionprops_table
10 |
11 | Image = np.ndarray
12 |
13 |
def generate_ome_meta_for_mask(size_y: int, size_x: int, dtype, match_fraction: float) -> str:
    """Render OME-XML metadata for a stitched mask image.

    NOTE(review): the template below appears garbled in this copy of the file
    (the XML tags seem to have been stripped, leaving mostly blank lines and
    no {size_y}/{size_x}/{dtype} placeholders) -- verify against the original
    source before relying on this function's output.
    """
    template = """

















FractionOfMatchedCellsAndNuclei
{match_fraction}




"""
    ome_meta = template.format(
        size_y=size_y, size_x=size_x, dtype=np.dtype(dtype).name, match_fraction=match_fraction
    )
    return ome_meta
45 |
46 |
def get_labels_sorted_by_coordinates(img) -> List[int]:
    """Return label ids ordered by centroid position (y first, then x)."""
    props = regionprops_table(img, properties=("label", "centroid"))
    coord_df = pd.DataFrame(
        np.array((props["label"], props["centroid-0"], props["centroid-1"]))
    )
    # Columns are regions; row 0 holds labels, rows 1/2 hold the y/x centroids.
    sorted_arr = coord_df.sort_values(by=[1, 2], axis=1).to_numpy()
    return sorted_arr[0, :].tolist()
55 |
56 |
def get_new_labels(img: Image) -> np.ndarray:
    """Compute a replacement id for every unique label value in img.

    New ids follow the labels' centroid order (top-to-bottom, then
    left-to-right). The returned array is aligned with np.unique(img):
    element k is the new id for the k-th unique old value, with the first
    element fixed to 0 for the background.

    NOTE(review): new_label_ids has len(old_label_ids) entries while the
    coordinate-sorted list excludes background 0, so zip drops the last new
    id and the first-sorted region receives new id 0 (same as background) --
    verify this is intended before reusing this function elsewhere.
    """
    dtype = img.dtype
    unique_label_ids, indices = np.unique(img, return_inverse=True)

    old_label_ids = unique_label_ids.tolist()
    # Labels ordered by centroid; background 0 is absent here because
    # regionprops ignores it.
    old_label_ids_sorted_by_coord = get_labels_sorted_by_coordinates(img)

    # New id of a label = its position in the coordinate-sorted order.
    new_label_ids = list(range(0, len(old_label_ids)))
    label_pairs = zip(old_label_ids_sorted_by_coord, new_label_ids)
    label_map = {lab_pair[0]: lab_pair[1] for lab_pair in label_pairs}

    # Keep background (first unique value, assumed 0) mapped to 0.
    updated_label_ids = [0]
    for _id in old_label_ids[1:]:
        updated_label_ids.append(label_map[_id])

    new_unique_label_ids = np.array(updated_label_ids, dtype=dtype)
    return new_unique_label_ids
74 |
75 |
76 | def reset_label_ids(img, new_label_ids) -> Image:
77 | dtype = img.dtype
78 | unique_labels, indices = np.unique(img, return_inverse=True)
79 | reset_img = new_label_ids[indices].reshape(img.shape).astype(dtype)
80 | return reset_img
81 |
82 |
def remove_labels(
    img: Image, y_slice: slice, x_slice: slice, exclude_start: bool
) -> Tuple[Image, List[int]]:
    """Zero out labels that touch the given border strips of a tile.

    Any label value present inside img[y_slice, :] or img[:, x_slice] is
    removed (set to 0), except the background (0) and, when exclude_start is
    True, labels present on the first row/column of the strip (those belong
    to the neighbouring tile's overlap and are resolved there).

    Returns a modified copy of img and the sorted list of removed label ids.
    (Fix: the list is now deterministically sorted; previously it was built
    by iterating a set, so its order was an implementation detail. The
    redundant set(sorted(...)) constructions are gone, and the per-label
    masking loop is replaced by a single vectorized np.isin pass.)
    """
    keep = {0}

    candidates = set()
    if y_slice != slice(None):
        candidates.update(np.unique(img[y_slice, :]).tolist())
    if x_slice != slice(None):
        candidates.update(np.unique(img[:, x_slice]).tolist())

    if exclude_start:
        if y_slice.start is None and x_slice.start is None:
            raise ValueError("Exclude start is enabled but slice start is None")
        if y_slice.start is not None:
            first_row = (slice(y_slice.start, y_slice.start + 1), x_slice)
            keep.update(np.unique(img[first_row]).tolist())
        if x_slice.start is not None:
            first_col = (y_slice, slice(x_slice.start, x_slice.start + 1))
            keep.update(np.unique(img[first_col]).tolist())

    val_to_remove = sorted(candidates - keep)

    img_copy = img.copy()
    img_copy[np.isin(img_copy, val_to_remove)] = 0
    return img_copy, val_to_remove
122 |
123 |
def remove_overlapping_labels(img: Image, overlap: int, mode: str) -> Tuple[Image, List[int]]:
    """Strip labels from the tile borders named in mode.

    mode is matched by substring and may mention "left", "right", "top",
    "bottom". The right/bottom strips use exclude_start=True: labels touching
    the start of the strip are kept and resolved by the neighbouring tile.

    Returns the cleaned tile and the sorted list of removed label ids.
    """
    # side -> ((y_slice, x_slice), exclude_start), processed in this order.
    border_strips = (
        ("left", (slice(None), slice(None, overlap)), False),
        ("right", (slice(None), slice(-overlap, None)), True),
        ("top", (slice(None, overlap), slice(None)), False),
        ("bottom", (slice(-overlap, None), slice(None)), True),
    )

    mod_img = img.copy()
    excluded_labels = []
    for side, strip, exclude_start in border_strips:
        if side in mode:
            mod_img, removed = remove_labels(mod_img, *strip, exclude_start=exclude_start)
            excluded_labels.extend(removed)
    return mod_img, sorted(set(excluded_labels))
146 |
147 |
def find_and_remove_overlapping_labels_in_first_channel(
    tiles: List[Image], y_ntiles: int, x_ntiles: int, overlap: int
) -> Tuple[List[Image], Dict[int, Dict[int, int]]]:
    """Clean border labels of the first (reference) mask channel in parallel.

    The sides to clean depend on the tile's grid position: edge tiles keep
    their outer border, interior tiles are cleaned on both sides of each
    axis. The mode string is matched by substring inside
    remove_overlapping_labels ("top"/"bottom"/"left"/"right").

    Returns the cleaned tiles and, per tile index, a dict mapping each
    removed label id to 0 so the removal can be replayed on other channels.

    NOTE(review): with a single-row (or single-column) grid, the i == 0
    branch still schedules "bottom" (resp. "right") removal even though no
    neighbour exists on that side -- confirm this is intended.
    """
    excluded_labels = dict()
    modified_tiles = []
    task = []
    n = 0
    for i in range(0, y_ntiles):
        for j in range(0, x_ntiles):
            # Surrounding spaces keep the side tokens separated for the
            # substring match.
            label_remove_mode = ""
            if i == 0:
                label_remove_mode += " bottom "
            elif i == y_ntiles - 1:
                label_remove_mode += " top "
            else:
                label_remove_mode += " top bottom "
            if j == 0:
                label_remove_mode += " right "
            elif j == x_ntiles - 1:
                label_remove_mode += " left "
            else:
                label_remove_mode += " left right "

            task.append(
                dask.delayed(remove_overlapping_labels)(tiles[n], overlap, label_remove_mode)
            )
            n += 1
    computed_modifications = dask.compute(*task)
    for i, mod in enumerate(computed_modifications):
        modified_tiles.append(mod[0])
        # Each removed label id maps to its replacement value 0.
        excluded_labels[i] = {lab: 0 for lab in mod[1]}

    return modified_tiles, excluded_labels
181 |
182 |
def remove_overlapping_labels_in_another_channel(
    tiles: List[Image], excluded_labels: dict
) -> List[Image]:
    """Zero out, per tile, the labels that were removed from the first channel.

    excluded_labels maps tile index -> iterable of label ids to erase,
    keeping this channel consistent with the reference channel.
    """

    def zero_out(tile, labels):
        for label_id in labels:
            tile[tile == label_id] = 0
        return tile

    tasks = [
        dask.delayed(zero_out)(tile, excluded_labels[idx]) for idx, tile in enumerate(tiles)
    ]
    return list(dask.compute(*tasks))
196 |
197 |
def find_overlapping_border_labels(
    img1: Image, img2: Image, overlap: int, mode: str
) -> Dict[int, int]:
    """Find which pixels in img2 overlap pixels in img1
    Return mapping
    { img2px: img1px, }
    """
    # Select the shared overlap strip of each image.
    if mode == "horizontal":
        region1 = img1[:, -overlap:]
        region2 = img2[:, overlap : overlap * 2]
    elif mode == "vertical":
        region1 = img1[-overlap:, :]
        region2 = img2[overlap : overlap * 2, :]
    else:  # horizontal+vertical
        region1 = img1[-overlap:, -overlap:]
        region2 = img2[overlap : overlap * 2, overlap : overlap * 2]

    border_map = dict()
    # Row-major scan; the first pair where both labels are foreground wins,
    # later occurrences of an img2 label never overwrite an existing mapping.
    for old_value, new_value in zip(region2.ravel(), region1.ravel()):
        if old_value > 0 and new_value > 0 and old_value not in border_map:
            border_map[old_value] = new_value

    return border_map
230 |
231 |
def replace_overlapping_border_labels(
    img1: Image, img2: Image, overlap: int, mode: str
) -> Tuple[Image, Dict[int, int]]:
    """Replace label ids in img2 with label ids of img1.

    img2 is modified in place: each of its labels that overlaps an img1 label
    inside the shared overlap strip is renamed to the img1 label id. Returns
    img2 and the {old_img2_id: img1_id} mapping used.
    """
    border_map = find_overlapping_border_labels(img1, img2, overlap, mode)
    # to avoid merging of old and new labels
    # find old labels that have same ids as new ones
    # and add some value
    old_lab_ids = tuple(np.unique(img2).tolist())
    matches = []
    for new_lab_id in border_map.values():
        if new_lab_id in old_lab_ids:
            matches.append(new_lab_id)
    if matches != []:
        # Shift clashing old labels above both id ranges before renaming.
        # NOTE(review): if a shifted label is itself a key of border_map, the
        # replacement loop below will no longer find its original value --
        # verify this cannot happen with the label numbering used upstream.
        addition = img2.max() + max(matches)
        for value in matches:
            img2[img2 == value] += addition

    for old_value, new_value in border_map.items():
        img2[img2 == old_value] = new_value
    return img2, border_map
253 |
254 |
def find_and_replace_overlapping_border_labels_in_first_channel(
    tiles: List[Image], y_ntiles: int, x_ntiles: int, overlap: int, dtype
) -> Tuple[List[Image], Dict[int, Dict[int, int]], List[int]]:
    """Make label ids consistent across tile borders for the reference channel.

    Walks tiles in row-major order. For each tile:
      * offsets its labels by the running max of all previously seen tiles so
        ids are globally unique,
      * replaces labels in its top / left / top-left overlap regions with the
        ids already assigned in the corresponding neighbor tiles.

    Returns:
        modified tiles (same order as input),
        per-tile border maps {tile_index: {old_id: neighbor_id}},
        per-tile label offsets (one int per tile, in tile order).
    """
    previous_tile_max = 0
    # Grid of linear tile indices so neighbors can be looked up by (row, col).
    tile_ids = np.arange(0, y_ntiles * x_ntiles).reshape((y_ntiles, x_ntiles))
    modified_tiles = []
    tile_additions = []
    border_maps = dict()
    n = 0
    for i in range(0, y_ntiles):
        for j in range(0, x_ntiles):
            tile = tiles[n]
            tile = tile.astype(dtype)
            # Max BEFORE the offset: added to the running total at loop end.
            this_tile_max = tile.max()
            tile_additions.append(previous_tile_max)
            # Offset only labeled pixels; background stays 0.
            tile[np.nonzero(tile)] += previous_tile_max

            # Neighbors already processed in row-major order (or None on edges).
            if i != 0:
                top_tile_id = tile_ids[i - 1, j]
            else:
                top_tile_id = None
            if j != 0:
                left_tile_id = tile_ids[i, j - 1]
            else:
                left_tile_id = None
            if i != 0 and j != 0:
                top_left_tile_id = tile_ids[i - 1, j - 1]
            else:
                top_left_tile_id = None

            # Accumulate replacements from every neighbor; later neighbors can
            # overwrite earlier mappings for the same old id (dict.update).
            this_tile_border_map = dict()
            if top_tile_id is not None:
                tile, border_map = replace_overlapping_border_labels(
                    modified_tiles[top_tile_id], tile, overlap, "vertical"
                )
                this_tile_border_map.update(border_map)
            if left_tile_id is not None:
                tile, border_map = replace_overlapping_border_labels(
                    modified_tiles[left_tile_id], tile, overlap, "horizontal"
                )
                this_tile_border_map.update(border_map)
            if top_left_tile_id is not None:
                tile, border_map = replace_overlapping_border_labels(
                    modified_tiles[top_left_tile_id], tile, overlap, "horizontal+vertical"
                )
                this_tile_border_map.update(border_map)

            modified_tiles.append(tile)
            border_maps[n] = this_tile_border_map
            previous_tile_max += this_tile_max
            n += 1
    return modified_tiles, border_maps, tile_additions
307 |
308 |
def replace_overlapping_border_labels_in_another_channel(
    tiles: List[Image], border_maps: Dict[int, dict], tile_additions: List[int], dtype
) -> List[Image]:
    """Apply the first-channel border label replacements to another channel.

    Mirrors replace_overlapping_border_labels: each tile's labels are offset by
    the first channel's per-tile addition, ids colliding with the incoming ones
    are shifted away, and the recorded border mapping {old_id: first_channel_id}
    is applied.
    """

    def replace_values(tile, value_map, tile_addition, dtype):
        modified_tile = tile.astype(dtype)
        # Offset labels so ids are globally unique, as done in the first channel.
        modified_tile[np.nonzero(modified_tile)] += tile_addition
        if value_map != {}:
            old_lab_ids = tuple(np.unique(modified_tile).tolist())
            matches = [new_id for new_id in value_map.values() if new_id in old_lab_ids]
            if matches:
                # Shift colliding labels out of the way so the ids about to be
                # written cannot merge with unrelated existing labels.
                addition = modified_tile.max() + max(matches)
                for value in matches:
                    modified_tile[modified_tile == value] += addition
            # BUG FIX: the recorded border mapping was computed but never
            # applied here, unlike the first-channel counterpart
            # (replace_overlapping_border_labels). Apply it so label ids stay
            # consistent across channels; a no-op for ids absent from this tile.
            for old_value, new_value in value_map.items():
                modified_tile[modified_tile == old_value] = new_value
        return modified_tile

    task = []
    for i, tile in enumerate(tiles):
        task.append(dask.delayed(replace_values)(tile, border_maps[i], tile_additions[i], dtype))
    modified_tiles = dask.compute(*task)
    return list(modified_tiles)
332 |
333 |
def update_old_values(
    excluded_labels: dict, tile_additions: List[int]
) -> Dict[int, Dict[int, int]]:
    """Shift the keys of each per-tile excluded-label map by that tile's
    label offset, keeping the mapped values unchanged."""
    return {
        tile: {
            old_value + tile_additions[tile]: new_value
            for old_value, new_value in label_map.items()
        }
        for tile, label_map in excluded_labels.items()
    }
345 |
346 |
def modify_tiles_first_channel(
    tiles: List[Image], y_ntiles: int, x_ntiles: int, overlap: int, dtype
) -> Tuple[List[Image], Dict[int, Dict[int, int]], Dict[int, Dict[int, int]], List[int]]:
    """Prepare the reference channel: drop labels overlapping tile borders,
    then reconcile the remaining border labels across neighboring tiles.

    Returns (tiles, excluded label maps, border maps, per-tile label offsets).
    """
    trimmed_tiles, excluded_labels = find_and_remove_overlapping_labels_in_first_channel(
        tiles, y_ntiles, x_ntiles, overlap
    )
    reconciled = find_and_replace_overlapping_border_labels_in_first_channel(
        trimmed_tiles, y_ntiles, x_ntiles, overlap, dtype
    )
    reconciled_tiles, border_maps, tile_additions = reconciled
    return reconciled_tiles, excluded_labels, border_maps, tile_additions
362 |
363 |
def modify_tiles_another_channel(
    tiles: List[Image], excluded_labels: dict, border_maps: dict, tile_additions: list, dtype
) -> List[Image]:
    """Propagate the first channel's label exclusions and border replacements
    to another channel's tiles."""
    mod_tiles = remove_overlapping_labels_in_another_channel(tiles, excluded_labels)
    if border_maps == {}:
        return mod_tiles
    return replace_overlapping_border_labels_in_another_channel(
        mod_tiles, border_maps, tile_additions, dtype
    )
374 |
375 |
376 | def get_slices(
377 | tile_shape: tuple, overlap: int, y_tile_id: int, x_tile_id: int, y_id_max: int, x_id_max: int
378 | ) -> Tuple[Tuple[slice, slice], Tuple[slice, slice]]:
379 | if y_id_max - 1 == 0:
380 | tile_slice_y = slice(overlap, tile_shape[0] + overlap)
381 | y_f = 0
382 | y_t = tile_shape[0]
383 | elif y_tile_id == 0:
384 | tile_slice_y = slice(overlap, tile_shape[0] + overlap * 2)
385 | y_f = 0
386 | y_t = tile_shape[0] + overlap
387 | elif y_tile_id == y_id_max - 1:
388 | tile_slice_y = slice(overlap, tile_shape[0] + overlap)
389 | y_f = y_tile_id * tile_shape[0]
390 | y_t = y_f + tile_shape[0]
391 | else:
392 | tile_slice_y = slice(overlap, tile_shape[0] + overlap * 2)
393 | y_f = y_tile_id * tile_shape[0]
394 | y_t = y_f + tile_shape[0] + overlap
395 |
396 | if x_id_max - 1 == 0:
397 | tile_slice_x = slice(overlap, tile_shape[1] + overlap)
398 | x_f = 0
399 | x_t = tile_shape[1]
400 | elif x_tile_id == 0:
401 | tile_slice_x = slice(overlap, tile_shape[1] + overlap * 2)
402 | x_f = 0
403 | x_t = tile_shape[1] + overlap
404 | elif x_tile_id == x_id_max - 1:
405 | tile_slice_x = slice(overlap, tile_shape[1] + overlap)
406 | x_f = x_tile_id * tile_shape[1]
407 | x_t = x_f + tile_shape[1]
408 | else:
409 | tile_slice_x = slice(overlap, tile_shape[1] + overlap * 2)
410 | x_f = x_tile_id * tile_shape[1]
411 | x_t = x_f + tile_shape[1] + overlap
412 |
413 | tile_slice = (tile_slice_y, tile_slice_x)
414 | big_image_slice = (slice(y_f, y_t), slice(x_f, x_t))
415 |
416 | return tile_slice, big_image_slice
417 |
418 |
def stitch_mask(
    tiles: List[Image],
    y_ntiles: int,
    x_ntiles: int,
    tile_shape: list,
    dtype,
    overlap: int,
    padding: dict,
) -> Image:
    """Paste mask tiles into one labeled image and trim the slicer padding.

    Only non-zero (labeled) pixels are copied, so a tile's background cannot
    erase labels already written by a neighboring tile's overlap.
    """
    core_y = tile_shape[-2] - overlap * 2
    core_x = tile_shape[-1] - overlap * 2

    full_y = y_ntiles * core_y
    full_x = x_ntiles * core_x

    pad_y = padding["top"] + padding["bottom"]
    pad_x = padding["left"] + padding["right"]

    big_image = np.zeros((full_y, full_x), dtype=dtype)

    print("n tiles x,y:", (x_ntiles, y_ntiles))
    print("plane shape x,y:", full_x - pad_x, full_y - pad_y)

    tile_idx = 0
    for row in range(y_ntiles):
        for col in range(x_ntiles):
            tile_slice, big_image_slice = get_slices(
                (core_y, core_x), overlap, row, col, y_ntiles, x_ntiles
            )

            tile = tiles[tile_idx].astype(dtype)

            labeled = tile[tile_slice] != 0
            big_image[big_image_slice][labeled] = tile[tile_slice][labeled]
            tile_idx += 1

    # Drop the padding added before slicing (bottom/right trim only).
    return big_image[: full_y - pad_y, : full_x - pad_x]
462 |
463 |
def process_all_masks(
    tiles, tile_shape, y_ntiles, x_ntiles, overlap, padding, dtype
) -> Tuple[List[Image], str]:
    """Stitch the four mask channels into whole-region masks with consistent,
    matched label ids.

    Each input tile holds 4 planes: 0 cell, 1 nucleus, 2 cell boundary,
    3 nucleus boundary. The nucleus and cell channels drive label exclusion /
    border reconciliation; their combined maps are then applied to all four
    channels, the channels are stitched, cells are matched to nuclei, and label
    ids are renumbered consecutively.

    Returns (list of 4 stitched masks, OME-XML metadata string).
    """
    print("Started processing masks")
    tiles_cell = [t[0, :, :] for t in tiles]
    tiles_nuc = [t[1, :, :] for t in tiles]
    tiles_cell_b = [t[2, :, :] for t in tiles]
    tiles_nuc_b = [t[3, :, :] for t in tiles]
    raw_tile_groups = [tiles_cell, tiles_nuc, tiles_cell_b, tiles_nuc_b]
    print("Identifying and trimming border labels in all tiles")
    (
        mod_tiles_nuc,
        excluded_labels_nuc,
        border_maps_nuc,
        tile_additions_nuc,
    ) = modify_tiles_first_channel(tiles_nuc, y_ntiles, x_ntiles, overlap, dtype)

    (
        mod_tiles_cell,
        excluded_labels_cell,
        border_maps_cell,
        tile_additions_cell,
    ) = modify_tiles_first_channel(tiles_cell, y_ntiles, x_ntiles, overlap, dtype)

    # Merge exclusion maps of the two driver channels; on key clashes the
    # cell-channel entry wins.
    all_exclusions = deepcopy(excluded_labels_nuc)
    for tile in excluded_labels_cell:
        if tile in all_exclusions:
            for lab in excluded_labels_cell[tile]:
                all_exclusions[tile][lab] = excluded_labels_cell[tile][lab]
        else:
            all_exclusions[tile] = excluded_labels_cell[tile]

    # Merge border maps the same way (cell channel wins on clashes).
    all_border_maps = deepcopy(border_maps_nuc)
    for tile in border_maps_cell:
        if tile in all_border_maps:
            for lab in border_maps_cell[tile]:
                all_border_maps[tile][lab] = border_maps_cell[tile][lab]
        else:
            all_border_maps[tile] = border_maps_cell[tile]

    # NOTE(review): tile_additions_cell is applied to ALL four channel groups
    # while tile_additions_nuc is never used — confirm this asymmetry is
    # intentional (label ids are renumbered after matching, which may hide it).
    mod_tile_groups = []
    for tile_group in raw_tile_groups:
        mod_tile_group = modify_tiles_another_channel(
            tile_group, all_exclusions, all_border_maps, tile_additions_cell, dtype
        )
        mod_tile_groups.append(mod_tile_group)

    del raw_tile_groups
    gc.collect()
    print("Stitching masks")
    stitched_imgs = []
    for tile_group in mod_tile_groups:
        stitched_img = stitch_mask(
            tile_group, y_ntiles, x_ntiles, tile_shape, dtype, overlap, padding
        )
        stitched_imgs.append(stitched_img)

    del mod_tile_groups
    gc.collect()

    # Pair cells with nuclei; order: [cell, nucleus, cell_b, nucleus_b].
    matched_masks, fraction_matched = get_matched_masks(
        cell_mask=stitched_imgs[0],
        nucleus_mask=stitched_imgs[1],
        dtype=dtype,
        do_mismatch_repair=True,
    )
    del stitched_imgs
    gc.collect()

    # Renumber labels consecutively, using the cell mask as reference.
    new_label_ids = get_new_labels(matched_masks[0])  # cell
    reset_imgs = []
    for i in range(0, len(matched_masks)):
        reset_img = reset_label_ids(matched_masks[i], new_label_ids)
        reset_imgs.append(reset_img)

    y_size = reset_imgs[0].shape[0]
    x_size = reset_imgs[0].shape[1]
    ome_meta = generate_ome_meta_for_mask(y_size, x_size, dtype, fraction_matched)
    print("Finished processing masks")
    return reset_imgs, ome_meta
544 |
--------------------------------------------------------------------------------
/bin/codex_stitching/secondary_stitcher/match_masks.py:
--------------------------------------------------------------------------------
1 | from typing import List, Tuple
2 |
3 | import numpy as np
4 | from scipy.sparse import csr_matrix
5 | from skimage.segmentation import find_boundaries
6 |
7 | Image = np.ndarray
8 |
9 | """
10 | Package functions that repair and generate matched cell, nuclear,
11 | cell membrane and nuclear membrane segmentation masks
12 | Author: Haoran Chen
13 | Version: 1.1
14 | 08/09/2021
15 | """
16 |
17 |
def get_matched_cells(cell_arr, cell_membrane_arr, nuclear_arr, mismatch_repair):
    """Match one cell against one nucleus by pixel-coordinate overlap.

    Returns (cell_coords, nucleus_coords, mismatch_fraction) on success, or
    (False, False, False) when the pair cannot be matched. With repair enabled
    the returned nucleus is clipped to the cell interior (cell minus membrane).
    """
    cell_px = set(tuple(p) for p in cell_arr)
    membrane_px = set(tuple(p) for p in cell_membrane_arr)
    nucleus_px = set(tuple(p) for p in nuclear_arr)
    # Cell interior = cell pixels that are not membrane pixels.
    interior_px = cell_px - membrane_px
    # Nucleus pixels falling outside the cell interior are mismatched.
    mismatch_px = nucleus_px - interior_px
    mismatch_fraction = len(mismatch_px) / len(nucleus_px)
    if not mismatch_repair:
        # Strict mode: accept only a perfect fit.
        if mismatch_px:
            return False, False, False
        return np.array(list(cell_px)), np.array(list(nucleus_px)), 0
    # Repair mode: accept if at least one nucleus pixel fits, clip the rest.
    if len(mismatch_px) < len(nucleus_px):
        return np.array(list(cell_px)), np.array(list(interior_px & nucleus_px)), mismatch_fraction
    return False, False, False
36 |
37 |
def compute_M(data):
    """Build a sparse matrix whose row k stores the flat indices of all
    elements of `data` equal to k (one column per element)."""
    flat_indices = np.arange(data.size)
    row_per_element = data.ravel()
    shape = (data.max() + 1, data.size)
    return csr_matrix((flat_indices, (row_per_element, flat_indices)), shape=shape)
41 |
42 |
def get_indices_sparse(data):
    """Group pixel coordinates by label value using a sparse matrix.

    Returns a list indexed by label id; entry k holds np.where-style index
    arrays for all pixels of `data` equal to k.
    """
    sparse_index = compute_M(data)
    return [np.unravel_index(row.data, data.shape) for row in sparse_index]
46 |
47 |
def list_remove(c_list, indexes):
    """Delete items at the given indexes in-place (highest index first, so
    earlier deletions do not shift later ones) and return the list."""
    for idx in sorted(indexes, reverse=True):
        c_list.pop(idx)
    return c_list
52 |
53 |
def get_indexed_mask(mask, boundary):
    """Turn a binary boundary mask into a labeled one: each boundary pixel
    takes the label id it has in `mask`; everything else stays 0."""
    indexed = boundary * 1  # bool -> int copy
    on_boundary = np.where(indexed == 1)
    indexed[on_boundary] = mask[on_boundary]
    return indexed
59 |
60 |
def get_boundary(mask: Image):
    """Return a boundary mask where each boundary pixel keeps its label id."""
    inner_boundary = find_boundaries(mask, mode="inner")
    return get_indexed_mask(mask, inner_boundary)
65 |
66 |
def get_mask(cell_list, shape: Tuple[int]):
    """Paint each coordinate array from `cell_list` into a new mask, labeling
    them 1..len(cell_list) in list order."""
    mask = np.zeros(shape)
    for label, coords in enumerate(cell_list, start=1):
        mask[tuple(coords.T)] = label
    return mask
72 |
73 |
def get_cell_num(mask: Image):
    """Number of distinct values in the mask (background 0 included, if present)."""
    return np.unique(mask).size
76 |
77 |
def get_mismatched_fraction(
    whole_cell_mask: Image,
    nuclear_mask: Image,
    cell_matched_mask: Image,
    nuclear_matched_mask: Image,
) -> float:
    """Fraction of the combined cell+nucleus area that was NOT matched.

    Pixels present in the raw masks but absent from the matched masks count
    as mismatched; the denominator is the union of both raw masks' coverage.
    """
    # Binarize all masks (label id -> 0/1 presence).
    cells = np.sign(whole_cell_mask)
    nuclei = np.sign(nuclear_mask)
    matched_cells = np.sign(cell_matched_mask)
    matched_nuclei = np.sign(nuclear_matched_mask)

    total_area = np.sum(np.sign(cells + nuclei))
    leftover = (nuclei - matched_nuclei) + (cells - matched_cells)
    mismatched_area = np.sum(np.sign(leftover))
    return mismatched_area / total_area
97 |
98 |
def get_fraction_matched_cells(
    whole_cell_mask: Image, nuclear_mask: Image, cell_matched_mask: Image
) -> float:
    """Matched cells as a fraction of matched + unmatched cells and nuclei.

    Label counts exclude the background (0) label by subtracting one from
    each np.unique count.
    """
    matched = len(np.unique(cell_matched_mask)) - 1
    total_cells = len(np.unique(whole_cell_mask)) - 1
    total_nuclei = len(np.unique(nuclear_mask)) - 1
    unmatched_cells = total_cells - matched
    unmatched_nuclei = total_nuclei - matched
    return matched / (unmatched_cells + unmatched_nuclei + matched)
111 |
112 |
def get_matched_masks(
    cell_mask: Image, nucleus_mask: Image, dtype, do_mismatch_repair: bool
) -> Tuple[List[Image], float]:
    """Pair each cell with at most one nucleus and rebuild consistent masks.

    Greedily scans cells in label order; for each cell it considers every
    nucleus label present under the cell's pixels and keeps the pairing with
    the lowest mismatch fraction (ties broken by first encounter). Each cell
    and each nucleus is used at most once.

    Returns ([cell, nucleus, cell_boundary, nucleus_boundary] masks cast to
    `dtype`, fraction of matched cells — fixed to 1.0 when repair is on).
    """
    whole_cell_mask = cell_mask.copy()
    nuclear_mask = nucleus_mask.copy()
    cell_membrane_mask = get_boundary(whole_cell_mask)

    # Per-label pixel coordinates; [1:] drops the background (label 0) entry.
    cell_coords = get_indices_sparse(whole_cell_mask)[1:]
    nucleus_coords = get_indices_sparse(nuclear_mask)[1:]
    cell_membrane_coords = get_indices_sparse(cell_membrane_mask)[1:]

    # Convert np.where-style index tuples to (n_pixels, 2) coordinate arrays.
    cell_coords = list(map(lambda x: np.array(x).T, cell_coords))
    nucleus_coords = list(map(lambda x: np.array(x).T, nucleus_coords))
    cell_membrane_coords = list(map(lambda x: np.array(x).T, cell_membrane_coords))

    cell_matched_index_list = []
    nucleus_matched_index_list = []
    cell_matched_list = []
    nucleus_matched_list = []

    for i in range(len(cell_coords)):
        if len(cell_coords[i]) != 0:
            current_cell_coords = cell_coords[i]
            # Nucleus labels appearing under this cell's pixels (candidates).
            nuclear_search_num = np.unique(
                list(map(lambda x: nuclear_mask[tuple(x)], current_cell_coords))
            )
            best_mismatch_fraction = 1
            whole_cell_best = []
            for j in nuclear_search_num:
                if j != 0:
                    # Skip nuclei and cells that are already matched.
                    if (j - 1 not in nucleus_matched_index_list) and (
                        i not in cell_matched_index_list
                    ):
                        whole_cell, nucleus, mismatch_fraction = get_matched_cells(
                            cell_coords[i],
                            cell_membrane_coords[i],
                            nucleus_coords[j - 1],
                            mismatch_repair=do_mismatch_repair,
                        )
                        # get_matched_cells returns bools on failure.
                        if type(whole_cell) != bool:
                            # Keep the candidate with the smallest mismatch.
                            if mismatch_fraction < best_mismatch_fraction:
                                best_mismatch_fraction = mismatch_fraction
                                whole_cell_best = whole_cell
                                nucleus_best = nucleus
                                i_ind = i
                                j_ind = j - 1
            # nucleus_best/i_ind/j_ind are only read when whole_cell_best was
            # set above, so they are always bound here.
            if len(whole_cell_best) > 0:
                cell_matched_list.append(whole_cell_best)
                nucleus_matched_list.append(nucleus_best)
                cell_matched_index_list.append(i_ind)
                nucleus_matched_index_list.append(j_ind)

    del cell_coords
    del nucleus_coords

    # Rebuild masks from matched coordinate lists; labels renumbered 1..N.
    cell_matched_mask = get_mask(cell_matched_list, whole_cell_mask.shape)
    nuclear_matched_mask = get_mask(nucleus_matched_list, whole_cell_mask.shape)
    cell_membrane_mask = get_boundary(cell_matched_mask)
    nuclear_membrane_mask = get_boundary(nuclear_matched_mask)

    if do_mismatch_repair:
        # With repair every surviving cell is matched by construction.
        fraction_matched_cells = 1.0
    else:
        fraction_matched_cells = get_fraction_matched_cells(
            whole_cell_mask, nuclear_mask, cell_matched_mask
        )

    out_list = [
        cell_matched_mask.astype(dtype),
        nuclear_matched_mask.astype(dtype),
        cell_membrane_mask.astype(dtype),
        nuclear_membrane_mask.astype(dtype),
    ]
    return out_list, fraction_matched_cells
190 |
--------------------------------------------------------------------------------
/bin/codex_stitching/secondary_stitcher/secondary_stitcher.py:
--------------------------------------------------------------------------------
1 | import re
2 | import xml.etree.ElementTree as ET
3 | from pathlib import Path
4 | from typing import Dict, List, Union
5 |
6 | import numpy as np
7 | import pandas as pd
8 | import tifffile as tif
9 | from mask_stitching import process_all_masks
10 | from skimage.measure import regionprops_table
11 |
12 | Image = np.ndarray
13 |
14 |
def add_structured_annotations(omexml_str: str, nucleus_channel: str, cell_channel: str) -> str:
    """Insert an OME XMLAnnotation recording which channels drove segmentation.

    The annotation inserted (after the Image node, or into an existing
    StructuredAnnotations block) looks like:

        <StructuredAnnotations>
          <XMLAnnotation ID="Annotation:0">
            <Value>
              <OriginalMetadata>
                <Key>SegmentationChannels</Key>
                <Value>
                  <Nucleus>DAPI-02</Nucleus>
                  <Cell>CD45</Cell>
                </Value>
              </OriginalMetadata>
            </Value>
          </XMLAnnotation>
        </StructuredAnnotations>
    """
    # Strip cytokit-style "cycNNN_chNNN_orig" prefixes from channel names.
    nucleus_channel = re.sub(r"cyc(\d+)_ch(\d+)_orig(.*)", r"\3", nucleus_channel)
    cell_channel = re.sub(r"cyc(\d+)_ch(\d+)_orig(.*)", r"\3", cell_channel)

    structured_annotation = ET.Element("StructuredAnnotations")
    annotation = ET.SubElement(structured_annotation, "XMLAnnotation", {"ID": "Annotation:0"})
    annotation_value = ET.SubElement(annotation, "Value")
    original_metadata = ET.SubElement(annotation_value, "OriginalMetadata")
    ET.SubElement(original_metadata, "Key").text = "SegmentationChannels"
    segmentation_channels_value = ET.SubElement(original_metadata, "Value")
    ET.SubElement(segmentation_channels_value, "Nucleus").text = nucleus_channel
    ET.SubElement(segmentation_channels_value, "Cell").text = cell_channel
    sa_str = ET.tostring(structured_annotation, encoding="utf-8").decode("utf-8")

    if "StructuredAnnotations" in omexml_str:
        # An SA block already exists: insert our annotation right after its
        # opening tag and drop our own wrapper tags to avoid nesting.
        sa_placement = omexml_str.find("<StructuredAnnotations>") + len("<StructuredAnnotations>")
        sa_str = re.sub(r"</?StructuredAnnotations>", "", sa_str)
    else:
        # No SA block yet: place a new one right after the closing Image tag.
        sa_placement = omexml_str.find("</Image>") + len("</Image>")

    omexml_str_with_sa = omexml_str[:sa_placement] + sa_str + omexml_str[sa_placement:]
    return omexml_str_with_sa
57 |
58 |
def alpha_num_order(string: str) -> str:
    """Zero-pad every digit run to 5 places so a plain string sort becomes
    a numeric sort. Ex: alpha_num_order("a6b12.125") == "a00006b00012.00125"
    """
    parts = re.split(r"(\d+)", string)
    padded = (format(int(part), "05d") if part.isdigit() else part for part in parts)
    return "".join(padded)
66 |
67 |
def get_img_listing(in_dir: Path) -> List[Path]:
    """List TIFF files in a directory, sorted in alphanumeric order."""
    allowed_extensions = (".tif", ".tiff")
    tiff_files = [path for path in in_dir.iterdir() if path.suffix in allowed_extensions]
    return sorted(tiff_files, key=lambda p: alpha_num_order(p.name))
74 |
75 |
def path_to_str(path: Path):
    """Absolute POSIX-style (forward-slash) string form of a path."""
    return path.absolute().as_posix()
78 |
79 |
def path_to_dict(path: Path):
    """Parse region / x / y values from a file name like 'R001_X002_Y003.tif'
    into {"R": 1, "X": 2, "Y": 3, "path": path}."""
    # Split on digit runs (dropping the trailing extension token), then pair
    # each letter key with the number that follows it.
    tokens = re.split(r"(\d+)(?:_?)", path.name)[:-1]
    pairs = zip(*[iter(tokens)] * 2)
    parsed = {key: int(value) for key, value in pairs}
    parsed["path"] = path
    return parsed
90 |
91 |
def get_slices(
    arr: np.ndarray, hor_f: int, hor_t: int, ver_f: int, ver_t: int, padding: dict, overlap=0
):
    """Compute matching slices for pasting one tile into the stitched image.

    hor_f/hor_t and ver_f/ver_t are the tile's intended bounds in the big
    image; they are clipped to the image extents and shifted by the left/top
    padding. Returns (big_image_slice, tile_slice) — the destination region
    in `arr` and the corresponding region to read from the tile.

    Fixes vs the original: axis indexing is consistently shape[-1]/shape[-2]
    (the original mixed shape[1]/shape[0], identical for 2D input), and the
    never-used right/bottom pad locals are removed.
    """
    left_check = hor_f - padding["left"]
    top_check = ver_f - padding["top"]
    right_check = hor_t - arr.shape[-1]
    bot_check = ver_t - arr.shape[-2]

    left_pad_size = 0
    top_pad_size = 0

    # Tile sticks out past the left/top padding: clip and remember how much
    # of the tile's leading edge to skip.
    if left_check < 0:
        left_pad_size = abs(left_check)
        hor_f = 0
    if top_check < 0:
        top_pad_size = abs(top_check)
        ver_f = 0
    # Tile sticks out past the right/bottom image edge: clip to the image.
    if right_check > 0:
        hor_t = arr.shape[-1]
    if bot_check > 0:
        ver_t = arr.shape[-2]

    big_image_slice = (slice(ver_f, ver_t), slice(hor_f, hor_t))
    tile_shape = (ver_t - ver_f, hor_t - hor_f)
    tile_slice = (
        slice(top_pad_size + overlap, tile_shape[0] + overlap),
        slice(left_pad_size + overlap, tile_shape[1] + overlap),
    )

    return big_image_slice, tile_slice
125 |
126 |
def get_dataset_info(img_dir: Path):
    """Scan tile images and group their paths by region.

    Returns (per-region path lists sorted by region/Y/X, max Y tile index,
    max X tile index).
    """
    img_paths = get_img_listing(img_dir)
    df = pd.DataFrame([path_to_dict(p) for p in img_paths])
    df.sort_values(["R", "Y", "X"], inplace=True)
    df.reset_index(inplace=True)

    y_ntiles = df["Y"].max()
    x_ntiles = df["X"].max()

    path_list_per_region = []
    for region in list(df["R"].unique()):
        region_rows = df[df["R"] == region].index
        path_list_per_region.append(list(df.loc[region_rows, "path"]))

    return path_list_per_region, y_ntiles, x_ntiles
146 |
147 |
def load_tiles(path_list: List[Path], key: Union[None, int]):
    """Read all tiles from disk; `key` selects a single TIFF page, while
    None reads every page of each file."""
    if key is None:
        return [tif.imread(path_to_str(path)) for path in path_list]
    return [tif.imread(path_to_str(path), key=key) for path in path_list]
158 |
159 |
def calc_mask_coverage(segm_mask: Image) -> float:
    """Fraction of plane pixels covered by non-zero labels, rounded to 3 places."""
    labeled_pixels = np.count_nonzero(segm_mask)
    plane_pixels = segm_mask.shape[-2] * segm_mask.shape[-1]
    return float(round(labeled_pixels / plane_pixels, 3))
164 |
165 |
def calc_snr(img: Image) -> float:
    """Signal-to-noise estimate (mean / population std), rounded to 3 places."""
    snr = np.mean(img) / np.std(img)
    return float(round(snr, 3))
168 |
169 |
def calc_label_sizes(segm_mask: Image) -> Dict[str, List[float]]:
    """Min / max / mean label bounding-box sizes (height, width).

    Useful to check whether any labels were merged during stitching.
    """
    props = regionprops_table(segm_mask, properties=("label", "bbox"))
    # bbox columns: 0 = min_row, 1 = min_col, 2 = max_row, 3 = max_col.
    heights = props["bbox-2"] - props["bbox-0"]
    widths = props["bbox-3"] - props["bbox-1"]
    dims = np.stack((heights, widths), axis=1)
    long_sides = np.max(dims, axis=1)
    return dict(
        min_bbox_size=[float(v) for v in dims[np.argmin(long_sides)].tolist()],
        max_bbox_size=[float(v) for v in dims[np.argmax(long_sides)].tolist()],
        mean_bbox_size=[float(v) for v in np.round(np.mean(dims, axis=0), 3).tolist()],
    )
187 |
188 |
def stitch_plane(
    tiles: List[Image],
    y_ntiles: int,
    x_ntiles: int,
    tile_shape: list,
    dtype,
    overlap: int,
    padding: dict,
) -> Image:
    """Assemble expression tiles into one plane, trimming tile overlaps and
    the padding that was added before slicing."""
    core_y = tile_shape[-2] - overlap * 2
    core_x = tile_shape[-1] - overlap * 2

    big_image_y_size = (y_ntiles * core_y) - padding["top"] - padding["bottom"]
    big_image_x_size = (x_ntiles * core_x) - padding["left"] - padding["right"]

    big_image = np.zeros((big_image_y_size, big_image_x_size), dtype=dtype)

    print("n tiles x,y:", (x_ntiles, y_ntiles))
    print("plane shape x,y:", (big_image_x_size, big_image_y_size))

    tile_idx = 0
    for row in range(y_ntiles):
        ver_f = row * core_y
        ver_t = ver_f + core_y

        for col in range(x_ntiles):
            hor_f = col * core_x
            hor_t = hor_f + core_x

            big_image_slice, tile_slice = get_slices(
                big_image, hor_f, hor_t, ver_f, ver_t, padding, overlap
            )

            big_image[tuple(big_image_slice)] = tiles[tile_idx][tuple(tile_slice)]

            tile_idx += 1
    return big_image
230 |
231 |
def main(
    img_dir: Path,
    out_dir: Path,
    img_name_template: str,
    overlap: int,
    padding_str: str,
    is_mask: bool,
    nucleus_channel: str,
    cell_channel: str,
):
    """Stitch per-region tiles from img_dir into OME-TIFFs in out_dir.

    With is_mask=True, tiles are treated as 4-channel segmentation masks and
    processed through process_all_masks; otherwise each TIFF page is stitched
    as an expression plane. padding_str is "left,right,top,bottom".
    Returns a per-region report dict.
    """
    padding_int = [int(i) for i in padding_str.split(",")]
    padding = {
        "left": padding_int[0],
        "right": padding_int[1],
        "top": padding_int[2],
        "bottom": padding_int[3],
    }

    path_list_per_region, y_ntiles, x_ntiles = get_dataset_info(img_dir)

    # Probe the first tile for shape, page count, dtype and OME metadata.
    with tif.TiffFile(path_to_str(path_list_per_region[0][0])) as TF:
        tile_shape = list(TF.series[0].shape)
        npages = len(TF.pages)
        dtype = TF.series[0].dtype
        ome_meta = TF.ome_metadata

    # Final plane size: tile cores minus the slicer padding.
    big_image_y_size = (
        (y_ntiles * (tile_shape[-2] - overlap * 2)) - padding["top"] - padding["bottom"]
    )
    big_image_x_size = (
        (x_ntiles * (tile_shape[-1] - overlap * 2)) - padding["left"] - padding["right"]
    )

    if is_mask:
        # Masks get fresh metadata from process_all_masks; force a label dtype.
        dtype = np.uint32
    else:
        # Patch the tile metadata to describe the stitched plane instead.
        ome_meta = re.sub(r'\sSizeY="\d+"', ' SizeY="' + str(big_image_y_size) + '"', ome_meta)
        ome_meta = re.sub(r'\sSizeX="\d+"', ' SizeX="' + str(big_image_x_size) + '"', ome_meta)
        ome_meta = re.sub(r'\sDimensionOrder="[XYCZT]+"', ' DimensionOrder="XYZCT"', ome_meta)
        ome_meta = add_structured_annotations(ome_meta, nucleus_channel, cell_channel)
    # part of this report is generated after mask stitching and part after expression stitching

    total_report = dict()
    for r, path_list in enumerate(path_list_per_region):
        new_path = out_dir / img_name_template.format(r=r + 1)
        this_region_report = dict()
        TW = tif.TiffWriter(path_to_str(new_path), bigtiff=True, shaped=False)
        if is_mask:
            # mask channels 0 - cells, 1 - nuclei, 2 - cell boundaries, 3 - nucleus boundaries
            tiles = load_tiles(path_list, key=None)
            masks, ome_meta = process_all_masks(
                tiles, tile_shape, y_ntiles, x_ntiles, overlap, padding, dtype
            )
            for mask in masks:
                # Write each mask as a single-plane page (1, H, W).
                new_shape = (1, mask.shape[0], mask.shape[1])
                TW.write(
                    mask.reshape(new_shape),
                    contiguous=True,
                    photometric="minisblack",
                    description=ome_meta,
                )

            this_region_report["num_cells"] = int(masks[0].max())
            this_region_report["num_nuclei"] = int(masks[1].max())
            this_region_report["cell_coverage"] = calc_mask_coverage(masks[0])
            this_region_report["nuclei_coverage"] = calc_mask_coverage(masks[1])
            this_region_report["cell_sizes"] = calc_label_sizes(masks[0])
            this_region_report["nucleus_sizes"] = calc_label_sizes(masks[1])
        else:
            # Stitch each TIFF page (channel/z-plane) independently.
            for p in range(0, npages):
                tiles = load_tiles(path_list, key=p)
                print("\nstitching expressions page", p + 1, "/", npages)
                plane = stitch_plane(
                    tiles, y_ntiles, x_ntiles, tile_shape, dtype, overlap, padding
                )
                new_shape = (1, plane.shape[0], plane.shape[1])
                if p == 0:
                    # Image-level report fields, filled once per region.
                    this_region_report["num_channels"] = int(npages)
                    this_region_report["img_height"] = int(plane.shape[0])
                    this_region_report["img_width"] = int(plane.shape[1])
                    this_region_report["per_channel_snr"] = dict()
                    this_region_report["nucleus_channel"] = nucleus_channel
                    this_region_report["cell_channel"] = cell_channel
                this_region_report["per_channel_snr"][p] = calc_snr(plane)
                TW.write(
                    plane.reshape(new_shape),
                    contiguous=True,
                    photometric="minisblack",
                    description=ome_meta,
                )
        total_report["reg" + str(r + 1)] = this_region_report
        TW.close()
    return total_report
325 |
--------------------------------------------------------------------------------
/bin/codex_stitching/secondary_stitcher/secondary_stitcher_runner.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 | from pathlib import Path
4 | from pprint import pprint
5 | from typing import Any, Dict
6 |
7 | import secondary_stitcher
8 |
9 | Report = Dict[str, Dict[str, Any]]
10 |
11 |
def make_dir_if_not_exists(dir_path: Path):
    """Create the directory (and any missing parents) if it does not exist.

    Uses exist_ok=True instead of a separate exists() check, which removes
    the race between checking and creating the directory.
    """
    dir_path.mkdir(parents=True, exist_ok=True)
15 |
16 |
def read_pipeline_config(path_to_config: Path) -> dict:
    """Load the JSON pipeline config from disk."""
    with open(path_to_config, "r") as stream:
        return json.load(stream)
21 |
22 |
def write_pipeline_config(out_path: Path, config):
    """Dump the config as indented JSON, preserving insertion order of keys."""
    with open(out_path, "w") as stream:
        json.dump(config, stream, sort_keys=False, indent=4)
26 |
27 |
def run_stitcher(
    img_dir: Path,
    out_dir: Path,
    img_name_template: str,
    overlap: int,
    padding: dict,
    is_mask: bool,
    nucleus_channel: str,
    cell_channel: str,
) -> Report:
    """Serialize padding to "left,right,top,bottom" and delegate to the
    secondary stitcher; returns its per-region report."""
    padding_str = ",".join(str(value) for value in padding.values())
    return secondary_stitcher.main(
        img_dir,
        out_dir,
        img_name_template,
        overlap,
        padding_str,
        is_mask,
        nucleus_channel,
        cell_channel,
    )
50 |
51 |
def merge_reports(mask_report: Report, expr_report: Report) -> Report:
    """Combine mask and expression reports region by region; expression
    entries win on duplicate keys."""
    return {
        region: {**mask_report[region], **expr_report[region]} for region in mask_report
    }
57 |
58 |
def main(pipeline_config_path: Path, ometiff_dir: Path):
    """Stitch segmentation-mask and expression tiles for the whole dataset and
    write the final pipeline config (including a stitching report).

    NOTE: output locations are hardcoded container paths under /output.
    """
    pipeline_config = read_pipeline_config(pipeline_config_path)
    slicer_meta = pipeline_config["slicer"]
    # Fall back to the string "None" when channels are absent from the config.
    nucleus_channel = pipeline_config.get("nuclei_channel", "None")
    cell_channel = pipeline_config.get("membrane_channel", "None")

    # Cytokit output layout: masks and expressions live in fixed subdirs.
    path_to_mask_tiles = Path(ometiff_dir).joinpath("cytometry/tile/ome-tiff")
    path_to_image_tiles = Path(ometiff_dir).joinpath("extract/expressions/ome-tiff")

    overlap = slicer_meta["overlap"]
    padding = slicer_meta["padding"]

    mask_out_dir = Path("/output/pipeline_output/mask")
    expr_out_dir = Path("/output/pipeline_output/expr")
    final_pipeline_config_path = Path("/output/pipelineConfig.json")

    make_dir_if_not_exists(mask_out_dir)
    make_dir_if_not_exists(expr_out_dir)

    mask_out_name_template = "reg{r:03d}_mask.ome.tiff"
    expr_out_name_template = "reg{r:03d}_expr.ome.tiff"

    # Masks first (is_mask=True), then expressions (is_mask=False).
    mask_report = run_stitcher(
        path_to_mask_tiles,
        mask_out_dir,
        mask_out_name_template,
        overlap,
        padding,
        True,
        nucleus_channel,
        cell_channel,
    )

    expr_report = run_stitcher(
        path_to_image_tiles,
        expr_out_dir,
        expr_out_name_template,
        overlap,
        padding,
        False,
        nucleus_channel,
        cell_channel,
    )

    total_report = merge_reports(mask_report, expr_report)

    # Append the combined report to the config and persist it.
    final_pipeline_config = pipeline_config
    final_pipeline_config.update({"report": total_report})
    print("\nfinal_pipeline_config")
    pprint(final_pipeline_config, sort_dicts=False)
    write_pipeline_config(final_pipeline_config_path, final_pipeline_config)
110 |
111 |
if __name__ == "__main__":
    # CLI entry point: stitch mask and expression tiles, then write the final
    # pipeline config with the stitching report appended.
    parser = argparse.ArgumentParser()
    parser.add_argument("--pipeline_config_path", type=Path, help="path to pipeline config")
    parser.add_argument(
        "--ometiff_dir", type=Path, help="dir with segmentation mask tiles and codex image tiles"
    )

    args = parser.parse_args()
    main(args.pipeline_config_path, args.ometiff_dir)
121 |
--------------------------------------------------------------------------------
/bin/convert_to_ometiff.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import logging
3 | import re
4 | from multiprocessing import Pool
5 | from os import walk
6 | from pathlib import Path
7 | from typing import List, Optional
8 |
9 | import pandas as pd
10 | import yaml
11 | from aicsimageio import AICSImage
12 | from aicsimageio.writers.ome_tiff_writer import OmeTiffWriter
13 | from ome_types.model import AnnotationRef, Map, MapAnnotation, StructuredAnnotationList
14 | from tifffile import TiffFile
15 |
16 | from utils import print_directory_tree
17 |
logging.basicConfig(level=logging.INFO, format="%(levelname)-7s - %(message)s")
logger = logging.getLogger(__name__)
# Channel names written into the segmentation-mask OME-TIFFs, in plane order.
SEGMENTATION_CHANNEL_NAMES = [
    "cells",
    "nuclei",
    "cell_boundaries",
    "nucleus_boundaries",
]
# Matches Cytokit tile file names like "R001_X003_Y002.tif" (region, X, Y).
TIFF_FILE_NAMING_PATTERN = re.compile(r"^R\d{3}_X(\d{3})_Y(\d{3})\.tif")
# NOTE(review): this module-level pattern is shadowed by a broader local one
# inside find_antibodies_meta() and appears unused — confirm before removing.
metadata_filename_pattern = re.compile(r"^[0-9A-Fa-f]{32}antibodies\.tsv$")
28 |
29 |
def find_antibodies_meta(input_dir: Path) -> Optional[Path]:
    """
    Find the antibodies metadata file (``*antibodies.tsv``) for a HuBMAP data set.

    Walks ``input_dir`` recursively and returns the first matching file, or
    None (with a warning) when nothing matches. Does not check whether the
    file name prefix matches the dataset ID. When several candidates exist,
    the first one encountered is used and a warning is logged.
    """
    metadata_filename_pattern = re.compile(r"^[0-9A-Za-z\-_]*antibodies\.tsv$")
    found_files = []
    for dirpath, _dirnames, filenames in walk(input_dir):
        for filename in filenames:
            if metadata_filename_pattern.match(filename):
                found_files.append(Path(dirpath) / filename)

    if not found_files:
        # Fixed message: the file the pipeline looks for is "antibodies.tsv".
        logger.warning("No antibodies.tsv file found")
        return None
    if len(found_files) > 1:
        # Previously silent; surface the ambiguity instead of hiding it.
        logger.warning(f"Multiple antibodies.tsv files found, using {found_files[0]}")
    return found_files[0]
50 |
51 |
def sort_by_cycle(antb_path: Path):
    """
    Read antibodies.tsv and sort its rows by (cycle, channel) parsed from the
    ``channel_id`` column (e.g. "cycle2_ch3"); the provider's file is not
    guaranteed to be in acquisition order. The returned frame is indexed by a
    (cycle, channel) MultiIndex.
    """
    df = pd.read_table(antb_path)
    # The named groups were mangled to "(?P\d+)" (invalid regex) — restore
    # them so .group("cycle") / .group("channel") below work.
    cycle_channel_pattern = re.compile(
        r"cycle(?P<cycle>\d+)_ch(?P<channel>\d+)", re.IGNORECASE
    )
    searches = [cycle_channel_pattern.search(v) for v in df["channel_id"]]
    cycles = [int(s.group("cycle")) for s in searches]
    channels = [int(s.group("channel")) for s in searches]
    df.index = [cycles, channels]
    df = df.sort_index()
    return df
64 |
65 |
def get_ch_info_from_antibodies_meta(df: pd.DataFrame) -> Optional[pd.DataFrame]:
    """
    Add a "target" column holding the cleaned analyte name derived from each
    row's "antibody_name". The frame is modified in place and returned.
    """
    df["target"] = [get_analyte_name(name) for name in df["antibody_name"].to_list()]
    return df
75 |
76 |
def get_analyte_name(antibody_name: str) -> str:
    """
    Derive the analyte name from an antibodies.tsv entry by stripping the
    "Anti-" prefix and the trailing " antibody" suffix.
    """
    without_prefix = re.sub(r"Anti-", "", antibody_name)
    without_suffix = re.sub(r"\s+antibody", "", without_prefix)
    return without_suffix
84 |
85 |
def create_original_channel_names_df(channelList: List[str]) -> pd.DataFrame:
    """
    Build a dataframe describing the original channel names.

    Each name like "cyc002_ch003_origCD45" is split into numeric "Cycle" and
    "Channel" columns plus the bare "channel_name"; a "channel_id" column of
    the form "cycleN_chM" is added for joining against antibodies.tsv.
    """
    pattern = re.compile(r"cyc(\d+)_ch(\d+)_orig(.*)")
    frame = pd.DataFrame(channelList, columns=["Original_Channel_Name"])
    frame[["Cycle", "Channel", "channel_name"]] = frame[
        "Original_Channel_Name"
    ].str.extract(pattern)
    frame["Cycle"] = pd.to_numeric(frame["Cycle"])
    frame["Channel"] = pd.to_numeric(frame["Channel"])
    cycle_part = "cycle" + frame["Cycle"].astype(str)
    channel_part = "_ch" + frame["Channel"].astype(str)
    frame["channel_id"] = cycle_part + channel_part

    return frame
106 |
107 |
def replace_provider_ch_names_with_antb(
    og_ch_names_df: pd.DataFrame, antibodies_df: pd.DataFrame
) -> List[str]:
    """
    Produce the final channel-name list: where a row's channel_id has an
    antibodies.tsv entry use that target name, otherwise keep the provider's
    original channel name.
    """
    mapping = map_cycles_and_channels(antibodies_df)
    updated_channel_names = []
    for _, row in og_ch_names_df.iterrows():
        target = mapping.get(row["channel_id"].lower(), None)
        if target is None:
            updated_channel_names.append(row["channel_name"])
        else:
            updated_channel_names.append(target)
    return updated_channel_names
125 |
126 |
def generate_sa_ch_info(
    channel_id: str,
    og_ch_names_info: pd.Series,
    antb_info: pd.DataFrame,
) -> Optional[MapAnnotation]:
    """
    Build an OME MapAnnotation describing one channel from its antibodies.tsv
    row, looked up by (cycle, channel). Returns None when the antibody table
    has no row for that position.
    """
    position = (og_ch_names_info["Cycle"], og_ch_names_info["Channel"])
    try:
        antb_row = antb_info.loc[position, :]
    except KeyError:
        return None

    pairs = [
        Map.M(k="Channel ID", value=channel_id),
        Map.M(k="Name", value=antb_row["target"]),
        Map.M(k="Original Name", value=og_ch_names_info["channel_name"]),
        Map.M(k="UniprotID", value=antb_row["uniprot_accession_number"]),
        Map.M(k="RRID", value=antb_row["rr_id"]),
        Map.M(k="AntibodiesTsvID", value=antb_row["channel_id"]),
    ]
    return MapAnnotation(value=Map(ms=pairs))
150 |
151 |
def map_cycles_and_channels(antibodies_df: pd.DataFrame) -> dict:
    """
    Map lower-cased channel_id (e.g. "cycle1_ch2") to its antibody target
    name, for constant-time lookups while renaming channels.
    """
    lowered_ids = antibodies_df["channel_id"].str.lower()
    return dict(zip(lowered_ids, antibodies_df["target"]))
158 |
159 |
def collect_tiff_file_list(directory: Path, TIFF_FILE_NAMING_PATTERN: re.Pattern) -> List[Path]:
    """
    Recursively find all files under ``directory`` whose names match the
    given regex and return their paths. Logs a warning and returns an empty
    list when nothing matches.

    TODO: this is very similar to a function in create_cellshapes_csv.py -- could
    do to unify with a separate module?
    """
    fileList = []

    for dirpath, dirnames, filenames in walk(directory):
        for filename in filenames:
            if TIFF_FILE_NAMING_PATTERN.match(filename):
                # Join with the directory the file was actually found in;
                # joining with the walk root broke matches in subdirectories.
                fileList.append(Path(dirpath) / filename)

    if len(fileList) == 0:
        logger.warning("No files found in " + str(directory))

    return fileList
179 |
180 |
def get_lateral_resolution(cytokit_config_filename: Path) -> float:
    """
    Read the acquisition lateral resolution (XY pixel size) from a Cytokit
    YAML config, rounded to two decimal places.
    """
    with open(cytokit_config_filename) as stream:
        cytokit_config = yaml.safe_load(stream)
    raw_value = cytokit_config["acquisition"]["lateral_resolution"]
    return float("%0.2f" % raw_value)
186 |
187 |
def collect_expressions_extract_channels(extractFile: Path) -> List[str]:
    """
    Return the channel names of a Cytokit extract TIFF, in the order given by
    the ImageJ "Labels" metadata attribute (AICSImageIO does not expose it),
    with the "proc_" prefix stripped from each name.
    """
    with TiffFile(str(extractFile.absolute())) as tiff:
        ij_meta = tiff.imagej_metadata
        num_channels = int(ij_meta["channels"])
        labels = ij_meta["Labels"][0:num_channels]

    # Drop the "proc_" prefix Cytokit adds for processed-tile extracts.
    proc_prefix = re.compile(r"^proc_(.*)")
    return [proc_prefix.match(label).group(1) for label in labels]
207 |
208 |
def convert_tiff_file(funcArgs):
    """
    Convert one source TIFF to OME-TIFF with channel metadata.

    funcArgs is a tuple of (sourceFile, ometiffFile, channelNames,
    lateral_resolution, og_ch_names_df), optionally followed by an
    antibodies-info dataframe; when that is present, a per-channel
    MapAnnotation (UniProt ID, RRID, antibodies.tsv ID, ...) is attached to
    the OME-XML via generate_sa_ch_info.

    NOTE(review): lateral_resolution is unpacked but never used here — pixel
    sizes come from the source image's physical_pixel_sizes; confirm the
    parameter can be dropped from the tuple contract.
    """

    sourceFile, ometiffFile, channelNames, lateral_resolution, og_ch_names_df, *optional_args = (
        funcArgs
    )
    # Antibody metadata is optional; without it channels are only renamed.
    antb_info = optional_args[0] if optional_args else None

    logger.info(f"Converting file: {str(sourceFile)}")

    image = AICSImage(sourceFile)
    imageDataForOmeTiff = image.get_image_data("TCZYX")
    imageName = f"Image: {sourceFile.name}"

    # Create OME-XML metadata using build_ome
    ome_writer = OmeTiffWriter()
    omeXml = ome_writer.build_ome(
        data_shapes=[(image.dims.T, image.dims.C, image.dims.Z, image.dims.Y, image.dims.X)],
        data_types=[image.dtype],
        dimension_order=["TCZYX"],
        channel_names=[channelNames],
        image_name=[imageName],
        physical_pixel_sizes=[image.physical_pixel_sizes],
    )

    # Rename/re-id each OME channel and, when antibody info is available,
    # attach a structured annotation describing it.
    annotations = StructuredAnnotationList()
    for i, (channel_obj, channel_name, og_ch_names_row) in enumerate(
        zip(
            omeXml.images[0].pixels.channels,
            channelNames,
            og_ch_names_df.iterrows(),
        )
    ):
        channel_id = f"Channel:0:{i}"
        channel_obj.name = channel_name
        channel_obj.id = channel_id
        if antb_info is None:
            continue
        # og_ch_names_row is an (index, Series) pair from iterrows().
        ch_info = generate_sa_ch_info(channel_id, og_ch_names_row[1], antb_info)
        if ch_info is None:
            continue
        channel_obj.annotation_refs.append(AnnotationRef(id=ch_info.id))
        annotations.append(ch_info)
    omeXml.structured_annotations = annotations

    ome_writer.save(
        data=imageDataForOmeTiff,
        uri=str(ometiffFile),
        ome_xml=omeXml,
        dimension_order="TCZYX",
        channel_names=channelNames,
    )

    logger.info(f"OME-TIFF file created: {ometiffFile}")
268 |
269 |
def create_ome_tiffs(
    file_list: List[Path],
    output_dir: Path,
    channel_names: List[str],
    lateral_resolution: float,
    subprocesses: int,
    og_ch_names_df,
    antb_info: Optional[pd.DataFrame] = None,
):
    """
    Convert every TIFF in ``file_list`` to OME-TIFF in ``output_dir`` using a
    pool of ``subprocesses`` parallel workers.

    Parameters:
    - file_list: source TIFF paths
    - output_dir: destination directory (created if missing)
    - channel_names: channel names to store in the OME metadata
    - lateral_resolution: XY pixel size (forwarded to the per-file converter)
    - subprocesses: number of worker processes
    - og_ch_names_df: dataframe of original channel names
      (see create_original_channel_names_df)
    - antb_info: optional antibodies.tsv-derived dataframe; when given,
      per-channel structured annotations are added
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    args_for_conversion = []
    for source_file in file_list:
        ome_tiff_file = (output_dir / source_file.name).with_suffix(".ome.tiff")
        args = (source_file, ome_tiff_file, channel_names, lateral_resolution, og_ch_names_df)
        if antb_info is not None:
            args = args + (antb_info,)
        args_for_conversion.append(args)

    with Pool(processes=subprocesses) as pool:
        # Consume the iterator: imap_unordered is lazy, and without iterating
        # the results any exception raised in a worker is silently lost.
        for _ in pool.imap_unordered(convert_tiff_file, args_for_conversion):
            pass
        pool.close()
        pool.join()
318 |
319 |
def check_dir_is_empty(dir_path: Path):
    """Return True when ``dir_path`` contains no entries at all."""
    for _ in dir_path.iterdir():
        return False
    return True
322 |
323 |
########
# MAIN #
########
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description=(
            "Convert Cytokit's output TIFFs containing segmentation and extraction "
            'results to OME-TIFF, and add the channel names. Creates an "ome-tiff" '
            "directory inside the output/cytometry/tile and "
            "output/extract/expressions directories."
        ),
    )
    parser.add_argument(
        "cytokit_output",
        help="Path to output of `cytokit processor`",
        type=Path,
    )
    parser.add_argument(
        "bg_sub_tiles",
        help="Path to tiles with subtracted background",
        type=Path,
    )
    parser.add_argument(
        "cytokit_config",
        help="Path to Cytokit YAML config file",
        type=Path,
    )
    parser.add_argument(
        "input_data_dir",
        help="Path to the input dataset",
        type=Path,
    )
    parser.add_argument(
        "-p",
        "--processes",
        help="Number of parallel OME-TIFF conversions to perform at once",
        type=int,
        default=8,
    )

    args = parser.parse_args()

    print("Cytokit output:")
    print_directory_tree(args.cytokit_output)

    output_dir = Path("output")
    output_dir.mkdir(parents=True, exist_ok=True)

    cytometry_tile_dir_piece = Path("cytometry/tile")
    extract_expressions_piece = Path("extract/expressions")
    processor_data_json_piece = Path("processor/data.json")

    cytometryTileDir = args.cytokit_output / cytometry_tile_dir_piece
    print("Cytometry tile directory:", cytometryTileDir)

    # Prefer background-subtracted tiles when that directory is non-empty;
    # otherwise fall back to Cytokit's own extract output. (Previously the
    # Cytokit path was computed and printed unconditionally before this
    # branch, which logged the wrong directory in the bg-sub case.)
    if not check_dir_is_empty(args.bg_sub_tiles):
        extractDir = args.bg_sub_tiles
        print(list(Path(args.bg_sub_tiles).iterdir()))
    else:
        extractDir = args.cytokit_output / extract_expressions_piece
    print("Extract expressions directory:", extractDir)

    segmentationFileList = collect_tiff_file_list(cytometryTileDir, TIFF_FILE_NAMING_PATTERN)
    extractFileList = collect_tiff_file_list(extractDir, TIFF_FILE_NAMING_PATTERN)
    antb_path = find_antibodies_meta(args.input_data_dir)
    lateral_resolution = get_lateral_resolution(args.cytokit_config)
    extractChannelNames = collect_expressions_extract_channels(extractFileList[0])
    original_ch_names_df = create_original_channel_names_df(extractChannelNames)
    print(original_ch_names_df.head())

    # Default to the provider's channel names; override with antibodies.tsv
    # target names when that metadata file exists.
    antb_info = None
    updated_channel_names = original_ch_names_df["channel_name"].tolist()
    if antb_path:
        df = sort_by_cycle(antb_path)
        antb_info = get_ch_info_from_antibodies_meta(df)
        updated_channel_names = replace_provider_ch_names_with_antb(
            original_ch_names_df, antb_info
        )

    # Create segmentation mask OME-TIFFs
    if segmentationFileList:
        create_ome_tiffs(
            segmentationFileList,
            output_dir / cytometry_tile_dir_piece / "ome-tiff",
            SEGMENTATION_CHANNEL_NAMES,
            lateral_resolution,
            args.processes,
            original_ch_names_df,
            antb_info,
        )
    # Create the extract OME-TIFFs.
    if extractFileList:
        create_ome_tiffs(
            extractFileList,
            output_dir / extract_expressions_piece / "ome-tiff",
            updated_channel_names,
            lateral_resolution,
            args.processes,
            original_ch_names_df,
            antb_info,
        )
429 |
--------------------------------------------------------------------------------
/bin/create_cytokit_config.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 | import logging
4 | import re
5 | from pprint import pprint
6 | from typing import List
7 |
8 | import yaml
9 |
logging.basicConfig(level=logging.INFO, format="%(levelname)-7s - %(message)s")
logger = logging.getLogger(__name__)

# Some constants to use below.
# Cytokit path format for Keyence multi-cycle acquisition layouts.
path_format = "keyence_multi_cycle_v01"
# Memory limit handed to Cytokit's processor args.
memory_limit = "64G"
16 |
17 |
def comma_separated_integers(s: str) -> List[int]:
    """Argparse type helper: parse a string like "0, 1" into a list of ints."""
    return [int(piece) for piece in s.split(",")]
20 |
21 |
########
# MAIN #
########
if __name__ == "__main__":
    # Set up argument parser and parse the command line arguments.
    parser = argparse.ArgumentParser(
        description="Create a YAML config file for Cytokit, based on a JSON file from the CODEX Toolkit pipeline. YAML file will be created in current working directory unless otherwise specified."
    )
    parser.add_argument(
        "--gpus",
        help="GPUs to use for Cytokit, specified as a comma-separated list of integers.",
        type=comma_separated_integers,
        default=[0, 1],
    )
    parser.add_argument(
        "pipelineConfigFilename",
        help="JSON file containing all information required for config generation.",
    )
    parser.add_argument(
        "-o",
        "--outfile",
        help="Path to output YAML config file. Default: experiment.yaml",
    )

    args = parser.parse_args()

    if not args.outfile:
        args.outfile = "experiment.yaml"

    logger.info("Reading pipeline config file " + args.pipelineConfigFilename + "...")

    with open(args.pipelineConfigFilename, "r") as pipelineConfigFile:
        pipelineConfigInfo = json.load(pipelineConfigFile)

    logger.info("Finished reading pipeline config file.")

    # Skeleton Cytokit config; the acquisition section is populated from
    # pipelineConfigInfo below, and an operator section is appended after
    # the extract channels have been selected.
    cytokitConfig = {
        "name": pipelineConfigInfo["name"],
        "date": pipelineConfigInfo["date"],
        "environment": {"path_formats": path_format},
        "acquisition": {},  # This is populated below.
        "processor": {
            "args": {
                "gpus": args.gpus,
                "memory_limit": memory_limit,
                "run_crop": False,
                "run_tile_generator": True,
                "run_drift_comp": True,
                "run_cytometry": True,
                "run_best_focus": True,
                "run_deconvolution": False,
            },
            "tile_generator": {"raw_file_type": "keyence_mixed"},
            "best_focus": {"channel": pipelineConfigInfo["best_focus"]},
            "drift_compensation": {"channel": pipelineConfigInfo["drift_compensation"]},
            "cytometry": {
                "nuclei_channel_name": pipelineConfigInfo["nuclei_channel"],
                "segmentation_params": {
                    "memb_min_dist": 8,
                    "memb_sigma": 5,
                    "memb_gamma": 0.25,
                    "marker_dilation": 3,
                    "marker_min_size": 2,
                },
                "quantification_params": {"nucleus_intensity": True, "cell_graph": True},
            },
        },
        "analysis": [{"aggregate_cytometry_statistics": {"mode": "best_z_plane"}}],
    }

    # The membrane channel is optional; segmentation falls back to
    # nuclei-only when it is absent.
    if "membrane_channel" in pipelineConfigInfo:
        cytokitConfig["processor"]["cytometry"]["membrane_channel_name"] = pipelineConfigInfo[
            "membrane_channel"
        ]
    else:
        logger.warning(
            "No membrane stain channel found in pipeline config. Will only use nuclei channel for segmentation."
        )

    # Populate acquisition section.
    acquisitionFields = [
        "per_cycle_channel_names",
        "channel_names",
        "axial_resolution",
        "lateral_resolution",
        "emission_wavelengths",
        "magnification",
        "num_cycles",
        "num_z_planes",
        "numerical_aperture",
        "objective_type",
        "region_height",
        "region_names",
        "region_width",
        "tile_height",
        "tile_overlap_x",
        "tile_overlap_y",
        "tile_width",
        "tiling_mode",
    ]

    for field in acquisitionFields:
        cytokitConfig["acquisition"][field] = pipelineConfigInfo[field]

    # Create operator section to extract channels collapsed in one time point,
    # leaving out blank/empty channels and only including the nuclear stain
    # channel used for segmentation.
    blankPattern = re.compile(r"cyc(\d+)_ch(\d+)_orig([^_]*)blank", re.IGNORECASE)
    emptyPattern = re.compile(r"cyc(\d+)_ch(\d+)_orig([^_]*)empty", re.IGNORECASE)
    dapiChannelPattern = re.compile(r"cyc(\d+)_ch(\d+)_orig([^_]*)DAPI", re.IGNORECASE)
    hoechstChannelPattern = re.compile(r"cyc(\d+)_ch(\d+)_orig([^_]*)HOECHST", re.IGNORECASE)

    operatorExtractChannels = []

    for channelName in pipelineConfigInfo["channel_names"]:
        # Skip unwanted channels: empty channels always; DAPI/Hoechst
        # channels unless they are the designated nuclei channel.
        if emptyPattern.match(channelName):
            continue
        elif dapiChannelPattern.match(channelName):
            if channelName != pipelineConfigInfo["nuclei_channel"]:
                continue
        elif hoechstChannelPattern.match(channelName):
            if channelName != pipelineConfigInfo["nuclei_channel"]:
                continue

        # Skip channels that failed QC.
        # channel_names_qc_pass maps channel name -> list of "TRUE"/"FALSE"
        # strings; more than one entry is only tolerated for blank channels.
        if pipelineConfigInfo["channel_names_qc_pass"]:
            if len(pipelineConfigInfo["channel_names_qc_pass"][channelName]) > 1:
                if blankPattern.match(channelName):
                    pass
                else:
                    raise ValueError(f"More than one {channelName} channel found.")
            else:
                channel_qc_pass = pipelineConfigInfo["channel_names_qc_pass"][channelName][0]
                if channel_qc_pass.casefold() == "false".casefold():
                    continue

        # Append to operator extract channels with "proc_" prepended -- this
        # tells Cytokit to extract the channels from the processed tiles.
        operatorExtractChannels.append("proc_" + channelName)

    # Add operator section to config.
    cytokitConfig["operator"] = [
        {"extract": {"name": "expressions", "channels": operatorExtractChannels, "z": "all"}}
    ]

    logger.info("Writing Cytokit config to " + args.outfile)

    with open(args.outfile, "w") as outFile:
        yaml.safe_dump(cytokitConfig, outFile, encoding="utf-8", default_flow_style=None, indent=2)

    pprint(cytokitConfig, sort_dicts=False)

    logger.info("Finished writing Cytokit config.")
176 |
--------------------------------------------------------------------------------
/bin/dataset_info/collect_dataset_info.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 | import logging
4 | import math
5 | import re
6 | import sys
7 | from collections import Counter
8 | from datetime import datetime
9 | from pathlib import Path
10 | from pprint import pprint
11 | from typing import Dict, List, Optional, Tuple, Union
12 |
13 | import numpy as np
14 | import pint
15 |
16 | sys.path.append("/opt")
17 | from pipeline_utils.dataset_listing import get_tile_dtype, get_tile_shape
18 |
19 |
class ConfigCreator:
    """
    Derives the pipeline config dict from a dataset's ``dataset.json``.

    Typical usage: set ``dataset_dir``, call ``find_raw_data_dir()``, then
    ``read_metadata()``, then ``create_config()``.
    """

    def __init__(self):
        # Root of the dataset; expected to be set by the caller.
        self.dataset_dir = Path("")
        # Default parallelism; adjustable via the num_concurrent_tasks property.
        self._num_concur_tasks = 10
        # Normalized metadata, populated by read_metadata().
        self._std_meta = dict()
        # Located by find_raw_data_dir().
        self._raw_data_dir = Path("")

    def read_metadata(self):
        """Load dataset.json from the raw data dir and de-duplicate channel names."""
        path_to_meta = self._raw_data_dir / "dataset.json"
        meta = self._read_json_meta(path_to_meta)
        processed_meta = meta.copy()

        ch_names = []
        for ch in meta["ChannelDetails"]["ChannelDetailsArray"]:
            ch_names.append(ch["Name"])

        new_ch_names = self._make_ch_names_unique(ch_names)

        # Rebuild the channel array with the de-duplicated names.
        new_channel_details_array = []
        for i, ch in enumerate(processed_meta["ChannelDetails"]["ChannelDetailsArray"]):
            new_ch = ch.copy()
            new_ch["Name"] = new_ch_names[i]
            new_channel_details_array.append(new_ch)
        processed_meta["ChannelDetails"]["ChannelDetailsArray"] = new_channel_details_array
        self._std_meta = processed_meta

    def find_raw_data_dir(self):
        """
        Return the single raw-data subdirectory of ``dataset_dir``.

        A directory counts as raw when its name contains none of the known
        non-raw markers. Raises ValueError when several candidates exist.
        """
        NONRAW_DIRECTORY_NAME_PIECES = [
            "processed",
            "drv",
            "metadata",
            "extras",
            "Overview",
        ]
        raw_data_dir_possibilities = []

        for child in self.dataset_dir.iterdir():
            if not child.is_dir():
                continue
            if not any(piece in child.name for piece in NONRAW_DIRECTORY_NAME_PIECES):
                raw_data_dir_possibilities.append(child)

        if len(raw_data_dir_possibilities) > 1:
            message_pieces = ["Found multiple raw data directory possibilities:"]
            message_pieces.extend(f"\t{path}" for path in raw_data_dir_possibilities)
            raise ValueError("\n".join(message_pieces))
        self._raw_data_dir = raw_data_dir_possibilities[0]
        return self._raw_data_dir

    def create_config(self) -> dict:
        """Assemble the full pipeline config dict from the loaded metadata."""
        config = {
            "name": self._std_meta["DatasetName"],
            "date": self._create_proc_date(),
            "raw_data_location": self.find_raw_data_dir().name,
            "channel_names_qc_pass": self._get_qc_info_per_ch(),
            "emission_wavelengths": self._get_emission_wavelengths(),
            "excitation_wavelengths": self._get_excitation_wavelengths(),
            "axial_resolution": self._get_axial_resolution(),
            "lateral_resolution": self._get_lateral_resolution(),
            "magnification": self._std_meta["NominalMagnification"],
            "num_z_planes": self._std_meta["NumZPlanes"],
            "numerical_aperture": self._std_meta["NumericalAperture"],
            "objective_type": self._std_meta["ImmersionMedium"].lower(),
            "region_height": self._std_meta["RegionHeight"],
            "region_width": self._std_meta["RegionWidth"],
            "region_names": self._get_region_names(),
            "tile_overlap_x": self._get_tile_overlap_x_in_px(),
            "tile_overlap_y": self._get_tile_overlap_y_in_px(),
            "tile_height": self._get_tile_shape_no_overlap()[0],
            "tile_width": self._get_tile_shape_no_overlap()[1],
            "tile_dtype": self._get_tile_dtype(),
            "tiling_mode": self._std_meta["TileLayout"].lower(),
            "per_cycle_channel_names": self._get_per_cycle_ch_names(),
            "channel_names": self._get_channel_names(),
            "num_cycles": self._std_meta["NumCycles"],
            "best_focus": self._get_nuc_ch(),
            "drift_compensation": self._get_nuc_ch(),
            "nuclei_channel": self._get_nuc_ch(),
            "membrane_channel": self._get_membr_ch(),
            "nuclei_channel_loc": self._std_meta["NuclearStainForSegmentation"],
            "membrane_channel_loc": self._std_meta["MembraneStainForSegmentation"],
            "target_shape": self._calc_target_shape(),
            "num_concurrent_tasks": self._num_concur_tasks,
        }
        return config

    def _read_json_meta(self, path_to_meta: Path) -> Dict[str, Union[str, int, dict, list]]:
        """Read and return the JSON metadata file at ``path_to_meta``."""
        with open(path_to_meta, "r") as s:
            json_meta = json.load(s)
        return json_meta

    def _create_proc_date(self) -> str:
        """Return the current local time formatted as "YYYY-MM-DD HH:MM:SS"."""
        processing_date = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        return processing_date

    def _get_qc_info_per_ch(self) -> Dict[str, List[str]]:
        """Map each channel name to ["TRUE"] or ["FALSE"] from its PassedQC flag."""
        ch_details = self._std_meta["ChannelDetails"]["ChannelDetailsArray"]
        channel_qc_info = dict()
        # Header-style entry kept for downstream consumers of this mapping.
        channel_qc_info["Marker"] = ["Result"]
        for ch in ch_details:
            ch_name = ch["Name"]
            qc_result = ch["PassedQC"]
            if qc_result is True:
                qc_result_str = "TRUE"
            else:
                qc_result_str = "FALSE"
            channel_qc_info[ch_name] = [qc_result_str]
        return channel_qc_info

    def _make_ch_names_unique(self, channel_names: List[str]) -> List[str]:
        """Suffix repeated channel names with _1, _2, ... to make them unique."""
        unique_names = Counter(channel_names)
        new_names = channel_names.copy()

        for unique_ch, count in unique_names.items():
            if count > 1:
                this_ch_count = 1
                for i, ch_name in enumerate(channel_names):
                    if ch_name == unique_ch:
                        new_name = f"{ch_name}_{this_ch_count}"
                        new_names[i] = new_name
                        this_ch_count += 1
        return new_names

    def _get_emission_wavelengths(self) -> List[float]:
        """Return the distinct emission wavelengths (nm) across all channels.

        NOTE(review): duplicates are collapsed, so the list can be shorter
        than the channel list — presumably one entry per fluorophore; confirm
        downstream expectations.
        """
        em_wav = []
        for ch in self._std_meta["ChannelDetails"]["ChannelDetailsArray"]:
            wav = ch["EmissionWavelengthNM"]
            if wav not in em_wav:
                em_wav.append(float(wav))
        return em_wav

    def _get_excitation_wavelengths(self) -> List[float]:
        """Return distinct excitation wavelengths, or zeros when not provided."""
        num_channels = len(self._std_meta["ChannelDetails"]["ChannelDetailsArray"])
        channel = self._std_meta["ChannelDetails"]["ChannelDetailsArray"][0]

        if "ExcitationWavelengthNM" in channel:
            exc_wav = []
            for ch in self._std_meta["ChannelDetails"]["ChannelDetailsArray"]:
                wav = ch["ExcitationWavelengthNM"]
                if wav not in exc_wav:
                    exc_wav.append(float(wav))
        else:
            # Placeholder when the metadata carries no excitation info.
            exc_wav = [0] * num_channels
        return exc_wav

    def _get_axial_resolution(self) -> float:
        """Return the Z resolution converted to nanometres."""
        unit = pint.UnitRegistry()
        provided_unit_z = unit[self._std_meta["ResolutionZUnit"]]
        provided_res_z = float(self._std_meta["ResolutionZ"])
        res_z_in_units = provided_res_z * provided_unit_z
        # Renamed from *_um: the value is converted to nm, not um.
        axial_res_nm = res_z_in_units.to("nm")
        return axial_res_nm.magnitude

    def _get_lateral_resolution(self) -> float:
        """Return the XY resolution (average of X and Y) converted to nanometres."""
        unit = pint.UnitRegistry()
        provided_unit_x = unit[self._std_meta["ResolutionXUnit"]]
        provided_unit_y = unit[self._std_meta["ResolutionYUnit"]]
        provided_res_x = float(self._std_meta["ResolutionX"])
        provided_res_y = float(self._std_meta["ResolutionY"])
        res_x_in_units = provided_res_x * provided_unit_x
        res_y_in_units = provided_res_y * provided_unit_y
        # Renamed from *_um: the value is converted to nm, not um.
        lateral_res_nm = ((res_x_in_units + res_y_in_units) / 2).to("nm")
        return lateral_res_nm.magnitude

    def _get_region_names(self) -> List[int]:
        """Return 1-based region numbers [1..NumRegions]."""
        num_regions = self._std_meta["NumRegions"]
        return list(range(1, num_regions + 1))

    def _get_tile_overlap_x_in_px(self) -> int:
        """Convert the proportional X overlap into pixels."""
        overlap = self._std_meta["TileOverlapX"]
        size = self._std_meta["TileWidth"]
        px_overlap = self._calc_px_overlap_from_proportional(size, overlap)
        return px_overlap

    def _get_tile_overlap_y_in_px(self) -> int:
        """Convert the proportional Y overlap into pixels."""
        overlap = self._std_meta["TileOverlapY"]
        size = self._std_meta["TileHeight"]
        px_overlap = self._calc_px_overlap_from_proportional(size, overlap)
        return px_overlap

    def _calc_px_overlap_from_proportional(self, dim_size: int, dim_overlap: float) -> int:
        """Turn a proportional overlap (0..1) into a whole, even pixel count.

        Raises ValueError when the proportion exceeds 1.
        """
        msg = f"Tile overlap proportion {dim_overlap} is greater than 1"
        if dim_overlap > 1:
            raise ValueError(msg)

        pixel_overlap = dim_size * dim_overlap

        if float(pixel_overlap).is_integer():
            return int(pixel_overlap)
        else:
            # if overlap is not a whole number in px, round up and make even
            closest_overlap = int(math.ceil(pixel_overlap))
            closest_overlap += closest_overlap % 2  # make even
            return closest_overlap

    def _get_per_cycle_ch_names(self) -> List[str]:
        """Return sorted unique per-cycle channel names, e.g. ["CH1", "CH2"]."""
        per_cycle_channel_names = []
        channels = self._std_meta["ChannelDetails"]["ChannelDetailsArray"]
        channel_ids = []
        for ch in channels:
            channel_ids.append(int(ch["ChannelID"]))
        unique_ch_ids = sorted(set(channel_ids))
        for ch in unique_ch_ids:
            per_cycle_channel_names.append("CH" + str(ch))
        return per_cycle_channel_names

    def _get_channel_names(self) -> List[str]:
        """Return all channel names in metadata order."""
        channels = self._std_meta["ChannelDetails"]["ChannelDetailsArray"]
        channel_names = []
        for ch in channels:
            channel_names.append(ch["Name"])
        return channel_names

    def _get_nuc_ch(self) -> str:
        """Return the name of the nuclear stain channel used for segmentation."""
        nuc_ch_loc = self._std_meta["NuclearStainForSegmentation"]
        nuc_ch_name = self._get_ch_name_by_location(nuc_ch_loc)
        return nuc_ch_name

    def _get_membr_ch(self) -> str:
        """Return the name of the membrane stain channel used for segmentation."""
        membr_ch_loc = self._std_meta["MembraneStainForSegmentation"]
        membr_ch_name = self._get_ch_name_by_location(membr_ch_loc)
        return membr_ch_name

    def _get_ch_name_by_location(self, ch_loc: Dict[str, int]) -> str:
        """Look up a channel name by its {"CycleID", "ChannelID"} location.

        Raises ValueError when no channel matches.
        """
        channels = self._std_meta["ChannelDetails"]["ChannelDetailsArray"]
        ch_name = None
        for ch in channels:
            if ch["CycleID"] == ch_loc["CycleID"]:
                if ch["ChannelID"] == ch_loc["ChannelID"]:
                    ch_name = ch["Name"]
                    break
        if ch_name is None:
            raise ValueError("Could not find channel name of", str(ch_loc))
        return ch_name

    def _get_tile_dtype(self) -> str:
        """Return the numpy dtype name of the raw tiles (e.g. "uint16")."""
        tile_dtype = str(get_tile_dtype(self._raw_data_dir).name)
        return tile_dtype

    def _calc_target_shape(self):
        """
        Cytokit's nuclei detection U-Net (from CellProfiler) works best at 20x magnification.
        The CellProfiler U-Net requires the height and width of the images to be
        evenly divisible by 2 raised to the number of layers in the network, in this case 2^3=8.
        https://github.com/hammerlab/cytokit/issues/14
        https://github.com/CellProfiler/CellProfiler-plugins/issues/65

        Returns [width, height] with each dimension rounded UP to the nearest
        multiple of 8.
        """
        dims = [self._std_meta["TileWidth"], self._std_meta["TileHeight"]]
        magnification = self._std_meta["NominalMagnification"]
        # NOTE(review): scaleFactor is computed but never applied; presumably
        # non-20x datasets should have dims scaled by it — confirm intent.
        scaleFactor = 1
        if magnification != 20:
            scaleFactor = 20 / magnification

        # Round each dimension up to the nearest multiple of 8. (The previous
        # implementation APPENDED the rounded values to the original list,
        # returning more than two dims and keeping the non-divisible ones.)
        new_dims = []
        for dim in dims:
            if dim % 8:
                new_dims.append(int(8 * math.ceil(float(dim) / 8)))
            else:
                new_dims.append(dim)
        return new_dims

    @property
    def num_concurrent_tasks(self) -> int:
        # Number of concurrent tasks the pipeline should run.
        return self._num_concur_tasks

    @num_concurrent_tasks.setter
    def num_concurrent_tasks(self, val: int):
        # Non-positive values fall back to the default of 10.
        if val <= 0:
            self._num_concur_tasks = 10
        else:
            self._num_concur_tasks = val

    def _get_tile_shape_no_overlap(self) -> Tuple[int, int]:
        """Return (height, width) of a tile with the overlap margins removed."""
        overlap_y = self._get_tile_overlap_y_in_px()
        overlap_x = self._get_tile_overlap_x_in_px()
        tile_height_with_overlap = self._std_meta["TileHeight"]
        tile_width_with_overlap = self._std_meta["TileWidth"]
        tile_height = tile_height_with_overlap - overlap_y
        tile_width = tile_width_with_overlap - overlap_x
        return tile_height, tile_width
301 |
302 |
def write_pipeline_config(out_path: Path, pipeline_config: dict):
    """Serialize the pipeline config as pretty-printed JSON at out_path."""
    out_path.write_text(json.dumps(pipeline_config, indent=4))
306 |
307 |
def main(path_to_dataset: Path, num_concurrent_tasks: int = 10):
    """Collect dataset metadata and write pipelineConfig.json to the CWD."""
    logging.basicConfig(level=logging.INFO, format="%(levelname)-7s - %(message)s")
    logger = logging.getLogger(__name__)

    creator = ConfigCreator()
    creator.dataset_dir = path_to_dataset
    creator.num_concurrent_tasks = num_concurrent_tasks
    creator.find_raw_data_dir()
    creator.read_metadata()
    config = creator.create_config()

    pprint(config, sort_dicts=False)
    out_path = Path("pipelineConfig.json")
    logger.info("Writing pipeline config")
    write_pipeline_config(out_path, config)
    logger.info(f"Written pipeline config to {out_path}")
324 |
325 |
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Collect information required to perform analysis of a CODEX dataset."
    )
    parser.add_argument(
        "--path_to_dataset",
        help="Path to directory containing raw data subdirectory (with cycle and region numbers).",
        type=Path,
    )
    parser.add_argument(
        "--num_concurrent_tasks",
        # Help text was a copy-paste of the --path_to_dataset description.
        help="Maximum number of tasks to run concurrently.",
        type=int,
        default=10,
    )
    args = parser.parse_args()
    main(args.path_to_dataset, args.num_concurrent_tasks)
343 |
--------------------------------------------------------------------------------
/bin/dataset_info/run_collection.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from pathlib import Path
3 |
4 | import collect_dataset_info
5 | import collect_dataset_info_old
6 |
7 |
def find_raw_data_dir(dataset_dir: Path) -> Path:
    """Locate the raw-data subdirectory of a dataset directory.

    Any child directory whose name does not contain one of the known
    non-raw markers is considered a candidate.

    :raises ValueError: if zero or more than one candidate is found.
      (Previously an empty candidate list raised an uninformative IndexError.)
    """
    NONRAW_DIRECTORY_NAME_PIECES = [
        "processed",
        "drv",
        "metadata",
        "extras",
        "Overview",
    ]
    raw_data_dir_possibilities = [
        child
        for child in dataset_dir.iterdir()
        if child.is_dir()
        and not any(piece in child.name for piece in NONRAW_DIRECTORY_NAME_PIECES)
    ]

    if len(raw_data_dir_possibilities) > 1:
        message_pieces = ["Found multiple raw data directory possibilities:"]
        message_pieces.extend(f"\t{path}" for path in raw_data_dir_possibilities)
        raise ValueError("\n".join(message_pieces))
    if not raw_data_dir_possibilities:
        raise ValueError(f"No raw data directory found in {dataset_dir}")
    return raw_data_dir_possibilities[0]
30 |
31 |
def check_new_meta_present(raw_data_dir: Path) -> bool:
    """Return True if the new-style metadata file (dataset.json) exists
    directly inside raw_data_dir."""
    if (raw_data_dir / "dataset.json").exists():
        print("Found new metadata")
        return True
    # Message previously read "Did not found new metadata".
    print("Did not find new metadata. Will try to use old metadata")
    return False
39 |
40 |
def main(path_to_dataset: Path, num_concurrent_tasks: int = 10):
    """Dispatch to the new or old metadata collector depending on which
    metadata format is present in the raw data directory."""
    raw_data_dir = find_raw_data_dir(path_to_dataset)
    if check_new_meta_present(raw_data_dir):
        collect_dataset_info.main(path_to_dataset, num_concurrent_tasks)
    else:
        collect_dataset_info_old.main(path_to_dataset, num_concurrent_tasks)
48 |
49 |
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Collect information required to perform analysis of a CODEX dataset."
    )
    parser.add_argument(
        "--path_to_dataset",
        help="Path to directory containing raw data subdirectory (with cycle and region numbers).",
        type=Path,
    )
    parser.add_argument(
        "--num_concurrent_tasks",
        # Help text was a copy-paste of the --path_to_dataset description.
        help="Maximum number of tasks to run concurrently.",
        type=int,
        default=10,
    )
    args = parser.parse_args()
    main(args.path_to_dataset, args.num_concurrent_tasks)
67 |
--------------------------------------------------------------------------------
/bin/illumination_correction/generate_basic_macro.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 |
def fill_in_basic_macro_template(path_to_stack: Path, out_dir: Path) -> str:
    """Render the ImageJ macro text that runs the BaSiC shading-estimation
    plugin on the given stack, writing profiles into out_dir."""
    stack_path_str = str(path_to_stack.absolute())
    out_dir_str = str(out_dir.absolute())
    # Plugin option values:
    # [Compute shading only, Compute shading and correct images]
    # [Estimate flat-field only (ignore dark-field), Estimate both flat-field and dark-field]
    return f"""
run("BaSiC Mod",
"input_stack={stack_path_str}" +
" flat-field_image_path=[]" +
" dark-field_image_path=[]" +
" output_dir={out_dir_str}" +
" shading_estimation=[Estimate shading profiles]" +
" shading_model=[Estimate flat-field only (ignore dark-field)]" +
" setting_regularisation_parameters=Automatic" +
" temporal_drift=Ignore" +
" correction_options=[Compute shading only]" +
" lambda_flat=0.500" +
" lambda_dark=0.500");

run("Quit");
eval("script", "System.exit(0);");
"""
28 |
29 |
def save_macro(out_path: Path, macro: str):
    """Write the macro text to out_path as UTF-8."""
    out_path.write_text(macro, encoding="utf-8")
33 |
--------------------------------------------------------------------------------
/bin/illumination_correction/run_illumination_correction.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import platform
3 | import re
4 | import subprocess
5 | import sys
6 | from pathlib import Path
7 | from typing import Dict, Iterable, List, Set, Tuple
8 |
9 | import cv2 as cv
10 | import dask
11 | import numpy as np
12 | import tifffile as tif
13 |
14 | sys.path.append("/opt/")
15 | from generate_basic_macro import fill_in_basic_macro_template, save_macro
16 |
17 | from pipeline_utils.dataset_listing import (
18 | create_listing_for_each_cycle_region,
19 | get_img_listing,
20 | )
21 | from pipeline_utils.pipeline_config_reader import load_dataset_info
22 |
23 | ImgStack = np.ndarray # 3d
24 | Image = np.ndarray # 2d
25 |
26 |
def make_dir_if_not_exists(dir_path: Path):
    """Create dir_path (and any missing parents); no-op when it already exists.

    Uses exist_ok=True, avoiding the check-then-create race of the previous
    exists() test when several dask workers create the same directory.
    """
    dir_path.mkdir(parents=True, exist_ok=True)
30 |
31 |
def convert_np_cv_dtype(npdtype: np.dtype) -> int:
    """Map a numpy dtype to the matching single-channel OpenCV depth constant.

    Raises KeyError for dtypes with no OpenCV equivalent in the table.
    """
    dtype_table = {
        np.dtype("float32"): cv.CV_32F,
        np.dtype("int32"): cv.CV_32S,
        np.dtype("uint16"): cv.CV_16U,
        np.dtype("uint8"): cv.CV_8U,
        np.dtype("int8"): cv.CV_8S,
        np.dtype("int16"): cv.CV_16S,
    }
    return dtype_table[npdtype]
42 |
43 |
def get_input_img_dirs(data_dir: Path) -> List[Path]:
    """Every child entry of data_dir (expected to be Cyc*_reg* directories).
    Note: files are not filtered out, matching the original behavior."""
    return [child for child in data_dir.iterdir()]
47 |
48 |
def read_imgs_to_stack(img_paths: List[Path]) -> ImgStack:
    """Read each tiff in img_paths and stack them along a new first axis."""
    planes = []
    for img_path in img_paths:
        try:
            planes.append(tif.imread(str(img_path.absolute())))
        except Exception as excp:
            # do not raise from excp because the main process cannot instantiate excp
            raise RuntimeError(f"Error reading tiff image {img_path}: {excp}")
    return np.stack(planes, axis=0)
60 |
61 |
def save_stack(out_path: Path, stack: ImgStack):
    """Write a 3d image stack to out_path as one contiguous grayscale tiff."""
    with tif.TiffWriter(out_path, shaped=False) as writer:
        writer.save(stack, contiguous=True, photometric="minisblack")
65 |
66 |
def read_and_save_to_stack(path_list: List[Path], out_stack_path: Path):
    """Convenience wrapper: read the listed images into a stack and write it."""
    stack = read_imgs_to_stack(path_list)
    save_stack(out_stack_path, stack)
69 |
70 |
def resave_imgs_to_stacks(
    zplane_img_listing: Dict[int, Dict[int, Dict[int, Dict[int, List[Path]]]]], img_stack_dir: Path
) -> Dict[int, Dict[int, Dict[int, Dict[int, Path]]]]:
    """Resave each per-zplane image list as a single tiff stack (via dask).

    Returns the same nesting {cycle: {region: {channel: {zplane: stack_path}}}}.
    """
    stack_paths: Dict[int, Dict[int, Dict[int, Dict[int, Path]]]] = dict()
    name_fmt = "Cyc{cyc:03d}_Reg{reg:03d}_Ch{ch:03d}_Z{z:03d}.tif"
    tasks = []
    for cycle, regions in zplane_img_listing.items():
        stack_paths[cycle] = dict()
        for region, channels in regions.items():
            stack_paths[cycle][region] = dict()
            for channel, zplanes in channels.items():
                stack_paths[cycle][region][channel] = dict()
                for zplane, path_list in zplanes.items():
                    out_stack_path = img_stack_dir / name_fmt.format(
                        cyc=cycle, reg=region, ch=channel, z=zplane
                    )
                    stack_paths[cycle][region][channel][zplane] = out_stack_path
                    tasks.append(dask.delayed(read_and_save_to_stack)(path_list, out_stack_path))
    dask.compute(*tasks)
    return stack_paths
92 |
93 |
def generate_basic_macro_for_each_stack(
    stack_paths: Dict[int, Dict[int, Dict[int, Dict[int, Path]]]],
    macro_out_dir: Path,
    illum_cor_dir: Path,
) -> Dict[int, Dict[int, Dict[int, Dict[int, Path]]]]:
    """Write one BaSiC macro per image stack.

    Returns the macro paths nested the same way as stack_paths.
    """
    macro_paths: Dict[int, Dict[int, Dict[int, Dict[int, Path]]]] = dict()
    for cycle, regions in stack_paths.items():
        macro_paths[cycle] = dict()
        for region, channels in regions.items():
            macro_paths[cycle][region] = dict()
            for channel, zplanes in channels.items():
                macro_paths[cycle][region][channel] = dict()
                for zplane, stack_path in zplanes.items():
                    macro_path = macro_out_dir / (stack_path.name + ".ijm")
                    macro_text = fill_in_basic_macro_template(stack_path, illum_cor_dir)
                    save_macro(macro_path, macro_text)
                    macro_paths[cycle][region][channel][zplane] = macro_path
    return macro_paths
112 |
113 |
def read_flatfield_imgs(
    illum_cor_dir: Path, stack_paths: Dict[int, Dict[int, Dict[int, Dict[int, Path]]]]
) -> Dict[int, Dict[int, Dict[int, Dict[int, ImgStack]]]]:
    """Load the BaSiC flatfield image for every stack.

    Returns {cycle: {region: {channel: {zplane: flatfield}}}}.
    Mirrors read_darkfield_imgs except for the "flatfield" prefix/subdir.
    """
    per_zplane_flatfield: Dict[int, Dict[int, Dict[int, Dict[int, ImgStack]]]] = dict()
    name_fmt = "Cyc{cyc:03d}_Reg{reg:03d}_Ch{ch:03d}_Z{z:03d}.tif"
    for cycle, regions in stack_paths.items():
        per_zplane_flatfield[cycle] = dict()
        for region, channels in regions.items():
            per_zplane_flatfield[cycle][region] = dict()
            for channel, zplanes in channels.items():
                per_zplane_flatfield[cycle][region][channel] = dict()
                for zplane in zplanes:
                    stack_name = name_fmt.format(cyc=cycle, reg=region, ch=channel, z=zplane)
                    flatfield_path = illum_cor_dir / "flatfield" / ("flatfield_" + stack_name)
                    # float32 image with values in [0, 1]
                    per_zplane_flatfield[cycle][region][channel][zplane] = tif.imread(
                        str(flatfield_path.absolute())
                    )
    return per_zplane_flatfield
134 |
135 |
def read_darkfield_imgs(
    illum_cor_dir: Path, stack_paths: Dict[int, Dict[int, Dict[int, Dict[int, Path]]]]
) -> Dict[int, Dict[int, Dict[int, Dict[int, ImgStack]]]]:
    """Load the BaSiC darkfield image for every stack.

    Returns {cycle: {region: {channel: {zplane: darkfield}}}}.
    Mirrors read_flatfield_imgs except for the "darkfield" prefix/subdir.
    """
    per_zplane_darkfield: Dict[int, Dict[int, Dict[int, Dict[int, ImgStack]]]] = dict()
    name_fmt = "Cyc{cyc:03d}_Reg{reg:03d}_Ch{ch:03d}_Z{z:03d}.tif"
    for cycle, regions in stack_paths.items():
        per_zplane_darkfield[cycle] = dict()
        for region, channels in regions.items():
            per_zplane_darkfield[cycle][region] = dict()
            for channel, zplanes in channels.items():
                per_zplane_darkfield[cycle][region][channel] = dict()
                for zplane in zplanes:
                    stack_name = name_fmt.format(cyc=cycle, reg=region, ch=channel, z=zplane)
                    darkfield_path = illum_cor_dir / "darkfield" / ("darkfield_" + stack_name)
                    # float32 image with values in [0, 1]
                    per_zplane_darkfield[cycle][region][channel][zplane] = tif.imread(
                        str(darkfield_path.absolute())
                    )
    return per_zplane_darkfield
156 |
157 |
158 | def apply_illum_cor(img: Image, flatfield: Image) -> Image:
159 | orig_dtype = img.dtype
160 | dtype_info = np.iinfo(orig_dtype)
161 | orig_minmax = (dtype_info.min, dtype_info.max)
162 | imgf = img.astype(np.float32)
163 |
164 | corrected_imgf = imgf / flatfield
165 |
166 | corrected_img = np.clip(np.round(corrected_imgf, 0), *orig_minmax).astype(orig_dtype)
167 | return corrected_img
168 |
169 |
def correct_and_save(img_path: Path, flatfield: Image, out_path: Path):
    """Flatfield-correct the image at img_path and write the result to out_path."""
    corrected_img = apply_illum_cor(tif.imread(str(img_path.absolute())), flatfield)
    with tif.TiffWriter(str(out_path.absolute()), shaped=False) as writer:
        writer.save(corrected_img, photometric="minisblack")
    del corrected_img
175 |
176 |
def apply_flatfield_and_save(
    listing: Dict[int, Dict[int, Dict[int, Dict[int, Dict[int, Path]]]]],
    flatfields: Dict[int, Dict[int, Dict[int, Dict[int, Image]]]],
    # darkfields: Dict[int, Dict[int, Dict[int, Dict[int, Image]]]],
    out_dir: Path,
):
    """Flatfield-correct every image in the listing and save it under out_dir,
    preserving the Cyc###_reg### directory layout (parallelized with dask)."""
    img_dir_template = "Cyc{cyc:03d}_reg{reg:03d}"
    img_name_template = "{reg:d}_{tile:05d}_Z{z:03d}_CH{ch:d}.tif"
    tasks = []
    for cycle, regions in listing.items():
        for region, channels in regions.items():
            for channel, tiles in channels.items():
                for tile, zplane_dict in tiles.items():
                    for zplane, img_path in zplane_dict.items():
                        out_dir_full = Path(
                            out_dir / img_dir_template.format(cyc=cycle, reg=region)
                        )
                        make_dir_if_not_exists(out_dir_full)
                        out_path = out_dir_full / img_name_template.format(
                            reg=region, tile=tile, z=zplane, ch=channel
                        )
                        flatfield = flatfields[cycle][region][channel][zplane]
                        tasks.append(
                            dask.delayed(correct_and_save)(img_path, flatfield, out_path)
                        )
    dask.compute(*tasks)
202 |
203 |
def organize_listing_by_cyc_reg_ch_zplane(
    listing: Dict[int, Dict[int, Dict[int, Dict[int, Dict[int, Path]]]]],
    tile_ids_to_use: Iterable[int],
) -> Dict[int, Dict[int, Dict[int, Dict[int, List[Path]]]]]:
    """Regroup {cycle: {region: {channel: {tile: {zplane: path}}}}} into
    {cycle: {region: {channel: {zplane: [paths]}}}}, keeping only the
    requested tile ids."""
    regrouped: Dict[int, Dict[int, Dict[int, Dict[int, List[Path]]]]] = dict()
    for cycle, regions in listing.items():
        regrouped[cycle] = dict()
        for region, channels in regions.items():
            regrouped[cycle][region] = dict()
            for channel, tiles in channels.items():
                per_zplane = regrouped[cycle][region][channel] = dict()
                for tile, zplane_dict in tiles.items():
                    if tile not in tile_ids_to_use:
                        continue
                    for zplane, path in zplane_dict.items():
                        per_zplane.setdefault(zplane, []).append(path)
    return regrouped
223 |
224 |
def run_basic(basic_macro_path: Path, log_dir: Path):
    """Run one BaSiC ImageJ macro headlessly and write its stdout/stderr log.

    It is expected that ImageJ is added to system PATH.

    :raises RuntimeError: on an unsupported platform (previously imagej_name
        was left unbound, raising a confusing NameError).
    :raises Exception: when ImageJ exits non-zero. The log file is written
        BEFORE raising; with the old check=True a failing run raised
        CalledProcessError first, so no log was ever saved and the
        error branch below was unreachable.
    """
    imagej_names = {
        "Windows": "ImageJ-win64",
        "Linux": "ImageJ-linux64",
        "Darwin": "ImageJ-macosx",
    }
    system = platform.system()
    if system not in imagej_names:
        raise RuntimeError("Unsupported platform for ImageJ: " + system)

    command = imagej_names[system] + " --headless --console -macro " + str(basic_macro_path)
    print("Started running BaSiC for", str(basic_macro_path))
    res = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    # Always persist the log, even for failed runs.
    run_log = (
        "Command:\n"
        + res.args
        + "\n\nSTDERR:\n"
        + res.stderr.decode("utf-8")
        + "\n\nSTDOUT:\n"
        + res.stdout.decode("utf-8")
    )
    log_path = log_dir / (basic_macro_path.name + ".log")
    with open(log_path, "w", encoding="utf-8") as f:
        f.write(run_log)

    if res.returncode == 0:
        print("Finished", str(basic_macro_path))
    else:
        raise Exception(
            "There was an error while running the BaSiC for "
            + str(basic_macro_path)
            + "\n"
            + res.stderr.decode("utf-8")
        )
    return
262 |
263 |
def run_all_macros(macro_paths: Dict[int, Dict[int, Dict[int, Dict[int, Path]]]], log_dir: Path):
    """Execute every generated BaSiC macro in parallel via dask."""
    tasks = [
        dask.delayed(run_basic)(macro_path, log_dir)
        for regions in macro_paths.values()
        for channels in regions.values()
        for zplanes in channels.values()
        for macro_path in zplanes.values()
    ]
    dask.compute(*tasks)
272 |
273 |
def check_illum_cor_images(
    illum_cor_dir: Path,
    log_dir: Path,
    zplane_listing: Dict[int, Dict[int, Dict[int, Dict[int, List[Path]]]]],
):
    """Verify that BaSiC produced a flatfield image for every expected stack.

    For each (cycle, region, channel, zplane) in zplane_listing the matching
    flatfield tiff must exist under illum_cor_dir/"flatfield". For every
    missing image the corresponding ImageJ log is printed before raising.

    :raises ValueError: if any flatfield image is missing.

    Note: the previous version also built an unused imgs_present list and an
    unused darkfield file name; both dead locals are removed here.
    """
    cor_img_name_template = "{cor_type}_Cyc{cyc:03d}_Reg{reg:03d}_Ch{ch:03d}_Z{z:03d}.tif"
    log_name_template = "Cyc{cyc:03d}_Reg{reg:03d}_Ch{ch:03d}_Z{z:03d}.tif.ijm.log"
    imgs_missing = []
    imgs_missing_logs = []
    for cycle in zplane_listing:
        for region in zplane_listing[cycle]:
            for channel in zplane_listing[cycle][region]:
                for zplane in zplane_listing[cycle][region][channel]:
                    flatfield_fn = cor_img_name_template.format(
                        cor_type="flatfield", cyc=cycle, reg=region, ch=channel, z=zplane
                    )
                    flatfield_path = illum_cor_dir / "flatfield" / flatfield_fn
                    if flatfield_path.exists():
                        continue
                    imgs_missing.append(flatfield_fn)
                    log_path = log_dir / log_name_template.format(
                        cyc=cycle, reg=region, ch=channel, z=zplane
                    )
                    with open(log_path, "r", encoding="utf-8") as f:
                        imgs_missing_logs.append(f.read())
    if len(imgs_missing) > 0:
        msg = (
            "Probably there was an error while running BaSiC. "
            + "There is no image in one or more directories."
        )
        print(msg)

        for missing_fn, log_content in zip(imgs_missing, imgs_missing_logs):
            print("\nOne or both are missing:")
            print(missing_fn)
            print("ImageJ log:")
            print(log_content)
        raise ValueError(msg)
    return
320 |
321 |
def select_which_tiles_to_use(
    n_tiles_y: int, n_tiles_x: int, tile_dtype: str, tile_size: Tuple[int, int]
) -> Set[int]:
    """Select every n-th tile, keeping the max size of the tile stack at 2GB.

    :param tile_dtype: numpy dtype name such as "uint16"; the bit width is
        parsed from the digits in the name (int16 -> 16).
    :param tile_size: (height, width) of one tile in pixels.
    :return: set of 0-based tile ids to use.
    """
    n_tiles = n_tiles_y * n_tiles_x

    img_dtype = int(re.search(r"(\d+)", tile_dtype).groups()[0])  # int16 -> 16
    nbytes = img_dtype / 8

    # max 2GB
    single_tile_gb = tile_size[0] * tile_size[1] * nbytes / 1024**3
    # Guard against a single tile larger than 2GB, which previously made
    # max_num_tiles zero and crashed the floor division below.
    max_num_tiles = max(round(2.0 // single_tile_gb), 1)

    step = max(n_tiles // max_num_tiles, 1)
    if step < 2 and n_tiles > max_num_tiles:
        step = 2
    tile_ids = set(range(0, n_tiles, step))
    return tile_ids
340 |
341 |
def main(data_dir: Path, pipeline_config_path: Path):
    """Estimate and apply per-zplane illumination correction for a dataset.

    Reads the raw images under data_dir, estimates flatfield profiles with
    BaSiC (ImageJ) on a 2GB-capped subsample of tiles, then writes corrected
    images to /output/corrected_images.
    """
    img_stack_dir = Path("/output/image_stacks/")
    macro_dir = Path("/output/basic_macros")
    illum_cor_dir = Path("/output/illumination_correction/")
    corrected_img_dir = Path("/output/corrected_images")
    log_dir = Path("/output/logs")
    for out_dir in (img_stack_dir, macro_dir, illum_cor_dir, corrected_img_dir, log_dir):
        make_dir_if_not_exists(out_dir)

    dataset_info = load_dataset_info(pipeline_config_path)
    tile_dtype = dataset_info["tile_dtype"]

    dask.config.set(
        {"num_workers": dataset_info["num_concurrent_tasks"], "scheduler": "processes"}
    )

    raw_data_dir = dataset_info["dataset_dir"]
    img_dirs = get_input_img_dirs(Path(data_dir / raw_data_dir))
    print("Getting image listing")
    listing = create_listing_for_each_cycle_region(img_dirs)

    # Tiles on disk still contain the stitching overlap.
    tile_size = (
        dataset_info["tile_height"] + dataset_info["overlap_y"],
        dataset_info["tile_width"] + dataset_info["overlap_x"],
    )
    n_tiles = dataset_info["num_tiles"]
    tile_ids_to_use = select_which_tiles_to_use(
        dataset_info["num_tiles_y"], dataset_info["num_tiles_x"], tile_dtype, tile_size
    )
    print(
        f"tile size: {str(tile_size)}",
        f"| number of tiles: {str(n_tiles)}",
        f"| using {str(len(tile_ids_to_use))} tiles to compute illumination correction",
    )
    zplane_listing = organize_listing_by_cyc_reg_ch_zplane(listing, tile_ids_to_use)

    print("Resaving images as stacks")
    stack_paths = resave_imgs_to_stacks(zplane_listing, img_stack_dir)
    print("Generating BaSiC macros")
    macro_paths = generate_basic_macro_for_each_stack(stack_paths, macro_dir, illum_cor_dir)
    print("Running estimation of illumination")
    run_all_macros(macro_paths, log_dir)
    check_illum_cor_images(illum_cor_dir, log_dir, zplane_listing)

    print("Applying illumination correction")
    flatfields = read_flatfield_imgs(illum_cor_dir, stack_paths)
    apply_flatfield_and_save(listing, flatfields, corrected_img_dir)
396 |
397 |
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--data_dir", type=Path, help="path to directory with dataset directory")
    parser.add_argument(
        "--pipeline_config_path", type=Path, help="path to pipelineConfig.json file"
    )
    cli_args = parser.parse_args()
    main(cli_args.data_dir, cli_args.pipeline_config_path)
406 |
--------------------------------------------------------------------------------
/bin/pipeline_utils/dataset_listing.py:
--------------------------------------------------------------------------------
1 | import re
2 | from os import walk
3 | from pathlib import Path
4 | from typing import Dict, List, Tuple, Union
5 |
6 | import tifffile as tif
7 |
8 |
def path_to_str(path: Path) -> str:
    """Absolute path rendered as a POSIX (forward-slash) string."""
    return str(path.absolute().as_posix())
11 |
12 |
def sort_dict(item: dict) -> dict:
    """Recursively rebuild a dict with keys in sorted order at every level."""
    result = {}
    for key in sorted(item):
        value = item[key]
        result[key] = sort_dict(value) if isinstance(value, dict) else value
    return result
15 |
16 |
def alpha_num_order(string: str) -> str:
    """Returns all numbers on 5 digits to let sort the string with numeric order.
    Ex: alphaNumOrder("a6b12.125") ==> "a00006b00012.00125"
    """
    parts = re.split(r"(\d+)", string)
    padded = [format(int(part), "05d") if part.isdigit() else part for part in parts]
    return "".join(padded)
24 |
25 |
def get_img_listing(in_dir: Path) -> List[Path]:
    """Tiff files directly inside in_dir, sorted in natural alphanumeric order."""
    allowed_extensions = (".tif", ".tiff")
    img_listing = [f for f in in_dir.iterdir() if f.suffix in allowed_extensions]
    img_listing.sort(key=lambda x: alpha_num_order(x.name))
    return img_listing
32 |
33 |
def extract_digits_from_string(string: str) -> List[int]:
    """All digit runs in the string, as ints.
    E.g. '1_00001_Z02_CH3' -> '1', '00001', '02', '3' -> [1, 1, 2, 3]
    """
    return [int(part) for part in re.split(r"(\d+)", string) if part.isdigit()]
39 |
40 |
def arrange_listing_by_channel_tile_zplane(
    listing: List[Path],
) -> Dict[int, Dict[int, Dict[int, Path]]]:
    """Arrange image paths into {channel: {tile: {zplane: path}}}.

    File names are expected to contain at least four digit groups:
    region, tile, zplane, channel (e.g. 1_00001_Z02_CH3.tif). Names with
    fewer digit groups (e.g. overlay images) are skipped.
    """
    tile_arrangement: Dict[int, Dict[int, Dict[int, Path]]] = dict()
    for file_path in listing:
        digits = extract_digits_from_string(file_path.name)
        if len(digits) < 4:
            # Overlay image. This guard previously came AFTER indexing
            # digits[1] and digits[2], raising IndexError for names with
            # fewer than three digit groups.
            continue
        tile = digits[1]
        zplane = digits[2]
        channel = digits[3]
        tile_arrangement.setdefault(channel, {}).setdefault(tile, {})[zplane] = file_path
    return tile_arrangement
61 |
62 |
def get_image_paths_arranged_in_dict(img_dir: Path) -> Dict[int, Dict[int, Dict[int, Path]]]:
    """List tiffs in img_dir and arrange them by channel/tile/zplane."""
    return arrange_listing_by_channel_tile_zplane(get_img_listing(img_dir))
67 |
68 |
def extract_cycle_and_region_from_name(
    dir_name: str, cycle_prefix: str, region_prefix: str
) -> Tuple[Union[None, int], Union[None, int]]:
    """Parse cycle and region numbers out of a directory name.

    Both prefixes are matched case-insensitively and must be followed by
    digits; otherwise (None, None) is returned. The previous version first
    matched the bare prefix and then assumed "<prefix><digits>" also matched,
    raising AttributeError for names like "Cyc1_reg" (prefix without digits).
    """
    region_match = re.search(region_prefix + r"(\d+)", dir_name, re.IGNORECASE)
    cycle_match = re.search(cycle_prefix + r"(\d+)", dir_name, re.IGNORECASE)
    if region_match is None or cycle_match is None:
        return None, None
    return int(cycle_match.groups()[0]), int(region_match.groups()[0])
85 |
86 |
def arrange_dirs_by_cycle_region(
    img_dirs: List[Path], cycle_prefix: str, region_prefix: str
) -> Dict[int, Dict[int, Path]]:
    """Group directories into {cycle: {region: dir_path}}.

    :raises ValueError: when no directory name yields a cycle/region pair.
    """
    cycle_region_dict: Dict[int, Dict[int, Path]] = dict()
    for dir_path in img_dirs:
        cycle, region = extract_cycle_and_region_from_name(
            str(dir_path.name), cycle_prefix, region_prefix
        )
        if cycle is None:
            continue
        cycle_region_dict.setdefault(cycle, {})[region] = dir_path
    if not cycle_region_dict:
        raise ValueError("Could not find cycle and region directories")
    return cycle_region_dict
105 |
106 |
def create_listing_for_each_cycle_region(
    img_dirs: List[Path],
) -> Dict[int, Dict[int, Dict[int, Dict[int, Dict[int, Path]]]]]:
    """Returns {cycle: {region: {channel: {tile: {zplane: path}}}}}"""
    # Expected dir names Cyc1_reg1 or Cyc01_reg01
    cycle_region_dict = arrange_dirs_by_cycle_region(img_dirs, "cyc", "reg")
    listing_per_cycle: Dict[int, Dict[int, dict]] = dict()
    for cycle, regions in cycle_region_dict.items():
        listing_per_cycle[cycle] = {
            region: get_image_paths_arranged_in_dict(dir_path)
            for region, dir_path in regions.items()
        }
    return sort_dict(listing_per_cycle)
123 |
124 |
def get_img_dirs(dataset_dir: Path) -> List[Path]:
    """Immediate subdirectories of dataset_dir (files are excluded)."""
    _, dir_names, _ = next(walk(dataset_dir))
    return [dataset_dir.joinpath(dir_name) for dir_name in dir_names]
129 |
130 |
def get_tile_shape(dataset_dir: Path):
    """Shape of the first tile image found in the dataset (reads one tiff).
    Mirrors get_tile_dtype except for the attribute returned."""
    img_dirs = get_img_dirs(dataset_dir)
    dataset_listing = create_listing_for_each_cycle_region(img_dirs)
    for regions in dataset_listing.values():
        for channels in regions.values():
            for tiles in channels.values():
                for zplanes in tiles.values():
                    first_plane = next(iter(zplanes.values()))
                    return tif.imread(path_to_str(first_plane)).shape
141 |
142 |
def get_tile_dtype(dataset_dir: Path):
    """Numpy dtype of the first tile image found in the dataset (reads one tiff).
    Mirrors get_tile_shape except for the attribute returned."""
    img_dirs = get_img_dirs(dataset_dir)
    dataset_listing = create_listing_for_each_cycle_region(img_dirs)
    for regions in dataset_listing.values():
        for channels in regions.values():
            for tiles in channels.values():
                for zplanes in tiles.values():
                    first_plane = next(iter(zplanes.values()))
                    return tif.imread(path_to_str(first_plane)).dtype
153 |
--------------------------------------------------------------------------------
/bin/pipeline_utils/pipeline_config_reader.py:
--------------------------------------------------------------------------------
1 | import json
2 | from pathlib import Path
3 | from typing import Any, Dict, List, Optional, Tuple
4 |
5 |
def load_pipeline_config(pipeline_config_path: Path) -> dict:
    """Deserialize the pipeline JSON config at the given path."""
    with open(pipeline_config_path, "r") as stream:
        return json.load(stream)
10 |
11 |
12 | def _convert_tiling_mode(tiling_mode: str):
13 | if "snake" in tiling_mode.lower():
14 | new_tiling_mode = "snake"
15 | elif "grid" in tiling_mode.lower():
16 | new_tiling_mode = "grid"
17 | else:
18 | raise ValueError("Unknown tiling mode: " + tiling_mode)
19 | return new_tiling_mode
20 |
21 |
def _get_dataset_info_from_config(pipeline_config: dict) -> Dict[str, Any]:
    """Flatten a pipeline config dict into the dataset-info dict used downstream.

    Derived fields (num_channels, num_tiles, reference channel/cycle, tiling
    mode) are computed here; the rest are copied from the config, with
    optional renaming. "membrane_channel" is treated as optional: it was
    previously listed in BOTH required and optional fields, which made the
    optional handling dead code and raised KeyError for datasets without a
    membrane stain.
    """
    required_fields: List[Tuple[str, Optional[str]]] = [
        ("num_cycles", None),
        ("num_tiles_x", "region_width"),
        ("num_tiles_y", "region_height"),
        ("tile_width", None),
        ("tile_height", None),
        ("tile_dtype", None),
        ("overlap_x", "tile_overlap_x"),
        ("overlap_y", "tile_overlap_y"),
        ("pixel_distance_x", "lateral_resolution"),
        ("pixel_distance_y", "lateral_resolution"),
        ("pixel_distance_z", "axial_resolution"),
        ("nuclei_channel", None),
        ("nuclei_channel_loc", None),
        ("membrane_channel_loc", None),
        ("num_z_planes", None),
        ("channel_names", None),
        ("channel_names_qc_pass", None),
        ("num_concurrent_tasks", None),
        ("lateral_resolution", None),
    ]
    optional_fields: List[Tuple[str, Optional[str]]] = [
        ("membrane_channel", None),
    ]
    channel_names = pipeline_config["channel_names"]
    channels_per_cycle = len(channel_names) // pipeline_config["num_cycles"]
    nuclei_channel_idx = channel_names.index(pipeline_config["nuclei_channel"])
    pipeline_config_dict = dict(
        dataset_dir=Path(pipeline_config["raw_data_location"]),
        num_channels=channels_per_cycle,
        num_tiles=pipeline_config["region_width"] * pipeline_config["region_height"],
        # does not matter because we have only one z-plane:
        overlap_z=1,
        # 1-based index of the nuclei channel in channel_names:
        reference_channel=nuclei_channel_idx + 1,
        # 1-based cycle that contains the nuclei channel:
        reference_cycle=nuclei_channel_idx // channels_per_cycle + 1,
        tiling_mode=_convert_tiling_mode(pipeline_config["tiling_mode"]),
    )
    for field, source in required_fields:
        pipeline_config_dict[field] = pipeline_config[source or field]
    for field, source in optional_fields:
        source = source or field
        if source in pipeline_config:
            pipeline_config_dict[field] = pipeline_config[source]
    return pipeline_config_dict
72 |
73 |
def load_dataset_info(pipeline_config_path: Path):
    """Load the pipeline config file and flatten it into a dataset-info dict."""
    config = load_pipeline_config(pipeline_config_path)
    return _get_dataset_info_from_config(config)
78 |
--------------------------------------------------------------------------------
/bin/slicing/modify_pipeline_config.py:
--------------------------------------------------------------------------------
1 | import json
2 | from pathlib import Path
3 | from typing import Tuple
4 |
5 |
def generate_slicer_info(
    tile_shape_no_overlap: Tuple[int, int], overlap: int, stitched_img_shape: Tuple[int, int]
) -> dict:
    """Compute slicer metadata (padding, tile counts, tile shapes) for
    splitting a stitched image into tiles of the given size.

    :param tile_shape_no_overlap: (height, width) of a tile, overlap excluded.
    :param overlap: overlap in pixels added to every side of each tile.
    :param stitched_img_shape: (height, width) of the stitched image.
    :return: dict with a single "slicer" key describing the slicing layout.
    """
    img_height, img_width = stitched_img_shape
    tile_height, tile_width = tile_shape_no_overlap

    # (-a) % b is 0 when b divides a, otherwise b - (a % b): exactly the
    # padding needed to round the image dimension up to a multiple of the
    # tile size.  Replaces the previous redundant if/else branches.
    padding = dict(
        left=0,
        right=(-img_width) % tile_width,
        top=0,
        bottom=(-img_height) % tile_height,
    )

    # Ceil division: partially covered edge tiles still count.
    x_ntiles = -(-img_width // tile_width)
    y_ntiles = -(-img_height // tile_height)

    slicer_info = {
        "slicer": {
            "padding": padding,
            "overlap": overlap,
            "num_tiles": {"x": x_ntiles, "y": y_ntiles},
            "tile_shape_no_overlap": {"x": tile_width, "y": tile_height},
            "tile_shape_with_overlap": {
                # overlap is added on both sides of each tile
                "x": tile_width + overlap * 2,
                "y": tile_height + overlap * 2,
            },
        }
    }
    return slicer_info
43 |
44 |
def replace_values_in_config(exp, slicer_info):
    """Overwrite the tiling-related fields of the pipeline config with values
    derived from the slicer layout, preserving the pre-slicing values under
    the "original_measurements" key.  Mutates and returns `exp`."""
    slicer = slicer_info["slicer"]

    preserved_keys = (
        "tiling_mode",
        "region_width",
        "region_height",
        "num_z_planes",
        "tile_width",
        "tile_height",
        "tile_overlap_x",
        "tile_overlap_y",
        "target_shape",
    )
    # Snapshot the original values before overwriting them.
    originals = {key: exp[key] for key in preserved_keys}

    tile_w = slicer["tile_shape_no_overlap"]["x"]
    tile_h = slicer["tile_shape_no_overlap"]["y"]
    full_overlap = slicer["overlap"] * 2

    exp["tiling_mode"] = "grid"
    exp["region_width"] = slicer["num_tiles"]["x"]
    exp["region_height"] = slicer["num_tiles"]["y"]
    exp["num_z_planes"] = 1
    exp["tile_width"] = tile_w
    exp["tile_height"] = tile_h
    exp["tile_overlap_x"] = full_overlap
    exp["tile_overlap_y"] = full_overlap
    exp["target_shape"] = [tile_w, tile_h]

    exp["original_measurements"] = originals
    return exp
77 |
78 |
def modify_pipeline_config(
    path_to_config: Path,
    tile_shape_no_overlap: Tuple[int, int],
    overlap: int,
    stitched_img_shape: Tuple[int, int],
):
    """Load the pipeline config JSON from disk, rewrite its tiling fields to
    match the new slicing layout, and embed the slicer metadata itself.

    Returns the updated config dict (not written back to disk)."""
    with open(path_to_config, "r") as stream:
        config = json.load(stream)

    slicer_info = generate_slicer_info(tile_shape_no_overlap, overlap, stitched_img_shape)
    updated = replace_values_in_config(config, slicer_info)
    updated.update(slicer_info)
    return updated
93 |
94 |
def save_modified_pipeline_config(pipeline_config: dict, out_dir: Path):
    """Serialize the pipeline config as "pipelineConfig.json" inside out_dir."""
    with open(out_dir / "pipelineConfig.json", "w") as stream:
        json.dump(pipeline_config, stream, indent=4)
99 |
--------------------------------------------------------------------------------
/bin/slicing/run_slicing.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import re
3 | from pathlib import Path
4 | from typing import Dict, Tuple
5 |
6 | import tifffile as tif
7 | from modify_pipeline_config import modify_pipeline_config, save_modified_pipeline_config
8 | from slicer import slice_img
9 |
10 |
def path_to_str(path: Path):
    """Return the absolute path as a POSIX-style (forward-slash) string."""
    return path.absolute().as_posix()
13 |
14 |
def path_to_dict(path: Path):
    """
    Extract region, x position, y position and put into the dictionary
    {R:region, X: position, Y: position, path: path}
    """
    # Split the name into alternating (label, digits) pieces, e.g.
    # "Cyc001_Reg001_Ch001" -> ["Cyc", "001", "Reg", "001", "Ch", "001"].
    pieces = re.split(r"(\d+)(?:_?)", path.name)[:-1]
    parsed = {label: int(number) for label, number in zip(pieces[::2], pieces[1::2])}
    parsed["path"] = path
    return parsed
25 |
26 |
def make_dir_if_not_exists(dir_path: Path):
    """Create dir_path (and any missing parents); a no-op if it already exists."""
    dir_path.mkdir(parents=True, exist_ok=True)
30 |
31 |
def get_image_path_in_dir(dir_path: Path) -> Path:
    """Return the first TIFF file (.tif or .tiff) found in dir_path.

    :raises ValueError: if the directory contains no TIFF files (the previous
        bare `img_listing[0]` raised an opaque IndexError in that case).
    """
    allowed_extensions = (".tif", ".tiff")
    for entry in dir_path.iterdir():
        if entry.suffix in allowed_extensions:
            return entry
    raise ValueError("No TIFF images found in {}".format(dir_path))
37 |
38 |
def get_stitched_image_shape(
    stitched_dirs: Dict[int, Dict[int, Dict[int, Path]]],
) -> Tuple[int, int]:
    """Read the (height, width) shape of one stitched image.

    Walks the cycle -> region -> channel mapping; for each region only the
    first channel's image path is taken (inner loop breaks immediately), and
    the path that survives the loops is the one from the LAST visited region.
    All stitched images are presumably the same shape -- TODO confirm.

    NOTE(review): if stitched_dirs is empty this raises NameError, since
    stitched_img_path is never assigned.
    """
    for cycle in stitched_dirs:
        for region in stitched_dirs[cycle]:
            for channel, dir_path in stitched_dirs[cycle][region].items():
                stitched_img_path = get_image_path_in_dir(dir_path)
                # Only one channel per region is needed for the shape.
                break
    with tif.TiffFile(stitched_img_path) as TF:
        stitched_image_shape = TF.series[0].shape
    return stitched_image_shape
50 |
51 |
def create_output_dirs_for_tiles(
    stitched_channel_dirs: Dict[int, Dict[int, Dict[int, Path]]], out_dir: Path
) -> Dict[int, Dict[int, Path]]:
    """Create one output directory per (cycle, region) pair under out_dir,
    named "Cyc{cycle}_reg{region}", and return a cycle -> region -> path map."""
    out_dirs_for_tiles: Dict[int, Dict[int, Path]] = {}
    for cycle, regions in stitched_channel_dirs.items():
        per_region = {}
        for region in regions:
            tile_dir = out_dir / "Cyc{cycle:d}_reg{region:d}".format(cycle=cycle, region=region)
            make_dir_if_not_exists(tile_dir)
            per_region[region] = tile_dir
        out_dirs_for_tiles[cycle] = per_region
    return out_dirs_for_tiles
65 |
66 |
def split_channels_into_tiles(
    stitched_dirs: Dict[int, Dict[int, Dict[int, Path]]],
    out_dirs_for_tiles: Dict[int, Dict[int, Path]],
    tile_size=1000,
    overlap=50,
):
    """Slice every stitched channel image into overlapping tiles, writing the
    tiles into the matching per-(cycle, region) output directory."""
    for cycle, regions in stitched_dirs.items():
        for region, channels in regions.items():
            # Same destination for every channel of this (cycle, region).
            tile_out_dir = out_dirs_for_tiles[cycle][region]
            for channel, dir_path in channels.items():
                stitched_image_path = get_image_path_in_dir(dir_path)
                print(stitched_image_path.name)
                slice_img(
                    path_to_str(stitched_image_path),
                    path_to_str(tile_out_dir),
                    tile_size=tile_size,
                    overlap=overlap,
                    region=region,
                    zplane=1,
                    channel=channel,
                )
88 |
89 |
def organize_dirs(base_stitched_dir: Path) -> Dict[int, Dict[int, Dict[int, Path]]]:
    """Group the per-channel stitched directories into a nested
    cycle -> region -> channel -> path mapping.

    Expected dir naming: Cyc{cyc:03d}_Reg{reg:03d}_Ch{ch:03d}.
    """
    stitched_dirs: Dict[int, Dict[int, Dict[int, Path]]] = {}
    for dir_path in base_stitched_dir.iterdir():
        name_info = path_to_dict(dir_path)
        # setdefault replaces the previous nested if/else insertion logic.
        per_cycle = stitched_dirs.setdefault(name_info["Cyc"], {})
        per_region = per_cycle.setdefault(name_info["Reg"], {})
        per_region[name_info["Ch"]] = dir_path
    return stitched_dirs
108 |
109 |
def main(base_stitched_dir: Path, pipeline_config_path: Path):
    """Slice stitched channel images into tiles and save an updated pipeline
    config describing the new tiling.

    Output locations are hard-coded for the pipeline's container environment:
    tiles go to /output/new_tiles, the config to /output/pipeline_conf/.
    """
    out_dir = Path("/output/new_tiles")
    pipeline_conf_dir = Path("/output/pipeline_conf/")
    make_dir_if_not_exists(out_dir)
    make_dir_if_not_exists(pipeline_conf_dir)

    stitched_channel_dirs = organize_dirs(base_stitched_dir)
    out_dirs_for_tiles = create_output_dirs_for_tiles(stitched_channel_dirs, out_dir)

    stitched_img_shape = get_stitched_image_shape(stitched_channel_dirs)

    # Tile geometry used both for the actual slicing and for the config update.
    tile_size = 1000
    overlap = 100
    print("Splitting images into tiles")
    print("Tile size:", tile_size, "| overlap:", overlap)
    split_channels_into_tiles(stitched_channel_dirs, out_dirs_for_tiles, tile_size, overlap)

    modified_experiment = modify_pipeline_config(
        pipeline_config_path, (tile_size, tile_size), overlap, stitched_img_shape
    )
    save_modified_pipeline_config(modified_experiment, pipeline_conf_dir)
131 |
132 |
if __name__ == "__main__":
    # Command-line entry point: slice stitched images into Cytokit-sized tiles.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--base_stitched_dir",
        type=Path,
        help="path to directory with directories per channel that contain stitched images",
    )
    parser.add_argument(
        "--pipeline_config_path", type=Path, help="path to pipelineConfig.json file"
    )

    args = parser.parse_args()

    main(args.base_stitched_dir, args.pipeline_config_path)
147 |
--------------------------------------------------------------------------------
/bin/slicing/slicer.py:
--------------------------------------------------------------------------------
1 | import os.path as osp
2 |
3 | import dask
4 | import numpy as np
5 | import tifffile as tif
6 |
7 |
def get_tile(arr, hor_f: int, hor_t: int, ver_f: int, ver_t: int, overlap=0):
    """Extract arr[ver_f:ver_t, hor_f:hor_t] expanded by `overlap` pixels on
    every side, zero-padding wherever the expanded window falls outside arr.

    :param arr: 2D image array.
    :param hor_f: horizontal (column) start, overlap excluded.
    :param hor_t: horizontal (column) end, overlap excluded.
    :param ver_f: vertical (row) start, overlap excluded.
    :param ver_t: vertical (row) end, overlap excluded.
    :param overlap: pixels by which to extend the window on each side.
    :return: tile of shape (ver_t - ver_f + 2*overlap, hor_t - hor_f + 2*overlap).
    """
    hor_f -= overlap
    hor_t += overlap
    ver_f -= overlap
    ver_t += overlap

    # How much of the expanded window falls outside the array on each side;
    # that amount is restored as zero padding after clamping.
    left_pad_size = max(0, -hor_f)
    top_pad_size = max(0, -ver_f)
    right_pad_size = max(0, hor_t - arr.shape[1])
    bot_pad_size = max(0, ver_t - arr.shape[0])

    hor_f = max(hor_f, 0)
    ver_f = max(ver_f, 0)
    hor_t = min(hor_t, arr.shape[1])
    ver_t = min(ver_t, arr.shape[0])

    tile = arr[ver_f:ver_t, hor_f:hor_t]
    padding = ((top_pad_size, bot_pad_size), (left_pad_size, right_pad_size))
    # Pad only when at least one side needs it.  The original used the obscure
    # tuple comparison `max(padding) > (0, 0)`, which relied on tuple ordering.
    if any(amount for pair in padding for amount in pair):
        tile = np.pad(tile, padding, mode="constant")
    return tile
43 |
44 |
def split_by_size(
    arr: np.ndarray, region: int, zplane: int, channel: int, tile_w: int, tile_h: int, overlap: int
):
    """Splits image into tiles by size of tile, in row-major order, and
    generates a Cytokit-style file name for every tile.

    tile_w - tile width
    tile_h - tile height
    """
    arr_height, arr_width = arr.shape[-2], arr.shape[-1]

    # Ceil division: partially covered edge tiles still count.
    x_ntiles = -(-arr_width // tile_w)
    y_ntiles = -(-arr_height // tile_h)

    tiles = []
    img_names = []
    name_template = "{region:d}_{tile:05d}_Z{zplane:03d}_CH{channel:d}.tif"

    for row in range(y_ntiles):
        ver_f = row * tile_h
        for col in range(x_ntiles):
            hor_f = col * tile_w
            tiles.append(get_tile(arr, hor_f, hor_f + tile_w, ver_f, ver_f + tile_h, overlap))
            # Tile ids are 1-based, counted row by row.
            tile_id = row * x_ntiles + col + 1
            img_names.append(
                name_template.format(region=region, tile=tile_id, zplane=zplane, channel=channel)
            )

    return tiles, img_names
83 |
84 |
def slice_img(
    in_path: str,
    out_dir: str,
    tile_size: int,
    overlap: int,
    region: int,
    channel: int,
    zplane: int,
):
    """Read an image, cut it into square tiles of `tile_size` (plus `overlap`
    on every side) and write the tiles to out_dir in parallel dask threads."""
    img = tif.imread(in_path)
    tiles, names = split_by_size(
        img,
        region=region,
        zplane=zplane,
        channel=channel,
        tile_w=tile_size,
        tile_h=tile_size,
        overlap=overlap,
    )

    write_tasks = [
        dask.delayed(tif.imwrite)(
            osp.join(out_dir, name),
            tile,
            photometric="minisblack",
            shaped=False,
        )
        for tile, name in zip(tiles, names)
    ]
    dask.compute(*write_tasks, scheduler="threads")
116 |
--------------------------------------------------------------------------------
/bin/utils.py:
--------------------------------------------------------------------------------
1 | import re
2 | from collections import defaultdict
3 | from os import walk
4 | from pathlib import Path
5 | from pprint import pformat
6 | from typing import Dict, List
7 |
8 | import yaml
9 |
10 |
def list_directory_tree(directory: Path) -> str:
    """Return a pretty-printed, sorted recursive listing of `directory`,
    terminated by a newline."""
    entries = sorted(directory.glob("**/*"))
    return "{}\n".format(pformat(entries))
13 |
14 |
def print_directory_tree(directory: Path):
    """Print the sorted recursive listing of `directory` to stdout."""
    tree = list_directory_tree(directory)
    print(tree)
17 |
18 |
def infer_tile_names(cytokit_config_filename: Path) -> List[str]:
    """Build the full list of expected tile names ("R{r}_X{x}_Y{y}", all
    indices 1-based and zero-padded to 3 digits) from a Cytokit YAML config."""
    with open(cytokit_config_filename) as cytokit_config_file:
        cytokit_config = yaml.safe_load(cytokit_config_file)

    acquisition = cytokit_config["acquisition"]
    region_height = acquisition["region_height"]
    region_width = acquisition["region_width"]
    region_count = len(acquisition["region_names"])

    # Width is X values, height is Y values.
    return [
        "R{:03}_X{:03}_Y{:03}".format(r, x, y)
        for r in range(1, region_count + 1)
        for x in range(1, region_width + 1)
        for y in range(1, region_height + 1)
    ]
38 |
39 |
def collect_files_by_tile(
    tile_names: List[str],
    directory: Path,
    *,
    allow_empty_tiles: bool = False,
) -> Dict[str, List[Path]]:
    """Map each tile name to the files under `directory` whose names begin
    with a match for that tile name (treated as a regex anchored at the start
    of the filename).

    :param tile_names: tile identifiers, e.g. "R001_X001_Y001".
    :param directory: root directory, searched recursively.
    :param allow_empty_tiles: if False, raise for any tile with no files.
    :return: tile name -> matching file paths, in directory-walk order.
    :raises ValueError: if a tile has no files and allow_empty_tiles is False.
    """
    # Compile each tile pattern once up front.
    patterns = [(tile, re.compile(tile)) for tile in tile_names]
    files_by_tile: Dict[str, List[Path]] = defaultdict(list)

    # Walk the directory tree ONCE and test every file against each tile
    # pattern, instead of re-walking the whole tree once per tile as before.
    # Per-tile file ordering (walk order) is unchanged.
    for dirpath_str, dirnames, filenames in walk(directory):
        dirpath = Path(dirpath_str)
        for filename in filenames:
            for tile, pattern in patterns:
                if pattern.match(filename):
                    files_by_tile[tile].append(dirpath / filename)

    # If a tile doesn't have any files, throw an error unless explicitly allowed.
    if not allow_empty_tiles:
        for tile in tile_names:
            if len(files_by_tile[tile]) == 0:
                raise ValueError(f"No files were found for tile {tile}")

    return files_by_tile
64 |
--------------------------------------------------------------------------------
/cytokit-docker/Dockerfile:
--------------------------------------------------------------------------------
FROM eczech/cytokit:latest

# Pre-populate Cytokit's cache directory from the pipeline asset bucket so
# pipeline runs don't download it at execution time.
WORKDIR /lab/data/.cytokit
RUN curl https://s3.amazonaws.com/hubmap-pipeline-assets/cytokit-cache.tar | tar -xf -

WORKDIR /opt

# Update tensorflow-gpu to version 1.14
RUN pip install --upgrade tensorflow-gpu==1.14.0

# Wrapper scripts invoked inside the container by the CWL steps.
COPY setup_data_directory.py /opt
COPY cytokit_wrapper.py /opt
13 |
--------------------------------------------------------------------------------
/cytokit-docker/cytokit_wrapper.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3.5
2 | # Note: this version ^^^ is what's available in the Cytokit image
3 | # and our extension. No f-strings or PEP 519.
4 |
5 | from argparse import ArgumentParser
6 | from os import environ
7 | from os.path import split as osps
8 | from pathlib import Path
9 | from subprocess import check_call
10 |
11 | import yaml
12 |
13 | # TODO ↓↓↓ unify this script with setting up the data directory
14 | # instead of calling this script as a separate executable
15 | SETUP_DATA_DIR_COMMAND = [
16 | "/opt/setup_data_directory.py",
17 | "{data_dir}",
18 | ]
19 | CYTOKIT_COMMAND = [
20 | "cytokit",
21 | "{command}",
22 | "run_all",
23 | "--config-path={yaml_config}",
24 | "--data-dir={data_dir}",
25 | "--output-dir=output",
26 | ]
27 |
28 | CYTOKIT_PROCESSOR_OUTPUT_DIRS = frozenset({"cytometry", "processor"})
29 |
30 |
def symlink_images(data_dir: Path):
    """Invoke the data-directory setup script to build the 'symlinks' tree.

    TODO: unify, don't call another command-line script.
    """
    command = [part.format(data_dir=data_dir) for part in SETUP_DATA_DIR_COMMAND]
    print("Running:", " ".join(command))
    check_call(command)
36 |
37 |
def find_cytokit_processor_output_r(directory: Path):
    """
    BIG HACK for step-by-step CWL usage -- walk parent directories until
    we find one containing 'cytometry' and 'processor'.

    Returns the containing directory, or None if the filesystem root is
    reached without finding one.
    """
    current = directory
    while True:
        names_present = {child.name for child in current.iterdir()}
        if CYTOKIT_PROCESSOR_OUTPUT_DIRS <= names_present:
            return current
        absolute = current.absolute()
        parent = absolute.parent
        if parent == absolute:
            # At the root. No data found.
            return None
        current = parent
54 |
55 |
def find_cytokit_processor_output(directory: Path) -> Path:
    """Like find_cytokit_processor_output_r, but raises ValueError instead of
    returning None when no `cytokit processor` output is found."""
    data_dir = find_cytokit_processor_output_r(directory)
    if data_dir is not None:
        return data_dir
    message = "No `cytokit processor` output found in {} or any parent directories"
    raise ValueError(message.format(directory))
63 |
64 |
def find_or_prep_data_directory(cytokit_command: str, data_dir: Path, pipeline_config: Path):
    """
    Locate or construct the data directory appropriate for the given Cytokit
    command.

    For "processor": build a "symlinks" tree from the raw-data subdirectory
    named by the pipeline config. For "operator": symlink the earlier
    `cytokit processor` output into a new "output" directory.

    :return: pathlib.Path to data directory, either original or
        newly-created with symlinks.
        (NOTE(review): this is a single Path, not the 2-tuple the original
        comment claimed.)
    :raises ValueError: for any other cytokit_command.
    """
    # Read directory name from pipeline config
    # Python 3.6 would be much nicer, but the Cytokit image is built from
    # Ubuntu 16.04, which comes with 3.5
    with pipeline_config.open() as f:
        config = yaml.safe_load(f)
    dir_name = osps(config["raw_data_location"])[1]

    data_subdir = data_dir / dir_name

    if cytokit_command == "processor":
        symlink_images(data_subdir)
        return Path("symlinks")
    elif cytokit_command == "operator":
        # Need to find the output from 'cytokit processor'
        processor_dir = find_cytokit_processor_output(data_dir)
        output_path = Path("output")
        output_path.mkdir()
        for child in processor_dir.iterdir():
            link = output_path / child.name
            print("Symlinking", child, "to", link)
            link.symlink_to(child)
        return output_path
    else:
        raise ValueError('Unsupported Cytokit command: "{}"'.format(cytokit_command))
94 |
95 |
def run_cytokit(cytokit_command: str, data_directory: Path, yaml_config: Path):
    """Run `cytokit <command> run_all` on the given data directory and config.

    After an "operator" run, the `cytokit processor` output entries that were
    symlinked into ./output are removed so they aren't captured as outputs of
    this step.
    """
    command = [
        piece.format(
            command=cytokit_command,
            data_dir=data_directory,
            yaml_config=yaml_config,
        )
        for piece in CYTOKIT_COMMAND
    ]
    print("Running:", " ".join(command))
    env = environ.copy()
    # Cytokit's pipeline package is not on the default path inside the image.
    env["PYTHONPATH"] = "/lab/repos/cytokit/python/pipeline"
    check_call(command, env=env)

    print("Cytokit completed successfully")
    # I feel really bad about this, but not bad enough not to do it
    if cytokit_command == "operator":
        output_dir = Path("output")
        for dirname in CYTOKIT_PROCESSOR_OUTPUT_DIRS:
            dir_to_delete = output_dir / dirname
            print("Deleting", dir_to_delete)
            # NOTE(review): unlink() works here presumably because these
            # entries are symlinks created in find_or_prep_data_directory;
            # it would fail on real directories -- confirm.
            dir_to_delete.unlink()
118 |
119 |
def main(cytokit_command: str, data_dir: Path, pipeline_config: Path, yaml_config: Path):
    """Prepare the data directory for the given Cytokit command, then run it."""
    data_dir = find_or_prep_data_directory(cytokit_command, data_dir, pipeline_config)
    run_cytokit(cytokit_command, data_dir, yaml_config)
123 |
124 |
if __name__ == "__main__":
    # CLI: cytokit_command data_dir pipeline_config yaml_config
    p = ArgumentParser()
    p.add_argument("cytokit_command")
    p.add_argument("data_dir", type=Path)
    p.add_argument("pipeline_config", type=Path)
    p.add_argument("yaml_config", type=Path)
    args = p.parse_args()

    main(args.cytokit_command, args.data_dir, args.pipeline_config, args.yaml_config)
134 |
--------------------------------------------------------------------------------
/cytokit-docker/setup_data_directory.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import argparse
4 | import json
5 | import logging
6 | import os
7 | import re
8 | import stat
9 | import sys
10 | from pathlib import Path
11 |
12 | logging.basicConfig(level=logging.INFO, format="%(levelname)-7s - %(message)s")
13 | logger = logging.getLogger(__name__)
14 |
15 | # Patterns for detecting raw data files are below.
16 | # We follow Cytokit's "keyence_multi_cycle_v01" naming convention defined in:
17 | # https://github.com/hammerlab/cytokit/blob/master/python/pipeline/cytokit/io.py
18 | # Pattern for the directories containing the raw data from each cycle-region
19 | # pair. Different submitters use different naming conventions (e.g.
20 | # cyc001_reg001_191209_123455 or Cyc1_reg1), so our regex has to allow for this.
21 | rawDirNamingPattern = re.compile(r"^cyc0*(\d+)_reg0*(\d+).*", re.IGNORECASE)
22 | # Pattern for raw data TIFF files. These should be named according to the following pattern:
23 | # __Z_CH.tif
24 | # All indices start at 1.
25 | # Tile index is padded to three digits, e.g. 00025, 00001, etc.
26 | # Z-plane index is padded to three digits, e.g. 025, 001, etc.
27 | # Region and channel indices are one digit each.
28 | rawFileNamingPattern = re.compile(r"^\d_\d{5}_Z\d{3}_CH\d\.tif$")
29 | # Pattern to match one single digit at the start of a string, used to replace
30 | # incorrect region indices with the correct ones in some raw data TIFF files.
31 | rawFileRegionPattern = re.compile(r"^\d")
32 |
33 |
def main(data_dir: str):
    """Build a Cytokit-compatible data directory from raw CODEX data.

    Validates that data_dir is readable and contains cycle-region
    subdirectories with correctly named TIFF files, then creates a
    "symlinks" directory in the current working directory holding one
    Cyc{cycle}_reg{region} subdirectory per cycle-region pair, populated
    with symlinks to the raw TIFFs (with the region index in each file name
    corrected from the directory name).

    :param data_dir: path to the directory containing raw cycle-region
        subdirectories.
    :raises Exception: if data_dir is unreadable, no cycle-region
        directories are found, or a directory contains no valid TIFF files.
    """
    ###################################################################
    # Inspect source directories and collect paths to raw data files. #
    ###################################################################

    # Ensure that source directory exists and is readable.
    st = os.stat(data_dir)
    readable = bool(st.st_mode & stat.S_IRUSR)
    if not readable:
        raise Exception(
            "Source directory {} is not readable by the current user.".format(data_dir)
        )

    # Filter the source directory contents for subdirectories matching the
    # expected raw data directory naming pattern (cycle-region pairs);
    # naming conventions vary between submitters.
    sourceDirList = os.listdir(data_dir)
    sourceDataDirs = list(filter(rawDirNamingPattern.search, sourceDirList))
    if not sourceDataDirs:
        raise Exception(
            "No directories matching expected raw data directory naming pattern found in {}".format(
                data_dir
            )
        )

    # Collect raw data file names per cycle-region directory, keeping only
    # files matching the raw TIFF naming convention defined above.
    sourceDataFiles = {}
    for sdir in sourceDataDirs:
        fileList = os.listdir(os.path.join(data_dir, sdir))
        fileList = list(filter(rawFileNamingPattern.search, fileList))
        if not fileList:
            raise Exception(
                "No files found matching expected raw file naming pattern in {}".format(sdir)
            )
        sourceDataFiles[sdir] = fileList

    # NOTE(review): completeness (one file per channel, z-plane, cycle and
    # region) is not verified here -- only the file naming pattern is.

    ######################################
    # Start creating directories and links
    ######################################

    targetDirectory = "symlinks"

    # Create target directory.  (The literal "symlinks" was previously
    # hard-coded here and below; use the variable consistently.)
    os.mkdir(targetDirectory)
    logger.info("Cytokit data directory created at %s" % targetDirectory)

    for sdir in sourceDataFiles:
        dirMatch = rawDirNamingPattern.match(sdir)

        cycle, region = dirMatch.group(1, 2)

        cycleRegionDir = os.path.join(targetDirectory, "Cyc" + cycle + "_reg" + region)

        os.mkdir(cycleRegionDir)

        # Create symlinks for TIFF files.
        for tifFileName in sourceDataFiles[sdir]:
            # Replace the region number at the start because sometimes it's wrong.
            linkTifFileName = rawFileRegionPattern.sub(region, tifFileName)

            # Set up full path to symlink.
            linkTifFilePath = os.path.join(cycleRegionDir, linkTifFileName)

            # Full path to source raw data file.
            sourceTifFilePath = os.path.join(data_dir, sdir, tifFileName)

            # Create the symlink.
            os.symlink(sourceTifFilePath, linkTifFilePath)

    logger.info("Links created in directories under %s" % targetDirectory)
134 |
135 |
136 | ########
137 | # MAIN #
138 | ########
139 | if __name__ == "__main__":
140 | parser = argparse.ArgumentParser(
141 | description="Create a directory and populate directory with directories containing symlinks to the raw image data."
142 | )
143 | parser.add_argument(
144 | "data_dir",
145 | help="Data directory",
146 | )
147 |
148 | args = parser.parse_args()
149 |
150 | main(args.data_dir)
151 |
--------------------------------------------------------------------------------
/docker_images.txt:
--------------------------------------------------------------------------------
1 | hubmap/fiji_bigstitcher Dockerfile_fiji
2 | hubmap/codex-scripts Dockerfile
3 | hubmap/cytokit cytokit-docker/Dockerfile
4 |
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: base
2 | channels:
3 | - defaults
4 | - conda-forge
5 | dependencies:
6 | - python>=3.8,<4
7 | - pip
8 | - numpy-base>=1.18
9 | - numpy>=1.18
10 | - scipy>=1.4.0
11 | - pandas>=0.25
12 | - dask>=2.6.0
13 | - imagecodecs==2023.9.18
14 | - pip:
15 | - tifffile>=2021.8.30,<2023.3.15
16 | - PyYAML>=6.0.1
17 | - aicsimageio==4.14.0
18 | - lxml==4.9.3
19 | - matplotlib>=3.2.1
20 | - scikit-image>=0.17.2
21 | - scikit-learn>=0.23.1
22 | - shapely==2.0.1
23 | - opencv-contrib-python-headless>4.0,<5.0
24 | - pint==0.22
25 | - jsonschema==4.19.0
26 |
--------------------------------------------------------------------------------
/metadata_examples/channelnames.txt:
--------------------------------------------------------------------------------
1 | DAPI-01
2 | Blank
3 | Blank
4 | Blank
5 | DAPI-02
6 | CD31
7 | CD8
8 | Empty
9 | DAPI-03
10 | CD20
11 | Ki67
12 | CD3e
13 | DAPI-04
14 | SMActin
15 | Podoplanin
16 | CD68
17 | DAPI-05
18 | PanCK
19 | CD21
20 | CD4
21 | DAPI-06
22 | Lyve1
23 | CD45RO
24 | CD11c
25 | DAPI-07
26 | CD35
27 | ECAD
28 | CD107a
29 | DAPI-08
30 | CD34
31 | CD44
32 | HLA-DR
33 | DAPI-09
34 | Empty
35 | FoxP3
36 | CD163
37 | DAPI-10
38 | Empty
39 | CollagenIV
40 | Vimentin
41 | DAPI-11
42 | Empty
43 | CD15
44 | CD45
45 | DAPI-12
46 | Empty
47 | CD5
48 | CD1c
49 | DAPI-13
50 | Blank
51 | Blank
52 | Blank
53 |
--------------------------------------------------------------------------------
/metadata_examples/channelnames_report.csv:
--------------------------------------------------------------------------------
1 | Marker,Result
2 | DAPI-01,TRUE
3 | Blank,TRUE
4 | Blank,TRUE
5 | Blank,TRUE
6 | DAPI-02,TRUE
7 | CD31,TRUE
8 | CD8,TRUE
9 | Empty,TRUE
10 | DAPI-03,TRUE
11 | CD20,TRUE
12 | Ki67,TRUE
13 | CD3e,TRUE
14 | DAPI-04,TRUE
15 | SMActin,TRUE
16 | Podoplanin,TRUE
17 | CD68,TRUE
18 | DAPI-05,TRUE
19 | PanCK,TRUE
20 | CD21,TRUE
21 | CD4,TRUE
22 | DAPI-06,TRUE
23 | Lyve1,TRUE
24 | CD45RO,TRUE
25 | CD11c,TRUE
26 | DAPI-07,TRUE
27 | CD35,TRUE
28 | ECAD,TRUE
29 | CD107a,TRUE
30 | DAPI-08,TRUE
31 | CD34,TRUE
32 | CD44,TRUE
33 | HLA-DR,TRUE
34 | DAPI-09,TRUE
35 | Empty,TRUE
36 | FoxP3,TRUE
37 | CD163,TRUE
38 | DAPI-10,TRUE
39 | Empty,TRUE
40 | CollagenIV,TRUE
41 | Vimentin,TRUE
42 | DAPI-11,TRUE
43 | Empty,TRUE
44 | CD15,TRUE
45 | CD45,TRUE
46 | DAPI-12,TRUE
47 | Empty,TRUE
48 | CD5,TRUE
49 | CD1c,TRUE
50 | DAPI-13,TRUE
51 | Blank,TRUE
52 | Blank,TRUE
53 | Blank,TRUE
54 |
--------------------------------------------------------------------------------
/metadata_examples/experiment.json:
--------------------------------------------------------------------------------
1 | {
2 | "version": "1.7.0.6",
3 | "name": "src_CX_19-002_CC2-spleen-A",
4 | "dateProcessed": "2020-02-10T16:01:15.357-05:00[America/New_York]",
5 | "objectiveType": "air",
6 | "magnification": 20,
7 | "aperture": 0.75,
8 | "xyResolution": 377.4463383838384,
9 | "zPitch": 1500.0,
10 | "wavelengths": [
11 | 358,
12 | 488,
13 | 550,
14 | 650
15 | ],
16 | "bitDepth": 16,
17 | "numRegions": 1,
18 | "numCycles": 9,
19 | "numZPlanes": 13,
20 | "numChannels": 4,
21 | "regionWidth": 9,
22 | "regionHeight": 9,
23 | "tileWidth": 1920,
24 | "tileHeight": 1440,
25 | "tileOverlapX": 0.3,
26 | "tileOverlapY": 0.3,
27 | "tilingMode": "EITHER SNAKE OR GRID",
28 | "referenceCycle": 2,
29 | "referenceChannel": 1,
30 | "regIdx": [
31 | 1
32 | ],
33 | "cycle_lower_limit": 1,
34 | "cycle_upper_limit": 9,
35 | "num_z_planes": 1,
36 | "region_width": 9,
37 | "region_height": 9,
38 | "tile_width": 1344,
39 | "tile_height": 1008
40 | }
41 |
--------------------------------------------------------------------------------
/metadata_examples/exposure_times.txt:
--------------------------------------------------------------------------------
1 | Cycle,CH1,CH2,CH3,CH4
2 | 1,10,500,350,500
3 | 2,10,500,350,500
4 | 3,10,500,350,500
5 | 4,10,500,350,500
6 | 5,10,500,350,500
7 | 6,10,500,350,500
8 | 7,10,500,350,500
9 | 8,10,500,350,500
10 | 9,10,500,350,500
11 | 10,10,500,350,500
12 | 11,10,500,350,500
13 | 12,10,500,350,500
14 | 13,10,500,350,500
--------------------------------------------------------------------------------
/metadata_examples/segmentation.json:
--------------------------------------------------------------------------------
1 | {
2 | "nuclearStainCycle": 2,
3 | "nuclearStainChannel": 1,
4 | "membraneStainCycle": 11,
5 | "membraneStainChannel": 4
6 | }
7 |
--------------------------------------------------------------------------------
/pipeline-manifest.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "pattern": "experiment.json",
4 | "description": "File containing Cytokit's calculations from deconvolution, drift compensation, and focal plan selection, in JSON format",
5 | "edam_ontology_term": "EDAM_1.24.format_3464"
6 | },
7 | {
        "pattern": "stitched/expressions/reg(?P<region>\\d+)_stitched_expressions\\.ome\\.tiff",
9 | "description": "Cytokit expression output for region {region}, in OME-TIFF format",
10 | "edam_ontology_term": "EDAM_1.24.format_3727",
11 | "is_data_product": true
12 | },
13 | {
        "pattern": "stitched/mask/reg(?P<region>\\d+)_stitched_mask\\.ome\\.tiff",
15 | "description": "Segmentation mask for region {region}, in OME-TIFF format",
16 | "edam_ontology_term": "EDAM_1.24.format_3727",
17 | "is_data_product": true
18 | }
19 | ]
20 |
--------------------------------------------------------------------------------
/pipeline.cwl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env cwl-runner
2 |
3 | class: Workflow
4 | cwlVersion: v1.1
5 | label: CODEX analysis pipeline using Cytokit
6 |
7 | requirements:
8 | SubworkflowFeatureRequirement: {}
9 |
10 | inputs:
11 | data_dir:
12 | label: "Directory containing CODEX data"
13 | type: Directory
14 | gpus:
15 | label: "GPUs to use, represented as a comma-separated list of integers"
16 | type: string
17 | default: "0"
18 | num_concurrent_tasks:
19 | label: "Number of parallel CPU jobs"
20 | type: int
21 | default: 10
22 |
23 | outputs:
24 | experiment_config:
25 | outputSource: illumination_first_stitching/cytokit_config
26 | type: File
27 | label: "Cytokit configuration format"
28 | data_json:
29 | outputSource: run_cytokit/data_json
30 | type: File
31 | label: "JSON file containing Cytokit's calculations from deconvolution, drift compensation, and focal plane selection"
32 | stitched_images:
33 | outputSource: ometiff_second_stitching/stitched_images
34 | type: Directory
35 | label: "Segmentation masks and expressions in OME-TIFF format"
36 | pipeline_config:
37 | outputSource: ometiff_second_stitching/final_pipeline_config
38 | type: File
39 | label: "Pipeline config with all the modifications"
40 |
41 | steps:
42 | illumination_first_stitching:
43 | in:
44 | data_dir:
45 | source: data_dir
46 | gpus:
47 | source: gpus
48 | num_concurrent_tasks:
49 | source: num_concurrent_tasks
50 | out:
51 | - slicing_pipeline_config
52 | - cytokit_config
53 | - new_tiles
54 | run: steps/illumination_first_stitching.cwl
55 | label: "Illumination correction, best focus selection, and stitching stage 1"
56 |
57 | run_cytokit:
58 | in:
59 | data_dir:
60 | source: illumination_first_stitching/new_tiles
61 | yaml_config:
62 | source: illumination_first_stitching/cytokit_config
63 | out:
64 | - cytokit_output
65 | - data_json
66 | run: steps/run_cytokit.cwl
67 | label: "CODEX analysis via Cytokit processor and operator"
68 |
69 | ometiff_second_stitching:
70 | in:
71 | cytokit_output:
72 | source: run_cytokit/cytokit_output
73 | slicing_pipeline_config:
74 | source: illumination_first_stitching/slicing_pipeline_config
75 | cytokit_config:
76 | source: illumination_first_stitching/cytokit_config
77 | data_dir:
78 | source: data_dir
79 | out:
80 | - stitched_images
81 | - final_pipeline_config
82 | run: steps/ometiff_second_stitching.cwl
83 | label: "OMETIFF creation and stitching stage 2"
84 |
--------------------------------------------------------------------------------
/pipeline_release_mgmt.yaml:
--------------------------------------------------------------------------------
1 | main_branch: main
2 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.black]
2 | line-length = 99
3 | exclude = 'sprm/'
4 |
5 | [tool.isort]
6 | profile = "black"
7 | multi_line_output = 3
8 | skip = 'sprm/'
9 | src_paths = ["bin", "sprm"]
10 |
--------------------------------------------------------------------------------
/requirements-test.txt:
--------------------------------------------------------------------------------
1 | black==22.10.0
2 | isort==5.10.1
3 |
--------------------------------------------------------------------------------
/steps/illumination_first_stitching.cwl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env cwl-runner
2 |
3 | class: Workflow
4 | cwlVersion: v1.1
5 | label: Illumination correction, best focus selection, and stitching stage 1
6 |
7 | inputs:
8 | data_dir:
9 | label: "Directory containing CODEX data"
10 | type: Directory
11 | gpus:
12 | label: "GPUs to use, represented as a comma-separated list of integers"
13 | type: string
14 | default: "0"
15 | num_concurrent_tasks:
16 | label: "Number of parallel CPU jobs"
17 | type: int
18 | default: 10
19 |
20 | outputs:
21 | cytokit_config:
22 | outputSource: create_yaml_config/cytokit_config
23 | type: File
24 | label: "Cytokit configuration in YAML format"
25 | new_tiles:
26 | outputSource: slicing/new_tiles
27 | type: Directory
28 | slicing_pipeline_config:
29 | outputSource: slicing/modified_pipeline_config
30 | type: File
31 | label: "Pipeline config with all the modifications"
32 |
33 | steps:
34 | collect_dataset_info:
35 | in:
36 | base_directory:
37 | source: data_dir
38 | num_concurrent_tasks:
39 | source: num_concurrent_tasks
40 | out:
41 | - pipeline_config
42 | run: illumination_first_stitching/collect_dataset_info.cwl
43 | label: "Collect CODEX dataset info"
44 |
45 | illumination_correction:
46 | in:
47 | base_directory:
48 | source: data_dir
49 | pipeline_config:
50 | source: collect_dataset_info/pipeline_config
51 | out:
52 | - illum_corrected_tiles
53 | run: illumination_first_stitching/illumination_correction.cwl
54 |
55 | best_focus:
56 | in:
57 | data_dir:
58 | source: illumination_correction/illum_corrected_tiles
59 | pipeline_config:
60 | source: collect_dataset_info/pipeline_config
61 | out:
62 | - best_focus_tiles
63 | run: illumination_first_stitching/best_focus.cwl
64 |
65 | first_stitching:
66 | in:
67 | data_dir:
68 | source: best_focus/best_focus_tiles
69 | pipeline_config:
70 | source: collect_dataset_info/pipeline_config
71 | out:
72 | - stitched_images
73 | run: illumination_first_stitching/first_stitching.cwl
74 |
75 | slicing:
76 | in:
77 | base_stitched_dir:
78 | source: first_stitching/stitched_images
79 | pipeline_config:
80 | source: collect_dataset_info/pipeline_config
81 | out:
82 | - new_tiles
83 | - modified_pipeline_config
84 | run: illumination_first_stitching/slicing.cwl
85 |
86 | create_yaml_config:
87 | in:
88 | pipeline_config:
89 | source: slicing/modified_pipeline_config
90 | gpus:
91 | source: gpus
92 | out:
93 | - cytokit_config
94 | run: illumination_first_stitching/create_yaml_config.cwl
95 | label: "Create Cytokit experiment config in YAML format"
96 |
--------------------------------------------------------------------------------
/steps/illumination_first_stitching/best_focus.cwl:
--------------------------------------------------------------------------------
1 | cwlVersion: v1.1
2 | class: CommandLineTool
3 |
4 | requirements:
5 | DockerRequirement:
6 | dockerPull: hubmap/codex-scripts:latest
7 | dockerOutputDirectory: "/output"
8 |
9 | baseCommand: ["python", "/opt/best_focus/run_best_focus_selection.py"]
10 |
11 |
12 | inputs:
13 | data_dir:
14 | type: Directory
15 | inputBinding:
16 | prefix: "--data_dir"
17 |
18 |
19 | pipeline_config:
20 | type: File
21 | inputBinding:
22 | prefix: "--pipeline_config_path"
23 |
24 | outputs:
25 | best_focus_tiles:
26 | type: Directory
27 | outputBinding:
28 | glob: "/output/best_focus"
29 |
30 |
--------------------------------------------------------------------------------
/steps/illumination_first_stitching/collect_dataset_info.cwl:
--------------------------------------------------------------------------------
1 | cwlVersion: v1.1
2 | class: CommandLineTool
3 | label: Collect dataset info for Cytokit
4 |
5 | requirements:
6 | DockerRequirement:
7 | dockerPull: hubmap/codex-scripts:latest
8 |
9 | baseCommand: ["python", "/opt/dataset_info/run_collection.py"]
10 |
11 | inputs:
12 | base_directory:
13 | type: Directory
14 | inputBinding:
15 | prefix: "--path_to_dataset"
16 |
17 | num_concurrent_tasks:
18 | type: int
19 | inputBinding:
20 | prefix: "--num_concurrent_tasks"
21 |
22 | outputs:
23 | pipeline_config:
24 | type: File
25 | outputBinding:
26 | glob: pipelineConfig.json
27 |
--------------------------------------------------------------------------------
/steps/illumination_first_stitching/create_yaml_config.cwl:
--------------------------------------------------------------------------------
1 | cwlVersion: v1.1
2 | class: CommandLineTool
3 | label: Create Cytokit experiment config
4 |
5 | requirements:
6 | DockerRequirement:
7 | dockerPull: hubmap/codex-scripts:latest
8 |
9 | baseCommand: ["python", "/opt/create_cytokit_config.py"]
10 |
11 | inputs:
12 | gpus:
13 | type: string
14 | inputBinding:
15 | position: 1
16 | prefix: "--gpus="
17 | separate: false
18 | pipeline_config:
19 | type: File
20 | inputBinding:
21 | position: 2
22 | outputs:
23 | cytokit_config:
24 | type: File
25 | outputBinding:
26 | glob: experiment.yaml
27 |
--------------------------------------------------------------------------------
/steps/illumination_first_stitching/first_stitching.cwl:
--------------------------------------------------------------------------------
1 | cwlVersion: v1.1
2 | class: CommandLineTool
3 |
4 | requirements:
5 | DockerRequirement:
6 | dockerPull: hubmap/codex-scripts:latest
7 | dockerOutputDirectory: "/output"
8 |
9 | baseCommand: ["python", "/opt/codex_stitching/run_stitching.py"]
10 |
11 |
12 | inputs:
13 | data_dir:
14 | type: Directory
15 | inputBinding:
16 | prefix: "--data_dir"
17 |
18 |
19 | pipeline_config:
20 | type: File
21 | inputBinding:
22 | prefix: "--pipeline_config_path"
23 |
24 | outputs:
25 | stitched_images:
26 | type: Directory
27 | outputBinding:
28 | glob: "/output/stitched_images"
29 |
--------------------------------------------------------------------------------
/steps/illumination_first_stitching/illumination_correction.cwl:
--------------------------------------------------------------------------------
1 | cwlVersion: v1.1
2 | class: CommandLineTool
3 |
4 | requirements:
5 | DockerRequirement:
6 | dockerPull: hubmap/codex-scripts:latest
7 | dockerOutputDirectory: "/output"
8 |
9 | baseCommand: ["python", "/opt/illumination_correction/run_illumination_correction.py"]
10 |
11 |
12 | inputs:
13 | base_directory:
14 | type: Directory
15 | inputBinding:
16 | prefix: "--data_dir"
17 |
18 | pipeline_config:
19 | type: File
20 | inputBinding:
21 | prefix: "--pipeline_config_path"
22 |
23 | outputs:
24 | illum_corrected_tiles:
25 | type: Directory
26 | outputBinding:
27 | glob: "/output/corrected_images"
28 |
--------------------------------------------------------------------------------
/steps/illumination_first_stitching/slicing.cwl:
--------------------------------------------------------------------------------
1 | cwlVersion: v1.1
2 | class: CommandLineTool
3 |
4 | requirements:
5 | DockerRequirement:
6 | dockerPull: hubmap/codex-scripts:latest
7 | dockerOutputDirectory: "/output"
8 |
9 | baseCommand: ["python", "/opt/slicing/run_slicing.py"]
10 |
11 |
12 | inputs:
13 | base_stitched_dir:
14 | type: Directory
15 | inputBinding:
16 | prefix: "--base_stitched_dir"
17 |
18 | pipeline_config:
19 | type: File
20 | inputBinding:
21 | prefix: "--pipeline_config_path"
22 |
23 | outputs:
24 | new_tiles:
25 | type: Directory
26 | outputBinding:
27 | glob: "/output/new_tiles"
28 |
29 | modified_pipeline_config:
30 | type: File
31 | outputBinding:
32 | glob: "/output/pipeline_conf/pipelineConfig.json"
33 |
--------------------------------------------------------------------------------
/steps/ometiff_second_stitching-manifest.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "pattern": "pipeline_output/expr/(?P<image>.+)\\.ome\\.tiff",
4 | "description": "Cytokit expression output for image {image}, in OME-TIFF format",
5 | "edam_ontology_term": "EDAM_1.24.format_3727"
6 | },
7 | {
8 | "pattern": "pipeline_output/mask/(?P<image>.+)\\.ome\\.tiff",
9 | "description": "Segmentation mask for image {image}, in OME-TIFF format",
10 | "edam_ontology_term": "EDAM_1.24.format_3727"
11 | }
12 | ]
13 |
--------------------------------------------------------------------------------
/steps/ometiff_second_stitching.cwl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env cwl-runner
2 |
3 | class: Workflow
4 | cwlVersion: v1.1
5 | label: OMETIFF creation and stitching stage 2
6 |
7 | inputs:
8 | slicing_pipeline_config:
9 | type: File
10 | cytokit_config:
11 | type: File
12 | cytokit_output:
13 | type: Directory
14 | data_dir:
15 | type: Directory
16 | num_concurrent_tasks:
17 | label: "Number of parallel CPU jobs"
18 | type: int
19 | default: 10
20 |
21 | outputs:
22 | stitched_images:
23 | outputSource: second_stitching/stitched_images
24 | type: Directory
25 | label: "Segmentation masks and expressions in OME-TIFF format"
26 | final_pipeline_config:
27 | outputSource: second_stitching/final_pipeline_config
28 | type: File
29 | label: "Pipeline config with all the modifications"
30 |
31 | steps:
32 | background_subtraction:
33 | in:
34 | cytokit_output:
35 | source: cytokit_output
36 | pipeline_config:
37 | source: slicing_pipeline_config
38 | cytokit_config:
39 | source: cytokit_config
40 | num_concurrent_tasks:
41 | source: num_concurrent_tasks
42 | out:
43 | - bg_sub_tiles
44 | - bg_sub_config
45 | run: ometiff_second_stitching/background_subtraction.cwl
46 |
47 | ome_tiff_creation:
48 | in:
49 | cytokit_output:
50 | source: cytokit_output
51 | bg_sub_tiles:
52 | source: background_subtraction/bg_sub_tiles
53 | cytokit_config:
54 | source: cytokit_config
55 | input_data_dir:
56 | source: data_dir
57 | out:
58 | - ome_tiffs
59 | run: ometiff_second_stitching/ome_tiff_creation.cwl
60 | label: "Create OME-TIFF versions of Cytokit segmentation and extract results"
61 |
62 | second_stitching:
63 | in:
64 | pipeline_config:
65 | source: background_subtraction/bg_sub_config
66 | ometiff_dir:
67 | source: ome_tiff_creation/ome_tiffs
68 | out:
69 | - stitched_images
70 | - final_pipeline_config
71 | run: ometiff_second_stitching/second_stitching.cwl
72 |
--------------------------------------------------------------------------------
/steps/ometiff_second_stitching/background_subtraction.cwl:
--------------------------------------------------------------------------------
1 | cwlVersion: v1.1
2 | class: CommandLineTool
3 |
4 | requirements:
5 | DockerRequirement:
6 | dockerPull: hubmap/codex-scripts:latest
7 | dockerOutputDirectory: "/output"
8 |
9 | baseCommand: ["python", "/opt/background_subtraction/run_background_subtraction.py"]
10 |
11 |
12 | inputs:
13 | cytokit_output:
14 | type: Directory
15 | inputBinding:
16 | prefix: "--data_dir"
17 |
18 |
19 | pipeline_config:
20 | type: File
21 | inputBinding:
22 | prefix: "--pipeline_config_path"
23 |
24 | cytokit_config:
25 | type: File
26 | inputBinding:
27 | prefix: "--cytokit_config_path"
28 |
29 | num_concurrent_tasks:
30 | type: int
31 | default: 10
32 | inputBinding:
33 | prefix: "--num_concurrent_tasks"
34 |
35 | outputs:
36 | bg_sub_tiles:
37 | type: Directory
38 | outputBinding:
39 | glob: "/output/background_subtraction"
40 |
41 | bg_sub_config:
42 | type: File
43 | outputBinding:
44 | glob: "/output/config/pipelineConfig.json"
45 |
--------------------------------------------------------------------------------
/steps/ometiff_second_stitching/ome_tiff_creation.cwl:
--------------------------------------------------------------------------------
1 | cwlVersion: v1.1
2 | class: CommandLineTool
3 | label: Create OME-TIFF versions of Cytokit segmentation and extract results
4 |
5 | requirements:
6 | DockerRequirement:
7 | dockerPull: hubmap/codex-scripts:latest
8 |
9 | baseCommand: ["python", "/opt/convert_to_ometiff.py"]
10 |
11 | inputs:
12 | cytokit_output:
13 | type: Directory
14 | inputBinding:
15 | position: 0
16 | bg_sub_tiles:
17 | type: Directory
18 | inputBinding:
19 | position: 1
20 | cytokit_config:
21 | type: File
22 | inputBinding:
23 | position: 2
24 | input_data_dir:
25 | type: Directory
26 | inputBinding:
27 | position: 3
28 |
29 | outputs:
30 | ome_tiffs:
31 | type: Directory
32 | outputBinding:
33 | glob: output
34 |
35 |
36 |
--------------------------------------------------------------------------------
/steps/ometiff_second_stitching/second_stitching.cwl:
--------------------------------------------------------------------------------
1 | cwlVersion: v1.1
2 | class: CommandLineTool
3 |
4 | requirements:
5 | DockerRequirement:
6 | dockerPull: hubmap/codex-scripts:latest
7 | dockerOutputDirectory: /output
8 |
9 | baseCommand: ["python", "/opt/codex_stitching/secondary_stitcher/secondary_stitcher_runner.py"]
10 |
11 |
12 | inputs:
13 | pipeline_config:
14 | type: File
15 | inputBinding:
16 | prefix: "--pipeline_config_path"
17 |
18 | ometiff_dir:
19 | type: Directory
20 | inputBinding:
21 | prefix: "--ometiff_dir"
22 |
23 | outputs:
24 | stitched_images:
25 | type: Directory
26 | outputBinding:
27 | glob: /output/pipeline_output
28 |
29 | final_pipeline_config:
30 | type: File
31 | outputBinding:
32 | glob: /output/pipelineConfig.json
33 |
--------------------------------------------------------------------------------
/steps/run_cytokit-manifest.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "pattern": "experiment.json",
4 | "description": "File containing Cytokit's calculations from deconvolution, drift compensation, and focal plane selection, in JSON format",
5 | "edam_ontology_term": "EDAM_1.24.format_3464"
6 | }
7 | ]
8 |
--------------------------------------------------------------------------------
/steps/run_cytokit.cwl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env cwl-runner
2 |
3 | class: CommandLineTool
4 | cwlVersion: v1.1
5 | baseCommand: ["sh", "run_cytokit.sh"]
6 |
7 | requirements:
8 | DockerRequirement:
9 | dockerPull: hubmap/cytokit:latest
10 | DockerGpuRequirement: {}
11 |
12 | InitialWorkDirRequirement:
13 | listing:
14 | - entryname: run_cytokit.sh
15 | entry: |-
16 | __conda_setup="\$('/opt/conda/bin/conda' 'shell.bash' 'hook' 2> /dev/null)"
17 | if [ \$? -eq 0 ]; then
18 | eval "\$__conda_setup"
19 | else
20 | if [ -f "/opt/conda/etc/profile.d/conda.sh" ]; then
21 | . "/opt/conda/etc/profile.d/conda.sh"
22 | else
23 | export PATH="/opt/conda/bin:$PATH"
24 | fi
25 | fi
26 | unset __conda_setup
27 |
28 | export PYTHONPATH=/lab/repos/cytokit/python/pipeline
29 | conda activate cytokit
30 |
31 | mkdir $HOME/cytokit
32 |
33 | cytokit processor run_all --data-dir $(inputs.data_dir.path) --config-path $(inputs.yaml_config.path) --output_dir $HOME/cytokit && \
34 | cytokit operator run_all --data-dir $HOME/cytokit --config-path $(inputs.yaml_config.path) --output_dir $HOME/cytokit
35 |
36 |
37 | inputs:
38 | data_dir:
39 | type: Directory
40 |
41 | yaml_config:
42 | type: File
43 |
44 |
45 | outputs:
46 | cytokit_output:
47 | type: Directory
48 | outputBinding:
49 | glob: cytokit
50 |
51 | data_json:
52 | type: File
53 | outputBinding:
54 | glob: cytokit/processor/data.json
55 |
56 |
57 |
--------------------------------------------------------------------------------
/subm.yaml:
--------------------------------------------------------------------------------
1 | data_dir:
2 | class: Directory
3 | path: "/path/to/dir/with/codex_dataset"
4 | gpus: "0"
5 | num_concurrent_tasks: 10
6 |
--------------------------------------------------------------------------------
/test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | set -o errexit
3 | set -o pipefail
4 |
5 | start() { echo travis_fold':'start:$1; echo $1; }
6 | end() { set +v; echo travis_fold':'end:$1; echo; echo; }
7 | die() { set +v; echo "$*" 1>&2 ; exit 1; }
8 |
9 | start black
10 | black --check .
11 | end black
12 |
13 | start isort
14 | isort --check-only .
15 | end isort
16 |
--------------------------------------------------------------------------------