├── .gitignore
├── .pre-commit-config.yaml
├── .travis.yml
├── Dockerfile
├── Dockerfile_fiji
├── LICENSE
├── README.md
├── bin
├── background_subtraction
│ └── run_background_subtraction.py
├── best_focus
│ ├── best_z_identification.py
│ ├── best_z_paths.py
│ ├── file_manipulation.py
│ └── run_best_focus_selection.py
├── codex_stitching
│ ├── bigstitcher_dataset_meta.py
│ ├── directory_management.py
│ ├── generate_bigstitcher_macro.py
│ ├── image_stitching.py
│ ├── run_stitching.py
│ └── secondary_stitcher
│ │ ├── mask_stitching.py
│ │ ├── match_masks.py
│ │ ├── secondary_stitcher.py
│ │ └── secondary_stitcher_runner.py
├── convert_to_ometiff.py
├── create_cytokit_config.py
├── dataset_info
│ ├── collect_dataset_info.py
│ ├── collect_dataset_info_old.py
│ └── run_collection.py
├── illumination_correction
│ ├── generate_basic_macro.py
│ └── run_illumination_correction.py
├── pipeline_utils
│ ├── dataset_listing.py
│ └── pipeline_config_reader.py
├── slicing
│ ├── modify_pipeline_config.py
│ ├── run_slicing.py
│ └── slicer.py
└── utils.py
├── cytokit-docker
├── Dockerfile
├── cytokit_wrapper.py
└── setup_data_directory.py
├── docker_images.txt
├── environment.yml
├── metadata_examples
├── channelnames.txt
├── channelnames_report.csv
├── experiment.json
├── exposure_times.txt
└── segmentation.json
├── pipeline-manifest.json
├── pipeline.cwl
├── pipeline_release_mgmt.yaml
├── pyproject.toml
├── requirements-test.txt
├── steps
├── illumination_first_stitching.cwl
├── illumination_first_stitching
│ ├── best_focus.cwl
│ ├── collect_dataset_info.cwl
│ ├── create_yaml_config.cwl
│ ├── first_stitching.cwl
│ ├── illumination_correction.cwl
│ └── slicing.cwl
├── ometiff_second_stitching-manifest.json
├── ometiff_second_stitching.cwl
├── ometiff_second_stitching
│ ├── background_subtraction.cwl
│ ├── ome_tiff_creation.cwl
│ └── second_stitching.cwl
├── run_cytokit-manifest.json
└── run_cytokit.cwl
├── subm.yaml
└── test.sh
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/psf/black
3 | rev: 23.9.1
4 | hooks:
5 | - id: black
6 | language_version: python3
7 | - repo: https://github.com/pycqa/isort
8 | rev: 5.12.0
9 | hooks:
10 | - id: isort
11 | args: ["--profile", "black"]
12 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | dist: focal
2 | language: python
3 | python: 3.8
4 | install:
5 | - pip install -r requirements-test.txt
6 | script:
7 | - ./test.sh
8 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM ubuntu:focal
2 |
3 | RUN apt-get -qq update \
4 | && apt-get -qq install --no-install-recommends --yes \
5 | wget \
6 | bzip2 \
7 | ca-certificates \
8 | curl \
9 | unzip \
10 | git \
11 | && apt-get clean \
12 | && rm -rf /var/lib/apt/lists/*
13 |
14 | RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py39_4.12.0-Linux-x86_64.sh -O /tmp/miniconda.sh \
15 | && /bin/bash /tmp/miniconda.sh -b -p /opt/conda \
16 | && rm /tmp/miniconda.sh
17 | ENV PATH /opt/conda/bin:$PATH
18 |
19 | # update base environment from yaml file
20 | COPY environment.yml /tmp/
21 | RUN conda env update -f /tmp/environment.yml \
22 | && echo "source activate base" > ~/.bashrc \
23 | && conda clean --index-cache --tarballs --yes \
24 | && rm /tmp/environment.yml
25 |
26 | ENV PATH /opt/conda/envs/hubmap/bin:$PATH
27 |
28 | #Copy fiji from container
29 | COPY --from=hubmap/fiji_bigstitcher:latest /opt/Fiji.app /opt/Fiji.app
30 | ENV PATH /opt/Fiji.app:$PATH
31 |
32 | RUN mkdir /output && chmod -R a+rwx /output
33 |
34 | WORKDIR /opt
35 | COPY bin /opt
36 |
37 | CMD ["/bin/bash"]
38 |
--------------------------------------------------------------------------------
/Dockerfile_fiji:
--------------------------------------------------------------------------------
# Fiji + BigStitcher + BaSiC_Mod image.
# NOTE: a previous revision contained this entire stage twice (two standalone
# `FROM ubuntu:focal` stages with no COPY --from between them); the first copy
# was fully built but unused, doubling build time. Deduplicated to the single
# effective stage (the one using the hubmapconsortium BaSiC_Mod release).
FROM ubuntu:focal

RUN apt-get -qq update \
    && apt-get -qq install --no-install-recommends --yes \
    wget \
    bzip2 \
    ca-certificates \
    curl \
    unzip \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*


# Get ImageJ/Fiji
RUN wget --quiet https://downloads.imagej.net/fiji/latest/fiji-linux64.zip -P /tmp/ \
    && unzip /tmp/fiji-linux64.zip -d /opt/ \
    && rm /tmp/fiji-linux64.zip

ENV PATH /opt/Fiji.app:$PATH

# Install BigStitcher
RUN ImageJ-linux64 --headless --update add-update-site BigStitcher https://sites.imagej.net/BigStitcher/ \
    && ImageJ-linux64 --headless --update update

# Install BaSiC_Mod (illumination-correction plugin); the jtransforms and
# netlib jars shipped with Fiji conflict with its dependencies, so remove them
RUN wget --quiet https://github.com/hubmapconsortium/BaSiC_Mod/releases/download/v1.0/BaSiC_Mod_v10.zip -P /tmp/ \
    && unzip /tmp/BaSiC_Mod_v10.zip -d /tmp/ \
    && mv /tmp/BaSiC_Mod_v10/BaSiC_Mod.jar /opt/Fiji.app/plugins/ \
    && mv /tmp/BaSiC_Mod_v10/dependencies/* /opt/Fiji.app/jars/. \
    && rm -r /tmp/BaSiC_Mod_v10 \
    && rm /tmp/BaSiC_Mod_v10.zip \
    && rm /opt/Fiji.app/jars/jtransforms-2.4.jar \
    && rm /opt/Fiji.app/jars/netlib-java-0.9.3-renjin-patched-2.jar \
    && ImageJ-linux64 --headless --update update
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [](https://travis-ci.com/hubmapconsortium/codex-pipeline)
2 | [](https://github.com/psf/black)
3 |
4 | # codex-pipeline
5 | A [CWL](https://www.commonwl.org/) pipeline for processing [CODEX](https://www.akoyabio.com/codextm/technology) image data, using [Cytokit](https://github.com/hammerlab/cytokit).
6 |
7 | ## Pipeline steps
8 | * Collect required parameters from metadata files.
9 | * Perform illumination correction with Fiji plugin [BaSiC](https://github.com/VasylVaskivskyi/BaSiC_Mod)
10 | * Find sharpest z-plane for each channel, using variation of Laplacian
11 | * Perform stitching of tiles using Fiji plugin [BigStitcher](https://imagej.net/plugins/bigstitcher/)
12 | * Create Cytokit YAML config file containing parameters from input metadata
13 | * Run Cytokit's `processor` command to perform tile pre-processing, and nucleus and cell segmentation.
14 | * Run Cytokit's `operator` command to extract all antigen fluorescence images (discarding blanks and empty channels).
15 | * Generate [OME-TIFF](https://docs.openmicroscopy.org/ome-model/6.0.1/ome-tiff/specification.html) versions of TIFFs created by Cytokit.
16 | * Stitch tiles with segmentation masks
17 | * Perform downstream analysis using [SPRM](https://github.com/hubmapconsortium/sprm).
18 |
19 |
20 | ## Requirements
21 |
22 | Please use [HuBMAP Consortium fork of cwltool](https://github.com/hubmapconsortium/cwltool)
23 | to be able to run pipeline with GPU in Docker and Singularity containers.\
24 | For the list of python packages check `environment.yml`.
25 |
26 |
27 | ## How to run
28 |
29 | `cwltool pipeline.cwl subm.yaml`
30 |
31 | If you use Singularity containers add `--singularity`. Example of submission file `subm.yaml` is provided in the repo.
32 |
33 |
34 | ## Expected input directory and file structure
35 |
36 | ```
37 | codex_dataset/
38 | src_data OR raw
39 | ├── channelnames.txt
40 | ├── channelnames_report.csv
41 | ├── experiment.json
42 | ├── exposure_times.txt
43 | ├── segmentation.json
44 | ├── Cyc1_reg1 OR Cyc001_reg001
45 | │ ├── 1_00001_Z001_CH1.tif
46 | │ ├── 1_00001_Z001_CH2.tif
47 | │ │ ...
48 | │ └── 1_0000N_Z00N_CHN.tif
49 | └── Cyc1_reg2 OR Cyc001_reg002
50 | ├── 2_00001_Z001_CH1.tif
51 | ├── 2_00001_Z001_CH2.tif
52 | │ ...
53 |         └── 2_0000N_Z00N_CHN.tif
54 |
55 | ```
56 |
57 | Images should be separated into directories by cycles and regions using the following pattern `Cyc{cycle:d}_reg{region:d}`.
58 | The file names must contain region, tile, z-plane and channel ids starting from 1, and follow this pattern
59 | `{region:d}_{tile:05d}_Z{zplane:03d}_CH{channel:d}.tif`.
60 |
61 | Necessary metadata files that must be present in the input directory:
62 |
63 | * `experiment.json` - acquisition parameters and data structure;
64 | * `segmentation.json` - which channel from which cycle to use for segmentation;
65 | * `channelnames.txt` - list of channel names, one per row;
66 | * `channelnames_report.csv` - which channels to use, and which to exclude;
67 | * `exposure_times.txt` - not used at the moment, but will be useful for background subtraction.
68 |
69 | Examples of these files are present in the directory `metadata_examples`.
70 | Note: all fields related to regions, cycles, channels, z-planes and tiles start from 1,
71 | and xyResolution, zPitch are measured in `nm`.
72 |
73 | ## Output file structure
74 |
75 | ```
76 | pipeline_output/
77 | ├── expr
78 | │ ├── reg001_expr.ome.tiff
79 | │ └── reg002_expr.ome.tiff
80 | └── mask
81 | ├── reg001_mask.ome.tiff
82 |     └── reg002_mask.ome.tiff
83 | ```
84 |
85 | Where `expr` directory contains processed images and `mask` contains segmentation masks.
86 | The output of SPRM will be different, see https://github.com/hubmapconsortium/sprm .
87 |
88 |
89 | ## Development
90 | Code in this repository is formatted with [black](https://github.com/psf/black) and
91 | [isort](https://pypi.org/project/isort/), and this is checked via Travis CI.
92 |
93 | A [pre-commit](https://pre-commit.com/) hook configuration is provided, which runs `black` and `isort` before committing.
94 | Run `pre-commit install` in each clone of this repository which you will use for development (after `pip install pre-commit`
95 | into an appropriate Python environment, if necessary).
96 |
97 | ## Building containers
98 | Two `Dockerfile`s are included in this repository. A `docker_images.txt` manifest is included, which is intended
99 | for use in the `build_docker_containers` script provided by the
100 | [`multi-docker-build`](https://github.com/mruffalo/multi-docker-build) Python package. This package can be installed
101 | with
102 | ```shell script
103 | python -m pip install multi-docker-build
104 | ```
105 |
106 | ## Release process
107 |
108 | The `master` branch is intended to be production-ready at all times, and should always reference Docker containers
109 | with the `latest` tag.
110 |
111 | Publication of tagged "release" versions of the pipeline is handled with the
112 | [HuBMAP pipeline release management](https://github.com/hubmapconsortium/pipeline-release-mgmt) Python package. To
113 | release a new pipeline version, *ensure that the `master` branch contains all commits that you want to include in the release,*
114 | then run
115 | ```shell
116 | tag_release_pipeline v0.whatever
117 | ```
118 | See the pipeline release management script usage notes for additional options, such as GPG signing.
119 |
--------------------------------------------------------------------------------
/bin/best_focus/best_z_identification.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from typing import Dict, List
3 |
4 | import cv2 as cv
5 | import dask
6 | import numpy as np
7 | import tifffile as tif
8 | from scipy.ndimage import gaussian_filter
9 |
10 | Image = np.ndarray
11 |
12 |
def _laplacian_variance(img: Image) -> float:
    """
    Focus measure: variance of the image Laplacian (ksize=21).
    DOI:10.1016/j.patcog.2012.11.011
    Analysis of focus measure operators for shape-from-focus
    """
    laplacian = cv.Laplacian(img, cv.CV_64F, ksize=21)
    return np.var(laplacian)
19 |
20 |
def _find_best_z_plane_id(img_list: List[Image]) -> int:
    """Return the 0-based index of the sharpest image in img_list.

    Sharpness is measured by variance of the Laplacian; replaces the
    manual accumulate-then-search loop with a comprehension + index(max).
    """
    lap_vars_per_z_plane = [_laplacian_variance(img) for img in img_list]
    return lap_vars_per_z_plane.index(max(lap_vars_per_z_plane))
28 |
29 |
def _load_images(path_list: List[Path]) -> List[Image]:
    """Read every TIFF in path_list into memory, preserving order."""
    return [tif.imread(str(path)) for path in path_list]
35 |
36 |
def get_best_z_plane_id(path_list: List[Path]) -> int:
    """Load the z-stack images and return the 1-based id of the sharpest plane."""
    images = _load_images(path_list)
    return 1 + _find_best_z_plane_id(images)
40 |
41 |
def get_best_z_plane_id_parallelized(plane_paths_per_tile: dict) -> List[int]:
    """Find the best (sharpest) z-plane id for every tile, in parallel via dask.

    plane_paths_per_tile maps tile id -> {zplane id: path}; result order
    follows the dict's iteration order.
    """
    tasks = [
        dask.delayed(get_best_z_plane_id)(list(plane_paths.values()))
        for plane_paths in plane_paths_per_tile.values()
    ]
    return list(dask.compute(*tasks))
50 |
51 |
def smoothing_z_ids(arr: np.ndarray):
    """Gaussian-smooth (sigma=1, reflect borders) an array of z ids and round back to uint32."""
    blurred = gaussian_filter(arr.astype(np.float32), 1, mode="reflect")
    return np.round(blurred, 0).astype(np.uint32)
56 |
57 |
def best_z_correction(
    best_z_plane_id_list: List[int], x_ntiles: int, y_ntiles: int
) -> List[int]:
    """Smooth per-tile best-z ids across the tile grid to suppress outliers.

    The flat list (row-major, y_ntiles x x_ntiles) is reshaped into the tile
    grid, Gaussian-smoothed, then flattened back.

    Fix: the return annotation previously claimed np.ndarray, but the function
    returns a plain Python list (via .tolist()).
    """
    best_z_per_tile_arr = np.array(best_z_plane_id_list, dtype=np.int32).reshape(
        y_ntiles, x_ntiles
    )
    print("Best z-plane per tile")
    print("Original values\n", best_z_per_tile_arr)
    smoothed_best_z_per_tile_arr = smoothing_z_ids(best_z_per_tile_arr)
    print("Corrected values\n", smoothed_best_z_per_tile_arr)
    result = smoothed_best_z_per_tile_arr.ravel().tolist()

    return result
69 |
70 |
def pick_z_planes_below_and_above(best_z: int, max_z: int, above: int, below: int) -> List[int]:
    """Return best_z plus up to `below` ids before it and `above` ids after it,
    clipped to the valid range [1, max_z]. For a single-plane stack, just [best_z].
    """
    if max_z == 1:
        return [best_z]

    first = max(best_z - below, 1)
    last = min(best_z + above, max_z)

    # Clipping makes the explicit best_z == 1 / best_z == max_z branches
    # of the original unnecessary: the ranges come out empty on their own.
    planes_before = list(range(first, best_z))
    planes_after = list(range(best_z + 1, last + 1))
    return planes_before + [best_z] + planes_after
92 |
93 |
def get_best_z_plane_ids_per_tile(
    plane_paths_per_tile: dict, x_ntiles: int, y_ntiles: int, max_z: int
) -> Dict[int, List[int]]:
    """For each tile, pick the sharpest z-plane (smoothed across the grid)
    plus one plane below and one above it.
    """
    raw_ids = get_best_z_plane_id_parallelized(plane_paths_per_tile)
    corrected_ids = best_z_correction(raw_ids, x_ntiles, y_ntiles)
    return {
        tile: pick_z_planes_below_and_above(corrected_ids[i], max_z, 1, 1)
        for i, tile in enumerate(plane_paths_per_tile.keys())
    }
106 |
--------------------------------------------------------------------------------
/bin/best_focus/best_z_paths.py:
--------------------------------------------------------------------------------
1 | import sys
2 | from math import ceil
3 | from pathlib import Path
4 | from typing import Any, Dict, List, Tuple
5 |
6 | sys.path.append("/opt/")
7 | from best_z_identification import get_best_z_plane_ids_per_tile
8 |
9 | from pipeline_utils.dataset_listing import (
10 | create_listing_for_each_cycle_region,
11 | extract_digits_from_string,
12 | )
13 |
14 |
def _change_image_file_name(original_name: str) -> str:
    """Output tiles will have names 1_00001_Z001_CH1.tif, 1_00002_Z001_CH1.tif ..."""
    digits = extract_digits_from_string(original_name)
    # region / tile / channel come from the source name; z is always collapsed to 1
    return "{reg:d}_{tile:05d}_Z{z:03d}_CH{ch:d}.tif".format(
        reg=digits[0], tile=digits[1], z=1, ch=digits[3]
    )
25 |
26 |
27 | def _get_reference_channel_paths(
28 | listing_per_cycle: dict, num_channels_per_cycle: int, reference_channel_id: int
29 | ) -> Dict[int, Path]:
30 | ref_cycle_id = ceil(reference_channel_id / num_channels_per_cycle) - 1
31 | ref_cycle = sorted(listing_per_cycle.keys())[ref_cycle_id]
32 | ref_cycle_ref_channel_id = reference_channel_id - ref_cycle_id * num_channels_per_cycle
33 |
34 | reference_channel_tile_paths = dict()
35 | for region in listing_per_cycle[ref_cycle]:
36 | reference_channel_tile_paths.update({region: {}})
37 | this_channel_tile_paths = listing_per_cycle[ref_cycle][region][ref_cycle_ref_channel_id]
38 | reference_channel_tile_paths[region] = this_channel_tile_paths
39 | return reference_channel_tile_paths
40 |
41 |
42 | def _create_dirs_for_each_cycle_region(
43 | listing_per_cycle: dict, out_dir: Path
44 | ) -> Dict[int, Dict[int, Path]]:
45 | naming_template = "Cyc{cyc:03d}_reg{reg:03d}"
46 | cyc_reg_dirs = dict()
47 | for cycle in listing_per_cycle:
48 | cyc_reg_dirs[cycle] = dict()
49 | for region in listing_per_cycle[cycle]:
50 | dir_name = naming_template.format(cyc=cycle, reg=region)
51 | cyc_reg_dirs[cycle][region] = out_dir / dir_name
52 | return cyc_reg_dirs
53 |
54 |
def _find_best_z_planes_per_region_tile(
    reference_channel_tile_paths: dict, max_z: int, x_ntiles: int, y_ntiles: int
) -> Dict[int, Dict[int, List[int]]]:
    """Run best-z selection for every region; result shape is {region: {tile: [z ids]}}."""
    return {
        region: get_best_z_plane_ids_per_tile(tile_paths, x_ntiles, y_ntiles, max_z)
        for region, tile_paths in reference_channel_tile_paths.items()
    }
65 |
66 |
def _map_best_z_planes_in_channel_to_output_plane(
    channel_paths: dict, out_dir: Path, best_z_plane_per_tile: dict
) -> List[Tuple[List[Path], Path]]:
    """Pair, per tile, the selected source z-plane paths with the single output path
    they will be combined into.
    """
    mapping = list()
    for tile, zplane_paths in channel_paths.items():
        selected_ids = best_z_plane_per_tile[tile]  # z ids chosen for this tile
        src_paths = [zplane_paths[plane_id] for plane_id in selected_ids]

        # Output name is derived from the first selected plane's file name
        dst_name = _change_image_file_name(src_paths[0].name)
        dst_path = Path(out_dir).joinpath(dst_name)

        mapping.append((src_paths, dst_path))

    return mapping
86 |
87 |
def _select_best_z_plane_paths(
    listing: Dict[int, Dict[int, Dict[int, Dict[int, Dict[int, Path]]]]],
    out_dirs: Dict[int, Dict[int, Path]],
    best_z_plane_per_region: Dict[int, Dict[int, List[int]]],
) -> Dict[int, Dict[int, Dict[int, Dict[int, List[Tuple[List[Path], Path]]]]]]:
    """Creates a map of several raw planes that will be processed into one image.

    listing is {cycle: {region: {channel: {tile: {zplane: path}}}}}. The result
    maps every cycle/region/channel/tile to a list of
    (source z-plane paths, combined output path) pairs, where the source planes
    are those chosen by best-z selection for that tile.
    """
    best_z_plane_paths = dict()
    for cycle in listing:
        best_z_plane_paths[cycle] = dict()
        for region in listing[cycle]:
            best_z_plane_paths[cycle][region] = dict()
            this_cyc_reg_out_dir = out_dirs[cycle][region]
            this_region_best_z_planes = best_z_plane_per_region[region]
            for channel in listing[cycle][region]:
                best_z_plane_paths[cycle][region][channel] = dict()
                for tile, zplane_dict in listing[cycle][region][channel].items():
                    best_z_ids = this_region_best_z_planes[tile]
                    src_paths = [zplane_dict[_id] for _id in best_z_ids]

                    # Output name derives from the first selected plane's file name
                    dst_name = _change_image_file_name(src_paths[0].name)
                    dst_path = this_cyc_reg_out_dir / dst_name

                    # setdefault replaces the manual append-or-create branching
                    best_z_plane_paths[cycle][region][channel].setdefault(tile, []).append(
                        (src_paths, dst_path)
                    )
    return best_z_plane_paths
124 |
125 |
def get_best_z_dirs_and_paths(
    img_dirs: List[Path],
    out_dir: Path,
    num_channels_per_cycle: int,
    max_z: int,
    x_ntiles: int,
    y_ntiles: int,
    reference_channel_id: int,
) -> Tuple[
    Dict[int, Dict[int, Path]],
    Dict[int, Dict[int, Dict[int, Dict[int, List[Tuple[List[Path], Path]]]]]],
]:
    """Compute per-cycle/region output dirs and the src->dst plane mapping
    used for best-focus selection.
    """
    listing_per_cycle = create_listing_for_each_cycle_region(img_dirs)
    best_z_dirs = _create_dirs_for_each_cycle_region(listing_per_cycle, out_dir)

    ref_channel_paths = _get_reference_channel_paths(
        listing_per_cycle, num_channels_per_cycle, reference_channel_id
    )
    best_z_per_region = _find_best_z_planes_per_region_tile(
        ref_channel_paths, max_z, x_ntiles, y_ntiles
    )
    best_z_plane_paths = _select_best_z_plane_paths(
        listing_per_cycle, best_z_dirs, best_z_per_region
    )
    return best_z_dirs, best_z_plane_paths
150 |
151 |
def find_best_z_paths_and_dirs(
    dataset_info: Dict[str, Any], img_dirs: List[Path], out_dir: Path
) -> Tuple[
    Dict[int, Dict[int, Path]],
    Dict[int, Dict[int, Dict[int, Dict[int, List[Tuple[List[Path], Path]]]]]],
]:
    """Unpack the needed dataset parameters and delegate to get_best_z_dirs_and_paths."""
    return get_best_z_dirs_and_paths(
        img_dirs,
        out_dir,
        dataset_info["num_channels"],
        dataset_info["num_z_planes"],
        dataset_info["num_tiles_x"],
        dataset_info["num_tiles_y"],
        dataset_info["reference_channel"],
    )
174 |
--------------------------------------------------------------------------------
/bin/best_focus/file_manipulation.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from typing import Dict, List, Tuple
3 |
4 | import dask
5 | import numpy as np
6 | import tifffile as tif
7 |
8 |
def make_dir_if_not_exists(dir_path: Path):
    """Create dir_path (and any missing parents) if it does not exist.

    Uses exist_ok=True instead of a check-then-create pair, which had a
    race window between exists() and mkdir().
    """
    dir_path.mkdir(parents=True, exist_ok=True)
12 |
13 |
def project_stack(path_list: List[Path]):
    """Read the given z-plane TIFFs, stack them, and return their per-pixel
    mean rounded back to the stack's original dtype.
    """
    planes = [tif.imread(str(path)) for path in path_list]
    stack = np.stack(planes, axis=0)
    return np.round(stack.mean(axis=0)).astype(stack.dtype)
20 |
21 |
def process_images(src, dst):
    """Read, take average of several z-planes, write"""
    averaged = project_stack(src)
    tif.imwrite(str(dst), averaged)
26 |
27 |
def process_images_parallelized(best_z_plane_paths: List[tuple]):
    """Average and write every (src plane list, dst path) pair in parallel
    using dask's process scheduler.
    """
    tasks = [dask.delayed(process_images)(src, dst) for src, dst in best_z_plane_paths]
    dask.compute(*tasks, scheduler="processes")
34 |
35 |
def process_z_planes_and_save_to_out_dirs(
    best_z_out_dirs: Dict[int, Dict[int, Path]],
    best_z_plane_paths: Dict[int, Dict[int, Dict[int, Dict[int, List[Tuple[List[Path], Path]]]]]],
):
    """Create every output directory, then average-and-write the selected
    z-planes for each cycle/region/channel/tile.
    """
    for region_dirs in best_z_out_dirs.values():
        for dir_path in region_dirs.values():
            make_dir_if_not_exists(dir_path)

    for region_map in best_z_plane_paths.values():
        for channel_map in region_map.values():
            for tile_map in channel_map.values():
                for paths in tile_map.values():
                    process_images_parallelized(paths)
49 |
--------------------------------------------------------------------------------
/bin/best_focus/run_best_focus_selection.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 | import sys
4 | from pathlib import Path
5 | from typing import List
6 |
7 | sys.path.append("/opt/")
8 | from best_z_paths import find_best_z_paths_and_dirs
9 | from file_manipulation import process_z_planes_and_save_to_out_dirs
10 |
11 | from pipeline_utils.pipeline_config_reader import load_dataset_info
12 |
13 |
def make_dir_if_not_exists(dir_path: Path):
    """Create dir_path (and any missing parents) if it does not exist.

    Uses exist_ok=True instead of a check-then-create pair, which had a
    race window between exists() and mkdir().
    """
    dir_path.mkdir(parents=True, exist_ok=True)
17 |
18 |
def get_img_dirs(dataset_dir: Path) -> List[Path]:
    """Return absolute paths of the immediate subdirectories of dataset_dir."""
    dataset_dir = dataset_dir.absolute()
    subdir_names = next(os.walk(dataset_dir))[1]
    return [(dataset_dir / name).absolute() for name in subdir_names]
24 |
25 |
def main(data_dir: Path, pipeline_config_path: Path):
    """Select best-focus z-planes for the dataset and write the averaged
    planes to /output/best_focus.
    """
    best_focus_dir = Path("/output/best_focus")
    make_dir_if_not_exists(best_focus_dir)

    dataset_info = load_dataset_info(pipeline_config_path)
    img_dirs = get_img_dirs(data_dir)
    dirs_and_paths = find_best_z_paths_and_dirs(dataset_info, img_dirs, best_focus_dir)
    process_z_planes_and_save_to_out_dirs(*dirs_and_paths)
35 |
36 |
37 | if __name__ == "__main__":
38 | parser = argparse.ArgumentParser()
39 | parser.add_argument("--data_dir", type=Path, help="path to directory with dataset directory")
40 | parser.add_argument(
41 | "--pipeline_config_path", type=Path, help="path to pipelineConfig.json file"
42 | )
43 | args = parser.parse_args()
44 | main(args.data_dir, args.pipeline_config_path)
45 |
--------------------------------------------------------------------------------
/bin/codex_stitching/bigstitcher_dataset_meta.py:
--------------------------------------------------------------------------------
1 | import xml.dom.minidom
2 | import xml.etree.ElementTree as ET
3 | from copy import deepcopy
4 | from pathlib import Path
5 | from typing import Tuple
6 |
7 | import numpy as np
8 |
9 |
def convert_location(x, y):
    """Render a 3x4 affine matrix string (row-major) whose translation column is (x, y, 0)."""
    return "1.0 0.0 0.0 {x} 0.0 1.0 0.0 {y} 0.0 0.0 1.0 0.0".format(x=x, y=y)
13 |
14 |
def create_meta(file_pattern_str, num_tiles, tile_shape, tile_locations):
    """Build a BigStitcher/BigDataViewer dataset XML (SpimData v0.2) describing a
    single-channel, single-illumination, single-timepoint tiled acquisition.

    :param file_pattern_str: image-loader file pattern (tile index substituted by BigStitcher)
    :param num_tiles: total number of tiles
    :param tile_shape: (height, width) of one tile in pixels
    :param tile_locations: per-tile (x, y) positions; one pair per tile
    :return: the dataset XML as a string

    Fix: removed the dead ``declaration = ''`` prefix variable —
    ET.tostring(..., encoding="utf-8") already emits the XML declaration —
    and dropped unused local bindings for side-effect-only SubElement calls.
    """
    root = ET.Element("SpimData", {"version": "0.2"})
    ET.SubElement(root, "BasePath", {"type": "relative"}).text = "."
    sequence_description = ET.SubElement(root, "SequenceDescription")

    # Image loader: where and how BigStitcher reads the tile images
    image_loader = ET.SubElement(
        sequence_description, "ImageLoader", {"format": "spimreconstruction.stack.loci"}
    )
    ET.SubElement(image_loader, "imagedirectory", {"type": "relative"}).text = "."
    ET.SubElement(image_loader, "filePattern").text = file_pattern_str
    ET.SubElement(image_loader, "layoutTimepoints").text = "0"
    ET.SubElement(image_loader, "layoutChannels").text = "0"
    ET.SubElement(image_loader, "layoutIlluminations").text = "0"
    ET.SubElement(image_loader, "layoutAngles").text = "0"
    ET.SubElement(image_loader, "layoutTiles").text = "1"
    ET.SubElement(image_loader, "imglib2container").text = "CellImgFactory"

    # One ViewSetup per tile, cloned from a template element
    view_setups = ET.SubElement(sequence_description, "ViewSetups")

    view_setup_template = ET.Element("ViewSetup")
    ET.SubElement(view_setup_template, "id").text = "0"
    ET.SubElement(view_setup_template, "name").text = "0"
    ET.SubElement(view_setup_template, "size").text = "2048 2048 1"
    voxel_size = ET.SubElement(view_setup_template, "voxelSize")
    ET.SubElement(voxel_size, "unit").text = "um"
    ET.SubElement(voxel_size, "size").text = "1.0 1.0 1.0"
    view_attributes = ET.SubElement(view_setup_template, "attributes")
    ET.SubElement(view_attributes, "illumination").text = "0"
    ET.SubElement(view_attributes, "channel").text = "0"
    ET.SubElement(view_attributes, "tile").text = "0"
    ET.SubElement(view_attributes, "angle").text = "0"
    # size string is "x y z"; tile_shape is (height, width)
    tile_shape_str = str(tile_shape[1]) + " " + str(tile_shape[0]) + " 1"
    for i in range(0, num_tiles):
        vs = deepcopy(view_setup_template)
        vs.find("id").text = str(i)
        vs.find("name").text = str(i)
        vs.find("size").text = tile_shape_str
        vs.find("attributes").find("tile").text = str(i)
        view_setups.append(vs)

    # Attribute tables: single illumination/channel/angle, one Tile entry per tile
    attrib_illumination = ET.SubElement(view_setups, "Attributes", {"name": "illumination"})
    attrib_illumination_illumination = ET.SubElement(attrib_illumination, "Illumination")
    ET.SubElement(attrib_illumination_illumination, "id").text = "0"
    ET.SubElement(attrib_illumination_illumination, "name").text = "0"

    attrib_channel = ET.SubElement(view_setups, "Attributes", {"name": "channel"})
    attrib_channel_channel = ET.SubElement(attrib_channel, "Channel")
    ET.SubElement(attrib_channel_channel, "id").text = "0"
    ET.SubElement(attrib_channel_channel, "name").text = "0"

    attrib_tile = ET.SubElement(view_setups, "Attributes", {"name": "tile"})

    attrib_tile_tile = ET.Element("Tile")
    ET.SubElement(attrib_tile_tile, "id").text = "0"
    ET.SubElement(attrib_tile_tile, "name").text = "0"
    ET.SubElement(attrib_tile_tile, "location").text = "0.0 0.0 0.0"
    for i in range(0, num_tiles):
        att = deepcopy(attrib_tile_tile)
        att.find("id").text = str(i)
        att.find("name").text = str(i + 1)  # tile names are 1-based, ids 0-based
        attrib_tile.append(att)

    attrib_angle = ET.SubElement(view_setups, "Attributes", {"name": "angle"})
    attrib_angle_angle = ET.SubElement(attrib_angle, "Angle")
    ET.SubElement(attrib_angle_angle, "id").text = "0"
    ET.SubElement(attrib_angle_angle, "name").text = "0"

    timepoints = ET.SubElement(sequence_description, "Timepoints", {"type": "pattern"})
    ET.SubElement(timepoints, "integerpattern")

    # One ViewRegistration per tile: grid translation + identity calibration
    view_registrations = ET.SubElement(root, "ViewRegistrations")

    view_registration_template = ET.Element("ViewRegistration", {"timepoint": "0", "setup": "0"})
    view_transform_translation = ET.SubElement(
        view_registration_template, "ViewTransform", {"type": "affine"}
    )
    ET.SubElement(view_transform_translation, "Name").text = "Translation to Regular Grid"
    ET.SubElement(view_transform_translation, "affine").text = (
        "1.0 0.0 0.0 -2867.2 0.0 1.0 0.0 -1024.0 0.0 0.0 1.0 0.0"
    )
    view_transform_calibration = ET.SubElement(
        view_registration_template, "ViewTransform", {"type": "affine"}
    )
    ET.SubElement(view_transform_calibration, "Name").text = "calibration"
    ET.SubElement(view_transform_calibration, "affine").text = (
        "1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0"
    )

    for i in range(0, num_tiles):
        vr = deepcopy(view_registration_template)
        vr.set("timepoint", "0")
        vr.set("setup", str(i))
        # the first ViewTransform is the translation; overwrite with this tile's location
        vr.find("ViewTransform").find("affine").text = convert_location(*tile_locations[i])
        view_registrations.append(vr)

    # Empty sections BigStitcher expects to be present
    ET.SubElement(root, "ViewInterestPoints")
    ET.SubElement(root, "BoundingBoxes")
    ET.SubElement(root, "PointSpreadFunctions")
    ET.SubElement(root, "StitchingResults")
    ET.SubElement(root, "IntensityAdjustments")

    # encoding="utf-8" makes tostring() include the XML declaration already
    xml_str = ET.tostring(root, encoding="utf-8").decode()

    return xml_str
127 |
128 |
def grid_to_snake(arr):
    """Convert a row-major grid to snake (boustrophedon) order.

    Every odd-numbered row is reversed; the input array is not modified.
    """
    snake = arr.copy()
    # Read reversed rows from the untouched input to avoid overlapping views.
    snake[1::2] = arr[1::2, ::-1]
    return snake
136 |
137 |
def generate_dataset_xml(
    x_ntiles: int,
    y_ntiles: int,
    tile_shape: Tuple[int, int],
    x_overlap: int,
    y_overlap: int,
    pattern_str: str,
    out_path: Path,
    is_snake=True,
):
    """Build a BigStitcher dataset XML for a tile grid and write it to out_path.

    Tile positions are computed on a regular grid from the tile shape minus
    the overlap; when is_snake is True the grid is reordered so positions
    follow the snake acquisition order.
    """
    num_tiles = x_ntiles * y_ntiles

    # Effective step between neighbouring tiles on each axis.
    img_sizes_x = np.zeros((y_ntiles, x_ntiles), dtype=int)
    img_sizes_y = np.zeros((y_ntiles, x_ntiles), dtype=int)
    for row in range(0, y_ntiles):
        for col in range(0, x_ntiles):
            img_sizes_x[row, col] = tile_shape[1] - x_overlap
            img_sizes_y[row, col] = tile_shape[0] - y_overlap

    # First column/row starts at 0; positions are cumulative step sizes.
    # NOTE(review): the cumsum uses sizes[:, 1:] rather than sizes[:, :-1];
    # this is equivalent only because all tiles share the same size — confirm
    # if variable tile sizes are ever introduced.
    img_positions_x = np.cumsum(
        np.concatenate((np.zeros((y_ntiles, 1)), img_sizes_x[:, 1:]), axis=1), axis=1
    )
    img_positions_y = np.cumsum(
        np.concatenate((np.zeros((1, x_ntiles)), img_sizes_y[1:, :]), axis=0), axis=0
    )

    if is_snake:
        img_positions_x = grid_to_snake(img_positions_x)
        img_positions_y = grid_to_snake(img_positions_y)

    tile_locations = list(zip(list(np.ravel(img_positions_x)), list(np.ravel(img_positions_y))))

    bs_xml = create_meta(pattern_str, num_tiles, tile_shape, tile_locations)

    pretty_xml_as_string = xml.dom.minidom.parseString(bs_xml).toprettyxml()
    with open(out_path, "w") as stream:
        stream.write(pretty_xml_as_string)
181 |
--------------------------------------------------------------------------------
/bin/codex_stitching/directory_management.py:
--------------------------------------------------------------------------------
1 | import os
2 | import shutil
3 | from math import ceil
4 | from pathlib import Path
5 | from typing import List
6 |
7 |
def make_dir_if_not_exists(dir_path: Path):
    """Create dir_path (and any missing parents) if it does not exist.

    exist_ok=True removes the check-then-create race (TOCTOU): a concurrent
    worker creating the same directory no longer causes FileExistsError.
    """
    dir_path.mkdir(parents=True, exist_ok=True)
11 |
12 |
def get_img_dirs(dataset_dir: Path) -> List[Path]:
    """Return absolute paths of the immediate subdirectories of dataset_dir."""
    dataset_dir = dataset_dir.absolute()
    # os.walk's first tuple lists the top-level directory names only.
    subdir_names = next(os.walk(dataset_dir))[1]
    return [(dataset_dir / name).absolute() for name in subdir_names]
18 |
19 |
def create_dirs_for_stitched_channels(channel_dirs: dict, out_dir: Path):
    """Mirror the {cycle: {region: {channel: dir}}} tree under out_dir.

    For every channel directory a directory with the same basename is created
    inside out_dir; the new paths are returned in an identically nested dict.
    """
    stitched_channel_dirs = dict()
    for cycle, regions in channel_dirs.items():
        stitched_channel_dirs[cycle] = {}
        for region, channels in regions.items():
            stitched_channel_dirs[cycle][region] = {}
            for channel, dir_path in channels.items():
                stitched_path = out_dir.joinpath(Path(dir_path).name)
                make_dir_if_not_exists(stitched_path)
                stitched_channel_dirs[cycle][region][channel] = stitched_path

    return stitched_channel_dirs
33 |
34 |
def get_ref_channel_dir_per_region(
    channel_dirs: dict,
    stitched_channel_dirs: dict,
    num_channels_per_cycle: int,
    reference_channel_id: int,
):
    """Locate the reference channel's directory in every region.

    The global 1-based reference_channel_id is split into a cycle index and
    an in-cycle channel id, then both the raw and the stitched directory
    trees are indexed per region.

    Returns a pair of dicts: ({region: raw_dir}, {region: stitched_dir}).
    """
    ref_cycle_id = ceil(reference_channel_id / num_channels_per_cycle) - 1
    ref_cycle = sorted(channel_dirs.keys())[ref_cycle_id]
    in_cycle_ref_channel_id = reference_channel_id - ref_cycle_id * num_channels_per_cycle

    reference_channel_dir = {
        region: dirs[in_cycle_ref_channel_id]
        for region, dirs in channel_dirs[ref_cycle].items()
    }
    stitched_ref_channel_dir = {
        region: dirs[in_cycle_ref_channel_id]
        for region, dirs in stitched_channel_dirs[ref_cycle].items()
    }
    return reference_channel_dir, stitched_ref_channel_dir
56 |
57 |
def create_output_dirs_for_tiles(
    stitched_channel_dirs: dict, out_dir: Path, dir_naming_template: str
):
    """Create one tile-output directory per (cycle, region) under out_dir.

    dir_naming_template must accept "cycle" and "region" format fields.
    Returns {cycle: {region: dir_path}}.
    """
    new_tiles_dirs = dict()
    for cycle, regions in stitched_channel_dirs.items():
        new_tiles_dirs[cycle] = {}
        for region in regions:
            tiles_dir = out_dir.joinpath(dir_naming_template.format(cycle=cycle, region=region))
            make_dir_if_not_exists(tiles_dir)
            new_tiles_dirs[cycle][region] = tiles_dir

    return new_tiles_dirs
71 |
72 |
def remove_temp_dirs(stitched_channel_dirs: dict):
    """Recursively delete every channel directory recorded in the nested dict."""
    for regions in stitched_channel_dirs.values():
        for channels in regions.values():
            for dir_path in channels.values():
                shutil.rmtree(str(dir_path))
78 |
79 |
def check_if_images_in_dir(dir_path: Path) -> bool:
    """Return True if dir_path contains at least one TIFF file.

    The extension check is case-insensitive, so files such as IMG.TIF are
    also detected. any() short-circuits on the first match instead of
    materializing the full listing.
    """
    allowed_extensions = (".tif", ".tiff")
    return any(f.suffix.lower() in allowed_extensions for f in dir_path.iterdir())
88 |
89 |
def check_stitched_dirs(stitched_channel_dirs: dict):
    """Verify that every stitched channel directory contains an image.

    Prints a per-directory report and raises ValueError when any directory
    ended up without a TIFF image (i.e. BigStitcher most likely failed).
    """
    print("\nChecking if BigStitcher produced image:")
    report_lines = []
    all_ok = True
    for cycle in stitched_channel_dirs:
        for region in stitched_channel_dirs[cycle]:
            for channel, dir_path in stitched_channel_dirs[cycle][region].items():
                if check_if_images_in_dir(dir_path):
                    report_lines.append(str(dir_path) + " passed")
                else:
                    report_lines.append(str(dir_path) + " no image in dir")
                    all_ok = False

    print("\n".join(report_lines))

    if not all_ok:
        raise ValueError(
            "Probably there was an error while running BigStitcher. "
            + "There is no image in one or more directories."
        )
111 |
--------------------------------------------------------------------------------
/bin/codex_stitching/generate_bigstitcher_macro.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | # from datetime import datetime
4 | from bigstitcher_dataset_meta import generate_dataset_xml
5 |
6 |
class BigStitcherMacro:
    """Builds the ImageJ macro that estimates BigStitcher stitching parameters.

    Configure the public attributes, then call generate() to write both the
    macro file and the accompanying BigStitcher dataset XML into img_dir.
    """

    def __init__(self):
        self.img_dir = Path(".")
        self.out_dir = Path(".")
        self.xml_file_name = "dataset.xml"
        self.pattern = "{xxxxx}.tif"

        # range: 1-5 or list: 1,2,3,4,5
        self.num_tiles = 1

        self.num_tiles_x = 1
        self.num_tiles_y = 1

        # (height, width), including the overlap region
        self.tile_shape = (1440, 1920)

        # overlap in pixels
        self.overlap_x = 10
        self.overlap_y = 10
        self.overlap_z = 1

        # distance in um
        self.pixel_distance_x = 1
        self.pixel_distance_y = 1
        self.pixel_distance_z = 1

        self.tiling_mode = "snake"
        self.is_snake = True
        self.region = 1

        self.path_to_xml_file = Path(".")

        self.__location = Path(__file__).parent.resolve()

    def generate(self):
        """Write the stitching macro and dataset XML; return the macro path."""
        self.make_dir_if_not_exists(self.out_dir)
        self.create_path_to_xml_file()
        self.check_if_tiling_mode_is_snake()

        formatted_macro = self.replace_values_in_macro()
        print("fiji macro script for estimation of stitching parameters")
        print(formatted_macro)
        macro_file_path = self.write_to_temp_macro_file(formatted_macro)

        generate_dataset_xml(
            self.num_tiles_x,
            self.num_tiles_y,
            self.tile_shape,
            self.overlap_x,
            self.overlap_y,
            self.pattern,
            self.path_to_xml_file,
            self.is_snake,
        )

        return macro_file_path

    def make_dir_if_not_exists(self, dir_path: Path):
        """Create dir_path with parents if it is missing."""
        if not dir_path.exists():
            dir_path.mkdir(parents=True)

    def create_path_to_xml_file(self):
        """Resolve the dataset XML location inside img_dir."""
        self.path_to_xml_file = self.img_dir.joinpath(self.xml_file_name)

    def check_if_tiling_mode_is_snake(self):
        """Sync the is_snake flag with the tiling_mode string."""
        if self.tiling_mode == "snake":
            self.is_snake = True
        else:
            self.is_snake = False

    def convert_tiling_mode(self, tiling_mode):
        """Map a pipeline tiling-mode name to BigStitcher's grid-move string.

        Raises ValueError for unknown modes. (Previously an unknown mode fell
        through both branches and crashed with UnboundLocalError.)
        """
        if tiling_mode == "snake":
            bigstitcher_tiling_mode = "[Snake: Right & Down ]"
        elif tiling_mode == "grid":
            bigstitcher_tiling_mode = "[Grid: Right & Down ]"
        else:
            raise ValueError(f"Unsupported tiling mode: {tiling_mode!r}")
        return bigstitcher_tiling_mode

    def replace_values_in_macro(self):
        """Fill the macro template with this instance's configuration."""
        macro_template = self.estimate_stitch_param_macro_template
        formatted_macro = macro_template.format(
            img_dir=self.path_to_str(self.img_dir),
            out_dir=self.path_to_str(self.out_dir),
            path_to_xml_file=self.path_to_str(self.path_to_xml_file),
            pattern=self.path_to_str(self.img_dir.joinpath(self.pattern)),
            num_tiles=self.make_range(self.num_tiles),
            num_tiles_x=self.num_tiles_x,
            num_tiles_y=self.num_tiles_y,
            overlap_x=self.overlap_x,
            overlap_y=self.overlap_y,
            overlap_z=self.overlap_z,
            pixel_distance_x=self.pixel_distance_x,
            pixel_distance_y=self.pixel_distance_y,
            pixel_distance_z=self.pixel_distance_z,
            tiling_mode=self.convert_tiling_mode(self.tiling_mode),
        )
        return formatted_macro

    def write_to_temp_macro_file(self, formatted_macro):
        """Write the macro into img_dir as regN_stitch_macro.ijm; return its path."""
        file_name = "reg" + str(self.region) + "_stitch_macro.ijm"
        macro_file_path = self.img_dir.joinpath(file_name)
        with open(macro_file_path, "w") as f:
            f.write(formatted_macro)
        return macro_file_path

    def make_range(self, number):
        """Render 1..number as the comma-separated list BigStitcher expects."""
        return ",".join([str(n) for n in range(1, number + 1)])

    def path_to_str(self, path: Path):
        """Absolute POSIX-style string form of a path (as ImageJ expects)."""
        return str(path.absolute().as_posix())

    estimate_stitch_param_macro_template = """
    // calculate pairwise shifts
    run("Calculate pairwise shifts ...",
    "select={path_to_xml_file}" +
    " process_angle=[All angles]" +
    " process_channel=[All channels]" +
    " process_illumination=[All illuminations]" +
    " process_tile=[All tiles]" +
    " process_timepoint=[All Timepoints]" +
    " method=[Phase Correlation]" +
    " show_expert_algorithm_parameters" +
    " downsample_in_x=1" +
    " downsample_in_y=1" +
    " number=5" +
    " minimal=10" +
    " subpixel");

    // filter shifts with 0.7 corr. threshold
    run("Filter pairwise shifts ...",
    "select={path_to_xml_file}" +
    " filter_by_link_quality" +
    " min_r=0.7" +
    " max_r=1" +
    " max_shift_in_x=0" +
    " max_shift_in_y=0" +
    " max_shift_in_z=0" +
    " max_displacement=0");

    // do global optimization
    run("Optimize globally and apply shifts ...",
    "select={path_to_xml_file}" +
    " process_angle=[All angles]" +
    " process_channel=[All channels]" +
    " process_illumination=[All illuminations]" +
    " process_tile=[All tiles]" +
    " process_timepoint=[All Timepoints]" +
    " relative=2.500" +
    " absolute=3.500" +
    " global_optimization_strategy=[Two-Round using Metadata to align unconnected Tiles]" +
    " fix_group_0-0,");

    run("Quit");
    eval("script", "System.exit(0);");

    """
161 |
162 |
class FuseMacro:
    """Builds and writes the ImageJ macro that fuses a stitched dataset.

    Set img_dir (the directory containing dataset.xml) and out_dir, then call
    generate().
    """

    def __init__(self):
        self.img_dir = Path(".")
        self.xml_file_name = "dataset.xml"
        self.out_dir = Path(".")
        self.__location = Path(__file__).parent.absolute()

    def generate(self):
        """Write fuse_macro.ijm into img_dir and return its path.

        (Fix: the path was previously computed but never returned, leaving the
        API inconsistent with BigStitcherMacro.generate().)
        """
        formatted_macro = self.replace_values_in_macro()
        macro_file_path = self.write_to_macro_file_in_channel_dir(self.img_dir, formatted_macro)
        return macro_file_path

    def replace_values_in_macro(self):
        """Fill the fuse-macro template with this instance's paths."""
        macro_template = self.fuse_macro_template
        formatted_macro = macro_template.format(
            img_dir=self.path_to_str(self.img_dir),
            path_to_xml_file=self.path_to_str(self.img_dir.joinpath(self.xml_file_name)),
            out_dir=self.path_to_str(self.out_dir),
        )
        return formatted_macro

    def write_to_macro_file_in_channel_dir(self, img_dir: Path, formatted_macro: str):
        """Write the formatted macro into img_dir/fuse_macro.ijm; return its path."""
        macro_file_path = img_dir.joinpath("fuse_macro.ijm")
        with open(macro_file_path, "w") as f:
            f.write(formatted_macro)
        return macro_file_path

    def path_to_str(self, path: Path):
        """Absolute POSIX-style string form of a path (as ImageJ expects)."""
        return str(path.absolute().as_posix())

    fuse_macro_template = """
    // fuse dataset, save as TIFF
    run("Fuse dataset ...",
    "select={path_to_xml_file}" +
    " process_angle=[All angles]" +
    " process_channel=[All channels]" +
    " process_illumination=[All illuminations]" +
    " process_tile=[All tiles]" +
    " process_timepoint=[All Timepoints]" +
    " bounding_box=[All Views]" +
    " downsampling=1" +
    " pixel_type=[16-bit unsigned integer]" +
    " interpolation=[Linear Interpolation]" +
    " image=[Precompute Image]" +
    " interest_points_for_non_rigid=[-= Disable Non-Rigid =-]" +
    " blend produce=[Each timepoint & channel]" +
    " fused_image=[Save as (compressed) TIFF stacks]" +
    " output_file_directory={out_dir}");

    run("Quit");
    eval("script", "System.exit(0);");

    """
215 |
--------------------------------------------------------------------------------
/bin/codex_stitching/image_stitching.py:
--------------------------------------------------------------------------------
1 | import platform
2 | import shutil
3 | import subprocess
4 | from pathlib import Path
5 | from typing import List
6 |
7 | import dask
8 | import tifffile as tif
9 | from directory_management import (
10 | check_stitched_dirs,
11 | create_dirs_for_stitched_channels,
12 | get_ref_channel_dir_per_region,
13 | )
14 | from generate_bigstitcher_macro import BigStitcherMacro, FuseMacro
15 |
16 |
def get_image_path_in_dir(dir_path: Path) -> Path:
    """Return the first TIFF file in dir_path (in sorted name order).

    Sorting makes the selection deterministic (iterdir order is filesystem
    dependent).

    Raises:
        FileNotFoundError: if the directory contains no .tif/.tiff file
            (previously this crashed with a bare IndexError).
    """
    allowed_extensions = (".tif", ".tiff")
    for f in sorted(dir_path.iterdir()):
        if f.suffix in allowed_extensions:
            return f
    raise FileNotFoundError(f"No TIFF image found in {dir_path}")
22 |
23 |
def generate_bigstitcher_macro_for_reference_channel(
    reference_channel_dir: Path, out_dir: Path, dataset_info: dict, region: int
) -> Path:
    """Configure a BigStitcherMacro from dataset_info and write it to disk.

    Returns the path of the generated macro file.
    """
    macro = BigStitcherMacro()
    macro.img_dir = reference_channel_dir
    macro.out_dir = out_dir
    macro.pattern = "{xxxxx}.tif"
    # The stored tile shape includes the overlap region on each axis.
    macro.tile_shape = (
        dataset_info["tile_height"] + dataset_info["overlap_y"],
        dataset_info["tile_width"] + dataset_info["overlap_x"],
    )
    # These attributes map 1:1 onto keys of dataset_info.
    for attr in (
        "num_tiles",
        "num_tiles_x",
        "num_tiles_y",
        "overlap_x",
        "overlap_y",
        "overlap_z",
        "pixel_distance_x",
        "pixel_distance_y",
        "pixel_distance_z",
        "tiling_mode",
    ):
        setattr(macro, attr, dataset_info[attr])
    macro.region = region
    return macro.generate()
51 |
52 |
def run_bigstitcher(bigstitcher_macro_path: Path):
    """Run an ImageJ/BigStitcher macro headlessly.

    It is expected that ImageJ is added to the system PATH.

    Raises:
        ValueError: on an unsupported platform.
        Exception: if ImageJ exits with a non-zero status; its stderr is
            included in the message.
    """
    if platform.system() == "Windows":
        imagej_name = "ImageJ-win64"
    elif platform.system() == "Linux":
        imagej_name = "ImageJ-linux64"
    elif platform.system() == "Darwin":
        imagej_name = "ImageJ-macosx"
    else:
        raise ValueError(f"unsupported platform: {platform.system()}")

    command = imagej_name + " --headless --console -macro " + str(bigstitcher_macro_path)
    print("Started running BigStitcher for", str(bigstitcher_macro_path))
    # check=False: with check=True subprocess raised CalledProcessError on
    # failure, which made the explicit error branch below (with the helpful
    # stderr message) unreachable dead code.
    res = subprocess.run(
        command, shell=True, check=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )
    if res.returncode == 0:
        print("Finished", str(bigstitcher_macro_path))
    else:
        raise Exception(
            "There was an error while running the BigStitcher for "
            + str(bigstitcher_macro_path)
            + "\n"
            + res.stderr.decode("utf-8")
        )
79 |
80 |
def run_bigstitcher_for_ref_channel_per_region(
    ref_channel_dir_per_region: dict,
    ref_channel_stitched_dir_per_region: dict,
    info_for_bigstitcher: dict,
):
    """Generate and run a stitching-parameter macro for every region."""
    for region, ref_channel_dir in ref_channel_dir_per_region.items():
        stitched_dir = ref_channel_stitched_dir_per_region[region]
        macro_path = generate_bigstitcher_macro_for_reference_channel(
            ref_channel_dir, stitched_dir, info_for_bigstitcher, region
        )
        run_bigstitcher(macro_path)
93 |
94 |
def copy_dataset_xml_to_channel_dirs(ref_channel_dir: Path, other_channel_dirs: List[Path]):
    """Copy the reference channel's dataset.xml into each listed channel dir.

    Copying a directory's dataset.xml onto itself is silently skipped.
    """
    src = ref_channel_dir.joinpath("dataset.xml")
    for channel_dir in other_channel_dirs:
        try:
            shutil.copy(src, channel_dir.joinpath("dataset.xml"))
        except shutil.SameFileError:
            continue
103 |
104 |
def copy_fuse_macro_to_channel_dirs(channel_dirs: List[Path], channel_stitched_dirs: List[Path]):
    """Write a fuse_macro.ijm into each channel dir, targeting its stitched dir.

    channel_stitched_dirs is indexed positionally, so it must be at least as
    long as channel_dirs.
    """
    macro = FuseMacro()
    for idx, src_dir in enumerate(channel_dirs):
        macro.img_dir = src_dir
        macro.xml_file_name = "dataset.xml"
        macro.out_dir = channel_stitched_dirs[idx]
        macro.generate()
112 |
113 |
def copy_bigsticher_files_to_dirs(
    channel_dirs: dict, stitched_channel_dirs: dict, ref_channel_dir_per_region: dict
):
    """Distribute the estimated dataset.xml and a fuse macro to every channel dir."""
    for cycle, regions in channel_dirs.items():
        for region, channels in regions.items():
            ref_dir = ref_channel_dir_per_region[region]
            src_dirs = list(channels.values())
            dst_dirs = list(stitched_channel_dirs[cycle][region].values())

            copy_dataset_xml_to_channel_dirs(ref_dir, src_dirs)
            copy_fuse_macro_to_channel_dirs(src_dirs, dst_dirs)
125 |
126 |
def run_stitching_for_all_channels(channel_dirs: dict):
    """Run every channel directory's fuse macro in parallel via dask processes."""
    tasks = []
    for regions in channel_dirs.values():
        for channels in regions.values():
            for dir_path in channels.values():
                fuse_macro_path = dir_path.joinpath("fuse_macro.ijm")
                tasks.append(dask.delayed(run_bigstitcher)(fuse_macro_path))

    dask.compute(*tasks, scheduler="processes")
136 |
137 |
def get_stitched_image_shape(ref_channel_stitched_dir_per_region):
    """Read the shape of the first region's stitched reference image.

    Only the first region encountered is inspected; all regions are assumed
    to produce equally sized stitched images (not verified here).
    """
    for dir_path in ref_channel_stitched_dir_per_region.values():
        stitched_image_path = get_image_path_in_dir(dir_path)
        break
    with tif.TiffFile(stitched_image_path) as TF:
        stitched_image_shape = TF.series[0].shape

    return stitched_image_shape
146 |
147 |
def stitch_images(channel_dirs, dataset_meta, out_dir):
    """Stitch every channel of every cycle/region with BigStitcher.

    Stitching parameters are estimated once per region on the reference
    channel and then reused to fuse all remaining channels.

    Returns the nested dict of stitched channel directories and the shape of
    the stitched reference image.
    """
    ref_channel_id = int(dataset_meta["reference_channel"])
    num_channels_per_cycle = dataset_meta["num_channels"]

    stitched_channel_dirs = create_dirs_for_stitched_channels(channel_dirs, out_dir)

    ref_dirs, stitched_ref_dirs = get_ref_channel_dir_per_region(
        channel_dirs, stitched_channel_dirs, num_channels_per_cycle, ref_channel_id
    )

    print("\nEstimating stitching parameters")
    run_bigstitcher_for_ref_channel_per_region(ref_dirs, stitched_ref_dirs, dataset_meta)

    print("\nStitching channels")
    copy_bigsticher_files_to_dirs(channel_dirs, stitched_channel_dirs, ref_dirs)
    run_stitching_for_all_channels(channel_dirs)
    check_stitched_dirs(stitched_channel_dirs)
    stitched_img_shape = get_stitched_image_shape(stitched_ref_dirs)

    return stitched_channel_dirs, stitched_img_shape
171 |
--------------------------------------------------------------------------------
/bin/codex_stitching/run_stitching.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 | import shutil
4 | import sys
5 | from datetime import datetime
6 | from pathlib import Path
7 | from typing import Dict, List
8 |
9 | import dask
10 |
11 | sys.path.append("/opt/")
12 | from directory_management import (
13 | create_output_dirs_for_tiles,
14 | get_img_dirs,
15 | make_dir_if_not_exists,
16 | remove_temp_dirs,
17 | )
18 | from image_stitching import stitch_images
19 |
20 | from pipeline_utils.dataset_listing import (
21 | create_listing_for_each_cycle_region,
22 | get_img_dirs,
23 | )
24 | from pipeline_utils.pipeline_config_reader import load_dataset_info
25 |
26 |
def print_img_dirs(img_dirs: List[Path]):
    """Print each image directory path on its own line, under a header."""
    print("Image directories:")
    for path in img_dirs:
        print(str(path))
31 |
32 |
def load_pipeline_config(pipeline_config_path: Path) -> dict:
    """Parse the pipeline JSON config file and return its contents."""
    with open(pipeline_config_path, "r") as stream:
        return json.load(stream)
38 |
39 |
def get_file_listing(data_dir: Path):
    """Build the nested cycle/region/channel file listing for data_dir."""
    return create_listing_for_each_cycle_region(get_img_dirs(data_dir))
44 |
45 |
def copy_to_channel_dirs(listing, base_channel_dir: Path) -> Dict[int, Dict[int, Dict[int, Path]]]:
    """Regroup tile images into one directory per (cycle, region, channel).

    listing is nested as {cycle: {region: {channel: {tile: {zplane: src_path}}}}}
    -- presumably produced by create_listing_for_each_cycle_region; verify
    against pipeline_utils.dataset_listing.

    Returns {cycle: {region: {channel: dir_path}}} for the created directories.
    """
    new_dir_name_template = "Cyc{cyc:03d}_Reg{reg:03d}_Ch{ch:03d}"
    dst_name_template = "{tile:05d}.tif"
    channel_dirs = dict()
    for cycle in listing:
        channel_dirs[cycle] = dict()
        for region in listing[cycle]:
            channel_dirs[cycle][region] = dict()
            for channel in listing[cycle][region]:
                dir_name = new_dir_name_template.format(cyc=cycle, reg=region, ch=channel)
                dir_path = base_channel_dir / dir_name
                make_dir_if_not_exists(dir_path)
                channel_dirs[cycle][region][channel] = dir_path
                for tile in listing[cycle][region][channel]:
                    for zplane, src in listing[cycle][region][channel][tile].items():
                        # NOTE(review): dst depends only on the tile id, so when a
                        # tile has several z-planes each copy overwrites the
                        # previous one -- presumably a single best-focus plane is
                        # expected here; confirm upstream.
                        dst_name = dst_name_template.format(tile=tile)
                        dst = dir_path / dst_name
                        shutil.copy(src, dst)
    return channel_dirs
65 |
66 |
def main(data_dir: Path, pipeline_config_path: Path):
    """Entry point: regroup tiles into per-channel dirs, then stitch them."""
    start = datetime.now()
    print("\nStarted", start)

    dataset_info = load_dataset_info(pipeline_config_path)

    # Hard-coded container output locations.
    out_dir = Path("/output/stitched_images")
    base_channel_dir = Path("/output/channel_dirs")
    make_dir_if_not_exists(out_dir)
    make_dir_if_not_exists(base_channel_dir)

    dask.config.set(
        {"num_workers": dataset_info["num_concurrent_tasks"], "scheduler": "processes"}
    )

    listing = get_file_listing(data_dir)
    channel_dirs = copy_to_channel_dirs(listing, base_channel_dir)
    stitched_channel_dirs, stitched_img_shape = stitch_images(channel_dirs, dataset_info, out_dir)

    print("\nTime elapsed", datetime.now() - start)
87 |
88 |
if __name__ == "__main__":
    # CLI wrapper: parse the two path arguments and run the stitching pipeline.
    parser = argparse.ArgumentParser()
    parser.add_argument("--data_dir", type=Path, help="path to directory with image directories")
    parser.add_argument(
        "--pipeline_config_path", type=Path, help="path to pipelineConfig.json file"
    )

    args = parser.parse_args()

    main(args.data_dir, args.pipeline_config_path)
99 |
--------------------------------------------------------------------------------
/bin/codex_stitching/secondary_stitcher/mask_stitching.py:
--------------------------------------------------------------------------------
1 | import gc
2 | from copy import deepcopy
3 | from typing import Dict, List, Tuple
4 |
5 | import dask
6 | import numpy as np
7 | import pandas as pd
8 | from match_masks import get_matched_masks
9 | from skimage.measure import regionprops_table
10 |
11 | Image = np.ndarray
12 |
13 |
def generate_ome_meta_for_mask(size_y: int, size_x: int, dtype, match_fraction: float) -> str:
    """Render OME-XML metadata for a stitched mask image.

    NOTE(review): the template below appears garbled in this copy of the file
    (the XML tags seem to have been stripped, leaving mostly blank lines and
    no {size_y}/{size_x}/{dtype} placeholders) -- verify against the original
    source before relying on this function's output.
    """
    template = """

















FractionOfMatchedCellsAndNuclei
{match_fraction}




"""
    ome_meta = template.format(
        size_y=size_y, size_x=size_x, dtype=np.dtype(dtype).name, match_fraction=match_fraction
    )
    return ome_meta
45 |
46 |
def get_labels_sorted_by_coordinates(img) -> List[int]:
    """Return label ids ordered by centroid position (y first, then x)."""
    props = regionprops_table(img, properties=("label", "centroid"))
    coord_df = pd.DataFrame(
        np.array((props["label"], props["centroid-0"], props["centroid-1"]))
    )
    # Columns are regions; row 0 holds labels, rows 1/2 hold the y/x centroids.
    sorted_arr = coord_df.sort_values(by=[1, 2], axis=1).to_numpy()
    return sorted_arr[0, :].tolist()
55 |
56 |
def get_new_labels(img: Image) -> np.ndarray:
    """Compute a replacement id for every unique label value in img.

    New ids follow the labels' centroid order (top-to-bottom, then
    left-to-right). The returned array is aligned with np.unique(img):
    element k is the new id for the k-th unique old value, with the first
    element fixed to 0 for the background.

    NOTE(review): new_label_ids has len(old_label_ids) entries while the
    coordinate-sorted list excludes background 0, so zip drops the last new
    id and the first-sorted region receives new id 0 (same as background) --
    verify this is intended before reusing this function elsewhere.
    """
    dtype = img.dtype
    unique_label_ids, indices = np.unique(img, return_inverse=True)

    old_label_ids = unique_label_ids.tolist()
    # Labels ordered by centroid; background 0 is absent here because
    # regionprops ignores it.
    old_label_ids_sorted_by_coord = get_labels_sorted_by_coordinates(img)

    # New id of a label = its position in the coordinate-sorted order.
    new_label_ids = list(range(0, len(old_label_ids)))
    label_pairs = zip(old_label_ids_sorted_by_coord, new_label_ids)
    label_map = {lab_pair[0]: lab_pair[1] for lab_pair in label_pairs}

    # Keep background (first unique value, assumed 0) mapped to 0.
    updated_label_ids = [0]
    for _id in old_label_ids[1:]:
        updated_label_ids.append(label_map[_id])

    new_unique_label_ids = np.array(updated_label_ids, dtype=dtype)
    return new_unique_label_ids
74 |
75 |
76 | def reset_label_ids(img, new_label_ids) -> Image:
77 | dtype = img.dtype
78 | unique_labels, indices = np.unique(img, return_inverse=True)
79 | reset_img = new_label_ids[indices].reshape(img.shape).astype(dtype)
80 | return reset_img
81 |
82 |
def remove_labels(
    img: Image, y_slice: slice, x_slice: slice, exclude_start: bool
) -> Tuple[Image, List[int]]:
    """Zero out labels that touch the given border strips of a tile.

    Any label value present inside img[y_slice, :] or img[:, x_slice] is
    removed (set to 0), except the background (0) and, when exclude_start is
    True, labels present on the first row/column of the strip (those belong
    to the neighbouring tile's overlap and are resolved there).

    Returns a modified copy of img and the sorted list of removed label ids.
    (Fix: the list is now deterministically sorted; previously it was built
    by iterating a set, so its order was an implementation detail. The
    redundant set(sorted(...)) constructions are gone, and the per-label
    masking loop is replaced by a single vectorized np.isin pass.)
    """
    keep = {0}

    candidates = set()
    if y_slice != slice(None):
        candidates.update(np.unique(img[y_slice, :]).tolist())
    if x_slice != slice(None):
        candidates.update(np.unique(img[:, x_slice]).tolist())

    if exclude_start:
        if y_slice.start is None and x_slice.start is None:
            raise ValueError("Exclude start is enabled but slice start is None")
        if y_slice.start is not None:
            first_row = (slice(y_slice.start, y_slice.start + 1), x_slice)
            keep.update(np.unique(img[first_row]).tolist())
        if x_slice.start is not None:
            first_col = (y_slice, slice(x_slice.start, x_slice.start + 1))
            keep.update(np.unique(img[first_col]).tolist())

    val_to_remove = sorted(candidates - keep)

    img_copy = img.copy()
    img_copy[np.isin(img_copy, val_to_remove)] = 0
    return img_copy, val_to_remove
122 |
123 |
def remove_overlapping_labels(img: Image, overlap: int, mode: str) -> Tuple[Image, List[int]]:
    """Strip labels from the tile borders named in mode.

    mode is matched by substring and may mention "left", "right", "top",
    "bottom". The right/bottom strips use exclude_start=True: labels touching
    the start of the strip are kept and resolved by the neighbouring tile.

    Returns the cleaned tile and the sorted list of removed label ids.
    """
    # side -> ((y_slice, x_slice), exclude_start), processed in this order.
    border_strips = (
        ("left", (slice(None), slice(None, overlap)), False),
        ("right", (slice(None), slice(-overlap, None)), True),
        ("top", (slice(None, overlap), slice(None)), False),
        ("bottom", (slice(-overlap, None), slice(None)), True),
    )

    mod_img = img.copy()
    excluded_labels = []
    for side, strip, exclude_start in border_strips:
        if side in mode:
            mod_img, removed = remove_labels(mod_img, *strip, exclude_start=exclude_start)
            excluded_labels.extend(removed)
    return mod_img, sorted(set(excluded_labels))
146 |
147 |
def find_and_remove_overlapping_labels_in_first_channel(
    tiles: List[Image], y_ntiles: int, x_ntiles: int, overlap: int
) -> Tuple[List[Image], Dict[int, Dict[int, int]]]:
    """Clean border labels of the first (reference) mask channel in parallel.

    The sides to clean depend on the tile's grid position: edge tiles keep
    their outer border, interior tiles are cleaned on both sides of each
    axis. The mode string is matched by substring inside
    remove_overlapping_labels ("top"/"bottom"/"left"/"right").

    Returns the cleaned tiles and, per tile index, a dict mapping each
    removed label id to 0 so the removal can be replayed on other channels.

    NOTE(review): with a single-row (or single-column) grid, the i == 0
    branch still schedules "bottom" (resp. "right") removal even though no
    neighbour exists on that side -- confirm this is intended.
    """
    excluded_labels = dict()
    modified_tiles = []
    task = []
    n = 0
    for i in range(0, y_ntiles):
        for j in range(0, x_ntiles):
            # Surrounding spaces keep the side tokens separated for the
            # substring match.
            label_remove_mode = ""
            if i == 0:
                label_remove_mode += " bottom "
            elif i == y_ntiles - 1:
                label_remove_mode += " top "
            else:
                label_remove_mode += " top bottom "
            if j == 0:
                label_remove_mode += " right "
            elif j == x_ntiles - 1:
                label_remove_mode += " left "
            else:
                label_remove_mode += " left right "

            task.append(
                dask.delayed(remove_overlapping_labels)(tiles[n], overlap, label_remove_mode)
            )
            n += 1
    computed_modifications = dask.compute(*task)
    for i, mod in enumerate(computed_modifications):
        modified_tiles.append(mod[0])
        # Each removed label id maps to its replacement value 0.
        excluded_labels[i] = {lab: 0 for lab in mod[1]}

    return modified_tiles, excluded_labels
181 |
182 |
def remove_overlapping_labels_in_another_channel(
    tiles: List[Image], excluded_labels: dict
) -> List[Image]:
    """Zero out, per tile, the labels that were removed from the first channel.

    excluded_labels maps tile index -> iterable of label ids to erase,
    keeping this channel consistent with the reference channel.
    """

    def zero_out(tile, labels):
        for label_id in labels:
            tile[tile == label_id] = 0
        return tile

    tasks = [
        dask.delayed(zero_out)(tile, excluded_labels[idx]) for idx, tile in enumerate(tiles)
    ]
    return list(dask.compute(*tasks))
196 |
197 |
def find_overlapping_border_labels(
    img1: Image, img2: Image, overlap: int, mode: str
) -> Dict[int, int]:
    """Find which pixels in img2 overlap pixels in img1
    Return mapping
    { img2px: img1px, }
    """
    # Select the shared overlap strip of each image.
    if mode == "horizontal":
        region1 = img1[:, -overlap:]
        region2 = img2[:, overlap : overlap * 2]
    elif mode == "vertical":
        region1 = img1[-overlap:, :]
        region2 = img2[overlap : overlap * 2, :]
    else:  # horizontal+vertical
        region1 = img1[-overlap:, -overlap:]
        region2 = img2[overlap : overlap * 2, overlap : overlap * 2]

    border_map = dict()
    # Row-major scan; the first pair where both labels are foreground wins,
    # later occurrences of an img2 label never overwrite an existing mapping.
    for old_value, new_value in zip(region2.ravel(), region1.ravel()):
        if old_value > 0 and new_value > 0 and old_value not in border_map:
            border_map[old_value] = new_value

    return border_map
230 |
231 |
def replace_overlapping_border_labels(
    img1: Image, img2: Image, overlap: int, mode: str
) -> Tuple[Image, Dict[int, int]]:
    """Replace label ids in img2 with label ids of img1.

    img2 is modified in place: each of its labels that overlaps an img1 label
    inside the shared overlap strip is renamed to the img1 label id. Returns
    img2 and the {old_img2_id: img1_id} mapping used.
    """
    border_map = find_overlapping_border_labels(img1, img2, overlap, mode)
    # to avoid merging of old and new labels
    # find old labels that have same ids as new ones
    # and add some value
    old_lab_ids = tuple(np.unique(img2).tolist())
    matches = []
    for new_lab_id in border_map.values():
        if new_lab_id in old_lab_ids:
            matches.append(new_lab_id)
    if matches != []:
        # Shift clashing old labels above both id ranges before renaming.
        # NOTE(review): if a shifted label is itself a key of border_map, the
        # replacement loop below will no longer find its original value --
        # verify this cannot happen with the label numbering used upstream.
        addition = img2.max() + max(matches)
        for value in matches:
            img2[img2 == value] += addition

    for old_value, new_value in border_map.items():
        img2[img2 == old_value] = new_value
    return img2, border_map
253 |
254 |
def find_and_replace_overlapping_border_labels_in_first_channel(
    tiles: List[Image], y_ntiles: int, x_ntiles: int, overlap: int, dtype
) -> Tuple[List[Image], Dict[int, Dict[int, int]], List[int]]:
    """Make label ids consistent across tile borders for the reference channel.

    Walks tiles in row-major order. For each tile:
      * offsets its labels by the running max of all previously seen tiles so
        ids are globally unique,
      * replaces labels in its top / left / top-left overlap regions with the
        ids already assigned in the corresponding neighbor tiles.

    Returns:
        modified tiles (same order as input),
        per-tile border maps {tile_index: {old_id: neighbor_id}},
        per-tile label offsets (one int per tile, in tile order).
    """
    previous_tile_max = 0
    # Grid of linear tile indices so neighbors can be looked up by (row, col).
    tile_ids = np.arange(0, y_ntiles * x_ntiles).reshape((y_ntiles, x_ntiles))
    modified_tiles = []
    tile_additions = []
    border_maps = dict()
    n = 0
    for i in range(0, y_ntiles):
        for j in range(0, x_ntiles):
            tile = tiles[n]
            tile = tile.astype(dtype)
            # Max BEFORE the offset: added to the running total at loop end.
            this_tile_max = tile.max()
            tile_additions.append(previous_tile_max)
            # Offset only labeled pixels; background stays 0.
            tile[np.nonzero(tile)] += previous_tile_max

            # Neighbors already processed in row-major order (or None on edges).
            if i != 0:
                top_tile_id = tile_ids[i - 1, j]
            else:
                top_tile_id = None
            if j != 0:
                left_tile_id = tile_ids[i, j - 1]
            else:
                left_tile_id = None
            if i != 0 and j != 0:
                top_left_tile_id = tile_ids[i - 1, j - 1]
            else:
                top_left_tile_id = None

            # Accumulate replacements from every neighbor; later neighbors can
            # overwrite earlier mappings for the same old id (dict.update).
            this_tile_border_map = dict()
            if top_tile_id is not None:
                tile, border_map = replace_overlapping_border_labels(
                    modified_tiles[top_tile_id], tile, overlap, "vertical"
                )
                this_tile_border_map.update(border_map)
            if left_tile_id is not None:
                tile, border_map = replace_overlapping_border_labels(
                    modified_tiles[left_tile_id], tile, overlap, "horizontal"
                )
                this_tile_border_map.update(border_map)
            if top_left_tile_id is not None:
                tile, border_map = replace_overlapping_border_labels(
                    modified_tiles[top_left_tile_id], tile, overlap, "horizontal+vertical"
                )
                this_tile_border_map.update(border_map)

            modified_tiles.append(tile)
            border_maps[n] = this_tile_border_map
            previous_tile_max += this_tile_max
            n += 1
    return modified_tiles, border_maps, tile_additions
307 |
308 |
def replace_overlapping_border_labels_in_another_channel(
    tiles: List[Image], border_maps: Dict[int, dict], tile_additions: List[int], dtype
) -> List[Image]:
    """Apply the first-channel border label replacements to another channel.

    Mirrors replace_overlapping_border_labels: each tile's labels are offset by
    the first channel's per-tile addition, ids colliding with the incoming ones
    are shifted away, and the recorded border mapping {old_id: first_channel_id}
    is applied.
    """

    def replace_values(tile, value_map, tile_addition, dtype):
        modified_tile = tile.astype(dtype)
        # Offset labels so ids are globally unique, as done in the first channel.
        modified_tile[np.nonzero(modified_tile)] += tile_addition
        if value_map != {}:
            old_lab_ids = tuple(np.unique(modified_tile).tolist())
            matches = [new_id for new_id in value_map.values() if new_id in old_lab_ids]
            if matches:
                # Shift colliding labels out of the way so the ids about to be
                # written cannot merge with unrelated existing labels.
                addition = modified_tile.max() + max(matches)
                for value in matches:
                    modified_tile[modified_tile == value] += addition
            # BUG FIX: the recorded border mapping was computed but never
            # applied here, unlike the first-channel counterpart
            # (replace_overlapping_border_labels). Apply it so label ids stay
            # consistent across channels; a no-op for ids absent from this tile.
            for old_value, new_value in value_map.items():
                modified_tile[modified_tile == old_value] = new_value
        return modified_tile

    task = []
    for i, tile in enumerate(tiles):
        task.append(dask.delayed(replace_values)(tile, border_maps[i], tile_additions[i], dtype))
    modified_tiles = dask.compute(*task)
    return list(modified_tiles)
332 |
333 |
def update_old_values(
    excluded_labels: dict, tile_additions: List[int]
) -> Dict[int, Dict[int, int]]:
    """Shift the keys of each per-tile excluded-label map by that tile's
    label offset, keeping the mapped values unchanged."""
    return {
        tile: {
            old_value + tile_additions[tile]: new_value
            for old_value, new_value in label_map.items()
        }
        for tile, label_map in excluded_labels.items()
    }
345 |
346 |
def modify_tiles_first_channel(
    tiles: List[Image], y_ntiles: int, x_ntiles: int, overlap: int, dtype
) -> Tuple[List[Image], Dict[int, Dict[int, int]], Dict[int, Dict[int, int]], List[int]]:
    """Prepare the reference channel: drop labels overlapping tile borders,
    then reconcile the remaining border labels across neighboring tiles.

    Returns (tiles, excluded label maps, border maps, per-tile label offsets).
    """
    trimmed_tiles, excluded_labels = find_and_remove_overlapping_labels_in_first_channel(
        tiles, y_ntiles, x_ntiles, overlap
    )
    reconciled = find_and_replace_overlapping_border_labels_in_first_channel(
        trimmed_tiles, y_ntiles, x_ntiles, overlap, dtype
    )
    reconciled_tiles, border_maps, tile_additions = reconciled
    return reconciled_tiles, excluded_labels, border_maps, tile_additions
362 |
363 |
def modify_tiles_another_channel(
    tiles: List[Image], excluded_labels: dict, border_maps: dict, tile_additions: list, dtype
) -> List[Image]:
    """Propagate the first channel's label exclusions and border replacements
    to another channel's tiles."""
    mod_tiles = remove_overlapping_labels_in_another_channel(tiles, excluded_labels)
    if border_maps == {}:
        return mod_tiles
    return replace_overlapping_border_labels_in_another_channel(
        mod_tiles, border_maps, tile_additions, dtype
    )
374 |
375 |
376 | def get_slices(
377 | tile_shape: tuple, overlap: int, y_tile_id: int, x_tile_id: int, y_id_max: int, x_id_max: int
378 | ) -> Tuple[Tuple[slice, slice], Tuple[slice, slice]]:
379 | if y_id_max - 1 == 0:
380 | tile_slice_y = slice(overlap, tile_shape[0] + overlap)
381 | y_f = 0
382 | y_t = tile_shape[0]
383 | elif y_tile_id == 0:
384 | tile_slice_y = slice(overlap, tile_shape[0] + overlap * 2)
385 | y_f = 0
386 | y_t = tile_shape[0] + overlap
387 | elif y_tile_id == y_id_max - 1:
388 | tile_slice_y = slice(overlap, tile_shape[0] + overlap)
389 | y_f = y_tile_id * tile_shape[0]
390 | y_t = y_f + tile_shape[0]
391 | else:
392 | tile_slice_y = slice(overlap, tile_shape[0] + overlap * 2)
393 | y_f = y_tile_id * tile_shape[0]
394 | y_t = y_f + tile_shape[0] + overlap
395 |
396 | if x_id_max - 1 == 0:
397 | tile_slice_x = slice(overlap, tile_shape[1] + overlap)
398 | x_f = 0
399 | x_t = tile_shape[1]
400 | elif x_tile_id == 0:
401 | tile_slice_x = slice(overlap, tile_shape[1] + overlap * 2)
402 | x_f = 0
403 | x_t = tile_shape[1] + overlap
404 | elif x_tile_id == x_id_max - 1:
405 | tile_slice_x = slice(overlap, tile_shape[1] + overlap)
406 | x_f = x_tile_id * tile_shape[1]
407 | x_t = x_f + tile_shape[1]
408 | else:
409 | tile_slice_x = slice(overlap, tile_shape[1] + overlap * 2)
410 | x_f = x_tile_id * tile_shape[1]
411 | x_t = x_f + tile_shape[1] + overlap
412 |
413 | tile_slice = (tile_slice_y, tile_slice_x)
414 | big_image_slice = (slice(y_f, y_t), slice(x_f, x_t))
415 |
416 | return tile_slice, big_image_slice
417 |
418 |
def stitch_mask(
    tiles: List[Image],
    y_ntiles: int,
    x_ntiles: int,
    tile_shape: list,
    dtype,
    overlap: int,
    padding: dict,
) -> Image:
    """Paste mask tiles into one labeled image and trim the slicer padding.

    Only non-zero (labeled) pixels are copied, so a tile's background cannot
    erase labels already written by a neighboring tile's overlap.
    """
    core_y = tile_shape[-2] - overlap * 2
    core_x = tile_shape[-1] - overlap * 2

    full_y = y_ntiles * core_y
    full_x = x_ntiles * core_x

    pad_y = padding["top"] + padding["bottom"]
    pad_x = padding["left"] + padding["right"]

    big_image = np.zeros((full_y, full_x), dtype=dtype)

    print("n tiles x,y:", (x_ntiles, y_ntiles))
    print("plane shape x,y:", full_x - pad_x, full_y - pad_y)

    tile_idx = 0
    for row in range(y_ntiles):
        for col in range(x_ntiles):
            tile_slice, big_image_slice = get_slices(
                (core_y, core_x), overlap, row, col, y_ntiles, x_ntiles
            )

            tile = tiles[tile_idx].astype(dtype)

            labeled = tile[tile_slice] != 0
            big_image[big_image_slice][labeled] = tile[tile_slice][labeled]
            tile_idx += 1

    # Drop the padding added before slicing (bottom/right trim only).
    return big_image[: full_y - pad_y, : full_x - pad_x]
462 |
463 |
def process_all_masks(
    tiles, tile_shape, y_ntiles, x_ntiles, overlap, padding, dtype
) -> Tuple[List[Image], str]:
    """Stitch the four mask channels into whole-region masks with consistent,
    matched label ids.

    Each input tile holds 4 planes: 0 cell, 1 nucleus, 2 cell boundary,
    3 nucleus boundary. The nucleus and cell channels drive label exclusion /
    border reconciliation; their combined maps are then applied to all four
    channels, the channels are stitched, cells are matched to nuclei, and label
    ids are renumbered consecutively.

    Returns (list of 4 stitched masks, OME-XML metadata string).
    """
    print("Started processing masks")
    tiles_cell = [t[0, :, :] for t in tiles]
    tiles_nuc = [t[1, :, :] for t in tiles]
    tiles_cell_b = [t[2, :, :] for t in tiles]
    tiles_nuc_b = [t[3, :, :] for t in tiles]
    raw_tile_groups = [tiles_cell, tiles_nuc, tiles_cell_b, tiles_nuc_b]
    print("Identifying and trimming border labels in all tiles")
    (
        mod_tiles_nuc,
        excluded_labels_nuc,
        border_maps_nuc,
        tile_additions_nuc,
    ) = modify_tiles_first_channel(tiles_nuc, y_ntiles, x_ntiles, overlap, dtype)

    (
        mod_tiles_cell,
        excluded_labels_cell,
        border_maps_cell,
        tile_additions_cell,
    ) = modify_tiles_first_channel(tiles_cell, y_ntiles, x_ntiles, overlap, dtype)

    # Merge exclusion maps of the two driver channels; on key clashes the
    # cell-channel entry wins.
    all_exclusions = deepcopy(excluded_labels_nuc)
    for tile in excluded_labels_cell:
        if tile in all_exclusions:
            for lab in excluded_labels_cell[tile]:
                all_exclusions[tile][lab] = excluded_labels_cell[tile][lab]
        else:
            all_exclusions[tile] = excluded_labels_cell[tile]

    # Merge border maps the same way (cell channel wins on clashes).
    all_border_maps = deepcopy(border_maps_nuc)
    for tile in border_maps_cell:
        if tile in all_border_maps:
            for lab in border_maps_cell[tile]:
                all_border_maps[tile][lab] = border_maps_cell[tile][lab]
        else:
            all_border_maps[tile] = border_maps_cell[tile]

    # NOTE(review): tile_additions_cell is applied to ALL four channel groups
    # while tile_additions_nuc is never used — confirm this asymmetry is
    # intentional (label ids are renumbered after matching, which may hide it).
    mod_tile_groups = []
    for tile_group in raw_tile_groups:
        mod_tile_group = modify_tiles_another_channel(
            tile_group, all_exclusions, all_border_maps, tile_additions_cell, dtype
        )
        mod_tile_groups.append(mod_tile_group)

    del raw_tile_groups
    gc.collect()
    print("Stitching masks")
    stitched_imgs = []
    for tile_group in mod_tile_groups:
        stitched_img = stitch_mask(
            tile_group, y_ntiles, x_ntiles, tile_shape, dtype, overlap, padding
        )
        stitched_imgs.append(stitched_img)

    del mod_tile_groups
    gc.collect()

    # Pair cells with nuclei; order: [cell, nucleus, cell_b, nucleus_b].
    matched_masks, fraction_matched = get_matched_masks(
        cell_mask=stitched_imgs[0],
        nucleus_mask=stitched_imgs[1],
        dtype=dtype,
        do_mismatch_repair=True,
    )
    del stitched_imgs
    gc.collect()

    # Renumber labels consecutively, using the cell mask as reference.
    new_label_ids = get_new_labels(matched_masks[0])  # cell
    reset_imgs = []
    for i in range(0, len(matched_masks)):
        reset_img = reset_label_ids(matched_masks[i], new_label_ids)
        reset_imgs.append(reset_img)

    y_size = reset_imgs[0].shape[0]
    x_size = reset_imgs[0].shape[1]
    ome_meta = generate_ome_meta_for_mask(y_size, x_size, dtype, fraction_matched)
    print("Finished processing masks")
    return reset_imgs, ome_meta
544 |
--------------------------------------------------------------------------------
/bin/codex_stitching/secondary_stitcher/match_masks.py:
--------------------------------------------------------------------------------
1 | from typing import List, Tuple
2 |
3 | import numpy as np
4 | from scipy.sparse import csr_matrix
5 | from skimage.segmentation import find_boundaries
6 |
7 | Image = np.ndarray
8 |
9 | """
10 | Package functions that repair and generate matched cell, nuclear,
11 | cell membrane and nuclear membrane segmentation masks
12 | Author: Haoran Chen
13 | Version: 1.1
14 | 08/09/2021
15 | """
16 |
17 |
def get_matched_cells(cell_arr, cell_membrane_arr, nuclear_arr, mismatch_repair):
    """Match one cell against one nucleus by pixel-coordinate overlap.

    Returns (cell_coords, nucleus_coords, mismatch_fraction) on success, or
    (False, False, False) when the pair cannot be matched. With repair enabled
    the returned nucleus is clipped to the cell interior (cell minus membrane).
    """
    cell_px = set(tuple(p) for p in cell_arr)
    membrane_px = set(tuple(p) for p in cell_membrane_arr)
    nucleus_px = set(tuple(p) for p in nuclear_arr)
    # Cell interior = cell pixels that are not membrane pixels.
    interior_px = cell_px - membrane_px
    # Nucleus pixels falling outside the cell interior are mismatched.
    mismatch_px = nucleus_px - interior_px
    mismatch_fraction = len(mismatch_px) / len(nucleus_px)
    if not mismatch_repair:
        # Strict mode: accept only a perfect fit.
        if mismatch_px:
            return False, False, False
        return np.array(list(cell_px)), np.array(list(nucleus_px)), 0
    # Repair mode: accept if at least one nucleus pixel fits, clip the rest.
    if len(mismatch_px) < len(nucleus_px):
        return np.array(list(cell_px)), np.array(list(interior_px & nucleus_px)), mismatch_fraction
    return False, False, False
36 |
37 |
def compute_M(data):
    """Build a sparse matrix whose row k stores the flat indices of all
    elements of `data` equal to k (one column per element)."""
    flat_indices = np.arange(data.size)
    row_per_element = data.ravel()
    shape = (data.max() + 1, data.size)
    return csr_matrix((flat_indices, (row_per_element, flat_indices)), shape=shape)
41 |
42 |
def get_indices_sparse(data):
    """Group pixel coordinates by label value using a sparse matrix.

    Returns a list indexed by label id; entry k holds np.where-style index
    arrays for all pixels of `data` equal to k.
    """
    sparse_index = compute_M(data)
    return [np.unravel_index(row.data, data.shape) for row in sparse_index]
46 |
47 |
def list_remove(c_list, indexes):
    """Delete items at the given indexes in-place (highest index first, so
    earlier deletions do not shift later ones) and return the list."""
    for idx in sorted(indexes, reverse=True):
        c_list.pop(idx)
    return c_list
52 |
53 |
def get_indexed_mask(mask, boundary):
    """Turn a binary boundary mask into a labeled one: each boundary pixel
    takes the label id it has in `mask`; everything else stays 0."""
    indexed = boundary * 1  # bool -> int copy
    on_boundary = np.where(indexed == 1)
    indexed[on_boundary] = mask[on_boundary]
    return indexed
59 |
60 |
def get_boundary(mask: Image):
    """Return a boundary mask where each boundary pixel keeps its label id."""
    inner_boundary = find_boundaries(mask, mode="inner")
    return get_indexed_mask(mask, inner_boundary)
65 |
66 |
def get_mask(cell_list, shape: Tuple[int]):
    """Paint each coordinate array from `cell_list` into a new mask, labeling
    them 1..len(cell_list) in list order."""
    mask = np.zeros(shape)
    for label, coords in enumerate(cell_list, start=1):
        mask[tuple(coords.T)] = label
    return mask
72 |
73 |
def get_cell_num(mask: Image):
    """Number of distinct values in the mask (background 0 included, if present)."""
    return np.unique(mask).size
76 |
77 |
def get_mismatched_fraction(
    whole_cell_mask: Image,
    nuclear_mask: Image,
    cell_matched_mask: Image,
    nuclear_matched_mask: Image,
) -> float:
    """Fraction of the combined cell+nucleus area that was NOT matched.

    Pixels present in the raw masks but absent from the matched masks count
    as mismatched; the denominator is the union of both raw masks' coverage.
    """
    # Binarize all masks (label id -> 0/1 presence).
    cells = np.sign(whole_cell_mask)
    nuclei = np.sign(nuclear_mask)
    matched_cells = np.sign(cell_matched_mask)
    matched_nuclei = np.sign(nuclear_matched_mask)

    total_area = np.sum(np.sign(cells + nuclei))
    leftover = (nuclei - matched_nuclei) + (cells - matched_cells)
    mismatched_area = np.sum(np.sign(leftover))
    return mismatched_area / total_area
97 |
98 |
def get_fraction_matched_cells(
    whole_cell_mask: Image, nuclear_mask: Image, cell_matched_mask: Image
) -> float:
    """Matched cells as a fraction of matched + unmatched cells and nuclei.

    Label counts exclude the background (0) label by subtracting one from
    each np.unique count.
    """
    matched = len(np.unique(cell_matched_mask)) - 1
    total_cells = len(np.unique(whole_cell_mask)) - 1
    total_nuclei = len(np.unique(nuclear_mask)) - 1
    unmatched_cells = total_cells - matched
    unmatched_nuclei = total_nuclei - matched
    return matched / (unmatched_cells + unmatched_nuclei + matched)
111 |
112 |
def get_matched_masks(
    cell_mask: Image, nucleus_mask: Image, dtype, do_mismatch_repair: bool
) -> Tuple[List[Image], float]:
    """Pair each cell with at most one nucleus and rebuild consistent masks.

    Greedily scans cells in label order; for each cell it considers every
    nucleus label present under the cell's pixels and keeps the pairing with
    the lowest mismatch fraction (ties broken by first encounter). Each cell
    and each nucleus is used at most once.

    Returns ([cell, nucleus, cell_boundary, nucleus_boundary] masks cast to
    `dtype`, fraction of matched cells — fixed to 1.0 when repair is on).
    """
    whole_cell_mask = cell_mask.copy()
    nuclear_mask = nucleus_mask.copy()
    cell_membrane_mask = get_boundary(whole_cell_mask)

    # Per-label pixel coordinates; [1:] drops the background (label 0) entry.
    cell_coords = get_indices_sparse(whole_cell_mask)[1:]
    nucleus_coords = get_indices_sparse(nuclear_mask)[1:]
    cell_membrane_coords = get_indices_sparse(cell_membrane_mask)[1:]

    # Convert np.where-style index tuples to (n_pixels, 2) coordinate arrays.
    cell_coords = list(map(lambda x: np.array(x).T, cell_coords))
    nucleus_coords = list(map(lambda x: np.array(x).T, nucleus_coords))
    cell_membrane_coords = list(map(lambda x: np.array(x).T, cell_membrane_coords))

    cell_matched_index_list = []
    nucleus_matched_index_list = []
    cell_matched_list = []
    nucleus_matched_list = []

    for i in range(len(cell_coords)):
        if len(cell_coords[i]) != 0:
            current_cell_coords = cell_coords[i]
            # Nucleus labels appearing under this cell's pixels (candidates).
            nuclear_search_num = np.unique(
                list(map(lambda x: nuclear_mask[tuple(x)], current_cell_coords))
            )
            best_mismatch_fraction = 1
            whole_cell_best = []
            for j in nuclear_search_num:
                if j != 0:
                    # Skip nuclei and cells that are already matched.
                    if (j - 1 not in nucleus_matched_index_list) and (
                        i not in cell_matched_index_list
                    ):
                        whole_cell, nucleus, mismatch_fraction = get_matched_cells(
                            cell_coords[i],
                            cell_membrane_coords[i],
                            nucleus_coords[j - 1],
                            mismatch_repair=do_mismatch_repair,
                        )
                        # get_matched_cells returns bools on failure.
                        if type(whole_cell) != bool:
                            # Keep the candidate with the smallest mismatch.
                            if mismatch_fraction < best_mismatch_fraction:
                                best_mismatch_fraction = mismatch_fraction
                                whole_cell_best = whole_cell
                                nucleus_best = nucleus
                                i_ind = i
                                j_ind = j - 1
            # nucleus_best/i_ind/j_ind are only read when whole_cell_best was
            # set above, so they are always bound here.
            if len(whole_cell_best) > 0:
                cell_matched_list.append(whole_cell_best)
                nucleus_matched_list.append(nucleus_best)
                cell_matched_index_list.append(i_ind)
                nucleus_matched_index_list.append(j_ind)

    del cell_coords
    del nucleus_coords

    # Rebuild masks from matched coordinate lists; labels renumbered 1..N.
    cell_matched_mask = get_mask(cell_matched_list, whole_cell_mask.shape)
    nuclear_matched_mask = get_mask(nucleus_matched_list, whole_cell_mask.shape)
    cell_membrane_mask = get_boundary(cell_matched_mask)
    nuclear_membrane_mask = get_boundary(nuclear_matched_mask)

    if do_mismatch_repair:
        # With repair every surviving cell is matched by construction.
        fraction_matched_cells = 1.0
    else:
        fraction_matched_cells = get_fraction_matched_cells(
            whole_cell_mask, nuclear_mask, cell_matched_mask
        )

    out_list = [
        cell_matched_mask.astype(dtype),
        nuclear_matched_mask.astype(dtype),
        cell_membrane_mask.astype(dtype),
        nuclear_membrane_mask.astype(dtype),
    ]
    return out_list, fraction_matched_cells
190 |
--------------------------------------------------------------------------------
/bin/codex_stitching/secondary_stitcher/secondary_stitcher.py:
--------------------------------------------------------------------------------
1 | import re
2 | import xml.etree.ElementTree as ET
3 | from pathlib import Path
4 | from typing import Dict, List, Union
5 |
6 | import numpy as np
7 | import pandas as pd
8 | import tifffile as tif
9 | from mask_stitching import process_all_masks
10 | from skimage.measure import regionprops_table
11 |
12 | Image = np.ndarray
13 |
14 |
def add_structured_annotations(omexml_str: str, nucleus_channel: str, cell_channel: str) -> str:
    """Insert an OME XMLAnnotation recording which channels drove segmentation.

    The annotation inserted (after the Image node, or into an existing
    StructuredAnnotations block) looks like:

        <StructuredAnnotations>
          <XMLAnnotation ID="Annotation:0">
            <Value>
              <OriginalMetadata>
                <Key>SegmentationChannels</Key>
                <Value>
                  <Nucleus>DAPI-02</Nucleus>
                  <Cell>CD45</Cell>
                </Value>
              </OriginalMetadata>
            </Value>
          </XMLAnnotation>
        </StructuredAnnotations>
    """
    # Strip cytokit-style "cycNNN_chNNN_orig" prefixes from channel names.
    nucleus_channel = re.sub(r"cyc(\d+)_ch(\d+)_orig(.*)", r"\3", nucleus_channel)
    cell_channel = re.sub(r"cyc(\d+)_ch(\d+)_orig(.*)", r"\3", cell_channel)

    structured_annotation = ET.Element("StructuredAnnotations")
    annotation = ET.SubElement(structured_annotation, "XMLAnnotation", {"ID": "Annotation:0"})
    annotation_value = ET.SubElement(annotation, "Value")
    original_metadata = ET.SubElement(annotation_value, "OriginalMetadata")
    ET.SubElement(original_metadata, "Key").text = "SegmentationChannels"
    segmentation_channels_value = ET.SubElement(original_metadata, "Value")
    ET.SubElement(segmentation_channels_value, "Nucleus").text = nucleus_channel
    ET.SubElement(segmentation_channels_value, "Cell").text = cell_channel
    sa_str = ET.tostring(structured_annotation, encoding="utf-8").decode("utf-8")

    if "StructuredAnnotations" in omexml_str:
        # An SA block already exists: insert our annotation right after its
        # opening tag and drop our own wrapper tags to avoid nesting.
        sa_placement = omexml_str.find("<StructuredAnnotations>") + len("<StructuredAnnotations>")
        sa_str = re.sub(r"</?StructuredAnnotations>", "", sa_str)
    else:
        # No SA block yet: place a new one right after the closing Image tag.
        sa_placement = omexml_str.find("</Image>") + len("</Image>")

    omexml_str_with_sa = omexml_str[:sa_placement] + sa_str + omexml_str[sa_placement:]
    return omexml_str_with_sa
57 |
58 |
def alpha_num_order(string: str) -> str:
    """Zero-pad every digit run to 5 places so a plain string sort becomes
    a numeric sort. Ex: alpha_num_order("a6b12.125") == "a00006b00012.00125"
    """
    parts = re.split(r"(\d+)", string)
    padded = (format(int(part), "05d") if part.isdigit() else part for part in parts)
    return "".join(padded)
66 |
67 |
def get_img_listing(in_dir: Path) -> List[Path]:
    """List TIFF files in a directory, sorted in alphanumeric order."""
    allowed_extensions = (".tif", ".tiff")
    tiff_files = [path for path in in_dir.iterdir() if path.suffix in allowed_extensions]
    return sorted(tiff_files, key=lambda p: alpha_num_order(p.name))
74 |
75 |
def path_to_str(path: Path):
    """Absolute POSIX-style (forward-slash) string form of a path."""
    return path.absolute().as_posix()
78 |
79 |
def path_to_dict(path: Path):
    """Parse region / x / y values from a file name like 'R001_X002_Y003.tif'
    into {"R": 1, "X": 2, "Y": 3, "path": path}."""
    # Split on digit runs (dropping the trailing extension token), then pair
    # each letter key with the number that follows it.
    tokens = re.split(r"(\d+)(?:_?)", path.name)[:-1]
    pairs = zip(*[iter(tokens)] * 2)
    parsed = {key: int(value) for key, value in pairs}
    parsed["path"] = path
    return parsed
90 |
91 |
def get_slices(
    arr: np.ndarray, hor_f: int, hor_t: int, ver_f: int, ver_t: int, padding: dict, overlap=0
):
    """Compute matching slices for pasting one tile into the stitched image.

    hor_f/hor_t and ver_f/ver_t are the tile's intended bounds in the big
    image; they are clipped to the image extents and shifted by the left/top
    padding. Returns (big_image_slice, tile_slice) — the destination region
    in `arr` and the corresponding region to read from the tile.

    Fixes vs the original: axis indexing is consistently shape[-1]/shape[-2]
    (the original mixed shape[1]/shape[0], identical for 2D input), and the
    never-used right/bottom pad locals are removed.
    """
    left_check = hor_f - padding["left"]
    top_check = ver_f - padding["top"]
    right_check = hor_t - arr.shape[-1]
    bot_check = ver_t - arr.shape[-2]

    left_pad_size = 0
    top_pad_size = 0

    # Tile sticks out past the left/top padding: clip and remember how much
    # of the tile's leading edge to skip.
    if left_check < 0:
        left_pad_size = abs(left_check)
        hor_f = 0
    if top_check < 0:
        top_pad_size = abs(top_check)
        ver_f = 0
    # Tile sticks out past the right/bottom image edge: clip to the image.
    if right_check > 0:
        hor_t = arr.shape[-1]
    if bot_check > 0:
        ver_t = arr.shape[-2]

    big_image_slice = (slice(ver_f, ver_t), slice(hor_f, hor_t))
    tile_shape = (ver_t - ver_f, hor_t - hor_f)
    tile_slice = (
        slice(top_pad_size + overlap, tile_shape[0] + overlap),
        slice(left_pad_size + overlap, tile_shape[1] + overlap),
    )

    return big_image_slice, tile_slice
125 |
126 |
def get_dataset_info(img_dir: Path):
    """Scan tile images and group their paths by region.

    Returns (per-region path lists sorted by region/Y/X, max Y tile index,
    max X tile index).
    """
    img_paths = get_img_listing(img_dir)
    df = pd.DataFrame([path_to_dict(p) for p in img_paths])
    df.sort_values(["R", "Y", "X"], inplace=True)
    df.reset_index(inplace=True)

    y_ntiles = df["Y"].max()
    x_ntiles = df["X"].max()

    path_list_per_region = []
    for region in list(df["R"].unique()):
        region_rows = df[df["R"] == region].index
        path_list_per_region.append(list(df.loc[region_rows, "path"]))

    return path_list_per_region, y_ntiles, x_ntiles
146 |
147 |
def load_tiles(path_list: List[Path], key: Union[None, int]):
    """Read all tiles from disk; `key` selects a single TIFF page, while
    None reads every page of each file."""
    if key is None:
        return [tif.imread(path_to_str(path)) for path in path_list]
    return [tif.imread(path_to_str(path), key=key) for path in path_list]
158 |
159 |
def calc_mask_coverage(segm_mask: Image) -> float:
    """Fraction of plane pixels covered by non-zero labels, rounded to 3 places."""
    labeled_pixels = np.count_nonzero(segm_mask)
    plane_pixels = segm_mask.shape[-2] * segm_mask.shape[-1]
    return float(round(labeled_pixels / plane_pixels, 3))
164 |
165 |
def calc_snr(img: Image) -> float:
    """Signal-to-noise estimate (mean / population std), rounded to 3 places."""
    snr = np.mean(img) / np.std(img)
    return float(round(snr, 3))
168 |
169 |
def calc_label_sizes(segm_mask: Image) -> Dict[str, List[float]]:
    """Min / max / mean label bounding-box sizes (height, width).

    Useful to check whether any labels were merged during stitching.
    """
    props = regionprops_table(segm_mask, properties=("label", "bbox"))
    # bbox columns: 0 = min_row, 1 = min_col, 2 = max_row, 3 = max_col.
    heights = props["bbox-2"] - props["bbox-0"]
    widths = props["bbox-3"] - props["bbox-1"]
    dims = np.stack((heights, widths), axis=1)
    long_sides = np.max(dims, axis=1)
    return dict(
        min_bbox_size=[float(v) for v in dims[np.argmin(long_sides)].tolist()],
        max_bbox_size=[float(v) for v in dims[np.argmax(long_sides)].tolist()],
        mean_bbox_size=[float(v) for v in np.round(np.mean(dims, axis=0), 3).tolist()],
    )
187 |
188 |
def stitch_plane(
    tiles: List[Image],
    y_ntiles: int,
    x_ntiles: int,
    tile_shape: list,
    dtype,
    overlap: int,
    padding: dict,
) -> Image:
    """Assemble expression tiles into one plane, trimming tile overlaps and
    the padding that was added before slicing."""
    core_y = tile_shape[-2] - overlap * 2
    core_x = tile_shape[-1] - overlap * 2

    big_image_y_size = (y_ntiles * core_y) - padding["top"] - padding["bottom"]
    big_image_x_size = (x_ntiles * core_x) - padding["left"] - padding["right"]

    big_image = np.zeros((big_image_y_size, big_image_x_size), dtype=dtype)

    print("n tiles x,y:", (x_ntiles, y_ntiles))
    print("plane shape x,y:", (big_image_x_size, big_image_y_size))

    tile_idx = 0
    for row in range(y_ntiles):
        ver_f = row * core_y
        ver_t = ver_f + core_y

        for col in range(x_ntiles):
            hor_f = col * core_x
            hor_t = hor_f + core_x

            big_image_slice, tile_slice = get_slices(
                big_image, hor_f, hor_t, ver_f, ver_t, padding, overlap
            )

            big_image[tuple(big_image_slice)] = tiles[tile_idx][tuple(tile_slice)]

            tile_idx += 1
    return big_image
230 |
231 |
def main(
    img_dir: Path,
    out_dir: Path,
    img_name_template: str,
    overlap: int,
    padding_str: str,
    is_mask: bool,
    nucleus_channel: str,
    cell_channel: str,
):
    """Stitch per-region tiles from img_dir into OME-TIFFs in out_dir.

    With is_mask=True, tiles are treated as 4-channel segmentation masks and
    processed through process_all_masks; otherwise each TIFF page is stitched
    as an expression plane. padding_str is "left,right,top,bottom".
    Returns a per-region report dict.
    """
    padding_int = [int(i) for i in padding_str.split(",")]
    padding = {
        "left": padding_int[0],
        "right": padding_int[1],
        "top": padding_int[2],
        "bottom": padding_int[3],
    }

    path_list_per_region, y_ntiles, x_ntiles = get_dataset_info(img_dir)

    # Probe the first tile for shape, page count, dtype and OME metadata.
    with tif.TiffFile(path_to_str(path_list_per_region[0][0])) as TF:
        tile_shape = list(TF.series[0].shape)
        npages = len(TF.pages)
        dtype = TF.series[0].dtype
        ome_meta = TF.ome_metadata

    # Final plane size: tile cores minus the slicer padding.
    big_image_y_size = (
        (y_ntiles * (tile_shape[-2] - overlap * 2)) - padding["top"] - padding["bottom"]
    )
    big_image_x_size = (
        (x_ntiles * (tile_shape[-1] - overlap * 2)) - padding["left"] - padding["right"]
    )

    if is_mask:
        # Masks get fresh metadata from process_all_masks; force a label dtype.
        dtype = np.uint32
    else:
        # Patch the tile metadata to describe the stitched plane instead.
        ome_meta = re.sub(r'\sSizeY="\d+"', ' SizeY="' + str(big_image_y_size) + '"', ome_meta)
        ome_meta = re.sub(r'\sSizeX="\d+"', ' SizeX="' + str(big_image_x_size) + '"', ome_meta)
        ome_meta = re.sub(r'\sDimensionOrder="[XYCZT]+"', ' DimensionOrder="XYZCT"', ome_meta)
        ome_meta = add_structured_annotations(ome_meta, nucleus_channel, cell_channel)
    # part of this report is generated after mask stitching and part after expression stitching

    total_report = dict()
    for r, path_list in enumerate(path_list_per_region):
        new_path = out_dir / img_name_template.format(r=r + 1)
        this_region_report = dict()
        TW = tif.TiffWriter(path_to_str(new_path), bigtiff=True, shaped=False)
        if is_mask:
            # mask channels 0 - cells, 1 - nuclei, 2 - cell boundaries, 3 - nucleus boundaries
            tiles = load_tiles(path_list, key=None)
            masks, ome_meta = process_all_masks(
                tiles, tile_shape, y_ntiles, x_ntiles, overlap, padding, dtype
            )
            for mask in masks:
                # Write each mask as a single-plane page (1, H, W).
                new_shape = (1, mask.shape[0], mask.shape[1])
                TW.write(
                    mask.reshape(new_shape),
                    contiguous=True,
                    photometric="minisblack",
                    description=ome_meta,
                )

            this_region_report["num_cells"] = int(masks[0].max())
            this_region_report["num_nuclei"] = int(masks[1].max())
            this_region_report["cell_coverage"] = calc_mask_coverage(masks[0])
            this_region_report["nuclei_coverage"] = calc_mask_coverage(masks[1])
            this_region_report["cell_sizes"] = calc_label_sizes(masks[0])
            this_region_report["nucleus_sizes"] = calc_label_sizes(masks[1])
        else:
            # Stitch each TIFF page (channel/z-plane) independently.
            for p in range(0, npages):
                tiles = load_tiles(path_list, key=p)
                print("\nstitching expressions page", p + 1, "/", npages)
                plane = stitch_plane(
                    tiles, y_ntiles, x_ntiles, tile_shape, dtype, overlap, padding
                )
                new_shape = (1, plane.shape[0], plane.shape[1])
                if p == 0:
                    # Image-level report fields, filled once per region.
                    this_region_report["num_channels"] = int(npages)
                    this_region_report["img_height"] = int(plane.shape[0])
                    this_region_report["img_width"] = int(plane.shape[1])
                    this_region_report["per_channel_snr"] = dict()
                    this_region_report["nucleus_channel"] = nucleus_channel
                    this_region_report["cell_channel"] = cell_channel
                this_region_report["per_channel_snr"][p] = calc_snr(plane)
                TW.write(
                    plane.reshape(new_shape),
                    contiguous=True,
                    photometric="minisblack",
                    description=ome_meta,
                )
        total_report["reg" + str(r + 1)] = this_region_report
        TW.close()
    return total_report
325 |
--------------------------------------------------------------------------------
/bin/codex_stitching/secondary_stitcher/secondary_stitcher_runner.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 | from pathlib import Path
4 | from pprint import pprint
5 | from typing import Any, Dict
6 |
7 | import secondary_stitcher
8 |
9 | Report = Dict[str, Dict[str, Any]]
10 |
11 |
def make_dir_if_not_exists(dir_path: Path):
    """Create the directory (and any missing parents) if it does not exist.

    Uses exist_ok=True instead of a separate exists() check, which removes
    the race between checking and creating the directory.
    """
    dir_path.mkdir(parents=True, exist_ok=True)
15 |
16 |
def read_pipeline_config(path_to_config: Path) -> dict:
    """Load the JSON pipeline config from disk."""
    with open(path_to_config, "r") as stream:
        return json.load(stream)
21 |
22 |
def write_pipeline_config(out_path: Path, config):
    """Dump the config as indented JSON, preserving insertion order of keys."""
    with open(out_path, "w") as stream:
        json.dump(config, stream, sort_keys=False, indent=4)
26 |
27 |
def run_stitcher(
    img_dir: Path,
    out_dir: Path,
    img_name_template: str,
    overlap: int,
    padding: dict,
    is_mask: bool,
    nucleus_channel: str,
    cell_channel: str,
) -> Report:
    """Serialize padding to "left,right,top,bottom" and delegate to the
    secondary stitcher; returns its per-region report."""
    padding_str = ",".join(str(value) for value in padding.values())
    return secondary_stitcher.main(
        img_dir,
        out_dir,
        img_name_template,
        overlap,
        padding_str,
        is_mask,
        nucleus_channel,
        cell_channel,
    )
50 |
51 |
def merge_reports(mask_report: Report, expr_report: Report) -> Report:
    """Combine mask and expression reports region by region; expression
    entries win on duplicate keys."""
    return {
        region: {**mask_report[region], **expr_report[region]} for region in mask_report
    }
57 |
58 |
def main(pipeline_config_path: Path, ometiff_dir: Path):
    """Stitch segmentation-mask and expression tiles for the whole dataset and
    write the final pipeline config (including a stitching report).

    NOTE: output locations are hardcoded container paths under /output.
    """
    pipeline_config = read_pipeline_config(pipeline_config_path)
    slicer_meta = pipeline_config["slicer"]
    # Fall back to the string "None" when channels are absent from the config.
    nucleus_channel = pipeline_config.get("nuclei_channel", "None")
    cell_channel = pipeline_config.get("membrane_channel", "None")

    # Cytokit output layout: masks and expressions live in fixed subdirs.
    path_to_mask_tiles = Path(ometiff_dir).joinpath("cytometry/tile/ome-tiff")
    path_to_image_tiles = Path(ometiff_dir).joinpath("extract/expressions/ome-tiff")

    overlap = slicer_meta["overlap"]
    padding = slicer_meta["padding"]

    mask_out_dir = Path("/output/pipeline_output/mask")
    expr_out_dir = Path("/output/pipeline_output/expr")
    final_pipeline_config_path = Path("/output/pipelineConfig.json")

    make_dir_if_not_exists(mask_out_dir)
    make_dir_if_not_exists(expr_out_dir)

    mask_out_name_template = "reg{r:03d}_mask.ome.tiff"
    expr_out_name_template = "reg{r:03d}_expr.ome.tiff"

    # Masks first (is_mask=True), then expressions (is_mask=False).
    mask_report = run_stitcher(
        path_to_mask_tiles,
        mask_out_dir,
        mask_out_name_template,
        overlap,
        padding,
        True,
        nucleus_channel,
        cell_channel,
    )

    expr_report = run_stitcher(
        path_to_image_tiles,
        expr_out_dir,
        expr_out_name_template,
        overlap,
        padding,
        False,
        nucleus_channel,
        cell_channel,
    )

    total_report = merge_reports(mask_report, expr_report)

    # Append the combined report to the config and persist it.
    final_pipeline_config = pipeline_config
    final_pipeline_config.update({"report": total_report})
    print("\nfinal_pipeline_config")
    pprint(final_pipeline_config, sort_dicts=False)
    write_pipeline_config(final_pipeline_config_path, final_pipeline_config)
110 |
111 |
if __name__ == "__main__":
    # CLI entry point: stitch mask and expression tiles, then write the final
    # pipeline config with the stitching report appended.
    parser = argparse.ArgumentParser()
    parser.add_argument("--pipeline_config_path", type=Path, help="path to pipeline config")
    parser.add_argument(
        "--ometiff_dir", type=Path, help="dir with segmentation mask tiles and codex image tiles"
    )

    args = parser.parse_args()
    main(args.pipeline_config_path, args.ometiff_dir)
121 |
--------------------------------------------------------------------------------
/bin/convert_to_ometiff.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import logging
3 | import re
4 | from multiprocessing import Pool
5 | from os import walk
6 | from pathlib import Path
7 | from typing import List, Optional
8 |
9 | import pandas as pd
10 | import yaml
11 | from aicsimageio import AICSImage
12 | from aicsimageio.writers.ome_tiff_writer import OmeTiffWriter
13 | from ome_types.model import AnnotationRef, Map, MapAnnotation, StructuredAnnotationList
14 | from tifffile import TiffFile
15 |
16 | from utils import print_directory_tree
17 |
logging.basicConfig(level=logging.INFO, format="%(levelname)-7s - %(message)s")
logger = logging.getLogger(__name__)
# Channel names written into the segmentation-mask OME-TIFFs, in plane order.
SEGMENTATION_CHANNEL_NAMES = [
    "cells",
    "nuclei",
    "cell_boundaries",
    "nucleus_boundaries",
]
# Matches Cytokit tile file names like "R001_X003_Y002.tif" (region, X, Y).
TIFF_FILE_NAMING_PATTERN = re.compile(r"^R\d{3}_X(\d{3})_Y(\d{3})\.tif")
# NOTE(review): this module-level pattern is shadowed by a broader local one
# inside find_antibodies_meta() and appears unused — confirm before removing.
metadata_filename_pattern = re.compile(r"^[0-9A-Fa-f]{32}antibodies\.tsv$")
28 |
29 |
def find_antibodies_meta(input_dir: Path) -> Optional[Path]:
    """
    Find the antibodies metadata file (``*antibodies.tsv``) for a HuBMAP data set.

    Walks ``input_dir`` recursively and returns the first matching file, or
    None (with a warning) when nothing matches. Does not check whether the
    file name prefix matches the dataset ID. When several candidates exist,
    the first one encountered is used and a warning is logged.
    """
    metadata_filename_pattern = re.compile(r"^[0-9A-Za-z\-_]*antibodies\.tsv$")
    found_files = []
    for dirpath, _dirnames, filenames in walk(input_dir):
        for filename in filenames:
            if metadata_filename_pattern.match(filename):
                found_files.append(Path(dirpath) / filename)

    if not found_files:
        # Fixed message: the file the pipeline looks for is "antibodies.tsv".
        logger.warning("No antibodies.tsv file found")
        return None
    if len(found_files) > 1:
        # Previously silent; surface the ambiguity instead of hiding it.
        logger.warning(f"Multiple antibodies.tsv files found, using {found_files[0]}")
    return found_files[0]
50 |
51 |
def sort_by_cycle(antb_path: Path):
    """
    Read antibodies.tsv and sort its rows by (cycle, channel) parsed from the
    ``channel_id`` column (e.g. "cycle2_ch3"); the provider's file is not
    guaranteed to be in acquisition order. The returned frame is indexed by a
    (cycle, channel) MultiIndex.
    """
    df = pd.read_table(antb_path)
    # The named groups were mangled to "(?P\d+)" (invalid regex) — restore
    # them so .group("cycle") / .group("channel") below work.
    cycle_channel_pattern = re.compile(
        r"cycle(?P<cycle>\d+)_ch(?P<channel>\d+)", re.IGNORECASE
    )
    searches = [cycle_channel_pattern.search(v) for v in df["channel_id"]]
    cycles = [int(s.group("cycle")) for s in searches]
    channels = [int(s.group("channel")) for s in searches]
    df.index = [cycles, channels]
    df = df.sort_index()
    return df
64 |
65 |
def get_ch_info_from_antibodies_meta(df: pd.DataFrame) -> Optional[pd.DataFrame]:
    """
    Add a "target" column holding the cleaned analyte name derived from each
    row's "antibody_name". The frame is modified in place and returned.
    """
    df["target"] = [get_analyte_name(name) for name in df["antibody_name"].to_list()]
    return df
75 |
76 |
def get_analyte_name(antibody_name: str) -> str:
    """
    Derive the analyte name from an antibodies.tsv entry by stripping the
    "Anti-" prefix and the trailing " antibody" suffix.
    """
    without_prefix = re.sub(r"Anti-", "", antibody_name)
    without_suffix = re.sub(r"\s+antibody", "", without_prefix)
    return without_suffix
84 |
85 |
def create_original_channel_names_df(channelList: List[str]) -> pd.DataFrame:
    """
    Build a dataframe describing the original channel names.

    Each name like "cyc002_ch003_origCD45" is split into numeric "Cycle" and
    "Channel" columns plus the bare "channel_name"; a "channel_id" column of
    the form "cycleN_chM" is added for joining against antibodies.tsv.
    """
    pattern = re.compile(r"cyc(\d+)_ch(\d+)_orig(.*)")
    frame = pd.DataFrame(channelList, columns=["Original_Channel_Name"])
    frame[["Cycle", "Channel", "channel_name"]] = frame[
        "Original_Channel_Name"
    ].str.extract(pattern)
    frame["Cycle"] = pd.to_numeric(frame["Cycle"])
    frame["Channel"] = pd.to_numeric(frame["Channel"])
    cycle_part = "cycle" + frame["Cycle"].astype(str)
    channel_part = "_ch" + frame["Channel"].astype(str)
    frame["channel_id"] = cycle_part + channel_part

    return frame
106 |
107 |
def replace_provider_ch_names_with_antb(
    og_ch_names_df: pd.DataFrame, antibodies_df: pd.DataFrame
) -> List[str]:
    """
    Produce the final channel-name list: where a row's channel_id has an
    antibodies.tsv entry use that target name, otherwise keep the provider's
    original channel name.
    """
    mapping = map_cycles_and_channels(antibodies_df)
    updated_channel_names = []
    for _, row in og_ch_names_df.iterrows():
        target = mapping.get(row["channel_id"].lower(), None)
        if target is None:
            updated_channel_names.append(row["channel_name"])
        else:
            updated_channel_names.append(target)
    return updated_channel_names
125 |
126 |
def generate_sa_ch_info(
    channel_id: str,
    og_ch_names_info: pd.Series,
    antb_info: pd.DataFrame,
) -> Optional[MapAnnotation]:
    """
    Build an OME MapAnnotation describing one channel from its antibodies.tsv
    row, looked up by (cycle, channel). Returns None when the antibody table
    has no row for that position.
    """
    position = (og_ch_names_info["Cycle"], og_ch_names_info["Channel"])
    try:
        antb_row = antb_info.loc[position, :]
    except KeyError:
        return None

    pairs = [
        Map.M(k="Channel ID", value=channel_id),
        Map.M(k="Name", value=antb_row["target"]),
        Map.M(k="Original Name", value=og_ch_names_info["channel_name"]),
        Map.M(k="UniprotID", value=antb_row["uniprot_accession_number"]),
        Map.M(k="RRID", value=antb_row["rr_id"]),
        Map.M(k="AntibodiesTsvID", value=antb_row["channel_id"]),
    ]
    return MapAnnotation(value=Map(ms=pairs))
150 |
151 |
def map_cycles_and_channels(antibodies_df: pd.DataFrame) -> dict:
    """
    Map lower-cased channel_id (e.g. "cycle1_ch2") to its antibody target
    name, for constant-time lookups while renaming channels.
    """
    lowered_ids = antibodies_df["channel_id"].str.lower()
    return dict(zip(lowered_ids, antibodies_df["target"]))
158 |
159 |
def collect_tiff_file_list(directory: Path, TIFF_FILE_NAMING_PATTERN: re.Pattern) -> List[Path]:
    """
    Recursively find all files under ``directory`` whose names match the
    given regex and return their paths. Logs a warning and returns an empty
    list when nothing matches.

    TODO: this is very similar to a function in create_cellshapes_csv.py -- could
    do to unify with a separate module?
    """
    fileList = []

    for dirpath, dirnames, filenames in walk(directory):
        for filename in filenames:
            if TIFF_FILE_NAMING_PATTERN.match(filename):
                # Join with the directory the file was actually found in;
                # joining with the walk root broke matches in subdirectories.
                fileList.append(Path(dirpath) / filename)

    if len(fileList) == 0:
        logger.warning("No files found in " + str(directory))

    return fileList
179 |
180 |
def get_lateral_resolution(cytokit_config_filename: Path) -> float:
    """
    Read the acquisition lateral resolution (XY pixel size) from a Cytokit
    YAML config, rounded to two decimal places.
    """
    with open(cytokit_config_filename) as stream:
        cytokit_config = yaml.safe_load(stream)
    raw_value = cytokit_config["acquisition"]["lateral_resolution"]
    return float("%0.2f" % raw_value)
186 |
187 |
def collect_expressions_extract_channels(extractFile: Path) -> List[str]:
    """
    Return the channel names of a Cytokit extract TIFF, in the order given by
    the ImageJ "Labels" metadata attribute (AICSImageIO does not expose it),
    with the "proc_" prefix stripped from each name.
    """
    with TiffFile(str(extractFile.absolute())) as tiff:
        ij_meta = tiff.imagej_metadata
        num_channels = int(ij_meta["channels"])
        labels = ij_meta["Labels"][0:num_channels]

    # Drop the "proc_" prefix Cytokit adds for processed-tile extracts.
    proc_prefix = re.compile(r"^proc_(.*)")
    return [proc_prefix.match(label).group(1) for label in labels]
207 |
208 |
def convert_tiff_file(funcArgs):
    """
    Convert one source TIFF to OME-TIFF with channel metadata.

    funcArgs is a tuple of (sourceFile, ometiffFile, channelNames,
    lateral_resolution, og_ch_names_df), optionally followed by an
    antibodies-info dataframe; when that is present, a per-channel
    MapAnnotation (UniProt ID, RRID, antibodies.tsv ID, ...) is attached to
    the OME-XML via generate_sa_ch_info.

    NOTE(review): lateral_resolution is unpacked but never used here — pixel
    sizes come from the source image's physical_pixel_sizes; confirm the
    parameter can be dropped from the tuple contract.
    """

    sourceFile, ometiffFile, channelNames, lateral_resolution, og_ch_names_df, *optional_args = (
        funcArgs
    )
    # Antibody metadata is optional; without it channels are only renamed.
    antb_info = optional_args[0] if optional_args else None

    logger.info(f"Converting file: {str(sourceFile)}")

    image = AICSImage(sourceFile)
    imageDataForOmeTiff = image.get_image_data("TCZYX")
    imageName = f"Image: {sourceFile.name}"

    # Create OME-XML metadata using build_ome
    ome_writer = OmeTiffWriter()
    omeXml = ome_writer.build_ome(
        data_shapes=[(image.dims.T, image.dims.C, image.dims.Z, image.dims.Y, image.dims.X)],
        data_types=[image.dtype],
        dimension_order=["TCZYX"],
        channel_names=[channelNames],
        image_name=[imageName],
        physical_pixel_sizes=[image.physical_pixel_sizes],
    )

    # Rename/re-id each OME channel and, when antibody info is available,
    # attach a structured annotation describing it.
    annotations = StructuredAnnotationList()
    for i, (channel_obj, channel_name, og_ch_names_row) in enumerate(
        zip(
            omeXml.images[0].pixels.channels,
            channelNames,
            og_ch_names_df.iterrows(),
        )
    ):
        channel_id = f"Channel:0:{i}"
        channel_obj.name = channel_name
        channel_obj.id = channel_id
        if antb_info is None:
            continue
        # og_ch_names_row is an (index, Series) pair from iterrows().
        ch_info = generate_sa_ch_info(channel_id, og_ch_names_row[1], antb_info)
        if ch_info is None:
            continue
        channel_obj.annotation_refs.append(AnnotationRef(id=ch_info.id))
        annotations.append(ch_info)
    omeXml.structured_annotations = annotations

    ome_writer.save(
        data=imageDataForOmeTiff,
        uri=str(ometiffFile),
        ome_xml=omeXml,
        dimension_order="TCZYX",
        channel_names=channelNames,
    )

    logger.info(f"OME-TIFF file created: {ometiffFile}")
268 |
269 |
def create_ome_tiffs(
    file_list: List[Path],
    output_dir: Path,
    channel_names: List[str],
    lateral_resolution: float,
    subprocesses: int,
    og_ch_names_df,
    antb_info: Optional[pd.DataFrame] = None,
):
    """
    Convert every TIFF in ``file_list`` to OME-TIFF in ``output_dir`` using a
    pool of ``subprocesses`` parallel workers.

    Parameters:
    - file_list: source TIFF paths
    - output_dir: destination directory (created if missing)
    - channel_names: channel names to store in the OME metadata
    - lateral_resolution: XY pixel size (forwarded to the per-file converter)
    - subprocesses: number of worker processes
    - og_ch_names_df: dataframe of original channel names
      (see create_original_channel_names_df)
    - antb_info: optional antibodies.tsv-derived dataframe; when given,
      per-channel structured annotations are added
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    args_for_conversion = []
    for source_file in file_list:
        ome_tiff_file = (output_dir / source_file.name).with_suffix(".ome.tiff")
        args = (source_file, ome_tiff_file, channel_names, lateral_resolution, og_ch_names_df)
        if antb_info is not None:
            args = args + (antb_info,)
        args_for_conversion.append(args)

    with Pool(processes=subprocesses) as pool:
        # Consume the iterator: imap_unordered is lazy, and without iterating
        # the results any exception raised in a worker is silently lost.
        for _ in pool.imap_unordered(convert_tiff_file, args_for_conversion):
            pass
        pool.close()
        pool.join()
318 |
319 |
def check_dir_is_empty(dir_path: Path):
    """Return True when ``dir_path`` contains no entries at all."""
    for _ in dir_path.iterdir():
        return False
    return True
322 |
323 |
########
# MAIN #
########
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description=(
            "Convert Cytokit's output TIFFs containing segmentation and extraction "
            'results to OME-TIFF, and add the channel names. Creates an "ome-tiff" '
            "directory inside the output/cytometry/tile and "
            "output/extract/expressions directories."
        ),
    )
    parser.add_argument(
        "cytokit_output",
        help="Path to output of `cytokit processor`",
        type=Path,
    )
    parser.add_argument(
        "bg_sub_tiles",
        help="Path to tiles with subtracted background",
        type=Path,
    )
    parser.add_argument(
        "cytokit_config",
        help="Path to Cytokit YAML config file",
        type=Path,
    )
    parser.add_argument(
        "input_data_dir",
        help="Path to the input dataset",
        type=Path,
    )
    parser.add_argument(
        "-p",
        "--processes",
        help="Number of parallel OME-TIFF conversions to perform at once",
        type=int,
        default=8,
    )

    args = parser.parse_args()

    print("Cytokit output:")
    print_directory_tree(args.cytokit_output)

    output_dir = Path("output")
    output_dir.mkdir(parents=True, exist_ok=True)

    cytometry_tile_dir_piece = Path("cytometry/tile")
    extract_expressions_piece = Path("extract/expressions")
    processor_data_json_piece = Path("processor/data.json")

    cytometryTileDir = args.cytokit_output / cytometry_tile_dir_piece
    print("Cytometry tile directory:", cytometryTileDir)

    # Prefer background-subtracted tiles when that directory is non-empty;
    # otherwise fall back to Cytokit's own extract output. (Previously the
    # Cytokit path was computed and printed unconditionally before this
    # branch, which logged the wrong directory in the bg-sub case.)
    if not check_dir_is_empty(args.bg_sub_tiles):
        extractDir = args.bg_sub_tiles
        print(list(Path(args.bg_sub_tiles).iterdir()))
    else:
        extractDir = args.cytokit_output / extract_expressions_piece
    print("Extract expressions directory:", extractDir)

    segmentationFileList = collect_tiff_file_list(cytometryTileDir, TIFF_FILE_NAMING_PATTERN)
    extractFileList = collect_tiff_file_list(extractDir, TIFF_FILE_NAMING_PATTERN)
    antb_path = find_antibodies_meta(args.input_data_dir)
    lateral_resolution = get_lateral_resolution(args.cytokit_config)
    extractChannelNames = collect_expressions_extract_channels(extractFileList[0])
    original_ch_names_df = create_original_channel_names_df(extractChannelNames)
    print(original_ch_names_df.head())

    # Default to the provider's channel names; override with antibodies.tsv
    # target names when that metadata file exists.
    antb_info = None
    updated_channel_names = original_ch_names_df["channel_name"].tolist()
    if antb_path:
        df = sort_by_cycle(antb_path)
        antb_info = get_ch_info_from_antibodies_meta(df)
        updated_channel_names = replace_provider_ch_names_with_antb(
            original_ch_names_df, antb_info
        )

    # Create segmentation mask OME-TIFFs
    if segmentationFileList:
        create_ome_tiffs(
            segmentationFileList,
            output_dir / cytometry_tile_dir_piece / "ome-tiff",
            SEGMENTATION_CHANNEL_NAMES,
            lateral_resolution,
            args.processes,
            original_ch_names_df,
            antb_info,
        )
    # Create the extract OME-TIFFs.
    if extractFileList:
        create_ome_tiffs(
            extractFileList,
            output_dir / extract_expressions_piece / "ome-tiff",
            updated_channel_names,
            lateral_resolution,
            args.processes,
            original_ch_names_df,
            antb_info,
        )
429 |
--------------------------------------------------------------------------------
/bin/create_cytokit_config.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 | import logging
4 | import re
5 | from pprint import pprint
6 | from typing import List
7 |
8 | import yaml
9 |
logging.basicConfig(level=logging.INFO, format="%(levelname)-7s - %(message)s")
logger = logging.getLogger(__name__)

# Some constants to use below.
# Cytokit path format for Keyence multi-cycle acquisition layouts.
path_format = "keyence_multi_cycle_v01"
# Memory limit handed to Cytokit's processor args.
memory_limit = "64G"
16 |
17 |
def comma_separated_integers(s: str) -> List[int]:
    """Argparse type helper: parse a string like "0, 1" into a list of ints."""
    return [int(piece) for piece in s.split(",")]
20 |
21 |
########
# MAIN #
########
if __name__ == "__main__":
    # Set up argument parser and parse the command line arguments.
    parser = argparse.ArgumentParser(
        description="Create a YAML config file for Cytokit, based on a JSON file from the CODEX Toolkit pipeline. YAML file will be created in current working directory unless otherwise specified."
    )
    parser.add_argument(
        "--gpus",
        help="GPUs to use for Cytokit, specified as a comma-separated list of integers.",
        type=comma_separated_integers,
        default=[0, 1],
    )
    parser.add_argument(
        "pipelineConfigFilename",
        help="JSON file containing all information required for config generation.",
    )
    parser.add_argument(
        "-o",
        "--outfile",
        help="Path to output YAML config file. Default: experiment.yaml",
    )

    args = parser.parse_args()

    if not args.outfile:
        args.outfile = "experiment.yaml"

    logger.info("Reading pipeline config file " + args.pipelineConfigFilename + "...")

    with open(args.pipelineConfigFilename, "r") as pipelineConfigFile:
        pipelineConfigInfo = json.load(pipelineConfigFile)

    logger.info("Finished reading pipeline config file.")

    # Skeleton Cytokit config; the acquisition section is populated from
    # pipelineConfigInfo below, and an operator section is appended after
    # the extract channels have been selected.
    cytokitConfig = {
        "name": pipelineConfigInfo["name"],
        "date": pipelineConfigInfo["date"],
        "environment": {"path_formats": path_format},
        "acquisition": {},  # This is populated below.
        "processor": {
            "args": {
                "gpus": args.gpus,
                "memory_limit": memory_limit,
                "run_crop": False,
                "run_tile_generator": True,
                "run_drift_comp": True,
                "run_cytometry": True,
                "run_best_focus": True,
                "run_deconvolution": False,
            },
            "tile_generator": {"raw_file_type": "keyence_mixed"},
            "best_focus": {"channel": pipelineConfigInfo["best_focus"]},
            "drift_compensation": {"channel": pipelineConfigInfo["drift_compensation"]},
            "cytometry": {
                "nuclei_channel_name": pipelineConfigInfo["nuclei_channel"],
                "segmentation_params": {
                    "memb_min_dist": 8,
                    "memb_sigma": 5,
                    "memb_gamma": 0.25,
                    "marker_dilation": 3,
                    "marker_min_size": 2,
                },
                "quantification_params": {"nucleus_intensity": True, "cell_graph": True},
            },
        },
        "analysis": [{"aggregate_cytometry_statistics": {"mode": "best_z_plane"}}],
    }

    # The membrane channel is optional; segmentation falls back to
    # nuclei-only when it is absent.
    if "membrane_channel" in pipelineConfigInfo:
        cytokitConfig["processor"]["cytometry"]["membrane_channel_name"] = pipelineConfigInfo[
            "membrane_channel"
        ]
    else:
        logger.warning(
            "No membrane stain channel found in pipeline config. Will only use nuclei channel for segmentation."
        )

    # Populate acquisition section.
    acquisitionFields = [
        "per_cycle_channel_names",
        "channel_names",
        "axial_resolution",
        "lateral_resolution",
        "emission_wavelengths",
        "magnification",
        "num_cycles",
        "num_z_planes",
        "numerical_aperture",
        "objective_type",
        "region_height",
        "region_names",
        "region_width",
        "tile_height",
        "tile_overlap_x",
        "tile_overlap_y",
        "tile_width",
        "tiling_mode",
    ]

    for field in acquisitionFields:
        cytokitConfig["acquisition"][field] = pipelineConfigInfo[field]

    # Create operator section to extract channels collapsed in one time point,
    # leaving out blank/empty channels and only including the nuclear stain
    # channel used for segmentation.
    blankPattern = re.compile(r"cyc(\d+)_ch(\d+)_orig([^_]*)blank", re.IGNORECASE)
    emptyPattern = re.compile(r"cyc(\d+)_ch(\d+)_orig([^_]*)empty", re.IGNORECASE)
    dapiChannelPattern = re.compile(r"cyc(\d+)_ch(\d+)_orig([^_]*)DAPI", re.IGNORECASE)
    hoechstChannelPattern = re.compile(r"cyc(\d+)_ch(\d+)_orig([^_]*)HOECHST", re.IGNORECASE)

    operatorExtractChannels = []

    for channelName in pipelineConfigInfo["channel_names"]:
        # Skip unwanted channels: empty channels always; DAPI/Hoechst
        # channels unless they are the designated nuclei channel.
        if emptyPattern.match(channelName):
            continue
        elif dapiChannelPattern.match(channelName):
            if channelName != pipelineConfigInfo["nuclei_channel"]:
                continue
        elif hoechstChannelPattern.match(channelName):
            if channelName != pipelineConfigInfo["nuclei_channel"]:
                continue

        # Skip channels that failed QC.
        # channel_names_qc_pass maps channel name -> list of "TRUE"/"FALSE"
        # strings; more than one entry is only tolerated for blank channels.
        if pipelineConfigInfo["channel_names_qc_pass"]:
            if len(pipelineConfigInfo["channel_names_qc_pass"][channelName]) > 1:
                if blankPattern.match(channelName):
                    pass
                else:
                    raise ValueError(f"More than one {channelName} channel found.")
            else:
                channel_qc_pass = pipelineConfigInfo["channel_names_qc_pass"][channelName][0]
                if channel_qc_pass.casefold() == "false".casefold():
                    continue

        # Append to operator extract channels with "proc_" prepended -- this
        # tells Cytokit to extract the channels from the processed tiles.
        operatorExtractChannels.append("proc_" + channelName)

    # Add operator section to config.
    cytokitConfig["operator"] = [
        {"extract": {"name": "expressions", "channels": operatorExtractChannels, "z": "all"}}
    ]

    logger.info("Writing Cytokit config to " + args.outfile)

    with open(args.outfile, "w") as outFile:
        yaml.safe_dump(cytokitConfig, outFile, encoding="utf-8", default_flow_style=None, indent=2)

    pprint(cytokitConfig, sort_dicts=False)

    logger.info("Finished writing Cytokit config.")
176 |
--------------------------------------------------------------------------------
/bin/dataset_info/collect_dataset_info.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 | import logging
4 | import math
5 | import re
6 | import sys
7 | from collections import Counter
8 | from datetime import datetime
9 | from pathlib import Path
10 | from pprint import pprint
11 | from typing import Dict, List, Optional, Tuple, Union
12 |
13 | import numpy as np
14 | import pint
15 |
16 | sys.path.append("/opt")
17 | from pipeline_utils.dataset_listing import get_tile_dtype, get_tile_shape
18 |
19 |
class ConfigCreator:
    """
    Derives the pipeline config dict from a dataset's ``dataset.json``.

    Typical usage: set ``dataset_dir``, call ``find_raw_data_dir()``, then
    ``read_metadata()``, then ``create_config()``.
    """

    def __init__(self):
        # Root of the dataset; expected to be set by the caller.
        self.dataset_dir = Path("")
        # Default parallelism; adjustable via the num_concurrent_tasks property.
        self._num_concur_tasks = 10
        # Normalized metadata, populated by read_metadata().
        self._std_meta = dict()
        # Located by find_raw_data_dir().
        self._raw_data_dir = Path("")

    def read_metadata(self):
        """Load dataset.json from the raw data dir and de-duplicate channel names."""
        path_to_meta = self._raw_data_dir / "dataset.json"
        meta = self._read_json_meta(path_to_meta)
        processed_meta = meta.copy()

        ch_names = []
        for ch in meta["ChannelDetails"]["ChannelDetailsArray"]:
            ch_names.append(ch["Name"])

        new_ch_names = self._make_ch_names_unique(ch_names)

        # Rebuild the channel array with the de-duplicated names.
        new_channel_details_array = []
        for i, ch in enumerate(processed_meta["ChannelDetails"]["ChannelDetailsArray"]):
            new_ch = ch.copy()
            new_ch["Name"] = new_ch_names[i]
            new_channel_details_array.append(new_ch)
        processed_meta["ChannelDetails"]["ChannelDetailsArray"] = new_channel_details_array
        self._std_meta = processed_meta

    def find_raw_data_dir(self):
        """
        Return the single raw-data subdirectory of ``dataset_dir``.

        A directory counts as raw when its name contains none of the known
        non-raw markers. Raises ValueError when several candidates exist.
        """
        NONRAW_DIRECTORY_NAME_PIECES = [
            "processed",
            "drv",
            "metadata",
            "extras",
            "Overview",
        ]
        raw_data_dir_possibilities = []

        for child in self.dataset_dir.iterdir():
            if not child.is_dir():
                continue
            if not any(piece in child.name for piece in NONRAW_DIRECTORY_NAME_PIECES):
                raw_data_dir_possibilities.append(child)

        if len(raw_data_dir_possibilities) > 1:
            message_pieces = ["Found multiple raw data directory possibilities:"]
            message_pieces.extend(f"\t{path}" for path in raw_data_dir_possibilities)
            raise ValueError("\n".join(message_pieces))
        self._raw_data_dir = raw_data_dir_possibilities[0]
        return self._raw_data_dir

    def create_config(self) -> dict:
        """Assemble the full pipeline config dict from the loaded metadata."""
        config = {
            "name": self._std_meta["DatasetName"],
            "date": self._create_proc_date(),
            "raw_data_location": self.find_raw_data_dir().name,
            "channel_names_qc_pass": self._get_qc_info_per_ch(),
            "emission_wavelengths": self._get_emission_wavelengths(),
            "excitation_wavelengths": self._get_excitation_wavelengths(),
            "axial_resolution": self._get_axial_resolution(),
            "lateral_resolution": self._get_lateral_resolution(),
            "magnification": self._std_meta["NominalMagnification"],
            "num_z_planes": self._std_meta["NumZPlanes"],
            "numerical_aperture": self._std_meta["NumericalAperture"],
            "objective_type": self._std_meta["ImmersionMedium"].lower(),
            "region_height": self._std_meta["RegionHeight"],
            "region_width": self._std_meta["RegionWidth"],
            "region_names": self._get_region_names(),
            "tile_overlap_x": self._get_tile_overlap_x_in_px(),
            "tile_overlap_y": self._get_tile_overlap_y_in_px(),
            "tile_height": self._get_tile_shape_no_overlap()[0],
            "tile_width": self._get_tile_shape_no_overlap()[1],
            "tile_dtype": self._get_tile_dtype(),
            "tiling_mode": self._std_meta["TileLayout"].lower(),
            "per_cycle_channel_names": self._get_per_cycle_ch_names(),
            "channel_names": self._get_channel_names(),
            "num_cycles": self._std_meta["NumCycles"],
            "best_focus": self._get_nuc_ch(),
            "drift_compensation": self._get_nuc_ch(),
            "nuclei_channel": self._get_nuc_ch(),
            "membrane_channel": self._get_membr_ch(),
            "nuclei_channel_loc": self._std_meta["NuclearStainForSegmentation"],
            "membrane_channel_loc": self._std_meta["MembraneStainForSegmentation"],
            "target_shape": self._calc_target_shape(),
            "num_concurrent_tasks": self._num_concur_tasks,
        }
        return config

    def _read_json_meta(self, path_to_meta: Path) -> Dict[str, Union[str, int, dict, list]]:
        """Read and return the JSON metadata file at ``path_to_meta``."""
        with open(path_to_meta, "r") as s:
            json_meta = json.load(s)
        return json_meta

    def _create_proc_date(self) -> str:
        """Return the current local time formatted as "YYYY-MM-DD HH:MM:SS"."""
        processing_date = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        return processing_date

    def _get_qc_info_per_ch(self) -> Dict[str, List[str]]:
        """Map each channel name to ["TRUE"] or ["FALSE"] from its PassedQC flag."""
        ch_details = self._std_meta["ChannelDetails"]["ChannelDetailsArray"]
        channel_qc_info = dict()
        # Header-style entry kept for downstream consumers of this mapping.
        channel_qc_info["Marker"] = ["Result"]
        for ch in ch_details:
            ch_name = ch["Name"]
            qc_result = ch["PassedQC"]
            if qc_result is True:
                qc_result_str = "TRUE"
            else:
                qc_result_str = "FALSE"
            channel_qc_info[ch_name] = [qc_result_str]
        return channel_qc_info

    def _make_ch_names_unique(self, channel_names: List[str]) -> List[str]:
        """Suffix repeated channel names with _1, _2, ... to make them unique."""
        unique_names = Counter(channel_names)
        new_names = channel_names.copy()

        for unique_ch, count in unique_names.items():
            if count > 1:
                this_ch_count = 1
                for i, ch_name in enumerate(channel_names):
                    if ch_name == unique_ch:
                        new_name = f"{ch_name}_{this_ch_count}"
                        new_names[i] = new_name
                        this_ch_count += 1
        return new_names

    def _get_emission_wavelengths(self) -> List[float]:
        """Return the distinct emission wavelengths (nm) across all channels.

        NOTE(review): duplicates are collapsed, so the list can be shorter
        than the channel list — presumably one entry per fluorophore; confirm
        downstream expectations.
        """
        em_wav = []
        for ch in self._std_meta["ChannelDetails"]["ChannelDetailsArray"]:
            wav = ch["EmissionWavelengthNM"]
            if wav not in em_wav:
                em_wav.append(float(wav))
        return em_wav

    def _get_excitation_wavelengths(self) -> List[float]:
        """Return distinct excitation wavelengths, or zeros when not provided."""
        num_channels = len(self._std_meta["ChannelDetails"]["ChannelDetailsArray"])
        channel = self._std_meta["ChannelDetails"]["ChannelDetailsArray"][0]

        if "ExcitationWavelengthNM" in channel:
            exc_wav = []
            for ch in self._std_meta["ChannelDetails"]["ChannelDetailsArray"]:
                wav = ch["ExcitationWavelengthNM"]
                if wav not in exc_wav:
                    exc_wav.append(float(wav))
        else:
            # Placeholder when the metadata carries no excitation info.
            exc_wav = [0] * num_channels
        return exc_wav

    def _get_axial_resolution(self) -> float:
        """Return the Z resolution converted to nanometres."""
        unit = pint.UnitRegistry()
        provided_unit_z = unit[self._std_meta["ResolutionZUnit"]]
        provided_res_z = float(self._std_meta["ResolutionZ"])
        res_z_in_units = provided_res_z * provided_unit_z
        # Renamed from *_um: the value is converted to nm, not um.
        axial_res_nm = res_z_in_units.to("nm")
        return axial_res_nm.magnitude

    def _get_lateral_resolution(self) -> float:
        """Return the XY resolution (average of X and Y) converted to nanometres."""
        unit = pint.UnitRegistry()
        provided_unit_x = unit[self._std_meta["ResolutionXUnit"]]
        provided_unit_y = unit[self._std_meta["ResolutionYUnit"]]
        provided_res_x = float(self._std_meta["ResolutionX"])
        provided_res_y = float(self._std_meta["ResolutionY"])
        res_x_in_units = provided_res_x * provided_unit_x
        res_y_in_units = provided_res_y * provided_unit_y
        # Renamed from *_um: the value is converted to nm, not um.
        lateral_res_nm = ((res_x_in_units + res_y_in_units) / 2).to("nm")
        return lateral_res_nm.magnitude

    def _get_region_names(self) -> List[int]:
        """Return 1-based region numbers [1..NumRegions]."""
        num_regions = self._std_meta["NumRegions"]
        return list(range(1, num_regions + 1))

    def _get_tile_overlap_x_in_px(self) -> int:
        """Convert the proportional X overlap into pixels."""
        overlap = self._std_meta["TileOverlapX"]
        size = self._std_meta["TileWidth"]
        px_overlap = self._calc_px_overlap_from_proportional(size, overlap)
        return px_overlap

    def _get_tile_overlap_y_in_px(self) -> int:
        """Convert the proportional Y overlap into pixels."""
        overlap = self._std_meta["TileOverlapY"]
        size = self._std_meta["TileHeight"]
        px_overlap = self._calc_px_overlap_from_proportional(size, overlap)
        return px_overlap

    def _calc_px_overlap_from_proportional(self, dim_size: int, dim_overlap: float) -> int:
        """Turn a proportional overlap (0..1) into a whole, even pixel count.

        Raises ValueError when the proportion exceeds 1.
        """
        msg = f"Tile overlap proportion {dim_overlap} is greater than 1"
        if dim_overlap > 1:
            raise ValueError(msg)

        pixel_overlap = dim_size * dim_overlap

        if float(pixel_overlap).is_integer():
            return int(pixel_overlap)
        else:
            # if overlap is not a whole number in px, round up and make even
            closest_overlap = int(math.ceil(pixel_overlap))
            closest_overlap += closest_overlap % 2  # make even
            return closest_overlap

    def _get_per_cycle_ch_names(self) -> List[str]:
        """Return sorted unique per-cycle channel names, e.g. ["CH1", "CH2"]."""
        per_cycle_channel_names = []
        channels = self._std_meta["ChannelDetails"]["ChannelDetailsArray"]
        channel_ids = []
        for ch in channels:
            channel_ids.append(int(ch["ChannelID"]))
        unique_ch_ids = sorted(set(channel_ids))
        for ch in unique_ch_ids:
            per_cycle_channel_names.append("CH" + str(ch))
        return per_cycle_channel_names

    def _get_channel_names(self) -> List[str]:
        """Return all channel names in metadata order."""
        channels = self._std_meta["ChannelDetails"]["ChannelDetailsArray"]
        channel_names = []
        for ch in channels:
            channel_names.append(ch["Name"])
        return channel_names

    def _get_nuc_ch(self) -> str:
        """Return the name of the nuclear stain channel used for segmentation."""
        nuc_ch_loc = self._std_meta["NuclearStainForSegmentation"]
        nuc_ch_name = self._get_ch_name_by_location(nuc_ch_loc)
        return nuc_ch_name

    def _get_membr_ch(self) -> str:
        """Return the name of the membrane stain channel used for segmentation."""
        membr_ch_loc = self._std_meta["MembraneStainForSegmentation"]
        membr_ch_name = self._get_ch_name_by_location(membr_ch_loc)
        return membr_ch_name

    def _get_ch_name_by_location(self, ch_loc: Dict[str, int]) -> str:
        """Look up a channel name by its {"CycleID", "ChannelID"} location.

        Raises ValueError when no channel matches.
        """
        channels = self._std_meta["ChannelDetails"]["ChannelDetailsArray"]
        ch_name = None
        for ch in channels:
            if ch["CycleID"] == ch_loc["CycleID"]:
                if ch["ChannelID"] == ch_loc["ChannelID"]:
                    ch_name = ch["Name"]
                    break
        if ch_name is None:
            raise ValueError("Could not find channel name of", str(ch_loc))
        return ch_name

    def _get_tile_dtype(self) -> str:
        """Return the numpy dtype name of the raw tiles (e.g. "uint16")."""
        tile_dtype = str(get_tile_dtype(self._raw_data_dir).name)
        return tile_dtype

    def _calc_target_shape(self):
        """
        Cytokit's nuclei detection U-Net (from CellProfiler) works best at 20x magnification.
        The CellProfiler U-Net requires the height and width of the images to be
        evenly divisible by 2 raised to the number of layers in the network, in this case 2^3=8.
        https://github.com/hammerlab/cytokit/issues/14
        https://github.com/CellProfiler/CellProfiler-plugins/issues/65

        Returns [width, height] with each dimension rounded UP to the nearest
        multiple of 8.
        """
        dims = [self._std_meta["TileWidth"], self._std_meta["TileHeight"]]
        magnification = self._std_meta["NominalMagnification"]
        # NOTE(review): scaleFactor is computed but never applied; presumably
        # non-20x datasets should have dims scaled by it — confirm intent.
        scaleFactor = 1
        if magnification != 20:
            scaleFactor = 20 / magnification

        # Round each dimension up to the nearest multiple of 8. (The previous
        # implementation APPENDED the rounded values to the original list,
        # returning more than two dims and keeping the non-divisible ones.)
        new_dims = []
        for dim in dims:
            if dim % 8:
                new_dims.append(int(8 * math.ceil(float(dim) / 8)))
            else:
                new_dims.append(dim)
        return new_dims

    @property
    def num_concurrent_tasks(self) -> int:
        # Number of concurrent tasks the pipeline should run.
        return self._num_concur_tasks

    @num_concurrent_tasks.setter
    def num_concurrent_tasks(self, val: int):
        # Non-positive values fall back to the default of 10.
        if val <= 0:
            self._num_concur_tasks = 10
        else:
            self._num_concur_tasks = val

    def _get_tile_shape_no_overlap(self) -> Tuple[int, int]:
        """Return (height, width) of a tile with the overlap margins removed."""
        overlap_y = self._get_tile_overlap_y_in_px()
        overlap_x = self._get_tile_overlap_x_in_px()
        tile_height_with_overlap = self._std_meta["TileHeight"]
        tile_width_with_overlap = self._std_meta["TileWidth"]
        tile_height = tile_height_with_overlap - overlap_y
        tile_width = tile_width_with_overlap - overlap_x
        return tile_height, tile_width
301 |
302 |
def write_pipeline_config(out_path: Path, pipeline_config: dict):
    """Serialize the pipeline config as pretty-printed JSON at out_path."""
    out_path.write_text(json.dumps(pipeline_config, indent=4))
306 |
307 |
def main(path_to_dataset: Path, num_concurrent_tasks: int = 10):
    """Collect dataset metadata and write pipelineConfig.json to the CWD."""
    logging.basicConfig(level=logging.INFO, format="%(levelname)-7s - %(message)s")
    logger = logging.getLogger(__name__)

    creator = ConfigCreator()
    creator.dataset_dir = path_to_dataset
    creator.num_concurrent_tasks = num_concurrent_tasks
    creator.find_raw_data_dir()
    creator.read_metadata()
    config = creator.create_config()

    pprint(config, sort_dicts=False)
    out_path = Path("pipelineConfig.json")
    logger.info("Writing pipeline config")
    write_pipeline_config(out_path, config)
    logger.info(f"Written pipeline config to {out_path}")
324 |
325 |
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Collect information required to perform analysis of a CODEX dataset."
    )
    parser.add_argument(
        "--path_to_dataset",
        help="Path to directory containing raw data subdirectory (with cycle and region numbers).",
        type=Path,
    )
    parser.add_argument(
        "--num_concurrent_tasks",
        # Help text was a copy-paste of the --path_to_dataset description.
        help="Maximum number of tasks to run concurrently.",
        type=int,
        default=10,
    )
    args = parser.parse_args()
    main(args.path_to_dataset, args.num_concurrent_tasks)
343 |
--------------------------------------------------------------------------------
/bin/dataset_info/run_collection.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from pathlib import Path
3 |
4 | import collect_dataset_info
5 | import collect_dataset_info_old
6 |
7 |
def find_raw_data_dir(dataset_dir: Path) -> Path:
    """Locate the raw-data subdirectory of a dataset directory.

    Any child directory whose name does not contain one of the known
    non-raw markers is considered a candidate.

    :raises ValueError: if zero or more than one candidate is found.
      (Previously an empty candidate list raised an uninformative IndexError.)
    """
    NONRAW_DIRECTORY_NAME_PIECES = [
        "processed",
        "drv",
        "metadata",
        "extras",
        "Overview",
    ]
    raw_data_dir_possibilities = [
        child
        for child in dataset_dir.iterdir()
        if child.is_dir()
        and not any(piece in child.name for piece in NONRAW_DIRECTORY_NAME_PIECES)
    ]

    if len(raw_data_dir_possibilities) > 1:
        message_pieces = ["Found multiple raw data directory possibilities:"]
        message_pieces.extend(f"\t{path}" for path in raw_data_dir_possibilities)
        raise ValueError("\n".join(message_pieces))
    if not raw_data_dir_possibilities:
        raise ValueError(f"No raw data directory found in {dataset_dir}")
    return raw_data_dir_possibilities[0]
30 |
31 |
def check_new_meta_present(raw_data_dir: Path) -> bool:
    """Return True if the new-style metadata file (dataset.json) exists
    directly inside raw_data_dir."""
    if (raw_data_dir / "dataset.json").exists():
        print("Found new metadata")
        return True
    # Message previously read "Did not found new metadata".
    print("Did not find new metadata. Will try to use old metadata")
    return False
39 |
40 |
def main(path_to_dataset: Path, num_concurrent_tasks: int = 10):
    """Dispatch to the new or old metadata collector depending on which
    metadata format is present in the raw data directory."""
    raw_data_dir = find_raw_data_dir(path_to_dataset)
    if check_new_meta_present(raw_data_dir):
        collect_dataset_info.main(path_to_dataset, num_concurrent_tasks)
    else:
        collect_dataset_info_old.main(path_to_dataset, num_concurrent_tasks)
48 |
49 |
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Collect information required to perform analysis of a CODEX dataset."
    )
    parser.add_argument(
        "--path_to_dataset",
        help="Path to directory containing raw data subdirectory (with cycle and region numbers).",
        type=Path,
    )
    parser.add_argument(
        "--num_concurrent_tasks",
        # Help text was a copy-paste of the --path_to_dataset description.
        help="Maximum number of tasks to run concurrently.",
        type=int,
        default=10,
    )
    args = parser.parse_args()
    main(args.path_to_dataset, args.num_concurrent_tasks)
67 |
--------------------------------------------------------------------------------
/bin/illumination_correction/generate_basic_macro.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 |
def fill_in_basic_macro_template(path_to_stack: Path, out_dir: Path) -> str:
    """Render the ImageJ macro text that runs the BaSiC shading-estimation
    plugin on the given stack, writing profiles into out_dir."""
    stack_path_str = str(path_to_stack.absolute())
    out_dir_str = str(out_dir.absolute())
    # Plugin option values:
    # [Compute shading only, Compute shading and correct images]
    # [Estimate flat-field only (ignore dark-field), Estimate both flat-field and dark-field]
    return f"""
run("BaSiC Mod",
"input_stack={stack_path_str}" +
" flat-field_image_path=[]" +
" dark-field_image_path=[]" +
" output_dir={out_dir_str}" +
" shading_estimation=[Estimate shading profiles]" +
" shading_model=[Estimate flat-field only (ignore dark-field)]" +
" setting_regularisation_parameters=Automatic" +
" temporal_drift=Ignore" +
" correction_options=[Compute shading only]" +
" lambda_flat=0.500" +
" lambda_dark=0.500");

run("Quit");
eval("script", "System.exit(0);");
"""
28 |
29 |
def save_macro(out_path: Path, macro: str):
    """Write the macro text to out_path as UTF-8."""
    out_path.write_text(macro, encoding="utf-8")
33 |
--------------------------------------------------------------------------------
/bin/illumination_correction/run_illumination_correction.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import platform
3 | import re
4 | import subprocess
5 | import sys
6 | from pathlib import Path
7 | from typing import Dict, Iterable, List, Set, Tuple
8 |
9 | import cv2 as cv
10 | import dask
11 | import numpy as np
12 | import tifffile as tif
13 |
14 | sys.path.append("/opt/")
15 | from generate_basic_macro import fill_in_basic_macro_template, save_macro
16 |
17 | from pipeline_utils.dataset_listing import (
18 | create_listing_for_each_cycle_region,
19 | get_img_listing,
20 | )
21 | from pipeline_utils.pipeline_config_reader import load_dataset_info
22 |
23 | ImgStack = np.ndarray # 3d
24 | Image = np.ndarray # 2d
25 |
26 |
def make_dir_if_not_exists(dir_path: Path):
    """Create dir_path (and any missing parents); no-op when it already exists.

    Uses exist_ok=True, avoiding the check-then-create race of the previous
    exists() test when several dask workers create the same directory.
    """
    dir_path.mkdir(parents=True, exist_ok=True)
30 |
31 |
def convert_np_cv_dtype(npdtype: np.dtype) -> int:
    """Map a numpy dtype to the matching single-channel OpenCV depth constant.

    Raises KeyError for dtypes with no OpenCV equivalent in the table.
    """
    dtype_table = {
        np.dtype("float32"): cv.CV_32F,
        np.dtype("int32"): cv.CV_32S,
        np.dtype("uint16"): cv.CV_16U,
        np.dtype("uint8"): cv.CV_8U,
        np.dtype("int8"): cv.CV_8S,
        np.dtype("int16"): cv.CV_16S,
    }
    return dtype_table[npdtype]
42 |
43 |
def get_input_img_dirs(data_dir: Path) -> List[Path]:
    """Every child entry of data_dir (expected to be Cyc*_reg* directories).
    Note: files are not filtered out, matching the original behavior."""
    return [child for child in data_dir.iterdir()]
47 |
48 |
def read_imgs_to_stack(img_paths: List[Path]) -> ImgStack:
    """Read each tiff in img_paths and stack them along a new first axis."""
    planes = []
    for img_path in img_paths:
        try:
            planes.append(tif.imread(str(img_path.absolute())))
        except Exception as excp:
            # do not raise from excp because the main process cannot instantiate excp
            raise RuntimeError(f"Error reading tiff image {img_path}: {excp}")
    return np.stack(planes, axis=0)
60 |
61 |
def save_stack(out_path: Path, stack: ImgStack):
    """Write a 3d image stack to out_path as one contiguous grayscale tiff."""
    with tif.TiffWriter(out_path, shaped=False) as writer:
        writer.save(stack, contiguous=True, photometric="minisblack")
65 |
66 |
def read_and_save_to_stack(path_list: List[Path], out_stack_path: Path):
    """Convenience wrapper: read the listed images into a stack and write it."""
    stack = read_imgs_to_stack(path_list)
    save_stack(out_stack_path, stack)
69 |
70 |
def resave_imgs_to_stacks(
    zplane_img_listing: Dict[int, Dict[int, Dict[int, Dict[int, List[Path]]]]], img_stack_dir: Path
) -> Dict[int, Dict[int, Dict[int, Dict[int, Path]]]]:
    """Resave each per-zplane image list as a single tiff stack (via dask).

    Returns the same nesting {cycle: {region: {channel: {zplane: stack_path}}}}.
    """
    stack_paths: Dict[int, Dict[int, Dict[int, Dict[int, Path]]]] = dict()
    name_fmt = "Cyc{cyc:03d}_Reg{reg:03d}_Ch{ch:03d}_Z{z:03d}.tif"
    tasks = []
    for cycle, regions in zplane_img_listing.items():
        stack_paths[cycle] = dict()
        for region, channels in regions.items():
            stack_paths[cycle][region] = dict()
            for channel, zplanes in channels.items():
                stack_paths[cycle][region][channel] = dict()
                for zplane, path_list in zplanes.items():
                    out_stack_path = img_stack_dir / name_fmt.format(
                        cyc=cycle, reg=region, ch=channel, z=zplane
                    )
                    stack_paths[cycle][region][channel][zplane] = out_stack_path
                    tasks.append(dask.delayed(read_and_save_to_stack)(path_list, out_stack_path))
    dask.compute(*tasks)
    return stack_paths
92 |
93 |
def generate_basic_macro_for_each_stack(
    stack_paths: Dict[int, Dict[int, Dict[int, Dict[int, Path]]]],
    macro_out_dir: Path,
    illum_cor_dir: Path,
) -> Dict[int, Dict[int, Dict[int, Dict[int, Path]]]]:
    """Write one BaSiC macro per image stack.

    Returns the macro paths nested the same way as stack_paths.
    """
    macro_paths: Dict[int, Dict[int, Dict[int, Dict[int, Path]]]] = dict()
    for cycle, regions in stack_paths.items():
        macro_paths[cycle] = dict()
        for region, channels in regions.items():
            macro_paths[cycle][region] = dict()
            for channel, zplanes in channels.items():
                macro_paths[cycle][region][channel] = dict()
                for zplane, stack_path in zplanes.items():
                    macro_path = macro_out_dir / (stack_path.name + ".ijm")
                    macro_text = fill_in_basic_macro_template(stack_path, illum_cor_dir)
                    save_macro(macro_path, macro_text)
                    macro_paths[cycle][region][channel][zplane] = macro_path
    return macro_paths
112 |
113 |
def read_flatfield_imgs(
    illum_cor_dir: Path, stack_paths: Dict[int, Dict[int, Dict[int, Dict[int, Path]]]]
) -> Dict[int, Dict[int, Dict[int, Dict[int, ImgStack]]]]:
    """Load the BaSiC flatfield image for every stack.

    Returns {cycle: {region: {channel: {zplane: flatfield}}}}.
    Mirrors read_darkfield_imgs except for the "flatfield" prefix/subdir.
    """
    per_zplane_flatfield: Dict[int, Dict[int, Dict[int, Dict[int, ImgStack]]]] = dict()
    name_fmt = "Cyc{cyc:03d}_Reg{reg:03d}_Ch{ch:03d}_Z{z:03d}.tif"
    for cycle, regions in stack_paths.items():
        per_zplane_flatfield[cycle] = dict()
        for region, channels in regions.items():
            per_zplane_flatfield[cycle][region] = dict()
            for channel, zplanes in channels.items():
                per_zplane_flatfield[cycle][region][channel] = dict()
                for zplane in zplanes:
                    stack_name = name_fmt.format(cyc=cycle, reg=region, ch=channel, z=zplane)
                    flatfield_path = illum_cor_dir / "flatfield" / ("flatfield_" + stack_name)
                    # float32 image with values in [0, 1]
                    per_zplane_flatfield[cycle][region][channel][zplane] = tif.imread(
                        str(flatfield_path.absolute())
                    )
    return per_zplane_flatfield
134 |
135 |
def read_darkfield_imgs(
    illum_cor_dir: Path, stack_paths: Dict[int, Dict[int, Dict[int, Dict[int, Path]]]]
) -> Dict[int, Dict[int, Dict[int, Dict[int, ImgStack]]]]:
    """Load the BaSiC darkfield image for every stack.

    Returns {cycle: {region: {channel: {zplane: darkfield}}}}.
    Mirrors read_flatfield_imgs except for the "darkfield" prefix/subdir.
    """
    per_zplane_darkfield: Dict[int, Dict[int, Dict[int, Dict[int, ImgStack]]]] = dict()
    name_fmt = "Cyc{cyc:03d}_Reg{reg:03d}_Ch{ch:03d}_Z{z:03d}.tif"
    for cycle, regions in stack_paths.items():
        per_zplane_darkfield[cycle] = dict()
        for region, channels in regions.items():
            per_zplane_darkfield[cycle][region] = dict()
            for channel, zplanes in channels.items():
                per_zplane_darkfield[cycle][region][channel] = dict()
                for zplane in zplanes:
                    stack_name = name_fmt.format(cyc=cycle, reg=region, ch=channel, z=zplane)
                    darkfield_path = illum_cor_dir / "darkfield" / ("darkfield_" + stack_name)
                    # float32 image with values in [0, 1]
                    per_zplane_darkfield[cycle][region][channel][zplane] = tif.imread(
                        str(darkfield_path.absolute())
                    )
    return per_zplane_darkfield
156 |
157 |
158 | def apply_illum_cor(img: Image, flatfield: Image) -> Image:
159 | orig_dtype = img.dtype
160 | dtype_info = np.iinfo(orig_dtype)
161 | orig_minmax = (dtype_info.min, dtype_info.max)
162 | imgf = img.astype(np.float32)
163 |
164 | corrected_imgf = imgf / flatfield
165 |
166 | corrected_img = np.clip(np.round(corrected_imgf, 0), *orig_minmax).astype(orig_dtype)
167 | return corrected_img
168 |
169 |
def correct_and_save(img_path: Path, flatfield: Image, out_path: Path):
    """Flatfield-correct the image at img_path and write the result to out_path."""
    corrected_img = apply_illum_cor(tif.imread(str(img_path.absolute())), flatfield)
    with tif.TiffWriter(str(out_path.absolute()), shaped=False) as writer:
        writer.save(corrected_img, photometric="minisblack")
    del corrected_img
175 |
176 |
def apply_flatfield_and_save(
    listing: Dict[int, Dict[int, Dict[int, Dict[int, Dict[int, Path]]]]],
    flatfields: Dict[int, Dict[int, Dict[int, Dict[int, Image]]]],
    # darkfields: Dict[int, Dict[int, Dict[int, Dict[int, Image]]]],
    out_dir: Path,
):
    """Flatfield-correct every image in the listing and save it under out_dir,
    preserving the Cyc###_reg### directory layout (parallelized with dask)."""
    img_dir_template = "Cyc{cyc:03d}_reg{reg:03d}"
    img_name_template = "{reg:d}_{tile:05d}_Z{z:03d}_CH{ch:d}.tif"
    tasks = []
    for cycle, regions in listing.items():
        for region, channels in regions.items():
            for channel, tiles in channels.items():
                for tile, zplane_dict in tiles.items():
                    for zplane, img_path in zplane_dict.items():
                        out_dir_full = Path(
                            out_dir / img_dir_template.format(cyc=cycle, reg=region)
                        )
                        make_dir_if_not_exists(out_dir_full)
                        out_path = out_dir_full / img_name_template.format(
                            reg=region, tile=tile, z=zplane, ch=channel
                        )
                        flatfield = flatfields[cycle][region][channel][zplane]
                        tasks.append(
                            dask.delayed(correct_and_save)(img_path, flatfield, out_path)
                        )
    dask.compute(*tasks)
202 |
203 |
def organize_listing_by_cyc_reg_ch_zplane(
    listing: Dict[int, Dict[int, Dict[int, Dict[int, Dict[int, Path]]]]],
    tile_ids_to_use: Iterable[int],
) -> Dict[int, Dict[int, Dict[int, Dict[int, List[Path]]]]]:
    """Regroup {cycle: {region: {channel: {tile: {zplane: path}}}}} into
    {cycle: {region: {channel: {zplane: [paths]}}}}, keeping only the
    requested tile ids."""
    regrouped: Dict[int, Dict[int, Dict[int, Dict[int, List[Path]]]]] = dict()
    for cycle, regions in listing.items():
        regrouped[cycle] = dict()
        for region, channels in regions.items():
            regrouped[cycle][region] = dict()
            for channel, tiles in channels.items():
                per_zplane = regrouped[cycle][region][channel] = dict()
                for tile, zplane_dict in tiles.items():
                    if tile not in tile_ids_to_use:
                        continue
                    for zplane, path in zplane_dict.items():
                        per_zplane.setdefault(zplane, []).append(path)
    return regrouped
223 |
224 |
def run_basic(basic_macro_path: Path, log_dir: Path):
    """Run one BaSiC ImageJ macro headlessly and write its stdout/stderr log.

    It is expected that ImageJ is added to system PATH.

    :raises RuntimeError: on an unsupported platform (previously imagej_name
        was left unbound, raising a confusing NameError).
    :raises Exception: when ImageJ exits non-zero. The log file is written
        BEFORE raising; with the old check=True a failing run raised
        CalledProcessError first, so no log was ever saved and the
        error branch below was unreachable.
    """
    imagej_names = {
        "Windows": "ImageJ-win64",
        "Linux": "ImageJ-linux64",
        "Darwin": "ImageJ-macosx",
    }
    system = platform.system()
    if system not in imagej_names:
        raise RuntimeError("Unsupported platform for ImageJ: " + system)

    command = imagej_names[system] + " --headless --console -macro " + str(basic_macro_path)
    print("Started running BaSiC for", str(basic_macro_path))
    res = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    # Always persist the log, even for failed runs.
    run_log = (
        "Command:\n"
        + res.args
        + "\n\nSTDERR:\n"
        + res.stderr.decode("utf-8")
        + "\n\nSTDOUT:\n"
        + res.stdout.decode("utf-8")
    )
    log_path = log_dir / (basic_macro_path.name + ".log")
    with open(log_path, "w", encoding="utf-8") as f:
        f.write(run_log)

    if res.returncode == 0:
        print("Finished", str(basic_macro_path))
    else:
        raise Exception(
            "There was an error while running the BaSiC for "
            + str(basic_macro_path)
            + "\n"
            + res.stderr.decode("utf-8")
        )
    return
262 |
263 |
def run_all_macros(macro_paths: Dict[int, Dict[int, Dict[int, Dict[int, Path]]]], log_dir: Path):
    """Execute every generated BaSiC macro in parallel via dask."""
    tasks = [
        dask.delayed(run_basic)(macro_path, log_dir)
        for regions in macro_paths.values()
        for channels in regions.values()
        for zplanes in channels.values()
        for macro_path in zplanes.values()
    ]
    dask.compute(*tasks)
272 |
273 |
def check_illum_cor_images(
    illum_cor_dir: Path,
    log_dir: Path,
    zplane_listing: Dict[int, Dict[int, Dict[int, Dict[int, List[Path]]]]],
):
    """Verify that BaSiC produced a flatfield image for every expected stack.

    For each (cycle, region, channel, zplane) in zplane_listing the matching
    flatfield tiff must exist under illum_cor_dir/"flatfield". For every
    missing image the corresponding ImageJ log is printed before raising.

    :raises ValueError: if any flatfield image is missing.

    Note: the previous version also built an unused imgs_present list and an
    unused darkfield file name; both dead locals are removed here.
    """
    cor_img_name_template = "{cor_type}_Cyc{cyc:03d}_Reg{reg:03d}_Ch{ch:03d}_Z{z:03d}.tif"
    log_name_template = "Cyc{cyc:03d}_Reg{reg:03d}_Ch{ch:03d}_Z{z:03d}.tif.ijm.log"
    imgs_missing = []
    imgs_missing_logs = []
    for cycle in zplane_listing:
        for region in zplane_listing[cycle]:
            for channel in zplane_listing[cycle][region]:
                for zplane in zplane_listing[cycle][region][channel]:
                    flatfield_fn = cor_img_name_template.format(
                        cor_type="flatfield", cyc=cycle, reg=region, ch=channel, z=zplane
                    )
                    flatfield_path = illum_cor_dir / "flatfield" / flatfield_fn
                    if flatfield_path.exists():
                        continue
                    imgs_missing.append(flatfield_fn)
                    log_path = log_dir / log_name_template.format(
                        cyc=cycle, reg=region, ch=channel, z=zplane
                    )
                    with open(log_path, "r", encoding="utf-8") as f:
                        imgs_missing_logs.append(f.read())
    if len(imgs_missing) > 0:
        msg = (
            "Probably there was an error while running BaSiC. "
            + "There is no image in one or more directories."
        )
        print(msg)

        for missing_fn, log_content in zip(imgs_missing, imgs_missing_logs):
            print("\nOne or both are missing:")
            print(missing_fn)
            print("ImageJ log:")
            print(log_content)
        raise ValueError(msg)
    return
320 |
321 |
def select_which_tiles_to_use(
    n_tiles_y: int, n_tiles_x: int, tile_dtype: str, tile_size: Tuple[int, int]
) -> Set[int]:
    """Select every n-th tile, keeping the max size of the tile stack at 2GB.

    :param tile_dtype: numpy dtype name such as "uint16"; the bit width is
        parsed from the digits in the name (int16 -> 16).
    :param tile_size: (height, width) of one tile in pixels.
    :return: set of 0-based tile ids to use.
    """
    n_tiles = n_tiles_y * n_tiles_x

    img_dtype = int(re.search(r"(\d+)", tile_dtype).groups()[0])  # int16 -> 16
    nbytes = img_dtype / 8

    # max 2GB
    single_tile_gb = tile_size[0] * tile_size[1] * nbytes / 1024**3
    # Guard against a single tile larger than 2GB, which previously made
    # max_num_tiles zero and crashed the floor division below.
    max_num_tiles = max(round(2.0 // single_tile_gb), 1)

    step = max(n_tiles // max_num_tiles, 1)
    if step < 2 and n_tiles > max_num_tiles:
        step = 2
    tile_ids = set(range(0, n_tiles, step))
    return tile_ids
340 |
341 |
def main(data_dir: Path, pipeline_config_path: Path):
    """Estimate and apply per-zplane illumination correction for a dataset.

    Reads the raw images under data_dir, estimates flatfield profiles with
    BaSiC (ImageJ) on a 2GB-capped subsample of tiles, then writes corrected
    images to /output/corrected_images.
    """
    img_stack_dir = Path("/output/image_stacks/")
    macro_dir = Path("/output/basic_macros")
    illum_cor_dir = Path("/output/illumination_correction/")
    corrected_img_dir = Path("/output/corrected_images")
    log_dir = Path("/output/logs")
    for out_dir in (img_stack_dir, macro_dir, illum_cor_dir, corrected_img_dir, log_dir):
        make_dir_if_not_exists(out_dir)

    dataset_info = load_dataset_info(pipeline_config_path)
    tile_dtype = dataset_info["tile_dtype"]

    dask.config.set(
        {"num_workers": dataset_info["num_concurrent_tasks"], "scheduler": "processes"}
    )

    raw_data_dir = dataset_info["dataset_dir"]
    img_dirs = get_input_img_dirs(Path(data_dir / raw_data_dir))
    print("Getting image listing")
    listing = create_listing_for_each_cycle_region(img_dirs)

    # Tiles on disk still contain the stitching overlap.
    tile_size = (
        dataset_info["tile_height"] + dataset_info["overlap_y"],
        dataset_info["tile_width"] + dataset_info["overlap_x"],
    )
    n_tiles = dataset_info["num_tiles"]
    tile_ids_to_use = select_which_tiles_to_use(
        dataset_info["num_tiles_y"], dataset_info["num_tiles_x"], tile_dtype, tile_size
    )
    print(
        f"tile size: {str(tile_size)}",
        f"| number of tiles: {str(n_tiles)}",
        f"| using {str(len(tile_ids_to_use))} tiles to compute illumination correction",
    )
    zplane_listing = organize_listing_by_cyc_reg_ch_zplane(listing, tile_ids_to_use)

    print("Resaving images as stacks")
    stack_paths = resave_imgs_to_stacks(zplane_listing, img_stack_dir)
    print("Generating BaSiC macros")
    macro_paths = generate_basic_macro_for_each_stack(stack_paths, macro_dir, illum_cor_dir)
    print("Running estimation of illumination")
    run_all_macros(macro_paths, log_dir)
    check_illum_cor_images(illum_cor_dir, log_dir, zplane_listing)

    print("Applying illumination correction")
    flatfields = read_flatfield_imgs(illum_cor_dir, stack_paths)
    apply_flatfield_and_save(listing, flatfields, corrected_img_dir)
396 |
397 |
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--data_dir", type=Path, help="path to directory with dataset directory")
    parser.add_argument(
        "--pipeline_config_path", type=Path, help="path to pipelineConfig.json file"
    )
    cli_args = parser.parse_args()
    main(cli_args.data_dir, cli_args.pipeline_config_path)
406 |
--------------------------------------------------------------------------------
/bin/pipeline_utils/dataset_listing.py:
--------------------------------------------------------------------------------
1 | import re
2 | from os import walk
3 | from pathlib import Path
4 | from typing import Dict, List, Tuple, Union
5 |
6 | import tifffile as tif
7 |
8 |
def path_to_str(path: Path) -> str:
    """Absolute path rendered as a POSIX (forward-slash) string."""
    return str(path.absolute().as_posix())
11 |
12 |
def sort_dict(item: dict) -> dict:
    """Recursively rebuild a dict with keys in sorted order at every level."""
    result = {}
    for key in sorted(item):
        value = item[key]
        result[key] = sort_dict(value) if isinstance(value, dict) else value
    return result
15 |
16 |
def alpha_num_order(string: str) -> str:
    """Returns all numbers on 5 digits to let sort the string with numeric order.
    Ex: alphaNumOrder("a6b12.125") ==> "a00006b00012.00125"
    """
    parts = re.split(r"(\d+)", string)
    padded = [format(int(part), "05d") if part.isdigit() else part for part in parts]
    return "".join(padded)
24 |
25 |
def get_img_listing(in_dir: Path) -> List[Path]:
    """Tiff files directly inside in_dir, sorted in natural alphanumeric order."""
    allowed_extensions = (".tif", ".tiff")
    img_listing = [f for f in in_dir.iterdir() if f.suffix in allowed_extensions]
    img_listing.sort(key=lambda x: alpha_num_order(x.name))
    return img_listing
32 |
33 |
def extract_digits_from_string(string: str) -> List[int]:
    """All digit runs in the string, as ints.
    E.g. '1_00001_Z02_CH3' -> '1', '00001', '02', '3' -> [1, 1, 2, 3]
    """
    return [int(part) for part in re.split(r"(\d+)", string) if part.isdigit()]
39 |
40 |
def arrange_listing_by_channel_tile_zplane(
    listing: List[Path],
) -> Dict[int, Dict[int, Dict[int, Path]]]:
    """Arrange image paths into {channel: {tile: {zplane: path}}}.

    File names are expected to contain at least four digit groups:
    region, tile, zplane, channel (e.g. 1_00001_Z02_CH3.tif). Names with
    fewer digit groups (e.g. overlay images) are skipped.
    """
    tile_arrangement: Dict[int, Dict[int, Dict[int, Path]]] = dict()
    for file_path in listing:
        digits = extract_digits_from_string(file_path.name)
        if len(digits) < 4:
            # Overlay image. This guard previously came AFTER indexing
            # digits[1] and digits[2], raising IndexError for names with
            # fewer than three digit groups.
            continue
        tile = digits[1]
        zplane = digits[2]
        channel = digits[3]
        tile_arrangement.setdefault(channel, {}).setdefault(tile, {})[zplane] = file_path
    return tile_arrangement
61 |
62 |
def get_image_paths_arranged_in_dict(img_dir: Path) -> Dict[int, Dict[int, Dict[int, Path]]]:
    """List tiffs in img_dir and arrange them by channel/tile/zplane."""
    return arrange_listing_by_channel_tile_zplane(get_img_listing(img_dir))
67 |
68 |
def extract_cycle_and_region_from_name(
    dir_name: str, cycle_prefix: str, region_prefix: str
) -> Tuple[Union[None, int], Union[None, int]]:
    """Parse cycle and region numbers out of a directory name.

    Both prefixes are matched case-insensitively and must be followed by
    digits; otherwise (None, None) is returned. The previous version first
    matched the bare prefix and then assumed "<prefix><digits>" also matched,
    raising AttributeError for names like "Cyc1_reg" (prefix without digits).
    """
    region_match = re.search(region_prefix + r"(\d+)", dir_name, re.IGNORECASE)
    cycle_match = re.search(cycle_prefix + r"(\d+)", dir_name, re.IGNORECASE)
    if region_match is None or cycle_match is None:
        return None, None
    return int(cycle_match.groups()[0]), int(region_match.groups()[0])
85 |
86 |
def arrange_dirs_by_cycle_region(
    img_dirs: List[Path], cycle_prefix: str, region_prefix: str
) -> Dict[int, Dict[int, Path]]:
    """Group directories into {cycle: {region: dir_path}}.

    :raises ValueError: when no directory name yields a cycle/region pair.
    """
    cycle_region_dict: Dict[int, Dict[int, Path]] = dict()
    for dir_path in img_dirs:
        cycle, region = extract_cycle_and_region_from_name(
            str(dir_path.name), cycle_prefix, region_prefix
        )
        if cycle is None:
            continue
        cycle_region_dict.setdefault(cycle, {})[region] = dir_path
    if not cycle_region_dict:
        raise ValueError("Could not find cycle and region directories")
    return cycle_region_dict
105 |
106 |
def create_listing_for_each_cycle_region(
    img_dirs: List[Path],
) -> Dict[int, Dict[int, Dict[int, Dict[int, Dict[int, Path]]]]]:
    """Returns {cycle: {region: {channel: {tile: {zplane: path}}}}}"""
    # Expected dir names Cyc1_reg1 or Cyc01_reg01
    cycle_region_dict = arrange_dirs_by_cycle_region(img_dirs, "cyc", "reg")
    listing_per_cycle: Dict[int, Dict[int, dict]] = dict()
    for cycle, regions in cycle_region_dict.items():
        listing_per_cycle[cycle] = {
            region: get_image_paths_arranged_in_dict(dir_path)
            for region, dir_path in regions.items()
        }
    return sort_dict(listing_per_cycle)
123 |
124 |
def get_img_dirs(dataset_dir: Path) -> List[Path]:
    """Immediate subdirectories of dataset_dir (files are excluded)."""
    _, dir_names, _ = next(walk(dataset_dir))
    return [dataset_dir.joinpath(dir_name) for dir_name in dir_names]
129 |
130 |
def get_tile_shape(dataset_dir: Path):
    """Shape of the first tile image found in the dataset (reads one tiff).
    Mirrors get_tile_dtype except for the attribute returned."""
    img_dirs = get_img_dirs(dataset_dir)
    dataset_listing = create_listing_for_each_cycle_region(img_dirs)
    for regions in dataset_listing.values():
        for channels in regions.values():
            for tiles in channels.values():
                for zplanes in tiles.values():
                    first_plane = next(iter(zplanes.values()))
                    return tif.imread(path_to_str(first_plane)).shape
141 |
142 |
def get_tile_dtype(dataset_dir: Path):
    """Numpy dtype of the first tile image found in the dataset (reads one tiff).
    Mirrors get_tile_shape except for the attribute returned."""
    img_dirs = get_img_dirs(dataset_dir)
    dataset_listing = create_listing_for_each_cycle_region(img_dirs)
    for regions in dataset_listing.values():
        for channels in regions.values():
            for tiles in channels.values():
                for zplanes in tiles.values():
                    first_plane = next(iter(zplanes.values()))
                    return tif.imread(path_to_str(first_plane)).dtype
153 |
--------------------------------------------------------------------------------
/bin/pipeline_utils/pipeline_config_reader.py:
--------------------------------------------------------------------------------
1 | import json
2 | from pathlib import Path
3 | from typing import Any, Dict, List, Optional, Tuple
4 |
5 |
def load_pipeline_config(pipeline_config_path: Path) -> dict:
    """Deserialize the pipeline JSON config at the given path."""
    with open(pipeline_config_path, "r") as stream:
        return json.load(stream)
10 |
11 |
12 | def _convert_tiling_mode(tiling_mode: str):
13 | if "snake" in tiling_mode.lower():
14 | new_tiling_mode = "snake"
15 | elif "grid" in tiling_mode.lower():
16 | new_tiling_mode = "grid"
17 | else:
18 | raise ValueError("Unknown tiling mode: " + tiling_mode)
19 | return new_tiling_mode
20 |
21 |
def _get_dataset_info_from_config(pipeline_config: dict) -> Dict[str, Any]:
    """Flatten a pipeline config dict into the dataset-info dict used downstream.

    Derived fields (num_channels, num_tiles, reference channel/cycle, tiling
    mode) are computed here; the rest are copied from the config, with
    optional renaming. "membrane_channel" is treated as optional: it was
    previously listed in BOTH required and optional fields, which made the
    optional handling dead code and raised KeyError for datasets without a
    membrane stain.
    """
    required_fields: List[Tuple[str, Optional[str]]] = [
        ("num_cycles", None),
        ("num_tiles_x", "region_width"),
        ("num_tiles_y", "region_height"),
        ("tile_width", None),
        ("tile_height", None),
        ("tile_dtype", None),
        ("overlap_x", "tile_overlap_x"),
        ("overlap_y", "tile_overlap_y"),
        ("pixel_distance_x", "lateral_resolution"),
        ("pixel_distance_y", "lateral_resolution"),
        ("pixel_distance_z", "axial_resolution"),
        ("nuclei_channel", None),
        ("nuclei_channel_loc", None),
        ("membrane_channel_loc", None),
        ("num_z_planes", None),
        ("channel_names", None),
        ("channel_names_qc_pass", None),
        ("num_concurrent_tasks", None),
        ("lateral_resolution", None),
    ]
    optional_fields: List[Tuple[str, Optional[str]]] = [
        ("membrane_channel", None),
    ]
    channel_names = pipeline_config["channel_names"]
    channels_per_cycle = len(channel_names) // pipeline_config["num_cycles"]
    nuclei_channel_idx = channel_names.index(pipeline_config["nuclei_channel"])
    pipeline_config_dict = dict(
        dataset_dir=Path(pipeline_config["raw_data_location"]),
        num_channels=channels_per_cycle,
        num_tiles=pipeline_config["region_width"] * pipeline_config["region_height"],
        # does not matter because we have only one z-plane:
        overlap_z=1,
        # 1-based index of the nuclei channel in channel_names:
        reference_channel=nuclei_channel_idx + 1,
        # 1-based cycle that contains the nuclei channel:
        reference_cycle=nuclei_channel_idx // channels_per_cycle + 1,
        tiling_mode=_convert_tiling_mode(pipeline_config["tiling_mode"]),
    )
    for field, source in required_fields:
        pipeline_config_dict[field] = pipeline_config[source or field]
    for field, source in optional_fields:
        source = source or field
        if source in pipeline_config:
            pipeline_config_dict[field] = pipeline_config[source]
    return pipeline_config_dict
72 |
73 |
def load_dataset_info(pipeline_config_path: Path):
    """Load the pipeline config file and flatten it into a dataset-info dict."""
    config = load_pipeline_config(pipeline_config_path)
    return _get_dataset_info_from_config(config)
78 |
--------------------------------------------------------------------------------
/bin/slicing/modify_pipeline_config.py:
--------------------------------------------------------------------------------
1 | import json
2 | from pathlib import Path
3 | from typing import Tuple
4 |
5 |
def generate_slicer_info(
    tile_shape_no_overlap: Tuple[int, int], overlap: int, stitched_img_shape: Tuple[int, int]
) -> dict:
    """Compute slicer metadata (padding, tile counts, tile shapes) for
    splitting a stitched image into tiles of the given size.

    :param tile_shape_no_overlap: (height, width) of a tile, overlap excluded.
    :param overlap: overlap in pixels added to every side of each tile.
    :param stitched_img_shape: (height, width) of the stitched image.
    :return: dict with a single "slicer" key describing the slicing layout.
    """
    img_height, img_width = stitched_img_shape
    tile_height, tile_width = tile_shape_no_overlap

    # (-a) % b is 0 when b divides a, otherwise b - (a % b): exactly the
    # padding needed to round the image dimension up to a multiple of the
    # tile size.  Replaces the previous redundant if/else branches.
    padding = dict(
        left=0,
        right=(-img_width) % tile_width,
        top=0,
        bottom=(-img_height) % tile_height,
    )

    # Ceil division: partially covered edge tiles still count.
    x_ntiles = -(-img_width // tile_width)
    y_ntiles = -(-img_height // tile_height)

    slicer_info = {
        "slicer": {
            "padding": padding,
            "overlap": overlap,
            "num_tiles": {"x": x_ntiles, "y": y_ntiles},
            "tile_shape_no_overlap": {"x": tile_width, "y": tile_height},
            "tile_shape_with_overlap": {
                # overlap is added on both sides of each tile
                "x": tile_width + overlap * 2,
                "y": tile_height + overlap * 2,
            },
        }
    }
    return slicer_info
43 |
44 |
def replace_values_in_config(exp, slicer_info):
    """Overwrite the tiling-related fields of the pipeline config with values
    derived from the slicer layout, preserving the pre-slicing values under
    the "original_measurements" key.  Mutates and returns `exp`."""
    slicer = slicer_info["slicer"]

    preserved_keys = (
        "tiling_mode",
        "region_width",
        "region_height",
        "num_z_planes",
        "tile_width",
        "tile_height",
        "tile_overlap_x",
        "tile_overlap_y",
        "target_shape",
    )
    # Snapshot the original values before overwriting them.
    originals = {key: exp[key] for key in preserved_keys}

    tile_w = slicer["tile_shape_no_overlap"]["x"]
    tile_h = slicer["tile_shape_no_overlap"]["y"]
    full_overlap = slicer["overlap"] * 2

    exp["tiling_mode"] = "grid"
    exp["region_width"] = slicer["num_tiles"]["x"]
    exp["region_height"] = slicer["num_tiles"]["y"]
    exp["num_z_planes"] = 1
    exp["tile_width"] = tile_w
    exp["tile_height"] = tile_h
    exp["tile_overlap_x"] = full_overlap
    exp["tile_overlap_y"] = full_overlap
    exp["target_shape"] = [tile_w, tile_h]

    exp["original_measurements"] = originals
    return exp
77 |
78 |
def modify_pipeline_config(
    path_to_config: Path,
    tile_shape_no_overlap: Tuple[int, int],
    overlap: int,
    stitched_img_shape: Tuple[int, int],
):
    """Load the pipeline config JSON from disk, rewrite its tiling fields to
    match the new slicing layout, and embed the slicer metadata itself.

    Returns the updated config dict (not written back to disk)."""
    with open(path_to_config, "r") as stream:
        config = json.load(stream)

    slicer_info = generate_slicer_info(tile_shape_no_overlap, overlap, stitched_img_shape)
    updated = replace_values_in_config(config, slicer_info)
    updated.update(slicer_info)
    return updated
93 |
94 |
def save_modified_pipeline_config(pipeline_config: dict, out_dir: Path):
    """Serialize the pipeline config as "pipelineConfig.json" inside out_dir."""
    with open(out_dir / "pipelineConfig.json", "w") as stream:
        json.dump(pipeline_config, stream, indent=4)
99 |
--------------------------------------------------------------------------------
/bin/slicing/run_slicing.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import re
3 | from pathlib import Path
4 | from typing import Dict, Tuple
5 |
6 | import tifffile as tif
7 | from modify_pipeline_config import modify_pipeline_config, save_modified_pipeline_config
8 | from slicer import slice_img
9 |
10 |
def path_to_str(path: Path):
    """Return the absolute path as a POSIX-style (forward-slash) string."""
    return path.absolute().as_posix()
13 |
14 |
def path_to_dict(path: Path):
    """
    Extract region, x position, y position and put into the dictionary
    {R:region, X: position, Y: position, path: path}
    """
    # Split the name into alternating (label, digits) pieces, e.g.
    # "Cyc001_Reg001_Ch001" -> ["Cyc", "001", "Reg", "001", "Ch", "001"].
    pieces = re.split(r"(\d+)(?:_?)", path.name)[:-1]
    parsed = {label: int(number) for label, number in zip(pieces[::2], pieces[1::2])}
    parsed["path"] = path
    return parsed
25 |
26 |
def make_dir_if_not_exists(dir_path: Path):
    """Create dir_path (and any missing parents); a no-op if it already exists."""
    dir_path.mkdir(parents=True, exist_ok=True)
30 |
31 |
def get_image_path_in_dir(dir_path: Path) -> Path:
    """Return the first TIFF file (.tif or .tiff) found in dir_path.

    :raises ValueError: if the directory contains no TIFF files (the previous
        bare `img_listing[0]` raised an opaque IndexError in that case).
    """
    allowed_extensions = (".tif", ".tiff")
    for entry in dir_path.iterdir():
        if entry.suffix in allowed_extensions:
            return entry
    raise ValueError("No TIFF images found in {}".format(dir_path))
37 |
38 |
def get_stitched_image_shape(
    stitched_dirs: Dict[int, Dict[int, Dict[int, Path]]],
) -> Tuple[int, int]:
    """Read the (height, width) shape of one stitched image.

    Walks the cycle -> region -> channel mapping; for each region only the
    first channel's image path is taken (inner loop breaks immediately), and
    the path that survives the loops is the one from the LAST visited region.
    All stitched images are presumably the same shape -- TODO confirm.

    NOTE(review): if stitched_dirs is empty this raises NameError, since
    stitched_img_path is never assigned.
    """
    for cycle in stitched_dirs:
        for region in stitched_dirs[cycle]:
            for channel, dir_path in stitched_dirs[cycle][region].items():
                stitched_img_path = get_image_path_in_dir(dir_path)
                # Only one channel per region is needed for the shape.
                break
    with tif.TiffFile(stitched_img_path) as TF:
        stitched_image_shape = TF.series[0].shape
    return stitched_image_shape
50 |
51 |
def create_output_dirs_for_tiles(
    stitched_channel_dirs: Dict[int, Dict[int, Dict[int, Path]]], out_dir: Path
) -> Dict[int, Dict[int, Path]]:
    """Create one output directory per (cycle, region) pair under out_dir,
    named "Cyc{cycle}_reg{region}", and return a cycle -> region -> path map."""
    out_dirs_for_tiles: Dict[int, Dict[int, Path]] = {}
    for cycle, regions in stitched_channel_dirs.items():
        per_region = {}
        for region in regions:
            tile_dir = out_dir / "Cyc{cycle:d}_reg{region:d}".format(cycle=cycle, region=region)
            make_dir_if_not_exists(tile_dir)
            per_region[region] = tile_dir
        out_dirs_for_tiles[cycle] = per_region
    return out_dirs_for_tiles
65 |
66 |
def split_channels_into_tiles(
    stitched_dirs: Dict[int, Dict[int, Dict[int, Path]]],
    out_dirs_for_tiles: Dict[int, Dict[int, Path]],
    tile_size=1000,
    overlap=50,
):
    """Slice every stitched channel image into overlapping tiles, writing the
    tiles into the matching per-(cycle, region) output directory."""
    for cycle, regions in stitched_dirs.items():
        for region, channels in regions.items():
            # Same destination for every channel of this (cycle, region).
            tile_out_dir = out_dirs_for_tiles[cycle][region]
            for channel, dir_path in channels.items():
                stitched_image_path = get_image_path_in_dir(dir_path)
                print(stitched_image_path.name)
                slice_img(
                    path_to_str(stitched_image_path),
                    path_to_str(tile_out_dir),
                    tile_size=tile_size,
                    overlap=overlap,
                    region=region,
                    zplane=1,
                    channel=channel,
                )
88 |
89 |
def organize_dirs(base_stitched_dir: Path) -> Dict[int, Dict[int, Dict[int, Path]]]:
    """Group the per-channel stitched directories into a nested
    cycle -> region -> channel -> path mapping.

    Expected dir naming: Cyc{cyc:03d}_Reg{reg:03d}_Ch{ch:03d}.
    """
    stitched_dirs: Dict[int, Dict[int, Dict[int, Path]]] = {}
    for dir_path in base_stitched_dir.iterdir():
        name_info = path_to_dict(dir_path)
        # setdefault replaces the previous nested if/else insertion logic.
        per_cycle = stitched_dirs.setdefault(name_info["Cyc"], {})
        per_region = per_cycle.setdefault(name_info["Reg"], {})
        per_region[name_info["Ch"]] = dir_path
    return stitched_dirs
108 |
109 |
def main(base_stitched_dir: Path, pipeline_config_path: Path):
    """Slice stitched channel images into tiles and save an updated pipeline
    config describing the new tiling.

    Output locations are hard-coded for the pipeline's container environment:
    tiles go to /output/new_tiles, the config to /output/pipeline_conf/.
    """
    out_dir = Path("/output/new_tiles")
    pipeline_conf_dir = Path("/output/pipeline_conf/")
    make_dir_if_not_exists(out_dir)
    make_dir_if_not_exists(pipeline_conf_dir)

    stitched_channel_dirs = organize_dirs(base_stitched_dir)
    out_dirs_for_tiles = create_output_dirs_for_tiles(stitched_channel_dirs, out_dir)

    stitched_img_shape = get_stitched_image_shape(stitched_channel_dirs)

    # Tile geometry used both for the actual slicing and for the config update.
    tile_size = 1000
    overlap = 100
    print("Splitting images into tiles")
    print("Tile size:", tile_size, "| overlap:", overlap)
    split_channels_into_tiles(stitched_channel_dirs, out_dirs_for_tiles, tile_size, overlap)

    modified_experiment = modify_pipeline_config(
        pipeline_config_path, (tile_size, tile_size), overlap, stitched_img_shape
    )
    save_modified_pipeline_config(modified_experiment, pipeline_conf_dir)
131 |
132 |
if __name__ == "__main__":
    # Command-line entry point: slice stitched images into Cytokit-sized tiles.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--base_stitched_dir",
        type=Path,
        help="path to directory with directories per channel that contain stitched images",
    )
    parser.add_argument(
        "--pipeline_config_path", type=Path, help="path to pipelineConfig.json file"
    )

    args = parser.parse_args()

    main(args.base_stitched_dir, args.pipeline_config_path)
147 |
--------------------------------------------------------------------------------
/bin/slicing/slicer.py:
--------------------------------------------------------------------------------
1 | import os.path as osp
2 |
3 | import dask
4 | import numpy as np
5 | import tifffile as tif
6 |
7 |
def get_tile(arr, hor_f: int, hor_t: int, ver_f: int, ver_t: int, overlap=0):
    """Extract arr[ver_f:ver_t, hor_f:hor_t] expanded by `overlap` pixels on
    every side, zero-padding wherever the expanded window falls outside arr.

    :param arr: 2D image array.
    :param hor_f: horizontal (column) start, overlap excluded.
    :param hor_t: horizontal (column) end, overlap excluded.
    :param ver_f: vertical (row) start, overlap excluded.
    :param ver_t: vertical (row) end, overlap excluded.
    :param overlap: pixels by which to extend the window on each side.
    :return: tile of shape (ver_t - ver_f + 2*overlap, hor_t - hor_f + 2*overlap).
    """
    hor_f -= overlap
    hor_t += overlap
    ver_f -= overlap
    ver_t += overlap

    # How much of the expanded window falls outside the array on each side;
    # that amount is restored as zero padding after clamping.
    left_pad_size = max(0, -hor_f)
    top_pad_size = max(0, -ver_f)
    right_pad_size = max(0, hor_t - arr.shape[1])
    bot_pad_size = max(0, ver_t - arr.shape[0])

    hor_f = max(hor_f, 0)
    ver_f = max(ver_f, 0)
    hor_t = min(hor_t, arr.shape[1])
    ver_t = min(ver_t, arr.shape[0])

    tile = arr[ver_f:ver_t, hor_f:hor_t]
    padding = ((top_pad_size, bot_pad_size), (left_pad_size, right_pad_size))
    # Pad only when at least one side needs it.  The original used the obscure
    # tuple comparison `max(padding) > (0, 0)`, which relied on tuple ordering.
    if any(amount for pair in padding for amount in pair):
        tile = np.pad(tile, padding, mode="constant")
    return tile
43 |
44 |
def split_by_size(
    arr: np.ndarray, region: int, zplane: int, channel: int, tile_w: int, tile_h: int, overlap: int
):
    """Splits image into tiles by size of tile, in row-major order, and
    generates a Cytokit-style file name for every tile.

    tile_w - tile width
    tile_h - tile height
    """
    arr_height, arr_width = arr.shape[-2], arr.shape[-1]

    # Ceil division: partially covered edge tiles still count.
    x_ntiles = -(-arr_width // tile_w)
    y_ntiles = -(-arr_height // tile_h)

    tiles = []
    img_names = []
    name_template = "{region:d}_{tile:05d}_Z{zplane:03d}_CH{channel:d}.tif"

    for row in range(y_ntiles):
        ver_f = row * tile_h
        for col in range(x_ntiles):
            hor_f = col * tile_w
            tiles.append(get_tile(arr, hor_f, hor_f + tile_w, ver_f, ver_f + tile_h, overlap))
            # Tile ids are 1-based, counted row by row.
            tile_id = row * x_ntiles + col + 1
            img_names.append(
                name_template.format(region=region, tile=tile_id, zplane=zplane, channel=channel)
            )

    return tiles, img_names
83 |
84 |
def slice_img(
    in_path: str,
    out_dir: str,
    tile_size: int,
    overlap: int,
    region: int,
    channel: int,
    zplane: int,
):
    """Read an image, cut it into square tiles of `tile_size` (plus `overlap`
    on every side) and write the tiles to out_dir in parallel dask threads."""
    img = tif.imread(in_path)
    tiles, names = split_by_size(
        img,
        region=region,
        zplane=zplane,
        channel=channel,
        tile_w=tile_size,
        tile_h=tile_size,
        overlap=overlap,
    )

    write_tasks = [
        dask.delayed(tif.imwrite)(
            osp.join(out_dir, name),
            tile,
            photometric="minisblack",
            shaped=False,
        )
        for tile, name in zip(tiles, names)
    ]
    dask.compute(*write_tasks, scheduler="threads")
116 |
--------------------------------------------------------------------------------
/bin/utils.py:
--------------------------------------------------------------------------------
1 | import re
2 | from collections import defaultdict
3 | from os import walk
4 | from pathlib import Path
5 | from pprint import pformat
6 | from typing import Dict, List
7 |
8 | import yaml
9 |
10 |
def list_directory_tree(directory: Path) -> str:
    """Return a pretty-printed, sorted recursive listing of `directory`,
    terminated by a newline."""
    entries = sorted(directory.glob("**/*"))
    return "{}\n".format(pformat(entries))
13 |
14 |
def print_directory_tree(directory: Path):
    """Print the sorted recursive listing of `directory` to stdout."""
    tree = list_directory_tree(directory)
    print(tree)
17 |
18 |
def infer_tile_names(cytokit_config_filename: Path) -> List[str]:
    """Build the full list of expected tile names ("R{r}_X{x}_Y{y}", all
    indices 1-based and zero-padded to 3 digits) from a Cytokit YAML config."""
    with open(cytokit_config_filename) as cytokit_config_file:
        cytokit_config = yaml.safe_load(cytokit_config_file)

    acquisition = cytokit_config["acquisition"]
    region_height = acquisition["region_height"]
    region_width = acquisition["region_width"]
    region_count = len(acquisition["region_names"])

    # Width is X values, height is Y values.
    return [
        "R{:03}_X{:03}_Y{:03}".format(r, x, y)
        for r in range(1, region_count + 1)
        for x in range(1, region_width + 1)
        for y in range(1, region_height + 1)
    ]
38 |
39 |
def collect_files_by_tile(
    tile_names: List[str],
    directory: Path,
    *,
    allow_empty_tiles: bool = False,
) -> Dict[str, List[Path]]:
    """Map each tile name to the files under `directory` whose names begin
    with a match for that tile name (treated as a regex anchored at the start
    of the filename).

    :param tile_names: tile identifiers, e.g. "R001_X001_Y001".
    :param directory: root directory, searched recursively.
    :param allow_empty_tiles: if False, raise for any tile with no files.
    :return: tile name -> matching file paths, in directory-walk order.
    :raises ValueError: if a tile has no files and allow_empty_tiles is False.
    """
    # Compile each tile pattern once up front.
    patterns = [(tile, re.compile(tile)) for tile in tile_names]
    files_by_tile: Dict[str, List[Path]] = defaultdict(list)

    # Walk the directory tree ONCE and test every file against each tile
    # pattern, instead of re-walking the whole tree once per tile as before.
    # Per-tile file ordering (walk order) is unchanged.
    for dirpath_str, dirnames, filenames in walk(directory):
        dirpath = Path(dirpath_str)
        for filename in filenames:
            for tile, pattern in patterns:
                if pattern.match(filename):
                    files_by_tile[tile].append(dirpath / filename)

    # If a tile doesn't have any files, throw an error unless explicitly allowed.
    if not allow_empty_tiles:
        for tile in tile_names:
            if len(files_by_tile[tile]) == 0:
                raise ValueError(f"No files were found for tile {tile}")

    return files_by_tile
64 |
--------------------------------------------------------------------------------
/cytokit-docker/Dockerfile:
--------------------------------------------------------------------------------
FROM eczech/cytokit:latest

# Pre-populate Cytokit's cache directory from the pipeline asset bucket so
# pipeline runs don't download it at execution time.
WORKDIR /lab/data/.cytokit
RUN curl https://s3.amazonaws.com/hubmap-pipeline-assets/cytokit-cache.tar | tar -xf -

WORKDIR /opt

# Update tensorflow-gpu to version 1.14
RUN pip install --upgrade tensorflow-gpu==1.14.0

# Wrapper scripts invoked inside the container by the CWL steps.
COPY setup_data_directory.py /opt
COPY cytokit_wrapper.py /opt
13 |
--------------------------------------------------------------------------------
/cytokit-docker/cytokit_wrapper.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3.5
2 | # Note: this version ^^^ is what's available in the Cytokit image
3 | # and our extension. No f-strings or PEP 519.
4 |
5 | from argparse import ArgumentParser
6 | from os import environ
7 | from os.path import split as osps
8 | from pathlib import Path
9 | from subprocess import check_call
10 |
11 | import yaml
12 |
13 | # TODO ↓↓↓ unify this script with setting up the data directory
14 | # instead of calling this script as a separate executable
15 | SETUP_DATA_DIR_COMMAND = [
16 | "/opt/setup_data_directory.py",
17 | "{data_dir}",
18 | ]
19 | CYTOKIT_COMMAND = [
20 | "cytokit",
21 | "{command}",
22 | "run_all",
23 | "--config-path={yaml_config}",
24 | "--data-dir={data_dir}",
25 | "--output-dir=output",
26 | ]
27 |
28 | CYTOKIT_PROCESSOR_OUTPUT_DIRS = frozenset({"cytometry", "processor"})
29 |
30 |
def symlink_images(data_dir: Path):
    """Invoke the data-directory setup script to build the 'symlinks' tree.

    TODO: unify, don't call another command-line script.
    """
    command = [part.format(data_dir=data_dir) for part in SETUP_DATA_DIR_COMMAND]
    print("Running:", " ".join(command))
    check_call(command)
36 |
37 |
def find_cytokit_processor_output_r(directory: Path):
    """
    BIG HACK for step-by-step CWL usage -- walk parent directories until
    we find one containing 'cytometry' and 'processor'.

    Returns the containing directory, or None if the filesystem root is
    reached without finding one.
    """
    current = directory
    while True:
        names_present = {child.name for child in current.iterdir()}
        if CYTOKIT_PROCESSOR_OUTPUT_DIRS <= names_present:
            return current
        absolute = current.absolute()
        parent = absolute.parent
        if parent == absolute:
            # At the root. No data found.
            return None
        current = parent
54 |
55 |
def find_cytokit_processor_output(directory: Path) -> Path:
    """Like find_cytokit_processor_output_r, but raises ValueError instead of
    returning None when no `cytokit processor` output is found."""
    data_dir = find_cytokit_processor_output_r(directory)
    if data_dir is not None:
        return data_dir
    message = "No `cytokit processor` output found in {} or any parent directories"
    raise ValueError(message.format(directory))
63 |
64 |
def find_or_prep_data_directory(cytokit_command: str, data_dir: Path, pipeline_config: Path):
    """
    Locate or construct the data directory appropriate for the given Cytokit
    command.

    For "processor": build a "symlinks" tree from the raw-data subdirectory
    named by the pipeline config. For "operator": symlink the earlier
    `cytokit processor` output into a new "output" directory.

    :return: pathlib.Path to data directory, either original or
        newly-created with symlinks.
        (NOTE(review): this is a single Path, not the 2-tuple the original
        comment claimed.)
    :raises ValueError: for any other cytokit_command.
    """
    # Read directory name from pipeline config
    # Python 3.6 would be much nicer, but the Cytokit image is built from
    # Ubuntu 16.04, which comes with 3.5
    with pipeline_config.open() as f:
        config = yaml.safe_load(f)
    dir_name = osps(config["raw_data_location"])[1]

    data_subdir = data_dir / dir_name

    if cytokit_command == "processor":
        symlink_images(data_subdir)
        return Path("symlinks")
    elif cytokit_command == "operator":
        # Need to find the output from 'cytokit processor'
        processor_dir = find_cytokit_processor_output(data_dir)
        output_path = Path("output")
        output_path.mkdir()
        for child in processor_dir.iterdir():
            link = output_path / child.name
            print("Symlinking", child, "to", link)
            link.symlink_to(child)
        return output_path
    else:
        raise ValueError('Unsupported Cytokit command: "{}"'.format(cytokit_command))
94 |
95 |
def run_cytokit(cytokit_command: str, data_directory: Path, yaml_config: Path):
    """Run `cytokit <command> run_all` on the given data directory and config.

    After an "operator" run, the `cytokit processor` output entries that were
    symlinked into ./output are removed so they aren't captured as outputs of
    this step.
    """
    command = [
        piece.format(
            command=cytokit_command,
            data_dir=data_directory,
            yaml_config=yaml_config,
        )
        for piece in CYTOKIT_COMMAND
    ]
    print("Running:", " ".join(command))
    env = environ.copy()
    # Cytokit's pipeline package is not on the default path inside the image.
    env["PYTHONPATH"] = "/lab/repos/cytokit/python/pipeline"
    check_call(command, env=env)

    print("Cytokit completed successfully")
    # I feel really bad about this, but not bad enough not to do it
    if cytokit_command == "operator":
        output_dir = Path("output")
        for dirname in CYTOKIT_PROCESSOR_OUTPUT_DIRS:
            dir_to_delete = output_dir / dirname
            print("Deleting", dir_to_delete)
            # NOTE(review): unlink() works here presumably because these
            # entries are symlinks created in find_or_prep_data_directory;
            # it would fail on real directories -- confirm.
            dir_to_delete.unlink()
118 |
119 |
def main(cytokit_command: str, data_dir: Path, pipeline_config: Path, yaml_config: Path):
    """Prepare the data directory for the given Cytokit command, then run it."""
    data_dir = find_or_prep_data_directory(cytokit_command, data_dir, pipeline_config)
    run_cytokit(cytokit_command, data_dir, yaml_config)
123 |
124 |
if __name__ == "__main__":
    # CLI: cytokit_command data_dir pipeline_config yaml_config
    p = ArgumentParser()
    p.add_argument("cytokit_command")
    p.add_argument("data_dir", type=Path)
    p.add_argument("pipeline_config", type=Path)
    p.add_argument("yaml_config", type=Path)
    args = p.parse_args()

    main(args.cytokit_command, args.data_dir, args.pipeline_config, args.yaml_config)
134 |
--------------------------------------------------------------------------------
/cytokit-docker/setup_data_directory.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import argparse
4 | import json
5 | import logging
6 | import os
7 | import re
8 | import stat
9 | import sys
10 | from pathlib import Path
11 |
12 | logging.basicConfig(level=logging.INFO, format="%(levelname)-7s - %(message)s")
13 | logger = logging.getLogger(__name__)
14 |
15 | # Patterns for detecting raw data files are below.
16 | # We follow Cytokit's "keyence_multi_cycle_v01" naming convention defined in:
17 | # https://github.com/hammerlab/cytokit/blob/master/python/pipeline/cytokit/io.py
18 | # Pattern for the directories containing the raw data from each cycle-region
19 | # pair. Different submitters use different naming conventions (e.g.
20 | # cyc001_reg001_191209_123455 or Cyc1_reg1), so our regex has to allow for this.
21 | rawDirNamingPattern = re.compile(r"^cyc0*(\d+)_reg0*(\d+).*", re.IGNORECASE)
22 | # Pattern for raw data TIFF files. These should be named according to the following pattern:
23 | # __Z_CH.tif
24 | # All indices start at 1.
25 | # Tile index is padded to three digits, e.g. 00025, 00001, etc.
26 | # Z-plane index is padded to three digits, e.g. 025, 001, etc.
27 | # Region and channel indices are one digit each.
28 | rawFileNamingPattern = re.compile(r"^\d_\d{5}_Z\d{3}_CH\d\.tif$")
29 | # Pattern to match one single digit at the start of a string, used to replace
30 | # incorrect region indices with the correct ones in some raw data TIFF files.
31 | rawFileRegionPattern = re.compile(r"^\d")
32 |
33 |
def main(data_dir: str):
    """Build a Cytokit-compatible data directory from raw CODEX data.

    Validates that data_dir is readable and contains cycle-region
    subdirectories with correctly named TIFF files, then creates a
    "symlinks" directory in the current working directory holding one
    Cyc{cycle}_reg{region} subdirectory per cycle-region pair, populated
    with symlinks to the raw TIFFs (with the region index in each file name
    corrected from the directory name).

    :param data_dir: path to the directory containing raw cycle-region
        subdirectories.
    :raises Exception: if data_dir is unreadable, no cycle-region
        directories are found, or a directory contains no valid TIFF files.
    """
    ###################################################################
    # Inspect source directories and collect paths to raw data files. #
    ###################################################################

    # Ensure that source directory exists and is readable.
    st = os.stat(data_dir)
    readable = bool(st.st_mode & stat.S_IRUSR)
    if not readable:
        raise Exception(
            "Source directory {} is not readable by the current user.".format(data_dir)
        )

    # Filter the source directory contents for subdirectories matching the
    # expected raw data directory naming pattern (cycle-region pairs);
    # naming conventions vary between submitters.
    sourceDirList = os.listdir(data_dir)
    sourceDataDirs = list(filter(rawDirNamingPattern.search, sourceDirList))
    if not sourceDataDirs:
        raise Exception(
            "No directories matching expected raw data directory naming pattern found in {}".format(
                data_dir
            )
        )

    # Collect raw data file names per cycle-region directory, keeping only
    # files matching the raw TIFF naming convention defined above.
    sourceDataFiles = {}
    for sdir in sourceDataDirs:
        fileList = os.listdir(os.path.join(data_dir, sdir))
        fileList = list(filter(rawFileNamingPattern.search, fileList))
        if not fileList:
            raise Exception(
                "No files found matching expected raw file naming pattern in {}".format(sdir)
            )
        sourceDataFiles[sdir] = fileList

    # NOTE(review): completeness (one file per channel, z-plane, cycle and
    # region) is not verified here -- only the file naming pattern is.

    ######################################
    # Start creating directories and links
    ######################################

    targetDirectory = "symlinks"

    # Create target directory.  (The literal "symlinks" was previously
    # hard-coded here and below; use the variable consistently.)
    os.mkdir(targetDirectory)
    logger.info("Cytokit data directory created at %s" % targetDirectory)

    for sdir in sourceDataFiles:
        dirMatch = rawDirNamingPattern.match(sdir)

        cycle, region = dirMatch.group(1, 2)

        cycleRegionDir = os.path.join(targetDirectory, "Cyc" + cycle + "_reg" + region)

        os.mkdir(cycleRegionDir)

        # Create symlinks for TIFF files.
        for tifFileName in sourceDataFiles[sdir]:
            # Replace the region number at the start because sometimes it's wrong.
            linkTifFileName = rawFileRegionPattern.sub(region, tifFileName)

            # Set up full path to symlink.
            linkTifFilePath = os.path.join(cycleRegionDir, linkTifFileName)

            # Full path to source raw data file.
            sourceTifFilePath = os.path.join(data_dir, sdir, tifFileName)

            # Create the symlink.
            os.symlink(sourceTifFilePath, linkTifFilePath)

    logger.info("Links created in directories under %s" % targetDirectory)
134 |
135 |
136 | ########
137 | # MAIN #
138 | ########
139 | if __name__ == "__main__":
140 | parser = argparse.ArgumentParser(
141 | description="Create a directory and populate directory with directories containing symlinks to the raw image data."
142 | )
143 | parser.add_argument(
144 | "data_dir",
145 | help="Data directory",
146 | )
147 |
148 | args = parser.parse_args()
149 |
150 | main(args.data_dir)
151 |
--------------------------------------------------------------------------------
/docker_images.txt:
--------------------------------------------------------------------------------
1 | hubmap/fiji_bigstitcher Dockerfile_fiji
2 | hubmap/codex-scripts Dockerfile
3 | hubmap/cytokit cytokit-docker/Dockerfile
4 |
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: base
2 | channels:
3 | - defaults
4 | - conda-forge
5 | dependencies:
6 | - python>=3.8,<4
7 | - pip
8 | - numpy-base>=1.18
9 | - numpy>=1.18
10 | - scipy>=1.4.0
11 | - pandas>=0.25
12 | - dask>=2.6.0
13 | - imagecodecs==2023.9.18
14 | - pip:
15 | - tifffile>=2021.8.30,<2023.3.15
16 | - PyYAML>=6.0.1
17 | - aicsimageio==4.14.0
18 | - lxml==4.9.3
19 | - matplotlib>=3.2.1
20 | - scikit-image>=0.17.2
21 | - scikit-learn>=0.23.1
22 | - shapely==2.0.1
23 | - opencv-contrib-python-headless>4.0,<5.0
24 | - pint==0.22
25 | - jsonschema==4.19.0
26 |
--------------------------------------------------------------------------------
/metadata_examples/channelnames.txt:
--------------------------------------------------------------------------------
1 | DAPI-01
2 | Blank
3 | Blank
4 | Blank
5 | DAPI-02
6 | CD31
7 | CD8
8 | Empty
9 | DAPI-03
10 | CD20
11 | Ki67
12 | CD3e
13 | DAPI-04
14 | SMActin
15 | Podoplanin
16 | CD68
17 | DAPI-05
18 | PanCK
19 | CD21
20 | CD4
21 | DAPI-06
22 | Lyve1
23 | CD45RO
24 | CD11c
25 | DAPI-07
26 | CD35
27 | ECAD
28 | CD107a
29 | DAPI-08
30 | CD34
31 | CD44
32 | HLA-DR
33 | DAPI-09
34 | Empty
35 | FoxP3
36 | CD163
37 | DAPI-10
38 | Empty
39 | CollagenIV
40 | Vimentin
41 | DAPI-11
42 | Empty
43 | CD15
44 | CD45
45 | DAPI-12
46 | Empty
47 | CD5
48 | CD1c
49 | DAPI-13
50 | Blank
51 | Blank
52 | Blank
53 |
--------------------------------------------------------------------------------
/metadata_examples/channelnames_report.csv:
--------------------------------------------------------------------------------
1 | Marker,Result
2 | DAPI-01,TRUE
3 | Blank,TRUE
4 | Blank,TRUE
5 | Blank,TRUE
6 | DAPI-02,TRUE
7 | CD31,TRUE
8 | CD8,TRUE
9 | Empty,TRUE
10 | DAPI-03,TRUE
11 | CD20,TRUE
12 | Ki67,TRUE
13 | CD3e,TRUE
14 | DAPI-04,TRUE
15 | SMActin,TRUE
16 | Podoplanin,TRUE
17 | CD68,TRUE
18 | DAPI-05,TRUE
19 | PanCK,TRUE
20 | CD21,TRUE
21 | CD4,TRUE
22 | DAPI-06,TRUE
23 | Lyve1,TRUE
24 | CD45RO,TRUE
25 | CD11c,TRUE
26 | DAPI-07,TRUE
27 | CD35,TRUE
28 | ECAD,TRUE
29 | CD107a,TRUE
30 | DAPI-08,TRUE
31 | CD34,TRUE
32 | CD44,TRUE
33 | HLA-DR,TRUE
34 | DAPI-09,TRUE
35 | Empty,TRUE
36 | FoxP3,TRUE
37 | CD163,TRUE
38 | DAPI-10,TRUE
39 | Empty,TRUE
40 | CollagenIV,TRUE
41 | Vimentin,TRUE
42 | DAPI-11,TRUE
43 | Empty,TRUE
44 | CD15,TRUE
45 | CD45,TRUE
46 | DAPI-12,TRUE
47 | Empty,TRUE
48 | CD5,TRUE
49 | CD1c,TRUE
50 | DAPI-13,TRUE
51 | Blank,TRUE
52 | Blank,TRUE
53 | Blank,TRUE
54 |
--------------------------------------------------------------------------------
/metadata_examples/experiment.json:
--------------------------------------------------------------------------------
1 | {
2 | "version": "1.7.0.6",
3 | "name": "src_CX_19-002_CC2-spleen-A",
4 | "dateProcessed": "2020-02-10T16:01:15.357-05:00[America/New_York]",
5 | "objectiveType": "air",
6 | "magnification": 20,
7 | "aperture": 0.75,
8 | "xyResolution": 377.4463383838384,
9 | "zPitch": 1500.0,
10 | "wavelengths": [
11 | 358,
12 | 488,
13 | 550,
14 | 650
15 | ],
16 | "bitDepth": 16,
17 | "numRegions": 1,
18 | "numCycles": 9,
19 | "numZPlanes": 13,
20 | "numChannels": 4,
21 | "regionWidth": 9,
22 | "regionHeight": 9,
23 | "tileWidth": 1920,
24 | "tileHeight": 1440,
25 | "tileOverlapX": 0.3,
26 | "tileOverlapY": 0.3,
27 | "tilingMode": "EITHER SNAKE OR GRID",
28 | "referenceCycle": 2,
29 | "referenceChannel": 1,
30 | "regIdx": [
31 | 1
32 | ],
33 | "cycle_lower_limit": 1,
34 | "cycle_upper_limit": 9,
35 | "num_z_planes": 1,
36 | "region_width": 9,
37 | "region_height": 9,
38 | "tile_width": 1344,
39 | "tile_height": 1008
40 | }
41 |
--------------------------------------------------------------------------------
/metadata_examples/exposure_times.txt:
--------------------------------------------------------------------------------
1 | Cycle,CH1,CH2,CH3,CH4
2 | 1,10,500,350,500
3 | 2,10,500,350,500
4 | 3,10,500,350,500
5 | 4,10,500,350,500
6 | 5,10,500,350,500
7 | 6,10,500,350,500
8 | 7,10,500,350,500
9 | 8,10,500,350,500
10 | 9,10,500,350,500
11 | 10,10,500,350,500
12 | 11,10,500,350,500
13 | 12,10,500,350,500
14 | 13,10,500,350,500
--------------------------------------------------------------------------------
/metadata_examples/segmentation.json:
--------------------------------------------------------------------------------
1 | {
2 | "nuclearStainCycle": 2,
3 | "nuclearStainChannel": 1,
4 | "membraneStainCycle": 11,
5 | "membraneStainChannel": 4
6 | }
7 |
--------------------------------------------------------------------------------
/pipeline-manifest.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "pattern": "experiment.json",
4 | "description": "File containing Cytokit's calculations from deconvolution, drift compensation, and focal plan selection, in JSON format",
5 | "edam_ontology_term": "EDAM_1.24.format_3464"
6 | },
7 | {
        "pattern": "stitched/expressions/reg(?P<region>\\d+)_stitched_expressions\\.ome\\.tiff",
9 | "description": "Cytokit expression output for region {region}, in OME-TIFF format",
10 | "edam_ontology_term": "EDAM_1.24.format_3727",
11 | "is_data_product": true
12 | },
13 | {
        "pattern": "stitched/mask/reg(?P<region>\\d+)_stitched_mask\\.ome\\.tiff",
15 | "description": "Segmentation mask for region {region}, in OME-TIFF format",
16 | "edam_ontology_term": "EDAM_1.24.format_3727",
17 | "is_data_product": true
18 | }
19 | ]
20 |
--------------------------------------------------------------------------------
/pipeline.cwl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env cwl-runner
2 |
3 | class: Workflow
4 | cwlVersion: v1.1
5 | label: CODEX analysis pipeline using Cytokit
6 |
7 | requirements:
8 | SubworkflowFeatureRequirement: {}
9 |
10 | inputs:
11 | data_dir:
12 | label: "Directory containing CODEX data"
13 | type: Directory
14 | gpus:
15 | label: "GPUs to use, represented as a comma-separated list of integers"
16 | type: string
17 | default: "0"
18 | num_concurrent_tasks:
19 | label: "Number of parallel CPU jobs"
20 | type: int
21 | default: 10
22 |
23 | outputs:
24 | experiment_config:
25 | outputSource: illumination_first_stitching/cytokit_config
26 | type: File
27 | label: "Cytokit configuration format"
28 | data_json:
29 | outputSource: run_cytokit/data_json
30 | type: File
31 | label: "JSON file containing Cytokit's calculations from deconvolution, drift compensation, and focal plane selection"
32 | stitched_images:
33 | outputSource: ometiff_second_stitching/stitched_images
34 | type: Directory
35 | label: "Segmentation masks and expressions in OME-TIFF format"
36 | pipeline_config:
37 | outputSource: ometiff_second_stitching/final_pipeline_config
38 | type: File
39 | label: "Pipeline config with all the modifications"
40 |
41 | steps:
42 | illumination_first_stitching:
43 | in:
44 | data_dir:
45 | source: data_dir
46 | gpus:
47 | source: gpus
48 | num_concurrent_tasks:
49 | source: num_concurrent_tasks
50 | out:
51 | - slicing_pipeline_config
52 | - cytokit_config
53 | - new_tiles
54 | run: steps/illumination_first_stitching.cwl
55 | label: "Illumination correction, best focus selection, and stitching stage 1"
56 |
57 | run_cytokit:
58 | in:
59 | data_dir:
60 | source: illumination_first_stitching/new_tiles
61 | yaml_config:
62 | source: illumination_first_stitching/cytokit_config
63 | out:
64 | - cytokit_output
65 | - data_json
66 | run: steps/run_cytokit.cwl
67 | label: "CODEX analysis via Cytokit processor and operator"
68 |
69 | ometiff_second_stitching:
70 | in:
71 | cytokit_output:
72 | source: run_cytokit/cytokit_output
73 | slicing_pipeline_config:
74 | source: illumination_first_stitching/slicing_pipeline_config
75 | cytokit_config:
76 | source: illumination_first_stitching/cytokit_config
77 | data_dir:
78 | source: data_dir
79 | out:
80 | - stitched_images
81 | - final_pipeline_config
82 | run: steps/ometiff_second_stitching.cwl
83 | label: "OMETIFF creation and stitching stage 2"
84 |
--------------------------------------------------------------------------------
/pipeline_release_mgmt.yaml:
--------------------------------------------------------------------------------
1 | main_branch: main
2 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.black]
2 | line-length = 99
3 | exclude = 'sprm/'
4 |
5 | [tool.isort]
6 | profile = "black"
7 | multi_line_output = 3
8 | skip = 'sprm/'
9 | src_paths = ["bin", "sprm"]
10 |
--------------------------------------------------------------------------------
/requirements-test.txt:
--------------------------------------------------------------------------------
1 | black==22.10.0
2 | isort==5.10.1
3 |
--------------------------------------------------------------------------------
/steps/illumination_first_stitching.cwl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env cwl-runner
2 |
3 | class: Workflow
4 | cwlVersion: v1.1
5 | label: Illumination correction, best focus selection, and stitching stage 1
6 |
7 | inputs:
8 | data_dir:
9 | label: "Directory containing CODEX data"
10 | type: Directory
11 | gpus:
12 | label: "GPUs to use, represented as a comma-separated list of integers"
13 | type: string
14 | default: "0"
15 | num_concurrent_tasks:
16 | label: "Number of parallel CPU jobs"
17 | type: int
18 | default: 10
19 |
20 | outputs:
21 | cytokit_config:
22 | outputSource: create_yaml_config/cytokit_config
23 | type: File
24 | label: "Cytokit configuration in YAML format"
25 | new_tiles:
26 | outputSource: slicing/new_tiles
27 | type: Directory
28 | slicing_pipeline_config:
29 | outputSource: slicing/modified_pipeline_config
30 | type: File
31 | label: "Pipeline config with all the modifications"
32 |
33 | steps:
34 | collect_dataset_info:
35 | in:
36 | base_directory:
37 | source: data_dir
38 | num_concurrent_tasks:
39 | source: num_concurrent_tasks
40 | out:
41 | - pipeline_config
42 | run: illumination_first_stitching/collect_dataset_info.cwl
43 | label: "Collect CODEX dataset info"
44 |
45 | illumination_correction:
46 | in:
47 | base_directory:
48 | source: data_dir
49 | pipeline_config:
50 | source: collect_dataset_info/pipeline_config
51 | out:
52 | - illum_corrected_tiles
53 | run: illumination_first_stitching/illumination_correction.cwl
54 |
55 | best_focus:
56 | in:
57 | data_dir:
58 | source: illumination_correction/illum_corrected_tiles
59 | pipeline_config:
60 | source: collect_dataset_info/pipeline_config
61 | out:
62 | - best_focus_tiles
63 | run: illumination_first_stitching/best_focus.cwl
64 |
65 | first_stitching:
66 | in:
67 | data_dir:
68 | source: best_focus/best_focus_tiles
69 | pipeline_config:
70 | source: collect_dataset_info/pipeline_config
71 | out:
72 | - stitched_images
73 | run: illumination_first_stitching/first_stitching.cwl
74 |
75 | slicing:
76 | in:
77 | base_stitched_dir:
78 | source: first_stitching/stitched_images
79 | pipeline_config:
80 | source: collect_dataset_info/pipeline_config
81 | out:
82 | - new_tiles
83 | - modified_pipeline_config
84 | run: illumination_first_stitching/slicing.cwl
85 |
86 | create_yaml_config:
87 | in:
88 | pipeline_config:
89 | source: slicing/modified_pipeline_config
90 | gpus:
91 | source: gpus
92 | out:
93 | - cytokit_config
94 | run: illumination_first_stitching/create_yaml_config.cwl
95 | label: "Create Cytokit experiment config in YAML format"
96 |
--------------------------------------------------------------------------------
/steps/illumination_first_stitching/best_focus.cwl:
--------------------------------------------------------------------------------
1 | cwlVersion: v1.1
2 | class: CommandLineTool
3 |
4 | requirements:
5 | DockerRequirement:
6 | dockerPull: hubmap/codex-scripts:latest
7 | dockerOutputDirectory: "/output"
8 |
9 | baseCommand: ["python", "/opt/best_focus/run_best_focus_selection.py"]
10 |
11 |
12 | inputs:
13 | data_dir:
14 | type: Directory
15 | inputBinding:
16 | prefix: "--data_dir"
17 |
18 |
19 | pipeline_config:
20 | type: File
21 | inputBinding:
22 | prefix: "--pipeline_config_path"
23 |
24 | outputs:
25 | best_focus_tiles:
26 | type: Directory
27 | outputBinding:
28 | glob: "/output/best_focus"
29 |
30 |
--------------------------------------------------------------------------------
/steps/illumination_first_stitching/collect_dataset_info.cwl:
--------------------------------------------------------------------------------
1 | cwlVersion: v1.1
2 | class: CommandLineTool
3 | label: Collect dataset info for Cytokit
4 |
5 | requirements:
6 | DockerRequirement:
7 | dockerPull: hubmap/codex-scripts:latest
8 |
9 | baseCommand: ["python", "/opt/dataset_info/run_collection.py"]
10 |
11 | inputs:
12 | base_directory:
13 | type: Directory
14 | inputBinding:
15 | prefix: "--path_to_dataset"
16 |
17 | num_concurrent_tasks:
18 | type: int
19 | inputBinding:
20 | prefix: "--num_concurrent_tasks"
21 |
22 | outputs:
23 | pipeline_config:
24 | type: File
25 | outputBinding:
26 | glob: pipelineConfig.json
27 |
--------------------------------------------------------------------------------
/steps/illumination_first_stitching/create_yaml_config.cwl:
--------------------------------------------------------------------------------
1 | cwlVersion: v1.1
2 | class: CommandLineTool
3 | label: Create Cytokit experiment config
4 |
5 | requirements:
6 | DockerRequirement:
7 | dockerPull: hubmap/codex-scripts:latest
8 |
9 | baseCommand: ["python", "/opt/create_cytokit_config.py"]
10 |
11 | inputs:
12 | gpus:
13 | type: string
14 | inputBinding:
15 | position: 1
16 | prefix: "--gpus="
17 | separate: false
18 | pipeline_config:
19 | type: File
20 | inputBinding:
21 | position: 2
22 | outputs:
23 | cytokit_config:
24 | type: File
25 | outputBinding:
26 | glob: experiment.yaml
27 |
--------------------------------------------------------------------------------
/steps/illumination_first_stitching/first_stitching.cwl:
--------------------------------------------------------------------------------
1 | cwlVersion: v1.1
2 | class: CommandLineTool
3 |
4 | requirements:
5 | DockerRequirement:
6 | dockerPull: hubmap/codex-scripts:latest
7 | dockerOutputDirectory: "/output"
8 |
9 | baseCommand: ["python", "/opt/codex_stitching/run_stitching.py"]
10 |
11 |
12 | inputs:
13 | data_dir:
14 | type: Directory
15 | inputBinding:
16 | prefix: "--data_dir"
17 |
18 |
19 | pipeline_config:
20 | type: File
21 | inputBinding:
22 | prefix: "--pipeline_config_path"
23 |
24 | outputs:
25 | stitched_images:
26 | type: Directory
27 | outputBinding:
28 | glob: "/output/stitched_images"
29 |
--------------------------------------------------------------------------------
/steps/illumination_first_stitching/illumination_correction.cwl:
--------------------------------------------------------------------------------
1 | cwlVersion: v1.1
2 | class: CommandLineTool
3 |
4 | requirements:
5 | DockerRequirement:
6 | dockerPull: hubmap/codex-scripts:latest
7 | dockerOutputDirectory: "/output"
8 |
9 | baseCommand: ["python", "/opt/illumination_correction/run_illumination_correction.py"]
10 |
11 |
12 | inputs:
13 | base_directory:
14 | type: Directory
15 | inputBinding:
16 | prefix: "--data_dir"
17 |
18 | pipeline_config:
19 | type: File
20 | inputBinding:
21 | prefix: "--pipeline_config_path"
22 |
23 | outputs:
24 | illum_corrected_tiles:
25 | type: Directory
26 | outputBinding:
27 | glob: "/output/corrected_images"
28 |
--------------------------------------------------------------------------------
/steps/illumination_first_stitching/slicing.cwl:
--------------------------------------------------------------------------------
1 | cwlVersion: v1.1
2 | class: CommandLineTool
3 |
4 | requirements:
5 | DockerRequirement:
6 | dockerPull: hubmap/codex-scripts:latest
7 | dockerOutputDirectory: "/output"
8 |
9 | baseCommand: ["python", "/opt/slicing/run_slicing.py"]
10 |
11 |
12 | inputs:
13 | base_stitched_dir:
14 | type: Directory
15 | inputBinding:
16 | prefix: "--base_stitched_dir"
17 |
18 | pipeline_config:
19 | type: File
20 | inputBinding:
21 | prefix: "--pipeline_config_path"
22 |
23 | outputs:
24 | new_tiles:
25 | type: Directory
26 | outputBinding:
27 | glob: "/output/new_tiles"
28 |
29 | modified_pipeline_config:
30 | type: File
31 | outputBinding:
32 | glob: "/output/pipeline_conf/pipelineConfig.json"
33 |
--------------------------------------------------------------------------------
/steps/ometiff_second_stitching-manifest.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "pattern": "pipeline_output/expr/(?P<image>.+)\\.ome\\.tiff",
4 | "description": "Cytokit expression output for image {image}, in OME-TIFF format",
5 | "edam_ontology_term": "EDAM_1.24.format_3727"
6 | },
7 | {
8 | "pattern": "pipeline_output/mask/(?P<image>.+)\\.ome\\.tiff",
9 | "description": "Segmentation mask for image {image}, in OME-TIFF format",
10 | "edam_ontology_term": "EDAM_1.24.format_3727"
11 | }
12 | ]
13 |
--------------------------------------------------------------------------------
/steps/ometiff_second_stitching.cwl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env cwl-runner
2 |
3 | class: Workflow
4 | cwlVersion: v1.1
5 | label: OMETIFF creation and stitching stage 2
6 |
7 | inputs:
8 | slicing_pipeline_config:
9 | type: File
10 | cytokit_config:
11 | type: File
12 | cytokit_output:
13 | type: Directory
14 | data_dir:
15 | type: Directory
16 | num_concurrent_tasks:
17 | label: "Number of parallel CPU jobs"
18 | type: int
19 | default: 10
20 |
21 | outputs:
22 | stitched_images:
23 | outputSource: second_stitching/stitched_images
24 | type: Directory
25 | label: "Segmentation masks and expressions in OME-TIFF format"
26 | final_pipeline_config:
27 | outputSource: second_stitching/final_pipeline_config
28 | type: File
29 | label: "Pipeline config with all the modifications"
30 |
31 | steps:
32 | background_subtraction:
33 | in:
34 | cytokit_output:
35 | source: cytokit_output
36 | pipeline_config:
37 | source: slicing_pipeline_config
38 | cytokit_config:
39 | source: cytokit_config
40 | num_concurrent_tasks:
41 | source: num_concurrent_tasks
42 | out:
43 | - bg_sub_tiles
44 | - bg_sub_config
45 | run: ometiff_second_stitching/background_subtraction.cwl
46 |
47 | ome_tiff_creation:
48 | in:
49 | cytokit_output:
50 | source: cytokit_output
51 | bg_sub_tiles:
52 | source: background_subtraction/bg_sub_tiles
53 | cytokit_config:
54 | source: cytokit_config
55 | input_data_dir:
56 | source: data_dir
57 | out:
58 | - ome_tiffs
59 | run: ometiff_second_stitching/ome_tiff_creation.cwl
60 | label: "Create OME-TIFF versions of Cytokit segmentation and extract results"
61 |
62 | second_stitching:
63 | in:
64 | pipeline_config:
65 | source: background_subtraction/bg_sub_config
66 | ometiff_dir:
67 | source: ome_tiff_creation/ome_tiffs
68 | out:
69 | - stitched_images
70 | - final_pipeline_config
71 | run: ometiff_second_stitching/second_stitching.cwl
72 |
--------------------------------------------------------------------------------
/steps/ometiff_second_stitching/background_subtraction.cwl:
--------------------------------------------------------------------------------
1 | cwlVersion: v1.1
2 | class: CommandLineTool
3 |
4 | requirements:
5 | DockerRequirement:
6 | dockerPull: hubmap/codex-scripts:latest
7 | dockerOutputDirectory: "/output"
8 |
9 | baseCommand: ["python", "/opt/background_subtraction/run_background_subtraction.py"]
10 |
11 |
12 | inputs:
13 | cytokit_output:
14 | type: Directory
15 | inputBinding:
16 | prefix: "--data_dir"
17 |
18 |
19 | pipeline_config:
20 | type: File
21 | inputBinding:
22 | prefix: "--pipeline_config_path"
23 |
24 | cytokit_config:
25 | type: File
26 | inputBinding:
27 | prefix: "--cytokit_config_path"
28 |
29 | num_concurrent_tasks:
30 | type: int
31 | default: 10
32 | inputBinding:
33 | prefix: "--num_concurrent_tasks"
34 |
35 | outputs:
36 | bg_sub_tiles:
37 | type: Directory
38 | outputBinding:
39 | glob: "/output/background_subtraction"
40 |
41 | bg_sub_config:
42 | type: File
43 | outputBinding:
44 | glob: "/output/config/pipelineConfig.json"
45 |
--------------------------------------------------------------------------------
/steps/ometiff_second_stitching/ome_tiff_creation.cwl:
--------------------------------------------------------------------------------
1 | cwlVersion: v1.1
2 | class: CommandLineTool
3 | label: Create OME-TIFF versions of Cytokit segmentation and extract results
4 |
5 | requirements:
6 | DockerRequirement:
7 | dockerPull: hubmap/codex-scripts:latest
8 |
9 | baseCommand: ["python", "/opt/convert_to_ometiff.py"]
10 |
11 | inputs:
12 | cytokit_output:
13 | type: Directory
14 | inputBinding:
15 | position: 0
16 | bg_sub_tiles:
17 | type: Directory
18 | inputBinding:
19 | position: 1
20 | cytokit_config:
21 | type: File
22 | inputBinding:
23 | position: 2
24 | input_data_dir:
25 | type: Directory
26 | inputBinding:
27 | position: 3
28 |
29 | outputs:
30 | ome_tiffs:
31 | type: Directory
32 | outputBinding:
33 | glob: output
34 |
35 |
36 |
--------------------------------------------------------------------------------
/steps/ometiff_second_stitching/second_stitching.cwl:
--------------------------------------------------------------------------------
1 | cwlVersion: v1.1
2 | class: CommandLineTool
3 |
4 | requirements:
5 | DockerRequirement:
6 | dockerPull: hubmap/codex-scripts:latest
7 | dockerOutputDirectory: /output
8 |
9 | baseCommand: ["python", "/opt/codex_stitching/secondary_stitcher/secondary_stitcher_runner.py"]
10 |
11 |
12 | inputs:
13 | pipeline_config:
14 | type: File
15 | inputBinding:
16 | prefix: "--pipeline_config_path"
17 |
18 | ometiff_dir:
19 | type: Directory
20 | inputBinding:
21 | prefix: "--ometiff_dir"
22 |
23 | outputs:
24 | stitched_images:
25 | type: Directory
26 | outputBinding:
27 | glob: /output/pipeline_output
28 |
29 | final_pipeline_config:
30 | type: File
31 | outputBinding:
32 | glob: /output/pipelineConfig.json
33 |
--------------------------------------------------------------------------------
/steps/run_cytokit-manifest.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "pattern": "experiment.json",
4 | "description": "File containing Cytokit's calculations from deconvolution, drift compensation, and focal plane selection, in JSON format",
5 | "edam_ontology_term": "EDAM_1.24.format_3464"
6 | }
7 | ]
8 |
--------------------------------------------------------------------------------
/steps/run_cytokit.cwl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env cwl-runner
2 |
3 | class: CommandLineTool
4 | cwlVersion: v1.1
5 | baseCommand: ["sh", "run_cytokit.sh"]
6 |
7 | requirements:
8 | DockerRequirement:
9 | dockerPull: hubmap/cytokit:latest
10 | DockerGpuRequirement: {}
11 |
12 | InitialWorkDirRequirement:
13 | listing:
14 | - entryname: run_cytokit.sh
15 | entry: |-
16 | __conda_setup="\$('/opt/conda/bin/conda' 'shell.bash' 'hook' 2> /dev/null)"
17 | if [ \$? -eq 0 ]; then
18 | eval "\$__conda_setup"
19 | else
20 | if [ -f "/opt/conda/etc/profile.d/conda.sh" ]; then
21 | . "/opt/conda/etc/profile.d/conda.sh"
22 | else
23 | export PATH="/opt/conda/bin:$PATH"
24 | fi
25 | fi
26 | unset __conda_setup
27 |
28 | export PYTHONPATH=/lab/repos/cytokit/python/pipeline
29 | conda activate cytokit
30 |
31 | mkdir $HOME/cytokit
32 |
33 | cytokit processor run_all --data-dir $(inputs.data_dir.path) --config-path $(inputs.yaml_config.path) --output_dir $HOME/cytokit && \
34 | cytokit operator run_all --data-dir $HOME/cytokit --config-path $(inputs.yaml_config.path) --output_dir $HOME/cytokit
35 |
36 |
37 | inputs:
38 | data_dir:
39 | type: Directory
40 |
41 | yaml_config:
42 | type: File
43 |
44 |
45 | outputs:
46 | cytokit_output:
47 | type: Directory
48 | outputBinding:
49 | glob: cytokit
50 |
51 | data_json:
52 | type: File
53 | outputBinding:
54 | glob: cytokit/processor/data.json
55 |
56 |
57 |
--------------------------------------------------------------------------------
/subm.yaml:
--------------------------------------------------------------------------------
1 | data_dir:
2 | class: Directory
3 | path: "/path/to/dir/with/codex_dataset"
4 | gpus: "0"
5 | num_concurrent_tasks: 10
6 |
--------------------------------------------------------------------------------
/test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | set -o errexit
3 | set -o pipefail
4 |
5 | start() { echo travis_fold':'start:$1; echo $1; }
6 | end() { set +v; echo travis_fold':'end:$1; echo; echo; }
7 | die() { set +v; echo "$*" 1>&2 ; exit 1; }
8 |
9 | start black
10 | black --check .
11 | end black
12 |
13 | start isort
14 | isort --check-only .
15 | end isort
16 |
--------------------------------------------------------------------------------