├── .gitignore ├── .pre-commit-config.yaml ├── .travis.yml ├── Dockerfile ├── Dockerfile_fiji ├── LICENSE ├── README.md ├── bin ├── background_subtraction │ └── run_background_subtraction.py ├── best_focus │ ├── best_z_identification.py │ ├── best_z_paths.py │ ├── file_manipulation.py │ └── run_best_focus_selection.py ├── codex_stitching │ ├── bigstitcher_dataset_meta.py │ ├── directory_management.py │ ├── generate_bigstitcher_macro.py │ ├── image_stitching.py │ ├── run_stitching.py │ └── secondary_stitcher │ │ ├── mask_stitching.py │ │ ├── match_masks.py │ │ ├── secondary_stitcher.py │ │ └── secondary_stitcher_runner.py ├── convert_to_ometiff.py ├── create_cytokit_config.py ├── dataset_info │ ├── collect_dataset_info.py │ ├── collect_dataset_info_old.py │ └── run_collection.py ├── illumination_correction │ ├── generate_basic_macro.py │ └── run_illumination_correction.py ├── pipeline_utils │ ├── dataset_listing.py │ └── pipeline_config_reader.py ├── slicing │ ├── modify_pipeline_config.py │ ├── run_slicing.py │ └── slicer.py └── utils.py ├── cytokit-docker ├── Dockerfile ├── cytokit_wrapper.py └── setup_data_directory.py ├── docker_images.txt ├── environment.yml ├── metadata_examples ├── channelnames.txt ├── channelnames_report.csv ├── experiment.json ├── exposure_times.txt └── segmentation.json ├── pipeline-manifest.json ├── pipeline.cwl ├── pipeline_release_mgmt.yaml ├── pyproject.toml ├── requirements-test.txt ├── steps ├── illumination_first_stitching.cwl ├── illumination_first_stitching │ ├── best_focus.cwl │ ├── collect_dataset_info.cwl │ ├── create_yaml_config.cwl │ ├── first_stitching.cwl │ ├── illumination_correction.cwl │ └── slicing.cwl ├── ometiff_second_stitching-manifest.json ├── ometiff_second_stitching.cwl ├── ometiff_second_stitching │ ├── background_subtraction.cwl │ ├── ome_tiff_creation.cwl │ └── second_stitching.cwl ├── run_cytokit-manifest.json └── run_cytokit.cwl ├── subm.yaml └── test.sh /.gitignore: 
-------------------------------------------------------------------------------- 1 | .idea/ 2 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/psf/black 3 | rev: 23.9.1 4 | hooks: 5 | - id: black 6 | language_version: python3 7 | - repo: https://github.com/pycqa/isort 8 | rev: 5.12.0 9 | hooks: 10 | - id: isort 11 | args: ["--profile", "black"] 12 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: focal 2 | language: python 3 | python: 3.8 4 | install: 5 | - pip install -r requirements-test.txt 6 | script: 7 | - ./test.sh 8 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:focal 2 | 3 | RUN apt-get -qq update \ 4 | && apt-get -qq install --no-install-recommends --yes \ 5 | wget \ 6 | bzip2 \ 7 | ca-certificates \ 8 | curl \ 9 | unzip \ 10 | git \ 11 | && apt-get clean \ 12 | && rm -rf /var/lib/apt/lists/* 13 | 14 | RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py39_4.12.0-Linux-x86_64.sh -O /tmp/miniconda.sh \ 15 | && /bin/bash /tmp/miniconda.sh -b -p /opt/conda \ 16 | && rm /tmp/miniconda.sh 17 | ENV PATH /opt/conda/bin:$PATH 18 | 19 | # update base environment from yaml file 20 | COPY environment.yml /tmp/ 21 | RUN conda env update -f /tmp/environment.yml \ 22 | && echo "source activate base" > ~/.bashrc \ 23 | && conda clean --index-cache --tarballs --yes \ 24 | && rm /tmp/environment.yml 25 | 26 | ENV PATH /opt/conda/envs/hubmap/bin:$PATH 27 | 28 | #Copy fiji from container 29 | COPY --from=hubmap/fiji_bigstitcher:latest /opt/Fiji.app /opt/Fiji.app 30 | ENV PATH /opt/Fiji.app:$PATH 31 | 32 
| RUN mkdir /output && chmod -R a+rwx /output 33 | 34 | WORKDIR /opt 35 | COPY bin /opt 36 | 37 | CMD ["/bin/bash"] 38 | -------------------------------------------------------------------------------- /Dockerfile_fiji: -------------------------------------------------------------------------------- 1 | FROM ubuntu:focal 2 | 3 | RUN apt-get -qq update \ 4 | && apt-get -qq install --no-install-recommends --yes \ 5 | wget \ 6 | bzip2 \ 7 | ca-certificates \ 8 | curl \ 9 | unzip \ 10 | && apt-get clean \ 11 | && rm -rf /var/lib/apt/lists/* 12 | 13 | 14 | #Get imagej 15 | RUN wget --quiet https://downloads.imagej.net/fiji/latest/fiji-linux64.zip -P /tmp/ \ 16 | && unzip /tmp/fiji-linux64.zip -d /opt/ \ 17 | && rm /tmp/fiji-linux64.zip 18 | 19 | ENV PATH /opt/Fiji.app:$PATH 20 | 21 | # Install BigStitcher 22 | RUN ImageJ-linux64 --headless --update add-update-site BigStitcher https://sites.imagej.net/BigStitcher/ \ 23 | && ImageJ-linux64 --headless --update update 24 | 25 | # Install BaSiC_Mod 26 | RUN wget --quiet https://github.com/VasylVaskivskyi/BaSiC_Mod/releases/download/v1.0/BaSiC_Mod_v10.zip -P /tmp/ \ 27 | && unzip /tmp/BaSiC_Mod_v10.zip -d /tmp/ \ 28 | && mv /tmp/BaSiC_Mod_v10/BaSiC_Mod.jar /opt/Fiji.app/plugins/ \ 29 | && mv /tmp/BaSiC_Mod_v10/dependencies/* /opt/Fiji.app/jars/. 
\ 30 | && rm -r /tmp/BaSiC_Mod_v10 \ 31 | && rm /tmp/BaSiC_Mod_v10.zip \ 32 | && rm /opt/Fiji.app/jars/jtransforms-2.4.jar \ 33 | && rm /opt/Fiji.app/jars/netlib-java-0.9.3-renjin-patched-2.jar \ 34 | && ImageJ-linux64 --headless --update update 35 | FROM ubuntu:focal 36 | 37 | RUN apt-get -qq update \ 38 | && apt-get -qq install --no-install-recommends --yes \ 39 | wget \ 40 | bzip2 \ 41 | ca-certificates \ 42 | curl \ 43 | unzip \ 44 | && apt-get clean \ 45 | && rm -rf /var/lib/apt/lists/* 46 | 47 | 48 | #Get imagej 49 | RUN wget --quiet https://downloads.imagej.net/fiji/latest/fiji-linux64.zip -P /tmp/ \ 50 | && unzip /tmp/fiji-linux64.zip -d /opt/ \ 51 | && rm /tmp/fiji-linux64.zip 52 | 53 | ENV PATH /opt/Fiji.app:$PATH 54 | 55 | # Install BigStitcher 56 | RUN ImageJ-linux64 --headless --update add-update-site BigStitcher https://sites.imagej.net/BigStitcher/ \ 57 | && ImageJ-linux64 --headless --update update 58 | 59 | # Install BaSiC_Mod 60 | RUN wget --quiet https://github.com/hubmapconsortium/BaSiC_Mod/releases/download/v1.0/BaSiC_Mod_v10.zip -P /tmp/ \ 61 | && unzip /tmp/BaSiC_Mod_v10.zip -d /tmp/ \ 62 | && mv /tmp/BaSiC_Mod_v10/BaSiC_Mod.jar /opt/Fiji.app/plugins/ \ 63 | && mv /tmp/BaSiC_Mod_v10/dependencies/* /opt/Fiji.app/jars/. 
\ 64 | && rm -r /tmp/BaSiC_Mod_v10 \ 65 | && rm /tmp/BaSiC_Mod_v10.zip \ 66 | && rm /opt/Fiji.app/jars/jtransforms-2.4.jar \ 67 | && rm /opt/Fiji.app/jars/netlib-java-0.9.3-renjin-patched-2.jar \ 68 | && ImageJ-linux64 --headless --update update 69 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.com/hubmapconsortium/codex-pipeline.svg?branch=master)](https://travis-ci.com/hubmapconsortium/codex-pipeline) 2 | [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) 3 | 4 | # codex-pipeline 5 | A [CWL](https://www.commonwl.org/) pipeline for processing [CODEX](https://www.akoyabio.com/codextm/technology) image data, using [Cytokit](https://github.com/hammerlab/cytokit). 6 | 7 | ## Pipeline steps 8 | * Collect required parameters from metadata files. 9 | * Perform illumination correction with Fiji plugin [BaSiC](https://github.com/VasylVaskivskyi/BaSiC_Mod) 10 | * Find sharpest z-plane for each channel, using variation of Laplacian 11 | * Perform stitching of tiles using Fiji plugin [BigStitcher](https://imagej.net/plugins/bigstitcher/) 12 | * Create Cytokit YAML config file containing parameters from input metadata 13 | * Run Cytokit's `processor` command to perform tile pre-processing, and nucleus and cell segmentation. 14 | * Run Cytokit's `operator` command to extract all antigen fluoresence images (discarding blanks and empty channels). 15 | * Generate [OME-TIFF](https://docs.openmicroscopy.org/ome-model/6.0.1/ome-tiff/specification.html) versions of TIFFs created by Cytokit. 16 | * Stitch tiles with segmentation masks 17 | * Perform downstream analysis using [SPRM](https://github.com/hubmapconsortium/sprm). 
18 | 19 | 20 | ## Requirements 21 | 22 | Please use [HuBMAP Consortium fork of cwltool](https://github.com/hubmapconsortium/cwltool) 23 | to be able to run pipeline with GPU in Docker and Singularity containers.\ 24 | For the list of python packages check `environment.yml`. 25 | 26 | 27 | ## How to run 28 | 29 | `cwltool pipeline.cwl subm.yaml` 30 | 31 | If you use Singularity containers add `--singularity`. Example of submission file `subm.yaml` is provided in the repo. 32 | 33 | 34 | ## Expected input directory and file structure 35 | 36 | ``` 37 | codex_dataset/ 38 | src_data OR raw 39 | ├── channelnames.txt 40 | ├── channelnames_report.csv 41 | ├── experiment.json 42 | ├── exposure_times.txt 43 | ├── segmentation.json 44 | ├── Cyc1_reg1 OR Cyc001_reg001 45 | │ ├── 1_00001_Z001_CH1.tif 46 | │ ├── 1_00001_Z001_CH2.tif 47 | │ │ ... 48 | │ └── 1_0000N_Z00N_CHN.tif 49 | └── Cyc1_reg2 OR Cyc001_reg002 50 | ├── 2_00001_Z001_CH1.tif 51 | ├── 2_00001_Z001_CH2.tif 52 | │ ... 53 | └── 1_0000N_Z00N_CHN.tif 54 | 55 | ``` 56 | 57 | Images should be separated into directories by cycles and regions using the following pattern `Cyc{cycle:d}_reg{region:d}`. 58 | The file names must contain region, tile, z-plane and channel ids starting from 1, and follow this pattern 59 | `{region:d}_{tile:05d}_Z{zplane:03d}_CH{channel:d}.tif`. 60 | 61 | Necessary metadata files that must be present in the input directory: 62 | 63 | * `experiment.json` - acquisition parameters and data structure; 64 | * `segmentation.json` - which channel from which cycle to use for segmentation; 65 | * `channelnames.txt` - list of channel names, one per row; 66 | * `channelnames_report.csv` - which channels to use, and which to exclude; 67 | * `exposure_times.txt` - not used at the moment, but will be useful for background subtraction. 68 | 69 | Examples of these files are present in the directory `metadata_examples`. 
70 | Note: all fields related to regions, cycles, channels, z-planes and tiles start from 1, 71 | and xyResolution, zPitch are measured in `nm`. 72 | 73 | ## Output file structure 74 | 75 | ``` 76 | pipeline_output/ 77 | ├── expr 78 | │ ├── reg001_expr.ome.tiff 79 | │ └── reg002_expr.ome.tiff 80 | └── mask 81 | ├── reg001_mask.ome.tiff 82 | └── reg002_expr.ome.tiff 83 | ``` 84 | 85 | Where `expr` directory contains processed images and `mask` contains segmentation masks. 86 | The output of SPRM will be different, see https://github.com/hubmapconsortium/sprm . 87 | 88 | 89 | ## Development 90 | Code in this repository is formatted with [black](https://github.com/psf/black) and 91 | [isort](https://pypi.org/project/isort/), and this is checked via Travis CI. 92 | 93 | A [pre-commit](https://pre-commit.com/) hook configuration is provided, which runs `black` and `isort` before committing. 94 | Run `pre-commit install` in each clone of this repository which you will use for development (after `pip install pre-commit` 95 | into an appropriate Python environment, if necessary). 96 | 97 | ## Building containers 98 | Two `Dockerfile`s are included in this repository. A `docker_images.txt` manifest is included, which is intended 99 | for use in the `build_docker_containers` script provided by the 100 | [`multi-docker-build`](https://github.com/mruffalo/multi-docker-build) Python package. This package can be installed 101 | with 102 | ```shell script 103 | python -m pip install multi-docker-build 104 | ``` 105 | 106 | ## Release process 107 | 108 | The `master` branch is intended to be production-ready at all times, and should always reference Docker containers 109 | with the `latest` tag. 110 | 111 | Publication of tagged "release" versions of the pipeline is handled with the 112 | [HuBMAP pipeline release management](https://github.com/hubmapconsortium/pipeline-release-mgmt) Python package. 
Image = np.ndarray


def _laplacian_variance(img: Image) -> float:
    """Focus score for one image: variance of its Laplacian.

    DOI:10.1016/j.patcog.2012.11.011
    Analysis of focus measure operators for shape-from-focus
    """
    laplacian = cv.Laplacian(img, cv.CV_64F, ksize=21)
    return np.var(laplacian)


def _find_best_z_plane_id(img_list: List[Image]) -> int:
    """Return the 0-based index of the sharpest image (highest Laplacian variance).

    Ties resolve to the first occurrence, matching list.index semantics.
    """
    scores = [_laplacian_variance(img) for img in img_list]
    return scores.index(max(scores))


def _load_images(path_list: List[Path]) -> List[Image]:
    """Read every TIFF in *path_list* into memory, preserving order."""
    return [tif.imread(str(path)) for path in path_list]


def get_best_z_plane_id(path_list: List[Path]) -> int:
    """Return the 1-based id of the sharpest z-plane among the given files."""
    return _find_best_z_plane_id(_load_images(path_list)) + 1
def smoothing_z_ids(arr: np.ndarray) -> np.ndarray:
    """Smooth a 2D grid of z-plane ids with a unit-sigma Gaussian kernel.

    Neighbouring tiles should be in focus at similar depths, so blurring the
    grid (reflected at the borders) suppresses outlier picks before the ids
    are rounded back to unsigned integers.
    """
    blurred = gaussian_filter(arr.astype(np.float32), 1, mode="reflect")
    return np.round(blurred, 0).astype(np.uint32)


def best_z_correction(
    best_z_plane_id_list: List[int], x_ntiles: int, y_ntiles: int
) -> List[int]:
    """Smooth per-tile best-z ids across the tile grid and return a flat list.

    The flat id list is interpreted as a row-major (y_ntiles, x_ntiles) grid.
    Fix: the previous return annotation claimed np.ndarray, but the function
    has always returned a plain Python list (ravel().tolist()).
    """
    grid = np.array(best_z_plane_id_list, dtype=np.int32).reshape(y_ntiles, x_ntiles)
    print("Best z-plane per tile")
    print("Original values\n", grid)
    smoothed = smoothing_z_ids(grid)
    print("Corrected values\n", smoothed)
    return smoothed.ravel().tolist()


def pick_z_planes_below_and_above(best_z: int, max_z: int, above: int, below: int) -> List[int]:
    """Return the 1-based ids [best_z - below .. best_z + above] clamped to 1..max_z.

    The best plane itself is always included. The former explicit branches for
    best_z == 1 and best_z == max_z were no-ops once the range is clamped, so
    they are removed; behavior is unchanged.
    """
    if max_z == 1:
        return [best_z]
    start = max(best_z - below, 1)
    end = min(best_z + above, max_z)
    below_planes = list(range(start, best_z))
    above_planes = list(range(best_z + 1, end + 1))
    return below_planes + [best_z] + above_planes
enumerate(plane_paths_per_tile.keys()): 102 | best_z_plane_per_tile[tile] = pick_z_planes_below_and_above( 103 | corrected_best_z_plane_id_list[i], max_z, 1, 1 104 | ) 105 | return best_z_plane_per_tile 106 | -------------------------------------------------------------------------------- /bin/best_focus/best_z_paths.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from math import ceil 3 | from pathlib import Path 4 | from typing import Any, Dict, List, Tuple 5 | 6 | sys.path.append("/opt/") 7 | from best_z_identification import get_best_z_plane_ids_per_tile 8 | 9 | from pipeline_utils.dataset_listing import ( 10 | create_listing_for_each_cycle_region, 11 | extract_digits_from_string, 12 | ) 13 | 14 | 15 | def _change_image_file_name(original_name: str) -> str: 16 | """Output tiles will have names 1_00001_Z001_CH1.tif, 1_00002_Z001_CH1.tif ...""" 17 | digits = extract_digits_from_string(original_name) 18 | region = digits[0] 19 | tile = digits[1] 20 | zplane = 1 21 | channel = digits[3] 22 | return "{reg:d}_{tile:05d}_Z{z:03d}_CH{ch:d}.tif".format( 23 | reg=region, tile=tile, z=zplane, ch=channel 24 | ) 25 | 26 | 27 | def _get_reference_channel_paths( 28 | listing_per_cycle: dict, num_channels_per_cycle: int, reference_channel_id: int 29 | ) -> Dict[int, Path]: 30 | ref_cycle_id = ceil(reference_channel_id / num_channels_per_cycle) - 1 31 | ref_cycle = sorted(listing_per_cycle.keys())[ref_cycle_id] 32 | ref_cycle_ref_channel_id = reference_channel_id - ref_cycle_id * num_channels_per_cycle 33 | 34 | reference_channel_tile_paths = dict() 35 | for region in listing_per_cycle[ref_cycle]: 36 | reference_channel_tile_paths.update({region: {}}) 37 | this_channel_tile_paths = listing_per_cycle[ref_cycle][region][ref_cycle_ref_channel_id] 38 | reference_channel_tile_paths[region] = this_channel_tile_paths 39 | return reference_channel_tile_paths 40 | 41 | 42 | def _create_dirs_for_each_cycle_region( 43 | 
def _find_best_z_planes_per_region_tile(
    reference_channel_tile_paths: dict, max_z: int, x_ntiles: int, y_ntiles: int
) -> Dict[int, Dict[int, List[int]]]:
    """Find the best-focus z-plane ids of every tile, region by region.

    Returns {region: {tile: [z ids]}}.
    """
    return {
        region: get_best_z_plane_ids_per_tile(tile_paths, x_ntiles, y_ntiles, max_z)
        for region, tile_paths in reference_channel_tile_paths.items()
    }


def _map_best_z_planes_in_channel_to_output_plane(
    channel_paths: dict, out_dir: Path, best_z_plane_per_tile: dict
) -> List[Tuple[List[Path], Path]]:
    """Pair each tile's selected source z-plane paths with one output path.

    The output file name is derived from the first selected plane of the tile.
    """
    mapping: List[Tuple[List[Path], Path]] = []
    for tile, zplane_paths in channel_paths.items():
        selected = [zplane_paths[z_id] for z_id in best_z_plane_per_tile[tile]]
        out_name = _change_image_file_name(selected[0].name)
        mapping.append((selected, Path(out_dir).joinpath(out_name)))
    return mapping
Path]]]]]]: 93 | """Creates a map of several raw planes that will be processed into one image""" 94 | best_z_plane_paths = dict() 95 | for cycle in listing: 96 | best_z_plane_paths[cycle] = dict() 97 | for region in listing[cycle]: 98 | best_z_plane_paths[cycle][region] = dict() 99 | this_cyc_reg_out_dir = out_dirs[cycle][region] 100 | this_region_best_z_planes = best_z_plane_per_region[region] 101 | for channel in listing[cycle][region]: 102 | best_z_plane_paths[cycle][region][channel] = dict() 103 | for tile, zplane_dict in listing[cycle][region][channel].items(): 104 | this_tile_best_z_ids = this_region_best_z_planes[tile] 105 | this_tile_best_z_src_paths = [] 106 | for _id in this_tile_best_z_ids: 107 | this_tile_best_z_src_paths.append(zplane_dict[_id]) 108 | 109 | best_z_file_name = this_tile_best_z_src_paths[0].name 110 | this_tile_best_z_dst_combined_name = _change_image_file_name(best_z_file_name) 111 | this_tile_best_z_dst_combined_path = ( 112 | this_cyc_reg_out_dir / this_tile_best_z_dst_combined_name 113 | ) 114 | 115 | if tile in best_z_plane_paths[cycle][region][channel]: 116 | best_z_plane_paths[cycle][region][channel][tile].append( 117 | (this_tile_best_z_src_paths, this_tile_best_z_dst_combined_path) 118 | ) 119 | else: 120 | best_z_plane_paths[cycle][region][channel][tile] = [ 121 | (this_tile_best_z_src_paths, this_tile_best_z_dst_combined_path) 122 | ] 123 | return best_z_plane_paths 124 | 125 | 126 | def get_best_z_dirs_and_paths( 127 | img_dirs: List[Path], 128 | out_dir: Path, 129 | num_channels_per_cycle: int, 130 | max_z: int, 131 | x_ntiles: int, 132 | y_ntiles: int, 133 | reference_channel_id: int, 134 | ) -> Tuple[ 135 | Dict[int, Dict[int, Path]], 136 | Dict[int, Dict[int, Dict[int, Dict[int, List[Tuple[List[Path], Path]]]]]], 137 | ]: 138 | listing_per_cycle = create_listing_for_each_cycle_region(img_dirs) 139 | reference_channel_tile_paths = _get_reference_channel_paths( 140 | listing_per_cycle, num_channels_per_cycle, 
def find_best_z_paths_and_dirs(
    dataset_info: Dict[str, Any], img_dirs: List[Path], out_dir: Path
) -> Tuple[
    Dict[int, Dict[int, Path]],
    Dict[int, Dict[int, Dict[int, Dict[int, List[Tuple[List[Path], Path]]]]]],
]:
    """Unpack the dataset parameters and delegate to get_best_z_dirs_and_paths."""
    return get_best_z_dirs_and_paths(
        img_dirs,
        out_dir,
        dataset_info["num_channels"],
        dataset_info["num_z_planes"],
        dataset_info["num_tiles_x"],
        dataset_info["num_tiles_y"],
        dataset_info["reference_channel"],
    )


def make_dir_if_not_exists(dir_path: Path):
    """Create *dir_path* (including parents) unless it already exists."""
    if not dir_path.exists():
        dir_path.mkdir(parents=True)


def project_stack(path_list: List[Path]):
    """Load the given z-plane files and average them into one image.

    The mean is rounded and cast back to the dtype of the source planes.
    """
    planes = [tif.imread(str(path)) for path in path_list]
    stack = np.stack(planes, axis=0)
    return np.round(stack.mean(axis=0)).astype(stack.dtype)
def process_images_parallelized(best_z_plane_paths: List[tuple]):
    """Average every (source z-planes -> output path) pair in parallel via dask."""
    tasks = [
        dask.delayed(process_images)(src, dst) for src, dst in best_z_plane_paths
    ]
    dask.compute(*tasks, scheduler="processes")


def process_z_planes_and_save_to_out_dirs(
    best_z_out_dirs: Dict[int, Dict[int, Path]],
    best_z_plane_paths: Dict[int, Dict[int, Dict[int, Dict[int, List[Tuple[List[Path], Path]]]]]],
):
    """Create every output directory, then project and write each tile's planes."""
    for per_region_dirs in best_z_out_dirs.values():
        for dir_path in per_region_dirs.values():
            make_dir_if_not_exists(dir_path)

    for regions in best_z_plane_paths.values():
        for channels in regions.values():
            for tiles in channels.values():
                for paths in tiles.values():
                    process_images_parallelized(paths)


def make_dir_if_not_exists(dir_path: Path):
    """Create *dir_path* (including parents) unless it already exists."""
    if not dir_path.exists():
        dir_path.mkdir(parents=True)


def get_img_dirs(dataset_dir: Path) -> List[Path]:
    """Return absolute paths of the immediate subdirectories of *dataset_dir*.

    Plain files in *dataset_dir* are ignored, matching the old os.walk-based
    implementation.
    """
    root = dataset_dir.absolute()
    return [entry for entry in root.iterdir() if entry.is_dir()]
def main(data_dir: Path, pipeline_config_path: Path):
    """Select best-focus z-planes for a dataset and write averaged tiles.

    Output is written under the fixed pipeline location /output/best_focus.
    """
    best_focus_dir = Path("/output/best_focus")
    make_dir_if_not_exists(best_focus_dir)
    dataset_info = load_dataset_info(pipeline_config_path)
    img_dirs = get_img_dirs(data_dir)
    best_z_channel_dirs, best_z_plane_paths = find_best_z_paths_and_dirs(
        dataset_info, img_dirs, best_focus_dir
    )
    process_z_planes_and_save_to_out_dirs(best_z_channel_dirs, best_z_plane_paths)


def convert_location(x, y):
    """Format an (x, y) tile translation as a BigStitcher 3x4 affine string."""
    return "1.0 0.0 0.0 {x} 0.0 1.0 0.0 {y} 0.0 0.0 1.0 0.0".format(x=x, y=y)


if __name__ == "__main__":
    cli = argparse.ArgumentParser()
    cli.add_argument("--data_dir", type=Path, help="path to directory with dataset directory")
    cli.add_argument(
        "--pipeline_config_path", type=Path, help="path to pipelineConfig.json file"
    )
    cli_args = cli.parse_args()
    main(cli_args.data_dir, cli_args.pipeline_config_path)
25 | ET.SubElement(image_loader, "filePattern").text = file_pattern_str 26 | ET.SubElement(image_loader, "layoutTimepoints").text = "0" 27 | ET.SubElement(image_loader, "layoutChannels").text = "0" 28 | ET.SubElement(image_loader, "layoutIlluminations").text = "0" 29 | ET.SubElement(image_loader, "layoutAngles").text = "0" 30 | ET.SubElement(image_loader, "layoutTiles").text = "1" 31 | ET.SubElement(image_loader, "imglib2container").text = "CellImgFactory" 32 | # 33 | # 34 | view_setups = ET.SubElement(sequence_description, "ViewSetups") 35 | 36 | view_setup_template = ET.Element("ViewSetup") 37 | ET.SubElement(view_setup_template, "id").text = "0" 38 | ET.SubElement(view_setup_template, "name").text = "0" 39 | ET.SubElement(view_setup_template, "size").text = "2048 2048 1" 40 | voxel_size = ET.SubElement(view_setup_template, "voxelSize") 41 | ET.SubElement(voxel_size, "unit").text = "um" 42 | ET.SubElement(voxel_size, "size").text = "1.0 1.0 1.0" 43 | view_attributes = ET.SubElement(view_setup_template, "attributes") 44 | ET.SubElement(view_attributes, "illumination").text = "0" 45 | ET.SubElement(view_attributes, "channel").text = "0" 46 | ET.SubElement(view_attributes, "tile").text = "0" 47 | ET.SubElement(view_attributes, "angle").text = "0" 48 | tile_shape_str = str(tile_shape[1]) + " " + str(tile_shape[0]) + " 1" 49 | for i in range(0, num_tiles): 50 | vs = deepcopy(view_setup_template) 51 | vs.find("id").text = str(i) 52 | vs.find("name").text = str(i) 53 | vs.find("size").text = tile_shape_str 54 | vs.find("attributes").find("tile").text = str(i) 55 | view_setups.append(vs) 56 | # 57 | # 58 | attrib_illumination = ET.SubElement(view_setups, "Attributes", {"name": "illumination"}) 59 | attrib_illumination_illumination = ET.SubElement(attrib_illumination, "Illumination") 60 | ET.SubElement(attrib_illumination_illumination, "id").text = "0" 61 | ET.SubElement(attrib_illumination_illumination, "name").text = "0" 62 | 63 | attrib_channel = 
ET.SubElement(view_setups, "Attributes", {"name": "channel"}) 64 | attrib_channel_channel = ET.SubElement(attrib_channel, "Channel") 65 | ET.SubElement(attrib_channel_channel, "id").text = "0" 66 | ET.SubElement(attrib_channel_channel, "name").text = "0" 67 | 68 | attrib_tile = ET.SubElement(view_setups, "Attributes", {"name": "tile"}) 69 | 70 | attrib_tile_tile = ET.Element("Tile") 71 | ET.SubElement(attrib_tile_tile, "id").text = "0" 72 | ET.SubElement(attrib_tile_tile, "name").text = "0" 73 | ET.SubElement(attrib_tile_tile, "location").text = "0.0 0.0 0.0" 74 | for i in range(0, num_tiles): 75 | att = deepcopy(attrib_tile_tile) 76 | att.find("id").text = str(i) 77 | att.find("name").text = str(i + 1) 78 | attrib_tile.append(att) 79 | 80 | attrib_angle = ET.SubElement(view_setups, "Attributes", {"name": "angle"}) 81 | attrib_angle_angle = ET.SubElement(attrib_angle, "Angle") 82 | ET.SubElement(attrib_angle_angle, "id").text = "0" 83 | ET.SubElement(attrib_angle_angle, "name").text = "0" 84 | # 85 | 86 | timepoints = ET.SubElement(sequence_description, "Timepoints", {"type": "pattern"}) 87 | ET.SubElement(timepoints, "integerpattern") 88 | # 89 | # 90 | view_registrations = ET.SubElement(root, "ViewRegistrations") 91 | 92 | view_registration_template = ET.Element("ViewRegistration", {"timepoint": "0", "setup": "0"}) 93 | view_transform_translation = ET.SubElement( 94 | view_registration_template, "ViewTransform", {"type": "affine"} 95 | ) 96 | ET.SubElement(view_transform_translation, "Name").text = "Translation to Regular Grid" 97 | ET.SubElement(view_transform_translation, "affine").text = ( 98 | "1.0 0.0 0.0 -2867.2 0.0 1.0 0.0 -1024.0 0.0 0.0 1.0 0.0" 99 | ) 100 | view_transform_calibration = ET.SubElement( 101 | view_registration_template, "ViewTransform", {"type": "affine"} 102 | ) 103 | ET.SubElement(view_transform_calibration, "Name").text = "calibration" 104 | ET.SubElement(view_transform_calibration, "affine").text = ( 105 | "1.0 0.0 0.0 0.0 0.0 1.0 0.0 
0.0 0.0 0.0 1.0 0.0" 106 | ) 107 | 108 | for i in range(0, num_tiles): 109 | vr = deepcopy(view_registration_template) 110 | vr.set("timepoint", "0") 111 | vr.set("setup", str(i)) 112 | vr.find("ViewTransform").find("affine").text = convert_location(*tile_locations[i]) 113 | view_registrations.append(vr) 114 | 115 | # 116 | view_interest_points = ET.SubElement(root, "ViewInterestPoints") 117 | bounding_boxes = ET.SubElement(root, "BoundingBoxes") 118 | point_spread_functions = ET.SubElement(root, "PointSpreadFunctions") 119 | stitching_results = ET.SubElement(root, "StitchingResults") 120 | IntensityAdjustments = ET.SubElement(root, "IntensityAdjustments") 121 | 122 | declaration = '' 123 | xml_str = ET.tostring(root, encoding="utf-8").decode() 124 | xml_str = declaration + xml_str 125 | 126 | return xml_str 127 | 128 | 129 | def grid_to_snake(arr): 130 | nrows = arr.shape[0] 131 | new_arr = arr.copy() 132 | for i in range(0, nrows): 133 | if i % 2 != 0: 134 | new_arr[i, :] = new_arr[i, :][::-1] 135 | return new_arr 136 | 137 | 138 | def generate_dataset_xml( 139 | x_ntiles: int, 140 | y_ntiles: int, 141 | tile_shape: Tuple[int, int], 142 | x_overlap: int, 143 | y_overlap: int, 144 | pattern_str: str, 145 | out_path: Path, 146 | is_snake=True, 147 | ): 148 | num_tiles = x_ntiles * y_ntiles 149 | 150 | loc_array = np.arange(0, y_ntiles * x_ntiles).reshape(y_ntiles, x_ntiles) 151 | img_sizes_x = np.zeros_like(loc_array) 152 | img_sizes_y = np.zeros_like(loc_array) 153 | 154 | for y in range(0, y_ntiles): 155 | y_size = tile_shape[0] - y_overlap 156 | for x in range(0, x_ntiles): 157 | x_size = tile_shape[1] - x_overlap 158 | 159 | img_sizes_x[y, x] = x_size 160 | img_sizes_y[y, x] = y_size 161 | 162 | img_positions_x = np.concatenate((np.zeros((y_ntiles, 1)), img_sizes_x[:, 1:]), axis=1) 163 | img_positions_y = np.concatenate((np.zeros((1, x_ntiles)), img_sizes_y[1:, :]), axis=0) 164 | 165 | img_positions_x = np.cumsum(img_positions_x, axis=1) 166 | img_positions_y = 
np.cumsum(img_positions_y, axis=0) 167 | 168 | if is_snake: 169 | img_positions_x = grid_to_snake(img_positions_x) 170 | img_positions_y = grid_to_snake(img_positions_y) 171 | 172 | tile_locations = list(zip(list(np.ravel(img_positions_x)), list(np.ravel(img_positions_y)))) 173 | 174 | bs_xml = create_meta(pattern_str, num_tiles, tile_shape, tile_locations) 175 | 176 | dom = xml.dom.minidom.parseString(bs_xml) 177 | pretty_xml_as_string = dom.toprettyxml() 178 | 179 | with open(out_path, "w") as s: 180 | s.write(pretty_xml_as_string) 181 | -------------------------------------------------------------------------------- /bin/codex_stitching/directory_management.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | from math import ceil 4 | from pathlib import Path 5 | from typing import List 6 | 7 | 8 | def make_dir_if_not_exists(dir_path: Path): 9 | if not dir_path.exists(): 10 | dir_path.mkdir(parents=True) 11 | 12 | 13 | def get_img_dirs(dataset_dir: Path) -> List[Path]: 14 | dataset_dir = dataset_dir.absolute() 15 | img_dir_names = next(os.walk(dataset_dir))[1] 16 | img_dir_paths = [dataset_dir.joinpath(dir_name).absolute() for dir_name in img_dir_names] 17 | return img_dir_paths 18 | 19 | 20 | def create_dirs_for_stitched_channels(channel_dirs: dict, out_dir: Path): 21 | stitched_channel_dirs = dict() 22 | for cycle in channel_dirs: 23 | stitched_channel_dirs[cycle] = {} 24 | for region in channel_dirs[cycle]: 25 | stitched_channel_dirs[cycle][region] = {} 26 | for channel, dir_path in channel_dirs[cycle][region].items(): 27 | dirname = Path(dir_path).name 28 | stitched_dir_path = out_dir.joinpath(dirname) 29 | stitched_channel_dirs[cycle][region][channel] = stitched_dir_path 30 | make_dir_if_not_exists(stitched_dir_path) 31 | 32 | return stitched_channel_dirs 33 | 34 | 35 | def get_ref_channel_dir_per_region( 36 | channel_dirs: dict, 37 | stitched_channel_dirs: dict, 38 | 
num_channels_per_cycle: int, 39 | reference_channel_id: int, 40 | ): 41 | ref_cycle_id = ceil(reference_channel_id / num_channels_per_cycle) - 1 42 | ref_cycle = sorted(channel_dirs.keys())[ref_cycle_id] 43 | in_cycle_ref_channel_id = reference_channel_id - ref_cycle_id * num_channels_per_cycle 44 | 45 | reference_channel_dir = dict() 46 | for region in channel_dirs[ref_cycle]: 47 | this_channel_dir = channel_dirs[ref_cycle][region][in_cycle_ref_channel_id] 48 | reference_channel_dir[region] = this_channel_dir 49 | 50 | stitched_ref_channel_dir = dict() 51 | for region in stitched_channel_dirs[ref_cycle]: 52 | this_channel_dir = stitched_channel_dirs[ref_cycle][region][in_cycle_ref_channel_id] 53 | stitched_ref_channel_dir[region] = this_channel_dir 54 | 55 | return reference_channel_dir, stitched_ref_channel_dir 56 | 57 | 58 | def create_output_dirs_for_tiles( 59 | stitched_channel_dirs: dict, out_dir: Path, dir_naming_template: str 60 | ): 61 | new_tiles_dirs = dict() 62 | for cycle in stitched_channel_dirs: 63 | new_tiles_dirs[cycle] = {} 64 | for region in stitched_channel_dirs[cycle]: 65 | new_tiles_dir_name = dir_naming_template.format(cycle=cycle, region=region) 66 | new_tiles_dir_path = out_dir.joinpath(new_tiles_dir_name) 67 | make_dir_if_not_exists(new_tiles_dir_path) 68 | new_tiles_dirs[cycle][region] = new_tiles_dir_path 69 | 70 | return new_tiles_dirs 71 | 72 | 73 | def remove_temp_dirs(stitched_channel_dirs: dict): 74 | for cycle in stitched_channel_dirs: 75 | for region in stitched_channel_dirs[cycle]: 76 | for channel, dir_path in stitched_channel_dirs[cycle][region].items(): 77 | shutil.rmtree(str(dir_path)) 78 | 79 | 80 | def check_if_images_in_dir(dir_path: Path): 81 | allowed_extensions = (".tif", ".tiff") 82 | listing = list(dir_path.iterdir()) 83 | img_listing = [f for f in listing if f.suffix in allowed_extensions] 84 | if img_listing: 85 | return True 86 | else: 87 | return False 88 | 89 | 90 | def check_stitched_dirs(stitched_channel_dirs: 
dict): 91 | print("\nChecking if BigStitcher produced image:") 92 | checked_str = [] 93 | checked_bool = [] 94 | for cycle in stitched_channel_dirs: 95 | for region in stitched_channel_dirs[cycle]: 96 | for channel, dir_path in stitched_channel_dirs[cycle][region].items(): 97 | if check_if_images_in_dir(dir_path): 98 | checked_str.append(str(dir_path) + " passed") 99 | checked_bool.append(True) 100 | else: 101 | checked_str.append(str(dir_path) + " no image in dir") 102 | checked_bool.append(False) 103 | 104 | print("\n".join(checked_str)) 105 | 106 | if sum(checked_bool) < len(checked_bool): 107 | raise ValueError( 108 | "Probably there was an error while running BigStitcher. " 109 | + "There is no image in one or more directories." 110 | ) 111 | -------------------------------------------------------------------------------- /bin/codex_stitching/generate_bigstitcher_macro.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | # from datetime import datetime 4 | from bigstitcher_dataset_meta import generate_dataset_xml 5 | 6 | 7 | class BigStitcherMacro: 8 | def __init__(self): 9 | self.img_dir = Path(".") 10 | self.out_dir = Path(".") 11 | self.xml_file_name = "dataset.xml" 12 | self.pattern = "{xxxxx}.tif" 13 | 14 | # range: 1-5 or list: 1,2,3,4,5 15 | self.num_tiles = 1 16 | 17 | self.num_tiles_x = 1 18 | self.num_tiles_y = 1 19 | 20 | self.tile_shape = (1440, 1920) 21 | 22 | # overlap in pixels 23 | self.overlap_x = 10 24 | self.overlap_y = 10 25 | self.overlap_z = 1 26 | 27 | # distance in um 28 | self.pixel_distance_x = 1 29 | self.pixel_distance_y = 1 30 | self.pixel_distance_z = 1 31 | 32 | self.tiling_mode = "snake" 33 | self.is_snake = True 34 | self.region = 1 35 | 36 | self.path_to_xml_file = Path(".") 37 | 38 | self.__location = Path(__file__).parent.resolve() 39 | 40 | def generate(self): 41 | self.make_dir_if_not_exists(self.out_dir) 42 | self.create_path_to_xml_file() 43 | 
self.check_if_tiling_mode_is_snake() 44 | 45 | formatted_macro = self.replace_values_in_macro() 46 | print("fiji macro script for estimation of stitching parameters") 47 | print(formatted_macro) 48 | macro_file_path = self.write_to_temp_macro_file(formatted_macro) 49 | 50 | generate_dataset_xml( 51 | self.num_tiles_x, 52 | self.num_tiles_y, 53 | self.tile_shape, 54 | self.overlap_x, 55 | self.overlap_y, 56 | self.pattern, 57 | self.path_to_xml_file, 58 | self.is_snake, 59 | ) 60 | 61 | return macro_file_path 62 | 63 | def make_dir_if_not_exists(self, dir_path: Path): 64 | if not dir_path.exists(): 65 | dir_path.mkdir(parents=True) 66 | 67 | def create_path_to_xml_file(self): 68 | self.path_to_xml_file = self.img_dir.joinpath(self.xml_file_name) 69 | 70 | def check_if_tiling_mode_is_snake(self): 71 | if self.tiling_mode == "snake": 72 | self.is_snake = True 73 | else: 74 | self.is_snake = False 75 | 76 | def convert_tiling_mode(self, tiling_mode): 77 | if tiling_mode == "snake": 78 | bigstitcher_tiling_mode = "[Snake: Right & Down ]" 79 | elif tiling_mode == "grid": 80 | bigstitcher_tiling_mode = "[Grid: Right & Down ]" 81 | return bigstitcher_tiling_mode 82 | 83 | def replace_values_in_macro(self): 84 | macro_template = self.estimate_stitch_param_macro_template 85 | formatted_macro = macro_template.format( 86 | img_dir=self.path_to_str(self.img_dir), 87 | out_dir=self.path_to_str(self.out_dir), 88 | path_to_xml_file=self.path_to_str(self.path_to_xml_file), 89 | pattern=self.path_to_str(self.img_dir.joinpath(self.pattern)), 90 | num_tiles=self.make_range(self.num_tiles), 91 | num_tiles_x=self.num_tiles_x, 92 | num_tiles_y=self.num_tiles_y, 93 | overlap_x=self.overlap_x, 94 | overlap_y=self.overlap_y, 95 | overlap_z=self.overlap_z, 96 | pixel_distance_x=self.pixel_distance_x, 97 | pixel_distance_y=self.pixel_distance_y, 98 | pixel_distance_z=self.pixel_distance_z, 99 | tiling_mode=self.convert_tiling_mode(self.tiling_mode), 100 | ) 101 | return formatted_macro 102 | 
    def write_to_temp_macro_file(self, formatted_macro):
        """Write the formatted macro to reg<region>_stitch_macro.ijm inside img_dir.

        Returns the path of the written macro file.
        """
        file_name = "reg" + str(self.region) + "_stitch_macro.ijm"
        macro_file_path = self.img_dir.joinpath(file_name)
        with open(macro_file_path, "w") as f:
            f.write(formatted_macro)
        return macro_file_path

    def make_range(self, number):
        """Return "1,2,...,number" — the comma-separated tile id list used by the macro."""
        return ",".join([str(n) for n in range(1, number + 1)])

    def path_to_str(self, path: Path):
        """Absolute path as a POSIX-style (forward-slash) string, as ImageJ expects."""
        return str(path.absolute().as_posix())

    # ImageJ macro template: pairwise shift estimation -> correlation filtering ->
    # global optimization, all on the BigStitcher dataset at {path_to_xml_file}.
    estimate_stitch_param_macro_template = """
    // calculate pairwise shifts
    run("Calculate pairwise shifts ...",
        "select={path_to_xml_file}" +
        " process_angle=[All angles]" +
        " process_channel=[All channels]" +
        " process_illumination=[All illuminations]" +
        " process_tile=[All tiles]" +
        " process_timepoint=[All Timepoints]" +
        " method=[Phase Correlation]" +
        " show_expert_algorithm_parameters" +
        " downsample_in_x=1" +
        " downsample_in_y=1" +
        " number=5" +
        " minimal=10" +
        " subpixel");

    // filter shifts with 0.7 corr. threshold
    run("Filter pairwise shifts ...",
        "select={path_to_xml_file}" +
        " filter_by_link_quality" +
        " min_r=0.7" +
        " max_r=1" +
        " max_shift_in_x=0" +
        " max_shift_in_y=0" +
        " max_shift_in_z=0" +
        " max_displacement=0");

    // do global optimization
    run("Optimize globally and apply shifts ...",
        "select={path_to_xml_file}" +
        " process_angle=[All angles]" +
        " process_channel=[All channels]" +
        " process_illumination=[All illuminations]" +
        " process_tile=[All tiles]" +
        " process_timepoint=[All Timepoints]" +
        " relative=2.500" +
        " absolute=3.500" +
        " global_optimization_strategy=[Two-Round using Metadata to align unconnected Tiles]" +
        " fix_group_0-0,");

    run("Quit");
    eval("script", "System.exit(0);");

    """


class FuseMacro:
    """Generates a per-channel ImageJ "Fuse dataset" macro.

    Unlike BigStitcherMacro (which estimates stitching parameters), this macro
    only fuses tiles using an already existing dataset.xml in img_dir.
    """

    def __init__(self):
        # Directory holding the channel's tile images and its dataset.xml.
        self.img_dir = Path(".")
        self.xml_file_name = "dataset.xml"
        # Directory where the fused TIFF stacks will be written.
        self.out_dir = Path(".")
        # NOTE(review): stored but never read by any method visible in this file.
        self.__location = Path(__file__).parent.absolute()

    def generate(self):
        """Format the fuse macro and write it as fuse_macro.ijm into img_dir.

        NOTE(review): macro_file_path is assigned but neither used nor returned,
        unlike BigStitcherMacro.generate(); callers in image_stitching.py
        currently ignore the result, so this is latent inconsistency only.
        """
        formatted_macro = self.replace_values_in_macro()
        macro_file_path = self.write_to_macro_file_in_channel_dir(self.img_dir, formatted_macro)

    def replace_values_in_macro(self):
        """Fill the fuse macro template with this instance's paths."""
        macro_template = self.fuse_macro_template
        formatted_macro = macro_template.format(
            img_dir=self.path_to_str(self.img_dir),
            path_to_xml_file=self.path_to_str(self.img_dir.joinpath(self.xml_file_name)),
            out_dir=self.path_to_str(self.out_dir),
        )
        return formatted_macro

    def write_to_macro_file_in_channel_dir(self, img_dir: Path, formatted_macro: str):
        """Write the macro to <img_dir>/fuse_macro.ijm and return that path."""
        macro_file_path = img_dir.joinpath("fuse_macro.ijm")
        with open(macro_file_path, "w") as f:
            f.write(formatted_macro)
        return macro_file_path

    def path_to_str(self, path: Path):
        """Absolute path as a POSIX-style (forward-slash) string, as ImageJ expects."""
        return str(path.absolute().as_posix())
    # ImageJ macro template: fuse all tiles of the dataset referenced by
    # {path_to_xml_file} and save the result as TIFF stacks in {out_dir}.
    fuse_macro_template = """
    // fuse dataset, save as TIFF
    run("Fuse dataset ...",
        "select={path_to_xml_file}" +
        " process_angle=[All angles]" +
        " process_channel=[All channels]" +
        " process_illumination=[All illuminations]" +
        " process_tile=[All tiles]" +
        " process_timepoint=[All Timepoints]" +
        " bounding_box=[All Views]" +
        " downsampling=1" +
        " pixel_type=[16-bit unsigned integer]" +
        " interpolation=[Linear Interpolation]" +
        " image=[Precompute Image]" +
        " interest_points_for_non_rigid=[-= Disable Non-Rigid =-]" +
        " blend produce=[Each timepoint & channel]" +
        " fused_image=[Save as (compressed) TIFF stacks]" +
        " output_file_directory={out_dir}");

    run("Quit");
    eval("script", "System.exit(0);");

    """
--------------------------------------------------------------------------------
/bin/codex_stitching/image_stitching.py:
--------------------------------------------------------------------------------
import platform
import shutil
import subprocess
from pathlib import Path
from typing import List

import dask
import tifffile as tif
from directory_management import (
    check_stitched_dirs,
    create_dirs_for_stitched_channels,
    get_ref_channel_dir_per_region,
)
from generate_bigstitcher_macro import BigStitcherMacro, FuseMacro


def get_image_path_in_dir(dir_path: Path) -> Path:
    """Return the first .tif/.tiff file found in dir_path.

    Assumes at least one TIFF is present; raises IndexError on an empty dir.
    """
    allowed_extensions = (".tif", ".tiff")
    listing = list(dir_path.iterdir())
    img_listing = [f for f in listing if f.suffix in allowed_extensions]
    return img_listing[0]


def generate_bigstitcher_macro_for_reference_channel(
    reference_channel_dir: Path, out_dir: Path, dataset_info: dict, region: int
) -> Path:
    """Configure a BigStitcherMacro from dataset_info and write it to disk.

    Returns the path of the generated .ijm macro file.
    """
    # Tiles overlap their neighbours, so the raw tile footprint is size + overlap.
    tile_shape = (
        dataset_info["tile_height"] + dataset_info["overlap_y"],
        dataset_info["tile_width"] + dataset_info["overlap_x"],
    )

    macro = BigStitcherMacro()
    macro.img_dir = reference_channel_dir
    macro.out_dir = out_dir
    macro.pattern = "{xxxxx}.tif"
    macro.num_tiles = dataset_info["num_tiles"]
    macro.num_tiles_x = dataset_info["num_tiles_x"]
    macro.num_tiles_y = dataset_info["num_tiles_y"]
    macro.tile_shape = tile_shape
    macro.overlap_x = dataset_info["overlap_x"]
    macro.overlap_y = dataset_info["overlap_y"]
    macro.overlap_z = dataset_info["overlap_z"]
    macro.pixel_distance_x = dataset_info["pixel_distance_x"]
    macro.pixel_distance_y = dataset_info["pixel_distance_y"]
    macro.pixel_distance_z = dataset_info["pixel_distance_z"]
    macro.tiling_mode = dataset_info["tiling_mode"]
    macro.region = region
    macro_path = macro.generate()

    return macro_path


def run_bigstitcher(bigstitcher_macro_path: Path):
    """Run headless ImageJ with the given macro file.

    It is expected that ImageJ is added to system PATH.
    """
    if platform.system() == "Windows":
        imagej_name = "ImageJ-win64"
    elif platform.system() == "Linux":
        imagej_name = "ImageJ-linux64"
    elif platform.system() == "Darwin":
        imagej_name = "ImageJ-macosx"
    else:
        raise ValueError(f"unsupported platform: {platform.system()}")

    command = imagej_name + " --headless --console -macro " + str(bigstitcher_macro_path)
    print("Started running BigStitcher for", str(bigstitcher_macro_path))
    # NOTE(review): with check=True, subprocess.run raises CalledProcessError on a
    # non-zero exit code, so the `else` branch below (which would surface the
    # captured stderr) is unreachable. Drop check=True if that message is wanted.
    res = subprocess.run(
        command, shell=True, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )
    if res.returncode == 0:
        print("Finished", str(bigstitcher_macro_path))
    else:
        raise Exception(
            "There was an error while running the BigStitcher for "
            + str(bigstitcher_macro_path)
            + "\n"
            + res.stderr.decode("utf-8")
        )


def run_bigstitcher_for_ref_channel_per_region(
    ref_channel_dir_per_region: dict,
    ref_channel_stitched_dir_per_region: dict,
    info_for_bigstitcher: dict,
):
    """Generate and run the parameter-estimation macro for each region's reference channel."""
    for region, dir_path in ref_channel_dir_per_region.items():
        ref_channel_dir = dir_path
        ref_channel_stitched_dir = ref_channel_stitched_dir_per_region[region]
        bigstitcher_macro_path = generate_bigstitcher_macro_for_reference_channel(
            ref_channel_dir, ref_channel_stitched_dir, info_for_bigstitcher, region
        )
        run_bigstitcher(bigstitcher_macro_path)


def copy_dataset_xml_to_channel_dirs(ref_channel_dir: Path, other_channel_dirs: List[Path]):
    """Copy the reference channel's dataset.xml into every other channel dir."""
    dataset_xml_path = ref_channel_dir.joinpath("dataset.xml")
    for dir_path in other_channel_dirs:
        dst_path = dir_path.joinpath("dataset.xml")
        try:
            shutil.copy(dataset_xml_path, dst_path)
        except shutil.SameFileError:
            # The reference channel dir itself appears in the list; skip it.
            continue


def copy_fuse_macro_to_channel_dirs(channel_dirs: List[Path], channel_stitched_dirs: List[Path]):
    """Write a fuse_macro.ijm into each channel dir, targeting its stitched output dir."""
    macro = FuseMacro()
    for i, dir_path in enumerate(channel_dirs):
        macro.img_dir = dir_path
        macro.xml_file_name = "dataset.xml"
        macro.out_dir = channel_stitched_dirs[i]
        macro.generate()


def copy_bigsticher_files_to_dirs(
    channel_dirs: dict, stitched_channel_dirs: dict, ref_channel_dir_per_region: dict
):
    """Distribute the region's dataset.xml and a fuse macro to all channel dirs."""
    for cycle in channel_dirs:
        for region in channel_dirs[cycle]:
            this_region_ref_channel_dir = ref_channel_dir_per_region[region]
            channel_dir_list = list(channel_dirs[cycle][region].values())
            channel_stitched_dir_list = list(stitched_channel_dirs[cycle][region].values())

            copy_dataset_xml_to_channel_dirs(this_region_ref_channel_dir, channel_dir_list)
            copy_fuse_macro_to_channel_dirs(channel_dir_list, channel_stitched_dir_list)


def run_stitching_for_all_channels(channel_dirs: dict):
    """Run every channel's fuse_macro.ijm in parallel on the dask process scheduler."""
    task = []
    for cycle in channel_dirs:
        for region in channel_dirs[cycle]:
            for channel, dir_path in channel_dirs[cycle][region].items():
                macro_path = dir_path.joinpath("fuse_macro.ijm")
                task.append(dask.delayed(run_bigstitcher)(macro_path))

    dask.compute(*task, scheduler="processes")


def get_stitched_image_shape(ref_channel_stitched_dir_per_region):
    """Read the shape of the first region's stitched reference image.

    The loop-and-break grabs an arbitrary first region; presumably all regions
    share the same stitched shape — TODO confirm.
    """
    for region, dir_path in ref_channel_stitched_dir_per_region.items():
        stitched_image_path = get_image_path_in_dir(dir_path)
        break
    with tif.TiffFile(stitched_image_path) as TF:
        stitched_image_shape = TF.series[0].shape

    return stitched_image_shape


def stitch_images(channel_dirs, dataset_meta, out_dir):
    """Estimate stitching on the reference channel, then fuse all channels.

    Returns (stitched_channel_dirs, stitched_img_shape).
    """
    ref_channel_id = int(dataset_meta["reference_channel"])
    num_channels_per_cycle = dataset_meta["num_channels"]

    stitched_channel_dirs = create_dirs_for_stitched_channels(channel_dirs, out_dir)

    ref_ch_dirs = get_ref_channel_dir_per_region(
        channel_dirs, stitched_channel_dirs, num_channels_per_cycle, ref_channel_id
    )
    ref_channel_dir_per_region, ref_channel_stitched_dir_per_region = ref_ch_dirs

    print("\nEstimating stitching parameters")
    run_bigstitcher_for_ref_channel_per_region(
        ref_channel_dir_per_region, ref_channel_stitched_dir_per_region, dataset_meta
    )

    print("\nStitching channels")
    copy_bigsticher_files_to_dirs(channel_dirs, stitched_channel_dirs, ref_channel_dir_per_region)
    run_stitching_for_all_channels(channel_dirs)
    check_stitched_dirs(stitched_channel_dirs)
    stitched_img_shape = get_stitched_image_shape(ref_channel_stitched_dir_per_region)

    return stitched_channel_dirs, stitched_img_shape
--------------------------------------------------------------------------------
/bin/codex_stitching/run_stitching.py:
--------------------------------------------------------------------------------
import argparse
import json
import shutil
import sys
from datetime import datetime
from pathlib import Path
from typing import Dict, List

import dask

sys.path.append("/opt/") 12 | from directory_management import ( 13 | create_output_dirs_for_tiles, 14 | get_img_dirs, 15 | make_dir_if_not_exists, 16 | remove_temp_dirs, 17 | ) 18 | from image_stitching import stitch_images 19 | 20 | from pipeline_utils.dataset_listing import ( 21 | create_listing_for_each_cycle_region, 22 | get_img_dirs, 23 | ) 24 | from pipeline_utils.pipeline_config_reader import load_dataset_info 25 | 26 | 27 | def print_img_dirs(img_dirs: List[Path]): 28 | print("Image directories:") 29 | for dir_path in img_dirs: 30 | print(str(dir_path)) 31 | 32 | 33 | def load_pipeline_config(pipeline_config_path: Path) -> dict: 34 | with open(pipeline_config_path, "r") as s: 35 | submission = json.load(s) 36 | 37 | return submission 38 | 39 | 40 | def get_file_listing(data_dir: Path): 41 | img_dirs = get_img_dirs(data_dir) 42 | listing = create_listing_for_each_cycle_region(img_dirs) 43 | return listing 44 | 45 | 46 | def copy_to_channel_dirs(listing, base_channel_dir: Path) -> Dict[int, Dict[int, Dict[int, Path]]]: 47 | new_dir_name_template = "Cyc{cyc:03d}_Reg{reg:03d}_Ch{ch:03d}" 48 | dst_name_template = "{tile:05d}.tif" 49 | channel_dirs = dict() 50 | for cycle in listing: 51 | channel_dirs[cycle] = dict() 52 | for region in listing[cycle]: 53 | channel_dirs[cycle][region] = dict() 54 | for channel in listing[cycle][region]: 55 | dir_name = new_dir_name_template.format(cyc=cycle, reg=region, ch=channel) 56 | dir_path = base_channel_dir / dir_name 57 | make_dir_if_not_exists(dir_path) 58 | channel_dirs[cycle][region][channel] = dir_path 59 | for tile in listing[cycle][region][channel]: 60 | for zplane, src in listing[cycle][region][channel][tile].items(): 61 | dst_name = dst_name_template.format(tile=tile) 62 | dst = dir_path / dst_name 63 | shutil.copy(src, dst) 64 | return channel_dirs 65 | 66 | 67 | def main(data_dir: Path, pipeline_config_path: Path): 68 | start = datetime.now() 69 | print("\nStarted", start) 70 | 71 | dataset_info = 
load_dataset_info(pipeline_config_path) 72 | 73 | out_dir = Path("/output/stitched_images") 74 | base_channel_dir = Path("/output/channel_dirs") 75 | 76 | make_dir_if_not_exists(out_dir) 77 | make_dir_if_not_exists(base_channel_dir) 78 | 79 | num_workers = dataset_info["num_concurrent_tasks"] 80 | dask.config.set({"num_workers": num_workers, "scheduler": "processes"}) 81 | 82 | listing = get_file_listing(data_dir) 83 | channel_dirs = copy_to_channel_dirs(listing, base_channel_dir) 84 | stitched_channel_dirs, stitched_img_shape = stitch_images(channel_dirs, dataset_info, out_dir) 85 | 86 | print("\nTime elapsed", datetime.now() - start) 87 | 88 | 89 | if __name__ == "__main__": 90 | parser = argparse.ArgumentParser() 91 | parser.add_argument("--data_dir", type=Path, help="path to directory with image directories") 92 | parser.add_argument( 93 | "--pipeline_config_path", type=Path, help="path to pipelineConfig.json file" 94 | ) 95 | 96 | args = parser.parse_args() 97 | 98 | main(args.data_dir, args.pipeline_config_path) 99 | -------------------------------------------------------------------------------- /bin/codex_stitching/secondary_stitcher/mask_stitching.py: -------------------------------------------------------------------------------- 1 | import gc 2 | from copy import deepcopy 3 | from typing import Dict, List, Tuple 4 | 5 | import dask 6 | import numpy as np 7 | import pandas as pd 8 | from match_masks import get_matched_masks 9 | from skimage.measure import regionprops_table 10 | 11 | Image = np.ndarray 12 | 13 | 14 | def generate_ome_meta_for_mask(size_y: int, size_x: int, dtype, match_fraction: float) -> str: 15 | template = """ 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | FractionOfMatchedCellsAndNuclei 34 | {match_fraction} 35 | 36 | 37 | 38 | 39 | 40 | """ 41 | ome_meta = template.format( 42 | size_y=size_y, size_x=size_x, dtype=np.dtype(dtype).name, match_fraction=match_fraction 43 | ) 44 | return ome_meta 

def get_labels_sorted_by_coordinates(img) -> List[int]:
    """Return label ids ordered by their centroid position (first by y, then by x)."""
    props = regionprops_table(img, properties=("label", "centroid"))
    coord_arr = np.array((props["label"], props["centroid-0"], props["centroid-1"]))
    coord_df = pd.DataFrame(coord_arr)
    # sort first by y, then by x coord
    sorted_coord_arr = coord_df.sort_values(by=[1, 2], axis=1).to_numpy()
    labels_sorted_by_coord = sorted_coord_arr[0, :].tolist()
    return labels_sorted_by_coord


def get_new_labels(img: Image) -> np.ndarray:
    """Build a lookup table (aligned with np.unique order) that renumbers labels
    by spatial order of their centroids.

    Assumes the first unique value is background 0, which keeps new id 0 —
    TODO confirm masks always contain a 0 background.
    """
    dtype = img.dtype
    unique_label_ids, indices = np.unique(img, return_inverse=True)

    old_label_ids = unique_label_ids.tolist()
    old_label_ids_sorted_by_coord = get_labels_sorted_by_coordinates(img)

    new_label_ids = list(range(0, len(old_label_ids)))
    label_pairs = zip(old_label_ids_sorted_by_coord, new_label_ids)
    label_map = {lab_pair[0]: lab_pair[1] for lab_pair in label_pairs}

    updated_label_ids = [0]
    for _id in old_label_ids[1:]:
        updated_label_ids.append(label_map[_id])

    new_unique_label_ids = np.array(updated_label_ids, dtype=dtype)
    return new_unique_label_ids


def reset_label_ids(img, new_label_ids) -> Image:
    """Apply a new-label lookup table to img via np.unique inverse indices."""
    dtype = img.dtype
    unique_labels, indices = np.unique(img, return_inverse=True)
    reset_img = new_label_ids[indices].reshape(img.shape).astype(dtype)
    return reset_img


def remove_labels(
    img: Image, y_slice: slice, x_slice: slice, exclude_start: bool
) -> Tuple[Image, List[int]]:
    """Zero out every label that touches the given y/x border strips.

    When exclude_start is True, labels present on the first row/column of the
    strip are kept (they belong to the neighbouring tile's adopted border).
    Returns (modified copy, list of removed label ids).
    """
    # Background 0 must never be "removed".
    exclude_from_val_to_remove = [0]

    val_to_remove = []
    if y_slice != slice(None):
        img_slice_y = (y_slice, slice(None))
        val_to_remove_y = np.unique(img[img_slice_y]).tolist()
        val_to_remove.extend(val_to_remove_y)

    if x_slice != slice(None):
        img_slice_x = (slice(None), x_slice)
        val_to_remove_x = np.unique(img[img_slice_x]).tolist()
        val_to_remove.extend(val_to_remove_x)

    # NOTE(review): set(sorted(...)) discards the ordering — sorted() is a no-op
    # here; only the set semantics are used.
    val_to_remove = set(sorted(val_to_remove))

    if exclude_start:
        if y_slice.start is None and x_slice.start is None:
            raise ValueError("Exclude start is enabled but slice start is None")
        exclusions = []
        if y_slice.start is not None:
            line_slice_y = (slice(y_slice.start, y_slice.start + 1), x_slice)
            exclusions.extend(np.unique(img[line_slice_y]).tolist())
        if x_slice.start is not None:
            line_slice_x = (y_slice, slice(x_slice.start, x_slice.start + 1))
            exclusions.extend(np.unique(img[line_slice_x]).tolist())

        unique_exclusions = sorted(set(exclusions))
        exclude_from_val_to_remove.extend(unique_exclusions)

    exclude_from_val_to_remove = set(sorted(exclude_from_val_to_remove))
    val_to_remove = [val for val in val_to_remove if val not in exclude_from_val_to_remove]

    img_copy = img.copy()
    for val in val_to_remove:
        img_copy[img_copy == val] = 0
    return img_copy, val_to_remove


def remove_overlapping_labels(img: Image, overlap: int, mode: str) -> Tuple[Image, List[int]]:
    """Remove labels in the overlap strips named in mode ("left right top bottom").

    "right"/"bottom" strips keep labels on the strip's first line (exclude_start),
    "left"/"top" strips remove everything they touch.
    Returns (modified copy, sorted unique removed label ids).
    """
    left = (slice(None), slice(None, overlap))
    right = (slice(None), slice(-overlap, None))
    top = (slice(None, overlap), slice(None))
    bottom = (slice(-overlap, None), slice(None))

    mod_img = img.copy()
    excluded_labels = []
    if "left" in mode:
        mod_img, ex_lab = remove_labels(mod_img, *left, exclude_start=False)
        excluded_labels.extend(ex_lab)
    if "right" in mode:
        mod_img, ex_lab = remove_labels(mod_img, *right, exclude_start=True)
        excluded_labels.extend(ex_lab)
    if "top" in mode:
        mod_img, ex_lab = remove_labels(mod_img, *top, exclude_start=False)
        excluded_labels.extend(ex_lab)
    if "bottom" in mode:
        mod_img, ex_lab = remove_labels(mod_img, *bottom, exclude_start=True)
        excluded_labels.extend(ex_lab)
    excluded_labels = sorted(set(excluded_labels))
    return mod_img, excluded_labels


def find_and_remove_overlapping_labels_in_first_channel(
    tiles: List[Image], y_ntiles: int, x_ntiles: int, overlap: int
) -> Tuple[List[Image], Dict[int, Dict[int, int]]]:
    """Remove overlap-strip labels from every tile, in parallel via dask.

    Each tile only cleans the edges facing a neighbour (outer grid edges are
    kept). Returns (modified tiles, {tile_index: {removed label: 0}}).
    """
    excluded_labels = dict()
    modified_tiles = []
    task = []
    n = 0
    for i in range(0, y_ntiles):
        for j in range(0, x_ntiles):
            # Build the edge list per grid position: first/last rows/cols keep
            # their outward-facing borders.
            label_remove_mode = ""
            if i == 0:
                label_remove_mode += " bottom "
            elif i == y_ntiles - 1:
                label_remove_mode += " top "
            else:
                label_remove_mode += " top bottom "
            if j == 0:
                label_remove_mode += " right "
            elif j == x_ntiles - 1:
                label_remove_mode += " left "
            else:
                label_remove_mode += " left right "

            task.append(
                dask.delayed(remove_overlapping_labels)(tiles[n], overlap, label_remove_mode)
            )
            n += 1
    computed_modifications = dask.compute(*task)
    for i, mod in enumerate(computed_modifications):
        modified_tiles.append(mod[0])
        excluded_labels[i] = {lab: 0 for lab in mod[1]}

    return modified_tiles, excluded_labels


def remove_overlapping_labels_in_another_channel(
    tiles: List[Image], excluded_labels: dict
) -> List[Image]:
    """Zero out, per tile, the label ids already removed from the first channel."""

    def exclude_labels(tile, labels):
        for lab in labels:
            tile[tile == lab] = 0
        return tile

    task = []
    for i in range(0, len(tiles)):
        task.append(dask.delayed(exclude_labels)(tiles[i], excluded_labels[i]))
    modified_tiles = dask.compute(*task)
    return list(modified_tiles)


def find_overlapping_border_labels(
    img1: Image, img2: Image, overlap: int, mode: str
) -> Dict[int, int]:
    """Find which pixels in img2 overlap pixels in img1
    Return mapping
    { img2px: img1px, }
    """
    # img1 contributes its trailing overlap strip, img2 the strip just past its
    # own leading overlap; the two strips cover the same physical area.
    if mode == "horizontal":
        img1_ov = img1[:, -overlap:]
        img2_ov = img2[:, overlap : overlap * 2]
    elif mode == "vertical":
        img1_ov = img1[-overlap:, :]
        img2_ov = img2[overlap : overlap * 2, :]
    else:  # horizontal+vertical
        img1_ov = img1[-overlap:, -overlap:]
        img2_ov = img2[overlap : overlap * 2, overlap : overlap * 2]

    nrows, ncols = img2_ov.shape

    border_map = dict()

    for i in range(0, nrows):
        for j in range(0, ncols):
            old_value = img2_ov[i, j]
            if old_value in border_map:
                continue
            else:
                new_value = img1_ov[i, j]
                # Only map label-on-label coincidences; background stays put.
                if old_value > 0 and new_value > 0:
                    border_map[old_value] = new_value

    return border_map


def replace_overlapping_border_labels(
    img1: Image, img2: Image, overlap: int, mode: str
) -> Tuple[Image, Dict[int, int]]:
    """Replace label ids in img2 with label ids of img1"""
    border_map = find_overlapping_border_labels(img1, img2, overlap, mode)
    # to avoid merging of old and new labels
    # find old labels that have same ids as new ones
    # and add some value
    old_lab_ids = tuple(np.unique(img2).tolist())
    matches = []
    for new_lab_id in border_map.values():
        if new_lab_id in old_lab_ids:
            matches.append(new_lab_id)
    if matches != []:
        addition = img2.max() + max(matches)
        for value in matches:
            img2[img2 == value] += addition

    # Adopt the neighbour's ids for labels spanning the shared border.
    for old_value, new_value in border_map.items():
        img2[img2 == old_value] = new_value
    return img2, border_map


def find_and_replace_overlapping_border_labels_in_first_channel(
    tiles: List[Image], y_ntiles: int, x_ntiles: int, overlap: int, dtype
) -> Tuple[List[Image], Dict[int, Dict[int, int]], List[int]]:
    """Make label ids globally unique across the grid and stitch border labels.

    Each tile's labels are offset by the running max of previous tiles, then
    labels crossing into the already-processed top / left / top-left neighbours
    adopt those neighbours' ids.
    Returns (modified tiles, {tile: border map}, per-tile id offsets).
    """
    previous_tile_max = 0
    tile_ids = np.arange(0, y_ntiles * x_ntiles).reshape((y_ntiles, x_ntiles))
    modified_tiles = []
    tile_additions = []
    border_maps = dict()
    n = 0
    for i in range(0, y_ntiles):
        for j in range(0, x_ntiles):
            tile = tiles[n]
            tile = tile.astype(dtype)
            this_tile_max = tile.max()
            tile_additions.append(previous_tile_max)
            # Offset all non-background pixels to keep ids unique across tiles.
            tile[np.nonzero(tile)] += previous_tile_max

            if i != 0:
                top_tile_id = tile_ids[i - 1, j]
            else:
                top_tile_id = None
            if j != 0:
                left_tile_id = tile_ids[i, j - 1]
            else:
                left_tile_id = None
            if i != 0 and j != 0:
                top_left_tile_id = tile_ids[i - 1, j - 1]
            else:
                top_left_tile_id = None

            this_tile_border_map = dict()
            if top_tile_id is not None:
                tile, border_map = replace_overlapping_border_labels(
                    modified_tiles[top_tile_id], tile, overlap, "vertical"
                )
                this_tile_border_map.update(border_map)
            if left_tile_id is not None:
                tile, border_map = replace_overlapping_border_labels(
                    modified_tiles[left_tile_id], tile, overlap, "horizontal"
                )
                this_tile_border_map.update(border_map)
            if top_left_tile_id is not None:
                tile, border_map = replace_overlapping_border_labels(
                    modified_tiles[top_left_tile_id], tile, overlap, "horizontal+vertical"
                )
                this_tile_border_map.update(border_map)

            modified_tiles.append(tile)
            border_maps[n] = this_tile_border_map
            previous_tile_max += this_tile_max
            n += 1
    return modified_tiles, border_maps, tile_additions


def replace_overlapping_border_labels_in_another_channel(
    tiles: List[Image], border_maps: Dict[int, dict], tile_additions: List[int], dtype
) -> List[Image]:
    """Apply the first channel's per-tile id offsets (and border maps) to tiles.

    NOTE(review): unlike replace_overlapping_border_labels, the inner helper
    computes the collision-avoidance offsets but never performs the actual
    `modified_tile[modified_tile == old_value] = new_value` relabeling from
    value_map — confirm whether that loop was omitted intentionally.
    """

    def replace_values(tile, value_map, tile_addition, dtype):
        modified_tile = tile.astype(dtype)
        modified_tile[np.nonzero(modified_tile)] += tile_addition
        if value_map != {}:
            old_lab_ids = tuple(np.unique(modified_tile).tolist())
            matches = []
            for new_lab_id in value_map.values():
                if new_lab_id in old_lab_ids:
                    matches.append(new_lab_id)
            if matches != []:
                addition = modified_tile.max() + max(matches)
                for value in matches:
                    modified_tile[modified_tile == value] += addition
        return modified_tile

    task = []
    for i, tile in enumerate(tiles):
        task.append(dask.delayed(replace_values)(tile, border_maps[i], tile_additions[i], dtype))
    modified_tiles = dask.compute(*task)
    return list(modified_tiles)


def update_old_values(
    excluded_labels: dict, tile_additions: List[int]
) -> Dict[int, Dict[int, int]]:
    """Shift each tile's removed-label ids by that tile's global id offset."""
    upd_excluded_labels = dict()
    for tile in excluded_labels:
        this_tile_excluded_labels = dict()
        for old_value, new_value in excluded_labels[tile].items():
            upd_old_value = old_value + tile_additions[tile]
            this_tile_excluded_labels[upd_old_value] = new_value
        upd_excluded_labels[tile] = this_tile_excluded_labels
    return upd_excluded_labels


def modify_tiles_first_channel(
    tiles: List[Image], y_ntiles: int, x_ntiles: int, overlap: int, dtype
) -> Tuple[List[Image], Dict[int, Dict[int, int]], Dict[int, Dict[int, int]], List[int]]:
    """Full first-channel pipeline: remove overlap labels, then globalize ids.

    Returns (tiles, excluded labels, border maps, tile id offsets) — the last
    three are reused for the other channels.
    """
    mod_tiles, excluded_labels = find_and_remove_overlapping_labels_in_first_channel(
        tiles, y_ntiles, x_ntiles, overlap
    )
    (
        mod_tiles,
        border_maps,
        tile_additions,
    ) = find_and_replace_overlapping_border_labels_in_first_channel(
        mod_tiles, y_ntiles, x_ntiles, overlap, dtype
    )

    return mod_tiles, excluded_labels, border_maps, tile_additions


def modify_tiles_another_channel(
    tiles: List[Image], excluded_labels: dict, border_maps: dict, tile_additions: list, dtype
) -> List[Image]:
    """Replay the first channel's removals and id bookkeeping on another channel."""
    mod_tiles = remove_overlapping_labels_in_another_channel(tiles, excluded_labels)
    if border_maps != {}:
        mod_tiles = replace_overlapping_border_labels_in_another_channel(
            mod_tiles, border_maps, tile_additions, dtype
        )

    return mod_tiles


def
get_slices( 377 | tile_shape: tuple, overlap: int, y_tile_id: int, x_tile_id: int, y_id_max: int, x_id_max: int 378 | ) -> Tuple[Tuple[slice, slice], Tuple[slice, slice]]: 379 | if y_id_max - 1 == 0: 380 | tile_slice_y = slice(overlap, tile_shape[0] + overlap) 381 | y_f = 0 382 | y_t = tile_shape[0] 383 | elif y_tile_id == 0: 384 | tile_slice_y = slice(overlap, tile_shape[0] + overlap * 2) 385 | y_f = 0 386 | y_t = tile_shape[0] + overlap 387 | elif y_tile_id == y_id_max - 1: 388 | tile_slice_y = slice(overlap, tile_shape[0] + overlap) 389 | y_f = y_tile_id * tile_shape[0] 390 | y_t = y_f + tile_shape[0] 391 | else: 392 | tile_slice_y = slice(overlap, tile_shape[0] + overlap * 2) 393 | y_f = y_tile_id * tile_shape[0] 394 | y_t = y_f + tile_shape[0] + overlap 395 | 396 | if x_id_max - 1 == 0: 397 | tile_slice_x = slice(overlap, tile_shape[1] + overlap) 398 | x_f = 0 399 | x_t = tile_shape[1] 400 | elif x_tile_id == 0: 401 | tile_slice_x = slice(overlap, tile_shape[1] + overlap * 2) 402 | x_f = 0 403 | x_t = tile_shape[1] + overlap 404 | elif x_tile_id == x_id_max - 1: 405 | tile_slice_x = slice(overlap, tile_shape[1] + overlap) 406 | x_f = x_tile_id * tile_shape[1] 407 | x_t = x_f + tile_shape[1] 408 | else: 409 | tile_slice_x = slice(overlap, tile_shape[1] + overlap * 2) 410 | x_f = x_tile_id * tile_shape[1] 411 | x_t = x_f + tile_shape[1] + overlap 412 | 413 | tile_slice = (tile_slice_y, tile_slice_x) 414 | big_image_slice = (slice(y_f, y_t), slice(x_f, x_t)) 415 | 416 | return tile_slice, big_image_slice 417 | 418 | 419 | def stitch_mask( 420 | tiles: List[Image], 421 | y_ntiles: int, 422 | x_ntiles: int, 423 | tile_shape: list, 424 | dtype, 425 | overlap: int, 426 | padding: dict, 427 | ) -> Image: 428 | y_axis = -2 429 | x_axis = -1 430 | 431 | tile_y_size = tile_shape[y_axis] - overlap * 2 432 | tile_x_size = tile_shape[x_axis] - overlap * 2 433 | 434 | big_image_y_size = y_ntiles * tile_y_size 435 | big_image_x_size = x_ntiles * tile_x_size 436 | 437 | y_pad 
= padding["top"] + padding["bottom"] 438 | x_pad = padding["left"] + padding["right"] 439 | 440 | big_image_shape = (big_image_y_size, big_image_x_size) 441 | big_image = np.zeros(big_image_shape, dtype=dtype) 442 | 443 | print("n tiles x,y:", (x_ntiles, y_ntiles)) 444 | print("plane shape x,y:", big_image_x_size - x_pad, big_image_y_size - y_pad) 445 | 446 | n = 0 447 | for i in range(0, y_ntiles): 448 | for j in range(0, x_ntiles): 449 | tile_slice, big_image_slice = get_slices( 450 | (tile_y_size, tile_x_size), overlap, i, j, y_ntiles, x_ntiles 451 | ) 452 | 453 | tile = tiles[n] 454 | tile = tile.astype(dtype) 455 | 456 | mask_nonzeros = tile[tile_slice] != 0 457 | big_image[big_image_slice][mask_nonzeros] = tile[tile_slice][mask_nonzeros] 458 | n += 1 459 | 460 | new_big_image_shape = (big_image_shape[0] - y_pad, big_image_shape[1] - x_pad) 461 | return big_image[: new_big_image_shape[0], : new_big_image_shape[1]] 462 | 463 | 464 | def process_all_masks( 465 | tiles, tile_shape, y_ntiles, x_ntiles, overlap, padding, dtype 466 | ) -> Tuple[List[Image], str]: 467 | print("Started processing masks") 468 | tiles_cell = [t[0, :, :] for t in tiles] 469 | tiles_nuc = [t[1, :, :] for t in tiles] 470 | tiles_cell_b = [t[2, :, :] for t in tiles] 471 | tiles_nuc_b = [t[3, :, :] for t in tiles] 472 | raw_tile_groups = [tiles_cell, tiles_nuc, tiles_cell_b, tiles_nuc_b] 473 | print("Identifying and trimming border labels in all tiles") 474 | ( 475 | mod_tiles_nuc, 476 | excluded_labels_nuc, 477 | border_maps_nuc, 478 | tile_additions_nuc, 479 | ) = modify_tiles_first_channel(tiles_nuc, y_ntiles, x_ntiles, overlap, dtype) 480 | 481 | ( 482 | mod_tiles_cell, 483 | excluded_labels_cell, 484 | border_maps_cell, 485 | tile_additions_cell, 486 | ) = modify_tiles_first_channel(tiles_cell, y_ntiles, x_ntiles, overlap, dtype) 487 | 488 | all_exclusions = deepcopy(excluded_labels_nuc) 489 | for tile in excluded_labels_cell: 490 | if tile in all_exclusions: 491 | for lab in 
excluded_labels_cell[tile]: 492 | all_exclusions[tile][lab] = excluded_labels_cell[tile][lab] 493 | else: 494 | all_exclusions[tile] = excluded_labels_cell[tile] 495 | 496 | all_border_maps = deepcopy(border_maps_nuc) 497 | for tile in border_maps_cell: 498 | if tile in all_border_maps: 499 | for lab in border_maps_cell[tile]: 500 | all_border_maps[tile][lab] = border_maps_cell[tile][lab] 501 | else: 502 | all_border_maps[tile] = border_maps_cell[tile] 503 | 504 | mod_tile_groups = [] 505 | for tile_group in raw_tile_groups: 506 | mod_tile_group = modify_tiles_another_channel( 507 | tile_group, all_exclusions, all_border_maps, tile_additions_cell, dtype 508 | ) 509 | mod_tile_groups.append(mod_tile_group) 510 | 511 | del raw_tile_groups 512 | gc.collect() 513 | print("Stitching masks") 514 | stitched_imgs = [] 515 | for tile_group in mod_tile_groups: 516 | stitched_img = stitch_mask( 517 | tile_group, y_ntiles, x_ntiles, tile_shape, dtype, overlap, padding 518 | ) 519 | stitched_imgs.append(stitched_img) 520 | 521 | del mod_tile_groups 522 | gc.collect() 523 | 524 | matched_masks, fraction_matched = get_matched_masks( 525 | cell_mask=stitched_imgs[0], 526 | nucleus_mask=stitched_imgs[1], 527 | dtype=dtype, 528 | do_mismatch_repair=True, 529 | ) 530 | del stitched_imgs 531 | gc.collect() 532 | 533 | new_label_ids = get_new_labels(matched_masks[0]) # cell 534 | reset_imgs = [] 535 | for i in range(0, len(matched_masks)): 536 | reset_img = reset_label_ids(matched_masks[i], new_label_ids) 537 | reset_imgs.append(reset_img) 538 | 539 | y_size = reset_imgs[0].shape[0] 540 | x_size = reset_imgs[0].shape[1] 541 | ome_meta = generate_ome_meta_for_mask(y_size, x_size, dtype, fraction_matched) 542 | print("Finished processing masks") 543 | return reset_imgs, ome_meta 544 | -------------------------------------------------------------------------------- /bin/codex_stitching/secondary_stitcher/match_masks.py: 
-------------------------------------------------------------------------------- 1 | from typing import List, Tuple 2 | 3 | import numpy as np 4 | from scipy.sparse import csr_matrix 5 | from skimage.segmentation import find_boundaries 6 | 7 | Image = np.ndarray 8 | 9 | """ 10 | Package functions that repair and generate matched cell, nuclear, 11 | cell membrane and nuclear membrane segmentation masks 12 | Author: Haoran Chen 13 | Version: 1.1 14 | 08/09/2021 15 | """ 16 | 17 | 18 | def get_matched_cells(cell_arr, cell_membrane_arr, nuclear_arr, mismatch_repair): 19 | a = set((tuple(i) for i in cell_arr)) 20 | b = set((tuple(i) for i in cell_membrane_arr)) 21 | c = set((tuple(i) for i in nuclear_arr)) 22 | d = a - b 23 | # remove cell membrane from cell 24 | mismatch_pixel_num = len(list(c - d)) 25 | mismatch_fraction = len(list(c - d)) / len(list(c)) 26 | if not mismatch_repair: 27 | if mismatch_pixel_num == 0: 28 | return np.array(list(a)), np.array(list(c)), 0 29 | else: 30 | return False, False, False 31 | else: 32 | if mismatch_pixel_num < len(c): 33 | return np.array(list(a)), np.array(list(d & c)), mismatch_fraction 34 | else: 35 | return False, False, False 36 | 37 | 38 | def compute_M(data): 39 | cols = np.arange(data.size) 40 | return csr_matrix((cols, (data.ravel(), cols)), shape=(data.max() + 1, data.size)) 41 | 42 | 43 | def get_indices_sparse(data): 44 | M = compute_M(data) 45 | return [np.unravel_index(row.data, data.shape) for row in M] 46 | 47 | 48 | def list_remove(c_list, indexes): 49 | for index in sorted(indexes, reverse=True): 50 | del c_list[index] 51 | return c_list 52 | 53 | 54 | def get_indexed_mask(mask, boundary): 55 | boundary = boundary * 1 56 | boundary_loc = np.where(boundary == 1) 57 | boundary[boundary_loc] = mask[boundary_loc] 58 | return boundary 59 | 60 | 61 | def get_boundary(mask: Image): 62 | mask_boundary = find_boundaries(mask, mode="inner") 63 | mask_boundary_indexed = get_indexed_mask(mask, mask_boundary) 64 | return 
mask_boundary_indexed 65 | 66 | 67 | def get_mask(cell_list, shape: Tuple[int]): 68 | mask = np.zeros(shape) 69 | for cell_num in range(len(cell_list)): 70 | mask[tuple(cell_list[cell_num].T)] = cell_num + 1 71 | return mask 72 | 73 | 74 | def get_cell_num(mask: Image): 75 | return len(np.unique(mask)) 76 | 77 | 78 | def get_mismatched_fraction( 79 | whole_cell_mask: Image, 80 | nuclear_mask: Image, 81 | cell_matched_mask: Image, 82 | nuclear_matched_mask: Image, 83 | ) -> float: 84 | whole_cell_mask_binary = np.sign(whole_cell_mask) 85 | nuclear_mask_binary = np.sign(nuclear_mask) 86 | cell_matched_mask_binary = np.sign(cell_matched_mask) 87 | nuclear_matched_mask_binary = np.sign(nuclear_matched_mask) 88 | total_area = np.sum(np.sign(whole_cell_mask_binary + nuclear_mask_binary)) 89 | mismatched_area = np.sum( 90 | np.sign( 91 | (nuclear_mask_binary - nuclear_matched_mask_binary) 92 | + (whole_cell_mask_binary - cell_matched_mask_binary) 93 | ) 94 | ) 95 | mismatched_fraction = mismatched_area / total_area 96 | return mismatched_fraction 97 | 98 | 99 | def get_fraction_matched_cells( 100 | whole_cell_mask: Image, nuclear_mask: Image, cell_matched_mask: Image 101 | ) -> float: 102 | matched_cell_num = len(np.unique(cell_matched_mask)) - 1 103 | total_cell_num = len(np.unique(whole_cell_mask)) - 1 104 | total_nuclei_num = len(np.unique(nuclear_mask)) - 1 105 | mismatched_cell_num = total_cell_num - matched_cell_num 106 | mismatched_nuclei_num = total_nuclei_num - matched_cell_num 107 | fraction_matched_cells = matched_cell_num / ( 108 | mismatched_cell_num + mismatched_nuclei_num + matched_cell_num 109 | ) 110 | return fraction_matched_cells 111 | 112 | 113 | def get_matched_masks( 114 | cell_mask: Image, nucleus_mask: Image, dtype, do_mismatch_repair: bool 115 | ) -> Tuple[List[Image], float]: 116 | """ 117 | returns masks with matched cells and nuclei 118 | """ 119 | whole_cell_mask = cell_mask.copy() 120 | nuclear_mask = nucleus_mask.copy() 121 | 
cell_membrane_mask = get_boundary(whole_cell_mask) 122 | 123 | cell_coords = get_indices_sparse(whole_cell_mask)[1:] 124 | nucleus_coords = get_indices_sparse(nuclear_mask)[1:] 125 | cell_membrane_coords = get_indices_sparse(cell_membrane_mask)[1:] 126 | 127 | cell_coords = list(map(lambda x: np.array(x).T, cell_coords)) 128 | nucleus_coords = list(map(lambda x: np.array(x).T, nucleus_coords)) 129 | cell_membrane_coords = list(map(lambda x: np.array(x).T, cell_membrane_coords)) 130 | 131 | cell_matched_index_list = [] 132 | nucleus_matched_index_list = [] 133 | cell_matched_list = [] 134 | nucleus_matched_list = [] 135 | 136 | for i in range(len(cell_coords)): 137 | if len(cell_coords[i]) != 0: 138 | current_cell_coords = cell_coords[i] 139 | nuclear_search_num = np.unique( 140 | list(map(lambda x: nuclear_mask[tuple(x)], current_cell_coords)) 141 | ) 142 | best_mismatch_fraction = 1 143 | whole_cell_best = [] 144 | for j in nuclear_search_num: 145 | if j != 0: 146 | if (j - 1 not in nucleus_matched_index_list) and ( 147 | i not in cell_matched_index_list 148 | ): 149 | whole_cell, nucleus, mismatch_fraction = get_matched_cells( 150 | cell_coords[i], 151 | cell_membrane_coords[i], 152 | nucleus_coords[j - 1], 153 | mismatch_repair=do_mismatch_repair, 154 | ) 155 | if type(whole_cell) != bool: 156 | if mismatch_fraction < best_mismatch_fraction: 157 | best_mismatch_fraction = mismatch_fraction 158 | whole_cell_best = whole_cell 159 | nucleus_best = nucleus 160 | i_ind = i 161 | j_ind = j - 1 162 | if len(whole_cell_best) > 0: 163 | cell_matched_list.append(whole_cell_best) 164 | nucleus_matched_list.append(nucleus_best) 165 | cell_matched_index_list.append(i_ind) 166 | nucleus_matched_index_list.append(j_ind) 167 | 168 | del cell_coords 169 | del nucleus_coords 170 | 171 | cell_matched_mask = get_mask(cell_matched_list, whole_cell_mask.shape) 172 | nuclear_matched_mask = get_mask(nucleus_matched_list, whole_cell_mask.shape) 173 | cell_membrane_mask = 
get_boundary(cell_matched_mask) 174 | nuclear_membrane_mask = get_boundary(nuclear_matched_mask) 175 | 176 | if do_mismatch_repair: 177 | fraction_matched_cells = 1.0 178 | else: 179 | fraction_matched_cells = get_fraction_matched_cells( 180 | whole_cell_mask, nuclear_mask, cell_matched_mask 181 | ) 182 | 183 | out_list = [ 184 | cell_matched_mask.astype(dtype), 185 | nuclear_matched_mask.astype(dtype), 186 | cell_membrane_mask.astype(dtype), 187 | nuclear_membrane_mask.astype(dtype), 188 | ] 189 | return out_list, fraction_matched_cells 190 | -------------------------------------------------------------------------------- /bin/codex_stitching/secondary_stitcher/secondary_stitcher.py: -------------------------------------------------------------------------------- 1 | import re 2 | import xml.etree.ElementTree as ET 3 | from pathlib import Path 4 | from typing import Dict, List, Union 5 | 6 | import numpy as np 7 | import pandas as pd 8 | import tifffile as tif 9 | from mask_stitching import process_all_masks 10 | from skimage.measure import regionprops_table 11 | 12 | Image = np.ndarray 13 | 14 | 15 | def add_structured_annotations(omexml_str: str, nucleus_channel: str, cell_channel: str) -> str: 16 | """ 17 | Will add this, to the root, after Image node 18 | 19 | 20 | 21 | 22 | SegmentationChannels 23 | 24 | DAPI-02 25 | CD45 26 | 27 | 28 | 29 | 30 | 31 | """ 32 | 33 | # Remove some prefixes 34 | nucleus_channel = re.sub(r"cyc(\d+)_ch(\d+)_orig(.*)", r"\3", nucleus_channel) 35 | cell_channel = re.sub(r"cyc(\d+)_ch(\d+)_orig(.*)", r"\3", cell_channel) 36 | 37 | structured_annotation = ET.Element("StructuredAnnotations") 38 | annotation = ET.SubElement(structured_annotation, "XMLAnnotation", {"ID": "Annotation:0"}) 39 | annotation_value = ET.SubElement(annotation, "Value") 40 | original_metadata = ET.SubElement(annotation_value, "OriginalMetadata") 41 | segmentation_channels_key = ET.SubElement(original_metadata, "Key").text = ( 42 | "SegmentationChannels" 43 | ) 
44 | segmentation_channels_value = ET.SubElement(original_metadata, "Value") 45 | ET.SubElement(segmentation_channels_value, "Nucleus").text = nucleus_channel 46 | ET.SubElement(segmentation_channels_value, "Cell").text = cell_channel 47 | sa_str = ET.tostring(structured_annotation, encoding="utf-8").decode("utf-8") 48 | 49 | if "StructuredAnnotations" in omexml_str: 50 | sa_placement = omexml_str.find("") + len("") 51 | sa_str = re.sub(r"", "", sa_str) 52 | else: 53 | sa_placement = omexml_str.find("") + len("") 54 | 55 | omexml_str_with_sa = omexml_str[:sa_placement] + sa_str + omexml_str[sa_placement:] 56 | return omexml_str_with_sa 57 | 58 | 59 | def alpha_num_order(string: str) -> str: 60 | """Returns all numbers on 5 digits to let sort the string with numeric order. 61 | Ex: alphaNumOrder("a6b12.125") ==> "a00006b00012.00125" 62 | """ 63 | return "".join( 64 | [format(int(x), "05d") if x.isdigit() else x for x in re.split(r"(\d+)", string)] 65 | ) 66 | 67 | 68 | def get_img_listing(in_dir: Path) -> List[Path]: 69 | allowed_extensions = (".tif", ".tiff") 70 | listing = list(in_dir.iterdir()) 71 | img_listing = [f for f in listing if f.suffix in allowed_extensions] 72 | img_listing = sorted(img_listing, key=lambda x: alpha_num_order(x.name)) 73 | return img_listing 74 | 75 | 76 | def path_to_str(path: Path): 77 | return str(path.absolute().as_posix()) 78 | 79 | 80 | def path_to_dict(path: Path): 81 | """ 82 | Extract region, x position, y position and put into the dictionary 83 | {R:region, X: position, Y: position, path: path} 84 | """ 85 | value_list = re.split(r"(\d+)(?:_?)", path.name)[:-1] 86 | d = dict(zip(*[iter(value_list)] * 2)) 87 | d = {k: int(v) for k, v in d.items()} 88 | d.update({"path": path}) 89 | return d 90 | 91 | 92 | def get_slices( 93 | arr: np.ndarray, hor_f: int, hor_t: int, ver_f: int, ver_t: int, padding: dict, overlap=0 94 | ): 95 | left_check = hor_f - padding["left"] 96 | top_check = ver_f - padding["top"] 97 | right_check = hor_t - 
arr.shape[-1] 98 | bot_check = ver_t - arr.shape[-2] 99 | 100 | left_pad_size = 0 101 | top_pad_size = 0 102 | right_pad_size = 0 103 | bot_pad_size = 0 104 | 105 | if left_check < 0: 106 | left_pad_size = abs(left_check) 107 | hor_f = 0 108 | if top_check < 0: 109 | top_pad_size = abs(top_check) 110 | ver_f = 0 111 | if right_check > 0: 112 | right_pad_size = right_check 113 | hor_t = arr.shape[1] 114 | if bot_check > 0: 115 | ver_t = arr.shape[0] 116 | 117 | big_image_slice = (slice(ver_f, ver_t), slice(hor_f, hor_t)) 118 | tile_shape = (ver_t - ver_f, hor_t - hor_f) 119 | tile_slice = ( 120 | slice(top_pad_size + overlap, tile_shape[0] + overlap), 121 | slice(left_pad_size + overlap, tile_shape[1] + overlap), 122 | ) 123 | 124 | return big_image_slice, tile_slice 125 | 126 | 127 | def get_dataset_info(img_dir: Path): 128 | img_paths = get_img_listing(img_dir) 129 | positions = [path_to_dict(p) for p in img_paths] 130 | df = pd.DataFrame(positions) 131 | df.sort_values(["R", "Y", "X"], inplace=True) 132 | df.reset_index(inplace=True) 133 | 134 | region_ids = list(df["R"].unique()) 135 | y_ntiles = df["Y"].max() 136 | x_ntiles = df["X"].max() 137 | 138 | path_list_per_region = [] 139 | 140 | for r in region_ids: 141 | region_selection = df[df["R"] == r].index 142 | path_list = list(df.loc[region_selection, "path"]) 143 | path_list_per_region.append(path_list) 144 | 145 | return path_list_per_region, y_ntiles, x_ntiles 146 | 147 | 148 | def load_tiles(path_list: List[Path], key: Union[None, int]): 149 | tiles = [] 150 | if key is None: 151 | for path in path_list: 152 | tiles.append(tif.imread(path_to_str(path))) 153 | else: 154 | for path in path_list: 155 | tiles.append(tif.imread(path_to_str(path), key=key)) 156 | 157 | return tiles 158 | 159 | 160 | def calc_mask_coverage(segm_mask: Image) -> float: 161 | mask_pixels = np.sum(segm_mask != 0) 162 | total_pixels = segm_mask.shape[-2] * segm_mask.shape[-1] 163 | return float(round(mask_pixels / total_pixels, 3)) 
164 | 165 | 166 | def calc_snr(img: Image) -> float: 167 | return float(round(np.mean(img) / np.std(img), 3)) 168 | 169 | 170 | def calc_label_sizes(segm_mask: Image) -> Dict[str, List[float]]: 171 | # bounding boxes around labels 172 | # useful to check if there are merged labels 173 | props = regionprops_table(segm_mask, properties=("label", "bbox")) 174 | min_rows = props["bbox-0"] 175 | min_cols = props["bbox-1"] 176 | max_rows = props["bbox-2"] 177 | max_cols = props["bbox-3"] 178 | bbox_arr = np.stack((min_rows, max_rows, min_cols, max_cols), axis=1) 179 | dif = np.stack((bbox_arr[:, 1] - bbox_arr[:, 0], bbox_arr[:, 3] - bbox_arr[:, 2]), axis=1) 180 | long_sides = np.max(dif, axis=1) 181 | label_sizes = dict( 182 | min_bbox_size=[float(i) for i in dif[np.argmin(long_sides)].tolist()], 183 | max_bbox_size=[float(i) for i in dif[np.argmax(long_sides)].tolist()], 184 | mean_bbox_size=[float(i) for i in np.round(np.mean(dif, axis=0), 3).tolist()], 185 | ) 186 | return label_sizes 187 | 188 | 189 | def stitch_plane( 190 | tiles: List[Image], 191 | y_ntiles: int, 192 | x_ntiles: int, 193 | tile_shape: list, 194 | dtype, 195 | overlap: int, 196 | padding: dict, 197 | ) -> Image: 198 | y_axis = -2 199 | x_axis = -1 200 | 201 | tile_y_size = tile_shape[y_axis] - overlap * 2 202 | tile_x_size = tile_shape[x_axis] - overlap * 2 203 | 204 | big_image_y_size = (y_ntiles * tile_y_size) - padding["top"] - padding["bottom"] 205 | big_image_x_size = (x_ntiles * tile_x_size) - padding["left"] - padding["right"] 206 | 207 | big_image_shape = (big_image_y_size, big_image_x_size) 208 | big_image = np.zeros(big_image_shape, dtype=dtype) 209 | 210 | print("n tiles x,y:", (x_ntiles, y_ntiles)) 211 | print("plane shape x,y:", big_image_shape[::-1]) 212 | n = 0 213 | for i in range(0, y_ntiles): 214 | ver_f = i * tile_y_size 215 | ver_t = ver_f + tile_y_size 216 | 217 | for j in range(0, x_ntiles): 218 | hor_f = j * tile_x_size 219 | hor_t = hor_f + tile_x_size 220 | 221 | 
big_image_slice, tile_slice = get_slices( 222 | big_image, hor_f, hor_t, ver_f, ver_t, padding, overlap 223 | ) 224 | tile = tiles[n] 225 | 226 | big_image[tuple(big_image_slice)] = tile[tuple(tile_slice)] 227 | 228 | n += 1 229 | return big_image 230 | 231 | 232 | def main( 233 | img_dir: Path, 234 | out_dir: Path, 235 | img_name_template: str, 236 | overlap: int, 237 | padding_str: str, 238 | is_mask: bool, 239 | nucleus_channel: str, 240 | cell_channel: str, 241 | ): 242 | padding_int = [int(i) for i in padding_str.split(",")] 243 | padding = { 244 | "left": padding_int[0], 245 | "right": padding_int[1], 246 | "top": padding_int[2], 247 | "bottom": padding_int[3], 248 | } 249 | 250 | path_list_per_region, y_ntiles, x_ntiles = get_dataset_info(img_dir) 251 | 252 | with tif.TiffFile(path_to_str(path_list_per_region[0][0])) as TF: 253 | tile_shape = list(TF.series[0].shape) 254 | npages = len(TF.pages) 255 | dtype = TF.series[0].dtype 256 | ome_meta = TF.ome_metadata 257 | 258 | big_image_y_size = ( 259 | (y_ntiles * (tile_shape[-2] - overlap * 2)) - padding["top"] - padding["bottom"] 260 | ) 261 | big_image_x_size = ( 262 | (x_ntiles * (tile_shape[-1] - overlap * 2)) - padding["left"] - padding["right"] 263 | ) 264 | 265 | if is_mask: 266 | dtype = np.uint32 267 | else: 268 | ome_meta = re.sub(r'\sSizeY="\d+"', ' SizeY="' + str(big_image_y_size) + '"', ome_meta) 269 | ome_meta = re.sub(r'\sSizeX="\d+"', ' SizeX="' + str(big_image_x_size) + '"', ome_meta) 270 | ome_meta = re.sub(r'\sDimensionOrder="[XYCZT]+"', ' DimensionOrder="XYZCT"', ome_meta) 271 | ome_meta = add_structured_annotations(ome_meta, nucleus_channel, cell_channel) 272 | # part of this report is generated after mask stitching and part after expression stitching 273 | 274 | total_report = dict() 275 | for r, path_list in enumerate(path_list_per_region): 276 | new_path = out_dir / img_name_template.format(r=r + 1) 277 | this_region_report = dict() 278 | TW = tif.TiffWriter(path_to_str(new_path), 
bigtiff=True, shaped=False) 279 | if is_mask: 280 | # mask channels 0 - cells, 1 - nuclei, 2 - cell boundaries, 3 - nucleus boundaries 281 | tiles = load_tiles(path_list, key=None) 282 | masks, ome_meta = process_all_masks( 283 | tiles, tile_shape, y_ntiles, x_ntiles, overlap, padding, dtype 284 | ) 285 | for mask in masks: 286 | new_shape = (1, mask.shape[0], mask.shape[1]) 287 | TW.write( 288 | mask.reshape(new_shape), 289 | contiguous=True, 290 | photometric="minisblack", 291 | description=ome_meta, 292 | ) 293 | 294 | this_region_report["num_cells"] = int(masks[0].max()) 295 | this_region_report["num_nuclei"] = int(masks[1].max()) 296 | this_region_report["cell_coverage"] = calc_mask_coverage(masks[0]) 297 | this_region_report["nuclei_coverage"] = calc_mask_coverage(masks[1]) 298 | this_region_report["cell_sizes"] = calc_label_sizes(masks[0]) 299 | this_region_report["nucleus_sizes"] = calc_label_sizes(masks[1]) 300 | else: 301 | for p in range(0, npages): 302 | tiles = load_tiles(path_list, key=p) 303 | print("\nstitching expressions page", p + 1, "/", npages) 304 | plane = stitch_plane( 305 | tiles, y_ntiles, x_ntiles, tile_shape, dtype, overlap, padding 306 | ) 307 | new_shape = (1, plane.shape[0], plane.shape[1]) 308 | if p == 0: 309 | this_region_report["num_channels"] = int(npages) 310 | this_region_report["img_height"] = int(plane.shape[0]) 311 | this_region_report["img_width"] = int(plane.shape[1]) 312 | this_region_report["per_channel_snr"] = dict() 313 | this_region_report["nucleus_channel"] = nucleus_channel 314 | this_region_report["cell_channel"] = cell_channel 315 | this_region_report["per_channel_snr"][p] = calc_snr(plane) 316 | TW.write( 317 | plane.reshape(new_shape), 318 | contiguous=True, 319 | photometric="minisblack", 320 | description=ome_meta, 321 | ) 322 | total_report["reg" + str(r + 1)] = this_region_report 323 | TW.close() 324 | return total_report 325 | -------------------------------------------------------------------------------- 
/bin/codex_stitching/secondary_stitcher/secondary_stitcher_runner.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | from pathlib import Path 4 | from pprint import pprint 5 | from typing import Any, Dict 6 | 7 | import secondary_stitcher 8 | 9 | Report = Dict[str, Dict[str, Any]] 10 | 11 | 12 | def make_dir_if_not_exists(dir_path: Path): 13 | if not dir_path.exists(): 14 | dir_path.mkdir(parents=True) 15 | 16 | 17 | def read_pipeline_config(path_to_config: Path) -> dict: 18 | with open(path_to_config, "r") as s: 19 | config = json.load(s) 20 | return config 21 | 22 | 23 | def write_pipeline_config(out_path: Path, config): 24 | with open(out_path, "w") as s: 25 | json.dump(config, s, sort_keys=False, indent=4) 26 | 27 | 28 | def run_stitcher( 29 | img_dir: Path, 30 | out_dir: Path, 31 | img_name_template: str, 32 | overlap: int, 33 | padding: dict, 34 | is_mask: bool, 35 | nucleus_channel: str, 36 | cell_channel: str, 37 | ) -> Report: 38 | padding_str = ",".join((str(i) for i in list(padding.values()))) 39 | report = secondary_stitcher.main( 40 | img_dir, 41 | out_dir, 42 | img_name_template, 43 | overlap, 44 | padding_str, 45 | is_mask, 46 | nucleus_channel, 47 | cell_channel, 48 | ) 49 | return report 50 | 51 | 52 | def merge_reports(mask_report: Report, expr_report: Report) -> Report: 53 | total_report = dict() 54 | for region in mask_report: 55 | total_report[region] = {**mask_report[region], **expr_report[region]} 56 | return total_report 57 | 58 | 59 | def main(pipeline_config_path: Path, ometiff_dir: Path): 60 | pipeline_config = read_pipeline_config(pipeline_config_path) 61 | slicer_meta = pipeline_config["slicer"] 62 | nucleus_channel = pipeline_config.get("nuclei_channel", "None") 63 | cell_channel = pipeline_config.get("membrane_channel", "None") 64 | 65 | path_to_mask_tiles = Path(ometiff_dir).joinpath("cytometry/tile/ome-tiff") 66 | path_to_image_tiles = 
Path(ometiff_dir).joinpath("extract/expressions/ome-tiff") 67 | 68 | overlap = slicer_meta["overlap"] 69 | padding = slicer_meta["padding"] 70 | 71 | mask_out_dir = Path("/output/pipeline_output/mask") 72 | expr_out_dir = Path("/output/pipeline_output/expr") 73 | final_pipeline_config_path = Path("/output/pipelineConfig.json") 74 | 75 | make_dir_if_not_exists(mask_out_dir) 76 | make_dir_if_not_exists(expr_out_dir) 77 | 78 | mask_out_name_template = "reg{r:03d}_mask.ome.tiff" 79 | expr_out_name_template = "reg{r:03d}_expr.ome.tiff" 80 | 81 | mask_report = run_stitcher( 82 | path_to_mask_tiles, 83 | mask_out_dir, 84 | mask_out_name_template, 85 | overlap, 86 | padding, 87 | True, 88 | nucleus_channel, 89 | cell_channel, 90 | ) 91 | 92 | expr_report = run_stitcher( 93 | path_to_image_tiles, 94 | expr_out_dir, 95 | expr_out_name_template, 96 | overlap, 97 | padding, 98 | False, 99 | nucleus_channel, 100 | cell_channel, 101 | ) 102 | 103 | total_report = merge_reports(mask_report, expr_report) 104 | 105 | final_pipeline_config = pipeline_config 106 | final_pipeline_config.update({"report": total_report}) 107 | print("\nfinal_pipeline_config") 108 | pprint(final_pipeline_config, sort_dicts=False) 109 | write_pipeline_config(final_pipeline_config_path, final_pipeline_config) 110 | 111 | 112 | if __name__ == "__main__": 113 | parser = argparse.ArgumentParser() 114 | parser.add_argument("--pipeline_config_path", type=Path, help="path to pipeline config") 115 | parser.add_argument( 116 | "--ometiff_dir", type=Path, help="dir with segmentation mask tiles and codex image tiles" 117 | ) 118 | 119 | args = parser.parse_args() 120 | main(args.pipeline_config_path, args.ometiff_dir) 121 | -------------------------------------------------------------------------------- /bin/convert_to_ometiff.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import re 4 | from multiprocessing import Pool 5 | from os 
from pathlib import Path
from typing import List, Optional

import pandas as pd
import yaml
from aicsimageio import AICSImage
from aicsimageio.writers.ome_tiff_writer import OmeTiffWriter
from ome_types.model import AnnotationRef, Map, MapAnnotation, StructuredAnnotationList
from tifffile import TiffFile

from utils import print_directory_tree

logging.basicConfig(level=logging.INFO, format="%(levelname)-7s - %(message)s")
logger = logging.getLogger(__name__)
SEGMENTATION_CHANNEL_NAMES = [
    "cells",
    "nuclei",
    "cell_boundaries",
    "nucleus_boundaries",
]
TIFF_FILE_NAMING_PATTERN = re.compile(r"^R\d{3}_X(\d{3})_Y(\d{3})\.tif")
# Strict (32-hex dataset id) variant; find_antibodies_meta uses a looser one.
metadata_filename_pattern = re.compile(r"^[0-9A-Fa-f]{32}antibodies\.tsv$")


def find_antibodies_meta(input_dir: Path) -> Optional[Path]:
    """
    Finds and returns the first antibodies.tsv metadata file for a HuBMAP
    data set, searching input_dir recursively. Does not check whether the
    dataset ID matches the directory name; if several candidates exist the
    first one found is used and a warning is logged.
    """
    antb_filename_pattern = re.compile(r"^[0-9A-Za-z\-_]*antibodies\.tsv$")
    found_files = []
    for dirpath, dirnames, filenames in walk(input_dir):
        for filename in filenames:
            if antb_filename_pattern.match(filename):
                found_files.append(Path(dirpath) / filename)

    if len(found_files) == 0:
        logger.warning("No antibodies.tsv file found")
        return None
    if len(found_files) > 1:
        logger.warning("Multiple antibodies.tsv files found; using %s", found_files[0])
    return found_files[0]


def sort_by_cycle(antb_path: Path) -> pd.DataFrame:
    """
    Sorts antibodies.tsv by cycle and channel number. The original tsv is not
    sorted correctly.

    Raises ValueError when a channel_id does not look like "cycle<N>_ch<M>".
    """
    df = pd.read_table(antb_path)
    # FIX: the named groups were lost in a previous revision; the code below
    # reads them via group("cycle") / group("channel").
    cycle_channel_pattern = re.compile(r"cycle(?P<cycle>\d+)_ch(?P<channel>\d+)", re.IGNORECASE)
    cycles = []
    channels = []
    for channel_id in df["channel_id"]:
        match = cycle_channel_pattern.search(str(channel_id))
        if match is None:
            raise ValueError(f"Could not parse cycle/channel from channel_id: {channel_id}")
        cycles.append(int(match.group("cycle")))
        channels.append(int(match.group("channel")))
    # MultiIndex of (cycle, channel) so rows can be looked up by location.
    df.index = [cycles, channels]
    return df.sort_index()


def get_ch_info_from_antibodies_meta(df: pd.DataFrame) -> Optional[pd.DataFrame]:
    """
    Adds a "target" column with the cleaned antibody (analyte) name that will
    replace the provider channel name.
    """
    antb_names = df["antibody_name"].to_list()
    antb_targets = [get_analyte_name(antb) for antb in antb_names]
    df["target"] = antb_targets
    return df


def get_analyte_name(antibody_name: str) -> str:
    """
    Strips unnecessary prefixes and suffixes off of antibody name from antibodies.tsv.
    """
    antb = re.sub(r"Anti-", "", antibody_name)
    antb = re.sub(r"\s+antibody", "", antb)
    return antb


def create_original_channel_names_df(channelList: List[str]) -> pd.DataFrame:
    """
    Creates a dataframe with the original channel names, cycle numbers, and
    channel numbers, plus a normalized "channel_id" ("cycle<N>_ch<M>") column.
    """
    # Separate channel and cycle info from channel names and remove "orig"
    cyc_ch_pattern = re.compile(r"cyc(\d+)_ch(\d+)_orig(.*)")
    og_ch_names_df = pd.DataFrame(channelList, columns=["Original_Channel_Name"])
    og_ch_names_df[["Cycle", "Channel", "channel_name"]] = og_ch_names_df[
        "Original_Channel_Name"
    ].str.extract(cyc_ch_pattern)
    og_ch_names_df["Cycle"] = pd.to_numeric(og_ch_names_df["Cycle"])
    og_ch_names_df["Channel"] = pd.to_numeric(og_ch_names_df["Channel"])
    og_ch_names_df["channel_id"] = (
        "cycle"
        + og_ch_names_df["Cycle"].astype(str)
        + "_ch"
        + og_ch_names_df["Channel"].astype(str)
    )

    return og_ch_names_df


def replace_provider_ch_names_with_antb(
    og_ch_names_df: pd.DataFrame, antibodies_df: pd.DataFrame
) -> List[str]:
    """
    Uses the cycle/channel mapping to replace each channel name with the
    target from antibodies.tsv; channels without an entry keep their
    original name.
    """
    updated_channel_names = []
    mapping = map_cycles_and_channels(antibodies_df)
    for i in og_ch_names_df.index:
        channel_id = og_ch_names_df.at[i, "channel_id"].lower()
        original_name = og_ch_names_df.at[i, "channel_name"]
        updated_channel_names.append(mapping.get(channel_id, original_name))
    return updated_channel_names


def generate_sa_ch_info(
    channel_id: str,
    og_ch_names_info: pd.Series,
    antb_info: pd.DataFrame,
) -> Optional[MapAnnotation]:
    """Build an OME MapAnnotation describing one channel (ids, names, UniProt,
    RRID). Returns None when the (cycle, channel) pair has no antibodies.tsv
    row."""
    cycle, channel = og_ch_names_info["Cycle"], og_ch_names_info["Channel"]
    try:
        antb_row = antb_info.loc[(cycle, channel), :]
    except KeyError:
        return None

    ch_key = Map.M(k="Channel ID", value=channel_id)
    name_key = Map.M(k="Name", value=antb_row["target"])
    og_name_key = Map.M(k="Original Name", value=og_ch_names_info["channel_name"])
    uniprot_key = Map.M(k="UniprotID", value=antb_row["uniprot_accession_number"])
    rrid_key = Map.M(k="RRID", value=antb_row["rr_id"])
    antb_id_key = Map.M(k="AntibodiesTsvID", value=antb_row["channel_id"])
    ch_info = Map(ms=[ch_key, name_key, og_name_key, uniprot_key, rrid_key, antb_id_key])
    return MapAnnotation(value=ch_info)


def map_cycles_and_channels(antibodies_df: pd.DataFrame) -> dict:
    """Map lowercased channel_id ("cycle<N>_ch<M>") -> target antibody name."""
    channel_mapping = {
        channel_id.lower(): target
        for channel_id, target in zip(antibodies_df["channel_id"], antibodies_df["target"])
    }
    return channel_mapping


def collect_tiff_file_list(directory: Path, TIFF_FILE_NAMING_PATTERN: re.Pattern) -> List[Path]:
    """
    Given a directory path and a regex, find all the files in the directory
    (searched recursively) that match the regex.

    TODO: this is very similar to a function in create_cellshapes_csv.py -- could
    do to unify with a separate module?
    """
    fileList = []

    for dirpath, dirnames, filenames in walk(directory):
        for filename in filenames:
            if TIFF_FILE_NAMING_PATTERN.match(filename):
                # FIX: build the path against the directory the file was
                # actually found in, not the walk root — files found in
                # subdirectories previously got wrong paths.
                fileList.append(Path(dirpath) / filename)

    if len(fileList) == 0:
        logger.warning("No files found in " + str(directory))

    return fileList


def get_lateral_resolution(cytokit_config_filename: Path) -> float:
    """Read the lateral resolution from the Cytokit YAML config, rounded to
    two decimal places."""
    with open(cytokit_config_filename) as cytokit_config_file:
        cytokit_config = yaml.safe_load(cytokit_config_file)

    return float("%0.2f" % cytokit_config["acquisition"]["lateral_resolution"])


def collect_expressions_extract_channels(extractFile: Path) -> List[str]:
    """
    Given a TIFF file path, read file with TiffFile to get Labels attribute from
    ImageJ metadata. Return a list of the channel names in the same order as they
    appear in the ImageJ metadata.
    We need to do this to get the channel names in the correct order, and the
    ImageJ "Labels" attribute isn't picked up by AICSImageIO.
    """

    with TiffFile(str(extractFile.absolute())) as TF:
        ij_meta = TF.imagej_metadata
        numChannels = int(ij_meta["channels"])
        channelList = ij_meta["Labels"][0:numChannels]

    # Remove "proc_" from the start of the channel names.
    procPattern = re.compile(r"^proc_(.*)")
    channelList = [procPattern.match(channel).group(1) for channel in channelList]

    return channelList


def convert_tiff_file(funcArgs):
    """
    Worker: convert one source TIFF to OME-TIFF.

    funcArgs is a tuple of (sourceFile, ometiffFile, channelNames,
    lateral_resolution, og_ch_names_df[, antb_info]). lateral_resolution is
    kept for interface compatibility; pixel sizes are taken from the source
    image itself. When antb_info is given, per-channel MapAnnotations
    (antibody metadata) are attached to the OME-XML.
    """

    sourceFile, ometiffFile, channelNames, lateral_resolution, og_ch_names_df, *optional_args = (
        funcArgs
    )
    antb_info = optional_args[0] if optional_args else None

    logger.info(f"Converting file: {str(sourceFile)}")

    image = AICSImage(sourceFile)
    imageDataForOmeTiff = image.get_image_data("TCZYX")
    imageName = f"Image: {sourceFile.name}"

    # Create OME-XML metadata using build_ome
    ome_writer = OmeTiffWriter()
    omeXml = ome_writer.build_ome(
        data_shapes=[(image.dims.T, image.dims.C, image.dims.Z, image.dims.Y, image.dims.X)],
        data_types=[image.dtype],
        dimension_order=["TCZYX"],
        channel_names=[channelNames],
        image_name=[imageName],
        physical_pixel_sizes=[image.physical_pixel_sizes],
    )

    annotations = StructuredAnnotationList()
    for i, (channel_obj, channel_name, og_ch_names_row) in enumerate(
        zip(
            omeXml.images[0].pixels.channels,
            channelNames,
            og_ch_names_df.iterrows(),
        )
    ):
        channel_id = f"Channel:0:{i}"
        channel_obj.name = channel_name
        channel_obj.id = channel_id
        if antb_info is None:
            continue
        ch_info = generate_sa_ch_info(channel_id, og_ch_names_row[1], antb_info)
        if ch_info is None:
            continue
        channel_obj.annotation_refs.append(AnnotationRef(id=ch_info.id))
        annotations.append(ch_info)
    omeXml.structured_annotations = annotations

    ome_writer.save(
        data=imageDataForOmeTiff,
        uri=str(ometiffFile),
        ome_xml=omeXml,
        dimension_order="TCZYX",
        channel_names=channelNames,
    )

    logger.info(f"OME-TIFF file created: {ometiffFile}")


def create_ome_tiffs(
    file_list: List[Path],
    output_dir: Path,
    channel_names: List[str],
    lateral_resolution: float,
    subprocesses: int,
    og_ch_names_df,
    antb_info: Optional[pd.DataFrame] = None,
):
    """
    Given:
    - a list of TIFF files
    - an output directory path
    - a list of channel names
    - a float value for the lateral resolution in nanometres (aka XY resolution aka pixel size)
    - an integer value for the number of multiprocessing subprocesses
    - a dataframe of original channel names/cycles/channels
    - optionally, the antibodies.tsv info used to annotate channels
    Create OME-TIFF files using parallel processes.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    args_for_conversion = []
    for source_file in file_list:
        ome_tiff_file = (output_dir / source_file.name).with_suffix(".ome.tiff")
        if antb_info is not None:
            args_for_conversion.append(
                (
                    source_file,
                    ome_tiff_file,
                    channel_names,
                    lateral_resolution,
                    og_ch_names_df,
                    antb_info,
                )
            )
        else:
            args_for_conversion.append(
                (source_file, ome_tiff_file, channel_names, lateral_resolution, og_ch_names_df)
            )

    # Uncomment the next lines to run as a series instead of in parallel
    # for argtuple in args_for_conversion:
    #     convert_tiff_file(argtuple)

    with Pool(processes=subprocesses) as pool:
        # FIX: consume the iterator so worker exceptions propagate instead of
        # being silently discarded.
        for _ in pool.imap_unordered(convert_tiff_file, args_for_conversion):
            pass
        pool.close()
        pool.join()


def check_dir_is_empty(dir_path: Path):
    """Return True when dir_path contains no entries at all."""
    return not any(dir_path.iterdir())


########
# MAIN #
########
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description=(
            "Convert Cytokit's output TIFFs containing segmentation and extraction "
            'results to OME-TIFF, and add the channel names. Creates an "ome-tiff" '
            "directory inside the output/cytometry/tile and "
            "output/extract/expressions directories."
        ),
    )
    parser.add_argument(
        "cytokit_output",
        help="Path to output of `cytokit processor`",
        type=Path,
    )
    parser.add_argument(
        "bg_sub_tiles",
        help="Path to tiles with subtracted background",
        type=Path,
    )
    parser.add_argument(
        "cytokit_config",
        help="Path to Cytokit YAML config file",
        type=Path,
    )
    parser.add_argument(
        "input_data_dir",
        help="Path to the input dataset",
        type=Path,
    )
    parser.add_argument(
        "-p",
        "--processes",
        help="Number of parallel OME-TIFF conversions to perform at once",
        type=int,
        default=8,
    )

    args = parser.parse_args()

    print("Cytokit output:")
    print_directory_tree(args.cytokit_output)

    output_dir = Path("output")
    output_dir.mkdir(parents=True, exist_ok=True)

    cytometry_tile_dir_piece = Path("cytometry/tile")
    extract_expressions_piece = Path("extract/expressions")
    processor_data_json_piece = Path("processor/data.json")

    cytometryTileDir = args.cytokit_output / cytometry_tile_dir_piece
    print("Cytometry tile directory:", cytometryTileDir)

    # Prefer background-subtracted tiles when any were produced.
    if not check_dir_is_empty(args.bg_sub_tiles):
        extractDir = args.bg_sub_tiles
        print(list(Path(args.bg_sub_tiles).iterdir()))
    else:
        extractDir = args.cytokit_output / extract_expressions_piece
    print("Extract expressions directory:", extractDir)

    segmentationFileList = collect_tiff_file_list(cytometryTileDir, TIFF_FILE_NAMING_PATTERN)
    extractFileList = collect_tiff_file_list(extractDir, TIFF_FILE_NAMING_PATTERN)
    antb_path = find_antibodies_meta(args.input_data_dir)
    lateral_resolution = get_lateral_resolution(args.cytokit_config)

    if not extractFileList:
        # Channel names come from the first extract TIFF; fail with a clear
        # message instead of an IndexError when none were found.
        raise ValueError(f"No extract expression TIFFs found in {extractDir}")
    extractChannelNames = collect_expressions_extract_channels(extractFileList[0])
    original_ch_names_df = create_original_channel_names_df(extractChannelNames)
    print(original_ch_names_df.head())

    antb_info = None
    updated_channel_names = original_ch_names_df["channel_name"].tolist()
    if antb_path:
        df = sort_by_cycle(antb_path)
        antb_info = get_ch_info_from_antibodies_meta(df)
        updated_channel_names = replace_provider_ch_names_with_antb(
            original_ch_names_df, antb_info
        )

    # Create segmentation mask OME-TIFFs
    if segmentationFileList:
        create_ome_tiffs(
            segmentationFileList,
            output_dir / cytometry_tile_dir_piece / "ome-tiff",
            SEGMENTATION_CHANNEL_NAMES,
            lateral_resolution,
            args.processes,
            original_ch_names_df,
            antb_info,
        )
    # Create the extract OME-TIFFs.
    if extractFileList:
        create_ome_tiffs(
            extractFileList,
            output_dir / extract_expressions_piece / "ome-tiff",
            updated_channel_names,
            lateral_resolution,
            args.processes,
            original_ch_names_df,
            antb_info,
        )

# --------------------------------------------------------------------------------
# /bin/create_cytokit_config.py:
# --------------------------------------------------------------------------------
import argparse
import json
import logging
import re
from pprint import pprint
from typing import List

import yaml

logging.basicConfig(level=logging.INFO, format="%(levelname)-7s - %(message)s")
logger = logging.getLogger(__name__)

# Some constants to use below.
path_format = "keyence_multi_cycle_v01"
memory_limit = "64G"


def comma_separated_integers(s: str) -> List[int]:
    """Parse a comma-separated string such as "0, 1" into a list of ints."""
    return [int(piece.strip()) for piece in s.split(",")]


########
# MAIN #
########
if __name__ == "__main__":
    # Set up argument parser and parse the command line arguments.
    parser = argparse.ArgumentParser(
        description="Create a YAML config file for Cytokit, based on a JSON file from the CODEX Toolkit pipeline. YAML file will be created in current working directory unless otherwise specified."
    )
    parser.add_argument(
        "--gpus",
        help="GPUs to use for Cytokit, specified as a comma-separated list of integers.",
        type=comma_separated_integers,
        default=[0, 1],
    )
    parser.add_argument(
        "pipelineConfigFilename",
        help="JSON file containing all information required for config generation.",
    )
    parser.add_argument(
        "-o",
        "--outfile",
        help="Path to output YAML config file. Default: experiment.yaml",
    )

    args = parser.parse_args()
    args.outfile = args.outfile or "experiment.yaml"

    logger.info("Reading pipeline config file " + args.pipelineConfigFilename + "...")

    with open(args.pipelineConfigFilename, "r") as pipelineConfigFile:
        pipelineConfigInfo = json.load(pipelineConfigFile)

    logger.info("Finished reading pipeline config file.")

    # Assemble the Cytokit config section by section; key insertion order is
    # kept as name, date, environment, acquisition, processor, analysis.
    cytokitConfig = {
        "name": pipelineConfigInfo["name"],
        "date": pipelineConfigInfo["date"],
        "environment": {"path_formats": path_format},
        "acquisition": {},  # This is populated below.
    }
    cytokitConfig["processor"] = {
        "args": {
            "gpus": args.gpus,
            "memory_limit": memory_limit,
            "run_crop": False,
            "run_tile_generator": True,
            "run_drift_comp": True,
            "run_cytometry": True,
            "run_best_focus": True,
            "run_deconvolution": False,
        },
        "tile_generator": {"raw_file_type": "keyence_mixed"},
        "best_focus": {"channel": pipelineConfigInfo["best_focus"]},
        "drift_compensation": {"channel": pipelineConfigInfo["drift_compensation"]},
        "cytometry": {
            "nuclei_channel_name": pipelineConfigInfo["nuclei_channel"],
            "segmentation_params": {
                "memb_min_dist": 8,
                "memb_sigma": 5,
                "memb_gamma": 0.25,
                "marker_dilation": 3,
                "marker_min_size": 2,
            },
            "quantification_params": {"nucleus_intensity": True, "cell_graph": True},
        },
    }
    cytokitConfig["analysis"] = [{"aggregate_cytometry_statistics": {"mode": "best_z_plane"}}]

    if "membrane_channel" in pipelineConfigInfo:
        cytokitConfig["processor"]["cytometry"]["membrane_channel_name"] = pipelineConfigInfo[
            "membrane_channel"
        ]
    else:
        logger.warning(
            "No membrane stain channel found in pipeline config. Will only use nuclei channel for segmentation."
        )

    # Populate acquisition section: these fields are copied verbatim from the
    # pipeline config.
    acquisitionFields = [
        "per_cycle_channel_names",
        "channel_names",
        "axial_resolution",
        "lateral_resolution",
        "emission_wavelengths",
        "magnification",
        "num_cycles",
        "num_z_planes",
        "numerical_aperture",
        "objective_type",
        "region_height",
        "region_names",
        "region_width",
        "tile_height",
        "tile_overlap_x",
        "tile_overlap_y",
        "tile_width",
        "tiling_mode",
    ]
    cytokitConfig["acquisition"] = {
        field: pipelineConfigInfo[field] for field in acquisitionFields
    }

    # Create operator section to extract channels collapsed in one time point,
    # leaving out blank/empty channels and only including the nuclear stain
    # channel used for segmentation.
    blankPattern = re.compile(r"cyc(\d+)_ch(\d+)_orig([^_]*)blank", re.IGNORECASE)
    emptyPattern = re.compile(r"cyc(\d+)_ch(\d+)_orig([^_]*)empty", re.IGNORECASE)
    dapiChannelPattern = re.compile(r"cyc(\d+)_ch(\d+)_orig([^_]*)DAPI", re.IGNORECASE)
    hoechstChannelPattern = re.compile(r"cyc(\d+)_ch(\d+)_orig([^_]*)HOECHST", re.IGNORECASE)

    operatorExtractChannels = []

    for channelName in pipelineConfigInfo["channel_names"]:
        # Skip unwanted channels: "empty" channels always, and any nuclear
        # stain (DAPI/HOECHST) other than the one chosen for segmentation.
        if emptyPattern.match(channelName):
            continue
        is_nuclear_stain = dapiChannelPattern.match(channelName) or hoechstChannelPattern.match(
            channelName
        )
        if is_nuclear_stain and channelName != pipelineConfigInfo["nuclei_channel"]:
            continue

        # Skip channels that failed QC.
148 | if pipelineConfigInfo["channel_names_qc_pass"]: 149 | if len(pipelineConfigInfo["channel_names_qc_pass"][channelName]) > 1: 150 | if blankPattern.match(channelName): 151 | pass 152 | else: 153 | raise ValueError(f"More than one {channelName} channel found.") 154 | else: 155 | channel_qc_pass = pipelineConfigInfo["channel_names_qc_pass"][channelName][0] 156 | if channel_qc_pass.casefold() == "false".casefold(): 157 | continue 158 | 159 | # Append to operator extract channels with "proc_" prepended -- this 160 | # tells Cytokit to extract the channels from the processed tiles. 161 | operatorExtractChannels.append("proc_" + channelName) 162 | 163 | # Add operator section to config. 164 | cytokitConfig["operator"] = [ 165 | {"extract": {"name": "expressions", "channels": operatorExtractChannels, "z": "all"}} 166 | ] 167 | 168 | logger.info("Writing Cytokit config to " + args.outfile) 169 | 170 | with open(args.outfile, "w") as outFile: 171 | yaml.safe_dump(cytokitConfig, outFile, encoding="utf-8", default_flow_style=None, indent=2) 172 | 173 | pprint(cytokitConfig, sort_dicts=False) 174 | 175 | logger.info("Finished writing Cytokit config.") 176 | -------------------------------------------------------------------------------- /bin/dataset_info/collect_dataset_info.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import logging 4 | import math 5 | import re 6 | import sys 7 | from collections import Counter 8 | from datetime import datetime 9 | from pathlib import Path 10 | from pprint import pprint 11 | from typing import Dict, List, Optional, Tuple, Union 12 | 13 | import numpy as np 14 | import pint 15 | 16 | sys.path.append("/opt") 17 | from pipeline_utils.dataset_listing import get_tile_dtype, get_tile_shape 18 | 19 | 20 | class ConfigCreator: 21 | def __init__(self): 22 | self.dataset_dir = Path("") 23 | self._num_concur_tasks = 10 24 | self._std_meta = dict() 25 | 
self._raw_data_dir = Path("") 26 | 27 | def read_metadata(self): 28 | path_to_meta = self._raw_data_dir / "dataset.json" 29 | meta = self._read_json_meta(path_to_meta) 30 | processed_meta = meta.copy() 31 | 32 | ch_names = [] 33 | for ch in meta["ChannelDetails"]["ChannelDetailsArray"]: 34 | ch_names.append(ch["Name"]) 35 | 36 | new_ch_names = self._make_ch_names_unique(ch_names) 37 | 38 | new_channel_details_array = [] 39 | for i, ch in enumerate(processed_meta["ChannelDetails"]["ChannelDetailsArray"]): 40 | new_ch = ch.copy() 41 | new_ch["Name"] = new_ch_names[i] 42 | new_channel_details_array.append(new_ch) 43 | processed_meta["ChannelDetails"]["ChannelDetailsArray"] = new_channel_details_array 44 | self._std_meta = processed_meta 45 | 46 | def find_raw_data_dir(self): 47 | NONRAW_DIRECTORY_NAME_PIECES = [ 48 | "processed", 49 | "drv", 50 | "metadata", 51 | "extras", 52 | "Overview", 53 | ] 54 | raw_data_dir_possibilities = [] 55 | 56 | for child in self.dataset_dir.iterdir(): 57 | if not child.is_dir(): 58 | continue 59 | if not any(piece in child.name for piece in NONRAW_DIRECTORY_NAME_PIECES): 60 | raw_data_dir_possibilities.append(child) 61 | 62 | if len(raw_data_dir_possibilities) > 1: 63 | message_pieces = ["Found multiple raw data directory possibilities:"] 64 | message_pieces.extend(f"\t{path}" for path in raw_data_dir_possibilities) 65 | raise ValueError("\n".join(message_pieces)) 66 | self._raw_data_dir = raw_data_dir_possibilities[0] 67 | return self._raw_data_dir 68 | 69 | def create_config(self) -> dict: 70 | config = { 71 | "name": self._std_meta["DatasetName"], 72 | "date": self._create_proc_date(), 73 | "raw_data_location": self.find_raw_data_dir().name, 74 | "channel_names_qc_pass": self._get_qc_info_per_ch(), 75 | "emission_wavelengths": self._get_emission_wavelengths(), 76 | "excitation_wavelengths": self._get_excitation_wavelengths(), 77 | "axial_resolution": self._get_axial_resolution(), 78 | "lateral_resolution": 
self._get_lateral_resolution(), 79 | "magnification": self._std_meta["NominalMagnification"], 80 | "num_z_planes": self._std_meta["NumZPlanes"], 81 | "numerical_aperture": self._std_meta["NumericalAperture"], 82 | "objective_type": self._std_meta["ImmersionMedium"].lower(), 83 | "region_height": self._std_meta["RegionHeight"], 84 | "region_width": self._std_meta["RegionWidth"], 85 | "region_names": self._get_region_names(), 86 | "tile_overlap_x": self._get_tile_overlap_x_in_px(), 87 | "tile_overlap_y": self._get_tile_overlap_y_in_px(), 88 | "tile_height": self._get_tile_shape_no_overlap()[0], 89 | "tile_width": self._get_tile_shape_no_overlap()[1], 90 | "tile_dtype": self._get_tile_dtype(), 91 | "tiling_mode": self._std_meta["TileLayout"].lower(), 92 | "per_cycle_channel_names": self._get_per_cycle_ch_names(), 93 | "channel_names": self._get_channel_names(), 94 | "num_cycles": self._std_meta["NumCycles"], 95 | "best_focus": self._get_nuc_ch(), 96 | "drift_compensation": self._get_nuc_ch(), 97 | "nuclei_channel": self._get_nuc_ch(), 98 | "membrane_channel": self._get_membr_ch(), 99 | "nuclei_channel_loc": self._std_meta["NuclearStainForSegmentation"], 100 | "membrane_channel_loc": self._std_meta["MembraneStainForSegmentation"], 101 | "target_shape": self._calc_target_shape(), 102 | "num_concurrent_tasks": self._num_concur_tasks, 103 | } 104 | return config 105 | 106 | def _read_json_meta(self, path_to_meta: Path) -> Dict[str, Union[str, int, dict, list]]: 107 | with open(path_to_meta, "r") as s: 108 | json_meta = json.load(s) 109 | return json_meta 110 | 111 | def _create_proc_date(self) -> str: 112 | processing_date = datetime.now().strftime("%Y-%m-%d %H:%M:%S") 113 | return processing_date 114 | 115 | def _get_qc_info_per_ch(self) -> Dict[str, List[str]]: 116 | ch_details = self._std_meta["ChannelDetails"]["ChannelDetailsArray"] 117 | channel_qc_info = dict() 118 | channel_qc_info["Marker"] = ["Result"] 119 | for ch in ch_details: 120 | ch_name = ch["Name"] 121 | 
qc_result = ch["PassedQC"] 122 | if qc_result is True: 123 | qc_result_str = "TRUE" 124 | else: 125 | qc_result_str = "FALSE" 126 | channel_qc_info[ch_name] = [qc_result_str] 127 | return channel_qc_info 128 | 129 | def _make_ch_names_unique(self, channel_names: List[str]) -> List[str]: 130 | unique_names = Counter(channel_names) 131 | new_names = channel_names.copy() 132 | 133 | for unique_ch, count in unique_names.items(): 134 | if count > 1: 135 | this_ch_count = 1 136 | for i, ch_name in enumerate(channel_names): 137 | if ch_name == unique_ch: 138 | new_name = f"{ch_name}_{this_ch_count}" 139 | new_names[i] = new_name 140 | this_ch_count += 1 141 | return new_names 142 | 143 | def _get_emission_wavelengths(self) -> List[float]: 144 | em_wav = [] 145 | for ch in self._std_meta["ChannelDetails"]["ChannelDetailsArray"]: 146 | wav = ch["EmissionWavelengthNM"] 147 | if wav not in em_wav: 148 | em_wav.append(float(wav)) 149 | return em_wav 150 | 151 | def _get_excitation_wavelengths(self) -> List[float]: 152 | num_channels = len(self._std_meta["ChannelDetails"]["ChannelDetailsArray"]) 153 | channel = self._std_meta["ChannelDetails"]["ChannelDetailsArray"][0] 154 | 155 | if "ExcitationWavelengthNM" in channel: 156 | exc_wav = [] 157 | for ch in self._std_meta["ChannelDetails"]["ChannelDetailsArray"]: 158 | wav = ch["ExcitationWavelengthNM"] 159 | if wav not in exc_wav: 160 | exc_wav.append(float(wav)) 161 | else: 162 | exc_wav = [0] * num_channels 163 | return exc_wav 164 | 165 | def _get_axial_resolution(self) -> float: 166 | unit = pint.UnitRegistry() 167 | provided_unit_z = unit[self._std_meta["ResolutionZUnit"]] 168 | provided_res_z = float(self._std_meta["ResolutionZ"]) 169 | res_z_in_units = provided_res_z * provided_unit_z 170 | axial_res_um = res_z_in_units.to("nm") 171 | return axial_res_um.magnitude 172 | 173 | def _get_lateral_resolution(self) -> float: 174 | unit = pint.UnitRegistry() 175 | provided_unit_x = unit[self._std_meta["ResolutionXUnit"]] 176 | 
provided_unit_y = unit[self._std_meta["ResolutionYUnit"]] 177 | provided_res_x = float(self._std_meta["ResolutionX"]) 178 | provided_res_y = float(self._std_meta["ResolutionY"]) 179 | res_x_in_units = provided_res_x * provided_unit_x 180 | res_y_in_units = provided_res_y * provided_unit_y 181 | lateral_res_um = ((res_x_in_units + res_y_in_units) / 2).to("nm") 182 | return lateral_res_um.magnitude 183 | 184 | def _get_region_names(self) -> List[int]: 185 | num_regions = self._std_meta["NumRegions"] 186 | return list(range(1, num_regions + 1)) 187 | 188 | def _get_tile_overlap_x_in_px(self) -> int: 189 | overlap = self._std_meta["TileOverlapX"] 190 | size = self._std_meta["TileWidth"] 191 | px_overlap = self._calc_px_overlap_from_proportional(size, overlap) 192 | return px_overlap 193 | 194 | def _get_tile_overlap_y_in_px(self) -> int: 195 | overlap = self._std_meta["TileOverlapY"] 196 | size = self._std_meta["TileHeight"] 197 | px_overlap = self._calc_px_overlap_from_proportional(size, overlap) 198 | return px_overlap 199 | 200 | def _calc_px_overlap_from_proportional(self, dim_size: int, dim_overlap: float) -> int: 201 | msg = f"Tile overlap proportion {dim_overlap} is greater than 1" 202 | if dim_overlap > 1: 203 | raise ValueError(msg) 204 | 205 | pixel_overlap = dim_size * dim_overlap 206 | 207 | if float(pixel_overlap).is_integer(): 208 | return int(pixel_overlap) 209 | else: 210 | # if overlap is not a whole number in px 211 | closest_overlap = int(math.ceil(pixel_overlap)) 212 | closest_overlap += closest_overlap % 2 # make even 213 | return closest_overlap 214 | 215 | def _get_per_cycle_ch_names(self) -> List[str]: 216 | per_cycle_channel_names = [] 217 | channels = self._std_meta["ChannelDetails"]["ChannelDetailsArray"] 218 | channel_ids = [] 219 | for ch in channels: 220 | channel_ids.append(int(ch["ChannelID"])) 221 | unique_ch_ids = sorted(set(channel_ids)) 222 | for ch in unique_ch_ids: 223 | per_cycle_channel_names.append("CH" + str(ch)) 224 | return 
per_cycle_channel_names 225 | 226 | def _get_channel_names(self) -> List[str]: 227 | channels = self._std_meta["ChannelDetails"]["ChannelDetailsArray"] 228 | channel_names = [] 229 | for ch in channels: 230 | channel_names.append(ch["Name"]) 231 | return channel_names 232 | 233 | def _get_nuc_ch(self) -> str: 234 | nuc_ch_loc = self._std_meta["NuclearStainForSegmentation"] 235 | nuc_ch_name = self._get_ch_name_by_location(nuc_ch_loc) 236 | return nuc_ch_name 237 | 238 | def _get_membr_ch(self) -> str: 239 | membr_ch_loc = self._std_meta["MembraneStainForSegmentation"] 240 | membr_ch_name = self._get_ch_name_by_location(membr_ch_loc) 241 | return membr_ch_name 242 | 243 | def _get_ch_name_by_location(self, ch_loc: Dict[str, int]) -> str: 244 | channels = self._std_meta["ChannelDetails"]["ChannelDetailsArray"] 245 | ch_name = None 246 | for ch in channels: 247 | if ch["CycleID"] == ch_loc["CycleID"]: 248 | if ch["ChannelID"] == ch_loc["ChannelID"]: 249 | ch_name = ch["Name"] 250 | break 251 | if ch_name is None: 252 | raise ValueError("Could not find channel name of", str(ch_loc)) 253 | return ch_name 254 | 255 | def _get_tile_dtype(self) -> str: 256 | tile_dtype = str(get_tile_dtype(self._raw_data_dir).name) 257 | return tile_dtype 258 | 259 | def _calc_target_shape(self): 260 | """ 261 | Cytokit's nuclei detection U-Net (from CellProfiler) works best at 20x magnification. 262 | The CellProfiler U-Net requires the height and width of the images to be 263 | evenly divisible by 2 raised to the number of layers in the network, in this case 2^3=8. 
264 | https://github.com/hammerlab/cytokit/issues/14 265 | https://github.com/CellProfiler/CellProfiler-plugins/issues/65 266 | """ 267 | dims = [self._std_meta["TileWidth"], self._std_meta["TileHeight"]] 268 | magnification = self._std_meta["NominalMagnification"] 269 | scaleFactor = 1 270 | if magnification != 20: 271 | scaleFactor = 20 / magnification 272 | 273 | # Width and height must be evenly divisible by 8, so we round them up to them 274 | # closest factor of 8 if they aren't. 275 | new_dims = dims.copy() 276 | for dim in dims: 277 | if dim % 8: 278 | new_dim = int(8 * math.ceil(float(dim) / 8)) 279 | new_dims.append(new_dim) 280 | return new_dims 281 | 282 | @property 283 | def num_concurrent_tasks(self) -> int: 284 | return self._num_concur_tasks 285 | 286 | @num_concurrent_tasks.setter 287 | def num_concurrent_tasks(self, val: int): 288 | if val <= 0: 289 | self._num_concur_tasks = 10 290 | else: 291 | self._num_concur_tasks = val 292 | 293 | def _get_tile_shape_no_overlap(self) -> Tuple[int, int]: 294 | overlap_y = self._get_tile_overlap_y_in_px() 295 | overlap_x = self._get_tile_overlap_x_in_px() 296 | tile_height_with_overlap = self._std_meta["TileHeight"] 297 | tile_width_with_overlap = self._std_meta["TileWidth"] 298 | tile_height = tile_height_with_overlap - overlap_y 299 | tile_width = tile_width_with_overlap - overlap_x 300 | return tile_height, tile_width 301 | 302 | 303 | def write_pipeline_config(out_path: Path, pipeline_config: dict): 304 | with open(out_path, "w") as s: 305 | json.dump(pipeline_config, s, indent=4) 306 | 307 | 308 | def main(path_to_dataset: Path, num_concurrent_tasks: int = 10): 309 | logging.basicConfig(level=logging.INFO, format="%(levelname)-7s - %(message)s") 310 | logger = logging.getLogger(__name__) 311 | 312 | config_creator = ConfigCreator() 313 | config_creator.dataset_dir = path_to_dataset 314 | config_creator.num_concurrent_tasks = num_concurrent_tasks 315 | config_creator.find_raw_data_dir() 316 | 
def find_raw_data_dir(dataset_dir: Path) -> Path:
    """Locate the raw-data subdirectory of *dataset_dir*.

    Any child directory whose name contains none of the known non-raw markers
    is considered a candidate; exactly one candidate must exist.

    Raises:
        ValueError: if zero or more than one candidate directory is found
            (zero candidates previously fell through to a bare IndexError).
    """
    NONRAW_DIRECTORY_NAME_PIECES = [
        "processed",
        "drv",
        "metadata",
        "extras",
        "Overview",
    ]
    raw_data_dir_possibilities = []

    for child in dataset_dir.iterdir():
        if not child.is_dir():
            continue
        if not any(piece in child.name for piece in NONRAW_DIRECTORY_NAME_PIECES):
            raw_data_dir_possibilities.append(child)

    if len(raw_data_dir_possibilities) > 1:
        message_pieces = ["Found multiple raw data directory possibilities:"]
        message_pieces.extend(f"\t{path}" for path in raw_data_dir_possibilities)
        raise ValueError("\n".join(message_pieces))
    if not raw_data_dir_possibilities:
        raise ValueError(f"No raw data directory found in {dataset_dir}")
    return raw_data_dir_possibilities[0]
def check_new_meta_present(raw_data_dir: Path) -> bool:
    """Return True if the new-style metadata file 'dataset.json' exists in *raw_data_dir*.

    The presence of this file decides whether the new or the legacy metadata
    collector is used (see main below).
    """
    # raw_data_dir is already a Path; the old Path(...) wrapper was redundant.
    if (raw_data_dir / "dataset.json").exists():
        print("Found new metadata")
        return True
    else:
        # Fixed grammar of the log message ("Did not found" -> "Did not find").
        print("Did not find new metadata. Will try to use old metadata")
        return False
def make_dir_if_not_exists(dir_path: Path):
    """Create *dir_path* (and any missing parents) if it does not exist.

    Uses exist_ok=True so concurrent dask worker processes cannot race
    between the existence check and mkdir — the previous check-then-create
    pattern could raise FileExistsError under parallel execution.
    """
    dir_path.mkdir(parents=True, exist_ok=True)
def get_input_img_dirs(data_dir: Path):
    """Return every entry directly inside *data_dir*.

    Expected layout: one directory per cycle/region (e.g. Cyc1_reg1).
    """
    return list(data_dir.iterdir())
def generate_basic_macro_for_each_stack(
    stack_paths: Dict[int, Dict[int, Dict[int, Dict[int, Path]]]],
    macro_out_dir: Path,
    illum_cor_dir: Path,
) -> Dict[int, Dict[int, Dict[int, Dict[int, Path]]]]:
    """Write one BaSiC ImageJ macro per image stack.

    For every stack in {cycle: {region: {channel: {zplane: stack_path}}}} a
    macro file named '<stack_name>.ijm' is written into *macro_out_dir*;
    each macro instructs BaSiC to emit its correction profiles into
    *illum_cor_dir*.  Returns the macro paths in the same nested layout as
    the input.
    """
    macro_paths = dict()
    for cycle in stack_paths:
        macro_paths[cycle] = dict()
        for region in stack_paths[cycle]:
            macro_paths[cycle][region] = dict()
            for channel in stack_paths[cycle][region]:
                macro_paths[cycle][region][channel] = dict()
                for zplane, stack_path in stack_paths[cycle][region][channel].items():
                    # Macro file name mirrors the stack file name, e.g.
                    # Cyc001_Reg001_Ch001_Z001.tif.ijm
                    macro_path = macro_out_dir / (stack_path.name + ".ijm")
                    macro = fill_in_basic_macro_template(stack_path, illum_cor_dir)
                    save_macro(macro_path, macro)
                    macro_paths[cycle][region][channel][zplane] = macro_path
    return macro_paths
def apply_illum_cor(img: "Image", flatfield: "Image") -> "Image":
    """Divide *img* by the BaSiC flat-field estimate.

    The division is done in float32; the result is rounded and clipped to the
    value range of the input's integer dtype, then cast back to that dtype.
    """
    src_dtype = img.dtype
    bounds = np.iinfo(src_dtype)
    corrected = img.astype(np.float32) / flatfield
    rounded = np.round(corrected, 0)
    return np.clip(rounded, bounds.min, bounds.max).astype(src_dtype)
def organize_listing_by_cyc_reg_ch_zplane(
    listing: Dict[int, Dict[int, Dict[int, Dict[int, Dict[int, Path]]]]],
    tile_ids_to_use: Iterable[int],
) -> Dict[int, Dict[int, Dict[int, Dict[int, List[Path]]]]]:
    """Regroup {cycle: {region: {channel: {tile: {zplane: path}}}}} into
    {cycle: {region: {channel: {zplane: [paths]}}}}, keeping only tiles in
    *tile_ids_to_use*.

    Every cycle/region/channel key from the input is preserved in the output
    even when no tile survives the filter (it then maps to an empty dict).
    """
    regrouped = dict()
    for cycle, regions in listing.items():
        regrouped[cycle] = dict()
        for region, channels in regions.items():
            regrouped[cycle][region] = dict()
            for channel, tiles in channels.items():
                per_zplane = dict()
                regrouped[cycle][region][channel] = per_zplane
                for tile, zplane_dict in tiles.items():
                    if tile not in tile_ids_to_use:
                        continue
                    for zplane, path in zplane_dict.items():
                        per_zplane.setdefault(zplane, []).append(path)
    return regrouped
def run_basic(basic_macro_path: Path, log_dir: Path):
    """Run Fiji/ImageJ headless with the given BaSiC macro and write a log file.

    It is expected that ImageJ is added to system PATH.

    Raises:
        RuntimeError: on an unsupported platform (previously `imagej_name`
            was simply left unbound, producing a confusing NameError).
        Exception: if ImageJ exits with a nonzero status.
    """
    launcher_by_system = {
        "Windows": "ImageJ-win64",
        "Linux": "ImageJ-linux64",
        "Darwin": "ImageJ-macosx",
    }
    system = platform.system()
    if system not in launcher_by_system:
        raise RuntimeError("Unsupported platform for ImageJ: " + system)
    imagej_name = launcher_by_system[system]

    command = imagej_name + " --headless --console -macro " + str(basic_macro_path)
    print("Started running BaSiC for", str(basic_macro_path))
    # check=True makes subprocess.run raise CalledProcessError on a nonzero
    # exit, so the original manual `res.returncode` test could never reach its
    # error branch; convert the exception into the intended message instead.
    try:
        res = subprocess.run(
            command, shell=True, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
    except subprocess.CalledProcessError as e:
        raise Exception(
            "There was an error while running the BaSiC for "
            + str(basic_macro_path)
            + "\n"
            + e.stderr.decode("utf-8")
        ) from e
    print("Finished", str(basic_macro_path))
    macro_filename = basic_macro_path.name
    run_log = (
        "Command:\n"
        + res.args
        + "\n\nSTDERR:\n"
        + res.stderr.decode("utf-8")
        + "\n\nSTDOUT:\n"
        + res.stdout.decode("utf-8")
    )
    log_filename = macro_filename + ".log"
    log_path = log_dir / log_filename
    with open(log_path, "w", encoding="utf-8") as f:
        f.write(run_log)
    return
def check_illum_cor_images(
    illum_cor_dir: Path,
    log_dir: Path,
    zplane_listing: Dict[int, Dict[int, Dict[int, Dict[int, List[Path]]]]],
):
    """Verify that BaSiC produced a flat-field image for every expected stack.

    Only flat-field outputs are checked (dark-field estimation is disabled
    elsewhere in this module).  For every missing image the corresponding
    ImageJ log is collected and printed, then ValueError is raised.
    """
    cor_img_name_template = "{cor_type}_Cyc{cyc:03d}_Reg{reg:03d}_Ch{ch:03d}_Z{z:03d}.tif"
    log_name_template = "Cyc{cyc:03d}_Reg{reg:03d}_Ch{ch:03d}_Z{z:03d}.tif.ijm.log"
    imgs_present = []
    imgs_missing = []
    imgs_missing_logs = []
    for cycle in zplane_listing:
        for region in zplane_listing[cycle]:
            for channel in zplane_listing[cycle][region]:
                for zplane, macro_path in zplane_listing[cycle][region][channel].items():
                    flatfield_fn = cor_img_name_template.format(
                        cor_type="flatfield", cyc=cycle, reg=region, ch=channel, z=zplane
                    )
                    darkfield_fn = cor_img_name_template.format(
                        cor_type="darkfield", cyc=cycle, reg=region, ch=channel, z=zplane
                    )
                    flatfield_path = illum_cor_dir / "flatfield" / flatfield_fn
                    # darkfield_path = illum_cor_dir / "darkfield" / darkfield_fn
                    if flatfield_path.exists():
                        imgs_present.append((flatfield_fn))
                    else:
                        imgs_missing.append((flatfield_fn))
                        # Keep the ImageJ log for this stack so the failure
                        # reason can be printed alongside the missing file.
                        log_path = log_dir / log_name_template.format(
                            cyc=cycle, reg=region, ch=channel, z=zplane
                        )
                        with open(log_path, "r", encoding="utf-8") as f:
                            log_content = f.read()
                        imgs_missing_logs.append(log_content)
    if len(imgs_missing) > 0:
        msg = (
            "Probably there was an error while running BaSiC. "
            + "There is no image in one or more directories."
        )
        print(msg)

        for i in range(0, len(imgs_missing)):
            print("\nOne or both are missing:")
            print(imgs_missing[i])
            print("ImageJ log:")
            print(imgs_missing_logs[i])
        raise ValueError(msg)
    return
def select_which_tiles_to_use(
    n_tiles_y: int, n_tiles_x: int, tile_dtype: str, tile_size: Tuple[int, int]
) -> Set[int]:
    """Select every n-th tile, keeping the max size of the tile stack at 2GB.

    Args:
        n_tiles_y, n_tiles_x: grid dimensions; the total tile count is their product.
        tile_dtype: dtype name such as "uint16"; its bit width sets bytes/pixel.
        tile_size: (height, width) of one tile in pixels.

    Returns:
        Set of 0-based tile ids to include in the correction stack.
    """
    n_tiles = n_tiles_y * n_tiles_x

    img_dtype = int(re.search(r"(\d+)", tile_dtype).groups()[0])  # int16 -> 16
    nbytes = img_dtype / 8

    # max 2GB; guard against a single tile already exceeding the budget,
    # which previously made max_num_tiles zero and raised ZeroDivisionError.
    single_tile_gb = tile_size[0] * tile_size[1] * nbytes / 1024**3
    max_num_tiles = max(round(2.0 // single_tile_gb), 1)

    step = max(n_tiles // max_num_tiles, 1)
    if step < 2 and n_tiles > max_num_tiles:
        step = 2
    return set(range(0, n_tiles, step))
def main(data_dir: Path, pipeline_config_path: Path):
    """Estimate and apply BaSiC illumination correction for a whole dataset.

    Pipeline: list raw images -> pick a subset of tiles (<= 2GB per stack) ->
    resave them as per-(cycle, region, channel, zplane) stacks -> run BaSiC
    through headless ImageJ -> verify outputs -> divide every raw image by its
    flat-field and save under /output/corrected_images.
    """
    # Fixed output locations used by the containerized pipeline step.
    img_stack_dir = Path("/output/image_stacks/")
    macro_dir = Path("/output/basic_macros")
    illum_cor_dir = Path("/output/illumination_correction/")
    corrected_img_dir = Path("/output/corrected_images")
    log_dir = Path("/output/logs")

    make_dir_if_not_exists(img_stack_dir)
    make_dir_if_not_exists(macro_dir)
    make_dir_if_not_exists(illum_cor_dir)
    make_dir_if_not_exists(corrected_img_dir)
    make_dir_if_not_exists(log_dir)

    dataset_info = load_dataset_info(pipeline_config_path)

    tile_dtype = dataset_info["tile_dtype"]

    # Parallelism for the resave/run/apply phases comes from the pipeline config.
    num_workers = dataset_info["num_concurrent_tasks"]
    dask.config.set({"num_workers": num_workers, "scheduler": "processes"})

    raw_data_dir = dataset_info["dataset_dir"]
    img_dirs = get_input_img_dirs(Path(data_dir / raw_data_dir))
    print("Getting image listing")
    listing = create_listing_for_each_cycle_region(img_dirs)

    # Raw tiles still carry the acquisition overlap on both axes.
    tile_size = (
        dataset_info["tile_height"] + dataset_info["overlap_y"],
        dataset_info["tile_width"] + dataset_info["overlap_x"],
    )
    n_tiles = dataset_info["num_tiles"]
    n_tiles_y = dataset_info["num_tiles_y"]
    n_tiles_x = dataset_info["num_tiles_x"]

    tile_ids_to_use = select_which_tiles_to_use(n_tiles_y, n_tiles_x, tile_dtype, tile_size)

    print(
        f"tile size: {str(tile_size)}",
        f"| number of tiles: {str(n_tiles)}",
        f"| using {str(len(tile_ids_to_use))} tiles to compute illumination correction",
    )
    zplane_listing = organize_listing_by_cyc_reg_ch_zplane(listing, tile_ids_to_use)

    print("Resaving images as stacks")
    stack_paths = resave_imgs_to_stacks(zplane_listing, img_stack_dir)
    print("Generating BaSiC macros")
    macro_paths = generate_basic_macro_for_each_stack(stack_paths, macro_dir, illum_cor_dir)
    print("Running estimation of illumination")
    run_all_macros(macro_paths, log_dir)
    check_illum_cor_images(illum_cor_dir, log_dir, zplane_listing)

    print("Applying illumination correction")
    flatfields = read_flatfield_imgs(illum_cor_dir, stack_paths)
    # Dark-field correction is currently disabled throughout this module.
    # darkfields = read_darkfield_imgs(illum_cor_dir, stack_paths)
    apply_flatfield_and_save(listing, flatfields, corrected_img_dir)
def extract_digits_from_string(string: str) -> List[int]:
    """Extract every integer group from a filename,
    e.g. '1_00001_Z02_CH3' -> [1, 1, 2, 3]."""
    digits = [int(x) for x in re.split(r"(\d+)", string) if x.isdigit()]
    return digits


def arrange_listing_by_channel_tile_zplane(
    listing: List[Path],
) -> Dict[int, Dict[int, Dict[int, Path]]]:
    """Arrange image paths as {channel: {tile: {zplane: path}}}.

    File names are expected to look like '{region}_{tile}_Z{zplane}_CH{channel}'.
    Files with fewer than four digit groups (overlay/overview images) are
    skipped.  The length check now happens BEFORE any indexing, so names with
    fewer than three digit groups no longer raise IndexError.
    """
    tile_arrangement = dict()
    for file_path in listing:
        digits = extract_digits_from_string(file_path.name)
        if len(digits) < 4:
            # Overlay image or a file without channel information
            continue
        tile = digits[1]
        zplane = digits[2]
        channel = digits[3]
        tile_arrangement.setdefault(channel, {}).setdefault(tile, {})[zplane] = file_path
    return tile_arrangement
def extract_cycle_and_region_from_name(
    dir_name: str, cycle_prefix: str, region_prefix: str
) -> Tuple[Union[None, int], Union[None, int]]:
    """Parse cycle and region numbers out of a directory name such as
    'Cyc1_reg1' (case-insensitive).  Returns (None, None) when either
    prefix is absent."""
    if re.search(region_prefix, dir_name, re.IGNORECASE) is None:
        return None, None
    region = int(re.search(region_prefix + r"(\d+)", dir_name, re.IGNORECASE).groups()[0])
    if re.search(cycle_prefix, dir_name, re.IGNORECASE) is None:
        return None, None
    cycle = int(re.search(cycle_prefix + r"(\d+)", dir_name, re.IGNORECASE).groups()[0])
    return cycle, region
def _load_first_plane(dataset_dir: Path):
    """Read the first z-plane image found in the dataset listing, or None if
    the listing is empty.  Shared by the shape and dtype probes below, which
    previously duplicated this quadruple-nested loop."""
    img_dirs = get_img_dirs(dataset_dir)
    dataset_listing = create_listing_for_each_cycle_region(img_dirs)
    for cycle in dataset_listing:
        for region in dataset_listing[cycle]:
            for channel in dataset_listing[cycle][region]:
                for tile, zplanes in dataset_listing[cycle][region][channel].items():
                    first_plane = list(zplanes.values())[0]
                    return tif.imread(path_to_str(first_plane))
    return None


def get_tile_shape(dataset_dir: Path):
    """Return the array shape of one raw tile image (None if no images)."""
    plane = _load_first_plane(dataset_dir)
    return None if plane is None else plane.shape


def get_tile_dtype(dataset_dir: Path):
    """Return the numpy dtype of one raw tile image (None if no images)."""
    plane = _load_first_plane(dataset_dir)
    return None if plane is None else plane.dtype
load_pipeline_config(pipeline_config_path: Path) -> dict: 7 | with open(pipeline_config_path, "r") as s: 8 | config = json.load(s) 9 | return config 10 | 11 | 12 | def _convert_tiling_mode(tiling_mode: str): 13 | if "snake" in tiling_mode.lower(): 14 | new_tiling_mode = "snake" 15 | elif "grid" in tiling_mode.lower(): 16 | new_tiling_mode = "grid" 17 | else: 18 | raise ValueError("Unknown tiling mode: " + tiling_mode) 19 | return new_tiling_mode 20 | 21 | 22 | def _get_dataset_info_from_config(pipeline_config: dict) -> Dict[str, Any]: 23 | required_fields: List[Tuple[str, Optional[str]]] = [ 24 | ("num_cycles", None), 25 | ("num_tiles_x", "region_width"), 26 | ("num_tiles_y", "region_height"), 27 | ("tile_width", None), 28 | ("tile_height", None), 29 | ("tile_dtype", None), 30 | ("overlap_x", "tile_overlap_x"), 31 | ("overlap_y", "tile_overlap_y"), 32 | ("pixel_distance_x", "lateral_resolution"), 33 | ("pixel_distance_y", "lateral_resolution"), 34 | ("pixel_distance_z", "axial_resolution"), 35 | ("nuclei_channel", None), 36 | ("membrane_channel", None), 37 | ("nuclei_channel_loc", None), 38 | ("membrane_channel_loc", None), 39 | ("num_z_planes", None), 40 | ("channel_names", None), 41 | ("channel_names_qc_pass", None), 42 | ("num_concurrent_tasks", None), 43 | ("lateral_resolution", None), 44 | ] 45 | optional_fields: List[Tuple[str, Optional[str]]] = [ 46 | ("membrane_channel", None), 47 | ] 48 | pipeline_config_dict = dict( 49 | dataset_dir=Path(pipeline_config["raw_data_location"]), 50 | num_channels=len(pipeline_config["channel_names"]) // pipeline_config["num_cycles"], 51 | num_tiles=pipeline_config["region_width"] * pipeline_config["region_height"], 52 | # does not matter because we have only one z-plane: 53 | overlap_z=1, 54 | # id of nuclei channel: 55 | reference_channel=pipeline_config["channel_names"].index(pipeline_config["nuclei_channel"]) 56 | + 1, 57 | reference_cycle=pipeline_config["channel_names"].index(pipeline_config["nuclei_channel"]) 58 | // 
def generate_slicer_info(
    tile_shape_no_overlap: Tuple[int, int], overlap: int, stitched_img_shape: Tuple[int, int]
) -> dict:
    """Describe how a stitched image is to be cut back into tiles.

    Returns {'slicer': {...}} with right/bottom padding that rounds the image
    up to whole tiles, the resulting tile grid, and tile shapes with and
    without the requested overlap.
    """
    img_height, img_width = stitched_img_shape
    tile_height, tile_width = tile_shape_no_overlap

    # (-a) % b is b - a % b when a is not a multiple of b, and 0 otherwise.
    padding = dict(
        left=0,
        right=(-img_width) % tile_width,
        top=0,
        bottom=(-img_height) % tile_height,
    )

    # Ceiling division: tiles needed to cover each axis.
    x_ntiles = (img_width + tile_width - 1) // tile_width
    y_ntiles = (img_height + tile_height - 1) // tile_height

    return {
        "slicer": {
            "padding": padding,
            "overlap": overlap,
            "num_tiles": {"x": x_ntiles, "y": y_ntiles},
            "tile_shape_no_overlap": {"x": tile_width, "y": tile_height},
            "tile_shape_with_overlap": {
                "x": tile_width + overlap * 2,
                "y": tile_height + overlap * 2,
            },
        }
    }
def replace_values_in_config(exp, slicer_info):
    """Overwrite the tiling-related fields of *exp* with slicer-derived values,
    preserving the originals under 'original_measurements'.  Mutates and
    returns *exp*."""
    slicer = slicer_info["slicer"]
    preserved_keys = (
        "tiling_mode",
        "region_width",
        "region_height",
        "num_z_planes",
        "tile_width",
        "tile_height",
        "tile_overlap_x",
        "tile_overlap_y",
        "target_shape",
    )
    original_measurements = {
        "original_measurements": {key: exp[key] for key in preserved_keys}
    }
    values_to_replace = {
        "tiling_mode": "grid",
        "region_width": slicer["num_tiles"]["x"],
        "region_height": slicer["num_tiles"]["y"],
        "num_z_planes": 1,
        "tile_width": slicer["tile_shape_no_overlap"]["x"],
        "tile_height": slicer["tile_shape_no_overlap"]["y"],
        "tile_overlap_x": slicer["overlap"] * 2,
        "tile_overlap_y": slicer["overlap"] * 2,
        "target_shape": [
            slicer["tile_shape_no_overlap"]["x"],
            slicer["tile_shape_no_overlap"]["y"],
        ],
    }

    exp.update(values_to_replace)
    exp.update(original_measurements)
    return exp
def save_modified_pipeline_config(pipeline_config: dict, out_dir: Path):
    """Serialize the modified pipeline config as pipelineConfig.json in out_dir."""
    with open(out_dir.joinpath("pipelineConfig.json"), "w") as out_stream:
        json.dump(pipeline_config, out_stream, indent=4)
def create_output_dirs_for_tiles(
    stitched_channel_dirs: Dict[int, Dict[int, Dict[int, Path]]], out_dir: Path
) -> Dict[int, Dict[int, Path]]:
    """Create one output directory per (cycle, region) pair under out_dir.

    Returns a nested mapping cycle -> region -> created directory path.
    Directory names follow the "Cyc{cycle}_reg{region}" convention.
    """
    out_dirs_for_tiles: Dict[int, Dict[int, Path]] = dict()
    for cycle, regions in stitched_channel_dirs.items():
        out_dirs_for_tiles[cycle] = {}
        for region in regions:
            tile_dir = out_dir / "Cyc{cycle:d}_reg{region:d}".format(cycle=cycle, region=region)
            make_dir_if_not_exists(tile_dir)
            out_dirs_for_tiles[cycle][region] = tile_dir
    return out_dirs_for_tiles
def get_tile(arr, hor_f: int, hor_t: int, ver_f: int, ver_t: int, overlap=0):
    """Extract the 2D window [ver_f:ver_t, hor_f:hor_t] grown by `overlap`
    pixels on every side; parts that fall outside `arr` are zero-padded so the
    returned tile always has the full requested size.
    """
    # Grow the requested window by the overlap on all four sides.
    hor_f, hor_t = hor_f - overlap, hor_t + overlap
    ver_f, ver_t = ver_f - overlap, ver_t + overlap

    n_rows, n_cols = arr.shape[0], arr.shape[1]

    # How far the window sticks out past each edge of the array.
    left_pad_size = max(0, -hor_f)
    top_pad_size = max(0, -ver_f)
    right_pad_size = max(0, hor_t - n_cols)
    bot_pad_size = max(0, ver_t - n_rows)

    # Clamp the window to the array bounds before slicing.
    tile = arr[max(ver_f, 0) : min(ver_t, n_rows), max(hor_f, 0) : min(hor_t, n_cols)]

    padding = ((top_pad_size, bot_pad_size), (left_pad_size, right_pad_size))
    # max() of the pair tuples exceeds (0, 0) iff any pad amount is non-zero.
    if max(padding) > (0, 0):
        tile = np.pad(tile, padding, mode="constant")
    return tile
def slice_img(
    in_path: str,
    out_dir: str,
    tile_size: int,
    overlap: int,
    region: int,
    channel: int,
    zplane: int,
):
    """Slice one stitched TIFF into square tiles and write them concurrently.

    Reads the image at in_path, splits it into tile_size x tile_size tiles
    (each padded by `overlap` pixels per side), and writes every tile to
    out_dir using dask's threaded scheduler.
    """
    tiles, tile_names = split_by_size(
        tif.imread(in_path),
        region=region,
        zplane=zplane,
        channel=channel,
        tile_w=tile_size,
        tile_h=tile_size,
        overlap=overlap,
    )

    # One delayed write per tile; dask fans them out over a thread pool.
    tasks = [
        dask.delayed(tif.imwrite)(
            osp.join(out_dir, name),
            tile,
            photometric="minisblack",
            shaped=False,
        )
        for tile, name in zip(tiles, tile_names)
    ]
    dask.compute(*tasks, scheduler="threads")
def collect_files_by_tile(
    tile_names: List[str],
    directory: Path,
    *,
    allow_empty_tiles: bool = False,
) -> Dict[str, List[Path]]:
    """Group files under `directory` (recursively) by tile name.

    A file belongs to a tile when its filename matches the tile name at the
    start (regex match semantics, as in the original implementation).

    Parameters
    ----------
    tile_names : tile identifiers such as "R001_X001_Y001".
    directory : root directory to search recursively.
    allow_empty_tiles : when False (default), raise if any tile has no files.

    Returns
    -------
    Mapping of tile name -> list of matching file paths.

    Raises
    ------
    ValueError if a tile has no files and allow_empty_tiles is False.
    """
    files_by_tile: Dict[str, List[Path]] = defaultdict(list)

    # PERF FIX: walk the directory tree once and reuse the listing for every
    # tile. The previous version re-walked the entire tree per tile, costing
    # O(tiles * files) filesystem traversals.
    all_files: List[Path] = [
        Path(dirpath_str) / filename
        for dirpath_str, _dirnames, filenames in walk(directory)
        for filename in filenames
    ]

    for tile in tile_names:
        tile_name_pattern = re.compile(tile)
        for file_path in all_files:
            if tile_name_pattern.match(file_path.name):
                files_by_tile[tile].append(file_path)

    # If a tile doesn't have any files, throw an error unless explicitly allowed.
    if not allow_empty_tiles:
        for tile in tile_names:
            if len(files_by_tile[tile]) == 0:
                raise ValueError(f"No files were found for tile {tile}")

    return files_by_tile
def symlink_images(data_dir: Path):
    """Populate a Cytokit-style symlink tree for data_dir by running the
    setup_data_directory.py helper script as a subprocess."""
    # TODO: unify, don't call another command-line script
    cmd = [part.format(data_dir=data_dir) for part in SETUP_DATA_DIR_COMMAND]
    print("Running:", " ".join(cmd))
    check_call(cmd)
def run_cytokit(cytokit_command: str, data_directory: Path, yaml_config: Path):
    """Invoke `cytokit <command> run_all` on the prepared data directory.

    After an `operator` run, remove the `cytokit processor` output entries
    that were linked into the output directory beforehand.
    """
    cmd = [
        part.format(
            command=cytokit_command,
            data_dir=data_directory,
            yaml_config=yaml_config,
        )
        for part in CYTOKIT_COMMAND
    ]
    print("Running:", " ".join(cmd))

    cytokit_env = environ.copy()
    # Cytokit's pipeline package is not importable from the default path
    # inside the image, so point PYTHONPATH at it explicitly.
    cytokit_env["PYTHONPATH"] = "/lab/repos/cytokit/python/pipeline"
    check_call(cmd, env=cytokit_env)

    print("Cytokit completed successfully")
    # I feel really bad about this, but not bad enough not to do it
    if cytokit_command == "operator":
        output_dir = Path("output")
        for dirname in CYTOKIT_PROCESSOR_OUTPUT_DIRS:
            dir_to_delete = output_dir / dirname
            print("Deleting", dir_to_delete)
            # NOTE(review): these entries appear to be symlinks created in
            # find_or_prep_data_directory, so unlink() suffices; it would fail
            # if they were ever real directories -- confirm against the flow.
            dir_to_delete.unlink()
def main(data_dir: str):
    """Validate a raw CODEX dataset and mirror it as a Cytokit symlink tree.

    Checks that data_dir is readable, contains cycle-region subdirectories
    matching rawDirNamingPattern, and that each of those contains TIFF files
    matching rawFileNamingPattern. Then creates a "symlinks" directory in the
    current working directory with one Cyc<c>_reg<r> subdirectory per source
    directory, populated with symlinks to the raw TIFFs (with the leading
    region digit of each filename corrected from the directory name).

    Raises a generic Exception on an unreadable source directory, or when no
    matching directories/files are found.
    """
    ###################################################################
    # Inspect source directories and collect paths to raw data files. #
    ###################################################################

    # The source directory must exist and be readable by the current user.
    dir_stat = os.stat(data_dir)
    if not (dir_stat.st_mode & stat.S_IRUSR):
        raise Exception(
            "Source directory {} is not readable by the current user.".format(data_dir)
        )

    # Each cycle-region pair is expected to live in its own subdirectory;
    # naming conventions vary by submitter, hence the permissive pattern.
    source_listing = os.listdir(data_dir)
    cycle_region_dirs = [d for d in source_listing if rawDirNamingPattern.search(d)]
    if not cycle_region_dirs:
        raise Exception(
            "No directories matching expected raw data directory naming pattern found in {}".format(
                data_dir
            )
        )

    # Collect, per cycle-region directory, the TIFF files that follow the
    # expected raw file naming convention.
    source_data_files = {}
    for sdir in cycle_region_dirs:
        tiff_names = [
            f
            for f in os.listdir(os.path.join(data_dir, sdir))
            if rawFileNamingPattern.search(f)
        ]
        if not tiff_names:
            raise Exception(
                "No files found matching expected raw file naming pattern in {}".format(sdir)
            )
        source_data_files[sdir] = tiff_names

    # NOTE(review): completeness of the file set (one file per channel,
    # z-plane, cycle, and region) is not verified here -- only the naming.
    # A count-based check against the pipeline config would catch gaps.

    ######################################
    # Start creating directories and links
    ######################################

    target_directory = "symlinks"

    os.mkdir(target_directory)
    logger.info("Cytokit data directory created at %s" % target_directory)

    for sdir, tiff_names in source_data_files.items():
        # Extract the canonical cycle and region indices from the dir name.
        cycle, region = rawDirNamingPattern.match(sdir).group(1, 2)

        cycle_region_dir = os.path.join(target_directory, "Cyc" + cycle + "_reg" + region)
        os.mkdir(cycle_region_dir)

        # Create symlinks for TIFF files.
        for tif_file_name in tiff_names:
            # Replace the region number at the start because sometimes it's wrong.
            link_name = rawFileRegionPattern.sub(region, tif_file_name)
            os.symlink(
                os.path.join(data_dir, sdir, tif_file_name),
                os.path.join(cycle_region_dir, link_name),
            )

    logger.info("Links created in directories under %s" % target_directory)
142 | ) 143 | parser.add_argument( 144 | "data_dir", 145 | help="Data directory", 146 | ) 147 | 148 | args = parser.parse_args() 149 | 150 | main(args.data_dir) 151 | -------------------------------------------------------------------------------- /docker_images.txt: -------------------------------------------------------------------------------- 1 | hubmap/fiji_bigstitcher Dockerfile_fiji 2 | hubmap/codex-scripts Dockerfile 3 | hubmap/cytokit cytokit-docker/Dockerfile 4 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: base 2 | channels: 3 | - defaults 4 | - conda-forge 5 | dependencies: 6 | - python>=3.8,<4 7 | - pip 8 | - numpy-base>=1.18 9 | - numpy>=1.18 10 | - scipy>=1.4.0 11 | - pandas>=0.25 12 | - dask>=2.6.0 13 | - imagecodecs==2023.9.18 14 | - pip: 15 | - tifffile>=2021.8.30,<2023.3.15 16 | - PyYAML>=6.0.1 17 | - aicsimageio==4.14.0 18 | - lxml==4.9.3 19 | - matplotlib>=3.2.1 20 | - scikit-image>=0.17.2 21 | - scikit-learn>=0.23.1 22 | - shapely==2.0.1 23 | - opencv-contrib-python-headless>4.0,<5.0 24 | - pint==0.22 25 | - jsonschema==4.19.0 26 | -------------------------------------------------------------------------------- /metadata_examples/channelnames.txt: -------------------------------------------------------------------------------- 1 | DAPI-01 2 | Blank 3 | Blank 4 | Blank 5 | DAPI-02 6 | CD31 7 | CD8 8 | Empty 9 | DAPI-03 10 | CD20 11 | Ki67 12 | CD3e 13 | DAPI-04 14 | SMActin 15 | Podoplanin 16 | CD68 17 | DAPI-05 18 | PanCK 19 | CD21 20 | CD4 21 | DAPI-06 22 | Lyve1 23 | CD45RO 24 | CD11c 25 | DAPI-07 26 | CD35 27 | ECAD 28 | CD107a 29 | DAPI-08 30 | CD34 31 | CD44 32 | HLA-DR 33 | DAPI-09 34 | Empty 35 | FoxP3 36 | CD163 37 | DAPI-10 38 | Empty 39 | CollagenIV 40 | Vimentin 41 | DAPI-11 42 | Empty 43 | CD15 44 | CD45 45 | DAPI-12 46 | Empty 47 | CD5 48 | CD1c 49 | DAPI-13 50 | Blank 51 | Blank 52 | 
Blank 53 | -------------------------------------------------------------------------------- /metadata_examples/channelnames_report.csv: -------------------------------------------------------------------------------- 1 | Marker,Result 2 | DAPI-01,TRUE 3 | Blank,TRUE 4 | Blank,TRUE 5 | Blank,TRUE 6 | DAPI-02,TRUE 7 | CD31,TRUE 8 | CD8,TRUE 9 | Empty,TRUE 10 | DAPI-03,TRUE 11 | CD20,TRUE 12 | Ki67,TRUE 13 | CD3e,TRUE 14 | DAPI-04,TRUE 15 | SMActin,TRUE 16 | Podoplanin,TRUE 17 | CD68,TRUE 18 | DAPI-05,TRUE 19 | PanCK,TRUE 20 | CD21,TRUE 21 | CD4,TRUE 22 | DAPI-06,TRUE 23 | Lyve1,TRUE 24 | CD45RO,TRUE 25 | CD11c,TRUE 26 | DAPI-07,TRUE 27 | CD35,TRUE 28 | ECAD,TRUE 29 | CD107a,TRUE 30 | DAPI-08,TRUE 31 | CD34,TRUE 32 | CD44,TRUE 33 | HLA-DR,TRUE 34 | DAPI-09,TRUE 35 | Empty,TRUE 36 | FoxP3,TRUE 37 | CD163,TRUE 38 | DAPI-10,TRUE 39 | Empty,TRUE 40 | CollagenIV,TRUE 41 | Vimentin,TRUE 42 | DAPI-11,TRUE 43 | Empty,TRUE 44 | CD15,TRUE 45 | CD45,TRUE 46 | DAPI-12,TRUE 47 | Empty,TRUE 48 | CD5,TRUE 49 | CD1c,TRUE 50 | DAPI-13,TRUE 51 | Blank,TRUE 52 | Blank,TRUE 53 | Blank,TRUE 54 | -------------------------------------------------------------------------------- /metadata_examples/experiment.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "1.7.0.6", 3 | "name": "src_CX_19-002_CC2-spleen-A", 4 | "dateProcessed": "2020-02-10T16:01:15.357-05:00[America/New_York]", 5 | "objectiveType": "air", 6 | "magnification": 20, 7 | "aperture": 0.75, 8 | "xyResolution": 377.4463383838384, 9 | "zPitch": 1500.0, 10 | "wavelengths": [ 11 | 358, 12 | 488, 13 | 550, 14 | 650 15 | ], 16 | "bitDepth": 16, 17 | "numRegions": 1, 18 | "numCycles": 9, 19 | "numZPlanes": 13, 20 | "numChannels": 4, 21 | "regionWidth": 9, 22 | "regionHeight": 9, 23 | "tileWidth": 1920, 24 | "tileHeight": 1440, 25 | "tileOverlapX": 0.3, 26 | "tileOverlapY": 0.3, 27 | "tilingMode": "EITHER SNAKE OR GRID", 28 | "referenceCycle": 2, 29 | "referenceChannel": 1, 30 | 
"regIdx": [ 31 | 1 32 | ], 33 | "cycle_lower_limit": 1, 34 | "cycle_upper_limit": 9, 35 | "num_z_planes": 1, 36 | "region_width": 9, 37 | "region_height": 9, 38 | "tile_width": 1344, 39 | "tile_height": 1008 40 | } 41 | -------------------------------------------------------------------------------- /metadata_examples/exposure_times.txt: -------------------------------------------------------------------------------- 1 | Cycle,CH1,CH2,CH3,CH4 2 | 1,10,500,350,500 3 | 2,10,500,350,500 4 | 3,10,500,350,500 5 | 4,10,500,350,500 6 | 5,10,500,350,500 7 | 6,10,500,350,500 8 | 7,10,500,350,500 9 | 8,10,500,350,500 10 | 9,10,500,350,500 11 | 10,10,500,350,500 12 | 11,10,500,350,500 13 | 12,10,500,350,500 14 | 13,10,500,350,500 -------------------------------------------------------------------------------- /metadata_examples/segmentation.json: -------------------------------------------------------------------------------- 1 | { 2 | "nuclearStainCycle": 2, 3 | "nuclearStainChannel": 1, 4 | "membraneStainCycle": 11, 5 | "membraneStainChannel": 4 6 | } 7 | -------------------------------------------------------------------------------- /pipeline-manifest.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "pattern": "experiment.json", 4 | "description": "File containing Cytokit's calculations from deconvolution, drift compensation, and focal plan selection, in JSON format", 5 | "edam_ontology_term": "EDAM_1.24.format_3464" 6 | }, 7 | { 8 | "pattern": "stitched/expressions/reg(?P\\d+)_stitched_expressions\\.ome\\.tiff", 9 | "description": "Cytokit expression output for region {region}, in OME-TIFF format", 10 | "edam_ontology_term": "EDAM_1.24.format_3727", 11 | "is_data_product": true 12 | }, 13 | { 14 | "pattern": "stitched/mask/reg(?P\\d+)_stitched_mask\\.ome\\.tiff", 15 | "description": "Segmentation mask for region {region}, in OME-TIFF format", 16 | "edam_ontology_term": "EDAM_1.24.format_3727", 17 | "is_data_product": 
true 18 | } 19 | ] 20 | -------------------------------------------------------------------------------- /pipeline.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | 3 | class: Workflow 4 | cwlVersion: v1.1 5 | label: CODEX analysis pipeline using Cytokit 6 | 7 | requirements: 8 | SubworkflowFeatureRequirement: {} 9 | 10 | inputs: 11 | data_dir: 12 | label: "Directory containing CODEX data" 13 | type: Directory 14 | gpus: 15 | label: "GPUs to use, represented as a comma-separated list of integers" 16 | type: string 17 | default: "0" 18 | num_concurrent_tasks: 19 | label: "Number of parallel CPU jobs" 20 | type: int 21 | default: 10 22 | 23 | outputs: 24 | experiment_config: 25 | outputSource: illumination_first_stitching/cytokit_config 26 | type: File 27 | label: "Cytokit configuration format" 28 | data_json: 29 | outputSource: run_cytokit/data_json 30 | type: File 31 | label: "JSON file containing Cytokit's calculations from deconvolution, drift compensation, and focal plane selection" 32 | stitched_images: 33 | outputSource: ometiff_second_stitching/stitched_images 34 | type: Directory 35 | label: "Segmentation masks and expressions in OME-TIFF format" 36 | pipeline_config: 37 | outputSource: ometiff_second_stitching/final_pipeline_config 38 | type: File 39 | label: "Pipeline config with all the modifications" 40 | 41 | steps: 42 | illumination_first_stitching: 43 | in: 44 | data_dir: 45 | source: data_dir 46 | gpus: 47 | source: gpus 48 | num_concurrent_tasks: 49 | source: num_concurrent_tasks 50 | out: 51 | - slicing_pipeline_config 52 | - cytokit_config 53 | - new_tiles 54 | run: steps/illumination_first_stitching.cwl 55 | label: "Illumination correction, best focus selection, and stitching stage 1" 56 | 57 | run_cytokit: 58 | in: 59 | data_dir: 60 | source: illumination_first_stitching/new_tiles 61 | yaml_config: 62 | source: illumination_first_stitching/cytokit_config 63 | out: 64 | - 
cytokit_output 65 | - data_json 66 | run: steps/run_cytokit.cwl 67 | label: "CODEX analysis via Cytokit processor and operator" 68 | 69 | ometiff_second_stitching: 70 | in: 71 | cytokit_output: 72 | source: run_cytokit/cytokit_output 73 | slicing_pipeline_config: 74 | source: illumination_first_stitching/slicing_pipeline_config 75 | cytokit_config: 76 | source: illumination_first_stitching/cytokit_config 77 | data_dir: 78 | source: data_dir 79 | out: 80 | - stitched_images 81 | - final_pipeline_config 82 | run: steps/ometiff_second_stitching.cwl 83 | label: "OMETIFF creation and stitching stage 2" 84 | -------------------------------------------------------------------------------- /pipeline_release_mgmt.yaml: -------------------------------------------------------------------------------- 1 | main_branch: main 2 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 99 3 | exclude = 'sprm/' 4 | 5 | [tool.isort] 6 | profile = "black" 7 | multi_line_output = 3 8 | skip = 'sprm/' 9 | src_paths = ["bin", "sprm"] 10 | -------------------------------------------------------------------------------- /requirements-test.txt: -------------------------------------------------------------------------------- 1 | black==22.10.0 2 | isort==5.10.1 3 | -------------------------------------------------------------------------------- /steps/illumination_first_stitching.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | 3 | class: Workflow 4 | cwlVersion: v1.1 5 | label: CODEX analysis pipeline using Cytokit 6 | 7 | inputs: 8 | data_dir: 9 | label: "Directory containing CODEX data" 10 | type: Directory 11 | gpus: 12 | label: "GPUs to use, represented as a comma-separated list of integers" 13 | type: string 14 | default: "0" 15 | num_concurrent_tasks: 16 | 
label: "Number of parallel CPU jobs" 17 | type: int 18 | default: 10 19 | 20 | outputs: 21 | cytokit_config: 22 | outputSource: create_yaml_config/cytokit_config 23 | type: File 24 | label: "Cytokit configuration in YAML format" 25 | new_tiles: 26 | outputSource: slicing/new_tiles 27 | type: Directory 28 | slicing_pipeline_config: 29 | outputSource: slicing/modified_pipeline_config 30 | type: File 31 | label: "Pipeline config with all the modifications" 32 | 33 | steps: 34 | collect_dataset_info: 35 | in: 36 | base_directory: 37 | source: data_dir 38 | num_concurrent_tasks: 39 | source: num_concurrent_tasks 40 | out: 41 | - pipeline_config 42 | run: illumination_first_stitching/collect_dataset_info.cwl 43 | label: "Collect CODEX dataset info" 44 | 45 | illumination_correction: 46 | in: 47 | base_directory: 48 | source: data_dir 49 | pipeline_config: 50 | source: collect_dataset_info/pipeline_config 51 | out: 52 | - illum_corrected_tiles 53 | run: illumination_first_stitching/illumination_correction.cwl 54 | 55 | best_focus: 56 | in: 57 | data_dir: 58 | source: illumination_correction/illum_corrected_tiles 59 | pipeline_config: 60 | source: collect_dataset_info/pipeline_config 61 | out: 62 | - best_focus_tiles 63 | run: illumination_first_stitching/best_focus.cwl 64 | 65 | first_stitching: 66 | in: 67 | data_dir: 68 | source: best_focus/best_focus_tiles 69 | pipeline_config: 70 | source: collect_dataset_info/pipeline_config 71 | out: 72 | - stitched_images 73 | run: illumination_first_stitching/first_stitching.cwl 74 | 75 | slicing: 76 | in: 77 | base_stitched_dir: 78 | source: first_stitching/stitched_images 79 | pipeline_config: 80 | source: collect_dataset_info/pipeline_config 81 | out: 82 | - new_tiles 83 | - modified_pipeline_config 84 | run: illumination_first_stitching/slicing.cwl 85 | 86 | create_yaml_config: 87 | in: 88 | pipeline_config: 89 | source: slicing/modified_pipeline_config 90 | gpus: 91 | source: gpus 92 | out: 93 | - cytokit_config 94 | run: 
illumination_first_stitching/create_yaml_config.cwl 95 | label: "Create Cytokit experiment config in YAML format" 96 | -------------------------------------------------------------------------------- /steps/illumination_first_stitching/best_focus.cwl: -------------------------------------------------------------------------------- 1 | cwlVersion: v1.1 2 | class: CommandLineTool 3 | 4 | requirements: 5 | DockerRequirement: 6 | dockerPull: hubmap/codex-scripts:latest 7 | dockerOutputDirectory: "/output" 8 | 9 | baseCommand: ["python", "/opt/best_focus/run_best_focus_selection.py"] 10 | 11 | 12 | inputs: 13 | data_dir: 14 | type: Directory 15 | inputBinding: 16 | prefix: "--data_dir" 17 | 18 | 19 | pipeline_config: 20 | type: File 21 | inputBinding: 22 | prefix: "--pipeline_config_path" 23 | 24 | outputs: 25 | best_focus_tiles: 26 | type: Directory 27 | outputBinding: 28 | glob: "/output/best_focus" 29 | 30 | -------------------------------------------------------------------------------- /steps/illumination_first_stitching/collect_dataset_info.cwl: -------------------------------------------------------------------------------- 1 | cwlVersion: v1.1 2 | class: CommandLineTool 3 | label: Collect dataset info for Cytokit 4 | 5 | requirements: 6 | DockerRequirement: 7 | dockerPull: hubmap/codex-scripts:latest 8 | 9 | baseCommand: ["python", "/opt/dataset_info/run_collection.py"] 10 | 11 | inputs: 12 | base_directory: 13 | type: Directory 14 | inputBinding: 15 | prefix: "--path_to_dataset" 16 | 17 | num_concurrent_tasks: 18 | type: int 19 | inputBinding: 20 | prefix: "--num_concurrent_tasks" 21 | 22 | outputs: 23 | pipeline_config: 24 | type: File 25 | outputBinding: 26 | glob: pipelineConfig.json 27 | -------------------------------------------------------------------------------- /steps/illumination_first_stitching/create_yaml_config.cwl: -------------------------------------------------------------------------------- 1 | cwlVersion: v1.1 2 | class: CommandLineTool 3 | 
label: Create Cytokit experiment config 4 | 5 | requirements: 6 | DockerRequirement: 7 | dockerPull: hubmap/codex-scripts:latest 8 | 9 | baseCommand: ["python", "/opt/create_cytokit_config.py"] 10 | 11 | inputs: 12 | gpus: 13 | type: string 14 | inputBinding: 15 | position: 1 16 | prefix: "--gpus=" 17 | separate: false 18 | pipeline_config: 19 | type: File 20 | inputBinding: 21 | position: 2 22 | outputs: 23 | cytokit_config: 24 | type: File 25 | outputBinding: 26 | glob: experiment.yaml 27 | -------------------------------------------------------------------------------- /steps/illumination_first_stitching/first_stitching.cwl: -------------------------------------------------------------------------------- 1 | cwlVersion: v1.1 2 | class: CommandLineTool 3 | 4 | requirements: 5 | DockerRequirement: 6 | dockerPull: hubmap/codex-scripts:latest 7 | dockerOutputDirectory: "/output" 8 | 9 | baseCommand: ["python", "/opt/codex_stitching/run_stitching.py"] 10 | 11 | 12 | inputs: 13 | data_dir: 14 | type: Directory 15 | inputBinding: 16 | prefix: "--data_dir" 17 | 18 | 19 | pipeline_config: 20 | type: File 21 | inputBinding: 22 | prefix: "--pipeline_config_path" 23 | 24 | outputs: 25 | stitched_images: 26 | type: Directory 27 | outputBinding: 28 | glob: "/output/stitched_images" 29 | -------------------------------------------------------------------------------- /steps/illumination_first_stitching/illumination_correction.cwl: -------------------------------------------------------------------------------- 1 | cwlVersion: v1.1 2 | class: CommandLineTool 3 | 4 | requirements: 5 | DockerRequirement: 6 | dockerPull: hubmap/codex-scripts:latest 7 | dockerOutputDirectory: "/output" 8 | 9 | baseCommand: ["python", "/opt/illumination_correction/run_illumination_correction.py"] 10 | 11 | 12 | inputs: 13 | base_directory: 14 | type: Directory 15 | inputBinding: 16 | prefix: "--data_dir" 17 | 18 | pipeline_config: 19 | type: File 20 | inputBinding: 21 | prefix: 
"--pipeline_config_path" 22 | 23 | outputs: 24 | illum_corrected_tiles: 25 | type: Directory 26 | outputBinding: 27 | glob: "/output/corrected_images" 28 | -------------------------------------------------------------------------------- /steps/illumination_first_stitching/slicing.cwl: -------------------------------------------------------------------------------- 1 | cwlVersion: v1.1 2 | class: CommandLineTool 3 | 4 | requirements: 5 | DockerRequirement: 6 | dockerPull: hubmap/codex-scripts:latest 7 | dockerOutputDirectory: "/output" 8 | 9 | baseCommand: ["python", "/opt/slicing/run_slicing.py"] 10 | 11 | 12 | inputs: 13 | base_stitched_dir: 14 | type: Directory 15 | inputBinding: 16 | prefix: "--base_stitched_dir" 17 | 18 | pipeline_config: 19 | type: File 20 | inputBinding: 21 | prefix: "--pipeline_config_path" 22 | 23 | outputs: 24 | new_tiles: 25 | type: Directory 26 | outputBinding: 27 | glob: "/output/new_tiles" 28 | 29 | modified_pipeline_config: 30 | type: File 31 | outputBinding: 32 | glob: "/output/pipeline_conf/pipelineConfig.json" 33 | -------------------------------------------------------------------------------- /steps/ometiff_second_stitching-manifest.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "pattern": "pipeline_output/expr/(?P.+)\\.ome\\.tiff", 4 | "description": "Cytokit expression output for image {image}, in OME-TIFF format", 5 | "edam_ontology_term": "EDAM_1.24.format_3727" 6 | }, 7 | { 8 | "pattern": "pipeline_output/mask/(?P.+)\\.ome\\.tiff", 9 | "description": "Segmentation mask for image {image}, in OME-TIFF format", 10 | "edam_ontology_term": "EDAM_1.24.format_3727" 11 | } 12 | ] 13 | -------------------------------------------------------------------------------- /steps/ometiff_second_stitching.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | 3 | class: Workflow 4 | cwlVersion: v1.1 5 | label: CODEX 
analysis pipeline using Cytokit 6 | 7 | inputs: 8 | slicing_pipeline_config: 9 | type: File 10 | cytokit_config: 11 | type: File 12 | cytokit_output: 13 | type: Directory 14 | data_dir: 15 | type: Directory 16 | num_concurrent_tasks: 17 | label: "Number of parallel CPU jobs" 18 | type: int 19 | default: 10 20 | 21 | outputs: 22 | stitched_images: 23 | outputSource: second_stitching/stitched_images 24 | type: Directory 25 | label: "Segmentation masks and expressions in OME-TIFF format" 26 | final_pipeline_config: 27 | outputSource: second_stitching/final_pipeline_config 28 | type: File 29 | label: "Pipeline config with all the modifications" 30 | 31 | steps: 32 | background_subtraction: 33 | in: 34 | cytokit_output: 35 | source: cytokit_output 36 | pipeline_config: 37 | source: slicing_pipeline_config 38 | cytokit_config: 39 | source: cytokit_config 40 | num_concurrent_tasks: 41 | source: num_concurrent_tasks 42 | out: 43 | - bg_sub_tiles 44 | - bg_sub_config 45 | run: ometiff_second_stitching/background_subtraction.cwl 46 | 47 | ome_tiff_creation: 48 | in: 49 | cytokit_output: 50 | source: cytokit_output 51 | bg_sub_tiles: 52 | source: background_subtraction/bg_sub_tiles 53 | cytokit_config: 54 | source: cytokit_config 55 | input_data_dir: 56 | source: data_dir 57 | out: 58 | - ome_tiffs 59 | run: ometiff_second_stitching/ome_tiff_creation.cwl 60 | label: "Create OME-TIFF versions of Cytokit segmentation and extract results" 61 | 62 | second_stitching: 63 | in: 64 | pipeline_config: 65 | source: background_subtraction/bg_sub_config 66 | ometiff_dir: 67 | source: ome_tiff_creation/ome_tiffs 68 | out: 69 | - stitched_images 70 | - final_pipeline_config 71 | run: ometiff_second_stitching/second_stitching.cwl 72 | -------------------------------------------------------------------------------- /steps/ometiff_second_stitching/background_subtraction.cwl: -------------------------------------------------------------------------------- 1 | cwlVersion: v1.1 2 | class: 
CommandLineTool 3 | 4 | requirements: 5 | DockerRequirement: 6 | dockerPull: hubmap/codex-scripts:latest 7 | dockerOutputDirectory: "/output" 8 | 9 | baseCommand: ["python", "/opt/background_subtraction/run_background_subtraction.py"] 10 | 11 | 12 | inputs: 13 | cytokit_output: 14 | type: Directory 15 | inputBinding: 16 | prefix: "--data_dir" 17 | 18 | 19 | pipeline_config: 20 | type: File 21 | inputBinding: 22 | prefix: "--pipeline_config_path" 23 | 24 | cytokit_config: 25 | type: File 26 | inputBinding: 27 | prefix: "--cytokit_config_path" 28 | 29 | num_concurrent_tasks: 30 | type: int 31 | default: 10 32 | inputBinding: 33 | prefix: "--num_concurrent_tasks" 34 | 35 | outputs: 36 | bg_sub_tiles: 37 | type: Directory 38 | outputBinding: 39 | glob: "/output/background_subtraction" 40 | 41 | bg_sub_config: 42 | type: File 43 | outputBinding: 44 | glob: "/output/config/pipelineConfig.json" 45 | -------------------------------------------------------------------------------- /steps/ometiff_second_stitching/ome_tiff_creation.cwl: -------------------------------------------------------------------------------- 1 | cwlVersion: v1.1 2 | class: CommandLineTool 3 | label: Create OME-TIFF versions of Cytokit segmentation and extract results 4 | 5 | requirements: 6 | DockerRequirement: 7 | dockerPull: hubmap/codex-scripts:latest 8 | 9 | baseCommand: ["python", "/opt/convert_to_ometiff.py"] 10 | 11 | inputs: 12 | cytokit_output: 13 | type: Directory 14 | inputBinding: 15 | position: 0 16 | bg_sub_tiles: 17 | type: Directory 18 | inputBinding: 19 | position: 1 20 | cytokit_config: 21 | type: File 22 | inputBinding: 23 | position: 2 24 | input_data_dir: 25 | type: Directory 26 | inputBinding: 27 | position: 3 28 | 29 | outputs: 30 | ome_tiffs: 31 | type: Directory 32 | outputBinding: 33 | glob: output 34 | 35 | 36 | -------------------------------------------------------------------------------- /steps/ometiff_second_stitching/second_stitching.cwl: 
-------------------------------------------------------------------------------- 1 | cwlVersion: v1.1 2 | class: CommandLineTool 3 | 4 | requirements: 5 | DockerRequirement: 6 | dockerPull: hubmap/codex-scripts:latest 7 | dockerOutputDirectory: /output 8 | 9 | baseCommand: ["python", "/opt/codex_stitching/secondary_stitcher/secondary_stitcher_runner.py"] 10 | 11 | 12 | inputs: 13 | pipeline_config: 14 | type: File 15 | inputBinding: 16 | prefix: "--pipeline_config_path" 17 | 18 | ometiff_dir: 19 | type: Directory 20 | inputBinding: 21 | prefix: "--ometiff_dir" 22 | 23 | outputs: 24 | stitched_images: 25 | type: Directory 26 | outputBinding: 27 | glob: /output/pipeline_output 28 | 29 | final_pipeline_config: 30 | type: File 31 | outputBinding: 32 | glob: /output/pipelineConfig.json 33 | -------------------------------------------------------------------------------- /steps/run_cytokit-manifest.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "pattern": "experiment.json", 4 | "description": "File containing Cytokit's calculations from deconvolution, drift compensation, and focal plane selection, in JSON format", 5 | "edam_ontology_term": "EDAM_1.24.format_3464" 6 | } 7 | ] 8 | -------------------------------------------------------------------------------- /steps/run_cytokit.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | 3 | class: CommandLineTool 4 | cwlVersion: v1.1 5 | baseCommand: ["sh", "run_cytokit.sh"] 6 | 7 | requirements: 8 | DockerRequirement: 9 | dockerPull: hubmap/cytokit:latest 10 | DockerGpuRequirement: {} 11 | 12 | InitialWorkDirRequirement: 13 | listing: 14 | - entryname: run_cytokit.sh 15 | entry: |- 16 | __conda_setup="\$('/opt/conda/bin/conda' 'shell.bash' 'hook' 2> /dev/null)" 17 | if [ \$? -eq 0 ]; then 18 | eval "\$__conda_setup" 19 | else 20 | if [ -f "/opt/conda/etc/profile.d/conda.sh" ]; then 21 | . 
"/opt/conda/etc/profile.d/conda.sh" 22 | else 23 | export PATH="/opt/conda/bin:$PATH" 24 | fi 25 | fi 26 | unset __conda_setup 27 | 28 | export PYTHONPATH=/lab/repos/cytokit/python/pipeline 29 | conda activate cytokit 30 | 31 | mkdir $HOME/cytokit 32 | 33 | cytokit processor run_all --data-dir $(inputs.data_dir.path) --config-path $(inputs.yaml_config.path) --output_dir $HOME/cytokit && \ 34 | cytokit operator run_all --data-dir $HOME/cytokit --config-path $(inputs.yaml_config.path) --output_dir $HOME/cytokit 35 | 36 | 37 | inputs: 38 | data_dir: 39 | type: Directory 40 | 41 | yaml_config: 42 | type: File 43 | 44 | 45 | outputs: 46 | cytokit_output: 47 | type: Directory 48 | outputBinding: 49 | glob: cytokit 50 | 51 | data_json: 52 | type: File 53 | outputBinding: 54 | glob: cytokit/processor/data.json 55 | 56 | 57 | -------------------------------------------------------------------------------- /subm.yaml: -------------------------------------------------------------------------------- 1 | data_dir: 2 | class: Directory 3 | path: "/path/to/dir/with/codex_dataset" 4 | gpus: "0" 5 | num_concurrent_tasks: 10 6 | -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -o errexit 3 | set -o pipefail 4 | 5 | start() { echo travis_fold':'start:$1; echo $1; } 6 | end() { set +v; echo travis_fold':'end:$1; echo; echo; } 7 | die() { set +v; echo "$*" 1>&2 ; exit 1; } 8 | 9 | start black 10 | black --check . 11 | end black 12 | 13 | start isort 14 | isort --check-only . 15 | end isort 16 | --------------------------------------------------------------------------------