├── .gitignore ├── CITATION.cff ├── LICENSE ├── README.md ├── cylinter ├── __init__.py ├── components.py ├── config.py ├── cylinter.py ├── cylinter_config.yml ├── modules │ ├── PCA.py │ ├── aggregateData.py │ ├── areaFilter.py │ ├── clustering.py │ ├── clustermap.py │ ├── curateThumbnails.py │ ├── cycleCorrelation.py │ ├── frequencyStats.py │ ├── gating.py │ ├── intensityFilter.py │ ├── logTransform.py │ ├── metaQC.py │ ├── pruneOutliers.py │ ├── selectROIs.py │ └── setContrast.py ├── pipeline.py ├── prep.py ├── prep_subprocess.sh ├── pretrained_models │ ├── pretrained_model.pkl │ └── train_artifact_classifier.ipynb └── utils.py ├── docs ├── .gitignore ├── _config.yml ├── _includes │ ├── cylinter_gif.html │ ├── home.md │ └── workflow.md ├── _layouts │ └── default-cylinter.html ├── _sass │ ├── color_schemes │ │ └── cylinter.scss │ └── custom │ │ └── custom.scss ├── assets │ ├── gifs │ │ ├── cylinter.gif │ │ └── solitary_saunter.gif │ └── images │ │ ├── ExtFig4.jpg │ │ ├── cores.jpg │ │ └── cylinter-logo.svg ├── cite │ └── index.md ├── community │ └── index.md ├── exemplar │ └── index.md ├── faq │ └── #index.md ├── funding │ └── index.md ├── help │ └── index.md ├── index.md ├── installation │ └── index.md ├── modules │ ├── PCA.md │ ├── aggregateData.md │ ├── areaFilter.md │ ├── clustering.md │ ├── clustermap.md │ ├── curateThumbnails.md │ ├── cycleCorrelation.md │ ├── frequencyStats.md │ ├── gating.md │ ├── index.md │ ├── intensityFilter.md │ ├── logTransform.md │ ├── metaQC.md │ ├── pruneOutliers.md │ ├── selectROIs.md │ └── setContrast.md ├── run │ └── index.md ├── structure │ └── index.md ├── tutorials │ ├── #index.md │ ├── adding.md │ ├── basics.md │ ├── exhibit.json │ └── pipeline-visual-guide.html └── workflow │ └── index.md ├── pyproject.toml └── recipe └── meta.yaml /.gitignore: -------------------------------------------------------------------------------- 1 | .synapseConfig 2 | *~ 3 | 4 | # CyLinter directories 5 | input/ 6 | output/ 7 | 8 | # 
Byte-compiled / optimized / DLL files 9 | __pycache__/ 10 | *.py[cod] 11 | *$py.class 12 | 13 | # C extensions 14 | *.so 15 | 16 | .DS_Store 17 | 18 | # Jekyll 19 | _site/ 20 | *-cache/ 21 | .jekyll-metadata 22 | 23 | # Ruby 24 | .bundle/ 25 | .byebug_history 26 | .ruby-gemset 27 | .ruby-version 28 | *.gem 29 | Gemfile.lock 30 | 31 | # Distribution / packaging 32 | .Python 33 | build/ 34 | develop-eggs/ 35 | dist/ 36 | downloads/ 37 | eggs/ 38 | .eggs/ 39 | lib/ 40 | lib64/ 41 | parts/ 42 | sdist/ 43 | var/ 44 | wheels/ 45 | *.egg-info/ 46 | .installed.cfg 47 | *.egg 48 | MANIFEST 49 | 50 | # PyInstaller 51 | # Usually these files are written by a python script from a template 52 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 53 | *.manifest 54 | *.spec 55 | 56 | # Installer logs 57 | pip-log.txt 58 | pip-delete-this-directory.txt 59 | 60 | # Unit test / coverage reports 61 | htmlcov/ 62 | .tox/ 63 | .coverage 64 | .coverage.* 65 | .cache 66 | nosetests.xml 67 | coverage.xml 68 | *.cover 69 | .hypothesis/ 70 | .pytest_cache/ 71 | 72 | # Translations 73 | *.mo 74 | *.pot 75 | 76 | # Django stuff: 77 | *.log 78 | local_settings.py 79 | db.sqlite3 80 | 81 | # Flask stuff: 82 | instance/ 83 | .webassets-cache 84 | 85 | # Scrapy stuff: 86 | .scrapy 87 | 88 | # Sphinx documentation 89 | docs/_build/ 90 | 91 | # PyBuilder 92 | target/ 93 | 94 | # Jupyter Notebook 95 | .ipynb_checkpoints 96 | 97 | # pyenv 98 | .python-version 99 | 100 | # celery beat schedule file 101 | celerybeat-schedule 102 | 103 | # SageMath parsed files 104 | *.sage.py 105 | 106 | # Environments 107 | .env 108 | .venv 109 | env/ 110 | venv/ 111 | ENV/ 112 | env.bak/ 113 | venv.bak/ 114 | 115 | # Spyder project settings 116 | .spyderproject 117 | .spyproject 118 | 119 | # Rope project settings 120 | .ropeproject 121 | 122 | # mkdocs documentation 123 | /site 124 | 125 | # mypy 126 | .mypy_cache/ 127 | 
-------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: If you use CyLinter in your work, please cite it using the following metadata. 3 | title: CyLinter 4 | authors: 5 | - family-names: Baker 6 | given-names: Gregory 7 | orcid: https://orcid.org/0000-0002-5196-3961 8 | keywords: 9 | - multiplex microscopy 10 | - quality control 11 | - research software 12 | version: 0.0.47 13 | date-released: 2021-01-21 14 | license: MIT 15 | url: https://github.com/labsyspharm/cylinter 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Gregory J. Baker 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | ![](docs/assets/images/cylinter-logo.svg) 3 | 4 | ## An Interactive Image Segmentation Filter for Multiplex Microscopy. 5 | 6 | CyLinter is quality control software for identifying and removing cell segmentation instances corrupted by optical and/or image-processing artifacts in multiplex microscopy images. The tool is interactive and comprises a set of modular and extensible QC modules instantiated in a configurable [Python](https://www.python.org) Class object. Module results are cached to allow for progress bookmarking and dynamic restarts. 7 | 8 | CyLinter development is led by [Greg Baker](https://github.com/gjbaker) at the [Laboratory of Systems Pharmacology](https://labsyspharm.org/), Harvard Medical School. 9 | 10 | **Funding:** This work was supported by the Ludwig Cancer Research and the Ludwig Center at Harvard (P.K.S., S.S.) and by NIH NCI grants U2C-CA233280, and U2C-CA233262 (P.K.S., S.S.). Development of computational methods and image processing software is supported by a Team Science Grant from the Gray Foundation (P.K.S., S.S.), the Gates Foundation grant INV-027106 (P.K.S.), the David Liposarcoma Research Initiative at Dana-Farber Cancer Institute supported by KBF Canada via the Rossy Foundation Fund (P.K.S., S.S.) and the Emerson Collective (P.K.S.). S.S. is supported by the BWH President’s Scholars Award. 
11 | 12 | **Project Website:** https://labsyspharm.github.io/cylinter/ 13 | -------------------------------------------------------------------------------- /cylinter/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labsyspharm/cylinter/b9dc04b0f0892657d53dbf31f753dc52b481bc08/cylinter/__init__.py -------------------------------------------------------------------------------- /cylinter/components.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import functools 3 | 4 | import matplotlib.pyplot as plt 5 | import seaborn as sns 6 | 7 | from cylinter.modules.aggregateData import aggregateData 8 | from cylinter.modules.selectROIs import selectROIs 9 | from cylinter.modules.intensityFilter import intensityFilter 10 | from cylinter.modules.areaFilter import areaFilter 11 | from cylinter.modules.cycleCorrelation import cycleCorrelation 12 | from cylinter.modules.logTransform import logTransform 13 | from cylinter.modules.pruneOutliers import pruneOutliers 14 | from cylinter.modules.metaQC import metaQC 15 | from cylinter.modules.PCA import PCA 16 | from cylinter.modules.clustering import clustering 17 | from cylinter.modules.clustermap import clustermap 18 | from cylinter.modules.gating import gating 19 | from cylinter.modules.setContrast import setContrast 20 | from cylinter.modules.frequencyStats import frequencyStats 21 | from cylinter.modules.curateThumbnails import curateThumbnails 22 | 23 | logger = logging.getLogger(__name__) 24 | 25 | # map matplotlib color codes to the default seaborn palette 26 | sns.set() 27 | sns.set_color_codes() 28 | _ = plt.plot([0, 1], color='r') 29 | sns.set_color_codes() 30 | _ = plt.plot([0, 2], color='b') 31 | sns.set_color_codes() 32 | _ = plt.plot([0, 3], color='g') 33 | sns.set_color_codes() 34 | _ = plt.plot([0, 4], color='m') 35 | sns.set_color_codes() 36 | _ = plt.plot([0, 5], color='y') 37 | 
def module(func):
    """
    Register a pipeline module function and wrap it with log banners.

    The wrapped function is appended to ``pipeline_modules`` and its name to
    ``pipeline_module_names`` at decoration time, so registry order follows
    the order in which this function is called. Each call of the returned
    wrapper logs a banner line and the module name before delegating to
    ``func``, then a closing banner and an empty line once ``func`` returns.

    """
    banner = "=" * 70

    @functools.wraps(func)
    def logged(*args, **kwargs):
        logger.info(banner)
        logger.info("RUNNING MODULE: %s", func.__name__)
        outcome = func(*args, **kwargs)
        # The closing banner is only reached when func returns normally.
        logger.info(banner)
        logger.info("")
        return outcome

    pipeline_modules.append(logged)
    # functools.wraps copies func.__name__ onto the wrapper.
    pipeline_module_names.append(logged.__name__)
    return logged
class QC(object):
    """
    Plain container for every CyLinter pipeline setting.

    The constructor takes one keyword argument per configuration value and
    stores each on the instance without validation or conversion. Every
    argument defaults to ``None`` except ``default_mcs`` (200) and
    ``default_reclass_tuple`` ('0.75, 0.75').

    Each argument is stored under its own name, with one exception:
    ``default_mcs`` is stored as ``self.default_mcsQC``.

    """

    def __init__(self,

                 # general program configurations
                 inDir=None,
                 outDir=None,
                 startModule=None,
                 sampleNames=None,
                 sampleConditions=None,
                 sampleConditionAbbrs=None,
                 sampleStatuses=None,
                 sampleReplicates=None,
                 samplesToExclude=None,
                 counterstainChannel=None,
                 markersToExclude=None,

                 # selectROIs
                 delintMode=None,
                 showAbChannels=None,
                 samplesForROISelection=None,
                 autoArtifactDetection=None,
                 artifactDetectionMethod=None,

                 # intensityFilter
                 numBinsIntensity=None,

                 # areaFilter
                 numBinsArea=None,

                 # cycleCorrelation
                 numBinsCorrelation=None,

                 # pruneOutliers
                 hexbins=None,
                 hexbinGridSize=None,

                 # metaQC
                 metaQC=None,
                 default_mcs=200,
                 default_reclass_tuple='0.75, 0.75',
                 embeddingAlgorithmQC=None,
                 channelExclusionsClusteringQC=None,
                 samplesToRemoveClusteringQC=None,
                 percentDataPerChunk=None,
                 colormapAnnotationQC=None,
                 metricQC=None,
                 perplexityQC=None,
                 earlyExaggerationQC=None,
                 learningRateTSNEQC=None,

                 randomStateQC=None,
                 nNeighborsQC=None,
                 learningRateUMAPQC=None,
                 minDistQC=None,
                 repulsionStrengthQC=None,

                 # PCA
                 channelExclusionsPCA=None,
                 samplesToRemovePCA=None,
                 dimensionPCA=None,
                 pointSize=None,
                 labelPoints=None,
                 distanceCutoff=None,
                 conditionsToSilhouette=None,

                 # gating
                 gating=None,
                 channelExclusionsGating=None,
                 samplesToRemoveGating=None,
                 vectorThreshold=None,
                 classes=None,

                 # clustering
                 embeddingAlgorithm=None,
                 channelExclusionsClustering=None,
                 samplesToRemoveClustering=None,
                 normalizeTissueCounts=None,
                 fracForEmbedding=None,
                 dimensionEmbedding=None,
                 colormapAnnotationClustering=None,
                 colormapAnnotation=None,
                 perplexity=None,
                 earlyExaggeration=None,
                 learningRateTSNE=None,
                 metric=None,
                 randomStateTSNE=None,
                 nNeighbors=None,
                 learningRateUMAP=None,
                 minDist=None,
                 repulsionStrength=None,
                 randomStateUMAP=None,

                 # frequencyStats
                 controlGroups=None,
                 denominatorCluster=None,
                 FDRCorrection=None,

                 # curateThumbnails
                 numThumbnails=None,
                 windowSize=None,
                 segOutlines=None,
                 ):

        # general program configurations
        self.inDir = inDir
        self.outDir = outDir
        self.startModule = startModule
        self.sampleNames = sampleNames
        self.sampleConditions = sampleConditions
        self.sampleConditionAbbrs = sampleConditionAbbrs
        self.sampleStatuses = sampleStatuses
        self.sampleReplicates = sampleReplicates
        self.samplesToExclude = samplesToExclude
        self.counterstainChannel = counterstainChannel
        self.markersToExclude = markersToExclude

        # selectROIs
        self.delintMode = delintMode
        self.showAbChannels = showAbChannels
        self.samplesForROISelection = samplesForROISelection
        self.autoArtifactDetection = autoArtifactDetection
        self.artifactDetectionMethod = artifactDetectionMethod

        # intensityFilter
        self.numBinsIntensity = numBinsIntensity

        # areaFilter
        self.numBinsArea = numBinsArea

        # cycleCorrelation
        self.numBinsCorrelation = numBinsCorrelation

        # pruneOutliers
        self.hexbins = hexbins
        self.hexbinGridSize = hexbinGridSize

        # metaQC
        self.metaQC = metaQC
        # NOTE: the attribute name differs from the parameter name; it is
        # kept as-is because code outside this class may read default_mcsQC.
        self.default_mcsQC = default_mcs
        self.default_reclass_tuple = default_reclass_tuple
        self.embeddingAlgorithmQC = embeddingAlgorithmQC
        self.channelExclusionsClusteringQC = channelExclusionsClusteringQC
        self.samplesToRemoveClusteringQC = samplesToRemoveClusteringQC
        self.percentDataPerChunk = percentDataPerChunk
        self.colormapAnnotationQC = colormapAnnotationQC
        self.metricQC = metricQC
        self.perplexityQC = perplexityQC
        self.earlyExaggerationQC = earlyExaggerationQC
        self.learningRateTSNEQC = learningRateTSNEQC
        self.randomStateQC = randomStateQC
        self.nNeighborsQC = nNeighborsQC
        self.learningRateUMAPQC = learningRateUMAPQC
        self.minDistQC = minDistQC
        self.repulsionStrengthQC = repulsionStrengthQC

        # PCA
        self.channelExclusionsPCA = channelExclusionsPCA
        self.samplesToRemovePCA = samplesToRemovePCA
        self.dimensionPCA = dimensionPCA
        self.pointSize = pointSize
        self.labelPoints = labelPoints
        self.distanceCutoff = distanceCutoff
        self.conditionsToSilhouette = conditionsToSilhouette

        # gating
        self.gating = gating
        self.channelExclusionsGating = channelExclusionsGating
        self.samplesToRemoveGating = samplesToRemoveGating
        self.vectorThreshold = vectorThreshold
        self.classes = classes

        # clustering
        self.embeddingAlgorithm = embeddingAlgorithm
        self.channelExclusionsClustering = channelExclusionsClustering
        self.samplesToRemoveClustering = samplesToRemoveClustering
        self.normalizeTissueCounts = normalizeTissueCounts
        self.fracForEmbedding = fracForEmbedding
        self.dimensionEmbedding = dimensionEmbedding
        self.colormapAnnotationClustering = colormapAnnotationClustering
        # Previously accepted by the signature but never stored, so a value
        # passed by a caller was silently lost.
        self.colormapAnnotation = colormapAnnotation
        self.perplexity = perplexity
        self.earlyExaggeration = earlyExaggeration
        self.learningRateTSNE = learningRateTSNE
        self.metric = metric
        self.randomStateTSNE = randomStateTSNE
        self.nNeighbors = nNeighbors
        self.learningRateUMAP = learningRateUMAP
        self.minDist = minDist
        self.repulsionStrength = repulsionStrength
        self.randomStateUMAP = randomStateUMAP

        # frequencyStats
        self.controlGroups = controlGroups
        self.denominatorCluster = denominatorCluster
        self.FDRCorrection = FDRCorrection

        # curateThumbnails
        self.numThumbnails = numThumbnails
        self.windowSize = windowSize
        self.segOutlines = segOutlines
@dataclass(frozen=True)
class BooleanTerm:
    """
    One marker term of a Boolean immunophenotype signature.

    ``negated`` is tri-state as produced by :meth:`parse_str`:
    ``False`` for a '+'-prefixed term, ``True`` for a '-'-prefixed term and
    ``None`` for a term with no prefix.

    """

    name: str
    negated: bool  # may also be None when the term carries no +/- prefix

    @classmethod
    def parse_str(cls, s):
        """
        Build a term from a string such as '+CD45', '-CD45' or 'CD45'.

        A leading '+' or '-' is stripped from the name and sets ``negated``
        to ``False`` or ``True`` respectively; without a prefix the whole
        string is the name and ``negated`` is ``None``.

        """
        if s.startswith('+'):
            return cls(s[1:], False)
        if s.startswith('-'):
            return cls(s[1:], True)
        return cls(s, None)

    def __repr__(self):
        """Return '~name' for a negated term and the bare name otherwise."""
        if self.negated:
            return '~' + self.name
        return self.name

    def __invert__(self):
        """
        Return the term with the opposite sign.

        A term whose ``negated`` is ``None`` is returned unchanged, since it
        has no sign to flip.

        """
        if self.negated is None:
            return self
        # Logical ``not`` is required here: bitwise ``~`` on a bool yields
        # the ints -1 / -2, which are both truthy, so ``~term`` could never
        # produce a non-negated term.
        return BooleanTerm(self.name, not self.negated)
class Config:
    """
    Attribute bag holding the settings read from a CyLinter YAML file.

    Instances are normally created with :meth:`from_path`, which reads each
    required key from the YAML document, converts it to a fixed Python type
    and stores it as an attribute of the same name.

    """

    def __init__(self, **kwargs):
        """Store every keyword argument as an instance attribute."""
        self.__dict__.update(kwargs)

    @classmethod
    def from_path(cls, path):
        """
        Build a configuration object from the YAML file at ``path``.

        Every key read below is required: a missing key raises ``KeyError``
        and a value that cannot be converted to the expected type raises
        ``TypeError`` or ``ValueError``.

        """
        config = cls()
        # Read as UTF-8 rather than relying on the platform default encoding.
        with open(path, encoding='utf-8') as f:
            data = yaml.safe_load(f)

        # GENERAL PROGRAM CONFIGURATIONS

        config.inDir = pathlib.Path(data['inDir']).resolve()
        config.outDir = pathlib.Path(data['outDir']).resolve()
        config._parse_sample_metadata(data['sampleMetadata'])
        config.samplesToExclude = list(data['samplesToExclude'])
        config.counterstainChannel = str(data['counterstainChannel'])
        config.markersToExclude = list(data['markersToExclude'])

        # CLASS MODULE CONFIGURATIONS

        # selectROIs
        config.delintMode = bool(data['delintMode'])
        config.showAbChannels = bool(data['showAbChannels'])
        config.samplesForROISelection = list(data['samplesForROISelection'])
        config.autoArtifactDetection = bool(data['autoArtifactDetection'])
        config.artifactDetectionMethod = str(data['artifactDetectionMethod'])

        # intensityFilter
        config.numBinsIntensity = int(data['numBinsIntensity'])

        # areaFilter
        config.numBinsArea = int(data['numBinsArea'])

        # cycleCorrelation
        config.numBinsCorrelation = int(data['numBinsCorrelation'])

        # pruneOutliers
        config.hexbins = bool(data['hexbins'])
        config.hexbinGridSize = int(data['hexbinGridSize'])

        # metaQC
        config.metaQC = bool(data['metaQC'])

        # PCA
        config.channelExclusionsPCA = list(data['channelExclusionsPCA'])
        config.samplesToRemovePCA = list(data['samplesToRemovePCA'])
        config.dimensionPCA = int(data['dimensionPCA'])
        config.pointSize = float(data['pointSize'])
        config.labelPoints = bool(data['labelPoints'])
        config.distanceCutoff = float(data['distanceCutoff'])
        config.conditionsToSilhouette = list(data['conditionsToSilhouette'])

        # gating
        config.gating = bool(data['gating'])
        config.channelExclusionsGating = list(data['channelExclusionsGating'])
        config.samplesToRemoveGating = list(data['samplesToRemoveGating'])
        config.vectorThreshold = int(data['vectorThreshold'])
        config._parse_classes(data['classes'])

        # metaQC / clustering (embedding settings shared in shape)
        config.embeddingAlgorithmQC = str(data['embeddingAlgorithmQC'])
        config.embeddingAlgorithm = str(data['embeddingAlgorithm'])
        config.channelExclusionsClusteringQC = list(
            data['channelExclusionsClusteringQC']
        )
        config.channelExclusionsClustering = list(
            data['channelExclusionsClustering']
        )
        config.samplesToRemoveClusteringQC = list(
            data['samplesToRemoveClusteringQC']
        )
        config.samplesToRemoveClustering = list(
            data['samplesToRemoveClustering']
        )
        config.normalizeTissueCounts = bool(data['normalizeTissueCounts'])
        config.percentDataPerChunk = float(data['percentDataPerChunk'])
        config.fracForEmbedding = float(data['fracForEmbedding'])
        config.dimensionEmbedding = int(data['dimensionEmbedding'])
        config.colormapAnnotationQC = str(
            data['colormapAnnotationQC'])
        config.colormapAnnotationClustering = str(
            data['colormapAnnotationClustering'])

        # tSNE-specific settings
        config.perplexityQC = float(data['perplexityQC'])
        config.perplexity = float(data['perplexity'])
        config.earlyExaggerationQC = float(data['earlyExaggerationQC'])
        config.earlyExaggeration = float(data['earlyExaggeration'])
        config.learningRateTSNEQC = float(data['learningRateTSNEQC'])
        config.learningRateTSNE = float(data['learningRateTSNE'])
        config.metricQC = str(data['metricQC'])
        config.metric = str(data['metric'])
        config.randomStateQC = int(data['randomStateQC'])
        config.randomStateTSNE = int(data['randomStateTSNE'])

        # UMAP-specific settings
        config.nNeighborsQC = int(data['nNeighborsQC'])
        config.nNeighbors = int(data['nNeighbors'])
        config.learningRateUMAPQC = float(data['learningRateUMAPQC'])
        config.learningRateUMAP = float(data['learningRateUMAP'])
        config.minDistQC = float(data['minDistQC'])
        config.minDist = float(data['minDist'])
        config.repulsionStrengthQC = float(data['repulsionStrengthQC'])
        config.repulsionStrength = float(data['repulsionStrength'])
        config.randomStateUMAP = int(data['randomStateUMAP'])

        # frequencyStats
        config.controlGroups = list(data['controlGroups'])
        # denominatorCluster is optional in value: None is passed through.
        denominator = data['denominatorCluster']
        config.denominatorCluster = (
            None if denominator is None else int(denominator)
        )
        config.FDRCorrection = bool(data['FDRCorrection'])

        # curateThumbnails
        config.numThumbnails = int(data['numThumbnails'])
        config.windowSize = int(data['windowSize'])
        config.segOutlines = bool(data['segOutlines'])

        return config

    def _parse_sample_metadata(self, value):
        """
        Split the sample metadata mapping into five per-file dictionaries.

        ``value`` maps a file name to a sequence whose first five items are
        name, condition, condition abbreviation, status and replicate. The
        first four are stored as ``str`` and the replicate as ``int``. All
        five dictionaries are reset on every call and stay empty when
        ``value`` is ``None``.

        """
        self.sampleNames = {}
        self.sampleConditions = {}
        self.sampleConditionAbbrs = {}
        self.sampleStatuses = {}
        self.sampleReplicates = {}

        if value is None:
            return

        for file_name, terms in value.items():
            self.sampleNames[file_name] = str(terms[0])
            self.sampleConditions[file_name] = str(terms[1])
            self.sampleConditionAbbrs[file_name] = str(terms[2])
            self.sampleStatuses[file_name] = str(terms[3])
            self.sampleReplicates[file_name] = int(terms[4])

    def _parse_classes(self, value):
        """
        Populate ``self.classes`` from the class-definition mapping.

        Each entry's ``'definition'`` list of strings is converted to a list
        of ``BooleanTerm`` objects; all other keys of the entry are carried
        over unchanged. ``self.classes`` is reset on every call and stays
        empty when ``value`` is ``None``.

        """
        self.classes = {}

        if value is None:
            return

        for class_name, spec in value.items():
            # Work on a shallow copy so the caller's mapping keeps its raw
            # strings and can be parsed again without failing.
            parsed = dict(spec)
            parsed['definition'] = [
                BooleanTerm.parse_str(t) for t in spec['definition']
            ]
            self.classes[str(class_name)] = parsed

    @property
    def checkpoint_path(self):
        """Return the 'checkpoints' path directly under ``outDir``."""
        return self.outDir / 'checkpoints'

    def __repr__(self):
        """Return ``Config(...)`` listing every instance attribute."""
        kwargs_str = ', '.join(f"{k}={v!r}" for k, v in self.__dict__.items())
        return f"Config({kwargs_str})"
def main(argv=sys.argv):
    """
    Run the CyLinter command-line entry point.

    Parses ``argv[1:]``, checks that the configuration path exists and that
    any ``--module`` value names a registered pipeline module, then loads
    the configuration, creates the output directory and runs the pipeline.

    Returns 0 on success and 1 when validation fails.
    """
    module_names = components.pipeline_module_names
    epilog = 'Pipeline modules:\n' + '\n'.join(
        f" {n}" for n in module_names
    )

    parser = argparse.ArgumentParser(
        description='Perform CyLinter analysis on a data file.',
        epilog=epilog,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        'config',
        type=path_resolved,
        help='Path to the configuration YAML file',
    )
    parser.add_argument(
        '--module',
        type=str,
        help='Pipeline module at which to begin processing (see below'
        ' for ordered list of modules)',
    )
    args = parser.parse_args(argv[1:])

    if not validate_paths(args):
        return 1

    start_module = args.module
    if start_module and start_module not in module_names:
        print(
            f"cylinter: error: argument --module: invalid choice '{args.module}'",
            file=sys.stderr,
        )
        return 1

    logging.basicConfig(
        level=logging.INFO,
        format='%(levelname)s: %(message)s',
    )

    logger.info("Reading configuration file")
    config = Config.from_path(args.config)
    create_output_directory(config)

    logger.info("Executing pipeline")
    pipeline.run_pipeline(config, start_module)

    logger.info("Finished")
    return 0


def path_resolved(path_str):
    """Convert a path string into a resolved ``pathlib.Path``."""
    return pathlib.Path(path_str).resolve()


def validate_paths(args):
    """
    Check the Path entries of the parsed argument namespace.

    Returns True when ``args.config`` exists; otherwise writes a message to
    stderr and returns False.
    """
    if args.config.exists():
        return True
    print(
        f"Config path does not exist:\n {args.config}\n",
        file=sys.stderr,
    )
    return False


def create_output_directory(config):
    """Create ``config.outDir`` (and missing parents) if it is absent."""
    config.outDir.mkdir(parents=True, exist_ok=True)
28 | 29 | counterstainChannel: "DNA1" 30 | # (str) Name of marker in markers.csv file for use in visualizing nuclear counterstain 31 | 32 | markersToExclude: ["Rabbit IgG", "Goat IgG", "Mouse IgG", "CD56", "CD13", 33 | "pAUR", "CCNE", "CDKN2A", "PCNA_1", "CDKN1B_2", 34 | "CD63", "CD32", "CCNA2", "CDKN1C", "PCNA_1", 35 | "CDKN1B_1", "CCND1", "cPARP", "pCREB", 36 | "CCNB1", "PCNA_2", "CDK2" 37 | ] 38 | # (list of strs) Immunomarkers to exclude from analysis 39 | # Does not include nuclear dyes. They are needed for the 40 | # cycleCorrelation module to remove cell dropout. 41 | 42 | ############################################################################### 43 | # MODULE-SPECIFIC CONFIGURATIONS 44 | 45 | # selectROIs------------------------------------------------------------------- 46 | delintMode: True 47 | # (bool) Whether to drop (True; negative selection) or 48 | # retain (False; positive selection) cells selected by ROIs. 49 | 50 | showAbChannels: True 51 | # (bool) Whether to show all immunomarker channels (True) when Napari 52 | # is open (may be memory limiting) or show cycle 1 DNA only (False). 53 | 54 | samplesForROISelection: ["1", "15", "18", "68"] 55 | # (list of strs) Sample names for ROI selection specified 56 | # according to the first elements of sampleMetadata configuration. 57 | 58 | autoArtifactDetection: True 59 | # (bool) Whether to display tools for automated artifact detection in Napari window 60 | 61 | artifactDetectionMethod: "classical" 62 | # (str) Algorithm used for automated artifact detection (current option: "classical"). 63 | # Multi-layer perceptron method ("MLP") currently under development. 64 | 65 | 66 | # intensityFilter------------------------------------------------------------------- 67 | numBinsIntensity: 50 68 | # (int) Number of bins for DNA intensity histograms. 
69 | 70 | 71 | # areaFilter------------------------------------------------------------------- 72 | numBinsArea: 50 73 | # (int) Number of bins for DNA area histograms. 74 | 75 | 76 | # cycleCorrelation------------------------------------------------------------------- 77 | numBinsCorrelation: 50 78 | # (int) Number of bins for DNA1/DNAn histograms. 79 | 80 | 81 | # pruneOutliers------------------------------------------------------------------- 82 | hexbins: False 83 | # (bool) Whether to use hexbins (True) or scatter plots (False) to plot 84 | # single-cell signal intensities. Scatter plots allow for higher resolution, 85 | # but may require longer rendering times. 86 | 87 | hexbinGridSize: 20 88 | # (int) Hexbin grid size when hexbins=True. 89 | # Higher values increase bin resolution. 90 | 91 | 92 | # metaQC (optional)------------------------------------------------------------------- 93 | metaQC: False 94 | # (bool) Whether to perform data reclassification based on 95 | # unsupervised clustering results of combinations of clean and 96 | # noisy (previously-redacted) data. 97 | 98 | embeddingAlgorithmQC: "UMAP" 99 | # (str) Embedding algorithm used for clustering (options: "TSNE" or "UMAP"). 100 | 101 | channelExclusionsClusteringQC: [] 102 | # (list of strs) Immunomarkers to exclude from clustering. 103 | 104 | samplesToRemoveClusteringQC: [] 105 | # (list of strs) Samples to exclude from clustering. 106 | 107 | percentDataPerChunk: 0.2 108 | # (float) Fraction of data to undergo embedding and 109 | # clustering per reclassification cycle. 110 | 111 | colormapAnnotationQC: "Sample" 112 | # (str) Metadata annotation to colormap the embedding: Sample or Condition. 113 | 114 | metricQC: "euclidean" 115 | # (str) Distance metric for computing embedding. 
116 | # Choose from valid metrics used by scipy.spatial.distance.pdist: 117 | # "braycurtis", "canberra", "chebyshev", "cityblock", "correlation", "cosine", 118 | # "dice", "euclidean", "hamming", "jaccard", "jensenshannon", "kulsinski", 119 | # "mahalanobis", "matching", "minkowski", "rogerstanimoto", "russellrao", 120 | # "seuclidean", "sokalmichener", "sokalsneath", "sqeuclidean", "yule". 121 | 122 | # -------------------------------------- 123 | # tSNE-specific configurations: 124 | # https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html 125 | perplexityQC: 50.0 126 | # (float) Related to the number of nearest neighbors used in other 127 | # manifold learning algorithms. Larger datasets usually require 128 | # larger perplexity. Different values can result in significantly 129 | # different results. 130 | 131 | earlyExaggerationQC: 12.0 132 | # (float) For larger values, the space between natural clusters 133 | # will be larger in the embedded space. 134 | 135 | learningRateTSNEQC: 200.0 136 | # (float) tSNE learning rate (typically between 10.0 and 1000.0). 137 | 138 | randomStateQC: 5 139 | # (int) Determines the random number generator for reproducible results 140 | # across multiple function calls. 141 | 142 | # -------------------------------------- 143 | # UMAP-specific configurations: 144 | # https://umap-learn.readthedocs.io/en/latest/api.html 145 | nNeighborsQC: 6 146 | # (int) The size of local neighborhood (in terms of number of 147 | # neighboring sample points) used for manifold approximation. 148 | # Larger values result in more global views of the manifold, 149 | # while smaller values result in more local data being preserved. 150 | # In general values should be in the range 2 to 100. 151 | 152 | learningRateUMAPQC: 1.0 153 | # (float) The initial learning rate for the embedding optimization. 154 | 155 | minDistQC: 0.1 156 | # (float) The effective minimum distance between embedded points. 
157 | # Smaller values will result in a more clustered/clumped 158 | # embedding where nearby points on the manifold are drawn 159 | # closer together, while larger values will result in a more 160 | # even dispersal of points. The value should be set relative 161 | # to the spread value, which determines the scale at which 162 | # embedded points will be spread out. 163 | 164 | repulsionStrengthQC: 5.0 165 | # (float) Weighting applied to negative samples in low dimensional 166 | # embedding optimization. Values higher than one will 167 | # result in greater weight being given to negative samples. 168 | 169 | 170 | # PCA------------------------------------------------------------------- 171 | channelExclusionsPCA: [] 172 | # (list of strs) Immunomarkers to exclude from PCA analysis. 173 | 174 | samplesToRemovePCA: [] 175 | # (list of strs) Samples to exclude from PCA analysis. 176 | 177 | dimensionPCA: 2 178 | # (int) Number of PCs to compute. 179 | 180 | pointSize: 90.0 181 | # (float) Scatter point size for sample scores plot. 182 | 183 | labelPoints: True 184 | # (bool) Annotate scatter points with condition abbreviations 185 | # from sampleMetadata configuration. 186 | 187 | distanceCutoff: 0.15 188 | # (float) Maximum distance between data points in PCA scores plot to 189 | # be annotated with a common label. Useful for increasing visual clarity 190 | # of PCA plots containing many data points. Applicable when 191 | # labelPoints is True. 192 | 193 | conditionsToSilhouette: [] 194 | # (list of strs) List of abbreviated condition names whose corresponding 195 | # scores plot points will be greyed out, left unannotated, and sent to the back 196 | # of the plot (zorder). Useful for increasing visual clarity of PCA 197 | # plots containing many data points. 198 | 199 | 200 | # gating (optional)------------------------------------------------------------------- 201 | gating: False 202 | # (bool) Whether to perform SYLARAS-style gating on single-cell data.
203 | # Cell Syst. 2020 Sep 23;11(3):272-285.e9 PMID: 32898474 204 | 205 | channelExclusionsGating: [] 206 | # (list of strs) Immunomarkers to exclude from gating. 207 | 208 | samplesToRemoveGating: [] 209 | # (list of strs) Samples to exclude from gating. 210 | 211 | vectorThreshold: 100 212 | # (int) Visualize Boolean vectors (i.e., binary phenotype profiles) with cell counts >= vectorThreshold. 213 | 214 | classes: 215 | Tumor: 216 | definition: [+pan-CK, +KI67, -aSMA, -CD45] 217 | subsets: [CDKN1A] 218 | # (dict) Boolean immunophenotype signatures. 219 | # +marker = immunopositive, -marker = immunonegative, marker = don't care 220 | 221 | 222 | # clustering------------------------------------------------------------------- 223 | embeddingAlgorithm: "UMAP" 224 | # (str) Embedding algorithm to use for clustering (options: "TSNE" or "UMAP"). 225 | 226 | channelExclusionsClustering: [] 227 | # (list of strs) Immunomarkers to exclude from clustering. 228 | 229 | samplesToRemoveClustering: [] 230 | # (list of strs) Samples to exclude from clustering. 231 | 232 | normalizeTissueCounts: True 233 | # (bool) Make the number of cells per tissue for clustering more similar 234 | # through sample-weighted random sampling. 235 | 236 | fracForEmbedding: 1.0 237 | # (float) Fraction of cells to be embedded (range: 0.0-1.0). 238 | # Limits amount of data passed to downstream modules. 239 | 240 | dimensionEmbedding: 2 241 | # (int) Dimension of the embedding (options: 2 or 3). 242 | 243 | colormapAnnotationClustering: "Sample" 244 | # (str) Metadata annotation to colormap the embedding: Sample or Condition. 245 | 246 | metric: "euclidean" 247 | # (str) Distance metric for computing embedding.
248 | # Choose from valid metrics used by scipy.spatial.distance.pdist: 249 | # "braycurtis", "canberra", "chebyshev", "cityblock", "correlation", "cosine", 250 | # "dice", "euclidean", "hamming", "jaccard", "jensenshannon", "kulsinski", 251 | # "mahalanobis", "matching", "minkowski", "rogerstanimoto", "russellrao", 252 | # "seuclidean", "sokalmichener", "sokalsneath", "sqeuclidean", "yule". 253 | 254 | # -------------------------------------- 255 | # tSNE-specific configurations: 256 | # https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html 257 | perplexity: 50.0 258 | # (float) Related to the number of nearest neighbors used in other 259 | # manifold learning algorithms. Larger datasets usually require 260 | # larger perplexity. Different values can result in significantly 261 | # different results. 262 | 263 | earlyExaggeration: 12.0 264 | # (float) For larger values, the space between natural clusters 265 | # will be larger in the embedded space. 266 | 267 | learningRateTSNE: 200.0 268 | # (float) tSNE learning rate (typically between 10.0 and 1000.0). 269 | 270 | randomStateTSNE: 5 271 | # (int) Determines the random number generator for reproducible results 272 | # across multiple function calls. 273 | 274 | # -------------------------------------- 275 | # UMAP-specific configurations: 276 | # https://umap-learn.readthedocs.io/en/latest/api.html 277 | nNeighbors: 6 278 | # (int) The size of local neighborhood (in terms of number of 279 | # neighboring sample points) used for manifold approximation. 280 | # Larger values result in more global views of the manifold, 281 | # while smaller values result in more local data being preserved. 282 | # In general values should be in the range 2 to 100. 283 | 284 | learningRateUMAP: 1.0 285 | # (float) The initial learning rate for the embedding optimization. 286 | 287 | minDist: 0.1 288 | # (float) The effective minimum distance between embedded points.
289 | # Smaller values will result in a more clustered/clumped 290 | # embedding where nearby points on the manifold are drawn 291 | # closer together, while larger values will result in a more 292 | # even dispersal of points. The value should be set relative 293 | # to the spread value, which determines the scale at which 294 | # embedded points will be spread out. 295 | 296 | repulsionStrength: 5.0 297 | # (float) Weighting applied to negative samples in low dimensional 298 | # embedding optimization. Values higher than one will 299 | # result in greater weight being given to negative samples. 300 | 301 | randomStateUMAP: 5 302 | # (int) Determines the random number generator for reproducible results 303 | # across multiple function calls. 304 | 305 | 306 | # frequencyStats------------------------------------------------------------------- 307 | controlGroups: ["CANCER-FALSE"] 308 | # (list of strs) Corresponds to control groups for each binary declaration 309 | # specified as the third elements of sampleMetadata values. 310 | 311 | denominatorCluster: null 312 | # (None type) Cluster to be used as the denominator when computing cluster 313 | # frequency ratios. Set to null first, change to cluster integer number 314 | # to normalize cluster frequencies to a particular cluster if desired. 315 | 316 | FDRCorrection: False 317 | # (bool) Whether to compute p-vals and false discovery rate (FDR)-corrected 318 | # q-vals (True) or compute uncorrected p-vals only (False). 319 | 320 | 321 | # curateThumbnails------------------------------------------------------------- 322 | numThumbnails: 25 323 | # (int) Number of examples per cluster to be curated. 324 | 325 | windowSize: 30 326 | # (int) Number of pixels in x and y dimensions per thumbnail. 327 | 328 | segOutlines: True 329 | # (bool) Whether to overlay cell segmentation outlines on thumbnail images.
def aggregateData(data, self, args):
    """Combine per-sample single-cell feature tables into one dataframe.

    Reads the CSV feature table of every sample listed in
    ``self.sampleNames`` (except those in ``self.samplesToExclude``), keeps
    the marker columns named in the markers file plus whichever standard
    morphology/position columns are present, tags each row with its
    ``Sample``, ``Condition`` and ``Replicate``, and stacks the tables
    row-wise. Columns that are not present in every sample are dropped.

    Parameters
    ----------
    data : object
        Ignored on input; the name is rebound to the aggregated dataframe.
    self : object
        Pipeline configuration object. Attributes read here:
        ``counterstainChannel``, ``markersToExclude``, ``outDir``,
        ``sampleNames``, ``samplesToExclude``, ``sampleConditionAbbrs``,
        ``sampleReplicates`` and ``dimensionEmbedding``.
    args : object
        Unused by this module.

    Returns
    -------
    pandas.DataFrame
        Aggregated single-cell data sorted by ``Sample`` and ``CellID`` with
        a fresh 0..N-1 index, passed through ``reorganize_dfcolumns``.

    Raises
    ------
    SystemExit
        With a non-zero status if a marker named in the markers file is
        missing from a sample's feature table, or if every sample is
        excluded so there is nothing to aggregate.
    """

    print()

    check, markers_filepath = input_check(self)

    markers, abx_channels = read_markers(
        markers_filepath=markers_filepath,
        counterstain_channel=self.counterstainChannel,
        markers_to_exclude=self.markersToExclude, data=None
    )

    # initialize CyLinter QC report if it hasn't been already; the context
    # manager closes (and therefore flushes) the file so later readers of the
    # report never see a partially written document
    report_path = os.path.join(self.outDir, 'cylinter_report.yml')
    if not os.path.exists(report_path):
        with open(report_path, 'w') as f:
            yaml.dump({}, f)

    # morphology/position columns kept in addition to marker columns
    # (only those actually present in a given feature table are used)
    boilerplate = [
        'CellID', 'X_centroid', 'Y_centroid', 'Area', 'MajorAxisLength',
        'MinorAxisLength', 'Eccentricity', 'Solidity', 'Extent',
        'Orientation'
    ]

    df_list = []
    channel_setlist = []
    for key in self.sampleNames:

        # non-standard inputs carry a '--' delimited suffix in the key
        if check == 'standard':
            sample = key
        else:
            sample = key.split('--')[0]

        if sample in self.samplesToExclude:
            logger.info(f'censoring sample {sample}')
            continue

        logger.info(f'IMPORTING sample {key}')

        file_path = get_filepath(self, check, sample, 'CSV')
        table = pd.read_csv(file_path)

        # drop markers in markersToExclude config parameter
        table = table.drop(
            columns=[i for i in self.markersToExclude if i in table.columns]
        )

        # select marker columns (all required) followed by the boilerplate
        # columns that exist in this table
        cols = (
            list(markers['marker_name']) +
            [i for i in boilerplate if i in table.columns]
        )

        # (for BAF project)
        # cols = (
        #     ['CellID', 'Area', 'Solidity', 'X_centroid', 'Y_centroid',
        #      'CytArea', 'CoreCoord', 'AreaSubstruct',
        #      'MeanInsideSubstruct', 'CoreFlag', 'Corenum'] +
        #     [i for i in markers['marker_name'] if i in csv.columns]
        # )

        # (for SARDANA)
        # select boilerplate columns and use specific
        # mask quantifications for different antibodies
        # mask_dict = {
        #     'Hoechst0': 'nucleiRingMask',
        #     'Hoechst1': 'nucleiRingMask',
        #     'Hoechst2': 'nucleiRingMask',
        #     'anti_CD3': 'cytoRingMask',
        #     'anti_CD45RO': 'cytoRingMask',
        #     'Hoechst3': 'nucleiRingMask',
        #     'Keratin_570': 'cellRingMask',
        #     'aSMA_660': 'cellRingMask',
        #     'Hoechst4': 'nucleiRingMask',
        #     'CD4_488': 'cytoRingMask',
        #     'CD45_PE': 'cytoRingMask',
        #     'PD1_647': 'cytoRingMask',
        #     'Hoechst5': 'nucleiRingMask',
        #     'CD20_488': 'cytoRingMask',
        #     'CD68_555': 'cellRingMask',
        #     'CD8a_660': 'cytoRingMask',
        #     'Hoechst6': 'nucleiRingMask',
        #     'CD163_488': 'cellRingMask',
        #     'FOXP3_570': 'nucleiRingMask',
        #     'PDL1_647': 'cytoRingMask',
        #     'Hoechst7': 'nucleiRingMask',
        #     'Ecad_488': 'cellRingMask',
        #     'Vimentin_555': 'cellRingMask',
        #     'CDX2_647': 'cellRingMask',
        #     'Hoechst8': 'nucleiRingMask',
        #     'LaminABC_488': 'nucleiRingMask',
        #     'Desmin_555': 'cellRingMask',
        #     'CD31_647': 'nucleiRingMask',
        #     'Hoechst9': 'nucleiRingMask',
        #     'PCNA_488': 'nucleiRingMask',
        #     'CollagenIV_647': 'cellRingMask'}
        # cols = (
        #     ['CellID', 'X_centroid', 'Y_centroid', 'Area',
        #      'MajorAxisLength', 'MinorAxisLength',
        #      'Eccentricity', 'Solidity', 'Extent',
        #      'Orientation'] +
        #     [f'{i}_{mask_dict[i]}' for i
        #      in markers['marker_name']])

        try:
            # explicit copy so the column assignments below act on an
            # independent frame rather than a view of the raw table
            table = table[cols].copy()
        except KeyError as e:
            logger.info(
                'Aborting; some (or all) marker names in markers.csv do not appear '
                'as columns in the single-cell data table. Check for spelling and case.'
            )
            print(e)
            # non-zero status so calling shells/schedulers see the failure
            sys.exit(1)

        # (for SARDANA)
        # trim mask object names from column headers
        # cols_update = [
        #     i.rsplit('_', 1)[0] if 'Mask' in i else
        #     i for i in csv.columns
        # ]
        # csv.columns = cols_update

        # add sample, condition, and replicate columns
        table['Sample'] = sample
        table['Condition'] = self.sampleConditionAbbrs[key]
        table['Replicate'] = self.sampleReplicates[key]

        # append dataframe to list
        df_list.append(table)

        # append the set of columns for sample to a list
        # this will be used to select columns shared among samples
        channel_setlist.append(set(table.columns))

    print()

    # without this guard pd.concat/set.intersection fail on empty input
    # with messages unrelated to the actual cause
    if not df_list:
        logger.info(
            'Aborting; all samples are excluded, so there is no data to aggregate.'
        )
        sys.exit(1)

    # stack dataframes row-wise
    data = pd.concat(df_list, axis=0)
    del df_list

    # only select channels shared among all samples
    shared = set.intersection(*channel_setlist)

    logger.info(f'{len(data.columns)} total columns')
    logger.info(f'{len(shared)} columns in common between all samples')

    # walk the dataframe's own column order (not the set) so both the warning
    # and the surviving column order are deterministic between runs
    markers_to_drop = [i for i in data.columns if i not in shared]
    if markers_to_drop:
        print()
        logger.warning(
            f'Columns {markers_to_drop} are not in all'
            ' samples and will be dropped from downstream analysis.'
        )
        data = data.drop(columns=markers_to_drop)

    # sort by Sample and CellID to be tidy
    data.sort_values(by=['Sample', 'CellID'], inplace=True)

    # assign global index
    data.reset_index(drop=True, inplace=True)

    # ensure MCMICRO-generated columns come first and
    # are in the same order as csv feature tables
    data = reorganize_dfcolumns(data, markers, self.dimensionEmbedding)

    print()
    print()
    return data
axes object 44 | ax_objs.append(fig.add_subplot(gs[i:i + 1, 0:])) 45 | 46 | # plotting the distribution 47 | n, bins, patches = ax_objs[-1].hist( 48 | data[channel], bins=50, density=True, histtype='stepfilled', 49 | linewidth=2.0, ec='k', alpha=1.0, color='k' 50 | ) 51 | 52 | # setting uniform x and y lims 53 | ax_objs[-1].set_xlim(0, 1) 54 | ax_objs[-1].set_ylim(0, math.ceil(n.max()) + 1) 55 | 56 | # make background transparent 57 | rect = ax_objs[-1].patch 58 | rect.set_alpha(0) 59 | 60 | # remove borders, axis ticks, and labels 61 | ax_objs[-1].set_yticklabels([]) 62 | 63 | if i == len(abx_channels) - 1: 64 | ax_objs[-1].set_xlabel( 65 | 'Intensity', fontsize=11, fontweight='normal', labelpad=10 66 | ) 67 | else: 68 | ax_objs[-1].set_xticks([]) 69 | ax_objs[-1].set_xticklabels([]) 70 | 71 | ax_objs[-1].set_yticks([]) 72 | 73 | spines = ['top', 'right', 'left'] 74 | for s in spines: 75 | ax_objs[-1].spines[s].set_visible(False) 76 | 77 | ax_objs[-1].tick_params(axis='x', width=2) 78 | 79 | ax_objs[-1].text(-0.02, 0, channel, fontweight='normal', fontsize=8, ha='right') 80 | 81 | gs.update(hspace=0.3) 82 | plt.subplots_adjust(left=0.3, bottom=0.1, right=0.9, top=0.95) 83 | plt.savefig(os.path.join(dim_dir, 'ridgeplots.pdf')) 84 | plt.close('all') 85 | 86 | ############################################################################################## 87 | 88 | for type in [f'cluster_{self.dimensionEmbedding}d', 'class']: 89 | if type in data.columns: 90 | 91 | if type == f'cluster_{self.dimensionEmbedding}d': 92 | 93 | clustermap_input = data[data[type] != -1] 94 | 95 | # compute mean antibody signals for clusters 96 | clustermap_input = clustermap_input[abx_channels + [type]].groupby(type).mean() 97 | 98 | elif type == 'class': 99 | 100 | clustermap_input = data[data[type] != 'unclassified'] 101 | 102 | # compute mean antibody signals for clusters 103 | clustermap_input = clustermap_input[abx_channels + [type]].groupby(type).mean() 104 | 105 | if 
len(clustermap_input) > 1: 106 | 107 | sns.set(font_scale=0.7) 108 | 109 | # Compute per channel z-scores across clusters 110 | clustermap_input = ( 111 | (clustermap_input - clustermap_input.mean()) / clustermap_input.std() 112 | ) 113 | # assign NaNs (channels with no variation in signal) to 0 114 | clustermap_input[clustermap_input.isna()] = 0 115 | 116 | # Zero-center colorbar 117 | norm = TwoSlopeNorm( 118 | vcenter=0, vmin=clustermap_input.min().min(), 119 | vmax=clustermap_input.max().max() 120 | ) 121 | 122 | g = sns.clustermap( 123 | clustermap_input, cmap='coolwarm', standard_scale=None, square=False, 124 | xticklabels=1, yticklabels=1, linewidth=0.0, cbar=True, norm=norm 125 | ) 126 | 127 | # g = sns.clustermap( 128 | # clustermap_input, cmap='viridis', standard_scale=1, square=False, 129 | # xticklabels=1, yticklabels=1, linewidth=0.0, cbar=True 130 | # ) 131 | 132 | g.fig.suptitle('channel_z-scores.pdf', y=0.995, fontsize=10) 133 | g.fig.set_size_inches(6.0, 6.0) 134 | g.ax_heatmap.set_yticklabels(g.ax_heatmap.get_yticklabels(), rotation=0) 135 | 136 | plt.savefig( 137 | os.path.join(dim_dir, f'{type}_channel_z-scores.pdf'), bbox_inches='tight' 138 | ) 139 | else: 140 | logger.info( 141 | f' {type} clustermap cannot be generated with only one cell population.' 
142 | ) 143 | 144 | data = reorganize_dfcolumns(data, markers, self.dimensionEmbedding) 145 | 146 | print() 147 | print() 148 | return data 149 | -------------------------------------------------------------------------------- /cylinter/modules/frequencyStats.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | 4 | import numpy as np 5 | import pandas as pd 6 | 7 | import math 8 | import natsort 9 | from natsort import natsorted 10 | from itertools import product 11 | 12 | import seaborn as sns 13 | import matplotlib.pyplot as plt 14 | from matplotlib.lines import Line2D 15 | 16 | from scipy.stats import ttest_ind 17 | 18 | from ..utils import ( 19 | input_check, read_markers, categorical_cmap, fdrcorrection, reorganize_dfcolumns 20 | ) 21 | 22 | logger = logging.getLogger(__name__) 23 | 24 | 25 | def frequencyStats(data, self, args): 26 | 27 | print() 28 | 29 | check, markers_filepath = input_check(self) 30 | 31 | # read marker metadata 32 | markers, abx_channels = read_markers( 33 | markers_filepath=markers_filepath, 34 | counterstain_channel=self.counterstainChannel, 35 | markers_to_exclude=self.markersToExclude, data=None 36 | ) 37 | 38 | for type in ['class', f'cluster_{self.dimensionEmbedding}d']: 39 | if type in data.columns: 40 | 41 | stats_input = data[['Sample', 'Replicate', type]] 42 | 43 | # loop over comma-delimited binary declarations 44 | for i in range(len(list(self.sampleStatuses.values())[0].split(', '))): 45 | 46 | # get unique declaration categories (should be 2 per test) 47 | comparison = set( 48 | [j.split(', ')[i] for j in self.sampleStatuses.values() 49 | if '-UNK' not in j.split(', ')[i]]) 50 | 51 | if len(comparison) > 1: 52 | 53 | # assign test and control groups 54 | test = [ 55 | i for i in comparison if i not in 56 | self.controlGroups][0] 57 | control = [ 58 | i for i in comparison if i in self.controlGroups][0] 59 | 60 | # create frequency stats directory if it 
hasn't already 61 | frequency_dir = os.path.join( 62 | self.outDir, 'clustering', 63 | f'{self.dimensionEmbedding}d', 64 | 'frequency_stats', type, f'{test}_v_{control}' 65 | ) 66 | if not os.path.exists(frequency_dir): 67 | os.makedirs(frequency_dir) 68 | 69 | # create single-column dataFrame with all sample names 70 | # to pad counts tables with zeros if a celltype 71 | # is not in a tissue 72 | pad = pd.DataFrame( 73 | natsorted(stats_input['Sample'].unique())).rename( 74 | columns={0: 'Sample'}) 75 | 76 | cluster_list = [] 77 | ratio_list = [] 78 | dif_list = [] 79 | pval_list = [] 80 | 81 | # intialize a dataframe to collect catplot data 82 | catplot_input = pd.DataFrame() 83 | 84 | # loop over populations 85 | for cluster, group in natsorted(stats_input.groupby(type)): 86 | 87 | if cluster not in [-1, 'unclassified']: 88 | 89 | logger.info( 90 | f'Calculating log2({test}/{control}) of mean cell ' 91 | f'density for {type} {str(cluster)}.') 92 | 93 | group = ( 94 | group.groupby(['Sample', 'Replicate', type]) 95 | .size() 96 | .reset_index(drop=False) 97 | .rename(columns={0: 'count'}) 98 | ) 99 | 100 | group = ( 101 | group 102 | .merge(pad, how='right', on='Sample') 103 | .sort_values(by='count', ascending=False) 104 | ) 105 | 106 | # guard against NaNs induced by the absence 107 | # of a given cluster in one or 108 | # more of the tissue samples 109 | group['count'] = [ 110 | 0 if np.isnan(i) else int(i) for 111 | i in group['count'] 112 | ] 113 | 114 | # get sample file names (i.e. 
sampleMetadata keys) 115 | # from config.yml based on "Sample" column 116 | # (first elements of sampleMetadata vals) 117 | def get_key(val): 118 | for key, value in self.sampleNames.items(): 119 | if val == value: 120 | return key 121 | 122 | return "key doesn't exist" 123 | file_names = [get_key(i) for i in group['Sample']] 124 | 125 | # add binary declarations column to group data 126 | group['status'] = [ 127 | self.sampleStatuses[j].split(', ')[i] 128 | for j in file_names] 129 | 130 | # add replicates column to group data 131 | group['Replicate'] = [self.sampleReplicates[i] for i in file_names] 132 | 133 | group[type] = cluster 134 | 135 | # drop samples for which a declaration cannot be made 136 | group = group[~group['status'].str.contains('-UNK')] 137 | 138 | group.reset_index(drop=True, inplace=True) 139 | 140 | # get denominator cell count for each sample 141 | if self.denominatorCluster is None: 142 | group['tissue_count'] = [ 143 | len(stats_input[stats_input['Sample'] == i]) 144 | for i in group['Sample']] 145 | else: 146 | group['tissue_count'] = [ 147 | len(stats_input[(stats_input['Sample'] == i) & 148 | (stats_input[type] == self.denominatorCluster)]) 149 | for i in group['Sample']] 150 | 151 | # compute density of cells per sample 152 | group['density'] = group['count'] / group['tissue_count'] 153 | 154 | # append group data to catplot_input 155 | catplot_input = pd.concat([catplot_input, group], axis=0) 156 | 157 | # isolate test and control group values 158 | cnd1_values = group['density'][group['status'] == test] 159 | cnd2_values = group['density'][group['status'] == control] 160 | 161 | # perform Welch's t-test (equal_var=False) 162 | stat, pval = ttest_ind( 163 | cnd1_values, cnd2_values, axis=0, equal_var=False, 164 | nan_policy='propagate' 165 | ) 166 | 167 | # round resulting values 168 | stat = round(stat, 6) 169 | pval = round(pval, 6) 170 | 171 | # compute mean of test and control group values 172 | cnd1_mean = np.mean(cnd1_values) 
173 | cnd2_mean = np.mean(cnd2_values) 174 | 175 | # compute mean ratio 176 | ratio = np.log2( 177 | (cnd1_mean + 0.00000000001) / (cnd2_mean + 0.00000000001) 178 | ) 179 | 180 | # compute mean difference 181 | dif = cnd1_mean - cnd2_mean 182 | 183 | cluster_list.append(cluster) 184 | ratio_list.append(ratio) 185 | dif_list.append(dif) 186 | pval_list.append(pval) 187 | 188 | # create stats dataframe 189 | statistics = pd.DataFrame( 190 | list(zip(cluster_list, ratio_list, dif_list, pval_list)), 191 | columns=[type, 'ratio', 'dif', 'pval'] 192 | ).sort_values(by=type) 193 | 194 | # compute FDR p-val corrections 195 | # (uses statsmodels.stats.multitest implementation) 196 | rejected, p_adjust = fdrcorrection( 197 | statistics['pval'].tolist(), alpha=0.05, method='indep', is_sorted=False 198 | ) 199 | 200 | statistics['qval'] = p_adjust 201 | 202 | # save total stats table 203 | statistics.to_csv( 204 | os.path.join(frequency_dir, 'stats_total.csv'), index=False 205 | ) 206 | 207 | if self.FDRCorrection: 208 | stat = 'qval' 209 | else: 210 | stat = 'pval' 211 | 212 | # isolate statistically significant stat values 213 | significant = statistics[statistics[stat] <= 0.05].sort_values(by=stat) 214 | 215 | # save significant stats table 216 | significant.to_csv( 217 | os.path.join(frequency_dir, 'stats_sig.csv'), index=False 218 | ) 219 | 220 | # plot 221 | sns.set_style('whitegrid') 222 | fig, ax = plt.subplots() 223 | plt.scatter(abs(significant['dif']), significant['ratio'], s=9.0, c='tab:red') 224 | 225 | for label, qval, x, y in zip( 226 | significant[type], significant[stat], 227 | abs(significant['dif']), significant['ratio']): 228 | 229 | plt.annotate( 230 | (label, f'{stat[0]}=' + str(qval)), size=3, 231 | xy=(x, y), xytext=(0, 0), 232 | textcoords='offset points', ha='right', 233 | va='bottom', 234 | bbox=dict(boxstyle='round,pad=0.1', fc='yellow', 235 | alpha=0.0) 236 | ) 237 | 238 | fontsize = {'size': 8} 239 | ax.xaxis.set_tick_params(which='major', 
reset=False, **fontsize) 240 | ax.yaxis.set_tick_params(which='major', reset=False, **fontsize) 241 | 242 | plt.title(f'{test} vs. {control} ({stat[0]}<0.05)', fontsize=9) 243 | plt.xlabel(f'abs({test} - {control})', fontsize=8) 244 | plt.ylabel(f'log2({test} / {control})', fontsize=8) 245 | plt.savefig(os.path.join(frequency_dir, 'plot.pdf')) 246 | plt.close() 247 | 248 | catplot_input.reset_index(drop=True, inplace=True) 249 | 250 | catplot_input[stat] = [ 251 | 'ns' if i not in 252 | significant[type].unique() else 253 | significant[stat][ 254 | significant[type] == i].values[0] 255 | for i in catplot_input[type]] 256 | 257 | # filter catplot_input to plot only significant differences 258 | catplot_input = catplot_input[catplot_input[stat] != 'ns'] 259 | 260 | if not catplot_input.empty: 261 | # build cmap 262 | cmap = categorical_cmap( 263 | numUniqueSamples=len(catplot_input['Sample'].unique()), 264 | numCatagories=10, cmap='tab10', continuous=False 265 | ) 266 | 267 | sample_color_dict = dict( 268 | zip(natsorted(catplot_input['Sample'].unique()), 269 | cmap.colors)) 270 | 271 | catplot_input[type] = ( 272 | catplot_input[type].astype(str) + 273 | f'; {stat} = ' + catplot_input[stat].astype(str) 274 | ) 275 | 276 | catplot_input.sort_values( 277 | by=[stat, 'status', 'density'], key=lambda x: 278 | natsort.natsort_keygen( 279 | alg=natsort.ns.LOCALE | 280 | natsort.ns.IGNORECASE)(x), inplace=True 281 | ) 282 | 283 | sns.set(font_scale=0.3) 284 | sns.set_style('whitegrid') 285 | ncols = 5 286 | nrows = math.ceil(len(catplot_input[type].unique()) / ncols) 287 | 288 | fig = plt.figure(figsize=(ncols + 2, nrows)) 289 | 290 | # grid specifications 291 | gs = plt.GridSpec(nrows=nrows, ncols=ncols, figure=fig) 292 | 293 | for (name, group), ax in zip( 294 | catplot_input.groupby(type, sort=False), 295 | product(range(nrows), range(ncols))): 296 | 297 | ax = fig.add_subplot(gs[ax[0], ax[1]]) 298 | 299 | group['status'] = [i.split('-')[1] for i in group['status']] 300 
| 301 | sns.barplot( 302 | data=group, x='status', y='density', hue='Sample', 303 | palette=sample_color_dict, width=0.8, lw=0.0, ax=ax 304 | ) 305 | 306 | ax.grid(lw=0.5) 307 | [x.set_linewidth(0.5) for x in ax.spines.values()] 308 | plt.tick_params(axis='x', pad=-3) 309 | ax.set(xlabel=None) 310 | plt.tick_params(axis='y', pad=-3) 311 | ax.yaxis.labelpad = 2 312 | ax.set_title(name, size=2, pad=2) 313 | ax.legend_.remove() 314 | 315 | plt.tight_layout() 316 | 317 | file_names = [ 318 | get_key(i) for i in natsorted(catplot_input['Sample'].unique()) 319 | ] 320 | 321 | sample_conds = [self.sampleConditions[i] for i in file_names] 322 | 323 | sample_abbrs = [self.sampleConditionAbbrs[i] for i in file_names] 324 | 325 | cond_abbr = [f'{i}-{j}' for i, j in zip(sample_conds, sample_abbrs)] 326 | 327 | handles_dict = dict(zip( 328 | natsorted(catplot_input['Sample'].unique()), cond_abbr) 329 | ) 330 | 331 | legend_handles = [] 332 | for k, v in handles_dict.items(): 333 | legend_handles.append( 334 | Line2D([0], [0], marker='o', color='none', 335 | label=v, markerfacecolor=sample_color_dict[k], 336 | markeredgecolor='k', markeredgewidth=0.2, 337 | markersize=5.0) 338 | ) 339 | 340 | fig.legend( 341 | handles=legend_handles, prop={'size': 5.0}, loc='upper left', 342 | bbox_to_anchor=[1.0, 1.0] 343 | ) 344 | 345 | plt.savefig( 346 | os.path.join(frequency_dir, 'catplot.pdf'), bbox_inches='tight' 347 | ) 348 | plt.close('all') 349 | 350 | print() 351 | 352 | else: 353 | logger.info( 354 | 'Only one binary declaration ' + 355 | f'class represented for {list(comparison)[0]}. 
def logTransform(data, self, args):
    """Log10-transform the antibody channel intensities of ``data``.

    The antibody channels reported by ``read_markers`` are replaced in place
    by ``log10(value + 0.001)``; all other columns are left untouched. The
    dataframe is then passed through ``reorganize_dfcolumns`` and returned.

    Parameters
    ----------
    data : pandas.DataFrame
        Single-cell table containing one column per antibody channel.
    self : object
        Pipeline configuration object; ``counterstainChannel``,
        ``markersToExclude`` and ``dimensionEmbedding`` are read here.
    args : object
        Unused by this module.

    Returns
    -------
    pandas.DataFrame
        The transformed, column-reorganized dataframe.
    """

    _, markers_filepath = input_check(self)

    # marker metadata supplies the list of antibody channels to transform
    markers, abx_channels = read_markers(
        markers_filepath=markers_filepath,
        counterstain_channel=self.counterstainChannel,
        markers_to_exclude=self.markersToExclude,
        data=None,
    )

    # the 0.001 offset maps zero-valued intensities to -3 rather than -inf
    data.loc[:, abx_channels] = np.log10(data[abx_channels] + 0.001)

    # clip cells with zero-valued signal intensities to the Nth percentile of the
    # distribution (not considering the zero-valued signals themselves).
    # percentile = 5
    # percentiles = (
    #     data.loc[:, abx_channels][data.loc[:, abx_channels] > 0.0].quantile(q=percentile / 100)
    # )
    # data.loc[:, abx_channels] = (
    #     data.loc[:, abx_channels].clip(lower=percentiles, upper=None, axis=1)
    # )

    reorganized = reorganize_dfcolumns(data, markers, self.dimensionEmbedding)

    # two blank lines separate this module's console output from the next
    print()
    print()
    return reorganized
45 | if widget: 46 | widget.setParent(None) 47 | 48 | viewer.window.remove_dock_widget(arbitrary_widget) 49 | count = arbitrary_layout.count() 50 | for i in range(count - 1, -1, -1): 51 | item = arbitrary_layout.itemAt(i) 52 | widget = item.widget() 53 | if widget: 54 | widget.setParent(None) 55 | 56 | # read segmentation outlines, add to Napari 57 | file_path = get_filepath(self, check, sample, 'SEG') 58 | seg, min, max = single_channel_pyramid(file_path, channel=0) 59 | viewer.add_image( 60 | seg, rgb=False, blending='additive', colormap='gray', 61 | visible=False, name='segmentation', contrast_limits=(min, max) 62 | ) 63 | 64 | # read DNA1 channel 65 | file_path = get_filepath(self, check, sample, 'TIF') 66 | channel_number = marker_channel_number(self, markers, self.counterstainChannel) 67 | dna, min, max = single_channel_pyramid(file_path, channel=channel_number) 68 | viewer.add_image( 69 | dna, rgb=False, blending='additive', colormap='gray', 70 | name=self.counterstainChannel, contrast_limits=(min, max) 71 | ) 72 | 73 | # read target antibody image 74 | if channel != self.counterstainChannel: 75 | channel_number = marker_channel_number(self, markers, channel) 76 | file_path = get_filepath(self, check, sample, 'TIF') 77 | img, min, max = single_channel_pyramid(file_path, channel=channel_number) 78 | viewer.add_image( 79 | img, rgb=False, blending='additive', colormap='green', 80 | visible=True, name=channel, contrast_limits=(min, max) 81 | ) 82 | 83 | # apply previously defined contrast limits if they exist 84 | try: 85 | viewer.layers[self.counterstainChannel].contrast_limits = ( 86 | qc_report['setContrast'][ 87 | self.counterstainChannel][0], qc_report['setContrast'][self.counterstainChannel][1] 88 | ) 89 | except KeyError: 90 | pass 91 | 92 | try: 93 | viewer.layers[channel].contrast_limits = ( 94 | qc_report['setContrast'][channel][0], qc_report['setContrast'][channel][1]) 95 | except KeyError: 96 | pass 97 | 98 | # dock (or re-dock) next_widget and 
arbitrary_widget to Napari window 99 | viewer.window.add_dock_widget( 100 | next_widget, name=f'Channel: {channel}, Sample: {sample}', area='right' 101 | ) 102 | viewer.window.add_dock_widget( 103 | arbitrary_widget, name='Sample Selector', area='right' 104 | ) 105 | 106 | ####################################################################### 107 | 108 | @magicgui( 109 | layout='horizontal', 110 | call_button='Apply Limits and Move to Next Channel -->' 111 | ) 112 | def next_sample(channel): 113 | 114 | global channels_to_samples 115 | global arbitrary_selection_toggle 116 | global sample_index 117 | 118 | # update channel contrast yaml with selected constrast limits 119 | qc_report['setContrast'][self.counterstainChannel] = ( 120 | [int(i) for i in viewer.layers[self.counterstainChannel].contrast_limits] 121 | ) 122 | qc_report['setContrast'][channel] = [ 123 | int(i) for i in viewer.layers[channel].contrast_limits 124 | ] 125 | 126 | # sort and dump updated qc_report to YAML file 127 | qc_report_sorted = sort_qc_report( 128 | qc_report, module='setContrast', order=[self.counterstainChannel] + abx_channels 129 | ) 130 | f = open(report_path, 'w') 131 | yaml.dump(qc_report_sorted, f, sort_keys=False, allow_unicode=False) 132 | 133 | # go to next sample 134 | try: 135 | if arbitrary_selection_toggle: 136 | sample_index -= 1 137 | 138 | channel = list(channels_to_samples.keys())[sample_index] 139 | sample = channels_to_samples[list(channels_to_samples.keys())[sample_index]] 140 | 141 | initial_callback = False 142 | callback( 143 | self, viewer, channel, sample, data, initial_callback, 144 | next_widget, next_layout, arbitrary_widget, arbitrary_layout, 145 | qc_report, report_path 146 | ) 147 | 148 | sample_index += 1 149 | arbitrary_selection_toggle = False 150 | 151 | except IndexError: 152 | 153 | print() 154 | logger.info('Contrast Adjustments Complete!') 155 | QTimer().singleShot(0, viewer.close) 156 | 157 | next_sample.native.setSizePolicy( 158 | 
QtWidgets.QSizePolicy.Minimum, 159 | QtWidgets.QSizePolicy.Maximum, 160 | ) 161 | 162 | # give next_sample access to channel passed to callback 163 | next_sample.channel.bind(channel) 164 | 165 | next_layout.addWidget(next_sample.native) 166 | 167 | ####################################################################### 168 | 169 | @magicgui(layout='vertical', call_button='Enter', sample={'label': 'Sample Name'}) 170 | def sample_selector(sample: str): 171 | 172 | return sample 173 | 174 | sample_selector.native.setSizePolicy( 175 | QtWidgets.QSizePolicy.Fixed, 176 | QtWidgets.QSizePolicy.Fixed 177 | ) 178 | 179 | arbitrary_layout.addWidget(sample_selector.native) 180 | 181 | # call connect 182 | @sample_selector.called.connect 183 | def sample_callback(value: str): 184 | 185 | global arbitrary_selection_toggle 186 | 187 | sample = value 188 | 189 | print() 190 | if sample not in data['Sample'].unique(): 191 | napari.utils.notifications.show_warning('Sample name not in filtered data.') 192 | pass 193 | else: 194 | # update channel contrast yaml with selected constrast limits 195 | 196 | qc_report['setContrast'][self.counterstainChannel] = ( 197 | [int(i) for i in viewer.layers[self.counterstainChannel].contrast_limits] 198 | ) 199 | qc_report['setContrast'][channel] = [int(i) for i in viewer.layers[channel].contrast_limits] 200 | 201 | # dump updated qc_report to YAML file 202 | qc_report_sorted = sort_qc_report( 203 | qc_report, module='setContrast', order=[self.counterstainChannel] + abx_channels 204 | ) 205 | f = open(report_path, 'w') 206 | yaml.dump(qc_report_sorted, f, sort_keys=False, allow_unicode=False) 207 | 208 | initial_callback = False 209 | callback( 210 | self, viewer, channel, sample, data, initial_callback, 211 | next_widget, next_layout, arbitrary_widget, arbitrary_layout, 212 | qc_report, report_path 213 | ) 214 | 215 | arbitrary_selection_toggle = True 216 | 217 | ####################################################################### 218 | 
# main
def setContrast(data, self, args):
    """Interactively choose per-channel image contrast limits in Napari.

    One representative sample is picked per channel, the viewer is opened on
    the first channel, and the limits chosen by the user are written to the
    ``setContrast`` section of ``cylinter_report.yml`` in ``self.outDir``.

    Args:
        data: dataframe with a ``Sample`` column and one column per channel.
        self: pipeline object providing ``outDir``, ``counterstainChannel``,
            ``markersToExclude`` and ``dimensionEmbedding``.
        args: not used by this module.

    Returns:
        ``data`` after ``reorganize_dfcolumns``.

    Exits the process (``sys.exit()``) when the QC report cannot be read, or
    when limits are still missing for some channel after the viewer closes.
    """
    print()

    check, markers_filepath = input_check(self)

    # read marker metadata
    markers, abx_channels = read_markers(
        markers_filepath=markers_filepath,
        counterstain_channel=self.counterstainChannel,
        markers_to_exclude=self.markersToExclude, data=None
    )

    # read QC report
    report_path = os.path.join(self.outDir, 'cylinter_report.yml')
    try:
        with open(report_path) as f:
            qc_report = yaml.safe_load(f)
        reload_report = False
        if qc_report is None:
            qc_report = {}
            reload_report = True
        if 'setContrast' not in qc_report or qc_report['setContrast'] is None:
            qc_report['setContrast'] = {}
            reload_report = True
        if reload_report:
            # persist the initialized section, then read the file back
            qc_report_sorted = sort_qc_report(qc_report, module='setContrast', order=None)
            with open(report_path, 'w') as f:
                yaml.dump(qc_report_sorted, f, sort_keys=False, allow_unicode=False)
            with open(report_path) as f:
                qc_report = yaml.safe_load(f)
    except (OSError, yaml.YAMLError):
        # only I/O and YAML parsing problems mean "report unusable"; anything
        # else (including Ctrl-C) now propagates instead of being masked
        logger.info(
            'Aborting; QC report missing from CyLinter output directory. Re-start pipeline '
            'from aggregateData module to initialize QC report.'
        )
        sys.exit()

    viewer = napari.Viewer(title='CyLinter')

    # generate next sample selection Qt widget
    next_widget = QtWidgets.QWidget()
    next_layout = QtWidgets.QVBoxLayout(next_widget)
    next_widget.setSizePolicy(
        QtWidgets.QSizePolicy.Minimum,
        QtWidgets.QSizePolicy.Fixed,
    )

    # generate arbitrary sample selection Qt widget
    arbitrary_widget = QtWidgets.QWidget()
    arbitrary_layout = QtWidgets.QVBoxLayout(arbitrary_widget)
    arbitrary_widget.setSizePolicy(
        QtWidgets.QSizePolicy.Minimum,
        QtWidgets.QSizePolicy.Fixed,
    )

    # for each channel, pick the sample whose median cell signal intensity is
    # closest to the 85th percentile of per-sample medians
    # (trying to avoid outliers associated with max values)
    for ch in [self.counterstainChannel] + abx_channels:
        medians = data[['Sample', ch]].groupby('Sample').median()
        percentile_value = medians.quantile(0.85).item()
        differences = abs(medians - percentile_value)
        # select sample whose median channel value is closest to quantile
        channels_to_samples[ch] = differences.idxmin().item()

    # pass first channel and sample in channels_to_samples to callback
    channel = next(iter(channels_to_samples))
    sample = channels_to_samples[channel]

    callback(
        self, viewer, channel, sample, data, True,
        next_widget, next_layout, arbitrary_widget, arbitrary_layout,
        qc_report, report_path
    )

    viewer.scale_bar.visible = True
    viewer.scale_bar.unit = 'um'

    napari.run()

    print()

    ##############################################################################################
    # print current channel contrast limits and exit

    if set(qc_report['setContrast']) == set(abx_channels + [self.counterstainChannel]):
        logger.info('Current channel contrast settings are as follows:')
        for k, v in qc_report['setContrast'].items():
            logger.info(f'{k}: {v}')
    else:
        logger.info(
            'Aborting; QC report does not contain contrast settings for all channels. '
            'Please ensure limits are selected for all channels.'
        )
        sys.exit()

    data = reorganize_dfcolumns(data, markers, self.dimensionEmbedding)

    print()
    print()
    return data
import pyarrow
import pyarrow.parquet
import pandas as pd
from . import components


def save_checkpoint(data, config, module):
    """Write *data* to ``<config.checkpoint_path>/<module name>.parquet``.

    The parent directory is created when it does not exist yet.
    """
    checkpoint_file = config.checkpoint_path / f"{module.__name__}.parquet"
    checkpoint_file.parent.mkdir(parents=True, exist_ok=True)
    # pyarrow is called directly because pandas' to_parquet runs an
    # over-zealous validity check that errors on a column MultiIndex. If that
    # is resolved upstream this can become: data.to_parquet(path, index=True)
    pyarrow.parquet.write_table(pyarrow.Table.from_pandas(data), checkpoint_file)
def run_pipeline(config, start_module_name):
    """Run the pipeline modules in order, checkpointing after each one.

    Args:
        config: configuration object; its attributes are copied onto the
            ``components.QC`` instance and it supplies ``checkpoint_path``.
        start_module_name: name of the module to start from, or ``None`` to
            start at the first module. When starting later, the previous
            module's parquet checkpoint is loaded as the input data.

    Raises:
        ValueError: ``start_module_name`` is not a known pipeline module.
        FileNotFoundError: the checkpoint of the preceding module is missing.
    """
    module_names = components.pipeline_module_names

    if start_module_name is None or start_module_name == module_names[0]:
        start_index = 0
        data = None
    else:
        if start_module_name not in module_names:
            # same exception type list.index() raised, with an actionable message
            raise ValueError(
                f"Unknown module {start_module_name!r}; "
                f"expected one of: {', '.join(module_names)}"
            )
        start_index = module_names.index(start_module_name)
        previous_module_name = module_names[start_index - 1]
        checkpoint_file_path = (
            config.checkpoint_path / f"{previous_module_name}.parquet"
        )
        if not checkpoint_file_path.exists():
            raise FileNotFoundError(
                f"Checkpoint file for module {previous_module_name} not found"
            )
        data = pd.read_parquet(checkpoint_file_path)

    # make instance of the QC class
    qc = components.QC(
        inDir=config.inDir,
        outDir=config.outDir,
        sampleNames=config.sampleNames,
        sampleConditions=config.sampleConditions,
        sampleConditionAbbrs=config.sampleConditionAbbrs,
        sampleStatuses=config.sampleStatuses,
        sampleReplicates=config.sampleReplicates,
        samplesToExclude=config.samplesToExclude,
        counterstainChannel=config.counterstainChannel,
        markersToExclude=config.markersToExclude,

        delintMode=config.delintMode,
        showAbChannels=config.showAbChannels,
        samplesForROISelection=config.samplesForROISelection,
        autoArtifactDetection=config.autoArtifactDetection,
        artifactDetectionMethod=config.artifactDetectionMethod,

        numBinsIntensity=config.numBinsIntensity,

        numBinsArea=config.numBinsArea,

        numBinsCorrelation=config.numBinsCorrelation,

        hexbins=config.hexbins,
        hexbinGridSize=config.hexbinGridSize,

        metaQC=config.metaQC,

        channelExclusionsPCA=config.channelExclusionsPCA,
        samplesToRemovePCA=config.samplesToRemovePCA,
        dimensionPCA=config.dimensionPCA,
        pointSize=config.pointSize,
        labelPoints=config.labelPoints,
        distanceCutoff=config.distanceCutoff,
        conditionsToSilhouette=config.conditionsToSilhouette,

        gating=config.gating,
        channelExclusionsGating=config.channelExclusionsGating,
        samplesToRemoveGating=config.samplesToRemoveGating,
        vectorThreshold=config.vectorThreshold,
        classes=config.classes,

        embeddingAlgorithmQC=config.embeddingAlgorithmQC,
        embeddingAlgorithm=config.embeddingAlgorithm,
        channelExclusionsClusteringQC=config.channelExclusionsClusteringQC,
        channelExclusionsClustering=config.channelExclusionsClustering,
        normalizeTissueCounts=config.normalizeTissueCounts,
        samplesToRemoveClusteringQC=config.samplesToRemoveClusteringQC,
        samplesToRemoveClustering=config.samplesToRemoveClustering,
        percentDataPerChunk=config.percentDataPerChunk,
        fracForEmbedding=config.fracForEmbedding,
        dimensionEmbedding=config.dimensionEmbedding,
        colormapAnnotationQC=config.colormapAnnotationQC,
        colormapAnnotationClustering=config.colormapAnnotationClustering,

        perplexityQC=config.perplexityQC,
        perplexity=config.perplexity,
        earlyExaggerationQC=config.earlyExaggerationQC,
        earlyExaggeration=config.earlyExaggeration,
        learningRateTSNEQC=config.learningRateTSNEQC,
        learningRateTSNE=config.learningRateTSNE,
        metricQC=config.metricQC,
        metric=config.metric,
        randomStateQC=config.randomStateQC,
        randomStateTSNE=config.randomStateTSNE,

        nNeighborsQC=config.nNeighborsQC,
        nNeighbors=config.nNeighbors,
        learningRateUMAPQC=config.learningRateUMAPQC,
        learningRateUMAP=config.learningRateUMAP,
        minDistQC=config.minDistQC,
        minDist=config.minDist,
        repulsionStrengthQC=config.repulsionStrengthQC,
        repulsionStrength=config.repulsionStrength,
        randomStateUMAP=config.randomStateUMAP,

        controlGroups=config.controlGroups,
        denominatorCluster=config.denominatorCluster,
        FDRCorrection=config.FDRCorrection,

        numThumbnails=config.numThumbnails,
        windowSize=config.windowSize,
        segOutlines=config.segOutlines,
    )

    # each module receives the running dataframe, the QC object and the
    # config, and returns the dataframe handed to the next module
    for module in components.pipeline_modules[start_index:]:
        print(f'Running: {module}')
        data = module(data, qc, config)
        save_checkpoint(data, config, module)
import sys
import os
import argparse
import pathlib
from subprocess import call


def main(argv=sys.argv):
    """Prepare a CyLinter input directory from an mcmicro output directory.

    Args:
        argv: full argument vector, program name first (defaults to
            ``sys.argv``).

    Returns:
        Exit status of ``prep_subprocess.sh`` (0 on success).
    """
    parser = argparse.ArgumentParser(
        description='Prepare an input directory for CyLinter analysis.',
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        '-t', action='store_true', help='optional flag for TMA data'
    )
    parser.add_argument(
        'source_dir', type=str,
        help='path to mcmicro output directory'
    )
    parser.add_argument(
        'dest_dir', type=path_resolved,
        help='path to CyLinter input directory'
    )
    # honour the argv parameter instead of silently re-reading sys.argv
    args = parser.parse_args(argv[1:])

    os.makedirs(args.dest_dir, exist_ok=True)

    # Argument list instead of an interpolated shell string: paths containing
    # quotes or spaces are passed through verbatim and cannot inject commands.
    # The script relies on bash arrays, so it is run with bash rather than sh.
    # The flag is sent as lowercase true/false because the script executes it
    # as a command ("True" does not exist on case-sensitive file systems).
    return call([
        'bash',
        os.path.join(sys.prefix, 'prep_subprocess.sh'),
        'true' if args.t else 'false',
        args.source_dir,
        str(args.dest_dir),
        os.path.join(sys.prefix, 'config.yml'),
    ])


def path_resolved(path_str):
    """Return a resolved Path for a string."""
    return pathlib.Path(path_str).resolve()
#!/bin/bash

# Transfer mcmicro output files from cluster and format local directory structure
# for compatibility with CyLinter pipeline (run locally).
#
# Positional arguments (as passed by cylinter/prep.py):
#   $1  TMA flag: true/True/-t for TMA data, anything else for whole-tissue data
#   $2  path to mcmicro output directory (rsync source), or "cylinter_demo"
#   $3  path to CyLinter input directory (destination)
#   $4  path to the configuration template copied into the destination
#
# EXAMPLE(TMA): bash prep_subprocess.sh true gjb15@transfer.rc.hms.harvard.edu:/n/scratch3/users/g/gjb15/exemplar-002 /Users/greg/projects/cycif-qc/input /path/to/config.yml

# no-op unless getopts has advanced OPTIND; kept from the original script
shift $(( OPTIND - 1 ))

SRC="$2"
DEST="$3"
CONFIG="$4"

# Interpret the flag instead of executing it as a command.
case "$1" in
    true|True|-t) TMA=1 ;;
    *) TMA=0 ;;
esac

# Move each leftover sample directory's markers.csv up into the input
# directory, then delete that sample directory.
remove_sample_dirs() {
    local sample_path sample_name
    for sample_path in "$DEST"/* ; do
        sample_name=$(basename "$sample_path")
        if [ "$sample_name" != "csv" ] && [ "$sample_name" != "tif" ] && [ "$sample_name" != "seg" ] && [ "$sample_name" != "markers.csv" ]; then
            mv "$sample_path"/markers.csv "$DEST"/
            rm -r "$sample_path"
        fi
    done
}

# Print the segmentation label taken from the first quantification table:
# the text between the first "_" and the following "." of its file name.
segmentation_label() {
    local files=("$DEST"/csv/*)
    basename "${files[0]}" | cut -d'_' -f2 | cut -d'.' -f1
}

if [ "$SRC" = "cylinter_demo" ]; then

    # Transfer emit22_demo from Sage Synapse
    synapse get -r syn52859560 --downloadLocation "$DEST" --multiThreaded

    # Rename quantification and dearray subdirectories to "csv" and "tif", respectively.
    if [ -d "$DEST"/quantification ]; then
        mv "$DEST"/quantification "$DEST"/csv
        mv "$DEST"/dearray "$DEST"/tif
    fi

    # Copy config.yml template to CyLinter input directory
    cp "$CONFIG" "$DEST"

elif [ "$TMA" -eq 1 ]; then

    echo "Transferring TMA data."

    # Transfer mcmicro output files to CyLinter input directory.
    # Patterns are quoted so the local shell never expands them.
    rsync -avP -m "$SRC"/ "$DEST" --include 'quantification/*.csv' --include 'dearray/*.tif' --include markers.csv --exclude work --exclude '*.*'

    mkdir -p "$DEST"/seg

    # Rename quantification and dearray subdirectories to "csv" and "tif", respectively.
    if [ -d "$DEST"/seg ]; then

        mv "$DEST"/*/quantification "$DEST"/csv
        mv "$DEST"/*/dearray "$DEST"/tif

        SEG=$(segmentation_label)

        rsync -avP -m "$SRC"/ "$DEST" --include "qc/s3seg/*/${SEG}Outlines.ome.tif" --exclude work --exclude '*.*'

        for RESOLVED_PATH in "$DEST"/*/qc/s3seg/* ; do
            SAMPLE_NAME=$(basename "$RESOLVED_PATH")
            # split the directory name on "-" and keep the second field
            arrIN=(${SAMPLE_NAME//-/ })
            NAME=${arrIN[1]}
            mv "$RESOLVED_PATH"/"$SEG"Outlines.ome.tif "$RESOLVED_PATH"/"$NAME".ome.tif
            mv "$RESOLVED_PATH"/"$NAME".ome.tif "$DEST"/seg/
        done

        remove_sample_dirs
    fi

    # copy configuration template to input dir
    cp "$CONFIG" "$DEST"/

else

    echo "Transferring whole tissue data."

    # Transfer mcmicro output files to CyLinter input directory.
    rsync -avP -m "$SRC"/ "$DEST" --include 'quantification/*.csv' --include 'registration/*.ome.tif' --include markers.csv --exclude work --exclude '*.*'

    # Make directories for images, data tables, and segmentation outlines
    mkdir -p "$DEST"/csv
    mkdir -p "$DEST"/tif
    mkdir -p "$DEST"/seg

    # combine sample tifs, csv files, and their segmentation outlines into respectively-labeled subdirectories.
    mv "$DEST"/*/quantification/*.csv "$DEST"/csv/
    mv "$DEST"/*/registration/*.tif "$DEST"/tif/

    # uses the first CSV only; passing every CSV to basename fails as soon as
    # there is more than one sample
    SEG=$(segmentation_label)
    rsync -avP -m "$SRC"/ "$DEST" --include "qc/s3seg/*/${SEG}Outlines.ome.tif" --exclude work --exclude '*.*'

    for SAMPLE_PATH in "$DEST"/*/qc/s3seg/* ; do
        SAMPLE_NAME=$(basename "$SAMPLE_PATH")

        # crop off "unmicst-" prefix from outlines directory name
        arrIN=(${SAMPLE_NAME//unmicst-/ })

        mv "$SAMPLE_PATH/$SEG"Outlines.ome.tif "$SAMPLE_PATH"/"${arrIN[0]}".ome.tif
        mv "$SAMPLE_PATH"/"${arrIN[0]}".ome.tif "$DEST"/seg/
    done

    remove_sample_dirs

    # Copy configuration template to input dir
    cp "$CONFIG" "$DEST"/

fi
-f1) 83 | rsync -avP -m "$2"/ "$3" --include qc/s3seg/*/"$SEG"Outlines.ome.tif --exclude work --exclude '*.*' 84 | 85 | for SAMPLE_PATH in "$3"/*/qc/s3seg/* ; do 86 | SAMPLE_NAME=$(basename "$SAMPLE_PATH") 87 | 88 | # crop off "unmicst-" prefix from outlines directory name 89 | arrIN=(${SAMPLE_NAME//unmicst-/ }) 90 | 91 | mv "$SAMPLE_PATH/$SEG"Outlines.ome.tif "$SAMPLE_PATH"/"${arrIN[0]}".ome.tif 92 | mv "$SAMPLE_PATH"/"${arrIN[0]}".ome.tif "$3"/seg/ 93 | done 94 | 95 | for SAMPLE_PATH in "$3"/* ; do 96 | SAMPLE_NAME=$(basename "$SAMPLE_PATH") 97 | if [ $SAMPLE_NAME != "csv" ] && [ $SAMPLE_NAME != "tif" ] && [ $SAMPLE_NAME != "seg" ] && [ $SAMPLE_NAME != "markers.csv" ]; then 98 | mv "$SAMPLE_PATH"/markers.csv "$3"/ 99 | rm -r "$SAMPLE_PATH" 100 | fi 101 | done 102 | 103 | # Copy configuration template to input dir 104 | cp "$4" "$3"/ 105 | 106 | fi 107 | fi 108 | -------------------------------------------------------------------------------- /cylinter/pretrained_models/pretrained_model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labsyspharm/cylinter/b9dc04b0f0892657d53dbf31f753dc52b481bc08/cylinter/pretrained_models/pretrained_model.pkl -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | _site 2 | Gemfile 3 | Gemfile.lock -------------------------------------------------------------------------------- /docs/_config.yml: -------------------------------------------------------------------------------- 1 | title: CyLinter 2 | description: Cylinter pipeline 3 | logo: "/assets/images/cylinter-logo.svg" 4 | hero_background: 5 | 6 | remote_theme: labsyspharm/just-the-docs-lsp 7 | color_scheme: cylinter 8 | search_enabled: true 9 | heading_anchors: true 10 | 11 | # Banner links to include 12 | banner_links: 13 | lsp: true 14 | hits: true 15 | 16 | # Aux links for the 
upper right navigation 17 | aux_links: 18 | "CyLinter on GitHub": 19 | - "//github.com/labsyspharm/cylinter" 20 | aux_links_new_tab: false 21 | 22 | 23 | # Footer content 24 | # appears at the bottom of every page's main content 25 | footer_content: 26 | 27 | # Back to top link 28 | back_to_top: false 29 | back_to_top_text: "Back to Top" 30 | 31 | # Provide license information for the project 32 | license: 33 | - description: "CyLinter source code is licensed under the" 34 | name: "MIT License" 35 | url: "https://github.com/labsyspharm/cylinter/blob/master/LICENSE" 36 | - description: "Contents of this site are licensed under" 37 | name: CC BY-NC 4.0 Creative Commons License 38 | url: "http://creativecommons.org/licenses/by-nc/4.0/" 39 | 40 | # Linked logos 41 | footer_logos: 42 | - name: "Laboratory of Systems Pharmacology" 43 | image: "/assets/images/logo_lsp_white.svg" 44 | url: "https://hits.harvard.edu/the-program/laboratory-of-systems-pharmacology/about/" 45 | - name: "Harvard Medical School" 46 | image: "/assets/images/logo_hms.svg" 47 | url: "https://hms.harvard.edu/" 48 | 49 | # Footer last edited timestamp 50 | last_edit_timestamp: true # show or hide edit time - page must have `last_modified_date` defined in the frontmatter 51 | last_edit_time_format: "%b %e %Y" # format: https://ruby-doc.org/stdlib-2.7.0/libdoc/time/rdoc/Time.html 52 | 53 | # Footer "Edit this page on GitHub" link text 54 | gh_edit_link: false # show or hide edit this page link 55 | gh_edit_link_text: "Edit this page on GitHub." 
56 | gh_edit_repository: "https://github.com/labsyspharm/cylinter" # the github URL for your repo 57 | gh_edit_branch: "master" # the branch that your docs is served from 58 | gh_edit_source: docs # the source that your files originate from 59 | gh_edit_view_mode: "tree" # "tree" or "edit" if you want the user to jump into the editor immediately 60 | 61 | # Google Analytics 62 | ga_tracking: G-TY4QP6H41T 63 | ga_tracking_anonymize_ip: true # Use GDPR compliant Google Analytics settings 64 | 65 | plugins: 66 | - jekyll-seo-tag 67 | - jekyll-remote-theme 68 | - jekyll-include-cache 69 | 70 | repository: labsyspharm/cylinter 71 | 72 | exclude: ["node_modules/", "*.gemspec", "*.gem", "Gemfile", "Gemfile.lock", "package.json", "package-lock.json", "script/", "LICENSE.txt", "lib/", "bin/", "README.md", "Rakefile"] 73 | compress_html: 74 | clippings: all 75 | comments: all 76 | endings: all 77 | startings: [] 78 | blanklines: false 79 | profile: false 80 | -------------------------------------------------------------------------------- /docs/_includes/cylinter_gif.html: -------------------------------------------------------------------------------- 1 | cylinter 2 | -------------------------------------------------------------------------------- /docs/_includes/home.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 |
4 | 5 |
6 | 7 | ## QC for Multiplex Microscopy 8 | Although quality control (QC) methods have long been associated with analysis tools for single-cell genomics and transcriptomics research, analogous tools have lagged in the area of quantitative microscopy. There are now at least 9 different multiplex imaging platforms capable of routine acquisition of 20-40 channel microscopy data<sup>1,2,3,4,5,6,7,8,9</sup>, and each is sensitive to microscopy artifacts. Current tools for microscopy-based QC act on pixel-level data<sup>10,11,12,13,14</sup>. CyLinter differs in that it allows users to work with both pixel-level and single-cell data to identify and remove cell segmentation instances in multiplex images corrupted by visual and image-processing artifacts that can dramatically affect data interpretation. 9 | 10 |
11 |
12 | 13 | ## About CyLinter 14 | CyLinter is open-source QC software for multiplex microscopy. The tool is instantiated as a Python class and consists of multiple QC modules through which single-cell data are passed for serial redaction. Partially-redacted spatial feature tables are cached within and between modules to allow for iterative QC strategies and progress bookmarking. CyLinter is agnostic to data acquisition platform (CyCIF<sup>1</sup>, CODEX<sup>2</sup>, MIBI<sup>3</sup>, mIHC<sup>4</sup>, mxIF<sup>5</sup>, IMC<sup>6</sup>, etc.) and takes standard TIFF/OME-TIFF imaging files and CSV spatial feature tables as input. 15 | 16 |
17 | 18 | 1. Lin, J.-R. et al. Highly multiplexed immunofluorescence imaging of human tissues and tumors using t-CyCIF and conventional optical microscopes. Elife 7, (2018). 19 | 2. Goltsev, Y. et al. Deep Profiling of Mouse Splenic Architecture with CODEX Multiplexed Imaging. Cell 174, 968-981.e15 (2018). 20 | 3. Angelo, M. et al. Multiplexed ion beam imaging (MIBI) of human breast tumors. Nat Med 20, 436–442 (2014). 21 | 4. Tsujikawa, T. et al. Quantitative Multiplex Immunohistochemistry Reveals Myeloid-Inflamed Tumor-Immune Complexity Associated with Poor Prognosis. Cell Reports 19, 203–217 (2017). 22 | 5. Gerdes, M. J. et al. Highly multiplexed single-cell analysis of formalin-fixed, paraffin-embedded cancer tissue. Proc Natl Acad Sci U S A 110, 11982–11987 (2013). 23 | 6. Giesen, C. et al. Highly multiplexed imaging of tumor tissues with subcellular resolution by mass cytometry. Nat Methods 11, 417–422 (2014). 24 | 7. Remark, R. et al. In-depth tissue profiling using multiplexed immunohistochemical consecutive staining on single slide. Science Immunology 1, aaf6925–aaf6925 (2016). 25 | 8. Gut, G., Herrmann, M. D. & Pelkmans, L. Multiplexed protein maps link subcellular organization to cellular states. Science 361, (2018). 26 | 9. Saka, S. K. et al. Immuno-SABER enables highly multiplexed and amplified protein imaging in tissues. Nat Biotechnol 37, 1080–1090 (2019). 27 | 10. Janowczyk, A., Zuo, R., Gilmore, H., Feldman, M. & Madabhushi, A. HistoQC: An Open-Source Quality Control Tool for Digital Pathology Slides. JCO Clin Cancer Inform 3, 1–7 (2019). 28 | 11. Ameisen, D. et al. Towards better digital pathology workflows: programming libraries for high-speed sharpness assessment of Whole Slide Images. Diagn Pathol 9 Suppl 1, S3 (2014). 29 | 12. Senaras, C., Niazi, M. K. K., Lozanski, G. & Gurcan, M. N. DeepFocus: Detection of out-of-focus regions in whole slide digital images using deep learning. PLoS One 13, e0205387 (2018). 30 | 13. Wen, S. et al. 
A Methodology for Texture Feature-based Quality Assessment in Nucleus Segmentation of Histopathology Image. J Pathol Inform 8, 38 (2017). 31 | 14. Baranski, A. et al. MAUI (MBI Analysis User Interface)-An image processing pipeline for Multiplexed Mass Based Imaging. PLoS Comput Biol 17, e1008887 (2021). 32 | -------------------------------------------------------------------------------- /docs/_includes/workflow.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labsyspharm/cylinter/b9dc04b0f0892657d53dbf31f753dc52b481bc08/docs/_includes/workflow.md -------------------------------------------------------------------------------- /docs/_layouts/default-cylinter.html: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | --- 4 | 5 | 16 | 17 | {{ content }} 18 | -------------------------------------------------------------------------------- /docs/_sass/color_schemes/cylinter.scss: -------------------------------------------------------------------------------- 1 | 2 | $sidebar-color: #e6e6e6; 3 | $footer-background-color: #00b0e9; 4 | $branding-background-color: #e6e6e6; 5 | $branding-background-opacity: .0; 6 | $hero-background-color: #454545; 7 | $hero-background-opacity: .25; 8 | 9 | $body-text-color: #58595b; 10 | $body-heading-color: #1e506c; // Default heading color 11 | $body-heading-2-color: #00b0e9; // Target H2 heading color 12 | $nav-child-link-color: #58595b; 13 | $link-color: #C30800; 14 | $arrow-btn-color: #006eb8; 15 | $btn-primary-color: #006eb8; 16 | $image-card-label-background-color: #00b0e9; 17 | -------------------------------------------------------------------------------- /docs/_sass/custom/custom.scss: -------------------------------------------------------------------------------- 1 | .main-branding { 2 | height: 185px; 3 | } 4 | 5 | .site-logo::before { 6 | content:""; 7 | } 8 | 9 | .site-logo { 10 | height: 100%; 
11 | padding-top: 0px; 12 | padding-bottom: 0px; 13 | padding-right: 300px; 14 | } 15 | 16 | .hero { 17 | background-size: contain; 18 | } 19 | 20 | .hero-heading { 21 | color: #4d4d4d; 22 | } 23 | 24 | .hero-body { 25 | color: #4d4d4d; 26 | font-size: 1.3rem; 27 | } 28 | 29 | .hero { 30 | padding-top: 0px; 31 | padding-bottom: 0px; 32 | padding-right: 25px; 33 | padding-left: 25px; 34 | } 35 | 36 | .hero-inner { 37 | padding: 10px; 38 | max-width: 2500px; 39 | margin-top: 0px; 40 | margin-bottom: 0px; 41 | margin-bottom: 0px; 42 | } 43 | 44 | .site-header .site-logo { 45 | height: 200% !important; 46 | } 47 | 48 | .hero-heading { 49 | font-size: 2.0rem; 50 | } 51 | 52 | .main-content-wrap { 53 | padding-top: 0px; 54 | padding-bottom: 0px; 55 | padding-right: 25px; 56 | padding-left: 25px; 57 | } 58 | 59 | .btn-large { 60 | font-size: 1.0rem; 61 | text-transform: lowercase; 62 | } 63 | -------------------------------------------------------------------------------- /docs/assets/gifs/cylinter.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labsyspharm/cylinter/b9dc04b0f0892657d53dbf31f753dc52b481bc08/docs/assets/gifs/cylinter.gif -------------------------------------------------------------------------------- /docs/assets/gifs/solitary_saunter.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labsyspharm/cylinter/b9dc04b0f0892657d53dbf31f753dc52b481bc08/docs/assets/gifs/solitary_saunter.gif -------------------------------------------------------------------------------- /docs/assets/images/ExtFig4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labsyspharm/cylinter/b9dc04b0f0892657d53dbf31f753dc52b481bc08/docs/assets/images/ExtFig4.jpg -------------------------------------------------------------------------------- /docs/assets/images/cores.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/labsyspharm/cylinter/b9dc04b0f0892657d53dbf31f753dc52b481bc08/docs/assets/images/cores.jpg -------------------------------------------------------------------------------- /docs/assets/images/cylinter-logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 14 | 15 | 18 | 20 | 21 | 22 | 24 | 26 | 29 | 31 | 32 | 33 | 34 | 35 | 36 | 39 | 40 | 43 | 44 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 67 | 68 | 69 | 94 | 96 | 98 | 100 | 102 | 104 | 106 | 108 | 109 | 111 | 113 | 115 | 117 | 118 | 120 | 122 | 124 | 126 | 127 | 129 | 131 | 133 | 135 | 137 | 139 | 141 | 143 | 144 | 145 | -------------------------------------------------------------------------------- /docs/cite/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default-cylinter 3 | title: Citing CyLinter 4 | nav_order: 12 5 | --- 6 | 7 | ### If you use CyLinter in your work, please cite our publication: 8 | 9 | *Baker et al. ["Quality control for single-cell analysis of high-plex tissue profiles using CyLinter", **Nature Methods** 2024](https://doi.org/10.1038/s41592-024-02328-0)* 10 | 11 | 12 | ### The CyLinter GitHub repository can be cited as follows: 13 | 14 | **APA style**: *Baker, G. (2021). CyLinter (Version 0.0.47) [Computer software]. 
https://github.com/labsyspharm/cylinter* 15 | 16 | **BibTeX style**: @software{Baker_CyLinter_2021, 17 | author = {Baker, Gregory}, 18 | license = {MIT}, 19 | month = jan, 20 | title = {{CyLinter}}, 21 | url = {https://github.com/labsyspharm/cylinter}, 22 | version = {0.0.47}, 23 | year = {2021} 24 | } -------------------------------------------------------------------------------- /docs/community/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default-cylinter 3 | title: Community 4 | nav_order: 8 5 | --- 6 | 7 | {: .no_toc } 8 | 9 |
10 | 11 | Table of contents 12 | 13 | {: .text-delta } 14 | 1. TOC 15 | {:toc} 16 |
17 | 18 |
19 | 20 | # Contributors 21 | 22 | CyLinter development is led by [Greg Baker](https://scholar.harvard.edu/gregoryjbaker) at the [Laboratory of Systems Pharmacology](https://labsyspharm.org/), [Harvard Medical School](https://hms.harvard.edu/). 23 | 24 | # Early adopters 25 | * [Clarence Yapp](https://scholar.harvard.edu/clarence/who-clarence) 26 | * [Kenichi Shimada](https://scholar.harvard.edu/kenichi_shimada) 27 | * [Roxy Pelletier](https://www.linkedin.com/in/roxanne-pelletier) 28 | * [Tuulia Vallius](https://scholar.harvard.edu/vallius/home) 29 | * [Connor Jacobson](https://scholar.harvard.edu/connorjacobson/home) 30 | * [Ajit Johnson](https://scholar.harvard.edu/ajitjohnson/home) 31 | * [Shishir Pant](https://fi.linkedin.com/in/shishir-pant) 32 | * [Jackson Appelt](https://www.linkedin.com/in/jackson-appelt-311405142) 33 | * [Ana Verma](https://www.linkedin.com/in/anaverma) 34 | * [Sheheryar Kabraji](https://www.dana-farber.org/find-a-doctor/sheheryar-k-kabraji/) 35 | * [Claire Ritch](https://www.linkedin.com/in/cecily-claire-ritch-651795b7/) 36 | * [Shannon Coy](https://connects.catalyst.harvard.edu/Profiles/display/Person/140806) 37 | 38 | -------------------------------------------------------------------------------- /docs/exemplar/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default-cylinter 3 | title: Example data 4 | nav_order: 7 5 | --- 6 | 7 | # Example data 8 | 9 | Four (4) human tissue cores are provided as CyLinter demonstration data: normal kidney cortex, mesothelioma, glioblastoma, and normal tonsil. These imaging data were collected by [CyCIF](https://www.cycif.org/) and are derived from a tissue microarray collected at the [Laboratory of Systems Pharmacology](https://labsyspharm.org/) referred to as EMIT (Exemplar Microscopy Images of Tissues) TMA22 (Synapse ID: [syn22345750](https://www.synapse.org/#!Synapse:syn22345750)). 
The tissue cores were imaged at 20X magnification using a 0.75 NA objective and 2x2-pixel binning. 10 | 11 | Access to the demonstration dataset requires free registration at the Sage Synapse data repository ([https://www.synapse.org/](https://www.synapse.org/)). Once registered, the example dataset can be downloaded using the following commands: 12 | 13 | ## Step 1: Download 14 | ``` bash 15 | # Activate the CyLinter virtual environment. 16 | conda activate cylinter 17 | 18 | # Install the Synapse client. 19 | conda install -c bioconda synapseclient 20 | 21 | # Mac/Linux users, run the following command to download the demo dataset: 22 | synapse get -r syn52859560 --downloadLocation ~/Desktop/cylinter_demo # Enter Synapse ID and password when prompted. 23 | 24 | # PC users, run the following command to download the demo dataset: 25 | synapse get -r syn52859560 --downloadLocation C:\Users\\Desktop\cylinter_demo --multiThreaded # Enter Synapse ID and password when prompted. 26 | ``` 27 | * The demo dataset can also be downloaded directly from the Sage Synapse website here: [syn52859560](https://www.synapse.org/#!Synapse:syn52859560). 28 | 29 | ## Step 2: Configure 30 | After downloading the exemplar dataset, open the [YAML configuration file]({{ site.baseurl }}/structure/index#yaml-configuration-file) and update the `inDir` and `outDir` parameters with user-specific directory paths. All other settings are pre-configured for use with the demo dataset. 31 | 32 | ```yaml 33 | inDir: /Users//Desktop/cylinter_demo 34 | outDir: /Users//Desktop/cylinter_demo/output 35 | . 36 | . 37 | .
38 | ``` 39 | 40 | ## Step 3: Run 41 | To run CyLinter on the demo dataset, pass the [YAML configuration file]({{ site.baseurl }}/structure/index#yaml-configuration-file) to the `cylinter` command: 42 | 43 | ``` bash 44 | # for Mac: 45 | cylinter --module (optional) ~/Desktop/cylinter_demo/cylinter_config.yml 46 | 47 | # for PC: 48 | cylinter --module (optional) C:\Users\\Desktop\cylinter_demo\cylinter_config.yml 49 | ``` 50 | -------------------------------------------------------------------------------- /docs/faq/#index.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default-cylinter 3 | title: FAQ 4 | nav_order: 9 5 | --- 6 | 7 | # Frequently Asked Questions 8 | 9 | {: .no_toc } 10 | 11 |
12 | 13 | Table of contents 14 | 15 | {: .text-delta } 16 | 1. TOC 17 | {:toc} 18 |
19 | 20 | ## Pipeline execution 21 | 22 | ### Q: How do I resume a pipeline run that halted partway? 23 | 24 | The intermediate files in the `work/` directory allow you to restart a pipeline partway, without re-running everything from scratch. For example, consider the following scenario on O2: 25 | 26 | ``` bash 27 | # This run will fail because --some-invalid-arg is not a valid argument for UnMicst 28 | nextflow run labsyspharm/mcmicro --in ~/data/exemplar-001 --unmicst-opts '--some-invalid-arg' 29 | 30 | # N E X T F L O W ~ version 20.01.0 31 | # Launching `labsyspharm/mcmicro` [jolly_hodgkin] - revision: eeaa364408 [master] 32 | # executor > local (2) 33 | # [- ] process > illumination - 34 | # [7e/bf811b] process > ashlar [100%] 1 of 1 ✔ 35 | # [- ] process > dearray - 36 | # [29/dfdfac] process > unmicst [100%] 1 of 1, failed: 1 ✘ 37 | # [- ] process > ilastik - 38 | # [- ] process > s3seg - 39 | # [- ] process > quantification - 40 | # [- ] process > naivestates - 41 | 42 | # Address the issue by removing the invalid argument and restart the pipeline with -resume 43 | nextflow run labsyspharm/mcmicro --in ~/data/exemplar-001 -resume 44 | 45 | # N E X T F L O W ~ version 20.01.0 46 | # Launching `labsyspharm/mcmicro` [backstabbing_goodall] - revision: eeaa364408 [master] 47 | # executor > local (1) 48 | # [- ] process > illumination - 49 | # [7e/bf811b] process > ashlar [100%] 1 of 1, cached: 1 ✔ <- NOTE: cached 50 | # [- ] process > dearray - 51 | # [9e/08ab35] process > unmicst [100%] 1 of 1 ✔ 52 | # [- ] process > ilastik - 53 | # [84/918c38] process > s3seg [100%] 1 of 1 ✔ 54 | # [0a/7f71f7] process > quantification [100%] 1 of 1 ✔ 55 | # [ff/be5a97] process > naivestates [100%] 1 of 1 ✔ 56 | ``` 57 | 58 | As you run the pipeline on your datasets, the size of the `work/` directory can grow substantially. Use [nextflow clean](https://github.com/nextflow-io/nextflow/blob/cli-docs/docs/cli.rst#clean) to selectively remove portions of the work directory. 
Use the `-n` flag to list which files will be removed, inspect the list to ensure that you don't lose anything important, and repeat the command with `-f` to actually remove the files: 59 | 60 | ``` bash 61 | # Remove work files associated with most-recent run 62 | nextflow clean -n last # Show what will be removed 63 | nextflow clean -f last # Proceed with the removal 64 | 65 | # Remove all work files except those associated with the most-recent run 66 | nextflow clean -n -but last 67 | nextflow clean -f -but last 68 | ``` 69 | 70 | ## Pre-processing 71 | 72 | ### Q: How does mcmicro handle multi-file formats such as `.xdce`? 73 | 74 | A: Registration and illumination correction modules in mcmicro are [Bio-Formats compatible](https://docs.openmicroscopy.org/bio-formats/6.0.1/supported-formats.html). Place all files into the `raw/` subdirectory, as described in [Directory Structure]({{ site.baseurl }}/documentation/dir.html), and mcmicro modules will correctly identify and use the relevant ones. 75 | 76 | ## Segmentation 77 | 78 | ### Q: How do I run mcmicro with my own ilastik model? 79 | 80 | A: Use the `--ilastik-model` parameter. Note that the parameter must be specified **outside** `--ilastik-opts`. For example, 81 | 82 | ``` 83 | nextflow run labsyspharm/mcmicro --in /my/data --probability-maps ilastik --ilastik-model mymodel.ilp 84 | ``` 85 | 86 | ### Q: How do I check the quality of segmentation? 87 | 88 | A: After a successful mcmicro run, two-channel tif files containing DAPI and nuclei/cell/cytoplasm outlines will reside in `qc/s3seg`. Segmentation quality can be assessed through visual inspection of these files in, e.g., [napari](https://napari.org/). 89 | 90 | ### Q: How do I handle images acquired without pixel binning? 91 | 92 | ![]({{ site.baseurl }}/images/FAQ-binning.png) 93 | 94 | A: There are two adjustments to make: 95 | 96 | 1.
Adjust `--scalingFactor` for UnMicst, which controls the ratio of the current pixel width (W2) to exemplar pixel width (W1) and is not related to area (see schematic). 97 | 1. In S3Segmenter, `--cytoDilation` controls the number of pixels from the edge of the nucleus to expand in creating the cytoplasm mask. Take the value optimized for 2x binned images and multiply it by 2 (i.e., if `--cytoDilation 3` is optimal for 2x binning, then the new value will be `--cytoDilation 6`). 98 | 99 | Use `--unmicst-opts` and `--s3seg-opts` to pass the new values to UnMicst and S3Segmenter, respectively: 100 | 101 | ``` 102 | nextflow run labsyspharm/mcmicro --in /path/to/unbinned/data --unmicst-opts '--scalingFactor 0.5' --s3seg-opts '--cytoDilation 6' 103 | ``` 104 | 105 | ## Quantification 106 | 107 | ### Q: How do I quantify multiple masks? 108 | 109 | A: Use `--quant-opts` to specify the `--masks` parameter for quantification. Any file found in the corresponding `segmentation/` folder can be provided here. For example, 110 | 111 | ``` 112 | nextflow run labsyspharm/mcmicro --in /path/to/exemplar-001 --quant-opts '--masks cell.ome.tif nuclei.ome.tif' 113 | ``` 114 | 115 | will quantify cell and nuclei masks. The corresponding spatial feature tables can then be found in `quantification/unmicst-exemplar-001_cell.csv` and `quantification/unmicst-exemplar-001_nuclei.csv`, respectively. 116 | 117 | ### Q: How do I compute the median expression of each channel?
118 | 119 | A: Use `--quant-opts` to specify the corresponding `--intensity_props` parameter for quantification: 120 | 121 | ``` 122 | nextflow run labsyspharm/mcmicro --in /path/to/exemplar-001 --quant-opts '--intensity_props median_intensity' 123 | ``` 124 | -------------------------------------------------------------------------------- /docs/funding/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default-cylinter 3 | title: Funding 4 | nav_order: 9 5 | --- 6 | 7 | # Funding 8 | 9 | This work was supported by Ludwig Cancer Research and the Ludwig Center at Harvard and by NIH NCI grants U2C-CA233280 (Omic and Multidimensional Spatial Atlas of Metastatic Breast and Prostate Cancers) and U2C-CA233262 (Pre-cancer atlases of cutaneous and hematologic origin—PATCH Center) to Peter K. Sorger and Sandro Santagata as part of the [Human Tumor Atlas Network](https://humantumoratlas.org/). Development of computational methods and image processing software is supported by a Team Science Grant from the Gray Foundation, the Gates Foundation grant INV-027106, the David Liposarcoma Research Initiative, and the Emerson Collective. -------------------------------------------------------------------------------- /docs/help/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default-cylinter 3 | title: Getting help 4 | nav_order: 12 5 | --- 6 | 7 | 8 | 9 | **Bugs** can be reported by opening an issue at the [GitHub repository](https://github.com/labsyspharm/cylinter/issues). 10 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default-cylinter 3 | title: Home 4 | nav_order: 1 5 | description: "" 6 | hero_heading: "Quality Control Software for Multiplex Microscopy." 
7 | hero_body: "CyLinter is used to identify and remove noisy single-cell instances in multiplex images of tissue." 8 | hero_ctas: 9 | - label: "install cylinter" 10 | link: "installation/" 11 | - label: "GitHub Repo" 12 | link: "https://github.com/labsyspharm/cylinter" 13 | - label: "Publication/Citation" 14 | link: "https://doi.org/10.1038/s41592-024-02328-0" 15 | # last_modified_date: 2021-03-28 16 | --- 17 | 18 | 23 | 24 |
25 | Tomas Brunsdon via Dribble 26 |
27 | 28 | {% include cylinter_gif.html %} 29 | 30 |
31 | 32 | {% include home.md %} 33 | 34 | 35 | 36 |
37 | -------------------------------------------------------------------------------- /docs/installation/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default-cylinter 3 | title: Installation 4 | nav_order: 2 5 | has_children: false 6 | --- 7 | 8 | # Installation 9 | 10 | CyLinter is written in Python 3 and is compatible with MacOS, Windows, and Linux operating systems. The program can be installed via the cross-platform package manager, Conda. 11 | 12 | ## 1. Install Miniconda 13 | 14 | **NOTE**: If you already have Miniconda or Anaconda installed, [skip this section and jump to section 1B](#section-1b). 15 | 16 | The following are examples of commands for quickly and quietly installing the latest version of the Miniconda installer for your operating system (MacOS - M1 / Intel 64-bit, Windows, Linux - Intel 64-bit). For other platforms, [consult the Miniconda download page](https://docs.conda.io/projects/miniconda/en/latest/index.html).
17 | 18 | ### MacOS 19 | Open Terminal and paste the following commands: 20 | ```bash 21 | mkdir -p ~/miniconda3 22 | 23 | # M1 chip 24 | curl https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-arm64.sh -o ~/miniconda3/miniconda.sh 25 | 26 | # Intel 64-bit chip 27 | curl https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -o ~/miniconda3/miniconda.sh 28 | 29 | bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3 30 | rm -f ~/miniconda3/miniconda.sh 31 | ``` 32 | 33 | ### Windows 34 | Open a Command Prompt and paste the following commands: 35 | ```cmd 36 | curl https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe -o miniconda.exe 37 | start /wait "" miniconda.exe /S 38 | del miniconda.exe 39 | ``` 40 | 41 | ### Linux 42 | Open a terminal window and paste the following commands: 43 | ```bash 44 | mkdir -p ~/miniconda3 45 | wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda3/miniconda.sh 46 | bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3 47 | rm -f ~/miniconda3/miniconda.sh 48 | ``` 49 | 50 | You should now [skip section 1B and go straight to section 2](#section-2). 51 | 52 | ## 1B. For existing conda installations, set libmamba as the default dependency solver 53 | {: #section-1b} 54 | CyLinter depends on a complex set of packages and older Conda installations will struggle with this. If you already have Miniconda or Anaconda installed, we suggest that you run the following commands to update Conda itself and enable the libmamba dependency solver. This will help ensure CyLinter can be installed efficiently. 55 | 56 | ``` bash 57 | conda update -n base conda 58 | conda install -n base conda-libmamba-solver 59 | conda config --set solver libmamba 60 | ``` 61 | 62 | ## 2. 
Install CyLinter 63 | {: #section-2} 64 | Install CyLinter into a dedicated conda environment with the following command: 65 | 66 | ``` bash 67 | conda create -n cylinter -c conda-forge -c labsyspharm cylinter=0.0.50 68 | ``` 69 | -------------------------------------------------------------------------------- /docs/modules/PCA.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default-cylinter 3 | title: PCA 4 | nav_order: 9 5 | parent: Modules 6 | --- 7 | 8 | 9\. `PCA`: this is a fully automated module that performs [Horn’s parallel analysis](https://en.wikipedia.org/wiki/Parallel_analysis) indicating the number of PCs capturing non-random variation in the dataset to help the user determine whether 2 or 3 principal components should be used in the [clustering module]({{ site.baseurl }}/modules/clustering) implemented later in the pipeline. PCA scores plots for the first two PCs are computed on per-cell and per-sample bases to visualize how single cells and tissue samples are distributed with respect to each other. Ridge plots are also computed to visualize histogram alignment across marker channels. 9 | 10 | ### YAML configurations 11 | 12 | | Parameter | Default | Description | 13 | | --- | --- | --- | 14 | | `channelExclusionsPCA` | [ ] | (list of strs) Immunomarkers to exclude from PCA analysis. | 15 | | `samplesToRemovePCA` | [ ] | (list of strs) Samples to exclude from PCA analysis. | 16 | | `dimensionPCA` | 2 | (int) Number of PCs to compute. | 17 | | `pointSize` | 90.0 | (float) Scatter point size for sample scores plot. | 18 | | `labelPoints` | True | (bool) Annotate scatter points with condition abbreviations from sampleMetadata configuration. | 19 | | `distanceCutoff` | 0.15 | (float) Maximum distance between data points in PCA scores plot to be annotated with a common label. Useful for increasing visual clarity of PCA plots containing many data points. Applicable when labelPoints is True.
| 20 | | `conditionsToSilhouette` | [ ] | (list of strs) Abbreviated condition names whose corresponding scores plot points will be greyed out, left unannotated, and sent to the back of the plot (zorder). Useful for increasing visual clarity of PCA plots containing many data points. | 21 | -------------------------------------------------------------------------------- /docs/modules/aggregateData.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default-cylinter 3 | title: aggregateData 4 | nav_order: 1 5 | parent: Modules 6 | --- 7 | 8 | 1\. `aggregateData`: aggregates spatial feature tables from all tissues into a combined dataframe passed between modules; this step is fully automated. 9 | 10 | ### No YAML configurations 11 | -------------------------------------------------------------------------------- /docs/modules/areaFilter.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default-cylinter 3 | title: areaFilter 4 | nav_order: 4 5 | parent: Modules 6 | --- 7 | 8 | 4\. `areaFilter`: cell segmentation errors can be a significant source of noise in image-derived, single-cell data. In this module, users assign lower and upper bounds on cell segmentation area (pixel count) to remove severely under- and over-segmented cells. Cell segmentation area is a standard component of [MCMICRO](https://mcmicro.org/parameters/core.html#mcquant) feature table output and is calculated using [skimage.measure.regionprops()](https://scikit-image.org/docs/stable/api/skimage.measure.html#skimage.measure.regionprops). This module functions similarly to the `intensityFilter` module in that it allows users to assign lower and upper thresholds on interactive histogram widgets of per-cell data. Gaussian mixture modeling (GMM) assists in identifying default cutoffs that can be manually refined.
Once thresholds have been adjusted for a given sample, users can visualize selected cells in their corresponding image by clicking the `Plot Points` button. Segmentation outlines are provided in the `layer list` at the left of the Napari viewer as a reference for evaluating segmentation quality. Data points falling between lower and upper sliders are carried forward into downstream analysis. Users will move to the next sample in the series by clicking the `Apply Gates and Move to Next Sample` button beneath the histogram. Users may jump between tissues in the series by entering the name of a given sample in the `Sample Name` field of the `Arbitrary Sample Selection` widget at the right of the Napari viewer to adjust thresholds of previously analyzed tissues. To re-define thresholds, remove the metadata associated with the target sample(s) from `cylinter_report.yml` located in the CyLinter output directory specified in `cylinter_config.yml` and re-run the `areaFilter` module with `cylinter --module areaFilter cylinter_config.yml`. 9 | 10 | ### YAML configurations 11 | 12 | | Parameter | Default | Description | 13 | | --- | --- | --- | 14 | | `numBinsArea` | 50 | (int) Number of bins used to construct DNA area histograms. | 15 | -------------------------------------------------------------------------------- /docs/modules/clustering.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default-cylinter 3 | title: clustering 4 | nav_order: 12 5 | parent: Modules 6 | --- 7 | 8 | 12\. `clustering`: this module performs density-based hierarchical clustering with [HDBSCAN](https://hdbscan.readthedocs.io/en/latest/api.html) on [UMAP](https://umap-learn.readthedocs.io/en/latest/) (or [t-SNE](https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html)) embeddings to identify biologically relevant cell states in a dataset. 
In doing so, users will enter into the `Cluster and Plot` field at the top right of the Napari window an integer value for `Min Cluster Size` ([MCS](https://hdbscan.readthedocs.io/en/latest/api.html)): an HDBSCAN parameter that affects the clustering result. The embedding will be shown in the Napari window colored by three different variables for review: **1)** HDBSCAN cluster, **2)** gate-based cell type classification (see [gating module]({{ site.baseurl }}/modules/gating) for details), and **3)** tissue sample. Clustering cells may be viewed in the context of a given tissue by pressing and holding the mouse (or track pad) button and lassoing data points in the HDBSCAN plot, typing the name of a sample of interest in the `Sample Name` field, and clicking the `View Lassoed Points` button. Selected cells will then appear as scatter points in their corresponding image. After each MCS entry, a separate window showing the results of [silhouette analysis](https://www.sciencedirect.com/science/article/pii/0377042787901257) will also be shown. Cells with positive silhouette coefficients indicate their current cluster assignment is suitable, while those with negative coefficients would be better off in another cluster, indicative of under-clustering. To aid in cluster optimization, a range of MCS values can be entered into the `Sweep MCS Range` field and the number of clusters associated with each MCS value will be printed to the terminal window without the results being plotted in the Napari window. Clicking the `Save` button at the bottom right of the Napari viewer causes the program to append the current cluster labels to the single-cell dataframe and proceed to the next module. 9 | 10 | ### YAML configurations 11 | 12 | | Parameter | Default | Description | 13 | | --- | --- | --- | 14 | | `embeddingAlgorithm` | "UMAP" | (str) Embedding algorithm to use for clustering (options: "TSNE" or "UMAP").
| 15 | | `channelExclusionsClustering` | [ ] | (list of strs) Immunomarkers to exclude from clustering. | 16 | | `samplesToRemoveClustering` | [ ] | (list of strs) Samples to exclude from clustering. | 17 | | `normalizeTissueCounts` | True | (bool) Make the number of cells per tissue for clustering more similar through sample-weighted random sampling. | 18 | | `fracForEmbedding` | 1.0 | (float) Fraction of cells to be embedded (range: 0.0-1.0). Limits amount of data passed to downstream modules. | 19 | | `dimensionEmbedding` | 2 | (int) Dimension of the embedding (options: 2 or 3). | 20 | | `colormapAnnotationClustering` | "Sample" | (str) Metadata annotation to colormap the embedding: Sample or Condition. | 21 | | `metric` | "euclidean" | (str) Distance metric for computing embedding. Choose from valid metrics used by scipy.spatial.distance.pdist: "braycurtis", "canberra", "chebyshev", "cityblock", "correlation", "cosine", "dice", "euclidean", "hamming", "jaccard", "jensenshannon", "kulsinski", "mahalanobis", "matching", "minkowski", "rogerstanimoto", "russellrao", "seuclidean", "sokalmichener", "sokalsneath", "sqeuclidean", "yule". | 22 | | `perplexity` | 50.0 | (float) This is a [tSNE-specific configuration](https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html) related to the number of nearest neighbors used in other manifold learning algorithms. Larger datasets usually require larger perplexity. Different values can result in significantly different results. | 23 | | `earlyExaggeration` | 12.0 | (float) This is a [tSNE-specific configuration](https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html). For larger values, the space between natural clusters will be larger in the embedded space. | 24 | | `learningRateTSNE` | 200.0 | (float) This is a [tSNE-specific configuration](https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html). tSNE learning rate (typically between 10.0 and 1000.0). 
| 25 | | `randomStateTSNE` | 5 | (int) This is a [tSNE-specific configuration](https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html). It determines the random number generator for reproducible results across multiple function calls. | 26 | | `nNeighbors` | 6 | (int) This is a [UMAP-specific configuration](https://umap-learn.readthedocs.io/en/latest/api.html). It determines the size of local neighborhood (in terms of number of neighboring sample points) used for manifold approximation. Larger values result in more global views of the manifold, while smaller values result in more local data being preserved. In general values should be in the range 2 to 100. | 27 | | `learningRateUMAP` | 1.0 | (float) This is a [UMAP-specific configuration](https://umap-learn.readthedocs.io/en/latest/api.html). It determines the initial learning rate for the embedding optimization. | 28 | | `minDist` | 0.1 | (float) This is a [UMAP-specific configuration](https://umap-learn.readthedocs.io/en/latest/api.html). Determines the effective minimum distance between embedded points. Smaller values will result in a more clustered/clumped embedding where nearby points on the manifold are drawn closer together, while larger values will result in a more even dispersal of points. The value should be set relative to the spread value, which determines the scale at which embedded points will be spread out. | 29 | | `repulsionStrength` | 5.0 | (float) This is a [UMAP-specific configuration](https://umap-learn.readthedocs.io/en/latest/api.html). Determines the weighting applied to negative samples in low dimensional embedding optimization. Values higher than one will result in greater weight being given to negative samples. | 30 | | `randomStateUMAP` | 5 | (int) This is a [UMAP-specific configuration](https://umap-learn.readthedocs.io/en/latest/api.html). It determines the random number generator for reproducible results across multiple function calls.
| 31 | -------------------------------------------------------------------------------- /docs/modules/clustermap.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default-cylinter 3 | title: clustermap 4 | nav_order: 13 5 | parent: Modules 6 | --- 7 | 8 | 13\. `clustermap`: this is a fully automated module that computes clustered heatmaps of channel z-scores for clusters identified in the [clustering module]({{ site.baseurl }}/modules/clustering) which are saved into the `clustermap` subdirectory of the main CyLinter output directory. 9 | 10 | ### No YAML configurations 11 | -------------------------------------------------------------------------------- /docs/modules/curateThumbnails.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default-cylinter 3 | title: curateThumbnails 4 | nav_order: 15 5 | parent: Modules 6 | --- 7 | 8 | 15\. `curateThumbnails`: this module is fully automated. It programmatically generates image patches of cells drawn at random from each cluster identified in the [clustering module]({{ site.baseurl}}/modules/clustering) and each cell type defined in the [gating module]({{ site.baseurl}}/modules/gating) for visual review. The number of examples shown per cluster is adjusted using the `numThumbnails` parameter in `cylinter_config.yml`. The size of the image window around the reference cells is controlled by the `squareWindowDimension` parameter in the same configuration file. A white pixel corresponding to the nuclear centroid of the example cell is shown in each image as a reference. Images can be saved with or without segmentation outlines superimposed by toggling the `segOutlines` parameter in the configuration file. To facilitate interpretation, only the three most highly expressed protein markers are shown per cluster (based on channel z-scores).
Image contrast settings defined in the [setContrast module]({{ site.baseurl }}/modules/setContrast) are applied to improve image appearance. Image galleries for each cluster and gate-based cell type class are saved to the `thumbnails` directory in the `clustering` subdirectory of the main CyLinter output directory. This path is `thumbnails/2d/frequency_stats` in the case of 2D clusterings and `thumbnails/3d/frequency_stats` in the case of 3D clusterings. 9 | 10 | ### YAML configurations 11 | 12 | | Parameter | Default | Description | 13 | | --- | --- | --- | 14 | | `numThumbnails` | 25 | (int) Number of example cells per cluster to be curated. | 15 | | `windowSize` | 30 | (int) Number of pixels from the centroid of the reference cell in x and y dimensions. | 16 | | `segOutlines` | True | (bool) Whether to overlay cell segmentation outlines on thumbnail images. | 17 | -------------------------------------------------------------------------------- /docs/modules/cycleCorrelation.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default-cylinter 3 | title: cycleCorrelation 4 | nav_order: 5 5 | parent: Modules 6 | --- 7 | 8 | 5\. `cycleCorrelation`: this module is relevant to cyclic imaging technologies (e.g., CyCIF, CODEX, mIHC) and is designed to remove cells that have shifted or become detached from the microscope slide over multi-cycle imaging studies, as these cells appear negative for all markers after the movement or loss event. Similar to the `intensityFilter` and `areaFilter` modules, users will gate on interactive histogram widgets of per-cell signals. However, the histograms in this module represent the log10-transformed ratio of DNA intensities between the first and last imaging cycles (log10[cycle1/cyclen]). Lower and upper cutoff sliders are adjusted to select cells with highly-correlated signals (typically at or around zero, as log10[1/1] = 0). 
Like in the `intensityFilter` and `areaFilter` modules, Gaussian mixture modeling (GMM) is used to identify initial default cutoffs that can be manually refined. Once lower and upper cutoffs are adjusted, users can visualize selected cells in their corresponding image by clicking the `Plot Points` button. DNA channels for the first and last imaging cycles are shown for reference to visualize cells that have shifted or become detached from the slide between the first and last imaging cycles. Data points between lower and upper cutoffs are carried forward into downstream analysis. Users will move to the next sample in the series by clicking the `Apply Gates and Move to Next Sample` button beneath the histogram. Users may jump between tissues in the series by entering the name of a given sample in the `Sample Name` field of the `Arbitrary Sample Selection` widget at the right of the Napari viewer to adjust thresholds of previously analyzed tissues. To re-define thresholds, remove the metadata associated with the target sample(s) from `cylinter_report.yml` located in the CyLinter output directory specified in `cylinter_config.yml` and re-run the `cycleCorrelation` module with `cylinter --module cycleCorrelation cylinter_config.yml`. 9 | 10 | 11 | ### YAML configurations 12 | 13 | | Parameter | Default | Description | 14 | | --- | --- | --- | 15 | | `numBinsCorrelation` | 50 | (int) Number of bins used to construct DNA1/DNAn histograms. | 16 | -------------------------------------------------------------------------------- /docs/modules/frequencyStats.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default-cylinter 3 | title: frequencyStats 4 | nav_order: 14 5 | parent: Modules 6 | --- 7 | 8 | 14\. `frequencyStats`: this module is fully automated. 
It computes pairwise statistics for binary declarations specified in the [sampleMetadata]({{ site.baseurl}}/structure/#general-configurations) parameter of `cylinter_config.yml`. Test results are saved to a directory called `frequency_stats` in the clustering subdirectory of the main CyLinter output directory. This path is `clustering/2d/frequency_stats` in the case of 2D clusterings and `clustering/3d/frequency_stats` in the case of 3D clusterings. 9 | 10 | ### YAML configurations 11 | 12 | | Parameter | Default | Description | 13 | | --- | --- | --- | 14 | | `controlGroups` | ["CANCER-FALSE"] | (list of strs) Corresponds to control groups for each binary declaration specified as the fourth elements of [sampleMetadata]({{ site.baseurl }}/structure/#general-configurations) values. | 15 | |`denominatorCluster` | null | (null or int) Cluster to be used as the denominator when computing cluster frequency ratios. Set to null first, then change to cluster number (int) to normalize cluster frequencies to a particular identified cluster if desired. | 16 | | `FDRCorrection` | False | (bool) Whether to compute p-values and false discovery rate (FDR)-corrected q-values (True) or compute uncorrected p-values only (False). | -------------------------------------------------------------------------------- /docs/modules/gating.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default-cylinter 3 | title: gating 4 | nav_order: 11 5 | parent: Modules 6 | --- 7 | 8 | 11\. `gating` (optional): this module allows users to classify cell types present in the dataset using the [SYLARAS](https://www.sylaras.org/#details) approach to high-dimensional, single-cell gating[[1]](#1). In doing so, users assign a set of manual gating thresholds on a per-marker and per-sample basis using interactive scatter plots of marker (x-axis) x cell segmentation area (y-axis). Gated cells (i.e.
those falling to the right of the gate) can be visualized as scatter points in their respective image channel by clicking the `Plot Points` button to confirm accurate gate placement. After an optimal gate has been identified, users will move to the next marker/sample combination in the series by clicking the `Apply Gate and Move to Next Sample` button beneath the scatter plot. If no gate selection is made, all cells in the current plot will be carried forward into downstream analysis. Users may jump between markers and tissues in the series by entering their names into respective fields in the `Arbitrary Sample/Marker Selection` widget at the bottom right of the Napari viewer and clicking the `Enter` button. This can allow for the adjustment of previously defined gates. PDFs showing scatter plots with superimposed gates are stored in the `gating` output directory as a reference which can be updated any time by entering the name of a specific marker in the `Marker Name` field and clicking the `Refresh PDF(s)` button at the bottom right of the Napari viewer; typing "ALL" into the `Marker Name` field will render gated scatter plots for all markers in the analysis. Gates may be re-defined, by removing the metadata associated with particular marker/sample combinations in `cylinter_report.yml` located in the CyLinter output directory specified in `cylinter_config.yml` and re-running the `gating` module with `cylinter --module gating cylinter_config.yml`. 9 | 10 | After all gates have been applied, signal intensities are automatically binarized according to the defined gating thresholds such that cells falling to the right of the gate are considered immunopositive, and those falling to the left of the gate are considered immunonegative. Unique Boolean vectors (i.e., binary phenotype profiles) emerging from this procedure are then mapped to biologically-meaningful cell types previously defined in the YAML configuration file (`cylinter_config.yml`). 
This module can be bypassed by toggling the `gating` parameter to `False` (see YAML configurations below). 11 | 12 | ### YAML configurations 13 | 14 | 15 | | Parameter | Default | Description | 16 | | --- | --- | --- | 17 | | `gating` | "False" | (bool) Whether to perform SYLARAS-style gating on single-cell data | 18 | | `channelExclusionsGating` | [ ] | (list of strs) Immunomarkers to exclude from gating. | 19 | | `samplesToRemoveGating` | [ ] | (list of strs) Samples to exclude from gating. | 20 | | `vectorThreshold` | 100 | (int) Visualize Boolean vectors with cell counts >= vectorThreshold | 21 | | `classes` | Tumor: definition: [+pan-CK, +KI67, -aSMA, -CD45] subsets: [CDKN1A] | (dict) Boolean immunophenotype signatures. +marker = immunopositive, -marker = immunonegative | 22 | 23 | ## References 24 | 25 | [1] 26 | Baker GJ. et al. [SYLARAS: A Platform for the Statistical Analysis and Visual Display of Systemic Immunoprofiling Data and Its Application to Glioblastoma](https://www.sciencedirect.com/science/article/pii/S2405471220302854).
**Cell Systems** (2020) 27 | 28 | -------------------------------------------------------------------------------- /docs/modules/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default-cylinter 3 | title: Modules 4 | nav_order: 6 5 | has_children: true 6 | --- 7 | 8 | # Module list 9 | 10 | | Name | Purpose | Description/YAML Configurations | 11 | | :-- | :-- | :-- | 12 | | `aggregateData` | Combine feature tables | [Details]({{ site.baseurl }}/modules/aggregateData) | 13 | | `selectROIs` | Define tissue ROIs | [Details]({{ site.baseurl }}/modules/selectROIs) | 14 | | `intensityFilter` | Filter out-of-focus and counterstain oversaturated cells | [Details]({{ site.baseurl }}/modules/intensityFilter) | 15 | | `areaFilter` | Filter over- and under-segmented cells | [Details]({{ site.baseurl }}/modules/areaFilter) | 16 | | `cycleCorrelation` | Filter unstable cells | [Details]({{ site.baseurl }}/modules/cycleCorrelation) | 17 | | `logTransform` | Log10-transform immunomarker signals | [Details]({{ site.baseurl }}/modules/logTransform) 18 | | `pruneOutliers` | Filter channel outliers | [Details]({{ site.baseurl }}/modules/pruneOutliers) | 19 | | `metaQC` | Reclassify cells according to QC status | [Details]({{ site.baseurl }}/modules/metaQC) 20 | | `PCA` | Run principal component analysis | [Details]({{ site.baseurl }}/modules/PCA) 21 | | `setContrast` | Adjust image contrast settings | [Details]({{ site.baseurl }}/modules/setContrast) 22 | | `gating` | Identify cell states via manual thresholding | [Details]({{ site.baseurl }}/modules/gating) 23 | | `clustering` | Identify cell states via unsupervised clustering | [Details]({{ site.baseurl }}/modules/clustering) 24 | | `clustermap` | Visualize cell state protein expression | [Details]({{ site.baseurl }}/modules/clustermap) 25 | | `frequencyStats` | Compute cluster frequency statistics | [Details]({{ site.baseurl }}/modules/frequencyStats) | 26 | |
`curateThumbnails` | Visualize example cells from each cluster | [Details]({{ site.baseurl }}/modules/curateThumbnails) 27 | 28 | 30 | -------------------------------------------------------------------------------- /docs/modules/intensityFilter.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default-cylinter 3 | title: intensityFilter 4 | nav_order: 3 5 | parent: Modules 6 | --- 7 | 8 | 3\. `intensityFilter`: out-of-focus cells and those oversaturated with nuclear counterstain introduce noise into image-derived, single-cell data. This is because out-of-focus cells tend to have unreliable immunomarker signals and oversaturated nuclei tend to be poorly segmented. In this module, users interact with histogram widgets of per-cell counterstain signal intensities to assign upper and lower bounds on DNA signal intensity. Gaussian mixture modeling (GMM) is used to identify default cutoffs that can be manually refined. Users can visualize cells falling between lower and upper cutoffs as scatter points in their respective image colored by DNA signal intensity by clicking the `Plot Points` button. Selected data points are then carried forward into downstream analysis. Users will move to the next sample in the series by clicking the `Apply Gates and Move to Next Sample` button beneath the histogram. Users may jump between tissues in the series by entering the name of a given sample in the `Sample Name` field of the `Arbitrary Sample Selection` widget at the right of the Napari viewer to adjust thresholds of previously analyzed tissues. To re-define thresholds, remove the metadata associated with the target sample(s) from `cylinter_report.yml` located in the CyLinter output directory specified in `cylinter_config.yml` and re-run the `intensityFilter` module with `cylinter --module intensityFilter cylinter_config.yml`. 
9 | 10 | ### YAML configurations 11 | 12 | | Parameter | Default | Description | 13 | | --- | --- | --- | 14 | | `numBinsIntensity` | 50 | (int) Number of bins used to construct DNA intensity histograms. | 15 | -------------------------------------------------------------------------------- /docs/modules/logTransform.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default-cylinter 3 | title: logTransform 4 | nav_order: 6 5 | parent: Modules 6 | --- 7 | 8 | 6\. `logTransform`: this module performs log10-transformation of antibody marker signals and is fully automated. 9 | 10 | ### No YAML configurations 11 | -------------------------------------------------------------------------------- /docs/modules/metaQC.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default-cylinter 3 | title: metaQC 4 | nav_order: 8 5 | parent: Modules 6 | --- 7 | 8 | 8\. `metaQC` (optional): this module helps control for curation bias by correcting for inaccuracies in ROI gating and data cutoff placement by performing unsupervised clustering on equal sized batches of clean (retained) and noisy (redacted) single-cell data using a combination of [UMAP](https://umap-learn.readthedocs.io/en/latest/) (or [t-SNE](https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html)) and [HDBSCAN](https://hdbscan.readthedocs.io/en/latest/api.html). Noisy cells clustering with predominantly clean cells are returned to the dataframe, while clean cells clustering with predominantly noisy clusters are dropped from the dataframe. After selecting a `Min Cluster Size (MCS)` value and clicking the `Cluster and Plot` button in the `Plot Single MCS` widget at the top right of the Napari viewer, users are presented with UMAP (or t-SNE) embeddings of cells colored by **1)** HDBSCAN cluster, **2)** QC status, **3)** reclassification status, and **4)** sample. 
Clustering is optimized by testing different `MCS` values: an HDBSCAN parameter that significantly affects the clustering result, see [HDBSCAN documentation](https://hdbscan.readthedocs.io/en/latest/api.html) for details. To assist in the identification of a stable clustering solution, a range of `min_cluster_size` values may be entered into the `Sweep MCS Range` widget at the right of the Napari viewer and the number of clusters associated with each `min_cluster_size` will be printed to the terminal window. Cells in the HDBSCAN plot can be lassoed and visualized in a given sample by pressing and holding the mouse (or track pad) button and drawing around cells of interest. The name of the sample of interest is then entered into the `Sample Name` field and the `View Lassoed Points` button is clicked. Selected cells will appear as scatter points in their corresponding image colored by the module used to filter them from the analysis. Using clean and noisy reclassification cutoff selectors, users can specify tolerance limits on the proportion of clusters composed of clean (`Reclass Clean`) and noisy (`Reclass Noisy`) data for clustering cells to be reclassified. Unclustered cells (i.e., cells with HDBSCAN cluster label -1) whose original QC status is clean are reclassified as noisy. 9 | 10 | Clicking the `Save` button at the bottom right of the Napari viewer causes the program to reclassify the data according to the current clustering solution and reclassification cutoffs. After the first chunk of clean and noisy data has been reclassified, additional chunks are reclassified using the same UMAP, HDBSCAN, and reclassification parameters. To re-define clustering or reclassification cutoffs, remove the metadata associated with the metaQC module from `cylinter_report.yml` located in the CyLinter output directory specified in `cylinter_config.yml` and re-run the `metaQC` module with `cylinter --module metaQC cylinter_config.yml`.
This module can be bypassed by toggling the `metaQC` parameter to `False` (see YAML configurations below). Regardless of the `metaQC` parameter setting, a pie chart showing the fraction of data redacted by each QC data filtration module (`selectROIs`, `intensityFilter`, `areaFilter`, `cycleCorrelation`, `pruneOutliers`) is saved to the output subdirectory for the `metaQC` module (`censored_by_stage.pdf`). 11 | 12 | ### YAML configurations 13 | 14 | | Parameter | Default | Description | 15 | | --- | --- | --- | 16 | | `metaQC` | True | (bool) Whether to perform data reclassification based on unsupervised clustering results of combinations of clean and noisy (previously-redacted) data. | 17 | | `embeddingAlgorithmQC` | "UMAP" | (str) Embedding algorithm used for clustering (options: "TSNE" or "UMAP"). | 18 | | `channelExclusionsClusteringQC` | [ ] | (list of strs) Immunomarkers to exclude from clustering. | 19 | | `samplesToRemoveClusteringQC` | [ ] | (list of strs) Samples to exclude from clustering. | 20 | | `percentDataPerChunk` | 0.2 | (float) Fraction of data (range: 0.0-1.0) to undergo embedding and clustering per reclassification cycle. | 21 | | `colormapAnnotationQC` | "Sample" | (str) Metadata annotation to colormap the embedding: `Sample` or `Condition`. | 22 | | `metricQC` | "euclidean" | (str) Distance metric for computing embedding. Choose from valid metrics used by scipy.spatial.distance.pdist: "braycurtis", "canberra", "chebyshev", "cityblock", "correlation", "cosine", "dice", "euclidean", "hamming", "jaccard", "jensenshannon", "kulsinski", "mahalanobis", "matching", "minkowski", "rogerstanimoto", "russellrao", "seuclidean", "sokalmichener", "sokalsneath", "sqeuclidean", "yule". | 23 | | `perplexityQC` | 50.0 | (float) This is a tSNE-specific configuration (https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html) related to the number of nearest neighbors used in other manifold learning algorithms.
Larger datasets usually require larger perplexity. Different values can result in significantly different results. | 24 | | `earlyExaggerationQC` | 12.0 | (float) This is a tSNE-specific configuration (https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html). For larger values, the space between natural clusters will be larger in the embedded space. | 25 | | `learningRateTSNEQC` | 200.0 | (float) This is a tSNE-specific configuration (https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html). tSNE learning rate (typically between 10.0 and 1000.0). | 26 | | `randomStateQC` | 5 | (int) This is a tSNE-specific configuration (https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html). It determines the random number generator for reproducible results across multiple function calls. | 27 | | `nNeighborsQC` | 5 | (int) This is a UMAP-specific configuration (https://umap-learn.readthedocs.io/en/latest/api.html). It determines the size of local neighborhood (in terms of number of neighboring sample points) used for manifold approximation. Larger values result in more global views of the manifold, while smaller values result in more local data being preserved. In general values should be in the range 2 to 100. | 28 | | `learningRateUMAPQC` | 1.0 | (float) This is a UMAP-specific configuration (https://umap-learn.readthedocs.io/en/latest/api.html). It determines the initial learning rate for the embedding optimization. | 29 | | `minDistQC` | 0.1 | (float) This is a UMAP-specific configuration (https://umap-learn.readthedocs.io/en/latest/api.html). Determines the effective minimum distance between embedded points. Smaller values will result in a more clustered/clumped embedding where nearby points on the manifold are drawn closer together, while larger values will result in a more even dispersal of points.
The value should be set relative to the spread value, which determines the scale at which embedded points will be spread out. | 30 | | `repulsionStrengthQC` | 5.0 | (float) This is a UMAP-specific configuration (https://umap-learn.readthedocs.io/en/latest/api.html). Determines the weighting applied to negative samples in low dimensional embedding optimization. Values higher than one will result in greater weight being given to negative samples. | 31 | -------------------------------------------------------------------------------- /docs/modules/pruneOutliers.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default-cylinter 3 | title: pruneOutliers 4 | nav_order: 7 5 | parent: Modules 6 | --- 7 | 8 | 7\. `pruneOutliers`: cells affected by visual artifacts such as antibody aggregates or illumination aberrations appear as bright outliers in affected channels. Conversely, image background subtraction can have the unintended consequence of creating cells with signal intensities at or below zero that, on image clipping and log-transformation, are far lower than values associated with biologically relevant signals. Both of these scenarios can significantly impact data interpretation. In this module, users remove any residual channel outliers from tissues not captured by the `selectROIs` module (e.g., small antibody aggregates) by applying lower and upper percentile cutoffs on marker intensity. Scatter plots (or hexbins, see YAML configurations below) are used to visualize channel-specific intensity distributions before and after cutoffs are applied. Marker intensities are plotted against cell segmentation area which is used as a dummy variable to create 2D plots so that small numbers of outliers can be easily detected. Post-cutoff distributions are shown on a normalized (0-1) x-axis.
By entering the name of a given sample in the `Sample Name` field and clicking the `view Outliers` button, users can visualize dim and bright outliers as scatter points (dim = magenta, bright = cyan) in their respective image channels. Users will move to the next channel in the series by clicking the `Apply Cutoffs and Move to Next Marker` button beneath the plots. Note that cells are dropped from the marker channels in an ordered series. Thus, users can elect to re-start outlier removal from a given marker by entering the name of the target channel in the `Re-start from Marker` field and clicking the enter button, but must re-curate outliers in all subsequent channels as well. If no cutoffs are applied for a given marker, all cells in the plots will be carried forward into the analysis of the subsequent marker. To re-define percentile cutoffs, remove the metadata associated with the target channel(s) from `cylinter_report.yml` located in the CyLinter output directory specified in `cylinter_config.yml` and re-run the `pruneOutliers` module with `cylinter --module pruneOutliers cylinter_config.yml`. 9 | 10 | ### YAML configurations 11 | 12 | | Parameter | Default | Description | 13 | | --- | --- | --- | 14 | | `hexbins` | False | (bool) Whether to use hexbins (True) or scatter plots (False) to plot single-cell signal intensities. Scatter plots allow for higher resolution, but may lead to long rendering times with large datasets.| 15 | | `hexbinGridSize` | 20 | (int) The number of hexagons in the x-direction; higher values increase bin resolution. The number of hexagons in the y-direction is chosen such that the hexagons are approximately regular. 
| 16 | -------------------------------------------------------------------------------- /docs/modules/selectROIs.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default-cylinter 3 | title: selectROIs 4 | nav_order: 2 5 | parent: Modules 6 | --- 7 | 8 | {: .no_toc } 9 | 10 |
11 | 12 | Table of contents 13 | 14 | {: .text-delta } 15 | 1. TOC 16 | {:toc} 17 |
18 | 19 | 2\. `selectROIs`: [manual](#manual-roi-selection) and [automated](#automated-artifact-detection) tools are used to highlight regions of tissue affected by microscopy artifacts (e.g. illumination aberrations, slide debris, out-of-focus image tiles, mis-registered regions of tissue, etc.). 20 | 21 | ### Manual ROI selection 22 | Regions of interest (ROIs) are manually drawn around artifacts by clicking on the `Manual ROI Selections (neg.)` image layer in the `layer list` at the left of the Napari viewer then clicking on one of the built-in polygon selection tools from the `layer controls` dock (i.e. circle, square, triangle, or lasso icons above the `layers list`). The mouse button (or track pad) is then clicked and held to outline an artifact in the image window. Clicking the escape key allows for additional ROIs to be drawn. Both positive and negative ROI selection methods are available (see `delint` configuration in `cylinter_config.yml` for details). In the case of negative selection (i.e. `delint=True`, default), cells in ROI boundaries are dropped from the analysis; negative selection is the preferred method for tissues exhibiting diffuse artifacts. Positive selection works best on samples exhibiting large regions of artifact-free tissue that can be highlighted by one or a few ROIs. Cells selected in this case are carried forward into downstream analysis. 23 | 24 | ### Automated Artifact Detection 25 | To supplement manual artifact curation, users can choose to run an automated artifact detection (AAD) algorithm on individual image channels by selecting the target channel from the pulldown menu in the `Automated Artifact Detection` widget at the right of the Napari window and clicking the `Compute Artifact Mask` button. Translucent white artifact masks will then appear over regions of tissue that the model flags as putative artifacts. When the `auto` box is checked, the model is run using a reasonable default sensitivity parameter. 
Sensitivity of the algorithm can be adjusted manually by changing the value in the spinbox labeled `Sensitivity`. Each time the algorithm is run on a given channel it adds two layers to the `layers list` at the left of the Napari viewer. The first layer shows the artifact masks. The second layer shows the seed points corresponding to the different artifacts in the image. Seed points are not visible by default, but can be toggled on by clicking the eye icon shown in the `Artifact Seeds` layer. Individual seed points (and their corresponding artifact masks) can be modified or removed from a given channel by highlighting the `Artifact Seeds` layer, selecting the `arrow icon` in the `layer controls` dock to enable point selection mode, and pressing and holding the mouse button to drag over the target seed point to highlight it. Once highlighted, users can fine-tune the artifact mask associated with the seed by changing the `Tolerance` value in the `Fine-tuning` widget at the right of the Napari viewer or delete the seed entirely by clicking the `x` button in the `layer controls` dock. These AAD tailoring features are designed to give users flexibility over automated artifact masks without the need to re-run the AAD algorithm. 26 | 27 | ### Workflow 28 | Once all ROIs for a given sample have been generated, users will move to the next sample in the series by clicking the `Apply ROI(s) and Move to Next Sample` button at the top right of the Napari window. If no ROIs are drawn for a given sample, all cells in that tissue will be carried forward into downstream analysis. Users may also jump between samples by entering the name of a given sample in the `Sample Name` field at the right of the Napari viewer to add, delete, or modify manual or automated ROIs of previously analyzed samples or refer to arbitrary tissues in the curation of ROIs for a given sample. ROIs can be added, removed, or modified by re-running the `selectROIs` module.
29 | 30 | ### YAML configurations 31 | 32 | | Parameter | Default | Description | 33 | | --- | --- | --- | 34 | | `delintMode` | False | (bool) Whether to drop (True; negative selection) or retain (False; positive selection) cells selected by ROIs. | 35 | | `showAbChannels` | True | (bool) Whether to show all immunomarker channels (True) when Napari is open (may be memory limiting) or show only cycle 1 DNA (False). | 36 | | `samplesForROISelection` | [ ] | (list of strs) Sample names for ROI selection specified according to the first elements of [sampleMetadata]({{ site.baseurl }}/structure/#general-configurations) configuration. 37 | | `autoArtifactDetection` | True | (bool) Whether to display tools for automated artifact detection in Napari window. | 38 | | `artifactDetectionMethod` | "classical" | (str) Algorithm used for automated artifact detection (current option: "classical"). Deep learning method currently under development. -------------------------------------------------------------------------------- /docs/modules/setContrast.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default-cylinter 3 | title: setContrast 4 | nav_order: 10 5 | parent: Modules 6 | --- 7 | 8 | 10\. `setContrast`: in this module, image channel contrast is adjusted using the `contrast limits` slider bar in the `layer controls` dock at the top left of the Napari viewer. For each channel, contrast limits are set on a reference image whose median channel value is nearest to the 85th quantile of tissues in the batch; these limits are then applied to that image channel for all tissues in the batch. The 85th quantile (not 100th) is chosen to avoid picking a tissue whose channel intensity is driven by bright artifacts or outliers. The lower slider of the `contrast limits` slider bar is used to reduce background signal intensities by sliding to the right, while the upper slider is used to increase channel gain by sliding to the left.
Once lower and upper sliders have been adjusted on the reference sample, the fit can be checked against other tissues in the batch by entering their name in the `Sample Name` field of the `Arbitrary Sample Selection` widget at the right of the Napari viewer and clicking the `Enter` button. Clicking the `Apply Limits and Move to Next Channel` button causes the module to move to the next channel for contrast adjustment. To re-define contrast settings, simply re-run the `setContrast` module with `cylinter --module setContrast cylinter_config.yml`. 9 | 10 | 11 | 12 | 13 | ### No YAML configurations 14 | -------------------------------------------------------------------------------- /docs/run/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default-cylinter 3 | title: Running CyLinter 4 | nav_order: 3 5 | has_children: false 6 | --- 7 | 8 | # Running CyLinter 9 | 10 | ## Step 1: 11 | Ensure that the desired configurations for a given analysis have been set in CyLinter's [YAML configuration file]({{ site.baseurl }}/structure/index#yaml-configuration-file). A copy of this file can be found in the virtual environment into which CyLinter was installed (`.../miniconda3/envs/cylinter/lib/pythonXX/site-packages/cylinter/cylinter_config.yml`). 12 | 13 | ## Step 2: 14 | Activate the CyLinter virtual environment: 15 | 16 | ``` bash 17 | conda activate cylinter 18 | ``` 19 | 20 | ## Step 3: 21 | Execute the program from the beginning of the pipeline by passing the YAML configuration file (which should be stored at the top level of the CyLinter [input directory]({{ site.baseurl }}/structure/index)) to the `cylinter` command: 22 | 23 | ``` bash 24 | cylinter /cylinter_config.yml 25 | ``` 26 | 27 | CyLinter bookmarks progress by automatically caching partially-redacted spatial feature tables in the `checkpoints/` directory of the CyLinter [output directory]({{ site.baseurl }}/workflow/index).
To re-run any of the [Modules]({{ site.baseurl }}/modules/index), pass the `--module` flag followed by the name of a specific module: 28 | 29 | ``` bash 30 | cylinter --module /cylinter_config.yml 31 | ``` 32 | -------------------------------------------------------------------------------- /docs/structure/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default-cylinter 3 | title: Input File Structure 4 | nav_order: 5 5 | has_children: false 6 | --- 7 | 8 | {: .no_toc } 9 | 10 |
11 | 12 | Table of contents 13 | 14 | {: .text-delta } 15 | 1. TOC 16 | {:toc} 17 |
18 | 19 | # Input directory structure 20 | 21 | CyLinter can analyze any multiplex imaging data so long as they conform to the expected file formats and folder structure. In the below example, `` corresponds to the name of a particular tissue sample. 22 | 23 | ``` bash 24 | 25 | ├── cylinter_config.yml 26 | ├── csv/ 27 | │   ├── .csv 28 | │   └── .csv 29 | ├── markers.csv 30 | ├── mask/ 31 | │   ├── .ome.tif (or .tif) 32 | │   └── .ome.tif (or .tif) 33 | ├── seg/ 34 | │   ├── .ome.tif (or .tif) 35 | │   └── .ome.tif (or .tif) 36 | └── tif/ 37 |    ├── .ome.tif (or .tif) 38 |    └── .ome.tif (or .tif) 39 | ``` 40 | 41 | ## Note for MCMICRO users 42 | CyLinter can parse **whole slide image (WSI)** and **tissue microarray (TMA)** multiplex imaging data generated by the [MCMICRO](https://mcmicro.org) image-processing pipeline in their native file structure. In these cases, the [MCMICRO output directory](https://mcmicro.org/io.html#directory-structure) serves as the CyLinter input directory. 43 | 44 | 45 | # YAML configuration file 46 | 47 | `cylinter_config.yml` is the YAML configuration file passed to the `cylinter` command on program execution. It specifies general program configurations and module-specific parameters for a given analysis and should be stored in the top level CyLinter [input directory](#input-directory-structure). The `cylinter_config.yml` file downloaded with the program is pre-configured for use with [Example Data]({{ site.baseurl }}/exemplar) used to demonstrate CyLinter. On MacOS, this file is located here: `/Users//miniconda3/envs/cylinter/lib/python3.10/site-packages/cylinter/cylinter_config.yml`. 
48 | 49 | ## General configurations 50 | 51 | | Parameter | Default | Description | 52 | | --- | --- | --- | 53 | | `inDir` | /Users/user/Desktop/cylinter_demo | CyLinter input directory; contains multi-channel image files (TIFF/OME-TIFF), segmentation outline files (TIFF/OME-TIFF), cell ID masks (TIFF/OME-TIFF), single-cell spatial feature tables (CSV), `cylinter_config.yml`, and `markers.csv` organized according to the [input directory structure](#input-directory-structure) or as native [MCMICRO output structure](https://mcmicro.org/io.html#directory-structure). | 54 | | `outDir` | /Users/user/Desktop/cylinter_demo/output | CyLinter output directory path; created on program execution. | 55 | | `sampleMetadata` | "Filename":
["15", "Glioblastoma", "GBM", "CANCER-TRUE", 1] | Sample metadata dictionary: keys = Filenames (str); values = list of strings. First elements: sample names (str, may differ from Filename). Second elements: descriptive text of experimental condition (str). Third elements: abbreviation of experimental condition (str). Fourth elements: comma-delimited string of arbitrary binary declarations for computing t-statistics between two groups of samples (str). Fifth elements: replicate number specifying biological or technical replicates (int). | 56 | | `samplesToExclude` | [ ] | (list of strs) Sample names (i.e., first elements in `sampleMetadata` values) to exclude from analysis. | 57 | | `markersToExclude` | [ ] | (list of strs) Markers to exclude from analysis (not including nuclear dyes). | 58 | 59 | ## Module configurations 60 | For module-specific configuration settings, see [Modules]({{ site.baseurl }}/modules) 61 | 62 | 63 | # Markers.csv 64 | `markers.csv` is a standard input file into the MCMICRO image-processing pipeline also used by CyLinter to index marker channels in a batch of multiplex images labeled with the same markers. The file takes the following format and must be included in the top level CyLinter [input directory](#input-directory-structure): 65 | 66 | ``` 67 | channel_number,cycle_number,marker_name 68 | 1,1, 69 | 2,1, 70 | 3,1, 71 | 4,1, 72 | 5,2, 73 | 6,2, 74 | 7,2, 75 | 8,2, 76 | . 77 | . 78 | . 79 | ``` 80 | * Additional metadata columns may be present in the file, but are not currently read by CyLinter.
81 | -------------------------------------------------------------------------------- /docs/tutorials/#index.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default-cylinter 3 | title: Tutorials 4 | nav_order: 8 5 | has_children: true 6 | --- 7 | 8 | # Tutorials 9 | 10 | ## Overview Video 11 | 12 | {% include youtube.html id="fnxBvgJQmtY" autoplay=false mute=false controls=true loop=false related=false %} 13 | 14 | A general introduction [video](https://www.youtube.com/watch?v=fnxBvgJQmtY) that provides a high-level overview of the pipeline. 15 | 16 | --- 17 | 18 |
19 | 20 |
21 | ## Visual Guide 22 | This detailed [visual guide](pipeline-visual-guide.html) takes you through the steps performed by the MCMICRO pipeline as it processes [exemplar-002]({{ site.baseurl }}/datasets.html). The guide was developed using the open source [Minerva software](https://www.cycif.org/software/minerva), developed by the Laboratory of Systems Pharmacology. 23 | 24 | [![]({{ site.baseurl }}/images/tutorials/vizguide.png)](pipeline-visual-guide.html) 25 | {: .mt-6 .mr-10} 26 |
27 | 28 |
29 | ## Installing Nextflow and MCMICRO Video 30 | This [tutorial video](https://youtu.be/tLWMe_uJY9A) walks you through installing Nextflow and MCMICRO, downloading exemplar images, and executing the pipeline on the Google Cloud Platform. 31 | 32 | [![](https://img.youtube.com/vi/tLWMe_uJY9A/0.jpg)](https://youtu.be/tLWMe_uJY9A) 33 | {: .mt-6 .mr-10} 34 |
35 | 36 |
37 | 38 | --- 39 | 40 | This [written guide](basics.html) provides an overview of basic concepts in tissue imaging, including how the data is collected and represented, image format standards, and the mandatory set of initial steps in image processing. 41 | -------------------------------------------------------------------------------- /docs/tutorials/adding.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default-cylinter 3 | title: Adding a module 4 | nav_order: 3 5 | parent: Tutorials 6 | --- 7 | 8 | # Adding a module 9 | 10 | {: .no_toc } 11 | 12 |
13 | 14 | Table of contents 15 | 16 | {: .text-delta } 17 | 1. TOC 18 | {:toc} 19 |
20 | 21 | MCMICRO allows segmentation and cell state caller modules to be specified dynamically. Adding new modules requires nothing more than editing a simple configuration file. No changes to the Nextflow codebase necessary! 22 | 23 | ## Quick start 24 | 25 | **Step 1.** Navigate to [https://github.com/labsyspharm/mcmicro/blob/master/config/modules.config](https://github.com/labsyspharm/mcmicro/blob/master/config/modules.config). Press the pencil in the top-right corner. This will fork the project to your own GitHub account and allow you to modify the file in your fork. 26 | 27 | 28 | 29 | **Step 2.** Add a new module by specifying all relevant fields (see below). 30 | 31 | 32 | 33 | **Step 3.** Briefly describe your new module. Provide a reference to the method and the codebase. 34 | 35 | 36 | 37 | **Step 4.** After MCMICRO developers review and test your proposed module, the changes will be merged into the main project branch. 38 | 39 | # Input and output specs 40 | 41 | Every module must have a command-line interface (CLI) that has been encapsulated inside a Docker container. 42 | MCMICRO assumes that CLI conforms to the following input-output specifications. 43 | 44 | ## Segmentation modules 45 | 46 | **Input:** 47 | 48 | * A file in `.ome.tif` format containing a fully stitched and registered multiplexed image. 49 | * (Optional) A file containing a custom model for the algorithm. The file can be in any format, and it is up to the module developer to decide what formats they allow from users. 50 | 51 | **Output:** 52 | 53 | * An image file in `.tif` format, written to `.` (i.e., the "current working directory"). The file can be either a probability map or a segmentation mask. The image channels in probability maps annotate each pixel with probabilities that it belongs to the background or different parts of the cell such as the nucleus, cytoplasm, cell membrane or the intercellular region. 
Similarly, segmentation masks annotate each pixel with an integer index of the cell it belongs to, or 0 if none. 54 | * (Optional) One or more files written to `./qc/` (i.e., `qc/` subdirectory within the "current working directory"). These will be copied by the pipeline to the corresponding location in the [project's `qc/` directory]({{ site.baseurl }}/documentation/dir.html#quality-control). 55 | 56 | ## Cell state calling modules 57 | 58 | **Input:** 59 | 60 | * A file in `.csv` format containing a [spatial feature table]({{ site.baseurl }}/documentation/dir.html#quantification). Each row in a table corresponds to a cell, while columns contain features characterizing marker expression or morphological properties. 61 | * (Optional) A file containing a custom model for the algorithm. The file can be in any format, and it is up to the module developer to decide what formats they allow from users. 62 | 63 | **Output:** 64 | 65 | * One or more files in `.csv` or `.hdf5` format, written to `.` (i.e., the "current working directory"). Each file should annotate individual cells with the corresponding inferred cell state. 66 | * (Optional) One or more files written to `./plots/` (i.e., `plots/` subdirectory within the "current working directory"). Each file can be in any format and contain any information that the module developer thinks will be useful to the user (e.g., UMAP plots showing how cells cluster together). 67 | * (Optional) One or more files written to `./qc/` (i.e., `qc/` subdirectory within the "current working directory"). These will be copied by the pipeline to the corresponding location in the [project's `qc/` directory]({{ site.baseurl }}/documentation/dir.html#quality-control). 68 | 69 | # Configuration 70 | 71 | Adding a new MCMICRO module involves specifying simple key-value pairs in `config/modules.config`. 
For example, consider the following configuration for ilastik: 72 | 73 | ``` 74 | [ 75 | name : 'ilastik', 76 | container : 'labsyspharm/mcmicro-ilastik', 77 | version : '1.4.3', 78 | cmd : 'python /app/mc-ilastik.py --output .', 79 | input : '--input', 80 | model : '--model', 81 | watershed : 'yes' 82 | ] 83 | ``` 84 | 85 | ## Name 86 | 87 | The `name` of the module determines two things. First, it specifies the names of subdirectories for where the output files will be written to in the project directory. In the given example, the primary outputs will appear in `probability-maps/ilastik/`, while QC files will be written to `qc/ilastik/`. Second, the module name also tells MCMICRO what other parameters to look for. In our example, the pipeline will look for module specific parameters in `--ilastik-opts` and a custom model file in `--ilastik-model`. 88 | 89 | ## Container and version 90 | 91 | The two fields must uniquely identify a Docker container image containing the tool. Mechanistically, the fields are combined using the [standard `REPOSITORY:TAG` convention](https://docs.docker.com/engine/reference/commandline/images/). 92 | 93 | ## Command 94 | 95 | The `cmd` field must contain a command that, when executed inside the container, will produce the required set of outputs from the inputs provided to it by the pipeline. 96 | 97 | **It is imperative that all primary outputs are written to `.` (i.e., the "current working directory"). MCMICRO will automatically sort outputs to their correct location in the project directory. Writing outputs to any other location may result in MCMICRO failing to locate them.** 98 | 99 | ## Input 100 | 101 | The `input` field determines how the pipeline will supply inputs to the module. Some examples in the context of [exemplar-001]({{ site.baseurl }}/datasets.html) may look as follows: 102 | 103 | | Configuration | What MCMICRO will execute | 104 | | :-- | :-- | 105 | | cmd : 'python /app/tool.py -o .'
input : '-i'
| `python /app/tool.py -o . -i exemplar-001.ome.tif` | 106 | | cmd : 'python /app/tool.py -o .'
input : '--input'
| `python /app/tool.py -o . --input exemplar-001.ome.tif` | 107 | | cmd : 'python /app/tool.py -o .'
input : ''
| `python /app/tool.py -o . exemplar-001.ome.tif` | 108 | 109 | ## (Optional) Model 110 | 111 | The `model` field functions similarly to `input` and specifies how the pipeline will supply a custom model to the tool. 112 | 113 | ## Watershed 114 | 115 | The `watershed` field specifies whether the module requires a subsequent watershed step. Set it to `'yes'` for modules that produce probability maps and `'no'` for instance segmenters. Alternatively, you can specify `'bypass'` to have the output still go through S3Segmenter with the `--nucleiRegion bypass` flag. This will skip watershed but still allow you to filter nuclei by size with `--logSigma`. 116 | 117 | ## Putting it all together 118 | 119 | Given the above configuration for ilastik, users of MCMICRO can begin using the module by typing the following command: 120 | 121 | ``` 122 | nextflow run labsyspharm/mcmicro --in path/to/exemplar-001 \ 123 | --probability-maps ilastik \ 124 | --ilastik-opts '--num_channels 1' \ 125 | --ilastik-model myawesomemodel.ilp 126 | ``` 127 | 128 | As exemplar-001 makes its way through the pipeline, it will eventually encounter the [probability map generation and segmentation step]({{ site.baseurl }}/documentation/dir.html#segmentation). The pipeline will then identify ilastik as the module to be executed from the `--probability-maps` flag. The actual command that MCMICRO runs will then be composed using all the above fields together: 129 | 130 | ``` 131 | python /app/mc-ilastik.py --output . --input exemplar-001.ome.tif --model myawesomemodel.ilp --num_channels 1 132 | ``` 133 | 134 | # (Advanced) Automated tests 135 | 136 | MCMICRO uses [GitHub Actions](https://docs.github.com/en/actions) to execute a set of automated tests on the [two exemplar images]({{ site.baseurl }}/datasets.html). The tests ensure that modifications to the pipeline don't break existing module functionality. 
When contributing a new module to MCMICRO, consider composing a new test that ensures your module runs on the exemplar data without any issues. 137 | 138 | Automated tests are specified in [`ci.yml`](https://github.com/labsyspharm/mcmicro/blob/master/.github/workflows/ci.yml). The exemplar data is cached and can be easily restored via `actions/cache@v2`. For example, consider the following minimal test that contrasts unmicst and ilastik on exemplar-001: 139 | 140 | ``` 141 | test-ex001: 142 | needs: setup 143 | runs-on: ubuntu-latest 144 | steps: 145 | - uses: actions/checkout@v2 146 | - name: Install Nextflow 147 | run: curl -fsSL get.nextflow.io | bash 148 | - name: Restore exemplar-001 cache 149 | uses: actions/cache@v2 150 | with: 151 | path: ~/data/exemplar-001 152 | key: mcmicro-exemplar-001 153 | - name: Test exemplar-001 154 | run: ./nextflow main.nf --in ~/data/exemplar-001 --probability-maps unmicst,ilastik --s3seg-opts '--probMapChan 0' 155 | ``` 156 | 157 | The test, named `test-ex001`, consists of three steps: 1) Installing nextflow, 2) Restoring exemplar-001 data from cache, and 3) Running the pipeline on the exemplar-001. The `needs:` field specifies that the test should be executed after `setup` (which verifies the existence of cached data and performs caching if it's missing). 
158 | 159 | -------------------------------------------------------------------------------- /docs/tutorials/exhibit.json: -------------------------------------------------------------------------------- 1 | { 2 | "Images": [ 3 | { 4 | "Name": "WORKAROUND-decouple-storyname-image-description" 5 | }, 6 | { 7 | "Name": "i0", 8 | "Description": "", 9 | "Path": "https://s3.amazonaws.com/www.cycif.org/schapiro-mcmicro-2021/exemplar-002", 10 | "Width": 6197, 11 | "Height": 6231, 12 | "MaxLevel": 3 13 | } 14 | ], 15 | "PixelsPerMicron": 1.5385, 16 | "Name": "MCMICRO Exemplar-002", 17 | "Header": "This visual guide takes you through a series of steps for deriving biological information from highly-multiplexed images using MCMICRO pipeline. To learn more about the pipeline and the exemplar-002 dataset, visit [MCMICRO website](https://mcmicro.org)\n\n![https://mcmicro.org/images/Fig1.png](https://mcmicro.org/images/Fig1.png)", 18 | "Footer": "Back to MCMICRO", 19 | "FirstGroup": "Staining", 20 | "Rotation": 0, 21 | "Layout": { 22 | "Grid": [ 23 | [ 24 | "i0" 25 | ] 26 | ] 27 | }, 28 | "Stories": [ 29 | { 30 | "Name": "", 31 | "Description": "", 32 | "Waypoints": [ 33 | { 34 | "Name": "Raw Image Tiles (Level 1)", 35 | "Description": "When performing highly-multiplexed whole slide imaging (WSI), data acquisition routinely produces thousands of multichannel image tiles. 
Although the stage positioning is rather robust in modern microscopes, further alignments of image tiles acquired within one cycle as well as across multiple cycles are still needed.\n\nWhen the tiles are stitched by relying on the microscope stage coordinates alone, with the red-green checkerboard pattern showing neighboring tiles, the stage movement errors are highly obvious in the yellow overlapping regions 🔍\n\nThe errors are even more pronounced when three DNA channels from subsequent cycles are overlaid using their stage positions 🔍\n\nThe first step in MCMICRO is to align the provided **raw image tiles (Level 1)** and correct uneven illuminations in each of the tiles. MCMICRO currently accepts [Bio-formats](https://www.openmicroscopy.org/bio-formats/) compatible image formats, along with [a .csv file containing channel names](https://mcmicro.org/step-input.html) as inputs and outputs a stitched-and-registered image 🔍", 36 | "Arrows": [], 37 | "Overlays": [], 38 | "Group": "Before Stitching", 39 | "Masks": [], 40 | "ActiveMasks": [], 41 | "Zoom": 0.5, 42 | "Pan": [ 43 | 0.5, 44 | 0.5 45 | ] 46 | }, 47 | { 48 | "Name": "Whole-Slide Image (Level 2)", 49 | "Description": "To produce the **whole-slide image (Level 2)** in OME-TIFF format, individual image tiles are [corrected for illumination](https://www.nature.com/articles/ncomms14836), followed by simultaneous [tile stitching and registration across cycles](https://www.biorxiv.org/content/10.1101/2021.04.20.440625v1). After these pre-processing steps, channels from different cycles can be \"merged\" and visualized.", 50 | "Arrows": [], 51 | "Overlays": [], 52 | "Group": "Staining", 53 | "Masks": [], 54 | "ActiveMasks": [], 55 | "Zoom": 0.5, 56 | "Pan": [ 57 | 0.5, 58 | 0.5 59 | ] 60 | }, 61 | { 62 | "Name": "TMA dearray", 63 | "Description": "When working with Tissue Microarrays (TMAs), MCMICRO can identify and isolate individual cores using [Coreograph](https://mcmicro.org/coreograph.html). 
Each core will be written out into a standalone file to enable parallel downstream processing.", 64 | "Arrows": [], 65 | "Overlays": [], 66 | "Group": "Staining", 67 | "Masks": ["Dearray"], 68 | "ActiveMasks": ["Dearray"], 69 | "Zoom": 0.6, 70 | "Pan": [ 71 | 0.5, 72 | 0.5 73 | ] 74 | }, 75 | { 76 | "Name": "Segmentation Probability Maps", 77 | "Description": "Basic cell segmentation in MCMICRO consists of two steps. In the first step, [machine learning models](https://mcmicro.org/unmicst.html) are used to generate probability maps that annotate each pixel with probabilities that it belongs to background or different parts of the cell such as the nucleus, cytoplasm, cell membrane or the intercellular region. MCMICRO can execute multiple machine learning algorithms in parallel, allowing for a direct comparison of their outputs.", 78 | "Arrows": [], 79 | "Overlays": [], 80 | "Group": "DNA", 81 | "Masks": ["Cell Mask Outlines", "Probability Maps"], 82 | "ActiveMasks": ["Probability Maps"], 83 | "Zoom": 1, 84 | "Pan": [ 85 | 0.5, 86 | 0.5 87 | ] 88 | }, 89 | { 90 | "Name": "Segmentation Masks (Level 3)", 91 | "Description": "The second step in cell segmentation applies [watershed-like algorithms](https://mcmicro.org/s3seg.html) to probability maps produced by the first step. The resulting segmentation labelled masks assign each cell with a unique index number, where each pixel value adopts its cell's corresponding index number (background is assigned 0). 
MCMICRO generates labelled masks for nuclei, cytoplasm, and whole cell regions with matching indexed numbers to facilitate single cell analysis.", 92 | "Arrows": [], 93 | "Overlays": [], 94 | "Group": "DNA", 95 | "Masks": ["Cell Mask Outlines", "Probability Maps"], 96 | "ActiveMasks": ["Cell Mask Outlines"], 97 | "Zoom": 1, 98 | "Pan": [ 99 | 0.5, 100 | 0.5 101 | ] 102 | }, 103 | { 104 | "Name": "Spatial Feature Tables (Level 4)", 105 | "Description": "The final step in the MCMICRO pipeline is quantification, which utilizes the segmentation masks and the original image data to generate a spatial feature table. Each row in the table corresponds to an individual cell, while columns catalogue cell position, average marker expression, and morphological features.\n\nAs an example, the cell masks are colored using the mean intensities of CD3d in each cell.", 106 | "Arrows": [], 107 | "Overlays": [], 108 | "Group": "Staining", 109 | "Masks": ["CD3d Expression", "Cell Mask Outlines", "Probability Maps"], 110 | "ActiveMasks": ["CD3d Expression"], 111 | "Zoom": 1.292, 112 | "Pan": [ 113 | 0.6706, 114 | 0.7835 115 | ] 116 | } 117 | ] 118 | } 119 | ], 120 | "Masks": [ 121 | { 122 | "Name": "Cell Mask Outlines", 123 | "Path": "mask/cellRingMask-outlines", 124 | "Colors": [ 125 | "ff00ff" 126 | ], 127 | "Channels": [ 128 | "Cell Mask Outlines" 129 | ] 130 | }, 131 | { 132 | "Name": "Probability Maps", 133 | "Path": "mask/probability-maps", 134 | "Colors": [ 135 | "00ff00", 136 | "0000ff" 137 | ], 138 | "Channels": [ 139 | "Nuclei contours probability", 140 | "Nuclei probability" 141 | ] 142 | }, 143 | { 144 | "Name": "Dearray", 145 | "Path": "mask/dearray-mask", 146 | "Colors": [ 147 | "1c9e77", 148 | "d96003", 149 | "7570b4", 150 | "e8298a" 151 | ], 152 | "Channels": [ 153 | "TMA Core - 1", 154 | "TMA Core - 2", 155 | "TMA Core - 3", 156 | "TMA Core - 4" 157 | ] 158 | }, 159 | { 160 | "Name": "CD3d Expression", 161 | "Path": "mask/cd3d_expression_mask", 162 | "Colors": [ 163 | 
"0000aa" 164 | ], 165 | "Channels": [ 166 | "CD3d Expression" 167 | ] 168 | } 169 | ], 170 | "Groups": [ 171 | { 172 | "Name": "Before Stitching", 173 | "Path": "ashlar_debug", 174 | "Colors": [ 175 | "ff0000", 176 | "00ff00" 177 | ], 178 | "Channels": [ 179 | "Tiles", 180 | "Tiles" 181 | ] 182 | }, 183 | { 184 | "Name": "Before Registration", 185 | "Path": "Before-stitching-and-registration_0__DNA-1--1__DNA-2--2__DNA-3", 186 | "Colors": [ 187 | "00ffff", 188 | "ffff00", 189 | "ff00ff" 190 | ], 191 | "Channels": [ 192 | "DNA - 1", 193 | "DNA - 2", 194 | "DNA - 3" 195 | ] 196 | }, 197 | { 198 | "Name": "Registered", 199 | "Path": "Stitching_0__DNA--4__DNA-2--8__DNA-3", 200 | "Colors": [ 201 | "00ffff", 202 | "ffff00", 203 | "ff00ff" 204 | ], 205 | "Channels": [ 206 | "DNA - 1", 207 | "DNA - 2", 208 | "DNA - 3" 209 | ] 210 | }, 211 | { 212 | "Name": "Staining", 213 | "Path": "Staining_0__DNA--13__CD163--14__CD3D--15__CD31--19__VDAC1--34__Pan-CK", 214 | "Colors": [ 215 | "ffffff", 216 | "ffff00", 217 | "0000ff", 218 | "ff0000", 219 | "00ff00", 220 | "f79209" 221 | ], 222 | "Channels": [ 223 | "DNA ", 224 | "CD163", 225 | "CD3d", 226 | "CD31", 227 | "VDAC1", 228 | "Pan-CK" 229 | ] 230 | }, 231 | { 232 | "Name": "DNA", 233 | "Path": "Stitching_0__DNA--4__DNA-2--8__DNA-3", 234 | "Colors": [ 235 | "ffffff" 236 | ], 237 | "Channels": [ 238 | "DNA" 239 | ] 240 | } 241 | ] 242 | } -------------------------------------------------------------------------------- /docs/tutorials/pipeline-visual-guide.html: -------------------------------------------------------------------------------- 1 | --- 2 | title: Pipeline Visual Guide 3 | nav_order: 1 4 | parent: Tutorials 5 | --- 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 |
18 | 19 | 20 | 21 | 22 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /docs/workflow/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default-cylinter 3 | title: Workflow 4 | nav_order: 4 5 | has_children: true 6 | --- 7 | 8 | # Workflow 9 | 10 | | Input Directory Structure (see [Input File Structure]({{ site.baseurl }}/structure/index) for details) | Output Directory Structure 11 | | :-- | :-- | 12 | | INPUT_DIR
├── cylinter_config.yml
├── csv/
├── markers.csv
├── mask/
├── seg/
└── tif/
| OUTPUT_DIR
├── area/
├── checkpoints/
├── cylinter_report.yml
├── clustering/
├── contrast/
├── cycles/
├── gating/
├── intensity/
├── metaQC/
├── PCA/
├── pruning/
└── ROIs/
13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 |
CyLinterIdentifying and removing noisy single-cell data points with CyLinter. | CyLinter input consists of multiplex microscopy files (OME-TIFF/TIFF) and their corresponding cell segmentation outlines (OME-TIFF/TIFF), cell ID masks (OME-TIFF/TIFF), and single-cell feature tables (CSV). a, Aggregate data (automated): raw spatial feature tables for all samples in a batch are merged into a single Pandas (Python) dataframe. b, ROI selection (interactive or automated): multi-channel images are viewed to identify and gate on regions of tissue affected by microscopy artefacts (negative selection mode) or areas of tissue devoid of artefacts (positive selection mode). b1-b4, Demonstration of automated artefact detection in CyLinter: b1, CyLinter’s selectROIs module showing artefacts in the CDKN1A (green) channel of a mesothelioma TMA core. b2, Transformed version of the original CDKN1A image such that artefacts appear as large, bright regions relative to channel intensity variations associated with true signal of immunoreactive cells which are suppressed. b3, Local intensity maxima are identified in the transformed image and a flood fill algorithm is used to create a pixel-level binary mask indicating regions of tissue affected by artefacts. In this example, the method identifies three artefacts in the image: one fluorescence aberration at the top of the core, and two tissue folds at the bottom of the core. b4, CyLinter’s selectROIs module showing the binary artefact mask (translucent gray shapes) and their corresponding local maxima (red dots) defining each of the three artefacts. c, DNA intensity filter (interactive): histogram sliders are used to define lower and upper bounds on nuclear counterstain signal intensity. Cells between cutoffs are visualized as scatter points at their spatial coordinates in the corresponding tissue for gate confirmation or refinement. 
d, Segmentation area filter (interactive): histogram sliders are used to define lower and upper bounds on cell segmentation area (pixel counts). Cells between cutoffs are visualized as scatter points at their spatial coordinates in the corresponding tissue for gate confirmation or refinement. e, Cross-cycle correlation filter (interactive): applicable to multi-cycle experiments. Histogram sliders are used to define lower and upper bounds on the log-transformed ratio of DNA signals between the first and last imaging cycles. Cells between cutoffs are visualized as scatter points at their spatial coordinates in their corresponding tissues for gate confirmation or refinement. f, Log transformation (automated): single-cell data are log-transformed. g, Channel outliers filter (interactive): the distribution of cells according to antibody signal intensity is viewed for all samples as a facet grid of scatter plots (or hexbin plots) against cell area (y-axes). Lower and upper percentile cutoffs are applied to remove outliers. Outliers are visualized as scatter points at their spatial coordinates in their corresponding tissues for gate confirmation or refinement. h, MetaQC (interactive): unsupervised clustering methods (UMAP or TSNE followed by HDBSCAN clustering) are used to correct for gating bias in prior data filtration modules by thresholding on the percent of each cluster composed of clean (maintained) or noisy (redacted) cells. i, Principal component analysis (PCA, automated): PCA is performed and Horn’s parallel analysis is used to determine the number of PCs associated with non-random variation in the dataset. j, Image contrast adjustment (interactive): channel contrast settings are optimized for visualization on reference tissues which are applied to all samples in the cohort. k, Unsupervised clustering (interactive): UMAP (or TSNE) and HDBSCAN are used to identify unique cell states in a given cohort of tissues. 
Manual gating can also be performed to identify cell populations. l, Compute clustered heatmap (automated): clustered heatmap is generated showing channel z-scores for identified clusters (or gated populations). m, Compute frequency statistics (automated): pairwise t statistics on the frequency of each identified cluster or gated cell population between groups of tissues specified in CyLinter’s configuration file (cylinter_config.yml, e.g., treated vs. untreated, response vs. no response, etc.) are computed. n, Evaluate cluster membership (automated): cluster quality is checked by visualizing galleries of example cells drawn at random from each cluster identified in the clustering module (panel k).
23 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["poetry-core>=1.0.0"] 3 | build-backend = "poetry.core.masonry.api" 4 | 5 | [tool.poetry] 6 | name = "CyLinter" 7 | version = "0.0.50" 8 | description = "CyLinter: An Interactive Image Segmentation Filter for Multiplex Microscopy" 9 | readme = "README.md" 10 | license = "MIT" 11 | keywords =['CyLinter multiplex microscopy quality control'] 12 | classifiers=[ 13 | 'Development Status :: 4 - Beta', 14 | 'Intended Audience :: End Users/Desktop', 15 | 'Intended Audience :: Science/Research', 16 | 'Framework :: napari', 17 | 'License :: OSI Approved :: MIT License', 18 | 'Natural Language :: English', 19 | 'Operating System :: OS Independent', 20 | 'Programming Language :: Python :: 3', 21 | 'Topic :: Scientific/Engineering :: Visualization' 22 | ] 23 | authors = ["Gregory J. Baker "] 24 | homepage = "https://github.com/labsyspharm/cylinter" 25 | 26 | [tool.poetry.dependencies] 27 | cellcutter = "*" 28 | hdbscan = "*" 29 | joblib = "*" 30 | magicgui = "*" 31 | matplotlib = "<3.6" 32 | napari = { version = "*", extras = ["all"] } 33 | numpy = "*" 34 | natsort = "*" 35 | numba = "*" 36 | pandas = "*" 37 | pyarrow = "*" 38 | pyqt = "*" 39 | pyyaml = "*" 40 | qtpy = "*" 41 | scikit-image = "*" 42 | scikit-learn = "<=1.2.2" 43 | seaborn = "*" 44 | tifffile = "*" 45 | umap-learn = "*" 46 | zarr = "*" 47 | svglib = "*" 48 | pypdf2 = "*" 49 | imagecodecs = "*" 50 | opencv-python = "*" 51 | 52 | [tool.poetry.scripts] 53 | cylinter = "cylinter.cylinter:main" 54 | -------------------------------------------------------------------------------- /recipe/meta.yaml: -------------------------------------------------------------------------------- 1 | {% set name = "cylinter" %} 2 | {% set version = "0.0.50" %} 3 | 4 | package: 5 | name: "{{ name|lower }}" 6 | version: 
"{{ version }}" 7 | 8 | source: 9 | git_url: https://github.com/labsyspharm/cylinter.git 10 | git_tag: v0.0.50 11 | 12 | build: 13 | number: 0 14 | script: "{{ PYTHON }} -m pip install . --no-deps --ignore-installed -vv " 15 | entry_points: 16 | - cylinter=cylinter.cylinter:main 17 | noarch: python 18 | 19 | requirements: 20 | build: 21 | - poetry 22 | host: 23 | - pip 24 | - python 25 | - poetry 26 | run: 27 | - python 28 | - cellcutter 29 | - hdbscan 30 | - joblib 31 | - magicgui 32 | - matplotlib <3.6 # avoids segmentation faults when closing silhouette plot in clustering module 33 | - napari 34 | - numpy 35 | - natsort 36 | - numba 37 | - pandas 38 | - pyarrow 39 | - pyqt 40 | - pyyaml 41 | - qtpy 42 | - scikit-image 43 | - scikit-learn <=1.2.2 # avoids InconsistentVersionWarning: Trying to unpickle estimator Pipeline from version 1.2.2 when using version 1.3.1. because artifact detection model was built using v1.2.2 44 | - seaborn 45 | - tifffile 46 | - umap-learn 47 | - zarr 48 | - svglib 49 | - pypdf2 50 | - imagecodecs # MIBI data fails to be read without this 51 | - opencv 52 | 53 | test: 54 | imports: 55 | - cylinter 56 | 57 | about: 58 | home: https://labsyspharm.github.io/cylinter/ 59 | license: MIT 60 | license_family: BSD 61 | summary: Quality Control Software for Multiplex Microscopy 62 | --------------------------------------------------------------------------------