├── .gitignore
├── CITATION.cff
├── LICENSE
├── README.md
├── cylinter
    ├── __init__.py
    ├── components.py
    ├── config.py
    ├── cylinter.py
    ├── cylinter_config.yml
    ├── modules
    │   ├── PCA.py
    │   ├── aggregateData.py
    │   ├── areaFilter.py
    │   ├── clustering.py
    │   ├── clustermap.py
    │   ├── curateThumbnails.py
    │   ├── cycleCorrelation.py
    │   ├── frequencyStats.py
    │   ├── gating.py
    │   ├── intensityFilter.py
    │   ├── logTransform.py
    │   ├── metaQC.py
    │   ├── pruneOutliers.py
    │   ├── selectROIs.py
    │   └── setContrast.py
    ├── pipeline.py
    ├── prep.py
    ├── prep_subprocess.sh
    ├── pretrained_models
    │   ├── pretrained_model.pkl
    │   └── train_artifact_classifier.ipynb
    └── utils.py
├── docs
    ├── .gitignore
    ├── _config.yml
    ├── _includes
    │   ├── cylinter_gif.html
    │   ├── home.md
    │   └── workflow.md
    ├── _layouts
    │   └── default-cylinter.html
    ├── _sass
    │   ├── color_schemes
    │   │   └── cylinter.scss
    │   └── custom
    │   │   └── custom.scss
    ├── assets
    │   ├── gifs
    │   │   ├── cylinter.gif
    │   │   └── solitary_saunter.gif
    │   └── images
    │   │   ├── ExtFig4.jpg
    │   │   ├── cores.jpg
    │   │   └── cylinter-logo.svg
    ├── cite
    │   └── index.md
    ├── community
    │   └── index.md
    ├── exemplar
    │   └── index.md
    ├── faq
    │   └── #index.md
    ├── funding
    │   └── index.md
    ├── help
    │   └── index.md
    ├── index.md
    ├── installation
    │   └── index.md
    ├── modules
    │   ├── PCA.md
    │   ├── aggregateData.md
    │   ├── areaFilter.md
    │   ├── clustering.md
    │   ├── clustermap.md
    │   ├── curateThumbnails.md
    │   ├── cycleCorrelation.md
    │   ├── frequencyStats.md
    │   ├── gating.md
    │   ├── index.md
    │   ├── intensityFilter.md
    │   ├── logTransform.md
    │   ├── metaQC.md
    │   ├── pruneOutliers.md
    │   ├── selectROIs.md
    │   └── setContrast.md
    ├── run
    │   └── index.md
    ├── structure
    │   └── index.md
    ├── tutorials
    │   ├── #index.md
    │   ├── adding.md
    │   ├── basics.md
    │   ├── exhibit.json
    │   └── pipeline-visual-guide.html
    └── workflow
    │   └── index.md
├── pyproject.toml
└── recipe
    └── meta.yaml


/.gitignore:
--------------------------------------------------------------------------------
  1 | .synapseConfig
  2 | *~
  3 | 
  4 | # CyLinter directories
  5 | input/
  6 | output/
  7 | 
  8 | # Byte-compiled / optimized / DLL files
  9 | __pycache__/
 10 | *.py[cod]
 11 | *$py.class
 12 | 
 13 | # C extensions
 14 | *.so
 15 | 
 16 | .DS_Store
 17 | 
 18 | # Jekyll
 19 | _site/
 20 | *-cache/
 21 | .jekyll-metadata
 22 | 
 23 | # Ruby
 24 | .bundle/
 25 | .byebug_history
 26 | .ruby-gemset
 27 | .ruby-version
 28 | *.gem
 29 | Gemfile.lock
 30 | 
 31 | # Distribution / packaging
 32 | .Python
 33 | build/
 34 | develop-eggs/
 35 | dist/
 36 | downloads/
 37 | eggs/
 38 | .eggs/
 39 | lib/
 40 | lib64/
 41 | parts/
 42 | sdist/
 43 | var/
 44 | wheels/
 45 | *.egg-info/
 46 | .installed.cfg
 47 | *.egg
 48 | MANIFEST
 49 | 
 50 | # PyInstaller
 51 | #  Usually these files are written by a python script from a template
 52 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 53 | *.manifest
 54 | *.spec
 55 | 
 56 | # Installer logs
 57 | pip-log.txt
 58 | pip-delete-this-directory.txt
 59 | 
 60 | # Unit test / coverage reports
 61 | htmlcov/
 62 | .tox/
 63 | .coverage
 64 | .coverage.*
 65 | .cache
 66 | nosetests.xml
 67 | coverage.xml
 68 | *.cover
 69 | .hypothesis/
 70 | .pytest_cache/
 71 | 
 72 | # Translations
 73 | *.mo
 74 | *.pot
 75 | 
 76 | # Django stuff:
 77 | *.log
 78 | local_settings.py
 79 | db.sqlite3
 80 | 
 81 | # Flask stuff:
 82 | instance/
 83 | .webassets-cache
 84 | 
 85 | # Scrapy stuff:
 86 | .scrapy
 87 | 
 88 | # Sphinx documentation
 89 | docs/_build/
 90 | 
 91 | # PyBuilder
 92 | target/
 93 | 
 94 | # Jupyter Notebook
 95 | .ipynb_checkpoints
 96 | 
 97 | # pyenv
 98 | .python-version
 99 | 
100 | # celery beat schedule file
101 | celerybeat-schedule
102 | 
103 | # SageMath parsed files
104 | *.sage.py
105 | 
106 | # Environments
107 | .env
108 | .venv
109 | env/
110 | venv/
111 | ENV/
112 | env.bak/
113 | venv.bak/
114 | 
115 | # Spyder project settings
116 | .spyderproject
117 | .spyproject
118 | 
119 | # Rope project settings
120 | .ropeproject
121 | 
122 | # mkdocs documentation
123 | /site
124 | 
125 | # mypy
126 | .mypy_cache/
127 | 


--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
 1 | cff-version: 1.2.0
 2 | message: If you use CyLinter in your work, please cite it using the following metadata.
 3 | title: CyLinter
 4 | authors:
 5 |   - family-names: Baker
 6 |     given-names: Gregory
 7 |     orcid: https://orcid.org/0000-0002-5196-3961
 8 | keywords:
 9 |   - multiplex microscopy
10 |   - quality control
11 |   - research software
12 | version: 0.0.47
13 | date-released: 2021-01-21
14 | license: MIT
15 | url: https://github.com/labsyspharm/cylinter
16 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 Gregory J. Baker
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | ![](docs/assets/images/cylinter-logo.svg)
 3 | 
 4 | ## An Interactive Image Segmentation Filter for Multiplex Microscopy.
 5 | 
 6 | CyLinter is quality control software for identifying and removing cell segmentation instances corrupted by optical and/or image-processing artifacts in multiplex microscopy images. The tool is interactive and comprises a set of modular and extensible QC modules instantiated in a configurable [Python](https://www.python.org) Class object. Module results are cached to allow for progress bookmarking and dynamic restarts.
 7 | 
 8 | CyLinter development is led by [Greg Baker](https://github.com/gjbaker) at the [Laboratory of Systems Pharmacology](https://labsyspharm.org/), Harvard Medical School.
 9 | 
 10 | **Funding:** This work was supported by Ludwig Cancer Research and the Ludwig Center at Harvard (P.K.S., S.S.) and by NIH NCI grants U2C-CA233280 and U2C-CA233262 (P.K.S., S.S.). Development of computational methods and image processing software is supported by a Team Science Grant from the Gray Foundation (P.K.S., S.S.), the Gates Foundation grant INV-027106 (P.K.S.), the David Liposarcoma Research Initiative at Dana-Farber Cancer Institute supported by KBF Canada via the Rossy Foundation Fund (P.K.S., S.S.) and the Emerson Collective (P.K.S.). S.S. is supported by the BWH President’s Scholars Award.
11 | 
12 | **Project Website:** https://labsyspharm.github.io/cylinter/
13 | 


--------------------------------------------------------------------------------
/cylinter/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labsyspharm/cylinter/b9dc04b0f0892657d53dbf31f753dc52b481bc08/cylinter/__init__.py


--------------------------------------------------------------------------------
/cylinter/components.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import functools
  3 | 
  4 | import matplotlib.pyplot as plt
  5 | import seaborn as sns
  6 | 
  7 | from cylinter.modules.aggregateData import aggregateData
  8 | from cylinter.modules.selectROIs import selectROIs
  9 | from cylinter.modules.intensityFilter import intensityFilter
 10 | from cylinter.modules.areaFilter import areaFilter
 11 | from cylinter.modules.cycleCorrelation import cycleCorrelation
 12 | from cylinter.modules.logTransform import logTransform
 13 | from cylinter.modules.pruneOutliers import pruneOutliers
 14 | from cylinter.modules.metaQC import metaQC
 15 | from cylinter.modules.PCA import PCA
 16 | from cylinter.modules.clustering import clustering
 17 | from cylinter.modules.clustermap import clustermap
 18 | from cylinter.modules.gating import gating
 19 | from cylinter.modules.setContrast import setContrast
 20 | from cylinter.modules.frequencyStats import frequencyStats
 21 | from cylinter.modules.curateThumbnails import curateThumbnails
 22 | 
# Module-level logger used by the banner wrapper created in module().
logger = logging.getLogger(__name__)

# Map matplotlib's single-letter color codes to the default seaborn palette.
# This runs once, at import time. Each set_color_codes() call is followed by
# a throwaway plot using one shorthand code ('r', 'b', 'g', 'm', 'y'); all
# figures created here are closed again by plt.close('all') below.
# NOTE(review): the reason the throwaway plots are required is not visible
# in this file -- confirm they are still needed before removing them.
sns.set()
sns.set_color_codes()
_ = plt.plot([0, 1], color='r')
sns.set_color_codes()
_ = plt.plot([0, 2], color='b')
sns.set_color_codes()
_ = plt.plot([0, 3], color='g')
sns.set_color_codes()
_ = plt.plot([0, 4], color='m')
sns.set_color_codes()
_ = plt.plot([0, 5], color='y')
plt.close('all')

# Pipeline module registry, filled in registration order by module():
# pipeline_modules holds the wrapped callables and pipeline_module_names
# holds their names at the same positions.
pipeline_modules = []
pipeline_module_names = []
 42 | 
 43 | 
 44 | def module(func):
 45 |     """
 46 |     Annotation for pipeline module functions.
 47 | 
 48 |     This function adds the given function to the registry list. It also wraps
 49 |     the given function to log a pre/post-call banner.
 50 | 
 51 |     """
 52 |     @functools.wraps(func)
 53 |     def wrapper(*args, **kwargs):
 54 |         logger.info("=" * 70)
 55 |         logger.info("RUNNING MODULE: %s", func.__name__)
 56 |         result = func(*args, **kwargs)
 57 |         logger.info("=" * 70)
 58 |         logger.info("")
 59 |         return result
 60 |     pipeline_modules.append(wrapper)
 61 |     pipeline_module_names.append(wrapper.__name__)
 62 |     return wrapper
 63 | 
 64 | 
class QC(object):
    """
    Settings holder for a CyLinter run.

    The constructor copies its keyword arguments onto the instance. The
    class body then registers the imported pipeline module functions, in
    order, through module().
    """

    def __init__(self,

                 # general program settings
                 inDir=None,
                 outDir=None,
                 startModule=None,
                 sampleNames=None,
                 sampleConditions=None,
                 sampleConditionAbbrs=None,
                 sampleStatuses=None,
                 sampleReplicates=None,
                 samplesToExclude=None,
                 counterstainChannel=None,
                 markersToExclude=None,

                 # selectROIs -
                 delintMode=None,
                 showAbChannels=None,
                 samplesForROISelection=None,
                 autoArtifactDetection=None,
                 artifactDetectionMethod=None,

                 # intensityFilter -
                 numBinsIntensity=None,

                 # areaFilter -
                 numBinsArea=None,

                 # cycleCorrelation -
                 numBinsCorrelation=None,

                 # pruneOutliers -
                 hexbins=None,
                 hexbinGridSize=None,

                 # metaQC -
                 metaQC=None,
                 default_mcs=200,
                 default_reclass_tuple='0.75, 0.75',
                 embeddingAlgorithmQC=None,
                 channelExclusionsClusteringQC=None,
                 samplesToRemoveClusteringQC=None,
                 percentDataPerChunk=None,
                 colormapAnnotationQC=None,
                 metricQC=None,
                 perplexityQC=None,
                 earlyExaggerationQC=None,
                 learningRateTSNEQC=None,

                 randomStateQC=None,
                 nNeighborsQC=None,
                 learningRateUMAPQC=None,
                 minDistQC=None,
                 repulsionStrengthQC=None,

                 # PCA module -
                 channelExclusionsPCA=None,
                 samplesToRemovePCA=None,
                 dimensionPCA=None,
                 pointSize=None,
                 labelPoints=None,
                 distanceCutoff=None,
                 conditionsToSilhouette=None,

                 # gating module -
                 gating=None,
                 channelExclusionsGating=None,
                 samplesToRemoveGating=None,
                 vectorThreshold=None,
                 classes=None,

                 # clustering module -
                 embeddingAlgorithm=None,
                 channelExclusionsClustering=None,
                 samplesToRemoveClustering=None,
                 normalizeTissueCounts=None,
                 fracForEmbedding=None,
                 dimensionEmbedding=None,
                 colormapAnnotationClustering=None,
                 # NOTE(review): colormapAnnotation is accepted here but is
                 # never stored on the instance below -- confirm whether it
                 # is a leftover parameter.
                 colormapAnnotation=None,
                 perplexity=None,
                 earlyExaggeration=None,
                 learningRateTSNE=None,
                 metric=None,
                 randomStateTSNE=None,
                 nNeighbors=None,
                 learningRateUMAP=None,
                 minDist=None,
                 repulsionStrength=None,
                 randomStateUMAP=None,

                 # frequencyStats -
                 controlGroups=None,
                 denominatorCluster=None,
                 FDRCorrection=None,

                 # curateThumbnails -
                 numThumbnails=None,
                 windowSize=None,
                 segOutlines=None,
                 ):
        """
        Store the given settings as instance attributes.

        Each argument is stored under an attribute of the same name, with
        two exceptions: ``default_mcs`` is stored as ``default_mcsQC`` and
        ``colormapAnnotation`` is not stored at all. Every argument
        defaults to None except ``default_mcs`` (200) and
        ``default_reclass_tuple`` ('0.75, 0.75').
        """

        # general program settings
        self.inDir = inDir
        self.outDir = outDir
        self.startModule = startModule
        self.sampleNames = sampleNames
        self.sampleConditions = sampleConditions
        self.sampleConditionAbbrs = sampleConditionAbbrs
        self.sampleStatuses = sampleStatuses
        self.sampleReplicates = sampleReplicates
        self.samplesToExclude = samplesToExclude
        self.counterstainChannel = counterstainChannel
        self.markersToExclude = markersToExclude

        # selectROIs
        self.delintMode = delintMode
        self.showAbChannels = showAbChannels
        self.samplesForROISelection = samplesForROISelection
        self.autoArtifactDetection = autoArtifactDetection
        self.artifactDetectionMethod = artifactDetectionMethod

        # intensityFilter
        self.numBinsIntensity = numBinsIntensity

        # areaFilter
        self.numBinsArea = numBinsArea

        # cycleCorrelation
        self.numBinsCorrelation = numBinsCorrelation

        # pruneOutliers
        self.hexbins = hexbins
        self.hexbinGridSize = hexbinGridSize

        # metaQC
        self.metaQC = metaQC
        # note the attribute name differs from the parameter name here
        self.default_mcsQC = default_mcs
        self.default_reclass_tuple = default_reclass_tuple
        self.embeddingAlgorithmQC = embeddingAlgorithmQC
        self.channelExclusionsClusteringQC = channelExclusionsClusteringQC
        self.samplesToRemoveClusteringQC = samplesToRemoveClusteringQC
        self.percentDataPerChunk = percentDataPerChunk
        self.colormapAnnotationQC = colormapAnnotationQC
        self.metricQC = metricQC
        self.perplexityQC = perplexityQC
        self.earlyExaggerationQC = earlyExaggerationQC
        self.learningRateTSNEQC = learningRateTSNEQC
        self.randomStateQC = randomStateQC
        self.nNeighborsQC = nNeighborsQC
        self.learningRateUMAPQC = learningRateUMAPQC
        self.minDistQC = minDistQC
        self.repulsionStrengthQC = repulsionStrengthQC

        # PCA
        self.channelExclusionsPCA = channelExclusionsPCA
        self.samplesToRemovePCA = samplesToRemovePCA
        self.dimensionPCA = dimensionPCA
        self.pointSize = pointSize
        self.labelPoints = labelPoints
        self.distanceCutoff = distanceCutoff
        self.conditionsToSilhouette = conditionsToSilhouette

        # gating
        self.gating = gating
        self.channelExclusionsGating = channelExclusionsGating
        self.samplesToRemoveGating = samplesToRemoveGating
        self.vectorThreshold = vectorThreshold
        self.classes = classes

        # clustering (colormapAnnotation is intentionally absent: the
        # original code never stores it)
        self.embeddingAlgorithm = embeddingAlgorithm
        self.channelExclusionsClustering = channelExclusionsClustering
        self.samplesToRemoveClustering = samplesToRemoveClustering
        self.normalizeTissueCounts = normalizeTissueCounts
        self.fracForEmbedding = fracForEmbedding
        self.dimensionEmbedding = dimensionEmbedding
        self.colormapAnnotationClustering = colormapAnnotationClustering
        self.perplexity = perplexity
        self.earlyExaggeration = earlyExaggeration
        self.learningRateTSNE = learningRateTSNE
        self.metric = metric
        self.randomStateTSNE = randomStateTSNE
        self.nNeighbors = nNeighbors
        self.learningRateUMAP = learningRateUMAP
        self.minDist = minDist
        self.repulsionStrength = repulsionStrength
        self.randomStateUMAP = randomStateUMAP

        # frequencyStats
        self.controlGroups = controlGroups
        self.denominatorCluster = denominatorCluster
        self.FDRCorrection = FDRCorrection

        # curateThumbnails
        self.numThumbnails = numThumbnails
        self.windowSize = windowSize
        self.segOutlines = segOutlines

    # Register the pipeline modules. The order of these calls is the order
    # in which entries are appended to pipeline_modules and
    # pipeline_module_names. The return values are discarded, so these
    # statements do not bind any attribute on the class.
    module(aggregateData)
    module(selectROIs)
    module(intensityFilter)
    module(areaFilter)
    module(cycleCorrelation)
    module(logTransform)
    module(pruneOutliers)
    module(metaQC)
    module(PCA)
    module(setContrast)
    module(gating)
    module(clustering)
    module(clustermap)
    module(frequencyStats)
    module(curateThumbnails)
268 | 


--------------------------------------------------------------------------------
/cylinter/config.py:
--------------------------------------------------------------------------------
  1 | import pathlib
  2 | import yaml
  3 | from dataclasses import dataclass
  4 | 
  5 | 
  6 | @dataclass(frozen=True)
  7 | class BooleanTerm:
  8 |     name: str
  9 |     negated: bool
 10 | 
 11 |     @classmethod
 12 |     def parse_str(cls, s):
 13 |         if s.startswith('+'):
 14 |             negated = False
 15 |             name = s[1:]
 16 |         elif s.startswith('-'):
 17 |             negated = True
 18 |             name = s[1:]
 19 |         else:
 20 |             negated = None
 21 |             name = s
 22 |         return cls(name, negated)
 23 | 
 24 |     def __repr__(self):
 25 |         s = self.name
 26 |         if self.negated:
 27 |             s = '~' + self.name
 28 |         return s
 29 | 
 30 |     def __invert__(self):
 31 |         return BooleanTerm(self.name, ~self.negated)
 32 | 
 33 | 
 34 | class Config:
 35 | 
 36 |     def __init__(self, **kwargs):
 37 |         self.__dict__.update(kwargs)
 38 | 
 39 |     @classmethod
 40 |     def from_path(cls, path):
 41 |         config = cls()
 42 |         with open(path) as f:
 43 |             data = yaml.safe_load(f)
 44 |         config.inDir = pathlib.Path(data['inDir']).resolve()
 45 |         config.outDir = pathlib.Path(data['outDir']).resolve()
 46 |         config._parse_sample_metadata(data['sampleMetadata'])
 47 |         config.samplesToExclude = list(data['samplesToExclude'])
 48 |         config.counterstainChannel = str(data['counterstainChannel'])
 49 |         config.markersToExclude = list(data['markersToExclude'])
 50 | 
 51 |         # CLASS MODULE CONFIGURATIONS
 52 |         
 53 |         config.delintMode = bool(data['delintMode'])
 54 |         config.showAbChannels = bool(data['showAbChannels'])
 55 |         config.samplesForROISelection = list(data['samplesForROISelection'])
 56 |         config.autoArtifactDetection = bool(data['autoArtifactDetection'])
 57 |         config.artifactDetectionMethod = str(data['artifactDetectionMethod'])
 58 | 
 59 |         config.numBinsIntensity = int(data['numBinsIntensity'])
 60 | 
 61 |         config.numBinsArea = int(data['numBinsArea'])
 62 | 
 63 |         config.numBinsCorrelation = int(data['numBinsCorrelation'])
 64 | 
 65 |         config.hexbins = bool(data['hexbins'])
 66 |         config.hexbinGridSize = int(data['hexbinGridSize'])
 67 | 
 68 |         config.metaQC = bool(data['metaQC'])
 69 | 
 70 |         config.channelExclusionsPCA = list(data['channelExclusionsPCA'])
 71 |         config.samplesToRemovePCA = list(data['samplesToRemovePCA'])
 72 |         config.dimensionPCA = int(data['dimensionPCA'])
 73 |         config.pointSize = float(data['pointSize'])
 74 |         config.labelPoints = bool(data['labelPoints'])
 75 |         config.distanceCutoff = float(data['distanceCutoff'])
 76 |         config.conditionsToSilhouette = list(data['conditionsToSilhouette'])
 77 | 
 78 |         config.gating = bool(data['gating'])
 79 |         config.channelExclusionsGating = list(data['channelExclusionsGating'])
 80 |         config.samplesToRemoveGating = list(data['samplesToRemoveGating'])
 81 |         config.vectorThreshold = int(data['vectorThreshold'])
 82 |         config.vectorThreshold = int(data['vectorThreshold'])
 83 |         config._parse_classes(data['classes'])
 84 | 
 85 |         config.embeddingAlgorithmQC = str(data['embeddingAlgorithmQC'])
 86 |         config.embeddingAlgorithm = str(data['embeddingAlgorithm'])
 87 |         config.channelExclusionsClusteringQC = list(
 88 |             data['channelExclusionsClusteringQC']
 89 |         )
 90 |         config.channelExclusionsClustering = list(
 91 |             data['channelExclusionsClustering']
 92 |         )
 93 |         config.samplesToRemoveClusteringQC = list(
 94 |             data['samplesToRemoveClusteringQC']
 95 |         )
 96 |         config.samplesToRemoveClustering = list(
 97 |             data['samplesToRemoveClustering']
 98 |         )
 99 |         config.normalizeTissueCounts = bool(data['normalizeTissueCounts'])
100 |         config.percentDataPerChunk = float(data['percentDataPerChunk'])
101 |         config.fracForEmbedding = float(data['fracForEmbedding'])
102 |         config.dimensionEmbedding = int(data['dimensionEmbedding'])
103 |         config.colormapAnnotationQC = str(
104 |             data['colormapAnnotationQC'])
105 |         config.colormapAnnotationClustering = str(
106 |             data['colormapAnnotationClustering'])
107 | 
108 |         config.perplexityQC = float(data['perplexityQC'])
109 |         config.perplexity = float(data['perplexity'])
110 |         config.earlyExaggerationQC = float(data['earlyExaggerationQC'])
111 |         config.earlyExaggeration = float(data['earlyExaggeration'])
112 |         config.learningRateTSNEQC = float(data['learningRateTSNEQC'])
113 |         config.learningRateTSNE = float(data['learningRateTSNE'])
114 |         config.metricQC = str(data['metricQC'])
115 |         config.metric = str(data['metric'])
116 |         config.randomStateQC = int(data['randomStateQC'])
117 |         config.randomStateTSNE = int(data['randomStateTSNE'])
118 | 
119 |         config.nNeighborsQC = int(data['nNeighborsQC'])
120 |         config.nNeighbors = int(data['nNeighbors'])
121 |         config.learningRateUMAPQC = float(data['learningRateUMAPQC'])
122 |         config.learningRateUMAP = float(data['learningRateUMAP'])
123 |         config.minDistQC = float(data['minDistQC'])
124 |         config.minDist = float(data['minDist'])
125 |         config.repulsionStrengthQC = float(data['repulsionStrengthQC'])
126 |         config.repulsionStrength = float(data['repulsionStrength'])
127 |         config.randomStateUMAP = int(data['randomStateUMAP'])
128 | 
129 |         config.controlGroups = list(data['controlGroups'])
130 |         if (data['denominatorCluster']) is None:
131 |             config.denominatorCluster = (data['denominatorCluster'])
132 |         else:
133 |             config.denominatorCluster = int(data['denominatorCluster'])
134 |         config.FDRCorrection = bool(data['FDRCorrection'])
135 | 
136 |         config.numThumbnails = int(data['numThumbnails'])
137 |         config.windowSize = int(data['windowSize'])
138 |         config.segOutlines = bool(data['segOutlines'])
139 | 
140 |         return config
141 | 
142 |     def _parse_sample_metadata(self, value):
143 |         self.sampleNames = {}
144 |         self.sampleConditions = {}
145 |         self.sampleConditionAbbrs = {}
146 |         self.sampleStatuses = {}
147 |         self.sampleReplicates = {}
148 | 
149 |         if value is None:
150 |             return
151 | 
152 |         for file_name, terms in value.items():
153 | 
154 |             name = str(terms[0])
155 |             condition = str(terms[1])
156 |             abbreviation = str(terms[2])
157 |             status = str(terms[3])
158 |             replicate = int(terms[4])
159 | 
160 |             self.sampleNames[file_name] = name
161 |             self.sampleConditions[file_name] = condition
162 |             self.sampleConditionAbbrs[file_name] = abbreviation
163 |             self.sampleStatuses[file_name] = status
164 |             self.sampleReplicates[file_name] = replicate
165 | 
166 |     def _parse_classes(self, value):
167 | 
168 |         self.classes = {}
169 | 
170 |         if value is None:
171 |             return
172 |         
173 |         for outer_key, inner_dict in value.items():
174 |             boo = [BooleanTerm.parse_str(t) for t in inner_dict['definition']]
175 |             inner_dict['definition'] = boo
176 |             self.classes[str(outer_key)] = inner_dict
177 | 
178 |     @property
179 |     def checkpoint_path(self):
180 |         return self.outDir / 'checkpoints'
181 | 
182 |     def __repr__(self):
183 |         kwargs_str = ', '.join(f"{k}={v!r}" for k, v in self.__dict__.items())
184 |         return f"Config({kwargs_str})"
185 | 


--------------------------------------------------------------------------------
/cylinter/cylinter.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import argparse
 3 | import pathlib
 4 | import logging
 5 | from .config import Config
 6 | from . import pipeline, components
 7 | 
 8 | logger = logging.getLogger(__name__)
 9 | 
10 | 
def main(argv=None):
    """Command-line entry point: parse arguments and run the pipeline.

    ``argv`` is the full argument vector including the program name at
    index 0; when omitted, the current ``sys.argv`` is used. Returns 0
    after the pipeline has run, or 1 when the configuration path does not
    exist or ``--module`` names a module that is not registered in
    ``components.pipeline_module_names``.
    """
    # Look up sys.argv at call time. A ``argv=sys.argv`` default would be
    # bound once at import and miss any later reassignment of sys.argv.
    if argv is None:
        argv = sys.argv

    epilog = 'Pipeline modules:\n'
    epilog += '\n'.join(f"    {n}" for n in components.pipeline_module_names)
    parser = argparse.ArgumentParser(
        description='Perform CyLinter analysis on a data file.',
        epilog=epilog,
        # keep the newline-separated module list in the epilog as written
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        'config', type=path_resolved,
        help='Path to the configuration YAML file'
    )
    parser.add_argument(
        '--module', type=str,
        help='Pipeline module at which to begin processing (see below'
        ' for ordered list of modules)'
    )
    args = parser.parse_args(argv[1:])
    if not validate_paths(args):
        return 1
    if args.module and args.module not in components.pipeline_module_names:
        print(
            f"cylinter: error: argument --module: invalid choice '{args.module}'",
            file=sys.stderr
        )
        return 1

    logging.basicConfig(
        level=logging.INFO,
        format='%(levelname)s: %(message)s'
    )

    logger.info("Reading configuration file")
    config = Config.from_path(args.config)
    create_output_directory(config)

    logger.info("Executing pipeline")
    pipeline.run_pipeline(config, args.module)

    logger.info("Finished")

    return 0
54 | 
55 | 
def path_resolved(path_str):
    """Convert a path string into a resolved ``pathlib.Path``."""
    return pathlib.Path(path_str).resolve()
61 | 
62 | 
def validate_paths(args):
    """Check that the config path in the parsed arguments exists.

    Returns True when ``args.config`` exists. Otherwise a message naming
    the missing path is written to stderr and False is returned.
    """
    config_path = args.config
    if config_path.exists():
        return True

    print(
        f"Config path does not exist:\n     {args.config}\n",
        file=sys.stderr
    )
    return False
73 | 
74 | 
def create_output_directory(config):
    """Ensure the directory named by ``config.outDir`` exists.

    Missing parent directories are created too, and a directory that
    already exists is accepted without error.
    """
    output_root = config.outDir
    output_root.mkdir(exist_ok=True, parents=True)
78 | 


--------------------------------------------------------------------------------
/cylinter/cylinter_config.yml:
--------------------------------------------------------------------------------
  1 | # GENERAL PROGRAM CONFIGURATIONS
  2 | 
  3 | inDir: /Users/<username>/Desktop/cylinter_demo
  4 | # Path to CyLinter input directory containing multi-channel
  5 | # image files (TIFF or OME-TIFF), segmentation outlines (OME-TIFF),
  6 | # segmentation masks (TIFF), and corresponding single-cell feature tables (CSV)
  7 | 
  8 | outDir: /Users/<username>/Desktop/cylinter_demo/output
  9 | # CyLinter output directory. Path is created if it does not exist.
 10 | 
 11 | sampleMetadata:
 12 |   "1": ["1", "Normal kidney cortex", "NKC", "CANCER-FALSE", 1]
 13 |   "15": ["15", "Glioblastoma", "GBM", "CANCER-TRUE", 1]
 14 |   "18": ["18", "Mesothelioma", "MTO", "CANCER-TRUE", 1]
 15 |   "68": ["68", "Tonsil", "TSL", "CANCER-FALSE", 3]
 16 | # Sample metadata dictionary: keys = file names; values = five-element lists.
 17 | # First elements: sample names (str)
 18 | # Second elements: descriptive text of experimental condition (str)
 19 | # Third elements: abbreviation of experimental condition (str)
 20 | # Fourth elements: comma-delimited string of arbitrary binary declarations
 21 | # for computing t-statistics between two groups of samples (str)
 22 | # Fifth elements: replicate number specifying biological or
 23 | # technical replicates (int)
 24 | 
 25 | samplesToExclude: []
 26 | # (list of strs) Sample names to exclude from analysis specified
 27 | # according to the first elements of sampleMetadata configuration.
 28 | 
 29 | counterstainChannel: "DNA1"
 30 | # (str) Name of marker in markers.csv file for use in visualizing nuclear counterstain
 31 | 
 32 | markersToExclude: ["Rabbit IgG", "Goat IgG", "Mouse IgG", "CD56", "CD13",
 33 |                    "pAUR", "CCNE", "CDKN2A", "PCNA_1", "CDKN1B_2",
 34 |                    "CD63", "CD32", "CCNA2", "CDKN1C",
 35 |                    "CDKN1B_1", "CCND1", "cPARP", "pCREB",
 36 |                    "CCNB1", "PCNA_2", "CDK2"
 37 |                    ]
 38 | # (list of strs) Immunomarkers to exclude from analysis
 39 | # Does not include nuclear dyes. They are needed for the
 40 | # cycleCorrelation module to remove cell dropout.
 41 | 
 42 | ###############################################################################
 43 | # MODULE-SPECIFIC CONFIGURATIONS
 44 | 
 45 | # selectROIs-------------------------------------------------------------------
 46 | delintMode: True
 47 | # (bool) Whether to drop (True; negative selection) or
 48 | # retain (False; positive selection) cells selected by ROIs.
 49 | 
 50 | showAbChannels: True
 51 | # (bool) Whether to show all immunomarker channels (True) when Napari
 52 | # is open (may be memory limiting) or show cycle 1 DNA only (False).
 53 | 
 54 | samplesForROISelection: ["1", "15", "18", "68"]
 55 | # (list of strs) Sample names for ROI selection specified
 56 | # according to the first elements of sampleMetadata configuration.
 57 | 
 58 | autoArtifactDetection: True
 59 | # (bool) Whether to display tools for automated artifact detection in Napari window
 60 | 
 61 | artifactDetectionMethod: "classical"
 62 | # (str) Algorithm used for automated artifact detection (current option: "classical").
 63 | # Multi-layer perceptron method ("MLP") currently under development.
 64 | 
 65 | 
 66 | # intensityFilter-------------------------------------------------------------------
 67 | numBinsIntensity: 50
 68 | # (int) Number of bins for DNA intensity histograms.
 69 | 
 70 | 
 71 | # areaFilter-------------------------------------------------------------------
 72 | numBinsArea: 50
 73 | # (int) Number of bins for DNA area histograms.
 74 | 
 75 | 
 76 | # cycleCorrelation-------------------------------------------------------------------
 77 | numBinsCorrelation: 50
 78 | # (int) Number of bins for DNA1/DNAn histograms.
 79 | 
 80 | 
 81 | # pruneOutliers-------------------------------------------------------------------
 82 | hexbins: False
 83 | # (bool) Whether to use hexbins (True) or scatter plots (False) to plot
 84 | # single-cell signal intensities. Scatter plots allow for higher resolution,
 85 | # but may require longer rendering times.
 86 | 
 87 | hexbinGridSize: 20
 88 | # (int) Hexbin grid size when hexbins=True.
 89 | # Higher values increase bin resolution.
 90 | 
 91 | 
 92 | # metaQC (optional)-------------------------------------------------------------------
 93 | metaQC: False
 94 | # (bool) Whether to perform data reclassification based on
 95 | # unsupervised clustering results of combinations of clean and
 96 | # noisy (previously-redacted) data.
 97 | 
 98 | embeddingAlgorithmQC: "UMAP"
 99 | # (str) Embedding algorithm used for clustering (options: "TSNE" or "UMAP").
100 | 
101 | channelExclusionsClusteringQC: []
102 | # (list of strs) Immunomarkers to exclude from clustering.
103 | 
104 | samplesToRemoveClusteringQC: []
105 | # (list of strs) Samples to exclude from clustering.
106 | 
107 | percentDataPerChunk: 0.2
108 | # (float) Fraction of data to undergo embedding and
109 | # clustering per reclassification cycle.
110 | 
111 | colormapAnnotationQC: "Sample"
112 | # (str) Metadata annotation to colormap the embedding: Sample or Condition.
113 | 
114 | metricQC: "euclidean"
115 | # (str) Distance metric for computing embedding.
116 | # Choose from valid metrics used by scipy.spatial.distance.pdist:
117 | # "braycurtis", "canberra", "chebyshev", "cityblock", "correlation", "cosine",
118 | # "dice", "euclidean", "hamming", "jaccard", "jensenshannon", "kulsinski",
119 | # "mahalanobis", "matching", "minkowski", "rogerstanimoto", "russellrao",
120 | # "seuclidean", "sokalmichener", "sokalsneath", "sqeuclidean", "yule".
121 | 
122 | # --------------------------------------
123 | # tSNE-specific configurations:
124 | # https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html
125 | perplexityQC: 50.0
126 | # (float) Related to the number of nearest neighbors used in other
127 | # manifold learning algorithms. Larger datasets usually require
128 | # larger perplexity. Different values can result in significantly
129 | # different results.
130 | 
131 | earlyExaggerationQC: 12.0
132 | # (float) For larger values, the space between natural clusters
133 | # will be larger in the embedded space.
134 | 
135 | learningRateTSNEQC: 200.0
136 | # (float) tSNE learning rate (typically between 10.0 and 1000.0).
137 | 
138 | randomStateQC: 5
139 | # (int) Determines the random number generator for reproducible results
140 | # across multiple function calls.
141 | 
142 | # --------------------------------------
143 | # UMAP-specific configurations:
144 | # https://umap-learn.readthedocs.io/en/latest/api.html
145 | nNeighborsQC: 6
146 | # (int) The size of local neighborhood (in terms of number of
147 | # neighboring sample points) used for manifold approximation.
148 | # Larger values result in more global views of the manifold,
149 | # while smaller values result in more local data being preserved.
150 | # In general values should be in the range 2 to 100.
151 | 
152 | learningRateUMAPQC: 1.0
153 | # (float) The initial learning rate for the embedding optimization.
154 | 
155 | minDistQC: 0.1
156 | # (float) The effective minimum distance between embedded points.
157 | # Smaller values will result in a more clustered/clumped
158 | # embedding where nearby points on the manifold are drawn
159 | # closer together, while larger values will result in a more
160 | # even dispersal of points. The value should be set relative
161 | # to the spread value, which determines the scale at which
162 | # embedded points will be spread out.
163 | 
164 | repulsionStrengthQC: 5.0
165 | # (float) Weighting applied to negative samples in low dimensional
166 | # embedding optimization. Values higher than one will
167 | # result in greater weight being given to negative samples.
168 | 
169 | 
170 | # PCA-------------------------------------------------------------------
171 | channelExclusionsPCA: []
172 | # (list of strs) Immunomarkers to exclude from PCA analysis.
173 | 
174 | samplesToRemovePCA: []
175 | # (list of strs) Samples to exclude from PCA analysis.
176 | 
177 | dimensionPCA: 2
178 | # (int) Number of PCs to compute.
179 | 
180 | pointSize: 90.0
181 | # (float) scatter point size for sample scores plot.
182 | 
183 | labelPoints: True
184 | # (bool) Annotate scatter points with condition abbreviations
185 | # from sampleMetadata configuration.
186 | 
187 | distanceCutoff: 0.15
188 | # (float) Maximum distance between data points in PCA scores plot to
189 | # be annotated with a common label. Useful for increasing visual clarity
190 | # of PCA plots containing many data points. Applicable when
191 | # labelPoints is True.
192 | 
193 | conditionsToSilhouette: []
194 | # (list of strs) List of abbreviated condition names whose corresponding
195 | # scores plot points will be greyed out, left unannotated, and sent to the back
196 | # of the plot (zorder). Useful for increasing visual clarity of PCA
197 | # plots containing many data points.
198 | 
199 | 
200 | # gating (optional)-------------------------------------------------------------------
201 | gating: False
202 | # (bool) Whether to perform SYLARAS-style gating on single-cell data.
203 | # Cell Syst. 2020 Sep 23;11(3):272-285.e9 PMID: 32898474
204 | 
205 | channelExclusionsGating: []
206 | # (list of strs) Immunomarkers to exclude from gating.
207 | 
208 | samplesToRemoveGating: []
209 | # (list of strs) Samples to exclude from gating.
210 | 
211 | vectorThreshold: 100
212 | # (int) Visualize Boolean vectors (i.e., binary phenotype profiles) with cell counts >= vectorThreshold
213 | 
214 | classes:
215 |   Tumor: 
216 |     definition: [+pan-CK, +KI67, -aSMA, -CD45]
217 |     subsets: [CDKN1A]
218 | # (dict) Boolean immunophenotype signatures.
219 | # +marker = immunopositive, -marker = immunonegative, marker = don't care
220 | 
221 | 
222 | # clustering-------------------------------------------------------------------
223 | embeddingAlgorithm: "UMAP"
224 | # (str) Embedding algorithm to use for clustering (options: "TSNE" or "UMAP").
225 | 
226 | channelExclusionsClustering: []
227 | # (list of strs) Immunomarkers to exclude from clustering.
228 | 
229 | samplesToRemoveClustering: []
230 | # (list of strs) Samples to exclude from clustering.
231 | 
232 | normalizeTissueCounts: True
233 | # (bool) Make the number of cells per tissue for clustering more similar
234 | # through sample-weighted random sampling.
235 | 
236 | fracForEmbedding: 1.0
237 | # (float) Fraction of cells to be embedded (range: 0.0-1.0).
238 | # Limits amount of data passed to downstream modules.
239 | 
240 | dimensionEmbedding: 2
241 | # (int) Dimension of the embedding (options: 2 or 3).
242 | 
243 | colormapAnnotationClustering: "Sample"
244 | # (str) Metadata annotation to colormap the embedding: Sample or Condition.
245 | 
246 | metric: "euclidean"
247 | # (str) Distance metric for computing embedding.
248 | # Choose from valid metrics used by scipy.spatial.distance.pdist:
249 | # "braycurtis", "canberra", "chebyshev", "cityblock", "correlation", "cosine",
250 | # "dice", "euclidean", "hamming", "jaccard", "jensenshannon", "kulsinski",
251 | # "mahalanobis", "matching", "minkowski", "rogerstanimoto", "russellrao",
252 | # "seuclidean", "sokalmichener", "sokalsneath", "sqeuclidean", "yule".
253 | 
254 | # --------------------------------------
255 | # tSNE-specific configurations:
256 | # https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html
257 | perplexity: 50.0
258 | # (float) Related to the number of nearest neighbors used in other
259 | # manifold learning algorithms. Larger datasets usually require
260 | # larger perplexity. Different values can result in significantly
261 | # different results.
262 | 
263 | earlyExaggeration: 12.0
264 | # (float) For larger values, the space between natural clusters
265 | # will be larger in the embedded space.
266 | 
267 | learningRateTSNE: 200.0
268 | # (float) tSNE learning rate (typically between 10.0 and 1000.0).
269 | 
270 | randomStateTSNE: 5
271 | # (int) Determines the random number generator for reproducible results
272 | # across multiple function calls.
273 | 
274 | # --------------------------------------
275 | # UMAP-specific configurations:
276 | # https://umap-learn.readthedocs.io/en/latest/api.html
277 | nNeighbors: 6
278 | # (int) The size of local neighborhood (in terms of number of
279 | # neighboring sample points) used for manifold approximation.
280 | # Larger values result in more global views of the manifold,
281 | # while smaller values result in more local data being preserved.
282 | # In general values should be in the range 2 to 100.
283 | 
284 | learningRateUMAP: 1.0
285 | # (float) The initial learning rate for the embedding optimization.
286 | 
287 | minDist: 0.1
288 | # (float) The effective minimum distance between embedded points.
289 | # Smaller values will result in a more clustered/clumped
290 | # embedding where nearby points on the manifold are drawn
291 | # closer together, while larger values will result in a more
292 | # even dispersal of points. The value should be set relative
293 | # to the spread value, which determines the scale at which
294 | # embedded points will be spread out.
295 | 
296 | repulsionStrength: 5.0
297 | # (float) Weighting applied to negative samples in low dimensional
298 | # embedding optimization. Values higher than one will
299 | # result in greater weight being given to negative samples.
300 | 
301 | randomStateUMAP: 5
302 | # (int) Determines the random number generator for reproducible results
303 | # across multiple function calls.
304 | 
305 | 
306 | # frequencyStats-------------------------------------------------------------------
307 | controlGroups: ["CANCER-FALSE"]
308 | # (list of strs) Corresponds to control groups for each binary declaration
309 | # specified as the fourth elements of sampleMetadata values.
310 | 
311 | denominatorCluster: null
312 | # (null or int) Cluster to be used as the denominator when computing cluster
313 | # frequency ratios. Set to null first, change to cluster integer number
314 | # to normalize cluster frequencies to a particular cluster if desired.
315 | 
316 | FDRCorrection: False
317 | # (bool) Whether to compute p-vals and false discovery rate (FDR)-corrected
318 | # q-vals (True) or compute uncorrected p-vals only (False).
319 | 
320 | 
321 | # curateThumbnails-------------------------------------------------------------
322 | numThumbnails: 25
323 | # (int) Number of examples per cluster to be curated.
324 | 
325 | windowSize: 30
326 | # (int) Number of pixels in x and y dimensions per thumbnail.
327 | 
328 | segOutlines: True
329 | # (bool) Whether to overlay cell segmentation outlines on thumbnail images.
330 | 


--------------------------------------------------------------------------------
/cylinter/modules/aggregateData.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import yaml
  4 | import logging
  5 | 
  6 | import pandas as pd
  7 | 
  8 | from ..utils import input_check, read_markers, get_filepath, reorganize_dfcolumns
  9 | 
 10 | logger = logging.getLogger(__name__)
 11 | 
 12 | 
 13 | def aggregateData(data, self, args):
 14 | 
 15 |     print()
 16 |     
 17 |     check, markers_filepath = input_check(self)
 18 | 
 19 |     markers, abx_channels = read_markers( 
 20 |         markers_filepath=markers_filepath,
 21 |         counterstain_channel=self.counterstainChannel,
 22 |         markers_to_exclude=self.markersToExclude, data=None
 23 |     )
 24 | 
 25 |     # initialize CyLinter QC report if it hasn't been already
 26 |     report_path = os.path.join(self.outDir, 'cylinter_report.yml')
 27 |     if not os.path.exists(report_path):
 28 |         f = open(report_path, 'w')
 29 |         yaml.dump({}, f)
 30 |     
 31 |     df_list = []
 32 |     channel_setlist = []
 33 |     sample_keys = [i for i in self.sampleNames.keys()]
 34 |     for key in sample_keys:
 35 |         
 36 |         if check == 'standard':
 37 |             sample = key
 38 |         else:
 39 |             sample = key.split('--')[0]
 40 |         
 41 |         if sample not in self.samplesToExclude:
 42 | 
 43 |             logger.info(f'IMPORTING sample {key}')
 44 |             
 45 |             file_path = get_filepath(self, check, sample, 'CSV')
 46 |             csv = pd.read_csv(file_path)
 47 | 
 48 |             # drop markers in markersToExclude config parameter
 49 |             csv.drop(
 50 |                 columns=[i for i in self.markersToExclude
 51 |                          if i in csv.columns], axis=1, inplace=True)
 52 | 
 53 |             # select boilerplate columns
 54 |             cols = (
 55 |                 [i for i in [j for j in markers['marker_name']] +
 56 |                  [i for i in ['CellID', 'X_centroid', 'Y_centroid', 'Area', 'MajorAxisLength',
 57 |                               'MinorAxisLength', 'Eccentricity', 'Solidity', 'Extent', 
 58 |                               'Orientation'] if i in csv.columns]]
 59 |             )
 60 | 
 61 |             # (for BAF project)
 62 |             # cols = (
 63 |             #     ['CellID', 'Area', 'Solidity', 'X_centroid', 'Y_centroid',
 64 |             #      'CytArea', 'CoreCoord', 'AreaSubstruct',
 65 |             #      'MeanInsideSubstruct', 'CoreFlag', 'Corenum'] +
 66 |             #     [i for i in markers['marker_name'] if i in csv.columns]
 67 |             #      )
 68 | 
 69 |             # (for SARDANA)
 70 |             # select boilerplate columns and use specific
 71 |             # mask quantifications for different antibodies
 72 |             # mask_dict = {
 73 |             #     'Hoechst0': 'nucleiRingMask',
 74 |             #     'Hoechst1': 'nucleiRingMask',
 75 |             #     'Hoechst2': 'nucleiRingMask',
 76 |             #     'anti_CD3': 'cytoRingMask',
 77 |             #     'anti_CD45RO': 'cytoRingMask',
 78 |             #     'Hoechst3': 'nucleiRingMask',
 79 |             #     'Keratin_570': 'cellRingMask',
 80 |             #     'aSMA_660': 'cellRingMask',
 81 |             #     'Hoechst4': 'nucleiRingMask',
 82 |             #     'CD4_488': 'cytoRingMask',
 83 |             #     'CD45_PE': 'cytoRingMask',
 84 |             #     'PD1_647': 'cytoRingMask',
 85 |             #     'Hoechst5': 'nucleiRingMask',
 86 |             #     'CD20_488': 'cytoRingMask',
 87 |             #     'CD68_555': 'cellRingMask',
 88 |             #     'CD8a_660': 'cytoRingMask',
 89 |             #     'Hoechst6': 'nucleiRingMask',
 90 |             #     'CD163_488': 'cellRingMask',
 91 |             #     'FOXP3_570': 'nucleiRingMask',
 92 |             #     'PDL1_647': 'cytoRingMask',
 93 |             #     'Hoechst7': 'nucleiRingMask',
 94 |             #     'Ecad_488': 'cellRingMask',
 95 |             #     'Vimentin_555': 'cellRingMask',
 96 |             #     'CDX2_647': 'cellRingMask',
 97 |             #     'Hoechst8': 'nucleiRingMask',
 98 |             #     'LaminABC_488': 'nucleiRingMask',
 99 |             #     'Desmin_555': 'cellRingMask',
100 |             #     'CD31_647': 'nucleiRingMask',
101 |             #     'Hoechst9': 'nucleiRingMask',
102 |             #     'PCNA_488': 'nucleiRingMask',
103 |             #     'CollagenIV_647': 'cellRingMask'}
104 |             # cols = (
105 |             #     ['CellID', 'X_centroid', 'Y_centroid', 'Area',
106 |             #      'MajorAxisLength', 'MinorAxisLength',
107 |             #      'Eccentricity', 'Solidity', 'Extent',
108 |             #      'Orientation'] +
109 |             #     [f'{i}_{mask_dict[i]}' for i
110 |             #      in markers['marker_name']])
111 |             
112 |             try:
113 |                 csv = csv[cols]
114 |             except KeyError as e:
115 |                 logger.info(
116 |                     'Aborting; some (or all) marker names in markers.csv do not appear '
117 |                     'as columns in the single-cell data table. Check for spelling and case.'
118 |                 )
119 |                 print(e)
120 |                 sys.exit()
121 | 
122 |             # (for SARDANA)
123 |             # trim mask object names from column headers
124 |             # cols_update = [
125 |             #     i.rsplit('_', 1)[0] if 'Mask' in i else
126 |             #     i for i in csv.columns
127 |             # ]
128 |             # csv.columns = cols_update
129 | 
130 |             # add sample column
131 |             csv['Sample'] = sample
132 | 
133 |             # add condition column
134 |             csv['Condition'] = self.sampleConditionAbbrs[key]
135 | 
136 |             # add replicate column
137 |             csv['Replicate'] = self.sampleReplicates[key]
138 | 
139 |             # append dataframe to list
140 |             df_list.append(csv)
141 | 
142 |             # append the set of csv columns for sample to a list
143 |             # this will be used to select columns shared among samples
144 |             channel_setlist.append(set(csv.columns))
145 | 
146 |         else:
147 |             logger.info(f'censoring sample {sample}')
148 |     print()
149 | 
150 |     # stack dataframes row-wise
151 |     data = pd.concat(df_list, axis=0)
152 |     del df_list
153 | 
154 |     # only select channels shared among all samples
155 |     channels_set = list(set.intersection(*channel_setlist))
156 | 
157 |     logger.info(f'{len(data.columns)} total columns')
158 |     logger.info(f'{len(channels_set)} columns in common between all samples')
159 | 
160 |     before = set(data.columns)
161 |     after = set(channels_set)
162 |     if len(before.difference(after)) == 0:
163 |         pass
164 |     else:
165 |         markers_to_drop = list(before.difference(after))
166 |         print()
167 |         logger.warning(
168 |             f'Columns {markers_to_drop} are not in all'
169 |             ' samples and will be dropped from downstream analysis.'
170 |         )
171 |     data = data[channels_set].copy()
172 | 
173 |     # sort by Sample and CellID to be tidy
174 |     data.sort_values(by=['Sample', 'CellID'], inplace=True)
175 | 
176 |     # assign global index
177 |     data.reset_index(drop=True, inplace=True)
178 | 
179 |     # ensure MCMICRO-generated columns come first and
180 |     # are in the same order as csv feature tables
181 |     data = reorganize_dfcolumns(data, markers, self.dimensionEmbedding)
182 | 
183 |     print()
184 |     print()
185 |     return data
186 | 


--------------------------------------------------------------------------------
/cylinter/modules/clustermap.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import logging
  3 | 
  4 | import math
  5 | 
  6 | import seaborn as sns
  7 | import matplotlib.pyplot as plt
  8 | from matplotlib.colors import TwoSlopeNorm
  9 | 
 10 | from ..utils import input_check, read_markers, reorganize_dfcolumns
 11 | 
 12 | logger = logging.getLogger(__name__)
 13 | 
 14 | 
 15 | def clustermap(data, self, args):
 16 | 
 17 |     check, markers_filepath = input_check(self)
 18 | 
 19 |     # read marker metadata
 20 |     markers, abx_channels = read_markers( 
 21 |         markers_filepath=markers_filepath,
 22 |         counterstain_channel=self.counterstainChannel,
 23 |         markers_to_exclude=self.markersToExclude, data=None
 24 |     )
 25 | 
 26 |     # create clustering dimension directory if it hasn't already
 27 |     dim_dir = os.path.join(self.outDir, 'clustering', f'{self.dimensionEmbedding}d')
 28 |     if not os.path.exists(dim_dir):
 29 |         os.makedirs(dim_dir)
 30 | 
 31 |     # drop antibody channel exclusions for clustering
 32 |     abx_channels = [i for i in abx_channels if i not in self.channelExclusionsClustering]
 33 | 
 34 |     ######################################################################
 35 | 
 36 |     sns.set_style("whitegrid", {'axes.grid': False})
 37 |     gs = plt.GridSpec(len(abx_channels), 1)
 38 |     fig = plt.figure(figsize=(2, 7))
 39 | 
 40 |     ax_objs = []
 41 |     for i, channel in enumerate(abx_channels):
 42 | 
 43 |         # creating new axes object
 44 |         ax_objs.append(fig.add_subplot(gs[i:i + 1, 0:]))
 45 |   
 46 |         # plotting the distribution
 47 |         n, bins, patches = ax_objs[-1].hist(
 48 |             data[channel], bins=50, density=True, histtype='stepfilled',
 49 |             linewidth=2.0, ec='k', alpha=1.0, color='k'
 50 |         )
 51 | 
 52 |         # setting uniform x and y lims
 53 |         ax_objs[-1].set_xlim(0, 1)
 54 |         ax_objs[-1].set_ylim(0, math.ceil(n.max()) + 1)
 55 | 
 56 |         # make background transparent
 57 |         rect = ax_objs[-1].patch
 58 |         rect.set_alpha(0)
 59 | 
 60 |         # remove borders, axis ticks, and labels
 61 |         ax_objs[-1].set_yticklabels([])
 62 | 
 63 |         if i == len(abx_channels) - 1:
 64 |             ax_objs[-1].set_xlabel(
 65 |                 'Intensity', fontsize=11, fontweight='normal', labelpad=10
 66 |             )
 67 |         else:
 68 |             ax_objs[-1].set_xticks([])
 69 |             ax_objs[-1].set_xticklabels([])
 70 | 
 71 |         ax_objs[-1].set_yticks([])
 72 | 
 73 |         spines = ['top', 'right', 'left']
 74 |         for s in spines:
 75 |             ax_objs[-1].spines[s].set_visible(False)
 76 | 
 77 |         ax_objs[-1].tick_params(axis='x', width=2)
 78 | 
 79 |         ax_objs[-1].text(-0.02, 0, channel, fontweight='normal', fontsize=8, ha='right')
 80 |   
 81 |     gs.update(hspace=0.3)
 82 |     plt.subplots_adjust(left=0.3, bottom=0.1, right=0.9, top=0.95)
 83 |     plt.savefig(os.path.join(dim_dir, 'ridgeplots.pdf'))
 84 |     plt.close('all')
 85 | 
 86 |     ##############################################################################################
 87 | 
 88 |     for type in [f'cluster_{self.dimensionEmbedding}d', 'class']:
 89 |         if type in data.columns:
 90 | 
 91 |             if type == f'cluster_{self.dimensionEmbedding}d':
 92 | 
 93 |                 clustermap_input = data[data[type] != -1]
 94 | 
 95 |                 # compute mean antibody signals for clusters
 96 |                 clustermap_input = clustermap_input[abx_channels + [type]].groupby(type).mean()
 97 | 
 98 |             elif type == 'class':
 99 |                 
100 |                 clustermap_input = data[data[type] != 'unclassified']
101 |                 
102 |                 # compute mean antibody signals for clusters
103 |                 clustermap_input = clustermap_input[abx_channels + [type]].groupby(type).mean()
104 | 
105 |             if len(clustermap_input) > 1:
106 |                 
107 |                 sns.set(font_scale=0.7)
108 | 
109 |                 # Compute per channel z-scores across clusters
110 |                 clustermap_input = (
111 |                     (clustermap_input - clustermap_input.mean()) / clustermap_input.std()
112 |                 )
113 |                 # assign NaNs (channels with no variation in signal) to 0
114 |                 clustermap_input[clustermap_input.isna()] = 0
115 |                 
116 |                 # Zero-center colorbar
117 |                 norm = TwoSlopeNorm(
118 |                     vcenter=0, vmin=clustermap_input.min().min(),
119 |                     vmax=clustermap_input.max().max()
120 |                 )
121 | 
122 |                 g = sns.clustermap(
123 |                     clustermap_input, cmap='coolwarm', standard_scale=None, square=False,
124 |                     xticklabels=1, yticklabels=1, linewidth=0.0, cbar=True, norm=norm 
125 |                 )
126 |                 
127 |                 # g = sns.clustermap(
128 |                 #     clustermap_input, cmap='viridis', standard_scale=1, square=False,
129 |                 #     xticklabels=1, yticklabels=1, linewidth=0.0, cbar=True 
130 |                 # )
131 | 
132 |                 g.fig.suptitle('channel_z-scores.pdf', y=0.995, fontsize=10)
133 |                 g.fig.set_size_inches(6.0, 6.0)
134 |                 g.ax_heatmap.set_yticklabels(g.ax_heatmap.get_yticklabels(), rotation=0)
135 | 
136 |                 plt.savefig(
137 |                     os.path.join(dim_dir, f'{type}_channel_z-scores.pdf'), bbox_inches='tight'
138 |                 )
139 |             else:
140 |                 logger.info(
141 |                     f' {type} clustermap cannot be generated with only one cell population.'
142 |                 )
143 | 
144 |     data = reorganize_dfcolumns(data, markers, self.dimensionEmbedding)
145 | 
146 |     print()
147 |     print()
148 |     return data
149 | 


--------------------------------------------------------------------------------
/cylinter/modules/frequencyStats.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import logging
  3 | 
  4 | import numpy as np
  5 | import pandas as pd
  6 | 
  7 | import math
  8 | import natsort
  9 | from natsort import natsorted
 10 | from itertools import product
 11 | 
 12 | import seaborn as sns
 13 | import matplotlib.pyplot as plt
 14 | from matplotlib.lines import Line2D
 15 | 
 16 | from scipy.stats import ttest_ind
 17 | 
 18 | from ..utils import (
 19 |     input_check, read_markers, categorical_cmap, fdrcorrection, reorganize_dfcolumns
 20 | )
 21 | 
 22 | logger = logging.getLogger(__name__)
 23 | 
 24 | 
 25 | def frequencyStats(data, self, args):
 26 | 
 27 |     print()
 28 | 
 29 |     check, markers_filepath = input_check(self)
 30 | 
 31 |     # read marker metadata
 32 |     markers, abx_channels = read_markers( 
 33 |         markers_filepath=markers_filepath,
 34 |         counterstain_channel=self.counterstainChannel,
 35 |         markers_to_exclude=self.markersToExclude, data=None
 36 |     )
 37 |     
 38 |     for type in ['class', f'cluster_{self.dimensionEmbedding}d']:
 39 |         if type in data.columns:
 40 | 
 41 |             stats_input = data[['Sample', 'Replicate', type]]
 42 | 
 43 |             # loop over comma-delimited binary declarations
 44 |             for i in range(len(list(self.sampleStatuses.values())[0].split(', '))):
 45 | 
 46 |                 # get unique declaration categories (should be 2 per test)
 47 |                 comparison = set(
 48 |                     [j.split(', ')[i] for j in self.sampleStatuses.values()
 49 |                      if '-UNK' not in j.split(', ')[i]])
 50 | 
 51 |                 if len(comparison) > 1:
 52 | 
 53 |                     # assign test and control groups
 54 |                     test = [
 55 |                         i for i in comparison if i not in
 56 |                         self.controlGroups][0]
 57 |                     control = [
 58 |                         i for i in comparison if i in self.controlGroups][0]
 59 | 
 60 |                     # create frequency stats directory if it hasn't already
 61 |                     frequency_dir = os.path.join(
 62 |                         self.outDir, 'clustering',
 63 |                         f'{self.dimensionEmbedding}d',
 64 |                         'frequency_stats', type, f'{test}_v_{control}'
 65 |                     )
 66 |                     if not os.path.exists(frequency_dir):
 67 |                         os.makedirs(frequency_dir)
 68 | 
 69 |                     # create single-column dataFrame with all sample names
 70 |                     # to pad counts tables with zeros if a celltype
 71 |                     # is not in a tissue
 72 |                     pad = pd.DataFrame(
 73 |                         natsorted(stats_input['Sample'].unique())).rename(
 74 |                             columns={0: 'Sample'})
 75 | 
 76 |                     cluster_list = []
 77 |                     ratio_list = []
 78 |                     dif_list = []
 79 |                     pval_list = []
 80 | 
 81 |                     # intialize a dataframe to collect catplot data
 82 |                     catplot_input = pd.DataFrame()
 83 | 
 84 |                     # loop over populations
 85 |                     for cluster, group in natsorted(stats_input.groupby(type)):
 86 |                             
 87 |                         if cluster not in [-1, 'unclassified']:
 88 |                             
 89 |                             logger.info(
 90 |                                 f'Calculating log2({test}/{control}) of mean cell '
 91 |                                 f'density for {type} {str(cluster)}.')
 92 | 
 93 |                             group = (
 94 |                                 group.groupby(['Sample', 'Replicate', type])
 95 |                                 .size()
 96 |                                 .reset_index(drop=False)
 97 |                                 .rename(columns={0: 'count'})
 98 |                             )
 99 | 
100 |                             group = (
101 |                                 group
102 |                                 .merge(pad, how='right', on='Sample')
103 |                                 .sort_values(by='count', ascending=False)
104 |                             )
105 | 
106 |                             # guard against NaNs induced by the absence
107 |                             # of a given cluster in one or
108 |                             # more of the tissue samples
109 |                             group['count'] = [
110 |                                 0 if np.isnan(i) else int(i) for
111 |                                 i in group['count']
112 |                             ]
113 | 
114 |                             # get sample file names (i.e. sampleMetadata keys)
115 |                             # from config.yml based on "Sample" column
116 |                             # (first elements of sampleMetadata vals)
117 |                             def get_key(val):
118 |                                 for key, value in self.sampleNames.items():
119 |                                     if val == value:
120 |                                         return key
121 | 
122 |                                 return "key doesn't exist"
123 |                             file_names = [get_key(i) for i in group['Sample']]
124 | 
125 |                             # add binary declarations column to group data
126 |                             group['status'] = [
127 |                                 self.sampleStatuses[j].split(', ')[i]
128 |                                 for j in file_names]
129 | 
130 |                             # add replicates column to group data
131 |                             group['Replicate'] = [self.sampleReplicates[i] for i in file_names]
132 | 
133 |                             group[type] = cluster
134 | 
135 |                             # drop samples for which a declaration cannot be made
136 |                             group = group[~group['status'].str.contains('-UNK')]
137 | 
138 |                             group.reset_index(drop=True, inplace=True)
139 | 
140 |                             # get denominator cell count for each sample
141 |                             if self.denominatorCluster is None:
142 |                                 group['tissue_count'] = [
143 |                                     len(stats_input[stats_input['Sample'] == i])
144 |                                     for i in group['Sample']]
145 |                             else:
146 |                                 group['tissue_count'] = [
147 |                                     len(stats_input[(stats_input['Sample'] == i) &
148 |                                         (stats_input[type] == self.denominatorCluster)])
149 |                                     for i in group['Sample']]
150 | 
151 |                             # compute density of cells per sample
152 |                             group['density'] = group['count'] / group['tissue_count']
153 | 
154 |                             # append group data to catplot_input
155 |                             catplot_input = pd.concat([catplot_input, group], axis=0)
156 | 
157 |                             # isolate test and control group values
158 |                             cnd1_values = group['density'][group['status'] == test]
159 |                             cnd2_values = group['density'][group['status'] == control]
160 | 
161 |                             # perform Welch's t-test (equal_var=False)
162 |                             stat, pval = ttest_ind(
163 |                                 cnd1_values, cnd2_values, axis=0, equal_var=False,
164 |                                 nan_policy='propagate'
165 |                             )
166 | 
167 |                             # round resulting values
168 |                             stat = round(stat, 6)
169 |                             pval = round(pval, 6)
170 | 
171 |                             # compute mean of test and control group values
172 |                             cnd1_mean = np.mean(cnd1_values)
173 |                             cnd2_mean = np.mean(cnd2_values)
174 | 
175 |                             # compute mean ratio
176 |                             ratio = np.log2(
177 |                                 (cnd1_mean + 0.00000000001) / (cnd2_mean + 0.00000000001)
178 |                             )
179 | 
180 |                             # compute mean difference
181 |                             dif = cnd1_mean - cnd2_mean
182 | 
183 |                             cluster_list.append(cluster)
184 |                             ratio_list.append(ratio)
185 |                             dif_list.append(dif)
186 |                             pval_list.append(pval)
187 | 
188 |                     # create stats dataframe
189 |                     statistics = pd.DataFrame(
190 |                         list(zip(cluster_list, ratio_list, dif_list, pval_list)),
191 |                         columns=[type, 'ratio', 'dif', 'pval']
192 |                     ).sort_values(by=type)
193 | 
194 |                     # compute FDR p-val corrections
195 |                     # (uses statsmodels.stats.multitest implementation)
196 |                     rejected, p_adjust = fdrcorrection(
197 |                         statistics['pval'].tolist(), alpha=0.05, method='indep', is_sorted=False
198 |                     )
199 | 
200 |                     statistics['qval'] = p_adjust
201 | 
202 |                     # save total stats table
203 |                     statistics.to_csv(
204 |                         os.path.join(frequency_dir, 'stats_total.csv'), index=False
205 |                     )
206 | 
207 |                     if self.FDRCorrection:
208 |                         stat = 'qval'
209 |                     else:
210 |                         stat = 'pval'
211 | 
212 |                     # isolate statistically significant stat values
213 |                     significant = statistics[statistics[stat] <= 0.05].sort_values(by=stat)
214 | 
215 |                     # save significant stats table
216 |                     significant.to_csv(
217 |                         os.path.join(frequency_dir, 'stats_sig.csv'), index=False
218 |                     )
219 | 
220 |                     # plot
221 |                     sns.set_style('whitegrid')
222 |                     fig, ax = plt.subplots()
223 |                     plt.scatter(abs(significant['dif']), significant['ratio'], s=9.0, c='tab:red')
224 | 
225 |                     for label, qval, x, y in zip(
226 |                         significant[type], significant[stat],
227 |                             abs(significant['dif']), significant['ratio']):
228 | 
229 |                         plt.annotate(
230 |                             (label, f'{stat[0]}=' + str(qval)), size=3,
231 |                             xy=(x, y), xytext=(0, 0),
232 |                             textcoords='offset points', ha='right',
233 |                             va='bottom',
234 |                             bbox=dict(boxstyle='round,pad=0.1', fc='yellow',
235 |                                       alpha=0.0)
236 |                         )
237 | 
238 |                     fontsize = {'size': 8}
239 |                     ax.xaxis.set_tick_params(which='major', reset=False, **fontsize)
240 |                     ax.yaxis.set_tick_params(which='major', reset=False, **fontsize)
241 | 
242 |                     plt.title(f'{test} vs. {control} ({stat[0]}<0.05)', fontsize=9)
243 |                     plt.xlabel(f'abs({test} - {control})', fontsize=8)
244 |                     plt.ylabel(f'log2({test} / {control})', fontsize=8)
245 |                     plt.savefig(os.path.join(frequency_dir, 'plot.pdf'))
246 |                     plt.close()
247 | 
248 |                     catplot_input.reset_index(drop=True, inplace=True)
249 | 
250 |                     catplot_input[stat] = [
251 |                         'ns' if i not in
252 |                         significant[type].unique() else
253 |                         significant[stat][
254 |                             significant[type] == i].values[0]
255 |                         for i in catplot_input[type]]
256 | 
257 |                     # filter catplot_input to plot only significant differences
258 |                     catplot_input = catplot_input[catplot_input[stat] != 'ns']
259 | 
260 |                     if not catplot_input.empty:
261 |                         # build cmap
262 |                         cmap = categorical_cmap(
263 |                             numUniqueSamples=len(catplot_input['Sample'].unique()),
264 |                             numCatagories=10, cmap='tab10', continuous=False
265 |                         )
266 | 
267 |                         sample_color_dict = dict(
268 |                             zip(natsorted(catplot_input['Sample'].unique()),
269 |                                 cmap.colors))
270 | 
271 |                         catplot_input[type] = (
272 |                             catplot_input[type].astype(str) +
273 |                             f'; {stat} = ' + catplot_input[stat].astype(str)
274 |                         )
275 | 
276 |                         catplot_input.sort_values(
277 |                             by=[stat, 'status', 'density'], key=lambda x:
278 |                             natsort.natsort_keygen(
279 |                                 alg=natsort.ns.LOCALE |
280 |                                 natsort.ns.IGNORECASE)(x), inplace=True
281 |                         )
282 | 
283 |                         sns.set(font_scale=0.3)
284 |                         sns.set_style('whitegrid')
285 |                         ncols = 5
286 |                         nrows = math.ceil(len(catplot_input[type].unique()) / ncols)
287 | 
288 |                         fig = plt.figure(figsize=(ncols + 2, nrows))
289 | 
290 |                         # grid specifications
291 |                         gs = plt.GridSpec(nrows=nrows, ncols=ncols, figure=fig)
292 | 
293 |                         for (name, group), ax in zip(
294 |                             catplot_input.groupby(type, sort=False),
295 |                                 product(range(nrows), range(ncols))):
296 | 
297 |                             ax = fig.add_subplot(gs[ax[0], ax[1]])
298 |                             
299 |                             group['status'] = [i.split('-')[1] for i in group['status']]
300 |                             
301 |                             sns.barplot(
302 |                                 data=group, x='status', y='density', hue='Sample', 
303 |                                 palette=sample_color_dict, width=0.8, lw=0.0, ax=ax 
304 |                             )
305 |                             
306 |                             ax.grid(lw=0.5)
307 |                             [x.set_linewidth(0.5) for x in ax.spines.values()]
308 |                             plt.tick_params(axis='x', pad=-3)
309 |                             ax.set(xlabel=None)
310 |                             plt.tick_params(axis='y', pad=-3)
311 |                             ax.yaxis.labelpad = 2
312 |                             ax.set_title(name, size=2, pad=2)
313 |                             ax.legend_.remove()
314 | 
315 |                             plt.tight_layout()
316 |                          
317 |                         file_names = [
318 |                             get_key(i) for i in natsorted(catplot_input['Sample'].unique())
319 |                         ]
320 | 
321 |                         sample_conds = [self.sampleConditions[i] for i in file_names]
322 | 
323 |                         sample_abbrs = [self.sampleConditionAbbrs[i] for i in file_names]
324 | 
325 |                         cond_abbr = [f'{i}-{j}' for i, j in zip(sample_conds, sample_abbrs)]
326 | 
327 |                         handles_dict = dict(zip(
328 |                             natsorted(catplot_input['Sample'].unique()), cond_abbr)
329 |                         )
330 | 
331 |                         legend_handles = []
332 |                         for k, v in handles_dict.items():
333 |                             legend_handles.append(
334 |                                 Line2D([0], [0], marker='o', color='none',
335 |                                        label=v, markerfacecolor=sample_color_dict[k],
336 |                                        markeredgecolor='k', markeredgewidth=0.2,
337 |                                        markersize=5.0)
338 |                             )
339 | 
340 |                         fig.legend(
341 |                             handles=legend_handles, prop={'size': 5.0}, loc='upper left',
342 |                             bbox_to_anchor=[1.0, 1.0]
343 |                         )
344 | 
345 |                         plt.savefig(
346 |                             os.path.join(frequency_dir, 'catplot.pdf'), bbox_inches='tight'
347 |                         )
348 |                         plt.close('all')
349 | 
350 |                         print()
351 | 
352 |                 else:
353 |                     logger.info(
354 |                         'Only one binary declaration ' +
355 |                         f'class represented for {list(comparison)[0]}. ' +
356 |                         'Statistics will not be computed.')
357 |                     print()
358 |         print()
359 | 
360 |     data = reorganize_dfcolumns(data, markers, self.dimensionEmbedding)
361 | 
362 |     print()
363 |     return data
364 | 


--------------------------------------------------------------------------------
/cylinter/modules/logTransform.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | from ..utils import input_check, read_markers, reorganize_dfcolumns
 4 | 
 5 | 
 6 | def logTransform(data, self, args):
 7 | 
 8 |     check, markers_filepath = input_check(self)
 9 | 
10 |     # read marker metadata
11 |     markers, abx_channels = read_markers( 
12 |         markers_filepath=markers_filepath,
13 |         counterstain_channel=self.counterstainChannel,
14 |         markers_to_exclude=self.markersToExclude, data=None
15 |     )
16 | 
17 |     abx_channels_mod = data[abx_channels].copy()
18 |     abx_channels_mod = np.log10(abx_channels_mod + 0.001)
19 |     data.loc[:, abx_channels] = abx_channels_mod
20 |     
21 |     # clip cells with zero-valued signal intensities to the Nth percentile of the
22 |     # distribution (not considering the zero-valued signals themselves).     
23 |     # percentile = 5
24 |     # percentiles = (
25 |     #     data.loc[:, abx_channels][data.loc[:, abx_channels] > 0.0].quantile(q=percentile / 100)
26 |     # )
27 |     # data.loc[:, abx_channels] = (
28 |     #     data.loc[:, abx_channels].clip(lower=percentiles, upper=None, axis=1)
29 |     # )
30 | 
31 |     data = reorganize_dfcolumns(data, markers, self.dimensionEmbedding)
32 | 
33 |     print()
34 |     print()
35 |     return data
36 | 


--------------------------------------------------------------------------------
/cylinter/modules/setContrast.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import yaml
  4 | import logging
  5 | 
  6 | from matplotlib.backends.qt_compat import QtWidgets
  7 | from qtpy.QtCore import QTimer
  8 | 
  9 | import napari
 10 | from magicgui import magicgui
 11 | 
 12 | from ..utils import (
 13 |     input_check, read_markers, marker_channel_number, single_channel_pyramid,
 14 |     get_filepath, reorganize_dfcolumns, sort_qc_report
 15 | )
 16 | 
# logger named after this module
logger = logging.getLogger(__name__)

# --- state shared between setContrast() and the Napari widget callbacks ---
# maps each channel name to the sample selected for viewing that channel
# (populated by setContrast(), read by next_sample())
channels_to_samples = {}
# set to True by sample_callback() after a sample is chosen through the
# "Sample Selector" widget; next_sample() checks it, steps sample_index back
# by one when it is set, and clears it again after advancing
arbitrary_selection_toggle = False
# position, within the keys of channels_to_samples, of the channel that
# next_sample() loads on its next invocation
sample_index = 1
 22 | 
 23 | 
 24 | def callback(self, viewer, channel, sample, data, initial_callback, next_widget, next_layout, arbitrary_widget, arbitrary_layout, qc_report, report_path):
 25 | 
 26 |     check, markers_filepath = input_check(self)
 27 | 
 28 |     # read marker metadata
 29 |     markers, abx_channels = read_markers( 
 30 |         markers_filepath=markers_filepath,
 31 |         counterstain_channel=self.counterstainChannel,
 32 |         markers_to_exclude=self.markersToExclude, data=None
 33 |     )
 34 | 
 35 |     # clear existing channels from Napari window if they exist
 36 |     viewer.layers.clear()
 37 |     
 38 |     # remove next_widget and arbitrary_widget docks and layout attributes from Napari viewer
 39 |     if not initial_callback:
 40 |         viewer.window.remove_dock_widget(next_widget)
 41 |         count = next_layout.count()
 42 |         for i in range(count - 1, -1, -1):
 43 |             item = next_layout.itemAt(i)
 44 |             widget = item.widget()
 45 |             if widget:
 46 |                 widget.setParent(None)
 47 | 
 48 |         viewer.window.remove_dock_widget(arbitrary_widget)
 49 |         count = arbitrary_layout.count()
 50 |         for i in range(count - 1, -1, -1):
 51 |             item = arbitrary_layout.itemAt(i)
 52 |             widget = item.widget()
 53 |             if widget:
 54 |                 widget.setParent(None)
 55 | 
 56 |     # read segmentation outlines, add to Napari
 57 |     file_path = get_filepath(self, check, sample, 'SEG')
 58 |     seg, min, max = single_channel_pyramid(file_path, channel=0)
 59 |     viewer.add_image(
 60 |         seg, rgb=False, blending='additive', colormap='gray',
 61 |         visible=False, name='segmentation', contrast_limits=(min, max)
 62 |     )
 63 |     
 64 |     # read DNA1 channel
 65 |     file_path = get_filepath(self, check, sample, 'TIF')
 66 |     channel_number = marker_channel_number(self, markers, self.counterstainChannel)
 67 |     dna, min, max = single_channel_pyramid(file_path, channel=channel_number)
 68 |     viewer.add_image(
 69 |         dna, rgb=False, blending='additive', colormap='gray',
 70 |         name=self.counterstainChannel, contrast_limits=(min, max)
 71 |     )
 72 | 
 73 |     # read target antibody image
 74 |     if channel != self.counterstainChannel:
 75 |         channel_number = marker_channel_number(self, markers, channel)
 76 |         file_path = get_filepath(self, check, sample, 'TIF')
 77 |         img, min, max = single_channel_pyramid(file_path, channel=channel_number)
 78 |         viewer.add_image(
 79 |             img, rgb=False, blending='additive', colormap='green',
 80 |             visible=True, name=channel, contrast_limits=(min, max)
 81 |         )
 82 | 
 83 |     # apply previously defined contrast limits if they exist 
 84 |     try:
 85 |         viewer.layers[self.counterstainChannel].contrast_limits = (
 86 |             qc_report['setContrast'][
 87 |                 self.counterstainChannel][0], qc_report['setContrast'][self.counterstainChannel][1]
 88 |         )
 89 |     except KeyError:
 90 |         pass
 91 | 
 92 |     try:
 93 |         viewer.layers[channel].contrast_limits = (
 94 |             qc_report['setContrast'][channel][0], qc_report['setContrast'][channel][1])
 95 |     except KeyError:
 96 |         pass
 97 | 
 98 |     # dock (or re-dock) next_widget and arbitrary_widget to Napari window
 99 |     viewer.window.add_dock_widget(
100 |         next_widget, name=f'Channel: {channel},  Sample: {sample}', area='right'
101 |     )
102 |     viewer.window.add_dock_widget(
103 |         arbitrary_widget, name='Sample Selector', area='right'
104 |     )
105 | 
106 |     #######################################################################
107 |     
108 |     @magicgui(
109 |         layout='horizontal',
110 |         call_button='Apply Limits and Move to Next Channel -->'
111 |     )
112 |     def next_sample(channel):
113 | 
114 |         global channels_to_samples
115 |         global arbitrary_selection_toggle
116 |         global sample_index
117 | 
118 |         # update channel contrast yaml with selected constrast limits            
119 |         qc_report['setContrast'][self.counterstainChannel] = (
120 |             [int(i) for i in viewer.layers[self.counterstainChannel].contrast_limits]
121 |         )
122 |         qc_report['setContrast'][channel] = [
123 |             int(i) for i in viewer.layers[channel].contrast_limits
124 |         ]
125 | 
126 |         # sort and dump updated qc_report to YAML file
127 |         qc_report_sorted = sort_qc_report(
128 |             qc_report, module='setContrast', order=[self.counterstainChannel] + abx_channels
129 |         )
130 |         f = open(report_path, 'w')
131 |         yaml.dump(qc_report_sorted, f, sort_keys=False, allow_unicode=False)
132 |         
133 |         # go to next sample
134 |         try:
135 |             if arbitrary_selection_toggle:
136 |                 sample_index -= 1 
137 | 
138 |             channel = list(channels_to_samples.keys())[sample_index]
139 |             sample = channels_to_samples[list(channels_to_samples.keys())[sample_index]] 
140 |             
141 |             initial_callback = False
142 |             callback(
143 |                 self, viewer, channel, sample, data, initial_callback, 
144 |                 next_widget, next_layout, arbitrary_widget, arbitrary_layout,
145 |                 qc_report, report_path
146 |             )
147 | 
148 |             sample_index += 1
149 |             arbitrary_selection_toggle = False
150 |         
151 |         except IndexError:
152 | 
153 |             print()
154 |             logger.info('Contrast Adjustments Complete!')
155 |             QTimer().singleShot(0, viewer.close)
156 |     
157 |     next_sample.native.setSizePolicy(
158 |         QtWidgets.QSizePolicy.Minimum,
159 |         QtWidgets.QSizePolicy.Maximum,
160 |     )
161 | 
162 |     # give next_sample access to channel passed to callback
163 |     next_sample.channel.bind(channel)
164 | 
165 |     next_layout.addWidget(next_sample.native)
166 |     
167 |     #######################################################################
168 | 
169 |     @magicgui(layout='vertical', call_button='Enter', sample={'label': 'Sample Name'})
170 |     def sample_selector(sample: str):
171 | 
172 |         return sample
173 | 
174 |     sample_selector.native.setSizePolicy(
175 |         QtWidgets.QSizePolicy.Fixed,
176 |         QtWidgets.QSizePolicy.Fixed
177 |     )
178 | 
179 |     arbitrary_layout.addWidget(sample_selector.native)
180 | 
181 |     # call connect
182 |     @sample_selector.called.connect
183 |     def sample_callback(value: str):
184 | 
185 |         global arbitrary_selection_toggle
186 |         
187 |         sample = value
188 | 
189 |         print()
190 |         if sample not in data['Sample'].unique():
191 |             napari.utils.notifications.show_warning('Sample name not in filtered data.')
192 |             pass
193 |         else:
194 |             # update channel contrast yaml with selected constrast limits
195 |                 
196 |             qc_report['setContrast'][self.counterstainChannel] = (
197 |                 [int(i) for i in viewer.layers[self.counterstainChannel].contrast_limits]
198 |             )
199 |             qc_report['setContrast'][channel] = [int(i) for i in viewer.layers[channel].contrast_limits]
200 | 
201 |             # dump updated qc_report to YAML file
202 |             qc_report_sorted = sort_qc_report(
203 |                 qc_report, module='setContrast', order=[self.counterstainChannel] + abx_channels
204 |             )
205 |             f = open(report_path, 'w')
206 |             yaml.dump(qc_report_sorted, f, sort_keys=False, allow_unicode=False)
207 | 
208 |             initial_callback = False
209 |             callback(
210 |                 self, viewer, channel, sample, data, initial_callback, 
211 |                 next_widget, next_layout, arbitrary_widget, arbitrary_layout,
212 |                 qc_report, report_path
213 |             )
214 | 
215 |             arbitrary_selection_toggle = True
216 |     
217 |     #######################################################################
218 |     napari.utils.notifications.show_info(f'Viewing marker {channel} in sample {sample}')
219 | 
220 | 
221 | # main
def setContrast(data, self, args):
    """Interactively choose per-channel image contrast limits in Napari.

    The QC report (``cylinter_report.yml`` in ``self.outDir``) is loaded and
    given an empty ``'setContrast'`` section if it lacks one. For the
    counterstain and each antibody channel, the sample whose median channel
    value is closest to the 0.85 quantile of per-sample medians is selected
    for viewing. A Napari viewer is opened on the first channel and control
    is handed to ``callback``; the function resumes once ``napari.run()``
    returns.

    Parameters
    ----------
    data : dataframe with a ``'Sample'`` column and one column per channel.
    self : object providing ``outDir``, ``counterstainChannel``,
        ``markersToExclude`` and ``dimensionEmbedding``.
    args : unused by this function.

    Returns
    -------
    The dataframe returned by ``reorganize_dfcolumns``.

    Exits
    -----
    Calls ``sys.exit()`` when the QC report cannot be read, or when, after
    the viewer closes, the report does not hold limits for every channel.
    """

    global channels_to_samples
    global arbitrary_selection_toggle
    global sample_index

    # restore the navigation state to its import-time values so that a repeated
    # call in the same process starts again from the first channel
    channels_to_samples.clear()
    arbitrary_selection_toggle = False
    sample_index = 1

    print()

    check, markers_filepath = input_check(self)

    # read marker metadata
    markers, abx_channels = read_markers(
        markers_filepath=markers_filepath,
        counterstain_channel=self.counterstainChannel,
        markers_to_exclude=self.markersToExclude, data=None
    )

    # read QC report
    report_path = os.path.join(self.outDir, 'cylinter_report.yml')
    try:
        with open(report_path) as f:
            qc_report = yaml.safe_load(f)
        reload_report = False
        if qc_report is None:
            qc_report = {}
            reload_report = True
        if 'setContrast' not in qc_report or qc_report['setContrast'] is None:
            qc_report['setContrast'] = {}
            reload_report = True
        if reload_report:
            # persist the initialized report, then read it back from disk
            qc_report_sorted = sort_qc_report(qc_report, module='setContrast', order=None)
            with open(report_path, 'w') as f:
                yaml.dump(qc_report_sorted, f, sort_keys=False, allow_unicode=False)
            with open(report_path) as f:
                qc_report = yaml.safe_load(f)
    except Exception:
        # Exception (rather than a bare except) lets KeyboardInterrupt and
        # SystemExit propagate instead of being reported as a missing report
        logger.info(
            'Aborting; QC report missing from CyLinter output directory. Re-start pipeline '
            'from aggregateData module to initialize QC report.'
        )
        sys.exit()

    viewer = napari.Viewer(title='CyLinter')

    # generate next sample selection Qt widget
    next_widget = QtWidgets.QWidget()
    next_layout = QtWidgets.QVBoxLayout(next_widget)
    next_widget.setSizePolicy(
        QtWidgets.QSizePolicy.Minimum,
        QtWidgets.QSizePolicy.Fixed,
    )

    # generate arbitrary sample selection Qt widget
    arbitrary_widget = QtWidgets.QWidget()
    arbitrary_layout = QtWidgets.QVBoxLayout(arbitrary_widget)
    arbitrary_widget.setSizePolicy(
        QtWidgets.QSizePolicy.Minimum,
        QtWidgets.QSizePolicy.Fixed,
    )

    # identify samples with 85th percentile of median cell signal intensity
    # (trying to avoid outliers associated with max values)
    for ch in [self.counterstainChannel] + abx_channels:
        medians = data[['Sample', ch]].groupby('Sample').median()
        percentile_value = medians.quantile(0.85).item()
        differences = abs(medians - percentile_value)
        # select sample whose median channel value is closest to quantile
        selected_sample = differences.idxmin().item()
        channels_to_samples[ch] = selected_sample

    # pass first channel and sample in channels_to_samples to callback
    channel = list(channels_to_samples.keys())[0]
    sample = channels_to_samples[channel]

    initial_callback = True
    callback(
        self, viewer, channel, sample, data, initial_callback,
        next_widget, next_layout, arbitrary_widget, arbitrary_layout,
        qc_report, report_path
    )

    viewer.scale_bar.visible = True
    viewer.scale_bar.unit = 'um'

    napari.run()

    print()

    ##############################################################################################
    # print current channel contrast limits and exit

    if set(qc_report['setContrast']) == set(abx_channels + [self.counterstainChannel]):
        logger.info('Current channel contrast settings are as follows:')
        for k, v in qc_report['setContrast'].items():
            logger.info(f'{k}: {v}')
    else:
        logger.info(
            'Aborting; QC report does not contain contrast settings for all channels. '
            'Please ensure limits are selected for all channels.'
        )
        sys.exit()

    data = reorganize_dfcolumns(data, markers, self.dimensionEmbedding)

    print()
    print()
    return data


--------------------------------------------------------------------------------
/cylinter/pipeline.py:
--------------------------------------------------------------------------------
  1 | import pyarrow
  2 | import pyarrow.parquet
  3 | import pandas as pd
  4 | from . import components
  5 | 
  6 | 
  7 | def save_checkpoint(data, config, module):
  8 |     module_name = module.__name__
  9 |     path = config.checkpoint_path / f"{module_name}.parquet"
 10 |     path.parent.mkdir(parents=True, exist_ok=True)
 11 |     # Ideally we would have just used pandas' to_parquet instead of calling
 12 |     # pyarrow directly, but to_parquet has as an over-zealous validity check on
 13 |     # the input dataframe that errors with a column MultiIndex. If that bug is
 14 |     # resolved we can switch to use just the following commented line.
 15 |     # data.to_parquet(path, index=True)
 16 |     table = pyarrow.Table.from_pandas(data)
 17 |     pyarrow.parquet.write_table(table, path)
 18 | 
 19 | 
 20 | def run_pipeline(config, start_module_name):
 21 |     if (
 22 |         start_module_name is None
 23 |         or start_module_name == components.pipeline_module_names[0]
 24 |     ):
 25 |         start_index = 0
 26 |         data = None
 27 |     else:
 28 |         start_index = components.pipeline_module_names.index(start_module_name)
 29 |         previous_module_name = components.pipeline_module_names[start_index - 1]
 30 |         checkpoint_file_path = (
 31 |             config.checkpoint_path / f"{previous_module_name}.parquet"
 32 |         )
 33 |         if not checkpoint_file_path.exists():
 34 |             raise Exception(
 35 |                 f"Checkpoint file for module {previous_module_name} not found"
 36 |             )
 37 |         data = pd.read_parquet(checkpoint_file_path)
 38 | 
 39 |     # make instance of the QC class
 40 |     qc = components.QC(
 41 |         inDir=config.inDir,
 42 |         outDir=config.outDir,
 43 |         sampleNames=config.sampleNames,
 44 |         sampleConditions=config.sampleConditions,
 45 |         sampleConditionAbbrs=config.sampleConditionAbbrs,
 46 |         sampleStatuses=config.sampleStatuses,
 47 |         sampleReplicates=config.sampleReplicates,
 48 |         samplesToExclude=config.samplesToExclude,
 49 |         counterstainChannel=config.counterstainChannel,
 50 |         markersToExclude=config.markersToExclude,
 51 | 
 52 |         delintMode=config.delintMode,
 53 |         showAbChannels=config.showAbChannels,
 54 |         samplesForROISelection=config.samplesForROISelection,
 55 |         autoArtifactDetection=config.autoArtifactDetection,
 56 |         artifactDetectionMethod=config.artifactDetectionMethod,
 57 | 
 58 |         numBinsIntensity=config.numBinsIntensity,
 59 | 
 60 |         numBinsArea=config.numBinsArea,
 61 | 
 62 |         numBinsCorrelation=config.numBinsCorrelation,
 63 | 
 64 |         hexbins=config.hexbins,
 65 |         hexbinGridSize=config.hexbinGridSize,
 66 | 
 67 |         metaQC=config.metaQC,
 68 | 
 69 |         channelExclusionsPCA=config.channelExclusionsPCA,
 70 |         samplesToRemovePCA=config.samplesToRemovePCA,
 71 |         dimensionPCA=config.dimensionPCA,
 72 |         pointSize=config.pointSize,
 73 |         labelPoints=config.labelPoints,
 74 |         distanceCutoff=config.distanceCutoff,
 75 |         conditionsToSilhouette=config.conditionsToSilhouette,
 76 | 
 77 |         gating=config.gating,
 78 |         channelExclusionsGating=config.channelExclusionsGating,
 79 |         samplesToRemoveGating=config.samplesToRemoveGating,
 80 |         vectorThreshold=config.vectorThreshold,
 81 |         classes=config.classes,
 82 | 
 83 |         embeddingAlgorithmQC=config.embeddingAlgorithmQC,
 84 |         embeddingAlgorithm=config.embeddingAlgorithm,
 85 |         channelExclusionsClusteringQC=config.channelExclusionsClusteringQC,
 86 |         channelExclusionsClustering=config.channelExclusionsClustering,
 87 |         normalizeTissueCounts=config.normalizeTissueCounts,
 88 |         samplesToRemoveClusteringQC=config.samplesToRemoveClusteringQC,
 89 |         samplesToRemoveClustering=config.samplesToRemoveClustering,
 90 |         percentDataPerChunk=config.percentDataPerChunk,
 91 |         fracForEmbedding=config.fracForEmbedding,
 92 |         dimensionEmbedding=config.dimensionEmbedding,
 93 |         colormapAnnotationQC=config.colormapAnnotationQC,
 94 |         colormapAnnotationClustering=config.colormapAnnotationClustering,
 95 | 
 96 |         perplexityQC=config.perplexityQC,
 97 |         perplexity=config.perplexity,
 98 |         earlyExaggerationQC=config.earlyExaggerationQC,
 99 |         earlyExaggeration=config.earlyExaggeration,
100 |         learningRateTSNEQC=config.learningRateTSNEQC,
101 |         learningRateTSNE=config.learningRateTSNE,
102 |         metricQC=config.metricQC,
103 |         metric=config.metric,
104 |         randomStateQC=config.randomStateQC,
105 |         randomStateTSNE=config.randomStateTSNE,
106 | 
107 |         nNeighborsQC=config.nNeighborsQC,
108 |         nNeighbors=config.nNeighbors,
109 |         learningRateUMAPQC=config.learningRateUMAPQC,
110 |         learningRateUMAP=config.learningRateUMAP,
111 |         minDistQC=config.minDistQC,
112 |         minDist=config.minDist,
113 |         repulsionStrengthQC=config.repulsionStrengthQC,
114 |         repulsionStrength=config.repulsionStrength,
115 |         randomStateUMAP=config.randomStateUMAP,
116 | 
117 |         controlGroups=config.controlGroups,
118 |         denominatorCluster=config.denominatorCluster,
119 |         FDRCorrection=config.FDRCorrection,
120 | 
121 |         numThumbnails=config.numThumbnails,
122 |         windowSize=config.windowSize,
123 |         segOutlines=config.segOutlines,
124 |     )
125 | 
126 |     # start_idx = module_order[start_index:]
127 |     for module in components.pipeline_modules[start_index:]:
128 |         print(f'Running: {module}')
129 |         data = module(data, qc, config)  # getattr(qc, module)
130 |         # data(config)
131 |         save_checkpoint(data, config, module)
132 | 


--------------------------------------------------------------------------------
/cylinter/prep.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import os
 3 | import argparse
 4 | import pathlib
 5 | from subprocess import call
 6 | 
 7 | 
 8 | def main(argv=sys.argv):
 9 | 
10 |     parser = argparse.ArgumentParser(
11 |         description='Prepare an input directory for CyLinter analysis.',
12 |         formatter_class=argparse.RawDescriptionHelpFormatter,
13 |     )
14 |     parser.add_argument(
15 |         '-t', action='store_true', help='optional flag for TMA data'
16 |         )
17 |     parser.add_argument(
18 |         'source_dir', type=str,
19 |         help='path to mcmicro output directory'
20 |     )
21 |     parser.add_argument(
22 |         'dest_dir', type=path_resolved,
23 |         help='path to CyLinter input directory'
24 |     )
25 |     args = parser.parse_args()
26 | 
27 |     os.makedirs(args.dest_dir, exist_ok=True)
28 | 
29 |     call([f'sh {sys.prefix}/prep_subprocess.sh {args.t} "{args.source_dir}" "{args.dest_dir}" {sys.prefix}/config.yml'], shell=True)
30 | 
31 |     return 0
32 | 
33 | 
def path_resolved(path_str):
    """Build a ``pathlib.Path`` from ``path_str`` and return it resolved."""
    return pathlib.Path(path_str).resolve()
39 | 


--------------------------------------------------------------------------------
/cylinter/prep_subprocess.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | # Transfer mcmicro output files from cluster and format local directory structure for compatibility with CyLinter pipeline (run locally).
  4 | 
  5 | # USAGE: sh src/qc_prep.sh -t(required for TMA data) <source_dir> <destination_dir>
  6 | 
  7 | # EXAMPLE(TMA): sh /Users/greg/projects/cycif-qc/src/qc_prep.sh -t gjb15@transfer.rc.hms.harvard.edu:/n/scratch3/users/g/gjb15/exemplar-002 /Users/greg/projects/cycif-qc/input
  8 | 
  9 | shift $(( OPTIND - 1 ))
 10 | 
 11 | if [ "$2" == "cylinter_demo" ]; then
 12 | 
 13 |   # Transfer emit22_demo from Sage Synapse
 14 |   synapse get -r syn52859560 --downloadLocation "$3" --multiThreaded
 15 | 
 16 |   # Rename quantification and dearray subdirectories to "csv" and "tif", respectively.
 17 |   if [ -d "$3"/quantification ]; then
 18 |       mv "$3"/quantification "$3"/csv
 19 |       mv "$3"/dearray "$3"/tif
 20 |   fi
 21 | 
 22 |   # Copy config.yml template to CyLinter input directory
 23 |   cp "$4" "$3"
 24 | 
 25 | else
 26 | 
 27 |   if $1; then
 28 | 
 29 |     echo "Transferring TMA data."
 30 | 
 31 |     # Transfer mcmicro output files to CyLinter input directory.
 32 |     rsync -avP -m "$2"/ "$3" --include quantification/*.csv --include dearray/*.tif --include markers.csv --exclude work --exclude '*.*'
 33 | 
 34 |     mkdir -p "$3"/seg
 35 | 
 36 |     # Rename quantification and dearray subdirectories to "csv" and "tif", respectively.
 37 |     if [ -d "$3"/seg ]; then
 38 | 
 39 |       mv "$3"/*/quantification "$3"/csv
 40 |       mv "$3"/*/dearray "$3"/tif
 41 | 
 42 |       files=("$3"/csv/*)
 43 |       SEG=$(echo "$(basename "${files[0]}")" | cut -d'_' -f2 | cut -d'.' -f1)
 44 | 
 45 |       rsync -avP -m "$2"/ "$3" --include qc/s3seg/*/"$SEG"Outlines.ome.tif --exclude work --exclude '*.*'
 46 | 
 47 |       for RESOLVED_PATH in "$3"/*/qc/s3seg/* ; do
 48 |         SAMPLE_NAME=$(basename "$RESOLVED_PATH")
 49 |         arrIN=(${SAMPLE_NAME//-/ })
 50 |         NAME=${arrIN[1]}
 51 |         mv "$RESOLVED_PATH"/"$SEG"Outlines.ome.tif "$RESOLVED_PATH"/"$NAME".ome.tif
 52 |         mv "$RESOLVED_PATH"/"$NAME".ome.tif "$3"/seg/
 53 |       done
 54 | 
 55 |       for SAMPLE_PATH in "$3"/* ; do
 56 |         SAMPLE_NAME=$(basename "$SAMPLE_PATH")
 57 |         if [ $SAMPLE_NAME != "csv" ] && [ $SAMPLE_NAME != "tif" ] && [ $SAMPLE_NAME != "seg" ] && [ $SAMPLE_NAME != "markers.csv" ]; then
 58 |           mv "$SAMPLE_PATH"/markers.csv "$3"/
 59 |           rm -r "$SAMPLE_PATH"
 60 |         fi
 61 |       done
 62 |     fi
 63 | 
 64 |     # copy configuration template to input dir
 65 |     cp "$4" "$3"/
 66 | 
 67 |   else
 68 |     echo "Transferring whole tissue data."
 69 | 
 70 |     # Transfer mcmicro output files to CyLinter input directory.
 71 |     rsync -avP -m "$2"/ "$3" --include quantification/*.csv --include registration/*.ome.tif --include markers.csv --exclude work --exclude '*.*'
 72 | 
 73 |     # Make directories for images, data tables, and segmentation outlines
 74 |     mkdir -p "$3"/csv
 75 |     mkdir -p "$3"/tif
 76 |     mkdir -p "$3"/seg
 77 | 
 78 |     # combine sample tifs, csv files, and their segmentation outlines into respectively-labeled subdirectories.
 79 |     mv "$3"/*/quantification/*.csv "$3"/csv/
 80 |     mv "$3"/*/registration/*.tif "$3"/tif/
 81 | 
 82 |     SEG=$(echo "$(basename "$3"/csv/*)" | cut -d'_' -f2 | cut -d'.' -f1)
 83 |     rsync -avP -m "$2"/ "$3" --include qc/s3seg/*/"$SEG"Outlines.ome.tif --exclude work --exclude '*.*'
 84 | 
 85 |     for SAMPLE_PATH in "$3"/*/qc/s3seg/* ; do
 86 |       SAMPLE_NAME=$(basename "$SAMPLE_PATH")
 87 | 
 88 |       # crop off "unmicst-" prefix from outlines directory name
 89 |       arrIN=(${SAMPLE_NAME//unmicst-/ })
 90 | 
 91 |       mv "$SAMPLE_PATH/$SEG"Outlines.ome.tif "$SAMPLE_PATH"/"${arrIN[0]}".ome.tif
 92 |       mv "$SAMPLE_PATH"/"${arrIN[0]}".ome.tif "$3"/seg/
 93 |     done
 94 | 
 95 |     for SAMPLE_PATH in "$3"/* ; do
 96 |       SAMPLE_NAME=$(basename "$SAMPLE_PATH")
 97 |       if [ $SAMPLE_NAME != "csv" ] && [ $SAMPLE_NAME != "tif" ] && [ $SAMPLE_NAME != "seg" ] && [ $SAMPLE_NAME != "markers.csv" ]; then
 98 |         mv "$SAMPLE_PATH"/markers.csv "$3"/
 99 |         rm -r "$SAMPLE_PATH"
100 |       fi
101 |     done
102 | 
103 |     # Copy configuration template to input dir
104 |     cp "$4" "$3"/
105 | 
106 |   fi
107 | fi
108 | 


--------------------------------------------------------------------------------
/cylinter/pretrained_models/pretrained_model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labsyspharm/cylinter/b9dc04b0f0892657d53dbf31f753dc52b481bc08/cylinter/pretrained_models/pretrained_model.pkl


--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
1 | _site
2 | Gemfile
3 | Gemfile.lock


--------------------------------------------------------------------------------
/docs/_config.yml:
--------------------------------------------------------------------------------
 1 | title: CyLinter
 2 | description: Cylinter pipeline
 3 | logo: "/assets/images/cylinter-logo.svg"
 4 | hero_background:
 5 | 
 6 | remote_theme: labsyspharm/just-the-docs-lsp
 7 | color_scheme: cylinter
 8 | search_enabled: true
 9 | heading_anchors: true
10 | 
11 | # Banner links to include
12 | banner_links:
13 |   lsp: true
14 |   hits: true
15 | 
16 | # Aux links for the upper right navigation
17 | aux_links:
18 |   "CyLinter on GitHub":
19 |     - "//github.com/labsyspharm/cylinter"
20 | aux_links_new_tab: false
21 | 
22 | 
23 | # Footer content
24 | # appears at the bottom of every page's main content
25 | footer_content:
26 | 
27 | # Back to top link
28 | back_to_top: false
29 | back_to_top_text: "Back to Top"
30 | 
31 | # Provide license information for the project
32 | license:
33 |   - description: "CyLinter source code is licensed under the"
34 |     name: "MIT License"
35 |     url: "https://github.com/labsyspharm/cylinter/blob/master/LICENSE"
36 |   - description: "Contents of this site are licensed under"
37 |     name: CC BY-NC 4.0 <img alt="Creative Commons License" style="border-width:0" src="https://i.creativecommons.org/l/by-nc/4.0/80x15.png" />
38 |     url: "http://creativecommons.org/licenses/by-nc/4.0/"
39 | 
40 | # Linked logos
41 | footer_logos:
42 |   - name: "Laboratory of Systems Pharmacology"
43 |     image: "/assets/images/logo_lsp_white.svg"
44 |     url: "https://hits.harvard.edu/the-program/laboratory-of-systems-pharmacology/about/"
45 |   - name: "Harvard Medical School"
46 |     image: "/assets/images/logo_hms.svg"
47 |     url: "https://hms.harvard.edu/"
48 | 
49 | # Footer last edited timestamp
50 | last_edit_timestamp: true # show or hide edit time - page must have `last_modified_date` defined in the frontmatter
51 | last_edit_time_format: "%b %e %Y" # format: https://ruby-doc.org/stdlib-2.7.0/libdoc/time/rdoc/Time.html
52 | 
53 | # Footer "Edit this page on GitHub" link text
54 | gh_edit_link: false # show or hide edit this page link
55 | gh_edit_link_text: "Edit this page on GitHub."
56 | gh_edit_repository: "https://github.com/labsyspharm/cylinter" # the github URL for your repo
57 | gh_edit_branch: "master" # the branch that your docs is served from
58 | gh_edit_source: docs # the source that your files originate from
59 | gh_edit_view_mode: "tree" # "tree" or "edit" if you want the user to jump into the editor immediately
60 | 
61 | # Google Analytics
62 | ga_tracking: G-TY4QP6H41T
63 | ga_tracking_anonymize_ip: true # Use GDPR compliant Google Analytics settings
64 | 
65 | plugins:
66 |   - jekyll-seo-tag
67 |   - jekyll-remote-theme
68 |   - jekyll-include-cache
69 | 
70 | repository: labsyspharm/cylinter
71 | 
72 | exclude: ["node_modules/", "*.gemspec", "*.gem", "Gemfile", "Gemfile.lock", "package.json", "package-lock.json",  "script/", "LICENSE.txt", "lib/", "bin/", "README.md", "Rakefile"]
73 | compress_html:
74 |   clippings: all
75 |   comments: all
76 |   endings: all
77 |   startings: []
78 |   blanklines: false
79 |   profile: false
80 | 


--------------------------------------------------------------------------------
/docs/_includes/cylinter_gif.html:
--------------------------------------------------------------------------------
1 | <img src="{{ site.baseurl }}/assets/gifs/cylinter.gif" alt="cylinter" width="2500" height="500" style = "position:relative; left:0px; top:0px; padding-top: 0px; padding-left: 0px; padding-right: 0px;"/>
2 | 


--------------------------------------------------------------------------------
/docs/_includes/home.md:
--------------------------------------------------------------------------------
 1 | # Overview
 2 | 
 3 | <div class="basic-grid with-dividers mb-6">
 4 | 
 5 | <div markdown="1">
 6 | 
 7 | ## QC for Multiplex Microscopy
 8 | Although quality control (QC) methods have long been associated with analysis tools for single-cell genomics and transcriptomics research, analogous tools have lagged in the area of quantitative microscopy. There are now at least 9 different multiplex imaging platforms capable of routine acquisition of 20-40 channel microscopy data<sup>1,2,3,4,5,6,7,8,9</sup> and each is sensitive to microscopy artifacts. Current tools for microscopy-based QC act on pixel-level data<sup>10,11,12,13,14</sup>. CyLinter differs in that it allows users to work with both pixel-level and single-cell data to identify and remove cell segmentation instances in multiplex images corrupted by visual and image-processing artifacts that can dramatically affect data interpretation.
 9 | 
10 | </div>
11 | <div markdown="1">
12 | 
13 | ## About CyLinter
14 | CyLinter is open-source QC software for multiplex microscopy. The tool is instantiated as a Python class and consists of multiple QC modules through which single-cell data are passed for serial redaction. Partially redacted spatial feature tables are cached within and between modules to allow for iterative QC strategies and progress bookmarking. CyLinter is agnostic to data acquisition platform (CyCIF<sup>1</sup>, CODEX<sup>2</sup>, MIBI<sup>3</sup>, mIHC<sup>4</sup>, mxIF<sup>5</sup>, IMC<sup>6</sup>, etc.) and takes standard TIFF/OME-TIFF imaging files and CSV spatial feature tables as input.
15 | 
16 | <div markdown="1">
17 | 
18 | 1. Lin, J.-R. et al. Highly multiplexed immunofluorescence imaging of human tissues and tumors using t-CyCIF and conventional optical microscopes. Elife 7, (2018).
19 | 2. Goltsev, Y. et al. Deep Profiling of Mouse Splenic Architecture with CODEX Multiplexed Imaging. Cell 174, 968-981.e15 (2018).
20 | 3. Angelo, M. et al. Multiplexed ion beam imaging (MIBI) of human breast tumors. Nat Med 20, 436–442 (2014).
21 | 4. Tsujikawa, T. et al. Quantitative Multiplex Immunohistochemistry Reveals Myeloid-Inflamed Tumor-Immune Complexity Associated with Poor Prognosis. Cell Reports 19, 203–217 (2017).
22 | 5. Gerdes, M. J. et al. Highly multiplexed single-cell analysis of formalin-fixed, paraffin-embedded cancer tissue. Proc Natl Acad Sci U S A 110, 11982–11987 (2013).
23 | 6. Giesen, C. et al. Highly multiplexed imaging of tumor tissues with subcellular resolution by mass cytometry. Nat Methods 11, 417–422 (2014).
24 | 7. Remark, R. et al. In-depth tissue profiling using multiplexed immunohistochemical consecutive staining on single slide. Science Immunology 1, aaf6925–aaf6925 (2016).
25 | 8. Gut, G., Herrmann, M. D. & Pelkmans, L. Multiplexed protein maps link subcellular organization to cellular states. Science 361, (2018).
26 | 9. Saka, S. K. et al. Immuno-SABER enables highly multiplexed and amplified protein imaging in tissues. Nat Biotechnol 37, 1080–1090 (2019).
27 | 10.	Janowczyk, A., Zuo, R., Gilmore, H., Feldman, M. & Madabhushi, A. HistoQC: An Open-Source Quality Control Tool for Digital Pathology Slides. JCO Clin Cancer Inform 3, 1–7 (2019).
28 | 11.	Ameisen, D. et al. Towards better digital pathology workflows: programming libraries for high-speed sharpness assessment of Whole Slide Images. Diagn Pathol 9 Suppl 1, S3 (2014).
29 | 12.	Senaras, C., Niazi, M. K. K., Lozanski, G. & Gurcan, M. N. DeepFocus: Detection of out-of-focus regions in whole slide digital images using deep learning. PLoS One 13, e0205387 (2018).
30 | 13.	Wen, S. et al. A Methodology for Texture Feature-based Quality Assessment in Nucleus Segmentation of Histopathology Image. J Pathol Inform 8, 38 (2017).
31 | 14.	Baranski, A. et al. MAUI (MBI Analysis User Interface)-An image processing pipeline for Multiplexed Mass Based Imaging. PLoS Comput Biol 17, e1008887 (2021).
32 | 


--------------------------------------------------------------------------------
/docs/_includes/workflow.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labsyspharm/cylinter/b9dc04b0f0892657d53dbf31f753dc52b481bc08/docs/_includes/workflow.md


--------------------------------------------------------------------------------
/docs/_layouts/default-cylinter.html:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: default
 3 | ---
 4 | 
 5 | <style>
 6 |     .main-header {
 7 |         padding-top: 5px;
 8 |         background: url("{{ site.baseurl }}/assets/gifs/solitary_saunter.gif") no-repeat;
 9 |         background-attachment: fixed;
10 |         position: sticky;
11 |         background-position: right 340px top 100px;
12 |         background-size: 10%;
13 |         padding-bottom: 0px;
14 |     }
15 | </style>
16 | 
17 | {{ content }}
18 | 


--------------------------------------------------------------------------------
/docs/_sass/color_schemes/cylinter.scss:
--------------------------------------------------------------------------------
 1 | 
 2 | $sidebar-color: #e6e6e6;
 3 | $footer-background-color: #00b0e9;
 4 | $branding-background-color:  #e6e6e6;
 5 | $branding-background-opacity: .0;
 6 | $hero-background-color: #454545;
 7 | $hero-background-opacity: .25;
 8 | 
 9 | $body-text-color: #58595b;
10 | $body-heading-color: #1e506c;    // Default heading color
11 | $body-heading-2-color: #00b0e9;  // Target H2 heading color
12 | $nav-child-link-color: #58595b;
13 | $link-color: #C30800;
14 | $arrow-btn-color: #006eb8;
15 | $btn-primary-color: #006eb8;
16 | $image-card-label-background-color: #00b0e9;
17 | 


--------------------------------------------------------------------------------
/docs/_sass/custom/custom.scss:
--------------------------------------------------------------------------------
 1 | .main-branding {
 2 |     height: 185px;
 3 | }
 4 | 
 5 | .site-logo::before {
 6 |     content:"";
 7 | }
 8 | 
 9 | .site-logo {
10 |     height: 100%;
11 |     padding-top: 0px;
12 |     padding-bottom: 0px;
13 |     padding-right: 300px;
14 | }
15 | 
16 | .hero {
17 |   background-size: contain;
18 | }
19 | 
20 | .hero-heading {
21 |     color: #4d4d4d;
22 | }
23 | 
24 | .hero-body {
25 |     color: #4d4d4d;
26 |     font-size: 1.3rem;
27 | }
28 | 
29 | .hero {
30 |     padding-top: 0px;
31 |     padding-bottom: 0px;
32 |     padding-right: 25px;
33 |     padding-left: 25px;
34 | }
35 | 
36 | .hero-inner {
37 |     padding: 10px;
38 |     max-width: 2500px;
39 |     margin-top: 0px;
40 |     margin-bottom: 0px;
41 |     margin-bottom: 0px;
42 | }
43 | 
44 | .site-header .site-logo {
45 |     height: 200% !important;
46 | }
47 | 
48 | .hero-heading {
49 |     font-size: 2.0rem;
50 | }
51 | 
52 | .main-content-wrap {
53 |     padding-top: 0px;
54 |     padding-bottom: 0px;
55 |     padding-right: 25px;
56 |     padding-left: 25px;
57 | }
58 | 
59 | .btn-large {
60 |     font-size: 1.0rem;
61 |     text-transform: lowercase;
62 | }
63 | 


--------------------------------------------------------------------------------
/docs/assets/gifs/cylinter.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labsyspharm/cylinter/b9dc04b0f0892657d53dbf31f753dc52b481bc08/docs/assets/gifs/cylinter.gif


--------------------------------------------------------------------------------
/docs/assets/gifs/solitary_saunter.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labsyspharm/cylinter/b9dc04b0f0892657d53dbf31f753dc52b481bc08/docs/assets/gifs/solitary_saunter.gif


--------------------------------------------------------------------------------
/docs/assets/images/ExtFig4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labsyspharm/cylinter/b9dc04b0f0892657d53dbf31f753dc52b481bc08/docs/assets/images/ExtFig4.jpg


--------------------------------------------------------------------------------
/docs/assets/images/cores.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labsyspharm/cylinter/b9dc04b0f0892657d53dbf31f753dc52b481bc08/docs/assets/images/cores.jpg


--------------------------------------------------------------------------------
/docs/assets/images/cylinter-logo.svg:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="utf-8"?>
  2 | <!-- Generator: Adobe Illustrator 26.0.1, SVG Export Plug-In . SVG Version: 6.00 Build 0)  -->
  3 | <svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
  4 | 	 viewBox="10 10 800.0 250.0" style="enable-background:new 10 10 800.0 250.0;" xml:space="preserve">
  5 | <style type="text/css">
  6 | 	.st0{fill:#C30800;}
  7 | 	.st1{fill:#415266;}
  8 | 	.st2{fill:none;}
  9 | 	.st3{fill:#415266;fill-opacity:0.6118;}
 10 | 	.st4{fill:#415266;fill-opacity:0.8588;}
 11 | 	.st5{fill:#415266;fill-opacity:0.502;}
 12 | 	.st6{fill:#415266;fill-opacity:0.6902;}
 13 | </style>
 14 | <g>
 15 | 	<path class="st0" d="M283.5,86.4c-0.6-7.9-2.3-13-6.2-18.1c-7-9.3-18.4-14.5-32.1-14.5c-25.9,0-42,18.5-42,48.2
 16 | 		c0,29.6,16,48,41.5,48c22.9,0,38-13.2,39.1-34.2h-18.4c-1.3,11.7-8.6,18.4-20.3,18.4c-14.6,0-23.2-11.8-23.2-31.8
 17 | 		c0-20.3,8.9-32.3,23.8-32.3c11,0,17.1,5.3,19.6,16.5H283.5z"/>
 18 | 	<path class="st0" d="M339.2,79.1l-14.1,49.5l-15-49.5h-19.4l25.3,70.7c0,7-4.4,11.8-11,11.8c-1.4,0-2.5-0.3-4.7-0.8v13.2
 19 | 		c2.6,0.4,4.2,0.5,6.3,0.5c4.5,0,9.6-0.9,12.7-2.1c4.9-2,7-4.8,9.9-13l27.9-80.4H339.2z"/>
 20 | 	<path class="st0" d="M388.5,55.3h-18.9V147h62.8v-15.7h-43.9V55.3z"/>
 21 | 	<path class="st0" d="M461.1,79.1h-17.6v68h17.6V79.1z M461.1,55.3h-17.6V71h17.6V55.3z"/>
 22 | 	<path class="st0" d="M478,79.1v68h17.6v-40.8c0-8.1,5.5-13.3,14.2-13.3c7.6,0,11.3,4.2,11.3,12.2V147h17.6v-45.6
 23 | 		c0-15.1-8.2-23.5-22.8-23.5c-9.2,0-15.4,3.3-20.4,11v-9.8H478z"/>
 24 | 	<path class="st0" d="M584.9,80.5H575V62.2h-17.6v18.3h-8.7v11.7h8.7V134c0,10.7,5.7,16,17.2,16c4,0,6.9-0.4,10.2-1.4v-12.3
 25 | 		c-1.8,0.3-2.6,0.4-4,0.4c-4.7,0-5.8-1.4-5.8-7.4v-37h9.8V80.5z"/>
 26 | 	<path class="st0" d="M654.3,118.6c0.1-1.5,0.1-2.1,0.1-3c0-6.8-1-13.1-2.6-17.9c-4.5-12.3-15.5-19.8-29.2-19.8
 27 | 		c-19.5,0-31.5,14.1-31.5,36.9c0,21.8,11.8,35.1,31.1,35.1c15.2,0,27.6-8.6,31.5-22h-17.4c-2.1,5.4-7,8.6-13.5,8.6
 28 | 		c-5,0-9.1-2.1-11.6-5.9c-1.6-2.5-2.3-5.5-2.5-12H654.3z M609,106.9c1.1-10.4,5.5-15.5,13.3-15.5c8.1,0,12.8,5.4,13.7,15.5H609z"/>
 29 | 	<path class="st0" d="M666.3,79.1v68h17.6v-36.1c0-10.3,5.2-15.5,15.5-15.5c1.9,0,3.1,0.1,5.5,0.5V78.1c-1-0.1-1.4-0.1-2.1-0.1
 30 | 		c-8.1,0-15,5.3-18.9,14.5V79.1H666.3z"/>
 31 | </g>
 32 | <g>
 33 | 	<path class="st1" d="M227.4,195.9h-4v19.7h4V195.9z"/>
 34 | 	<path class="st1" d="M235,200.2l3.5,15.3h4l3.5-15.3v15.3h4.1v-19.7H244l-3.5,15.6l-3.6-15.6h-6v19.7h4.1V200.2z"/>
 35 | 	<path class="st1" d="M265.1,211.6l1.3,4h4.2l-6.8-19.7h-4.5l-7,19.7h4.1l1.3-4H265.1z M264,208.2h-5.1l2.6-7.7L264,208.2z"/>
 36 | 	<path class="st1" d="M289.2,205h-7.9v3.4h4.5c-0.1,1.1-0.4,1.8-1,2.5c-1,1.2-2.5,1.9-4,1.9c-3.3,0-5.6-2.9-5.6-6.9
 37 | 		c0-4.2,2-6.8,5.5-6.8c1.4,0,2.6,0.4,3.5,1.2c0.6,0.5,0.9,0.9,1.2,2h3.8c-0.5-4.1-3.8-6.6-8.5-6.6c-5.7,0-9.5,4.2-9.5,10.3
 38 | 		c0,6,3.9,10.3,9.2,10.3c2.7,0,4.4-0.9,5.9-3.2l0.5,2.6h2.4V205z"/>
 39 | 	<path class="st1" d="M297.2,207.1h9.4v-3.4h-9.4v-4.5h10.2v-3.4h-14.2v19.7h14.7v-3.4h-10.7V207.1z"/>
 40 | 	<path class="st1" d="M332.9,201.9c0-4.1-2.8-6.3-7.7-6.3c-4.7,0-7.4,2.2-7.4,6c0,3,1.5,4.5,5.5,5.3l2.8,0.5
 41 | 		c2.7,0.5,3.7,1.3,3.7,2.9c0,1.6-1.5,2.6-4,2.6c-2.8,0-4.3-1.2-4.4-3.3h-3.9c0.2,4.2,3.1,6.5,8.2,6.5c5.1,0,8.1-2.4,8.1-6.4
 42 | 		c0-3.1-1.6-4.7-5.2-5.4l-3.1-0.6c-2.9-0.6-3.7-1.1-3.7-2.6c0-1.5,1.3-2.4,3.4-2.4c2.5,0,4,1.1,4.1,3.1H332.9z"/>
 43 | 	<path class="st1" d="M340.7,207.1h9.4v-3.4h-9.4v-4.5h10.2v-3.4h-14.2v19.7h14.7v-3.4h-10.7V207.1z"/>
 44 | 	<path class="st1" d="M371.7,205h-7.9v3.4h4.5c-0.1,1.1-0.4,1.8-1,2.5c-1,1.2-2.5,1.9-4,1.9c-3.3,0-5.6-2.9-5.6-6.9
 45 | 		c0-4.2,2-6.8,5.5-6.8c1.4,0,2.6,0.4,3.5,1.2c0.6,0.5,0.9,0.9,1.2,2h3.8c-0.5-4.1-3.8-6.6-8.5-6.6c-5.7,0-9.5,4.2-9.5,10.3
 46 | 		c0,6,3.9,10.3,9.2,10.3c2.7,0,4.4-0.9,5.9-3.2l0.5,2.6h2.4V205z"/>
 47 | 	<path class="st1" d="M379.3,200.2l3.5,15.3h4l3.5-15.3v15.3h4.1v-19.7h-6.1l-3.5,15.6l-3.6-15.6h-6v19.7h4.1V200.2z"/>
 48 | 	<path class="st1" d="M402.1,207.1h9.4v-3.4h-9.4v-4.5h10.2v-3.4h-14.2v19.7h14.7v-3.4h-10.7V207.1z"/>
 49 | 	<path class="st1" d="M427.7,215.6h4.1v-19.7h-4.1v13.4l-7.8-13.4h-4.2v19.7h4.1V202L427.7,215.6z"/>
 50 | 	<path class="st1" d="M443.8,199.3h5.7v-3.4h-15.8v3.4h6v16.3h4.1V199.3z"/>
 51 | 	<path class="st1" d="M461.1,211.6l1.3,4h4.2l-6.8-19.7h-4.5l-7,19.7h4.1l1.3-4H461.1z M460,208.2h-5.1l2.6-7.7L460,208.2z"/>
 52 | 	<path class="st1" d="M475.2,199.3h5.7v-3.4h-15.8v3.4h6v16.3h4.1V199.3z"/>
 53 | 	<path class="st1" d="M487,195.9h-4v19.7h4V195.9z"/>
 54 | 	<path class="st1" d="M499.3,195.6c-2.8,0-5,0.9-6.7,2.7c-1.7,1.9-2.8,4.6-2.8,7.6c0,2.9,1,5.7,2.8,7.6c1.7,1.8,3.9,2.7,6.7,2.7
 55 | 		c2.8,0,5-0.9,6.7-2.7c1.7-1.8,2.8-4.7,2.8-7.5c0-3.1-1-5.8-2.8-7.7C504.3,196.4,502.2,195.6,499.3,195.6z M499.3,199
 56 | 		c3.3,0,5.4,2.7,5.4,7c0,4-2.2,6.7-5.4,6.7c-3.3,0-5.4-2.7-5.4-6.9C493.9,201.7,496.1,199,499.3,199z"/>
 57 | 	<path class="st1" d="M523.6,215.6h4.1v-19.7h-4.1v13.4l-7.8-13.4h-4.2v19.7h4.1V202L523.6,215.6z"/>
 58 | 	<path class="st1" d="M542.8,207.1h8.6v-3.4h-8.6v-4.5h9.8v-3.4h-13.8v19.7h4.1V207.1z"/>
 59 | 	<path class="st1" d="M559,195.9h-4v19.7h4V195.9z"/>
 60 | 	<path class="st1" d="M567,195.9h-4.1v19.7h13.5v-3.4H567V195.9z"/>
 61 | 	<path class="st1" d="M585.1,199.3h5.7v-3.4H575v3.4h6v16.3h4.1V199.3z"/>
 62 | 	<path class="st1" d="M597.1,207.1h9.4v-3.4h-9.4v-4.5h10.2v-3.4h-14.2v19.7h14.7v-3.4h-10.7V207.1z"/>
 63 | 	<path class="st1" d="M615.2,207.8h4.6c1.7,0,2.5,0.7,2.5,2.3v0.8c0,0.5,0,1,0,1.3c0,1.9,0.1,2.4,0.6,3.4h4.3v-0.7
 64 | 		c-0.6-0.3-0.9-0.8-0.9-1.6c-0.1-5.8-0.2-6.1-2.7-7.2c2.2-0.9,3.3-2.4,3.3-4.9c0-1.6-0.5-3-1.5-4c-0.9-0.9-2.1-1.3-3.8-1.3h-10.5
 65 | 		v19.7h4.1V207.8z M615.2,204.4v-5.1h4.9c1.2,0,1.6,0.1,2.1,0.5c0.5,0.4,0.7,1.1,0.7,2c0,0.9-0.2,1.7-0.7,2.1
 66 | 		c-0.5,0.4-1,0.5-2.1,0.5H615.2z"/>
 67 | </g>
 68 | <g>
 69 | 	<path class="st2" d="M182.7,112c-15.6,0-28.3-12.8-28.3-28.6c0-7.7,3-14.7,7.9-19.8c-0.7-0.6-1.3-1.3-2-1.9c-1.7,3.1-5,5.2-8.8,5.2
 70 | 		c-5.5,0-10-4.5-10-10.1c0-2.5,0.9-4.7,2.3-6.5c-1.8-0.9-3.7-1.8-5.7-2.6c0.3,1.1,0.4,2.2,0.4,3.3c0,7.1-5.7,12.8-12.7,12.8
 71 | 		c-7,0-12.7-5.7-12.7-12.8c0-3.2,1.2-6.1,3.1-8.3c-1.7-0.1-3.5-0.2-5.3-0.2c-11.2,0-21.9,2.6-31.4,7.1c0.3,1.2,0.5,2.5,0.5,3.8
 72 | 		c0,8.3-6.7,15-14.9,15c-2.9,0-5.6-0.9-7.9-2.3c-2.7,2.9-5.2,6.1-7.4,9.4c5.5,1.8,9.5,7,9.5,13.1c0,7.6-6.1,13.7-13.6,13.7
 73 | 		c-2.4,0-4.6-0.6-6.5-1.7c-1.1,5.1-1.7,10.4-1.7,15.8c0,6,0.7,11.8,2,17.4c1.2-5.9,6.4-10.3,12.6-10.3c7.1,0,12.9,5.8,12.9,13
 74 | 		c0,7.2-5.8,13-12.9,13c-3.2,0-6.1-1.2-8.4-3.1c2,4.5,4.4,8.7,7.1,12.7c1.9-6.8,8.1-11.7,15.4-11.7c8.9,0,16,7.3,16,16.2
 75 | 		c0,7.1-4.6,13.2-10.9,15.3c3.1,2,6.4,3.8,9.8,5.3c-1-1.3-1.6-2.9-1.6-4.7c0-4.2,3.4-7.7,7.6-7.7c4.2,0,7.6,3.4,7.6,7.7
 76 | 		c0,3.7-2.6,6.8-6.1,7.5c2.7,0.9,5.5,1.6,8.3,2.1c1-6.2,6.3-10.9,12.6-10.9c6.5,0,11.9,4.9,12.7,11.2c26.4-4.2,48-22.6,57.1-47.2
 77 | 		c-3.4,4-8.5,6.5-14.1,6.5c-10.3,0-18.7-8.5-18.7-18.9c0-10.5,8.4-18.9,18.7-18.9c8.8,0,16.2,6.1,18.2,14.4c0.4-2.9,0.5-5.9,0.5-9
 78 | 		c0-1.6-0.1-3.1-0.1-4.6C183.7,112,183.2,112,182.7,112z M133,65.3c10.2,0,18.4,8.3,18.4,18.6c0,10.3-8.3,18.6-18.4,18.6
 79 | 		c-10.2,0-18.4-8.3-18.4-18.6C114.5,73.7,122.8,65.3,133,65.3z M106,97.6c-4.1,0-7.4-3.3-7.4-7.5c0-4.1,3.3-7.5,7.4-7.5
 80 | 		c4.1,0,7.4,3.3,7.4,7.5C113.4,94.3,110.1,97.6,106,97.6z M106.6,110c0,8.7-6.9,15.7-15.5,15.7c-8.6,0-15.5-7-15.5-15.7
 81 | 		c0-8.7,6.9-15.7,15.5-15.7C99.7,94.3,106.6,101.3,106.6,110z M120.8,101.6c7.2,0,13,5.9,13,13.1c0,7.2-5.8,13.1-13,13.1
 82 | 		c-7.2,0-13-5.9-13-13.1C107.8,107.4,113.6,101.6,120.8,101.6z M136.7,123.6c5,0,9,4.1,9,9.1c0,5-4,9.1-9,9.1c-5,0-9-4.1-9-9.1
 83 | 		C127.7,127.7,131.8,123.6,136.7,123.6z M99.3,49.6c9.1,0,16.5,7.5,16.5,16.7S108.5,83,99.3,83c-9.1,0-16.5-7.5-16.5-16.7
 84 | 		S90.2,49.6,99.3,49.6z M73.3,71c7,0,12.6,5.7,12.6,12.7c0,7-5.6,12.7-12.6,12.7c-7,0-12.6-5.7-12.6-12.7C60.7,76.7,66.3,71,73.3,71
 85 | 		z M73.6,103.3c0,2.5-2,4.6-4.5,4.6c-2.5,0-4.5-2.1-4.5-4.6c0-2.5,2-4.6,4.5-4.6C71.5,98.8,73.6,100.8,73.6,103.3z M51.5,121.7
 86 | 		c-5.4,0-9.7-4.4-9.7-9.8c0-5.4,4.4-9.8,9.7-9.8c5.4,0,9.7,4.4,9.7,9.8C61.3,117.3,56.9,121.7,51.5,121.7z M60.5,120.4
 87 | 		c0-4.4,3.5-7.9,7.8-7.9c4.3,0,7.8,3.5,7.8,7.9c0,4.4-3.5,7.9-7.8,7.9C64,128.3,60.5,124.7,60.5,120.4z M75.1,146.3
 88 | 		c-4.6,0-8.3-3.7-8.3-8.4c0-4.6,3.7-8.4,8.3-8.4c4.6,0,8.3,3.7,8.3,8.4C83.4,142.5,79.7,146.3,75.1,146.3z M107.8,177
 89 | 		c-13.4,0-24.3-11-24.3-24.5c0-13.6,10.9-24.5,24.3-24.5c5.9,0,11.3,2.1,15.5,5.6c5.4,4.4,8.8,11.2,8.8,18.8c0,0,0,0,0,0.1
 90 | 		c0,0,0,0,0,0.1C132.1,166,121.2,177,107.8,177z M132.5,186.7c-4.5,0-8.1-3.7-8.1-8.2c0-4.5,3.6-8.2,8.1-8.2c4.5,0,8.1,3.7,8.1,8.2
 91 | 		C140.6,183,137,186.7,132.5,186.7z M147.7,145.4c7.6,0,13.8,6.3,13.8,14c0,7.7-6.2,14-13.8,14c-7.6,0-13.8-6.3-13.8-14
 92 | 		C133.9,151.7,140.1,145.4,147.7,145.4z M145.4,118.7c-4.2,0-7.7-3.5-7.7-7.8c0-4.3,3.4-7.8,7.7-7.8c4.2,0,7.7,3.5,7.7,7.8
 93 | 		C153.1,115.2,149.6,118.7,145.4,118.7z"/>
 94 | 	<path class="st1" d="M132.1,152.3C132.1,152.3,132.1,152.3,132.1,152.3c0-7.7-3.4-14.4-8.8-18.9C128.7,138,132.1,144.8,132.1,152.3
 95 | 		z"/>
 96 | 	<path class="st1" d="M91.1,94.3c-8.6,0-15.5,7-15.5,15.7c0,8.7,6.9,15.7,15.5,15.7c8.6,0,15.5-7,15.5-15.7
 97 | 		C106.6,101.3,99.7,94.3,91.1,94.3z"/>
 98 | 	<path class="st1" d="M133,102.6c10.2,0,18.4-8.3,18.4-18.6c0-10.3-8.3-18.6-18.4-18.6c-10.2,0-18.4,8.3-18.4,18.6
 99 | 		C114.5,94.2,122.8,102.6,133,102.6z"/>
100 | 	<path class="st1" d="M73.3,96.4c7,0,12.6-5.7,12.6-12.7c0-7-5.6-12.7-12.6-12.7c-7,0-12.6,5.7-12.6,12.7
101 | 		C60.7,90.7,66.3,96.4,73.3,96.4z"/>
102 | 	<path class="st3" d="M120.8,127.7c7.2,0,13-5.9,13-13.1c0-7.2-5.8-13.1-13-13.1c-7.2,0-13,5.9-13,13.1
103 | 		C107.8,121.9,113.6,127.7,120.8,127.7z"/>
104 | 	<path class="st1" d="M145.4,103.2c-4.2,0-7.7,3.5-7.7,7.8c0,4.3,3.4,7.8,7.7,7.8c4.2,0,7.7-3.5,7.7-7.8
105 | 		C153.1,106.6,149.6,103.2,145.4,103.2z"/>
106 | 	<path class="st4" d="M113.4,90.1c0-4.1-3.3-7.5-7.4-7.5c-4.1,0-7.4,3.3-7.4,7.5c0,4.1,3.3,7.5,7.4,7.5
107 | 		C110.1,97.6,113.4,94.3,113.4,90.1z"/>
108 | 	<path class="st1" d="M136.7,141.8c5,0,9-4.1,9-9.1c0-5-4-9.1-9-9.1c-5,0-9,4.1-9,9.1C127.7,137.8,131.8,141.8,136.7,141.8z"/>
109 | 	<path class="st1" d="M133.9,159.4c0,7.7,6.2,14,13.8,14c7.6,0,13.8-6.3,13.8-14c0-7.7-6.2-14-13.8-14
110 | 		C140.1,145.4,133.9,151.7,133.9,159.4z"/>
111 | 	<path class="st1" d="M132.5,170.3c-4.5,0-8.1,3.7-8.1,8.2c0,4.5,3.6,8.2,8.1,8.2c4.5,0,8.1-3.7,8.1-8.2
112 | 		C140.6,173.9,137,170.3,132.5,170.3z"/>
113 | 	<path class="st3" d="M75.1,129.6c-4.6,0-8.3,3.7-8.3,8.4c0,4.6,3.7,8.4,8.3,8.4c4.6,0,8.3-3.7,8.3-8.4
114 | 		C83.4,133.3,79.7,129.6,75.1,129.6z"/>
115 | 	<path class="st1" d="M76.2,120.4c0-4.4-3.5-7.9-7.8-7.9c-4.3,0-7.8,3.5-7.8,7.9c0,4.4,3.5,7.9,7.8,7.9
116 | 		C72.7,128.3,76.2,124.7,76.2,120.4z"/>
117 | 	<path class="st1" d="M69,98.8c-2.5,0-4.5,2.1-4.5,4.6c0,2.5,2,4.6,4.5,4.6c2.5,0,4.5-2.1,4.5-4.6C73.6,100.8,71.5,98.8,69,98.8z"/>
118 | 	<path class="st1" d="M51.5,102.1c-5.4,0-9.7,4.4-9.7,9.8c0,5.4,4.4,9.8,9.7,9.8c5.4,0,9.7-4.4,9.7-9.8
119 | 		C61.3,106.5,56.9,102.1,51.5,102.1z"/>
120 | 	<path class="st1" d="M65.2,136.7c0-7.2-5.8-13-12.9-13c-6.2,0-11.4,4.4-12.6,10.3c1,4.4,2.5,8.6,4.2,12.6c2.3,2,5.2,3.1,8.4,3.1
121 | 		C59.4,149.7,65.2,143.9,65.2,136.7z"/>
122 | 	<path class="st1" d="M59.5,88.7c0-6.2-4-11.4-9.5-13.1c-5,7.5-8.6,16.1-10.6,25.2c1.9,1.1,4.2,1.7,6.5,1.7
123 | 		C53.4,102.4,59.5,96.3,59.5,88.7z"/>
124 | 	<path class="st1" d="M80.1,53.5c0-1.3-0.2-2.6-0.5-3.8c-8.4,4-16,9.7-22.3,16.5c2.3,1.5,5,2.3,7.9,2.3
125 | 		C73.5,68.5,80.1,61.8,80.1,53.5z"/>
126 | 	<path class="st3" d="M99.3,83c9.1,0,16.5-7.5,16.5-16.7s-7.4-16.7-16.5-16.7c-9.1,0-16.5,7.5-16.5,16.7S90.2,83,99.3,83z"/>
127 | 	<path class="st1" d="M125.9,63.9c7,0,12.7-5.7,12.7-12.8c0-1.1-0.2-2.2-0.4-3.3c-6.8-2.7-14.2-4.5-21.8-5c-1.9,2.2-3.1,5.1-3.1,8.3
128 | 		C113.2,58.1,118.9,63.9,125.9,63.9z"/>
129 | 	<path class="st1" d="M151.5,66.9c3.8,0,7-2.1,8.8-5.2c-4.9-4.5-10.5-8.4-16.5-11.4c-1.4,1.8-2.3,4-2.3,6.5
130 | 		C141.5,62.4,146,66.9,151.5,66.9z"/>
131 | 	<path class="st5" d="M182.7,112c0.5,0,1,0,1.5,0c-1.2-18.9-9.4-35.9-22-48.4c-4.9,5.1-7.9,12.1-7.9,19.8
132 | 		C154.4,99.2,167.1,112,182.7,112z"/>
133 | 	<path class="st1" d="M132.1,152.4C132.1,152.4,132.1,152.4,132.1,152.4c0-7.7-3.5-14.4-8.8-18.9c-4.2-3.5-9.6-5.6-15.5-5.6
134 | 		c-13.4,0-24.3,11-24.3,24.5c0,13.6,10.9,24.5,24.3,24.5C121.2,177,132.1,166,132.1,152.4z"/>
135 | 	<path class="st1" d="M82.5,163.8c0-8.9-7.2-16.2-16-16.2c-7.3,0-13.5,4.9-15.4,11.7c5.5,7.9,12.5,14.6,20.5,19.8
136 | 		C77.9,176.9,82.5,170.9,82.5,163.8z"/>
137 | 	<path class="st1" d="M95.1,179.7c0-4.2-3.4-7.7-7.6-7.7c-4.2,0-7.6,3.4-7.6,7.7c0,1.8,0.6,3.4,1.6,4.7c2.4,1.1,5,2.1,7.5,2.9
138 | 		C92.4,186.6,95.1,183.5,95.1,179.7z"/>
139 | 	<path class="st1" d="M165.6,111.2c-10.3,0-18.7,8.5-18.7,18.9c0,10.5,8.4,18.9,18.7,18.9c5.6,0,10.7-2.5,14.1-6.5
140 | 		c2-5.4,3.4-11,4.1-16.9C181.8,117.3,174.5,111.2,165.6,111.2z"/>
141 | 	<path class="st6" d="M109.9,178.5c-6.4,0-11.6,4.7-12.6,10.9c4.4,0.9,9,1.3,13.7,1.3c3.9,0,7.8-0.3,11.6-0.9
142 | 		C121.8,183.4,116.4,178.5,109.9,178.5z"/>
143 | </g>
144 | </svg>
145 | 


--------------------------------------------------------------------------------
/docs/cite/index.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: default-cylinter
 3 | title: Citing CyLinter
 4 | nav_order: 12
 5 | ---
 6 | 
 7 | ### If you use CyLinter in your work, please cite our publication:
 8 | 
 9 | *Baker et al. ["Quality control for single-cell analysis of high-plex tissue profiles using CyLinter", **Nature Methods** 2024](https://doi.org/10.1038/s41592-024-02328-0)*
10 | 
11 | 
12 | ### The CyLinter GitHub repository can be cited as follows:
13 | 
14 | **APA style**: *Baker, G. (2021). CyLinter (Version 0.0.47) [Computer software]. https://github.com/labsyspharm/cylinter*
15 | 
16 | **BibTeX style**: @software{Baker_CyLinter_2021,
17 | author = {Baker, Gregory},
18 | license = {MIT},
19 | month = jan,
20 | title = {{CyLinter}},
21 | url = {https://github.com/labsyspharm/cylinter},
22 | version = {0.0.47},
23 | year = {2021}
24 | }


--------------------------------------------------------------------------------
/docs/community/index.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: default-cylinter
 3 | title: Community
 4 | nav_order: 8
 5 | ---
 6 | 
 7 | {: .no_toc }
 8 | 
 9 | <details open markdown="block">
10 |   <summary>
11 |     Table of contents
12 |   </summary>
13 |   {: .text-delta }
14 | 1. TOC
15 | {:toc}
16 | </details>
17 | 
18 | <br/>
19 | 
20 | # Contributors
21 | 
22 | CyLinter development is led by [Greg Baker](https://scholar.harvard.edu/gregoryjbaker) at the [Laboratory of Systems Pharmacology](https://labsyspharm.org/), [Harvard Medical School](https://hms.harvard.edu/).
23 | 
24 | # Early adopters
25 | * [Clarence Yapp](https://scholar.harvard.edu/clarence/who-clarence)
26 | * [Kenichi Shimada](https://scholar.harvard.edu/kenichi_shimada)
27 | * [Roxy Pelletier](https://www.linkedin.com/in/roxanne-pelletier)
28 | * [Tuulia Vallius](https://scholar.harvard.edu/vallius/home)
29 | * [Connor Jacobson](https://scholar.harvard.edu/connorjacobson/home)
30 | * [Ajit Johnson](https://scholar.harvard.edu/ajitjohnson/home)
31 | * [Shishir Pant](https://fi.linkedin.com/in/shishir-pant)
32 | * [Jackson Appelt](https://www.linkedin.com/in/jackson-appelt-311405142)
33 | * [Ana Verma](https://www.linkedin.com/in/anaverma)
34 | * [Sheheryar Kabraji](https://www.dana-farber.org/find-a-doctor/sheheryar-k-kabraji/)
35 | * [Claire Ritch](https://www.linkedin.com/in/cecily-claire-ritch-651795b7/)
36 | * [Shannon Coy](https://connects.catalyst.harvard.edu/Profiles/display/Person/140806)
37 | 
38 | <!-- # Suggest a module
39 | 
40 | Module suggestions can be made by posting to [image.sc forums](https://forum.image.sc/tag/cylinter) and tagging your post with the `cylinter` tag.
41 |  -->


--------------------------------------------------------------------------------
/docs/exemplar/index.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: default-cylinter
 3 | title: Example data
 4 | nav_order: 7
 5 | ---
 6 | 
 7 | # Example data
 8 | 
 9 | <img align="right" src="{{ site.baseurl }}/assets/images/cores.jpg" width="540" style="padding-left: 30px; padding-bottom: 20px;"> Four (4) human tissue cores are provided as CyLinter demonstration data: normal kidney cortex, mesothelioma, glioblastoma, and normal tonsil. These imaging data were collected by [CyCIF](https://www.cycif.org/) and are derived from a tissue microarray collected at the [Laboratory of Systems Pharmacology](https://labsyspharm.org/) referred to as EMIT (Exemplar Microscopy Images of Tissues) TMA22 (Synapse ID: [syn22345750](https://www.synapse.org/#!Synapse:syn22345750)). The tissue cores were imaged at 20X magnification using a 0.75 NA objective and 2x2-pixel binning.
10 | 
11 | Access to the demonstration dataset requires free registration at the Sage Synapse data repository ([https://www.synapse.org/](https://www.synapse.org/)). Once registered, the example dataset can be downloaded using the following commands:
12 | 
13 | ## Step 1: Download
14 | ``` bash
15 | # Activate the CyLinter virtual environment.
16 | conda activate cylinter
17 | 
18 | # Install the Synapse client.
19 | conda install -c bioconda synapseclient
20 | 
21 | # Mac/Linux users, run the following command to download the demo dataset:
22 | synapse get -r syn52859560 --downloadLocation ~/Desktop/cylinter_demo  # Enter Synapse ID and password when prompted.
23 | 
24 | # PC users, run the following command to download the demo dataset:  
25 | synapse get -r syn52859560 --downloadLocation C:\Users\<username>\Desktop\cylinter_demo --multiThreaded  # Enter Synapse ID and password when prompted.
26 | ```
27 | * The demo dataset can also be downloaded directly from the Sage Synapse website here: [syn52859560](https://www.synapse.org/#!Synapse:syn52859560).
28 | 
29 | ## Step 2: Configure
30 | After downloading the exemplar dataset, open the [YAML configuration file]({{ site.baseurl }}/structure/index#yaml-configuration-file) and update the `inDir` and `outDir` parameters with user-specific directory paths. All other settings are pre-configured for use with the demo dataset.
31 | 
32 | ```yaml
33 | inDir: /Users/<username>/Desktop/cylinter_demo
34 | outDir: /Users/<username>/Desktop/cylinter_demo/output
35 | .
36 | .
37 | .
38 | ```
39 | 
40 | ## Step 3: Run
41 | To run CyLinter on the demo dataset, pass the [YAML configuration file]({{ site.baseurl }}/structure/index#yaml-configuration-file) to the `cylinter` command:
42 | 
43 | ``` bash
44 | # for Mac:
45 | cylinter --module <module-name>(optional) ~/Desktop/cylinter_demo/cylinter_config.yml  
46 | 
47 | # for PC:
48 | cylinter --module <module-name>(optional) C:\Users\<username>\Desktop\cylinter_demo\cylinter_config.yml
49 | ```
50 | 


--------------------------------------------------------------------------------
/docs/faq/#index.md:
--------------------------------------------------------------------------------
  1 | ---
  2 | layout: default-cylinter
  3 | title: FAQ
  4 | nav_order: 9
  5 | ---
  6 | 
  7 | # Frequently Asked Questions
  8 | 
  9 | {: .no_toc }
 10 | 
 11 | <details open markdown="block">
 12 |   <summary>
 13 |     Table of contents
 14 |   </summary>
 15 |   {: .text-delta }
 16 | 1. TOC
 17 | {:toc}
 18 | </details>
 19 | 
 20 | ## Pipeline execution
 21 | 
 22 | ### Q: How do I resume a pipeline run that halted partway?
 23 | 
 24 | The intermediate files in the `work/` directory allow you to restart a pipeline partway, without re-running everything from scratch. For example, consider the following scenario on O2:
 25 | 
 26 | ``` bash
 27 | # This run will fail because --some-invalid-arg is not a valid argument for UnMicst
 28 | nextflow run labsyspharm/mcmicro --in ~/data/exemplar-001 --unmicst-opts '--some-invalid-arg'
 29 | 
 30 | # N E X T F L O W  ~  version 20.01.0
 31 | # Launching `labsyspharm/mcmicro` [jolly_hodgkin] - revision: eeaa364408 [master]
 32 | # executor >  local (2)
 33 | # [-        ] process > illumination   -
 34 | # [7e/bf811b] process > ashlar         [100%] 1 of 1 ✔
 35 | # [-        ] process > dearray        -
 36 | # [29/dfdfac] process > unmicst        [100%] 1 of 1, failed: 1 ✘
 37 | # [-        ] process > ilastik        -
 38 | # [-        ] process > s3seg          -
 39 | # [-        ] process > quantification -
 40 | # [-        ] process > naivestates    -
 41 | 
 42 | # Address the issue by removing the invalid argument and restart the pipeline with -resume
 43 | nextflow run labsyspharm/mcmicro --in ~/data/exemplar-001 -resume
 44 | 
 45 | # N E X T F L O W  ~  version 20.01.0
 46 | # Launching `labsyspharm/mcmicro` [backstabbing_goodall] - revision: eeaa364408 [master]
 47 | # executor >  local (1)
 48 | # [-        ] process > illumination   -
 49 | # [7e/bf811b] process > ashlar         [100%] 1 of 1, cached: 1 ✔      <- NOTE: cached
 50 | # [-        ] process > dearray        -
 51 | # [9e/08ab35] process > unmicst        [100%] 1 of 1 ✔
 52 | # [-        ] process > ilastik        -
 53 | # [84/918c38] process > s3seg          [100%] 1 of 1 ✔
 54 | # [0a/7f71f7] process > quantification [100%] 1 of 1 ✔
 55 | # [ff/be5a97] process > naivestates    [100%] 1 of 1 ✔
 56 | ```
 57 | 
 58 | As you run the pipeline on your datasets, the size of the `work/` directory can grow substantially. Use [nextflow clean](https://github.com/nextflow-io/nextflow/blob/cli-docs/docs/cli.rst#clean) to selectively remove portions of the work directory. Use `-n` flag to list which files will be removed, inspect the list to ensure that you don't lose anything important, and repeat the command with `-f` to actually remove the files:
 59 | 
 60 | ``` bash
 61 | # Remove work files associated with most-recent run
 62 | nextflow clean -n last           # Show what will be removed
 63 | nextflow clean -f last           # Proceed with the removal
 64 | 
 65 | # Remove all work files except those associated with the most-recent run
 66 | nextflow clean -n -but last
 67 | nextflow clean -f -but last
 68 | ```
 69 | 
 70 | ## Pre-processing
 71 | 
 72 | ### Q: How does mcmicro handle multi-file formats such as `.xdce`?
 73 | 
 74 | A: Registration and illumination correction modules in mcmicro are [Bio-Formats compatible](https://docs.openmicroscopy.org/bio-formats/6.0.1/supported-formats.html). Place all files into the `raw/` subdirectory, as described in [Directory Structure]({{ site.baseurl }}/documentation/dir.html), and mcmicro modules will correctly identify and use the relevant ones.
 75 | 
 76 | ## Segmentation
 77 | 
 78 | ### Q: How do I run mcmicro with my own ilastik model?
 79 | 
 80 | A: Use the `--ilastik-model` parameter. Note that the parameter must be specified *outside* `--ilastik-opts`. For example,
 81 | 
 82 | ```
 83 | nextflow run labsyspharm/mcmicro --in /my/data --probability-maps ilastik --ilastik-model mymodel.ilp
 84 | ```
 85 | 
 86 | ### Q: How do I check the quality of segmentation?
 87 | 
 88 | A: After a successful mcmicro run, two-channel tif files containing DAPI and nuclei/cell/cytoplasm outlines will reside in `qc/s3seg`. Segmentation quality can be assessed through visual inspection of these files in, e.g., [napari](https://napari.org/).
 89 | 
 90 | ### Q: How do I handle images acquired without pixel binning?
 91 | 
 92 | ![]({{ site.baseurl }}/images/FAQ-binning.png)
 93 | 
 94 | A: There are two adjustments to make:
 95 | 
 96 | 1. Adjust `--scalingFactor` for UnMicst, which controls the ratio of the current pixel width (W2) to exemplar pixel width (W1) and is not related to area (See schematic).
 97 | 1. In S3Segmenter, `--cytoDilation` controls the number of pixels from the edge of the nucleus to expand in creating the cytoplasm mask. Take the value optimized for 2x binned images and multiply it by 2 (i.e., if `--cytoDilation 3` is optimal for 2x binning, then the new value will be `--cytoDilation 6`).
 98 | 
 99 | Use `--unmicst-opts` and `--s3seg-opts` to pass the new values to UnMicst and S3Segmenter, respectively:
100 | 
101 | ```
102 | nextflow run labsyspharm/mcmicro --in /path/to/unbinned/data --unmicst-opts '--scalingFactor 0.5' --s3seg-opts '--cytoDilation 6'
103 | ```
104 | 
105 | ## Quantification
106 | 
107 | ### Q: How do I quantify multiple masks?
108 | 
109 | A: Use `--quant-opts` to specify the `--masks` parameter for quantification. Any file found in the corresponding `segmentation/` folder can be provided here. For example,
110 | 
111 | ```
112 | nextflow run labsyspharm/mcmicro --in /path/to/exemplar-001 --quant-opts '--masks cell.ome.tif nuclei.ome.tif'
113 | ```
114 | 
115 | will quantify cell and nuclei masks. The corresponding spatial feature tables can then be found in `quantification/unmicst-exemplar-001_cell.csv` and `quantification/unmicst-exemplar-001_nuclei.csv`, respectively.
116 | 
117 | ### Q: How do I compute the median expression of each channel?
118 | 
119 | A: Use `--quant-opts` to specify the corresponding `--intensity_props` parameter for quantification:
120 | 
121 | ```
122 | nextflow run labsyspharm/mcmicro --in /path/to/exemplar-001 --quant-opts '--intensity_props median_intensity'
123 | ```
124 | 


--------------------------------------------------------------------------------
/docs/funding/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: default-cylinter
3 | title: Funding
4 | nav_order: 9
5 | ---
6 | 
7 | # Funding
8 | 
9 | This work was supported by Ludwig Cancer Research and the Ludwig Center at Harvard and by NIH NCI grants U2C-CA233280 (Omic and Multidimensional Spatial Atlas of Metastatic Breast and Prostate Cancers) and U2C-CA233262 (Pre-cancer atlases of cutaneous and hematologic origin—PATCH Center) to Peter K. Sorger and Sandro Santagata as part of the [Human Tumor Atlas Network](https://humantumoratlas.org/). Development of computational methods and image processing software is supported by a Team Science Grant from the Gray Foundation, the Gates Foundation grant INV-027106, the David Liposarcoma Research Initiative, and the Emerson Collective.


--------------------------------------------------------------------------------
/docs/help/index.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: default-cylinter
 3 | title: Getting help
 4 | nav_order: 12
 5 | ---
 6 | 
 7 | <!-- **Questions** about executing the pipeline can be posted to [image.sc forums](https://forum.image.sc/tag/cylinter) under the `cylinter` tag. -->
 8 | 
 9 | **Bugs** can be reported by opening an issue at the [GitHub repository](https://github.com/labsyspharm/cylinter/issues).
10 | 


--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: default-cylinter
 3 | title: Home
 4 | nav_order: 1
 5 | description: ""
 6 | hero_heading: "Quality Control Software for Multiplex Microscopy."
 7 | hero_body: "CyLinter is used to identify and remove noisy single-cell instances in multiplex images of tissue."
 8 | hero_ctas:
 9 |   - label: "install cylinter"
10 |     link: "installation/"
11 |   - label: "GitHub Repo"
12 |     link: "https://github.com/labsyspharm/cylinter"
13 |   - label: "Publication/Citation"
14 |     link: "https://doi.org/10.1038/s41592-024-02328-0"
15 | # last_modified_date: 2021-03-28
16 | ---
17 | 
18 | <!-- <div class="image-container">
19 | <p class="image-holder">
20 | <img src="{{ site.baseurl }}/assets/gifs/solitary_saunter.gif" />
21 | </p>
22 | </div> -->
23 | 
24 | <div style = "text-align:right; font-size: 10px; z-index: 1000;">
25 |   Tomas Brunsdon via <a href="https://dribbble.com/shots/3281814-Solitary-Saunter/">Dribbble</a>
26 | </div>
27 | 
28 | {% include cylinter_gif.html %}
29 | 
30 | <br/>
31 | 
32 | {% include home.md %}
33 | 
34 | <!-- {% include youtube.html id="DY_F-eG9nm4" autoplay=true mute=true controls=false loop=true related=false %} -->
35 | 
36 | <br/>
37 | 


--------------------------------------------------------------------------------
/docs/installation/index.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: default-cylinter
 3 | title: Installation
 4 | nav_order: 2
 5 | has_children: false
 6 | ---
 7 | 
 8 | # Installation
 9 | 
10 | CyLinter is written in Python 3 and is compatible with MacOS, Windows, and Linux operating systems. The program can be installed via the cross-platform package manager, Conda.
11 | 
12 | ## 1. Install Miniconda
13 | 
14 | **NOTE**: If you already have Miniconda or Anaconda installed, [skip this section and jump to section 1B](#section-1b).
15 | 
16 | The following are examples of commands for quickly and quietly installing the latest version of the Miniconda installer for your operating system (MacOS - M1 / Intel 64-bit, Windows, Linux - Intel 64-bit). For other platforms, [consult the Miniconda download page](https://docs.conda.io/projects/miniconda/en/latest/index.html).
17 | 
18 | ### MacOS
19 | Open Terminal and paste the following commands:
20 | ```bash
21 | mkdir -p ~/miniconda3
22 | 
23 | # M1 chip
24 | curl https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-arm64.sh -o ~/miniconda3/miniconda.sh
25 | 
26 | # Intel 64-bit chip
27 | curl https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -o ~/miniconda3/miniconda.sh
28 | 
29 | bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3
30 | rm -f ~/miniconda3/miniconda.sh
31 | ```
32 | 
33 | ### Windows
34 | Open a Command Prompt and paste the following commands:
35 | ```cmd
36 | curl https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe -o miniconda.exe
37 | start /wait "" miniconda.exe /S
38 | del miniconda.exe
39 | ```
40 | 
41 | ### Linux
42 | Open a terminal window and paste the following commands:
43 | ```bash
44 | mkdir -p ~/miniconda3
45 | wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda3/miniconda.sh
46 | bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3
47 | rm -f ~/miniconda3/miniconda.sh
48 | ```
49 | 
50 | You should now [skip section 1B and go straight to section 2](#section-2).
51 | 
52 | ## 1B. For existing conda installations, set libmamba as the default dependency solver
53 | {: #section-1b}
54 | CyLinter depends on a complex set of packages and older Conda installations will struggle with this. If you already have Miniconda or Anaconda installed, we suggest that you run the following commands to update Conda itself and enable the libmamba dependency solver. This will help ensure CyLinter can be installed efficiently.
55 | 
56 | ``` bash
57 | conda update -n base conda
58 | conda install -n base conda-libmamba-solver
59 | conda config --set solver libmamba
60 | ```
61 | 
62 | ## 2. Install CyLinter
63 | {: #section-2}
64 | Install CyLinter into a dedicated conda environment with the following command:  
65 | 
66 | ``` bash
67 | conda create -n cylinter -c conda-forge -c labsyspharm cylinter=0.0.50
68 | ```
69 | 


--------------------------------------------------------------------------------
/docs/modules/PCA.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: default-cylinter
 3 | title: PCA
 4 | nav_order: 9
 5 | parent: Modules
 6 | ---
 7 | 
 8 | 9\. `PCA`: this is a fully automated module that performs [Horn’s parallel analysis](https://en.wikipedia.org/wiki/Parallel_analysis) indicating the number of PCs capturing non-random variation in the dataset to help the user determine whether 2 or 3 principal components should be used in the [clustering module]({{ site.baseurl }}/modules/clustering) implemented later in the pipeline. PCA scores plots for the first two PCs are computed on per-cell and per-sample bases to visualize how single cells and tissue samples are distributed with respect to each other. Ridge plots are also computed to visualize histogram alignment across marker channels.
 9 | 
10 | ### YAML configurations
11 | 
12 | | Parameter | Default | Description |
13 | | --- | --- | --- |
14 | | `channelExclusionsPCA` | [ ] | (list of strs) Immunomarkers to exclude from PCA analysis. |
15 | | `samplesToRemovePCA` | [ ] | (list of strs) Samples to exclude from PCA analysis. |
16 | | `dimensionPCA` | 2 | (int) Number of PCs to compute. |
17 | | `pointSize` | 90.0 | (float) Scatter point size for sample scores plot. |
18 | | `labelPoints` | True | (bool) Annotate scatter points with condition abbreviations from sampleMetadata configuration. |
19 | | `distanceCutoff` | 0.15 | (float) Maximum distance between data points in PCA scores plot to be annotated with a common label. Useful for increasing visual clarity of PCA plots containing many data points. Applicable when labelPoints is True. |
20 | | `conditionsToSilhouette` | [ ] | (list of strs) Abbreviated condition names whose corresponding scores plot points will be greyed out, left unannotated, and sent to the back of the plot (zorder). Useful for increasing visual clarity of PCA plots containing many data points. |
21 | 


--------------------------------------------------------------------------------
/docs/modules/aggregateData.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: default-cylinter
 3 | title: aggregateData
 4 | nav_order: 1
 5 | parent: Modules
 6 | ---
 7 | 
 8 | 1\. `aggregateData`: aggregates spatial feature tables from all tissues into a combined dataframe passed between modules; this step is fully automated.
 9 | 
10 | ### No YAML configurations
11 | 


--------------------------------------------------------------------------------
/docs/modules/areaFilter.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: default-cylinter
 3 | title: areaFilter
 4 | nav_order: 4
 5 | parent: Modules
 6 | ---
 7 | 
 8 | 4\. `areaFilter`: cell segmentation errors can be a significant source of noise in image-derived, single-cell data. In this module, users assign lower and upper bounds on cell segmentation area (pixel count) to remove severely under- and over-segmented cells. Cell segmentation area is a standard component of [MCMICRO](https://mcmicro.org/parameters/core.html#mcquant) feature table output and is calculated using [skimage.measure.regionprops()](https://scikit-image.org/docs/stable/api/skimage.measure.html#skimage.measure.regionprops). This module functions similarly to the `intensityFilter` module in that it allows users to assign lower and upper thresholds on interactive histogram widgets of per-cell data. Gaussian mixture modeling (GMM) assists in identifying default cutoffs that can be manually refined. Once thresholds have been adjusted for a given sample, users can visualize selected cells in their corresponding image by clicking the `Plot Points` button. Segmentation outlines are provided in the `layer list` at the left of the Napari viewer as a reference for evaluating segmentation quality. Data points falling between lower and upper sliders are carried forward into downstream analysis. Users will move to the next sample in the series by clicking the `Apply Gates and Move to Next Sample` button beneath the histogram. Users may jump between tissues in the series by entering the name of a given sample in the `Sample Name` field of the `Arbitrary Sample Selection` widget at the right of the Napari viewer to adjust thresholds of previously analyzed tissues. To re-define thresholds, remove the metadata associated with the target sample(s) from `cylinter_report.yml` located in the CyLinter output directory specified in `cylinter_config.yml` and re-run the `areaFilter` module with `cylinter --module areaFilter cylinter_config.yml`.
 9 | 
10 | ### YAML configurations
11 | 
12 | | Parameter | Default | Description |
13 | | --- | --- | --- |
14 | | `numBinsArea` | 50 | (int) Number of bins used to construct DNA area histograms. |
15 | 


--------------------------------------------------------------------------------
/docs/modules/clustering.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: default-cylinter
 3 | title: clustering
 4 | nav_order: 12
 5 | parent: Modules
 6 | ---
 7 | 
 8 | 12\. `clustering`: this module performs density-based hierarchical clustering with [HDBSCAN](https://hdbscan.readthedocs.io/en/latest/api.html) on [UMAP](https://umap-learn.readthedocs.io/en/latest/) (or [t-SNE](https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html)) embeddings to identify biologically relevant cell states in a dataset. In doing so, users will enter into the `Cluster and Plot` field at the top right of the Napari window an integer value for `Min Cluster Size` ([MCS](https://hdbscan.readthedocs.io/en/latest/api.html)): an HDBSCAN parameter that affects the clustering result. The embedding will be shown in the Napari window colored by three different variables for review: **1)** HDBSCAN cluster, **2)** gate-based cell type classification (see [gating module]({{ site.baseurl }}/modules/gating) for details), and **3)** tissue sample. Clustering cells may be viewed in the context of a given tissue by pressing and holding the mouse (or track pad) button and lassoing data points in the HDBSCAN plot, typing the name of a sample of interest in the `Sample Name` field, and clicking the `View Lassoed Points` button. Selected cells will then appear as scatter points in their corresponding image. After each MCS entry, a separate window showing the results of [silhouette analysis](https://www.sciencedirect.com/science/article/pii/0377042787901257) will also be shown. Cells with positive silhouette coefficients indicate their current cluster assignment is suitable, while those with negative coefficients would be better off in another cluster indicative of under-clustering. To aid in cluster optimization, a range of MCS values can be entered into the `Sweep MCS Range` field and the number of clusters associated with each MCS value will be printed to the terminal window without the results being plotted into the Napari window. 
Clicking the `Save` button at the bottom right of the Napari viewer causes the program to append the current cluster labels to the single-cell dataframe and proceed to the next module.
 9 | 
10 | ### YAML configurations
11 | 
12 | | Parameter | Default | Description |
13 | | --- | --- | --- |
14 | | `embeddingAlgorithm` | "UMAP" | (str) Embedding algorithm to use for clustering (options: "TSNE" or "UMAP"). |
15 | | `channelExclusionsClustering` | [ ] | (list of strs) Immunomarkers to exclude from clustering. |
16 | | `samplesToRemoveClustering` | [ ] | (list of strs) Samples to exclude from clustering. |
17 | | `normalizeTissueCounts` | True | (bool) Make the number of cells per tissue for clustering more similar through sample-weighted random sampling. |
18 | | `fracForEmbedding` | 1.0 | (float) Fraction of cells to be embedded (range: 0.0-1.0). Limits amount of data passed to downstream modules. |
19 | | `dimensionEmbedding` | 2 | (int) Dimension of the embedding (options: 2 or 3). |
20 | | `colormapAnnotationClustering` | "Sample" | (str) Metadata annotation to colormap the embedding: Sample or Condition. |
21 | | `metric` | "euclidean" | (str) Distance metric for computing embedding. Choose from valid metrics used by scipy.spatial.distance.pdist: "braycurtis", "canberra", "chebyshev", "cityblock", "correlation", "cosine", "dice", "euclidean", "hamming", "jaccard", "jensenshannon", "kulsinski", "mahalanobis", "matching", "minkowski", "rogerstanimoto", "russellrao", "seuclidean", "sokalmichener", "sokalsneath", "sqeuclidean", "yule". |
22 | | `perplexity` | 50.0 | (float) This is a [tSNE-specific configuration](https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html) related to the number of nearest neighbors used in other manifold learning algorithms. Larger datasets usually require larger perplexity. Different values can result in significantly different results. |
23 | | `earlyExaggeration` | 12.0 | (float) This is a [tSNE-specific configuration](https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html). For larger values, the space between natural clusters will be larger in the embedded space. |
24 | | `learningRateTSNE` | 200.0 | (float) This is a [tSNE-specific configuration](https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html). tSNE learning rate (typically between 10.0 and 1000.0). |
25 | | `randomStateTSNE` | 5 | (int) This is a [tSNE-specific configuration](https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html). It determines the random number generator for reproducible results across multiple function calls. |
26 | | `nNeighbors` | 6 | (int) This is a [UMAP-specific configuration](https://umap-learn.readthedocs.io/en/latest/api.html). It determines the size of local neighborhood (in terms of number of neighboring sample points) used for manifold approximation. Larger values result in more global views of the manifold, while smaller values result in more local data being preserved. In general values should be in the range 2 to 100. |
27 | | `learningRateUMAP` | 1.0 | (float) This is a [UMAP-specific configuration](https://umap-learn.readthedocs.io/en/latest/api.html). It determines the initial learning rate for the embedding optimization. |
28 | | `minDist` | 0.1 | (float) This is a [UMAP-specific configuration](https://umap-learn.readthedocs.io/en/latest/api.html). Determines the effective minimum distance between embedded points. Smaller values will result in a more clustered/clumped embedding where nearby points on the manifold are drawn closer together, while larger values will result in a more even dispersal of points. The value should be set relative to the spread value, which determines the scale at which embedded points will be spread out. |
29 | | `repulsionStrength` | 5.0 | (float) This is a [UMAP-specific configuration](https://umap-learn.readthedocs.io/en/latest/api.html). Determines the weighting applied to negative samples in low dimensional embedding optimization. Values higher than one will result in greater weight being given to negative samples. |
30 | | `randomStateUMAP` | 5 | (int) This is a [UMAP-specific configuration](https://umap-learn.readthedocs.io/en/latest/api.html). It determines the random number generator for reproducible results across multiple function calls. |
31 | 


--------------------------------------------------------------------------------
/docs/modules/clustermap.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: default-cylinter
 3 | title: clustermap
 4 | nav_order: 13
 5 | parent: Modules
 6 | ---
 7 | 
 8 | 13\. `clustermap`: this is a fully automated module that computes clustered heatmaps of channel z-scores for clusters identified in the [clustering module]({{ site.baseurl }}/modules/clustering) which are saved into the `clustermap` subdirectory of the main CyLinter output directory.
 9 | 
10 | ### No YAML configurations
11 | 


--------------------------------------------------------------------------------
/docs/modules/curateThumbnails.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: default-cylinter
 3 | title: curateThumbnails
 4 | nav_order: 15
 5 | parent: Modules
 6 | ---
 7 | 
 8 | 15\. `curateThumbnails`: this module is fully automated. It programmatically generates image patches of cells drawn at random from each cluster identified in the [clustering module]({{ site.baseurl}}/modules/clustering) and each cell type defined in the [gating module]({{ site.baseurl}}/modules/gating) for visual review. The number of examples shown per cluster is adjusted using the `numThumbnails` parameter in `cylinter_config.yml`. The size of the image window around the reference cells is controlled by the `windowSize` parameter in the same configuration file. A white pixel corresponding to the nuclear centroid of the example cell is shown in each image as a reference. Images can be saved with or without segmentation outlines superimposed by toggling the `segOutlines` parameter in the configuration file. To facilitate interpretation, only the three most highly expressed protein markers are shown per cluster (based on channel z-scores). Image contrast settings defined in the [setContrast module]({{ site.baseurl }}/modules/setContrast) are applied to improve image appearance. Image galleries for each cluster and gate-based cell type class are saved to the `thumbnails` directory in the `clustering` subdirectory of the main CyLinter output directory. This path is `thumbnails/2d/frequency_stats` in the case of 2D clusterings and `thumbnails/3d/frequency_stats` in the case of 3D clusterings.
 9 | 
10 | ### YAML configurations
11 | 
12 | | Parameter | Default | Description |
13 | | --- | --- | --- |
14 | | `numThumbnails` | 25 | (int) Number of example cells per cluster to be curated. |
15 | | `windowSize` | 30 | (int) Number of pixels from the centroid of the reference cell in x and y dimensions. |
16 | | `segOutlines` | True | (bool) Whether to overlay cell segmentation outlines on thumbnail images. |
17 | 


--------------------------------------------------------------------------------
/docs/modules/cycleCorrelation.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: default-cylinter
 3 | title: cycleCorrelation
 4 | nav_order: 5
 5 | parent: Modules
 6 | ---
 7 | 
 8 | 5\. `cycleCorrelation`: this module is relevant to cyclic imaging technologies (e.g., CyCIF, CODEX, mIHC) and is designed to remove cells that have shifted or become detached from the microscope slide over multi-cycle imaging studies, as these cells appear negative for all markers after the movement or loss event. Similar to the `intensityFilter` and `areaFilter` modules, users will gate on interactive histogram widgets of per-cell signals. However, the histograms in this module represent the log<sub>10</sub>-transformed ratio of DNA intensities between the first and last imaging cycles (log<sub>10</sub>[cycle<sub>1</sub>/cycle<sub>n</sub>]). Lower and upper cutoff sliders are adjusted to select cells with highly-correlated signals (typically at or around zero, as log<sub>10</sub>[1/1] = 0). Like in the `intensityFilter` and `areaFilter` modules, Gaussian mixture modeling (GMM) is used to identify initial default cutoffs that can be manually refined. Once lower and upper cutoffs are adjusted, users can visualize selected cells in their corresponding image by clicking the `Plot Points` button. DNA channels for the first and last imaging cycles are shown for reference to visualize cells that have shifted or become detached from the slide between the first and last imaging cycles. Data points between lower and upper cutoffs are carried forward into downstream analysis. Users will move to the next sample in the series by clicking the `Apply Gates and Move to Next Sample` button beneath the histogram. Users may jump between tissues in the series by entering the name of a given sample in the `Sample Name` field of the `Arbitrary Sample Selection` widget at the right of the Napari viewer to adjust thresholds of previously analyzed tissues. 
To re-define thresholds, remove the metadata associated with the target sample(s) from `cylinter_report.yml` located in the CyLinter output directory specified in `cylinter_config.yml` and re-run the `cycleCorrelation` module with `cylinter --module cycleCorrelation cylinter_config.yml`.
 9 | 
10 | 
11 | ### YAML configurations
12 | 
13 | | Parameter | Default | Description |
14 | | --- | --- | --- |
15 | | `numBinsCorrelation` | 50 | (int) Number of bins used to construct DNA<sub>1</sub>/DNA<sub>n</sub> histograms. |
16 | 


--------------------------------------------------------------------------------
/docs/modules/frequencyStats.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: default-cylinter
 3 | title: frequencyStats
 4 | nav_order: 14
 5 | parent: Modules
 6 | ---
 7 | 
 8 | 14\. `frequencyStats`: this module is fully automated. It computes pairwise statistics for binary declarations specified in the [sampleMetadata]({{ site.baseurl}}/structure/#general-configurations) parameter of `cylinter_config.yml`. Test results are saved to a directory called `frequency_stats` in the clustering subdirectory of the main CyLinter output directory. This path is `clustering/2d/frequency_stats` in the case of 2D clusterings and `clustering/3d/frequency_stats` in the case of 3D clusterings.
 9 | 
10 | ### YAML configurations
11 | 
12 | | Parameter | Default | Description |
13 | | --- | --- | --- |
14 | | `controlGroups` | ["CANCER-FALSE"] | (list of strs) Corresponds to control groups for each binary declaration specified as the fourth elements of [sampleMetadata]({{ site.baseurl }}/workflow/input#general-configurations) values. |
15 | | `denominatorCluster` | null | (null or int) Cluster to be used as the denominator when computing cluster frequency ratios. Set to null first, then change to cluster number (int) to normalize cluster frequencies to a particular identified cluster if desired. |
16 | | `FDRCorrection` | False | (bool) Whether to compute p-values and false discovery rate (FDR)-corrected q-values (True) or compute uncorrected p-values only (False). |


--------------------------------------------------------------------------------
/docs/modules/gating.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: default-cylinter
 3 | title: gating
 4 | nav_order: 11
 5 | parent: Modules
 6 | ---
 7 | 
 8 | 11\. `gating` (optional): this module allows users to classify cell types present in the dataset using the [SYLARAS](https://www.sylaras.org/#details) approach to high-dimensional, single-cell gating[[1]](#1). In doing so, users assign a set of manual gating thresholds on a per-marker and per-sample basis using interactive scatter plots of marker (x-axis) x cell segmentation area (y-axis). Gated cells (i.e. those falling to the right of the gate) can be visualized as scatter points in their respective image channel by clicking the `Plot Points` button to confirm accurate gate placement. After an optimal gate has been identified, users will move to the next marker/sample combination in the series by clicking the `Apply Gate and Move to Next Sample` button beneath the scatter plot. If no gate selection is made, all cells in the current plot will be carried forward into downstream analysis. Users may jump between markers and tissues in the series by entering their names into respective fields in the `Arbitrary Sample/Marker Selection` widget at the bottom right of the Napari viewer and clicking the `Enter` button. This can allow for the adjustment of previously defined gates. PDFs showing scatter plots with superimposed gates are stored in the `gating` output directory as a reference which can be updated any time by entering the name of a specific marker in the `Marker Name` field and clicking the `Refresh PDF(s)` button at the bottom right of the Napari viewer; typing "ALL" into the `Marker Name` field will render gated scatter plots for all markers in the analysis. Gates may be re-defined by removing the metadata associated with particular marker/sample combinations in `cylinter_report.yml` located in the CyLinter output directory specified in `cylinter_config.yml` and re-running the `gating` module with `cylinter --module gating cylinter_config.yml`.
 9 | 
10 | After all gates have been applied, signal intensities are automatically binarized according to the defined gating thresholds such that cells falling to the right of the gate are considered immunopositive, and those falling to the left of the gate are considered immunonegative. Unique Boolean vectors (i.e., binary phenotype profiles) emerging from this procedure are then mapped to biologically-meaningful cell types previously defined in the YAML configuration file (`cylinter_config.yml`). This module can be bypassed by toggling the `gating` parameter to `False` (see YAML configurations below).
11 | 
12 | ### YAML configurations
13 | 
14 | 
15 | | Parameter | Default | Description |
16 | | --- | --- | --- |
17 | | `gating` | "False" | (bool) Whether to perform SYLARAS-style gating on single-cell data |
18 | | `channelExclusionsGating` | [ ] | (list of strs) Immunomarkers to exclude from gating. |
19 | | `samplesToRemoveGating` | [ ] | (list of strs) Samples to exclude from gating. |
20 | | `vectorThreshold` | 100 | (int) Visualize Boolean vectors with cell counts >= vectorThreshold |
21 | | `classes` | Tumor: definition: [+pan-CK, +KI67, -aSMA, -CD45] subsets: [CDKN1A] | (dict) Boolean immunophenotype signatures. +marker = immunopositive, -marker = immunonegative |
22 | 
23 | ## References
24 | 
25 | <a id="1">[1]</a>
26 | Baker GJ. et al. [SYLARAS: A Platform for the Statistical Analysis and Visual Display of Systemic Immunoprofiling Data and Its Application to Glioblastoma](https://www.sciencedirect.com/science/article/pii/S2405471220302854). **Cell Systems** (2020)
27 | 
28 | 


--------------------------------------------------------------------------------
/docs/modules/index.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: default-cylinter
 3 | title: Modules
 4 | nav_order: 6
 5 | has_children: true
 6 | ---
 7 | 
 8 | # Module list
 9 | 
10 | | Name | Purpose | Description/YAML Configurations |
11 | | :-- | :-- | :-- |
12 | | `aggregateData` | Combine feature tables | [Details]({{ site.baseurl }}/modules/aggregateData) |
13 | | `selectROIs` | Define tissue ROIs | [Details]({{ site.baseurl }}/modules/selectROIs) |
14 | | `intensityFilter` | Filter out-of-focus and counterstain oversaturated cells | [Details]({{ site.baseurl }}/modules/intensityFilter) |
15 | | `areaFilter` | Filter over- and under-segmented cells | [Details]({{ site.baseurl }}/modules/areaFilter) |
16 | | `cycleCorrelation` | Filter unstable cells | [Details]({{ site.baseurl }}/modules/cycleCorrelation) |
17 | | `logTransform` | Log10-transform immunomarker signals | [Details]({{ site.baseurl }}/modules/logTransform)
18 | | `pruneOutliers` | Filter channel outliers | [Details]({{ site.baseurl }}/modules/pruneOutliers) |
19 | | `metaQC` |  Reclassify cells according to QC status  | [Details]({{ site.baseurl }}/modules/metaQC)
20 | | `PCA` | Run principal component analysis | [Details]({{ site.baseurl }}/modules/PCA) |
21 | | `setContrast` | Adjust image contrast settings | [Details]({{ site.baseurl }}/modules/setContrast)
22 | | `gating` | Identify cell states via manual thresholding | [Details]({{ site.baseurl }}/modules/gating)
23 | | `clustering` | Identify cell states via unsupervised clustering | [Details]({{ site.baseurl }}/modules/clustering)
24 | | `clustermap` | Visualize cell state protein expression | [Details]({{ site.baseurl }}/modules/clustermap)
25 | | `frequencyStats` | Compute cluster frequency statistics | [Details]({{ site.baseurl }}/modules/frequencyStats) |
26 | | `curateThumbnails` | Visualize example cells from each cluster | [Details]({{ site.baseurl }}/modules/curateThumbnails)
27 | 
28 | <!-- # Suggest a module
29 | The CyLinter team is collaborating with NCI-sponsored consortia (CSBC and PS-ON) to host hackathons to improve and automate existing methods for microscopy quality control like those instantiated by the CyLinter pipeline. CyLinter modules are also being added incrementally by a diverse developer community seeded by the NCI [Human Tissue Atlas Network](https://humantumoratlas.org/). See what modules are currently available [here]({{ site.baseurl }}/modules/index). Module suggestions can be made by posting to [https://forum.image.sc/](https://forum.image.sc/) and tagging your post with the `cylinter` tag. -->
30 | 


--------------------------------------------------------------------------------
/docs/modules/intensityFilter.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: default-cylinter
 3 | title: intensityFilter
 4 | nav_order: 3
 5 | parent: Modules
 6 | ---
 7 | 
 8 | 3\. `intensityFilter`: out-of-focus cells and those oversaturated with nuclear counterstain introduce noise into image-derived, single-cell data. This is because out-of-focus cells tend to have unreliable immunomarker signals and oversaturated nuclei tend to be poorly segmented. In this module, users interact with histogram widgets of per-cell counterstain signal intensities to assign upper and lower bounds on DNA signal intensity. Gaussian mixture modeling (GMM) is used to identify default cutoffs that can be manually refined. Users can visualize cells falling between lower and upper cutoffs as scatter points in their respective image colored by DNA signal intensity by clicking the `Plot Points` button. Selected data points are then carried forward into downstream analysis. Users will move to the next sample in the series by clicking the `Apply Gates and Move to Next Sample` button beneath the histogram. Users may jump between tissues in the series by entering the name of a given sample in the `Sample Name` field of the `Arbitrary Sample Selection` widget at the right of the Napari viewer to adjust thresholds of previously analyzed tissues. To re-define thresholds, remove the metadata associated with the target sample(s) from `cylinter_report.yml` located in the CyLinter output directory specified in `cylinter_config.yml` and re-run the `intensityFilter` module with `cylinter --module intensityFilter cylinter_config.yml`.
 9 | 
10 | ### YAML configurations
11 | 
12 | | Parameter | Default | Description |
13 | | --- | --- | --- |
14 | | `numBinsIntensity` | 50 | (int) Number of bins used to construct DNA intensity histograms. |
15 | 


--------------------------------------------------------------------------------
/docs/modules/logTransform.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: default-cylinter
 3 | title: logTransform
 4 | nav_order: 6
 5 | parent: Modules
 6 | ---
 7 | 
 8 | 6\. `logTransform`: this module performs log<sub>10</sub>-transformation of antibody marker signals and is fully automated.
 9 | 
10 | ### No YAML configurations
11 | 


--------------------------------------------------------------------------------
/docs/modules/metaQC.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: default-cylinter
 3 | title: metaQC
 4 | nav_order: 8
 5 | parent: Modules
 6 | ---
 7 | 
 8 | 8\. `metaQC` (optional): this module helps control for curation bias by correcting for inaccuracies in ROI gating and data cutoff placement by performing unsupervised clustering on equal sized batches of clean (retained) and noisy (redacted) single-cell data using a combination of [UMAP](https://umap-learn.readthedocs.io/en/latest/) (or [t-SNE](https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html)) and [HDBSCAN](https://hdbscan.readthedocs.io/en/latest/api.html). Noisy cells clustering with predominantly clean cells are returned to the dataframe, while clean cells clustering with predominantly noisy clusters are dropped from the dataframe. After selecting a `Min Cluster Size (MCS)` value and clicking the `Cluster and Plot` button in the `Plot Single MCS` widget at the top right of the Napari viewer, users are presented with UMAP (or t-SNE) embeddings of cells colored by **1)** HDBSCAN cluster, **2)** QC status, **3)** reclassification status, and **4)** sample. Clustering is optimized by testing different `MCS` values: an HDBSCAN parameter that significantly affects the clustering result, see [HDBSCAN documentation](https://hdbscan.readthedocs.io/en/latest/api.html) for details. To assist in the identification of a stable clustering solution, a range of `min_cluster_size` values may be entered into the `Sweep MCS Range` widget at the right of the Napari viewer and the number of clusters associated with each `min_cluster_size` will be printed to the terminal window. Cells in the HDBSCAN plot can be lassoed and visualized in a given sample by pressing and holding the mouse (or track pad) button and drawing around cells of interest. The name of the sample of interest is then entered into the `Sample Name` field and the `View Lassoed Points` button is clicked. Selected cells will appear as scatter points in their corresponding image colored by the module used to filter them from the analysis. 
Using clean and noisy reclassification cutoff selectors, users can specify tolerance limits on the proportion of clusters composed of clean (`Reclass Clean`) and noisy (`Reclass Noisy`) data for clustering cells to be reclassified. Unclustered cells (i.e., cells with HDBSCAN cluster label -1) whose original QC status is clean are reclassified as noisy. 
 9 | 
10 | Clicking the `Save` button at the bottom right of the Napari viewer causes the program to reclassify the data according to the current clustering solution and reclassification cutoffs. After the first chunk of clean and noisy data has been reclassified, additional chunks are reclassified using the same UMAP, HDBSCAN, and reclassification parameters. To re-define clustering or reclassification cutoffs, remove the metadata associated with the metaQC module from `cylinter_report.yml` located in the CyLinter output directory specified in `cylinter_config.yml` and re-run the `metaQC` module with `cylinter --module metaQC cylinter_config.yml`. This module can be bypassed by toggling the `metaQC` parameter to `False` (see YAML configurations below). Regardless of the `metaQC` parameter setting, a pie chart showing the fraction of data redacted by each QC data filtration module (`selectROIs`, `intensityFilter`, `areaFilter`, `cycleCorrelation`, `pruneOutliers`) is saved to the output subdirectory for the `metaQC` module (`censored_by_stage.pdf`).
11 | 
12 | ### YAML configurations
13 | 
14 | | Parameter | Default | Description |
15 | | --- | --- | --- |
16 | | `metaQC` | True | (bool) Whether to perform data reclassification based on unsupervised clustering results of combinations of clean and noisy (previously-redacted) data. |
17 | | `embeddingAlgorithmQC` | "UMAP" | (str) Embedding algorithm used for clustering (options: "TSNE" or "UMAP"). |
18 | | `channelExclusionsClusteringQC` | [ ] | (list of strs) Immunomarkers to exclude from clustering. |
19 | | `samplesToRemoveClusteringQC` | [ ] | (list of strs) Samples to exclude from clustering. |
20 | | `percentDataPerChunk` | 0.2 | (float) Fraction of data (range: 0.0-1.0) to undergo embedding and clustering per reclassification cycle. |
21 | | `colormapAnnotationQC` | "Sample" | (str) Metadata annotation to colormap the embedding: `Sample` or `Condition`. |
22 | | `metricQC` | "euclidean" | (str) Distance metric for computing embedding. Choose from valid metrics used by scipy.spatial.distance.pdist: "braycurtis", "canberra", "chebyshev", "cityblock", "correlation", "cosine", "dice", "euclidean", "hamming", "jaccard", "jensenshannon", "kulsinski", "mahalanobis", "matching", "minkowski", "rogerstanimoto", "russellrao", "seuclidean", "sokalmichener", "sokalsneath", "sqeuclidean", "yule". |
23 | | `perplexityQC` | 50.0 | (float) This is a tSNE-specific configuration (https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html) related to the number of nearest neighbors used in other manifold learning algorithms. Larger datasets usually require larger perplexity. Different values can result in significantly different results. |
24 | | `earlyExaggerationQC` | 12.0 | (float) This is a tSNE-specific configuration (https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html). For larger values, the space between natural clusters will be larger in the embedded space. |
25 | | `learningRateTSNEQC` | 200.0 | (float) This is a tSNE-specific configuration (https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html). tSNE learning rate (typically between 10.0 and 1000.0). |
26 | | `randomStateQC` | 5 | (int) This is a tSNE-specific configuration (https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html). It determines the random number generator for reproducible results across multiple function calls. |
27 | | `nNeighborsQC` | 5 | (int) This is a UMAP-specific configuration (https://umap-learn.readthedocs.io/en/latest/api.html). It determines the size of local neighborhood (in terms of number of neighboring sample points) used for manifold approximation. Larger values result in more global views of the manifold, while smaller values result in more local data being preserved. In general values should be in the range 2 to 100. |
28 | | `learningRateUMAPQC` | 1.0 | (float) This is a UMAP-specific configuration (https://umap-learn.readthedocs.io/en/latest/api.html). It determines the initial learning rate for the embedding optimization. |
29 | | `minDistQC` | 0.1 | (float) This is a UMAP-specific configuration (https://umap-learn.readthedocs.io/en/latest/api.html). Determines the effective minimum distance between embedded points. Smaller values will result in a more clustered/clumped embedding where nearby points on the manifold are drawn closer together, while larger values will result in a more even dispersal of points. The value should be set relative to the spread value, which determines the scale at which embedded points will be spread out. |
30 | | `repulsionStrengthQC` | 5.0 | (float) This is a UMAP-specific configuration (https://umap-learn.readthedocs.io/en/latest/api.html). Determines the weighting applied to negative samples in low dimensional embedding optimization. Values higher than one will result in greater weight being given to negative samples. |
31 | 


--------------------------------------------------------------------------------
/docs/modules/pruneOutliers.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: default-cylinter
 3 | title: pruneOutliers
 4 | nav_order: 7
 5 | parent: Modules
 6 | ---
 7 | 
 8 | 7\. `pruneOutliers`: cells affected by visual artifacts such as antibody aggregates or illumination aberrations appear as bright outliers in affected channels. Conversely, image background subtraction can have the unintended consequence of creating cells with signal intensities at or below zero that, on image clipping and log-transformation, are far lower than values associated with biologically relevant signals. Both of these scenarios can significantly impact data interpretation. In this module, users remove any residual channel outliers from tissues not captured by the `selectROIs` module (e.g., small antibody aggregates) by applying lower and upper percentile cutoffs on marker intensity. Scatter plots (or hexbins, see YAML configurations below) are used to visualize channel-specific intensity distributions before and after cutoffs are applied. Marker intensities are plotted against cell segmentation area which is used as a dummy variable to create 2D plots so that small numbers of outliers can be easily detected. Post-cutoff distributions are shown on a normalized (0-1) x-axis. By entering the name of a given sample in the `Sample Name` field and clicking the `view Outliers` button, users can visualize dim and bright outliers as scatter points (dim = magenta, bright = cyan) in their respective image channels. Users will move to the next channel in the series by clicking the `Apply Cutoffs and Move to Next Marker` button beneath the plots. Note that cells are dropped from the marker channels in an ordered series. Thus, users can elect to re-start outlier removal from a given marker by entering the name of the target channel in the `Re-start from Marker` field and clicking the enter button, but must re-curate outliers in all subsequent channels as well. If no cutoffs are applied for a given marker, all cells in the plots will be carried forward into the analysis of the subsequent marker. 
To re-define percentile cutoffs, remove the metadata associated with the target channel(s) from `cylinter_report.yml` located in the CyLinter output directory specified in `cylinter_config.yml` and re-run the `pruneOutliers` module with `cylinter --module pruneOutliers cylinter_config.yml`.
 9 | 
10 | ### YAML configurations
11 | 
12 | | Parameter | Default | Description |
13 | | --- | --- | --- |
14 | | `hexbins` | False | (bool) Whether to use hexbins (True) or scatter plots (False) to plot single-cell signal intensities. Scatter plots allow for higher resolution, but may lead to long rendering times with large datasets.|
15 | | `hexbinGridSize` | 20 | (int) The number of hexagons in the x-direction; higher values increase bin resolution. The number of hexagons in the y-direction is chosen such that the hexagons are approximately regular. |
16 | 


--------------------------------------------------------------------------------
/docs/modules/selectROIs.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: default-cylinter
 3 | title: selectROIs
 4 | nav_order: 2
 5 | parent: Modules
 6 | ---
 7 | 
 8 | {: .no_toc }
 9 | 
10 | <details open markdown="block">
11 |   <summary>
12 |     Table of contents
13 |   </summary>
14 |   {: .text-delta }
15 | 1. TOC
16 | {:toc}
17 | </details>
18 | 
19 | 2\. `selectROIs`: [manual](#manual-roi-selection) and [automated](#automated-artifact-detection) tools are used to highlight regions of tissue affected by microscopy artifacts (e.g. illumination aberrations, slide debris, out-of-focus image tiles, mis-registered regions of tissue, etc.).
20 | 
21 | ### Manual ROI selection
22 | Regions of interest (ROIs) are manually drawn around artifacts by clicking on the `Manual ROI Selections (neg.)` image layer in the `layer list` at the left of the Napari viewer then clicking on one of the built-in polygon selection tools from the `layer controls` dock (i.e. circle, square, triangle, or lasso icons above the `layers list`). The mouse button (or track pad) is then clicked and held to outline an artifact in the image window. Clicking the escape key allows for additional ROIs to be drawn. Both positive and negative ROI selection methods are available (see `delint` configuration in `cylinter_config.yml` for details). In the case of negative selection (i.e. `delint=True`, default), cells in ROI boundaries are dropped from the analysis; negative selection is the preferred method for tissues exhibiting diffuse artifacts. Positive selection works best on samples exhibiting large regions of artifact-free tissue that can be highlighted by one or a few ROIs. Cells selected in this case are carried forward into downstream analysis. 
23 | 
24 | ### Automated Artifact Detection
25 | To supplement manual artifact curation, users can choose to run an automated artifact detection (AAD) algorithm on individual image channels by selecting the target channel from the pulldown menu in the `Automated Artifact Detection` widget at the right of the Napari window and clicking the `Compute Artifact Mask` button. Translucent white artifact masks will then appear over regions of tissue that the model flags as putative artifacts. When the `auto` box is checked, the model is run using a reasonable default sensitivity parameter. Sensitivity of the algorithm can be adjusted manually by changing the value in the spinbox labeled `Sensitivity`. Each time the algorithm is run on a given channel it adds two layers to the `layers list` at the left of the Napari viewer. The first layer shows the artifact masks. The second layer shows the seed points corresponding to the different artifacts in the image. Seed points are not visible by default, but can be toggled on by clicking the eye icon shown in the `Artifact Seeds` layer. Individual seed points (and their corresponding artifact masks) can be modified or removed from a given channel by highlighting the `Artifact Seeds` layer, selecting the `arrow icon` in the `layer controls` dock to enable point selection mode, and pressing and holding the mouse button to drag over the target seed point to highlight it. Once highlighted, users can fine-tune the artifact mask associated with the seed by changing the `Tolerance` value in the `Fine-tuning` widget at the right of the Napari viewer or delete the seed entirely by clicking the `x` button in the `layer controls` dock. These AAD tailoring features are designed to give users flexibility over automated artifact masks without the need to re-run the AAD algorithm.
26 | 
27 | ### Workflow
28 | Once all ROIs for a given sample have been generated, users will move to the next sample in the series by clicking the `Apply ROI(s) and Move to Next Sample` button at the top right of the Napari window. If no ROIs are drawn for a given sample, all cells in that tissue will be carried forward into downstream analysis. Users may also jump between samples by entering the name of a given sample in the `Sample Name` field at the right of the Napari viewer to add, delete, or modify manual or automated ROIs of previously analyzed samples or refer to arbitrary tissues in the curation of ROIs for a given sample. ROIs can be added, removed, or modified by re-running the `selectROIs` module.
29 | 
30 | ### YAML configurations
31 | 
32 | | Parameter | Default | Description |
33 | | --- | --- | --- |
34 | | `delintMode` | False | (bool) Whether to drop (True; negative selection) or retain (False; positive selection) cells selected by ROIs. |
35 | | `showAbChannels` | True | (bool) Whether to show all immunomarker channels (True) when Napari is open (may be memory limiting) or show only cycle 1 DNA (False). |
36 | | `samplesForROISelection` | [ ] | (list of strs) Sample names for ROI selection specified according to the first elements of [sampleMetadata]({{ site.baseurl }}/structure/#general-configurations) configuration.
37 | | `autoArtifactDetection` | True | (bool) Whether to display tools for automated artifact detection in Napari window. |
38 | | `artifactDetectionMethod` | "classical" | (str) Algorithm used for automated artifact detection (current option: "classical"). Deep learning method currently under development. |


--------------------------------------------------------------------------------
/docs/modules/setContrast.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: default-cylinter
 3 | title: setContrast
 4 | nav_order: 10
 5 | parent: Modules
 6 | ---
 7 | 
 8 | 10\. `setContrast`: in this module, image channel contrast is adjusted using the `contrast limits` slider bar in the `layer controls` dock at the top left of the Napari viewer. For each channel, contrast limits are set on a reference image whose median channel value is nearest to the 85th quantile of tissues in the batch; these limits are then applied to that image channel for all tissues in the batch. The 85th quantile (not the 100th) is chosen to avoid picking a tissue whose channel intensity is driven by bright artifacts in an outlier sample. The lower slider of the `contrast limits` slider bar is used to reduce background signal intensities by sliding to the right, while the upper slider is used to increase channel gain by sliding to the left. Once lower and upper sliders have been adjusted on the reference sample, the fit can be checked against other tissues in the batch by entering their name in the `Sample Name` field of the `Arbitrary Sample Selection` widget at the right of the Napari viewer and clicking the `Enter` button. Clicking the `Apply Limits and Move to Next Channel` button causes the module to move to the next channel for contrast adjustment. To re-define contrast settings, simply re-run the `setContrast` module with `cylinter --module setContrast cylinter_config.yml`.
 9 | 
10 | <!-- Once contrast limits have been defined they will automatically be applied to any module in which image channels are shown (e.g., `selectROIs`, `gating` and `curateThumbnails`, etc.)  -->
11 | 
12 | 
13 | ### No YAML configurations
14 | 


--------------------------------------------------------------------------------
/docs/run/index.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: default-cylinter
 3 | title: Running CyLinter
 4 | nav_order: 3
 5 | has_children: false
 6 | ---
 7 | 
 8 | # Running CyLinter
 9 | 
10 | ## Step 1:
11 | Ensure that the desired configurations for a given analysis have been set in CyLinter's [YAML configuration file]({{ site.baseurl }}/structure/index#yaml-configuration-file). A copy of this file can be found in the virtual environment into which CyLinter was installed (`.../miniconda3/envs/cylinter/lib/pythonXX/site-packages/cylinter/cylinter_config.yml`).
12 | 
13 | ## Step 2:
14 | Activate the CyLinter virtual environment:
15 | 
16 | ``` bash
17 | conda activate cylinter
18 | ```
19 | 
20 | ## Step 3:
21 | Execute the program from the beginning of the pipeline by passing the YAML configuration file (which should be stored at the top level of the CyLinter [input directory]({{ site.baseurl }}/structure/index)) to the `cylinter` command:  
22 | 
23 | ``` bash
24 | cylinter <input_dir>/cylinter_config.yml
25 | ```
26 | 
27 | CyLinter bookmarks progress by automatically caching partially-redacted spatial feature tables in the `checkpoints/` directory of the CyLinter [output directory]({{ site.baseurl }}/workflow/index). To re-run any of the [Modules]({{ site.baseurl }}/modules/index), pass the `--module` flag followed by the name of a specific module:
28 | 
29 | ``` bash
30 | cylinter --module <module-name> <input_dir>/cylinter_config.yml
31 | ```
32 | 


--------------------------------------------------------------------------------
/docs/structure/index.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: default-cylinter
 3 | title: Input File Structure
 4 | nav_order: 5
 5 | has_children: false
 6 | ---
 7 | 
 8 | {: .no_toc }
 9 | 
10 | <details open markdown="block">
11 |   <summary>
12 |     Table of contents
13 |   </summary>
14 |   {: .text-delta }
15 | 1. TOC
16 | {:toc}
17 | </details>
18 | 
19 | # Input directory structure
20 | 
21 | CyLinter can analyze any multiplex imaging data so long as they conform to the expected file formats and folder structure. In the below example, `<sample#>` corresponds to the name of a particular tissue sample.
22 | 
23 | ``` bash
24 | <INPUT DIR>
25 | ├── cylinter_config.yml
26 | ├── csv/
27 | │   ├── <sample1>.csv
28 | │   └── <sample2>.csv
29 | ├── markers.csv
30 | ├── mask/
31 | │   ├── <sample1>.ome.tif (or .tif)
32 | │   └── <sample2>.ome.tif (or .tif)
33 | ├── seg/
34 | │   ├── <sample1>.ome.tif (or .tif)
35 | │   └── <sample2>.ome.tif (or .tif)
36 | └── tif/
37 |     ├── <sample1>.ome.tif (or .tif)
38 |     └── <sample2>.ome.tif (or .tif)
39 | ```
40 | 
41 | ## Note for MCMICRO users
42 | CyLinter can parse **whole slide image (WSI)** and **tissue microarray (TMA)** multiplex imaging data generated by the [MCMICRO](https://mcmicro.org) image-processing pipeline in their native file structure. In these cases, the [MCMICRO output directory](https://mcmicro.org/io.html#directory-structure) serves as the CyLinter input directory. 
43 | 
44 | 
45 | # YAML configuration file
46 | 
47 | `cylinter_config.yml` is the YAML configuration file passed to the `cylinter` command on program execution. It specifies general program configurations and module-specific parameters for a given analysis and should be stored in the top level CyLinter [input directory](#input-directory-structure). The `cylinter_config.yml` file downloaded with the program is pre-configured for use with [Example Data]({{ site.baseurl }}/exemplar) used to demonstrate CyLinter. On MacOS, this file is located here: `/Users/<user>/miniconda3/envs/cylinter/lib/python3.10/site-packages/cylinter/cylinter_config.yml`.
48 | 
49 | ## General configurations
50 | 
51 | | Parameter | Default | Description |
52 | | --- | --- | --- |
53 | | `inDir` | /Users/user/Desktop/cylinter_demo | CyLinter input directory; contains multi-channel image files (TIFF/OME-TIFF), segmentation outline files (TIFF/OME-TIFF), cell ID masks (TIFF/OME-TIFF), single-cell spatial feature tables (CSV), `cylinter_config.yml`, and `markers.csv` organized according to the [input directory structure](#input-directory-structure) or as native [MCMICRO output structure](https://mcmicro.org/io.html#directory-structure). |
54 | | `outDir` | /Users/user/Desktop/cylinter_demo/output | CyLinter output directory path; created on program execution. |
55 | | `sampleMetadata` | "Filename": <br />  ["15", "Glioblastoma", "GBM", "CANCER-TRUE", 1] | Sample metadata dictionary: keys = Filenames (str); values = list of strings. First elements: sample names (str, may differ from Filename). Second elements: descriptive text of experimental condition (str). Third elements: abbreviation of experimental condition (str). Fourth elements: comma-delimited string of arbitrary binary declarations for computing t-statistics between two groups of samples (str). Fifth elements: replicate number specifying biological or technical replicates (int). |
56 | | `samplesToExclude` | [ ] | (list of strs) Sample names (i.e., first elements in `sampleMetadata` values) to exclude from analysis. |
57 | | `markersToExclude` | [ ] | (list of strs) Markers to exclude from analysis (not including nuclear dyes). |
58 | 
59 | ## Module configurations
60 | For module-specific configuration settings, see [Modules]({{ site.baseurl }}/modules)
61 | 
62 | 
63 | # Markers.csv
64 | `markers.csv` is a standard input file into the MCMICRO image-processing pipeline also used by CyLinter to index marker channels in a batch of multiplex images labeled with the same markers. The file takes the following format and must be included in the top level CyLinter [input directory](#input-directory-structure):
65 | 
66 | ```
67 | channel_number,cycle_number,marker_name
68 | 1,1,<DNA1>
69 | 2,1,<abx1>
70 | 3,1,<abx2>
71 | 4,1,<abx3>
72 | 5,2,<DNA2>
73 | 6,2,<abx4>
74 | 7,2,<abx5>
75 | 8,2,<abx6>
76 | .
77 | .
78 | .
79 | ```
80 | * Additional metadata columns may be present in the file, but are not currently read by CyLinter.
81 | 


--------------------------------------------------------------------------------
/docs/tutorials/#index.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: default-cylinter
 3 | title: Tutorials
 4 | nav_order: 8
 5 | has_children: true
 6 | ---
 7 | 
 8 | # Tutorials
 9 | 
10 | ## Overview Video
11 | 
12 | {% include youtube.html id="fnxBvgJQmtY" autoplay=false mute=false controls=true loop=false related=false %}
13 | 
14 | A general introduction [video](https://www.youtube.com/watch?v=fnxBvgJQmtY) that provides a high-level overview of the pipeline.
15 | 
16 | ---
17 | 
18 | <div class="basic-grid">
19 | 
20 | <div markdown="1">
21 | ## Visual Guide
22 | This detailed [visual guide](pipeline-visual-guide.html) takes you through the steps performed by the MCMICRO pipeline as it processes [exemplar-002]({{ site.baseurl }}/datasets.html). The guide was developed using the open source [Minerva software](https://www.cycif.org/software/minerva), developed by the Laboratory of Systems Pharmacology.
23 | 
24 | [![]({{ site.baseurl }}/images/tutorials/vizguide.png)](pipeline-visual-guide.html)
25 | {: .mt-6 .mr-10}
26 | </div>
27 | 
28 | <div markdown="1">
29 | ## Installing Nextflow and MCMICRO Video
30 | This [tutorial video](https://youtu.be/tLWMe_uJY9A) walks you through installing Nextflow and MCMICRO, downloading exemplar images, and executing the pipeline on the Google Cloud Platform.
31 | 
32 | [![](https://img.youtube.com/vi/tLWMe_uJY9A/0.jpg)](https://youtu.be/tLWMe_uJY9A)
33 | {: .mt-6 .mr-10}
34 | </div>
35 | 
36 | </div><!-- end grid -->
37 | 
38 | ---
39 | 
40 | This [written guide](basics.html) provides an overview of basic concepts in tissue imaging, including how the data is collected and represented, image format standards, and the mandatory set of initial steps in image processing.
41 | 


--------------------------------------------------------------------------------
/docs/tutorials/adding.md:
--------------------------------------------------------------------------------
  1 | ---
  2 | layout: default-cylinter
  3 | title: Adding a module
  4 | nav_order: 3
  5 | parent: Tutorials
  6 | ---
  7 | 
  8 | # Adding a module
  9 | 
 10 | {: .no_toc }
 11 | 
 12 | <details open markdown="block">
 13 |   <summary>
 14 |     Table of contents
 15 |   </summary>
 16 |   {: .text-delta }
 17 | 1. TOC
 18 | {:toc}
 19 | </details>
 20 | 
 21 | MCMICRO allows segmentation and cell state caller modules to be specified dynamically. Adding new modules requires nothing more than editing a simple configuration file. No changes to the Nextflow codebase are necessary!
 22 | 
 23 | ## Quick start
 24 | 
 25 | **Step 1.** Navigate to [https://github.com/labsyspharm/mcmicro/blob/master/config/modules.config](https://github.com/labsyspharm/mcmicro/blob/master/config/modules.config). Press the pencil in the top-right corner. This will fork the project to your own GitHub account and allow you to modify the file in your fork.
 26 | 
 27 | <img src="{{ site.baseurl }}/images/addmod/Step1.png"/>
 28 | 
 29 | **Step 2.** Add a new module by specifying all relevant fields (see below).
 30 | 
 31 | <img src="{{ site.baseurl }}/images/addmod/Step2.png"/>
 32 | 
 33 | **Step 3.** Briefly describe your new module. Provide a reference to the method and the codebase.
 34 | 
 35 | <img src="{{ site.baseurl }}/images/addmod/Step3.png"/>
 36 | 
 37 | **Step 4.** After MCMICRO developers review and test your proposed module, the changes will be merged into the main project branch.
 38 | 
 39 | # Input and output specs
 40 | 
 41 | Every module must have a command-line interface (CLI) that has been encapsulated inside a Docker container. 
 42 | MCMICRO assumes that the CLI conforms to the following input-output specifications.
 43 | 
 44 | ## Segmentation modules
 45 | 
 46 | **Input:**
 47 | 
 48 | * A file in `.ome.tif` format containing a fully stitched and registered multiplexed image.
 49 | * (Optional) A file containing a custom model for the algorithm. The file can be in any format, and it is up to the module developer to decide what formats they allow from users.
 50 | 
 51 | **Output:**
 52 | 
 53 | * An image file in `.tif` format, written to `.` (i.e., the "current working directory"). The file can be either a probability map or a segmentation mask. The image channels in probability maps annotate each pixel with probabilities that it belongs to the background or different parts of the cell such as the nucleus, cytoplasm, cell membrane or the intercellular region. Similarly, segmentation masks annotate each pixel with an integer index of the cell it belongs to, or 0 if none.
 54 | * (Optional) One or more files written to `./qc/` (i.e., `qc/` subdirectory within the "current working directory"). These will be copied by the pipeline to the corresponding location in the [project's `qc/` directory]({{ site.baseurl }}/documentation/dir.html#quality-control).
 55 | 
 56 | ## Cell state calling modules
 57 | 
 58 | **Input:**
 59 | 
 60 | * A file in `.csv` format containing a [spatial feature table]({{ site.baseurl }}/documentation/dir.html#quantification). Each row in a table corresponds to a cell, while columns contain features characterizing marker expression or morphological properties.
 61 | * (Optional) A file containing a custom model for the algorithm. The file can be in any format, and it is up to the module developer to decide what formats they allow from users.
 62 | 
 63 | **Output:**
 64 | 
 65 | * One or more files in `.csv` or `.hdf5` format, written to `.` (i.e., the "current working directory"). Each file should annotate individual cells with the corresponding inferred cell state.
 66 | * (Optional) One or more files written to `./plots/` (i.e., `plots/` subdirectory within the "current working directory"). Each file can be in any format and contain any information that the module developer thinks will be useful to the user (e.g., UMAP plots showing how cells cluster together).
 67 | * (Optional) One or more files written to `./qc/` (i.e., `qc/` subdirectory within the "current working directory"). These will be copied by the pipeline to the corresponding location in the [project's `qc/` directory]({{ site.baseurl }}/documentation/dir.html#quality-control).
 68 | 
 69 | # Configuration
 70 | 
 71 | Adding a new MCMICRO module involves specifying simple key-value pairs in `config/modules.config`. For example, consider the following configuration for ilastik:
 72 | 
 73 | ```
 74 | [
 75 |   name      : 'ilastik',
 76 |   container : 'labsyspharm/mcmicro-ilastik',
 77 |   version   : '1.4.3',
 78 |   cmd       : 'python /app/mc-ilastik.py --output .',
 79 |   input     : '--input',
 80 |   model     : '--model',
 81 |   watershed : 'yes'
 82 | ]
 83 | ```
 84 | 
 85 | ## Name
 86 | 
 87 | The `name` of the module determines two things. First, it specifies the names of subdirectories for where the output files will be written to in the project directory. In the given example, the primary outputs will appear in `probability-maps/ilastik/`, while QC files will be written to `qc/ilastik/`. Second, the module name also tells MCMICRO what other parameters to look for. In our example, the pipeline will look for module specific parameters in `--ilastik-opts` and a custom model file in `--ilastik-model`.
 88 | 
 89 | ## Container and version
 90 | 
 91 | The two fields must uniquely identify a Docker container image containing the tool. Mechanistically, the fields are combined using the [standard `REPOSITORY:TAG` convention](https://docs.docker.com/engine/reference/commandline/images/).
 92 | 
 93 | ## Command
 94 | 
 95 | The `cmd` field must contain a command that, when executed inside the container, will produce the required set of outputs from the inputs provided to it by the pipeline.
 96 | 
 97 | **It is imperative that all primary outputs are written to `.` (i.e., the "current working directory"). MCMICRO will automatically sort outputs to their correct location in the project directory. Writing outputs to any other location may result in MCMICRO failing to locate them.**
 98 | 
 99 | ## Input
100 | 
101 | The `input` field determines how the pipeline will supply inputs to the module. Some examples in the context of [exemplar-001]({{ site.baseurl }}/datasets.html) may look as follows:
102 | 
103 | | Configuration | What MCMICRO will execute |
104 | | :-- | :-- |
105 | | <code>cmd   : 'python /app/tool.py -o .'<br>input : '-i' </code> | `python /app/tool.py -o . -i exemplar-001.ome.tif` |
106 | | <code>cmd   : 'python /app/tool.py -o .'<br>input : '--input' </code> | `python /app/tool.py -o . --input exemplar-001.ome.tif` |
107 | | <code>cmd   : 'python /app/tool.py -o .'<br>input : '' </code> | `python /app/tool.py -o . exemplar-001.ome.tif` |
108 | 
109 | ## (Optional) Model
110 | 
111 | The `model` field functions similarly to `input` and specifies how the pipeline will supply a custom model to the tool.
112 | 
113 | ## Watershed
114 | 
115 | The `watershed` field specifies whether the module requires a subsequent watershed step. Set it to `'yes'` for modules that produce probability maps and `'no'` for instance segmenters. Alternatively, you can specify `'bypass'` to have the output still go through S3Segmenter with the `--nucleiRegion bypass` flag. This will skip watershed but still allow you to filter nuclei by size with `--logSigma`.
116 | 
117 | ## Putting it all together
118 | 
119 | Given the above configuration for ilastik, users of MCMICRO can begin using the module by typing the following command:
120 | 
121 | ```
122 | nextflow run labsyspharm/mcmicro --in path/to/exemplar-001 \
123 |   --probability-maps ilastik \
124 |   --ilastik-opts '--num_channels 1' \
125 |   --ilastik-model myawesomemodel.ilp
126 | ```
127 | 
128 | As exemplar-001 makes its way through the pipeline, it will eventually encounter the [probability map generation and segmentation step]({{ site.baseurl }}/documentation/dir.html#segmentation). The pipeline will then identify ilastik as the module to be executed from the `--probability-maps` flag. The actual command that MCMICRO runs will then be composed using all the above fields together:
129 | 
130 | ```
131 | python /app/mc-ilastik.py --output . --input exemplar-001.ome.tif --model myawesomemodel.ilp --num_channels 1
132 | ```
133 | 
134 | # (Advanced) Automated tests
135 | 
136 | MCMICRO uses [GitHub Actions](https://docs.github.com/en/actions) to execute a set of automated tests on the [two exemplar images]({{ site.baseurl }}/datasets.html). The tests ensure that modifications to the pipeline don't break existing module functionality. When contributing a new module to MCMICRO, consider composing a new test that ensures your module runs on the exemplar data without any issues.
137 | 
138 | Automated tests are specified in [`ci.yml`](https://github.com/labsyspharm/mcmicro/blob/master/.github/workflows/ci.yml). The exemplar data is cached and can be easily restored via `actions/cache@v2`. For example, consider the following minimal test that contrasts unmicst and ilastik on exemplar-001:
139 | 
140 | ```
141 | test-ex001:
142 |     needs: setup
143 |     runs-on: ubuntu-latest
144 |     steps:
145 |       - uses: actions/checkout@v2
146 |       - name: Install Nextflow
147 |         run: curl -fsSL get.nextflow.io | bash
148 |       - name: Restore exemplar-001 cache
149 |         uses: actions/cache@v2
150 |         with:
151 |           path: ~/data/exemplar-001
152 |           key: mcmicro-exemplar-001
153 |       - name: Test exemplar-001
154 |         run: ./nextflow main.nf --in ~/data/exemplar-001 --probability-maps unmicst,ilastik --s3seg-opts '--probMapChan 0'
155 | ```
156 | 
157 | The test, named `test-ex001`, consists of three steps: 1) Installing nextflow, 2) Restoring exemplar-001 data from cache, and 3) Running the pipeline on exemplar-001. The `needs:` field specifies that the test should be executed after `setup` (which verifies the existence of cached data and performs caching if it's missing).
158 | 
159 | 


--------------------------------------------------------------------------------
/docs/tutorials/exhibit.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "Images": [
  3 |         {
  4 |             "Name": "WORKAROUND-decouple-storyname-image-description"
  5 |         },
  6 |         {
  7 |             "Name": "i0",
  8 |             "Description": "",
  9 |             "Path": "https://s3.amazonaws.com/www.cycif.org/schapiro-mcmicro-2021/exemplar-002",
 10 |             "Width": 6197,
 11 |             "Height": 6231,
 12 |             "MaxLevel": 3
 13 |         }
 14 |     ],
 15 |     "PixelsPerMicron": 1.5385,
 16 |     "Name": "MCMICRO Exemplar-002", 
 17 |     "Header": "This visual guide takes you through a series of steps for deriving biological information from highly-multiplexed images using MCMICRO pipeline. To learn more about the pipeline and the exemplar-002 dataset, visit [MCMICRO website](https://mcmicro.org)\n\n![https://mcmicro.org/images/Fig1.png](https://mcmicro.org/images/Fig1.png)",
 18 |     "Footer": "<a href=\"https://mcmicro.org\" class=\"btn btn-secondary mt-5\" role=\"button\">Back to MCMICRO</a>",
 19 |     "FirstGroup": "Staining",
 20 |     "Rotation": 0,
 21 |     "Layout": {
 22 |         "Grid": [
 23 |             [
 24 |                 "i0"
 25 |             ]
 26 |         ]
 27 |     },
 28 |     "Stories": [
 29 |         {
 30 |             "Name": "",
 31 |             "Description": "",
 32 |             "Waypoints": [
 33 |                 {
 34 |                     "Name": "Raw Image Tiles (Level 1)",
 35 |                     "Description": "When performing highly-multiplexed whole slide imaging (WSI), data acquisition routinely produces thousands of multichannel image tiles. Although the stage positioning is rather robust in modern microscopes, further alignments of image tiles acquired within one cycle as well as across multiple cycles are still needed.\n\nWhen the tiles are stitched by relying on the microscope stage coordinates alone, with the red-green checkerboard pattern showing neighboring tiles, the stage movement errors are highly obvious in the yellow overlapping regions <a class=\"btn btn-primary btn-sm\" href=\"#s=1#w=0#g=0#m=-1#a=-100_-100#v=2.1534_0.1881_0.7653#o=-100_-100_1_1#p=Q\" role=\"button\">🔍</a>\n\nThe errors are even more pronounced when three DNA channels from subsequent cycles are overlaid using their stage positions <a class=\"btn btn-primary btn-sm\" href=\"#s=1#w=0#g=1#m=-1#a=-100_-100#v=2.1534_0.1881_0.7653#o=-100_-100_1_1#p=Q\" role=\"button\">🔍</a>\n\nThe first step in MCMICRO is to align the provided **raw image tiles (Level 1)** and correct uneven illuminations in each of the tiles. MCMICRO currently accepts [Bio-formats](https://www.openmicroscopy.org/bio-formats/) compatible image formats, along with [a .csv file containing channel names](https://mcmicro.org/step-input.html) as inputs and outputs a stitched-and-registered image <a class=\"btn btn-primary btn-sm\" href=\"#s=1#w=0#g=2#m=-1#a=-100_-100#v=2.1534_0.1881_0.7653#o=-100_-100_1_1#p=Q\" role=\"button\">🔍</a>",
 36 |                     "Arrows": [],
 37 |                     "Overlays": [],
 38 |                     "Group": "Before Stitching",
 39 |                     "Masks": [],
 40 |                     "ActiveMasks": [],
 41 |                     "Zoom": 0.5,
 42 |                     "Pan": [
 43 |                         0.5,
 44 |                         0.5
 45 |                     ]
 46 |                 },
 47 |                 {
 48 |                     "Name": "Whole-Slide Image (Level 2)",
 49 |                     "Description": "To produce the **whole-slide image (Level 2)** in OME-TIFF format, individual image tiles are [corrected for illumination](https://www.nature.com/articles/ncomms14836), followed by simultaneous [tile stitching and registration across cycles](https://www.biorxiv.org/content/10.1101/2021.04.20.440625v1). After these pre-processing steps, channels from different cycles can be \"merged\" and visualized.",
 50 |                     "Arrows": [],
 51 |                     "Overlays": [],
 52 |                     "Group": "Staining",
 53 |                     "Masks": [],
 54 |                     "ActiveMasks": [],
 55 |                     "Zoom": 0.5,
 56 |                     "Pan": [
 57 |                         0.5,
 58 |                         0.5
 59 |                     ]
 60 |                 },
 61 |                 {
 62 |                     "Name": "TMA dearray",
 63 |                     "Description": "When working with Tissue Microarrays (TMAs), MCMICRO can identify and isolate individual cores using [Coreograph](https://mcmicro.org/coreograph.html). Each core will be written out into a standalone file to enable parallel downstream processing.",
 64 |                     "Arrows": [],
 65 |                     "Overlays": [],
 66 |                     "Group": "Staining",
 67 |                     "Masks": ["Dearray"],
 68 |                     "ActiveMasks": ["Dearray"],
 69 |                     "Zoom": 0.6,
 70 |                     "Pan": [
 71 |                         0.5,
 72 |                         0.5
 73 |                     ]
 74 |                 },
 75 |                 {
 76 |                     "Name": "Segmentation Probability Maps",
 77 |                     "Description": "Basic cell segmentation in MCMICRO consists of two steps. In the first step, [machine learning models](https://mcmicro.org/unmicst.html) are used to generate probability maps that annotate each pixel with probabilities that it belongs to background or different parts of the cell such as the nucleus, cytoplasm, cell membrane or the intercellular region. MCMICRO can execute multiple machine learning algorithms in parallel, allowing for a direct comparison of their outputs.",
 78 |                     "Arrows": [],
 79 |                     "Overlays": [],
 80 |                     "Group": "DNA",
 81 |                     "Masks": ["Cell Mask Outlines", "Probability Maps"],
 82 |                     "ActiveMasks": ["Probability Maps"],
 83 |                     "Zoom": 1,
 84 |                     "Pan": [
 85 |                         0.5,
 86 |                         0.5
 87 |                     ]
 88 |                 },
 89 |                 {
 90 |                     "Name": "Segmentation Masks (Level 3)",
 91 |                     "Description": "The second step in cell segmentation applies [watershed-like algorithms](https://mcmicro.org/s3seg.html) to probability maps produced by the first step. The resulting segmentation labelled masks assign each cell with a unique index number, where each pixel value adopts its cell's corresponding index number (background is assigned 0). MCMICRO generates labelled masks for nuclei, cytoplasm, and whole cell regions with matching indexed numbers to facilitate single cell analysis.",
 92 |                     "Arrows": [],
 93 |                     "Overlays": [],
 94 |                     "Group": "DNA",
 95 |                     "Masks": ["Cell Mask Outlines", "Probability Maps"],
 96 |                     "ActiveMasks": ["Cell Mask Outlines"],
 97 |                     "Zoom": 1,
 98 |                     "Pan": [
 99 |                         0.5,
100 |                         0.5
101 |                     ]
102 |                 },
103 |                 {
104 |                     "Name": "Spatial Feature Tables (Level 4)",
105 |                     "Description": "The final step in the MCMICRO pipeline is quantification, which utilizes the segmentation masks and the original image data to generate a spatial feature table. Each row in the table corresponds to an individual cell, while columns catalogue cell position, average marker expression, and morphological features.\n\nAs an example, the cell masks are colored using the mean intensities of CD3d in each cell.",
106 |                     "Arrows": [],
107 |                     "Overlays": [],
108 |                     "Group": "Staining",
109 |                     "Masks": ["CD3d Expression", "Cell Mask Outlines", "Probability Maps"],
110 |                     "ActiveMasks": ["CD3d Expression"],
111 |                     "Zoom": 1.292,
112 |                     "Pan": [
113 |                         0.6706,
114 |                         0.7835
115 |                     ]
116 |                 }
117 |             ]
118 |         }
119 |     ],
120 |     "Masks": [
121 |         {
122 |             "Name": "Cell Mask Outlines",
123 |             "Path": "mask/cellRingMask-outlines",
124 |             "Colors": [
125 |                 "ff00ff"
126 |             ],
127 |             "Channels": [
128 |                 "Cell Mask Outlines"
129 |             ]
130 |         },
131 |         {
132 |             "Name": "Probability Maps",
133 |             "Path": "mask/probability-maps",
134 |             "Colors": [
135 |                 "00ff00",
136 |                 "0000ff"
137 |             ],
138 |             "Channels": [
139 |                 "Nuclei contours probability",
140 |                 "Nuclei probability"
141 |             ]
142 |         },
143 |         {
144 |             "Name": "Dearray",
145 |             "Path": "mask/dearray-mask",
146 |             "Colors": [
147 |                 "1c9e77",
148 |                 "d96003",
149 |                 "7570b4",
150 |                 "e8298a"
151 |             ],
152 |             "Channels": [
153 |                 "TMA Core - 1",
154 |                 "TMA Core - 2",
155 |                 "TMA Core - 3",
156 |                 "TMA Core - 4"
157 |             ]
158 |         },
159 |         {
160 |             "Name": "CD3d Expression",
161 |             "Path": "mask/cd3d_expression_mask",
162 |             "Colors": [
163 |                 "0000aa"
164 |             ],
165 |             "Channels": [
166 |                 "CD3d Expression"
167 |             ]
168 |         }
169 |     ],
170 |     "Groups": [
171 |         {
172 |             "Name": "Before Stitching",
173 |             "Path": "ashlar_debug",
174 |             "Colors": [
175 |                 "ff0000",
176 |                 "00ff00"
177 |             ],
178 |             "Channels": [
179 |                 "Tiles",
180 |                 "Tiles"            
181 |             ]
182 |         },
183 |         {
184 |             "Name": "Before Registration",
185 |             "Path": "Before-stitching-and-registration_0__DNA-1--1__DNA-2--2__DNA-3",
186 |             "Colors": [
187 |                 "00ffff",
188 |                 "ffff00",
189 |                 "ff00ff"
190 |             ],
191 |             "Channels": [
192 |                 "DNA - 1",
193 |                 "DNA - 2",
194 |                 "DNA - 3"
195 |             ]
196 |         },
197 |         {
198 |             "Name": "Registered",
199 |             "Path": "Stitching_0__DNA--4__DNA-2--8__DNA-3",
200 |             "Colors": [
201 |                 "00ffff",
202 |                 "ffff00",
203 |                 "ff00ff"
204 |             ],
205 |             "Channels": [
206 |                 "DNA - 1",
207 |                 "DNA - 2",
208 |                 "DNA - 3"
209 |             ]
210 |         },
211 |         {
212 |             "Name": "Staining",
213 |             "Path": "Staining_0__DNA--13__CD163--14__CD3D--15__CD31--19__VDAC1--34__Pan-CK",
214 |             "Colors": [
215 |                 "ffffff",
216 |                 "ffff00",
217 |                 "0000ff",
218 |                 "ff0000",
219 |                 "00ff00",
220 |                 "f79209"
221 |             ],
222 |             "Channels": [
223 |                 "DNA ",
224 |                 "CD163",
225 |                 "CD3d",
226 |                 "CD31",
227 |                 "VDAC1",
228 |                 "Pan-CK"
229 |             ]
230 |         },
231 |         {
232 |             "Name": "DNA",
233 |             "Path": "Stitching_0__DNA--4__DNA-2--8__DNA-3",
234 |             "Colors": [
235 |                 "ffffff"
236 |             ],
237 |             "Channels": [
238 |                 "DNA"
239 |             ]
240 |         }
241 |     ]
242 | }


--------------------------------------------------------------------------------
/docs/tutorials/pipeline-visual-guide.html:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: Pipeline Visual Guide
 3 | nav_order: 1
 4 | parent: Tutorials
 5 | ---
 6 | 
 7 | <!DOCTYPE html>
 8 | <html lang="en-US" class="h-100">
 9 | 
10 | <head>
11 |   <meta charset='utf-8'>
12 |   <meta http-equiv="X-UA-Compatible" content="IE=edge">
13 |   <meta name="viewport" content="width=device-width, initial-scale=1">
14 | </head>
15 | 
16 | <body>
17 |     <div id="minerva-browser"> </div>
18 |     <script defer src="https://use.fontawesome.com/releases/v5.2.0/js/all.js" integrity="sha384-4oV5EgaV02iISL2ban6c/RmotsABqE4yZxZLcYMAdG7FAPsyHYAPpywE9PJo+Khy" crossorigin="anonymous"></script>
19 |     <script src="https://api.html5media.info/1.2.2/html5media.min.js"></script>
20 |     <script src="https://cdn.jsdelivr.net/npm/amazon-cognito-identity-js@4.5.0/dist/amazon-cognito-identity.min.js"></script>
21 |     <script src="https://cdn.jsdelivr.net/npm/minerva-browser@2.15.5/build/bundle.js"></script>
22 |   <script>
23 |         const speech_bucket = "";
24 |         const authenticate = function(username, pass) { // Log `username` in against the Cognito user pool below; `pass` must be thenable (it is consumed via pass.then). Returns a promise for the JWT ID token string.
25 |           // Promise adapter around the callback-style cognitoUser.authenticateUser().
26 |           const authenticateUser = function(cognitoUser, authenticationDetails) {
27 |             return new Promise(function(resolve, reject) {
28 |               cognitoUser.authenticateUser(authenticationDetails, {
29 |                 onSuccess: result => resolve(result),
30 |                 onFailure: err => reject(err),
31 |                 mfaRequired: codeDeliveryDetails => reject(codeDeliveryDetails), // MFA challenges are not handled here; they are surfaced as a rejection
32 |                 newPasswordRequired: (fields, required) => reject({fields, required}) // forced password change is likewise surfaced as a rejection
33 |               });
34 |             });
35 |           };
36 |           // Wait for the password to resolve before building any Cognito objects.
37 |           return pass.then(function(password) {
38 |             // Hard-coded user pool and app client identifiers used for every login from this page.
39 |             const minervaPoolId = 'us-east-1_d3Wusx6qp';
40 |             const minervaClientId = 'cvuuuuogh6nmqm8491iiu1lh5';
41 |             const minervaPool = new AmazonCognitoIdentity.CognitoUserPool({
42 |               UserPoolId : minervaPoolId,
43 |               ClientId : minervaClientId
44 |             });
45 |             // The user handle is bound to the pool created above.
46 |             const cognitoUser = new AmazonCognitoIdentity.CognitoUser({
47 |               Username: username,
48 |               Pool: minervaPool
49 |             });
50 |             // Credentials bundle passed to cognitoUser.authenticateUser().
51 |             const authenticationDetails = new AmazonCognitoIdentity.AuthenticationDetails({
52 |               Username: username,
53 |               Password: password
54 |             });
55 |             // On success, reduce the result to its ID token serialized as a JWT string.
56 |             return authenticateUser(cognitoUser, authenticationDetails)
57 |               .then(response => response.getIdToken().getJwtToken());
58 |           });
59 |         }
60 | 
61 |         window.viewer = MinervaStory.default.build_page({ // Build the Minerva story viewer and expose the result on window.viewer
62 |           hideWelcome: true,
63 |           authenticate: authenticate, // Cognito login helper defined earlier in this script
64 |           speech_bucket: speech_bucket, // empty string (see the const declared at the top of this script)
65 |           exhibit: "exhibit.json", // relative URL of the exhibit definition
66 |           id: "minerva-browser", // matches the id of the container <div> in <body>
67 |           embedded: true,
68 |           homeUrl: "{{ site.baseurl }}" // Liquid placeholder; presumably expanded by Jekyll when the page is rendered
69 |         });
70 |     </script>
71 | </body>
72 | </html>
73 | 


--------------------------------------------------------------------------------
/docs/workflow/index.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: default-cylinter
 3 | title: Workflow
 4 | nav_order: 4
 5 | has_children: true
 6 | ---
 7 | 
 8 | # Workflow
 9 | 
10 | | Input Directory Structure (see [Input File Structure]({{ site.baseurl }}/structure/index) for details) | Output Directory Structure
11 | | :-- | :-- |
12 | | <code>INPUT_DIR<br>├── cylinter_config.yml<br>├── csv/<br>├── markers.csv<br>├── mask/<br>├── seg/<br>└── tif/<br></code> | <code>OUTPUT_DIR<br>├── area/<br>├── checkpoints/<br>├── cylinter_report.yml<br>├── clustering/<br>├── contrast/<br>├── cycles/<br>├── gating/<br>├── intensity/<br>├── metaQC/<br>├── PCA/<br>├── pruning/<br>└──  ROIs/<br></code>
13 | 
14 | <table>
15 |   <tr>
16 |     <td style="width: 70%;"> <img src="{{ site.baseurl }}/assets/images/ExtFig4.jpg" alt="CyLinter"/></td>
17 |     <td style="line-height: 1.1;"><font size="2.7"><b>Identifying and removing noisy single-cell data points with CyLinter.</b> <b>|</b> CyLinter input consists of multiplex microscopy files (OME-TIFF/TIFF) and their corresponding cell segmentation outlines (OME-TIFF/TIFF), cell ID masks (OME-TIFF/TIFF), and single-cell feature tables (CSV). <b>a</b>, Aggregate data (automated): raw spatial feature tables for all samples in a batch are merged into a single Pandas (Python) dataframe. <b>b</b>, ROI selection (interactive or automated): multi-channel images are viewed to identify and gate on regions of tissue affected by microscopy artefacts (negative selection mode) or areas of tissue devoid of artefacts (positive selection mode). <b>b1-b4</b>, Demonstration of automated artefact detection in CyLinter: <b>b1</b>, CyLinter’s selectROIs module showing artefacts in the CDKN1A (green) channel of a mesothelioma TMA core. <b>b2</b>, Transformed version of the original CDKN1A image such that artefacts appear as large, bright regions relative to channel intensity variations associated with true signal of immunoreactive cells which are suppressed. <b>b3</b>, Local intensity maxima are identified in the transformed image and a flood fill algorithm is used to create a pixel-level binary mask indicating regions of tissue affected by artefacts. In this example, the method identifies three artefacts in the image: one fluorescence aberration at the top of the core, and two tissue folds at the bottom of the core. <b>b4</b>, CyLinter’s selectROIs module showing the binary artefact mask (translucent gray shapes) and their corresponding local maxima (red dots) defining each of the three artefacts. <b>c</b>, DNA intensity filter (interactive): histogram sliders are used to define lower and upper bounds on nuclear counterstain signal intensity. 
Cells between cutoffs are visualized as scatter points at their spatial coordinates in the corresponding tissue for gate confirmation or refinement. <b>d</b>, Segmentation area filter (interactive): histogram sliders are used to define lower and upper bounds on cell segmentation area (pixel counts). Cells between cutoffs are visualized as scatter points at their spatial coordinates in the corresponding tissue for gate confirmation or refinement. <b>e</b>, Cross-cycle correlation filter (interactive): applicable to multi-cycle experiments. Histogram sliders are used to define lower and upper bounds on the log-transformed ratio of DNA signals between the first and last imaging cycles. Cells between cutoffs are visualized as scatter points at their spatial coordinates in their corresponding tissues for gate confirmation or refinement. <b>f</b>, Log transformation (automated): single-cell data are log-transformed. <b>g</b>, Channel outliers filter (interactive): the distribution of cells according to antibody signal intensity is viewed for all samples as a facet grid of scatter plots (or hexbin plots) against cell area (y-axes). Lower and upper percentile cutoffs are applied to remove outliers. Outliers are visualized as scatter points at their spatial coordinates in their corresponding tissues for gate confirmation or refinement. <b>h</b>, MetaQC (interactive): unsupervised clustering methods (UMAP or TSNE followed by HDBSCAN clustering) are used to correct for gating bias in prior data filtration modules by thresholding on the percent of each cluster composed of clean (maintained) or noisy (redacted) cells. <b>i</b>, Principal component analysis (PCA, automated): PCA is performed and Horn’s parallel analysis is used to determine the number of PCs associated with non-random variation in the dataset. 
<b>j</b>, Image contrast adjustment (interactive): channel contrast settings are optimized for visualization on reference tissues which are applied to all samples in the cohort. <b>k</b>, Unsupervised clustering (interactive): UMAP (or TSNE) and HDBSCAN are used to identify unique cell states in a given cohort of tissues. Manual gating can also be performed to identify cell populations. <b>l</b>, Compute clustered heatmap (automated): clustered heatmap is generated showing channel z-scores for identified clusters (or gated populations). <b>m</b>, Compute frequency statistics (automated): pairwise t statistics on the frequency of each identified cluster or gated cell population between groups of tissues specified in CyLinter’s configuration file (cylinter_config.yml, e.g., treated vs. untreated, response vs. no response, etc.) are computed. <b>n</b>, Evaluate cluster membership (automated): cluster quality is checked by visualizing galleries of example cells drawn at random from each cluster identified in the clustering module (panel k).</font></td>
18 | 
19 | 
20 |     <!-- <td style="line-height: 1.1;"><font size="2.7"><b>Identifying and Removing Noisy Single-cell Data Points with CyLinter.</b> | <b>a-d</b>: CyLinter input: <b>a</b>, Multiplex microscopy file <b>b</b>, Cell segmentation outlines <b>c</b>, Cell ID mask <b>d</b>, Single-cell feature table. <b>e</b>, ROI selection module: multi-channel images are viewed to identify and gate on regions of tissue affected by microscopy artifacts (in the default negative selection mode). <b>f-i</b>, Demonstration of automated artifact detection in CyLinter. <b>f</b>, CyLinter’s selectROIs module showing artifacts in the CDKN1A (green) channel of EMIT TMA core 18 (mesothelioma). <b>g</b>, Transformed version of the original CDKN1A image such that artifacts appear as large, bright regions relative to channel intensity variations associated with true signal of immunoreactive cells which are suppressed. <b>h</b>, Local intensity maxima are identified in the transformed image and a flood fill algorithm is used to create a pixel-level binary mask indicating regions of tissue affected by artifacts. In this example, the method identifies three artifacts in the image: one fluorescence aberration at the top of the core, and two tissue folds at the bottom of the core. <b>i</b>, CyLinter’s selectROIs module showing the binary artifact mask (translucent gray shapes) and their corresponding local maxima (red dots) defining each of the three artifacts. <b>j</b>, DNA intensity filter: histogram sliders are used to define lower and upper bounds on nuclear counterstain single intensity. Cells between cutoffs are visualized as scatter points at their spatial coordinates in the corresponding tissue for gate confirmation or refinement. <b>k</b>, Cell segmentation area filter: histogram sliders are used to define lower and upper bounds on cell segmentation area (pixel counts). 
Cells between cutoffs are visualized as scatter points at their spatial coordinates in the corresponding tissue for gate confirmation or refinement. <b>l</b>, Cross-cycle correlation filter: applicable to multi-cycle experiments. Histogram sliders are used to define lower and upper bounds on the log-transformed ratio of DNA signals between the first and last imaging cycles. Cells between cutoffs are visualized as scatter points at their spatial coordinates in their corresponding tissues for gate confirmation or refinement. <b>m</b>, Channel outlier filter: the distribution of cells according to antibody signal intensity is viewed for all sample as a facet grid of scatter plots (or hexbin plots) against cell area (y-axes). Lower and upper percentile cutoffs are applied to remove outliers. Outliers are visualized as scatter points at their spatial coordinates in their corresponding tissues for gate confirmation or refinement. <b>n</b>, MetaQC module: unsupervised clustering methods (UMAP or TSNE followed by HDBSCAN clustering) are used to correct for gating bias in prior data filtration modules by thresholding on the percent of each cluster composed of clean (maintained) or noisy (redacted) cells. <b>o</b>, Unsupervised cluster methods (UMAP or TSNE followed by HDBSCAN) are used to identify unique cell states in a given cohort of tissues. <b>p</b>, Image contrast adjustment: channel contrast settings are optimized for visualization on reference tissue which are applied to all tissues in the cohort. <b>q</b>, Evaluate cluster membership: cluster quality is checked by visualizing galleries of example cells drawn at random from each cluster identified in the clustering module.</font></td> -->
21 |   </tr>
22 | </table>
23 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["poetry-core>=1.0.0"]
 3 | build-backend = "poetry.core.masonry.api"
 4 | 
 5 | [tool.poetry]
 6 | name = "CyLinter"
 7 | version = "0.0.50"
 8 | description = "CyLinter: An Interactive Image Segmentation Filter for Multiplex Microscopy"
 9 | readme = "README.md"
10 | license = "MIT"
11 | keywords =['CyLinter multiplex microscopy quality control']
12 | classifiers=[
13 |     'Development Status :: 4 - Beta',
14 |     'Intended Audience :: End Users/Desktop',
15 |     'Intended Audience :: Science/Research',
16 |     'Framework :: napari',
17 |     'License :: OSI Approved :: MIT License',
18 |     'Natural Language :: English',
19 |     'Operating System :: OS Independent',
20 |     'Programming Language :: Python :: 3',
21 |     'Topic :: Scientific/Engineering :: Visualization'
22 | ]
23 | authors = ["Gregory J. Baker <gregory_baker2@hms.harvard.edu>"]
24 | homepage = "https://github.com/labsyspharm/cylinter"
25 | 
26 | [tool.poetry.dependencies]  # runtime dependencies; unpinned ("*") unless a comment gives a reason
27 | cellcutter = "*"
28 | hdbscan = "*"
29 | joblib = "*"
30 | magicgui = "*"
31 | matplotlib = "<3.6"  # avoids segmentation faults when closing silhouette plot in clustering module
32 | napari = { version = "*", extras = ["all"] }
33 | numpy = "*"
34 | natsort = "*"
35 | numba = "*"
36 | pandas = "*"
37 | pyarrow = "*"
38 | pyqt = "*"
39 | pyyaml = "*"
40 | qtpy = "*"
41 | scikit-image = "*"
42 | scikit-learn = "<=1.2.2"  # artifact detection model was built using v1.2.2; newer versions warn (InconsistentVersionWarning) when unpickling it
43 | seaborn = "*"
44 | tifffile = "*"
45 | umap-learn = "*"
46 | zarr = "*"
47 | svglib = "*"
48 | pypdf2 = "*"
49 | imagecodecs = "*"  # MIBI data fails to be read without this
50 | opencv-python = "*"  # listed as "opencv" in recipe/meta.yaml
51 | 
52 | [tool.poetry.scripts]
53 | cylinter = "cylinter.cylinter:main"
54 | 


--------------------------------------------------------------------------------
/recipe/meta.yaml:
--------------------------------------------------------------------------------
 1 | {% set name = "cylinter" %}
 2 | {% set version = "0.0.50" %}
 3 | 
 4 | package:
 5 |   name: "{{ name|lower }}"
 6 |   version: "{{ version }}"
 7 | 
 8 | source:
 9 |   git_url: https://github.com/labsyspharm/cylinter.git
10 |   git_tag: "v{{ version }}"  # derived from the `version` variable set at the top of this recipe so the tag cannot drift from the package version
11 | 
12 | build:
13 |   number: 0
14 |   script: "{{ PYTHON }} -m pip install . --no-deps --ignore-installed -vv "
15 |   entry_points:
16 |     - cylinter=cylinter.cylinter:main
17 |   noarch: python
18 | 
19 | requirements:
20 |   build:
21 |     - poetry
22 |   host:
23 |     - pip
24 |     - python
25 |     - poetry
26 |   run:
27 |     - python
28 |     - cellcutter
29 |     - hdbscan
30 |     - joblib
31 |     - magicgui
32 |     - matplotlib <3.6  # avoids segmentation faults when closing silhouette plot in clustering module
33 |     - napari
34 |     - numpy
35 |     - natsort
36 |     - numba
37 |     - pandas
38 |     - pyarrow
39 |     - pyqt
40 |     - pyyaml
41 |     - qtpy
42 |     - scikit-image
43 |     - scikit-learn <=1.2.2  # avoids InconsistentVersionWarning: Trying to unpickle estimator Pipeline from version 1.2.2 when using version 1.3.1. because artifact detection model was built using v1.2.2
44 |     - seaborn
45 |     - tifffile
46 |     - umap-learn
47 |     - zarr
48 |     - svglib
49 |     - pypdf2
50 |     - imagecodecs  # MIBI data fails to be read without this
51 |     - opencv
52 | 
53 | test:
54 |   imports:
55 |     - cylinter
56 | 
57 | about:
58 |   home: https://labsyspharm.github.io/cylinter/
59 |   license: MIT
60 |   license_family: MIT  # must agree with `license` above (was BSD)
61 |   summary: Quality Control Software for Multiplex Microscopy
62 | 


--------------------------------------------------------------------------------