├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md └── cellPLATO ├── .gitignore ├── README.md ├── cellPLATO ├── Btrack_cellPLATO_2024.ipynb ├── __init__.py ├── cellPLATO_StepByStep_main.ipynb ├── cellPLATO_StepByStep_trackmate.ipynb ├── data_processing │ ├── __init__.py │ ├── cell_identifier.py │ ├── cleaning_formatting_filtering.py │ ├── cleaning_formatting_filtering_remotefix.py │ ├── clustering.py │ ├── data_io.py │ ├── data_wrangling.py │ ├── dimensionality_reduction.py │ ├── load_trackmate.py │ ├── measurements.py │ ├── migration_calculations.py │ ├── pipelines.py │ ├── shape_calculations.py │ ├── statistics.py │ ├── time_calculations.py │ └── trajectory_clustering.py ├── initialization │ ├── __init__.py │ ├── btrack_config.json │ ├── config.py │ └── initialization.py └── visualization │ ├── __init__.py │ ├── cluster_visualization.py │ ├── comparative_visualization.py │ ├── filter_visualization.py │ ├── low_dimension_visualization.py │ ├── panel_apps.py │ ├── plots_of_differences.py │ ├── scatterplots.py │ ├── small_multiples.py │ ├── superplots.py │ ├── timecourse_visualization.py │ └── trajectory_visualization.py ├── environment.yml ├── environment_oldversion.yml ├── images └── cellPLATOlogo.png ├── requirements.txt ├── setup.py └── tests └── testing.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.rsuser 8 | *.suo 9 | *.user 10 | *.userosscache 11 | *.sln.docstates 12 | 13 | # User-specific files (MonoDevelop/Xamarin Studio) 14 | *.userprefs 15 | 16 | # Mono auto generated files 17 | mono_crash.* 18 | 19 | # Build results 20 | [Dd]ebug/ 21 | [Dd]ebugPublic/ 22 | [Rr]elease/ 23 | [Rr]eleases/ 24 | x64/ 25 | x86/ 26 | [Ww][Ii][Nn]32/ 27 | [Aa][Rr][Mm]/ 28 | [Aa][Rr][Mm]64/ 29 | bld/ 30 | [Bb]in/ 31 | [Oo]bj/ 32 | [Ll]og/ 33 | [Ll]ogs/ 34 | 35 | # Visual Studio 2015/2017 cache/options directory 36 | .vs/ 37 | # Uncomment if you have tasks that create the project's static files in wwwroot 38 | #wwwroot/ 39 | 40 | # Visual Studio 2017 auto generated files 41 | Generated\ Files/ 42 | 43 | # MSTest test Results 44 | [Tt]est[Rr]esult*/ 45 | [Bb]uild[Ll]og.* 46 | 47 | # NUnit 48 | *.VisualState.xml 49 | TestResult.xml 50 | nunit-*.xml 51 | 52 | # Build Results of an ATL Project 53 | [Dd]ebugPS/ 54 | [Rr]eleasePS/ 55 | dlldata.c 56 | 57 | # Benchmark Results 58 | BenchmarkDotNet.Artifacts/ 59 | 60 | # .NET Core 61 | project.lock.json 62 | project.fragment.lock.json 63 | artifacts/ 64 | 65 | # ASP.NET Scaffolding 66 | ScaffoldingReadMe.txt 67 | 68 | # StyleCop 69 | StyleCopReport.xml 70 | 71 | # Files built by Visual Studio 72 | *_i.c 73 | *_p.c 74 | *_h.h 75 | *.ilk 76 | *.meta 77 | *.obj 78 | *.iobj 79 | *.pch 80 | *.pdb 81 | *.ipdb 82 | *.pgc 83 | *.pgd 84 | *.rsp 85 | *.sbr 86 | *.tlb 87 | *.tli 88 | *.tlh 89 | *.tmp 90 | *.tmp_proj 91 | *_wpftmp.csproj 92 | *.log 93 | *.tlog 94 | *.vspscc 95 | *.vssscc 96 | .builds 97 | *.pidb 98 | *.svclog 99 | *.scc 100 | 101 | # Chutzpah Test files 102 | _Chutzpah* 103 | 104 | # Visual C++ 
cache files 105 | ipch/ 106 | *.aps 107 | *.ncb 108 | *.opendb 109 | *.opensdf 110 | *.sdf 111 | *.cachefile 112 | *.VC.db 113 | *.VC.VC.opendb 114 | 115 | # Visual Studio profiler 116 | *.psess 117 | *.vsp 118 | *.vspx 119 | *.sap 120 | 121 | # Visual Studio Trace Files 122 | *.e2e 123 | 124 | # TFS 2012 Local Workspace 125 | $tf/ 126 | 127 | # Guidance Automation Toolkit 128 | *.gpState 129 | 130 | # ReSharper is a .NET coding add-in 131 | _ReSharper*/ 132 | *.[Rr]e[Ss]harper 133 | *.DotSettings.user 134 | 135 | # TeamCity is a build add-in 136 | _TeamCity* 137 | 138 | # DotCover is a Code Coverage Tool 139 | *.dotCover 140 | 141 | # AxoCover is a Code Coverage Tool 142 | .axoCover/* 143 | !.axoCover/settings.json 144 | 145 | # Coverlet is a free, cross platform Code Coverage Tool 146 | coverage*.json 147 | coverage*.xml 148 | coverage*.info 149 | 150 | # Visual Studio code coverage results 151 | *.coverage 152 | *.coveragexml 153 | 154 | # NCrunch 155 | _NCrunch_* 156 | .*crunch*.local.xml 157 | nCrunchTemp_* 158 | 159 | # MightyMoose 160 | *.mm.* 161 | AutoTest.Net/ 162 | 163 | # Web workbench (sass) 164 | .sass-cache/ 165 | 166 | # Installshield output folder 167 | [Ee]xpress/ 168 | 169 | # DocProject is a documentation generator add-in 170 | DocProject/buildhelp/ 171 | DocProject/Help/*.HxT 172 | DocProject/Help/*.HxC 173 | DocProject/Help/*.hhc 174 | DocProject/Help/*.hhk 175 | DocProject/Help/*.hhp 176 | DocProject/Help/Html2 177 | DocProject/Help/html 178 | 179 | # Click-Once directory 180 | publish/ 181 | 182 | # Publish Web Output 183 | *.[Pp]ublish.xml 184 | *.azurePubxml 185 | # Note: Comment the next line if you want to checkin your web deploy settings, 186 | # but database connection strings (with potential passwords) will be unencrypted 187 | *.pubxml 188 | *.publishproj 189 | 190 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 191 | # checkin your Azure Web App publish settings, but sensitive information contained 192 | # in these scripts will be unencrypted 193 | PublishScripts/ 194 | 195 | # NuGet Packages 196 | *.nupkg 197 | # NuGet Symbol Packages 198 | *.snupkg 199 | # The packages folder can be ignored because of Package Restore 200 | **/[Pp]ackages/* 201 | # except build/, which is used as an MSBuild target. 
202 | !**/[Pp]ackages/build/ 203 | # Uncomment if necessary however generally it will be regenerated when needed 204 | #!**/[Pp]ackages/repositories.config 205 | # NuGet v3's project.json files produces more ignorable files 206 | *.nuget.props 207 | *.nuget.targets 208 | 209 | # Microsoft Azure Build Output 210 | csx/ 211 | *.build.csdef 212 | 213 | # Microsoft Azure Emulator 214 | ecf/ 215 | rcf/ 216 | 217 | # Windows Store app package directories and files 218 | AppPackages/ 219 | BundleArtifacts/ 220 | Package.StoreAssociation.xml 221 | _pkginfo.txt 222 | *.appx 223 | *.appxbundle 224 | *.appxupload 225 | 226 | # Visual Studio cache files 227 | # files ending in .cache can be ignored 228 | *.[Cc]ache 229 | # but keep track of directories ending in .cache 230 | !?*.[Cc]ache/ 231 | 232 | # Others 233 | ClientBin/ 234 | ~$* 235 | *~ 236 | *.dbmdl 237 | *.dbproj.schemaview 238 | *.jfm 239 | *.pfx 240 | *.publishsettings 241 | orleans.codegen.cs 242 | 243 | # Including strong name files can present a security risk 244 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 245 | #*.snk 246 | 247 | # Since there are multiple workflows, uncomment next line to ignore bower_components 248 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 249 | #bower_components/ 250 | 251 | # RIA/Silverlight projects 252 | Generated_Code/ 253 | 254 | # Backup & report files from converting an old project file 255 | # to a newer Visual Studio version. Backup files are not needed, 256 | # because we have git ;-) 257 | _UpgradeReport_Files/ 258 | Backup*/ 259 | UpgradeLog*.XML 260 | UpgradeLog*.htm 261 | ServiceFabricBackup/ 262 | *.rptproj.bak 263 | 264 | # SQL Server files 265 | *.mdf 266 | *.ldf 267 | *.ndf 268 | 269 | # Business Intelligence projects 270 | *.rdl.data 271 | *.bim.layout 272 | *.bim_*.settings 273 | *.rptproj.rsuser 274 | *- [Bb]ackup.rdl 275 | *- [Bb]ackup ([0-9]).rdl 276 | *- [Bb]ackup ([0-9][0-9]).rdl 277 | 278 | # Microsoft Fakes 279 | FakesAssemblies/ 280 | 281 | # GhostDoc plugin setting file 282 | *.GhostDoc.xml 283 | 284 | # Node.js Tools for Visual Studio 285 | .ntvs_analysis.dat 286 | node_modules/ 287 | 288 | # Visual Studio 6 build log 289 | *.plg 290 | 291 | # Visual Studio 6 workspace options file 292 | *.opt 293 | 294 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 295 | *.vbw 296 | 297 | # Visual Studio 6 auto-generated project file (contains which files were open etc.) 
298 | *.vbp 299 | 300 | # Visual Studio 6 workspace and project file (working project files containing files to include in project) 301 | *.dsw 302 | *.dsp 303 | 304 | # Visual Studio 6 technical files 305 | *.ncb 306 | *.aps 307 | 308 | # Visual Studio LightSwitch build output 309 | **/*.HTMLClient/GeneratedArtifacts 310 | **/*.DesktopClient/GeneratedArtifacts 311 | **/*.DesktopClient/ModelManifest.xml 312 | **/*.Server/GeneratedArtifacts 313 | **/*.Server/ModelManifest.xml 314 | _Pvt_Extensions 315 | 316 | # Paket dependency manager 317 | .paket/paket.exe 318 | paket-files/ 319 | 320 | # FAKE - F# Make 321 | .fake/ 322 | 323 | # CodeRush personal settings 324 | .cr/personal 325 | 326 | # Python Tools for Visual Studio (PTVS) 327 | __pycache__/ 328 | *.pyc 329 | 330 | # Cake - Uncomment if you are using it 331 | # tools/** 332 | # !tools/packages.config 333 | 334 | # Tabs Studio 335 | *.tss 336 | 337 | # Telerik's JustMock configuration file 338 | *.jmconfig 339 | 340 | # BizTalk build output 341 | *.btp.cs 342 | *.btm.cs 343 | *.odx.cs 344 | *.xsd.cs 345 | 346 | # OpenCover UI analysis results 347 | OpenCover/ 348 | 349 | # Azure Stream Analytics local run output 350 | ASALocalRun/ 351 | 352 | # MSBuild Binary and Structured Log 353 | *.binlog 354 | 355 | # NVidia Nsight GPU debugger configuration file 356 | *.nvuser 357 | 358 | # MFractors (Xamarin productivity tool) working folder 359 | .mfractor/ 360 | 361 | # Local History for Visual Studio 362 | .localhistory/ 363 | 364 | # Visual Studio History (VSHistory) files 365 | .vshistory/ 366 | 367 | # BeatPulse healthcheck temp database 368 | healthchecksdb 369 | 370 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 371 | MigrationBackup/ 372 | 373 | # Ionide (cross platform F# VS Code tools) working folder 374 | .ionide/ 375 | 376 | # Fody - auto-generated XML schema 377 | FodyWeavers.xsd 378 | 379 | # VS Code files for those working on multiple tools 380 | .vscode/* 381 | !.vscode/settings.json 382 | !.vscode/tasks.json 383 | !.vscode/launch.json 384 | !.vscode/extensions.json 385 | *.code-workspace 386 | 387 | # Local History for Visual Studio Code 388 | .history/ 389 | 390 | # Windows Installer files from build outputs 391 | *.cab 392 | *.msi 393 | *.msix 394 | *.msm 395 | *.msp 396 | 397 | # JetBrains Rider 398 | *.sln.iml 399 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Michael Shannon 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![CPLogo](https://github.com/Michael-shannon/cellPLATO/blob/main/cellPLATO/images/cellPLATOlogo.png) 2 | 3 | An unsupervised method for identifying cell behaviour in heterogeneous cell trajectory data 4 | 5 | cellPLATO workflow: 6 | 7 | 1. takes tracking and segmentation data as input 8 | 2. measures morphology and migration at each timepoint for every cell 9 | 3. clusters cells with similar morphology and migration using UMAP and HDBSCAN 10 | 4. measures the similarity of behavioural sequences for each cell over time, and clusters them to form 'trajectories of behaviour' 11 | 5. de-abstractifies the clustered behaviours using graphics of exemplar cells, readouts of plasticity and comparisons between conditions 12 | 13 | ## Updates 14 | 15 | Published in the Journal of Cell Science on the 24th of June, 2024 [here](https://journals.biologists.com/jcs/article/137/20/jcs261887/352628) 16 | 17 | Movies for the paper can be found [here](https://drive.google.com/drive/folders/1wvCbWoywRdk0OWhcwwJhiIdAEz4yTKHt?usp=sharing) 18 | 19 | On the 14th of May 2023, at Journal of Cell Science's 'Imaging Cell Dynamics' conference in Lisbon, we presented cellPLATO's UMAP and HDBSCAN module to produce a fingerprint of cell behaviours in a heterogeneous population. Go [here](https://drive.google.com/drive/folders/1_f2GmdqbaF15FyesgxnsotuAu_XGh10o?usp=sharing) to see the poster! 20 | 21 | [![DOI](https://zenodo.org/badge/588728402.svg)](https://zenodo.org/badge/latestdoi/588728402) 22 | 23 | ## Description 24 | 25 | A Python data analysis package for time-lapse cell migration experiments, written in collaboration with [Tyler Sloan](https://github.com/tsloan1377) at [Quorumetrix](https://github.com/Quorumetrix). Used after segmentation (e.g. [Cellpose](https://github.com/MouseLand/cellpose)) and tracking (e.g. [Bayesian Tracker](https://github.com/quantumjot/btrack)) of large time-lapse microscopy datasets, cellPLATO measures morphokinetic information about each cell per timepoint and automatically makes statistical plots (plots of differences in Python, inspired by those in R by [Joachim Goedhart](https://github.com/JoachimGoedhart)). Users can pool/compare multiple replicates from multiple experimental conditions. Next, dimensionality reduction and cluster analysis are used to segregate cells into behavioural subtypes and produce a fingerprint for each condition (cells per behavioural subtype). Finally, exemplar cells are automatically selected and graphically displayed to disambiguate the nature of each quantified cell behavioural subtype. 26 | 27 | ## Installation instructions 28 | 29 | 1. Using the Anaconda terminal, cd to a directory where you want to install the software 30 | 2. Clone the repository onto your local machine: git clone 31 | 3. cd to the folder that contains 'environment.yml' and type: conda env create -f environment.yml 32 | 4. Activate the environment: conda activate cellPLATO 33 | 5. Install the rest of the packages: pip install -e .
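After installation, a quick optional way to confirm the environment resolved correctly is to import the dependencies that most often fail to build. This is only an illustrative sanity check (it assumes the environment.yml pulled in btrack, hdbscan and umap-learn); it is not part of cellPLATO itself:

```python
# Optional post-install sanity check -- illustrative only, not part of cellPLATO.
# Run inside the activated cellPLATO environment.
import btrack   # Bayesian Tracker, used upstream for tracking
import hdbscan  # density-based clustering used by cellPLATO
import umap     # UMAP embedding (installed as umap-learn)

print("btrack, hdbscan and umap imported successfully")
```

If the hdbscan import fails, the known issues below usually apply.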
34 | 35 | Known issues with installation: 36 | 37 | If you get the error: "Could not build wheels for hdbscan, which is required to install pyproject.toml-based projects" 38 | 39 | Please 1) check that you have a C++ compiler installed, and 2) install hdbscan using 'conda install -c conda-forge hdbscan' 40 | 41 | If matplotlib fails to install via pip for the same reason, please use: 42 | 43 | conda install -c conda-forge matplotlib 44 | 45 | ## How to use cellPLATO: 46 | 47 | cellPLATO is made to be used downstream of cell segmentation and tracking, and can currently be used with several tracking methodologies. The default is btrack. 48 | 49 | ### Step 1: 50 | 51 | Organize your data into the following hierarchical format: 52 | 53 | - 📁 **Master folder** `[Folder_path]` 54 | - 🌿 **Condition_1** `[Experimental condition 1]` 55 | - 🔄 **Rep_1** `[Experimental repeat 1]` 56 | - 📄 `Replicate_1.h5` 57 | - 🔄 **Rep_2** 58 | - 📄 `Replicate_2.h5` 59 | - 🔄 **Rep_n** 60 | - 📄 `Replicate_n.h5` 61 | - 🌿 **Condition_2** 62 | - 🔄 **Rep_1** 63 | - 🔄 **Rep_2** 64 | - 🔄 **Rep_n** 65 | - 🌿 **Condition_n** 66 | - 🔄 **Rep_n** 67 | 68 | 📁 represents the main folder or directory. 69 | 🌿 represents the condition folders. 70 | 🔄 represents the replicate folders. 71 | 📄 represents the individual H5 files containing the segmentations and tracks. 72 | 73 | ### Step 2: 74 | 75 | Open the config.py file and edit it as directed. 76 | 77 | As a minimum, fill in the master directory, the experiments to include, the pixel size and the sampling interval. 78 | 79 | Experiments to include gets filled with the folder names of the conditions you are measuring: 80 | 81 | EXPERIMENTS_TO_INCLUDE = ['Condition_1', 'Condition_2', 'Condition_n'] 82 | 83 | ### Step 3: 84 | 85 | Run cellPLATO through Jupyter Notebooks. Choose the master notebook to run all of the analysis step by step. 86 | 87 | 88 | 89 | 90 | -------------------------------------------------------------------------------- /cellPLATO/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.pyc 4 | 5 | # Distribution / packaging 6 | *.egg-info/ 7 | 8 | # Notebook checkpoints 9 | .ipynb_checkpoints 10 | -------------------------------------------------------------------------------- /cellPLATO/README.md: -------------------------------------------------------------------------------- 1 | # cellPLATO: cell PLasticity Analysis TOol 2 | 3 | A Python data analysis package for time-lapse cell migration experiments. Used in conjunction with Bayesian Tracker for automated cell tracking and segmentation, cellPLATO adds further layers of analysis and visualization. This tool allows users to pool/compare multiple replicates from multiple experimental conditions, perform dimensionality reduction, and explore cell behavioural trajectories through physical and low-dimensional space. 4 | 5 | ## Installation instructions 6 | 7 | 1. Using the Anaconda terminal, cd to a directory where you want to install the software 8 | 2. Clone the repository onto your local machine: git clone 9 | 3. cd to the folder that contains 'environment.yml' and type: conda env create -f environment.yml 10 | 4. Activate the environment: conda activate cellPLATO 11 | 5. Install the rest of the packages: pip install -e . 12 | 13 | ## How to use cellPLATO: 14 | 15 | cellPLATO is made to be used downstream of cell segmentation and tracking. We used Cellpose and then Bayesian Tracker, with files organized as shown in the 'file organization' section below.
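Before running the notebooks, it can help to confirm that your folders actually match that layout. The sketch below is illustrative only: it is not part of cellPLATO's API, and the master-folder path is a placeholder borrowed from the tracking notebook. It simply lists the condition folders, replicate folders and .h5 files that the loader would see:

```python
# Illustrative layout check only -- not cellPLATO code.
import glob
import os

MASTER_FOLDER = "D:/cellPLATO_test_data/TestSet"  # placeholder: your master folder

for cond_dir in sorted(glob.glob(os.path.join(MASTER_FOLDER, "*"))):
    if not os.path.isdir(cond_dir):
        continue
    print("Condition:", os.path.basename(cond_dir))
    for rep_dir in sorted(glob.glob(os.path.join(cond_dir, "*"))):
        if not os.path.isdir(rep_dir):
            continue
        h5_files = [os.path.basename(f) for f in glob.glob(os.path.join(rep_dir, "*.h5"))]
        print("  Replicate:", os.path.basename(rep_dir), "->", h5_files)
```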
16 | 17 | With Jupyter Notebook installed, type jupyter notebook in the terminal, then select one of the notebooks to begin running cellPLATO. 18 | 19 | ## Description: 20 | 21 | A collection of Jupyter notebooks allows users to work through the analysis step by step, or to use pre-assembled pipelines. 22 | 23 | All experimental constants and filepaths are contained within the config.py file. This informs the active Python kernel where to find the data (.h5) files, where to export plots, and the key parameters that control the analysis. Each time the analysis is run, it generates a time-stamped analysis output folder, with a copy of the config file as a record for future verification. 24 | 25 | The experimental conditions and replicates are indicated in the config.py file in the EXPERIMENTS_TO_INCLUDE = [] list. The data_processing module will automatically extract the replicates from the following folder structure: 26 | 27 | my_data_path 28 | Condition 1 29 | Replicate 1 30 | Replicate 1.h5 31 | Replicate 2 32 | Replicate 2.h5 33 | ... 34 | Replicate n 35 | Replicate n.h5 36 | Condition 2 37 | Replicate 1 38 | Replicate 1.h5 39 | Replicate 2 40 | Replicate 2.h5 41 | ... 42 | Replicate n 43 | Replicate n.h5 44 | ... 45 | Condition N 46 | Replicate 1 47 | Replicate 1.h5 48 | ... 49 | Replicate n 50 | Replicate n.h5 51 | 52 | 53 | The data_processing submodule is designed to sequentially process the cell tracks and shape measurements from the btrack-generated h5 files, and combine them into a Pandas dataframe for further processing, filtering and visualization. 54 | 55 | The functionality of the subsequent processing steps is described below: 56 | 57 | Pre-processed data are combined into a single dataframe (comb_df), maintaining labels for the Condition and Replicate_ID. For plotting, Condition_shortlabel can optionally be used to give more succinct plot labels. The comb_df contains both cell shape and cell migration-related factors. 58 | 59 | At this stage, additional measurements are performed, such as the aspect ratio and Ripley's L and K. The factors are calibrated according to the microns-per-pixel ratio (MICRONS_PER_PIXEL) defined in the config.py file. Optionally, data are filtered upstream of dimensionality reduction. 60 | 61 | Next, the combined dataframe undergoes dimensionality reduction: initially PCA, followed by both tSNE and UMAP low-dimension embeddings. The low-dimensional representations contain information about both the cell migration and shape characteristics of each cell at each timepoint, and additional filtering steps following the dimensionality reduction are possible. 62 | 63 | The low-dimensional embeddings are then clustered using HDBSCAN to automatically extract density-based clusters from the selected embedding. Cells at a given timepoint are clustered into distinct groups and given a label for their group.
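To make those last two steps concrete, the sketch below shows the general dimensionality-reduction-plus-clustering idea using scikit-learn, umap-learn and hdbscan directly. It is a minimal illustration, not cellPLATO's own implementation (which lives in data_processing/dimensionality_reduction.py and data_processing/clustering.py and is driven by the constants in config.py); the feature columns and parameter values here are placeholders:

```python
# Conceptual sketch only -- cellPLATO's own pipeline functions differ.
import hdbscan
import pandas as pd
import umap
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Hypothetical subset of the per-timepoint shape/migration factors in comb_df.
FEATURE_COLS = ["area", "perimeter", "eccentricity", "speed"]

def embed_and_cluster(comb_df: pd.DataFrame) -> pd.DataFrame:
    """Scale the features, embed with PCA then UMAP, and label clusters with HDBSCAN."""
    x = StandardScaler().fit_transform(comb_df[FEATURE_COLS].values)
    x_pca = PCA(n_components=len(FEATURE_COLS)).fit_transform(x)
    embedding = umap.UMAP(n_components=2, random_state=0).fit_transform(x_pca)
    labels = hdbscan.HDBSCAN(min_cluster_size=50).fit_predict(embedding)

    out = comb_df.copy()
    out["UMAP_1"] = embedding[:, 0]
    out["UMAP_2"] = embedding[:, 1]
    out["cluster_label"] = labels  # -1 marks points HDBSCAN leaves as noise
    return out
```

Each row (one cell at one timepoint) then carries a 2D embedding and a density-based cluster label, which is the representation that the downstream fingerprinting and trajectory-of-behaviour steps build on.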
64 | 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /cellPLATO/cellPLATO/Btrack_cellPLATO_2024.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import glob\n", 10 | "import os\n", 11 | "\n", 12 | "import btrack\n", 13 | "import json\n", 14 | "from skimage import io\n", 15 | "import matplotlib.pyplot as plt\n", 16 | "from skimage.measure import regionprops\n", 17 | "from skimage.io import imread\n", 18 | "from btrack.dataio import HDF5FileHandler\n", 19 | "from btrack.constants import BayesianUpdates\n", 20 | "import numpy as np\n", 21 | "from pathlib import Path\n", 22 | "from skimage.io import imread\n", 23 | "from skimage.io import imread\n", 24 | "from skimage.util import montage" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "# Define a function to make the files into a numpy array\n", 34 | "\n", 35 | "def segmentation_arr(files):\n", 36 | " \"\"\"Segmentation as numpy array.\"\"\"\n", 37 | " \n", 38 | " stack = []\n", 39 | " for filename in files:\n", 40 | " img = imread(filename)\n", 41 | " stack.append(img)\n", 42 | " return np.stack(stack, axis=0)" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "## User changes MASTERPATH only. This the globs through and selects all mask_avg folders in condition folders for process >\n", 52 | "\n", 53 | "MASTERPATH = 'D://cellPLATO_test_data/TestSet/'\n", 54 | "\n", 55 | "PATHTOCONFIG = 'initialization/btrack_config.json'\n", 56 | "\n", 57 | "\n", 58 | "# CONFIG_FILE = datasets.cell_config() # this is the default cell tracking config file\n", 59 | "\n", 60 | "# PATHTOCONFIG =\"D://GitHub_software_forallusers/BayesianTracker/BayesianTracker/models/cell_config_Michael_phase.json\"\n", 61 | "INPUT_FMT = 'cellpose2D' #cellpose #cellpose2D\n", 62 | "\n", 63 | "if INPUT_FMT == 'usiigaci': \n", 64 | " PATHCONDLIST = glob.glob(os.path.join(MASTERPATH, 'Condition*', '*_mask_avg'))\n", 65 | " \n", 66 | "elif INPUT_FMT == 'cellpose':\n", 67 | " PATHCONDLIST = glob.glob(os.path.join(MASTERPATH, 'Condition*'))\n", 68 | " \n", 69 | "elif INPUT_FMT == 'cellpose2D':\n", 70 | " PATHCONDLIST = glob.glob(os.path.join(MASTERPATH, 'Condition*', '*'))\n", 71 | "\n", 72 | "elif INPUT_FMT == 'cellpose3D':\n", 73 | " PATHCONDLIST = glob.glob(os.path.join(MASTERPATH, 'Condition*', '*'))\n", 74 | " \n", 75 | "elif INPUT_FMT == 'cellpose_skrt':\n", 76 | " PATHCONDLIST = glob.glob(os.path.join(MASTERPATH, '*' ))\n", 77 | " \n", 78 | "print(PATHCONDLIST)\n", 79 | "\n", 80 | "# Display the PATHCONDLIST in a nice way\n", 81 | "for i in PATHCONDLIST:\n", 82 | " print(i)\n" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": null, 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [ 91 | "if INPUT_FMT == 'usiigaci':\n", 92 | " scaling = (1., 1.) 
\n", 93 | "elif INPUT_FMT == 'cellpose3D':\n", 94 | " scaling = (3.45, 1., 1.)\n", 95 | " # scaling = (1., 1.)\n", 96 | "elif INPUT_FMT == 'cellpose2D': \n", 97 | " scaling = (1., 1.)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "FEATURES = [\n", 107 | " 'area',\n", 108 | " 'bbox_area',\n", 109 | " 'eccentricity',\n", 110 | " 'equivalent_diameter',\n", 111 | " 'extent',\n", 112 | " 'filled_area',\n", 113 | " 'major_axis_length',\n", 114 | " 'minor_axis_length',\n", 115 | " 'orientation',\n", 116 | " 'perimeter',\n", 117 | " 'solidity',\n", 118 | "\n", 119 | "]\n" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "## Then extract the segmentation files iteratively, do tracking on them and save a h5 in place\n", 129 | "\n", 130 | "for PATH in PATHCONDLIST:\n", 131 | " \n", 132 | " ww = os.path.dirname(PATH) #wheretosavethefile\n", 133 | " basenameyeah = os.path.basename(PATH)\n", 134 | " \n", 135 | " # files = glob.glob(os.path.join(PATH, '*.png')) #Use this line for Usiigaci\n", 136 | " # files = glob.glob(os.path.join(PATH, '*_cp_masks_T????.tif')) #\n", 137 | " files = glob.glob(os.path.join(PATH, '*_cp_masks.tif')) #Use this line and the part below for Cellpose\n", 138 | "\n", 139 | " for i in files:\n", 140 | " print(i)\n", 141 | "\n", 142 | " stack = []\n", 143 | " stack = segmentation_arr(files)\n", 144 | " obj_from_arr = []\n", 145 | " obj_from_arr = btrack.utils.segmentation_to_objects(stack, scale=scaling, properties=tuple(FEATURES))\n", 146 | "\n", 147 | " with btrack.BayesianTracker() as tracker:\n", 148 | "\n", 149 | " # configure the tracker using a config file\n", 150 | " tracker.configure_from_file(PATHTOCONFIG)\n", 151 | " tracker.max_search_radius = 50\n", 152 | " tracker.verbose = True\n", 153 | "\n", 154 | " tracker.tracking_updates = [\"MOTION\"] #update 2023\n", 155 | " tracker.features = FEATURES #update 2023 \n", 156 | " tracker.update_method = BayesianUpdates.EXACT #changed from EXACT\n", 157 | " # append the objects to be tracked\n", 158 | " tracker.append(obj_from_arr)\n", 159 | " # set the volume\n", 160 | " tracker.volume=((0, 2030), (0, 2030), (0, 100000.)) #changed for Batya\n", 161 | " # track them (in interactive mode)\n", 162 | " tracker.track_interactive(step_size=100)\n", 163 | " # generate hypotheses and run the global optimizer\n", 164 | " tracker.optimize()\n", 165 | " h5fileexport = os.path.join(ww, (basenameyeah + '_tracks.h5'))\n", 166 | " # export the tracks\n", 167 | " tracker.export(h5fileexport, obj_type='obj_type_1')\n", 168 | " # write the segmentation (you needed to add \"a\" rather than \"w\" here!)\n", 169 | " with HDF5FileHandler(h5fileexport, \"a\") as h:\n", 170 | " h.write_segmentation(stack)\n", 171 | " # make sure that we did this by checking that the data exists\n", 172 | " assert \"segmentation\" in h._hdf\n", 173 | " " 174 | ] 175 | } 176 | ], 177 | "metadata": { 178 | "kernelspec": { 179 | "display_name": "btrack-2023", 180 | "language": "python", 181 | "name": "python3" 182 | }, 183 | "language_info": { 184 | "codemirror_mode": { 185 | "name": "ipython", 186 | "version": 3 187 | }, 188 | "file_extension": ".py", 189 | "mimetype": "text/x-python", 190 | "name": "python", 191 | "nbconvert_exporter": "python", 192 | "pygments_lexer": "ipython3", 193 | "version": "3.7.12" 194 | }, 195 | "orig_nbformat": 4 196 | }, 197 | "nbformat": 4, 
198 | "nbformat_minor": 2 199 | } 200 | -------------------------------------------------------------------------------- /cellPLATO/cellPLATO/__init__.py: -------------------------------------------------------------------------------- 1 | # from cellPLATO.cellPLATO.initialization.config_trackmate import * 2 | from initialization.config import * 3 | from initialization.initialization import * 4 | 5 | from data_processing.cell_identifier import * 6 | from data_processing.cleaning_formatting_filtering import * 7 | from data_processing.clustering import * 8 | from data_processing.data_io import * 9 | from data_processing.data_wrangling import * 10 | from data_processing.dimensionality_reduction import * 11 | from data_processing.measurements import * 12 | from data_processing.migration_calculations import * 13 | from data_processing.pipelines import * 14 | from data_processing.shape_calculations import * 15 | from data_processing.statistics import * 16 | from data_processing.time_calculations import * 17 | from data_processing.trajectory_clustering import * 18 | from data_processing.load_trackmate import * 19 | 20 | from visualization.comparative_visualization import * 21 | from visualization.cluster_visualization import * 22 | from visualization.filter_visualization import * 23 | from visualization.low_dimension_visualization import * 24 | # from visualization.panel_apps import * 25 | from visualization.plots_of_differences import * 26 | from visualization.small_multiples import * 27 | from visualization.superplots import * 28 | from visualization.timecourse_visualization import * 29 | from visualization.trajectory_visualization import * 30 | 31 | print('Finished initializing cellPLATO') 32 | -------------------------------------------------------------------------------- /cellPLATO/cellPLATO/data_processing/__init__.py: -------------------------------------------------------------------------------- 1 | # from initialization.config import * 2 | # from initialization.initialization import * 3 | 4 | # from data_processing.cell_identifier import * 5 | # from data_processing.cleaning_formatting_filtering import * 6 | # from data_processing.clustering import * 7 | # from data_processing.data_io import * 8 | # from data_processing.data_wrangling import * 9 | # from data_processing.dimensionality_reduction import * 10 | # from data_processing.measurements import * 11 | # from data_processing.migration_calculations import * 12 | # from data_processing.pipelines import * 13 | # from data_processing.shape_calculations import * 14 | # from data_processing.statistics import * 15 | # from data_processing.time_calculations import * 16 | # from data_processing.trajectory_clustering import * 17 | # 18 | # from visualization.comparative_visualization import * 19 | # from visualization.cluster_visualization import * 20 | # from visualization.filter_visualization import * 21 | # from visualization.low_dimension_visualization import * 22 | # from visualization.panel_apps import * 23 | # from visualization.plots_of_differences import * 24 | # from visualization.small_multiples import * 25 | # from visualization.superplots import * 26 | # from visualization.timecourse_visualization import * 27 | # from visualization.trajectory_visualization import * 28 | 29 | print('Finished initializing data_processing') 30 | -------------------------------------------------------------------------------- /cellPLATO/cellPLATO/data_processing/cell_identifier.py: 
-------------------------------------------------------------------------------- 1 | #cell_identifier.py 2 | # Functions for finding cells that meet a criteria, or random ones. 3 | #test# 4 | 5 | # from cellPLATO.cellPLATO.initialization.config_trackmate import * 6 | from initialization.config import * 7 | from initialization.initialization import * 8 | 9 | import os 10 | import numpy as np 11 | import pandas as pd 12 | 13 | def get_random_cell(df): 14 | 15 | # Select random row. 16 | i_row = np.random.randint(len(df)) 17 | row = df.iloc[i_row] 18 | 19 | # Get sub_df for cell from random row 20 | cell_df = df[(df['Condition']==row['Condition']) & 21 | (df['Replicate_ID']==row['Replicate_ID']) & 22 | (df['particle']==row['particle'])] 23 | 24 | return cell_df 25 | 26 | def get_cell_mean_variance(df,factor, sortby='mean'): 27 | 28 | ''' 29 | Rank the cells in df with respect to their standard deviation of a given factor. 30 | Used to find example cells that show large changes in a specific factor over time. 31 | ''' 32 | 33 | avg_list = [] 34 | 35 | for rep in df['Replicate_ID'].unique(): 36 | 37 | for cell_id in df[df['Replicate_ID'] == rep]['particle'].unique(): 38 | 39 | # Create a unique cell identifier 40 | rep_ind = list(df['Replicate_ID'].unique()).index(rep) 41 | 42 | cell_uniq_ident = str(rep_ind) + '_' + str(int(cell_id)) 43 | 44 | cell_df = df[(df['Replicate_ID']==rep) & 45 | (df['particle']==cell_id)] 46 | 47 | avg_list.append((rep, cell_id, cell_uniq_ident, np.mean(cell_df[factor]),np.std(cell_df[factor]))) 48 | 49 | df_inds = list(df.index[(df['Replicate_ID']==rep) 50 | & (df['particle']==cell_id)]) 51 | 52 | # Add unique ID back into the original dataframe 53 | df.at[df_inds,'uniq_id'] = cell_uniq_ident 54 | 55 | 56 | mean_std_df = pd.DataFrame(data=avg_list,columns=['rep', 'cell_id','cell_uniq_ident', 'mean','std']) 57 | 58 | if sortby=='mean': 59 | mean_std_df.sort_values(by='mean', ascending=False, inplace=True) 60 | 61 | elif sortby=='std': 62 | mean_std_df.sort_values(by='std', ascending=False, inplace=True) 63 | 64 | 65 | return mean_std_df 66 | 67 | 68 | def get_cell_variance(df,factor): 69 | 70 | ''' 71 | TEMP - TO DELETE. 72 | ''' 73 | print('() is discontinued, use get_cell_mean_variance() instead.') 74 | 75 | 76 | # Get that cell and confirm it has the same measured value. 77 | def get_specific_cell(sum_df, full_df,nth): 78 | 79 | ''' 80 | Having calculated the average and standard deviation for the factor of interest, find the specific cell from the main dataframe 81 | 82 | Input: 83 | sum_df: The dataframe containing summary measurements (ex: std) 84 | full_df: The full datafrme from which we want to extract an example cell 85 | nth: integer indicating which row of sum_df to extract the cell info. 86 | 87 | returns: 88 | cell_df: Section of full_df corresponding to the selected cell. 89 | ''' 90 | 91 | this_rep = sum_df.iloc[nth]['rep'] 92 | this_cell_id = sum_df.iloc[nth]['cell_id'] 93 | this_std = sum_df.iloc[nth]['std'] 94 | 95 | # Get sub_df for cell from random row 96 | cell_df = full_df[(full_df['Replicate_ID']==this_rep) & 97 | (full_df['particle']==this_cell_id)] 98 | 99 | return cell_df 100 | 101 | 102 | 103 | 104 | def get_cell_id(cell_df): 105 | 106 | ''' 107 | For a given cell dataframe, return a string containing a unique identifier, 108 | accounting for the condition, replicate and cell number. 
109 | ''' 110 | 111 | assert len(np.unique(cell_df['particle'].values)) == 1, 'Should be only one cell in dataframe' 112 | cell_number = cell_df['particle'].values[0] 113 | 114 | rep_label = int(cell_df['Rep_label'].values[0]) 115 | cond_label = cell_df['Cond_label'].values[0] 116 | cid_str = str(cond_label)+ '_' + str(rep_label)+ '_' + str(int(cell_number) ) 117 | 118 | return cid_str 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | -------------------------------------------------------------------------------- /cellPLATO/cellPLATO/data_processing/cleaning_formatting_filtering_remotefix.py: -------------------------------------------------------------------------------- 1 | #cleaning_Labeling.py 2 | 3 | from initialization.config import * 4 | from initialization.initialization import * 5 | 6 | import os 7 | import numpy as np 8 | import pandas as pd 9 | 10 | import itertools 11 | 12 | def clean_comb_df(df_in, deduplicate=True): 13 | 14 | ''' 15 | A function with steps to clean up the comb_df 16 | to standardize the dataframe formatting for all the downstream processing steps 17 | 18 | ''' 19 | 20 | df = df_in.copy() 21 | # Drop all-nan rows. 22 | # df.dropna(how='all', axis=1,inplace=True) 23 | # df.dropna(how='any', axis=0,inplace=True) 24 | 25 | if 'Replicate_ID' not in df.columns: 26 | print('No column Replicate_ID, renaming Experiment column') 27 | 28 | df.rename(columns = {'Experiment': 'Replicate_ID'}, inplace=True) 29 | 30 | df.dropna(subset = ['Condition', 'Replicate_ID'], inplace=True) 31 | 32 | # Create Rep_label column 33 | reps = df['Replicate_ID'].unique() 34 | allreps = df['Replicate_ID'].values 35 | 36 | rep_inds = np.empty([len(df)]) 37 | 38 | for i, rep in enumerate(reps): 39 | rep_inds[np.where(allreps==rep)] = i 40 | 41 | df['Cond_label'] = df['Condition'] 42 | df['Rep_label'] = rep_inds 43 | 44 | 45 | if 'level_0' in df.columns: 46 | 47 | df.drop(columns=['level_0'], inplace=True) 48 | df.reset_index(inplace=True,drop=True) 49 | print('Dropped level_0 column.') 50 | 51 | 52 | if(deduplicate): 53 | 54 | #Prepare the combined dataframe for migration calculations 55 | #be ensuring there will be no overlap in columns 56 | 57 | overlap = list(set(df.columns).intersection(MIG_FACTORS)) 58 | print('Overlap:', overlap) 59 | df.drop(columns=overlap, inplace=True) 60 | 61 | # Remove duplicate coloumns 62 | dedup_df = df.loc[:,~df.columns.duplicated()] 63 | df = dedup_df.copy() 64 | 65 | return df 66 | 67 | def apply_unique_id(df): 68 | 69 | ''' 70 | Add column to dataframe indicating a unique id for each cell, constructed as a concatenation of 71 | a numerical representation of the cells experimental replicate and the particle (cell) if. 72 | Of the form: XX_xx 73 | 74 | Additionally, adds column 'ntpts' to the dataframe, to make it easier to filter by track length. 75 | 76 | Input: 77 | df: DataFrame 78 | 79 | 80 | Returns: 81 | None. (Change is made directly to the passed dataframe.) 
82 | 83 | ''' 84 | 85 | for rep in df['Replicate_ID'].unique(): 86 | 87 | for cell_id in df[df['Replicate_ID'] == rep]['particle'].unique(): 88 | 89 | # Create a unique cell identifier 90 | rep_ind = list(df['Replicate_ID'].unique()).index(rep) 91 | 92 | cell_uniq_ident = str(rep_ind) + '_' + str(int(cell_id)) 93 | 94 | cell_df = df[(df['Replicate_ID']==rep) & 95 | (df['particle']==cell_id)] 96 | 97 | df_inds = list(df.index[(df['Replicate_ID']==rep) 98 | & (df['particle']==cell_id)]) 99 | ntpts = len(cell_df) 100 | # Add unique ID back into the original dataframe 101 | df.at[df_inds,'uniq_id'] = cell_uniq_ident 102 | df.at[df_inds,'ntpts'] = ntpts 103 | 104 | 105 | def replace_labels_shortlabels(df): 106 | 107 | ''' 108 | If shortlabels are used, Replace Condition labels with shortlabels. 109 | 110 | Should work on any dataframe, intended for adding shortlabels to the difference plots. 111 | ''' 112 | 113 | assert USE_SHORTLABELS is True, 'This should only be used if USE_SHORTLABELS is True...' 114 | 115 | full_condition_list = list(df['Condition']) 116 | condition_shortlabels = [] 117 | 118 | # Create a shortlabel per replicate 119 | rep_shortlabel_list = [] 120 | 121 | for this_cond_label in full_condition_list: 122 | 123 | this_cond_ind = CONDITIONS_TO_INCLUDE.index(this_cond_label) 124 | this_shortlabel = CONDITION_SHORTLABELS[this_cond_ind] 125 | condition_shortlabels.append(this_shortlabel) 126 | 127 | df['Condition'] = condition_shortlabels 128 | 129 | 130 | def apply_filters(df, filter_cell=True, how = 'all', filter_dict=DATA_FILTERS): 131 | 132 | ''' 133 | Apply the filters defines as FILTERS dictionary in config.py 134 | Apply in subsequent steps, and visualize the loss. 135 | 136 | Adds the 'included' column to the inputted datafra,me 137 | 138 | Returns: 139 | Filtered dataframe 140 | 141 | 142 | ''' 143 | 144 | print('Applying filters:') 145 | print(filter_dict) 146 | 147 | print('Beginning filtering ...') 148 | print(len(df.index), ' data points from ', len(df['uniq_id'].unique()), ' cells') 149 | 150 | df.to_csv(os.path.join(DATA_OUTPUT,'dr_df-prefilt.csv')) 151 | 152 | filt_counts=[] 153 | 154 | 155 | if(filter_cell is False): 156 | 157 | 158 | print('Applying data filters to individual timepoints:') 159 | print(filter_dict) 160 | print('...') 161 | 162 | for i,factor in enumerate(filter_dict.keys()): 163 | print(factor) 164 | print(filter_dict[factor][0], filter_dict[factor][1]) 165 | 166 | '''Consider adding here the export csv summary step, to export along with plots''' 167 | filt_df = df[(df[factor] > filter_dict[factor][0]) &#]#) 168 | (df[factor] < filter_dict[factor][1])] 169 | 170 | df.to_csv(os.path.join(DATA_OUTPUT,'filt_'+str(i)+'-'+factor+'.csv')) 171 | print(len(df.index), ' data points remaining.') 172 | assert len(df.index) > 0, 'Filtered out all the data.' 173 | filt_counts.append((factor, len(filt_df))) 174 | else: 175 | 176 | # Default filtering of entire cell. 
177 | print('Applying filters to entire cell trajectory:') 178 | print(filter_dict) 179 | print('...') 180 | 181 | for cell_id in df['uniq_id'].unique(): 182 | 183 | cell_df = df[df['uniq_id'] == cell_id] 184 | 185 | # make a list to hold the filter results per factor 186 | incl_list = [] 187 | 188 | for i,factor in enumerate(filter_dict.keys()): 189 | 190 | if how == 'any': 191 | included = cell_df[factor].between(filter_dict[factor][0],filter_dict[factor][1]).any() 192 | elif how == 'all': 193 | included = cell_df[factor].between(filter_dict[factor][0],filter_dict[factor][1]).all() 194 | 195 | incl_list.append(included) 196 | filt_counts.append((factor, np.sum(included))) 197 | 198 | assert len(incl_list) == len(filter_dict.keys()) 199 | 200 | # Get indices in the dataframe for this cell. 201 | df_inds = list(df.index[(df['uniq_id']==cell_id)]) 202 | 203 | # Cell is only included if all of the list of criteria are met. 204 | if all(incl_list): 205 | 206 | # Add included flag if true 207 | df.at[df_inds,'included'] = True 208 | 209 | else: 210 | 211 | # Add unique ID back into the original dataframe 212 | df.at[df_inds,'included'] = False 213 | 214 | filt_df = df[df['included'] == True] 215 | 216 | print(' Finished filtering. Resulting dataframe contains:') 217 | print(len(filt_df.index), ' data points from ', len(filt_df['uniq_id'].unique()), ' cells') 218 | 219 | sum_counts = [(key, sum(num for _, num in value)) 220 | for key, value in itertools.groupby(sorted(filt_counts), lambda x: x[0])] 221 | 222 | # Re-index the filtered dataframe, while keeping index of each row in the unfiltered dataframe. 223 | filt_df.reset_index(inplace=True) 224 | filt_df.rename(columns={'level_0': 'comb_df_row_ind'}, inplace=True) 225 | 226 | return filt_df, sum_counts 227 | 228 | 229 | 230 | def factor_calibration(df, mixed_calibration=False): 231 | 232 | if mixed_calibration: 233 | print('Using mixed_calibration.') 234 | df_list = [] 235 | 236 | # Make sure the lists of calibration factors are the correct length 237 | assert len(CONDITIONS_TO_INCLUDE) == len(MICRONS_PER_PIXEL_LIST), 'MICRONS_PER_PIXEL_LIST must be same sized list as CONDITIONS_TO_INCLUDE' 238 | assert len(CONDITIONS_TO_INCLUDE) == len(SAMPLING_INTERVAL_LIST),'SAMPLING_INTERVAL_LIST must be same sized list as CONDITIONS_TO_INCLUDE' 239 | 240 | for i, cond in enumerate(list(df['Condition'].unique())): 241 | 242 | microns_per_pixel = MICRONS_PER_PIXEL_LIST[i] 243 | sampling_interval = SAMPLING_INTERVAL_LIST[i] 244 | print(cond, microns_per_pixel,sampling_interval) 245 | 246 | sub_df = df[df['Condition'] == cond] 247 | 248 | for factor in FACTORS_TO_CONVERT: 249 | 250 | if(factor == 'area' or factor == 'filled_area' or factor == 'bbox_area'): 251 | sub_df[factor] = sub_df[factor] * microns_per_pixel ** 2 252 | 253 | else: 254 | 255 | sub_df[factor] = sub_df[factor] * microns_per_pixel 256 | 257 | # Special case for speed: 258 | 259 | ''' Be extra careful with speed 260 | May also need a correction relative to the base pixel calibration''' 261 | sub_df['speed'] = sub_df['speed'] * sampling_interval / SAMPLING_INTERVAL 262 | 263 | df_list.append(sub_df) 264 | 265 | df_out = pd.concat(df_list) 266 | 267 | 268 | else: 269 | 270 | df_out = df.copy() 271 | 272 | for factor in FACTORS_TO_CONVERT: 273 | 274 | if(factor == 'area' or factor == 'filled_area' or factor == 'bbox_area'): 275 | 276 | df_out[factor] = df_out[factor] * MICRONS_PER_PIXEL ** 2 277 | 278 | else: 279 | 280 | 281 | df_out[factor] = df_out[factor] * MICRONS_PER_PIXEL 282 | 283 | 
return df_out 284 | -------------------------------------------------------------------------------- /cellPLATO/cellPLATO/data_processing/data_wrangling.py: -------------------------------------------------------------------------------- 1 | #data_wrangling.py 2 | 3 | from initialization.initialization import * 4 | from initialization.config import * 5 | 6 | import os 7 | import numpy as np 8 | import pandas as pd 9 | 10 | import h5py 11 | 12 | def format_for_superplots(df, metric, t, to_csv=False): 13 | 14 | ''' 15 | Dataframe should contain the combination of all loaded datasets to be included in the superplots 16 | metric: a string relating to one of the dataframe column headers, telling which measurement to include in the superplots. 17 | 18 | t: timepoint for visualization 19 | 20 | ''' 21 | # get sub dataframe at the selected timepoint. 22 | sub_df = df.loc[(df['frame'] == t)] 23 | 24 | # if(DEBUG): 25 | # print(sub_df.head()) 26 | 27 | if(USE_SHORTLABELS): 28 | # Create dataframe from the selected series within the original 29 | frame = { 'Replicate': sub_df['Rep_label'], 'Treatment': sub_df['Condition_shortlabel'], str(metric): sub_df[metric] } 30 | else: 31 | # Create dataframe from the selected series within the original 32 | frame = { 'Replicate': sub_df['Rep_label'], 'Treatment': sub_df['Cond_label'], str(metric): sub_df[metric] } 33 | output_df = pd.DataFrame(frame) 34 | 35 | assert len(df.index) > 0, 'Error with empty dataframe' 36 | 37 | if to_csv: 38 | output_df.to_csv(DATA_OUTPUT+'superplot_fmt_'+metric+'_t_'+str(t)+'.csv') 39 | 40 | return output_df 41 | 42 | def get_data_matrix(df, dr_factors=DR_FACTORS): #can deprecate as it is just a one liner 43 | ''' 44 | input dataframe (df): 45 | 46 | returns x: ndarray, matrix containing numerical values to be considered in 47 | dimensionality reduction methods. 48 | ''' 49 | 50 | sub_df = df[dr_factors] # Filter original dataframe by select factors 51 | 52 | x = sub_df.values # Matrix to be used in the dimensionality reduction 53 | 54 | return x 55 | 56 | 57 | ''' 58 | Spacetime-cube related functions 59 | previously in spacetimecube.py, 60 | used only in blender_visualization_pipeline() 61 | ''' 62 | 63 | 64 | def df2stc(df):#,exp_list): 65 | 66 | ''' 67 | Input: DataFrame containing a data from a number of cells at multiple timepoints. 68 | Note: Should also work on combined dataframes. 69 | 70 | Returns: N*n*t Numpy Array, where: 71 | N = the unique cell index (not ID) 72 | D = the factor extracted from the dataframe (including unique ID) 73 | t = integer timepoint index, 74 | (can be converted to time with experimental parameters) 75 | 76 | ''' 77 | 78 | # Open question whether these should be defined somewhere else, 79 | # or stored with the Object like data.n_cells, data.n_factors, etc. 80 | 81 | 82 | 83 | ''' 84 | Note: Particle numbers are only unique to each experiment. 85 | Cannot assume otherwise. 86 | Maybe need to be sure that this function only run on separate experiments. 87 | OR that it splits them up from the beginning. 88 | i.e. assert len(df['Condition'].unique()) == 1 89 | or: if len(df['Condition'].unique()) > 1: Split them. 90 | 91 | ''' 92 | 93 | 94 | 95 | ''' 96 | For testing/development purposes, use only the first condition 97 | Eventually will loop through each condition, creating an array for each and returning the list of arrays 98 | (Assert that the length of the list of arrays is the same as the length of the list of conditions.) 
99 | ''' 100 | 101 | # conditions = exp_list['Condition'] 102 | 103 | # 104 | # if(DEBUG): 105 | # 106 | # display(df) 107 | # 108 | # ax1 = df.plot.scatter(x='x', 109 | # y='y', 110 | # c='DarkBlue') 111 | 112 | 113 | # Take only the first condition from the list. 114 | # sub_df = df.loc[(df['Condition'] == exp_list.loc[0]['Condition'])] #conditions[0] 115 | 116 | # Override conditional selection above, use full DataFrame 117 | sub_df = df.copy() 118 | 119 | factor_list = list(sub_df.columns) # Otherwise is 120 | n_factors = len(factor_list) 121 | 122 | # Select the first row to know about the data types 123 | row = sub_df.iloc[0] # Select first row of data frame 124 | 125 | 126 | strings = row[row.apply(isinstance, args=(str,))] 127 | non_strings = row[~row.apply(isinstance, args=(str,))] 128 | n_num_cats = len(non_strings)# number of numerical catergoies 129 | 130 | #Get the list of headers for the numerical catergories 131 | headers = non_strings.index.values 132 | 133 | # if(DEBUG): 134 | # display(row) 135 | # display(non_strings) 136 | 137 | # Assertions to catch problematic data input 138 | assert n_num_cats + len(strings) == n_factors, 'Mismatach between categories' 139 | assert len(headers) == n_num_cats, 'Number of headers doesnt match number of non-numerical categories' 140 | 141 | cells = np.sort(sub_df['particle'].unique()) 142 | frames = np.sort(sub_df['frame'].unique()) 143 | 144 | n_cells = len(cells) 145 | n_frames = len(frames) 146 | 147 | 148 | # Build a list of dataframes for each timepoint. 149 | df_list = [] 150 | 151 | for t in frames: 152 | t_df = sub_df.loc[(sub_df['frame'] == t)] 153 | df_list.append(t_df) 154 | 155 | # Built the spacetime cubes with space for the non-string contents only. 156 | stc = np.empty([n_cells, n_num_cats, n_frames]) 157 | 158 | for ind, row in sub_df.iterrows(): 159 | 160 | # Split the row into strings and numbers (non-strings) 161 | ''' 162 | The assumption above should be asserted. 163 | ''' 164 | row_str = row[row.apply(isinstance, args=(str,))] 165 | row_data = row[~row.apply(isinstance, args=(str,))] 166 | 167 | frame = int(row['frame']) 168 | cell = int(row['particle']) - 1 169 | 170 | np_row = row_data.to_numpy(copy=True) # Get the data elements of row in numpy format 171 | 172 | ''' 173 | using to_numpy allowed for strings, but the strings can't go into the array. 174 | Will need to convert them to np.nans or ignore text entries entirely 175 | ''' 176 | 177 | # On the first pass, check that the number of factors is correct. 178 | if(ind == 0): 179 | assert np.shape(np_row)[0] == n_num_cats, ' # rows != n_factors' 180 | assert np.shape(np_row) == np.shape(stc[1,:,1]), ' # rows != shape of stc' 181 | 182 | ''' 183 | If something changes in the labelling pattern from imageJ/Fiji or other 184 | upstream software, the asserts below will throw an error to let us know 185 | the arrays won't be indexed correctly. 186 | ''' 187 | 188 | # Ensure frame is zero indexed. 189 | assert frame == 0, 'Frame not correctly zero-indexed for numpy.' 190 | assert cell == 0, ' Cell not correctly zero-indexed for numpy.' 
191 | 192 | # Data transformation (recall 0 indexing of numpy array) 193 | # Recall spacetime-cube dimensions stc[n_cells, n_factors, n_frames] 194 | stc[cell,:,frame] = np_row 195 | 196 | 197 | assert len(df_list) == np.shape(stc)[2], 'df_list length doesnt match time dimension of array' 198 | 199 | return stc, list(headers), df_list # Or a list of stc's 200 | 201 | 202 | def verify_stc(stc): 203 | 204 | ''' 205 | A testing fiinun to validate that the time-array is create as expected. 206 | Not currently implemented as not working properly: 207 | To Do: 208 | - pass stc, or replace from asserts. 209 | - Repair the ValueError: 210 | The truth value of an array with more than one element is ambiguous. 211 | Use a.any() or a.all() 212 | ''' 213 | 214 | print('Verifying that time-array matches with corresponding dataframe for that time point.') 215 | 216 | for t in range(np.shape(stc)[2]): 217 | 218 | for n in range(np.shape(stc)[0] - 2):# -1 because of cell indexing 219 | 220 | this_df = df_list[t] 221 | sub_df = this_df.loc[(this_df['particle'] == n+1)] # +1 accounts for zero indexing of np array but not cell (particle) #. 222 | 223 | x_ind = int(headers.index('x')) 224 | y_ind = int(headers.index('y')) 225 | 226 | if (len(sub_df['x']) > 0): # This avoids assert errors on empty series of the dataframe. 227 | 228 | # assert statements to check that everything lines up correctly 229 | assert sub_df['x'].values == stc[n,x_ind,t], 'Error' 230 | assert sub_df['y'].values == stc[n,y_ind,t], 'Error' 231 | 232 | 233 | 234 | print("If this is the only text you see, it means it worked") 235 | 236 | def condense_stc(stc,headers, zero_it=False, x_label='x', y_label='y'): 237 | 238 | ''' 239 | Function to Condense the spacetime cube to a 2D + time output. 240 | 241 | Function to 'zero' all of the cell trajectories such that they all 242 | begin at the origin of the graph (0, 0). 243 | 244 | Importantly it also reduces the shape to only the x and y positions. 245 | 246 | Inputs: 247 | stc: spacetime cube (numpy array) where: 248 | ax=0 : cell number 249 | ax=1 : factor, measurement 250 | ax=2 : timepoint 251 | headers: Column headers from original dataframe that are passed 252 | to columns of the ndarray 253 | zero_it: Boolean (optional), controls weather the zeroing operation is 254 | performed. Otherwise, allows this function to format for space-time cube visualization 255 | 256 | x_label, y_label: strings, indicate the name of the column headers to be used in the animation. 257 | Allows us to use the tSNE dimensions in the spacetime cube. 258 | Output: 259 | zerod_stc 260 | 261 | ''' 262 | assert not ((x_label != 'x') and (zero_it == True)), 'Zeroing a non-spatial dimension is not supported.' 
263 | 264 | n_cells = np.shape(stc)[0] 265 | n_frames = np.shape(stc)[2] 266 | zerod_stc = np.empty([n_cells, 2, n_frames]) # Creates a spacetime-cube, formatted like a spreadsheet, cells in rows, columns for X and Y, and t in Z 267 | 268 | x_ind = int(headers.index(x_label)) 269 | y_ind = int(headers.index(y_label)) 270 | 271 | # Convert all zero values of x and y position to NaN 272 | xpos_arr = stc[:,x_ind,:] 273 | ypos_arr = stc[:,y_ind,:] 274 | 275 | # Replace zero values with np.nan 276 | xpos_arr[xpos_arr == 0] = np.nan# or use np.nan 277 | ypos_arr[ypos_arr == 0] = np.nan# or use np.nan 278 | 279 | # Insert the corrected values back into the array 280 | stc[:,x_ind,:] = xpos_arr 281 | stc[:,y_ind,:] = ypos_arr 282 | 283 | for i in range(0,n_cells): 284 | 285 | # For each cell, find the first frame on which the cell appears 286 | # This will be the first non-NaN value 287 | # Solution using x position only 288 | non_nan_inds = np.argwhere(~np.isnan(stc[i,x_ind,:])) 289 | first_ind = non_nan_inds[0] 290 | 291 | 292 | for j in range(0,n_frames): 293 | zerod_stc[i,0,j] = stc[i,x_ind,j] - stc[i,x_ind,first_ind] * zero_it 294 | zerod_stc[i,1,j] = stc[i,y_ind,j] - stc[i,y_ind,first_ind] * zero_it 295 | 296 | 297 | return zerod_stc 298 | 299 | 300 | def zero_stc(stc,headers, zero_it=True): 301 | 302 | print('Warning, this function will be replaced by condense_stc(). ') 303 | 304 | ''' 305 | DELETE THIS FUNCTION ONLY WHEN SURE THAT ALL USES OF zero_stc have been replaced with condense_stc. 306 | ''' 307 | 308 | n_cells = np.shape(stc)[0] 309 | n_frames = np.shape(stc)[2] 310 | zerod_stc = np.empty([n_cells, 2, n_frames]) # Creates a spacetime-cube, formatted like a spreadsheet, cells in rows, columns for X and Y, and t in Z 311 | 312 | x_ind = int(headers.index('x')) 313 | y_ind = int(headers.index('y')) 314 | 315 | # Convert all zero values of x and y position to NaN 316 | xpos_arr = stc[:,x_ind,:] 317 | ypos_arr = stc[:,y_ind,:] 318 | 319 | # Replace zero values with np.nan 320 | xpos_arr[xpos_arr == 0] = np.nan# or use np.nan 321 | ypos_arr[ypos_arr == 0] = np.nan# or use np.nan 322 | 323 | # Insert the corrected values back into the array 324 | stc[:,x_ind,:] = xpos_arr 325 | stc[:,y_ind,:] = ypos_arr 326 | 327 | for i in range(0,n_cells): 328 | 329 | # For each cell, find the first frame on which the cell appears 330 | # This will be the first non-NaN value 331 | # Solution using x position only 332 | non_nan_inds = np.argwhere(~np.isnan(stc[i,x_ind,:])) 333 | first_ind = non_nan_inds[0] 334 | 335 | 336 | for j in range(0,n_frames): 337 | zerod_stc[i,0,j] = stc[i,x_ind,j] - stc[i,x_ind,first_ind] * zero_it 338 | zerod_stc[i,1,j] = stc[i,y_ind,j] - stc[i,y_ind,first_ind] * zero_it 339 | 340 | 341 | return zerod_stc 342 | 343 | 344 | 345 | def stc2df(stc_0d): 346 | 347 | ''' 348 | Transform the origin-corrected ndarray to a format 349 | to be visualized in 3d with plotly. 
350 | 351 | Input: 352 | stc0d: 'zeroed' ndarray (time-array, spacetime-cube) 353 | 354 | Output: 355 | out_df: DataFrame, transposed and reshaped such that 356 | origin-corrected cells are in rows, with columns: 357 | cell, X0, Y0, t (slice) 358 | ''' 359 | 360 | n,m,t = stc_0d.shape 361 | 362 | # Transpose the array upsteam of the reshape 363 | transp_array = np.transpose(stc_0d,(0,2,1)) 364 | out_arr = np.column_stack((np.repeat(np.arange(n),t), 365 | transp_array.reshape(n*t,-1), 366 | np.repeat(np.arange(t),n))) 367 | 368 | out_df = pd.DataFrame(out_arr,columns=['cell', 'X0', 'Y0', 't']) 369 | 370 | return out_df 371 | -------------------------------------------------------------------------------- /cellPLATO/cellPLATO/data_processing/load_trackmate.py: -------------------------------------------------------------------------------- 1 | # load_trackmate.py 2 | from initialization.initialization import * 3 | from initialization.config import * 4 | from data_processing.cleaning_formatting_filtering import * 5 | from data_processing.migration_calculations import * 6 | import re 7 | import tqdm 8 | import pandas as pd 9 | import numpy as np 10 | 11 | # Thanks to Guillaume Jacquemet for the following code structure. It does some trackmate loading followed by some cellPLATO formatting. 12 | 13 | def populate_columns(df, filepath): 14 | # Extract the parts of the file path 15 | path_parts = os.path.normpath(filepath).split(os.sep) 16 | 17 | if len(path_parts) < 3: 18 | # if there are not enough parts in the path to extract folder and parent folder 19 | print(f"Error: Cannot extract parent folder and folder from the filepath: {filepath}") 20 | return df 21 | 22 | # Assuming that the file is located at least two levels deep in the directory structure 23 | folder_name = path_parts[-2] # The folder name is the second last part of the path 24 | parent_folder_name = path_parts[-3] # The parent folder name is the third last part of the path 25 | 26 | 27 | filename_without_extension = os.path.splitext(os.path.basename(filepath))[0] 28 | 29 | 30 | 31 | df['File_name'] = remove_suffix(filename_without_extension) 32 | df['Condition'] = parent_folder_name # Populate 'Condition' with the parent folder name 33 | # df['experiment_nb'] = folder_name # Populate 'Repeat' with the folder name 34 | df['Replicate_ID'] = parent_folder_name + folder_name # Populate 'Repeat' with the folder name 35 | 36 | 37 | ############### 38 | return df 39 | 40 | 41 | def load_and_populate(file_pattern, usecols=None, chunksize=100000, Folder_path = DATA_PATH, Results_Folder = SAVED_DATA_PATH): 42 | df_list = [] 43 | pattern = re.compile(file_pattern) # Compile the file pattern to a regex object 44 | files_to_process = [] 45 | 46 | # First, list all the files we'll be processing 47 | for dirpath, dirnames, filenames in os.walk(Folder_path): 48 | # print(f"Dirpath is {dirpath}") 49 | # print(f"Dirnames is {dirnames}") 50 | # print(f"filenames is {filenames}") 51 | for filename in filenames: 52 | if pattern.match(filename): # Check if the filename matches the file pattern 53 | filepath = os.path.join(dirpath, filename) 54 | files_to_process.append(filepath) 55 | 56 | # Metadata list 57 | metadata_list = [] 58 | 59 | # Create a tqdm instance for progress tracking 60 | for filepath in tqdm.tqdm(files_to_process, desc="Processing Files"): 61 | # Get the expected number of rows in the file (subtracting header rows) 62 | expected_rows = sum(1 for row in open(filepath)) - 4 63 | 64 | # Get file size 65 | file_size = 
os.path.getsize(filepath) 66 | 67 | # Add to the metadata list 68 | metadata_list.append({ 69 | 'filename': os.path.basename(filepath), 70 | 'expected_rows': expected_rows, 71 | 'file_size': file_size 72 | }) 73 | 74 | chunked_reader = pd.read_csv(filepath, skiprows=[1, 2, 3], usecols=usecols, chunksize=chunksize) 75 | 76 | for chunk in chunked_reader: 77 | processed_chunk = populate_columns(chunk, filepath) 78 | df_list.append(processed_chunk) 79 | 80 | if not df_list: # if df_list is empty, return an empty DataFrame 81 | print(f"No files found with pattern: {file_pattern}") 82 | return pd.DataFrame() 83 | 84 | merged_df = pd.concat(df_list, ignore_index=True) 85 | # Verify the total rows in the merged dataframe matches the total expected rows from metadata 86 | total_expected_rows = sum(item['expected_rows'] for item in metadata_list) 87 | if len(merged_df) != total_expected_rows: 88 | print(f"Warning: Mismatch in total rows. Expected {total_expected_rows}, found {len(merged_df)} in the merged dataframe.") 89 | else: 90 | print(f"Success: The processed dataframe matches the metadata. Total rows: {len(merged_df)}") 91 | return merged_df, metadata_list 92 | 93 | 94 | 95 | def sort_and_generate_repeat(merged_df): 96 | merged_df.sort_values(['Condition', 'experiment_nb'], inplace=True) 97 | merged_df = merged_df.groupby('Condition', group_keys=False).apply(generate_repeat) 98 | return merged_df 99 | 100 | def generate_repeat(group): 101 | unique_experiment_nbs = sorted(group['experiment_nb'].unique()) 102 | experiment_nb_to_repeat = {experiment_nb: i+1 for i, experiment_nb in enumerate(unique_experiment_nbs)} 103 | group['Repeat'] = group['experiment_nb'].map(experiment_nb_to_repeat) 104 | return group 105 | 106 | def remove_suffix(filename): 107 | suffixes_to_remove = ["-tracks", "-spots"] 108 | for suffix in suffixes_to_remove: 109 | if filename.endswith(suffix): 110 | filename = filename[:-len(suffix)] 111 | break 112 | return filename 113 | 114 | 115 | def validate_tracks_df(df): 116 | """Validate the tracks dataframe for necessary columns and data types.""" 117 | required_columns = ['TRACK_ID'] 118 | for col in required_columns: 119 | if col not in df.columns: 120 | print(f"Error: Column '{col}' missing in tracks dataframe.") 121 | return False 122 | 123 | # Additional data type checks or value ranges can be added here 124 | return True 125 | 126 | def validate_spots_df(df): 127 | """Validate the spots dataframe for necessary columns and data types.""" 128 | required_columns = ['TRACK_ID', 'POSITION_X', 'POSITION_Y', 'POSITION_Z', 'POSITION_T'] 129 | for col in required_columns: 130 | if col not in df.columns: 131 | print(f"Error: Column '{col}' missing in spots dataframe.") 132 | return False 133 | 134 | # Additional data type checks or value ranges can be added here 135 | return True 136 | 137 | def check_unique_id_match(df1, df2): 138 | df1_ids = set(df1['Unique_ID']) 139 | df2_ids = set(df2['Unique_ID']) 140 | 141 | # Check if the IDs in the two dataframes match 142 | if df1_ids == df2_ids: 143 | print("The Unique_ID values in both dataframes match perfectly!") 144 | else: 145 | missing_in_df1 = df2_ids - df1_ids 146 | missing_in_df2 = df1_ids - df2_ids 147 | 148 | if missing_in_df1: 149 | print(f"There are {len(missing_in_df1)} Unique_ID values present in the second dataframe but missing in the first.") 150 | print("Examples of these IDs are:", list(missing_in_df1)[:5]) 151 | 152 | if missing_in_df2: 153 | print(f"There are {len(missing_in_df2)} Unique_ID values present in the 
first dataframe but missing in the second.") 154 | print("Examples of these IDs are:", list(missing_in_df2)[:5]) 155 | 156 | ##### 157 | 158 | # Function to calculate Cohen's d 159 | def cohen_d(group1, group2): 160 | diff = group1.mean() - group2.mean() 161 | n1, n2 = len(group1), len(group2) 162 | var1 = group1.var() 163 | var2 = group2.var() 164 | pooled_var = ((n1 - 1) * var1 + (n2 - 1) * var2) / (n1 + n2 - 2) 165 | d = diff / np.sqrt(pooled_var) 166 | return d 167 | 168 | def save_dataframe_with_progress(df, path, desc="Saving", chunk_size=50000): 169 | """Save a DataFrame with a progress bar.""" 170 | 171 | # Estimating the number of chunks based on the provided chunk size 172 | num_chunks = int(len(df) / chunk_size) + 1 173 | 174 | # Create a tqdm instance for progress tracking 175 | with tqdm(total=len(df), unit="rows", desc=desc) as pbar: 176 | # Open the file for writing 177 | with open(path, "w") as f: 178 | # Write the header once at the beginning 179 | df.head(0).to_csv(f, index=False) 180 | 181 | for chunk in np.array_split(df, num_chunks): 182 | chunk.to_csv(f, mode="a", header=False, index=False) 183 | pbar.update(len(chunk)) 184 | 185 | def check_for_nans(df, df_name): 186 | """ 187 | Checks the given DataFrame for NaN values and prints the count for each column containing NaNs. 188 | 189 | Args: 190 | df (pd.DataFrame): DataFrame to be checked for NaN values. 191 | df_name (str): The name of the DataFrame as a string, used for printing. 192 | """ 193 | # Check if the DataFrame has any NaN values and print a warning if it does. 194 | nan_columns = df.columns[df.isna().any()].tolist() 195 | 196 | if nan_columns: 197 | for col in nan_columns: 198 | nan_count = df[col].isna().sum() 199 | print(f"Column '{col}' in {df_name} contains {nan_count} NaN values.") 200 | else: 201 | print(f"No NaN values found in {df_name}.") 202 | 203 | 204 | def trackmate_to_cellPLATO(df): 205 | # This will become the function to make the comb_df 206 | 207 | input_df=df.copy() 208 | 209 | '''This part renames a lot of columns to match cellPLATO''' 210 | 211 | # rename LABEL to trackmate_label 212 | input_df = input_df.rename(columns={'LABEL':'trackmate_label'}) 213 | # ID to particle 214 | input_df = input_df.rename(columns={'ID':'particle'}) 215 | # change the data type of particle to float 216 | input_df['particle'] = input_df['particle'].astype(float) 217 | # rename POSITION_X to x, POSITION_Y to y, POSITION_Z to z, FRAME to t 218 | input_df = input_df.rename(columns={'POSITION_X':'x', 'POSITION_Y':'y', 'POSITION_Z':'z', 'FRAME':'frame'}) 219 | 220 | # Convert the values in frame column to float 221 | input_df['frame'] = input_df['frame'].astype(float) 222 | 223 | '''This part makes the x_um, y_um, z_um columns just by replicating the existing ones''' 224 | # copy the x column to a new x_um column, and the y column to a new y_um column, and z to z_um 225 | input_df['x_um'] = input_df['x'] 226 | input_df['y_um'] = input_df['y'] 227 | input_df['z_um'] = input_df['z'] 228 | # Same with the x_pix, y_pix, z_pix 229 | input_df['x_pix'] = input_df['x'] 230 | input_df['y_pix'] = input_df['y'] 231 | input_df['z_pix'] = input_df['z'] 232 | 233 | '''This part makes the Rep_label column''' 234 | # Make a column of floats that corresponds to the 'Replicate_ID' column and call it 'Rep_label' 235 | # To do this, extract the 'Replicate_ID' columns from the merged_spots_df 236 | Replicate_ID = input_df['Replicate_ID'] 237 | # Get the unique Replicate_IDs 238 | Replicate_ID_unique = np.unique(Replicate_ID) 239 | 
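# Side note (a sketch, not the original implementation): the enumerate loop below is a plain
# label encoding of Replicate_ID. A compact alternative would be
#     codes, uniques = pd.factorize(input_df['Replicate_ID'], sort=True)
#     input_df['Rep_label'] = codes.astype(float)
# where sort=True makes the integer codes follow the same sorted order as np.unique above.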
# Make a dictionary of the unique Replicate_IDs and a number (float) that corresponds to them 240 | Replicate_ID_dict = {} 241 | for i, ID in enumerate(Replicate_ID_unique): 242 | Replicate_ID_dict[ID] = i 243 | # Make a new column called 'Rep_label' and populate it with the float values from the dictionary 244 | input_df['Rep_label'] = input_df['Replicate_ID'].map(Replicate_ID_dict) 245 | # make those floats 246 | input_df['Rep_label'] = input_df['Rep_label'].astype(float) 247 | 248 | # Make a new column called 'Condition_shortlabel' which has the same value as 'Condition' 249 | input_df['Condition_shortlabel'] = input_df['Condition'] 250 | 251 | ########################## 252 | 253 | # Then, add the ntpts and the uniq_id to the df 254 | 255 | apply_unique_id_trackmate(input_df) 256 | #sort by frame 257 | input_df = input_df.sort_values(by=['uniq_id', 'frame']) 258 | 259 | # display(input_df) 260 | 261 | # Then, do the cellPLATO migration calculations 262 | 263 | if DO_CP_METRICS_FOR_TRACKMATE: 264 | 265 | proto_comb_list = [] 266 | proto_comb_df = pd.DataFrame() 267 | 268 | ######################################################## 269 | 270 | for replicate in np.unique(input_df['Replicate_ID']): 271 | # extract the replicate 272 | replicate_df = input_df[input_df['Replicate_ID'] == replicate] 273 | # sort that df by uniq_id and frame 274 | replicate_df = replicate_df.sort_values(by=['uniq_id', 'frame']) 275 | 276 | 277 | # print('For this replciate df, the replicated is ', replicate_df['Replicate_ID'].unique()) 278 | # print('And the rep_label is ', replicate_df['Rep_label'].unique()) 279 | # print('And the unique ID is ', replicate_df['uniq_id'].unique()) 280 | # print('And finally the file name is ', replicate_df['File_name'].unique()) 281 | # print('And the condition is ', replicate_df['Condition'].unique()) 282 | 283 | # do the migration measurements 284 | mig_df = migration_calcs(replicate_df) 285 | 286 | mig_df.reset_index(inplace=True, drop=True) 287 | # # add it to the proto_comb_df list 288 | proto_comb_list.append(mig_df) 289 | 290 | proto_comb_df = pd.concat(proto_comb_list, ignore_index=True) 291 | 292 | # proto_comb_df = pd.concat([proto_comb_df,mig_df]) 293 | proto_comb_df.reset_index(inplace=True, drop=True) 294 | else: 295 | proto_comb_df = input_df 296 | 297 | 298 | 299 | ############# 300 | return proto_comb_df 301 | ##### 302 | 303 | -------------------------------------------------------------------------------- /cellPLATO/cellPLATO/data_processing/measurements.py: -------------------------------------------------------------------------------- 1 | #measurements.py 2 | 3 | from initialization.initialization import * 4 | from initialization.config import * 5 | 6 | import os 7 | import numpy as np 8 | import pandas as pd 9 | 10 | from tqdm import tqdm 11 | 12 | def calc_aspect_ratio(df, drop=False): 13 | 14 | df['aspect'] = df['major_axis_length']/df['minor_axis_length'] 15 | 16 | # Remove NaNs 17 | if (drop): 18 | df.replace([np.inf, -np.inf], np.nan, inplace=True) 19 | df.dropna(subset=["aspect"], how="all", inplace=True) 20 | 21 | 22 | 23 | def ripley_K(X,r): 24 | 25 | ''' 26 | Calculate Ripleys K for a given radius r 27 | 28 | ''' 29 | 30 | # Extract the number of other points, p, within a distance r. 31 | rip = [] 32 | 33 | for i,x in enumerate(X): 34 | 35 | # Get the distance matrix for this point. 36 | Xd = np.sqrt((X[:,0]-x[0])**2 + (X[:,1]-x[1])**2) 37 | 38 | Xd = np.delete(Xd, (i), axis=0) # Delete self. 
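# Performance note (illustrative sketch, assuming SciPy is available): the per-point distance
# vector above could be computed for all points at once, e.g.
#     from scipy.spatial.distance import cdist
#     D = cdist(X, X)                    # (N, N) pairwise distance matrix
#     counts = (D < r).sum(axis=1) - 1   # points within radius r, excluding self
# The original per-point loop is left unchanged here.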
39 | 40 | # Count the number of points within radius 41 | n = len(X) # Number of points total 42 | A = np.pi * r ** 2 # Area of circle with radius r 43 | p = sum(Xd < r) # Number of points within radius r 44 | K = p * A / n # Ripley's K - number of points within radius r per unit area 45 | L = (K / np.pi) ** 0.5 # Ripley's L - radius of circle with same density as K 46 | 47 | rip.append([p,K,L]) # Append tuple containing count, Ripley's K and L 48 | 49 | rip = np.asarray(rip) 50 | 51 | return rip 52 | 53 | 54 | 55 | 56 | def calc_ripleys_xy(df_in, r=RIP_R, plot=False, inplace=False): 57 | 58 | ''' 59 | Calculate ripleys p, K and L for a given radius r. 60 | Create a dataframe with these measurements. 61 | 62 | ''' 63 | 64 | print('Calculating ripleys p, K and L with radius: ', r, ' (pixels)') 65 | df = df_in.copy() 66 | 67 | df_list = [] 68 | 69 | for rep in df['Replicate_ID'].unique(): 70 | 71 | rep_df = df[df['Replicate_ID'] == rep] 72 | 73 | 74 | for frame in rep_df['frame'].unique(): 75 | 76 | t_df = rep_df[rep_df['frame'] == frame] 77 | pos = t_df[['x_um', 'y_um']].values 78 | rip = ripley_K(pos,r) 79 | 80 | 81 | t_df['rip_p'] = rip[:,0] # Number of points within radius 82 | t_df['rip_K'] = rip[:,1] 83 | t_df['rip_L'] = rip[:,2] 84 | 85 | df_list.append(t_df) 86 | 87 | 88 | if plot: 89 | 90 | ''' 91 | Plot should be made to create animation, gif?? 92 | ''' 93 | 94 | plt.scatter(pos[:, 0], pos[:, 1], c=rip[:,2], s=t_df['area']/5) # Colormap by ripleys L 95 | 96 | plt.show() 97 | 98 | df_out = pd.concat(df_list) 99 | df_out.sort_index(inplace=True) 100 | 101 | return df_out 102 | 103 | 104 | 105 | def standardize_factors_per_cell(df_in, factor_list=['area', 'perimeter']): 106 | 107 | from sklearn.preprocessing import StandardScaler 108 | 109 | df = df_in.copy() 110 | cell_df_list = [] 111 | 112 | unique_id = 0 # Create a unique cell id 113 | rep_list = df['Replicate_ID'].unique() 114 | 115 | # For each replicate 116 | for i_rep, this_rep in enumerate(rep_list): 117 | 118 | rep_df = df[df['Replicate_ID']==this_rep] 119 | cell_ids = rep_df['particle'].unique() # Particle ids only unique for replicate, not between. 120 | 121 | # For each cell, calculate the average value and add to new DataFrame 122 | print('Replicate ', i_rep+1, ' out of ', len(rep_list)) 123 | for cid in tqdm(cell_ids): 124 | 125 | cell_df = rep_df[rep_df['particle'] == cid] 126 | 127 | # A test to ensure there is only one replicate label included. 128 | assert len(cell_df['Rep_label'].unique()) == 1, 'check reps' 129 | 130 | # x = get_data_matrix(cell_df, dr_factors=factor_list) 131 | x = cell_df[factor_list].values 132 | x_ = StandardScaler().fit_transform(x) 133 | 134 | 135 | cell_df[factor_list] = x_ 136 | cell_df_list.append(cell_df) 137 | 138 | df_out = pd.concat(cell_df_list) 139 | df_out.sort_index(inplace=True) 140 | 141 | return df_out 142 | 143 | 144 | 145 | 146 | def t_window_metrics(df_in, t_window=MIG_T_WIND,min_frames=MIG_T_WIND/2,factor_list=DR_FACTORS): 147 | 148 | ''' 149 | Create measurements average and ratio measurements for each. 150 | ''' 151 | 152 | df = df_in.copy() 153 | df_list = [] 154 | 155 | time_avg_df = pd.DataFrame() 156 | unique_id = 0 # Create a unique cell id 157 | rep_list = df['Replicate_ID'].unique() 158 | new_factor_list = [] 159 | 160 | # For each replicate 161 | for i_rep, this_rep in enumerate(rep_list): 162 | 163 | rep_df = df[df['Replicate_ID']==this_rep] 164 | cell_ids = rep_df['particle'].unique() # Particle ids only unique for replicate, not between. 
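# Orientation note (added comment): for every factor in factor_list, e.g. 'area', the loop
# below is expected to add an 'area_tmean' column (mean of 'area' over the surrounding time
# window) and an 'area_ratio' column (the current value divided by that window mean), and to
# return the augmented dataframe together with the expanded factor list.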
165 | 166 | # For each cell, calculate the average value and add to new DataFrame (akin to making the tavg_df) 167 | print('Replicate ', i_rep, ' out of ', len(rep_list)) 168 | for cid in tqdm(cell_ids): 169 | 170 | cell_df = rep_df[rep_df['particle'] == cid] 171 | 172 | # A test to ensure there is only one replicate label included. 173 | assert len(cell_df['Rep_label'].unique()) == 1, 'check reps' 174 | 175 | # Unique list of frames for this cell 176 | frame_list = cell_df['frame'].unique() 177 | 178 | for frame in frame_list: 179 | 180 | # get a subset of the dataframe across the range of frames 181 | t_wind_df = cell_df[(cell_df['frame']>=frame - t_window/2) & 182 | (cell_df['frame'] < frame + t_window/2)] 183 | 184 | # Row(s) of the dataframe for this single timepoint 185 | tpt_df = cell_df[cell_df['frame'] == frame] 186 | 187 | 188 | # Only process time windows that contain enough frames 189 | if len(t_wind_df) >= min_frames: 190 | 191 | # Do the measurements for each factor 192 | for factor in factor_list: 193 | 194 | mean_str = factor + '_tmean' 195 | ratio_str = factor + '_ratio' 196 | 197 | # Mean value for factor across time window 198 | tpt_df[mean_str] = np.nanmean(t_wind_df[factor]) #adds new col to df called 'area_tmean' for example 199 | 200 | # Ratio 201 | tpt_df[ratio_str] = tpt_df[factor] / tpt_df[mean_str] 202 | 203 | # Keep a list of the factors in order to make DR methods easier to implement 204 | new_factor_list.append(factor) 205 | new_factor_list.append(mean_str) 206 | new_factor_list.append(ratio_str) 207 | 208 | df_list.append(tpt_df) # Append the row of new calculations to a list of dataframes 209 | 210 | # Increase the unique id given to each cell 211 | unique_id += 1 212 | 213 | # Assemble the df_list into a dataframe and reorder by index. 214 | df_out = pd.concat(df_list) 215 | df_out.sort_index(inplace=True) 216 | 217 | new_factor_list=np.unique(new_factor_list) 218 | 219 | 220 | 221 | return df_out, new_factor_list 222 | -------------------------------------------------------------------------------- /cellPLATO/cellPLATO/data_processing/migration_calculations.py: -------------------------------------------------------------------------------- 1 | #migration_calculations.py 2 | 3 | from initialization.config import * 4 | from initialization.initialization import * 5 | 6 | import os 7 | import numpy as np 8 | import pandas as pd 9 | 10 | from tqdm import tqdm 11 | 12 | def cell_calcs(cell_tarray, t_window=MIG_T_WIND):#, calibrate): 13 | 14 | ''' 15 | Cell migration calculations for a given cell through time. 16 | This function is passed a numpy array corresponding to the timecourse of a single cell, 17 | (from a single experimental replicate) 18 | 19 | 20 | Migration calcs accessory function that is optimized to use NumPy only, instead 21 | of pandas. 22 | 23 | Input: 24 | cell_tarray: [T * 4] NumPy array, where T is the number of frames over which this cell was tracked 25 | [frame, x_um, y_um, index]: 26 | 27 | t_window = int; width of the time window in # of frames. 28 | 29 | Returns: 30 | cell_calcs: list; 31 | 32 | UPDATED: This version of the function calculates certain values across a time window. 33 | 34 | 35 | ''' 36 | 37 | cell_calcs = [] 38 | mig_calcs = [] 39 | 40 | if(cell_tarray.shape[0] > 0): 41 | 42 | # Find the first and last frame in which this cell was tracked.
43 | init_f = int(np.min(cell_tarray[:,0])) 44 | final_f = int(np.max(cell_tarray[:,0])) 45 | 46 | # Enumerate across the range of frames 47 | for i, t in enumerate(range(init_f, final_f)): # Because we need a count and an index, for cases where cells arent included throughout 48 | 49 | # Adding actual window size 50 | # actual_window_size = min(t - init_f + 1, final_f - t, t_window) #trackmate 51 | 52 | # Extract separate arrays for the timepoints and window of interest 53 | prev_frame_arr = np.squeeze(cell_tarray[np.where(cell_tarray[:,0] == t-1)]) 54 | this_frame_arr = np.squeeze(cell_tarray[np.where(cell_tarray[:,0] == t)]) 55 | 56 | # if INPUT_FMT == 'trackmate': 57 | 58 | # #### trackmate 59 | # # Extract the time window array considering the actual window size 60 | # t_window_arr = np.squeeze(cell_tarray[np.where((cell_tarray[:,0] >= t - actual_window_size//2) & 61 | # (cell_tarray[:,0] < t + actual_window_size//2))]) 62 | # size_of_window = actual_window_size 63 | 64 | 65 | # # Check if the t_window_arr is not empty 66 | # if t_window_arr.size > 0 and t_window_arr.shape[0] == actual_window_size: 67 | # # Access the first row of the window 68 | # init_frame_arr = t_window_arr[0,:] 69 | 70 | # # ... [rest of your calculations] 71 | # else: 72 | # # Handle the case where t_window_arr is empty 73 | # # For example, you can continue to the next iteration of the loop 74 | # continue 75 | 76 | 77 | 78 | # else: 79 | # t_window_arr = np.squeeze(cell_tarray[np.where((cell_tarray[:,0] >= t - t_window/2) & 80 | # (cell_tarray[:,0] < t + t_window/2))]) 81 | # size_of_window = t_window 82 | t_window_arr = np.squeeze(cell_tarray[np.where((cell_tarray[:,0] >= t - t_window/2) & 83 | (cell_tarray[:,0] < t + t_window/2))]) 84 | # size_of_window = t_window # Redundant, equivalent to t_window 85 | 86 | ##### 87 | init_frame_arr = t_window_arr[0,:] # MOVED THIS INTO LOOP Use the first row of the window 88 | 89 | # segment_length = np.nan # default value 90 | 91 | 92 | # Only process calculations for which we have the entire window 93 | if(t_window_arr.shape[0] == t_window): 94 | 95 | # Extract the critical coordinates for making mnigration calculations 96 | x0, y0 = init_frame_arr[1:3] 97 | xi, yi = prev_frame_arr[1:3] 98 | xf, yf = this_frame_arr[1:3] 99 | 100 | # Extract the xy-track across the window 101 | window_traj = t_window_arr[:,1:3] 102 | 103 | # Use the index of the row of the subdf to insert value into original df 104 | ind = this_frame_arr[3] 105 | 106 | # Decide which one to keep 107 | segment_length = np.sqrt((xf-xi)**2 + (yf-yi)**2) 108 | # dist = np.sqrt((xf-xi)**2 + (yf-yi)**2) # Redundant, equivalent to segment_length 109 | 110 | '''Decide which one to keep''' 111 | euc_dist = np.sqrt((xf-x0)**2 + (yf-y0)**2) 112 | # net_dist = np.sqrt((xf-x0)**2 + (yf-y0)**2) # Redundant, equivalent to euc_dist 113 | 114 | speed = segment_length / SAMPLING_INTERVAL # Units will be in microns per unit of time of T_INC 115 | 116 | 117 | # Efficient cumulative euclidean distance calculation: 118 | diff = np.diff(window_traj, axis=0, prepend=window_traj[-1].reshape((1, -1))) 119 | ss = np.power(diff, 2).sum(axis=1) 120 | cumul_euc_dist = np.sqrt(ss).sum() 121 | # 122 | # Calculate the cumulative path length across the window 123 | ''' 124 | Would be nice to replace with a more efficient implementation 125 | as for cumulative euclidean above 126 | ''' 127 | 128 | 129 | # Calculations to be made across the window 130 | 131 | cumulative_dist_sqrd = 0 # reset for each window 132 | dist_list = [] 133 | 
turn_list = [] 134 | 135 | for n in range(1,len(window_traj)): 136 | 137 | x_, y_ = window_traj[n-1,:] 138 | x__, y__ = window_traj[n,:] 139 | dist = np.sqrt((x__-x_)**2 + (y__-y_)**2) 140 | dist_list.append(dist) 141 | 142 | # Global turn (relative to previous frame) 143 | glob_turn = np.arctan((y__ - y_) / (x__ - x_)) # Equivalent to turn_angle_radians 144 | turn_list.append(glob_turn) 145 | 146 | 147 | if INPUT_FMT == 'trackmate': 148 | actual_window_size = len(window_traj) 149 | assert len(dist_list) == actual_window_size - 1, 'length of computed distances does not match actual window size' 150 | else: 151 | 152 | assert len(dist_list) == t_window-1, 'length of computed distances doesnt match time window' 153 | 154 | # Summary measurements across the time window 155 | cumulative_length = np.sum(dist_list) 156 | max_dist = np.max(dist_list) 157 | 158 | # Mean-squared displacement (MSD) 159 | msd = np.sum(np.power(dist_list,2)) / t_window 160 | 161 | 162 | cumulative_dist_sqrd = cumulative_dist_sqrd + segment_length**2 163 | 164 | # Meandering index 165 | # meandering_ind = net_dist / total_dist 166 | meandering_ind = euc_dist / cumulative_length 167 | # Outreach Ratio 168 | # outreach_ratio = max_dist / total_dist 169 | outreach_ratio = max_dist / cumulative_length 170 | 171 | # Arrest coefficient - proportion of track cell is immobile (speed < x um) 172 | 173 | arrest_coefficient = sum(dist < ARREST_THRESHOLD for dist in dist_list) / len(dist_list) 174 | 175 | 176 | # 177 | # Direction calculations 178 | # 179 | 180 | # Global turn for this frame 181 | glob_turn = np.arctan((yf - y0) / (xf - x0))# change from yi and xi 182 | glob_turn_deg = np.degrees(glob_turn) # 183 | dir_autocorr = np.cos(turn_list[int(t_window/2)-1] - 184 | turn_list[int(t_window/2)-2]) 185 | 186 | ''' 187 | The directional autocorrelation is usually calculated between this and the previous frame 188 | It would be more interesting as compared to the trajectory in the time window./ 189 | ''' 190 | 191 | # Orientation 192 | axis_angle = np.arctan(yf / xf) # Temp 193 | orientation = np.cos(2 * np.radians(axis_angle)) 194 | 195 | # Directedness 196 | directedness = (xf - x0) / euc_dist 197 | 198 | # Turned angle (Between two frames) 199 | turn_angle_radians = np.arctan((yf - yi) / (xf - xi)) 200 | turn_angle = np.degrees(turn_angle_radians) 201 | 202 | # Endpoint directionality ratio 203 | endpoint_dir_ratio = euc_dist / cumulative_length 204 | 205 | 206 | # Combine current calculations into a list for the current timepoint 207 | mig_calcs = [ind, 208 | euc_dist, 209 | segment_length, 210 | cumulative_length, 211 | speed, 212 | orientation, 213 | directedness, 214 | turn_angle, 215 | endpoint_dir_ratio, 216 | # New ones added: 217 | dir_autocorr, 218 | outreach_ratio, 219 | msd, 220 | max_dist, 221 | glob_turn_deg, 222 | arrest_coefficient] 223 | 224 | # Add the current timepoint calculations to the cell-sepecific list of calculations 225 | cell_calcs.append(mig_calcs) 226 | 227 | 228 | return cell_calcs 229 | 230 | 231 | def migration_calcs(df_in):#, calibrate=CALIBRATE_MIG): 232 | 233 | ''' 234 | Re-implementation of the previous Usiigaci function to calculate cell 235 | migration measurements, for the dataframe instead of a numpy array. 236 | 237 | 238 | Function works in two steps: 239 | 1. Calculate any frame-independent measures, i.e. that don't require 240 | comparing to a previous frame. These are applied to the entire sub_df 241 | associated with a given cell. (Orientation) 242 | 2. 
Calculate frame-dependent measures, where the difference of a measurement 243 | is made with a previous frame. These must be done on a further segmented 244 | dataframe. 245 | 246 | Read from df_in, make changes to df_out. 247 | 248 | ''' 249 | # calibrate = CALIBRATE_MIG # Previously an input argument, placed in function so cannot be changed. 250 | 251 | df_out = df_in.copy() # Make a copy so as not to modify the original input 252 | df_out.reset_index(inplace=True, drop=True) 253 | # df_out.drop(columns=['index'],inplace=True) # Dropped already in reset_index 254 | assert len(df_out.index.unique()) == len(df_out.index), 'Dataframe indexes not unique' 255 | 256 | # Determine if dataframe contains a single replicate or multiple 257 | # by seeing if the column Replicate_ID exists. 258 | if 'Replicate_ID' in df_in.columns.values: 259 | print('Processing migration calculations of pooled data') 260 | 261 | else: 262 | 263 | # Add Replicate_ID with arbitrary values to the dataframe 264 | print('Processing single experiment, adding arbitrary Replicate_ID = -1') 265 | df_out['Replicate_ID'] = -1 266 | df_out['Condition'] = 'unknown' 267 | 268 | 269 | calcs_list = [] # Initialize for the whole dataframe 270 | 271 | conditions = df_in['Condition'].unique() 272 | 273 | 274 | for cond in conditions: 275 | 276 | # cond_df = df_in[df_in['Condition'] == cond] 277 | cond_df = df_out[df_out['Condition'] == cond] 278 | 279 | # If a combined dataframe is provided, it will have duplicate particle(cell) 280 | # numbers, therefore we must treat them separately 281 | 282 | exp_reps = cond_df['Replicate_ID'].unique() 283 | 284 | ''' 285 | NOTE: 286 | Important to use Replicate_ID (string of experiment name) instead of 287 | Rep_label (integer), as Rep_label is only unique per condition. 
288 | ''' 289 | print('Processing migration_calcs() for condition: ',cond) 290 | 291 | for exp_rep in exp_reps: 292 | 293 | print('Processing migration_calcs() for experiment: ',exp_rep) 294 | 295 | # Get subset of dataframe corresponding to this replicate 296 | exp_subdf = cond_df[cond_df['Replicate_ID'] == exp_rep] 297 | assert len(exp_subdf.index)==len(exp_subdf.index.unique()), 'exp_subdf indexes not unique' 298 | 299 | # Get the number of frames and cells in this selection 300 | n_frames = int(np.max(exp_subdf['frame'])) 301 | # n_cells = int(np.max(exp_subdf['particle'])) # This 302 | n_cells = len(exp_subdf['particle'].unique()) 303 | # for n in tqdm(range(n_cells)): 304 | 305 | # print('n_frames: ',n_frames ) 306 | # print('n_cells: ',n_cells ) 307 | if INPUT_FMT != 'trackmate': 308 | thing_to_iterate = 'particle' 309 | elif INPUT_FMT == 'trackmate': 310 | thing_to_iterate = 'uniq_id' 311 | 312 | for n in tqdm(exp_subdf[thing_to_iterate].unique()): #put in thing_to_iterate 313 | # For each cell, get another subset of the dataframe 314 | cell_subdf = exp_subdf[exp_subdf[thing_to_iterate] == n] # was 'particle', now thing_to_iterate 315 | assert len(cell_subdf.index)==len(cell_subdf.index.unique()), 'exp_subdf indexes not unique' 316 | 317 | tarray = cell_subdf[['frame', 'x_um', 'y_um']].to_numpy()#cell_subdf['frame', 'x', 'y'] 318 | inds = cell_subdf.index.values 319 | 320 | assert tarray.shape[0] == len(inds), 'indexes doesnt match tarray shape' 321 | assert len(inds) == len(np.unique(inds)), 'indexes not unique' 322 | 323 | tarray = np.c_[tarray,inds] # Append index as 4th column to the array 324 | assert tarray.shape[1] == 4, '' 325 | mig_calcs = cell_calcs(tarray)#, calibrate) 326 | 327 | if len(mig_calcs) > 0: 328 | calcs_list.append(mig_calcs) 329 | 330 | calcs_array = np.vstack(calcs_list) # Arrat from the list 331 | 332 | # Insert back into dataframe 333 | mig_calcs_df = pd.DataFrame(data=calcs_array[:,1:], # values 334 | index=calcs_array[:,0], # 1st column as index 335 | columns=['euclidean_dist', 336 | 'segment_length', 337 | 'cumulative_length', 338 | 'speed', 339 | 'orientedness', 340 | 'directedness', 341 | 'turn_angle', 342 | 'endpoint_dir_ratio', 343 | 'dir_autocorr', 344 | 'outreach_ratio', 345 | 'MSD', 346 | 'max_dist', 347 | 'glob_turn_deg', 348 | 'arrest_coefficient']) 349 | 350 | # The old ones from the previous version of cell_calcs, kept here just in case. 
351 | # columns=['euclidean_dist','segment_length','cumulative_length','speed', 352 | # 'orientedness', 'directedness', 'turn_angle', 'endpoint_dir_ratio'])#, 'dir_autocorr']) 353 | 354 | assert len(mig_calcs_df.index.unique()) == len(np.unique(calcs_array[:,0])), 'Created dataframe indexes don match values from calcs_array' 355 | 356 | df_out = df_out.join(mig_calcs_df) # Add migration calcs to dataframr 357 | 358 | return df_out 359 | -------------------------------------------------------------------------------- /cellPLATO/cellPLATO/data_processing/statistics.py: -------------------------------------------------------------------------------- 1 | #statistics.py 2 | 3 | from initialization.initialization import * 4 | from initialization.config import * 5 | 6 | import os 7 | import numpy as np 8 | import pandas as pd 9 | 10 | import scipy.stats as st 11 | import scipy.stats as stats 12 | 13 | 14 | def average_per_condition(df, avg_per_rep=False): 15 | 16 | ''' 17 | 18 | Function to calculate average value for each metric in a dataframe, taking a time-averagede dataframe as input 19 | 20 | Input: 21 | df: time-averaged DataFrame [N * X] 22 | 23 | Returns: 24 | avg_df: DataFrame [N] 25 | ''' 26 | 27 | 28 | assert df['frame'].unique()[0] == 'timeaverage', 'This function is intended for a time-averaged dataset.' 29 | 30 | avg_df = pd.DataFrame() 31 | std_df = pd.DataFrame() 32 | n_df = pd.DataFrame() 33 | cond_list = df['Condition'].unique() 34 | 35 | # Find the average value for each of the numerical columns 36 | 37 | for cond in cond_list: 38 | 39 | this_cond_df = df[df['Condition'] == cond] 40 | cond_avg_df = this_cond_df.mean()#skipna=True) 41 | cond_std_df = this_cond_df.std()#skipna=True) 42 | cond_n_df = this_cond_df.count()#skipna=True) 43 | 44 | # Additional nested level of processing if we want to calculate the average per replicate. 45 | if(avg_per_rep): 46 | 47 | rep_list = this_cond_df['Replicate_ID'].unique() 48 | 49 | for this_rep in rep_list: 50 | 51 | 52 | this_rep_df = this_cond_df[this_cond_df['Replicate_ID'] == this_rep] 53 | rep_avg_df = this_rep_df.mean()#skipna=True) 54 | rep_std_df = this_rep_df.std() 55 | rep_n_df = this_rep_df.count() 56 | 57 | # Add back non-numeric data 58 | dropped_cols = list(set(this_rep_df.columns) - set(rep_avg_df.index)) 59 | 60 | for col in dropped_cols: 61 | 62 | assert len(this_rep_df[col].unique()) == 1, 'Invalid assumption: uniqueness of non-numerical column values' 63 | rep_avg_df.loc[col] = this_rep_df[col].values[0] # Get the non-numerical value from dataframe (assuming all equivalent) 64 | rep_std_df.loc[col] = this_rep_df[col].values[0] # Get the non-numerical value from dataframe (assuming all equivalent) 65 | rep_n_df.loc[col] = this_rep_df[col].values[0] # Get the non-numerical value from dataframe (assuming all equivalent) 66 | 67 | 68 | avg_df = avg_df.append(rep_avg_df,ignore_index=True) 69 | std_df = std_df.append(rep_std_df,ignore_index=True) 70 | n_df = n_df.append(rep_n_df,ignore_index=True) 71 | 72 | 73 | else: 74 | 75 | 76 | # Add back non-numeric data 77 | dropped_cols = list(set(this_cond_df.columns) - set(cond_avg_df.index)) 78 | 79 | for col in dropped_cols: 80 | 81 | # Since we are averaging without considering replicates, we expect the list of Replicates_IDs to not be unique. 
82 | if col != 'Replicate_ID' and col != 'Replicate_shortlabel': 83 | assert len(this_cond_df[col].unique()) == 1, 'Invalid assumption: uniqueness of non-numerical column values' 84 | cond_avg_df.loc[col] = this_cond_df[col].values[0] # Get the non-numerical value from dataframe (assuming all equivalent) 85 | cond_std_df.loc[col] = this_cond_df[col].values[0] 86 | cond_n_df.loc[col] = this_cond_df[col].values[0] 87 | else: 88 | cond_avg_df.loc[col] = 'NA' # Get the non-numerical value from dataframe (assuming all equivalent) 89 | cond_std_df.loc[col] = 'NA' 90 | cond_n_df.loc[col] = 'NA' 91 | 92 | avg_df = avg_df.append(cond_avg_df,ignore_index=True) 93 | std_df = std_df.append(cond_std_df,ignore_index=True) 94 | n_df = n_df.append(cond_n_df,ignore_index=True) 95 | 96 | 97 | avg_std_n = (avg_df, std_df, n_df) 98 | 99 | return avg_std_n 100 | 101 | 102 | def generalized_stats(set1, set2, test=STAT_TEST): 103 | 104 | ''' 105 | Function should work for any test between two datasets, so long as it returns two arguments 106 | the second of which is the P value. 107 | 108 | ''' 109 | 110 | t, P = eval(test+'(set1, set2)') 111 | # print(t,P) 112 | 113 | return P 114 | 115 | def stats_table(df, factor, grouping='Condition', test=STAT_TEST): 116 | 117 | ''' 118 | Create a matrix of P-values for an exhaustive comparison of groupings. 119 | 120 | Inputs: 121 | df: pd.DataFrame 122 | factor: string, column in df. 123 | grouping: default: Condition, alternatively used with label 124 | test: Statistical test to use. Defaut STAT_TEST 125 | 126 | Returns: 127 | stat_table: pd.DataFrame 128 | ''' 129 | 130 | print('Returning stats_table using test: ', test, ' for factor: ', factor) 131 | print('Note: for exploratory purposes only, no multiple comparison correction is being applied.') 132 | 133 | # Create a numpy array to hold the values, fill with NaNs 134 | n_cond = len((df['Condition'].unique())) 135 | stat_mat = np.empty([n_cond, n_cond]) 136 | stat_mat[:] = np.NaN 137 | 138 | # Fill the table with the statistic of choice. 
139 | for i, cond_i in enumerate(df['Condition'].unique()): 140 | for j, cond_j in enumerate(df['Condition'].unique()): 141 | 142 | if cond_i == cond_j: 143 | stat_mat[i,j] = np.NaN 144 | else: 145 | 146 | set1 = df[factor][df[grouping] == cond_i] 147 | set2 = df[factor][df[grouping] == cond_j] 148 | 149 | P = generalized_stats(set1, set2, test) 150 | 151 | stat_mat[i,j] = P 152 | 153 | # Turn the filled numpy array into a dataframe 154 | stat_table = pd.DataFrame(data=stat_mat, 155 | index=df['Condition'].unique(), 156 | columns=df['Condition'].unique()) 157 | 158 | stat_table.to_csv(DATA_OUTPUT+factor+'_P_table.csv') 159 | 160 | return stat_table 161 | 162 | 163 | # Bootstrapping function 164 | def bootstrap_sample(df, n_samples=1000): 165 | 166 | measurements = df.values 167 | medians = [] 168 | 169 | for i in range(n_samples): 170 | 171 | samples = np.random.choice(measurements, size = len(measurements)) 172 | medians.append(np.median(samples)) 173 | 174 | medians = np.asarray(medians) 175 | 176 | return medians 177 | 178 | 179 | 180 | 181 | def bootstrap_sample_df(df,factor,ctl_label): 182 | 183 | ''' 184 | Generate bootstrapped sample and return as dataframe, to be plotted with seaborn 185 | ''' 186 | 187 | # Calculate the differences for each category and save them into dataframes for visualizing in Seaborn or Matplotlib 188 | bootstrap_diff_df = pd.DataFrame() 189 | 190 | # Get the control bootstrap 191 | ctl_bootstrap = bootstrap_sample(df[factor][df['Condition'] == ctl_label]) 192 | 193 | for i in range(0,len(pd.unique(df['Condition']))): 194 | 195 | # Use the ctl_bootstrap if we're now on that condition, otherwise will create a new bootstrap sample that won't be the same. 196 | if(pd.unique(df['Condition'])[i] == ctl_label): 197 | bootstrap = ctl_bootstrap 198 | else: 199 | bootstrap = bootstrap_sample(df[factor][df['Condition'] == pd.unique(df['Condition'])[i]]) 200 | 201 | difference = bootstrap - ctl_bootstrap 202 | this_cond = pd.unique(df['Condition'])[i] 203 | this_diff_df = pd.DataFrame(data={'Difference':difference, 'Condition':this_cond}) 204 | bootstrap_diff_df = bootstrap_diff_df.append(this_diff_df) 205 | 206 | 207 | # Calculate and print mean effect size for each condition 208 | mean_effect_sizes = bootstrap_diff_df.groupby('Condition')['Difference'].mean() 209 | print("Mean Effect Size for Each Condition Compared to Control:") 210 | for condition, mean_effect_size in mean_effect_sizes.items(): 211 | print(f"The effect size for {condition} compared with control is: {mean_effect_size}") 212 | 213 | return bootstrap_diff_df 214 | 215 | # Function to calculate median and mean for each condition per factor and save results to CSV 216 | def calculate_median_mean_and_save(df, factors): 217 | for factor_name in factors: 218 | result_df = df.groupby('Condition_shortlabel')[factor_name].agg(['median', 'mean']).reset_index() 219 | output_file = f'{DATA_OUTPUT}/{factor_name}_median_mean_results.csv' 220 | result_df.to_csv(output_file, index=False) 221 | 222 | # Function to perform statistical testing between two conditions for each factor and save results to CSV 223 | def perform_statistical_testing_and_save(df, factors): # , output_folder, 224 | for factor_name in factors: 225 | conditions = df['Condition_shortlabel'].unique() 226 | condition1, condition2 = conditions[:2] # Assuming only two conditions for simplicity 227 | 228 | data1 = df[df['Condition_shortlabel'] == condition1][factor_name] 229 | data2 = df[df['Condition_shortlabel'] == condition2][factor_name] 230 | 231 | 
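# Note (illustrative): stats.mannwhitneyu and stats.ttest_ind below each return a
# (statistic, p-value) pair. If normality should be checked rather than assumed, a quick
# pre-test could be added here, for example
#     normal = stats.shapiro(data1)[1] > 0.05 and stats.shapiro(data2)[1] > 0.05
# (a sketch only; the original code simply reports both tests).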
# Perform Mann-Whitney U test for non-normal data 232 | stat_mw, p_value_mw = stats.mannwhitneyu(data1, data2) 233 | 234 | # Perform t-test for normal data (assuming normality for simplicity) 235 | stat_t, p_value_t = stats.ttest_ind(data1, data2) 236 | 237 | result_df = pd.DataFrame({ 238 | 'Factor': [factor_name], 239 | 'Condition1': [condition1], 240 | 'Condition2': [condition2], 241 | 'Mann-Whitney U Statistic': [stat_mw], 242 | 'Mann-Whitney U P-Value': [p_value_mw], 243 | 't-test Statistic': [stat_t], 244 | 't-test P-Value': [p_value_t] 245 | }) 246 | 247 | output_file_mw = f'{DATA_OUTPUT}/{factor_name}_mannwhitneyu_results.csv' 248 | output_file_t = f'{DATA_OUTPUT}/{factor_name}_ttest_results.csv' 249 | # DATA_OUTPUT 250 | result_df.to_csv(output_file_mw, index=False) 251 | result_df.to_csv(output_file_t, index=False) 252 | -------------------------------------------------------------------------------- /cellPLATO/cellPLATO/data_processing/time_calculations.py: -------------------------------------------------------------------------------- 1 | # time_calculations.py 2 | 3 | from initialization.initialization import * 4 | from initialization.config import * 5 | 6 | from data_processing.clustering import cluster_purity 7 | 8 | import os 9 | import numpy as np 10 | import pandas as pd 11 | 12 | def cluster_composition_timecourse(df): 13 | 14 | df_list = [] 15 | 16 | for frame in df['frame'].unique(): 17 | 18 | # Get dataframe at this timepoint 19 | tpt_sub_df = df[df['frame'] == frame] 20 | 21 | clust_sum_df = cluster_purity(tpt_sub_df) 22 | clust_sum_df['frame'] = frame 23 | 24 | df_list.append(clust_sum_df) 25 | 26 | df_out = pd.concat(df_list) 27 | df_out['Time (min)'] = df_out['frame'] * SAMPLING_INTERVAL 28 | df_out.reset_index(inplace=True) 29 | 30 | return df_out 31 | 32 | # def time_average(df): 33 | 34 | # ''' 35 | # Needs a more descriptive name? 36 | # average_across_time()? 37 | 38 | # Function to generate a time-averaged dataframe, 39 | # where the average value for each factor across all timepoints 40 | # is calculated for each cell. 41 | 42 | # Input: 43 | # df: DataFrame [N * T * X] 44 | 45 | 46 | # Returns: 47 | # avg_df: DataFrame [N * X] 48 | # ''' 49 | 50 | # time_avg_df = pd.DataFrame() 51 | # unique_id = 0 # Create a unique cell id 52 | # rep_list = df['Replicate_ID'].unique() 53 | 54 | 55 | # for this_rep in rep_list: 56 | 57 | # rep_df = df[df['Replicate_ID']==this_rep] 58 | # print(f'Replicate: {this_rep}') 59 | # cell_ids = rep_df['particle'].unique() # Particle ids only unique for replicate, not between. 60 | # print(f'cell_ids: {cell_ids} ') 61 | 62 | # # For each cell, calculate the average value and add to new DataFrame 63 | # for cid in cell_ids: 64 | 65 | # cell_df = rep_df[rep_df['particle'] == cid] 66 | 67 | # # A test to ensure there is only one replicate label included. 68 | # assert len(cell_df['Rep_label'].unique()) == 1, 'check reps' 69 | 70 | # avg_df = cell_df.mean() # Returns a series that is the mean value for each numerical column. Non-numerical columns are dropped. 
71 | 72 | # # Add back non-numeric data 73 | # dropped_cols = list(set(cell_df.columns) - set(avg_df.index)) 74 | 75 | # for col in dropped_cols: 76 | 77 | # assert len(cell_df[col].unique()) == 1, 'Invalid assumption: uniqueness of non-numerical column values' 78 | # avg_df.loc[col] = cell_df[col].values[0] # Get the non-numerical value from dataframe (assuming all equivalent) 79 | 80 | # avg_df.loc['unique_id'] = unique_id # Add Unique cell ID for the analysis 81 | # time_avg_df = time_avg_df.append(avg_df,ignore_index=True) 82 | # unique_id += 1 83 | 84 | # time_avg_df['frame'] = 'timeaverage' # Replace the meaningless average frame values with a string desciption 85 | 86 | # return time_avg_df 87 | 88 | 89 | def time_average(df): 90 | """ 91 | Function to generate a time-averaged dataframe, 92 | where the average value for each factor across all timepoints 93 | is calculated for each unique `uniq_id`. 94 | 95 | Input: 96 | df: DataFrame with a `uniq_id` column 97 | 98 | Returns: 99 | time_avg_df: DataFrame with averaged values for each `uniq_id` 100 | """ 101 | 102 | time_avg_df = pd.DataFrame() 103 | unique_ids = df['uniq_id'].unique() 104 | 105 | for uid in unique_ids: 106 | cell_df = df[df['uniq_id'] == uid] 107 | 108 | # Calculate the mean value for each numerical column 109 | avg_df = cell_df.mean() # Returns a series 110 | 111 | # Add back non-numeric data (assuming they are consistent across the unique_id) 112 | non_numeric_cols = list(set(cell_df.columns) - set(avg_df.index)) 113 | for col in non_numeric_cols: 114 | # Check if the column is indeed non-numeric 115 | if cell_df[col].dtype == 'object' or cell_df[col].dtype == 'category': 116 | # Make sure there's only one unique value for this column in the filtered dataframe 117 | assert len(cell_df[col].unique()) == 1, f"Non-unique values found in column {col} for uniq_id {uid}" 118 | avg_df.loc[col] = cell_df[col].values[0] 119 | 120 | avg_df.loc['uniq_id'] = uid # Add the unique_id back to the dataframe 121 | time_avg_df = time_avg_df.append(avg_df, ignore_index=True) 122 | 123 | time_avg_df['frame'] = 'timeaverage' # Replace the meaningless average frame values with a string description 124 | 125 | return time_avg_df 126 | 127 | 128 | def time_average_trackmate(df): 129 | 130 | ''' 131 | Needs a more descriptive name? 132 | average_across_time()? 133 | 134 | Function to generate a time-averaged dataframe, 135 | where the average value for each factor across all timepoints 136 | is calculated for each cell. 137 | 138 | Input: 139 | df: DataFrame [N * T * X] 140 | 141 | 142 | Returns: 143 | avg_df: DataFrame [N * X] 144 | ''' 145 | 146 | time_avg_df = pd.DataFrame() 147 | unique_id = 0 # Create a unique cell id 148 | 149 | 150 | cell_ids = df['uniq_id'].unique() # Just use unique ids 151 | 152 | # For each cell, calculate the average value and add to new DataFrame 153 | for cid in cell_ids: 154 | 155 | cell_df = df[df['uniq_id'] == cid] 156 | 157 | # A test to ensure there is only one replicate label included. 158 | assert len(cell_df['Rep_label'].unique()) == 1, 'check reps' 159 | 160 | avg_df = cell_df.mean() # Returns a series that is the mean value for each numerical column. Non-numerical columns are dropped. 
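# Compatibility note (a sketch, assuming a recent pandas may be in use): DataFrame.append(),
# used below to accumulate time_avg_df, was removed in pandas 2.0. An equivalent pattern is to
# collect the per-cell Series in a list and concatenate once at the end, e.g.
#     rows.append(avg_df.to_frame().T)                    # inside the loop (hypothetical 'rows' list)
#     time_avg_df = pd.concat(rows, ignore_index=True)    # after the loop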
161 | 162 | # Add back non-numeric data 163 | dropped_cols = list(set(cell_df.columns) - set(avg_df.index)) 164 | 165 | for col in dropped_cols: 166 | 167 | # assert len(cell_df[col].unique()) == 1, 'Invalid assumption: uniqueness of non-numerical column values' 168 | # print the number of columns with the same column name in cell_df 169 | print(f'The column named {col} has this number of occurrences in cell_df: {len(cell_df[col].unique())}') 170 | 171 | avg_df.loc[col] = cell_df[col].values[0] # Get the non-numerical value from dataframe (assuming all equivalent) 172 | 173 | avg_df.loc['unique_id'] = unique_id # Add Unique cell ID for the analysis 174 | time_avg_df = time_avg_df.append(avg_df,ignore_index=True) 175 | unique_id += 1 176 | 177 | time_avg_df['frame'] = 'timeaverage' # Replace the meaningless average frame values with a string desciption 178 | 179 | return time_avg_df 180 | 181 | 182 | 183 | def average_per_timepoint(df, t_window=None): 184 | 185 | ''' 186 | For each timepoint, calculate the average across cells 187 | 188 | Note: this works for single timepoints or time windows, but 189 | doing these calculations at the level of the dataframe 190 | wont easily permit stdev and sem calculations 191 | 192 | Input: 193 | df: DataFrame [N * T * X] 194 | #poolreps: Boolean, default=False 195 | 196 | Returns: 197 | tpt_avg_df: DataFrame [T * X] 198 | 199 | ''' 200 | 201 | tptavg_df = pd.DataFrame() 202 | 203 | frame_list = df['frame'].unique() 204 | cond_list = df['Condition'].unique() 205 | rep_list = df['Replicate_ID'].unique() 206 | 207 | ''' 208 | Do we instead want to use FRAME_END? 209 | More user-controlled vs data-driven: 210 | frame_list = range(FRAME_END) 211 | ''' 212 | 213 | for frame in frame_list: 214 | 215 | if t_window is not None: 216 | # get a subset of the dataframe across the range of frames 217 | frame_df = df[(df['frame']>=frame - t_window/2) & 218 | (df['frame'] MIN_CELLS_PER_TPT): 234 | 235 | avg_df = rep_df.mean() # Returns a series that is the mean value for each numerical column. Non-numerical columns are dropped. 236 | 237 | # Add back non-numeric data 238 | dropped_cols = list(set(frame_df.columns) - set(avg_df.index)) 239 | 240 | for col in dropped_cols: 241 | 242 | # Validate assumption that sub_df has only one rep/condition, then use this value in new frame 243 | assert len(rep_df[col].unique()) == 1, 'Invalid assumption: uniqueness of non-numerical column values' 244 | avg_df.loc[col] = rep_df[col].values[0] # Get the non-numerical value from dataframe (assuming all equivalent) 245 | 246 | if t_window is None: # assertion only works when no window is used. 247 | assert avg_df.loc['frame'] == frame, 'Frame mismatch' 248 | 249 | tptavg_df = tptavg_df.append(avg_df,ignore_index=True) 250 | else: 251 | if(DEBUG): 252 | print('Skipping: ',rep, ' N = ', len(rep_df)) 253 | 254 | return tptavg_df 255 | -------------------------------------------------------------------------------- /cellPLATO/cellPLATO/initialization/__init__.py: -------------------------------------------------------------------------------- 1 | # from . import initialization 2 | # from . 
import config 3 | print('Finished running cellPLATO initialization and loaded config.') 4 | -------------------------------------------------------------------------------- /cellPLATO/cellPLATO/initialization/btrack_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "TrackerConfig": 3 | { 4 | "MotionModel": 5 | { 6 | "name": "cell_motion", 7 | "dt": 1.0, 8 | "measurements": 3, 9 | "states": 6, 10 | "accuracy": 7.5, 11 | "prob_not_assign": 0.001, 12 | "max_lost": 5, 13 | "A": { 14 | "matrix": [1,0,0,1,0,0, 15 | 0,1,0,0,1,0, 16 | 0,0,1,0,0,1, 17 | 0,0,0,1,0,0, 18 | 0,0,0,0,1,0, 19 | 0,0,0,0,0,1] 20 | }, 21 | "H": { 22 | "matrix": [1,0,0,0,0,0, 23 | 0,1,0,0,0,0, 24 | 0,0,1,0,0,0] 25 | }, 26 | "P": { 27 | "sigma": 150.0, 28 | "matrix": [0.1,0,0,0,0,0, 29 | 0,0.1,0,0,0,0, 30 | 0,0,0.1,0,0,0, 31 | 0,0,0,1,0,0, 32 | 0,0,0,0,1,0, 33 | 0,0,0,0,0,1] 34 | }, 35 | "G": { 36 | "sigma": 15.0, 37 | "matrix": [0.5,0.5,0.5,1,1,1] 38 | 39 | }, 40 | "R": { 41 | "sigma": 5.0, 42 | "matrix": [1,0,0, 43 | 0,1,0, 44 | 0,0,1] 45 | } 46 | }, 47 | "ObjectModel": 48 | {}, 49 | "HypothesisModel": 50 | { 51 | "name": "cell_hypothesis", 52 | "hypotheses": ["P_FP", "P_init", "P_term", "P_link", "P_branch", "P_dead"], 53 | "lambda_time": 5.0, 54 | "lambda_dist": 3.0, 55 | "lambda_link": 10.0, 56 | "lambda_branch": 50.0, 57 | "eta": 1e-10, 58 | "theta_dist": 50.0, 59 | "theta_time": 5.0, 60 | "dist_thresh": 75, 61 | "time_thresh": 2, 62 | "apop_thresh": 5, 63 | "segmentation_miss_rate": 0.1, 64 | "apoptosis_rate": 0.001, 65 | "relax": true 66 | } 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /cellPLATO/cellPLATO/initialization/config.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Configuration file. 
3 | Fill out this file then run the jupyter notebook to analyze your data 4 | 5 | ''' 6 | 7 | ''' 8 | Experiment-specific constants to be filled by user 9 | ''' 10 | 11 | DATA_PATH = 'D:/PATH/' # Input the path to the folder containing the data 12 | OUTPUT_PATH = 'D:/PATH_OUTPUT/' # Input the path to the folder where the output will be saved 13 | CTL_LABEL = 'CONTROL_CONDITION' # Input the name of the control condition here 14 | 15 | # Input here the folder names of the conditions you want to include in the analysis 16 | # Note: the order of the conditions here will be the order of the conditions in the plots 17 | 18 | CONDITIONS_TO_INCLUDE = ['CONTROL_CONDITION', 19 | 'CONDITION_2', 20 | 'CONDITION_3', 21 | 'CONDITION_4'] 22 | 23 | CONDITION_SHORTLABELS = ['Ctrl','One','Two','Three',] # Short labels for the conditions, for plotting purposes 24 | DATASET_SHORTNAME = 'EXAMPLE_DATASET_NAME' # give the data a nickname 25 | 26 | INPUT_FMT = 'btrack' # 'usiigaci'#btrack 27 | TRACK_FILENAME = '.h5' 28 | 29 | MICRONS_PER_PIXEL = 0.537 30 | # MICRONS_PER_PIXEL_LIST = [0.537,0.537,0.537, 0.537,] # For mixed spatial scaling 31 | # MICRONS_PER_PIXEL = MICRONS_PER_PIXEL_LIST[0] 32 | 33 | SAMPLING_INTERVAL = 40/60 # time between frames in minutes 34 | # SAMPLING_INTERVAL_LIST= [40/60,40/60,40/60,40/60,] # For mixed temporal scaling 35 | # SAMPLING_INTERVAL = SAMPLING_INTERVAL_LIST[0] 36 | 37 | IMAGE_HEIGHT = 1024 # pixels 38 | IMAGE_WIDTH = 1024 # pixels 39 | Z_SCALE = 1.00 40 | 41 | MigrationTimeWindow_minutes = 5 # Here, set the length of the time window in minutes 42 | MIG_T_WIND = round(MigrationTimeWindow_minutes / SAMPLING_INTERVAL) 43 | T_WINDOW_MULTIPLIER = 1.0 # 6.0 # For plasticity plots, to potentially increase the time window size for those calculations 44 | 45 | CLUSTER_CMAP = 'tab20' # Define colormap used for clustering plots 46 | CONDITION_CMAP = 'Dark2' #'Define colormap used for condition maps. Dark2 is good for 7 conditions, tab20 > 20 conditions. 47 | # Note: use paired for groups of 2 48 | 49 | ARREST_THRESHOLD = 3 * SAMPLING_INTERVAL # Here, user can define threshold in MICRONS PER MINUTE, because we multiply by the sampling interval to convert it to microns per frame. 50 | RIP_R = 140 # Radius to search when calculating Ripleys L in pixels. 1.5 * the size of a cell = 12+6=18 51 | 52 | DATA_FILTERS = { 53 | "area": (50, 10000), # Debris removal 54 | "ntpts": (8,1800) # Remove cells that are tracked for less than 8 frames 55 | 56 | } 57 | 58 | # Booleans to draw or not specific plots. 
59 | DRAW_SUPERPLOTS = True 60 | DRAW_DIFFPLOTS = True 61 | DRAW_MARGSCAT = True 62 | DRAW_TIMEPLOTS = True 63 | DRAW_BARPLOTS = True 64 | DRAW_SUPERPLOTS_grays = True 65 | DRAW_SNS_BARPLOTS = True 66 | 67 | 68 | ''' 69 | Measurements to make 70 | ''' 71 | 72 | # Cell migration factors calculated in migration_calcs() 73 | 74 | MIG_FACTORS = ['euclidean_dist', 75 | 'cumulative_length', 76 | 'speed', 77 | 'orientedness', 78 | 'directedness', 79 | 'turn_angle', 80 | 'endpoint_dir_ratio', 81 | 'dir_autocorr', 82 | 'outreach_ratio', 83 | 'MSD', 84 | 'max_dist', 85 | 'glob_turn_deg', 86 | 'arrest_coefficient'] 87 | 88 | # Region property factors to be extracted from the cell contours 89 | # This list must match with props from regionprops 90 | 91 | REGIONPROPS_LIST = ['area', 92 | 'bbox_area', 93 | 'eccentricity', 94 | 'equivalent_diameter', 95 | 'extent', 96 | 'filled_area', 97 | 'major_axis_length', 98 | 'minor_axis_length', 99 | 'orientation', 100 | 'perimeter', 101 | 'solidity'] 102 | 103 | SHAPE_FACTORS = ['area', 104 | 'bbox_area', 105 | 'eccentricity', 106 | 'equivalent_diameter', 107 | 'extent', 108 | 'filled_area', 109 | 'major_axis_length', 110 | 'minor_axis_length', 111 | 'orientation', 112 | 'perimeter', 113 | 'solidity'] 114 | 115 | ADDITIONAL_FACTORS = ['aspect', 'rip_p', 'rip_K', 'rip_L'] 116 | 117 | DR_FACTORS = REGIONPROPS_LIST + MIG_FACTORS + ADDITIONAL_FACTORS 118 | ALL_FACTORS = REGIONPROPS_LIST + MIG_FACTORS + ADDITIONAL_FACTORS 119 | 120 | 121 | NUM_FACTORS = DR_FACTORS + ['tSNE1', 'tSNE2', 'PC1', 'PC2'] 122 | 123 | ''' 124 | Advanced parameters (can stay default) 125 | ''' 126 | 127 | MIXED_SCALING = False # Not used yet, for futureproofing 128 | SELF_STANDARDIZE = False #STANDARDIZES ACROSS factors within a cell df. 129 | AVERAGE_TIME_WINDOWS = False #This does two things. 1) provides a time window averaged value for every metric (_tmean). 130 | # 2) gives also a ratio of the time window averaged value to the first timepoint in the time window (_tmean_ratio). 131 | 132 | CALIBRATED_POS = False # Does the data need to be calibrated? 133 | OVERWRITE = True # Overwrite the pre-processed data. 
134 | USE_INPUT_REGIONPROPS = True 135 | CALCULATE_REGIONPROPS = False 136 | USE_SHORTLABELS = True 137 | PERFORM_RIPLEYS = True 138 | ARCHIVE_CONFIG = True 139 | 140 | ''' 141 | Everything below does not need to be changed by the user 142 | ''' 143 | 144 | N_COMPONENTS = 3 #this is for UMAP 145 | UMAPS = ['UMAP1','UMAP2','UMAP3'] 146 | FRAME_START = 0 # Start frame for analysis (deprecated) 147 | FRAME_END = 180 # End frame for analysis (deprecated) 148 | MIN_CELLS_PER_TPT = 1 # used in: average_per_timepoint() 149 | 150 | CLUSTER_BY = 'umap' # temp 151 | PALETTE = 'colorblind' 152 | PX_COLORS = 'px.colors.qualitative.Safe' # Choose between discrete colors from https://plotly.com/python/discrete-color/ 153 | 154 | STATIC_PLOTS = True 155 | PLOTS_IN_BROWSER = False 156 | 157 | ANIMATE_TRAJECTORIES = True 158 | DEBUG = False 159 | 160 | # Booleans for Analysis components: 161 | '''(Only run pipelines if true)''' 162 | DIMENSION_REDUCTION = True 163 | PARAM_SWEEP = True 164 | CLUSTERING = True 165 | 166 | CLUSTER_TSNE = True 167 | CLUSTER_PCA = True 168 | CLUSTER_XY = True 169 | 170 | ############################################### 171 | # tSNE/UMAP parameters and embedding: 172 | ############################################### 173 | 174 | SCALING_METHOD = 'choice' # minmax powertransformer log2minmax choice 175 | TSNE_PERP = 185#230 # Perplexity 176 | TSNE_R_S = 11 # Random seed 177 | USE_SAVED_EMBEDDING = False#True 178 | EMBEDDING_FILENAME = 'saved_embedding.npy' 179 | TRAINX_FILENAME = 'saved_x_train.npy' 180 | UMAP_NN = 10 # Nearest-neighbors 181 | UMAP_MIN_DIST = 0.2 #0.5 182 | MIN_SAMPLES = 10 # DBScan 183 | EPS = 0.06 # DBScan 184 | 185 | ############################################### 186 | # Factor wrangling - no need to change these 187 | ############################################### 188 | 189 | # Factors to display on the animated plots 190 | MIG_DISPLAY_FACTORS=['speed', 'euclidean_dist', 'arrest_coefficient', 'turn_angle','directedness', 'dir_autocorr','orientedness'] 191 | SHAPE_DISPLAY_FACTORS = ['area','aspect','orientation'] 192 | 193 | # Factor to standardize to themselves over time (to look at self-relative instead of absolute values.) 194 | FACTORS_TO_STANDARDIZE = ['area', 195 | 'bbox_area', 196 | 'equivalent_diameter', 197 | 'filled_area', 198 | 'major_axis_length', 199 | 'minor_axis_length', 200 | 'perimeter'] 201 | 202 | FACTORS_TO_CONVERT = ['area', 'bbox_area', 'equivalent_diameter', 'extent', 'filled_area', 203 | 'major_axis_length', 'minor_axis_length', 'perimeter'] 204 | 205 | ############################################### 206 | # Plotting parameters 207 | ############################################### 208 | 209 | AXES_LIMITS = '2-sigma' #'min-max' #'2-sigma' # Currently only implemented in marginal_xy contour plots. 
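# Note on the statistical test set just below: STAT_TEST is a string naming a scipy.stats function
# ('st' is the scipy.stats alias imported in the visualization modules), so 'st.ttest_ind' refers to
# scipy.stats.ttest_ind. Other scipy.stats tests with the same call signature could plausibly be
# substituted, but only 'st.ttest_ind' is used in this template.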
210 | STAT_TEST = 'st.ttest_ind' 211 | # Plot display Parameters 212 | PLOT_TEXT_SIZE = 30 213 | DIFF_PLOT_TYPE = 'violin' # 'swarm', 'violin', 'box' 214 | 215 | # Pre-defined pairs of factors for generating comparison plots 216 | FACTOR_PAIRS = [['tSNE1', 'tSNE2'], 217 | ['area', 'speed'], 218 | ['directedness', 'speed'], 219 | ['orientedness', 'speed'], 220 | ['endpoint_dir_ratio', 'speed'], 221 | ['orientation', 'speed'], 222 | ['turn_angle', 'speed'], # These are identical 223 | ['major_axis_length', 'speed'], 224 | ['major_axis_length', 'minor_axis_length'], 225 | ['euclidean_dist','cumulative_length'], 226 | ['euclidean_dist','speed'], 227 | ['PC1', 'PC2']] 228 | 229 | # No need to change these # 230 | 231 | DIS_REGIONPROPS_LIST = ['area', 232 | # 'bbox_area', 233 | 'eccentricity', 234 | 'equivalent_diameter', 235 | # 'extent', 236 | # 'filled_area', 237 | 'major_axis_length', 238 | 'minor_axis_length', 239 | 'orientation', 240 | 'perimeter', 241 | 'solidity'] 242 | DIS_MIG_FACTORS = ['euclidean_dist', # Valid? 243 | 'cumulative_length', # Valid? 244 | 'speed', 245 | # 'orientedness', # name changed from orientation 246 | # 'directedness', 247 | # 'turn_angle', 248 | 'endpoint_dir_ratio', 249 | 'dir_autocorr', 250 | 'outreach_ratio', 251 | 'MSD', # Valid? 252 | # 'max_dist', # Valid? 253 | 'glob_turn_deg', 254 | 'arrest_coefficient'] 255 | 256 | DIS_ADDITIONAL_FACTORS = ['aspect', 'rip_L'] 257 | 258 | T_WIND_DR_FACTORS = ['MSD', 259 | 260 | # 'MSD_ratio', 261 | # 'MSD_tmean', 262 | 'area', 263 | # 'area_ratio', # Doesn't work in DR if using self-standardized because min (0) becomes inf. 264 | 'area_tmean', 265 | 'arrest_coefficient', 266 | # 'arrest_coefficient_ratio', 267 | 'arrest_coefficient_tmean', 268 | 'aspect', 269 | 'aspect_ratio', 270 | 'aspect_tmean', 271 | 'bbox_area', 272 | # 'bbox_area_ratio', 273 | 'bbox_area_tmean', 274 | 'cumulative_length', 275 | # 'cumulative_length_ratio', 276 | # 'cumulative_length_tmean', 277 | 'dir_autocorr', 278 | 'dir_autocorr_ratio', 279 | 'dir_autocorr_tmean', 280 | 'directedness', 281 | 'directedness_ratio', 282 | 'directedness_tmean', 283 | 'eccentricity', 284 | 'eccentricity_ratio', 285 | 'eccentricity_tmean', 286 | 'endpoint_dir_ratio', 287 | 'endpoint_dir_ratio_ratio', 288 | 'endpoint_dir_ratio_tmean', 289 | 'equivalent_diameter', 290 | 'equivalent_diameter_ratio', 291 | 'equivalent_diameter_tmean', 292 | 'euclidean_dist', 293 | 'euclidean_dist_ratio', 294 | 'euclidean_dist_tmean', 295 | 'extent', 296 | 'extent_ratio', 297 | 'extent_tmean', 298 | 'filled_area', 299 | # 'filled_area_ratio', # Doesn't work in DR if using self-standardized because min (0) becomes inf. 
300 | 'filled_area_tmean', 301 | 'glob_turn_deg', 302 | # 'glob_turn_deg_ratio', 303 | # 'glob_turn_deg_tmean', 304 | 'major_axis_length', 305 | # 'major_axis_length_ratio', 306 | # 'major_axis_length_tmean', 307 | 'max_dist', 308 | 'max_dist_ratio', 309 | 'max_dist_tmean', 310 | 'minor_axis_length', 311 | # 'minor_axis_length_ratio', 312 | # 'minor_axis_length_tmean', 313 | 'orientation', 314 | 'orientation_ratio', 315 | 'orientation_tmean', 316 | 'orientedness', 317 | 'orientedness_ratio', 318 | 'orientedness_tmean', 319 | 'outreach_ratio', 320 | 'outreach_ratio_ratio', 321 | 'outreach_ratio_tmean', 322 | 'perimeter', 323 | 'perimeter_ratio', 324 | 'perimeter_tmean', 325 | 'rip_K', 326 | # 'rip_K_ratio', 327 | # 'rip_K_tmean', 328 | 'rip_L', 329 | # 'rip_L_ratio', 330 | # 'rip_L_tmean', 331 | 'rip_p', 332 | # 'rip_p_ratio', 333 | # 'rip_p_tmean', 334 | 'solidity', 335 | 'solidity_ratio', 336 | 'solidity_tmean', 337 | 'speed', 338 | 'speed_ratio', 339 | 'speed_tmean', 340 | 'turn_angle', 341 | 'turn_angle_ratio', 342 | 'turn_angle_tmean'] -------------------------------------------------------------------------------- /cellPLATO/cellPLATO/initialization/initialization.py: -------------------------------------------------------------------------------- 1 | #initialization.py 2 | 3 | from initialization.config import * 4 | 5 | import os 6 | import shutil 7 | import datetime 8 | import warnings 9 | warnings.filterwarnings("ignore") 10 | 11 | TIMESTAMP = str(datetime.datetime.now()).replace(':', '-').replace('.', '-').replace(' ', '_') 12 | 13 | # print('Dataset in current notebook: ',DATASET_SHORTNAME) 14 | 15 | print('Initializing: ', DATASET_SHORTNAME) 16 | print('Hypothesis testing using: ',STAT_TEST) 17 | 18 | 19 | ''' 20 | Make the folders for exporting 21 | ''' 22 | 23 | TEMP_OUTPUT = os.path.join(OUTPUT_PATH,DATASET_SHORTNAME,TIMESTAMP,'tmp/') 24 | ANIM_OUTPUT = os.path.join(OUTPUT_PATH,DATASET_SHORTNAME,TIMESTAMP,'animations/') 25 | 26 | SAVED_DATA_PATH = os.path.join(OUTPUT_PATH,DATASET_SHORTNAME,'saved_data/') 27 | 28 | # Create timestamped folders to contain data and plots from this analysis 29 | # Main level: 30 | DATA_OUTPUT = os.path.join(OUTPUT_PATH,DATASET_SHORTNAME,TIMESTAMP,'data/') 31 | PLOT_OUTPUT = os.path.join(OUTPUT_PATH,DATASET_SHORTNAME,TIMESTAMP,'plots/') 32 | print('Plots will be exported to: ', PLOT_OUTPUT) 33 | 34 | 35 | if not os.path.exists(TEMP_OUTPUT): 36 | os.makedirs(TEMP_OUTPUT) 37 | 38 | if not os.path.exists(ANIM_OUTPUT): 39 | os.makedirs(ANIM_OUTPUT) 40 | 41 | if not os.path.exists(SAVED_DATA_PATH): 42 | os.makedirs(SAVED_DATA_PATH) 43 | 44 | if not os.path.exists(PLOT_OUTPUT): 45 | os.makedirs(PLOT_OUTPUT) 46 | 47 | if not os.path.exists(DATA_OUTPUT): 48 | os.makedirs(DATA_OUTPUT) 49 | 50 | if not os.path.exists(SAVED_DATA_PATH): 51 | os.makedirs(SAVED_DATA_PATH) 52 | 53 | 54 | print('Using unique embedding per dataset shortname: ',DATASET_SHORTNAME) 55 | EMBEDDING_PATH = os.path.join(OUTPUT_PATH,DATASET_SHORTNAME,'tsne_embedding/') 56 | 57 | 58 | 59 | # Sub folders for analysis components: 60 | COMP_DIR = os.path.join(PLOT_OUTPUT,'Comparative_analysis/') 61 | DR_DIR = os.path.join(PLOT_OUTPUT,'Dimensionality_Reduction/') 62 | CLUST_DIR = os.path.join(PLOT_OUTPUT,'Clustering/') 63 | 64 | # Sub-directories for parameter sweeping: 65 | DR_PARAMS_DIR = os.path.join(DR_DIR,'Parameter_sweep/') 66 | CLUST_PARAMS_DIR = os.path.join(CLUST_DIR,'Parameter_sweep/') 67 | 68 | if not os.path.exists(COMP_DIR): 69 | os.makedirs(COMP_DIR) 70 | 71 | if
DIMENSION_REDUCTION and not os.path.exists(DR_DIR): 72 | os.makedirs(DR_DIR) 73 | 74 | if CLUSTERING and not os.path.exists(CLUST_DIR): 75 | os.makedirs(CLUST_DIR) 76 | 77 | if PARAM_SWEEP and not os.path.exists(DR_PARAMS_DIR): 78 | os.makedirs(DR_PARAMS_DIR) 79 | 80 | if PARAM_SWEEP and not os.path.exists(CLUST_PARAMS_DIR): 81 | os.makedirs(CLUST_PARAMS_DIR) 82 | 83 | 84 | # Sub folders for plot types (Comparative) 85 | SUPERPLOT_DIR = os.path.join(COMP_DIR,'Superplots/') 86 | SUPERPLOT_grays_DIR = os.path.join(COMP_DIR,'Superplots_grays/') 87 | DIFFPLOT_DIR = os.path.join(COMP_DIR,'Plots_of_differences/') 88 | MARGSCAT_DIR = os.path.join(COMP_DIR,'Marginal_scatterplots/') 89 | TIMEPLOT_DIR = os.path.join(COMP_DIR,'Timeplots/') 90 | BAR_DIR = os.path.join(COMP_DIR,'Bar_plots/') 91 | BAR_SNS_DIR = os.path.join(COMP_DIR,'SNS_Gray_Bar_plots/') 92 | 93 | 94 | if DRAW_SUPERPLOTS and not os.path.exists(SUPERPLOT_DIR): 95 | print('Exporting static Superplots') 96 | os.makedirs(SUPERPLOT_DIR) 97 | 98 | if DRAW_SUPERPLOTS_grays and not os.path.exists(SUPERPLOT_grays_DIR): 99 | print('Exporting static Superplots') 100 | os.makedirs(SUPERPLOT_grays_DIR) 101 | 102 | if DRAW_DIFFPLOTS and not os.path.exists(DIFFPLOT_DIR): 103 | print('Exporting static Plots of Differences') 104 | os.makedirs(DIFFPLOT_DIR) 105 | 106 | if DRAW_MARGSCAT and not os.path.exists(MARGSCAT_DIR): 107 | print('Exporting static Marginal scatterplots') 108 | os.makedirs(MARGSCAT_DIR) 109 | 110 | if DRAW_TIMEPLOTS and not os.path.exists(TIMEPLOT_DIR): 111 | print('Exporting static Timeplots') 112 | os.makedirs(TIMEPLOT_DIR) 113 | 114 | if DRAW_BARPLOTS and not os.path.exists(BAR_DIR): 115 | print('Exporting Bar plots') 116 | os.makedirs(BAR_DIR) 117 | 118 | if DRAW_SNS_BARPLOTS and not os.path.exists(BAR_SNS_DIR): 119 | print('Exporting SNS Bar plots') 120 | os.makedirs(BAR_SNS_DIR) 121 | 122 | 123 | # Create the folder where the subgroup cluster outputs will go: 124 | 125 | CLUST_TSNE_DIR = os.path.join(CLUST_DIR,'tSNE/') 126 | CLUST_PCA_DIR = os.path.join(CLUST_DIR,'PCA/') 127 | CLUST_XY_DIR = os.path.join(CLUST_DIR,'xy/') 128 | CLUST_DISAMBIG_DIR = os.path.join(CLUST_DIR,'Cluster_Disambiguation/') 129 | TRAJECTORY_DISAMBIG_DIR = os.path.join(CLUST_DIR,'Trajectory_Cluster_Disambiguation/') 130 | CLUST_DISAMBIG_DIR_TAVG = os.path.join(CLUST_DIR,'Cluster_Disambiguation_tavg/') 131 | CLUSTERING_DIR = os.path.join(CLUST_DIR,'Clustering/') 132 | 133 | if not os.path.exists(CLUSTERING_DIR): 134 | os.makedirs(CLUSTERING_DIR) 135 | 136 | if not os.path.exists(CLUST_DISAMBIG_DIR): 137 | os.makedirs(CLUST_DISAMBIG_DIR) 138 | 139 | if not os.path.exists(TRAJECTORY_DISAMBIG_DIR): 140 | os.makedirs(TRAJECTORY_DISAMBIG_DIR) 141 | 142 | if not os.path.exists(CLUST_DISAMBIG_DIR_TAVG): 143 | os.makedirs(CLUST_DISAMBIG_DIR_TAVG) 144 | 145 | if CLUSTERING and CLUSTER_TSNE and not os.path.exists(CLUST_TSNE_DIR): 146 | os.makedirs(CLUST_TSNE_DIR) 147 | 148 | if CLUSTERING and CLUSTER_PCA and not os.path.exists(CLUST_PCA_DIR): 149 | os.makedirs(CLUST_PCA_DIR) 150 | 151 | if CLUSTERING and CLUSTER_XY and not os.path.exists(CLUST_XY_DIR): 152 | os.makedirs(CLUST_XY_DIR) 153 | 154 | 155 | 156 | # Some assert statements as sanity checks: 157 | assert CTL_LABEL in CONDITIONS_TO_INCLUDE, 'Be sure that CTL_LABEL in config is within the CONDITIONS_TO_INCLUDE list' 158 | 159 | if(USE_SHORTLABELS): 160 | this_cond_ind = CONDITIONS_TO_INCLUDE.index(CTL_LABEL) 161 | CTL_SHORTLABEL = CONDITION_SHORTLABELS[this_cond_ind] 162 | print('Using corresponding 
CTL_SHORTLABEL: ',CTL_SHORTLABEL, 163 | ' for condition: ', CTL_LABEL) 164 | 165 | # Archive a copy of this config file for future reference 166 | if(ARCHIVE_CONFIG): 167 | 168 | # Save a copy of config.py (exported with a .txt extension) so the run is easy to reproduce later 169 | path_to_config = 'initialization/config.py' 170 | export_path = DATA_OUTPUT + 'config_' + '.txt' #+ TIMESTAMP (removed because folder already created with timestamp in name) 171 | shutil.copyfile(path_to_config, export_path) 172 | 173 | 174 | print('Dataset in current notebook: ',DATASET_SHORTNAME) 175 | -------------------------------------------------------------------------------- /cellPLATO/cellPLATO/visualization/__init__.py: -------------------------------------------------------------------------------- 1 | # from initialization.config import * 2 | # from initialization.initialization import * 3 | # 4 | # from data_processing.cell_identifier import * 5 | # from data_processing.cleaning_formatting_filtering import * 6 | # from data_processing.clustering import * 7 | # from data_processing.data_io import * 8 | # from data_processing.data_wrangling import * 9 | # from data_processing.dimensionality_reduction import * 10 | # from data_processing.measurements import * 11 | # from data_processing.migration_calculations import * 12 | # from data_processing.pipelines import * 13 | # from data_processing.shape_calculations import * 14 | # from data_processing.statistics import * 15 | # from data_processing.time_calculations import * 16 | # from data_processing.trajectory_clustering import * 17 | # 18 | # from visualization.comparative_visualization import * 19 | # from visualization.cluster_visualization import * 20 | # from visualization.filter_visualization import * 21 | # from visualization.low_dimension_visualization import * 22 | # from visualization.panel_apps import * 23 | # from visualization.plots_of_differences import * 24 | # from visualization.small_multiples import * 25 | # from visualization.superplots import * 26 | # from visualization.timecourse_visualization import * 27 | # from visualization.trajectory_visualization import * 28 | 29 | print("Finished initializing visualizations") 30 | -------------------------------------------------------------------------------- /cellPLATO/cellPLATO/visualization/comparative_visualization.py: -------------------------------------------------------------------------------- 1 | # comparative_visualization.py 2 | 3 | from initialization.initialization import * 4 | from initialization.config import * 5 | 6 | from data_processing.data_wrangling import * 7 | from data_processing.statistics import * 8 | 9 | import numpy as np 10 | import pandas as pd 11 | import os 12 | 13 | import scipy 14 | import scipy.stats as st 15 | 16 | import seaborn as sns 17 | import matplotlib.pyplot as plt 18 | 19 | import plotly 20 | import plotly.graph_objects as go 21 | from plotly.subplots import make_subplots 22 | import plotly.express as px 23 | 24 | 25 | 26 | def scatter2dplotly_compare(comb_df, factors): 27 | 28 | ''' 29 | 2D scatter plot built with plotly graph objects, intended to visualize the 30 | results of the dimensionality reduction operations. 31 | This version is explicitly intended for comparing between conditions on the 32 | same scatter axis. 33 | 34 | 35 | Input: 36 | comb_df: DataFrame, contains combined data from multiple conditions, and/or replicates 37 | factors: 38 | color_by: Indicates what factor should be used to color the points.
39 | default='Condition' 40 | 41 | Returns: 42 | fig_data: 43 | Note: can be visualized normally by using: 44 | fig = go.Figure(fig_data) 45 | fig.show() 46 | 47 | ''' 48 | 49 | # Extract the data to be used to color-code 50 | cmaps = ['Viridis', 'inferno'] 51 | 52 | ''' 53 | For each of the conditions to be plotted, assign them a colormap. 54 | Create trace_data for each, 55 | ''' 56 | cond_list = comb_df['Condition'].unique() 57 | trace_list = [] # Keep traces in list to return, instead of fig object. 58 | for i, condition in enumerate(cond_list): 59 | 60 | sub_df = comb_df.loc[comb_df['Condition'] == condition] 61 | 62 | x = sub_df[factors[0]] 63 | y = sub_df[factors[1]] 64 | 65 | trace_data = go.Scatter( 66 | x=x, 67 | y=y, 68 | mode='markers', 69 | marker=dict( 70 | size=5, 71 | color=sub_df['frame'], # set color to an array/list of desired values 72 | colorscale=cmaps[i], # choose a colorscale 73 | opacity=0.5)) 74 | 75 | trace_list.append(trace_data) 76 | 77 | # After all replicates are drawn, THEN draw the summary stats fig_data 78 | # fig.update_layout(showlegend=False, 79 | # yaxis_title=factor) 80 | 81 | # Define fig layout as dict, to return and apply in the pipeline 82 | fig_layout={ 83 | 'xaxis_title': factors[0], 84 | 'yaxis_title': factors[1], 85 | 'showlegend': False, 86 | 'title': 'Low-dimension scatterplot' 87 | } 88 | 89 | # Create the Plotly figure from the accumulated traces and layout. 90 | scatter_comp = go.Figure() 91 | for trace in trace_list: 92 | scatter_comp.add_trace(trace) 93 | scatter_comp.update_layout(fig_layout) 94 | 95 | if STATIC_PLOTS: 96 | scatter_comp.write_image(PLOT_OUTPUT+str(factors)+"_comparative_scatter_plotly.png") 97 | 98 | if PLOTS_IN_BROWSER: 99 | scatter_comp.show() 100 | 101 | 102 | return trace_list, fig_layout #fig_data 103 | 104 | 105 | 106 | 107 | def scatter3dplotly_compare(comb_df, factors): 108 | 109 | ''' 110 | 3D scatter plot built with plotly graph objects, intended to visualize the 111 | results of the dimensionality reduction operations. 112 | This version is explicitly intended for comparing between conditions on the 113 | same scatter axis. 114 | 115 | 116 | 117 | Input: 118 | comb_df: DataFrame, contains combined data from multiple conditions, and/or replicates 119 | factors: 120 | color_by: Indicates what factor should be used to color the points. 121 | default='Condition' 122 | 123 | Returns: 124 | 125 | trace_list: 126 | fig_layout: 127 | OR 128 | fig_data: 129 | Note: can be visualized normally by using: 130 | fig = go.Figure(fig_data) 131 | fig.show() 132 | 133 | ''' 134 | 135 | # Extract the data to be used to color-code 136 | cmaps = ['Viridis', 'inferno'] 137 | 138 | ''' 139 | For each of the conditions to be plotted, assign them a colormap. 140 | Create trace_data for each, 141 | ''' 142 | cond_list = comb_df['Condition'].unique() 143 | trace_list = [] # Keep traces in list to return, instead of fig object.
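# The loop below builds one trace per condition so that each condition keeps its own colorscale
# from cmaps; note that cmaps holds only two colormaps, so this comparison view assumes two conditions.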
144 | for i, condition in enumerate(cond_list): 145 | 146 | sub_df = comb_df.loc[comb_df['Condition'] == condition] 147 | 148 | x = sub_df[factors[0]] 149 | y = sub_df[factors[1]] 150 | z = sub_df[factors[2]] 151 | 152 | trace_data = go.Scatter3d( 153 | x=x, 154 | y=y, 155 | z=z, 156 | mode='markers', 157 | marker=dict( 158 | size=5, 159 | color=sub_df['frame'], # set color to an array/list of desired values 160 | colorscale=cmaps[i], # choose a colorscale 161 | opacity=1)) 162 | 163 | trace_list.append(trace_data) 164 | 165 | 166 | # Define fig layout as dict, to return and apply in the pipeline 167 | fig_layout={ 168 | 'xaxis_title': factors[0], 169 | 'yaxis_title': factors[1], 170 | 'showlegend': False, 171 | 'title': 'Low-dimension scatterplot' 172 | } 173 | 174 | # Create the Plotly figure from the accumulated traces and layout. 175 | scatter_comp = go.Figure() 176 | for trace in trace_list: 177 | scatter_comp.add_trace(trace) 178 | scatter_comp.update_layout(fig_layout) 179 | 180 | if STATIC_PLOTS: 181 | scatter_comp.write_image(PLOT_OUTPUT+str(factors)+"_comparative_scatter_plotly.png") 182 | 183 | if PLOTS_IN_BROWSER: 184 | scatter_comp.show() 185 | 186 | return trace_list, fig_layout #fig_data 187 | 188 | 189 | 190 | def plotly_marginal_scatter(df, pair, save_path=MARGSCAT_DIR): 191 | 192 | ''' 193 | Create a plotly express scatterplot comparing multiple conditions, for a user-provided 194 | pair of factors. 195 | 196 | input: 197 | df: DataFrame 198 | pair: list of factors to compare. 199 | ''' 200 | 201 | assert len(pair) == 2, 'Marginal scatter requires 2 factors as input' 202 | 203 | fig = px.scatter(df, x=pair[0], y=pair[1], color="Condition", 204 | marginal_x="violin", marginal_y="violin", 205 | title="Comparative marginal scatter: "+str(pair)) 206 | 207 | if STATIC_PLOTS: 208 | fig.write_image(save_path+'marginal_scatter_'+str(pair)+'.png') 209 | 210 | if PLOTS_IN_BROWSER: 211 | fig.show() 212 | 213 | 214 | 215 | def marginal_xy(df, pair, plot_type='scatter', renderer='plotly', save_path=MARGSCAT_DIR, bounds=None,supp_label=''): 216 | 217 | ''' 218 | Create a scatter, contour, or hex plot with marginal distributions comparing multiple conditions, 219 | for a user-provided pair of factors. 220 | 221 | input: 222 | df: DataFrame 223 | pair: list of factors to compare.
224 | plot_type: scatter, contour, hex 225 | renderer: plotly or seaborn 226 | 227 | ''' 228 | 229 | assert len(pair) == 2, 'Marginal scatter requires 2 factors as input' 230 | 231 | cond_grouping = 'Condition' 232 | # rep_grouping = 'Replicate_ID' 233 | 234 | if(USE_SHORTLABELS): 235 | cond_grouping = 'Condition_shortlabel' 236 | # rep_grouping = 'Replicate_shortlabel' 237 | 238 | # Unpack the bounds if they exist 239 | if bounds is not None: 240 | x_min, x_max,y_min, y_max = bounds 241 | 242 | if renderer == 'plotly': 243 | 244 | if plot_type == 'scatter': 245 | fig = px.scatter(df, x=pair[0], y=pair[1], color=cond_grouping, 246 | marginal_x="violin", marginal_y="violin", 247 | title='marginal_xy_'+plot_type+ '_'+str(pair)) 248 | 249 | elif plot_type == 'contour': 250 | 251 | fig = px.density_contour(df, x=pair[0], y=pair[1], color=cond_grouping, 252 | marginal_x="violin", marginal_y="violin", 253 | title='marginal_xy_'+plot_type+ '_'+str(pair)) 254 | 255 | elif plot_type == 'hex': 256 | 257 | print('No hexbin plot type in plotly.') 258 | fig = go.Figure() 259 | 260 | if STATIC_PLOTS: 261 | fig.write_image(save_path+'marginal_xy_plotly_'+plot_type+ '_'+str(pair)+'_'+supp_label+'.png') 262 | 263 | if PLOTS_IN_BROWSER: 264 | fig.show() 265 | 266 | elif renderer == 'seaborn': 267 | 268 | if plot_type == 'scatter': 269 | 270 | # If only one condition: 271 | if len(df[cond_grouping].unique())==1: 272 | 273 | fig = sns.jointplot(data=df, x=pair[0], y=pair[1], color='black', 274 | joint_kws={'s': 1}, alpha=0.5) 275 | else: 276 | fig = sns.jointplot(data=df, x=pair[0], y=pair[1], hue = df[cond_grouping], 277 | joint_kws={'s': 1}, alpha=0.5) 278 | plt.legend(loc='best', bbox_to_anchor=(1.05, 1), borderaxespad=0.) 279 | 280 | elif plot_type == 'contour': 281 | 282 | # Use a single black KDE if there's only one condition in the provided dataset. 283 | if len(df[cond_grouping].unique())==1: 284 | 285 | fig = sns.jointplot(data=df, x=pair[0], y=pair[1], color = 'black', kind="kde", palette='magma') 286 | plt.suptitle(supp_label, y=1.05, fontsize = 16) 287 | 288 | else: 289 | fig = sns.jointplot(data=df, x=pair[0], y=pair[1], hue = df[cond_grouping],kind="kde", palette=PALETTE) 290 | plt.suptitle(supp_label, y=1.05, fontsize = 16) 291 | sns.color_palette(PALETTE, as_cmap=True) 292 | 293 | elif plot_type == 'hex': 294 | print('No multi-condition hexplot available, consider making small multiples.') 295 | fig = sns.jointplot(data=df, x=pair[0], y=pair[1],kind="hex",palette='magma') 296 | plt.suptitle(supp_label, y=1.05, fontsize = 16) 297 | sns.color_palette("magma", as_cmap=True) 298 | 299 | if bounds is not None: 300 | fig.ax_marg_x.set_xlim(x_min, x_max) 301 | fig.ax_marg_y.set_ylim(y_min, y_max) 302 | 303 | 304 | if STATIC_PLOTS: 305 | 306 | fig.savefig(save_path+'marginal_xy_sns_'+plot_type+ '_'+str(pair)+'_'+supp_label+'.png', dpi=300)#plt. 307 | 308 | 309 | 310 | def comparative_bar(df_tup, x, y, title='', height=400, to_plot='avg',error='SEM', save_path=BAR_DIR): #color='Condition' 311 | # print('THIS IS THE INPUT DF') 312 | # display(df_tup) 313 | # print(df_tup.columns) 314 | 315 | widthmultiplier = len(df_tup) 316 | print("widthmultiplier: ", widthmultiplier) 317 | 318 | ''' 319 | Simple bar plot convenience function that allows plotting of color-coded conditions either on a per-condition 320 | or per-replicate basis.
321 | Eventually to be replaced by a bar plot that includes a measure of variance plotted as error bars 322 | 323 | Inputs: 324 | df_tup: tuple of DataFrames (avg, std, n) to be plotted 325 | x: Grouping, 'Condition' 'Replicate_ID' 326 | y: factor to be visualized 327 | title: str, additional label for the saved plot. 328 | color: factor to color by, default: 'Condition' 329 | height: plot height, default: 400px 330 | to_plot: str, what to plot: 'avg' or 'n' 331 | error: Measure of variance for error bars, str: SEM or STD 332 | ''' 333 | # This part extracts an sns colormap for use in plotly express ### 334 | 335 | pal = sns.color_palette(CONDITION_CMAP) # Extract a colormap from seaborn 336 | cmap=pal.as_hex()[:] # Output it as hex colors, which plotly express accepts below 337 | 338 | # Split up the input tuple:(avg, std, n) 339 | df = df_tup[0] 340 | std_df = df_tup[1] 341 | n_df = df_tup[2] 342 | 343 | if(USE_SHORTLABELS): 344 | 345 | # df = add_shortlabels(df) 346 | grouping = 'Condition_shortlabel' 347 | 348 | # Sort the dataframe by custom category list to set draw order 349 | df[grouping] = pd.Categorical(df[grouping], CONDITION_SHORTLABELS) 350 | 351 | # Also replace the x-labels on the plot and legend. 352 | if(x=='Condition'): 353 | x = 'Condition_shortlabel' 354 | elif(x=='Replicate_ID'): 355 | x = 'Replicate_shortlabel' 356 | 357 | else: 358 | 359 | grouping = 'Condition' 360 | 361 | # Sort the dataframe by custom category list to set draw order 362 | df[grouping] = pd.Categorical(df[grouping], CONDITIONS_TO_INCLUDE) 363 | 364 | color = grouping 365 | 366 | 367 | df.sort_values(by=grouping, inplace=True, ascending=True) 368 | 369 | if error == 'SEM': 370 | y_error = std_df[y] / np.sqrt(n_df[y]) # Estimate of SEM (std / square root of n) 371 | 372 | elif error == 'STD': 373 | y_error = std_df[y] # Standard deviation 374 | 375 | if(to_plot == 'avg'): 376 | # Plot the means between groups for this factor, between conditions and between replicates.
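# (The error_y bars attached below use y_error computed above: std/sqrt(n) when error='SEM', or the
# raw standard deviation when error='STD', taken from the std and n DataFrames of the input tuple.)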
377 | fig = px.bar(df, x=x, y=y, color=color, height=height, 378 | # color_discrete_sequence=eval(PX_COLORS),#cmap 379 | color_discrete_sequence=cmap, 380 | error_y = y_error) 381 | 382 | elif(to_plot == 'n'): 383 | 384 | fig = px.bar(n_df, x=x, y=y, color=color, height=height, 385 | # color_discrete_sequence=eval(PX_COLORS), 386 | color_discrete_sequence=cmap, 387 | labels = dict(y="Number of cells")) 388 | 389 | widthofplot = 220* widthmultiplier 390 | #change the font size of the axis labels 391 | fig.update_layout(showlegend=False, 392 | # plot_bgcolor = 'white', 393 | autosize=False, 394 | width = widthofplot, 395 | height = 650, 396 | font=dict( 397 | #family="Courier New, monospace", 398 | size=PLOT_TEXT_SIZE, 399 | color="Black")) 400 | # fig.update_xaxes(tickangle=90) 401 | # Remove the x axis label 402 | fig.update_xaxes(title_text='', tickangle=45) 403 | 404 | 405 | # change the font size of the y and x axis tick labels 406 | 407 | 408 | 409 | 410 | if STATIC_PLOTS: 411 | fig.write_image(save_path+y+'_'+to_plot+'_'+title + '.png') 412 | 413 | if PLOTS_IN_BROWSER: 414 | fig.show() 415 | 416 | return fig 417 | 418 | def comparative_SNS_bar(df, save_path=BAR_SNS_DIR): 419 | import seaborn as sns 420 | whattoplot=ALL_FACTORS 421 | CLUSTER_CMAP = 'tab20' 422 | # CONDITION_CMAP = 'dark' 423 | 424 | colors = np.asarray(sns.color_palette('Greys', n_colors=6)) 425 | timestorepeat_in=(len(df['Condition'].unique()))/2 426 | timestorepeat = (np.ceil(timestorepeat_in)).astype(int) 427 | colors2=colors[2] 428 | colors3=colors[4] 429 | colors4=np.stack((colors2,colors3)) 430 | colors5 = np.tile(colors4,(timestorepeat,1)) 431 | colors=colors5 432 | 433 | import seaborn as sns 434 | sns.set_theme(style="ticks") 435 | # sns.set_palette(CONDITION_CMAP) 436 | 437 | x_lab = whattoplot 438 | plottitle = "" 439 | 440 | for factor in np.arange(len(whattoplot)): 441 | # f, ax = plt.subplots(1, 1, figsize=(10, 10)) #sharex=True 442 | f, ax = plt.subplots() #sharex=True 443 | sns.barplot(ax=ax, x="Condition_shortlabel", y=whattoplot[factor], data=df, palette=colors,capsize=.2, dodge=False) #ci=85, # estimator=np.mean, 444 | # sns.catplot(ax=ax, x="Condition_shortlabel", y=whattoplot[factor], data=df, palette=colors, kind="boxen") #errorbar=('ci', 95) 445 | sns.stripplot(ax=ax, x="Condition_shortlabel", y=whattoplot[factor], data=df, size=5, color=".1",alpha = 0.6, linewidth=0, jitter=0.2) 446 | 447 | ax.xaxis.grid(True) 448 | ax.set(xlabel="") 449 | ax.set_ylabel(whattoplot[factor], fontsize=PLOT_TEXT_SIZE) 450 | ax.set_title("", fontsize=PLOT_TEXT_SIZE) 451 | # ax.tick_params(axis='both', labelsize=36) 452 | ax.tick_params(axis='y', labelsize=PLOT_TEXT_SIZE) 453 | ax.tick_params(axis='x', labelsize=PLOT_TEXT_SIZE, rotation = 45) 454 | # f.tight_layout() 455 | plt.setp(ax.patches, linewidth=3, edgecolor='k') 456 | # fig.write_image(CLUST_DISAMBIG_DIR+'\cluster_label_counts.png') 457 | f.savefig(save_path+str(whattoplot[factor])+'_gray_barplot.png', dpi=300)#plt. 
458 | if PLOTS_IN_BROWSER: 459 | f.show() 460 | 461 | return 462 | 463 | def getaveragevalues(df_in, factorstoinclude, savepath = SAVED_DATA_PATH): 464 | cols = factorstoinclude + ['Condition_shortlabel'] 465 | df_vals=df_in[cols] 466 | 467 | df_averaged_mean = df_vals.groupby('Condition_shortlabel').mean().reset_index() 468 | df_averaged_median = df_vals.groupby('Condition_shortlabel').median().reset_index() 469 | # Save the dfs as csv files 470 | df_averaged_mean.to_csv(savepath + 'Mean_Values_per_condition.csv', index=False) 471 | df_averaged_median.to_csv(savepath + 'Median_Values_per_condition.csv', index=False) 472 | 473 | if 'label' in df_in.columns: 474 | cols = factorstoinclude + ['label'] 475 | df_labels=df_in[cols] 476 | df_cluster_labels_averaged_mean = df_labels.groupby('label').mean().reset_index() 477 | df_cluster_labels_averaged_median = df_labels.groupby('label').median().reset_index() 478 | df_cluster_labels_averaged_mean.to_csv(savepath + 'Mean_Values_per_cluster.csv', index=False) 479 | df_cluster_labels_averaged_median.to_csv(savepath + 'Median_Values_per_cluster.csv', index=False) 480 | # Can potentially include a multi groupby on the label and condition_shortlabel to get the mean and median values per cluster per condition 481 | 482 | return -------------------------------------------------------------------------------- /cellPLATO/cellPLATO/visualization/filter_visualization.py: -------------------------------------------------------------------------------- 1 | from initialization.initialization import * 2 | from initialization.config import * 3 | 4 | import numpy as np 5 | import pandas as pd 6 | import os 7 | 8 | import matplotlib.pyplot as plt 9 | import plotly.graph_objects as go 10 | from plotly.subplots import make_subplots 11 | 12 | plt.rcParams.update({ 13 | "figure.facecolor": (1.0, 1.0, 1.0, 1.0), 14 | "axes.facecolor": (1.0, 1.0, 1.0, 1.0), 15 | "savefig.facecolor": (1.0, 1.0, 1.0, 1.), 16 | "figure.figsize": (10,10), 17 | "font.size": 12 18 | }) 19 | 20 | def visualize_filtering(df, filt_counts, plot_by='xy'): 21 | 22 | assert 'included' in df.columns, 'visualize_filtering() must be run on filtered dataframe' 23 | 24 | if plot_by == 'xy': 25 | x_name = 'x_um' 26 | y_name = 'y_um' 27 | color_by = 'rip_L' 28 | x_label='x position (microns)' 29 | y_label='y position (microns)' 30 | 31 | elif plot_by == 'pca': 32 | x_name = 'PC1' 33 | y_name = 'PC2' 34 | color_by = 'label' 35 | x_label='PC1' 36 | y_label='PC2' 37 | 38 | elif (plot_by == 'tsne' or plot_by == 'tSNE'): 39 | 40 | x_name = 'tSNE1' 41 | y_name = 'tSNE2' 42 | color_by = 'label' 43 | x_label='tSNE1' 44 | y_label='tSNE2' 45 | 46 | elif plot_by == 'umap': 47 | 48 | x_name = 'UMAP1' 49 | y_name = 'UMAP2' 50 | color_by = 'label' 51 | x_label = 'UMAP1' 52 | y_label = 'UMAP2' 53 | 54 | fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=[20,10]) 55 | 56 | df_filt = df[df['included'] == True] 57 | 58 | ax1.scatter(x=df[x_name], y=df[y_name], color='gray', s=0.5) 59 | ax1.scatter(x=df_filt[x_name], y=df_filt[y_name], c=df_filt[color_by], s=5) # 60 | ax1.set_xlabel(x_label) 61 | ax1.set_ylabel(y_label) 62 | 63 | filt_cond = ['Pre-filtering'] 64 | counts = [len(df['uniq_id'].unique())] 65 | 66 | for filt in filt_counts: 67 | filt_cond.append(filt[0]) 68 | counts.append(filt[1]) 69 | 70 | filt_cond.append('Post-filtering') 71 | counts.append(len(df_filt['uniq_id'].unique())) 72 | 73 | ax2.bar(filt_cond,counts) 74 | ax2.set_ylabel('Number of cells') 75 | 76 | return fig 77 | 78 | 79 | def visualize_filt_loss(): 80 | 81 | # Labels as names of
exported dataframes 82 | labels = ['comb_df', 83 | 'mig_df', 84 | 'dr_df-prefilt', 85 | 'dr_df_filt'] 86 | 87 | # Add the programatically generated names for the filtered outputs 88 | # From the DATA_FILTERS dictionary 89 | for i,factor in enumerate(DATA_FILTERS.keys()): 90 | labels.append('filt_'+str(i)+'-'+factor) 91 | 92 | # Load each of the DataFrames into a list 93 | df_list = [] 94 | for label in labels: 95 | df_list.append(pd.read_csv(DATA_OUTPUT + label+'.csv')) 96 | 97 | # Set up the subplot figure. 98 | fig = make_subplots( 99 | rows=2, cols=len(df_list), 100 | # subplot_titles=(labels), 101 | specs=[[{} for _ in range(len(df_list))], 102 | [{'colspan': len(df_list)}, *[None for _ in range(len(df_list)-1)]]]) 103 | 104 | count = [] 105 | 106 | for i, df in enumerate(df_list): #enumerate here to get access to i 107 | label=labels[i] 108 | count.append(len(df.index)) 109 | 110 | fig.add_trace(go.Scatter(x=df['x'], 111 | y=df['y'], 112 | opacity=0.5), 113 | row=1, 114 | col=i+1) 115 | 116 | fig.add_trace(go.Scatter(x=labels, y=count), 117 | row=2, col=1) 118 | 119 | fig.update_yaxes(rangemode="tozero") 120 | fig.update_xaxes(tickangle=-90) 121 | fig.update_layout(showlegend=False) 122 | 123 | if STATIC_PLOTS: 124 | fig.write_image(PLOT_OUTPUT+'filter_loss.png') 125 | 126 | if PLOTS_IN_BROWSER: 127 | fig.show() 128 | -------------------------------------------------------------------------------- /cellPLATO/cellPLATO/visualization/low_dimension_visualization.py: -------------------------------------------------------------------------------- 1 | #low_dimension_visualization.py 2 | 3 | from initialization.config import * 4 | from initialization.initialization import * 5 | 6 | from data_processing.clustering import hdbscan_clustering 7 | from data_processing.dimensionality_reduction import * 8 | from data_processing.shape_calculations import * 9 | 10 | 11 | import numpy as np 12 | import pandas as pd 13 | import os 14 | import imageio 15 | 16 | import plotly 17 | import plotly.graph_objects as go 18 | from plotly.subplots import make_subplots 19 | import seaborn as sns 20 | 21 | # matplotlib imports 22 | import matplotlib.pyplot as plt 23 | plt.rcParams['image.cmap'] = 'viridis' 24 | plt.rcParams.update({ 25 | "figure.facecolor": (1.0, 1.0, 1.0, 1.0), 26 | "axes.facecolor": (1.0, 1.0, 1.0, 1.0), 27 | "savefig.facecolor": (1.0, 1.0, 1.0, 1.), 28 | "figure.figsize": (10,10), 29 | "font.size": 12 30 | }) 31 | 32 | 33 | from mpl_toolkits.mplot3d.art3d import Line3DCollection 34 | from matplotlib.collections import LineCollection 35 | from matplotlib import colors as mcolors 36 | from matplotlib.colors import ListedColormap, BoundaryNorm 37 | 38 | plt.rcParams.update({ 39 | "figure.facecolor": (1.0, 1.0, 1.0, 1.0), 40 | "axes.facecolor": (1.0, 1.0, 1.0, 1.0), 41 | "savefig.facecolor": (1.0, 1.0, 1.0, 1.), 42 | "figure.figsize": (10,10), 43 | "font.size": 12 44 | }) 45 | 46 | # Datashader imports 47 | import datashader as ds 48 | import datashader.transfer_functions as tf 49 | from datashader.utils import export_image 50 | from matplotlib import cm 51 | 52 | from sklearn.preprocessing import MinMaxScaler 53 | 54 | import math 55 | import ternary 56 | 57 | def correlation_matrix(df): 58 | 59 | f = plt.figure(figsize=(19, 15)) 60 | plt.matshow(df.corr(), fignum=f.number, cmap='viridis') 61 | plt.xticks(range(df.select_dtypes(['number']).shape[1]), df.select_dtypes(['number']).columns, fontsize=14, rotation=90) 62 | plt.yticks(range(df.select_dtypes(['number']).shape[1]), 
df.select_dtypes(['number']).columns, fontsize=14) 63 | cb = plt.colorbar() 64 | 65 | return f 66 | 67 | def pca_factor_vis(df, pca_tuple, dr_factors=DR_FACTORS): 68 | 69 | pca_df, components, expl = pca_tuple#do_pca(df[dr_factors]) 70 | 71 | fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2,figsize=(15, 15)) 72 | 73 | ax1.imshow(components) 74 | ax1.set_yticklabels(dr_factors) 75 | ax1.set_yticks(np.arange(len(dr_factors))) 76 | ax1.set_xticklabels(range(1,len(components))) 77 | ax1.set_xticks(range(0,len(components[0]))) 78 | ax1.title.set_text('Principal components') 79 | ax1.set_xlabel('Principal Component') 80 | ax1.set_ylabel('Factor') 81 | 82 | ax2.imshow(components*expl) 83 | ax2.set_yticklabels(dr_factors) 84 | ax2.set_yticks(np.arange(len(dr_factors))) 85 | ax2.set_xticklabels(range(1,len(components))) 86 | ax2.set_xticks(range(0,len(components[0]))) 87 | ax2.title.set_text('Components * variance explained') 88 | ax2.set_xlabel('Principal Component') 89 | ax2.set_ylabel('Factor') 90 | 91 | ax3.imshow(np.expand_dims(np.sum(components*expl,axis=1), axis=1)) 92 | ax3.set_yticklabels(dr_factors) 93 | ax3.set_yticks(np.arange(len(dr_factors))) 94 | ax3.set_xticks(range(0)) 95 | ax3.set_ylabel('Factor') 96 | ax3.title.set_text('Sum variance contribution per factor') 97 | 98 | ax4.plot(expl) 99 | ax4.title.set_text('Variance accounted for') 100 | ax4.set_xlabel('Principal Component') 101 | factor_variance = np.sum(components*expl,axis=1) 102 | 103 | 104 | if STATIC_PLOTS: 105 | 106 | plt.savefig(DR_DIR + 'pca_variance.png', format='png', dpi=600) 107 | 108 | return fig 109 | 110 | def pca_factor_matrix(df,pca_tuple, dr_factors=DR_FACTORS, ax=None): 111 | 112 | # If no axis is supplied, then create a simple fig, ax and default to drawing the points.
113 | if ax is None: 114 | fig, ax = plt.subplots() 115 | fig.patch.set_facecolor('white') 116 | 117 | x = df[dr_factors].values 118 | 119 | pca_df, components, expl = pca_tuple#do_pca(x) 120 | dr_df = pd.concat([df,pca_df], axis=1) 121 | 122 | # Make the matrix plot 123 | # fig, ax = plt.subplots(1, 1,figsize=(15, 20)) 124 | # fig.patch.set_facecolor('white') 125 | ax.imshow(components) 126 | ax.set_yticklabels(dr_factors) 127 | ax.set_yticks(np.arange(len(dr_factors))) 128 | ax.set_xticklabels(range(1,len(components))) 129 | ax.set_xticks(range(0,len(components[0]))) 130 | ax.title.set_text('Principal components') 131 | ax.set_xlabel('Principal Component') 132 | ax.set_ylabel('Factor') 133 | 134 | # if ax is None: 135 | 136 | return ax, dr_df 137 | 138 | def colormap_pcs(dr_df, cmap = 'rgb'): 139 | 140 | pcs = np.asarray(dr_df[['PC1','PC2','PC3']]) 141 | scaler = MinMaxScaler() 142 | scaler.fit(pcs) 143 | pc_colors = scaler.transform(pcs) 144 | 145 | if cmap != 'rgb': 146 | 147 | if cmap == 'cmy': 148 | 149 | pc_colors = rgb2cmy(pc_colors) 150 | 151 | pc_colors = np.clip(pc_colors, 0, 1) 152 | 153 | return pc_colors 154 | 155 | 156 | def rgb2cmy(rgb_arr): 157 | 158 | ''' 159 | Allows recoloring the 3-factor rgb array into cyan, magenta, yellow 160 | ''' 161 | 162 | x = rgb_arr[:,0] 163 | y = rgb_arr[:,1] 164 | z = rgb_arr[:,2] 165 | 166 | w = 255 167 | x_color = x * w #/ float(scale) 168 | y_color = y * w #/ float(scale) 169 | z_color = z * w #/ float(scale) 170 | 171 | r = np.abs(w - y_color) / w 172 | g = np.abs(w - x_color) / w 173 | b = np.abs(w - z_color) / w 174 | 175 | color_arr = np.c_[r,g,b] 176 | print(color_arr.shape) 177 | 178 | 179 | return color_arr 180 | 181 | 182 | def datashader_lines(df_in, x,y,color_by='Condition', output_res=500, aspect=1,categorical=False, export=False, identifier = ''): 183 | 184 | df = df_in.copy() 185 | 186 | # Need to add conditions as category datatype to use multi-color datashader 187 | width = output_res 188 | height = int(output_res / aspect) 189 | 190 | cvs = ds.Canvas(plot_width=width, plot_height=height)#,x_range=x_range, y_range=y_range) 191 | 192 | if categorical: 193 | 194 | df['Cat'] = df[color_by].astype('category') 195 | # Multicolor categorical 196 | agg = cvs.line(df, x, y, agg=ds.count_cat('Cat')) 197 | img = tf.set_background(tf.shade(agg, how='eq_hist'),"black") 198 | 199 | else: 200 | 201 | agg = cvs.line(df, x, y, agg=ds.count()) 202 | img = tf.set_background(tf.shade(agg, cmap=cm.inferno, how='linear'),"black") 203 | 204 | if STATIC_PLOTS: 205 | 206 | # plt.savefig(CLUST_DIR+label+'.png', dpi=300) 207 | figname = CLUST_DIR+identifier+'_datashaderlines.png' 208 | export_image(img, figname, background="black") 209 | 210 | return img 211 | 212 | 213 | def spatial_img_coloc(df_in, xy='tSNE',thresh=2,n_bins=50): 214 | 215 | ''' 216 | Visualize dimensionally reduced space as a histogram and perfrom image 217 | collocalization between the images. 218 | 219 | TO DO: Update this to work for the inputted conditions... 220 | By Default it assumes the first is the control and the second is for comparison. 221 | ''' 222 | 223 | 224 | if(xy == 'tsne' or xy == 'tSNE'): 225 | 226 | x_lab = 'tSNE1' 227 | y_lab = 'tSNE2' 228 | 229 | elif xy == 'PCA': 230 | 231 | x_lab = 'PC1' 232 | y_lab = 'PC2' 233 | 234 | elif (xy == 'umap' or xy == 'UMAP'): 235 | 236 | x_lab = 'UMAP1' 237 | y_lab = 'UMAP2' 238 | 239 | df = df_in.copy() 240 | 241 | # Get the list of conditions included in the dataframe. By default show the first two. 
242 | cond_list = df['Condition_shortlabel'].unique() 243 | print(cond_list) 244 | 245 | fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(10,10)) 246 | 247 | ax1.title.set_text('Combined set') 248 | # ax2.title.set_text(CTL_LABEL) 249 | # ax3.title.set_text(CONDITIONS_TO_INCLUDE[1]) 250 | # ax2.title.set_text(CONDITION_SHORTLABELS[0]) 251 | # ax3.title.set_text(CONDITION_SHORTLABELS[1]) 252 | ax2.title.set_text(cond_list[0]) 253 | ax3.title.set_text(cond_list[1]) 254 | ax4.title.set_text('Colocalization') 255 | 256 | 257 | # xy_range = [[-60, 60], [-40, 40]] 258 | xy_range = [[np.min(df[x_lab]), np.max(df[x_lab])], [np.min(df[y_lab]), np.max(df[y_lab])]] 259 | 260 | H, xedges, yedges = np.histogram2d(df[x_lab], df[y_lab], bins=n_bins, range=xy_range, normed=None, weights=None, density=None) 261 | H = H.T 262 | ax1.imshow(H) 263 | # ctl_df = df[df['Condition']==CTL_LABEL] 264 | ctl_df = df[df['Condition_shortlabel']==cond_list[0]]#CONDITION_SHORTLABELS[0]] 265 | H_ctl, xedges, yedges = np.histogram2d(ctl_df[x_lab], ctl_df[y_lab], bins=n_bins, range=xy_range, normed=None, weights=None, density=None) 266 | H_ctl = H_ctl.T 267 | ax2.imshow(H_ctl) 268 | 269 | comp_df = df[df['Condition_shortlabel']==cond_list[1]]#CONDITION_SHORTLABELS[1]] 270 | H_comp, xedges, yedges = np.histogram2d(comp_df[x_lab], comp_df[y_lab], bins=n_bins, range=xy_range, normed=None, weights=None, density=None) 271 | H_comp = H_comp.T 272 | ax3.imshow(H_comp) 273 | 274 | # Image Colocalization 275 | 276 | # Inds that will be max value 277 | thresh_1 = thresh 278 | thresh_2 = thresh 279 | inds = (H_comp > thresh_1) & (H_ctl > thresh_2) 280 | 281 | # Convert inds to white 282 | 283 | # im=H_ctl 284 | im = np.zeros(H_ctl.shape) 285 | im[inds] = 1000.0 # An arbitrarily high intensity value so you'll effectively only see this in the plot 286 | 287 | ax4.imshow(im) 288 | 289 | # Invert axes to be consistent with the scatter plots 290 | ax1.invert_yaxis() 291 | ax2.invert_yaxis() 292 | ax3.invert_yaxis() 293 | ax4.invert_yaxis() 294 | 295 | return fig 296 | 297 | 298 | 299 | def dr_contour_matrix(df_in,n_grid_pts=10, dr_method='tSNE', t_window=None): 300 | 301 | ''' 302 | 303 | n_grid_pts 304 | ''' 305 | 306 | df = df_in.copy() 307 | 308 | if(dr_method == 'tsne' or dr_method == 'tSNE'): 309 | 310 | x_lab = 'tSNE1' 311 | y_lab = 'tSNE2' 312 | 313 | elif dr_method == 'PCA': 314 | 315 | x_lab = 'PC1' 316 | y_lab = 'PC2' 317 | 318 | elif dr_method == 'umap': 319 | 320 | x_lab = 'UMAP1' 321 | y_lab = 'UMAP2' 322 | 323 | 324 | # Make the figure 325 | fig, ax = plt.subplots(1, 1, figsize=(10,10))#(ax1, ax2), (ax3, ax4) 326 | 327 | # ax.scatter(x=df[x_lab],y=df[y_lab],c='gray', alpha=0.1, s=1) 328 | pc_colors = colormap_pcs(df, cmap='rgb') # cmap='cmy' 329 | ax.scatter(x=df[x_lab],y=df[y_lab], alpha=0.5, s=1, c=pc_colors) 330 | 331 | 332 | # Create a meshgrid covering the area of DR space 333 | x_bounds = [np.min(df[x_lab]), np.max(df[x_lab])] 334 | y_bounds = [np.min(df[y_lab]), np.max(df[y_lab])] 335 | 336 | xs = np.linspace(x_bounds[0], x_bounds[1], n_grid_pts) 337 | ys = np.linspace(y_bounds[0], y_bounds[1], n_grid_pts) 338 | 339 | xx, yy = np.meshgrid(xs,ys, indexing='ij') 340 | 341 | # Lists to store the shapes 342 | df_list = [] 343 | traj_list = [] 344 | 345 | for i in range(n_grid_pts): 346 | 347 | for j in range(n_grid_pts): 348 | 349 | grid_x = xx[i,j] 350 | grid_y = yy[i,j] 351 | 352 | plt.scatter(x=grid_x,y=grid_y,c='black', alpha=0.3, s=2) 353 | 354 | # Find the closest cell to this. 
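# (Euclidean distances from this grid point to every cell's embedding coordinates are computed below,
# and the nearest row is taken as the representative cell for this grid position.)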
355 | dr_arr = df[[x_lab,y_lab]].values 356 | 357 | # Calculate the distance between grid points and DR points 358 | distances = np.sqrt((dr_arr[:,0] - grid_x)**2 + (dr_arr[:,1] - grid_y)**2) 359 | 360 | # Sort, but keep indices 361 | dist_inds = np.argsort(distances) 362 | row_ind = dist_inds[0] # The first is the closest point 363 | 364 | # Gee the sub dataframe of this cell 365 | row_df = df.loc[row_ind].to_frame().transpose() 366 | 367 | df_list.append(row_df) 368 | 369 | # if distances[row_ind] < 5: 370 | 371 | plt.scatter(x=row_df[x_lab],y=row_df[y_lab],c='red', alpha=0.3, s=5) 372 | 373 | # Get a dataframe containing the cells that fall closest to the grid points 374 | grid_cell_df = pd.concat(df_list) 375 | grid_cell_df.sort_index(inplace=True) 376 | 377 | 378 | # For each of these cells, extract their track. 379 | for i,row in grid_cell_df.iterrows(): 380 | 381 | this_rep = row['Replicate_ID'] 382 | this_cell_id = row['particle'] 383 | frame = row['frame'] 384 | 385 | # Get sub_df for cell from row 386 | cell_df = df[(df['Replicate_ID']==this_rep) & 387 | (df['particle']==this_cell_id)] 388 | 389 | 390 | if t_window is not None: 391 | 392 | # get a subset of the dataframe across the range of frames 393 | cell_df = cell_df[(cell_df['frame']>=frame - t_window/2) & 394 | (cell_df['frame'] 0): 74 | y_low = 0 75 | 76 | y_high = np.mean(data[factor]) + 3 * np.std(data[factor]) 77 | 78 | filtered_df = data[(data[factor].values>y_low) & (data[factor].values < y_high)] 79 | n_dropped = len(data[~data.isin(filtered_df)].dropna()) 80 | ax.set(ylim=(y_low, y_high)) 81 | print('Custom axis using 3 sigma rule, axis bounds not showing ' + str(n_dropped) + ' point(s): ') 82 | 83 | if STATIC_PLOTS and DRAW_SUPERPLOTS: 84 | 85 | plt.savefig(save_path + factor +'_superplots_sns_t_'+str(t)+'.png', format='png', dpi=600) 86 | 87 | 88 | def superplots_plotly(df_in, factor, t=FRAME_END, grid=False, save_path=SUPERPLOT_DIR): 89 | 90 | ''' 91 | A function to implement the 'superplots' from Lord et al 2020, 92 | where eperimental replicates within pooled conditions are plotted such that they can be distinguished. 
93 | 94 | df_in: a pandas DataFrame with the column headers:Replicate,Treatment,Speed 95 | 96 | This plot started its life as a boxplot: 97 | https://plotly.com/python/reference/box/ 98 | 99 | ''' 100 | df = df_in.copy() 101 | 102 | # Get a colormap the length of unique replicates 103 | replicates = df['Replicate_ID'].unique() 104 | colors = np.asarray(sns.color_palette(PALETTE, n_colors=len(replicates))) 105 | 106 | sp_df = format_for_superplots(df, factor,t) 107 | # print(sp_df) 108 | # print(sp_df['Replicate'].unique()) 109 | # print('Check this is correct!') 110 | 111 | if(USE_SHORTLABELS): # Must instead sort by shortlabel list order 112 | # Sort the dataframe by custom category list to set draw order 113 | sp_df['Treatment'] = pd.Categorical(sp_df['Treatment'], CONDITION_SHORTLABELS) 114 | else: 115 | # Sort the dataframe by custom category list to set draw order 116 | sp_df['Treatment'] = pd.Categorical(sp_df['Treatment'], CONDITIONS_TO_INCLUDE) 117 | 118 | sp_df.sort_values(by='Treatment', inplace=True, ascending=True) 119 | # sp_df.reset_index(inplace=True, drop=True) 120 | 121 | 122 | # Extract the actual treatment names 123 | treatment_list = list(pd.unique(sp_df['Treatment'])) 124 | # print(len(colors)) 125 | # print(treatment_list) 126 | # assert len(colors) == len(treatment_list), 'Color range should equal the number of conditons (treatments)' 127 | 128 | fig = go.Figure() 129 | 130 | # For each condition, 131 | for treatment in treatment_list: 132 | 133 | stat_list = [] # List to contain the sumary statistic plotted on top. 134 | treat_subdf = sp_df.loc[sp_df['Treatment'] == treatment] 135 | rep_list = pd.unique(treat_subdf['Replicate']) 136 | n_reps = len(rep_list) 137 | # For each replicate 138 | for i, rep in enumerate(rep_list): # Using enumerate to keep track of the # of reps 139 | ''' 140 | Note: This is needed to manually force the summary points to spread out along 141 | the x-dimension. Need to specify their position, based on the number of replicates. 142 | Important: 143 | rep gives the id of the replicate, which determines the color. This may 144 | or may not be shared with the other condition, depending on input data. 145 | i is the index relative to len(rep_list), used to distinguish between replicates 146 | of the same group. 147 | ''' 148 | # Use seperate index to choose colors 149 | if(i < len(colors)): 150 | ci = i 151 | else: 152 | ci = i - len(colors) 153 | 154 | rel_pos = -0.5 + i / n_reps 155 | rep_subdf = treat_subdf.loc[treat_subdf['Replicate'] == rep] 156 | 157 | # Draw the swarm plots 158 | fig.add_trace(go.Box(y=rep_subdf[factor].values,#y0, 159 | name=treatment,#treatment_list[0], 160 | opacity=1, 161 | marker={ 162 | 'color':'rgb' + str(tuple(colors[ci,:])),# tuple(colors[ci,:])#rep] 163 | }, 164 | fillcolor='rgba(0,0,0,0)', 165 | boxpoints='all', 166 | jitter=0.8, 167 | line={ 168 | 'width': 0 169 | }, 170 | pointpos=0)) 171 | 172 | # Save trace data to a list to draw summary stats on top. 
173 | trace_data = go.Box(y=[np.mean(rep_subdf[factor].values)],#y0)], 174 | name=treatment, 175 | opacity=1, 176 | marker={ 177 | 'size':20, 178 | 'color': 'rgb' + str(tuple(colors[ci,:])),#colors[ci,:],#rep], 179 | 'line': { 180 | 'color': 'black', 181 | 'width': 2 182 | } 183 | }, 184 | fillcolor='rgba(0,0,0,0)', 185 | boxpoints='all', 186 | jitter=0, 187 | line={ 188 | 'width': 0 189 | }, 190 | pointpos=rel_pos) 191 | 192 | stat_list.append(trace_data) 193 | 194 | # After all replicates are drawn, THEN draw the summary stats fig_data 195 | for stat in stat_list: 196 | fig.add_trace(stat) 197 | 198 | fig.update_layout(showlegend=False, 199 | plot_bgcolor = 'white', 200 | yaxis_title=factor, 201 | title_text="Superplots: "+factor, 202 | font=dict( 203 | #family="Courier New, monospace", 204 | size=PLOT_TEXT_SIZE, #CHANGED BY MJS 205 | # size=PLOT_TEXT_SIZE, 206 | color="Black")) 207 | 208 | # Show the axis frame, and optionally the grid 209 | fig.update_xaxes(showline=True, linewidth=1, linecolor='black') 210 | fig.update_yaxes(showline=True, linewidth=1, linecolor='black') 211 | 212 | if(grid): 213 | fig.update_yaxes(showgrid=True, gridwidth=0.5, gridcolor='black') 214 | 215 | 216 | if STATIC_PLOTS and DRAW_SUPERPLOTS: 217 | 218 | fig.write_image(save_path + factor +'_superplots_plotly_t_'+str(t)+'.png') 219 | 220 | if PLOTS_IN_BROWSER: 221 | fig.show() 222 | 223 | # Superplots retuns the figure object, not to be added to subplot figure 224 | return fig# graphJSON = json.dumps(fig, cls=plotly.utils.PlotlyJSONEncoder) 225 | 226 | def superplots_plotly_grays(df_in, factor, t=FRAME_END, grid=False, save_path=SUPERPLOT_grays_DIR): 227 | 228 | ''' 229 | A function to implement the 'superplots' from Lord et al 2020, 230 | where eperimental replicates within pooled conditions are plotted such that they can be distinguished. 
231 | 232 | df_in: a pandas DataFrame with the column headers:Replicate,Treatment,Speed 233 | 234 | This plot started its life as a boxplot: 235 | https://plotly.com/python/reference/box/ 236 | 237 | ''' 238 | df = df_in.copy() 239 | 240 | # Get a colormap the length of unique replicates 241 | replicates = df['Replicate_ID'].unique() 242 | # colors = np.asarray(sns.color_palette(PALETTE, n_colors=len(replicates))) 243 | # colors = np.asarray(sns.color_palette('Greys', n_colors=len(replicates))) 244 | colors = np.asarray(sns.color_palette('Greys', n_colors=6)) 245 | timestorepeat_in=(len(replicates))/2 246 | timestorepeat = (np.ceil(timestorepeat_in)).astype(int) 247 | colors2=colors[2] 248 | colors3=colors[4] 249 | colors4=np.stack((colors2,colors3)) 250 | colors5 = np.tile(colors4,(timestorepeat,1)) 251 | colors=colors5 252 | 253 | sp_df = format_for_superplots(df, factor,t) 254 | 255 | if(USE_SHORTLABELS): # Must instead sort by shortlabel list order 256 | # Sort the dataframe by custom category list to set draw order 257 | sp_df['Treatment'] = pd.Categorical(sp_df['Treatment'], CONDITION_SHORTLABELS) 258 | else: 259 | # Sort the dataframe by custom category list to set draw order 260 | sp_df['Treatment'] = pd.Categorical(sp_df['Treatment'], CONDITIONS_TO_INCLUDE) 261 | 262 | sp_df.sort_values(by='Treatment', inplace=True, ascending=True) 263 | # sp_df.reset_index(inplace=True, drop=True) 264 | 265 | 266 | # Extract the actual treatment names 267 | treatment_list = list(pd.unique(sp_df['Treatment'])) 268 | # print(len(colors)) 269 | # print(treatment_list) 270 | # assert len(colors) == len(treatment_list), 'Color range should equal the number of conditons (treatments)' 271 | 272 | fig = go.Figure() 273 | 274 | # For each condition, 275 | for treatment in treatment_list: 276 | 277 | stat_list = [] # List to contain the sumary statistic plotted on top. 278 | treat_subdf = sp_df.loc[sp_df['Treatment'] == treatment] 279 | rep_list = pd.unique(treat_subdf['Replicate']) 280 | n_reps = len(rep_list) 281 | # For each replicate 282 | for i, rep in enumerate(rep_list): # Using enumerate to keep track of the # of reps 283 | ''' 284 | Note: This is needed to manually force the summary points to spread out along 285 | the x-dimension. Need to specify their position, based on the number of replicates. 286 | Important: 287 | rep gives the id of the replicate, which determines the color. This may 288 | or may not be shared with the other condition, depending on input data. 289 | i is the index relative to len(rep_list), used to distinguish between replicates 290 | of the same group. 291 | ''' 292 | # Use seperate index to choose colors 293 | if(i < len(colors)): 294 | ci = i 295 | else: 296 | ci = i - len(colors) 297 | 298 | rel_pos = -0.5 + i / n_reps 299 | rep_subdf = treat_subdf.loc[treat_subdf['Replicate'] == rep] 300 | 301 | # Draw the swarm plots 302 | fig.add_trace(go.Box(y=rep_subdf[factor].values,#y0, 303 | name=treatment,#treatment_list[0], 304 | opacity=1, 305 | marker={ 306 | 'color':'rgb' + str(tuple(colors[ci,:])),# tuple(colors[ci,:])#rep] 307 | }, 308 | fillcolor='rgba(0,0,0,0)', 309 | boxpoints='all', 310 | jitter=0.8, 311 | line={ 312 | 'width': 0 313 | }, 314 | pointpos=0)) 315 | 316 | # Save trace data to a list to draw summary stats on top. 
317 | trace_data = go.Box(y=[np.mean(rep_subdf[factor].values)],#y0)], 318 | name=treatment, 319 | opacity=1, 320 | marker={ 321 | 'size':20, 322 | 'color': 'rgb' + str(tuple(colors[ci,:])),#colors[ci,:],#rep], 323 | 'line': { 324 | 'color': 'black', 325 | 'width': 2 326 | } 327 | }, 328 | fillcolor='rgba(0,0,0,0)', 329 | boxpoints='all', 330 | jitter=0, 331 | line={ 332 | 'width': 0 333 | }, 334 | pointpos=rel_pos) 335 | 336 | stat_list.append(trace_data) 337 | 338 | # After all replicates are drawn, THEN draw the summary stats fig_data 339 | for stat in stat_list: 340 | fig.add_trace(stat) 341 | 342 | fig.update_layout(showlegend=False, 343 | plot_bgcolor = 'white', 344 | yaxis_title=factor, 345 | title_text="Superplots: "+factor, 346 | font=dict( 347 | #family="Courier New, monospace", 348 | size=PLOT_TEXT_SIZE, #CHANGED BY MJS 349 | # size=PLOT_TEXT_SIZE, 350 | color="Black")) 351 | 352 | # Show the axis frame, and optionally the grid 353 | fig.update_xaxes(showline=True, linewidth=1, linecolor='black') 354 | fig.update_yaxes(showline=True, linewidth=1, linecolor='black') 355 | 356 | if(grid): 357 | fig.update_yaxes(showgrid=True, gridwidth=0.5, gridcolor='black') 358 | 359 | 360 | if STATIC_PLOTS and DRAW_SUPERPLOTS: 361 | 362 | fig.write_image(save_path + factor +'_superplots_plotly_t_'+str(t)+'.png') 363 | 364 | if PLOTS_IN_BROWSER: 365 | fig.show() 366 | 367 | # Superplots retuns the figure object, not to be added to subplot figure 368 | return fig# graphJSON = json.dumps(fig, cls=plotly.utils.PlotlyJSONEncoder) 369 | -------------------------------------------------------------------------------- /cellPLATO/environment.yml: -------------------------------------------------------------------------------- 1 | name: cellplato 2 | channels: 3 | - conda-forge 4 | - david_baddeley 5 | - defaults 6 | - anaconda 7 | dependencies: 8 | - ca-certificates=2022.9.24=h5b45459_0 9 | - nodejs=18.11.0=h57928b3_0 10 | - openssl=3.0.5=h8ffe710_1 11 | - pip=22.1.2=pyhd8ed1ab_0 12 | - python=3.7.12=h900ac77_100_cpython 13 | - python_abi=3.7=2_cp37m 14 | - setuptools=63.1.0=py37h03978a9_0 15 | - sqlite=3.39.0=h8ffe710_0 16 | - ucrt=10.0.20348.0=h57928b3_0 17 | - vc=14.2=hb210afc_6 18 | - vs2015_runtime=14.29.30037=h902a5da_6 19 | - wheel=0.37.1=pyhd8ed1ab_0 20 | - pip: 21 | - alabaster==0.7.12 22 | - anyio==3.6.1 23 | - appdirs==1.4.4 24 | - argon2-cffi==21.3.0 25 | - argon2-cffi-bindings==21.2.0 26 | - astunparse==1.6.3 27 | - attrdict==2.0.1 28 | - attrs==22.1.0 29 | - babel==2.10.3 30 | - backcall==0.2.0 31 | - beautifulsoup4==4.11.1 32 | - bleach==5.0.1 33 | - bokeh==2.3.3 34 | - btrack==0.4.0 35 | - cachetools==4.2.4 36 | - cachey==0.2.1 37 | - certifi==2022.6.15 38 | - cffi==1.15.1 39 | - chardet==3.0.4 40 | - click==8.1.3 41 | - cloudpickle==2.1.0 42 | - colorama==0.4.5 43 | - colorcet==3.0.0 44 | - cvxopt==1.3.0 45 | - cycler==0.11.0 46 | - cython==0.29.30 47 | - cytoolz==0.11.2 48 | - dask==2.30.0 49 | - datashader==0.13.0 50 | - datashape==0.5.2 51 | - debugpy==1.6.2 52 | - decorator==5.1.1 53 | - defusedxml==0.7.1 54 | - distributed==2.30.1 55 | - docstring-parser==0.14.1 56 | - docutils==0.18.1 57 | - entrypoints==0.4 58 | - et-xmlfile==1.1.0 59 | - eth-abi==1.3.0 60 | - eth-account==0.3.0 61 | - eth-hash==0.3.3 62 | - eth-keyfile==0.5.1 63 | - eth-keys==0.2.4 64 | - eth-rlp==0.2.1 65 | - eth-typing==2.3.0 66 | - eth-utils==1.10.0 67 | - fastjsonschema==2.15.3 68 | - flask==1.0.2 69 | - fonttools==4.38.0 70 | - freetype-py==2.3.0 71 | - google-api-core==1.31.6 72 | - google-auth==1.35.0 73 
| - google-cloud==0.34.0 74 | - google-cloud-core==1.7.2 75 | - google-cloud-storage==1.23.0 76 | - google-resumable-media==0.5.1 77 | - googleapis-common-protos==1.56.3 78 | - h5py==3.7.0 79 | - hdbscan==0.8.28 80 | - heapdict==1.0.1 81 | - hexbytes==0.1.0 82 | - hsluv==5.0.3 83 | - idna==2.8 84 | - imageio==2.21.1 85 | - imagesize==1.4.1 86 | - importlib-metadata==4.12.0 87 | - importlib-resources==5.9.0 88 | - ipykernel==6.15.1 89 | - ipython==7.34.0 90 | - ipython-genutils==0.2.0 91 | - ipywidgets==7.7.1 92 | - itsdangerous==2.1.2 93 | - jedi==0.18.1 94 | - jinja2==3.0.1 95 | - joblib==1.1.0 96 | - json5==0.9.8 97 | - jsonschema==4.9.1 98 | - jupyter==1.0.0 99 | - jupyter-client==7.3.4 100 | - jupyter-console==6.4.4 101 | - jupyter-core==4.10.0 102 | - jupyter-server==1.18.0 103 | - jupyterlab==3.4.3 104 | - jupyterlab-pygments==0.2.2 105 | - jupyterlab-server==2.10.3 106 | - jupyterlab-widgets==1.1.1 107 | - kaleido==0.1.0.post1 108 | - kiwisolver==1.4.4 109 | - llvmlite==0.34.0 110 | - locket==1.0.0 111 | - lru-dict==1.1.7 112 | - magicgui==0.5.1 113 | - markdown==3.3.7 114 | - markupsafe==2.1.1 115 | - matplotlib==3.5.3 116 | - matplotlib-inline==0.1.3 117 | - mistune==0.8.4 118 | - msgpack==1.0.4 119 | - multipledispatch==0.6.0 120 | - napari==0.4.12 121 | - napari-console==0.0.4 122 | - napari-plugin-engine==0.2.0 123 | - napari-svg==0.1.6 124 | - nbclassic==0.4.0 125 | - nbclient==0.6.6 126 | - nbconvert==6.5.0 127 | - nbformat==5.4.0 128 | - nest-asyncio==1.5.5 129 | - networkx==2.6.3 130 | - notebook==6.4.12 131 | - notebook-shim==0.1.0 132 | - numba==0.51.2 133 | - numpy==1.21.0 134 | - numpydoc==1.4.0 135 | - openpyxl==3.0.7 136 | - opentsne==0.5.1 137 | - packaging==21.3 138 | - pandas==1.2.3 139 | - pandocfilters==1.5.0 140 | - panel==0.11.0 141 | - param==1.12.2 142 | - parsimonious==0.8.1 143 | - parso==0.8.3 144 | - partd==1.2.0 145 | - patsy==0.5.2 146 | - pdoc==12.0.2 147 | - pickleshare==0.7.5 148 | - pillow==9.2.0 149 | - pint==0.18 150 | - pkgutil-resolve-name==1.3.10 151 | - plotly==4.14.1 152 | - prometheus-client==0.14.1 153 | - prompt-toolkit==3.0.30 154 | - protobuf==3.20.1 155 | - psutil==5.9.1 156 | - psygnal==0.3.5 157 | - pyasn1-modules==0.2.8 158 | - pycparser==2.21 159 | - pycryptodome==3.15.0 160 | - pyct==0.4.8 161 | - pydantic==1.9.1 162 | - pygments==2.12.0 163 | - pynndescent==0.5.7 164 | - pyopengl==3.1.6 165 | - pyparsing==3.0.9 166 | - pypiwin32==223 167 | - pyrsistent==0.18.1 168 | - python-dateutil==2.8.2 169 | - python-ternary==1.0.8 170 | - pyviz-comms==2.2.0 171 | - pywavelets==1.3.0 172 | - pywin32==304 173 | - pywinpty==2.0.7 174 | - pyyaml==6.0 175 | - pyzmq==23.2.0 176 | - qtconsole==5.3.1 177 | - qtpy==2.1.0 178 | - requests==2.21.0 179 | - retrying==1.3.3 180 | - rlp==2.0.1 181 | - rsa==4.8 182 | - scikit-image==0.16.1 183 | - scikit-learn==0.23.2 184 | - scipy==1.7.3 185 | - seaborn==0.12.2 186 | - send2trash==1.8.0 187 | - similaritymeasures==0.4.4 188 | - simplification==0.6.1 189 | - sniffio==1.2.0 190 | - snowballstemmer==2.2.0 191 | - sortedcontainers==2.4.0 192 | - soupsieve==2.3.2.post1 193 | - sphinx==5.0.2 194 | - sphinxcontrib-applehelp==1.0.2 195 | - sphinxcontrib-devhelp==1.0.2 196 | - sphinxcontrib-htmlhelp==2.0.0 197 | - sphinxcontrib-jsmath==1.0.1 198 | - sphinxcontrib-qthelp==1.0.3 199 | - sphinxcontrib-serializinghtml==1.1.5 200 | - statsmodels==0.12.2 201 | - superqt==0.3.2 202 | - tblib==1.7.0 203 | - terminado==0.15.0 204 | - threadpoolctl==3.1.0 205 | - tifffile==2021.11.2 206 | - tinycss2==1.1.1 207 | - 
toolz==0.11.2 208 | - tornado==6.2 209 | - tqdm==4.60.0 210 | - traitlets==5.3.0 211 | - typing-extensions==4.3.0 212 | - umap-learn==0.5.2 213 | - urllib3==1.24.3 214 | - vispy==0.11.0 215 | - wcwidth==0.2.5 216 | - web3==4.8.2 217 | - webencodings==0.5.1 218 | - websocket-client==1.3.3 219 | - websockets==6.0 220 | - werkzeug==2.1.2 221 | - widgetsnbextension==3.6.1 222 | - wrapt==1.14.1 223 | - xarray==0.14.1 224 | - zict==2.2.0 225 | prefix: C:\ProgramData\Anaconda3\envs\cellplato_gitversion 226 | -------------------------------------------------------------------------------- /cellPLATO/environment_oldversion.yml: -------------------------------------------------------------------------------- 1 | name: cellPLATO 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - ca-certificates=2022.6.15=h5b45459_0 7 | - openssl=3.0.4=h8ffe710_2 8 | - pip=22.1.2=pyhd8ed1ab_0 9 | - python=3.7.12=h900ac77_100_cpython 10 | - python_abi=3.7=2_cp37m 11 | - setuptools=63.1.0=py37h03978a9_0 12 | - sqlite=3.39.0=h8ffe710_0 13 | - ucrt=10.0.20348.0=h57928b3_0 14 | - vc=14.2=hb210afc_6 15 | - vs2015_runtime=14.29.30037=h902a5da_6 16 | - wheel=0.37.1=pyhd8ed1ab_0 17 | - pip: 18 | - alabaster==0.7.12 19 | - anyio==3.6.1 20 | - appdirs==1.4.4 21 | - astunparse==1.6.3 22 | - attrdict==2.0.1 23 | - babel==2.10.3 24 | - beautifulsoup4==4.11.1 25 | - bokeh==2.3.3 26 | - btrack==0.4.0 27 | - cachetools==4.2.4 28 | - cachey==0.2.1 29 | - certifi==2022.6.15 30 | - chardet==3.0.4 31 | - click==8.1.3 32 | - cloudpickle==2.1.0 33 | - colorcet==3.0.0 34 | - cvxopt==1.3.0 35 | - cython==0.29.30 36 | - cytoolz==0.11.2 37 | - dask==2.30.0 38 | - datashader==0.13.0 39 | - datashape==0.5.2 40 | - distributed==2.30.1 41 | - docstring-parser==0.14.1 42 | - docutils==0.18.1 43 | - et-xmlfile==1.1.0 44 | - eth-abi==1.3.0 45 | - eth-account==0.3.0 46 | - eth-hash==0.3.3 47 | - eth-keyfile==0.5.1 48 | - eth-keys==0.2.4 49 | - eth-rlp==0.2.1 50 | - eth-typing==2.3.0 51 | - eth-utils==1.10.0 52 | - fastjsonschema==2.15.3 53 | - flask==1.0.2 54 | - freetype-py==2.3.0 55 | - google-api-core==1.31.6 56 | - google-auth==1.35.0 57 | - google-cloud==0.34.0 58 | - google-cloud-core==1.7.2 59 | - google-cloud-storage==1.23.0 60 | - google-resumable-media==0.5.1 61 | - googleapis-common-protos==1.56.3 62 | - hdbscan==0.8.28 63 | - heapdict==1.0.1 64 | - hexbytes==0.1.0 65 | - hsluv==5.0.3 66 | - idna==2.8 67 | - imagesize==1.4.1 68 | - importlib-metadata==4.12.0 69 | - itsdangerous==2.1.2 70 | - jinja2==3.0.1 71 | - joblib==1.1.0 72 | - json5==0.9.8 73 | - jupyter==1.0.0 74 | - jupyter-client==7.3.4 75 | - jupyter-console==6.4.4 76 | - jupyter-core==4.10.0 77 | - jupyter-server==1.18.0 78 | - jupyterlab==3.4.3 79 | - jupyterlab-server==2.10.3 80 | - kaleido==0.1.0.post1 81 | - llvmlite==0.34.0 82 | - locket==1.0.0 83 | - lru-dict==1.1.7 84 | - magicgui==0.5.1 85 | - markdown==3.3.7 86 | - markupsafe==2.1.1 87 | - matplotlib==3.1.1 88 | - msgpack==1.0.4 89 | - multipledispatch==0.6.0 90 | - napari==0.4.12 91 | - napari-console==0.0.4 92 | - napari-plugin-engine==0.2.0 93 | - napari-svg==0.1.6 94 | - nbclassic==0.4.0 95 | - nbconvert==6.5.0 96 | - nbformat==5.4.0 97 | - nest-asyncio==1.5.5 98 | - notebook-shim==0.1.0 99 | - numba==0.51.2 100 | - numpy==1.21.0 101 | - numpydoc==1.4.0 102 | - openpyxl==3.0.7 103 | - opentsne==0.5.1 104 | - pandas==1.2.3 105 | - panel==0.11.0 106 | - param==1.12.2 107 | - parsimonious==0.8.1 108 | - partd==1.2.0 109 | - patsy==0.5.2 110 | - pdoc==12.0.2 111 | - pillow==9.2.0 112 | - pint==0.18 113 
| - plotly==4.14.1 114 | - protobuf==3.20.1 115 | - psutil==5.9.1 116 | - psygnal==0.3.5 117 | - pyasn1==0.4.8 118 | - pyasn1-modules==0.2.8 119 | - pycryptodome==3.15.0 120 | - pyct==0.4.8 121 | - pydantic==1.9.1 122 | - pygments==2.12.0 123 | - pynndescent==0.5.7 124 | - pyopengl==3.1.6 125 | - pypiwin32==223 126 | - python-dateutil==2.8.2 127 | - python-ternary==1.0.8 128 | - pyviz-comms==2.2.0 129 | - pyyaml==6.0 130 | - pyzmq==23.2.0 131 | - qtconsole==5.3.1 132 | - qtpy==2.1.0 133 | - requests==2.21.0 134 | - rlp==2.0.1 135 | - rsa==4.8 136 | - scikit-image==0.16.1 137 | - scikit-learn==0.23.2 138 | - scipy==1.7.3 139 | - seaborn==0.11.0 140 | - send2trash==1.8.0 141 | - similaritymeasures==0.4.4 142 | - simplification==0.6.1 143 | - sniffio==1.2.0 144 | - snowballstemmer==2.2.0 145 | - sortedcontainers==2.4.0 146 | - soupsieve==2.3.2.post1 147 | - sphinx==5.0.2 148 | - sphinxcontrib-applehelp==1.0.2 149 | - sphinxcontrib-devhelp==1.0.2 150 | - sphinxcontrib-htmlhelp==2.0.0 151 | - sphinxcontrib-jsmath==1.0.1 152 | - sphinxcontrib-qthelp==1.0.3 153 | - sphinxcontrib-serializinghtml==1.1.5 154 | - statsmodels==0.12.2 155 | - superqt==0.3.2 156 | - tblib==1.7.0 157 | - threadpoolctl==3.1.0 158 | - tifffile==2021.11.2 159 | - tinycss2==1.1.1 160 | - toolz==0.11.2 161 | - tornado==6.2 162 | - tqdm==4.60.0 163 | - traitlets==5.3.0 164 | - umap-learn==0.5.2 165 | - urllib3==1.24.3 166 | - vispy==0.11.0 167 | - web3==4.8.2 168 | - websocket-client==1.3.3 169 | - websockets==6.0 170 | - werkzeug==2.1.2 171 | - wrapt==1.14.1 172 | - xarray==0.14.1 173 | - zict==2.2.0 174 | prefix: C:\Users\tyler\Anaconda3\envs\cellPLATO 175 | -------------------------------------------------------------------------------- /cellPLATO/images/cellPLATOlogo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Michael-shannon/cellPLATO/8d19af543653479bf34c8833da041ae195ce03dd/cellPLATO/images/cellPLATOlogo.png -------------------------------------------------------------------------------- /cellPLATO/requirements.txt: -------------------------------------------------------------------------------- 1 | #btrack==0.4.5 2 | requests==2.21.0 3 | #pandas==0.25.3 4 | 5 | datashader==0.13.0 6 | Flask==1.0.2 7 | google-cloud==0.34.0 8 | google-cloud-storage==1.23.0 9 | h5py==3.1.0 10 | hdbscan==0.8.28 11 | hexbytes==0.1.0 12 | imageio==2.6.1 13 | ipykernel 14 | jinja2==3.0.1 15 | jupyter==1.0.0 16 | jupyterlab 17 | kaleido==0.1.0.post1 18 | llvmlite==0.38 19 | matplotlib==3.1.1 20 | #napari 21 | notebook 22 | #numpy==1.19.5 23 | numpy==1.21 24 | openpyxl==3.0.7 25 | openTSNE==0.5.1 26 | pandas==1.2.3 27 | panel==0.11.0 28 | plotly==4.14.1 29 | python-ternary==1.0.8 30 | scikit-image==0.16.1 31 | scikit-learn==0.23.2 32 | scipy==1.7.3 33 | simplification==0.6.1 34 | statsmodels==0.12.2 35 | seaborn==0.11.0 36 | similaritymeasures==0.4.4 37 | tqdm==4.60 38 | umap-learn==0.5.2 39 | web3==4.8.2 40 | -------------------------------------------------------------------------------- /cellPLATO/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | setup( 4 | name='cellPLATO', 5 | packages=find_packages(), 6 | ) 7 | -------------------------------------------------------------------------------- /cellPLATO/tests/testing.py: -------------------------------------------------------------------------------- 1 | #testing.py 2 | 3 | 4 | ''' 5 | Old Module: 6 | ''' 7 | # 8 | 9 | # # 
Import everything:
10 | # import sys
11 | # sys.path.append("..") # Adds higher directory to python modules path.
12 | 
13 | 
14 | # from old_module.config import *
15 | # from old_module.comparative_visualization import *
16 | # from old_module.spacetimecube import *
17 | # from old_module.data_visualization import *
18 | # from old_module.data_processing import time_average, average_per_condition, clean_comb_df, migration_calcs, format_for_superplots
19 | # from old_module.data_processing import get_data_matrix, do_tsne, do_pca, dbscan_clustering, get_label_counts
20 | # from old_module.data_processing import factor_calibration, stats_table
21 | # from old_module.combine_compare import load_data, get_experiments, combine_dataframes, csv_summary
22 | # from old_module.tsne_embedding import do_open_tsne
23 | # from old_module.pipelines import process_ind_exp
24 | # from old_module.panel_app import *
25 | # from old_module.param_sweep import *
26 | # from old_module.segmentations import *
27 | #
28 | # from old_module.dev_funcs_uncategorized import *
29 | 
30 | 
31 | '''
32 | New Module:
33 | '''
34 | 
35 | from initialization.config import *
36 | 
37 | from data_processing.cell_identifier import *
38 | from data_processing.cleaning_formatting_filtering import *
39 | from data_processing.clustering import *
40 | from data_processing.data_io import *
41 | from data_processing.data_wrangling import *
42 | from data_processing.dimensionality_reduction import *
43 | from data_processing.measurements import *
44 | from data_processing.migration_calculations import *
45 | from data_processing.pipelines import *
46 | from data_processing.shape_calculations import *
47 | from data_processing.time_calculations import *
48 | from data_processing.trajectory_clustering import *
49 | 
50 | from visualization.cluster_visualization import *
51 | from visualization.filter_visualization import *
52 | from visualization.low_dimension_visualization import *
53 | from visualization.panel_apps import *
54 | from visualization.timecourse_visualization import *
55 | from visualization.trajectory_visualization import *
56 | 
57 | print('Successfully imported all modules without error.')
58 | 
--------------------------------------------------------------------------------
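
Note: tests/testing.py above is a bare import smoke test; it star-imports each cellPLATO subpackage and prints a confirmation message. A minimal sketch of reproducing that check from the repository root is shown below. It assumes an environment built from cellPLATO/environment.yml (or cellPLATO/requirements.txt) is active, and that the bare imports resolve once the inner cellPLATO/cellPLATO directory is on sys.path, as the import style in testing.py implies. The three imports are a representative subset, not the full list; the script name is illustrative only.

    # smoke_check.py -- illustrative sketch, not part of the repository
    import sys

    # testing.py imports subpackages without a top-level "cellPLATO." prefix,
    # so the inner package directory must be importable directly.
    sys.path.insert(0, "cellPLATO/cellPLATO")

    from initialization.config import *                    # experiment settings
    from data_processing.pipelines import *                # processing pipelines
    from visualization.trajectory_visualization import *   # plotting helpers

    print("Representative cellPLATO modules imported without error.")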